def build_film_reviews():
    """Assemble the labelled film reviews from the nltk movie reviews corpus.

    Walks every category of the corpus and every file id within it, runs the
    file's word list through preprocess_reviews_keep_stop_words, and pairs the
    result with its category. The full list is pickled to 'movie_reviews.pkl'
    before being handed back.

    @returns {List} reviews - (preprocessed words, category) tuples"""
    labelled_reviews = []
    for label in movie_reviews.categories():
        for file_id in movie_reviews.fileids(label):
            tokens = list(movie_reviews.words(file_id))
            cleaned = preprocess_reviews_keep_stop_words(tokens)
            labelled_reviews.append((cleaned, label))

    save_pickle(labelled_reviews, 'movie_reviews.pkl')
    return labelled_reviews
def heartbeat():
    """
    Fetches the current Bittrex data, reduces it to a dictionary of
    qualifying markets, and compares it against the saved history to find
    the coins whose price is rising fastest.

    The reduced dictionary is saved to 'fasttick_history', history outside
    the configured lookback is deleted, and the result is meant for display
    in tkinter listboxes.

    :return: A list containing triples of (coin name, increase rate, volume)
    """
    api_response = misc.retrieve_data()

    # Keep only BTC-based markets that reach the configured price and
    # volume minimums; only their 'Summary' section is stored.
    latest_data = {}
    for entry in api_response.get('result', {}):
        name = entry.get('Market', {}).get('MarketCurrencyLong', '')
        last_price = entry.get('Summary', {}).get('Last', 0.0)
        last_vol = entry.get('Summary', {}).get('BaseVolume', 0.0)
        base_currency = entry.get('Market', {}).get('BaseCurrency', '')
        qualifies = (base_currency == 'BTC'
                     and last_price >= config.FASTTICK_MIN_PRICE
                     and last_vol >= config.FASTTICK_MIN_VOL)
        if qualifies:
            latest_data[name] = {'Summary': entry['Summary']}

    # Saved history within the lookback window, with the fresh data appended
    # as the final snapshot.
    snapshots = list(
        misc.open_pickles('fasttick_history', config.FASTTICK_LOOKBACK))
    snapshots.append(latest_data)
    consecutive_pairs = list(zip(snapshots, snapshots[1:]))

    def last_price_in(snapshot, coin):
        # Coins missing from a snapshot count as price 0.0.
        return float(
            snapshot.get(coin, {}).get('Summary', {}).get('Last', 0.0))

    ticker_data = []
    for name in latest_data:
        # Percentage change between each pair of neighbouring snapshots;
        # pairs whose earlier price is 0 are skipped (no valid base).
        changes = []
        for earlier, later in consecutive_pairs:
            old_price = last_price_in(earlier, name)
            new_price = last_price_in(later, name)
            if old_price != 0:
                changes.append(((new_price - old_price) / old_price) * 100)

        if not changes:
            continue

        volume = float(
            latest_data.get(name, {}).get('Summary', {}).get('BaseVolume', 0.0))
        average_rate = float(sum(changes) / len(changes))
        if average_rate >= config.FASTTICK_MIN_RATE:
            ticker_data.append((name, average_rate, volume))

    misc.save_pickle(latest_data, 'fasttick_history')
    misc.delete_ancient_pickles('fasttick_history', config.FASTTICK_LOOKBACK)
    return ticker_data
def build_classifier():
    """Build the film classifier from the film reviews and persist it.

    The reviews are shuffled in place, the first 1600 are used to train a
    Classifier and the remainder to test it. The test results are passed to
    analysis, and the trained classifier is pickled to "filmClassifier.pkl".

    @returns {Classifier} filmClassifier"""
    split_at = 1600  # number of shuffled reviews used for training

    all_reviews = get_film_reviews()
    shuffle(all_reviews)
    training_reviews = all_reviews[:split_at]
    held_out_reviews = all_reviews[split_at:]

    filmClassifier = Classifier(training_reviews)
    filmClassifier.train()
    analysis(filmClassifier.test(held_out_reviews))

    save_pickle(filmClassifier, "filmClassifier.pkl")
    return filmClassifier
def vis_validate(loader, net, device, dtype, path):
    """
    Function for visualizing network performance on one batch of validation data

    Only the first batch produced by the loader is used. Its ground-truth
    masks and predictions are saved as HDF5 data and as individual images,
    and the batch names are saved as a pickle. If the loader yields no
    batches, the output directories are still created but nothing is saved.

    Input(s):
    - loader (PyTorch loader object): loader for queueing minibatches; each
      batch is unpacked as (x, y, name)
    - net (module object): Pytorch network module object (set to eval mode)
    - device (PyTorch device)
    - dtype (PyTorch datatype)
    - path (str): output prefix; 'val_masks/' and 'val_preds/' are appended
      to it by plain string concatenation, so it should end with a path
      separator

    Output(s): [None]
    """
    mask_path = path + 'val_masks/'
    pred_path = path + 'val_preds/'
    force_dir(mask_path)
    force_dir(pred_path)

    with torch.no_grad():
        for (x, y, name) in loader:
            x = x.to(device=device, dtype=dtype)
            y = y.to(device=device, dtype=torch.long)

            # Make predictions
            scores = net(x)

            # Define ground-truth and predictions. Predictions are channel 1
            # of the softmax taken over dim 1
            # (presumably the foreground class -- TODO confirm).
            batch_masks = y.detach().cpu().numpy()
            batch_preds = F.softmax(scores, dim=1).detach().cpu().numpy()[:, 1, :, :]
            break
        else:
            # The loop never reached `break`, i.e. the loader was empty.
            # Without this guard the code below would fail on unbound names.
            return None

    # Save data
    save_h5(data=batch_masks, path=mask_path + 'mask_data.h5')
    save_h5(data=batch_preds, path=pred_path + 'pred_data.h5')
    save_pickle(data=name, path=pred_path + 'pred_name.pickle')

    # Save images; each entry of `name` is used directly as the file name.
    # NOTE(review): predictions are softmax outputs in [0, 1] and are written
    # unscaled -- confirm this is the intended image range.
    for i, n in enumerate(name):
        # Save ground-truth mask
        cv2.imwrite(mask_path + n, batch_masks[i])

        # Save mask prediction
        cv2.imwrite(pred_path + n, batch_preds[i])

    return None
def heartbeat():
    """
    Processes data from Bittrex into a simpler dictionary, calls the save
    function on it, deletes the oldest saved dictionary(if it's out of
    lookback range), and finally creates a list of the best coins to be
    used in tkinter listboxes.

    The % change of a coin is measured from its price in the saved history
    to its price in the data just retrieved, so a rising price gives a
    positive change. Coins with no usable historical price (missing or 0)
    are skipped. If there is no saved history at all, the result is empty.

    :return: A list containing [coin name, % change, volume] entries, with
             % change and volume rounded to two decimals
    """
    data = misc.retrieve_data()

    # Processing for saving latest data from Bittrex API: keep only
    # BTC-based markets that reach the configured price and volume minimums.
    latest_data = {}
    for d in data.get('result', {}):
        name = d.get('Market', {}).get('MarketCurrencyLong', '')
        last_price = d.get('Summary', {}).get('Last', 0.0)
        last_vol = d.get('Summary', {}).get('BaseVolume', 0.0)
        base_currency = d.get('Market', {}).get('BaseCurrency', '')
        if base_currency == 'BTC' and last_price >= \
                config.SLOWTICK_MIN_PRICE and last_vol >= config.SLOWTICK_MIN_VOL:
            latest_data[name] = {'Summary': d['Summary']}

    # Processing change between prev data and new, returning
    # change for output in GUI
    prev_data = list(misc.open_pickles('slowtick_history', config.SLOWTICK_LOOKBACK))

    ticker_data = []
    if prev_data:
        # Reference snapshot from the saved history.
        # NOTE(review): the last element is used, as before -- confirm the
        # ordering returned by misc.open_pickles matches the intended window.
        reference = prev_data[-1]
        for name in latest_data:
            summary = latest_data[name]['Summary']
            volume = float(summary.get('BaseVolume', 0.0))
            # The old price comes from history and the new price from the
            # fresh data. Reading them the other way round inverts the sign
            # of the change and reports falling coins as gainers.
            old_price = float(reference.get(name, {}).get('Summary', {}).get('Last', 0.0))
            new_price = float(summary.get('Last', 0.0))
            if old_price != 0:
                change = (((new_price - old_price) / old_price) * 100)
                if change >= config.SLOWTICK_MIN_CHANGE:
                    ticker_data.append([name,
                                        float(f'{change:.02f}'),
                                        float(f'{volume:.02f}')])

    misc.save_pickle(latest_data, 'slowtick_history')
    misc.delete_ancient_pickles('slowtick_history', config.SLOWTICK_LOOKBACK)
    return ticker_data
def build_collaborative_recommender():
    """Build, fit and persist the collaborative recommender.

    The chatbot users' film ratings are extended with the stock movie lens
    ratings and loaded into a dataframe, from which only the user id, film id
    and rating columns are kept. The algorithm picked by choose_algorithm is
    fitted on the full trainset, run against the anti-testset, and pickled to
    'collaborative.pkl'.

    @returns {Algorithm Object} algorithm"""
    all_ratings = db.getAllUserRatings()
    all_ratings.extend(db.getAllMlUserRatings())

    ratings_frame = pd.DataFrame(
        all_ratings,
        columns=['MLUR_ID', 'UserID', 'FilmID', 'Liked', 'Rating'])
    # Keep only the three columns the dataset loader is given.
    ratings_frame = ratings_frame[['UserID', 'FilmID', 'Rating']]

    dataset = Dataset.load_from_df(ratings_frame, Reader(rating_scale=(1, 5)))
    full_trainset = dataset.build_full_trainset()
    anti_testset = full_trainset.build_anti_testset()

    algorithm = choose_algorithm(dataset)
    algorithm.fit(full_trainset)
    algorithm.test(anti_testset)

    save_pickle(algorithm, 'collaborative.pkl')
    return algorithm