예제 #1
0
def build_film_reviews():
    """Build the labelled review list from the nltk movie reviews corpus.

    Each file of each corpus category is read as a list of words, passed
    through ``preprocess_reviews_keep_stop_words`` and paired with its
    category. The finished list is handed to ``save_pickle`` under the name
    'movie_reviews.pkl' before being returned.
    @returns {List} reviews - (preprocessed words, category) tuples"""
    labelled_reviews = []
    for label in movie_reviews.categories():
        for file_id in movie_reviews.fileids(label):
            words = list(movie_reviews.words(file_id))
            labelled_reviews.append(
                (preprocess_reviews_keep_stop_words(words), label))
    save_pickle(labelled_reviews, 'movie_reviews.pkl')
    return labelled_reviews
예제 #2
0
def heartbeat():
    """
    Processes data from Bittrex into a simpler dictionary,
    calls the save function on it, deletes the oldest
    saved dictionary(if it's out of lookback range), and finally
    creates a list of the best coins to be used in tkinter listboxes.

    The rate reported for a coin is the mean of the percentage price
    changes between consecutive snapshots (the stored history followed by
    the snapshot just retrieved). Pairs whose older price is 0 or missing
    are left out of the mean; coins with no usable pair are not reported.
    :return: A list containing triples of (coin name, increase rate, volume)
    """
    data = misc.retrieve_data()

    # Processing for saving latest data from Bittrex API.
    # `or []` also covers a response whose 'result' key is present but null.
    latest_data = {}
    for d in data.get('result') or []:
        market = d.get('Market', {})
        summary = d.get('Summary', {})
        if (market.get('BaseCurrency', '') == 'BTC'
                and summary.get('Last', 0.0) >= config.FASTTICK_MIN_PRICE
                and summary.get('BaseVolume', 0.0) >= config.FASTTICK_MIN_VOL):
            name = market.get('MarketCurrencyLong', '')
            latest_data[name] = {'Summary': summary}

    # Processing all stored ticks + latest and returning
    #  rate for output in GUI
    snapshots = list(
        misc.open_pickles('fasttick_history', config.FASTTICK_LOOKBACK))
    snapshots.append(latest_data)
    ticker_data = []
    for name, entry in latest_data.items():
        prev_changes = []
        # Walk adjacent (older, newer) snapshot pairs.
        for older, newer in zip(snapshots, snapshots[1:]):
            old_price = float(
                older.get(name, {}).get('Summary', {}).get('Last', 0.0))
            new_price = float(
                newer.get(name, {}).get('Summary', {}).get('Last', 0.0))
            # NOTE(review): a coin absent from `newer` is read as price 0.0
            # and therefore contributes a -100% change - confirm intended.
            if old_price != 0:
                prev_changes.append(
                    (new_price - old_price) / old_price * 100)
        if not prev_changes:
            continue
        volume = float(entry.get('Summary', {}).get('BaseVolume', 0.0))
        average_rate = sum(prev_changes) / len(prev_changes)
        if average_rate >= config.FASTTICK_MIN_RATE:
            ticker_data.append((name, average_rate, volume))

    misc.save_pickle(latest_data, 'fasttick_history')
    misc.delete_ancient_pickles('fasttick_history', config.FASTTICK_LOOKBACK)
    return ticker_data
예제 #3
0
def build_classifier():
    """Create, train, test and evaluate the film classifier.

    The reviews returned by ``get_film_reviews`` are shuffled in place so
    the train/test split does not follow the order they were loaded in.
    The first 1600 reviews train the classifier and the remainder are used
    to test it; the test results are passed to ``analysis``. The trained
    classifier is handed to ``save_pickle`` as "filmClassifier.pkl".
    @returns {Classifier} filmClassifier"""
    split_at = 1600
    labelled_reviews = get_film_reviews()
    shuffle(labelled_reviews)
    training_part = labelled_reviews[:split_at]
    testing_part = labelled_reviews[split_at:]

    film_classifier = Classifier(training_part)
    film_classifier.train()
    analysis(film_classifier.test(testing_part))

    save_pickle(film_classifier, "filmClassifier.pkl")
    return film_classifier
예제 #4
0
def vis_validate(loader, net, device, dtype, path):
    """
    Function for visualizing network performance on one batch of validation
    data. Only the first batch yielded by the loader is used; if the loader
    yields nothing, a warning is issued and nothing is written.
    Input(s):
    - loader (PyTorch loader object): loader for queueing minibatches; each
      batch must unpack to (x, y, name), where name is a sequence of file
      names used for the saved images
    - net (module object): Pytorch network module object (expected to be in
      eval mode already; this function does not switch modes)
    - device (PyTorch device)
    - dtype (PyTorch datatype)
    - path (str): output prefix; 'val_masks/' and 'val_preds/' are appended
      by plain string concatenation, so it should end with a path separator
    Output(s):
    [None]
    """
    import warnings

    # Grab the first batch only; the else branch runs when the loader is
    # empty, which previously surfaced as an UnboundLocalError at save time.
    for (x, y, name) in loader:
        break
    else:
        warnings.warn('vis_validate: loader yielded no batches; '
                      'nothing was saved')
        return None

    mask_path = path + 'val_masks/'
    pred_path = path + 'val_preds/'

    force_dir(mask_path)
    force_dir(pred_path)

    with torch.no_grad():
        x = x.to(device=device, dtype=dtype)
        y = y.to(device=device, dtype=torch.long)

        # Make predictions
        scores = net(x)

        # Define ground-truth and predictions; predictions keep only
        # index 1 along dim 1 of the softmax output
        batch_masks = y.detach().cpu().numpy()
        batch_preds = F.softmax(scores, dim=1).detach().cpu().numpy()[:,
                                                                      1, :, :]

    # Save data
    save_h5(data=batch_masks, path=mask_path + 'mask_data.h5')
    save_h5(data=batch_preds, path=pred_path + 'pred_data.h5')
    save_pickle(data=name, path=pred_path + 'pred_name.pickle')
    # Save images
    # NOTE(review): arrays are written as-is (integer masks, softmax
    # probabilities) with no rescaling - confirm cv2.imwrite output is usable.
    for i, n in enumerate(name):
        # Save ground-truth mask
        cv2.imwrite(mask_path + n, batch_masks[i])
        # Save mask prediction
        cv2.imwrite(pred_path + n, batch_preds[i])

    return None
예제 #5
0
def heartbeat():
    """
    Processes data from Bittrex into a simpler dictionary,
    calls the save function on it, deletes the oldest
    saved dictionary(if it's out of lookback range), and finally
    creates a list of the best coins to be used in tkinter listboxes.

    The % change of a coin is measured from its price in the last stored
    snapshot to its price in the data just retrieved, so a price increase
    gives a positive value. Coins with no (or zero) price in that stored
    snapshot are skipped. Change and volume are rounded to two decimals.
    :return: A list containing triples of (coin name, % change, volume)
    """
    data = misc.retrieve_data()

    # Processing for saving latest data from Bittrex API.
    # `or []` also covers a response whose 'result' key is present but null.
    latest_data = {}
    for d in data.get('result') or []:
        market = d.get('Market', {})
        summary = d.get('Summary', {})
        if (market.get('BaseCurrency', '') == 'BTC'
                and summary.get('Last', 0.0) >= config.SLOWTICK_MIN_PRICE
                and summary.get('BaseVolume', 0.0) >= config.SLOWTICK_MIN_VOL):
            name = market.get('MarketCurrencyLong', '')
            latest_data[name] = {'Summary': summary}

    # Processing change between prev data and new, returning
    #  change for output in GUI
    prev_data = list(misc.open_pickles('slowtick_history', config.SLOWTICK_LOOKBACK))
    ticker_data = []
    if prev_data:
        # Last element yielded by open_pickles - presumably the most recent
        # stored snapshot; confirm ordering against misc.open_pickles.
        previous = prev_data[-1]
        for name, entry in latest_data.items():
            latest_summary = entry.get('Summary', {})
            volume = float(latest_summary.get('BaseVolume', 0.0))
            # Baseline is the stored snapshot, the fresh data is the new
            # price (these two were swapped, inverting the sign of change).
            old_price = float(previous.get(name, {}).get('Summary', {}).get('Last', 0.0))
            new_price = float(latest_summary.get('Last', 0.0))
            if old_price != 0:
                change = (((new_price - old_price) / old_price) * 100)
                if change >= config.SLOWTICK_MIN_CHANGE:
                    ticker_data.append([name,
                                        float(f'{change:.02f}'),
                                        float(f'{volume:.02f}')])

    misc.save_pickle(latest_data, 'slowtick_history')
    misc.delete_ancient_pickles('slowtick_history', config.SLOWTICK_LOOKBACK)
    return ticker_data
예제 #6
0
def build_collaborative_recommender():
    """Create the collaborative recommender.

    The chatbot users' film ratings and the stock MovieLens ratings are
    fetched from ``db`` and combined into one list, which is loaded into a
    dataframe reduced to the UserID, FilmID and Rating columns. A dataset
    with a 1-5 rating scale is built from it; the algorithm returned by
    ``choose_algorithm`` is fitted on the full trainset and then run over
    the anti-testset. The fitted algorithm is handed to ``save_pickle`` as
    'collaborative.pkl'.
    @returns {Algorithm Object} algorithm"""
    all_ratings = db.getAllUserRatings()
    # Extends the list returned by the db call in place.
    all_ratings.extend(db.getAllMlUserRatings())

    ratings_frame = pd.DataFrame(
        all_ratings,
        columns=['MLUR_ID', 'UserID', 'FilmID', 'Liked', 'Rating'],
    ).drop(columns=['MLUR_ID', 'Liked'])

    rating_reader = Reader(rating_scale=(1, 5))
    ratings_dataset = Dataset.load_from_df(ratings_frame, rating_reader)
    full_trainset = ratings_dataset.build_full_trainset()
    anti_testset = full_trainset.build_anti_testset()

    model = choose_algorithm(ratings_dataset)
    model.fit(full_trainset)
    # The predictions returned here are not used.
    model.test(anti_testset)

    save_pickle(model, 'collaborative.pkl')
    return model