Beispiel #1
0
def test_find_truffles():
    """Train a pipeline on random posts and look for truffles in fresh ones.

    The first row of the frame returned by ``tpmo.find_truffles`` must hold
    the maximum ``rank_score`` of that frame.
    """
    training_frame = pd.DataFrame(create_n_random_posts(300))

    topic_kwargs = {'num_topics': 50, 'no_below': 5, 'no_above': 0.7}
    regressor_kwargs = {
        'n_estimators': 20,
        'max_leaf_nodes': 100,
        'max_features': 0.1,
        'n_jobs': -1,
        'verbose': 1,
        'random_state': 42,
    }

    training_frame = tppp.preprocess(training_frame, ncores=4, chunksize=50)
    trained = tpmo.train_pipeline(training_frame,
                                  topic_kwargs=topic_kwargs,
                                  regressor_kwargs=regressor_kwargs)

    # A second, independent batch of posts is scored with the trained pipeline.
    candidate_frame = pd.DataFrame(create_n_random_posts(50))
    candidate_frame = tppp.preprocess(candidate_frame, ncores=4, chunksize=50)
    truffles = tpmo.find_truffles(candidate_frame, trained, account='aa')

    first_row = truffles.iloc[0]
    assert first_row.rank_score == truffles.rank_score.max()
Beispiel #2
0
def test_test_top_trending_post(steem):
    """Smoke-test ``tbpd.post_top_trending_list`` on ten random posts.

    The wallet is unlocked first when ``config.PASSWORD`` is truthy, and
    ``no_posting_key_mode`` is enabled when ``config.PASSWORD`` is ``None``.
    The test passes if posting raises no exception.
    """
    # ``pd.datetime`` was deprecated in pandas 1.0 and removed in 2.0, so the
    # standard library class it aliased is used directly.
    from datetime import datetime, timezone

    if config.PASSWORD:
        steem.wallet.unlock(config.PASSWORD)

    poster = Poster(steem=steem,
                    account=config.ACCOUNT,
                    waiting_time=0.1,
                    no_posting_key_mode=config.PASSWORD is None)

    posts = random_data.create_n_random_posts(10)
    df = pd.DataFrame(posts)
    # The real vote counts stand in for model predictions.
    df['predicted_votes'] = df.votes

    df = tppp.preprocess(df, ncores=1)

    # Current UTC calendar date, same value as the former utcnow().date().
    date = datetime.now(timezone.utc).date()

    tbpd.post_top_trending_list(df,
                                poster,
                                date,
                                overview_permalink='iii',
                                trufflepicks_permalink='kkk',
                                steem_amount=10,
                                sbd_amount=10)
Beispiel #3
0
def test_crossval():
    """Run ``tpmo.cross_validate`` over a small parameter grid.

    There is no explicit assertion; the test passes if cross validation
    finishes without raising.
    """
    frame = pd.DataFrame(create_n_random_posts(100))

    topic_kwargs = {'num_topics': 50, 'no_below': 5, 'no_above': 0.7}
    regressor_kwargs = {
        'n_estimators': 20,
        'max_leaf_nodes': 100,
        'max_features': 0.1,
        'n_jobs': -1,
        'verbose': 1,
        'random_state': 42,
    }

    frame = tppp.preprocess(frame, ncores=4, chunksize=20)

    # Two candidate values per searched parameter.
    search_grid = dict([
        ('feature_generation__topic_model__no_above', [0.2, 0.3]),
        ('regressor__max_leaf_nodes', [50, 100]),
    ])

    tpmo.cross_validate(frame,
                        search_grid,
                        topic_kwargs=topic_kwargs,
                        regressor_kwargs=regressor_kwargs)
def test_statistics():
    """Compute weekly statistics and render the weekly update text.

    A pipeline is trained on random posts, the bought-reward columns are
    zeroed, and the statistics from ``tppw.compute_weekly_statistics`` are
    passed to ``tpbp.weekly_update``. Both the title and the body returned
    must be truthy.
    """
    # ``pd.datetime`` was deprecated in pandas 1.0 and removed in 2.0, so the
    # standard library class it aliased is used directly.
    from datetime import datetime, timezone

    posts = create_n_random_posts(300)

    post_frame = pd.DataFrame(posts)
    # Naive UTC timestamp, matching what ``utcnow()`` used to return.
    current_date = datetime.now(timezone.utc).replace(tzinfo=None)

    regressor_kwargs = dict(n_estimators=20,
                            max_leaf_nodes=100,
                            max_features=0.1,
                            n_jobs=-1,
                            verbose=1,
                            random_state=42)

    topic_kwargs = dict(num_topics=50, no_below=5, no_above=0.7)

    post_frame = tppp.preprocess(post_frame, ncores=4, chunksize=50)
    pipeline = tpmo.train_pipeline(post_frame,
                                   topic_kwargs=topic_kwargs,
                                   regressor_kwargs=regressor_kwargs)

    # These columns are added with zeros before the statistics are computed.
    post_frame['steem_bought_reward'] = 0
    post_frame['sbd_bought_reward'] = 0
    post_frame['bought_votes'] = 0

    stats = tppw.compute_weekly_statistics(post_frame, pipeline)
    steem_per_mvests = 490

    delegator_list = ['peter', 'paul']

    title, body = tpbp.weekly_update(steem_per_mvests=steem_per_mvests,
                                     current_datetime=current_date,
                                     delegator_list=delegator_list,
                                     **stats)
    assert title
    assert body
def test_weekly_post(steem):
    """Post the weekly update and check that a permalink is returned.

    A pipeline is trained on random posts, the bought-reward columns are
    zeroed, and ``tppw.post_weakly_update`` is called with a ``Poster`` whose
    ``no_posting_key_mode`` is enabled when ``config.PASSWORD`` is ``None``.
    """
    # ``pd.datetime`` was deprecated in pandas 1.0 and removed in 2.0, so the
    # standard library class it aliased is used directly.
    from datetime import datetime, timezone

    posts = create_n_random_posts(300)

    post_frame = pd.DataFrame(posts)
    # Naive UTC timestamp, matching what ``utcnow()`` used to return.
    current_date = datetime.now(timezone.utc).replace(tzinfo=None)

    regressor_kwargs = dict(n_estimators=20,
                            max_leaf_nodes=100,
                            max_features=0.1,
                            n_jobs=-1,
                            verbose=1,
                            random_state=42)

    topic_kwargs = dict(num_topics=50, no_below=5, no_above=0.7)

    post_frame = tppp.preprocess(post_frame, ncores=4, chunksize=50)
    pipeline = tpmo.train_pipeline(post_frame,
                                   topic_kwargs=topic_kwargs,
                                   regressor_kwargs=regressor_kwargs)

    # These columns are added with zeros before the update is posted.
    post_frame['steem_bought_reward'] = 0
    post_frame['sbd_bought_reward'] = 0
    post_frame['bought_votes'] = 0

    poster = Poster(account=config.ACCOUNT,
                    steem=steem,
                    no_posting_key_mode=config.PASSWORD is None)
    permalink = tppw.post_weakly_update(pipeline,
                                        post_frame,
                                        poster=poster,
                                        current_datetime=current_date)

    assert permalink
Beispiel #6
0
def test_comment():
    """``tbpo.truffle_comment`` returns something truthy for a random post."""
    sample_post = random_data.create_n_random_posts(1)[0]

    comment_kwargs = {
        'reward': sample_post['reward'],
        'votes': sample_post['votes'],
        'rank': 1,
        'topN_link': 'www.example.com',
        'truffle_link': 'www.tf.tf',
    }
    comment = tbpo.truffle_comment(**comment_kwargs)

    assert comment
Beispiel #7
0
def test_preprocessing_random_parallel():
    """Preprocess 50 random posts with ``ncores=5`` and explicit filters.

    More than 20 rows must remain in the frame returned by
    ``tppp.preprocess``.
    """
    raw_frame = pd.DataFrame(create_n_random_posts(50))

    filter_settings = {
        'ncores': 5,
        'chunksize': 10,
        'min_en_prob': 0.5,
        'max_errors_per_word': 0.5,
        'min_max_num_words': (10, 99999),
    }
    kept = tppp.preprocess(raw_frame, **filter_settings)

    remaining = len(kept)
    assert remaining > 20
Beispiel #8
0
def test_store_load_frame_test(temp_dir):
    """Round-trip a preprocessed frame through ``tppe.to_sqlite``/``from_sqlite``.

    The frame read back must equal the frame that was written.
    """
    table_name = 'test'
    db_path = os.path.join(temp_dir, 'test.sqlite')

    written = tppp.preprocess(pd.DataFrame(create_n_random_posts(42)))
    tppe.to_sqlite(written, db_path, table_name)

    read_back = tppe.from_sqlite(db_path, table_name)

    pd.testing.assert_frame_equal(written, read_back)
Beispiel #9
0
def test_tag_measure():
    """Every value from ``tpmo.compute_tag_factor`` must be strictly positive."""
    frame = tppp.preprocess(pd.DataFrame(create_n_random_posts(100)),
                            ncores=1)

    # The actual reward is copied into the prediction column.
    frame['predicted_reward'] = frame.reward

    factors = tpmo.compute_tag_factor(frame.tags, tpmo.PUNISH_LIST)

    is_positive = factors > 0
    assert np.all(is_positive)
Beispiel #10
0
def test_topN_comment():
    """``tbpo.topN_comment`` returns something truthy for 25 random posts."""
    frame = pd.DataFrame(random_data.create_n_random_posts(25))
    frame = tppp.preprocess(frame, ncores=1)

    comment_kwargs = {
        'topN_authors': frame.author,
        'topN_permalinks': frame.permalink,
        'topN_titles': frame.title,
        'topN_votes': frame.votes,
        'topN_rewards': frame.reward,
    }
    comment = tbpo.topN_comment(**comment_kwargs)

    assert comment
Beispiel #11
0
def test_Doc2Vec_KNN():
    """Train and test the pure doc2vec pipeline on random posts.

    There is no explicit assertion; the test passes if
    ``tpmo.train_test_pipeline`` returns a two-element result without raising.
    """
    frame = pd.DataFrame(create_n_random_posts(100))
    frame = tppp.preprocess(frame, ncores=4, chunksize=30)

    doc2vec_settings = {'epochs': 2, 'size': 16}
    untrained = tpmo.create_pure_doc2vec_pipeline(doc2vec_settings)

    # Unpacking checks that exactly two values come back.
    _trained, _result_frame = tpmo.train_test_pipeline(
        frame,
        pipeline=untrained,
        sample_weight_function=None,
    )
Beispiel #12
0
def test_train_test_pipeline():
    """Run ``tpmo.train_test_pipeline`` with explicit topic/regressor kwargs.

    There is no explicit assertion; the test passes if the call finishes
    without raising.
    """
    frame = pd.DataFrame(create_n_random_posts(300))

    topic_kwargs = {'num_topics': 50, 'no_below': 5, 'no_above': 0.7}
    regressor_kwargs = {
        'n_estimators': 20,
        'max_leaf_nodes': 100,
        'max_features': 0.1,
        'n_jobs': -1,
        'verbose': 1,
        'random_state': 42,
    }

    frame = tppp.preprocess(frame, ncores=4, chunksize=50)
    tpmo.train_test_pipeline(frame,
                             topic_kwargs=topic_kwargs,
                             regressor_kwargs=regressor_kwargs)
Beispiel #13
0
def test_load_or_preproc(temp_dir):
    """Call ``tppp.load_or_preprocess`` twice with the same target file.

    After each call ``temp_dir`` must contain exactly one entry, and both
    calls must return equal frames.
    """
    target = os.path.join(temp_dir, 'pptest.gz')
    raw_frame = pd.DataFrame(create_n_random_posts(10))
    settings = {'ncores': 5, 'chunksize': 20}

    first = tppp.load_or_preprocess(raw_frame, target, **settings)
    entries_after_first = os.listdir(temp_dir)
    assert len(entries_after_first) == 1

    second = tppp.load_or_preprocess(raw_frame, target, **settings)
    entries_after_second = os.listdir(temp_dir)
    assert len(entries_after_second) == 1

    assert_frame_equal(first, second)
Beispiel #14
0
def test_topN_post():
    """``tbpo.topN_post`` returns a truthy title and post for random posts."""
    # ``pd.datetime`` was deprecated in pandas 1.0 and removed in 2.0, so the
    # standard library class it aliased is used directly.
    from datetime import datetime, timezone

    posts = random_data.create_n_random_posts(10)
    df = pd.DataFrame(posts)
    df = tppp.preprocess(df, ncores=1)

    # Current UTC calendar date, same value as the former utcnow().date().
    date = datetime.now(timezone.utc).date()
    # Item assignment sets the column whether or not it already exists;
    # attribute assignment cannot create a new DataFrame column.
    df['image_urls'] = df.body.apply(tptf.get_image_urls)

    title, post = tbpo.topN_post(topN_authors=df.author,
                                 topN_permalinks=df.permalink,
                                 topN_titles=df.title,
                                 topN_filtered_bodies=df.filtered_body,
                                 topN_image_urls=df.image_urls,
                                 topN_rewards=df.reward,
                                 topN_votes=df.votes,
                                 title_date=date,
                                 truffle_link='de.de.de')

    assert post
    assert title
Beispiel #15
0
def test_test_top20_vote_and_comment(steem):
    """Smoke-test ``tbpd.vote_and_comment_on_topK`` on ten random posts.

    The wallet is unlocked first when ``config.PASSWORD`` is truthy, and
    ``no_posting_key_mode`` is enabled when ``config.PASSWORD`` is ``None``.
    There is no explicit assertion.
    """
    password = config.PASSWORD
    if password:
        steem.wallet.unlock(config.PASSWORD)

    poster_kwargs = {
        'steem': steem,
        'account': config.ACCOUNT,
        'waiting_time': 0.1,
        'no_posting_key_mode': config.PASSWORD is None,
    }
    poster = Poster(**poster_kwargs)

    frame = pd.DataFrame(random_data.create_n_random_posts(10))
    # The real rewards and votes stand in for model predictions.
    frame['predicted_reward'] = frame.reward
    frame['predicted_votes'] = frame.votes

    frame = tppp.preprocess(frame, ncores=1)

    tbpd.vote_and_comment_on_topK(frame,
                                  poster,
                                  'laida',
                                  overview_permalink='lll')
Beispiel #16
0
def test_bid_bot_correction():
    """Check ``tppp.compute_bidbot_correction`` against bought-vote data.

    Ten sampled posts get a '3 STEEM' and a '4 SBD' entry. Afterwards the
    adjusted reward and vote columns must be non-negative and have a lower
    mean than the unadjusted columns.
    """
    frame = pd.DataFrame(create_n_random_posts(30))

    # The first entry uses a key that is not taken from the frame.
    bought = {('hello', 'kitty'): ['19 STEEM']}

    sampled = frame[['author', 'permalink']].sample(10)
    for author, permalink in zip(sampled['author'], sampled['permalink']):
        bought[(author, permalink)] = {
            'aaa': {'amount': '3 STEEM'},
            'bbb': {'amount': '4 SBD'},
        }

    frame = tppp.compute_bidbot_correction(frame, bought)

    adjusted_reward = frame.adjusted_reward
    adjusted_votes = frame.adjusted_votes

    assert adjusted_reward.mean() < frame.reward.mean()
    assert all(adjusted_reward >= 0)
    assert adjusted_votes.mean() < frame.votes.mean()
    assert all(adjusted_votes >= 0)
Beispiel #17
0
def test_test_top10post(steem):
    """Post the top-N list, then comment on it, using ten random posts.

    The wallet is unlocked first when ``config.PASSWORD`` is truthy, and
    ``no_posting_key_mode`` is enabled when ``config.PASSWORD`` is ``None``.
    The permalink returned by ``tbpd.post_topN_list`` is passed on to
    ``tbpd.comment_on_own_top_list``. There is no explicit assertion.
    """
    # ``pd.datetime`` was deprecated in pandas 1.0 and removed in 2.0, so the
    # standard library class it aliased is used directly.
    from datetime import datetime, timezone

    if config.PASSWORD:
        steem.wallet.unlock(config.PASSWORD)

    poster = Poster(steem=steem,
                    account=config.ACCOUNT,
                    waiting_time=0.1,
                    no_posting_key_mode=config.PASSWORD is None)

    posts = random_data.create_n_random_posts(10)
    df = pd.DataFrame(posts)
    # The real rewards and votes stand in for model predictions.
    df['predicted_reward'] = df.reward
    df['predicted_votes'] = df.votes

    df = tppp.preprocess(df, ncores=1)

    # Current UTC calendar date, same value as the former utcnow().date().
    date = datetime.now(timezone.utc).date()

    permalink = tbpd.post_topN_list(df, poster, date, overview_permalink='iii')
    tbpd.comment_on_own_top_list(df, poster, permalink)
Beispiel #18
0
def test_load_or_train(temp_dir):
    """Call ``tpmo.load_or_train_pipeline`` twice with the same arguments.

    After each call ``temp_dir`` must contain exactly one entry, the topic
    model of the first pipeline must print a truthy topic summary, and both
    pipelines must expose the same step names.
    """
    # ``pd.datetime`` was deprecated in pandas 1.0 and removed in 2.0, so the
    # standard library class it aliased is used directly.
    from datetime import datetime, timezone

    # Naive UTC timestamp, matching what ``utcnow()`` used to return. The
    # same value is passed to both calls below.
    cdt = datetime.now(timezone.utc).replace(tzinfo=None)
    posts = create_n_random_posts(300)

    post_frame = pd.DataFrame(posts)

    regressor_kwargs = dict(n_estimators=20,
                            max_leaf_nodes=100,
                            max_features=0.1,
                            n_jobs=-1,
                            verbose=1,
                            random_state=42)

    topic_kwargs = dict(num_topics=50, no_below=5, no_above=0.7)

    post_frame = tppp.preprocess(post_frame, ncores=4, chunksize=50)

    pipe = tpmo.load_or_train_pipeline(post_frame,
                                       temp_dir,
                                       current_datetime=cdt,
                                       topic_kwargs=topic_kwargs,
                                       regressor_kwargs=regressor_kwargs)

    # NOTE(review): assumes the second transformer of the
    # 'feature_generation' step is the topic model - confirm against tpmo.
    topic_model = pipe.named_steps['feature_generation'].transformer_list[1][1]
    result = topic_model.print_topics()
    assert result

    assert len(os.listdir(temp_dir)) == 1

    pipe2 = tpmo.load_or_train_pipeline(post_frame,
                                        temp_dir,
                                        current_datetime=cdt,
                                        topic_kwargs=topic_kwargs,
                                        regressor_kwargs=regressor_kwargs)

    # The second call must not have added another entry to the directory.
    assert len(os.listdir(temp_dir)) == 1
    assert set(pipe.named_steps.keys()) == set(pipe2.named_steps.keys())