def test_find_truffles():
    """Train on 300 random posts, then run find_truffles on 50 fresh ones.

    The first row of the returned frame must carry the maximum
    ``rank_score`` of the whole frame.
    """
    topic_kwargs = {'num_topics': 50, 'no_below': 5, 'no_above': 0.7}
    regressor_kwargs = {
        'n_estimators': 20,
        'max_leaf_nodes': 100,
        'max_features': 0.1,
        'n_jobs': -1,
        'verbose': 1,
        'random_state': 42,
    }

    # Fit the pipeline on the larger sample.
    training_frame = pd.DataFrame(create_n_random_posts(300))
    training_frame = tppp.preprocess(training_frame, ncores=4, chunksize=50)
    pipeline = tpmo.train_pipeline(
        training_frame,
        topic_kwargs=topic_kwargs,
        regressor_kwargs=regressor_kwargs,
    )

    # Score a second, independently generated sample.
    candidate_frame = pd.DataFrame(create_n_random_posts(50))
    candidate_frame = tppp.preprocess(candidate_frame, ncores=4, chunksize=50)
    truffles = tpmo.find_truffles(candidate_frame, pipeline, account='aa')

    top_row = truffles.iloc[0]
    assert top_row.rank_score == truffles.rank_score.max()
def test_test_top_trending_post(steem):
    """Post a top-trending list built from ten random posts.

    The wallet is unlocked first when ``config.PASSWORD`` is truthy, and the
    ``Poster`` runs with ``no_posting_key_mode`` enabled only when
    ``config.PASSWORD`` is ``None``. The test passes if the posting call
    completes without raising.
    """
    # ``pd.datetime`` was deprecated in pandas 1.0 and removed in 2.0. It was
    # only an alias of the standard-library class, so use that directly.
    from datetime import datetime

    if config.PASSWORD:
        steem.wallet.unlock(config.PASSWORD)

    poster = Poster(
        steem=steem,
        account=config.ACCOUNT,
        waiting_time=0.1,
        no_posting_key_mode=config.PASSWORD is None,
    )

    posts = random_data.create_n_random_posts(10)
    df = pd.DataFrame(posts)
    # NOTE(review): this assigns the 'reward' column to itself; the sibling
    # top-N tests set 'predicted_reward' here instead -- confirm which column
    # post_top_trending_list actually needs.
    df['reward'] = df.reward
    df['predicted_votes'] = df.votes
    df = tppp.preprocess(df, ncores=1)

    # Naive UTC date, the same value ``pd.datetime.utcnow().date()`` produced.
    date = datetime.utcnow().date()

    tbpd.post_top_trending_list(
        df,
        poster,
        date,
        overview_permalink='iii',
        trufflepicks_permalink='kkk',
        steem_amount=10,
        sbd_amount=10,
    )
def test_crossval():
    """Run cross_validate over a small parameter grid on 100 random posts.

    There is no explicit assertion; the test passes if the call completes
    without raising.
    """
    topic_kwargs = {'num_topics': 50, 'no_below': 5, 'no_above': 0.7}
    regressor_kwargs = {
        'n_estimators': 20,
        'max_leaf_nodes': 100,
        'max_features': 0.1,
        'n_jobs': -1,
        'verbose': 1,
        'random_state': 42,
    }
    # Two values per parameter -> four grid combinations.
    param_grid = {
        'feature_generation__topic_model__no_above': [0.2, 0.3],
        'regressor__max_leaf_nodes': [50, 100],
    }

    raw_frame = pd.DataFrame(create_n_random_posts(100))
    prepared_frame = tppp.preprocess(raw_frame, ncores=4, chunksize=20)

    tpmo.cross_validate(
        prepared_frame,
        param_grid,
        topic_kwargs=topic_kwargs,
        regressor_kwargs=regressor_kwargs,
    )
def test_statistics():
    """Compute weekly statistics and render the weekly update text.

    A pipeline is trained on 300 random posts, the three "bought" columns are
    zero-filled, and the statistics returned by ``compute_weekly_statistics``
    are passed as keyword arguments to ``weekly_update``. Both the title and
    the body it returns must be truthy.
    """
    # ``pd.datetime`` was deprecated in pandas 1.0 and removed in 2.0. It was
    # only an alias of the standard-library class, so use that directly.
    from datetime import datetime

    posts = create_n_random_posts(300)
    post_frame = pd.DataFrame(posts)
    # Naive UTC timestamp, the same value ``pd.datetime.utcnow()`` produced.
    current_date = datetime.utcnow()

    regressor_kwargs = dict(
        n_estimators=20,
        max_leaf_nodes=100,
        max_features=0.1,
        n_jobs=-1,
        verbose=1,
        random_state=42,
    )
    topic_kwargs = dict(num_topics=50, no_below=5, no_above=0.7)

    post_frame = tppp.preprocess(post_frame, ncores=4, chunksize=50)
    pipeline = tpmo.train_pipeline(
        post_frame,
        topic_kwargs=topic_kwargs,
        regressor_kwargs=regressor_kwargs,
    )

    # Random posts have no bid-bot data, so fill these columns with zeros
    # before computing statistics.
    post_frame['steem_bought_reward'] = 0
    post_frame['sbd_bought_reward'] = 0
    post_frame['bought_votes'] = 0

    stats = tppw.compute_weekly_statistics(post_frame, pipeline)

    steem_per_mvests = 490
    delegator_list = ['peter', 'paul']
    title, body = tpbp.weekly_update(
        steem_per_mvests=steem_per_mvests,
        current_datetime=current_date,
        delegator_list=delegator_list,
        **stats
    )

    assert title
    assert body
def test_weekly_post(steem):
    """Publish the weekly update through a ``Poster`` and expect a permalink.

    A pipeline is trained on 300 random posts, the three "bought" columns are
    zero-filled, and ``post_weakly_update`` must return a truthy permalink.
    """
    # ``pd.datetime`` was deprecated in pandas 1.0 and removed in 2.0. It was
    # only an alias of the standard-library class, so use that directly.
    from datetime import datetime

    posts = create_n_random_posts(300)
    post_frame = pd.DataFrame(posts)
    # Naive UTC timestamp, the same value ``pd.datetime.utcnow()`` produced.
    current_date = datetime.utcnow()

    regressor_kwargs = dict(
        n_estimators=20,
        max_leaf_nodes=100,
        max_features=0.1,
        n_jobs=-1,
        verbose=1,
        random_state=42,
    )
    topic_kwargs = dict(num_topics=50, no_below=5, no_above=0.7)

    post_frame = tppp.preprocess(post_frame, ncores=4, chunksize=50)
    pipeline = tpmo.train_pipeline(
        post_frame,
        topic_kwargs=topic_kwargs,
        regressor_kwargs=regressor_kwargs,
    )

    # Zero-fill the bid-bot columns before the update is posted.
    post_frame['steem_bought_reward'] = 0
    post_frame['sbd_bought_reward'] = 0
    post_frame['bought_votes'] = 0

    # no_posting_key_mode is enabled only when no password is configured.
    poster = Poster(
        account=config.ACCOUNT,
        steem=steem,
        no_posting_key_mode=config.PASSWORD is None,
    )

    permalink = tppw.post_weakly_update(
        pipeline,
        post_frame,
        poster=poster,
        current_datetime=current_date,
    )
    assert permalink
def test_comment():
    """truffle_comment must return something truthy for one random post."""
    (sample_post,) = random_data.create_n_random_posts(1)[:1]

    comment_text = tbpo.truffle_comment(
        reward=sample_post['reward'],
        votes=sample_post['votes'],
        rank=1,
        topN_link='www.example.com',
        truffle_link='www.tf.tf',
    )

    assert comment_text
def test_preprocessing_random_parallel():
    """Preprocess 50 random posts with 5 cores; more than 20 must remain."""
    raw_frame = pd.DataFrame(create_n_random_posts(50))

    preprocess_options = {
        'ncores': 5,
        'chunksize': 10,
        'min_en_prob': 0.5,
        'max_errors_per_word': 0.5,
        'min_max_num_words': (10, 99999),
    }
    filtered = tppp.preprocess(raw_frame, **preprocess_options)

    remaining = len(filtered)
    assert remaining > 20
def test_store_load_frame_test(temp_dir):
    """Round-trip a preprocessed frame through SQLite and compare frames.

    The frame is written to table ``'test'`` of ``test.sqlite`` inside
    ``temp_dir`` and read back; both frames must be equal.
    """
    db_path = os.path.join(temp_dir, 'test.sqlite')
    table_name = 'test'

    stored = pd.DataFrame(create_n_random_posts(42))
    stored = tppp.preprocess(stored)

    tppe.to_sqlite(stored, db_path, table_name)
    loaded = tppe.from_sqlite(db_path, table_name)

    pd.testing.assert_frame_equal(stored, loaded)
def test_tag_measure():
    """Every tag factor computed against PUNISH_LIST must be positive."""
    frame = pd.DataFrame(create_n_random_posts(100))
    frame = tppp.preprocess(frame, ncores=1)
    frame['predicted_reward'] = frame.reward

    tag_factors = tpmo.compute_tag_factor(frame.tags, tpmo.PUNISH_LIST)

    all_positive = np.all(tag_factors > 0)
    assert all_positive
def test_topN_comment():
    """topN_comment must return something truthy for 25 random posts."""
    frame = pd.DataFrame(random_data.create_n_random_posts(25))
    frame = tppp.preprocess(frame, ncores=1)

    # Each argument is one column of the preprocessed frame.
    comment = tbpo.topN_comment(
        topN_authors=frame.author,
        topN_permalinks=frame.permalink,
        topN_titles=frame.title,
        topN_votes=frame.votes,
        topN_rewards=frame.reward,
    )

    assert comment
def test_Doc2Vec_KNN():
    """Train and test the pure doc2vec pipeline on 100 random posts.

    There is no explicit assertion; the test passes if the calls complete
    and ``train_test_pipeline`` returns two values.
    """
    raw_frame = pd.DataFrame(create_n_random_posts(100))
    prepared_frame = tppp.preprocess(raw_frame, ncores=4, chunksize=30)

    doc2vec_options = {'epochs': 2, 'size': 16}
    doc2vec_pipe = tpmo.create_pure_doc2vec_pipeline(doc2vec_options)

    # Unpacking keeps the implicit check that exactly two values come back.
    trained_pipe, result_frame = tpmo.train_test_pipeline(
        prepared_frame,
        pipeline=doc2vec_pipe,
        sample_weight_function=None,
    )
def test_train_test_pipeline():
    """Run train_test_pipeline on 300 random posts.

    There is no explicit assertion; the test passes if the call completes
    without raising.
    """
    topic_kwargs = {'num_topics': 50, 'no_below': 5, 'no_above': 0.7}
    regressor_kwargs = {
        'n_estimators': 20,
        'max_leaf_nodes': 100,
        'max_features': 0.1,
        'n_jobs': -1,
        'verbose': 1,
        'random_state': 42,
    }

    raw_frame = pd.DataFrame(create_n_random_posts(300))
    prepared_frame = tppp.preprocess(raw_frame, ncores=4, chunksize=50)

    tpmo.train_test_pipeline(
        prepared_frame,
        topic_kwargs=topic_kwargs,
        regressor_kwargs=regressor_kwargs,
    )
def test_load_or_preproc(temp_dir):
    """Call load_or_preprocess twice with the same file name.

    After each call ``temp_dir`` must contain exactly one entry, and both
    calls must return equal frames.
    """
    cache_path = os.path.join(temp_dir, 'pptest.gz')
    raw_frame = pd.DataFrame(create_n_random_posts(10))
    options = {'ncores': 5, 'chunksize': 20}

    first = tppp.load_or_preprocess(raw_frame, cache_path, **options)
    entries_after_first = os.listdir(temp_dir)
    assert len(entries_after_first) == 1

    second = tppp.load_or_preprocess(raw_frame, cache_path, **options)
    entries_after_second = os.listdir(temp_dir)
    assert len(entries_after_second) == 1

    assert_frame_equal(first, second)
def test_topN_post():
    """topN_post must return a truthy title and body for ten random posts."""
    # ``pd.datetime`` was deprecated in pandas 1.0 and removed in 2.0. It was
    # only an alias of the standard-library class, so use that directly.
    from datetime import datetime

    posts = random_data.create_n_random_posts(10)
    df = pd.DataFrame(posts)
    df = tppp.preprocess(df, ncores=1)

    # Naive UTC date, the same value ``pd.datetime.utcnow().date()`` produced.
    date = datetime.utcnow().date()

    # Item assignment instead of ``df.image_urls = ...``: pandas does not
    # create a column through a new attribute name, it only sets a plain
    # instance attribute (and warns about it).
    df['image_urls'] = df.body.apply(tptf.get_image_urls)

    title, post = tbpo.topN_post(
        topN_authors=df.author,
        topN_permalinks=df.permalink,
        topN_titles=df.title,
        topN_filtered_bodies=df.filtered_body,
        topN_image_urls=df.image_urls,
        topN_rewards=df.reward,
        topN_votes=df.votes,
        title_date=date,
        truffle_link='de.de.de',
    )

    assert post
    assert title
def test_test_top20_vote_and_comment(steem):
    """Run vote_and_comment_on_topK on ten random posts.

    The wallet is unlocked first when ``config.PASSWORD`` is truthy, and the
    ``Poster`` runs with ``no_posting_key_mode`` enabled only when
    ``config.PASSWORD`` is ``None``. There is no explicit assertion.
    """
    password = config.PASSWORD
    if password:
        steem.wallet.unlock(config.PASSWORD)

    poster = Poster(
        steem=steem,
        account=config.ACCOUNT,
        waiting_time=0.1,
        no_posting_key_mode=config.PASSWORD is None,
    )

    frame = pd.DataFrame(random_data.create_n_random_posts(10))
    # Use the actual values as stand-ins for model predictions.
    frame['predicted_reward'] = frame.reward
    frame['predicted_votes'] = frame.votes
    frame = tppp.preprocess(frame, ncores=1)

    tbpd.vote_and_comment_on_topK(
        frame,
        poster,
        'laida',
        overview_permalink='lll',
    )
def test_bid_bot_correction():
    """Apply compute_bidbot_correction with bought entries for ten posts.

    Ten sampled (author, permalink) pairs get two bought entries each, plus
    one extra ('hello', 'kitty') key. The adjusted reward and vote means must
    be below the raw means, and no adjusted value may be negative.
    """
    frame = pd.DataFrame(create_n_random_posts(30))

    bought = {('hello', 'kitty'): ['19 STEEM']}

    picked = frame[['author', 'permalink']].sample(10)
    for author, permalink in zip(picked['author'], picked['permalink']):
        bought[(author, permalink)] = {
            'aaa': {'amount': '3 STEEM'},
            'bbb': {'amount': '4 SBD'},
        }

    frame = tppp.compute_bidbot_correction(frame, bought)

    assert frame.adjusted_reward.mean() < frame.reward.mean()
    assert (frame.adjusted_reward >= 0).all()
    assert frame.adjusted_votes.mean() < frame.votes.mean()
    assert (frame.adjusted_votes >= 0).all()
def test_test_top10post(steem):
    """Post the top-N list for ten random posts, then comment on it.

    The wallet is unlocked first when ``config.PASSWORD`` is truthy, and the
    ``Poster`` runs with ``no_posting_key_mode`` enabled only when
    ``config.PASSWORD`` is ``None``. The permalink returned by
    ``post_topN_list`` is passed on to ``comment_on_own_top_list``. There is
    no explicit assertion.
    """
    # ``pd.datetime`` was deprecated in pandas 1.0 and removed in 2.0. It was
    # only an alias of the standard-library class, so use that directly.
    from datetime import datetime

    if config.PASSWORD:
        steem.wallet.unlock(config.PASSWORD)

    poster = Poster(
        steem=steem,
        account=config.ACCOUNT,
        waiting_time=0.1,
        no_posting_key_mode=config.PASSWORD is None,
    )

    posts = random_data.create_n_random_posts(10)
    df = pd.DataFrame(posts)
    # Use the actual values as stand-ins for model predictions.
    df['predicted_reward'] = df.reward
    df['predicted_votes'] = df.votes
    df = tppp.preprocess(df, ncores=1)

    # Naive UTC date, the same value ``pd.datetime.utcnow().date()`` produced.
    date = datetime.utcnow().date()

    permalink = tbpd.post_topN_list(df, poster, date, overview_permalink='iii')
    tbpd.comment_on_own_top_list(df, poster, permalink)
def test_load_or_train(temp_dir):
    """Call load_or_train_pipeline twice with the same directory and datetime.

    After each call ``temp_dir`` must contain exactly one entry, the topic
    model of the first pipeline must print a truthy topic listing, and both
    pipelines must expose the same set of step names.
    """
    # ``pd.datetime`` was deprecated in pandas 1.0 and removed in 2.0. It was
    # only an alias of the standard-library class, so use that directly.
    from datetime import datetime

    # Naive UTC timestamp, the same value ``pd.datetime.utcnow()`` produced.
    # Both calls below receive this one value.
    cdt = datetime.utcnow()

    posts = create_n_random_posts(300)
    post_frame = pd.DataFrame(posts)

    regressor_kwargs = dict(
        n_estimators=20,
        max_leaf_nodes=100,
        max_features=0.1,
        n_jobs=-1,
        verbose=1,
        random_state=42,
    )
    topic_kwargs = dict(num_topics=50, no_below=5, no_above=0.7)

    post_frame = tppp.preprocess(post_frame, ncores=4, chunksize=50)

    pipe = tpmo.load_or_train_pipeline(
        post_frame,
        temp_dir,
        current_datetime=cdt,
        topic_kwargs=topic_kwargs,
        regressor_kwargs=regressor_kwargs,
    )

    # The second entry of the feature union's transformer list is taken to be
    # the topic model; each entry is indexed as a pair, [1] being the object.
    topic_model = pipe.named_steps['feature_generation'].transformer_list[1][1]
    result = topic_model.print_topics()
    assert result

    assert len(os.listdir(temp_dir)) == 1

    pipe2 = tpmo.load_or_train_pipeline(
        post_frame,
        temp_dir,
        current_datetime=cdt,
        topic_kwargs=topic_kwargs,
        regressor_kwargs=regressor_kwargs,
    )

    # The second call must not add another entry to the directory.
    assert len(os.listdir(temp_dir)) == 1
    assert set(pipe.named_steps.keys()) == set(pipe2.named_steps.keys())