Beispiel #1
0
def test_create_trending_post(noapisteem):

    current_datetime = pd.datetime.utcnow()

    data_frame = tpgd.scrape_hour_data(steem=noapisteem,
                                       current_datetime=current_datetime,
                                       ncores=32,
                                       offset_hours=8,
                                       hours=1,
                                       stop_after=20)

    min_datetime = data_frame.created.min()
    max_datetime = data_frame.created.max() + pd.Timedelta(days=8)
    upvote_payments, bots = tpad.get_upvote_payments_to_bots(
        steem=noapisteem,
        min_datetime=min_datetime,
        max_datetime=max_datetime,
        bots=['booster'])

    data_frame = tppp.preprocess(data_frame, ncores=1)

    data_frame = tppp.compute_bidbot_correction(
        post_frame=data_frame, upvote_payments=upvote_payments)
    account = config.ACCOUNT
    poster = Poster(account=account,
                    steem=noapisteem,
                    no_posting_key_mode=config.PASSWORD is None)

    tt0b.create_trending_post(data_frame,
                              upvote_payments,
                              poster,
                              'test',
                              'test',
                              current_datetime,
                              bots=bots)
Beispiel #2
0
def main():
    """Main loop started from command line"""

    no_broadcast, current_datetime = parse_args()

    if current_datetime is None:
        current_datetime = pd.datetime.utcnow()
    else:
        current_datetime = pd.to_datetime(current_datetime)

    data_directory = os.path.join(config.PROJECT_DIRECTORY, 'scraped_data')
    model_directoy = os.path.join(config.PROJECT_DIRECTORY, 'trained_models')
    log_directory =  os.path.join(config.PROJECT_DIRECTORY, 'logs')



    configure_logging(log_directory, current_datetime)

    logger.info('STARTING main script at {}'.format(current_datetime))
    if no_broadcast:
        logger.info('Run without broadcasting.')
    else:
        logger.info('ATTENTION I WILL BROADCAST TO STEEMIT!!!')
    time.sleep(2)

    steem = MPSteem(nodes=config.NODES, no_broadcast=no_broadcast)
    # To post stuff
    account = config.ACCOUNT
    poster = Poster(account=account, steem=steem)

    prediction_frame = tpgd.scrape_hour_data(steem=steem,
                                             current_datetime=current_datetime,
                                             ncores=32,
                                             offset_hours=2)
    prediction_frame = tppp.preprocess(prediction_frame, ncores=8)


    permalink = 'daily-truffle-picks-2018-03-27'

    overview_permalink = 'weekly-truffle-updates-2018-12'

    logger.info('Computing the top trending without bidbots')
    logger.info('Searching for bid bots and bought votes')
    min_datetime = prediction_frame.created.min()
    max_datetime = prediction_frame.created.max() + pd.Timedelta(days=1)
    upvote_payments, bots = tpad.get_upvote_payments_to_bots(steem=steem,
                                                  min_datetime=min_datetime,
                                                  max_datetime=max_datetime)
    logger.info('Adjusting votes and reward')
    sorted_frame = tppp.compute_bidbot_correction(post_frame=prediction_frame,
                                                upvote_payments=upvote_payments)
    tt0b.create_trending_post(sorted_frame,
                              upvote_payments=upvote_payments,
                              poster=poster,
                              topN_permalink=permalink,
                              overview_permalink=overview_permalink,
                              current_datetime=current_datetime,
                              bots=bots)

    logger.info('DONE at {}'.format(current_datetime))
Beispiel #3
0
def load_and_preprocess_2_frames(log_directory, current_datetime, steem,
                                 data_directory, offset_days=8,
                                 days=7, days2=7):
    """ Function to load and preprocess the time span split into 2
    for better memory footprint

    Parameters
    ----------
    log_directory: str
    current_datetime: datetime
    steem: MPSteem
    data_directory: str
    offset_days: int
    days: int
    days2: int
    ncores: int

    Returns
    -------
    DataFrame

    """
    # hack for better memory footprint,
    # see https://stackoverflow.com/questions/15455048/releasing-memory-in-python
    with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor:
        post_frame = executor.submit(large_mp_preprocess,
                                     log_directory=log_directory,
                                     current_datetime=current_datetime,
                                     steem=steem,
                                     data_directory=data_directory,
                                     days=days,
                                     offset_days=offset_days).result()
    with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor:
        post_frame2 = executor.submit(large_mp_preprocess,
                                     log_directory=log_directory,
                                     current_datetime=current_datetime,
                                     steem=steem,
                                     data_directory=data_directory,
                                     days=days2,
                                     offset_days=offset_days + days).result()

    post_frame = pd.concat([post_frame, post_frame2], axis=0)
    # We need to reset the index because due to concatenation
    # the default indices are duplicates!
    post_frame.reset_index(inplace=True, drop=True)
    logger.info('Combining 2 frames into 1')
    post_frame = tppp.filter_duplicates(post_frame)

    logger.info('Searching for bid bots and bought votes')
    min_datetime = post_frame.created.min()
    max_datetime = post_frame.created.max() + pd.Timedelta(days=8)
    upvote_payments, _ = tpad.get_upvote_payments_to_bots(steem=steem,
                                                  min_datetime=min_datetime,
                                                  max_datetime=max_datetime)
    logger.info('Adjusting votes and reward')
    post_frame = tppp.compute_bidbot_correction(post_frame=post_frame,
                                                upvote_payments=upvote_payments)
    return post_frame
Beispiel #4
0
def test_bottracker_api(steem):

    min_datetime = pd.datetime.utcnow() - pd.Timedelta(minutes=30)
    max_datetime = pd.datetime.utcnow()
    upvote_payments, bots = tpad.get_upvote_payments_to_bots(steem=steem,
                                                       min_datetime=min_datetime,
                                                       max_datetime=max_datetime,
                                                       bots='default')
    assert upvote_payments
    assert bots
Beispiel #5
0
def load_or_preprocess(post_frame,
                       filename,
                       *args,
                       overwrite=False,
                       store=True,
                       steem_args_for_upvote=None,
                       bots=tfga.BITBOTS,
                       **kwargs):
    """ Tries to load a preprocessed frame if not found preprocessing starts.

    Parameters
    ----------
    post_frame: DataFrame
    filename: str
        Filename of data to load
    args: *args
        Arguments passed to normal preprocessing
    steem_args_for_upvote: dict
        Steem arguments, leave None to not load corrections
    overwrite: bool
        If preprocessing should be started and overwrite existing file
    store: bool
        If preprocessed frame should be stored to file
    kwargs: **kwargs
        Arguments passed to preprocessing

    Returns
    -------
    DataFrame

    """
    if os.path.isfile(filename) and not overwrite:
        logger.info('Found file {} will load it'.format(filename))
        post_frame = pd.read_pickle(filename, compression='gzip')
    else:
        logger.info(
            'File {} not found, will start prepocessing'.format(filename))
        post_frame = preprocess(post_frame, *args, **kwargs)
        if steem_args_for_upvote:
            logger.info('Looking for bought upvotes!')
            min_datetime = post_frame.created.min()
            max_datetime = post_frame.created.max() + pd.Timedelta(days=8)
            upvote_payments, _ = tfga.get_upvote_payments_to_bots(
                steem_args_for_upvote,
                min_datetime=min_datetime,
                max_datetime=max_datetime,
                bots=bots)
            post_frame = compute_bidbot_correction(post_frame, upvote_payments)
        if store:
            directory = os.path.dirname(filename)
            if not os.path.isdir(directory):
                os.makedirs(directory)
            logger.info('Storing file {} to disk'.format(filename))
            post_frame.to_pickle(filename, compression='gzip')
    return post_frame
Beispiel #6
0
def main():
    """Main loop started from command line"""

    no_broadcast, current_datetime = parse_args()

    if current_datetime is None:
        current_datetime = pd.datetime.utcnow()
    else:
        current_datetime = pd.to_datetime(current_datetime)

    data_directory = os.path.join(config.PROJECT_DIRECTORY, 'scraped_data')
    model_directoy = os.path.join(config.PROJECT_DIRECTORY, 'trained_models')
    log_directory = os.path.join(config.PROJECT_DIRECTORY, 'logs')

    configure_logging(log_directory, current_datetime)

    logger.info('STARTING main script at {}'.format(current_datetime))
    if no_broadcast:
        logger.info('Run without broadcasting.')
    else:
        logger.info('ATTENTION I WILL BROADCAST TO STEEMIT!!!')
    time.sleep(2)

    steem = MPSteem(nodes=config.NODES, no_broadcast=no_broadcast)
    # hack to allow for payments, because of https://github.com/steemit/steem-python/issues/191
    noapisteem = MPSteem(nodes=config.NODES[1:], no_broadcast=no_broadcast)
    # To post stuff
    account = config.ACCOUNT
    poster = Poster(account=account, steem=noapisteem)

    tppd.create_wallet(steem,
                       config.PASSWORD,
                       posting_key=config.POSTING_KEY,
                       active_key=config.ACTIVE_KEY)

    logger.info('Paying out investors')
    tpde.pay_delegates(
        account=account,
        steem=noapisteem,  # use a steem instance without api.steem!
        current_datetime=current_datetime)

    if not tpmo.model_exists(current_datetime, model_directoy):

        post_frame = load_and_preprocess_2_frames(
            log_directory=log_directory,
            current_datetime=current_datetime,
            steem=steem,
            noapisteem=noapisteem,
            data_directory=data_directory)
        logger.info('Garbage collecting')
        gc.collect()
    else:
        post_frame = None

    regressor_kwargs = dict(n_estimators=256,
                            max_leaf_nodes=5000,
                            max_features=0.2,
                            n_jobs=-1,
                            verbose=1,
                            random_state=42)

    topic_kwargs = dict(num_topics=128,
                        no_below=7,
                        no_above=0.1,
                        ngrams=(1, 2),
                        keep_n=333000)

    if post_frame is not None and len(post_frame) > MAX_DOCUMENTS:
        logger.info('Frame has {} Documents, too many, '
                    'reducing to {}'.format(len(post_frame), MAX_DOCUMENTS))
        post_frame.sort_values('created', inplace=True, ascending=False)
        train_frame = post_frame.iloc[:MAX_DOCUMENTS, :]
    else:
        train_frame = post_frame

    pipeline = tpmo.load_or_train_pipeline(
        train_frame,
        model_directoy,
        current_datetime,
        regressor_kwargs=regressor_kwargs,
        topic_kwargs=topic_kwargs,
        targets=['adjusted_reward', 'adjusted_votes'])

    tpmo.log_pipeline_info(pipeline=pipeline)

    overview_permalink = tppw.return_overview_permalink_if_exists(
        account=account, current_datetime=current_datetime, steem=steem)

    if not overview_permalink:
        if post_frame is None:
            logger.info('Need to reaload data for weekly overview')
            post_frame = load_and_preprocess_2_frames(
                log_directory=log_directory,
                current_datetime=current_datetime,
                steem=steem,
                noapisteem=noapisteem,
                data_directory=data_directory)

        logger.info('I want to post my weekly overview')
        overview_permalink = tppw.post_weakly_update(
            pipeline=pipeline,
            post_frame=post_frame,
            poster=poster,
            current_datetime=current_datetime)

    logger.info('Garbage collecting')
    del post_frame
    gc.collect()

    prediction_frame = tpgd.scrape_hour_data(steem=steem,
                                             current_datetime=current_datetime,
                                             ncores=32,
                                             offset_hours=2)
    prediction_frame = tppp.preprocess(prediction_frame, ncores=8)

    sorted_frame = tpmo.find_truffles(prediction_frame,
                                      pipeline,
                                      account=account)

    permalink = tppd.post_topN_list(sorted_frame,
                                    poster=poster,
                                    current_datetime=current_datetime,
                                    overview_permalink=overview_permalink)

    tppd.comment_on_own_top_list(sorted_frame,
                                 poster=poster,
                                 topN_permalink=permalink)

    tppd.vote_and_comment_on_topK(sorted_frame,
                                  poster=poster,
                                  topN_permalink=permalink,
                                  overview_permalink=overview_permalink)

    logger.info('Computing the top trending without bidbots')
    logger.info('Searching for bid bots and bought votes')
    min_datetime = sorted_frame.created.min()
    max_datetime = sorted_frame.created.max() + pd.Timedelta(days=1)
    upvote_payments, bots = tpad.get_upvote_payments_to_bots(
        steem=noapisteem, min_datetime=min_datetime, max_datetime=max_datetime)
    logger.info('Adjusting votes and reward')
    sorted_frame = tppp.compute_bidbot_correction(
        post_frame=sorted_frame, upvote_payments=upvote_payments)
    tt0b.create_trending_post(sorted_frame,
                              upvote_payments=upvote_payments,
                              poster=poster,
                              topN_permalink=permalink,
                              overview_permalink=overview_permalink,
                              current_datetime=current_datetime,
                              bots=bots)

    logger.info('Done with normal duty, answering manual calls!')
    tfod.call_a_pig(poster=poster,
                    pipeline=pipeline,
                    topN_permalink=permalink,
                    current_datetime=current_datetime,
                    offset_hours=2,
                    hours=24,
                    overview_permalink=overview_permalink)

    logger.info('Cleaning up after myself')
    tfut.clean_up_directory(model_directoy, keep_last=3)
    tfut.clean_up_directory(data_directory, keep_last=25)
    tfut.clean_up_directory(log_directory, keep_last=14)

    logger.info('Preloading -8 days for later training')
    tpgd.load_or_scrape_training_data(steem,
                                      data_directory,
                                      current_datetime=current_datetime,
                                      days=1,
                                      offset_days=8,
                                      ncores=32)

    logger.info('DONE at {}'.format(current_datetime))