def main():
    """Build a DataGatherer, attach a catalog and pairs to it, and print the sorted catalog.

    The catalog comes from ``get_catalog(length)`` and the pairs from
    ``get_pairs(catalog, n_pairs)``; both are stored on the gatherer before
    ``sort_by_pairs()`` is called and its result is printed.
    """
    length = 500
    n_pairs = 50

    # NOTE(review): the backslash separator makes this relative path
    # Windows-specific -- confirm whether other platforms must be supported.
    cfg = load_config('..\\cfg.json')

    dg = DataGatherer(cfg)
    dg.video_catalog = get_catalog(length)
    dg.pairs = get_pairs(dg.video_catalog, n_pairs)
    sorted_catalog = dg.sort_by_pairs()
    # Single-argument parenthesised print: identical output under the Python 2
    # print statement, and valid syntax on Python 3.
    print(sorted_catalog)
Esempio n. 2
0
        db = create_engine('postgres://%s%s/%s' % (dbuser, dbhost, dbname))
        con = None
        con = psycopg2.connect(
            database=dbname,
            host=dbhost,
            user=dbuser,
            password=dbpass)

        return con


if __name__ == '__main__':
    # Script entry point: download the latest data file, run the dataframe
    # through the EventUpdater steps in order, push the result through
    # batch_update using the database connection, then remove the downloads.

    con = get_db_conn()

    try:
        data_gather = DataGatherer()
        data_updater = EventUpdater()

        data_gather.set_target_file()
        data_gather.download_zip()
        data_gather.unzip_download()
        df = data_gather.get_csv_dataframe()

        # Each step consumes the dataframe returned by the previous one.
        df = data_updater.initial_df_clean(df)
        df = data_updater.get_states(df)
        df = data_updater.normalize_goldstein(df)
        df = data_updater.clean_df(df)
        df = data_updater.aggregate_data(df)
        df = data_updater.batch_update(df, con)

        data_gather.delete_recent_files()
    finally:
        # Release the connection even when one of the steps above raises;
        # previously it was never closed explicitly.
        if con is not None:
            con.close()
Esempio n. 3
0
def count_all_video(path):
    """Load the raw pairs found at *path* into the catalog and print its size.

    Relies on a module-level ``dg`` object being defined before the call; it
    is not passed in as a parameter.

    :param path: forwarded unchanged to ``dg.read_raw_pairs``.
    """
    pairs = dg.read_raw_pairs(path)
    dg.fill_video_catalog(pairs)
    total = len(dg.get_video_catalog())
    # Single-argument parenthesised print: same output on Python 2, and no
    # longer a SyntaxError on Python 3.
    print('total videos: {}'.format(total))


def update_stats(dg, limit=None):
    """Ask *dg* to update its video catalog.

    :param dg: object exposing ``update_video_catalog``.
    :param limit: passed through as the ``limit`` keyword argument.
    """
    refresh_catalog = dg.update_video_catalog
    refresh_catalog(limit=limit)


def print_words(dg):
    """Print every entry returned by ``dg.get_all_words()``, one per line, in sorted key order.

    Each line has the form ``<key> :  <value>`` (one space before the colon,
    two after), which is what the former chain of three Python 2 print
    statements with trailing commas produced.

    :param dg: object exposing ``get_all_words`` returning a mapping.
    """
    words = dg.get_all_words()
    for key in sorted(words):
        # One %-formatted, single-argument print keeps the exact spacing and
        # is valid on both Python 2 and Python 3.
        print('%s :  %s' % (key, words[key]))


if __name__ == '__main__':
    # Script entry point: load the configuration, announce the project name
    # and version, then print the title rank of a few sample video titles.
    cfg = load_config('..\\cfg.json')
    # Single-argument parenthesised prints behave the same on Python 2 and
    # are valid syntax on Python 3.
    print('starting {} v.{}\n'.format(PROJECT_NAME, VERSION))
    dg = DataGatherer(cfg)

    sample_titles = (
        'Cyanide And Happiness- Can Of Paint',
        'Geoff Ramsey: Expert Parent - Drunk Tank Animated Adventures',
        'Raju Shrivastav - Bevda Train Main',
        'Hitler Rants Parodies Outro',
        'Man Goes Crazy with ROAD RAGE & A GUN',
    )
    for title in sample_titles:
        # The second argument selects the ``rank1`` attribute of whatever
        # object calculate_title_rank hands to it.
        print(dg.calculate_title_rank(title, lambda x: x.rank1))
    #cl = Correlator()
    #print cl.calculate_c(dg.db_handler.get_all_videos(), lambda x: x.rank1, lambda x: x.rank1)
    #dg.update_views()