def main(*args):
    """Generate and write a co-editing network file for every wiki in wikis.json.

    The output format is selected from the command line (``sys.argv``);
    ``args`` is accepted but not used. At most one option is allowed:

    * no option -- the network is written with ``write_pickle`` directly
      into ``precooked_data/networks``;
    * ``-bin`` -- written with ``write_pickle`` into
      ``precooked_data/networks/bin``;
    * ``-gml`` -- written with ``copy_and_write_gml`` into
      ``precooked_data/networks/gml``.

    The list of wikis is read from ``wikis.json`` inside ``data_dir``; each
    entry must provide the keys ``'data'`` (CSV file name relative to
    ``data_dir``) and ``'name'`` (used in progress messages).

    Returns:
        0 on success; 1 if the command-line arguments are invalid or
        ``data_dir`` is not an existing directory.
    """
    if len(sys.argv) > 2:
        print('Error: Invalid number of arguments')
        return 1

    # Default: binary output straight into the networks directory.
    write_bin = True
    dest_path = os.path.join('precooked_data', 'networks')
    if len(sys.argv) == 2:
        option = sys.argv[1]
        if option == '-gml':
            write_bin = False
            dest_path = os.path.join(dest_path, 'gml')
        elif option == '-bin':
            dest_path = os.path.join(dest_path, 'bin')
        else:
            print(f'Error: argument {option} is not allowed')
            return 1

    if not os.path.isdir(data_dir):
        # Report the directory that was actually checked.
        print(f'Error: the path "{data_dir}" was not found')
        return 1

    if not os.path.isdir(dest_path):
        print(f'Creating directory for first time: {dest_path}')
        os.makedirs(dest_path)

    # Use a context manager so the file handle is closed deterministically.
    with open(os.path.join(data_dir, 'wikis.json')) as wikis_file:
        wikis = json.load(wikis_file)

    for wiki in wikis:
        df = lib.get_dataframe_from_csv(os.path.join(data_dir, wiki['data']))
        lib.prepare_data(df)
        df = lib.clean_up_bot_activity(df, wiki)

        net = CoEditingNetwork()
        print(f"Generating network data for {wiki['name']}")

        start = time.perf_counter()
        net.generate_from_pandas(data=df)
        elapsed = time.perf_counter() - start
        print(f" * [Timing] Generating network for {wiki['name']} : {elapsed} seconds")

        # Drop the last three characters of the data file name (presumably the
        # "csv" extension, which leaves the trailing dot in place -- confirm
        # against wikis.json), then append the network code and new extension.
        stem = wiki['data'][:-3]
        if write_bin:
            out_name = f'{stem}{net.code}.bin'
            net.write_pickle(fname=os.path.join(dest_path, out_name))
        else:
            out_name = f'{stem}{net.code}.gml'
            net.copy_and_write_gml(os.path.join(dest_path, out_name))

    return 0
# Example #2
# 0
def load_data(wiki):
    """Load the data for one wiki and return it with bot activity cleaned up.

    Args:
        wiki: mapping describing the wiki; ``wiki['data']`` is passed to
            ``get_dataframe_from_csv`` and the whole mapping is passed to
            ``clean_up_bot_activity``.

    Returns:
        Whatever ``clean_up_bot_activity`` returns for the loaded data.
    """
    frame = get_dataframe_from_csv(wiki['data'])
    # The return value is ignored here; presumably prepare_data works on the
    # frame in place -- confirm against lib.
    lib.prepare_data(frame)
    return clean_up_bot_activity(frame, wiki)