Esempio n. 1
0
def main():
    """Run the cold-cache benchmarks from a config file and write the timings.

    Command line::

        ./measure.py [nvme | ssd | hdd] [config.json file] [greenplum | postgres]

    For every entry of the config this function:

    1. creates and loads the benchmark table if it does not exist yet,
    2. measures the disk IO time with ``measure_io``,
    3. executes the MADlib statement produced by ``SQLGenerator`` and parses
       the execution time out of the returned rows,
    4. reads the UDA / compute durations written by the database server,
    5. appends one row (``bench``, ``total_cold``, ``uda_cold``,
       ``compute_cold``, ``data_cold``, ``io_cold``) to the result set.

    The collected rows are handed to ``write_to_file`` at the end of the run,
    and also when a database error or Ctrl-C aborts it.

    Exits with status 1 when the command line is malformed (wrong argument
    count, unknown disk type or unknown database type).
    """
    usage = ('Usage: ./measure.py [nvme | ssd | hdd] [config.json file] '
             '[greenplum | postgres]')
    if len(sys.argv) != 4:
        print(usage)
        sys.exit(1)

    disk, conf_file, db = sys.argv[1:4]
    # Reject an unknown database up front: every db-specific branch below
    # only knows 'greenplum' and 'postgres', and an unexpected value would
    # otherwise surface as a NameError after the benchmark has already run.
    if disk not in ('nvme', 'ssd', 'hdd') or db not in ('greenplum',
                                                        'postgres'):
        print(usage)
        sys.exit(1)

    config = read_config(conf_file)
    gen_datafiles(config)

    print(db)

    # NOTE: the hot-cache columns ('total_hot', 'uda_hot', 'compute_hot',
    # 'data_hot', 'io_hot') are not reported because the hot-cache pass is
    # currently disabled (see the note further down).
    csv_headers = [
        'bench', 'total_cold', 'uda_cold', 'compute_cold', 'data_cold',
        'io_cold'
    ]
    csv_rows = []

    # connection and cursor
    conn, cur = connect_to_database(db, measurement_dir, csv_rows, csv_headers)

    print('*' * 32)
    for i, cfg in enumerate(config):
        print('[Bench {:d}] Running benchmark {:s}: '.format(i, cfg['bench']))
        sql = SQLGenerator(cfg, db)
        tablename = sql.tablename
        is_lrmf = cfg['bench'] == 'lrmf'
        try:
            csv_row = {'bench': tablename}

            ##############
            # Cold Cache #
            ##############
            print('(1) Cold cache run...')

            # Copy data from datafile to the database table (first run only).
            if not table_exists(cur, tablename):
                start = datetime.now()
                print('[{}] creating table ({})...'.format(
                    str(start), tablename))
                # Build the statement once so that what is logged is exactly
                # what is executed.
                create_stmt = sql.create_table()
                print(create_stmt)
                cur.execute(create_stmt)
                # lrmf tables keep their payload in 'val', every other
                # benchmark in 'features'; that column is switched to PLAIN
                # storage.
                plain_column = 'val' if is_lrmf else 'features'
                cur.execute(
                    'alter table {} alter column {} set storage plain;'.format(
                        tablename, plain_column))
                conn.commit()
                elapsed = datetime.now() - start
                print('[{}] done creating table. Elapsed: {}'.format(
                    str(datetime.now()), str(elapsed)))

                copy_columns = '(row, col, val)' if is_lrmf else '(y, features)'
                with open(cfg['filename'], 'r') as f:
                    print('[{}] copying data from datafile ({}) to table...'.
                          format(str(datetime.now()), cfg['filename']))
                    start = datetime.now()
                    cur.copy_expert(
                        'COPY {} {} FROM STDIN CSV'.format(
                            tablename, copy_columns), f)
                    conn.commit()
                    elapsed = datetime.now() - start
                    print('[{}] done copying data. Elapsed: {}'.format(
                        str(datetime.now()), str(elapsed)))

            # NOTE: a true cold-cache setup (disconnect, restart the
            # database, flush the OS buffer cache, reconnect) used to run
            # here and is currently disabled.

            # measure disk IO time
            io = float(measure_io(conn, cur, tablename, db))
            csv_row['io_cold'] = io

            # run madlib
            start = datetime.now()
            print('[{}] Running madlib function {}()...'.format(
                str(start), madlib_func_lookup[sql.bench]))
            madlib_stmt = sql.madlib_func()
            print(madlib_stmt)
            cur.execute(madlib_stmt)
            conn.commit()
            stop = datetime.now()
            print('[{}] Done running madlib function. Elapsed: {}'.format(
                str(stop), str(stop - start)))

            # NOTE(review): the row holding the execution time is picked by a
            # fixed index (11 for greenplum, 2 for postgres) and its third
            # whitespace-separated token is used -- presumably an
            # EXPLAIN ANALYZE style "... runtime: <ms> ms" line; confirm
            # against SQLGenerator.madlib_func().
            result_rows = cur.fetchall()
            if db == 'greenplum':
                exectime_str = result_rows[11][0]
            else:
                exectime_str = result_rows[2][0]
            exectime = float(exectime_str.split()[2])

            # Get the uda and compute time from the files written by the
            # database server.
            if db == 'greenplum':
                transfile = search_trans_file()
                if transfile is None:  # safeguard
                    transfile = '/home/joon/gpdb-5.1.0/gpAux/gpdemo/datadirs/qddir/demoDataDir-1/trans.txt'
                with open(transfile, 'r') as f:
                    lastline = f.read().splitlines()[-1].split(',')
                uda_cumulative = float(lastline[0]) / 1000.0
                compute_cumulative = float(lastline[1]) / 1000.0

                # NOTE(review): unlike transfile there is no fallback when
                # search_duration_file() finds nothing -- confirm it cannot
                # return None here.
                duration_file = search_duration_file()
                with open(duration_file, 'r') as f:
                    durations = f.read().splitlines()[-1].split(',')
                udatime = float(durations[0]) / 1000.0  # us to ms
                computetime = float(durations[1]) / 1000.0  # us to ms

                # greenplum is weird: the cumulative values from trans.txt
                # have to be added on top of the last duration entry.
                udatime += uda_cumulative
                computetime += compute_cumulative
                print('deleting duration.txt file...' + duration_file)
                os.remove(duration_file)
            else:  # postgres
                cmd = 'tail -1 /home/postgres/duration.txt'
                # universal_newlines=True yields text on both Python 2 and 3;
                # communicate() reads the output and reaps the child.
                p = Popen(['sudo', 'su', '-c', cmd, 'postgres'],
                          stdout=PIPE,
                          universal_newlines=True)
                lastline = p.communicate()[0]
                durations = lastline.split(',')
                udatime = float(durations[0]) / 1000.0  # us to ms
                computetime = float(durations[1]) / 1000.0  # us to ms
                # delete trans.txt for setting 2 runs; wait for the removal
                # so it cannot race with the next benchmark.
                print('[INFO] Deleting trans.txt file...')
                Popen(['sudo', 'rm', '/usr/local/pgsql/data/trans.txt']).wait()

            # Whatever is left after UDA, compute and IO time is attributed
            # to data handling.
            data_cold = exectime - (udatime + computetime) - io
            csv_row['total_cold'] = exectime
            csv_row['uda_cold'] = '{:.2f}'.format(udatime)
            csv_row['compute_cold'] = '{:.2f}'.format(computetime)
            csv_row['data_cold'] = '{:.2f}'.format(data_cold)
            print('')

            #############
            # Hot Cache #
            #############
            # NOTE: the hot-cache pass (drop the madlib output tables, then
            # repeat the IO measurement and the madlib run to fill the
            # '*_hot' columns) is currently disabled.

            csv_rows.append(csv_row)
            # madlib complains if its output tables already exist on the
            # next run, so drop them now.
            drop_madlib_tables(conn, cur, sql)
            conn.commit()
            print('*' * 32)
        except psycopg2.Error as e:
            print("[EXCEPTION] unable to execute query")
            print(e.pgerror)
            # Save whatever has been measured so far before bailing out.
            filename = gen_filename(conn, cur)
            write_to_file(filename, measurement_dir, csv_rows, csv_headers)
            # Force the process down immediately.
            try:
                sys.exit(0)
            except SystemExit:
                os._exit(0)
        except KeyboardInterrupt:
            print('Keyboard interrupt')
            # NOTE(review): the disk name is used as the file name here,
            # whereas the other exits use gen_filename(conn, cur) --
            # presumably to avoid touching an interrupted connection;
            # confirm this is intended.
            write_to_file(disk, measurement_dir, csv_rows, csv_headers)
            try:
                sys.exit(0)
            except SystemExit:
                os._exit(0)

    filename = gen_filename(conn, cur)
    write_to_file(filename, measurement_dir, csv_rows, csv_headers)
Esempio n. 2
0
                                password='******')
    except psycopg2.Error as e:
        print("[EXCEPTION] unable to conenct to database")
        print(e.pgerror)
        exit()
    cur = conn.cursor()

    config = measure.read_config(
        '/home/joon/tabla.db/measurements/config_set2.json')

    csv_headers = ['bench', 'tablesize', 'pagecount']
    csv_rows = []

    for cfg in config:
        sql = SQLGenerator(cfg)
        cur.execute(sql.create_table())

        csv_row = {}
        with open(cfg['filename'], 'r') as f:
            if cfg['bench'] == 'lrmf':
                cur.copy_expert(
                    "COPY " + sql.tablename +
                    " (row, col, val) FROM STDIN CSV", f)
            else:
                cur.execute(
                    'alter table {} alter column features set storage plain;'.
                    format(sql.tablename))
                cur.copy_expert(
                    "COPY " + sql.tablename + " (y, features) FROM STDIN CSV",
                    f)
            conn.commit()