def insert_copy(logger):
    conn = PostgresDb('workshop', 'postgres', 'db', 'postgres')
    query = "COPY trips FROM '{}' WITH (FORMAT CSV, HEADER true)".format(
        '/opt/data.csv')
    logger.info('Empezando el COPY')
    conn.execute(query)
    logger.info('COPY finalizado')
Esempio n. 2
0
def export_copy(logger, csv_path):
    conn = PostgresDb('workshop', 'postgres', 'db', 'postgres')
    query = "COPY (SELECT data->>'location' as location, \
data->>'uri' as uri, \
data->>'num_requests' as num_requests, \
data->>'bytes' as bytes, \
data->>'ip' as ip \
FROM events WHERE \
data->>'location'='MAD50' AND \
data->>'num_requests'='1') TO '{}' DELIMITER ',' CSV HEADER".format(csv_path)
    conn.execute(query)
    logger.info('Done writing')
def insert_line_by_line(logger, csv_path):
    with open(csv_path) as trips_csv:
        reader = csv.reader(trips_csv)
        # ignore header
        next(reader)
        conn = PostgresDb('workshop', 'postgres', 'db', 'postgres')
        for idx, row in enumerate(reader):
            query = "INSERT INTO trips VALUES(%s, %s, %s, %s, %s, %s, %s)"
            conn.execute(
                query,
                (row[0], row[1], row[2], row[3], row[4], row[5], row[6]))
            if idx % PAGE_SIZE == 0:
                logger.info("{} registros insertados".format(PAGE_SIZE))
Esempio n. 4
0
def export_line_by_line_json(logger, csv_path):
    conn = PostgresDb('workshop', 'postgres', 'db', 'postgres')
    query = "SELECT data FROM events WHERE data->>'location'='MAD50' and data->>'num_requests'='1'"
    records = conn.execute(query)

    with open(csv_path, 'w') as f:
        dict_writer = csv.DictWriter(
            f, fieldnames=['uri', 'ip', 'location', 'num_requests', 'bytes'])
        dict_writer.writeheader()
        for idx, row in enumerate(records):
            dict_writer.writerow(row[0])
            if idx % 1000 == 0:
                print('1000 inserted: {}'.format(idx))
    logger.info('Done writing')
def batch_insert(logger, csv_path):
    # http://initd.org/psycopg/docs/extras.html#fast-exec
    with open(csv_path) as trips_csv:
        reader = csv.reader(trips_csv)
        # ignore header
        next(reader)
        try:
            conn = PostgresDb('workshop', 'postgres', 'db', 'postgres')
            arg_list = []
            for idx, row in enumerate(reader):
                arg_list.append(
                    (row[0], row[1], row[2], row[3], row[4], row[5], row[6]))
                if idx % PAGE_SIZE == 0:
                    conn.execute_values("INSERT INTO trips VALUES %s",
                                        arg_list,
                                        page_size=1000,
                                        disconnect=False)
                    logger.info("{} registros insertados".format(PAGE_SIZE))
                    arg_list = []
            else:
                if arg_list:
                    conn.execute_values("execute bulkplan %s",
                                        arg_list,
                                        page_size=10,
                                        disconnect=False)
        finally:
            logger.info('closing connection ...')
            conn.disconnect()
Esempio n. 6
0
def export_line_by_line(logger, csv_path):
    conn = PostgresDb('workshop', 'postgres', 'db', 'postgres')
    query = "SELECT data->'location' as location, \
data->>'uri' as uri, \
data->>'num_requests' as num_requests, \
data->>'bytes' as bytes, \
data->>'ip' as ip \
FROM events WHERE \
data->>'location'='MAD50' and data->>'num_requests'='1'"

    records = conn.execute(query)

    with open(csv_path, 'w') as f:
        writer = csv.writer(f,
                            delimiter=';',
                            quotechar='|',
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['location', 'uri', 'num_requests', 'bytes', 'ip'])
        for idx, row in enumerate(records):
            writer.writerow(row)
            if idx % 1000 == 0:
                print('1000 inserted: {}'.format(idx))
    logger.info('Done writing')
def insert_line_by_line_prepared_statements(logger, csv_path):
    # http://initd.org/psycopg/articles/2012/10/01/prepared-statements-psycopg/
    # https://www.postgresql.org/docs/current/static/sql-prepare.html
    with open(csv_path) as trips_csv:
        reader = csv.reader(trips_csv)
        # ignore header
        next(reader)
        try:
            conn = PostgresDb('workshop', 'postgres', 'db', 'postgres')
            prepared_statement = "PREPARE bulkplan as INSERT INTO trips VALUES \
            ($1, $2, $3, $4, $5, $6, $7)"

            conn.execute(prepared_statement, disconnect=False)
            for idx, row in enumerate(reader):
                conn.execute(
                    "execute bulkplan (%s, %s, %s, %s, %s, %s, %s)",
                    (row[0], row[1], row[2], row[3], row[4], row[5], row[6]),
                    disconnect=False)
                if idx % PAGE_SIZE == 0:
                    logger.info("{} registros insertados".format(PAGE_SIZE))
        finally:
            logger.info('closing connection ...')
            conn.disconnect()
Esempio n. 8
0
def count(logger):
    conn = PostgresDb('workshop', 'postgres', 'db', 'postgres')
    query = "select count(*) from trips"
    count = conn.execute(query)
    logger.info("total records: {}".format(count[0][0]))
Esempio n. 9
0
def clean_db(logger):
    logger.info("Cleaning database")
    conn = PostgresDb('workshop', 'postgres', 'db', 'postgres')
    query = "delete from trips"
    conn.execute(query)
    logger.info("Finished")
def insert_copy_events(logger, csv_path):
    conn = PostgresDb('workshop', 'postgres', 'db', 'postgres')
    query = "COPY events \
FROM '{}' WITH (FORMAT CSV, HEADER true)".format(csv_path)
    logger.info('Executing COPY...')
    conn.execute(query)