Example #1
 def test_type(self):
     bincopy = CopyManager(self.conn, self.table, self.cols)
     bincopy.copy(self.data)
     select_list = ','.join(self.cols)
     self.cur.execute("SELECT %s from %s" % (select_list, self.table))
     for rec in self.data:
         self.checkValues(rec, self.cur.fetchone())
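All of the examples on this page follow the same pgcopy pattern: build a CopyManager from an open psycopg2 connection, a target table, and a tuple of column names, then pass an iterable of row tuples to copy() and commit on the connection. A minimal standalone sketch of that workflow follows; the connection parameters, table, and column names are placeholders, not taken from any of the projects quoted here.

# Minimal pgcopy workflow sketch; connection details, table and column
# names are placeholders for illustration only.
from datetime import datetime

import psycopg2
from pgcopy import CopyManager

conn = psycopg2.connect(host='localhost', dbname='example', user='example')
cols = ('timestamp', 'nodeid', 'value')
rows = [(datetime.utcnow(), 1, 0.5), (datetime.utcnow(), 2, 0.7)]

mgr = CopyManager(conn, 'example_metrics', cols)   # inspects the target table's column types
mgr.copy(rows)                                     # binary COPY of all rows in one round trip
conn.commit()                                      # copy() does not commit; the caller must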
Example #2
 def test_default_public(self):
     # Use public schema by default
     bincopy = CopyManager(self.conn, self.table, self.cols)
     bincopy.copy(self.data)
     select_list = ','.join(self.cols)
     self.cur.execute("SELECT %s from %s" % (select_list, self.table))
     self.checkResults()
Example #3
 def test_type(self):
     bincopy = CopyManager(self.conn, self.table, self.cols)
     bincopy.copy(self.data)
     select_list = ",".join(self.cols)
     self.cur.execute("SELECT %s from %s" % (select_list, self.table))
     for rec in self.data:
         self.checkValues(rec, self.cur.fetchone())
Example #4
def dump_node_metrics(timestamp: object, node_metrics: dict, conn: object):
    """dump_node_metrics Dump Node Metrics

    Dump node metrics to TimeScaleDB

    Args:
        timestamp (object): attached timestamp
        node_metrics (dict): node metrics
        conn (object): TimeScaleDB connection object
    """
    schema = 'slurm'
    try:
        metric_names = list(list(node_metrics.values())[0].keys())

        for metric_name in metric_names:
            all_records = []
            target_table = f'{schema}.{metric_name}'
            cols = ('timestamp', 'nodeid', 'value')
            for node, node_data in node_metrics.items():
                all_records.append(
                    (timestamp, int(node), node_data[metric_name]))
            mgr = CopyManager(conn, target_table, cols)
            mgr.copy(all_records)
            conn.commit()
    except Exception as err:
        curs = conn.cursor()
        curs.execute("ROLLBACK")
        conn.commit()
        log.error(f"Fail to dump node metrics : {err}")
Example #5
def dump_node_jobs(timestamp: object, node_jobs: dict, conn: object):
    """dump_node_jobs Dump Node-Jobs

    Dump node-jobs correlation to TimeScaleDB

    Args:
        timestamp (object): Attached timestamp
        node_jobs (dict): Node-jobs correlation
        conn (object): TimeScaleDB connection object
    """
    try:
        all_records = []
        target_table = 'slurm.node_jobs'
        cols = ('timestamp', 'nodeid', 'jobs', 'cpus')
        for node, job_info in node_jobs.items():
            all_records.append(
                (timestamp, int(node), job_info['jobs'], job_info['cpus']))
        mgr = CopyManager(conn, target_table, cols)
        mgr.copy(all_records)
        conn.commit()
    except Exception as err:
        curs = conn.cursor()
        curs.execute("ROLLBACK")
        conn.commit()
        log.error(f"Fail to dump node-jobs correlation: {err}")
Example #6
File: sql.py Project: dellhpc/omnia
def write_metric_definitions(conn: object, metric_definitions: list):
    """write_metric_definitions Write Metric Definitions

    Write metric definitions to the table

    Args:
        conn (object): database connection
        metric_definitions (list): the metric definitions
    """
    if not check_table_exist(conn, 'metrics_definition'):
        cols = ('metric_id', 'metric_name', 'description', 'metric_type',
                'metric_data_type', 'units', 'accuracy', 'sensing_interval',
                'discrete_values', 'data_type')

        metric_definitions_table = [
            (i['Id'], i['Name'], i['Description'], i['MetricType'],
             i['MetricDataType'], i['Units'], i['Accuracy'],
             i['SensingInterval'], i['DiscreteValues'],
             utils.data_type_mapping[i['MetricDataType']])
            for i in metric_definitions
        ]

        # Sort
        metric_definitions_table = utils.sort_tuple_list(
            metric_definitions_table)

        mgr = CopyManager(conn, 'metrics_definition', cols)
        mgr.copy(metric_definitions_table)

    conn.commit()
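The check_table_exist helper used above is not included in this snippet. A plausible sketch of such a check (hypothetical, not necessarily the dellhpc/omnia implementation) queries information_schema:

# Hypothetical sketch of a table-existence check; the real project helper may differ.
def check_table_exist(conn: object, table_name: str) -> bool:
    cur = conn.cursor()
    cur.execute(
        "SELECT EXISTS (SELECT 1 FROM information_schema.tables "
        "WHERE table_name = %s)", (table_name,))
    (exists,) = cur.fetchone()
    cur.close()
    return exists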
Example #7
def dump_job_metrics(job_metrics: dict, conn: object):
    """dump_job_metrics Dump Job Metircs

    Dump job metrics to TimeScaleDB

    Args:
        job_metrics (dict): Job Metrics
        conn (object): TimeScaleDB connection object
    """
    try:
        target_table = 'slurm.jobs'
        cols = ('job_id', 'array_job_id', 'array_task_id', 'name', 'job_state',
                'user_id', 'user_name', 'group_id', 'cluster', 'partition',
                'command', 'current_working_directory', 'batch_flag',
                'batch_host', 'nodes', 'node_count', 'cpus', 'tasks',
                'tasks_per_node', 'cpus_per_task', 'memory_per_node',
                'memory_per_cpu', 'priority', 'time_limit', 'deadline',
                'submit_time', 'preempt_time', 'suspend_time', 'eligible_time',
                'start_time', 'end_time', 'resize_time', 'restart_cnt',
                'exit_code', 'derived_exit_code')

        cur = conn.cursor()
        all_records = []

        for job in job_metrics:
            job_id = job[cols.index('job_id')]
            check_sql = f"SELECT EXISTS(SELECT 1 FROM slurm.jobs WHERE job_id={job_id})"
            cur.execute(check_sql)
            (job_exists, ) = cur.fetchall()[0]

            if job_exists:
                # Update
                nodes = job[cols.index('nodes')]
                job_state = job[cols.index('job_state')]
                user_name = job[cols.index('user_name')]
                start_time = job[cols.index('start_time')]
                end_time = job[cols.index('end_time')]
                resize_time = job[cols.index('resize_time')]
                restart_cnt = job[cols.index('restart_cnt')]
                exit_code = job[cols.index('exit_code')]
                derived_exit_code = job[cols.index('derived_exit_code')]
                update_sql = """ UPDATE slurm.jobs 
                                 SET nodes = %s, job_state = %s, user_name = %s, start_time = %s, end_time = %s, resize_time = %s, restart_cnt = %s, exit_code = %s, derived_exit_code = %s
                                 WHERE job_id = %s """
                cur.execute(update_sql,
                            (nodes, job_state, user_name, start_time, end_time,
                             resize_time, restart_cnt, exit_code,
                             derived_exit_code, job_id))
            else:
                all_records.append(job)

        mgr = CopyManager(conn, target_table, cols)
        mgr.copy(all_records)
        conn.commit()
    except Exception as err:
        curs = conn.cursor()
        curs.execute("ROLLBACK")
        conn.commit()
        log.error(f"Fail to dump job metrics: {err}")
Example #8
 def test_fallback_schema_honors_search_path(self, conn, cursor, data,
                                             schema):
     cursor.execute('SET search_path TO {}'.format(schema))
     bincopy = CopyManager(conn, self.table, self.cols)
     bincopy.copy(data)
     select_list = ','.join(self.cols)
     cursor.execute("SELECT %s from %s" % (select_list, self.table))
     self.checkResults(cursor, data)
Example #9
def copy_data(records, connection, table, fields):
    try:
        mgr = CopyManager(connection, table, fields)
        mgr.copy(records)
        connection.commit()
    except psycopg2.Error as e:
        typer.echo(str(e), err=True)
        return False
    return True
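Because copy_data commits on success, reports errors through typer, and returns a boolean instead of raising, the caller only needs to handle the failure path. A possible call site (hypothetical: the rows, table, and field names are placeholders and connection is an open psycopg2 connection):

# Hypothetical caller of copy_data; table and field names are placeholders.
rows = [(1, 'alpha'), (2, 'beta')]
if not copy_data(rows, connection, 'items', ('id', 'name')):
    # copy_data has already committed on success, so only the failure
    # path needs handling here.
    connection.rollback()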
Example #10
    def copy(self, tbl_name, data, cols=None):
        "Use pg_copy to copy over a large amount of data."
        logger.info("Received request to copy %d entries into %s." %
                    (len(data), tbl_name))
        if len(data) == 0:
            return  # Nothing to do....

        # If cols is not specified, use all the cols in the table, else check
        # to make sure the names are valid.
        if cols is None:
            cols = self.get_column_names(tbl_name)
        else:
            db_cols = self.get_column_names(tbl_name)
            assert all([col in db_cols for col in cols]),\
                "Do not recognize one of the columns in %s for table %s." % \
                (cols, tbl_name)

        # Do the copy. Use pgcopy if available.
        if self.sqltype == sqltypes.POSTGRESQL and CAN_COPY:
            # Check for automatic timestamps which won't be applied by the
            # database when using copy, and manually insert them.
            auto_timestamp_type = type(func.now())
            for col in self.get_column_objects(tbl_name):
                if col.default is not None:
                    if isinstance(col.default.arg, auto_timestamp_type) \
                       and col.name not in cols:
                        logger.info("Applying timestamps to %s." % col.name)
                        now = datetime.utcnow()
                        cols += (col.name, )
                        data = [datum + (now, ) for datum in data]

            # Now actually do the copy
            conn = self.engine.raw_connection()
            mngr = CopyManager(conn, tbl_name, cols)
            data_bts = []
            for entry in data:
                new_entry = []
                for element in entry:
                    if isinstance(element, str):
                        new_entry.append(element.encode('utf8'))
                    elif (isinstance(element, bytes) or element is None
                          or isinstance(element, Number)
                          or isinstance(element, datetime)):
                        new_entry.append(element)
                    else:
                        raise IndraDatabaseError(
                            "Don't know what to do with element of type %s."
                            "Should be str, bytes, datetime, None, or a "
                            "number." % type(element))
                data_bts.append(tuple(new_entry))
            mngr.copy(data_bts, BytesIO)
            conn.commit()
        else:
            # TODO: use bulk insert mappings?
            logger.warning("You are not using postgresql or do not have "
                           "pgcopy, so this will likely be very slow.")
            self.insert_many(tbl_name, [dict(zip(cols, ro)) for ro in data])
Example #11
def import_data(connection, table_name, column_names, data):
    """
    Import the temporary and anonymized data to a temporary table and write the changes back.
    :param connection: A database connection instance.
    :param str table_name: Name of the table to be populated with data.
    :param list column_names: A list of table fields
    :param list data: The table data.
    """
    mgr = CopyManager(connection, table_name, column_names)
    mgr.copy([[escape_str_replace(val) for col, val in row.items()]
              for row in data])
Example #12
def fast_insert(table_name, data, schema, verbose=False):
    conn = psycopg2.connect(host=hostname,
                            port=port,
                            user=username,
                            password=password,
                            database=database_name)
    mgr = CopyManager(conn, table_name, list(zip(*schema))[0])
    try:
        mgr.copy(data)
        # commit so the copied rows persist on this local connection
        conn.commit()
    except Exception as e:
        print(e)
Example #13
def main():
    symbols = get_symbols()
    for symbol in symbols:
        print("Fetching data for: ", symbol)
        for month in range(1, 3):  # last 2 months; you can go up to 24 months if you want to
            stock_data = fetch_stock_data(symbol, month)
            print('Inserting data...')
            mgr = CopyManager(conn, 'stocks_intraday', columns)
            mgr.copy(stock_data)
            conn.commit()
Example #14
def dump_idrac(
    ip: str,
    idrac_metrics: dict,
    metric_dtype_mapping: dict,
    ip_id_mapping: dict,
    conn: object,
):
    """dump_idrac Dump iDRAC Metrics

    Dump node metrics to TimeScaleDB

    Args:
        ip (str): ip address of iDRAC
        idrac_metrics (dict): iDRAC Metrics
        metric_dtype_mapping (dict): Metric-Datatype mapping
        ip_id_mapping (dict): ip-id mapping
        conn (object): TimeScaleDB connection object
    """
    try:
        schema_name = 'idrac'
        nodeid = ip_id_mapping[ip]

        for table_name, table_metrics in idrac_metrics.items():
            all_records = []
            dtype = metric_dtype_mapping[table_name]

            table_name = table_name.lower()
            target_table = f"{schema_name}.{table_name}"

            cols = ('timestamp', 'nodeid', 'source', 'fqdd', 'value')
            for metric in table_metrics:
                # We have to offset the timestamp by -6/-5 hours. For some
                # unknown reason, the timestamp reported by iDRAC is not
                # configured correctly.
                timestamp = parse_time(metric['Timestamp'])
                timestamp = timestamp.astimezone(tz.tzlocal())
                timestamp = timestamp.replace(tzinfo=tz.tzutc())
                timestamp = timestamp.astimezone(tz.tzlocal())

                source = metric['Source']
                fqdd = metric['FQDD']
                if metric['Value']:
                    value = utils.cast_value_type(metric['Value'], dtype)
                    all_records.append(
                        (timestamp, nodeid, source, fqdd, value))

            mgr = CopyManager(conn, target_table, cols)
            mgr.copy(all_records)
        conn.commit()
    except Exception as err:
        log.error(f"Fail to dump idrac metrics ({ip}): {err}")
Example #15
 def load_tuples_to_postgres(pgdb_obj, pg_schema, db_table, columns, data):
     # Use db_wrapper or a different PostgreSQL instantiation for this
     '''Load the CSV data into a table in a PostgreSQL database.
     Requires import psycopg2 and from pgcopy import CopyManager, Replace.
     Uses a psycopg2 database connection and pgcopy for fast bulk inserts.
     INPUTS:
     pgdb_obj: The instantiated psycopg2 connection object.
     pg_schema (str): The name of the PostgreSQL schema.
     db_table (str): The table in which to insert values.
     columns (tuple of strings): A tuple of the PostgreSQL column names to insert into.
     data: The rows to insert.
     RETURNS: None
     '''
     mgr = CopyManager(pgdb_obj, pg_schema + '.' + db_table, columns)
     mgr.copy(data)
     pgdb_obj.commit()
Example #16
 def test_dropped_col(self, conn, cursor, schema):
     sql = 'ALTER TABLE {} DROP COLUMN {}'
     col = self.cols[1]
     cursor.execute(sql.format(self.table, col))
     msg = '"{}" is not a column of table "{}"."{}"'
     with pytest.raises(ValueError, match=msg.format(col, schema, self.table)):
         CopyManager(conn, self.table, self.cols)
Example #17
def store_backup(conn, table_name, columns, records, query):
    """
    Stores records in backup database
    :param conn: Backup psycopg2 connection
    :param table_name: Table name
    :param columns: List of column names
    :param records: Ordered table records
    :param query: Query string for creating table
    """

    cur = conn.cursor()
    cur.execute(query.format(Identifier(table_name)))

    with Replace(conn, table_name) as temp:
        mgr = CopyManager(conn, temp, columns)
        mgr.copy(records)

    cur.close()
Example #18
File: sql.py Project: dellhpc/omnia
def insert_nodes_metadata(conn: object, nodes_metadata: list, table_name: str):
    """insert_nodes_metadata Insert Nodes Metadata

    Insert nodes metadata to metadata table

    Args:
        conn (object): database connection
        nodes_metadata (list): nodes metadata list
        table_name (str): table name
    """
    cols = tuple([col.lower() for col in list(nodes_metadata[0].keys())])
    records = []
    for record in nodes_metadata:
        values = [str(value) for value in record.values()]
        records.append(tuple(values))

    mgr = CopyManager(conn, table_name, cols)
    mgr.copy(records)
    conn.commit()
Example #19
def insert_random_data(connection):
    cur = connection.cursor()
    for sensor_id in range(1, 4, 1):
        data = (sensor_id, )
        # create random data
        simulate_query = """SELECT  generate_series(now() - interval '24 hour', now(), interval '5 minute') AS time,
           %s as sensor_id,
           random()*100 AS temperature,
           random() AS cpu
           """
        cur.execute(simulate_query, data)
        values = cur.fetchall()
        # define columns names of the table you're inserting into
        cols = ('time', 'sensor_id', 'temperature', 'cpu')
        # create copy manager with the target table and insert!
        mgr = CopyManager(connection, 'sensor_data', cols)
        mgr.copy(values)
    # commit after all sensor data is inserted
    connection.commit()
Example #20
def load_tuples_to_postgres(db_table, columns, data, dbhost, dbname, dbschema,
                            dbuser, dbpw):
    '''Load the CSV data into a table in a PostgreSQL database.
    Requires import psycopg2 and from pgcopy import CopyManager, Replace.
    Uses a psycopg2 database connection and pgcopy for fast bulk inserts.
    INPUTS:
    db_table (str): The table in which to insert values.
    columns (tuple of strings): A tuple of the PostgreSQL column names to insert into.
    data: The rows to insert.
    dbhost, dbname, dbschema, dbuser, dbpw (str): The Postgres host, database, schema and login credentials.
    RETURNS: None
    '''
    connection = psycopg2.connect(host=dbhost,
                                  dbname=dbname,
                                  user=dbuser,
                                  password=dbpw)
    mgr = CopyManager(connection, dbschema + '.' + db_table, columns)
    mgr.copy(data)
    connection.commit()
Example #21
def order_table(conn, cur, table_name, columns):
    """
    Orders table by timestamp in ascending order
    :param conn: AWS psycopg2 connection
    :param cur: AWS psycopg2 cursor
    :param table_name: Table name
    :param columns: List of column names
    :return: List of tuples with ordered data
    """
    print('sorting')
    cur.execute(
        SQL('SELECT * FROM {} ORDER BY {}.timestamp ASC;').format(
            Identifier(table_name), Identifier(table_name)))
    data = cur.fetchall()
    print('done sorting')
    with Replace(conn, table_name) as temp:
        mgr = CopyManager(conn, temp, columns)
        mgr.copy(data)

    return data
Example #22
    def __init__(self, starttime, af, saverQueue, host="localhost", dbname="ihr"):
       

        self.saverQueue = saverQueue
        self.expid = None
        self.prevts = 0 
        self.asNames = defaultdict(str, json.load(open("data/asNames.json")))
        self.currenttime = starttime
        self.af = af
        self.dataHege = [] 
        self.cpmgr = None

        local_port = 5432
        if host != "localhost" and host!="127.0.0.1":
            from sshtunnel import SSHTunnelForwarder
            self.server = SSHTunnelForwarder(
                host,
                ssh_username="******",
                ssh_private_key="/home/romain/.ssh/id_rsa",
                remote_bind_address=('127.0.0.1', 5432),
                set_keepalive=60) 

            self.server.start()
            logging.debug("SSH tunnel opened")
            local_port = str(self.server.local_bind_port)

            conn_string = "host='127.0.0.1' port='%s' dbname='%s'" % (local_port, dbname)
        else:
            conn_string = "dbname='%s'" % dbname

        self.conn = psycopg2.connect(conn_string)
        columns=("timebin", "originasn_id", "asn_id", "hege", "af")
        self.cpmgr = CopyManager(self.conn, 'ihr_hegemony', columns)
        self.cursor = self.conn.cursor()
        logging.debug("Connected to the PostgreSQL server")

        self.cursor.execute("SELECT number FROM ihr_asn WHERE ashash=TRUE")
        self.asns = set([x[0] for x in self.cursor.fetchall()])
        logging.debug("%s ASNS already registered in the database" % len(self.asns))

        self.run()
Example #23
def insertDataFast(conn):
    """Insert data quickly.

    :conn: database connection object
    :returns: TODO

    """
    # SQL statement that generates random rows of sensor data
    query_insert_rows_fast = ("SELECT generate_series("
                              "now() - interval '24 hour', "
                              "now(), "
                              "interval '5 minute') "
                              "AS time, "
                              "%s as sensor_id, "
                              "random()*100 AS temperature, "
                              "random() AS cpu")
    # SQL statement for checking the inserted data
    query_select_data = "SELECT * FROM sensor_data LIMIT %s;"

    # Create a database cursor
    cur = conn.cursor()
    # Fast insert of the generated data
    for id_ in range(1, 5):
        data = (id_, )
        cur.execute(query_insert_rows_fast, data)

        # Fetch the rows to insert
        values = cur.fetchall()
        # Define the column names of the target table (sensor_data)
        cols = ('time', 'sensor_id', 'temperature', 'cpu')
        # Create a CopyManager for the target table and insert the data
        mgr = CopyManager(conn, 'sensor_data', cols)
        mgr.copy(values)
    # Commit once the fast insert is complete
    conn.commit()
    # Check that the data was inserted successfully
    num = (5, )
    cur.execute(query_select_data, num)
    print(cur.fetchall())
    # Close the cursor
    cur.close()
Example #24
    def copy(self, tbl_name, data, cols=None):
        "Use pg_copy to copy over a large amount of data."
        if len(data) == 0:
            return  # Nothing to do....

        if cols is None:
            cols = self.get_columns(tbl_name)
        else:
            assert all([col in self.get_columns(tbl_name) for col in cols]),\
                "Do not recognize one of the columns in %s for table %s." % \
                (cols, tbl_name)
        if self.sqltype == sqltypes.POSTGRESQL and CAN_COPY:
            conn = self.engine.raw_connection()
            mngr = CopyManager(conn, tbl_name, cols)
            data_bts = []
            for entry in data:
                new_entry = []
                for element in entry:
                    if isinstance(element, str):
                        new_entry.append(element.encode('utf8'))
                    elif (isinstance(element, bytes) or element is None
                          or isinstance(element, Number)):
                        new_entry.append(element)
                    else:
                        raise IndraDatabaseError(
                            "Don't know what to do with element of type %s."
                            "Should be str, bytes, None, or a number." %
                            type(element))
                data_bts.append(tuple(new_entry))
            mngr.copy(data_bts, BytesIO)
            conn.commit()
        else:
            # TODO: use bulk insert mappings?
            logger.warning("You are not using postgresql or do not have "
                           "pgcopy, so this will likely be very slow.")
            self.insert_many(tbl_name, [dict(zip(cols, ro)) for ro in data])
Example #25
 def test_nosuchcolumn(self, conn, schema):
     col = self.cols[0] + '_does_not_exist'
     msg = '"{}" is not a column of table "{}"."{}"'
     with pytest.raises(ValueError, match=msg.format(col, schema, self.table)):
         CopyManager(conn, self.table, [col])
Example #26
def store_data(conn, cur, backup_conn, columns, table_name, query, data):
    """
    Stores records in AWS and backup database
    :param conn: AWS psycopg2 connection
    :param cur: AWS psycopg2 cursor
    :param backup_conn: Backup psycopg2 connection
    :param columns: List of column names
    :param table_name: Table name
    :param query: Query string for creating table
    :param data: Raw API data
    """

    BENCH.mark('Create table if not exists')

    cur.execute(query.format(Identifier(table_name)))

    cur.execute(
        SQL('CREATE INDEX IF NOT EXISTS {} ON {}(timestamp ASC);').format(
            Identifier(table_name + '_timestamp'), Identifier(table_name)))

    BENCH.mark('Create table if not exists')

    BENCH.mark('Initial table copy')

    mgr = CopyManager(conn, table_name, columns)

    # api_cols = data[0]
    # verify_columns(api_cols, columns)

    # records = data[1:]
    records = data
    mgr.copy(records)

    BENCH.mark('Initial table copy')

    BENCH.mark('Delete duplicates')

    delete_duplicates(cur, table_name)

    BENCH.mark('Delete duplicates')

    # BENCH.mark('Order table')

    # ordered_records = order_table(conn, cur, table_name, columns)

    # BENCH.mark('Order table')

    BENCH.mark('Store backup table')

    if BACKUP:
        # order_table() above is disabled, so back up the records as inserted
        store_backup(backup_conn, table_name, columns, records, query)

    BENCH.mark('Store backup table')

    BENCH.mark('Commit connections')

    conn.commit()

    if BACKUP:
        backup_conn.commit()

    BENCH.mark('Commit connections')

    print('{message: <20}: Cached!\n'.format(message=table_name))
Example #27
        begindata = 0
        endat = 1550
        tm_list = tm_code[begindata:endat]
        cols = ('time', *tm_list)
        table_name = 'tm_value'

        with connection.cursor() as cursor:
            create_tm_table(cursor, tm_list, table_name)

        data = transformation(path, begindata, endat)

        elapsed = time.perf_counter() - start
        print(f' {elapsed:0.4}', 'transformation end,', 'copy to db start')

        with connection.cursor() as cursor:
            mgr = CopyManager(connection, table_name, cols)
            mgr.copy(data)

        elapsed = time.perf_counter() - start
        print(f'{elapsed:0.4}', 'copy to db end')

        #######################################################################################################################
        begindata = 1550  #+1050
        endat = len(glob.glob(path))
        tm_list = tm_code[begindata:endat]
        cols = ('time', *tm_list)
        table_name = 'tm_value2'

        data = transformation(path, begindata, endat)

        with connection.cursor() as cursor:
Example #28
 def do_copy(self):
     mgr = CopyManager(self.conn, self.table, self.cols)
     mgr.copy(self.data)
Example #29
 def do_copy(self):
     mgr = CopyManager(self.conn, self.schema_table, self.cols)
     mgr.copy(self.data)
Example #30
 def test_copy(self, conn, cursor, schema_table, data):
     bincopy = CopyManager(conn, schema_table, self.cols)
     bincopy.copy(data, BytesIO)
     select_list = ','.join(self.cols)
     cursor.execute("SELECT %s from %s" % (select_list, schema_table))
     self.checkResults(cursor, data)
Example #31
 def test_type(self):
     bincopy = CopyManager(self.conn, self.schema_table, self.cols)
     bincopy.copy(self.data)
     select_list = ','.join(self.cols)
     self.cur.execute("SELECT %s from %s" % (select_list, self.table))
     self.checkResults()
Example #32
class saverPostgresql(object):

    """Dumps only hegemony results to a Postgresql database. """

    def __init__(self, starttime, af, saverQueue, host="localhost", dbname="ihr"):
       

        self.saverQueue = saverQueue
        self.expid = None
        self.prevts = 0 
        self.asNames = defaultdict(str, json.load(open("data/asNames.json")))
        self.currenttime = starttime
        self.af = af
        self.dataHege = [] 
        self.cpmgr = None

        local_port = 5432
        if host != "localhost" and host!="127.0.0.1":
            from sshtunnel import SSHTunnelForwarder
            self.server = SSHTunnelForwarder(
                host,
                ssh_username="******",
                ssh_private_key="/home/romain/.ssh/id_rsa",
                remote_bind_address=('127.0.0.1', 5432),
                set_keepalive=60) 

            self.server.start()
            logging.debug("SSH tunnel opened")
            local_port = str(self.server.local_bind_port)

            conn_string = "host='127.0.0.1' port='%s' dbname='%s'" % (local_port, dbname)
        else:
            conn_string = "dbname='%s'" % dbname

        self.conn = psycopg2.connect(conn_string)
        columns=("timebin", "originasn_id", "asn_id", "hege", "af")
        self.cpmgr = CopyManager(self.conn, 'ihr_hegemony', columns)
        self.cursor = self.conn.cursor()
        logging.debug("Connected to the PostgreSQL server")

        self.cursor.execute("SELECT number FROM ihr_asn WHERE ashash=TRUE")
        self.asns = set([x[0] for x in self.cursor.fetchall()])
        logging.debug("%s ASNS already registered in the database" % len(self.asns))

        self.run()

    def run(self):

        while True:
            elem = self.saverQueue.get()
            if isinstance(elem, str) and elem.endswith(";"):
                if elem.startswith("COMMIT"):
                    logging.warn("psql: start commit")
                    self.commit()
                    logging.warn("psql: end commit")
            else:
                self.save(elem)
            self.saverQueue.task_done()


    def save(self, elem):
        t, data = elem

        if t == "hegemony":
            ts, scope, hege = data

            # Update the current bin timestamp
            if self.prevts != ts:
                self.prevts = ts
                self.currenttime = datetime.utcfromtimestamp(ts)
                logging.debug("start recording hegemony")

            # Update seen ASNs
            if int(scope) not in self.asns:
                self.asns.add(int(scope))
                logging.warn("psql: add new scope %s" % scope)
                self.cursor.execute("INSERT INTO ihr_asn(number, name, tartiflette, disco, ashash) select %s, %s, FALSE, FALSE, TRUE WHERE NOT EXISTS ( SELECT number FROM ihr_asn WHERE number = %s)", (scope, self.asNames["AS"+str(scope)], scope))
                self.cursor.execute("UPDATE ihr_asn SET ashash = TRUE where number = %s", (int(scope),))
            insertQuery = "INSERT INTO ihr_asn(number, name, tartiflette, disco, ashash) select %s, %s, FALSE, FALSE, TRUE WHERE NOT EXISTS ( SELECT number FROM ihr_asn WHERE number = %s)"
            param = [(asn, self.asNames["AS"+str(asn)], asn) for asn in hege.keys() if  (isinstance(asn, int) or not asn.startswith("{") ) and int(asn) not in self.asns]
            #toremove?
            for asn, _, _ in param:
                self.asns.add(int(asn))
                self.cursor.execute("UPDATE ihr_asn SET ashash = TRUE where number = %s", (int(asn),))
            if len(param)>0:
                psycopg2.extras.execute_batch(self.cursor, insertQuery, param, page_size=100)
            
            # Hegemony values to copy in the database
            self.dataHege.extend([(self.currenttime, int(scope), int(k), float(v), int(self.af)) for k,v in hege.iteritems() if (isinstance(k,int) or not k.startswith("{")) and v!=0 ])

    def commit(self):
        logging.warn("psql: start copy")
        self.cpmgr.copy(self.dataHege, StringIO)
        self.conn.commit()
        logging.warn("psql: end copy")
        # Populate the table for AS hegemony cone
        logging.warn("psql: adding hegemony cone")
        self.cursor.execute("INSERT INTO ihr_hegemonycone (timebin, conesize, af, asn_id) SELECT timebin, count(distinct originasn_id), af, asn_id FROM ihr_hegemony WHERE timebin=%s and asn_id!=originasn_id and originasn_id!=0 GROUP BY timebin, af, asn_id;", (self.currenttime,))
        self.conn.commit()
        self.dataHege = []
        logging.warn("psql: end hegemony cone")