Example #1
File: db.py Project: voitsik/pypima
    def model2db(self, run_id, clock_model):
        """
        Parameters
        ----------
        run_id : int
            Id of record in `pima_runs` table.

        clock_model : list
            List of clock model components.

        """
        query = """INSERT INTO clock_models
(sta, time, clock_offset, clock_rate, group_delay, delay_rate, run_id)
VALUES %s;"""

        data = []

        for rec in clock_model:
            row = list(rec)
            row.append(run_id)
            data.append(row)

        with self.connw.cursor() as cursor:
            # cursor.execute(query, parameters)
            execute_values(cursor, query, data)

        self.connw.commit()
def calibrate_cbg():
    """Worker function: calculate CBG calibration "bestlines" for each
       probe."""
    state = get_worker_state()
    with state.db:
        progress("computing bestlines")
        with state.db.cursor(name='cbgcal_read_{:016x}'.format(
                random.randint(0, sys.maxsize))) as cur:
            cur.itersize = 1024
            cur.execute("""
                SELECT l.probeid,
                       ST_Distance(l.location, m.location) AS distance,
                       n.median AS rtt
                  FROM landmarks l, landmarks m,
                       (SELECT s_id, d_id, median(minrtt)
                          FROM (SELECT *
                                  FROM (SELECT s_id, d_id, odate, minrtt,
                                               rank() OVER (
                                                   PARTITION BY s_id, d_id
                                                   ORDER BY odate DESC)
                                          FROM calibration_rtts
                                         WHERE minrtt IS NOT NULL) _a
                                 WHERE rank <= 14) _b
                      GROUP BY s_id, d_id) n
                WHERE l.probeid = n.s_id and m.anchorid = n.d_id
            """)

            results = []
            block = None
            prev_sid = None
            for row in cur:
                if row[0] != prev_sid:
                    if prev_sid is not None:
                        results.extend(calibrate_cbg_for_block(prev_sid, block))
                    prev_sid = row[0]
                    block = []
                block.append((row[1], row[2]))

            if prev_sid is not None:
                results.extend(calibrate_cbg_for_block(prev_sid, block))

        progress("recording bestlines for {} probes".format(len(results)))
        with state.db.cursor() as cur:
            execute_values(
                cur,
                """
                    UPDATE landmarks AS l SET cbg_m = d.m, cbg_b = d.b
                      FROM (VALUES %s) AS d (id, m, b)
                     WHERE l.probeid = d.id
                """,
                results)
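The tail of calibrate_cbg shows a pattern worth calling out: execute_values is not limited to INSERT statements, because the single VALUES %s placeholder can sit inside any statement that accepts a VALUES list, here an UPDATE joined against it. A minimal, self-contained sketch of the same UPDATE-from-VALUES idea, using the landmarks(probeid, cbg_m, cbg_b) columns from above and made-up numbers:

import psycopg2
from psycopg2.extras import execute_values

rows = [(101, 0.021, 4.7), (102, 0.019, 5.1)]  # (id, m, b) per probe

conn = psycopg2.connect("dbname=test")
with conn, conn.cursor() as cur:
    execute_values(
        cur,
        """
        UPDATE landmarks AS l SET cbg_m = d.m, cbg_b = d.b
          FROM (VALUES %s) AS d (id, m, b)
         WHERE l.probeid = d.id
        """,
        rows)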
Example #3
File: db.py Project: voitsik/pypima
    def uvfits2db(self, fits_file, b1950_name, run_id):
        """
        Parameters
        ----------
        fits_file : UVFits object
            UV-FITS file object.
        b1950_name : str
            B1950 source name.
        run_id : int
            Id of the record in ``pima_runs`` table.

        """
        data = []
        file_name = os.path.basename(fits_file.file_name)

        for ind in range(fits_file.gcount):
            time = fits_file.dates[ind]  # As datetime.datetime object
            ant1_name, ant2_name = fits_file.get_ant_by_ind(ind)
            inttime = float(fits_file.inttime[ind])

            for if_ind in range(fits_file.no_if):
                flux = float(fits_file.amplitudes[ind, if_ind])
                weight = float(fits_file.weights[ind, if_ind])

                if weight <= 0:
                    continue

                freq = float(fits_file.freq +
                             fits_file.freq_table[if_ind]['if_freq'])
                uu = float(fits_file.u_raw[ind])
                vv = float(fits_file.v_raw[ind])

                row = (ind+1, time, if_ind+1, b1950_name, fits_file.exper_name,
                       fits_file.band, fits_file.stokes, ant1_name, ant2_name,
                       uu, vv, freq, flux, weight, inttime, file_name, run_id)
                data.append(row)

        query = """INSERT INTO ra_uvfits (ind, time, if_id, source, exper_name,
band, polar, sta1, sta2, u, v, freq, ampl, weight, inttime, file_name, run_id)
VALUES %s;"""

        if data:
            with self.connw.cursor() as cursor:
                # cursor.execute('DELETE FROM ra_uvfits WHERE file_name = %s;',
                #                (file_name, ))
                # cur.executemany(query, data)
                execute_values(cursor, query, data)
            self.connw.commit()
def seedData(cursor, rows):
  start_time = time.time()

  # ... bad loop
  # posts_data = []
  # for i in range(rows):
  #   posts_data.append((fake.sentence(), fake.text()))

  # ... good loop
  posts_data = [ (fake.sentence(), fake.text()) for i in range(rows) ]

  # ... map
  # posts_data = map(lambda i: (fake.sentence(), fake.text()), range(rows))

  # seed posts data
  execute_values(cursor, "INSERT INTO posts (title, content) VALUES %s", posts_data)
  print("seeding %s rows of faker data\n... took %s seconds\n" % (rows, time_util.getTime(start_time, time.time())))
Example #5
    def insert(self, data, chunks=None):
        if not isinstance(data, BatchIterator):
            data = Iterator(data, dtypes=self.dtypes).batchs(chunks=chunks)

        columns = "(" + ", ".join(self.groups) + ")"
        insert_str = "INSERT INTO {name} {columns} VALUES".format(
            name=self.name, columns=columns)
        insert = insert_str + " %s"
        cur = self.conn.cursor()
        num_groups = len(data.groups)
        for row in data:
            shape = row.batch.shape.to_tuple()
            if len(shape) == 1 and num_groups > 1:
                value = row.batch.to_df().values  # .to_ndarray().reshape(1, -1)
            elif len(shape) == 1 and num_groups == 1:
                value = row.batch.to_df().values  # .to_ndarray().reshape(-1, 1)
            else:
                value = row.batch.to_df().values
            execute_values(cur, insert, value, page_size=len(data))
        cur.close()
def retrieve_anchor_ping_measurements(ainfo):
    """Worker function: retrieve all ping measurements targeting the
       anchor with IPv4 address ADDR, and record their metadata in the
       database.
    """
    addr, _, max_stop_time = ainfo
    state = get_worker_state()
    params = {
        "target_ip": addr,
        "status": "2,4,8",
        "type": "ping",
        "optional_fields": "probes"
    }
    if max_stop_time is not None:
        params["stop_time__gt"] = max_stop_time.timestamp()

    updates = list(retrieve_atlas(
        state.session, 'measurements', params=params,
        constructor = lambda m: (
            addr, m["id"],
            maybe_dt_from_timestamp(m["start_time"]),
            maybe_dt_from_timestamp(m["stop_time"]),
            [p["id"] for p in m["probes"]]
        )))
    if updates:
        with state.db, state.db.cursor() as cur:
            execute_values(cur, """
                    INSERT INTO ripe_measurements
                        (d_addr, meas_id, start_time, stop_time, probes)
                    VALUES %s
                    ON CONFLICT (meas_id)
                    DO UPDATE SET d_addr     = excluded.d_addr,
                                  start_time = excluded.start_time,
                                  stop_time  = excluded.stop_time,
                                  probes     = excluded.probes
                """, updates)

            progress("{}: updated {} measurement{}"
                     .format(addr, len(updates),
                             "" if len(updates) == 1 else "s"))
    def run_query(self, query, vars=None, bulk_query=False):
        str_query = query.decode(encoding='UTF-8', errors='replace')
        str_query = str_query.replace(u"\uFFFD", "?")
        ctx.logger.debug('Running query: {0}'.format(str_query))
        with closing(self._connection.cursor()) as cur:
            try:
                if bulk_query:
                    execute_values(cur, query, vars)
                else:
                    cur.execute(query, vars)
                status_message = cur.statusmessage
                fetchall = cur.fetchall()
                result = {'status': status_message, 'all': fetchall}
                ctx.logger.debug('Running query result status: {0}'
                                 .format(status_message))
            except Exception as e:
                fetchall = None
                status_message = str(e)
                result = {'status': status_message, 'all': fetchall}
                if status_message != 'no results to fetch':
                    ctx.logger.error('Running query result status: {0}'
                                     .format(status_message))
            return result
Example #8
File: db.py Project: voitsik/pypima
    def autospec2db(self, acta_file):
        """
        Store autocorrelation spectrum to the database.

        """
        exper, band = acta_file.header['experiment'].split('_')
        polar = acta_file.header['polar']
        sta = acta_file.header['station']
        start_date = acta_file.header['start_date']
        stop_date = acta_file.header['stop_date']
        obs = acta_file.header['obs']
        scan_name = acta_file.header['scan_name']

        delete_query = """DELETE FROM autospec_info
WHERE exper_name = %s AND band = %s AND polar = %s AND sta = %s AND
scan_name = %s;"""
        query_info = """INSERT INTO autospec_info
(exper_name, band, polar, sta, start_date, stop_date, obs, scan_name)
VALUES %s RETURNING id;"""
        query_data = """INSERT INTO autospec
(if_num, chann_num, freq, ampl, info_id) VALUES %s;"""
        data = []

        with self.connw.cursor() as cursor:
            cursor.execute(delete_query, (exper, band, polar, sta, scan_name))
            cursor.execute(query_info, [(exper, band, polar, sta, start_date,
                                        stop_date, obs, scan_name)])
            info_id = cursor.fetchone()[0]

            for ind in range(acta_file.header['num_of_points']):
                row = (acta_file.if_num[ind], acta_file.channel[ind],
                       acta_file.freq[ind], acta_file.ampl[ind], info_id)
                data.append(row)
            execute_values(cursor, query_data, data, page_size=2048)

        self.connw.commit()
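Two details in autospec2db are easy to miss. query_info uses the VALUES %s form but is run with a plain cursor.execute(); this works because psycopg2 adapts a Python tuple to a SQL row, so the single tuple parameter expands to ('x', 'y', ...). The RETURNING id clause plus fetchone() then supplies the foreign key for the bulk insert of child rows. A minimal sketch of both ideas against an invented parent/child pair of tables:

from psycopg2.extras import execute_values

# Assumed schema, for illustration only:
#   runs(id serial primary key, name text, started timestamptz)
#   samples(run_id int, value double precision)
def store_run(cur, name, started, values):
    # One tuple parameter -> adapted to a single SQL row for VALUES %s.
    cur.execute("INSERT INTO runs (name, started) VALUES %s RETURNING id",
                [(name, started)])
    run_id = cur.fetchone()[0]
    execute_values(cur,
                   "INSERT INTO samples (run_id, value) VALUES %s",
                   [(run_id, v) for v in values])
    return run_id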
Example #9
connection_string = os.environ.get("TIMESCALEDB_CONNECTION",
                                   default="dbname=pi user=pi")

with psycopg2.connect(connection_string) as conn:
    with conn.cursor() as cur:
        with DigitalInOut(board.D23) as pin:
            while True:
                start = time.time()
                values = []
                while time.time() - start < 1.0:
                    time.sleep(0.099)
                    pin.direction = Direction.OUTPUT
                    pin.value = False
                    pin.direction = Direction.INPUT
                    reading = 0
                    while pin.value is False:
                        reading += 1
                    values.append(
                        (datetime.utcnow(), "photoresistor", reading))

                execute_values(
                    cur,
                    """
                    INSERT INTO pi_obs (time, metric, value)
                    VALUES %s
                    """,
                    values,
                )
                conn.commit()
                print("Inserted ", len(values))
def update_caches(cur, rh_account_id: int, system_id: int, opt_out: bool, stale: bool, new_rule_hits: dict,   # pylint: disable=too-many-branches
                  updated_rules: dict, fixed_rules: dict):
    """Update CVE caches"""
    new_cves_ids = list(new_rule_hits.keys())
    updated_cves_ids = list(updated_rules.keys())
    fixed_cves_ids = list(fixed_rules.keys())
    all_cves_ids = new_cves_ids + updated_cves_ids + fixed_cves_ids

    if not all_cves_ids:
        return

    inc_cves = []
    inc_cves_divergent_status = []
    dec_cves = []
    dec_cves_divergent_status = []
    cur.execute("""select cve_id, status_id from cve_account_data
                       where rh_account_id = %s and cve_id in %s
                       order by cve_id for update""",
                (rh_account_id, tuple(all_cves_ids),))
    for cve_id, status_id in cur.fetchall():
        # completely new system_vulnerabilities record, update caches
        if cve_id in new_cves_ids:
            if status_id == 0:
                inc_cves.append(cve_id)
            else:
                inc_cves_divergent_status.append(cve_id)
        # do not double increase counts if CVE has already been imported by VMaaS
        elif cve_id in updated_cves_ids and (updated_rules[cve_id][2] is not None or updated_rules[cve_id][4]):
            if status_id == updated_rules[cve_id][3]:
                inc_cves.append(cve_id)
            else:
                inc_cves_divergent_status.append(cve_id)
        # decrease counter only when VMaaS thinks we're not vulnerable or we're applying rule mitigation
        elif cve_id in fixed_cves_ids and (fixed_rules[cve_id][4] or fixed_rules[cve_id][2] is not None):
            if status_id == fixed_rules[cve_id][3]:
                dec_cves.append(cve_id)
            else:
                dec_cves_divergent_status.append(cve_id)
        all_cves_ids.remove(cve_id)

    insert_cves = [(cve_id, rh_account_id, 1, 0) for cve_id in all_cves_ids]

    system_cve_change = len(insert_cves) + len(inc_cves) + len(inc_cves_divergent_status) - len(dec_cves) - len(dec_cves_divergent_status)

    cur.execute("""update system_platform set cve_count_cache = cve_count_cache + %s where id = %s""", (system_cve_change, system_id,))

    if opt_out or stale:  # do not update CVE level caches if the system is opted out or stale
        return

    if dec_cves:
        cur.execute("""update cve_account_data set systems_affected = systems_affected - 1
                            where rh_account_id = %s and cve_id in %s""", (rh_account_id, tuple(dec_cves)))

    if dec_cves_divergent_status:
        cur.execute("""update cve_account_data set systems_affected = systems_affected - 1,
                                                    systems_status_divergent = systems_status_divergent - 1
                        where rh_account_id = %s and cve_id in %s""", (rh_account_id, tuple(dec_cves_divergent_status)))

    if inc_cves:
        cur.execute("""update cve_account_data set systems_affected = systems_affected + 1
                            where rh_account_id = %s and cve_id in %s""", (rh_account_id, tuple(inc_cves)))

    if inc_cves_divergent_status:
        cur.execute("""update cve_account_data set systems_affected = systems_affected + 1,
                                                    systems_status_divergent = systems_status_divergent + 1
                        where rh_account_id = %s and cve_id in %s""", (rh_account_id, tuple(inc_cves_divergent_status)))

    if insert_cves:
        execute_values(cur, """insert into cve_account_data
                                (cve_id, rh_account_id, systems_affected, systems_status_divergent) values %s
                                on conflict (cve_id, rh_account_id) do update set
                                    systems_affected = cve_account_data.systems_affected
                                                    + excluded.systems_affected,
                                    systems_status_divergent = cve_account_data.systems_status_divergent
                                                            + excluded.systems_status_divergent""",
                       insert_cves, page_size=len(insert_cves))
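update_caches passes page_size=len(insert_cves). By default execute_values sends rows in pages of 100, issuing one statement per page; setting page_size to the batch length sends everything in a single statement. A simplified sketch of the same insert, reusing the cve_account_data columns from above:

from psycopg2.extras import execute_values

def bulk_insert_cve_cache(cur, rows):
    if not rows:
        return  # mirror the original's guard: nothing to do for an empty batch
    execute_values(cur,
                   """insert into cve_account_data
                      (cve_id, rh_account_id, systems_affected, systems_status_divergent)
                      values %s""",
                   rows,
                   page_size=len(rows))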
Example #11
def update_batch_summary(batch_summary):
    '''
    This function will update the batch summary as we go for each value in batch_summary
    :param batch_summary: the dictionary we are using
    :return:
    '''
    db = get_db_connection(CONFIG)
    cur = db.cursor()
    ###two cases...this is a new batch (so batch id doesn't exist) or the batch already exists
    batch_exists = get_table_noconn(
        '''select * from batch_summary where batch_id={}'''.format(
            batch_summary['batch_id']), db)
    if CONFIG['sql_flavor'] == 'sqlite':
        if len(batch_exists) == 0:
            # create individual data tables
            diskEngine = create_engine('sqlite:///{}/{}.db'.format(
                CONFIG['projectPath'], CONFIG['db_name']))
            ###IF the batch doesn't exist, make the row (it's just the batch status and batch_id)
            pd.DataFrame(batch_summary, index=[0]).to_sql('batch_summary',
                                                          diskEngine,
                                                          if_exists='append',
                                                          index=False)
            db.commit()
        else:
            ##First check for any json and do a json.dumps
            for key in batch_summary:
                if batch_summary[key].__class__ == dt.datetime:
                    batch_summary[key] = batch_summary[key].strftime(
                        '%Y-%m-%d %H:%M:%S')
                if batch_summary[key].__class__ == dict:
                    batch_summary[key] = json.dumps(batch_summary[key])
                if batch_summary[key] != batch_exists[0][key]:
                    update_statement = 'update batch_summary set {}=? where batch_id={}'.format(
                        key, batch_summary['batch_id'])
                    try:
                        cur.execute(update_statement, (batch_summary[key], ))
                        db.commit()
                    except Exception as err:
                        print(err)
                        print(batch_summary[key])
    elif CONFIG['sql_flavor'] == 'postgres':
        if len(batch_exists) == 0:
            ###IF the batch doesn't exist, make the row (it's just the batch status and batch_id)
            columns = batch_summary.keys()
            columns_list = str(tuple([str(i)
                                      for i in columns])).replace("'", "")
            values = [tuple(batch_summary[key] for key in batch_summary)]
            insert_statement = 'insert into {table} {collist} values %s'.format(
                table='batch_summary', collist=columns_list)
            execute_values(cur,
                           insert_statement,
                           values,
                           page_size=len(values))
            db.commit()
        else:
            ##First check for any json and do a json.dumps
            for key in batch_summary:
                if batch_summary[key].__class__ == dict:
                    batch_summary[key] = json.dumps(batch_summary[key])
            columns = [i for i in batch_summary.keys() if i != 'batch_id']
            columns_statement = ', '.join(['{}=%s'.format(i) for i in columns])
            values = [
                tuple(batch_summary[key] for key in batch_summary
                      if key != 'batch_id')
            ]
            update_statement = 'update batch_summary set {} where batch_id={}'.format(
                columns_statement, batch_summary['batch_id'])
            cur.execute(update_statement, values[0])
            db.commit()
    cur.close()
    db.close()
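update_batch_summary builds its column list with Python string formatting. That works here, but psycopg2.sql (used in a later example in this listing) composes identifiers safely while leaving the row values to execute_values. A hedged sketch of just the postgres insert branch rewritten that way, assuming the same batch_summary dict:

from psycopg2 import sql
from psycopg2.extras import execute_values

def insert_batch_summary(cur, batch_summary):
    columns = list(batch_summary.keys())
    values = [tuple(batch_summary[key] for key in columns)]
    insert_statement = sql.SQL("INSERT INTO batch_summary ({}) VALUES %s").format(
        sql.SQL(", ").join(sql.Identifier(col) for col in columns))
    execute_values(cur, insert_statement.as_string(cur), values)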
Example #12
    def insert_armory_items(self, armory_items):
        insertion_query = "INSERT INTO armory_item (item_id, name, value, weight) VALUES %s"
        list_of_tuples = armory_items
        execute_values(self.pg_cursor, insertion_query, list_of_tuples)
        self.pg_connection.commit()
def lambda_handler(event, context):
    
    # Get parameters for database
    ssm = boto3.client('ssm', 'us-east-1')
    user = ssm.get_parameter(Name='/apartments/db/user', WithDecryption=True)['Parameter']['Value']
    password = ssm.get_parameter(Name='/apartments/db/password', WithDecryption=True)['Parameter']['Value']
    host = ssm.get_parameter(Name='/apartments/db/host', WithDecryption=True)['Parameter']['Value']
    database = ssm.get_parameter(Name='/apartments/db/database', WithDecryption=True)['Parameter']['Value']
    
    # Connect
    connection = psycopg2.connect(user=user,
                                  password=password,
                                  host=host,
                                  port="5432",
                                  database=database)
    cursor = connection.cursor()
    
    # Parse event for needed data
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    key = key.replace('%3A', ':').replace('+', ' ')
    
    # Read in the object
    s3 = boto3.resource('s3')
    content_object = s3.Object(bucket, key)
    file_content = content_object.get()['Body'].read().decode('utf-8')
    json_data = json.loads(file_content)


    # Neighborhood Map
    neighborhoodMaps = {'10453': 'Central Bronx',
                            '10457': 'Central Bronx',
                            '10460': 'Central Bronx',
                            '10458': 'Bronx Park and Fordham',
                            '10467': 'Bronx Park and Fordham',
                            '10468': 'Bronx Park and Fordham',
                            '10451': 'High Bridge and Morrisania',
                            '10452': 'High Bridge and Morrisania',
                            '10456': 'High Bridge and Morrisania',
                            '10454': 'Hunts Point and Mott Haven',
                            '10455': 'Hunts Point and Mott Haven',
                            '10459': 'Hunts Point and Mott Haven',
                            '10474': 'Hunts Point and Mott Haven',
                            '10463': 'Kingsbridge and Riverdale',
                            '10471': 'Kingsbridge and Riverdale',
                            '10466': 'Northeast Bronx',
                            '10469': 'Northeast Bronx',
                            '10470': 'Northeast Bronx',
                            '10475': 'Northeast Bronx',
                            '10461': 'Southeast Bronx',
                            '10462': 'Southeast Bronx',
                            '10464': 'Southeast Bronx',
                            '10465': 'Southeast Bronx',
                            '10472': 'Southeast Bronx',
                            '10473': 'Southeast Bronx',
                            '11212': 'Central Brooklyn',
                            '11213': 'Central Brooklyn',
                            '11216': 'Central Brooklyn',
                            '11233': 'Central Brooklyn',
                            '11238': 'Central Brooklyn',
                            '11209': 'Southwest Brooklyn',
                            '11214': 'Southwest Brooklyn',
                            '11228': 'Southwest Brooklyn',
                            '11204': 'Borough Park',
                            '11218': 'Borough Park',
                            '11219': 'Borough Park',
                            '11230': 'Borough Park',
                            '11234': 'Canarsie and Flatlands',
                            '11236': 'Canarsie and Flatlands',
                            '11239': 'Canarsie and Flatlands',
                            '11223': 'Southern Brooklyn',
                            '11224': 'Southern Brooklyn',
                            '11229': 'Southern Brooklyn',
                            '11235': 'Southern Brooklyn',
                            '11201': 'Northwest Brooklyn',
                            '11205': 'Northwest Brooklyn',
                            '11215': 'Northwest Brooklyn',
                            '11217': 'Northwest Brooklyn',
                            '11231': 'Northwest Brooklyn',
                            '11203': 'Flatbush',
                            '11210': 'Flatbush',
                            '11225': 'Flatbush',
                            '11226': 'Flatbush',
                            '11207': 'East New York and New Lots',
                            '11208': 'East New York and New Lots',
                            '11211': 'Greenpoint',
                            '11222': 'Greenpoint',
                            '11220': 'Sunset Park',
                            '11232': 'Sunset Park',
                            '11206': 'Bushwick and Williamsburg',
                            '11221': 'Bushwick and Williamsburg',
                            '11237': 'Bushwick and Williamsburg',
                            '10026': 'Central Harlem',
                            '10027': 'Central Harlem',
                            '10030': 'Central Harlem',
                            '10037': 'Central Harlem',
                            '10039': 'Central Harlem',
                            '10001': 'Chelsea and Clinton',
                            '10011': 'Chelsea and Clinton',
                            '10018': 'Chelsea and Clinton',
                            '10019': 'Chelsea and Clinton',
                            '10020': 'Chelsea and Clinton',
                            '10036': 'Chelsea and Clinton',
                            '10029': 'East Harlem',
                            '10035': 'East Harlem',
                            '10010': 'Gramercy Park and Murray Hill',
                            '10016': 'Gramercy Park and Murray Hill',
                            '10017': 'Gramercy Park and Murray Hill',
                            '10022': 'Gramercy Park and Murray Hill',
                            '10012': 'Greenwich Village and Soho',
                            '10013': 'Greenwich Village and Soho',
                            '10014': 'Greenwich Village and Soho',
                            '10004': 'Lower Manhattan',
                            '10005': 'Lower Manhattan',
                            '10006': 'Lower Manhattan',
                            '10007': 'Lower Manhattan',
                            '10038': 'Lower Manhattan',
                            '10280': 'Lower Manhattan',
                            '10002': 'Lower East Side',
                            '10003': 'Lower East Side',
                            '10009': 'Lower East Side',
                            '10021': 'Upper East Side',
                            '10028': 'Upper East Side',
                            '10044': 'Upper East Side',
                            '10065': 'Upper East Side',
                            '10075': 'Upper East Side',
                            '10128': 'Upper East Side',
                            '10023': 'Upper West Side',
                            '10024': 'Upper West Side',
                            '10025': 'Upper West Side',
                            '10031': 'Inwood and Washington Heights',
                            '10032': 'Inwood and Washington Heights',
                            '10033': 'Inwood and Washington Heights',
                            '10034': 'Inwood and Washington Heights',
                            '10040': 'Inwood and Washington Heights',
                            '11361': 'Northeast Queens',
                            '11362': 'Northeast Queens',
                            '11363': 'Northeast Queens',
                            '11364': 'Northeast Queens',
                            '11354': 'North Queens',
                            '11355': 'North Queens',
                            '11356': 'North Queens',
                            '11357': 'North Queens',
                            '11358': 'North Queens',
                            '11359': 'North Queens',
                            '11360': 'North Queens',
                            '11365': 'Central Queens',
                            '11366': 'Central Queens',
                            '11367': 'Central Queens',
                            '11412': 'Jamaica',
                            '11423': 'Jamaica',
                            '11432': 'Jamaica',
                            '11433': 'Jamaica',
                            '11434': 'Jamaica',
                            '11435': 'Jamaica',
                            '11436': 'Jamaica',
                            '11101': 'Northwest Queens',
                            '11102': 'Northwest Queens',
                            '11103': 'Northwest Queens',
                            '11104': 'Northwest Queens',
                            '11105': 'Northwest Queens',
                            '11106': 'Northwest Queens',
                            '11374': 'West Central Queens',
                            '11375': 'West Central Queens',
                            '11379': 'West Central Queens',
                            '11385': 'West Central Queens',
                            '11691': 'Rockaways',
                            '11692': 'Rockaways',
                            '11693': 'Rockaways',
                            '11694': 'Rockaways',
                            '11695': 'Rockaways',
                            '11697': 'Rockaways',
                            '11004': 'Southeast Queens',
                            '11005': 'Southeast Queens',
                            '11411': 'Southeast Queens',
                            '11413': 'Southeast Queens',
                            '11422': 'Southeast Queens',
                            '11426': 'Southeast Queens',
                            '11427': 'Southeast Queens',
                            '11428': 'Southeast Queens',
                            '11429': 'Southeast Queens',
                            '11414': 'Southwest Queens',
                            '11415': 'Southwest Queens',
                            '11416': 'Southwest Queens',
                            '11417': 'Southwest Queens',
                            '11418': 'Southwest Queens',
                            '11419': 'Southwest Queens',
                            '11420': 'Southwest Queens',
                            '11421': 'Southwest Queens',
                            '11368': 'West Queens',
                            '11369': 'West Queens',
                            '11370': 'West Queens',
                            '11372': 'West Queens',
                            '11373': 'West Queens',
                            '11377': 'West Queens',
                            '11378': 'West Queens',
                            '10302': 'Port Richmond',
                            '10303': 'Port Richmond',
                            '10310': 'Port Richmond',
                            '10306': 'South Shore',
                            '10307': 'South Shore',
                            '10308': 'South Shore',
                            '10309': 'South Shore',
                            '10312': 'South Shore',
                            '10301': 'Stapleton and St. George',
                            '10304': 'Stapleton and St. George',
                            '10305': 'Stapleton and St. George',
                            '10314': 'Mid-Island'
                        }


    # Borough Maps
    boroughMaps = {'Central Bronx': 'Bronx', 
                'Bronx Park and Fordham': 'Bronx', 
                'High Bridge and Morrisania': 'Bronx', 
                'Hunts Point and Mott Haven': 'Bronx', 
                'Kingsbridge and Riverdale': 'Bronx', 
                'Northeast Bronx': 'Bronx',
                'Southeast Bronx': 'Bronx',
                'Central Brooklyn': 'Brooklyn',
                'Southwest Brooklyn': 'Brooklyn', 
                'Borough Park': 'Brooklyn', 
                'Canarsie and Flatlands': 'Brooklyn', 
                'Southern Brooklyn': 'Brooklyn', 
                'Northwest Brooklyn': 'Brooklyn', 
                'Flatbush': 'Brooklyn',
                'East New York and New Lots': 'Brooklyn', 
                'Greenpoint': 'Brooklyn', 
                'Sunset Park': 'Brooklyn', 
                'Bushwick and Williamsburg': 'Brooklyn',
                'Central Harlem': 'Manhattan', 
                'Chelsea and Clinton': 'Manhattan', 
                'East Harlem': 'Manhattan', 
                'Gramercy Park and Murray Hill': 'Manhattan',
                'Greenwich Village and Soho': 'Manhattan', 
                'Lower Manhattan': 'Manhattan', 
                'Lower East Side': 'Manhattan', 
                'Upper East Side': 'Manhattan',
                'Upper West Side': 'Manhattan', 
                'Inwood and Washington Heights': 'Manhattan',
                'Northeast Queens': 'Queens', 
                'North Queens': 'Queens', 
                'Central Queens': 'Queens', 
                'Jamaica': 'Queens', 
                'Northwest Queens': 'Queens',
                'West Central Queens': 'Queens', 
                'Rockaways': 'Queens', 
                'Southeast Queens': 'Queens', 
                'Southwest Queens': 'Queens', 
                'West Queens': 'Queens',
                'Port Richmond': 'Staten Island', 
                'South Shore': 'Staten Island', 
                'Stapleton and St. George': 'Staten Island', 
                'Mid-Island': 'Staten Island',
                'No Neighborhood Found': 'None',
                'Kearney': 'None',
                'Stamford': 'None'
            }

    # Check data in database
    sqlCurrentDataInDB = "SELECT DISTINCT name, address FROM website_apartment;"
    cursor.execute(sqlCurrentDataInDB)
    currentDataInDB = cursor.fetchall()

    rowsToInsert = []
    for i, row in enumerate(json_data):
        address = row['address']
        area = row['area']

        if type(area) == str:
            area = area.replace('ft2', '')

        bedrooms = row['bedrooms']
        bikeScore = row['bikeScore']
        transitScore = row['transitScore']
        walkScore = row['walkScore']
        datetime = row['datetime']
        distanceToNearestIntersection = row['distanceToNearestIntersection']
        hasImage = row['has_image']
        hasMap = row['has_map']
        name = row['name']
        price = row['price']

        if type(price) == str:
            price = price.replace('$', '')

        sideOfStreet = row['sideOfStreet']
        url = row['url']
        longitude = row['geotag'][0]
        latitude = row['geotag'][1]
        
        # Check for area
        if row['area']:
            includesArea = True
        else:
            includesArea = False
        
        # Check for no fee
        if 'no fee' in row['name'].lower():
            advertisesNoFee = True
        else:
            advertisesNoFee = False
        
        # Check if repost
        if row['repost_of']:
            is_repost = True
        else:
            is_repost = False

        postalCode = row['postalCode'][0:5]

        try:
            neighborhood = neighborhoodMaps[postalCode]
        except KeyError:
            neighborhood = 'No Neighborhood Found'
        borough = boroughMaps[neighborhood]

        thisRow = (address, area, bedrooms, bikeScore, transitScore, walkScore, datetime, distanceToNearestIntersection, hasImage, hasMap, name, price, sideOfStreet, url, longitude, latitude, includesArea, advertisesNoFee, is_repost, postalCode, neighborhood, borough)
        
        rowAlreadyInDatabase = False

        # Check data in this file
        if i > 0:
            for existing in rowsToInsert:
                if thisRow[10] == existing[10] and thisRow[0] == existing[0]:
                    rowAlreadyInDatabase = True

        if (thisRow[10], thisRow[0]) in currentDataInDB:
            rowAlreadyInDatabase = True

        if not rowAlreadyInDatabase:
            rowsToInsert.append(thisRow)



    insert_query = 'INSERT INTO website_apartment (address, area, bedrooms, bike_score, transit_score, walk_score, datetime, distance_to_nearest_intersection, has_image, has_map, name, price, side_of_street, url, longitude, latitude, includes_area, advertises_no_fee, is_repost, postal_code, Neighborhood, borough) values %s'

    execute_values(cursor, insert_query, rowsToInsert, template=None)

    connection.commit()

    return None
def create_embed_table(conn, cursor):
    try:
        cursor.execute('DROP TABLE embeddings')
        conn.commit()
    except:
        print('no table to drop')
        conn.rollback()

    try:
        cursor.execute(
            'CREATE TABLE  embeddings (key varchar, embedding jsonb)')
        conn.commit()
    except:
        print("DB already created")


if __name__ == "__main__":

    conn, cursor = connect_db()
    create_embed_table(conn, cursor)

    print("loading embeding")
    model_path = '/data/GoogleNews-vectors-negative300.bin'
    model = KeyedVectors.load_word2vec_format(model_path, binary=True)
    print("embedding loaded")

    done = 0
    vocab = list(set(model.index2word))
    embeds = [model[key] for key in vocab]
    data = list(zip(vocab, embeds))
    print('running inserts')
    insert_query = "INSERT INTO EMBEDDINGS (key,embedding) VALUES %s"
    execute_values(cursor, insert_query, data, page_size=10)
    print('running commit')
    conn.commit()
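One caveat about the embeddings insert above: model[key] is a numpy float32 vector, and psycopg2 has no built-in adapter for numpy arrays (nor does a Python list adapt to a jsonb column without a cast), so the insert as written can fail with a "can't adapt" error depending on the environment. One way to make the rows adaptable is to convert each vector to plain floats and wrap it in psycopg2.extras.Json; a hedged sketch:

from psycopg2.extras import Json, execute_values

def insert_embeddings(cursor, vocab, embeds):
    # Plain-float lists wrapped in Json adapt cleanly to the jsonb column.
    data = [(word, Json([float(x) for x in vec])) for word, vec in zip(vocab, embeds)]
    execute_values(cursor,
                   "INSERT INTO embeddings (key, embedding) VALUES %s",
                   data,
                   page_size=1000)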
Example #15
import traceback
import psycopg2
from psycopg2.extras import execute_values
from psycopg2 import sql

try:
    conn = psycopg2.connect("dbname='test' host='127.0.0.1'")
except:
    print("I am unable to connect to the database")

cur = conn.cursor()
try:
    arr = [("James", 26), ("Kristof", 25)]
    execute_values(cur, "INSERT INTO aaa (name, age) VALUES %s", arr)
    conn.commit()

    tablename = "aaa"
    queryText = "INSERT INTO {table} (name, age) VALUES %s"
    query = sql.SQL(queryText).format(table=sql.Identifier(tablename))
    execute_values(cur, query.as_string(cur), arr)
    conn.commit()

except:
    print(traceback.format_exc())
Example #16
    def insert_dicts(self, kwargss, dropExtra=True):
        """
        Insert records into the table for the current test. Checks arguments to make
        sure all required columns are present, and prevents extra data from being entered.
        """

        # Chunk uploads so we get some progress, and don't create a huge transaction.
        for chunk_start in range(0, len(kwargss), 10):
            chunk_end = min(chunk_start + 10, len(kwargss))
            chunk = kwargss[chunk_start:chunk_end]

            self.log('building %d rows' % len(chunk))
            self._clear_memo()

            if chunk_start != 0:
                self._refresh_connection()

            with self.connection.cursor() as cursor:
                records = []
                for kwargs in chunk:

                    # Replace aliases with internal, canonical form
                    for alias, replacement in list(ALIASES.items()):
                        if alias in kwargs:
                            if replacement:
                                kwargs[replacement] = kwargs[alias]
                            del kwargs[alias]

                    # Check for extras
                    extra_columns = []
                    extra_values = []
                    for k, v in kwargs.items():
                        if k not in self.denormalized_columns:
                            extra_columns.append(k)
                            extra_values.append(v)

                    # Check for missing
                    missing_columns = []
                    for col in self.denormalized_columns:
                        if not col in kwargs:
                            if col not in self.optional_cols:
                                missing_columns.append(col)

                    # If we are missing stuff, bad!
                    if missing_columns:
                        required_columns = filter(lambda x: x not in self.optional_cols, self.denormalized_columns)
                        raise Exception('Columns [%s] not provided, expected columns [%s] - optional [%s]' % ((', ').join(missing_columns), (', ').join(required_columns), (', ').join(self.optional_cols)))

                    # Extra stuff is only sometimes bad
                    blob = None
                    if not dropExtra:
                        # If we have a 'blob' column, use that to accumulate extra fields (unless the blob field has been explicitly set)
                        if 'blob' in self.columns and 'blob' not in kwargs:
                            blob = dict(zip(extra_columns, extra_values))
                        elif extra_columns:
                            raise Exception('Extra columns [%s] provided, expected only: %s' % ((', ').join(extra_columns), (', ').join(self.denormalized_columns)))
                    records.append(self.get_normalized_row(cursor, kwargs, blob))

                self.log('adding %d rows' % len(records))

                try:
                    results = execute_values(cursor, self._insert_sql, records, fetch=True)
                    rowids = [x["id"] for x in results]
                    if rowids:
                        self.connection.commit()
                    else:
                        self.log('rolling back...')
                        self.connection.rollback()
                    return rowids
                except Exception as e:
                    self.log('rolling back...')
                    self.connection.rollback()
                    raise e
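insert_dicts relies on execute_values(..., fetch=True), which returns the rows produced by a RETURNING clause in the insert statement (the fetch keyword arrived in psycopg2 2.8). Note that it reads the results as x["id"], which only works with a dict-style cursor such as RealDictCursor; the default cursor returns plain tuples. A minimal sketch of the fetch pattern with a hypothetical experiments(id serial primary key, name text) table:

import psycopg2
from psycopg2.extras import RealDictCursor, execute_values

conn = psycopg2.connect("dbname=test")
with conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
    rows = [("alpha",), ("beta",)]
    results = execute_values(
        cur,
        "INSERT INTO experiments (name) VALUES %s RETURNING id",
        rows,
        fetch=True)
    new_ids = [r["id"] for r in results]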
Example #17
    ,siblings_spouse
    ,parents_children
    ,fare
) VALUES %s
'''

# this sucks and I don't like it.  But psycopg does not recognize numpy
# data types, so I have to register the np.int64 data type so it knows how to
# handle it. For my future self, just run a bunch of insertion queries, or just
# append the database using .to_sql() from the data frame.
psycopg2.extensions.register_adapter(np.int64, psycopg2._psycopg.AsIs)

# prep dataframe for execute_values.  Should be a list of tuples
prepped = list(df.to_records(index = False))

execute_values(cur, insertion_query, prepped)

# Exploratory queries/sanity checks
print('--------------------------------------------')

q1 = '''
SELECT count(name) as length
FROM passengers
'''

cur.execute(q1)
res = cur.fetchone()

print('Dataframe length', df.shape[0])
print('Database length', res[0])
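The register_adapter(np.int64, psycopg2._psycopg.AsIs) line above works because AsIs renders the value with str(), but it reaches into a private module. A slightly more explicit variant goes through the public psycopg2.extensions API and converts to a plain int first:

import numpy as np
from psycopg2.extensions import AsIs, register_adapter

def adapt_numpy_int64(value):
    # Render the numpy integer as a plain SQL literal.
    return AsIs(int(value))

register_adapter(np.int64, adapt_numpy_int64)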
Example #18
    def _execute(self,
                 query,
                 values=None,
                 silent=None,
                 values_list=False,
                 template=None,
                 commit=None,
                 slow_note=None,
                 reissued=False,
                 buffered=False):
        """
        Execute an SQL command, properly catching errors and returning the resulting cursor.

        INPUT:

        - ``query`` -- an SQL Composable object, the SQL command to execute.
        - ``values`` -- values to substitute for %s in the query.  Quoting from the documentation
            for psycopg2 (http://initd.org/psycopg/docs/usage.html#passing-parameters-to-sql-queries):

            Never, never, NEVER use Python string concatenation (+) or string parameters
            interpolation (%) to pass variables to a SQL query string. Not even at gunpoint.

        - ``silent`` -- boolean (default None).  If True, don't log a warning for a slow query.
            If None, allow DelayCommit contexts to control silencing.
        - ``values_list`` -- boolean (default False).  If True, use the ``execute_values`` method,
            designed for inserting multiple values.
        - ``template`` -- string, for use with ``values_list`` to insert constant values:
            for example ``"(%s, %s, 42)"``. See the documentation of ``execute_values``
            for more details.
        - ``commit`` -- boolean (default None).  Whether to commit changes on success.  The default
            is to commit unless we are currently in a DelayCommit context.
        - ``slow_note`` -- a tuple for generating more useful data for slow query logging.
        - ``reissued`` -- used internally to prevent infinite recursion when attempting to
            reset the connection.
        - ``buffered`` -- whether to create a server side cursor that must be manually
            closed and the connection committed (to close the transaction) after using it;
            this implies ``commit=False``.

        .. NOTE:

            If the Postgres connection has been closed, the execute statement will fail.
            We try to recover gracefully by attempting to open a new connection
            and issuing the command again.  However, this approach is not prudent if this
            execute statement is one of a chain of statements, which we detect by checking
            whether ``commit == False``.  In this case, we will reset the connection but reraise
            the interface error.

            The upshot is that you should use ``commit=False`` even for the last of a chain of
            execute statements, then explicitly call ``self.conn.commit()`` afterward.

        OUTPUT:

        - a cursor object from which the resulting records can be obtained via iteration.

        This function will also log slow queries.
        """
        if not isinstance(query, Composable):
            raise TypeError(
                "You must use the psycopg2.sql module to execute queries")

        if buffered:
            if commit is None:
                commit = False
            elif commit:
                raise ValueError("buffered and commit are incompatible")

        try:
            cur = self._db.cursor(buffered=buffered)

            t = time.time()
            if values_list:
                if template is not None:
                    template = template.as_string(self.conn)
                execute_values(cur, query.as_string(self.conn), values,
                               template)
            else:
                try:
                    cur.execute(query, values)
                except (OperationalError, ProgrammingError, NotSupportedError,
                        DataError, SyntaxError) as e:
                    try:
                        context = " happens while executing {}".format(
                            cur.mogrify(query, values))
                    except Exception:
                        context = " happens while executing {} with values {}".format(
                            query, values)
                    reraise(type(e),
                            type(e)(str(e) + context),
                            sys.exc_info()[2])
            if silent is False or (silent is None and not self._db._silenced):
                t = time.time() - t
                if t > self.slow_cutoff:
                    if values_list:
                        query = query.as_string(self.conn).replace(
                            "%s", "VALUES_LIST")
                    elif values:
                        try:
                            query = cur.mogrify(query, values)
                        except Exception:
                            # This shouldn't happen since the execution above was successful
                            query = query + str(values)
                    else:
                        query = query.as_string(self.conn)
                    if isinstance(query, bytes):  # PY3 compatibility
                        query = query.decode("utf-8")
                    self.logger.info(
                        query + " ran in \033[91m {0!s}s \033[0m".format(t))
                    if slow_note is not None:
                        self.logger.info(
                            "Replicate with db.%s.%s(%s)",
                            slow_note[0],
                            slow_note[1],
                            ", ".join(str(c) for c in slow_note[2:]),
                        )
        except (DatabaseError, InterfaceError):
            if self.conn.closed != 0:
                # If reissued, we need to raise since we're recursing.
                if reissued:
                    raise
                # Attempt to reset the connection
                self._db.reset_connection()
                if commit or (commit is None
                              and self._db._nocommit_stack == 0):
                    return self._execute(
                        query,
                        values=values,
                        silent=silent,
                        values_list=values_list,
                        template=template,
                        commit=commit,
                        slow_note=slow_note,
                        buffered=buffered,
                        reissued=True,
                    )
                else:
                    raise
            else:
                self.conn.rollback()
                raise
        else:
            if commit or (commit is None and self._db._nocommit_stack == 0):
                self.conn.commit()
        return cur
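The _execute docstring above mentions the template argument of execute_values, e.g. "(%s, %s, 42)". The template is the per-row snippet rendered for each element of the values list, so it can mix positional placeholders with constants or casts. A small standalone sketch with an invented answers(question, score, magic) table:

from psycopg2.extras import execute_values

def insert_with_constant(cur):
    rows = [("life", 1), ("universe", 2)]
    execute_values(
        cur,
        "INSERT INTO answers (question, score, magic) VALUES %s",
        rows,
        template="(%s, %s, 42)")  # two %s per row plus a constant third column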
Example #19
def sync_cve_md(page_size=5000):
    """Sync all CVE metadata from VMaaS"""
    LOGGER.info('Syncing CVE metadata')
    conn = DatabaseHandler.get_connection()
    cur = conn.cursor()
    impact_id_map = {}
    cur.execute("select name, id from cve_impact")
    for impact_name, impact_id in cur.fetchall():
        impact_id_map[impact_name] = impact_id
    cur.execute('select cve from cve_metadata')
    cves_in_db = []
    for cve_tuple in cur.fetchall():
        cves_in_db.append(cve_tuple[0])
    cve_list = [".*"]
    success = True
    page = 1
    session = requests.Session()
    while True:
        cve_request = {
            'cve_list': cve_list,
            'page_size': page_size,
            'page': page,
            'rh_only': True
        }
        LOGGER.info('Downloading CVE metadata (page: %s, page_size: %s)', page,
                    page_size)
        r_json = vmaas_post_request(VMAAS_CVES_ENDPOINT,
                                    cve_request,
                                    session=session)
        if r_json is None:
            success = False
            break
        LOGGER.info(
            'Importing CVE metadata (page: %s, page_size: %s, pages: %s)',
            page, page_size, r_json['pages'])
        cves = r_json['cve_list']
        to_insert = []
        to_update = []
        for cve in cves:
            description = cves[cve]['description']
            impact_id = impact_id_map[cves[cve]['impact']]
            public_date = cves[cve]['public_date'] or None
            modified_date = cves[cve]['modified_date'] or None
            cvss3_score = float(cves[cve]['cvss3_score']) if cves[cve].get(
                'cvss3_score') else None
            cvss3_metrics = cves[cve].get('cvss3_metrics')
            cvss2_score = float(cves[cve]['cvss2_score']) if cves[cve].get(
                'cvss2_score') else None
            cvss2_metrics = cves[cve].get('cvss2_metrics')
            row = (cve, description, impact_id, public_date, modified_date,
                   cvss3_score, cvss3_metrics, cvss2_score, cvss2_metrics)
            if cve not in cves_in_db:
                to_insert.append(row)
            else:
                to_update.append(row)
        if to_insert:
            execute_values(cur,
                           """insert into cve_metadata
                           (cve, description, impact_id, public_date, modified_date,
                           cvss3_score, cvss3_metrics, cvss2_score, cvss2_metrics)
                           values %s""",
                           to_insert,
                           page_size=len(to_insert))
        if to_update:
            execute_values(
                cur,
                """update cve_metadata set description = data.description,
                           impact_id = data.impact_id,
                           public_date = cast(data.public_date as timestamp with time zone),
                           modified_date = cast(data.modified_date as timestamp with time zone),
                           cvss3_score = cast(data.cvss3_score as numeric),
                           cvss3_metrics = data.cvss3_metrics,
                           cvss2_score = cast(data.cvss2_score as numeric),
                           cvss2_metrics = data.cvss2_metrics
                           from (values %s) as data (cve, description, impact_id, public_date, modified_date,
                           cvss3_score, cvss3_metrics, cvss2_score, cvss2_metrics)
                           where cve_metadata.cve = data.cve""",
                to_update,
                page_size=len(to_update))
        LOGGER.info(
            'Finished importing CVE metadata (page: %s, page_size: %s, pages: %s)',
            page, page_size, r_json['pages'])
        if page >= r_json['pages']:
            break
        page += 1
    cur.close()
    conn.commit()
    session.close()
    DatabaseHandler.close_connection()
    LOGGER.info('Finished syncing CVE metadata')
    return success
Example #20
def sync(dry_run):
    pathname = str(os.path.dirname(os.path.realpath(__file__)))
    logging.basicConfig(filename=pathname + '/log/main.log',
                        level=logging.DEBUG,
                        filemode='a', format='%(asctime)s - %(message)s')

    # Initialise connection to MITREid Connect DB
    connect_oidc_str = "dbname='" + config.mitreid_config['dbname'] + \
        "' user='" + config.mitreid_config['user'] + \
        "' host='" + config.mitreid_config['host'] + \
        "' password='" + config.mitreid_config['password'] + "'"
    try:
        connOIDC = psycopg2.connect(connect_oidc_str)
    except Exception as e:
        logging.error("Could not connect to MITREid Connect DB")
        logging.error(e)
        raise SystemExit("Could not connect to MITREid Connect DB")

    # Create psycopg2 cursor that can execute queries
    cursorOIDC = connOIDC.cursor()

    # Initialise connection to proxystatistics DB
    connect_proxystats_str = "dbname='" + \
        config.proxystats_config['dbname'] + \
        "' user='" + config.proxystats_config['user'] + \
        "' host='" + config.proxystats_config['host'] + \
        "' password='" + config.proxystats_config['password'] + "'"
    try:
        connProxystats = psycopg2.connect(connect_proxystats_str)
    except Exception as e:
        logging.error("Could not connect to proxystatistics DB")
        logging.error(e)
        raise SystemExit("Could not connect to proxystatistics DB")

    # Create psycopg2 cursor that can execute queries
    cursorProxystats = connProxystats.cursor()

    #
    # Select MITREid Connect clients
    #
    logging.debug("Retrieving client details from MITREid Connect DB")
    try:
        cursorOIDC.execute("SELECT client_id, client_name "
        "FROM client_details WHERE client_name IS NOT NULL;")
    except Exception as e:
        logging.error("Could not retrieve client details from MITREid "
                      "Connect DB")
        logging.error(e)
        raise SystemExit("Could not retrieve client details from MITREid "
                         "Connect DB")

    clientDetails = cursorOIDC.fetchall()

    #
    # Insert client names into SSPMOD_proxystatistics DB
    #
    query = ("INSERT INTO serviceprovidersmap (identifier, name) VALUES %s "
    "ON CONFLICT (identifier) DO UPDATE SET name = EXCLUDED.name "
    "WHERE serviceprovidersmap.name IS DISTINCT FROM EXCLUDED.name;")

    logging.debug("Updating proxystatistics DB")
    try:
        # https://www.psycopg.org/docs/extras.html#psycopg2.extras.execute_values
        execute_values(cursorProxystats, query, clientDetails)
    except Exception as e:
        logging.error("Could not update proxystatistics DB query")
        logging.error(e)
        raise SystemExit("Could not update proxystatistics DB query")

    if not dry_run:
        logging.info("Commit proxystatistics DB update")
        connProxystats.commit()

    cursorOIDC.close()
    cursorProxystats.close()

    connOIDC.close()
    connProxystats.close()
Example #21
def calendarcron():
    assert request.remote_addr in EASYCRON_IPS
    now = datetime.utcnow()
    telegram_key = os.environ["TELEGRAM_KEY"]
    calendar_id = request.args["calendar_id"]
    calendar_type = request.args["calendar_type"]
    cron_start_time = datetime.strptime(request.args["next_event_start_time"],
                                        "%Y-%m-%dT%H:%M:%S")
    with tracer("db connection"):
        try:
            with tracer("open db connection"):
                connection = psycopg2.connect(os.environ["POSTGRES_DSN"])
            with tracer("find calendar cron transaction"), connection:
                with tracer("cursor"), connection.cursor() as cursor:
                    with tracer("find calendar cron query"):
                        cursor.execute(
                            """
                            SELECT
                                sub, cron_id
                            FROM
                                calendarcron
                            WHERE
                                calendar_id = %s
                                AND calendar_type = %s
                            """,
                            (calendar_id, calendar_type),
                        )
                    assert cursor.rowcount
                    g.sub, cron_id = next(cursor)
            start_minus_1m = cron_start_time - timedelta(minutes=1)
            events_response = oauth.google.get(
                ("https://www.googleapis.com/calendar/v3/calendars"
                 f"/{calendar_id}/events"),
                params={
                    "maxResults": "50",
                    "orderBy": "startTime",
                    "singleEvents": "true",
                    "timeMin": f"{start_minus_1m:%Y-%m-%dT%H:%M:%SZ}",
                    "timeZone": "Etc/UTC",
                },
            )
            events_obj = events_response.json()
            calendar_tz = events_obj["timeZone"]
            events = events_obj["items"]
            for event in events:
                start = parse_event_time(event["start"], calendar_tz)
                if start > now:
                    break
            else:  # no break
                return ("", 204)
            with tracer("update cron time transaction"), connection:
                with tracer("cursor"), connection.cursor() as cursor:
                    with tracer("update cron time query"):
                        cursor.execute(
                            """
                            UPDATE
                                calendarcron
                            SET
                                next_event_start_time = %s
                            WHERE
                                sub = %s
                                AND calendar_id = %s
                                AND calendar_type = 'google'
                            """,
                            (start, g.sub, calendar_id),
                        )
                    cron_response = requests.get(
                        "https://www.easycron.com/rest/edit",
                        params={
                            "token":
                            os.environ["EASYCRON_KEY"],
                            "id":
                            cron_id,
                            "cron_expression":
                            f"{start:%M %H %d %m * %Y}",
                            "url":
                            url_for(
                                "calendarcron.calendarcron",
                                _external=True,
                                next_event_start_time=
                                f"{start:%Y-%m-%dT%H:%M:%S}",
                                calendar_id=calendar_id,
                                calendar_type="google",
                            ),
                        },
                    )
                    cron_response.raise_for_status()
                    error = cron_response.json().get("error",
                                                     {}).get("message")
                    if error:
                        raise Exception(error)
            events_to_send = []
            for event in events:
                start = parse_event_time(event["start"], calendar_tz)
                if cron_start_time <= start <= now:
                    events_to_send.append(event)
            with tracer("find calendar chats transaction"), connection:
                with tracer("cursor"), connection.cursor() as cursor:
                    with tracer("find calendar chats query"):
                        cursor.execute(
                            """
                            SELECT
                                chat_id
                            FROM
                                calendarchatlink
                            WHERE
                                sub = %s
                                AND calendar_id = %s
                                AND calendar_type = 'google'
                                AND chat_type = 'telegram'
                            """,
                            (g.sub, calendar_id),
                        )
                    rows = list(cursor)

            # New code testing in prod so suppress any errors
            with suppress(Exception):
                try:
                    events_start = find_next_event_start(events_obj, now)
                except LookupError:
                    pass
                else:
                    summaries = find_event_summaries_starting(
                        events_obj, events_start)
                    with tracer("event_details table exists transaction"
                                ), connection:
                        with tracer("cursor"), connection.cursor() as cursor:
                            with tracer("event_details table exists query"):
                                cursor.execute("""
                                    SELECT
                                        table_name
                                    FROM
                                        information_schema.tables
                                    WHERE
                                        table_name = 'event_details'
                                    """)
                                if not cursor.rowcount:
                                    with tracer(
                                            "create event_details table query"
                                    ):
                                        cursor.execute("""
                                            CREATE TABLE
                                                event_details (
                                                    calendar_type text,
                                                    calendar_id text,
                                                    summary text
                                                )
                                            """)
                    with tracer(
                            "update event_details transaction"), connection:
                        with tracer("cursor"), connection.cursor() as cursor:
                            with tracer("clear event_details query"):
                                cursor.execute(
                                    """
                                    DELETE FROM
                                        event_details
                                    WHERE
                                        calendar_type = 'google'
                                        AND calendar_id = %s
                                    """,
                                    (calendar_id, ),
                                )
                            with tracer("insert event_details query"):
                                execute_values(
                                    cursor,
                                    """
                                    INSERT INTO
                                        event_details (
                                            calendar_type,
                                            calendar_id,
                                            summary
                                        )
                                    VALUES %s
                                    """,
                                    [("google", calendar_id, summary)
                                     for summary in summaries],
                                )
        finally:
            connection.close()
    summaries = concat_unique([e["summary"] for e in events_to_send], [])
    with tracer("telegram sends"):
        for (chat_id, ) in rows:
            telegram_response = requests.get(
                f"https://api.telegram.org/bot{telegram_key}/sendMessage",
                data={
                    "chat_id": chat_id,
                    "text": "\n".join(summaries)
                },
            )
            assert telegram_response.json()["ok"]
    return ("", 204)

df = pd.DataFrame([['A rowwwww', 'null'],
                   ['Another row, with JSONNNNN',
                    json.dumps(my_dict)], ['Third row', "null"],
                   ["Pandas Row", "null"]])

records = df.to_dict(
    "records"
)  #> [{0: 'A rowwwww', 1: 'null'}, {0: 'Another row, with JSONNNNN', 1: '{"a": 1, "b": ["dog", "cat", 42], "c": "true"}'}, {0: 'Third row', 1: 'null'}, {0: 'Pandas Row', 1: 'null'}]
list_of_tuples = [(r[0], r[1]) for r in records]

execute_values(cursor, insertion_query, list_of_tuples)

#
# QUERY THE TABLE
#

print("-------------------")
query = f"SELECT * FROM {table_name};"
print("SQL:", query)
cursor.execute(query)
for row in cursor.fetchall():
    print(row)

# ACTUALLY SAVE THE TRANSACTIONS
connection.commit()
 def _insert_logs(self, cursor, logs):
     if not logs:
         return
     execute_values(cursor, LOG_INSERT_QUERY, logs,
                    template=LOG_VALUES_TEMPLATE)
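
LOG_INSERT_QUERY and LOG_VALUES_TEMPLATE are defined elsewhere in that project; a plausible pairing (the logs table and its columns are assumptions) that works with execute_values' template argument would be:

# Hypothetical constants -- the real table layout is not shown in the snippet.
LOG_INSERT_QUERY = "INSERT INTO logs (logged_at, level, message) VALUES %s"
LOG_VALUES_TEMPLATE = "(%(logged_at)s, %(level)s, %(message)s)"

# With a named-placeholder template, each element of `logs` is a dict, e.g.:
# logs = [{"logged_at": now, "level": "INFO", "message": "started"}]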
Example #24
0
    def parse_stooq_new(self, last_db_date):
        logging.info('Checking if a new stooq file has been downloaded')
        # stooq_file = os.path.join(os.getenv('DOWNLOAD_DIR'), 'data_d.txt')
        download_dir = self.config.get('datafeed', 'download_dir')
        stooq_file = os.path.join(download_dir, 'data_d.txt')
        if not os.path.exists(stooq_file):
            logging.info('No new stooq data file found.  Nothing to do.')
            return
        logging.info('Reading stooq file, renaming columns, getting daily '
                     'US stocks data')
        df = pd.read_csv(stooq_file,
                         dtype={
                             "<TICKER>": "string",
                             "<PER>": "string",
                             "<DATE>": "string",
                             "<TIME>": "string",
                             "<OPEN>": float,
                             "<HIGH>": float,
                             "<LOW>": float,
                             "<CLOSE>": float,
                             "<VOL>": int,
                             "<OPENINT>": int
                         })
        df.columns = [x[1:-1].lower() for x in df.columns]
        stx_df = df.query('ticker.str.endswith(".US") and per == "D"',
                          engine='python').copy()
        logging.info(
            'Getting {0:d} daily US stocks out of {1:d} records'.format(
                len(stx_df), len(df)))
        stx_df['date'] = stx_df['date'].astype(str)
        stx_df['date'] = stx_df.apply(lambda r: '{0:s}-{1:s}-{2:s}'.format(
            r['date'][0:4], r['date'][4:6], r['date'][6:8]),
                                      axis=1)
        logging.info('Converted stx_df dates in yyyy-mm-dd format')
        dates = stx_df.groupby(by='date')['ticker'].count()
        next_date = stxcal.next_busday(last_db_date)
        ix0, num_dates = 0, len(dates)
        logging.info('Data available for {0:d} dates, from {1:s} to {2:s}; DB '
                     'needs data starting from {3:s}'.format(
                         len(dates), dates.index[0],
                         dates.index[num_dates - 1], next_date))
        db_dates = []
        while ix0 < num_dates:
            if dates.index[ix0] == next_date:
                break
            ix0 += 1
        for ixx in range(ix0, num_dates):
            if dates.index[ixx] == next_date and dates.values[ixx] > 9000:
                db_dates.append(dates.index[ixx])
            else:
                if dates.index[ixx] != next_date:
                    logging.error(f'Missing date {next_date}; got '
                                  f'{dates.index[ixx]} instead')

                if dates.values[ixx] < 9000:
                    logging.error(f'Not enough records ({dates.values[ixx]}) '
                                  f'available for {dates.index[ixx]}')
                break
            next_date = stxcal.next_busday(next_date)

        if not db_dates:
            logging.info('No new data available for processing. Exiting')
            return
        logging.info('Check that there are no time gaps between DB data and '
                     'upload data')
        start_date = stxcal.next_busday(last_db_date)
        num_bdays = stxcal.num_busdays(start_date, db_dates[0])
        if num_bdays > 0:
            logging.warning(
                'No data for {0:d} days ({1:s} - {2:s}). Exiting ...'.format(
                    num_bdays, start_date, stxcal.prev_busday(db_dates[0])))
            return
        logging.info('Check that there are no time gaps in the upload data')
        for ixx in range(len(db_dates) - 1):
            if stxcal.next_busday(db_dates[ixx]) != db_dates[ixx + 1]:
                logging.warning(
                    'Inconsistent dates {0:s} and {1:s} at indexes {2:d} '
                    'and {3:d}'.format(db_dates[ixx], db_dates[ixx + 1],
                                       ixx, ixx + 1))

        sel_stx_df = stx_df.query('date in @db_dates').copy()
        logging.info(
            '{0:d}/{1:d} records found for following dates: [{2:s}]'.format(
                len(sel_stx_df), len(stx_df), ', '.join(db_dates)))
        sel_stx_df['invalid'] = sel_stx_df.apply(
            lambda r: np.isnan(r['open']) or np.isnan(r['high']) or np.
            isnan(r['low']) or np.isnan(r['close']) or np.isnan(r['vol']) or r[
                'vol'] == 0 or r['open'] > r['high'] or r['open'] < r[
                    'low'] or r['close'] > r['high'] or r['close'] < r['low'],
            axis=1)
        valid_stx_df = sel_stx_df.query('not invalid').copy()
        logging.info('Found {0:d} valid records out of {1:d} records'.format(
            len(valid_stx_df), len(sel_stx_df)))

        def process_row(r):
            stk = r['ticker'][:-3].replace("-.", ".P.").replace("_",
                                                                ".").replace(
                                                                    '-', '.')
            o = int(100 * r['open'])
            hi = int(100 * r['high'])
            lo = int(100 * r['low'])
            c = int(100 * r['close'])
            v = int(r['vol'])
            v = v // 1000
            if v == 0:
                v = 1
            lst = [stk, o, hi, lo, c, v]
            return pd.Series(lst)

        valid_stx_df[['ticker', 'open', 'high', 'low', 'close', 'vol']] = \
            valid_stx_df.apply(process_row, axis=1)
        valid_stx_df['openint'] = 2
        valid_stx_df.drop(columns=['per', 'time', 'invalid'],
                          axis=1,
                          inplace=True)
        valid_stx_df.columns = ['stk', 'dt', 'o', 'hi', 'lo', 'c', 'v', 'oi']

        with closing(stxdb.db_get_cnx().cursor()) as crs:
            sql = 'CREATE TEMPORARY TABLE temp_table ('\
                'stk VARCHAR(16) NOT NULL, '\
                'dt DATE NOT NULL, '\
                'o INTEGER NOT NULL, '\
                'hi INTEGER NOT NULL, '\
                'lo INTEGER NOT NULL, '\
                'c INTEGER NOT NULL, '\
                'v INTEGER, '\
                'oi INTEGER, '\
                'PRIMARY KEY(stk, dt))'
            crs.execute(sql)
            logging.info('Created temporary table')
            upload_data = valid_stx_df.values.tolist()
            execute_values(
                crs, 'INSERT INTO temp_table '
                '(stk, dt, o, hi, lo, c, v, oi) VALUES %s', upload_data)
            logging.info('Uploaded dataframe into temporary table')
            stxdb.db_write_cmd(
                'INSERT INTO eods (stk, dt, o, hi, lo, c, v, oi) '
                'SELECT * FROM temp_table ON CONFLICT (stk, dt) DO '
                'UPDATE SET o = EXCLUDED.o, hi = EXCLUDED.hi, '
                'lo = EXCLUDED.lo, c = EXCLUDED.c, v = EXCLUDED.v, '
                'oi = EXCLUDED.oi')
            logging.info('Uploaded data into eods table')
        last_upload_date = valid_stx_df['dt'].max()
        stxdb.db_write_cmd("UPDATE analyses SET dt='{0:s}' WHERE "
                           "analysis='eod_datafeed'".format(last_upload_date))
        logging.info('Updated latest eod datafeed date {0:s} in DB'.format(
            last_upload_date))
        self.rename_stooq_file(dates.index[0], dates.index[num_dates - 1])
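
The temporary-table staging plus INSERT ... SELECT ... ON CONFLICT step above is a general bulk-upsert pattern; a minimal standalone sketch of the same idea, against a hypothetical prices(sym, dt, px) table and DSN, looks like this:

# Minimal sketch of the staging-table upsert pattern used above;
# the `prices` table and the DSN are hypothetical.
import psycopg2
from psycopg2.extras import execute_values

rows = [("AAPL", "2021-01-04", 12925), ("MSFT", "2021-01-04", 21772)]

with psycopg2.connect("dbname=quotes") as conn, conn.cursor() as crs:
    crs.execute("CREATE TEMPORARY TABLE tmp_prices "
                "(sym VARCHAR(16), dt DATE, px INTEGER, PRIMARY KEY (sym, dt))")
    execute_values(crs, "INSERT INTO tmp_prices (sym, dt, px) VALUES %s", rows)
    crs.execute("INSERT INTO prices (sym, dt, px) SELECT * FROM tmp_prices "
                "ON CONFLICT (sym, dt) DO UPDATE SET px = EXCLUDED.px")
# The connection context manager commits the transaction on success.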
    def batch_insert_into_tokens(tokens: list, token_type: str, page_size: int,
                                 conn) -> list:
        """
        Batch inserts tokens into table: reactions or intents.
        :param tokens: list of tokens as reaction or intent, each can consist of one word or several words.
        :param token_type: to be reaction or intent.
        :param page_size: an interger for batch insertion into database.
        :param conn: database connection object.
        :return: list if token id which are inserted to database.
        """

        cur = conn.cursor()

        eids = []

        if token_type == Event2MindDumper.INTENT_TOKENS:
            slct_id_sql = Event2MindDumper.SLCT_ID_INTENT_SQL
            slct_max_id_sql = Event2MindDumper.SLCT_MAX_ID_INTENT_SQL
            batch_insert_sql = Event2MindDumper.BATCH_INSRT_INTENT_SQL
        else:
            slct_id_sql = Event2MindDumper.SLCT_ID_REACTION_SQL
            slct_max_id_sql = Event2MindDumper.SLCT_MAX_ID_REACTION_SQL
            batch_insert_sql = Event2MindDumper.BATCH_INSRT_REACTION_SQL

        for token in tokens:
            # get id for each token and list of all token ids
            try:
                cur.execute(slct_id_sql, (token, ))
                a = cur.fetchone()
                if a:
                    # when the token is already in the database, reuse its existing id
                    eids.append(a[0])
                else:
                    # when token is not in database, select the max_token_id in database and assign (max_token_id + 1) to this token
                    cur.execute(slct_max_id_sql)
                    eid = cur.fetchone()[0]
                    if eid:
                        eid += 1
                    else:
                        eid = 1
                    eids.append(eid)
            except Exception:
                logger.error('error: ' + traceback.format_exc())
                return eids

        # combine two list into one for batch insertion
        eid_token_pairs = []
        eid_token_pairs.append(eids)
        eid_token_pairs.append(tokens)
        eid_token_pairs = list(zip(*eid_token_pairs))
        try:
            extras.execute_values(cur,
                                  batch_insert_sql,
                                  eid_token_pairs,
                                  template=None,
                                  page_size=page_size)
        except Exception:
            logger.error('error: ' + traceback.format_exc())
            return eids
        conn.commit()
        cur.close()
        return eids
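
The SQL constants referenced on Event2MindDumper are not part of this snippet; plausible definitions (table and column names are assumptions) would be along these lines:

# Hypothetical class constants for batch_insert_into_tokens;
# the real schema is not shown.
class Event2MindDumper:
    INTENT_TOKENS = 'intent'
    REACTION_TOKENS = 'reaction'

    SLCT_ID_INTENT_SQL = 'SELECT id FROM intents WHERE token = %s'
    SLCT_MAX_ID_INTENT_SQL = 'SELECT MAX(id) FROM intents'
    BATCH_INSRT_INTENT_SQL = 'INSERT INTO intents (id, token) VALUES %s'

    SLCT_ID_REACTION_SQL = 'SELECT id FROM reactions WHERE token = %s'
    SLCT_MAX_ID_REACTION_SQL = 'SELECT MAX(id) FROM reactions'
    BATCH_INSRT_REACTION_SQL = 'INSERT INTO reactions (id, token) VALUES %s'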
Example #26
0
def update_userstories_kanban_order_in_bulk(
        project: Project,
        status: UserStoryStatus,
        bulk_userstories: List[int],
        before_userstory: Optional[models.UserStory] = None,
        after_userstory: Optional[models.UserStory] = None,
        swimlane: Optional[Swimlane] = None):
    """
    Updates the order of the userstories specified adding the extra updates
    needed to keep consistency.

    Note: `after_userstory` and `before_userstory` are mutually exclusive;
          you can use only one per request. Both can be None, which means
          "at the beginning of its cell".

     - `bulk_userstories` should be a list of user story IDs
    """
    # filter user stories from status and swimlane
    user_stories = project.user_stories.filter(status=status)
    if swimlane is not None:
        user_stories = user_stories.filter(swimlane=swimlane)
    else:
        user_stories = user_stories.filter(swimlane__isnull=True)

    # exclude moved user stories
    user_stories = user_stories.exclude(id__in=bulk_userstories)

    # if before_userstory, get it and all elements before too:
    if before_userstory:
        user_stories = (user_stories.filter(
            kanban_order__gte=before_userstory.kanban_order))
    # if after_userstory, exclude it and get only elements after it:
    elif after_userstory:
        user_stories = (user_stories.exclude(id=after_userstory.id).filter(
            kanban_order__gte=after_userstory.kanban_order))

    # sort and get only ids
    user_story_ids = (user_stories.order_by("kanban_order",
                                            "id").values_list('id', flat=True))

    # append moved user stories
    user_story_ids = bulk_userstories + list(user_story_ids)

    # calculate the start order
    if before_userstory:
        # order start with the before_userstory order
        start_order = before_userstory.kanban_order
    elif after_userstory:
        # order start after the after_userstory order
        start_order = after_userstory.kanban_order + 1
    else:
        # move to the beginning of the column if there is no after or before user story
        start_order = 1

    # prepare rest of data
    total_user_stories = len(user_story_ids)

    user_story_swimlane_ids = (swimlane.id
                               if swimlane else None, ) * total_user_stories
    user_story_status_ids = (status.id, ) * total_user_stories
    user_story_kanban_orders = range(start_order,
                                     start_order + total_user_stories)

    data = tuple(
        zip(user_story_ids, user_story_swimlane_ids, user_story_status_ids,
            user_story_kanban_orders))

    # execute query for update status, swimlane and kanban_order
    sql = """
    UPDATE userstories_userstory
       SET swimlane_id = tmp.new_swimlane_id::BIGINT,
           status_id = tmp.new_status_id::BIGINT,
           kanban_order = tmp.new_kanban_order::BIGINT
      FROM (VALUES %s) AS tmp (id, new_swimlane_id, new_status_id, new_kanban_order)
     WHERE tmp.id = userstories_userstory.id
    """
    with connection.cursor() as cursor:
        execute_values(cursor, sql, data)

    # Update is_closed attr for UserStories and related milestones
    if settings.CELERY_ENABLED:
        update_open_or_close_conditions_if_status_has_been_changed.delay(
            bulk_userstories)
    else:
        update_open_or_close_conditions_if_status_has_been_changed(
            bulk_userstories)

    # Send events for the updated stories
    events.emit_event_for_ids(ids=user_story_ids,
                              content_type="userstories.userstory",
                              projectid=project.pk)

    # Generate response with modified info
    res = ({
        "id": id,
        "swimlane": swimlane,
        "status": status,
        "kanban_order": kanban_order
    } for (id, swimlane, status, kanban_order) in data)
    return res
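
A hedged sketch of a call site, only to illustrate the expected argument types; the model lookups below are assumptions about the surrounding Django project, not code from it:

# Hypothetical call site -- illustrates argument types only.
project = Project.objects.get(slug="my-project")
status = UserStoryStatus.objects.get(project=project, name="In progress")
swimlane = Swimlane.objects.filter(project=project).first()

update_userstories_kanban_order_in_bulk(
    project=project,
    status=status,
    bulk_userstories=[101, 102, 103],   # IDs of the stories being moved
    before_userstory=None,
    after_userstory=None,               # both None: beginning of the cell
    swimlane=swimlane,
)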
### Connect to ElephantSQL-hosted PostgreSQL
connection = psycopg2.connect(dbname=DBNAME,
                              user=DBUSER,
                              password=DBPASSWORD,
                              host=DBHOST)
### A "cursor", a structure to iterate over db records to perform queries
print(type(connection))

cursor = connection.cursor()
print(type(cursor))

my_dict = {"a": 1, "b": ["dog", "cat", 42], "c": 'true'}

rows_to_insert = [('A rowwwww', 'null'),
                  ('Another row, with JSONNNNN', json.dumps(my_dict))
                  ]  # list of tuples

# h/t: https://stackoverflow.com/questions/8134602/psycopg2-insert-multiple-rows-with-one-query
insertion_query = "INSERT INTO test_table (name, data) VALUES %s"
execute_values(cursor, insertion_query, rows_to_insert)

### An example query
cursor.execute('SELECT * from test_table;')
### Note - nothing happened yet! We need to actually *fetch* from the cursor
print("---------")
results = cursor.fetchall()
print(results)

# ACTUALLY SAVE THE TRANSACTIONS
# if creating tables or inserting data (changing db)
connection.commit()
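
The inserts above assume test_table already exists; a hypothetical schema that would accept these name/data rows (the DDL is not shown in the snippet) is:

# Hypothetical DDL for the test_table assumed above.
cursor.execute("""
    CREATE TABLE IF NOT EXISTS test_table (
        id   SERIAL PRIMARY KEY,
        name VARCHAR(128),
        data JSONB
    );
""")
connection.commit()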
Example #28
0
 def insert_items(self, items, position=None):
     with connect(self.dbc) as conn:
         register(connection=conn)
         with conn.cursor() as curs:
             data = [self.to_tuple(item) for item in items]
             extras.execute_values(curs, self.insert_query, data, template=None, page_size=100)
Example #29
0
 def insert_characters(self, characters):
     insertion_query = "INSERT INTO characters (character_id, name, level, exp, hp, strength, intelligence, dexterity, wisdom) VALUES %s"
     list_of_tuples = characters
     execute_values(self.pg_cursor, insertion_query, list_of_tuples)
     self.pg_connection.commit()
Example #30
0
def lambda_handler(event, context):
    """
    lambda_handler executes the logic for the getCovidStatsUS AWS Lambda function. The logic:
      * Accepts an optional date parameter in the form of a json document: e.g. {"date": "2020-01-15"}
      * If no valid date is passed, then "yesterday's" date is assumed
      * Connects to a Postgres database
      * Makes a REST API call to a CDC Covid API
      * Fetches Covid data for US states and districts for the given date
      * Inserts that data into a database table
      * NOTE: if a row exists for that particular state/date the insert will not be executed
    """
    # Print out the inbound event parameters
    print(f"INFO: Starting function with inbound event: {event}",
          file=sys.stderr)

    # Set up working variables
    ret_dict = {'statusCode': 200, 'body': json.dumps('default message')}

    # Grab environment variables
    MY_APP_TOKEN = os.getenv("COVID_API")
    if not MY_APP_TOKEN:  # also covers the case where the variable is unset (None)
        # error: missing API key
        print("ERROR: missing API key", file=sys.stderr)

        ret_dict['statusCode'] = 401
        ret_dict['body'] = json.dumps('error: missing API key')
        return ret_dict

    PG_DB_PASSWORD = os.getenv("PG_DB_PASSWORD")
    if not PG_DB_PASSWORD:  # also covers the case where the variable is unset (None)
        # error: missing Postgres database password
        print("ERROR: missing Postgres database password", file=sys.stderr)

        ret_dict['statusCode'] = 400
        ret_dict['body'] = json.dumps(
            'error: missing Postgres database password')
        return ret_dict

    # Configure a connection object
    connection = psycopg2.connect(
        user="******",
        password=PG_DB_PASSWORD,
        host="labspt13-res.c39zgmn8rrtf.us-east-2.rds.amazonaws.com",
        port="5432",
        database="resfeber")

    # Test the database connection
    try:
        cursor = connection.cursor()
        # Print PostgreSQL Connection properties
        print(
            f"INFO: Connecting to the database with these credentials: {connection.get_dsn_parameters()}\n",
            file=sys.stderr)

        # Print PostgreSQL version
        cursor.execute("SELECT version();")
        record = cursor.fetchone()
        print(f"INFO: Successfully connected to the database: {record}\n",
              file=sys.stderr)

    except (Exception, psycopg2.Error) as error:
        print(f"Error while connecting to PostgreSQL: {error}",
              file=sys.stderr)
        ret_dict['statusCode'] = 500
        ret_dict['body'] = json.dumps(
            'error connecting to the Postgres database')
        return ret_dict

    # Missing the date parameter?
    req_date = ""
    has_date_param = False  # flag indicating a date parameter has been found
    if "date" in event:
        # yes: the event object has a date key/value pair
        has_date_param = True
        req_date = event["date"]
    elif "body" in event:
        # yes: the event object has a body element
        # parse the body string into a dict and look for a date parameter
        try:
            tmp_dict = json.loads(event["body"])
            # is a date parameter included in the body?
            if "date" in tmp_dict:
                has_date_param = True
                req_date = tmp_dict['date']
        except:
            # error occurred parsing the request body content
            print(
                "ERROR: error parsing the event['body'] searching for a 'date' parameter"
            )

    # Has the function been passed a date parameter?
    if not has_date_param:
        # yes: "date" key value is missing, fetch Covid API data for yesterday
        yesterday = date.today() - timedelta(
            days=1)  # 1 day from the current day (utc)
        req_date = yesterday.strftime("%Y-%m-%d")

    # Validate the date parameter (e.g. "2020-11-01")
    if len(req_date) != 10:
        # error: invalid date parameter
        print(
            f"ERROR: invalid date parameter: {req_date} with length: {len(req_date)}",
            file=sys.stderr)

        ret_dict['statusCode'] = 400
        ret_dict['body'] = json.dumps('error: invalid date parameter')
        return ret_dict

    # Construct a date/time string
    date_time = req_date + "T00:00:00.000"
    print(f"INFO: fetching covid data for this date: {date_time}",
          file=sys.stderr)

    # Stand up an API client object
    client = Socrata('data.cdc.gov', MY_APP_TOKEN)
    # Fetch data from the API
    results = client.get("9mfq-cb36", submission_date=date_time)

    # Construct an Insert SQL statement
    sql = "INSERT INTO state_covid_stats (state, date, new_cases, time_created, covid_api_rsp, notes) " + \
          "VALUES %s ON CONFLICT (state, date) DO NOTHING"

    # Iterate through the results
    print(
        f"INFO: Number of covid data points returned by the CDC API: {len(results)}",
        file=sys.stderr)
    vals_arr = []
    for rslt in results:
        tmp_tpl = (rslt["state"], req_date, int(float(rslt["new_case"])),
                   datetime.now().strftime("%Y-%m-%dT%H:%M:%S"),
                   json.dumps(rslt), "")

        # append this "row" dictionary to our running array
        vals_arr.append(tmp_tpl)

    # Do we have any data (rows) to insert into the database?
    if len(vals_arr) != 0:
        # yes: attempt the database insert
        try:
            print(
                f"INFO: attempting to insert {len(vals_arr)} rows into the database for requested date: {date_time}",
                file=sys.stderr)
            # stand up a db cursor
            cursor = connection.cursor()
            # execute the insert for multiple rows
            execute_values(cursor, sql, vals_arr)
            # commit the db changes
            connection.commit()
            # close the db connection
            connection.close()

        except (Exception, psycopg2.Error) as error:
            print(
                f"ERROR: error occurred attempting to insert rows into the database: {error}",
                file=sys.stderr)
            ret_dict['statusCode'] = 500
            ret_dict['body'] = json.dumps(
                'error connecting to the Postgres database')
            return ret_dict

    else:
        # no data to insert
        print(
            f"INFO: no Covid API data returned.  Nothing to insert into the database",
            file=sys.stderr)
        ret_dict['statusCode'] = 204
        ret_dict['body'] = json.dumps(
            'no Covid API data returned; nothing to insert into the database')
        return ret_dict

    # Function completed processing
    print(f"INFO: finished processing: {len(vals_arr)} rows", file=sys.stderr)
    print(f"INFO: NOTE! - duplicate state/date rows will be ignored",
          file=sys.stderr)
    ret_dict['statusCode'] = 200
    ret_dict['body'] = json.dumps(
        f"INFO: finished processing: {len(vals_arr)} rows")
    return ret_dict
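
For a quick local check, the handler can be driven with a hand-built event; this assumes the COVID_API and PG_DB_PASSWORD environment variables are set:

if __name__ == "__main__":
    # Hypothetical smoke test; not part of the deployed Lambda code.
    print(lambda_handler({"date": "2020-11-01"}, None))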
p = re.compile(
    r'/\d*-(.*\.wav)-(\d*\.\d*)\.wav\.npz *: should be (\d*) but came out as (\d*)'
)
matches = p.findall(txt)
errors = [(str(iteration), ) + m for m in matches]

# COMMAND ----------

# Store training errors in database
from psycopg2.extras import execute_values

with conn.cursor() as curs:
    delete_query = 'delete from training_errors where iteration_id = %s'
    curs.execute(delete_query, (iteration, ))
    insert_query = 'insert into training_errors (iteration_id, file_name, "offset", should_be_id, came_out_id) values %s'
    execute_values(curs, insert_query, errors, template=None, page_size=100)
    conn.commit()

# COMMAND ----------

# Compute evaluated accuracy
p2 = re.compile(r'Found (\d*) total mistakes out of (\d*)')
m2 = p2.findall(txt)
accuracy = round((100 - (int(m2[0][0]) / int(m2[0][1])) * 100), 2)

# COMMAND ----------

# Store accuracy in database
update_accuracy_sql = 'update model_iterations set accuracy = %s where id = %s'

with conn.cursor() as curs:
    # Completion assumed from the pattern above: persist the computed accuracy.
    curs.execute(update_accuracy_sql, (accuracy, iteration))
    conn.commit()

my_dict = {"a": 1, "b": ["dog", "cat", 42], "c": 'true'}

# insertion_query = "INSERT INTO test_table (name, data) VALUES (%s, %s)"
# cursor.execute(insertion_query,
#  ('A rowwwww', 'null')
# )
# cursor.execute(insertion_query,
#  ('Another row, with JSONNNNN', json.dumps(my_dict))
# )

# h/t: https://stackoverflow.com/questions/8134602/psycopg2-insert-multiple-rows-with-one-query
insertion_query = "INSERT INTO test_table (name, data) VALUES %s"
execute_values(cursor, insertion_query,
               [('A rowwwww', 'null'),
                ('Another row, with JSONNNNN', json.dumps(my_dict)),
                ('Third row', "3")])

connection.commit()

cursor.close()
connection.close()

exit()

df = pd.DataFrame([['A rowwwww', 'null'],
                   ['Another row, with JSONNNNN',
                    json.dumps(my_dict)], ['Third row', "null"],
                   ["Pandas Row", "null"]])

records = df.to_dict("records")
Example #33
0
def commitDataList(logger, query, values, dbName=None):
    '''commit a list of data values to the database

    Insert the supplied values into the database with
    ``psycopg2.extras.execute_values`` and commit the transaction.
    If there is a problem connecting, executing the query, or
    committing, the error is logged and the function returns early;
    callers should check the log output for failures.

    Parameters
    ----------
    logger : {logging.logger}
        logging element
    query : {str}
        The query to be made to the database
    values : {tuple or list-like}
        Values to be passed to the query through ``execute_values``
    dbName : {str or None}, optional
        The name of the database to use. If this is None, the function will
        attempt to read the name from the ``defaultDB`` item within the
        file ``../config/db.json``.

    Returns
    -------
    None
        The function currently always returns None; errors are logged
        rather than raised.
    '''

    vals = None

    try:
        db = json.load(open('../config/db.json'))

        # Check whether a dbName is available
        if (dbName is None) and ('defaultDB' in db):
            dbName = db['defaultDB']

        # Check whether a dbName has been specified
        if dbName is None:
            logger.error('A database name has not been specified.')
            return None

        conn = psycopg2.connect(db[dbName]['connection'])
        cur = conn.cursor()
    except Exception as e:
        logger.error('Unable to connect to the database')
        logger.error(str(e))
        return

    try:
        query = cur.mogrify(query)
        execute_values(cur, query, values)

    except Exception as e:
        logger.error(
            'Unable to insert data into the database for:\n query: {}\nvalues: {}'
            .format(query, values))
        logger.error(str(e))

    try:
        conn.commit()
        cur.close()
        conn.close()
    except Exception as e:
        logger.error('Unable to disconnect from the database')
        logger.error(str(e))
        return

    return vals
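
A sketch of how commitDataList might be called, assuming the ../config/db.json layout the function reads; the logger name, table, and columns are illustrative only:

# Hypothetical usage of commitDataList.
import logging

logger = logging.getLogger('dbWriter')
query = 'INSERT INTO measurements (sensor, reading) VALUES %s'
values = [('s1', 3.2), ('s2', 4.7)]

commitDataList(logger, query, values)                       # uses defaultDB
# commitDataList(logger, query, values, dbName='archiveDB')  # explicit DB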
Example #34
0
def commands(code):
    with closing(
            psycopg2.connect(dbname='postgres',
                             user='******',
                             password='******',
                             host='localhost')) as conn:
        with conn.cursor(cursor_factory=DictCursor) as cursor:
            conn.autocommit = True
            if code == 1:
                cursor.execute(
                    'select name from footballers where rating = (select max(rating) from footballers) limit 1'
                )
                for row in cursor:
                    print(row[0])
            if code == 2:
                cursor.execute(
                    """select footballers.name, transfers.position, agents.name
                                  from transfers
                                  join footballers on footballers.id = transfers.footballer
                                  join agents on agents.id = transfers.agent 
                                  limit 5;""")
                for row in cursor:
                    print(row[0])
            if code == 3:
                cursor.execute("""
                                  With cte as
                                  (select name, rating, cost, avg(cost) over (partition by rating)
                                    from agents )
                                    select * from cte limit 5;""")
                for row in cursor:
                    print(row)
            if code == 4:
                cursor.execute(
                    """select table_catalog, table_schema, table_name, table_type from information_schema.tables where table_schema = 'public'"""
                )
                for row in cursor:
                    print(row)
            if code == 5:
                cursor.callproc('getFootballer')
                for row in cursor:
                    print(row)
            if code == 6:
                cursor.callproc('getFootballerTable', ['left'])
                records = cursor.fetchmany(size=5)
                for row in records:
                    print(row)
            if code == 7:
                cursor.execute(
                    'select * into temp footballers_copy from footballers;')
                cursor.execute('call update_rating();')
                cursor.execute(
                    """select footballers.name, footballers.foot, footballers.rating as before_update, footballers_copy.rating as after_update
                                  from footballers_copy join footballers on footballers_copy.id = footballers.id order by footballers_copy.rating desc limit 10"""
                )
                for row in cursor:
                    print(row)
            if code == 8:
                cursor.execute('select current_user;')
                record1 = cursor.fetchone()
                cursor.execute('select version();')
                record2 = cursor.fetchone()
                print("USER: "******"VERSION: ", record2)
            if code == 9:
                cursor.execute("""create table if not exists tournaments
                                  (
                                      name varchar(30),
                                      quality int,
                                      quantity_clubs int,
                                      price varchar
                                  )
                                    """)
            if code == 10:
                namedict = [("Bundesliga", 5, 18, "1B"),
                            ("La Liga", 5, 20, "2B"),
                            ("Seria A", 4, 20, "0.5B"),
                            ("Tinkoff RFL", 3, 16, "150M"),
                            ("Premier League", 5, 20, "3B"),
                            ("Ligue 1", 4, 18, "1B")]
                execute_values(
                    cursor,
                    "insert into tournaments(name,quality, quantity_clubs,price) values %s",
                    namedict)
def update_landmarks_in_db(current_landmarks):
    progress("recording {} active landmarks in database".format(
        len(current_landmarks)))
    state = get_worker_state()
    with state.db, state.db.cursor() as cur:
        cur.execute("""
            CREATE TEMP TABLE current_landmarks (
                probeid   INTEGER  NOT NULL PRIMARY KEY,
                anchorid  INTEGER,
                addr      INET     NOT NULL,
                latitude  REAL     NOT NULL,
                longitude REAL     NOT NULL);
        """)
        execute_values(cur, """
            INSERT INTO current_landmarks
                (probeid, anchorid, addr, latitude, longitude)
            VALUES %s;
        """, current_landmarks, page_size=400)
        cur.execute("""
            ANALYZE current_landmarks;
        """)
        cur.execute("""
            DELETE FROM current_landmarks
             WHERE latitude < -60 OR latitude > 85;
        """)
        cur.execute("""
            UPDATE landmarks
               SET usable = false
             WHERE usable = true
               AND (addr NOT IN (SELECT addr FROM current_landmarks)
                    OR probeid NOT IN (SELECT probeid FROM current_landmarks));
        """)
        cur.execute("""
            INSERT INTO landmarks (probeid, anchorid, addr, usable, location)
            SELECT probeid, anchorid, addr, true AS usable,
                   ST_SetSRID(ST_Point(longitude, latitude), 4326) AS location
              FROM current_landmarks
                ON CONFLICT (probeid)
                DO UPDATE SET anchorid = excluded.anchorid,
                              addr = excluded.addr,
                              usable = true,
                              location = excluded.location;
        """)
        cur.execute("""
            DROP TABLE current_landmarks;
        """)
        cur.execute("""
            UPDATE landmarks AS l SET region = r.id
            FROM regions r
            WHERE l.region IS NULL
            AND ST_Covers(r.box, l.location::geometry);
        """)
        cur.execute("""
            ANALYZE landmarks;
        """)
        # Should really cast back to GEOGRAPHY before calling ST_Centroid,
        # but postgis 2.3.1 (the version in debian 9) doesn't support that.
        # The error is less than 500km in all cases so we can live with it
        # for now.
        cur.execute("""
            UPDATE regions AS r SET lm_centroid = s.centroid
              FROM (SELECT r.id,
                       ST_Centroid(ST_Union(l.location::GEOMETRY))
                         AS centroid
                      FROM regions r, landmarks l
                     WHERE l.usable AND l.region = r.id
                     GROUP BY r.id) s
             WHERE s.id = r.id;
        """)
        cur.execute("""
            SELECT cl, count(*) FROM (
                SELECT CASE WHEN NOT usable THEN 'unusable'
                            WHEN anchorid IS NOT NULL THEN 'anchor'
                            ELSE 'probe' END AS cl
                FROM landmarks
            ) _ GROUP BY cl;
        """)
        counts = { 'unusable': 0, 'anchor': 0, 'probe': 0 }
        counts.update(cur.fetchall())
        progress("recorded {probe} probes, {anchor} anchors, and "
                 "{unusable} no longer usable.".format(**counts))

        # The next stages need this information, and we have a
        # database handle right now, so we may as well retrieve it.
        cur.execute("""
            SELECT l.addr, l.anchorid, MAX(m.stop_time)
              FROM landmarks l
         LEFT JOIN ripe_measurements m ON l.addr = m.d_addr
             WHERE l.usable AND l.anchorid IS NOT NULL
          GROUP BY l.addr, l.anchorid
        """)
        return list(cur)
Example #36
0
    def save_result_values(self, mapobject_type_name, result_id, data):
        '''Saves generated label values.

        Parameters
        ----------
        mapobject_type_name: str
            name of the selected
            :class:`MapobjectType <tmlib.models.mapobject.MapobjectType>`
        result_id: int
            ID of a registered
            :class:`ToolResult <tmlib.models.result.ToolResult>`
        data: pandas.Series
            series with multi-level index for "mapobject_id" and "tpoint"

        See also
        --------
        :class:`tmlib.models.result.LabelValues`
        '''
        logger.info('save label values for result %d', result_id)
        mapobject_ids = data.index.levels[0].tolist()
        tpoints = data.index.levels[1]
        with tm.utils.ExperimentConnection(self.experiment_id) as connection:
            connection.execute('''
                SELECT id FROM mapobject_types
                WHERE name = %(mapobject_type_name)s
            ''', {
                'mapobject_type_name': mapobject_type_name
            })
            results = connection.fetchall()
            mapobject_type_id = results[0][0]
            connection.execute('''
                SELECT partition_key, array_agg(id) AS mapobject_ids
                FROM mapobjects AS m
                WHERE m.mapobject_type_id = %(mapobject_type_id)s
                AND m.id = ANY(%(mapobject_ids)s)
                GROUP BY partition_key
            ''', {
                'mapobject_type_id': mapobject_type_id,
                'mapobject_ids': mapobject_ids
            })
            records = connection.fetchall()

        # Grouping mapobject IDs per partition_key allows us
        # to target individual shards of the label_values table directly
        # on the worker nodes with full SQL support, including multi-row
        # insert/update statements.
        for tpoint in tpoints:
            for partition_key, mapobject_ids in records:
                with tm.utils.ExperimentConnection(self.experiment_id) as conn:
                    host, port, shard_id = conn.locate_partition(
                        tm.LabelValues, partition_key
                    )
                worker_connection = tm.utils.ExperimentWorkerConnection(
                    self.experiment_id, host, port
                )
                with worker_connection as connection:
                    logger.debug(
                        'upsert label values for partition %d', partition_key
                    )
                    sql = '''
                        INSERT INTO label_values_{shard} AS v (
                            partition_key, mapobject_id, values, tpoint
                        )
                        VALUES %s
                        ON CONFLICT ON CONSTRAINT label_values_pkey_{shard}
                        DO UPDATE
                        SET values = v.values || EXCLUDED.values
                    '''.format(shard=shard_id)
                    template = '''
                        (
                            %(partition_key)s, %(mapobject_id)s,
                            %(values)s, %(tpoint)s
                        )
                    '''
                    args = [
                        {
                            'values': {
                                str(result_id):
                                    str(np.round(data.ix[(mid, tpoint)], 6))
                            },
                            'mapobject_id': mid,
                            'partition_key': partition_key,
                            'tpoint': tpoint
                        }
                        for mid in mapobject_ids
                    ]
                    execute_values(
                        connection, sql, args, template=template, page_size=500
                    )
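
The dict-rows-plus-template combination used here is a general execute_values feature, independent of the sharding logic; a small self-contained sketch (the readings table and DSN are hypothetical) is:

# Standalone sketch of execute_values with dict rows and an explicit template.
import psycopg2
from psycopg2.extras import execute_values

rows = [
    {'sensor_id': 1, 'tpoint': 0, 'value': 0.12},
    {'sensor_id': 2, 'tpoint': 0, 'value': 0.34},
]

with psycopg2.connect("dbname=example") as conn, conn.cursor() as cur:
    execute_values(
        cur,
        "INSERT INTO readings (sensor_id, tpoint, value) VALUES %s",
        rows,
        template="(%(sensor_id)s, %(tpoint)s, %(value)s)",
        page_size=500,
    )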
def retrieve_anchor_ping_results(ainfo):
    d_addr, anchorid, max_stop_time = ainfo
    state = get_worker_state()
    progress("retrieving ping times toward {}".format(d_addr))
    with state.db, state.db.cursor() as cur:
        cur.execute("""
            SELECT l.probeid, min(r.odate), max(r.odate)
              FROM (SELECT * FROM landmarks WHERE usable) l
         LEFT JOIN (SELECT * FROM calibration_rtts WHERE d_id = %s) r
                ON l.probeid = r.s_id
          GROUP BY l.probeid;
        """, (anchorid,))
        already_have = { row[0]: (row[1], row[2]) for row in cur }

        # Construct an index of available measurements sorted in
        # reverse order of when they ended (that is, most recent first,
        # still running first of all).
        cur.execute("""
            SELECT meas_id, start_time, stop_time, probes
              FROM ripe_measurements
             WHERE d_addr = %s
          ORDER BY stop_time DESC NULLS FIRST;
        """, (d_addr,))
        measurements = cur.fetchall()
    # release database locks while talking to RIPE

    # So that we never have to UPDATE a row in calibration_rtts, and
    # also to make the logic below a little bit simpler, we never
    # retrieve any information about the current day, only days that
    # are completely in the past.
    begin_today = beginning_of_day(datetime.date.today())

    # We want to record a number for each probe for each day, which is
    # most easily done by querying the measurements API for one-day
    # intervals.
    one_day = datetime.timedelta(days=1)

    # We record a blank measurement for yesterday for all probes that
    # don't have any data for yesterday; this ensures future runs of
    # this script will not repeat work that has already been done.
    yesterday = (begin_today - one_day).date()
    no_data_for_yesterday = set()

    # For each probe, work out how far back in time we need to go for
    # it.  Some probes do not ping all of the anchors continuously, so
    # we need to do this in two passes.  The first, optimistic pass
    # retrieves only the data we would need if all of the probes did
    # ping all of the anchors at least once each day -- going back at
    # most 14 days.
    optimistic_earliest_time = begin_today - datetime.timedelta(days=14)
    earliest_time_for_probe = {}
    earliest_time_overall = begin_today
    for probe, (_, maxdate) in already_have.items():
        if maxdate is None:
            maxdatestamp = optimistic_earliest_time
        else:
            maxdatestamp = beginning_of_day(maxdate)

        # If the maxdatestamp is at or after the beginning of today,
        # we don't need to retrieve anything for this probe at all.
        if maxdatestamp < begin_today:
            earliest_time_for_probe[probe] = max(maxdatestamp,
                                                 optimistic_earliest_time)
            earliest_time_overall = min(earliest_time_overall,
                                        earliest_time_for_probe[probe])
            no_data_for_yesterday.add(probe)

    if earliest_time_overall == begin_today:
        progress("no need to retrieve pings for {}".format(d_addr))
        return
    else:
        progress("{}: beginning of today is {}"
                 .format(d_addr, begin_today))
        progress("{}: optimistic earliest date is {}"
                 .format(d_addr, earliest_time_overall))
        progress("{}: need data for {} probes".format(d_addr, len(earliest_time_for_probe)))
        for pid, etime in earliest_time_for_probe.items():
            if etime < (begin_today - one_day):
                progress("{}: for {} need since {}"
                         .format(d_addr, pid, etime))

    new_data = []
    w_stop_time = begin_today
    while earliest_time_for_probe and w_stop_time > earliest_time_overall:
        w_start_time = w_stop_time - one_day
        assert w_start_time >= earliest_time_overall

        datestamp = w_start_time.date()
        data_this_day = {}
        for m_id, m_start_time, m_stop_time, m_probes in measurements:
            if m_stop_time is None:
                m_stop_time = begin_today
            if m_stop_time < w_start_time or m_start_time > w_stop_time:
                continue

            wanted_probes = set(earliest_time_for_probe.keys())
            wanted_probes.intersection_update(m_probes)
            if not wanted_probes:
                continue

            progress("retrieving measurement {} for {}"
                     .format(m_id, datestamp))

            x_start_time = int(max(m_start_time, w_start_time).timestamp())
            x_stop_time = int(min(m_stop_time, w_stop_time).timestamp())
            m_results = 'measurements/{}/results'.format(m_id)

            for probeids in chunked(wanted_probes, n=100):
                for result in retrieve_atlas(
                        state.session, m_results,
                        params = {
                            "start": x_start_time,
                            "stop": x_stop_time,
                            "probe_ids": ",".join(str(pid)
                                                  for pid in probeids)
                        },
                        constructor = PingResult):
                    if (result.is_error or result.is_malformed or
                        result.rtt_min is None):
                        continue
                    if result.probe_id in data_this_day:
                        data_this_day[result.probe_id] = min(
                            data_this_day[result.probe_id],
                            result.rtt_min)
                    else:
                        data_this_day[result.probe_id] = result.rtt_min

                    if datestamp == yesterday:
                        no_data_for_yesterday.discard(result.probe_id)

        progress("retrieved pings from {} probes to {} on {}"
                 .format(len(data_this_day), d_addr, datestamp))
        for probeid, minrtt in data_this_day.items():
            new_data.append((probeid, datestamp, minrtt))
            if (probeid in earliest_time_for_probe and
                earliest_time_for_probe[probeid] >= w_start_time):
                del earliest_time_for_probe[probeid]

        w_stop_time = w_start_time

    # Do not load historical data for probes for which we have
    # received no new data but earlier data was available.
    for pid, etime in list(earliest_time_for_probe.items()):
        if etime > optimistic_earliest_time:
            del earliest_time_for_probe[pid]

    if earliest_time_for_probe:
        progress("{}: need historical data for {} probes"
                 .format(d_addr, len(earliest_time_for_probe)))
        for pid, etime in earliest_time_for_probe.items():
            progress("{}: for {} need since {}"
                     .format(d_addr, pid, etime))

        historical_data = collections.defaultdict(dict)

        for m_id, m_start_time, m_stop_time, m_probes in measurements:
            if m_stop_time is None or m_stop_time >= earliest_time_overall:
                continue # we already got this one

            wanted_probes = set(earliest_time_for_probe.keys())
            wanted_probes.intersection_update(m_probes)
            if not wanted_probes:
                continue # this measurement has no probes we care about

            # Round the stop time up to a day boundary.
            if m_stop_time == beginning_of_day(m_stop_time.date()):
                w_stop_time = m_stop_time
            else:
                w_stop_time = beginning_of_day((m_stop_time + one_day).date())

            days_considered = 0
            while w_stop_time > m_start_time and days_considered < 14:
                w_start_time = w_stop_time - one_day
                datestamp = w_start_time.date()

                x_start_time = int(max(m_start_time, w_start_time).timestamp())
                x_stop_time = int(min(m_stop_time, w_stop_time).timestamp())
                m_results = 'measurements/{}/results'.format(m_id)
                data_this_day = historical_data[datestamp]

                progress("retrieving measurement {} for {} (historical)"
                         .format(m_id, datestamp))
                old_pings = len(data_this_day)
                for probeids in chunked(wanted_probes, n=100):
                    for result in retrieve_atlas(
                            state.session, m_results,
                            params = {
                                "start": x_start_time,
                                "stop": x_stop_time,
                                "probe_ids": ",".join(str(pid)
                                                      for pid in probeids)
                            },
                            constructor = PingResult):
                        if (result.is_error or result.is_malformed or
                            result.rtt_min is None):
                            continue
                        if result.probe_id in data_this_day:
                            data_this_day[result.probe_id] = min(
                                data_this_day[result.probe_id],
                                result.rtt_min)
                        else:
                            data_this_day[result.probe_id] = result.rtt_min
                progress(
                    "retrieved pings from {} probes to {} on {} (historical)"
                    .format(len(data_this_day) - old_pings, d_addr, datestamp))
                w_stop_time = w_start_time
                days_considered += 1

        for datestamp, data_this_day in historical_data.items():
            for probeid, minrtt in data_this_day.items():
                new_data.append((probeid, datestamp, minrtt))
                if probeid in earliest_time_for_probe:
                    del earliest_time_for_probe[probeid]

    if earliest_time_for_probe:
        progress("{} probes have no data at all for {}"
                 .format(len(earliest_time_for_probe), d_addr))

    if no_data_for_yesterday:
        new_data.extend((pid, yesterday, None)
                        for pid in no_data_for_yesterday)

    if new_data:
        progress("recording {} new observations for {}"
                 .format(len(new_data), d_addr))
        with state.db, state.db.cursor() as cur:
            execute_values(
                cur, """
                    INSERT INTO calibration_rtts (s_id, d_id, odate, minrtt)
                    VALUES %s
                    ON CONFLICT DO NOTHING;
                """, new_data,
                template="(%s, {}, %s, %s)".format(anchorid))
Example #38
0
conn = psycopg2.connect("dbname=covid19 user=covid19\
                         password=covid19databasepassword host=localhost")

with open(location_file) as location_fh:
    location_data = json.load(location_fh)
if 'locations' not in location_data:
    print("Malformed file")
    sys.exit(1)

low_watermark = datetime(2020, 1, 1)

# TODO: JSON schema?
values = []
for loc in location_data['locations']:
    timestamp_s = int(loc['timestampMs']) / 1000  # milliseconds to seconds
    ts = datetime.utcfromtimestamp(timestamp_s)
    if ts <= low_watermark:
        continue
    start_time = ts.strftime('%Y-%m-%dT%H:%M:%S')
    end_time = (ts + timedelta(minutes=30)).strftime('%Y-%m-%dT%H:%M:%S')
    lat = int(loc['latitudeE7']) / 1e7
    lon = int(loc['longitudeE7']) / 1e7
    coord = f'SRID=4326;POINT({lon} {lat})'
    values.append((userid, start_time, end_time, coord))
with conn.cursor() as cur:
    execute_values(
        cur,
        """INSERT INTO location_history
               (userid, start_time, end_time, geographic_location)
           VALUES %s""",
        values)
    conn.commit()
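
execute_values sends the rows in batches of page_size (100 by default); for a large location history the batch size can be raised to cut down on round trips. A sketch of that variant, reusing the values list built above (the batch size is illustrative):

with conn.cursor() as cur:
    execute_values(
        cur,
        """INSERT INTO location_history
               (userid, start_time, end_time, geographic_location)
           VALUES %s""",
        values,
        page_size=10000)  # hypothetical batch size; default is 100
    conn.commit()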
Example #39
0
File: db.py Project: voitsik/pypima
    def fri2db(self, fri, exper_info, run_id):
        """
        Store information from the PIMA fri-file to the database.

        Parameters
        ----------
        fri : fri.Fri object
            PIMA fringe fitting results.
        exper_info : pima.ExperInfo object
            Experiment general information.
        run_id : int
            Id of the record in the ``pima_runs`` database table.

        """
        if not fri:
            return

        exper = exper_info.exper
        band = exper_info.band
        polar = fri.records[0]['polar']

        query = """INSERT INTO pima_obs (obs, start_time, stop_time,
exper_name, band, source, polar, st1, st2, delay, rate, accel, snr, ampl,
solint, u, v, base_ed, ref_freq, scan_name, run_id, if_id, status, elevation,
bandpass) VALUES %s;"""

        rec_list = []
        for rec in fri.records:
            if rec['FRIB.FINE_SEARCH'] == 'ACC':
                accel = rec['ph_acc']
            else:
                accel = rec['accel']

            # Set IF id to 0 if fringe fitting done over multiple IFs
            if rec['beg_ifrq'] != rec['end_ifrq']:
                if_id = 0
            else:
                if_id = rec['beg_ifrq']

            rec_list.append((rec['obs'],
                             rec['start_time'],
                             rec['stop_time'],
                             exper,
                             band,
                             rec['source'],
                             polar,
                             rec['sta1'],
                             rec['sta2'],
                             rec['delay'],
                             rec['rate'],
                             accel,
                             rec['SNR'],
                             rec['ampl_lsq'],
                             rec['duration'],
                             rec['U'],
                             rec['V'],
                             rec['uv_rad_ed'],
                             rec['ref_freq'],
                             rec['time_code'],
                             run_id,
                             if_id,
                             rec['status'],
                             rec['elevation'],
                             fri.aux['bandpass']))

        with self.connw.cursor() as cursor:
            execute_values(cursor, query, rec_list)

        self.connw.commit()
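
Both pypima methods commit explicitly after execute_values. psycopg2 connections can also act as transaction context managers, committing on success and rolling back on an exception; a sketch of that variant for the loop above, assuming self.connw is the same open connection:

        # Equivalent to the cursor/commit pair above: the transaction is
        # committed automatically on success and rolled back on any exception.
        with self.connw, self.connw.cursor() as cursor:
            execute_values(cursor, query, rec_list)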
Example #40
0
    # conn / curs are assumed to be an open psycopg2 connection and cursor.
    # CSV_FILEPATH = "data/titanic.csv"
    CSV_FILEPATH = os.path.join(os.path.dirname(__file__), "..", "data",
                                "titanic.csv")
    print("FILE EXISTS?", os.path.isfile(CSV_FILEPATH))
    df = pandas.read_csv(CSV_FILEPATH)
    print(df.head())
    # rows should be a list of tuples
    # [
    #   ('A rowwwww', 'null'),
    #   ('Another row, with JSONNNNN', json.dumps(my_dict)),
    #   ('Third row', "3")
    # ]
    # h/t Jesus and https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.itertuples.html
    rows = list(df.itertuples(index=False, name=None))
    insertion_query = "INSERT INTO passengers (survived, pclass, name, sex, age, sib_spouse_count, parent_child_count, fare) VALUES %s"
    execute_values(curs, insertion_query, rows)
# ACTUALLY SAVE THE TRANSACTIONS
conn.commit()

# insert = """
# COPY %s FROM STDIN WITH
#                     CSV
#                     HEADER
#                     DELIMITER AS ','
# """
# file = open(CSV_PATH, "r")
# table = 'passengers'
# with conn.cursor() as cur:
#     cur.execute("truncate " + table + ";")
#     cur.copy_expert(sql=insert % table, file=file)
#     conn.commit()
Example #41
0
    def _insert_events(self, cursor, events):
        # EVENT_INSERT_QUERY and EVENT_VALUES_TEMPLATE are assumed to be
        # defined elsewhere in the module: the INSERT ... VALUES %s statement
        # and the per-row template passed to execute_values.
        if not events:
            return
        execute_values(cursor, EVENT_INSERT_QUERY, events,
                       template=EVENT_VALUES_TEMPLATE)
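
The actual constants are not shown above. When the rows are dictionaries rather than tuples, the psycopg2 documentation describes the same mechanism with named placeholders in the template; a hedged sketch with illustrative constants and table, assuming cursor is an open psycopg2 cursor:

EVENT_INSERT_QUERY = "INSERT INTO events (source, kind, payload) VALUES %s"
EVENT_VALUES_TEMPLATE = "(%(source)s, %(kind)s, %(payload)s)"

events = [
    {"source": "scanner", "kind": "started", "payload": "{}"},
    {"source": "scanner", "kind": "finished", "payload": "{}"},
]
# Dict rows are merged through the named placeholders in the template.
execute_values(cursor, EVENT_INSERT_QUERY, events,
               template=EVENT_VALUES_TEMPLATE)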
Example #42
0
    def _populate_cves(self, cvemap):
        cve_impact_map = self._populate_cve_impacts()
        rh_source_id = self._get_source_id('Red Hat')
        cur = self.conn.cursor()

        cve_data = cvemap.list_cves()
        cve_names = [(name, ) for name in cve_data]
        execute_values(cur,
                       """select id, name, source_id,
                                 description, impact_id,
                                 published_date, modified_date,
                                 cvss3_score, cvss3_metrics,
                                 iava, redhat_url,
                                 secondary_url, source_id,
                                 cvss2_score, cvss2_metrics
                                 from cve
                                 join (values %s) t(name)
                                using (name)""",
                       cve_names,
                       page_size=len(cve_names))
        #
        # find and merge cves that have already been loaded
        #
        # fields we care about potentially merging include:
        # [3]description,
        # [5]published_date, [6]modified_date,
        # [7]cvss3_score, [8]cvss3_metrics,
        # [9]iava, [10]redhat_url, [11]secondary_url,
        # [13]cvss2_score, [14]cvss2_metrics,
        cols = {
            'description': 3,
            'published_date': 5,
            'modified_date': 6,
            'cvss3_score': 7,
            'cvss3_metrics': 8,
            'iava': 9,
            'redhat_url': 10,
            'secondary_url': 11,
            'cvss2_score': 13,
            'cvss2_metrics': 14
        }

        for a_db_row in cur.fetchall():
            # cve_data[a_db_row[1]] is the incoming CVE with the same name as
            # the row from the DB. Skip id, name, source_id (they are *always*
            # filled in); for the rest, keep the incoming value unless it is
            # empty, then fall back to the value from the DB.
            db_name = a_db_row[1]
            db_id = a_db_row[0]
            cve_data[db_name]["id"] = db_id
            for a_key in cols:
                if a_key not in cve_data[db_name]:
                    cve_data[db_name][a_key] = None
                if not cve_data[db_name][a_key]:
                    cve_data[db_name][a_key] = a_db_row[cols[a_key]]

        to_import = []
        to_update = []
        # now, deal with all items
        for name, values in cve_data.items():
            values["impact_id"] = cve_impact_map[values["impact"].capitalize()] \
                        if values["impact"] is not None else cve_impact_map["None"]
            # make sure everyting has all the keys, even if val is empty
            for a_key in cols:
                if not a_key in values:
                    values[a_key] = None

            item = (name, values["description"], values["impact_id"],
                    values["published_date"], values["modified_date"],
                    values["cvss3_score"], values["cvss3_metrics"],
                    values["iava"], values["redhat_url"],
                    values["secondary_url"], rh_source_id,
                    values["cvss2_score"], values["cvss2_metrics"])
            # if we have an 'id', it means we're already in the db
            if "id" in values:
                to_update.append((values["id"], ) + item)
            else:
                to_import.append(item)
        cur.close()
        self.logger.debug("CVEs to import: %d", len(to_import))
        self.logger.debug("CVEs to update: %d", len(to_update))

        self._import_cves(to_import, cve_data)

        self._update_cves(to_update)

        cur = self.conn.cursor()
        try:
            self._populate_cwes(cur, cve_data)
            self.conn.commit()
        except Exception:  # pylint: disable=broad-except
            self.logger.exception("Failure when populating CWEs")
            self.conn.rollback()
        finally:
            cur.close()
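
The lookup above joins a VALUES list against the cve table to find already-loaded rows. From psycopg2 2.8 onwards, execute_values can also return the rows a query produces via fetch=True, which is convenient for reading back generated ids; a minimal sketch against a simplified table, assuming cur is an open cursor:

# Hypothetical table: cve(id serial primary key, name text unique)
names = [("CVE-2021-0001",), ("CVE-2021-0002",)]
inserted = execute_values(
    cur,
    """INSERT INTO cve (name) VALUES %s
       ON CONFLICT (name) DO NOTHING
       RETURNING id, name""",
    names,
    fetch=True)  # requires psycopg2 >= 2.8
for cve_id, cve_name in inserted:
    print(cve_id, cve_name)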
Example #43
0
import psycopg2
import csv
from psycopg2.extras import execute_values

conn = psycopg2.connect(host="data",
                        database="hmetua",
                        user="******",
                        password="******")
cur = conn.cursor()

data = []

with open("mailbox_enron.csv", "r", newline="", encoding="utf-8") as f:
    lignes = csv.reader(f, delimiter=",", quotechar='"')
    next(lignes)
    for l in lignes:
        data.append(l)

texte = """
INSERT INTO infiltration.mailbox_enron_original(
id_message,auteur,destinataire,reponse,date_p)
VALUES %s
"""

with psycopg2.connect(host="data",
                      database="hmetua",
                      user="******",
                      password="******") as conn:
    with conn.cursor() as cur:
        execute_values(cur, texte, data)
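
For a straight CSV-to-table load like this one, COPY is usually faster than batched INSERTs; a sketch of the same import using copy_expert, assuming the same table and file with a header on the first line:

with psycopg2.connect(host="data", database="hmetua",
                      user="******", password="******") as conn:
    with conn.cursor() as cur, \
         open("mailbox_enron.csv", "r", newline="", encoding="utf-8") as f:
        cur.copy_expert(
            """COPY infiltration.mailbox_enron_original
                   (id_message, auteur, destinataire, reponse, date_p)
               FROM STDIN WITH (FORMAT csv, HEADER true)""",
            f)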
Example #44
0
def sync_cve_md():
    """Sync all CVE metadata from VMaaS"""
    LOGGER.info('Syncing CVE metadata')
    with DatabasePoolConnection() as conn:
        with conn.cursor() as cur:
            impact_id_map = {}
            cur.execute("select name, id from cve_impact")
            for impact_name, impact_id in cur.fetchall():
                impact_id_map[impact_name] = impact_id
            cur.execute('select id, cve from cve_metadata')
            cves_in_db = {}
            for cve_tuple in cur.fetchall():
                cves_in_db[cve_tuple[1]] = cve_tuple[0]
            cve_json = {'cve_list': [".*"], 'page': 1,
                        'page_size': DEFAULT_PAGE_SIZE,
                        'rh_only': True, 'errata_associated': True}
            success, cve_pages = paging(VMAAS_CVES_ENDPOINT, cve_json)
            if not success:
                return success
            cves = cve_pages['cve_list']
            LOGGER.info("Importing CVE metadata")

            to_insert, to_update, to_delete = process_cve_list(cves, cves_in_db, impact_id_map)

            insert_cves(cur, to_insert)
            update_cves(cur, to_update)

            if to_delete:
                associated_cves = set()
                LOGGER.info("Deleting %s unnecessary CVE metadata", len(to_delete))
                cur.execute("""select distinct cve_id from system_vulnerabilities""")
                for row in cur.fetchall():
                    associated_cves.add(row[0])
                cur.execute("""select distinct cve_id from cve_rule_mapping""")
                for row in cur.fetchall():
                    associated_cves.add(row[0])
                safety_delete = []
                unable_to_delete = []
                for cve_to_delete in to_delete:
                    cve_id = cves_in_db[cve_to_delete[0]]
                    if cve_id in associated_cves:
                        unable_to_delete.append(cve_to_delete[0])
                    else:
                        safety_delete.append(cve_id)
                if safety_delete:
                    execute_values(cur, """delete from cve_account_data
                                           where cve_id in (%s)""",
                                   list(zip(safety_delete)), page_size=len(safety_delete))
                    execute_values(cur, """delete from cve_metadata where id in (%s)""",
                                   list(zip(safety_delete)), page_size=len(safety_delete))
                    LOGGER.info('Finished deleting unnecessary CVE metadata')
                if unable_to_delete:
                    LOGGER.warning(
                        'Unable to delete %s cves (still referenced from system_vulnerabilities table or have rules): %s',
                        len(unable_to_delete), str(unable_to_delete))
                    LOGGER.debug('Attempting to update information about %s', str(unable_to_delete))
                    cve_json = {'cve_list': unable_to_delete, 'page': 1,
                                'page_size': DEFAULT_PAGE_SIZE, 'rh_only': True}
                    success, cve_pages = paging(VMAAS_CVES_ENDPOINT, cve_json)
                    if not success:
                        return success
                    cves = cve_pages['cve_list']
                    _, to_update, _ = process_cve_list(cves, cves_in_db, impact_id_map)
                    update_cves(cur, to_update)

            conn.commit()
            LOGGER.info('Finished syncing CVE metadata')
            return success
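
The two deletes above go through execute_values so the id list is expanded into the IN (...) clause in pages. An equivalent and arguably simpler form passes the whole list as a single array parameter with = ANY, which psycopg2 adapts natively; a sketch under that assumption:

# safety_delete is the list of cve_metadata ids cleared for deletion, as above.
cur.execute("delete from cve_account_data where cve_id = any(%s)",
            (safety_delete,))
cur.execute("delete from cve_metadata where id = any(%s)",
            (safety_delete,))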