def model2db(self, run_id, clock_model):
    """
    Parameters
    ----------
    run_id : int
        Id of record in `pima_runs` table.
    clock_model : list
        List of clock model components.

    """
    query = """INSERT INTO clock_models (sta, time, clock_offset,
               clock_rate, group_delay, delay_rate, run_id) VALUES %s;"""
    data = []
    for rec in clock_model:
        row = list(rec)
        row.append(run_id)
        data.append(row)

    with self.connw.cursor() as cursor:
        # cursor.execute(query, parameters)
        execute_values(cursor, query, data)

    self.connw.commit()
def calibrate_cbg():
    """Worker function: calculate CBG calibration "bestlines" for each probe."""
    state = get_worker_state()
    with state.db:
        progress("computing bestlines")
        with state.db.cursor(name='cbgcal_read_{:016x}'.format(
                random.randint(0, sys.maxsize))) as cur:
            cur.itersize = 1024
            cur.execute("""
                SELECT l.probeid,
                       ST_Distance(l.location, m.location) AS distance,
                       n.median AS rtt
                  FROM landmarks l,
                       landmarks m,
                       (SELECT s_id, d_id, median(minrtt)
                          FROM (SELECT *
                                  FROM (SELECT s_id, d_id, odate, minrtt,
                                               rank() OVER (
                                                   PARTITION BY s_id, d_id
                                                   ORDER BY odate DESC)
                                          FROM calibration_rtts
                                         WHERE minrtt IS NOT NULL) _a
                                 WHERE rank <= 14) _b
                         GROUP BY s_id, d_id) n
                 WHERE l.probeid = n.s_id and m.anchorid = n.d_id
            """)
            results = []
            block = None
            prev_sid = None
            for row in cur:
                if row[0] != prev_sid:
                    if prev_sid is not None:
                        results.extend(calibrate_cbg_for_block(prev_sid, block))
                    prev_sid = row[0]
                    block = []
                block.append((row[1], row[2]))
            if prev_sid is not None:
                results.extend(calibrate_cbg_for_block(prev_sid, block))

        progress("recording bestlines for {} probes".format(len(results)))
        with state.db.cursor() as cur:
            execute_values(
                cur, """
                UPDATE landmarks AS l
                   SET cbg_m = d.m, cbg_b = d.b
                  FROM (VALUES %s) AS d (id, m, b)
                 WHERE l.probeid = d.id
                """, results)
def uvfits2db(self, fits_file, b1950_name, run_id):
    """
    Parameters
    ----------
    fits_file : UVFits object
        UV-FITS file object.
    b1950_name : str
        B1950 source name.
    run_id : int
        Id of the record in ``pima_runs`` table.

    """
    data = []
    file_name = os.path.basename(fits_file.file_name)

    for ind in range(fits_file.gcount):
        time = fits_file.dates[ind]  # As datetime.datetime object
        ant1_name, ant2_name = fits_file.get_ant_by_ind(ind)
        inttime = float(fits_file.inttime[ind])

        for if_ind in range(fits_file.no_if):
            flux = float(fits_file.amplitudes[ind, if_ind])
            weight = float(fits_file.weights[ind, if_ind])

            if weight <= 0:
                continue

            freq = float(fits_file.freq +
                         fits_file.freq_table[if_ind]['if_freq'])
            uu = float(fits_file.u_raw[ind])
            vv = float(fits_file.v_raw[ind])

            row = (ind+1, time, if_ind+1, b1950_name, fits_file.exper_name,
                   fits_file.band, fits_file.stokes, ant1_name, ant2_name,
                   uu, vv, freq, flux, weight, inttime, file_name, run_id)
            data.append(row)

    query = """INSERT INTO ra_uvfits (ind, time, if_id, source, exper_name,
               band, polar, sta1, sta2, u, v, freq, ampl, weight, inttime,
               file_name, run_id) VALUES %s;"""

    if data:
        with self.connw.cursor() as cursor:
            # cursor.execute('DELETE FROM ra_uvfits WHERE file_name = %s;',
            #                (file_name, ))
            # cur.executemany(query, data)
            execute_values(cursor, query, data)

        self.connw.commit()
def seedData(cursor, rows):
    start_time = time.time()

    # ... bad loop
    # posts_data = []
    # for i in range(rows):
    #     posts_data.append((fake.sentence(), fake.text()))

    # ... good loop
    posts_data = [(fake.sentence(), fake.text()) for i in range(rows)]

    # ... map
    # posts_data = map(lambda i: (fake.sentence(), fake.text()), range(rows))

    # seed posts data
    execute_values(cursor,
                   "INSERT INTO posts (title, content) VALUES %s",
                   posts_data)
    print("seeding %s rows of faker data\n... took %s seconds\n" %
          (rows, time_util.getTime(start_time, time.time())))
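# A self-contained sketch of the comprehension-plus-execute_values pattern
# seedData uses above. The posts table and Faker usage mirror the snippet;
# the dsn argument and the plain time.time() timing (replacing the assumed
# time_util helper) are placeholders, not part of the original source.
import time

import psycopg2
from psycopg2.extras import execute_values
from faker import Faker

fake = Faker()

def seed_posts(dsn, rows):
    posts_data = [(fake.sentence(), fake.text()) for _ in range(rows)]
    start_time = time.time()
    # the connection context manager commits the transaction on success
    with psycopg2.connect(dsn) as conn, conn.cursor() as cursor:
        execute_values(cursor,
                       "INSERT INTO posts (title, content) VALUES %s",
                       posts_data)
    print("seeded %d rows in %.2f seconds" % (rows, time.time() - start_time))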
def insert(self, data, chunks=None):
    if not isinstance(data, BatchIterator):
        data = Iterator(data, dtypes=self.dtypes).batchs(chunks=chunks)
    columns = "(" + ", ".join(self.groups) + ")"
    insert_str = "INSERT INTO {name} {columns} VALUES".format(
        name=self.name, columns=columns)
    insert = insert_str + " %s"
    cur = self.conn.cursor()
    num_groups = len(data.groups)
    for row in data:
        shape = row.batch.shape.to_tuple()
        if len(shape) == 1 and num_groups > 1:
            value = row.batch.to_df().values  # .to_ndarray().reshape(1, -1)
        elif len(shape) == 1 and num_groups == 1:
            value = row.batch.to_df().values  # .to_ndarray().reshape(-1, 1)
        else:
            value = row.batch.to_df().values
        execute_values(cur, insert, value, page_size=len(data))
    cur.close()
def retrieve_anchor_ping_measurements(ainfo):
    """Worker function: retrieve all ping measurements targeting the anchor
       with IPv4 address ADDR, and record their metadata in the database.
    """
    addr, _, max_stop_time = ainfo
    state = get_worker_state()
    params = {
        "target_ip": addr,
        "status": "2,4,8",
        "type": "ping",
        "optional_fields": "probes"
    }
    if max_stop_time is not None:
        params["stop_time__gt"] = max_stop_time.timestamp()

    updates = list(retrieve_atlas(
        state.session, 'measurements', params=params,
        constructor=lambda m: (
            addr,
            m["id"],
            maybe_dt_from_timestamp(m["start_time"]),
            maybe_dt_from_timestamp(m["stop_time"]),
            [p["id"] for p in m["probes"]]
        )))

    if updates:
        with state.db, state.db.cursor() as cur:
            execute_values(cur, """
                INSERT INTO ripe_measurements
                    (d_addr, meas_id, start_time, stop_time, probes)
                VALUES %s
                ON CONFLICT (meas_id) DO UPDATE
                    SET d_addr     = excluded.d_addr,
                        start_time = excluded.start_time,
                        stop_time  = excluded.stop_time,
                        probes     = excluded.probes
            """, updates)

    progress("{}: updated {} measurement{}"
             .format(addr, len(updates), "" if len(updates) == 1 else "s"))
def run_query(self, query, vars=None, bulk_query=False):
    str_query = query.decode(encoding='UTF-8', errors='replace')
    str_query = str_query.replace(u"\uFFFD", "?")
    ctx.logger.debug('Running query: {0}'.format(str_query))
    with closing(self._connection.cursor()) as cur:
        try:
            if bulk_query:
                execute_values(cur, query, vars)
            else:
                cur.execute(query, vars)
            status_message = cur.statusmessage
            fetchall = cur.fetchall()
            result = {'status': status_message, 'all': fetchall}
            ctx.logger.debug('Running query result status: {0}'
                             .format(status_message))
        except Exception as e:
            fetchall = None
            status_message = str(e)
            result = {'status': status_message, 'all': fetchall}
            if status_message != 'no results to fetch':
                ctx.logger.error('Running query result status: {0}'
                                 .format(status_message))
        return result
def autospec2db(self, acta_file):
    """
    Store autocorrelation spectrum to the database.

    """
    exper, band = acta_file.header['experiment'].split('_')
    polar = acta_file.header['polar']
    sta = acta_file.header['station']
    start_date = acta_file.header['start_date']
    stop_date = acta_file.header['stop_date']
    obs = acta_file.header['obs']
    scan_name = acta_file.header['scan_name']

    delete_query = """DELETE FROM autospec_info WHERE
                      exper_name = %s AND band = %s AND polar = %s AND
                      sta = %s AND scan_name = %s;"""
    query_info = """INSERT INTO autospec_info (exper_name, band, polar, sta,
                    start_date, stop_date, obs, scan_name) VALUES %s
                    RETURNING id;"""
    query_data = """INSERT INTO autospec (if_num, chann_num, freq, ampl,
                    info_id) VALUES %s;"""

    data = []

    with self.connw.cursor() as cursor:
        cursor.execute(delete_query, (exper, band, polar, sta, scan_name))
        cursor.execute(query_info, [(exper, band, polar, sta, start_date,
                                     stop_date, obs, scan_name)])
        info_id = cursor.fetchone()[0]

        for ind in range(acta_file.header['num_of_points']):
            row = (acta_file.if_num[ind], acta_file.channel[ind],
                   acta_file.freq[ind], acta_file.ampl[ind], info_id)
            data.append(row)

        execute_values(cursor, query_data, data, page_size=2048)

    self.connw.commit()
connection_string = os.environ.get("TIMESCALEDB_CONNECTION",
                                   default="dbname=pi user=pi")

with psycopg2.connect(connection_string) as conn:
    with conn.cursor() as cur:
        with DigitalInOut(board.D23) as pin:
            while True:
                start = time.time()
                values = []
                # take readings for one second, then batch-insert them
                while time.time() - start < 1.0:
                    time.sleep(0.099)
                    # discharge the capacitor, then count how long the
                    # pin takes to read high again
                    pin.direction = Direction.OUTPUT
                    pin.value = False
                    pin.direction = Direction.INPUT
                    reading = 0
                    while pin.value is False:
                        reading += 1
                    values.append(
                        (datetime.utcnow(), "photoresistor", reading))
                execute_values(
                    cur,
                    """
                    INSERT INTO pi_obs (time, metric, value)
                    VALUES %s
                    """,
                    values,
                )
                conn.commit()
                print("Inserted ", len(values))
def update_caches(cur, rh_account_id: int, system_id: int, opt_out: bool, stale: bool,
                  new_rule_hits: dict,  # pylint: disable=too-many-branches
                  updated_rules: dict, fixed_rules: dict):
    """Update CVE caches"""
    new_cves_ids = list(new_rule_hits.keys())
    updated_cves_ids = list(updated_rules.keys())
    fixed_cves_ids = list(fixed_rules.keys())
    all_cves_ids = new_cves_ids + updated_cves_ids + fixed_cves_ids
    if not all_cves_ids:
        return
    inc_cves = []
    inc_cves_divergent_status = []
    dec_cves = []
    dec_cves_divergent_status = []
    cur.execute("""select cve_id, status_id from cve_account_data
                   where rh_account_id = %s and cve_id in %s
                   order by cve_id for update""",
                (rh_account_id, tuple(all_cves_ids),))
    for cve_id, status_id in cur.fetchall():
        # completely new system_vulnerabilities record, update caches
        if cve_id in new_cves_ids:
            if status_id == 0:
                inc_cves.append(cve_id)
            else:
                inc_cves_divergent_status.append(cve_id)
        # do not double increase counts if CVE has already been imported by VMaaS
        elif cve_id in updated_cves_ids and (updated_rules[cve_id][2] is not None
                                             or updated_rules[cve_id][4]):
            if status_id == updated_rules[cve_id][3]:
                inc_cves.append(cve_id)
            else:
                inc_cves_divergent_status.append(cve_id)
        # decrease counter only when VMaaS thinks we're not vulnerable or
        # we're applying rule mitigation
        elif cve_id in fixed_cves_ids and (fixed_rules[cve_id][4]
                                           or fixed_rules[cve_id][2] is not None):
            if status_id == fixed_rules[cve_id][3]:
                dec_cves.append(cve_id)
            else:
                dec_cves_divergent_status.append(cve_id)
        all_cves_ids.remove(cve_id)

    insert_cves = [(cve_id, rh_account_id, 1, 0) for cve_id in all_cves_ids]

    system_cve_change = (len(insert_cves) + len(inc_cves)
                         + len(inc_cves_divergent_status)
                         - len(dec_cves) - len(dec_cves_divergent_status))
    cur.execute("""update system_platform set cve_count_cache = cve_count_cache + %s
                   where id = %s""", (system_cve_change, system_id,))

    if opt_out or stale:
        # do not update CVE level caches if the system is opted out or stale
        return

    if dec_cves:
        cur.execute("""update cve_account_data
                       set systems_affected = systems_affected - 1
                       where rh_account_id = %s and cve_id in %s""",
                    (rh_account_id, tuple(dec_cves)))
    if dec_cves_divergent_status:
        cur.execute("""update cve_account_data
                       set systems_affected = systems_affected - 1,
                           systems_status_divergent = systems_status_divergent - 1
                       where rh_account_id = %s and cve_id in %s""",
                    (rh_account_id, tuple(dec_cves_divergent_status)))
    if inc_cves:
        cur.execute("""update cve_account_data
                       set systems_affected = systems_affected + 1
                       where rh_account_id = %s and cve_id in %s""",
                    (rh_account_id, tuple(inc_cves)))
    if inc_cves_divergent_status:
        cur.execute("""update cve_account_data
                       set systems_affected = systems_affected + 1,
                           systems_status_divergent = systems_status_divergent + 1
                       where rh_account_id = %s and cve_id in %s""",
                    (rh_account_id, tuple(inc_cves_divergent_status)))
    if insert_cves:
        execute_values(cur, """insert into cve_account_data
                       (cve_id, rh_account_id, systems_affected, systems_status_divergent)
                       values %s
                       on conflict (cve_id, rh_account_id) do update set
                       systems_affected = cve_account_data.systems_affected
                                          + excluded.systems_affected,
                       systems_status_divergent = cve_account_data.systems_status_divergent
                                                  + excluded.systems_status_divergent""",
                       insert_cves, page_size=len(insert_cves))
def update_batch_summary(batch_summary):
    '''
    This function will update the batch summary as we go for each value
    in batch_summary
    :param batch_summary: the dictionary we are using
    :return:
    '''
    db = get_db_connection(CONFIG)
    cur = db.cursor()
    ### two cases...this is a new batch (so batch id doesn't exist) or the batch already exists
    batch_exists = get_table_noconn(
        '''select * from batch_summary where batch_id={}'''.format(
            batch_summary['batch_id']), db)
    if CONFIG['sql_flavor'] == 'sqlite':
        if len(batch_exists) == 0:
            # create individual data tables
            diskEngine = create_engine('sqlite:///{}/{}.db'.format(
                CONFIG['projectPath'], CONFIG['db_name']))
            ### IF the batch doesn't exist, make the row (it's just the batch status and batch_id)
            pd.DataFrame(batch_summary, index=[0]).to_sql('batch_summary',
                                                          diskEngine,
                                                          if_exists='append',
                                                          index=False)
            db.commit()
        else:
            ## First check for any json and do a json.dumps
            for key in batch_summary:
                if batch_summary[key].__class__ == dt.datetime:
                    batch_summary[key] = batch_summary[key].strftime(
                        '%Y-%m-%d %H:%M:%S')
                if batch_summary[key].__class__ == dict:
                    batch_summary[key] = json.dumps(batch_summary[key])
                if batch_summary[key] != batch_exists[0][key]:
                    update_statement = 'update batch_summary set {}=? where batch_id={}'.format(
                        key, batch_summary['batch_id'])
                    try:
                        cur.execute(update_statement, (batch_summary[key], ))
                        db.commit()
                    except Exception as err:
                        print(err)
                        print(batch_summary[key])
    elif CONFIG['sql_flavor'] == 'postgres':
        if len(batch_exists) == 0:
            ### IF the batch doesn't exist, make the row (it's just the batch status and batch_id)
            columns = batch_summary.keys()
            columns_list = str(tuple([str(i) for i in columns])).replace("'", "")
            values = [tuple(batch_summary[key] for key in batch_summary)]
            insert_statement = 'insert into {table} {collist} values %s'.format(
                table='batch_summary', collist=columns_list)
            execute_values(cur, insert_statement, values, page_size=len(values))
            db.commit()
        else:
            ## First check for any json and do a json.dumps
            for key in batch_summary:
                if batch_summary[key].__class__ == dict:
                    batch_summary[key] = json.dumps(batch_summary[key])
            columns = [i for i in batch_summary.keys() if i != 'batch_id']
            columns_statement = ', '.join(['{}=%s'.format(i) for i in columns])
            values = [
                tuple(batch_summary[key] for key in batch_summary
                      if key != 'batch_id')
            ]
            update_statement = 'update batch_summary set {} where batch_id={}'.format(
                columns_statement, batch_summary['batch_id'])
            cur.execute(update_statement, values[0])
            db.commit()
    cur.close()
    db.close()
def insert_armory_items(self, armory_items):
    insertion_query = "INSERT INTO armory_item (item_id, name, value, weight) VALUES %s"
    list_of_tuples = armory_items
    execute_values(self.pg_cursor, insertion_query, list_of_tuples)
    self.pg_connection.commit()
def lambda_handler(event, context):
    # Get parameters for database
    ssm = boto3.client('ssm', 'us-east-1')
    user = ssm.get_parameter(Name='/apartments/db/user',
                             WithDecryption=True)['Parameter']['Value']
    password = ssm.get_parameter(Name='/apartments/db/password',
                                 WithDecryption=True)['Parameter']['Value']
    host = ssm.get_parameter(Name='/apartments/db/host',
                             WithDecryption=True)['Parameter']['Value']
    database = ssm.get_parameter(Name='/apartments/db/database',
                                 WithDecryption=True)['Parameter']['Value']

    # Connect
    connection = psycopg2.connect(
        user="******",
        password="******",
        host="aa10uxjrfmskobd.cf8tqavognjx.us-east-1.rds.amazonaws.com",
        port="5432",
        database="ebdb")
    cursor = connection.cursor()

    # Parse event for needed data
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    key = key.replace('%3A', ':').replace('+', ' ')

    # Read in the object
    s3 = boto3.resource('s3')
    content_object = s3.Object(bucket, key)
    file_content = content_object.get()['Body'].read().decode('utf-8')
    json_data = json.loads(file_content)

    # Neighborhood Map
    neighborhoodMaps = {
        '10453': 'Central Bronx', '10457': 'Central Bronx', '10460': 'Central Bronx',
        '10458': 'Bronx Park and Fordham', '10467': 'Bronx Park and Fordham',
        '10468': 'Bronx Park and Fordham',
        '10451': 'High Bridge and Morrisania', '10452': 'High Bridge and Morrisania',
        '10456': 'High Bridge and Morrisania',
        '10454': 'Hunts Point and Mott Haven', '10455': 'Hunts Point and Mott Haven',
        '10459': 'Hunts Point and Mott Haven', '10474': 'Hunts Point and Mott Haven',
        '10463': 'Kingsbridge and Riverdale', '10471': 'Kingsbridge and Riverdale',
        '10466': 'Northeast Bronx', '10469': 'Northeast Bronx',
        '10470': 'Northeast Bronx', '10475': 'Northeast Bronx',
        '10461': 'Southeast Bronx', '10462': 'Southeast Bronx',
        '10464': 'Southeast Bronx', '10465': 'Southeast Bronx',
        '10472': 'Southeast Bronx', '10473': 'Southeast Bronx',
        '11212': 'Central Brooklyn', '11213': 'Central Brooklyn',
        '11216': 'Central Brooklyn', '11233': 'Central Brooklyn',
        '11238': 'Central Brooklyn',
        '11209': 'Southwest Brooklyn', '11214': 'Southwest Brooklyn',
        '11228': 'Southwest Brooklyn',
        '11204': 'Borough Park', '11218': 'Borough Park',
        '11219': 'Borough Park', '11230': 'Borough Park',
        '11234': 'Canarsie and Flatlands', '11236': 'Canarsie and Flatlands',
        '11239': 'Canarsie and Flatlands',
        '11223': 'Southern Brooklyn', '11224': 'Southern Brooklyn',
        '11229': 'Southern Brooklyn', '11235': 'Southern Brooklyn',
        '11201': 'Northwest Brooklyn', '11205': 'Northwest Brooklyn',
        '11215': 'Northwest Brooklyn', '11217': 'Northwest Brooklyn',
        '11231': 'Northwest Brooklyn',
        '11203': 'Flatbush', '11210': 'Flatbush', '11225': 'Flatbush',
        '11226': 'Flatbush',
        '11207': 'East New York and New Lots', '11208': 'East New York and New Lots',
        '11211': 'Greenpoint', '11222': 'Greenpoint',
        '11220': 'Sunset Park', '11232': 'Sunset Park',
        '11206': 'Bushwick and Williamsburg', '11221': 'Bushwick and Williamsburg',
        '11237': 'Bushwick and Williamsburg',
        '10026': 'Central Harlem', '10027': 'Central Harlem',
        '10030': 'Central Harlem', '10037': 'Central Harlem',
        '10039': 'Central Harlem',
        '10001': 'Chelsea and Clinton', '10011': 'Chelsea and Clinton',
        '10018': 'Chelsea and Clinton', '10019': 'Chelsea and Clinton',
        '10020': 'Chelsea and Clinton', '10036': 'Chelsea and Clinton',
        '10029': 'East Harlem', '10035': 'East Harlem',
        '10010': 'Gramercy Park and Murray Hill', '10016': 'Gramercy Park and Murray Hill',
        '10017': 'Gramercy Park and Murray Hill', '10022': 'Gramercy Park and Murray Hill',
        '10012': 'Greenwich Village and Soho', '10013': 'Greenwich Village and Soho',
        '10014': 'Greenwich Village and Soho',
        '10004': 'Lower Manhattan', '10005': 'Lower Manhattan',
        '10006': 'Lower Manhattan', '10007': 'Lower Manhattan',
        '10038': 'Lower Manhattan', '10280': 'Lower Manhattan',
        '10002': 'Lower East Side', '10003': 'Lower East Side',
        '10009': 'Lower East Side',
        '10021': 'Upper East Side', '10028': 'Upper East Side',
        '10044': 'Upper East Side', '10065': 'Upper East Side',
        '10075': 'Upper East Side', '10128': 'Upper East Side',
        '10023': 'Upper West Side', '10024': 'Upper West Side',
        '10025': 'Upper West Side',
        '10031': 'Inwood and Washington Heights', '10032': 'Inwood and Washington Heights',
        '10033': 'Inwood and Washington Heights', '10034': 'Inwood and Washington Heights',
        '10040': 'Inwood and Washington Heights',
        '11361': 'Northeast Queens', '11362': 'Northeast Queens',
        '11363': 'Northeast Queens', '11364': 'Northeast Queens',
        '11354': 'North Queens', '11355': 'North Queens', '11356': 'North Queens',
        '11357': 'North Queens', '11358': 'North Queens', '11359': 'North Queens',
        '11360': 'North Queens',
        '11365': 'Central Queens', '11366': 'Central Queens', '11367': 'Central Queens',
        '11412': 'Jamaica', '11423': 'Jamaica', '11432': 'Jamaica',
        '11433': 'Jamaica', '11434': 'Jamaica', '11435': 'Jamaica',
        '11436': 'Jamaica',
        '11101': 'Northwest Queens', '11102': 'Northwest Queens',
        '11103': 'Northwest Queens', '11104': 'Northwest Queens',
        '11105': 'Northwest Queens', '11106': 'Northwest Queens',
        '11374': 'West Central Queens', '11375': 'West Central Queens',
        '11379': 'West Central Queens', '11385': 'West Central Queens',
        '11691': 'Rockaways', '11692': 'Rockaways', '11693': 'Rockaways',
        '11694': 'Rockaways', '11695': 'Rockaways', '11697': 'Rockaways',
        '11004': 'Southeast Queens', '11005': 'Southeast Queens',
        '11411': 'Southeast Queens', '11413': 'Southeast Queens',
        '11422': 'Southeast Queens', '11426': 'Southeast Queens',
        '11427': 'Southeast Queens', '11428': 'Southeast Queens',
        '11429': 'Southeast Queens',
        '11414': 'Southwest Queens', '11415': 'Southwest Queens',
        '11416': 'Southwest Queens', '11417': 'Southwest Queens',
        '11418': 'Southwest Queens', '11419': 'Southwest Queens',
        '11420': 'Southwest Queens', '11421': 'Southwest Queens',
        '11368': 'West Queens', '11369': 'West Queens', '11370': 'West Queens',
        '11372': 'West Queens', '11373': 'West Queens', '11377': 'West Queens',
        '11378': 'West Queens',
        '10302': 'Port Richmond', '10303': 'Port Richmond', '10310': 'Port Richmond',
        '10306': 'South Shore', '10307': 'South Shore', '10308': 'South Shore',
        '10309': 'South Shore', '10312': 'South Shore',
        '10301': 'Stapleton and St. George', '10304': 'Stapleton and St. George',
        '10305': 'Stapleton and St. George',
        '10314': 'Mid-Island'
    }

    # Borough Maps
    boroughMaps = {
        'Central Bronx': 'Bronx', 'Bronx Park and Fordham': 'Bronx',
        'High Bridge and Morrisania': 'Bronx', 'Hunts Point and Mott Haven': 'Bronx',
        'Kingsbridge and Riverdale': 'Bronx', 'Northeast Bronx': 'Bronx',
        'Southeast Bronx': 'Bronx',
        'Central Brooklyn': 'Brooklyn', 'Southwest Brooklyn': 'Brooklyn',
        'Borough Park': 'Brooklyn', 'Canarsie and Flatlands': 'Brooklyn',
        'Southern Brooklyn': 'Brooklyn', 'Northwest Brooklyn': 'Brooklyn',
        'Flatbush': 'Brooklyn', 'East New York and New Lots': 'Brooklyn',
        'Greenpoint': 'Brooklyn', 'Sunset Park': 'Brooklyn',
        'Bushwick and Williamsburg': 'Brooklyn',
        'Central Harlem': 'Manhattan', 'Chelsea and Clinton': 'Manhattan',
        'East Harlem': 'Manhattan', 'Gramercy Park and Murray Hill': 'Manhattan',
        'Greenwich Village and Soho': 'Manhattan', 'Lower Manhattan': 'Manhattan',
        'Lower East Side': 'Manhattan', 'Upper East Side': 'Manhattan',
        'Upper West Side': 'Manhattan', 'Inwood and Washington Heights': 'Manhattan',
        'Northeast Queens': 'Queens', 'North Queens': 'Queens',
        'Central Queens': 'Queens', 'Jamaica': 'Queens',
        'Northwest Queens': 'Queens', 'West Central Queens': 'Queens',
        'Rockaways': 'Queens', 'Southeast Queens': 'Queens',
        'Southwest Queens': 'Queens', 'West Queens': 'Queens',
        'Port Richmond': 'Staten Island', 'South Shore': 'Staten Island',
        'Stapleton and St. George': 'Staten Island', 'Mid-Island': 'Staten Island',
        'No Neighborhood Found': 'None', 'Kearney': 'None', 'Stamford': 'None'
    }

    # Check data in database
    sqlCurrentDataInDB = "SELECT DISTINCT name, address FROM website_apartment;"
    cursor.execute(sqlCurrentDataInDB)
    currentDataInDB = cursor.fetchall()

    rowsToInsert = []
    for i, row in enumerate(json_data):
        address = row['address']
        area = row['area']
        if type(area) == str:
            area = area.replace('ft2', '')
        bedrooms = row['bedrooms']
        bikeScore = row['bikeScore']
        transitScore = row['transitScore']
        walkScore = row['walkScore']
        datetime = row['datetime']
        distanceToNearestIntersection = row['distanceToNearestIntersection']
        hasImage = row['has_image']
        hasMap = row['has_map']
        name = row['name']
        price = row['price']
        if type(price) == str:
            price = price.replace('$', '')
        sideOfStreet = row['sideOfStreet']
        url = row['url']
        longitude = row['geotag'][0]
        latitude = row['geotag'][1]

        # Check for area
        if row['area']:
            includesArea = True
        else:
            includesArea = False

        # Check for no fee
        if 'no fee' in row['name'].lower():
            advertisesNoFee = True
        else:
            advertisesNoFee = False

        # Check if repost
        if row['repost_of']:
            is_repost = True
        else:
            is_repost = False

        postalCode = row['postalCode'][0:5]
        try:
            neighborhood = neighborhoodMaps[postalCode]
        except KeyError:
            neighborhood = 'No Neighborhood Found'
        borough = boroughMaps[neighborhood]

        thisRow = (address, area, bedrooms, bikeScore, transitScore,
                   walkScore, datetime, distanceToNearestIntersection,
                   hasImage, hasMap, name, price, sideOfStreet, url,
                   longitude, latitude, includesArea, advertisesNoFee,
                   is_repost, postalCode, neighborhood, borough)

        rowAlreadyInDatabase = False
        # Check data in this file
        if i > 0:
            for row in rowsToInsert:
                if thisRow[10] == row[10] and thisRow[0] == row[0]:
                    rowAlreadyInDatabase = True
        if (thisRow[10], thisRow[0]) in currentDataInDB:
            rowAlreadyInDatabase = True
        if not rowAlreadyInDatabase:
            rowsToInsert.append(thisRow)

    insert_query = ('INSERT INTO website_apartment (address, area, bedrooms, '
                    'bike_score, transit_score, walk_score, datetime, '
                    'distance_to_nearest_intersection, has_image, has_map, '
                    'name, price, side_of_street, url, longitude, latitude, '
                    'includes_area, advertises_no_fee, is_repost, postal_code, '
                    'neighborhood, borough) values %s')
    execute_values(cursor, insert_query, rowsToInsert, template=None)
    connection.commit()
    return None
def create_embed_table(conn, cursor):
    # drop any existing embeddings table before recreating it
    try:
        cursor.execute('DROP TABLE embeddings')
        conn.commit()
    except:
        print('no table to drop')
    try:
        cursor.execute(
            'CREATE TABLE embeddings (key varchar, embedding jsonb)')
        conn.commit()
    except:
        print("DB already created")


if __name__ == "__main__":
    conn, cursor = connect_db()
    create_embed_table(conn, cursor)
    print("loading embedding")
    model_path = '/data/GoogleNews-vectors-negative300.bin'
    model = KeyedVectors.load_word2vec_format(model_path, binary=True)
    print("embedding loaded")
    done = 0
    vocab = list(set(model.index2word))
    embeds = [model[key] for key in vocab]
    data = list(zip(vocab, embeds))
    print('running inserts')
    insert_query = "INSERT INTO EMBEDDINGS (key,embedding) VALUES %s"
    execute_values(cursor, insert_query, data, page_size=10)
    print('running commit')
    conn.commit()
import traceback

import psycopg2
from psycopg2.extras import execute_values
from psycopg2 import sql

try:
    conn = psycopg2.connect("dbname='test' host='127.0.0.1'")
except:
    print("I am unable to connect to the database")

cur = conn.cursor()

try:
    arr = [("James", 26), ("Kristof", 25)]
    execute_values(cur, "INSERT INTO aaa (name, age) VALUES %s", arr)
    conn.commit()

    tablename = "aaa"
    queryText = "INSERT INTO {table} (name, age) VALUES %s"
    query = sql.SQL(queryText).format(table=sql.Identifier(tablename))
    execute_values(cur, query.as_string(cur), arr)
    conn.commit()
except:
    print(traceback.format_exc())
def insert_dicts(self, kwargss, dropExtra=True):
    """
    Insert records into the table for the current test.

    Checks arguments to make sure all required columns are present,
    and prevents extra data from being entered.
    """
    # Chunk uploads so we get some progress, and don't create a huge transaction.
    for chunk_start in range(0, len(kwargss), 10):
        chunk_end = min(chunk_start + 10, len(kwargss))
        chunk = kwargss[chunk_start:chunk_end]
        self.log('building %d rows' % len(chunk))
        self._clear_memo()
        if chunk_start != 0:
            self._refresh_connection()
        with self.connection.cursor() as cursor:
            records = []
            for kwargs in chunk:
                # Replace aliases with internal, canonical form
                for alias, replacement in list(ALIASES.items()):
                    if alias in kwargs:
                        if replacement:
                            kwargs[replacement] = kwargs[alias]
                        del kwargs[alias]

                # Check for extras
                extra_columns = []
                extra_values = []
                for k, v in kwargs.items():
                    if k not in self.denormalized_columns:
                        extra_columns.append(k)
                        extra_values.append(v)

                # Check for missing
                missing_columns = []
                for col in self.denormalized_columns:
                    if not col in kwargs:
                        if col not in self.optional_cols:
                            missing_columns.append(col)

                # If we are missing stuff, bad!
                if missing_columns:
                    required_columns = filter(
                        lambda x: x not in self.optional_cols,
                        self.denormalized_columns)
                    raise Exception(
                        'Columns [%s] not provided, expected columns [%s] - optional [%s]'
                        % ((', ').join(missing_columns),
                           (', ').join(required_columns),
                           (', ').join(self.optional_cols)))

                # Extra stuff is only sometimes bad
                blob = None
                if not dropExtra:
                    # If we have a 'blob' column, use that to accumulate extra
                    # fields (unless the blob field has been explicitly set)
                    if 'blob' in self.columns and 'blob' not in kwargs:
                        blob = dict(zip(extra_columns, extra_values))
                    elif extra_columns:
                        raise Exception(
                            'Extra columns [%s] provided, expected only: %s'
                            % ((', ').join(extra_columns),
                               (', ').join(self.denormalized_columns)))

                records.append(self.get_normalized_row(cursor, kwargs, blob))

            self.log('adding %d rows' % len(records))
            try:
                results = execute_values(cursor, self._insert_sql, records,
                                         fetch=True)
                rowids = [x["id"] for x in results]
                if rowids:
                    self.connection.commit()
                else:
                    self.log('rolling back...')
                    self.connection.rollback()
                return rowids
            except Exception as e:
                self.log('rolling back...')
                self.connection.rollback()
                raise e
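# A minimal sketch of the fetch=True pattern insert_dicts relies on above:
# when the INSERT carries a RETURNING clause, execute_values returns the
# resulting rows. The table and column names here are hypothetical, and the
# x["id"] indexing used in the snippet requires a dict-style cursor such as
# RealDictCursor.
import psycopg2
from psycopg2.extras import execute_values, RealDictCursor

def insert_and_fetch_ids(conn, rows):
    with conn.cursor(cursor_factory=RealDictCursor) as cursor:
        results = execute_values(
            cursor,
            "INSERT INTO results (name, score) VALUES %s RETURNING id",
            rows,
            fetch=True)
        conn.commit()
        return [r["id"] for r in results]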
        ,siblings_spouse
        ,parents_children
        ,fare
    ) VALUES %s
'''

# this sucks and I don't like it. But psycopg does not recognize numpy
# data types, so I have to register the np.int64 data type so it knows how to
# handle it. For my future self, just run a bunch of insertion queries, or just
# append the database using .to_sql() from the data frame.
psycopg2.extensions.register_adapter(np.int64, psycopg2._psycopg.AsIs)

# prep dataframe for execute_values. Should be a list of tuples
prepped = list(df.to_records(index=False))

execute_values(cur, insertion_query, prepped)

# Exploratory queries/sanity checks
print('--------------------------------------------')
q1 = '''
    SELECT count(name) as length
    FROM passengers
'''
cur.execute(q1)
res = cur.fetchone()
print('Dataframe length', df.shape[0])
print('Database length', res[0])
def _execute(self, query, values=None, silent=None, values_list=False,
             template=None, commit=None, slow_note=None, reissued=False,
             buffered=False):
    """
    Execute an SQL command, properly catching errors and returning the
    resulting cursor.

    INPUT:

    - ``query`` -- an SQL Composable object, the SQL command to execute.
    - ``values`` -- values to substitute for %s in the query. Quoting from
      the documentation for psycopg2
      (http://initd.org/psycopg/docs/usage.html#passing-parameters-to-sql-queries):

          Never, never, NEVER use Python string concatenation (+) or string
          parameters interpolation (%) to pass variables to a SQL query
          string. Not even at gunpoint.

    - ``silent`` -- boolean (default None). If True, don't log a warning
      for a slow query. If None, allow DelayCommit contexts to control
      silencing.
    - ``values_list`` -- boolean (default False). If True, use the
      ``execute_values`` method, designed for inserting multiple values.
    - ``template`` -- string, for use with ``values_list`` to insert
      constant values: for example ``"(%s, %s, 42)"``. See the
      documentation of ``execute_values`` for more details.
    - ``commit`` -- boolean (default None). Whether to commit changes on
      success. The default is to commit unless we are currently in a
      DelayCommit context.
    - ``slow_note`` -- a tuple for generating more useful data for slow
      query logging.
    - ``reissued`` -- used internally to prevent infinite recursion when
      attempting to reset the connection.
    - ``buffered`` -- whether to create a server side cursor that must be
      manually closed and the connection committed (to close the
      transaction) after using it; this implies ``commit=False``.

    .. NOTE:

        If the Postgres connection has been closed, the execute statement
        will fail. We try to recover gracefully by attempting to open a new
        connection and issuing the command again. However, this approach is
        not prudent if this execute statement is one of a chain of
        statements, which we detect by checking whether ``commit == False``.
        In this case, we will reset the connection but reraise the
        interface error.

        The upshot is that you should use ``commit=False`` even for the
        last of a chain of execute statements, then explicitly call
        ``self.conn.commit()`` afterward.

    OUTPUT:

    - a cursor object from which the resulting records can be obtained via
      iteration.

    This function will also log slow queries.
    """
    if not isinstance(query, Composable):
        raise TypeError(
            "You must use the psycopg2.sql module to execute queries")

    if buffered:
        if commit is None:
            commit = False
        elif commit:
            raise ValueError("buffered and commit are incompatible")

    try:
        cur = self._db.cursor(buffered=buffered)

        t = time.time()
        if values_list:
            if template is not None:
                template = template.as_string(self.conn)
            execute_values(cur, query.as_string(self.conn), values, template)
        else:
            try:
                cur.execute(query, values)
            except (OperationalError, ProgrammingError, NotSupportedError,
                    DataError, SyntaxError) as e:
                try:
                    context = " happens while executing {}".format(
                        cur.mogrify(query, values))
                except Exception:
                    context = " happens while executing {} with values {}".format(
                        query, values)
                reraise(type(e), type(e)(str(e) + context), sys.exc_info()[2])

        if silent is False or (silent is None and not self._db._silenced):
            t = time.time() - t
            if t > self.slow_cutoff:
                if values_list:
                    query = query.as_string(self.conn).replace(
                        "%s", "VALUES_LIST")
                elif values:
                    try:
                        query = cur.mogrify(query, values)
                    except Exception:
                        # This shouldn't happen since the execution above
                        # was successful
                        query = query + str(values)
                else:
                    query = query.as_string(self.conn)
                if isinstance(query, bytes):  # PY3 compatibility
                    query = query.decode("utf-8")
                self.logger.info(
                    query + " ran in \033[91m {0!s}s \033[0m".format(t))
                if slow_note is not None:
                    self.logger.info(
                        "Replicate with db.%s.%s(%s)",
                        slow_note[0],
                        slow_note[1],
                        ", ".join(str(c) for c in slow_note[2:]),
                    )
    except (DatabaseError, InterfaceError):
        if self.conn.closed != 0:
            # If reissued, we need to raise since we're recursing.
            if reissued:
                raise
            # Attempt to reset the connection
            self._db.reset_connection()
            if commit or (commit is None and self._db._nocommit_stack == 0):
                return self._execute(
                    query,
                    values=values,
                    silent=silent,
                    values_list=values_list,
                    template=template,
                    commit=commit,
                    slow_note=slow_note,
                    buffered=buffered,
                    reissued=True,
                )
            else:
                raise
        else:
            self.conn.rollback()
            raise
    else:
        if commit or (commit is None and self._db._nocommit_stack == 0):
            self.conn.commit()
    return cur
def sync_cve_md(page_size=5000):
    """Sync all CVE metadata from VMaaS"""
    LOGGER.info('Syncing CVE metadata')
    conn = DatabaseHandler.get_connection()
    cur = conn.cursor()
    impact_id_map = {}
    cur.execute("select name, id from cve_impact")
    for impact_name, impact_id in cur.fetchall():
        impact_id_map[impact_name] = impact_id
    cur.execute('select cve from cve_metadata')
    cves_in_db = []
    for cve_tuple in cur.fetchall():
        cves_in_db.append(cve_tuple[0])
    cve_list = [".*"]
    success = True
    page = 1
    session = requests.Session()
    while True:
        cve_request = {
            'cve_list': cve_list,
            'page_size': page_size,
            'page': page,
            'rh_only': True
        }
        LOGGER.info('Downloading CVE metadata (page: %s, page_size: %s)',
                    page, page_size)
        r_json = vmaas_post_request(VMAAS_CVES_ENDPOINT, cve_request,
                                    session=session)
        if r_json is None:
            success = False
            break
        LOGGER.info(
            'Importing CVE metadata (page: %s, page_size: %s, pages: %s)',
            page, page_size, r_json['pages'])
        cves = r_json['cve_list']
        to_insert = []
        to_update = []
        for cve in cves:
            description = cves[cve]['description']
            impact_id = impact_id_map[cves[cve]['impact']]
            public_date = cves[cve]['public_date'] or None
            modified_date = cves[cve]['modified_date'] or None
            cvss3_score = (float(cves[cve]['cvss3_score'])
                           if cves[cve].get('cvss3_score') else None)
            cvss3_metrics = cves[cve].get('cvss3_metrics')
            cvss2_score = (float(cves[cve]['cvss2_score'])
                           if cves[cve].get('cvss2_score') else None)
            cvss2_metrics = cves[cve].get('cvss2_metrics')
            row = (cve, description, impact_id, public_date, modified_date,
                   cvss3_score, cvss3_metrics, cvss2_score, cvss2_metrics)
            if cve not in cves_in_db:
                to_insert.append(row)
            else:
                to_update.append(row)
        if to_insert:
            execute_values(cur,
                           """insert into cve_metadata
                              (cve, description, impact_id, public_date, modified_date,
                               cvss3_score, cvss3_metrics, cvss2_score, cvss2_metrics)
                              values %s""",
                           to_insert, page_size=len(to_insert))
        if to_update:
            execute_values(cur,
                           """update cve_metadata set
                              description = data.description,
                              impact_id = data.impact_id,
                              public_date = cast(data.public_date as timestamp with time zone),
                              modified_date = cast(data.modified_date as timestamp with time zone),
                              cvss3_score = cast(data.cvss3_score as numeric),
                              cvss3_metrics = data.cvss3_metrics,
                              cvss2_score = cast(data.cvss2_score as numeric),
                              cvss2_metrics = data.cvss2_metrics
                              from (values %s) as data
                              (cve, description, impact_id, public_date, modified_date,
                               cvss3_score, cvss3_metrics, cvss2_score, cvss2_metrics)
                              where cve_metadata.cve = data.cve""",
                           to_update, page_size=len(to_update))
        LOGGER.info(
            'Finished importing CVE metadata (page: %s, page_size: %s, pages: %s)',
            page, page_size, r_json['pages'])
        if page >= r_json['pages']:
            break
        page += 1
    cur.close()
    conn.commit()
    session.close()
    DatabaseHandler.close_connection()
    LOGGER.info('Finished syncing CVE metadata')
    return success
def sync(dry_run):
    pathname = str(os.path.dirname(os.path.realpath(__file__)))
    logging.basicConfig(filename=pathname + '/log/main.log',
                        level=logging.DEBUG,
                        filemode='a',
                        format='%(asctime)s - %(message)s')

    # Initialise connection to MITREid Connect DB
    connect_oidc_str = "dbname='" + config.mitreid_config['dbname'] + \
                       "' user='" + config.mitreid_config['user'] + \
                       "' host='" + config.mitreid_config['host'] + \
                       "' password='" + config.mitreid_config['password'] + "'"
    try:
        connOIDC = psycopg2.connect(connect_oidc_str)
    except Exception as e:
        logging.error("Could not connect to MITREid Connect DB")
        logging.error(e)
        raise SystemExit("Could not connect to MITREid Connect DB")

    # Create psycopg2 cursor that can execute queries
    cursorOIDC = connOIDC.cursor()

    # Initialise connection to proxystatistics DB
    connect_proxystats_str = "dbname='" + config.proxystats_config['dbname'] + \
                             "' user='" + config.proxystats_config['user'] + \
                             "' host='" + config.proxystats_config['host'] + \
                             "' password='" + config.proxystats_config['password'] + "'"
    try:
        connProxystats = psycopg2.connect(connect_proxystats_str)
    except Exception as e:
        logging.error("Could not connect to proxystatistics DB")
        logging.error(e)
        raise SystemExit("Could not connect to proxystatistics DB")

    # Create psycopg2 cursor that can execute queries
    cursorProxystats = connProxystats.cursor()

    #
    # Select MITREid Connect clients
    #
    logging.debug("Retrieving client details from MITREid Connect DB")
    try:
        cursorOIDC.execute("SELECT client_id, client_name "
                           "FROM client_details WHERE client_name IS NOT NULL;")
    except Exception as e:
        logging.error("Could not retrieve client details from MITREid "
                      "Connect DB")
        logging.error(e)
        raise SystemExit("Could not retrieve client details from MITREid "
                         "Connect DB")

    clientDetails = cursorOIDC.fetchall()

    #
    # Insert client names into SSPMOD_proxystatistics DB
    #
    query = ("INSERT INTO serviceprovidersmap (identifier, name) VALUES %s "
             "ON CONFLICT (identifier) DO UPDATE SET name = EXCLUDED.name "
             "WHERE serviceprovidersmap.name IS DISTINCT FROM EXCLUDED.name;")
    logging.debug("Updating proxystatistics DB")
    try:
        # https://www.psycopg.org/docs/extras.html#psycopg2.extras.execute_values
        execute_values(cursorProxystats, query, clientDetails)
    except Exception as e:
        logging.error("Could not update proxystatistics DB query")
        logging.error(e)
        raise SystemExit("Could not update proxystatistics DB query")

    if not dry_run:
        logging.info("Commit proxystatistics DB update")
        connProxystats.commit()

    cursorOIDC.close()
    cursorProxystats.close()
    connOIDC.close()
    connProxystats.close()
def calendarcron():
    assert request.remote_addr in EASYCRON_IPS
    now = datetime.utcnow()
    telegram_key = os.environ["TELEGRAM_KEY"]
    calendar_id = request.args["calendar_id"]
    calendar_type = request.args["calendar_type"]
    cron_start_time = datetime.strptime(request.args["next_event_start_time"],
                                        "%Y-%m-%dT%H:%M:%S")
    with tracer("db connection"):
        try:
            with tracer("open db connection"):
                connection = psycopg2.connect(os.environ["POSTGRES_DSN"])
            with tracer("find calendar cron transaction"), connection:
                with tracer("cursor"), connection.cursor() as cursor:
                    with tracer("find calendar cron query"):
                        cursor.execute(
                            """
                            SELECT sub, cron_id
                            FROM calendarcron
                            WHERE calendar_id = %s
                            AND calendar_type = %s
                            """,
                            (calendar_id, calendar_type),
                        )
                    assert cursor.rowcount
                    g.sub, cron_id = next(cursor)
            start_minus_1m = cron_start_time - timedelta(minutes=1)
            events_response = oauth.google.get(
                ("https://www.googleapis.com/calendar/v3/calendars"
                 f"/{calendar_id}/events"),
                params={
                    "maxResults": "50",
                    "orderBy": "startTime",
                    "singleEvents": "true",
                    "timeMin": f"{start_minus_1m:%Y-%m-%dT%H:%M:%SZ}",
                    "timeZone": "Etc/UTC",
                },
            )
            events_obj = events_response.json()
            calendar_tz = events_obj["timeZone"]
            events = events_obj["items"]
            for event in events:
                start = parse_event_time(event["start"], calendar_tz)
                if start > now:
                    break
            else:  # no break
                return ("", 204)
            with tracer("update cron time transaction"), connection:
                with tracer("cursor"), connection.cursor() as cursor:
                    with tracer("update cron time query"):
                        cursor.execute(
                            """
                            UPDATE calendarcron
                            SET next_event_start_time = %s
                            WHERE sub = %s
                            AND calendar_id = %s
                            AND calendar_type = 'google'
                            """,
                            (start, g.sub, calendar_id),
                        )
            cron_response = requests.get(
                "https://www.easycron.com/rest/edit",
                params={
                    "token": os.environ["EASYCRON_KEY"],
                    "id": cron_id,
                    "cron_expression": f"{start:%M %H %d %m * %Y}",
                    "url": url_for(
                        "calendarcron.calendarcron",
                        _external=True,
                        next_event_start_time=f"{start:%Y-%m-%dT%H:%M:%S}",
                        calendar_id=calendar_id,
                        calendar_type="google",
                    ),
                },
            )
            cron_response.raise_for_status()
            error = cron_response.json().get("error", {}).get("message")
            if error:
                raise Exception(error)
            events_to_send = []
            for event in events:
                start = parse_event_time(event["start"], calendar_tz)
                if cron_start_time <= start <= now:
                    events_to_send.append(event)
            with tracer("find calendar chats transaction"), connection:
                with tracer("cursor"), connection.cursor() as cursor:
                    with tracer("find calendar chats query"):
                        cursor.execute(
                            """
                            SELECT chat_id
                            FROM calendarchatlink
                            WHERE sub = %s
                            AND calendar_id = %s
                            AND calendar_type = 'google'
                            AND chat_type = 'telegram'
                            """,
                            (g.sub, calendar_id),
                        )
                    rows = list(cursor)
            # New code testing in prod so suppress any errors
            with suppress(Exception):
                try:
                    events_start = find_next_event_start(events_obj, now)
                except LookupError:
                    pass
                else:
                    summaries = find_event_summaries_starting(
                        events_obj, events_start)
                    with tracer("event_details table exists transaction"), connection:
                        with tracer("cursor"), connection.cursor() as cursor:
                            with tracer("event_details table exists query"):
                                cursor.execute("""
                                    SELECT table_name
                                    FROM information_schema.tables
                                    WHERE table_name = 'event_details'
                                """)
                            if not cursor.rowcount:
                                with tracer("create event_details table query"):
                                    cursor.execute("""
                                        CREATE TABLE event_details (
                                            calendar_type text,
                                            calendar_id text,
                                            summary text
                                        )
                                    """)
                    with tracer("update event_details transaction"), connection:
                        with tracer("cursor"), connection.cursor() as cursor:
                            with tracer("clear event_details query"):
                                cursor.execute(
                                    """
                                    DELETE FROM event_details
                                    WHERE calendar_type = 'google'
                                    AND calendar_id = %s
                                    """,
                                    (calendar_id, ),
                                )
                            with tracer("insert event_details query"):
                                execute_values(
                                    cursor,
                                    """
                                    INSERT INTO event_details (
                                        calendar_type,
                                        calendar_id,
                                        summary
                                    ) VALUES %s
                                    """,
                                    [("google", calendar_id, summary)
                                     for summary in summaries],
                                )
        finally:
            connection.close()
    summaries = concat_unique([e["summary"] for e in events_to_send], [])
    with tracer("telegram sends"):
        for (chat_id, ) in rows:
            telegram_response = requests.get(
                f"https://api.telegram.org/bot{telegram_key}/sendMessage",
                data={
                    "chat_id": chat_id,
                    "text": "\n".join(summaries)
                },
            )
            assert telegram_response.json()["ok"]
    return ("", 204)
#     ('A rowwwww', 'null'),
#     ('Another row, with JSONNNNN', json.dumps(my_dict)),
#     ('Third row', "3")
# ])

df = pd.DataFrame([['A rowwwww', 'null'],
                   ['Another row, with JSONNNNN', json.dumps(my_dict)],
                   ['Third row', "null"],
                   ["Pandas Row", "null"]])
records = df.to_dict("records")
#> [{0: 'A rowwwww', 1: 'null'}, {0: 'Another row, with JSONNNNN', 1: '{"a": 1, "b": ["dog", "cat", 42], "c": "true"}'}, {0: 'Third row', 1: '3'}, {0: 'Pandas Row', 1: 'YOOO!'}]
list_of_tuples = [(r[0], r[1]) for r in records]
execute_values(cursor, insertion_query, list_of_tuples)

#
# QUERY THE TABLE
#

print("-------------------")
query = f"SELECT * FROM {table_name};"
print("SQL:", query)
cursor.execute(query)
for row in cursor.fetchall():
    print(row)

# ACTUALLY SAVE THE TRANSACTIONS
connection.commit()
def _insert_logs(self, cursor, logs):
    if not logs:
        return
    execute_values(cursor, LOG_INSERT_QUERY, logs,
                   template=LOG_VALUES_TEMPLATE)
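# _insert_logs above depends on module-level constants not shown in this
# snippet. A plausible shape for them, assuming a (timestamp, level, message)
# log row -- the table and column names are hypothetical. The template string
# tells execute_values how to render each element of the logs sequence.
LOG_INSERT_QUERY = "INSERT INTO logs (logged_at, level, message) VALUES %s"
LOG_VALUES_TEMPLATE = "(%s, %s, %s)"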
def parse_stooq_new(self, last_db_date):
    logging.info('Checking if a new stooq file has been downloaded')
    # stooq_file = os.path.join(os.getenv('DOWNLOAD_DIR'), 'data_d.txt')
    download_dir = self.config.get('datafeed', 'download_dir')
    stooq_file = os.path.join(download_dir, 'data_d.txt')
    if not os.path.exists(stooq_file):
        logging.info('No new stooq data file found. Nothing to do.')
        return
    logging.info('Reading stooq file, renaming columns, getting daily '
                 'US stocks data')
    df = pd.read_csv(stooq_file,
                     dtype={
                         "<TICKER>": "string",
                         "<PER>": "string",
                         "<DATE>": "string",
                         "<TIME>": "string",
                         "<OPEN>": float,
                         "<HIGH>": float,
                         "<LOW>": float,
                         "<CLOSE>": float,
                         "<VOL>": int,
                         "<OPENINT>": int
                     })
    df.columns = [x[1:-1].lower() for x in df.columns]
    stx_df = df.query('ticker.str.endswith(".US") and per == "D"',
                      engine='python').copy()
    logging.info(
        'Getting {0:d} daily US stocks out of {1:d} records'.format(
            len(stx_df), len(df)))
    stx_df['date'] = stx_df['date'].astype(str)
    stx_df['date'] = stx_df.apply(lambda r: '{0:s}-{1:s}-{2:s}'.format(
        r['date'][0:4], r['date'][4:6], r['date'][6:8]), axis=1)
    logging.info('Converted stx_df dates in yyyy-mm-dd format')
    dates = stx_df.groupby(by='date')['ticker'].count()
    next_date = stxcal.next_busday(last_db_date)
    ix0, num_dates = 0, len(dates)
    logging.info('Data available for {0:d} dates, from {1:s} to {2:s}; DB '
                 'needs data starting from {3:s}'.format(
                     len(dates), dates.index[0], dates.index[num_dates - 1],
                     next_date))
    db_dates = []
    while ix0 < num_dates:
        if dates.index[ix0] == next_date:
            break
        ix0 += 1
    for ixx in range(ix0, num_dates):
        if dates.index[ixx] == next_date and dates.values[ixx] > 9000:
            db_dates.append(dates.index[ixx])
        else:
            if dates.index[ixx] != next_date:
                logging.error(f'Missing date {next_date}; got '
                              f'{dates.index[ixx]} instead')
            if dates.values[ixx] < 9000:
                logging.error(f'Not enough records ({dates.values[ixx]}) '
                              f'available for {dates.index[ixx]}')
            break
        next_date = stxcal.next_busday(next_date)
    if not db_dates:
        logging.info('No new data available for processing. Exiting')
        return
    logging.info('Check that there are no time gaps between DB data and '
                 'upload data')
    start_date = stxcal.next_busday(last_db_date)
    num_bdays = stxcal.num_busdays(start_date, db_dates[0])
    if num_bdays > 0:
        logging.warning(
            'No data for {0:d} days ({1:s} - {2:s}). Exiting ...'.format(
                num_bdays, start_date, stxcal.prev_busday(db_dates[0])))
        return
    logging.info('Check that there are no time gaps in the upload data')
    for ixx in range(len(db_dates) - 1):
        if stxcal.next_busday(db_dates[ixx]) != db_dates[ixx + 1]:
            logging.warning('Inconsistent dates {0:s} and {1:s} '
                            'at indexes {2:d} and {3:d}'.format(
                                db_dates[ixx], db_dates[ixx + 1],
                                ixx, ixx + 1))
    sel_stx_df = stx_df.query('date in @db_dates').copy()
    logging.info(
        '{0:d}/{1:d} records found for following dates: [{2:s}]'.format(
            len(sel_stx_df), len(stx_df), ', '.join(db_dates)))
    sel_stx_df['invalid'] = sel_stx_df.apply(
        lambda r: np.isnan(r['open']) or np.isnan(r['high']) or
        np.isnan(r['low']) or np.isnan(r['close']) or np.isnan(r['vol']) or
        r['vol'] == 0 or r['open'] > r['high'] or r['open'] < r['low'] or
        r['close'] > r['high'] or r['close'] < r['low'],
        axis=1)
    valid_stx_df = sel_stx_df.query('not invalid').copy()
    logging.info('Found {0:d} valid records out of {1:d} records'.format(
        len(valid_stx_df), len(sel_stx_df)))

    def process_row(r):
        stk = r['ticker'][:-3].replace("-.", ".P.").replace(
            "_", ".").replace('-', '.')
        o = int(100 * r['open'])
        hi = int(100 * r['high'])
        lo = int(100 * r['low'])
        c = int(100 * r['close'])
        v = int(r['vol'])
        v = v // 1000
        if v == 0:
            v = 1
        lst = [stk, o, hi, lo, c, v]
        return pd.Series(lst)

    valid_stx_df[['ticker', 'open', 'high', 'low', 'close', 'vol']] = \
        valid_stx_df.apply(process_row, axis=1)
    valid_stx_df['openint'] = 2
    valid_stx_df.drop(columns=['per', 'time', 'invalid'], axis=1,
                      inplace=True)
    valid_stx_df.columns = ['stk', 'dt', 'o', 'hi', 'lo', 'c', 'v', 'oi']
    with closing(stxdb.db_get_cnx().cursor()) as crs:
        sql = 'CREATE TEMPORARY TABLE temp_table ('\
              'stk VARCHAR(16) NOT NULL, '\
              'dt DATE NOT NULL, '\
              'o INTEGER NOT NULL, '\
              'hi INTEGER NOT NULL, '\
              'lo INTEGER NOT NULL, '\
              'c INTEGER NOT NULL, '\
              'v INTEGER, '\
              'oi INTEGER, '\
              'PRIMARY KEY(stk, dt))'
        crs.execute(sql)
        logging.info('Created temporary table')
        upload_data = valid_stx_df.values.tolist()
        execute_values(
            crs, 'INSERT INTO temp_table '
            '(stk, dt, o, hi, lo, c, v, oi) VALUES %s', upload_data)
        logging.info('Uploaded dataframe into temporary table')
        stxdb.db_write_cmd(
            'INSERT INTO eods (stk, dt, o, hi, lo, c, v, oi) '
            'SELECT * FROM temp_table ON CONFLICT (stk, dt) DO '
            'UPDATE SET o = EXCLUDED.o, hi = EXCLUDED.hi, '
            'lo = EXCLUDED.lo, c = EXCLUDED.c, v = EXCLUDED.v, '
            'oi = EXCLUDED.oi')
        logging.info('Uploaded data into eods table')
    last_upload_date = valid_stx_df['dt'].max()
    stxdb.db_write_cmd("UPDATE analyses SET dt='{0:s}' WHERE "
                       "analysis='eod_datafeed'".format(last_upload_date))
    logging.info('Updated latest eod datafeed date {0:s} in DB'.format(
        last_upload_date))
    self.rename_stooq_file(dates.index[0], dates.index[num_dates - 1])
def batch_insert_into_tokens(tokens: list, token_type: str, page_size: int,
                             conn) -> list:
    """
    Batch inserts tokens into table: reactions or intents.

    :param tokens: list of tokens as reaction or intent, each can consist
                   of one word or several words.
    :param token_type: to be reaction or intent.
    :param page_size: an integer for batch insertion into database.
    :param conn: database connection object.
    :return: list of token ids that were inserted into the database.
    """
    cur = conn.cursor()
    eids = []
    if token_type == Event2MindDumper.INTENT_TOKENS:
        slct_id_sql = Event2MindDumper.SLCT_ID_INTENT_SQL
        slct_max_id_sql = Event2MindDumper.SLCT_MAX_ID_INTENT_SQL
        batch_insert_sql = Event2MindDumper.BATCH_INSRT_INTENT_SQL
    else:
        slct_id_sql = Event2MindDumper.SLCT_ID_REACTION_SQL
        slct_max_id_sql = Event2MindDumper.SLCT_MAX_ID_REACTION_SQL
        batch_insert_sql = Event2MindDumper.BATCH_INSRT_REACTION_SQL

    for token in tokens:
        # get id for each token and list of all token ids
        try:
            cur.execute(slct_id_sql, (token, ))
            a = cur.fetchone()
            if a:
                # when the token is already in the database, reuse the
                # existing id
                eids.append(a[0])
            else:
                # when the token is not in the database, select the max
                # token id in the database and assign (max_token_id + 1)
                # to this token
                cur.execute(slct_max_id_sql)
                eid = cur.fetchone()[0]
                if eid:
                    eid += 1
                else:
                    eid = 1
                eids.append(eid)
        except Exception:
            logger.error('error: ' + traceback.format_exc())
            return eids

    # combine the two lists into (id, token) pairs for batch insertion
    eid_token_pairs = list(zip(eids, tokens))
    try:
        extras.execute_values(cur, batch_insert_sql, eid_token_pairs,
                              template=None, page_size=page_size)
    except Exception:
        logger.error('error: ' + traceback.format_exc())
        return eids
    conn.commit()
    cur.close()
    return eids
def update_userstories_kanban_order_in_bulk(
        project: Project,
        status: UserStoryStatus,
        bulk_userstories: List[int],
        before_userstory: Optional[models.UserStory] = None,
        after_userstory: Optional[models.UserStory] = None,
        swimlane: Optional[Swimlane] = None):
    """
    Updates the order of the userstories specified, adding the extra
    updates needed to keep consistency.

    Note: `after_userstory_id` and `before_userstory_id` are mutually
    exclusive; you can use only one at a given request. They can both be
    None, which means "at the beginning of its cell".

    - `bulk_userstories` should be a list of user story IDs
    """
    # filter user stories from status and swimlane
    user_stories = project.user_stories.filter(status=status)
    if swimlane is not None:
        user_stories = user_stories.filter(swimlane=swimlane)
    else:
        user_stories = user_stories.filter(swimlane__isnull=True)

    # exclude moved user stories
    user_stories = user_stories.exclude(id__in=bulk_userstories)

    # if before_userstory, get it and all elements before too:
    if before_userstory:
        user_stories = (user_stories.filter(
            kanban_order__gte=before_userstory.kanban_order))
    # if after_userstory, exclude it and get only elements after it:
    elif after_userstory:
        user_stories = (user_stories.exclude(id=after_userstory.id).filter(
            kanban_order__gte=after_userstory.kanban_order))

    # sort and get only ids
    user_story_ids = (user_stories.order_by("kanban_order", "id")
                                  .values_list('id', flat=True))

    # append moved user stories
    user_story_ids = bulk_userstories + list(user_story_ids)

    # calculate the start order
    if before_userstory:
        # order starts with the before_userstory order
        start_order = before_userstory.kanban_order
    elif after_userstory:
        # order starts after the after_userstory order
        start_order = after_userstory.kanban_order + 1
    else:
        # move to the beginning of the column if there is no after and before
        start_order = 1

    # prepare rest of data
    total_user_stories = len(user_story_ids)
    user_story_swimlane_ids = ((swimlane.id if swimlane else None, )
                               * total_user_stories)
    user_story_status_ids = (status.id, ) * total_user_stories
    user_story_kanban_orders = range(start_order,
                                     start_order + total_user_stories)

    data = tuple(
        zip(user_story_ids, user_story_swimlane_ids, user_story_status_ids,
            user_story_kanban_orders))

    # execute query to update status, swimlane and kanban_order
    sql = """
    UPDATE userstories_userstory
       SET swimlane_id = tmp.new_swimlane_id::BIGINT,
           status_id = tmp.new_status_id::BIGINT,
           kanban_order = tmp.new_kanban_order::BIGINT
      FROM (VALUES %s) AS tmp (id, new_swimlane_id, new_status_id, new_kanban_order)
     WHERE tmp.id = userstories_userstory.id
    """
    with connection.cursor() as cursor:
        execute_values(cursor, sql, data)

    # Update is_closed attr for UserStories and related milestones
    if settings.CELERY_ENABLED:
        update_open_or_close_conditions_if_status_has_been_changed.delay(
            bulk_userstories)
    else:
        update_open_or_close_conditions_if_status_has_been_changed(
            bulk_userstories)

    # Send events for updated stories
    events.emit_event_for_ids(ids=user_story_ids,
                              content_type="userstories.userstory",
                              projectid=project.pk)

    # Generate response with modified info
    res = ({
        "id": id,
        "swimlane": swimlane,
        "status": status,
        "kanban_order": kanban_order
    } for (id, swimlane, status, kanban_order) in data)
    return res
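# A minimal, standalone sketch of the bulk-update pattern used above (and in
# sync_cve_md and calibrate_cbg earlier): execute_values expands %s into a
# VALUES list that an UPDATE ... FROM joins against, so many rows are updated
# in a single statement. Table and column names here are illustrative only.
import psycopg2
from psycopg2.extras import execute_values

def bulk_update_orders(conn, id_order_pairs):
    sql = """
    UPDATE items
       SET sort_order = tmp.new_order::BIGINT
      FROM (VALUES %s) AS tmp (id, new_order)
     WHERE tmp.id = items.id
    """
    with conn.cursor() as cursor:
        execute_values(cursor, sql, id_order_pairs)
    conn.commit()

# bulk_update_orders(conn, [(1, 10), (2, 20), (3, 30)])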
### Connect to ElephantSQL-hosted PostgreSQL
connection = psycopg2.connect(dbname=DBNAME,
                              user=DBUSER,
                              password=DBPASSWORD,
                              host=DBHOST)

### A "cursor", a structure to iterate over db records to perform queries
print(type(connection))
cursor = connection.cursor()
print(type(cursor))

my_dict = {"a": 1, "b": ["dog", "cat", 42], "c": 'true'}

rows_to_insert = [('A rowwwww', 'null'),
                  ('Another row, with JSONNNNN', json.dumps(my_dict))
                  ]  # list of tuples

# h/t: https://stackoverflow.com/questions/8134602/psycopg2-insert-multiple-rows-with-one-query
insertion_query = "INSERT INTO test_table (name, data) VALUES %s"
execute_values(cursor, insertion_query, rows_to_insert)

### An example query
cursor.execute('SELECT * from test_table;')

### Note - nothing happened yet! We need to actually *fetch* from the cursor
print("---------")
results = cursor.fetchall()
print(results)

# ACTUALLY SAVE THE TRANSACTIONS
# if creating tables or inserting data (changing db)
connection.commit()
def insert_items(self, items, position=None):
    with connect(self.dbc) as conn:
        register(connection=conn)
        with conn.cursor() as curs:
            data = [self.to_tuple(item) for item in items]
            extras.execute_values(curs, self.insert_query, data,
                                  template=None, page_size=100)
def insert_characters(self, characters):
    insertion_query = ("INSERT INTO characters (character_id, name, level, "
                       "exp, hp, strength, intelligence, dexterity, wisdom) "
                       "VALUES %s")
    list_of_tuples = characters
    execute_values(self.pg_cursor, insertion_query, list_of_tuples)
    self.pg_connection.commit()
def lambda_handler(event, context):
    """
    lambda_handler executes the logic for the getCovidStatsUS AWS Lambda
    function.

    The logic:
        * Accepts an optional date parameter in the form of a json document:
          e.g. {"date": "2020-01-15"}
        * If no valid date is passed, then "yesterday's" date is assumed
        * Connects to a Postgres database
        * Makes a REST API call to a CDC Covid API
        * Fetches Covid data for US states and districts for the given date
        * Inserts that data into a database table
        * NOTE: if a row exists for that particular state/date the insert
          will not be executed
    """
    # Print out the inbound event parameters
    print(f"INFO: Starting function with inbound event: {event}",
          file=sys.stderr)

    # Set up working variables
    ret_dict = {'statusCode': 200, 'body': json.dumps('default message')}

    # Grab environment variables
    MY_APP_TOKEN = os.getenv("COVID_API")
    if len(MY_APP_TOKEN) == 0:
        # error: missing API key
        print("ERROR: missing API key", file=sys.stderr)
        ret_dict['statusCode'] = 401
        ret_dict['body'] = json.dumps('error: missing API key')
        return ret_dict

    PG_DB_PASSWORD = os.getenv("PG_DB_PASSWORD")
    if len(PG_DB_PASSWORD) == 0:
        # error: missing Postgres database password
        print("ERROR: missing Postgres database password", file=sys.stderr)
        ret_dict['statusCode'] = 400
        ret_dict['body'] = json.dumps(
            'error: missing Postgres database password')
        return ret_dict

    # Configure a connection object
    connection = psycopg2.connect(
        user="******",
        password=PG_DB_PASSWORD,
        host="labspt13-res.c39zgmn8rrtf.us-east-2.rds.amazonaws.com",
        port="5432",
        database="resfeber")

    # Test the database connection
    try:
        cursor = connection.cursor()
        # Print PostgreSQL Connection properties
        print(f"INFO: Connecting to the database with these credentials: "
              f"{connection.get_dsn_parameters()}\n", file=sys.stderr)
        # Print PostgreSQL version
        cursor.execute("SELECT version();")
        record = cursor.fetchone()
        print(f"INFO: Successfully connected to the database: {record}\n",
              file=sys.stderr)
    except (Exception, psycopg2.Error) as error:
        print(f"Error while connecting to PostgreSQL: {error}",
              file=sys.stderr)
        ret_dict['statusCode'] = 500
        ret_dict['body'] = json.dumps(
            'error connecting to the Postgres database')
        return ret_dict

    # Missing the date parameter?
    req_date = ""
    has_date_param = False  # flag indicating a date parameter has been found
    if "date" in event:
        # yes: the event object has a date key/value pair
        has_date_param = True
        req_date = event["date"]
    elif "body" in event:
        # yes: the event object has a body element
        # parse the body string into a dict and look for a date parameter
        try:
            tmp_dict = json.loads(event["body"])
            # is a date parameter included in the body?
            if "date" in tmp_dict:
                has_date_param = True
                req_date = tmp_dict['date']
        except:
            # error occurred parsing the request body content
            print("ERROR: error parsing the event['body'] searching for a "
                  "'date' parameter")

    # Has the function been passed a date parameter?
    if not has_date_param:
        # no: "date" key value is missing, fetch Covid API data for yesterday
        yesterday = date.today() - timedelta(
            days=1)  # 1 day from the current day (utc)
        req_date = yesterday.strftime("%Y-%m-%d")

    # Validate the date parameter (e.g. "2020-11-01")
    if len(req_date) != 10:
        # error: invalid date parameter
        print(f"ERROR: invalid date parameter: {req_date} with length: "
              f"{len(req_date)}", file=sys.stderr)
        ret_dict['statusCode'] = 400
        ret_dict['body'] = json.dumps('error: invalid date parameter')
        return ret_dict

    # Construct a date/time string
    date_time = req_date + "T00:00:00.000"
    print(f"INFO: fetching covid data for this date: {date_time}",
          file=sys.stderr)

    # Stand up an API client object
    client = Socrata('data.cdc.gov', MY_APP_TOKEN)

    # Fetch data from the API
    results = client.get("9mfq-cb36", submission_date=date_time)

    # Construct an Insert SQL statement
    sql = ("INSERT INTO state_covid_stats "
           "(state, date, new_cases, time_created, covid_api_rsp, notes) "
           "VALUES %s ON CONFLICT (state, date) DO NOTHING")

    # Iterate through the results
    print(f"INFO: Number of covid data points returned by the CDC API: "
          f"{len(results)}", file=sys.stderr)
    vals_arr = []
    for rslt in results:
        tmp_tpl = (rslt["state"], req_date, int(float(rslt["new_case"])),
                   datetime.now().strftime("%Y-%m-%dT%H:%M:%S"),
                   json.dumps(rslt), "")
        # append this "row" tuple to our running array
        vals_arr.append(tmp_tpl)

    # Do we have any data (rows) to insert into the database?
    if len(vals_arr) != 0:
        # yes: attempt the database insert
        try:
            print(f"INFO: attempting to insert {len(vals_arr)} rows into "
                  f"the database for requested date: {date_time}",
                  file=sys.stderr)
            # stand up a db cursor
            cursor = connection.cursor()
            # execute the insert for multiple rows
            execute_values(cursor, sql, vals_arr)
            # commit the db changes
            connection.commit()
            # close the db connection
            connection.close()
        except (Exception, psycopg2.Error) as error:
            print(f"ERROR: error occurred attempting to insert rows into "
                  f"the database: {error}", file=sys.stderr)
            ret_dict['statusCode'] = 500
            ret_dict['body'] = json.dumps(
                'error connecting to the Postgres database')
            return ret_dict
    else:
        # no data to insert
        print(f"INFO: no Covid API data returned. Nothing to insert into "
              f"the database", file=sys.stderr)
        ret_dict['statusCode'] = 204
        ret_dict['body'] = json.dumps(
            'no Covid API data returned; nothing to insert into the database')
        return ret_dict

    # Function completed processing
    print(f"INFO: finished processing: {len(vals_arr)} rows",
          file=sys.stderr)
    print(f"INFO: NOTE! - duplicate state/date rows will be ignored",
          file=sys.stderr)
    ret_dict['statusCode'] = 200
    ret_dict['body'] = json.dumps(
        f"INFO: finished processing: {len(vals_arr)} rows")
    return ret_dict
p = re.compile(
    r'/\d*-(.*\.wav)-(\d*\.\d*)\.wav\.npz *: should be (\d*) but came out as (\d*)'
)
matches = p.findall(txt)
errors = [(str(iteration), ) + m for m in matches]

# COMMAND ----------

# Store training errors in database
from psycopg2.extras import execute_values

with conn.cursor() as curs:
    delete_query = 'delete from training_errors where iteration_id = %s'
    curs.execute(delete_query, (iteration, ))
    insert_query = ('insert into training_errors '
                    '(iteration_id, file_name, "offset", should_be_id, came_out_id) '
                    'values %s')
    execute_values(curs, insert_query, errors, template=None, page_size=100)
    conn.commit()

# COMMAND ----------

# Compute evaluated accuracy
p2 = re.compile(r'Found (\d*) total mistakes out of (\d*)')
m2 = p2.findall(txt)
accuracy = round((100 - (int(m2[0][0]) / int(m2[0][1])) * 100), 2)

# COMMAND ----------

# Store accuracy in database
update_accuracy_sql = 'update model_iterations set accuracy = %s where id = %s'
with conn.cursor() as curs:
    # execute the update and commit
    curs.execute(update_accuracy_sql, (accuracy, iteration))
    conn.commit()
# ); """ my_dict = {"a": 1, "b": ["dog", "cat", 42], "c": 'true'} # insertion_query = "INSERT INTO test_table (name, data) VALUES (%s, %s)" # cursor.execute(insertion_query, # ('A rowwwww', 'null') # ) # cursor.execute(insertion_query, # ('Another row, with JSONNNNN', json.dumps(my_dict)) # ) # h/t: https://stackoverflow.com/questions/8134602/psycopg2-insert-multiple-rows-with-one-query insertion_query = "INSERT INTO test_table (name, data) VALUES %s" execute_values(cursor, insertion_query, [('A rowwwww', 'null'), ('Another row, with JSONNNNN', json.dumps(my_dict)), ('Third row', "3")]) connection.commit() cursor.close() connection.close() exit() df = pd.DataFrame([['A rowwwww', 'null'], ['Another row, with JSONNNNN', json.dumps(my_dict)], ['Third row', "null"], ["Pandas Row", "null"]]) records = df.to_dict(
def commitDataList(logger, query, values, dbName=None):
    '''insert data into the database

    Run the provided query with ``execute_values`` and commit the result.
    If there is a problem, the error is logged and None is returned; since
    the function also returns None on success, callers should rely on the
    log for error reporting.

    Parameters
    ----------
    logger : {logging.logger}
        logging element
    query : {str}
        The query to be made to the database
    values : {tuple or list-like}
        Values to be passed to the query
    dbName : {str or None}, optional
        The name of the database to use. If this is None, the function
        will attempt to read the name from the ``defaultDB`` item within
        the file ``../config/db.json``.

    Returns
    -------
    None
        In case there is an error, the error will be logged and None
        will be returned
    '''
    vals = None
    try:
        db = json.load(open('../config/db.json'))

        # Check whether a dbName is available
        if (dbName is None) and ('defaultDB' in db):
            dbName = db['defaultDB']

        # Check whether a dbName has been specified
        if dbName is None:
            logger.error('A database name has not been specified.')
            return None

        conn = psycopg2.connect(db[dbName]['connection'])
        cur = conn.cursor()
    except Exception as e:
        logger.error('Unable to connect to the database')
        logger.error(str(e))
        return None

    try:
        # mogrify returns bytes in Python 3; decode so execute_values
        # receives a str
        query = cur.mogrify(query).decode('utf-8')
        execute_values(cur, query, values)
    except Exception as e:
        logger.error('Unable to insert data into the database for:\n'
                     'query: {}\nvalues: {}'.format(query, values))
        logger.error(str(e))

    try:
        conn.commit()
        cur.close()
        conn.close()
    except Exception as e:
        logger.error('Unable to disconnect from the database')
        logger.error(str(e))
        return None

    return vals
def commands(code):
    with closing(psycopg2.connect(dbname='postgres',
                                  user='******',
                                  password='******',
                                  host='localhost')) as conn:
        with conn.cursor(cursor_factory=DictCursor) as cursor:
            conn.autocommit = True
            if code == 1:
                cursor.execute(
                    'select name from footballers '
                    'where rating = (select max(rating) from footballers) limit 1')
                for row in cursor:
                    print(row[0])
            if code == 2:
                cursor.execute(
                    """select footballers.name, transfers.position, agents.name
                         from transfers
                         join footballers on footballers.id = transfers.footballer
                         join agents on agents.id = transfers.agent
                        limit 5;""")
                for row in cursor:
                    print(row[0])
            if code == 3:
                cursor.execute(
                    """with cte as
                         (select name, rating, cost,
                                 avg(cost) over (partition by rating)
                            from agents)
                       select * from cte limit 5;""")
                for row in cursor:
                    print(row)
            if code == 4:
                cursor.execute(
                    """select table_catalog, table_schema, table_name, table_type
                         from information_schema.tables
                        where table_schema = 'public'""")
                for row in cursor:
                    print(row)
            if code == 5:
                cursor.callproc('getFootballer')
                for row in cursor:
                    print(row)
            if code == 6:
                cursor.callproc('getFootballerTable', ['left'])
                records = cursor.fetchmany(size=5)
                for row in records:
                    print(row)
            if code == 7:
                cursor.execute('select * into temp footballers_copy from footballers;')
                cursor.execute('call update_rating();')
                cursor.execute(
                    """select footballers.name, footballers.foot,
                              footballers.rating as before_update,
                              footballers_copy.rating as after_update
                         from footballers_copy
                         join footballers on footballers_copy.id = footballers.id
                        order by footballers_copy.rating desc
                        limit 10""")
                for row in cursor:
                    print(row)
            if code == 8:
                cursor.execute('select current_user;')
                record1 = cursor.fetchone()
                cursor.execute('select version();')
                record2 = cursor.fetchone()
                print("USER: ", record1, "VERSION: ", record2)
            if code == 9:
                cursor.execute("""create table if not exists tournaments (
                        name varchar(30),
                        quality int,
                        quantity_clubs int,
                        price varchar
                    )
                """)
            if code == 10:
                namedict = [("Bundesliga", 5, 18, "1B"),
                            ("La Liga", 5, 20, "2B"),
                            ("Seria A", 4, 20, "0.5B"),
                            ("Tinkoff RFL", 3, 16, "150M"),
                            ("Premier League", 5, 20, "3B"),
                            ("Ligue 1", 4, 18, "1B")]
                execute_values(
                    cursor,
                    "insert into tournaments(name, quality, quantity_clubs, price) values %s",
                    namedict)
def update_landmarks_in_db(current_landmarks):
    progress("recording {} active landmarks in database".format(
        len(current_landmarks)))
    state = get_worker_state()
    with state.db, state.db.cursor() as cur:
        cur.execute("""
            CREATE TEMP TABLE current_landmarks (
                probeid   INTEGER NOT NULL PRIMARY KEY,
                anchorid  INTEGER,
                addr      INET NOT NULL,
                latitude  REAL NOT NULL,
                longitude REAL NOT NULL);
        """)
        execute_values(cur, """
            INSERT INTO current_landmarks
                (probeid, anchorid, addr, latitude, longitude)
            VALUES %s;
        """, current_landmarks, page_size=400)
        cur.execute("""
            ANALYZE current_landmarks;
        """)
        cur.execute("""
            DELETE FROM current_landmarks
             WHERE latitude < -60 OR latitude > 85;
        """)
        cur.execute("""
            UPDATE landmarks SET usable = false
             WHERE usable = true
               AND (addr NOT IN (SELECT addr FROM current_landmarks)
                    OR probeid NOT IN (SELECT probeid FROM current_landmarks));
        """)
        cur.execute("""
            INSERT INTO landmarks (probeid, anchorid, addr, usable, location)
            SELECT probeid, anchorid, addr, true AS usable,
                   ST_SetSRID(ST_Point(longitude, latitude), 4326) AS location
              FROM current_landmarks
                ON CONFLICT (probeid)
                DO UPDATE SET anchorid = excluded.anchorid,
                              addr     = excluded.addr,
                              usable   = true,
                              location = excluded.location;
        """)
        cur.execute("""
            DROP TABLE current_landmarks;
        """)
        cur.execute("""
            UPDATE landmarks AS l
               SET region = r.id
              FROM regions r
             WHERE l.region IS NULL
               AND ST_Covers(r.box, l.location::geometry);
        """)
        cur.execute("""
            ANALYZE landmarks;
        """)
        # Should really cast back to GEOGRAPHY before calling ST_Centroid,
        # but postgis 2.3.1 (the version in debian 9) doesn't support that.
        # The error is less than 500km in all cases so we can live with it
        # for now.
        cur.execute("""
            UPDATE regions AS r
               SET lm_centroid = s.centroid
              FROM (SELECT r.id,
                           ST_Centroid(ST_Union(l.location::GEOMETRY)) AS centroid
                      FROM regions r, landmarks l
                     WHERE l.usable AND l.region = r.id
                     GROUP BY r.id) s
             WHERE s.id = r.id;
        """)
        cur.execute("""
            SELECT cl, count(*) FROM (
                SELECT CASE WHEN NOT usable THEN 'unusable'
                            WHEN anchorid IS NOT NULL THEN 'anchor'
                            ELSE 'probe'
                       END AS cl
                  FROM landmarks
            ) _ GROUP BY cl;
        """)
        counts = {'unusable': 0, 'anchor': 0, 'probe': 0}
        counts.update(cur.fetchall())
        progress("recorded {probe} probes, {anchor} anchors, and "
                 "{unusable} no longer usable.".format(**counts))

        # The next stages need this information, and we have a
        # database handle right now, so we may as well retrieve it.
        cur.execute("""
            SELECT l.addr, l.anchorid, MAX(m.stop_time)
              FROM landmarks l
              LEFT JOIN ripe_measurements m ON l.addr = m.d_addr
             WHERE l.usable AND l.anchorid IS NOT NULL
             GROUP BY l.addr, l.anchorid
        """)
        return list(cur)
def save_result_values(self, mapobject_type_name, result_id, data):
    '''Saves generated label values.

    Parameters
    ----------
    mapobject_type_name: str
        name of the selected
        :class:`MapobjectType <tmlib.models.mapobject.MapobjectType>`
    result_id: int
        ID of a registered
        :class:`ToolResult <tmlib.models.result.ToolResult>`
    data: pandas.Series
        series with multi-level index for "mapobject_id" and "tpoint"

    See also
    --------
    :class:`tmlib.models.result.LabelValues`
    '''
    logger.info('save label values for result %d', result_id)
    mapobject_ids = data.index.levels[0].tolist()
    tpoints = data.index.levels[1]
    with tm.utils.ExperimentConnection(self.experiment_id) as connection:
        connection.execute('''
            SELECT id FROM mapobject_types
            WHERE name = %(mapobject_type_name)s
        ''', {
            'mapobject_type_name': mapobject_type_name
        })
        results = connection.fetchall()
        mapobject_type_id = results[0][0]

        connection.execute('''
            SELECT partition_key, array_agg(id) AS mapobject_ids
            FROM mapobjects AS m
            WHERE m.mapobject_type_id = %(mapobject_type_id)s
            AND m.id = ANY(%(mapobject_ids)s)
            GROUP BY partition_key
        ''', {
            'mapobject_type_id': mapobject_type_id,
            'mapobject_ids': mapobject_ids
        })
        records = connection.fetchall()

    # Grouping mapobject IDs per partition_key allows us
    # to target individual shards of the label_values table directly
    # on the worker nodes with full SQL support, including multi-row
    # insert/update statements.
    for tpoint in tpoints:
        for partition_key, mapobject_ids in records:
            with tm.utils.ExperimentConnection(self.experiment_id) as conn:
                host, port, shard_id = conn.locate_partition(
                    tm.LabelValues, partition_key
                )
                worker_connection = tm.utils.ExperimentWorkerConnection(
                    self.experiment_id, host, port
                )
            with worker_connection as connection:
                logger.debug(
                    'upsert label values for partition %d', partition_key
                )
                sql = '''
                    INSERT INTO label_values_{shard} AS v (
                        partition_key, mapobject_id, values, tpoint
                    )
                    VALUES %s
                    ON CONFLICT
                    ON CONSTRAINT label_values_pkey_{shard}
                    DO UPDATE
                    SET values = v.values || EXCLUDED.values
                '''.format(shard=shard_id)
                template = '''
                    (
                        %(partition_key)s, %(mapobject_id)s,
                        %(values)s, %(tpoint)s
                    )
                '''
                args = [
                    {
                        'values': {
                            # .ix was removed from recent pandas releases;
                            # .loc is the equivalent multi-index lookup
                            str(result_id): str(np.round(data.loc[(mid, tpoint)], 6))
                        },
                        'mapobject_id': mid,
                        'partition_key': partition_key,
                        'tpoint': tpoint
                    }
                    for mid in mapobject_ids
                ]
                execute_values(
                    connection, sql, args, template=template, page_size=500
                )
def retrieve_anchor_ping_results(ainfo):
    d_addr, anchorid, max_stop_time = ainfo
    state = get_worker_state()
    progress("retrieving ping times toward {}".format(d_addr))
    with state.db, state.db.cursor() as cur:
        cur.execute("""
            SELECT l.probeid, min(r.odate), max(r.odate)
              FROM (SELECT * FROM landmarks WHERE usable) l
              LEFT JOIN (SELECT * FROM calibration_rtts WHERE d_id = %s) r
                ON l.probeid = r.s_id
             GROUP BY l.probeid;
        """, (anchorid,))
        already_have = {row[0]: (row[1], row[2]) for row in cur}

        # Construct an index of available measurements sorted in
        # reverse order of when they ended (that is, most recent first,
        # still running first of all).
        cur.execute("""
            SELECT meas_id, start_time, stop_time, probes
              FROM ripe_measurements
             WHERE d_addr = %s
             ORDER BY stop_time DESC NULLS FIRST;
        """, (d_addr,))
        measurements = cur.fetchall()

    # release database locks while talking to RIPE

    # So that we never have to UPDATE a row in calibration_rtts, and
    # also to make the logic below a little bit simpler, we never
    # retrieve any information about the current day, only days that
    # are completely in the past.
    begin_today = beginning_of_day(datetime.date.today())

    # We want to record a number for each probe for each day, which is
    # most easily done by querying the measurements API for one-day
    # intervals.
    one_day = datetime.timedelta(days=1)

    # We record a blank measurement for yesterday for all probes that
    # don't have any data for yesterday; this ensures future runs of
    # this script will not repeat work that has already been done.
    yesterday = (begin_today - one_day).date()
    no_data_for_yesterday = set()

    # For each probe, work out how far back in time we need to go for
    # it. Some probes do not ping all of the anchors continuously, so
    # we need to do this in two passes. The first, optimistic pass
    # retrieves only the data we would need if all of the probes did
    # ping all of the anchors at least once each day -- going back at
    # most 14 days.
    optimistic_earliest_time = begin_today - datetime.timedelta(days=14)
    earliest_time_for_probe = {}
    earliest_time_overall = begin_today
    for probe, (_, maxdate) in already_have.items():
        if maxdate is None:
            maxdatestamp = optimistic_earliest_time
        else:
            maxdatestamp = beginning_of_day(maxdate)
        # If the maxdatestamp is at or after the beginning of today,
        # we don't need to retrieve anything for this probe at all.
        if maxdatestamp < begin_today:
            earliest_time_for_probe[probe] = max(maxdatestamp,
                                                 optimistic_earliest_time)
            earliest_time_overall = min(earliest_time_overall,
                                        earliest_time_for_probe[probe])
            no_data_for_yesterday.add(probe)

    if earliest_time_overall == begin_today:
        progress("no need to retrieve pings for {}".format(d_addr))
        return
    else:
        progress("{}: beginning of today is {}".format(d_addr, begin_today))
        progress("{}: optimistic earliest date is {}"
                 .format(d_addr, earliest_time_overall))
        progress("{}: need data for {} probes"
                 .format(d_addr, len(earliest_time_for_probe)))
        for pid, etime in earliest_time_for_probe.items():
            if etime < (begin_today - one_day):
                progress("{}: for {} need since {}".format(d_addr, pid, etime))

    new_data = []
    w_stop_time = begin_today
    while earliest_time_for_probe and w_stop_time > earliest_time_overall:
        w_start_time = w_stop_time - one_day
        assert w_start_time >= earliest_time_overall
        datestamp = w_start_time.date()
        data_this_day = {}
        for m_id, m_start_time, m_stop_time, m_probes in measurements:
            if m_stop_time is None:
                m_stop_time = begin_today
            if m_stop_time < w_start_time or m_start_time > w_stop_time:
                continue
            wanted_probes = set(earliest_time_for_probe.keys())
            wanted_probes.intersection_update(m_probes)
            if not wanted_probes:
                continue
            progress("retrieving measurement {} for {}".format(m_id, datestamp))
            x_start_time = int(max(m_start_time, w_start_time).timestamp())
            x_stop_time = int(min(m_stop_time, w_stop_time).timestamp())
            m_results = 'measurements/{}/results'.format(m_id)
            for probeids in chunked(wanted_probes, n=100):
                for result in retrieve_atlas(
                        state.session, m_results,
                        params={
                            "start": x_start_time,
                            "stop": x_stop_time,
                            "probe_ids": ",".join(str(pid) for pid in probeids)
                        },
                        constructor=PingResult):
                    if (result.is_error or result.is_malformed
                            or result.rtt_min is None):
                        continue
                    if result.probe_id in data_this_day:
                        data_this_day[result.probe_id] = min(
                            data_this_day[result.probe_id], result.rtt_min)
                    else:
                        data_this_day[result.probe_id] = result.rtt_min
                    if datestamp == yesterday:
                        no_data_for_yesterday.discard(result.probe_id)

        progress("retrieved pings from {} probes to {} on {}"
                 .format(len(data_this_day), d_addr, datestamp))
        for probeid, minrtt in data_this_day.items():
            new_data.append((probeid, datestamp, minrtt))
            if (probeid in earliest_time_for_probe
                    and earliest_time_for_probe[probeid] >= w_start_time):
                del earliest_time_for_probe[probeid]
        w_stop_time = w_start_time

    # Do not load historical data for probes for which we have
    # received no new data but earlier data was available.
    for pid, etime in list(earliest_time_for_probe.items()):
        if etime > optimistic_earliest_time:
            del earliest_time_for_probe[pid]

    if earliest_time_for_probe:
        progress("{}: need historical data for {} probes"
                 .format(d_addr, len(earliest_time_for_probe)))
        for pid, etime in earliest_time_for_probe.items():
            progress("{}: for {} need since {}".format(d_addr, pid, etime))

        historical_data = collections.defaultdict(dict)
        for m_id, m_start_time, m_stop_time, m_probes in measurements:
            if m_stop_time is None or m_stop_time >= earliest_time_overall:
                continue  # we already got this one
            wanted_probes = set(earliest_time_for_probe.keys())
            wanted_probes.intersection_update(m_probes)
            if not wanted_probes:
                continue  # this measurement has no probes we care about

            # Round the stop time up to a day boundary.
            if m_stop_time == beginning_of_day(m_stop_time.date()):
                w_stop_time = m_stop_time
            else:
                w_stop_time = beginning_of_day((m_stop_time + one_day).date())

            days_considered = 0
            while w_stop_time > m_start_time and days_considered < 14:
                w_start_time = w_stop_time - one_day
                datestamp = w_start_time.date()
                x_start_time = int(max(m_start_time, w_start_time).timestamp())
                x_stop_time = int(min(m_stop_time, w_stop_time).timestamp())
                m_results = 'measurements/{}/results'.format(m_id)
                data_this_day = historical_data[datestamp]
                progress("retrieving measurement {} for {} (historical)"
                         .format(m_id, datestamp))
                old_pings = len(data_this_day)
                for probeids in chunked(wanted_probes, n=100):
                    for result in retrieve_atlas(
                            state.session, m_results,
                            params={
                                "start": x_start_time,
                                "stop": x_stop_time,
                                "probe_ids": ",".join(str(pid)
                                                      for pid in probeids)
                            },
                            constructor=PingResult):
                        if (result.is_error or result.is_malformed
                                or result.rtt_min is None):
                            continue
                        if result.probe_id in data_this_day:
                            data_this_day[result.probe_id] = min(
                                data_this_day[result.probe_id],
                                result.rtt_min)
                        else:
                            data_this_day[result.probe_id] = result.rtt_min
                progress("retrieved pings from {} probes to {} on {} (historical)"
                         .format(len(data_this_day) - old_pings,
                                 d_addr, datestamp))
                w_stop_time = w_start_time
                days_considered += 1

        for datestamp, data_this_day in historical_data.items():
            for probeid, minrtt in data_this_day.items():
                new_data.append((probeid, datestamp, minrtt))
                if probeid in earliest_time_for_probe:
                    del earliest_time_for_probe[probeid]

        if earliest_time_for_probe:
            progress("{} probes have no data at all for {}"
                     .format(len(earliest_time_for_probe), d_addr))

    if no_data_for_yesterday:
        new_data.extend((pid, yesterday, None)
                        for pid in no_data_for_yesterday)

    if new_data:
        progress("recording {} new observations for {}"
                 .format(len(new_data), d_addr))
        with state.db, state.db.cursor() as cur:
            execute_values(cur, """
                INSERT INTO calibration_rtts (s_id, d_id, odate, minrtt)
                VALUES %s ON CONFLICT DO NOTHING;
            """, new_data, template="(%s, {}, %s, %s)".format(anchorid))
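The final execute_values call above shows a less common use of the template argument: the per-row template normally contains one %s per tuple element, but here the constant anchorid is baked directly into it so each row tuple carries only three values. A standalone sketch of the same trick (the table mirrors the one above; the values are illustrative, and the constant must be trusted data, never user input, since it bypasses parameter binding):

# Illustrative only: inject a constant column through the template
# rather than repeating it in every row tuple.
rows = [(1, '2020-11-01', 12.5), (2, '2020-11-01', 30.1)]
anchor = 42  # constant for every row; assumed trusted, not user-supplied
execute_values(cur,
               "INSERT INTO calibration_rtts (s_id, d_id, odate, minrtt) VALUES %s",
               rows,
               template="(%s, {}, %s, %s)".format(anchor))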
conn = psycopg2.connect("dbname=covid19 user=covid19\ password=covid19databasepassword host=localhost") location_data = json.load(open(location_file)) if 'locations' not in location_data: print("Malformed file") sys.exit(1) low_watermark = datetime(2020, 1, 1) # TODO: JSON schema? values = [] for loc in location_data['locations']: timestampMs = int(loc['timestampMs']) / 1000 # to seconds ts = datetime.utcfromtimestamp(timestampMs) if ts <= low_watermark: continue start_time = ts.strftime('%Y-%m-%dT%H:%M:%S') end_time = (ts + timedelta(minutes=30)).strftime('%Y-%m-%dT%H:%M:%S') lat = int(loc['latitudeE7']) / 1e7 lon = int(loc['longitudeE7']) / 1e7 coord = f'SRID=4326;POINT({lon} {lat})' values.append((userid, start_time, end_time, coord)) with conn.cursor() as cur: execute_values( cur, "INSERT INTO\ location_history(userid, start_time, end_time, geographic_location)\ VALUES %s", values) conn.commit()
def fri2db(self, fri, exper_info, run_id):
    """
    Store information from the PIMA fri-file to the database.

    Parameters
    ----------
    fri : fri.Fri object
        PIMA fringe fitting results.
    exper_info : pima.ExperInfo object
        Experiment general information.
    run_id : int
        Id of the record in the ``pima_runs`` database table.
    """
    if not fri:
        return

    exper = exper_info.exper
    band = exper_info.band
    polar = fri.records[0]['polar']

    query = """INSERT INTO pima_obs (obs, start_time, stop_time,
               exper_name, band, source, polar, st1, st2, delay, rate, accel,
               snr, ampl, solint, u, v, base_ed, ref_freq, scan_name, run_id,
               if_id, status, elevation, bandpass)
               VALUES %s;"""

    rec_list = []
    for rec in fri.records:
        if rec['FRIB.FINE_SEARCH'] == 'ACC':
            accel = rec['ph_acc']
        else:
            accel = rec['accel']

        # Set IF id to 0 if fringe fitting was done over multiple IFs
        if rec['beg_ifrq'] != rec['end_ifrq']:
            if_id = 0
        else:
            if_id = rec['beg_ifrq']

        rec_list.append((rec['obs'], rec['start_time'], rec['stop_time'],
                         exper, band, rec['source'], polar,
                         rec['sta1'], rec['sta2'],
                         rec['delay'], rec['rate'], accel,
                         rec['SNR'], rec['ampl_lsq'], rec['duration'],
                         rec['U'], rec['V'], rec['uv_rad_ed'],
                         rec['ref_freq'], rec['time_code'], run_id,
                         if_id, rec['status'], rec['elevation'],
                         fri.aux['bandpass']))

    with self.connw.cursor() as cursor:
        execute_values(cursor, query, rec_list)

    self.connw.commit()
# CSV_FILEPATH = "data/titanic.csv"
CSV_FILEPATH = os.path.join(os.path.dirname(__file__), "..", "data", "titanic.csv")

print("FILE EXISTS?", os.path.isfile(CSV_FILEPATH))

df = pandas.read_csv(CSV_FILEPATH)
print(df.head())

# rows should be a list of tuples
# [
#     ('A rowwwww', 'null'),
#     ('Another row, with JSONNNNN', json.dumps(my_dict)),
#     ('Third row', "3")
# ]
# h/t Jesus and https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.itertuples.html
rows = list(df.itertuples(index=False, name=None))

insertion_query = ("INSERT INTO passengers "
                   "(survived, pclass, name, sex, age, sib_spouse_count, "
                   "parent_child_count, fare) VALUES %s")
execute_values(curs, insertion_query, rows)

# ACTUALLY SAVE THE TRANSACTIONS
conn.commit()

# insert = """
#     COPY %s FROM STDIN WITH
#         CSV
#         HEADER
#         DELIMITER AS ','
# """
# file = open(CSV_FILEPATH, "r")
# table = 'passengers'
# with conn.cursor() as cur:
#     cur.execute("truncate " + table + ";")
#     cur.copy_expert(sql=insert % table, file=file)
#     conn.commit()
def _insert_events(self, cursor, events):
    if not events:
        return
    execute_values(cursor, EVENT_INSERT_QUERY, events,
                   template=EVENT_VALUES_TEMPLATE)
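EVENT_INSERT_QUERY and EVENT_VALUES_TEMPLATE are module-level constants defined elsewhere. A plausible sketch of their shapes, assuming a simple events table (names, columns, and the conflict clause are illustrative, not from the original module):

# Hypothetical constants matching the call above (illustrative only).
EVENT_INSERT_QUERY = """
    INSERT INTO events (source, event_type, payload, created_at)
    VALUES %s
    ON CONFLICT DO NOTHING
"""
# One %s per tuple element; a custom template can also add casts,
# e.g. "(%s, %s, %s::jsonb, %s)".
EVENT_VALUES_TEMPLATE = "(%s, %s, %s, %s)"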
def _populate_cves(self, cvemap):
    cve_impact_map = self._populate_cve_impacts()
    rh_source_id = self._get_source_id('Red Hat')
    cur = self.conn.cursor()
    cve_data = cvemap.list_cves()
    cve_names = [(name, ) for name in cve_data]
    execute_values(cur,
                   """select id, name, source_id, description, impact_id,
                             published_date, modified_date,
                             cvss3_score, cvss3_metrics,
                             iava, redhat_url, secondary_url,
                             source_id, cvss2_score, cvss2_metrics
                        from cve
                        join (values %s) t(name) using (name)""",
                   cve_names, page_size=len(cve_names))
    # Find and merge CVEs that have already been loaded.
    # Fields we care about potentially merging include:
    #   [3]description,
    #   [5]published_date, [6]modified_date,
    #   [7]cvss3_score, [8]cvss3_metrics,
    #   [9]iava, [10]redhat_url, [11]secondary_url,
    #   [13]cvss2_score, [14]cvss2_metrics
    cols = {
        'description': 3,
        'published_date': 5,
        'modified_date': 6,
        'cvss3_score': 7,
        'cvss3_metrics': 8,
        'iava': 9,
        'redhat_url': 10,
        'secondary_url': 11,
        'cvss2_score': 13,
        'cvss2_metrics': 14
    }
    for a_db_row in cur.fetchall():
        # cve_data[row[1]] = incoming CVE with the same name as the one
        # from the db. Skip id, name, source_id (they are *always* filled
        # in); for the rest, use the incoming value unless it is null,
        # then fall back to the value from the db.
        db_name = a_db_row[1]
        db_id = a_db_row[0]
        cve_data[db_name]["id"] = db_id
        for a_key in cols:
            if a_key not in cve_data[db_name]:
                cve_data[db_name][a_key] = None
            if not cve_data[db_name][a_key]:
                cve_data[db_name][a_key] = a_db_row[cols[a_key]]

    to_import = []
    to_update = []
    # Now deal with all items.
    for name, values in cve_data.items():
        values["impact_id"] = cve_impact_map[values["impact"].capitalize()] \
            if values["impact"] is not None else cve_impact_map["None"]
        # Make sure everything has all the keys, even if the value is empty.
        for a_key in cols:
            if a_key not in values:
                values[a_key] = None
        item = (name, values["description"], values["impact_id"],
                values["published_date"], values["modified_date"],
                values["cvss3_score"], values["cvss3_metrics"],
                values["iava"], values["redhat_url"],
                values["secondary_url"], rh_source_id,
                values["cvss2_score"], values["cvss2_metrics"])
        # If we have an 'id', it means we're already in the db.
        if "id" in values:
            to_update.append((values["id"], ) + item)
        else:
            to_import.append(item)
    cur.close()

    self.logger.debug("CVEs to import: %d", len(to_import))
    self.logger.debug("CVEs to update: %d", len(to_update))

    self._import_cves(to_import, cve_data)
    self._update_cves(to_update)

    cur = self.conn.cursor()
    try:
        self._populate_cwes(cur, cve_data)
        self.conn.commit()
    except Exception:  # pylint: disable=broad-except
        self.logger.exception("Failure when populating CWEs")
        self.conn.rollback()
    finally:
        cur.close()
import csv

import psycopg2
from psycopg2.extras import execute_values

data = []
with open("mailbox_enron.csv", "r", newline="", encoding="utf-8") as f:
    lignes = csv.reader(f, delimiter=",", quotechar='"')
    next(lignes)  # skip the header row
    for l in lignes:
        data.append(l)

texte = """
    INSERT INTO infiltration.mailbox_enron_original
        (id_message, auteur, destinataire, reponse, date_p)
    VALUES %s
"""

with psycopg2.connect(host="data", database="hmetua",
                      user="******", password="******") as conn:
    with conn.cursor() as cur:
        execute_values(cur, texte, data)
def sync_cve_md():
    """Sync all CVE metadata from VMaaS"""
    LOGGER.info('Syncing CVE metadata')
    with DatabasePoolConnection() as conn:
        with conn.cursor() as cur:
            impact_id_map = {}
            cur.execute("select name, id from cve_impact")
            for impact_name, impact_id in cur.fetchall():
                impact_id_map[impact_name] = impact_id

            cur.execute('select id, cve from cve_metadata')
            cves_in_db = {}
            for cve_tuple in cur.fetchall():
                cves_in_db[cve_tuple[1]] = cve_tuple[0]

            cve_json = {'cve_list': [".*"], 'page': 1,
                        'page_size': DEFAULT_PAGE_SIZE,
                        'rh_only': True, 'errata_associated': True}
            success, cve_pages = paging(VMAAS_CVES_ENDPOINT, cve_json)
            if not success:
                return success
            cves = cve_pages['cve_list']

            LOGGER.info("Importing CVE metadata")
            to_insert, to_update, to_delete = process_cve_list(
                cves, cves_in_db, impact_id_map)
            insert_cves(cur, to_insert)
            update_cves(cur, to_update)

            if to_delete:
                associated_cves = set()
                LOGGER.info("Deleting %s unnecessary CVE metadata",
                            len(to_delete))
                cur.execute("""select distinct cve_id
                                 from system_vulnerabilities""")
                for row in cur.fetchall():
                    associated_cves.add(row[0])
                cur.execute("""select distinct cve_id from cve_rule_mapping""")
                for row in cur.fetchall():
                    associated_cves.add(row[0])

                safety_delete = []
                unable_to_delete = []
                for cve_to_delete in to_delete:
                    cve_id = cves_in_db[cve_to_delete[0]]
                    if cve_id in associated_cves:
                        unable_to_delete.append(cve_to_delete[0])
                    else:
                        safety_delete.append(cve_id)

                if safety_delete:
                    # execute_values expands %s into the list of
                    # single-column rows, so "in (%s)" becomes an
                    # ordinary IN list of ids
                    execute_values(cur,
                                   """delete from cve_account_data
                                      where cve_id in (%s)""",
                                   list(zip(safety_delete)),
                                   page_size=len(safety_delete))
                    execute_values(cur,
                                   """delete from cve_metadata
                                      where id in (%s)""",
                                   list(zip(safety_delete)),
                                   page_size=len(safety_delete))
                LOGGER.info('Finished deleting unnecessary CVE metadata')

                if unable_to_delete:
                    LOGGER.warning(
                        'Unable to delete %s cves (still referenced from '
                        'system_vulnerabilities table or have rules): %s',
                        len(unable_to_delete), str(unable_to_delete))
                    LOGGER.debug('Attempting to update information about %s',
                                 str(unable_to_delete))
                    cve_json = {'cve_list': unable_to_delete, 'page': 1,
                                'page_size': DEFAULT_PAGE_SIZE,
                                'rh_only': True}
                    success, cve_pages = paging(VMAAS_CVES_ENDPOINT, cve_json)
                    if not success:
                        return success
                    cves = cve_pages['cve_list']
                    _, to_update, _ = process_cve_list(cves, cves_in_db,
                                                       impact_id_map)
                    update_cves(cur, to_update)
            conn.commit()
    LOGGER.info('Finished syncing CVE metadata')
    return success
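Routing the deletes through execute_values works because %s expands into the list of one-element rows, but for a plain list of ids psycopg2's array adaptation is the more conventional idiom; a minimal sketch under the same assumptions (safety_delete is a list of integer ids):

# Conventional alternative: psycopg2 adapts a Python list to a Postgres
# array, so "= ANY(%s)" matches any id in safety_delete with one execute().
cur.execute("delete from cve_account_data where cve_id = ANY(%s)",
            (safety_delete,))
cur.execute("delete from cve_metadata where id = ANY(%s)",
            (safety_delete,))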