def conn_to_rs(host, port, db, usr, pwd, opt=options, timeout=set_timeout_stmt): rs_conn_string = """host=%s port=%s dbname=%s user=%s password=%s %s""" % (host, port, db, usr, pwd, opt) print "Connecting to %s:%s:%s as %s" % (host, port, db, usr) rs_conn = DB(dbname=rs_conn_string) rs_conn.query(timeout) return rs_conn
def move_data(dbset): try: db = DB(**dbset) db.begin() query = db.query("SELECT bluetooth.move_raw_data();") if query.getresult()[0][0] != 1: db.rollback() raise DatabaseError( 'bluetooth.move_raw_data did not complete successfully') query = db.query("TRUNCATE bluetooth.raw_data;") query = db.query("SELECT king_pilot.load_bt_data();") if query.getresult()[0][0] != 1: db.rollback() raise DatabaseError( 'king_pilot.load_bt_data did not complete successfully') db.query( 'DELETE FROM king_pilot.daily_raw_bt WHERE measured_timestamp < now()::DATE;' ) db.commit() except DatabaseError as dberr: LOGGER.error(dberr) db.rollback() except IntegrityError: LOGGER.critical( 'Moving data failed due to violation of a constraint. Data will have to be moved manually' ) finally: db.close()
def requestDB(requete='select * from ',tableName='planet_osm_line',base='champ', limit=None,only=False,verbose=False): """ Function that execute an sql request into a postgres database. --- parameters : string requete : the request will be executed. string tableName : the name of the table will be request if only is false. string base : name of the database int limit : integer, request limit option. produces : string requeteResult : request result. """ db = DB(dbname=base) if verbose : tablesList = db.get_tables() print('Tables :') printPuces(tablesList) print('\n') headerLinesOSM = db.get_attnames(tableName) print('Table header :') printPuces(headerLinesOSM) print('\n') if only : requeteResult = db.query(requete).getresult() elif limit is None : requeteResult = db.query(requete+tableName).getresult() else : requeteResult = db.query(requete+tableName+' limit '+str(limit)).getresult() return requeteResult
def run_db_query( *, database: str, host: str, port: int, username: str, password: str, query: str ) -> None: logging.info("Initializing DB connection") logging.info( f"DB params dbname={database}, host={host}, " f"port={port}, user={username}" ) logging.info(f"query={query}") db = DB( dbname=database, host=host, port=port, user=username, passwd=password ) logging.info("DB connection initialized") logging.info("Executing query") db.query(query) logging.info("Query executed")
def run_analyze(table): global counter db = DB(dbname = vDatabase, host = vHost) db.query('analyze %s' %table) with counter.get_lock(): counter.value += 1 if counter.value % 10 == 0 or counter.value == total_tables: logging.info(str(counter.value) + " tables completed out of " + str(total_tables) + " tables") db.close()
class SqlClient: def __init__(self): self.db = DB(dbname='refugee', host='localhost', user='******', passwd='1234') def insert_into(self, fingerprint_hash, name, dob, gender): res = self.db.query('INSERT INTO public.demographics(name, birth_date, fingerprint_hash, gender) \n VALUES(' + str(name) + ', ' + str(dob) + ', ' + str(fingerprint_hash) + ', ' + str(gender) + ');') def lookUp(self, hash): res = self.db.query(f'SELECT * FROM public.demographics WHERE fingerprint_hash == {hash}') return res[0]
def update_configs(all_analyses, dbset): ''' Syncs configs from blip server with database and returns configs to pull data from. :param all_analyses: List of blip configurations :param dbset: Dictionary to connect to PostgreSQL database ''' db = DB(**dbset) db.begin() db.query('''TRUNCATE bluetooth.all_analyses_day_old; INSERT INTO bluetooth.all_analyses_day_old SELECT * FROM bluetooth.all_analyses;''' ) db.commit() analyses_pull_data = {} for report in all_analyses: report.outcomes = [outcome.__json__() for outcome in report.outcomes] report.routePoints = [ route_point.__json__() for route_point in report.routePoints ] row = dict(device_class_set_name=report.deviceClassSetName, analysis_id=report.id, minimum_point_completed=db.encode_json( report.minimumPointCompleted.__json__()), outcomes=report.outcomes, report_id=report.reportId, report_name=report.reportName, route_id=report.routeId, route_name=report.routeName, route_points=report.routePoints) #If upsert fails, log error and continue, don't add analysis to analyses to pull try: upserted = db.upsert('bluetooth.all_analyses', row, pull_data='included.pull_data') analyses_pull_data[upserted['analysis_id']] = { 'pull_data': upserted['pull_data'], 'report_name': upserted['report_name'] } except IntegrityError as err: LOGGER.error(err) db.close() analyses_to_pull = { analysis_id: analysis for (analysis_id, analysis) in analyses_pull_data.items() if analysis['pull_data'] } return analyses_to_pull
def healthcheck(self): self._generic_healthcheck() spec = self.get_spec() env = {e['name']: e['value'] for e in spec['containers'][0]['env']} user = env['POSTGRES_USER'] passwd = env['POSTGRES_PASSWORD'] db = DB(dbname=user, host=self.host, port=5432, user=user, passwd=passwd) sql = "create table test_table(id serial primary key, name varchar)" db.query(sql) assert_in('public.test_table', db.get_tables())
def read_line(): i = 0 reader = csv.reader(f, delimiter='\t') db = DB(dbname="ngram" ,user="******" , port=5432) for row in reader: #ngram, year, match_count, page_count, volume_count sql = sqlp1 + " " + tb_name + " " + sqlp2 + " (" + str(i) + ", " \ + "\'" + row[0] + "\'" + ", " + row[1] + ", " + row[2] + ", " + row[3] \ + ", " + row[4] + ");"; print sql i = i + 1 #call insert(sql) db.query(sql) db.close()
def main(**kwargs): CONFIG = configparser.ConfigParser() CONFIG.read('db.cfg') dbset = CONFIG['DBSETTINGS'] logger.info('Connecting to Database') db = DB(dbname=dbset['database'], host=dbset['host'], user=dbset['user'], passwd=dbset['password']) proxies = {'https': kwargs.get('proxy', None)} # Update Venue List venues = [] curId = db.query('SELECT max(id) FROM city.venues').getresult()[0][0] logger.info('Updating venues table') venues, inserted_venues = update_venues(db, proxies, curId) # Get Events from List of Venues #cla = [] logger.info('Finished updating venues tables, %s new venues inserted', inserted_venues) inserted_count = update_events(db, proxies, venues) logger.info('Finished processing events, %s events inserted', inserted_count) db.close()
class db_query: def __init__(self, auth): self.auth = auth self.connect() def __del__(self): try: self.db.close() except: pass def connect(self): self.db = DB(dbname=self.auth['db'], host=self.auth['host'], port=self.auth['port'], user=self.auth['user'], passwd=self.auth['passwd']) def run_query(self, query, *args): def send_query(q): return self.db.query(q) try: res = send_query(query) except: self.connect() res = send_query(query) return res def array_query(self, query, *args): res = self.run_query(query) return res.dictresult()
class PostgresqlAdapter(object): def __init__(self, dbname, user, passwd, host="localhost", port=5432): self._db = DB(dbname=dbname, host=host, port=port, user=user, passwd=passwd) def create(self, inhabitant): record = self._db.insert('inhabitants', data=inhabitant) return _normalize(record) def find_all(self): all_inhabitants = self._db.query( 'select * from inhabitants').dictresult() return [_normalize(x) for x in all_inhabitants] def find_by_id(self, id): inhabitant = self._find_by_id(id) return _normalize(inhabitant) def find_by_name(self, name): all_inhabitants = self._db.query( "SELECT * FROM inhabitants " + "WHERE data ->> 'FirstName' ILIKE '%' || $1 || '%' OR " + " data ->> 'LastName' ILIKE '%' || $1 || '%'", (name, )).dictresult() return [_normalize(x) for x in all_inhabitants] def delete(self, id): try: self._db.delete('inhabitants', id=int(id)) except ValueError: pass def update(self, inhabitant): id = inhabitant["Id"] self._find_by_id(id) del inhabitant["Id"] stored = self._db.update('inhabitants', id=id, data=inhabitant) return _normalize(stored) def _find_by_id(self, id): try: return self._db.get('inhabitants', int(id)) except (ValueError, DatabaseError): raise NoRecordFound()
def get_data(from_table, key, value): pg_db = DB(dbname=DATABASE_NAME, host=DATABASE_URL, port=int(DATABASE_PORT), user=DATABASE_USERNAME, passwd=DATABASE_PASSWORD) result = pg_db.query("select " + key + " , " + value + " from " + from_table + "") return [convert_tuples(tuple, key, value) for tuple in result.getresult()]
def db_input(tweet): tweet = tweet.split(" "); hashtags = [] words = [] for word in tweet: if len(word) > 3 and special_match(word, plaintext) == True: word = word.lower() if "#" == word[0]: hashtags.append(word) else: words.append(word) insert_update = ("with w as (insert into words (word) values ('{}') on conflict (word) do update set word = words.word returning (wid)), h as (insert into hashtags (hashtag) values ('{}') on conflict (hashtag) do update set hashtag = hashtags.hashtag returning (hid)) insert into links (wid, hid, weight) values ((select * from w), (select * from h), 1) on conflict (wid, hid) do update set weight = links.weight + 1") db = DB(dbname=database, host=host, post=int(port), user=user, passwd=password) db.begin() for word in words: for hashtag in hashtags: db.query(insert_update.format(word, hashtag)) db.commit()
def main(**kwargs): CONFIG = configparser.ConfigParser() CONFIG.read('db.cfg') dbset = CONFIG['DBSETTINGS'] logger.info('Connecting to Database') db = DB(dbname=dbset['database'], host=dbset['host'], user=dbset['user'], passwd=dbset['password']) proxies = {'http': kwargs.get('proxy', None)} logger.info('Requesting data') r = requests.get( 'http://app.toronto.ca/cc_sr_v1_app/data/edc_eventcal_APR', proxies=proxies) events = r.json() global CURID, ODID CURID = db.query('SELECT max(id) FROM city.venues').getresult()[0][0] ODID = db.query('SELECT max(od_id) FROM city.od_venues').getresult()[0][0] logger.info('Processing events') inserted_events, inserted_venues, updated_venues = 0, 0, 0 for i, entry0 in enumerate(events): try: inserted_venue, updated_venue = process_event( i, entry0['calEvent'], db) inserted_events += 1 inserted_venues += inserted_venue updated_venues += updated_venue except KeyError as key_error: logger.error('Key error with event: %s, key %s, skipping', entry0['calEvent'].get('eventName', ''), key_error.args[0]) logger.info('%s events processed, %s venues inserted, %s venues updated', inserted_events, inserted_venues, updated_venues) logger.info('closing connection to DB') db.close()
def connect(): parser = argparse.ArgumentParser() parser.add_argument('--host') parser.add_argument('--user') parser.add_argument('--passwd') parser.add_argument('--schema', default = 'mimiciii') parser.add_argument('--mode', default = 'all') args = parser.parse_args() # host = '162.105.146.246' # host = 'localhost' # schema = 'mimiciii' host = args.host user = args.user passwd = args.passwd logging.info('connect to %s, user = %s, search_path = %s' %(host, user, args.schema)) db = DB(host = host, user = user, passwd = passwd) db.query('set search_path to %s' %(args.schema)) return db
def get_csv_data(): """ Connect to the PostgreSQL database server """ conn = None try: # read connection parameters params = config() # connect to the PostgreSQL server print('Connecting to the PostgreSQL database...') conn = DB(**params) # execute a statement print('PostgreSQL ALL accepted users who signed up for team matching') q = conn.query( "SELECT * FROM users JOIN event_applications ON users.id = event_applications.user_id WHERE custom_fields ->> 'team_forming' = 'Yes, sign me up!' AND status = 'accepted';" ) data = q.dictresult() f = StringIO() writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL) writer.writerow(features) print(f'Adding {len(data)} entries to csv file') for row in data: str_id = str(row['user_id']) # get the MD5 hash of id result = hashlib.md5(str_id.encode()) hashed_id = result.hexdigest() full_duration = (row['custom_fields'].get( 'arrival_time', '') == "I'm staying for the entire event") user_features = [hashed_id, row['first_name'], row['last_name'], row['email'], row['phone'], full_duration, \ row['age'], row['pronoun'], row['university'], row['education_lvl'], row['major'], \ row['grad_year'], row['custom_fields'].get("travel", None), row['custom_fields'].get("programming_skills", None), \ row['custom_fields'].get("been_to_ttb", None), None, None, row['custom_fields'].get("linkedin_url", None), \ row['custom_fields'].get("github_url", None), row['custom_fields'].get("other_url", None), row['custom_fields'].get("how_did_you_hear", None), \ None, row['custom_fields'].get("programming_experience", None), row['custom_fields'].get("how_many_hackathons", None), None, \ row['custom_fields'].get("other_skills", None), row['custom_fields'].get("particular_topic", None), row['custom_fields'].get("goals", None), \ row['custom_fields'].get("experience_area", None), row['custom_fields'].get("teammate_preference", None), None] writer.writerow(user_features) # move the pointer back to beginning of file f.seek(0) return f except (Exception) as error: print("Error:", error) finally: if conn is not None: conn.close() print('Database connection closed.')
def read_data(device_id: str, temp: Optional[float], humid: Optional[float], light: Optional[float]): db = DB(dbname=dbname, host=dbhost, port=dbport, user=dbuser, passwd=dbpassword) table = "raw_data" local_received_time = datetime.datetime.utcnow() insert_statement = f"""INSERT INTO {table} (device_id,temp,humidity,light,timestamp_on_write) VALUES ('{device_id}',{temp},{humid},{light},'{local_received_time}'); """ # Write to PG DB try: db.query(insert_statement) except: pass # Push to Power BI if feature flag is True if stream_pb == True: try: payload = f""" [{{ "DateTime" :"{local_received_time}", "Temperature" :{temp}, "Humidity" :{humid}, "Brightness" :{light}, "Device" :"{device_id}" }}] """ body = bytes(payload, encoding='utf-8') http_req = urllib2.Request(streaming_url, body) response = urllib2.urlopen(http_req) except: print("Power BI ERROR") return ("received")
class Store: def __init__(self): self.connection = DB(dbname=environ.get('POSTGRESQL_ADDON_DB'), host=environ.get('POSTGRESQL_ADDON_HOST'), port=int(environ.get('POSTGRESQL_ADDON_PORT')), user=environ.get('POSTGRESQL_ADDON_USER'), passwd=environ.get('POSTGRESQL_ADDON_PASSWORD')) try: self.connection.query("SELECT * FROM stats") except ProgrammingError: print "Create Table" self.connection.query("""CREATE TABLE stats (timestamp TIMESTAMP PRIMARY KEY,value NUMERIC(5,2))""") # self.connection.query("truncate stats") def add(self, value): if float(value) > 0.1: self.connection.insert('stats', timestamp=get_current_time(), value=value) def display(self): self.connection.query("SELECT * FROM stats") def get_values(self, value='{}-{}-{}'.format(datetime.now().year, datetime.now().month, datetime.now().day)): result = self.connection.query( "SELECT extract(DOW FROM timestamp)::INT AS dow, timestamp, value FROM stats WHERE timestamp >= '" + value + "' AND timestamp < ('" + value + "'::DATE + '1 day'::INTERVAL) ORDER BY 2") return result def get_average_values(self, value='{}-{}-{}'.format(datetime.now().year, datetime.now().month, datetime.now().day)): result = self.connection.query( "SELECT extract(DOW FROM timestamp)::INT AS dow, " "to_timestamp('" + value + " ' || " "LPAD(extract(HOUR FROM timestamp)::TEXT,2,'0') || ':' ||" " LPAD(extract(MINUTE FROM timestamp)::TEXT,2,'0') ||" " ':00', 'YYYY-MM-DD HH24:MI:SS') AS timestamp," " round(avg(value)::NUMERIC,2) AS value FROM stats" " GROUP BY 1,2 ORDER BY 1,2") return result
def getSchemaInfo( options, sql_tables, sql_references ): """ Connect to the database and retrieve our schema information. """ conn = DB( options[ 'dbname' ], options[ 'dbhost' ], int( options[ 'dbport' ] ), user=options[ 'dbuser' ] ) res = conn.query( options[ 'query' ] ).dictresult() for i in range( len( res ) ): ftbl = res[i][ 'table_name' ] fcol = res[i][ 'column_name' ] type = res[i][ 'data_type' ] nullable = res[i][ 'is_nullable' ] keytype = res[i][ 'constraint_type' ] ttbl = res[i][ 'referenced_table_name' ] tcol = res[i][ 'referenced_column_name' ] if not sql_tables.has_key( ftbl ): sql_tables[ ftbl ] = [] sql_tables[ ftbl ] += [ [ fcol, type, nullable, keytype ] ] if keytype == 'FOREIGN KEY' : sql_references += [ [ ftbl, fcol, ttbl, tcol ] ]
class PGDB(object): def __init__(self): pass def connect(self, dbname, host, port, user): self.db = DB(dbname=dbname, host=host, port=port, user=user) def connect_default(self): self.db = DB(dbname=DBNAME, host=HOST, port=PORT, user=USER) def close(self): self.db.close() def execute(self, sql): return self.db.query(sql) def drop_table(self, tbl): self.execute('DROP TABLE if exists %s' % tbl) def create_init_table(self, tbl): self.execute('CREATE TABLE %s (uid INT, iid INT, rating REAL)' % tbl) def copy(self, tbl, path, delimiter): self.execute("COPY %s FROM '%s' delimiter '%s'" % (tbl, path, delimiter))
src = cfg[args.src] print("") print("------------------------------------------------------------------------------------") print("Source: " + str(args.src)) print("------------------------------------------------------------------------------------") srcdb = DB(dbname=src["db"], host=src["host"], port=int(src["port"]), user=src["user"], passwd=src["password"]) for srv in args.dst: item = cfg[srv] print("") print("------------------------------------------------------------------------------------") print("Destination: " + str(srv)) print("------------------------------------------------------------------------------------") dstdb = DB(dbname=item["db"], host=item["host"], port=int(item["port"]), user=item["user"], passwd=item["password"]) for table in tables: dstdb.start() rows = srcdb.query('SELECT * FROM %s' % table).getresult() dstdb.query('CREATE TEMPORARY TABLE newvals ON COMMIT DROP AS TABLE %s WITH NO DATA' % table) dstdb.inserttable('newvals', rows) dstdb.query('LOCK TABLE %s IN EXCLUSIVE MODE' % table) print(upd.get(table)) dstdb.query(upd.get(table)) print(insert.get(table)) dstdb.query(insert.get(table)) dstdb.commit()
from pg import DB import random import configparser config = configparser.ConfigParser() config.read('config.ini') db = DB(host=config['mysqlDB']['hostname'], user=config['mysqlDB']['username'], passwd=config['mysqlDB']['password'], dbname=config['mysqlDB'] ['database']) #connect to DB with given credentials and information def new_user_id(): user_rand_gen_id = random.randint(100000000, 1000000000) return user_rand_gen_id print(db.query("SELECT * FROM users"))
class data_utils(object): """docstring for db_utils""" def __init__(self, arg): #super(db_utils, self).__init__() self.arg = arg self.db = DB(dbname='structnet_complete', user='******', passwd='structnet', host='localhost') #self.identity = ''.join(random.choice(string.ascii_lowercase) for _ in range(8)) #print self.identity print "Conncted!" self.start_time = time.time() def load_pmids(self): f = open(self.arg['corpus_map'], 'rb') self.pmid_dict = marshal.load(f) f.close() print "PMID loaded!" def insert_prediction(self): cnt = 0 # 527.8M lines with open(self.arg['data_file'], 'r') as IN: for line in IN: #try: if cnt % 50000 == 0: print self.arg['data_file'], "process ", cnt, " lines" cnt += 1 tmp = json.loads(line) self.db.insert(self.arg['prediction_table'], entity_a=tmp['em1Text'], entity_b=tmp['em2Text'], relation_type=tmp['label'], score=tmp['score']) def insert_query(self): with open(self.arg['data_file']) as IN: line_num = 0 for line in IN: print line_num tmp = line.split('\t') target_type = ast.literal_eval(tmp[0])[0] if '[' in tmp[3]: output_types = ast.literal_eval(tmp[3]) else: output_types = [tmp[3]] relation_type = tmp[2] self.db.insert(self.arg['query_table'], target_type=target_type, output_types=output_types, index=line_num, relation_type=relation_type) line_num += 1 def insert_caseolap(self, index_number): with open(self.arg['data_file'] + str(index_number) + '_c.txt') as IN: for line in IN: tmp = line.split('\t') score_list = tmp[1].strip().lstrip('[').rstrip(']').split(',') for ele in score_list: temp = ele.strip().split('|') if len(temp) != 2: continue self.db.insert(self.arg['caseolap_table'], doc_id=index_number, sub_type=tmp[0], entity=temp[0], score=float(temp[1])) def query_prediction(self, name_a, name_b, relation_type): self.db.query("set statement_timeout TO 0") query_string = "SELECT score FROM "+self.arg['prediction_table']+" WHERE entity_a=\'" + name_a +"\' AND entity_b=\'" + \ name_b + "\' AND relation_type=\'" + relation_type + "\'" #query_em_a = "SELECT sent_id FROM"+self.arg['entity_table']+" WHERE entity_name=\'"+name_a+"\' GROUP BY article_id" q = self.db.query(query_string) if len(q.dictresult()) == 0: return 0 else: return q.dictresult()[0]['score'] def query_prediction_v2(self, name_a, name_b, relation_type): query_string = "SELECT score FROM "+self.arg['prediction_table']+" WHERE entity_a=\'" + name_a +"\' AND entity_b=\'" + \ name_b + "\' AND relation_type=\'" + relation_type + "\'" query_ems = "SELECT L.article_title, L.pmid, L.sent FROM entity_table as L INNER JOIN (select sent_id FROM entity_table where entity_name=\'"+name_a+\ "\') AS R ON L.sent_id=R.sent_id where entity_name=\'"+name_b+"\' LIMIT 1" #set_a = set(map(lambda x:x['sent_id'], self.db.query(query_em_a).dictresult())) ##query_em_b = "SELECT distinct sent_id FROM entity_table WHERE entity_name=\'"+name_b+"\'" #set_b = set(map(lambda x:x['sent_id'], self.db.query(query_em_b).dictresult())) #print set_a.intersection(set_b) self.db.query("set statement_timeout TO 100") try: result = self.db.query(query_ems).dictresult()[0] except: result = {} q = self.db.query(query_string) if len(q.dictresult()) == 0: result['score'] = 0 else: result['score'] = q.dictresult()[0]['score'] return result #print query_string def query_distinctive(self, target_type, output_types, relation_type, sub_types, num_records=8): self.db.query("set statement_timeout TO 0") sub_types = ast.literal_eval(sub_types) result = [] query_string = "SELECT index FROM query_table WHERE target_type @@ \'" + target_type + "\' AND output_types@>\'" +\ output_types+"\' and relation_type=\'" + relation_type +"\'" #print query_string idx = self.db.query(query_string).dictresult()[0]['index'] for sub_type in sub_types: query_string = "SELECT entity,score FROM " + self.arg['caseolap_table'] + " WHERE doc_id=" + str(idx) + \ " AND sub_type @@ \'" + sub_type + "\' ORDER BY score LIMIT " + str(num_records) q = self.db.query(query_string) result.append(q.dictresult()) return result def query_distinctive_v2(self, target_type, output_types, relation_type, sub_types, num_records=8): self.db.query("set statement_timeout TO 0") sub_types = ast.literal_eval(sub_types) result = [] pmid_result = [] query_string = "SELECT index FROM query_table WHERE target_type @@ \'" + target_type + "\' AND output_types@>\'" +\ output_types+"\' and relation_type=\'" + relation_type +"\'" #print query_string if 'MeSH' in target_type: type_b_name = 'type_b_mesh' else: type_b_name = 'type_b_umls' if 'MeSH' in output_types: type_a_name = 'type_a_mesh' else: type_a_name = 'type_a_umls' idx = self.db.query(query_string).dictresult()[0]['index'] for sub_type in sub_types: query_string = "SELECT entity,score FROM " + self.arg['caseolap_table'] + " WHERE doc_id=" + str(idx) + \ " AND sub_type @@ \'" + sub_type + "\' ORDER BY score LIMIT " + str(num_records) q = self.db.query(query_string) type_target = sub_type.split('::')[0] #print type_target #entity_list = q.dictresult() pmids = [] for em in q.dictresult(): qq = "select pmid from entity_table where sent_id = (select sent_id FROM relation_table WHERE entity_a=\'" +em['entity'] +\ "\' AND relation_type = '"+relation_type+"' LIMIT 1) LIMIT 1" #"\' AND relation_type = '"+relation_type+"' AND "+type_b_name+"@>'{"+type_target+"}' LIMIT 1" em_result = self.db.query(qq).dictresult() if len(em_result) > 0: em_result[0]['entity'] = em['entity'] pmids.append(em_result[0]) result.append(q.dictresult()) pmid_result.append(pmids) return result, pmid_result def insert_record(self): cnt = 0 # 527.8M lines with open(self.arg['data_file'], 'r') as IN: for line in IN: #try: if cnt % 5000 == 0: print self.arg['data_file'], "process ", cnt, " lines" cnt += 1 tmp = json.loads(line) type_dict = {} for e in tmp['entityMentions']: aid = str(tmp['articleId']) mesh = [] umls = [] go = [] sub_type = defaultdict(list) for l in e['label']: if 'MeSH' in l: mesh.append(l.replace('MeSH:::', '')) elif 'GO' in l: go.append(l.replace('GO:::', '')) else: umls.append(l) if len(go) > 0: sub_type['go'] = True else: sub_type['go'] = False if len(mesh) > 0: sub_type['mesh'] = mesh[0].split('::') sub_type['umls'] = umls type_dict[e['start']] = sub_type assert (len(self.pmid_dict[aid]) < 9) self.db.insert(self.arg['entity_table'], entity_name=e['text'], sent_id=tmp['sentId'], article_id=tmp['articleId'], pmid=self.pmid_dict[aid][0], article_title=self.pmid_dict[aid][1], sent=tmp['sentText']) for r in tmp['relationMentions']: self.db.insert( self.arg['relation_table'], entity_a=r['em1Text'], entity_b=r['em2Text'], relation_type=r['label'], a_is_gene=type_dict[r['em1Start']]['go'], b_is_gene=type_dict[r['em2Start']]['go'], type_a_umls=type_dict[r['em1Start']]['umls'], type_b_umls=type_dict[r['em2Start']]['umls'], type_a_mesh=type_dict[r['em1Start']]['mesh'], type_b_mesh=type_dict[r['em2Start']]['mesh'], sent_id=tmp['sentId'], article_id=tmp['articleId']) #except: #print cnt def generate_random_walks(self, edge_pairs, num_walks=3, num_steps=3): print len(edge_pairs) outgoing_edges = defaultdict(list) ingoing_edges = defaultdict(list) map(lambda x: outgoing_edges[x['entity_a']].append(x['entity_b']), edge_pairs) map(lambda x: ingoing_edges[x['entity_b']].append(x['entity_a']), edge_pairs) for k, v in outgoing_edges.iteritems(): if len(v) > 0: v.append(k) #print outgoing_edges #print ingoing_edges #print outgoing_edges temp = max(outgoing_edges.values(), key=len) origin = temp[-1] walk_nodes = [origin] total_nodes = set([origin]) edges = set() for i in xrange(num_steps): #print "++++++++++++++++++++",i next_step = [] while len(walk_nodes) > 0: node = walk_nodes.pop() if i % 2 == 0: node_list = outgoing_edges[node][:-1] assert (len(node_list) > 0) else: node_list = ingoing_edges[node] assert (len(node_list) > 0) #print node if len(node_list) <= num_walks: next_step.extend(node_list) edges = edges.union( list(map(lambda x: (node, x), node_list))) else: #print "random_sample",node_list random_nodes = random.sample(node_list, num_walks) next_step.extend(random_nodes) edges = edges.union( list(map(lambda x: (node, x), random_nodes))) assert (len(walk_nodes) == 0) walk_nodes = list(set(next_step)) total_nodes = total_nodes.union(set(next_step)) #print total_nodes print len(edges) return edges #print edges def query_links_with_walk(self, type_a, type_b, relation_type, num_edges=5, num_pps=1): try: type_a = ast.literal_eval(type_a) type_b = ast.literal_eval(type_b) except: pass #print num_edges #query_string = "SELECT * FROM (SELECT DISTINCT ON(entity_a,entity_b) entity_a,entity_b,sent_id "+"FROM "+self.arg['relation_table']+" WHERE type_a_"+type_a['name']+"@>'"\ #+type_a['type']+"' AND type_b_"+type_b['name']+"@>'"+ type_b['type']+"' AND relation_type='"+relation_type + "') x ORDER BY RANDOM() LIMIT 500" query_string = "SELECT * FROM (SELECT entity_a,entity_b,(array_agg('[' || article_id || ',' || sent_id || ']'))[1:" + str(num_pps) + "] "+"FROM "+self.arg['relation_table']+" WHERE type_a_"+type_a['name']+"@>'"\ +type_a['type']+"' AND type_b_"+type_b['name']+"@>'"+ type_b['type']+"' AND relation_type='"+relation_type + "' GROUP BY entity_a,entity_b) x ORDER BY RANDOM() LIMIT " +str(num_edges) q = self.db.query(query_string) result = {'node_a': {}, 'node_b': {}, 'edges': []} for p in list(self.generate_random_walks(q.dictresult())): result['node_a'][p[0]] = [] result['node_b'][p[1]] = [] result['edges'].append({'source': p[0], 'target': p[1]}) #print result return result def get_category_entities(self): outputfile = open("category_entities.json", 'w') category_entity = dict() with open("mesh_type_hierarchy-2016.txt") as fp: for line in fp: mesh_id = line.split('\t', 1)[0] if len(mesh_id.split('.')) == 2: mesh = line.split('\t', 1)[1] query_string = "SELECT entity_name from entity_table_slim where entity_mesh @> '{" + mesh.replace( "'", "''") + "}'" # print query_string entities = self.db.query(query_string) entities = entities.dictresult() if len(entities) == 0: continue category_entity[mesh[:-1]] = [] for entity in entities: category_entity[mesh[:-1]].append( entity['entity_name']) category_json = json.dumps(category_entity) outputfile.write(str(category_json)) outputfile.close() def get_relations(self, type_a, type_b, entities_left=[], entities_right=[]): try: entities_left = ast.literal_eval(entities_left) entities_right = ast.literal_eval(entities_right) except: pass query_string = "SELECT distinct relation_type, entity_a, entity_b FROM relation_table_slim WHERE type_a_mesh @> '{" + type_a.replace( "'", "''") + "}' AND type_b_mesh @> '{\ " + type_b.replace("'", "''") + "}'" q = self.db.query(query_string) relations = q.dictresult() result = set() for relation in relations: if (not entities_left or (relation['entity_a'] in entities_left)) and ( (not entities_right) or (relation['entity_b'] in entities_right)): result.add(relation['relation_type']) return {'relations': list(result)} def query_links_by_two_sides_entities(self, entities_left, entities_right, relation_type, num_edges=5, num_pps=1): query_a = [] query_b = [] query_edges = [] for entity_left in entities_left: for entity_right in entities_right: query_string = "SELECT (array_agg('[' || article_id || ',' || sent_id || ']'))[1:" + str( num_pps + 2) + "] as sents FROM relation_table WHERE \ entity_a='" + entity_left + "' and entity_b='" + entity_right + "' and relation_type='" + relation_type + "'" q = self.db.query(query_string) result = q.dictresult() article = dict() if result[0]['sents'] != None: for x in result[0]['sents']: x = ast.literal_eval(x) article[x[0]] = str(x[1]) print "article: ", article query_a.append(entity_left) query_b.append(entity_right) query_edges.append({ 'source': entity_left, 'target': entity_right, 'sids': article.values(), 'sents': [] }) red_node = dict() for v in query_a: tmp = self.db.query( "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '" + v + "' LIMIT " + str(1)) dictresult = tmp.dictresult()[0] red_node[v] = [] random_nums = random.sample( range(0, len(dictresult['article_titles'])), min(len(dictresult['article_titles']), num_pps)) for i in range(0, len(random_nums)): red_node[v].append( (dictresult['article_titles'][random_nums[i]], dictresult['sents'][random_nums[i]], dictresult['pmids'][random_nums[i]])) blue_node = dict() for v in query_b: tmp = self.db.query( "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '" + v + "' LIMIT " + str(1)) dictresult = tmp.dictresult()[0] blue_node[v] = [] random_nums = random.sample( range(0, len(dictresult['article_titles'])), min(len(dictresult['article_titles']), num_pps)) for i in range(0, len(random_nums)): blue_node[v].append( (dictresult['article_titles'][random_nums[i]], dictresult['sents'][random_nums[i]], dictresult['pmids'][random_nums[i]])) for edge in query_edges: for s_id in edge['sids'][:num_pps]: tmp = self.db.query( "SELECT article_title, pmid, sent FROM entity_table WHERE sent_id = '" + s_id + "' LIMIT 1") edge['sents'].append(tmp.dictresult()[0]) return {'node_a': red_node, 'node_b': blue_node, 'edge': query_edges} def query_links_by_left_entities(self, entities_left, type_b, relation_type, num_edges, num_pps): self.db.query("set statement_timeout TO 0") query_string = "SELECT * FROM (SELECT entity_a,entity_b,(array_agg('[' || article_id || ',' || sent_id || ']'))[1:" + str( num_pps + 2 ) + "] as sents FROM " + self.arg[ 'relation_table'] + " WHERE type_b_" + type_b[ 'name'] + "@>'" + type_b[ 'type'] + "' AND relation_type='" + relation_type + "' GROUP BY entity_a,entity_b) x" q = self.db.query(query_string) result = q.dictresult() query_a = [] query_b = [] query_edges = [] count = 0 for r in result: if r['entity_a'] in entities_left: if count < int(num_edges): count = count + 1 else: break query_a.append(r['entity_a']) query_b.append(r['entity_b']) article = dict() for x in r['sents']: x = ast.literal_eval(x) article[x[0]] = str(x[1]) query_edges.append({ 'source': r['entity_a'], 'target': r['entity_b'], 'sids': article.values(), 'sents': [] }) red_node = dict() for v in query_a: tmp = self.db.query( "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '" + v + "' LIMIT " + str(1)) dictresult = tmp.dictresult()[0] red_node[v] = [] random_nums = random.sample( range(0, len(dictresult['article_titles'])), min(len(dictresult['article_titles']), num_pps)) for i in range(0, len(random_nums)): red_node[v].append( (dictresult['article_titles'][random_nums[i]], dictresult['sents'][random_nums[i]], dictresult['pmids'][random_nums[i]])) blue_node = dict() for v in query_b: tmp = self.db.query( "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '" + v + "' LIMIT " + str(1)) dictresult = tmp.dictresult()[0] blue_node[v] = [] random_nums = random.sample( range(0, len(dictresult['article_titles'])), min(len(dictresult['article_titles']), num_pps)) for i in range(0, len(random_nums)): blue_node[v].append( (dictresult['article_titles'][random_nums[i]], dictresult['sents'][random_nums[i]], dictresult['pmids'][random_nums[i]])) for edge in query_edges: for s_id in edge['sids'][:num_pps]: tmp = self.db.query( "SELECT article_title, pmid, sent FROM entity_table WHERE sent_id = '" + s_id + "' LIMIT 1") edge['sents'].append(tmp.dictresult()[0]) return {'node_a': red_node, 'node_b': blue_node, 'edge': query_edges} def query_links_by_right_entities(self, type_a, entities_right, relation_type, num_edges, num_pps): self.db.query("set statement_timeout TO 0") query_string = "SELECT * FROM (SELECT entity_a,entity_b,(array_agg('[' || article_id || ',' || sent_id || ']'))[1:" + str( num_pps + 2 ) + "] as sents FROM " + self.arg[ 'relation_table'] + " WHERE type_a_" + type_a[ 'name'] + "@>'" + type_a[ 'type'] + "' AND relation_type='" + relation_type + "' GROUP BY entity_a,entity_b) x" q = self.db.query(query_string) print "right entities" result = q.dictresult() query_a = [] query_b = [] query_edges = [] count = 0 for r in result: if r['entity_b'] in entities_right: if count < int(num_edges): count = count + 1 else: break print 'entity_a: ', r['entity_a'] print 'entity_b: ', r['entity_b'] query_a.append(r['entity_a']) query_b.append(r['entity_b']) article = dict() for x in r['sents']: x = ast.literal_eval(x) article[x[0]] = str(x[1]) query_edges.append({ 'source': r['entity_a'], 'target': r['entity_b'], 'sids': article.values(), 'sents': [] }) print 'query_a: ', query_a print 'query_b: ', query_b print 'query_edges: ', query_edges red_node = dict() for v in query_a: tmp = self.db.query( "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '" + v + "' LIMIT " + str(1)) dictresult = tmp.dictresult()[0] red_node[v] = [] random_nums = random.sample( range(0, len(dictresult['article_titles'])), min(len(dictresult['article_titles']), num_pps)) for i in range(0, len(random_nums)): red_node[v].append( (dictresult['article_titles'][random_nums[i]], dictresult['sents'][random_nums[i]], dictresult['pmids'][random_nums[i]])) blue_node = dict() for v in query_b: tmp = self.db.query( "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '" + v + "' LIMIT " + str(1)) dictresult = tmp.dictresult()[0] blue_node[v] = [] random_nums = random.sample( range(0, len(dictresult['article_titles'])), min(len(dictresult['article_titles']), num_pps)) for i in range(0, len(random_nums)): blue_node[v].append( (dictresult['article_titles'][random_nums[i]], dictresult['sents'][random_nums[i]], dictresult['pmids'][random_nums[i]])) for edge in query_edges: for s_id in edge['sids'][:num_pps]: tmp = self.db.query( "SELECT article_title, pmid, sent FROM entity_table WHERE sent_id = '" + s_id + "' LIMIT 1") edge['sents'].append(tmp.dictresult()[0]) return {'node_a': red_node, 'node_b': blue_node, 'edge': query_edges} def query_links_by_categories(self, type_a, type_b, relation_type, num_edges, num_pps): self.db.query("set statement_timeout TO 0") query_string_v2 = "SELECT * FROM (SELECT entity_a,entity_b,(array_agg('[' || article_id || ',' || sent_id || ']'))[1:" + str(num_pps+2) + "] as sents "+"FROM "+self.arg['relation_table']+" WHERE type_a_"+type_a['name']+"@>'"\ +type_a['type']+"' AND type_b_"+type_b['name']+"@>'"+ type_b['type']+"' AND relation_type='"+relation_type + "' GROUP BY entity_a,entity_b) x ORDER BY RANDOM() LIMIT " +str(num_edges) print query_string_v2 q = self.db.query(query_string_v2) temp_time = time.time() - self.start_time self.start_time = time.time() print "------query time cost = ", temp_time result = q.dictresult() query_a = [] query_b = [] query_edges = [] for r in result: print 'entity_a: ', r['entity_a'] print 'entity_b: ', r['entity_b'] query_a.append(r['entity_a']) query_b.append(r['entity_b']) article = dict() for x in r['sents']: x = ast.literal_eval(x) article[x[0]] = str(x[1]) query_edges.append({ 'source': r['entity_a'], 'target': r['entity_b'], 'sids': article.values(), 'sents': [] }) print "query_a: ", query_a print "query_b: ", query_b red_node = dict() for v in query_a: # tmp=self.db.query("select article_title, pmid, sent from entity_table where entity_name= '" + v + "' LIMIT "+str(num_pps)) tmp = self.db.query( "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '" + v + "' LIMIT " + str(1)) dictresult = tmp.dictresult()[0] red_node[v] = [] random_nums = random.sample( range(0, len(dictresult['article_titles'])), min(len(dictresult['article_titles']), num_pps)) for i in range(0, len(random_nums)): red_node[v].append( (dictresult['article_titles'][random_nums[i]], dictresult['sents'][random_nums[i]], dictresult['pmids'][random_nums[i]])) # red_node[v] = map(lambda x:(x['article_titles'],x['sents'],x['pmids']),tmp.dictresult()) blue_node = dict() for v in query_b: tmp = self.db.query( "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '" + v + "' LIMIT " + str(1)) dictresult = tmp.dictresult()[0] blue_node[v] = [] random_nums = random.sample( range(0, len(dictresult['article_titles'])), min(len(dictresult['article_titles']), num_pps)) for i in range(0, len(random_nums)): blue_node[v].append( (dictresult['article_titles'][random_nums[i]], dictresult['sents'][random_nums[i]], dictresult['pmids'][random_nums[i]])) temp_time = time.time() - self.start_time self.start_time = time.time() print "------query entity_a and entity_b time cost = ", temp_time for edge in query_edges: for s_id in edge['sids'][:num_pps]: tmp = self.db.query( "select article_title, pmid, sent from entity_table where sent_id= '" + s_id + "' LIMIT 1") edge['sents'].append(tmp.dictresult()[0]) temp_time = time.time() - self.start_time self.start_time = time.time() print "red_node nubmer = ", len(red_node), " blue_node number = ", len( blue_node), " edges number = ", len(query_edges) print "------query edges time cost = ", temp_time return {'node_a': red_node, 'node_b': blue_node, 'edge': query_edges} def query_links_v2(self, type_a, type_b, relation_type, entities_left=[], entities_right=[], num_edges=5, num_pps=1): try: type_a = ast.literal_eval(type_a) type_b = ast.literal_eval(type_b) entities_left = ast.literal_eval(entities_left) entities_right = ast.literal_eval(entities_right) except: pass if len(entities_left) > 0 and len(entities_right) > 0: print "two sides" result = self.query_links_by_two_sides_entities( entities_left, entities_right, relation_type, num_edges, num_pps) elif len(entities_left) == 0 and len(entities_right) == 0: print "categories" result = self.query_links_by_categories(type_a, type_b, relation_type, num_edges, num_pps) elif len(entities_left) == 0: print "right" result = self.query_links_by_right_entities( type_a, entities_right, relation_type, num_edges, num_pps) elif len(entities_right) == 0: print "left" result = self.query_links_by_left_entities(entities_left, type_b, relation_type, num_edges, num_pps) nodes = [] if 'node_a' in result: for node_name in result['node_a']: node = dict() node['name'] = node_name node['sents'] = [] for article in result['node_a'][node_name]: temp = dict() temp['artitle_title'] = article[0] temp['sent'] = article[1] temp['pmid'] = article[2] node['sents'].append(temp) node['group'] = 1 nodes.append(node) if 'node_b' in result: for node_name in result['node_b']: node = dict() node['name'] = node_name node['sents'] = [] for article in result['node_b'][node_name]: temp = dict() temp['artitle_title'] = article[0] temp['sent'] = article[1] temp['pmid'] = article[2] node['sents'].append(temp) node['group'] = 2 nodes.append(node) # print json.dumps({'edges': result['edge'], 'nodes': nodes}) return {'edges': result['edge'], 'nodes': nodes} def query_links(self, type_a, type_b, relation_type, num_edges=5, num_pps=1): #type_a={'mesh':0, 'name':"Chemicals_and_Drugs"} try: type_a = ast.literal_eval(type_a) type_b = ast.literal_eval(type_b) except: pass print "type_a = ", type_a print "type_b = ", type_b print "relation_type = ", relation_type self.db.query("set statement_timeout TO 0") #query_string = "SELECT * INTO " +self.identity+ " FROM (SELECT DISTINCT ON(entity_a,entity_b) entity_a,entity_b,sent_id "+"FROM "+self.arg['relation_table']+" WHERE type_a_"+type_a['name']+"@>'"\ #+type_a['type']+"' AND type_b_"+type_b['name']+"@>'"+ type_b['type']+"' AND relation_type='"+relation_type + "') x ORDER BY RANDOM() LIMIT " +str(num_edges) query_string_v2 = "SELECT * FROM (SELECT entity_a,entity_b,(array_agg('[' || article_id || ',' || sent_id || ']'))[1:" + str(num_pps+2) + "] as sents "+"FROM "+self.arg['relation_table']+" WHERE type_a_"+type_a['name']+"@>'"\ +type_a['type']+"' AND type_b_"+type_b['name']+"@>'"+ type_b['type']+"' AND relation_type='"+relation_type + "' GROUP BY entity_a,entity_b) x ORDER BY RANDOM() LIMIT " +str(num_edges) print query_string_v2 q = self.db.query(query_string_v2) temp_time = time.time() - self.start_time self.start_time = time.time() # print "------query time cost = ", temp_time result = q.dictresult() # print "result length = ", len(result) query_a = [] query_b = [] query_edges = [] for r in result: query_a.append(r['entity_a']) query_b.append(r['entity_b']) article = dict() for x in r['sents']: x = ast.literal_eval(x) article[x[0]] = str(x[1]) query_edges.append({ 'source': r['entity_a'], 'target': r['entity_b'], 'sids': article.values(), 'sents': [] }) red_node = dict() # print "len query_a = ", len(query_a), "; len query_b = ", len(query_b), "; len query_edges", len(query_edges) for v in query_a: #print "select distinct on (article_id) article_title, pmid, sent from entity_table where entity_name= '" + v['entity_a'] + "' LIMIT "+str(num_pps) #print "select distinct on (article_id) article_title, pmid, sent from entity_table where entity_name= '" + v['entity_a'] + "' LIMIT 2" tmp = self.db.query( "select article_title, pmid, sent from entity_table where entity_name= '" + v + "' LIMIT " + str(num_pps)) #tmp=self.db.query("select distinct on (article_id) article_title, pmid, sent from entity_table where entity_name= '" + v['entity_a'] + "' LIMIT "+str(num_pps)) red_node[v] = map( lambda x: (x['article_title'], x['sent'], x['pmid']), tmp.dictresult()) #query_b=self.db.query("select entity_b from "+self.identity) blue_node = dict() for v in query_b: #pass #print "select distinct on (article_id) article_title, pmid, sent from entity_table where entity_name= '" + v['entity_b'] + "' LIMIT "+str(num_pps) tmp = self.db.query( "select article_title, pmid, sent from entity_table where entity_name= '" + v + "' LIMIT " + str(num_pps)) #tmp=self.db.query("select distinct on (article_id) article_title, pmid, sent from entity_table where entity_name= '" + v['entity_b'] + "' LIMIT "+str(num_pps)) blue_node[v] = map( lambda x: (x['article_title'], x['sent'], x['pmid']), tmp.dictresult()) temp_time = time.time() - self.start_time self.start_time = time.time() # print "------query entity_a and entity_b time cost = ", temp_time #print red_node,blue_node for edge in query_edges: for s_id in edge['sids'][:num_pps]: tmp = self.db.query( "select article_title, pmid, sent from entity_table where sent_id= '" + s_id + "' LIMIT 1") edge['sents'].append(tmp.dictresult()[0]) temp_time = time.time() - self.start_time self.start_time = time.time() # print "red_node nubmer = ", len(red_node), " blue_node number = ", len(blue_node), " edges number = ", len(query_edges) # print "------query edges time cost = ", temp_time #query_edge = "SELECT DISTINCT T.entity_a as source, T.entity_b as target, E.pmid, E.article_title,E.sent FROM " + self.arg['entity_table'] + " AS E INNER JOIN "+\ #self.identity+" T ON E.sent_id = T.sent_id"; #query_edges = "SELECT distinct on (R.article_id) R.article_id, R.sent_id from " + self.arg['relation_table']+ " AS R inner join " + self.identity+ \ #" T on R.entity_a = T.entity_a and R.entity_b = T.entity_b and R.relation_type = \'" + relation_type +"' LIMIT " +str(num_pps) #"distinct on (article_id) inner join on entity_a, entity_b, relation_type" #q = self.db.query(query_edge) #self.db.query("drop table "+self.identity) return {'node_a': red_node, 'node_b': blue_node, 'edge': query_edges}
class PostGreDBConnector: """PostGreDBConnector opens a PostGre DB connection. Different functions allow you to add, delete or update documents in PostGre DB.""" def __init__(self): """Connecting to localhost (default host and port [localhost, 4532]) PostGre DB and initializing needed data base and tables.""" try: print("Connecting to PostGre DB...") self.__db = DB(dbname='testdb', host='localhost', port=5432, user='******', passwd='superuser') print("PostGre DB connection successfully built.") except ConnectionError: print("PostGre DB connection could not be built.") self.delete_all_data() self.drop_all_tables() def close_connection(self): self.__db.close() def create_schema(self, schema_name): self.__db.query("CREATE SCHEMA " + schema_name) self.__create_tables(schema_name) self.__create_functions(schema_name) def __create_tables(self, schema): """Create needed tables for RDF parsing.""" schema += "." self._add_table("CREATE TABLE " + schema + "texts (id serial primary key, title text)") self._add_table( "CREATE TABLE " + schema + "bscale (id serial primary key, bscale text, nominal bool, ordinal bool, interval bool)") self._add_table("CREATE TABLE " + schema + "bsort (id serial primary key, bsort text)") self._add_table("CREATE TABLE " + schema + "pattern (id serial primary key, pattern text)") self._add_table("CREATE TABLE " + schema + "single_pattern (id serial primary key, single_pattern text)") self._add_table("CREATE TABLE " + schema + "snippets (id serial primary key, snippet text)") # relations self._add_table("CREATE TABLE " + schema + "has_attribute (bsort_id int, bscale_id integer[], aggregation int)") self._add_table("CREATE TABLE " + schema + "has_object (bscale_id int, pattern_id integer[], aggregation int)") self._add_table( "CREATE TABLE " + schema + "pattern_single_pattern (pattern_id int, single_pattern_id integer[], aggregation int)") self._add_table("CREATE TABLE " + schema + "texts_snippets (text_id int primary key, snippet_id integer[], aggregation int)") self._add_table( "CREATE TABLE " + schema + "snippet_offsets (id serial primary key," " single_pattern_id int, snippet_id int, offsets integer[][], aggregation int)") # adjective and verb extractions self._add_table("CREATE TABLE " + schema + "subject_occ (id serial primary key, subject text, count int)") self._add_table("CREATE TABLE " + schema + "adjective_occ (id serial primary key, adjective text, count int)") self._add_table("CREATE TABLE " + schema + "verb_occ (id serial primary key, verb text, count int)") self._add_table("CREATE TABLE " + schema + "object_occ (id serial primary key, object text, count int)") self._add_table("CREATE TABLE " + schema + "subject_adjective_occ (id serial primary key, subject int, adjective int, count int, pmi float)") self._add_table("CREATE TABLE " + schema + "subject_object_occ (id serial primary key, subject int, object int, count int, pmi float)") self._add_table("CREATE TABLE " + schema + "object_verb_occ (id serial primary key, object int, verb int, count int, pmi float)") self._add_table("CREATE TABLE " + schema + "subject_verb_occ (id serial primary key, subject int, verb int, count int, pmi float)") # correlating pattern self._add_table("CREATE TABLE " + schema + "bscale_single_pattern (id serial primary key, bscale_id int, single_pattern_id int, single_pattern text, count int)") self._add_table( "CREATE TABLE " + schema + "correlating_pattern (id serial primary key, pattern_a int, pattern_b int, count int, pmi float)") def __create_functions(self, schema): """Create all necessary functions to aggregate the results saved in the database.""" schema += "." self.add_function(schema + "aggregate_texts_snippets", "SELECT text_id, array_length(snippet_id, 1) FROM " + schema + "texts_snippets") self.add_function(schema + "aggregate_snippet_offsets", "SELECT id, array_length(offsets, 1) FROM " + schema + "snippet_offsets") def add_function(self, name, function): """Create a new function in the db.""" create_function = "CREATE FUNCTION " returns = "() RETURNS SETOF RECORD AS " lang = " LANGUAGE SQL" query = create_function + name + returns + add_quotes(function) + lang self.__db.query(query) def _add_table(self, query): """Create a new table with a query.""" self.__db.query(query) def add_table(self, schema, name, rows): """Create a new table with a name and rows given in query form.""" create_table = "CREATE TABLE " query = create_table + schema + "." + name + rows self.__db.query(query) def insert(self, schema, table, row): """Insert a new row element into a specified table.""" return self.__db.insert(schema + "." + table, row) def is_in_table(self, schema, table, where_clause): """Returns whether a row already exists in a table or not.""" select = "SELECT * FROM " where = " WHERE " q = select + schema + "." + table + where + where_clause result = self.__db.query(q).dictresult() if len(result) > 0: return True else: return False def update(self, schema, table, values, where_clause): """Update an entry in a specified table.""" UPDATE = "UPDATE " SET = " SET " WHERE = " WHERE " query = UPDATE + schema + "." + table + SET + values + WHERE + where_clause self.query(query) def get(self, schema, table, where_clause, key): """Return the key of a specific item in a table.""" select = "SELECT " _from = " FROM " where = " WHERE " q = select + key + _from + schema + "." + table + where + where_clause result = self.__db.query(q).dictresult() if len(result) > 0: return result[0][key] else: return None def get_data_from_table(self, schema, table): """Gets all data available in a specific table.""" return self.__db.query("SELECT * FROM " + schema + "." + table).dictresult() def get_id(self, schema, table, where_clause): """Return the id of an item in a table. If found return id number of found item, else None.""" select = "SELECT id FROM " where = " WHERE " q = select + schema + "." + table + where + where_clause result = self.__db.query(q).dictresult() if len(result) > 0: return result[0]['id'] else: return None def delete_from_table(self, schema, table, row): """Delete a row element form a specific table.""" return self.__db.delete(schema + "." + table, row) def delete_data_in_table(self, schema, table): """Delete all data in a specific table.""" self.__db.truncate(schema + "." + table, restart=True, cascade=True, only=False) def delete_all_data(self): """Deletes all data from all existing tables.""" tables = self.get_tables() for table in tables: table_name = str(table) self.__db.truncate(table_name, restart=True, cascade=True, only=False) def get_tables(self): """Get all available tables in the database.""" return self.__db.get_tables() def get_attributes(self, schema, table): """Get all attributes of a specified table.""" return self.__db.get_attnames(schema + "." + table) def drop_table(self, schema, table): """Drops a specified table.""" query = "DROP TABLE " self.__db.query(query + schema + "." + table) def drop_all_tables(self): """Drops all existing tables.""" tables = self.get_tables() table_names = "" if len(tables) > 0 : for ind, table in enumerate(tables): if ind == 0: table_names = str(table) else: table_names = table_names + ", " + str(table) self.__db.query("DROP TABLE " + table_names) else: print("Nothing to delete.") def get_all(self, schema, table, attribute): """Gets one or more attributes of all entries from a specified table.""" select = "SELECT " _from = " FROM " query = select + attribute + _from + schema + "." + table return self.__db.query(query).dictresult() def query(self, query): """Sends a query to the database.""" result = self.__db.query(query) if result is not None: if not isinstance(result, str): return result.dictresult() else: return result
WHERE date_part(\'month\'::text, new.start_time) = ' + m + '::double precision AND date_part(\'year\'::text, new.start_time) = ' + str( year ) + '::double precision DO INSTEAD INSERT INTO scoot.agg_15_' + str( year ) + m + ' (detector, start_time, end_time, flow_mean, occ_mean, vehicle_occ_mean, lpu_factor_mean)\ VALUES (new.detector, new.start_time, new.end_time, new.flow_mean, new.occ_mean, new.vehicle_occ_mean, new.lpu_factor_mean)' q = 'CREATE TABLE scoot.agg_15_' + str(year) + m + '(detector text, \ start_time timestamp without time zone, end_time timestamp without time zone, \ flow_mean int, occ_mean double precision, vehicle_occ_mean int, \ LPU_factor_mean double precision, \ CONSTRAINT c' + str( year ) + m + ' CHECK (date_part(\'month\'::text, start_time) = ' + m + '::double precision AND date_part(\'year\'::text, start_time) = ' + str( year) + '::double precision))' db.query(q) db.query(r) print('Table Created') db.inserttable('scoot.agg_15_' + str(year) + m, sdata) print('Table Inserted') # Insert outflowing data from a month earlier than start_month if not sdata_prev.empty: if month == 1: year = year - 1 m = '12' else: if (month - 1) < 10: m = '0' + str(month - 1) else: m = str(month - 1)
class TPCDS: STORAGE = [ ("small_storage", "appendonly=true, orientation=column"), ("medium_storage", "appendonly=true, orientation=column, compresstype=zstd"), ("large_storage", "appendonly=true, orientation=column, compresstype=zstd"), ("e9_medium_storage", "appendonly=true, compresstype=zstd"), ("e9_large_storage", "appendonly=true, orientation=column, compresstype=zstd") ] RAW_DATA_PATH = "/data/generated_source_data/data" def __init__(self, info_dir, dbname, port, host, data_path): self.info_dir = info_dir self.db = DB(dbname=dbname, port=port, host=host) self.dist_info = self.parse_dist_info() self.data_path = data_path def create_schema(self): sqls = [ "DROP SCHEMA IF EXISTS tpcds CASCADE;", "DROP SCHEMA IF EXISTS ext_tpcds CASCADE;", "CREATE SCHEMA tpcds;", "CREATE SCHEMA ext_tpcds;" ] for sql in sqls: self.db.query(sql) def create_table(self): ddl_top_path = os.path.join(self.info_dir, "ddl") print("creating norm tables...") for fn in os.listdir(ddl_top_path): if not fn.endswith(".sql"): continue if fn == "000.e9.tpcds.sql": continue if "ext_" in fn: continue self.create_normal_table(os.path.join(ddl_top_path, fn)) print("norm tables created.") print("creating ext tables...") for fn in os.listdir(ddl_top_path): if not fn.endswith(".sql"): continue if fn == "000.e9.tpcds.sql": continue if "ext_" not in fn: continue self.create_ext_table(os.path.join(ddl_top_path, fn)) print("ext tables created.") def create_normal_table(self, ddlpath): assert("ext_" not in ddlpath) with open(ddlpath) as f: sql = f.read().lower() tabname = self.get_tabname_from_path(ddlpath) sql = self.patch_dist_info(sql, tabname) sql = self.patch_storage_info(sql) self.db.query(sql) def create_ext_table(self, ddlpath): assert("ext_" in ddlpath) with open(ddlpath) as f: sql = f.read().lower() tabname = self.get_tabname_from_path(ddlpath) sql = self.patch_gpfdist_local(sql, tabname) self.db.query(sql) def patch_dist_info(self, sql, tabname): distkeys = self.dist_info[tabname] sql = sql.replace(":distributed_by", "distributed by (%s)" % distkeys) return sql def patch_storage_info(self, sql): for storage_type, storage_option in self.STORAGE: replace_key = ":" + storage_type sql = sql.replace(replace_key, storage_option) return sql def patch_gpfdist_local(self, sql, tabname): url = "'gpfdist://mdw:2223/%s*.dat'" % tabname sql = sql.replace(":location", url) return sql def get_tabname_from_path(self, path): filename = os.path.basename(path) return filename.split(".")[-2] def parse_dist_info(self): dist_info = {} with open(os.path.join(self.info_dir, "ddl", "distribution.txt")) as f: for line in f: _, tabname, distkeys = line.strip().split("|") dist_info[tabname] = distkeys return dist_info def start_gpfdist(self, port, logfile): cmd = ["gpfdist", "-p", str(port), "-l", logfile] proc = subprocess.Popen(cmd) sleep(3) return proc def load_all_tables(self): ddl_top_path = os.path.join(self.info_dir, "ddl") start_time = time() print("load tables...") print("===================================") for fn in os.listdir(ddl_top_path): if not fn.endswith(".sql"): continue if fn == "000.e9.tpcds.sql": continue if "ext_" in fn: continue self.load_table(os.path.join(ddl_top_path, fn)) end_time = time() cost = end_time - start_time print("all tables finished in %s seconds" % cost) def load_table(self, ddlpath): tabname = self.get_tabname_from_path(ddlpath) tab_data_path = self.get_tab_data_path(tabname) os.chdir(tab_data_path) proc = self.start_gpfdist("2223", "/data/gpfdist.log") sql = ("insert into tpcds.%s " "select * from ext_tpcds.%s") % (tabname, tabname) start_time = time() self.db.query(sql) end_time = time() proc.terminate() proc.wait() cost = end_time - start_time print("load %s cost time %s seconds" % (tabname, cost)) print("===================================") def get_tab_data_path(self, tabname): return os.path.join(self.data_path, tabname) def move_all_tables(self): ddl_top_path = os.path.join(self.info_dir, "ddl") for fn in os.listdir(ddl_top_path): if not fn.endswith(".sql"): continue if fn == "000.e9.tpcds.sql": continue if "ext_" in fn: continue self.move_table(os.path.join(ddl_top_path, fn)) def move_table(self, ddlpath): tabname = self.get_tabname_from_path(ddlpath) fns = self.findall_tab_data(tabname) newplace = os.path.join(self.data_path, tabname) os.makedirs(newplace, exist_ok=True) for fn in fns: shutil.move(fn, newplace) print("moving %s (%d dat files)" % (tabname, len(fns))) def findall_tab_data(self, tabname): data_fns = [] pt_regstr = tabname + r"_\d+_\d+.dat" pt = re.compile(pt_regstr) for fn in os.listdir(self.RAW_DATA_PATH): if pt.search(fn): data_fns.append(os.path.join(self.RAW_DATA_PATH, fn)) return data_fns def close_db(self): self.db.close()
name = 'st_asgeojson(wkb_geometry)as geometry' cols+=name+',' cols = cols.rstrip(',') cols += ' from %s ) tb;' % tbname return cols #pgtbs=pgcnn.query("select tablename from pg_tables where schemaname='public' and tablename like 'bs_%_layer' order by tablename;") #tbrows=pgtbs.namedresult() tbrows=['pipe'] for row in tbrows: #tbname=row.tablename tbname=row print(tbname) sql = genSql(tbname) print(sql) dts=pgcnn.query(sql).namedresult() c = 0 d = 0 w = open('pipe.json','w') for dt in dts: dt.json['xdwz']='' dt.json['sccj']='' dt.json['sjdw']='' dt.json['sgdw']='' dt.json['wxyy']='' dt.json['fsfs']=u"直埋" dt.json['fssd']=1.5 dt.json['bwcl']=u"聚氨酯" dt.json['whklx']=u"聚乙烯" dt.json['bz']="20180920" dt.json['zw_type']='597'
import sys type = sys.getfilesystemencoding() print(type) db=DB(dbname='postgres',host='localhost',port=5432,user='******',passwd='postgres') xl=xlrd.open_workbook(u'电大所-海银御景站-管网.xls') #xl=xlrd.open_workbook('/home/bblu/repo/algopython/database/pipe.xls') sheetname = xl.sheet_names() xl_sheet = xl.sheet_by_index(0) print xl_sheet.name xl_sheet1 = xl.sheet_by_name(u'Sheet1') print xl_sheet1.name fileds="" for col in range(0,xl_sheet.ncols): fileds+=xl_sheet.cell(0,col).value+',' print(fileds) for row in range(1,xl_sheet.nrows): values="" #这里可以选择必要的两三个字段看看在第几列 #values+= xl_sheet.cell(row,col).value + ',' for col in range(0,xl_sheet.ncols): values+= unicode(xl_sheet.cell(row,col).value) + ',' #print(values) #去掉最后逗号 fields=fields[:-1] sql = 'insert into dwzy.table (sbid,equip) values(%s)'%values db.query(sql)
from pg import DB db=DB(dbname='gis',host='localhost',port=5432,user='******',passwd='pswd') # 1.drop tables begin with 't_' q=db.query("SELECT tablename FROM pg_tables WHERE schemaname='public' and tablename LIKE 't\_%'") rows=q.namedresult() c=0; for row in rows: tbname=row.tablename alt='ALTER TABLE %s ADD COLUMN' % tbname sql='%s edittype_update_ integer' % alt sql='%s; %s edittime_update_ timestamp with time zone'%(sql,alt) sql='%s; %s editversionid_update_ bigint;'%(sql,alt) db.query(sql) c=c+1 print('%d : %s' %(c,sql)) db.close()
# -*- coding: utf-8 -*- """ insert data to PostgreSQL @author: Shih-Chi """ from pg import DB import numpy as np import datetime db = DB(dbname='Pingtung', host='localhost', port=5432, user='******', passwd='husky') sites = db.query('select site from "Well_Stations"').getresult() for site in sites: mindate = db.query('select MIN(date),MAX(date) from "Well" where site like \'{site}\''.format(site = site[0])).getresult() if mindate[0][0] != None: print('{site}: date from {ys}/{ms}/{ds} to {ye}/{me}/{de}'.format(site=site[0],ys=mindate[0][0].year,ms=mindate[0][0].month,ds=mindate[0][0].day,ye=mindate[0][1].year,me=mindate[0][1].month,de=mindate[0][1].day)) db.query('UPDATE "Well_Stations" SET starttime = \'{ys}/{ms}/{ds}\', endtime = \'{ye}/{me}/{de}\' WHERE site like \'{site}\''.format(site=site[0],ys=mindate[0][0].year,ms=mindate[0][0].month,ds=mindate[0][0].day,ye=mindate[0][1].year,me=mindate[0][1].month,de=mindate[0][1].day));
import os from pg import DB db=DB(dbname='db_meta',host='172.16.1.10',port=5432,user='******',passwd='password') # 1.drop test tmp tables q=db.query("SELECT sys_tablename AS tablename FROM sys_component where sys_tablename like 't\_%' order by sys_tablename;") rows=q.namedresult() i=0 for row in rows: tbname=row.tablename cmd='pg_dump -h 172.16.1.10 -U postgres -F p -n public -s -t %s dbName >> D:/pgdata/db_meta_struct.sql' % tbname; os.system(cmd) i=i+1 print('export table: %s ,%d' % (tbname,i))
sites = get_well_list(eventtime) for site in sites: time, data = get_well_data(site, eventtime) fig = figure(figsize=(12, 8), dpi=300) title('{site} ({ST}~{ET})'.format(site = site, ST = str(eventtime[0]), ET = str(eventtime[1]))) plot(time, data, color='b', linewidth=1.5) fig.savefig(os.path.join(newpath, 'well\{site}.png'.format(site = site)), dpi=200, bbox_inches='tight') print(site) close('all') sites = get_P_list(eventtime) for site in sites: time, data = get_P_data(site, eventtime) siteXY = db.query('select "TWD97_X", "TWD97_Y" from "Precipitation_Stations" where site = \'{site}\''.format(site = site)).getresult() file.write('{site}, {s}, {x}, {y}\n'.format(site=site, s=np.sum(data), x=float(siteXY[0][0]), y=float(siteXY[0][1]))) fig = figure(figsize=(12, 8), dpi=300) title('{site} ({ST}~{ET})'.format(site = site, ST = str(eventtime[0]), ET = str(eventtime[1]))) step(time, data, color='b', linewidth=1.5, where='mid') fig.savefig(os.path.join(newpath, 'P\{site}.png'.format(site = site)), dpi=200, bbox_inches='tight') print(site) close('all') file.close()
from itertools import chain from suds.client import Client from pg import DB import time reload(sys) sys.setdefaultencoding("utf8") start_time = time.time() url = "http://www.marinespecies.org/aphia.php?p=soap&wsdl=1" client = Client(url) db = DB(dbname="gbif", host="localhost", port=5432, user="******", passwd="postgres") offset = 0 while True: names = db.query("select name from gbif.names where match_type is null limit 50 offset " + str(offset)).getresult() names = list(chain(*names)) if (len(names) == 0): break scientificnames = client.factory.create("scientificnames") scientificnames.item = names scientificnames._arrayType = "xsd:string[]" try: results = client.service.matchAphiaRecordsByNames(scientificnames, True, True, False) except: e = sys.exc_info()[0] print "Error: " + str(e) results = []
def setUp(self): """Setup test tables or empty them if they already exist.""" db = DB(dbname=dbname, host=dbhost, port=dbport) db.query("set datestyle to 'iso'") db.query("set default_with_oids=false") db.query("set standard_conforming_strings=false") db.query("set client_min_messages=warning") db.query("drop table if exists fruits cascade") db.query("create table fruits(id serial primary key, name varchar)") self.db = db
# extract params test1 = sys.argv[1] test2 = sys.argv[2] tpath1 = "test1" tpath2 = "test2" # read data from db # these are the params we need for build # comp1, comp2, flags1, flags2 # these are the params we need for doDiffGdb.py: # tname prec1 prec2 sort1 sort2 db = DB(dbname="qfp", host="localhost", user="******", passwd="qfp123") q1 = db.query("select * from tests where index = " + test1) q2 = db.query("select * from tests where index = " + test2) d1 = q1.dictresult()[0] print("queried for test 1, found? " + str(len(d1) > 0)) comp1 = d1["compiler"] flags1 = d1["switches"] prec1 = d1["precision"] tname = d1["name"] sort1 = d1["sort"] d2 = q2.dictresult()[0] print("queried for test 2, found? " + str(len(d2) > 0)) comp2 = d2["compiler"] flags2 = d2["switches"] prec2 = d2["precision"]
# -*- coding: utf-8 -*- """ insert data to PostgreSQL @author: Shih-Chi """ from pg import DB import numpy as np import datetime db = DB(dbname='Pingtung', host='localhost', port=5432, user='******', passwd='husky') db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'中正(1)\''); db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'石化(1)\''); db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'林園(1)\''); db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'潮寮(1)\''); db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'永芳(1)\''); db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'昭明(1)\''); db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'溪埔(1)\''); db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'九曲(1)\''); db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'大樹(1)\''); db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'旗山(1)\''); db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'美濃(1)\''); db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'旗美(1)\''); db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'新威(1)\''); db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'清溪(1)\''); db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'海豐(1)\''); db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'前進\'');
"../old-hp-data/log20110713.txt", "../old-hp-data/log20110728.txt", "../old-hp-data/log20110801.txt", "../old-hp-data/log20110816.txt", "../old-hp-data/log20110825.txt", "../old-hp-data/log20110903.txt", "../old-hp-data/log20110913.txt", "../old-hp-data/log20111014.txt", "../old-hp-data/log20111024.txt", "../old-hp-data/log20111106.txt", "../old-hp-data/log20111128.txt", ] honeypotName = 'HoneypotOld' port = 22 sessionDatabaseID = DB.query(db, "SELECT MAX(session) FROM connections WHERE honeypot='" + honeypotName + "'").getresult()[0][0] if not sessionDatabaseID: sessionDatabaseID = 1; sessionOld = -1 ip = '' sessionStartTime = '' sessionEndTime = '' user = '' password = '' for fileName in fileNames: for line in input(fileName): if len(line) < 20: continue if "Session" in line:
copy mysql_dwh_staging.orders from 's3://troy-de-course/orders/current/orders.csv' iam_role 'arn:aws:iam::387932593219:role/myredshift' CSV QUOTE '\"' DELIMITER ',' acceptinvchars; delete from mysql_dwh.orders using mysql_dwh_staging.orders where mysql_dwh.orders.order_id = mysql_dwh_staging.orders.order_id; insert into mysql_dwh.orders select * from mysql_dwh_staging.orders; truncate table mysql_dwh_staging.orders; end; """ result = db.query(merge_qry) print(result) # import boto3,json # from pg import DB # secret_name = 'your-secret-name' # region_name ='eu-west-1' # session = boto3.session.Session() # client = session.client(service_name='secretsmanager',region_name=region_name) # get_secret_value_response = client.get_secret_value(SecretId=secret_name) # creds = json.loads(get_secret_value_response['SecretString'])
for srv in args.dst: item = cfg[srv] print("") print( "------------------------------------------------------------------------------------" ) print("Destination: " + str(srv)) print( "------------------------------------------------------------------------------------" ) dstdb = DB(dbname=item["db"], host=item["host"], port=int(item["port"]), user=item["user"], passwd=item["password"]) for table in tables: dstdb.start() rows = srcdb.query('SELECT * FROM %s' % table).getresult() dstdb.query( 'CREATE TEMPORARY TABLE newvals ON COMMIT DROP AS TABLE %s WITH NO DATA' % table) dstdb.inserttable('newvals', rows) dstdb.query('LOCK TABLE %s IN EXCLUSIVE MODE' % table) print(upd.get(table)) dstdb.query(upd.get(table)) print(insert.get(table)) dstdb.query(insert.get(table)) dstdb.commit()
global pgcnn sql = "select column_name,data_type from information_schema.columns where table_schema='public' and table_name = '%s'" % tbname columns = pgcnn.query(sql).namedresult() cols='select row_to_json(tb) as json from (select ' for tup in columns: name = tup.column_name if(name!='zscale'): if(name == 'geometry'): name = 'st_asgeojson(st_transform(geometry,4326))as geometry' elif(name == 'wkb_geometry'): name = 'st_asgeojson(st_transform(wkb_geometry,4326))as geometry' cols+=name+',' cols = cols.rstrip(',') cols += ' from %s tb;' % tbname return cols pgtbs=pgcnn.query("select tablename from pg_tables where schemaname='public' and tablename like '%_layer' order by tablename;") tbrows=pgtbs.namedresult() #tbrows=['road_layer'] for row in tbrows: tbname=row.tablename print(tbname) sql = genSql(tbname) print(sql) dts=pgcnn.query(sql).namedresult() c = 0 d = 0 for dt in dts: dt.json['device_table']='public.'+tbname geo = dt.json['geometry'] if geo: dt.json['geometry']=json.loads(geo)
pts += cds else: for sub in cds: pts += sub print item_id,len(pts), if len(pts)>2: hull = Convex(pts) idxs = hull.vertices #idxs.append(idxs[0]) print len(hull.vertices),hull.vertices drawHull(pts,idxs) wo(item_id,pts,idxs) elif len(pts)==2: print '*****************************' wb(item_id,pts) pgtbs=pgcnn.query('select distinct item_id from equ_xl_dev order by item_id limit 1;') for row in pgtbs.namedresult(): #genSql(row.item_id) genSql("1616G011000R03") break #测试 if __name__ == '__main__': ps = [[2, 2],[1, 1],[2, 1],[1.5, 1.5],[1, 2],[3, 1.5],[1.5, 1.2],[0.5, 2],[1.5, 0.5]] #[{'y': 0.5, 'x': 1.5}, {'y': 1.5, 'x': 3}, {'y': 2, 'x': 2}, {'y': 2, 'x': 1}, {'y': 2, 'x': 0.5}, {'y': 1, 'x': 1}] #print Convex.graham_scan(ps)
class SteadyPgConnection: """Class representing steady connections to a PostgreSQL database. Underlying the connection is a classic PyGreSQL pg API database connection which is reset if the connection is lost or used too often. Thus the resulting connection is steadier ("tough and self-healing"). If you want the connection to be persistent in a threaded environment, then you should not deal with this class directly, but use either the PooledPg module or the PersistentPg module to get the connections. """ version = __version__ def __init__(self, maxusage=None, setsession=None, closeable=True, *args, **kwargs): """Create a "tough" PostgreSQL connection. maxusage: maximum usage limit for the underlying PyGreSQL connection (number of uses, 0 or None means unlimited usage) When this limit is reached, the connection is automatically reset. setsession: optional list of SQL commands that may serve to prepare the session, e.g. ["set datestyle to ...", "set time zone ..."] closeable: if this is set to false, then closing the connection will be silently ignored, but by default the connection can be closed args, kwargs: the parameters that shall be used to establish the PostgreSQL connections with PyGreSQL using pg.DB() """ # basic initialization to make finalizer work self._con = None self._closed = True # proper initialization of the connection if maxusage is None: maxusage = 0 if not isinstance(maxusage, int): raise TypeError("'maxusage' must be an integer value.") self._maxusage = maxusage self._setsession_sql = setsession self._closeable = closeable self._con = PgConnection(*args, **kwargs) self._transaction = False self._closed = False self._setsession() self._usage = 0 def _setsession(self): """Execute the SQL commands for session preparation.""" if self._setsession_sql: for sql in self._setsession_sql: self._con.query(sql) def _close(self): """Close the tough connection. You can always close a tough connection with this method and it will not complain if you close it more than once. """ if not self._closed: try: self._con.close() except Exception: pass self._transaction = False self._closed = True def close(self): """Close the tough connection. You are allowed to close a tough connection by default and it will not complain if you close it more than once. You can disallow closing connections by setting the closeable parameter to something false. In this case, closing tough connections will be silently ignored. """ if self._closeable: self._close() elif self._transaction: self.reset() def reopen(self): """Reopen the tough connection. It will not complain if the connection cannot be reopened. """ try: self._con.reopen() except Exception: if self._transcation: self._transaction = False try: self._con.query('rollback') except Exception: pass else: self._transaction = False self._closed = False self._setsession() self._usage = 0 def reset(self): """Reset the tough connection. If a reset is not possible, tries to reopen the connection. It will not complain if the connection is already closed. """ try: self._con.reset() self._transaction = False self._setsession() self._usage = 0 except Exception: try: self.reopen() except Exception: try: self.rollback() except Exception: pass def begin(self, sql=None): """Begin a transaction.""" self._transaction = True try: begin = self._con.begin except AttributeError: return self._con.query(sql or 'begin') else: # use existing method if available if sql: return begin(sql=sql) else: return begin() def end(self, sql=None): """Commit the current transaction.""" self._transaction = False try: end = self._con.end except AttributeError: return self._con.query(sql or 'end') else: if sql: return end(sql=sql) else: return end() def commit(self, sql=None): """Commit the current transaction.""" self._transaction = False try: commit = self._con.commit except AttributeError: return self._con.query(sql or 'commit') else: if sql: return commit(sql=sql) else: return commit() def rollback(self, sql=None): """Rollback the current transaction.""" self._transaction = False try: rollback = self._con.rollback except AttributeError: return self._con.query(sql or 'rollback') else: if sql: return rollback(sql=sql) else: return rollback() def _get_tough_method(self, method): """Return a "tough" version of a connection class method. The tough version checks whether the connection is bad (lost) and automatically and transparently tries to reset the connection if this is the case (for instance, the database has been restarted). """ def tough_method(*args, **kwargs): transaction = self._transaction if not transaction: try: # check whether connection status is bad if not self._con.db.status: raise AttributeError if self._maxusage: # or connection used too often if self._usage >= self._maxusage: raise AttributeError except Exception: self.reset() # then reset the connection try: result = method(*args, **kwargs) # try connection method except Exception: # error in query if transaction: # inside a transaction self._transaction = False raise # propagate the error elif self._con.db.status: # if it was not a connection problem raise # then propagate the error else: # otherwise self.reset() # reset the connection result = method(*args, **kwargs) # and try one more time self._usage += 1 return result return tough_method def __getattr__(self, name): """Inherit the members of the standard connection class. Some methods are made "tougher" than in the standard version. """ if self._con: attr = getattr(self._con, name) if (name in ('query', 'get', 'insert', 'update', 'delete') or name.startswith('get_')): attr = self._get_tough_method(attr) return attr else: raise InvalidConnection def __del__(self): """Delete the steady connection.""" try: self._close() # make sure the connection is closed except Exception: pass
self.country = 0 def handle_endtag(self, tag): if tag == "th": self.start = 0 def handle_data(self, data): if self.start and data == "Country:": self.country = 1 elif self.found : self.data.append(data) self.found = 0 if __name__ == '__main__': db = DB(dbname='honeypotSSH', host='localhost', user='******', passwd='1') while True: try: ip = DB.query(db, "SELECT ip from connections where ip <>'' and country is NULL LIMIT 1").getresult()[0][0] print ip except Exception: exit(1) try: country = DB.query(db, "SELECT country from connections where ip = '" + ip + "' and country is not NULL LIMIT 1").getresult()[0][0] except IndexError: country = 'Not Found' print "Local: " + country if ip[:8] in '192.168.' or ip[:3] in '10.': country = 'Private' print "Private" if ip in '127.0.0.1': country = 'Loopback' print "Loopback" if country is 'Not Found':
class BotDB: def __init__(self, db_url): urlparse.uses_netloc.append("postgres") self.__db_url = db_url url = urlparse.urlparse(db_url) self.__db = DB( dbname=url.path[1:], user=url.username, passwd=url.password, host=url.hostname, port=url.port ) def insertThesis(self, init_id, chat_id, user_id, body): ts = time.time() timestamp = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S') print("inserting thesis") print(init_id, chat_id, user_id, body, timestamp) self.__db.insert("theses", row={"init_id": init_id, "chat_id": chat_id, "user_id": user_id, "body": body}) print("done") self.__db.commit() def getThesisByIds(self, init_id, chat_id): query = self.__db.query("SELECT * FROM theses WHERE init_id = %d AND chat_id = %d;" % (init_id, chat_id)) dict_res = query.dictresult() if len(dict_res) == 0: return False else: return dict_res[0] def getThesisByBody(self, body): query = self.__db.query("SELECT * FROM theses WHERE body = '%s';" % body) dict_res = query.dictresult() if len(dict_res) == 0: return False else: return dict_res[0] def getLastThesesByTime(self, chat_id, interval): query = self.__db.query("SELECT * FROM theses WHERE chat_id = %s AND creation_time > current_timestamp - interval '%s';" % (chat_id, interval)) dict_res = query.dictresult() if len(dict_res) == 0: return False else: return dict_res def getTodayTheses(self, chat_id): query = self.__db.query("SELECT * FROM theses WHERE chat_id = %s AND creation_time > current_date;" % chat_id) dict_res = query.dictresult() if len(dict_res) == 0: return False else: return dict_res def insertUser(self, user_id, username, first_name, last_name): # ts = time.time() row = {"user_id":user_id} if username: row["username"] = username if first_name: row["first_name"] = first_name if last_name: row["last_name"] = last_name self.__db.insert('users', row=row) self.__db.commit() def getUserById(self, user_id): query = self.__db.query("SELECT * FROM users WHERE user_id = %d;" % user_id) dict_res = query.dictresult() if len(dict_res) == 0: return False else: return dict_res[0] def insertBotMessage(self, chat_id, message_id, owner_id): row = {"chat_id": chat_id, "message_id": message_id, "owner_id": owner_id} self.__db.insert('bot_messages', row=row) self.__db.commit() def getBotMessage(self, chat_id, message_id): query = self.__db.query("SELECT * FROM bot_messages WHERE chat_id = %d AND message_id = %d;" % (chat_id, message_id)) dict_res = query.dictresult() if len(dict_res) == 0: return False else: return dict_res[0] def close(self): self.__db.close()
class SteadyPgConnection: """Class representing steady connections to a PostgreSQL database. Underlying the connection is a classic PyGreSQL pg API database connection which is reset if the connection is lost or used too often. Thus the resulting connection is steadier ("tough and self-healing"). If you want the connection to be persistent in a threaded environment, then you should not deal with this class directly, but use either the PooledPg module or the PersistentPg module to get the connections. """ def __init__(self, maxusage=0, setsession=None, *args, **kwargs): """Create a "tough" PostgreSQL connection. maxusage: maximum usage limit for the underlying PygreSQL connection (number of uses, 0 or False means unlimited usage) When this limit is reached, the connection is automatically reset. setsession: optional list of SQL commands that may serve to prepare the session, e.g. ["set datestyle to ...", "set time zone ..."] args, kwargs: the parameters that shall be used to establish the PostgreSQL connections with PyGreSQL using pg.DB() """ self._maxusage = maxusage self._setsession_sql = setsession self._closeable = 1 self._usage = 0 self._con = PgConnection(*args, **kwargs) self._setsession() def _setsession(self): """Execute the SQL commands for session preparation.""" if self._setsession_sql: for sql in self._setsession_sql: self._con.query(sql) def _close(self): """Close the tough connection. You can always close a tough connection with this method and it will not complain if you close it more than once. """ try: self._con.close() self._usage = 0 except: pass def close(self): """Close the tough connection. You are allowed to close a tough connection by default and it will not complain if you close it more than once. You can disallow closing connections by setting the _closeable attribute to 0 or False. In this case, closing a connection will be silently ignored. """ if self._closeable: self._close() def reopen(self): """Reopen the tough connection. It will not complain if the connection cannot be reopened.""" try: self._con.reopen() self._setsession() self._usage = 0 except: pass def reset(self): """Reset the tough connection. If a reset is not possible, tries to reopen the connection. It will not complain if the connection is already closed. """ try: self._con.reset() self._setsession() self._usage = 0 except: self.reopen() def _get_tough_method(self, method): """Return a "tough" version of a connection class method. The tough version checks whether the connection is bad (lost) and automatically and transparently tries to reset the connection if this is the case (for instance, the database has been restarted). """ def tough_method(*args, **kwargs): try: # check whether connection status is bad if not self._con.db.status: raise AttributeError if self._maxusage: # or connection used too often if self._usage >= self._maxusage: raise AttributeError except: self.reset() # then reset the connection try: r = method(*args, **kwargs) # try connection method except: # error in query if self._con.db.status: # if it was not a connection problem raise # then propagate the error else: # otherwise self.reset() # reset the connection r = method(*args, **kwargs) # and try one more time self._usage += 1 return r return tough_method def __getattr__(self, name): """Inherit the members of the standard connection class. Some methods are made "tougher" than in the standard version. """ attr = getattr(self._con, name) if name in ('query', 'get', 'insert', 'update', 'delete') \ or name.startswith('get_'): attr = self._get_tough_method(attr) return attr
#!/usr/bin/python from pg import DB db = DB(dbname="interactive_brokers", host="localhost", port=5432, user="******", passwd="profit") tables = ("benchmark", "fundamental_ratios", "histogram", "tick", "tick_history") for table in tables: deletequery = "DELETE FROM %s" % table resetquery = "ALTER SEQUENCE %s_index_seq RESTART WITH 1" % table db.query(deletequery) try: db.query(resetquery) except: continue
from pg import DB import random import configparser config = configparser.ConfigParser() config.read('config.ini') db = DB(host=config['mysqlDB']['hostname'], user=config['mysqlDB']['username'], passwd=config['mysqlDB']['password'], dbname=config['mysqlDB']['database']) #connect to DB with given credentials and information db.query("DROP TABLE users;") print "Dropped" db.query("CREATE TABLE users( id SERIAL PRIMARY KEY, name VARCHAR(100) NOT NULL, caller_id VARCHAR(20) NOT NULL);") print "Created"
''' Created on 1 июня 2016 г. @author: Михаил Булыгин <*****@*****.**> ''' import xml.etree.ElementTree as ET from pg import DB if __name__ == '__main__': file = open("ruwiki.xml", encoding="utf-8") tree = ET.iterparse(file) db = DB(host="localhost", user="******", passwd="1234", dbname="wiki") db.query("TRUNCATE TABLE wiki") for i, line in enumerate(tree): event, element = line if element.tag == "page": pageid = element.find("id").text title = element.find("title").text timestamp = element.find("revision").find("timestamp").text.replace("T", " ") username = element.find("revision").find("contributor").find("username") if not username is None: author = username.text else: author = element.find("revision").find("contributor").find("ip").text text = element.find("revision").find("text").text db.insert("wiki", id=pageid, title=title, timestamp=timestamp, author=author, text=text)
def main(): db = DB(dbname='databasename', host='127.0.0.1', port=5432, user='******', passwd='...') for tablename in ('table1', 'table2'): q = db.query('select * from %s where ...' % tablename) d = q.dictresult() json.dump(d, open("%s.txt" % tablename,'w'))
# Database name : testdatabase # Database server: localhost (my computer) # Database port: 5432 (default portnumber) # Username : postgres # Password : admin from pg import DB db = DB(dbname='testdatabase', host='localhost', port=5432, user='******', passwd='admin') print("Connection to the database: OK") print("Clear database...") db.query("drop TABLE IF EXISTS location") db.query("drop TABLE IF EXISTS cityfeatures") db.query("drop TABLE IF EXISTS feature") db.query("drop TABLE IF EXISTS city") print("Now creating database objects :") print("Table - Feature") db.query(" CREATE TABLE feature ( " " featureId smallint NOT NULL," " featureName character varying(50) NOT NULL," " CONSTRAINT Feature_pk PRIMARY KEY (featureId))"
# db.query("ALTER TABLE "+config.get('summary_stats', 'shp_table_name')+" ADD column attributes TEXT") # # print "importing raster" # db.query("DROP TABLE IF EXISTS "+config.get('summary_stats', 'raster_table_name')) # p1 = subprocess.Popen(['/usr/bin/raster2pgsql', '-d', '-s', config.get('summary_stats', 'raster_file_srs'), '-t', '100x100', config.get('summary_stats', 'raster_file'), # config.get('summary_stats', 'raster_table_name')], stdout=subprocess.PIPE) # p2 = subprocess.Popen(['/usr/bin/psql', '-p', '5432', config.get('database', 'dbname'), '-U', config.get('database', 'user'), '-h', '127.0.0.1'], # stdin=p1.stdout, stdout=subprocess.PIPE, env=psql_env) # p1.stdout.close() # Allow p1 to receive a SIGPIPE if p2 exits. # output,err = p2.communicate() # # db.query('CREATE INDEX dem_st_convexhull_idx ON '+config.get('summary_stats', 'raster_table_name')+' ' # 'USING gist ((st_convexhull(rast)) public.gist_geometry_ops_2d)') print("updating attributes") q = db.query("SELECT count(*) as count_all from " + config.get('summary_stats', 'shp_table_name')) count_all = q.dictresult()[0]["count_all"] steps = count_all / 500 for s in range(0, steps): try: db.begin() db.query('select raster.updtae_attributes($1, $2, $3, $4)', (config.get('summary_stats', 'raster_table_name'), config.get('summary_stats', 'shp_table_name'), 500, s * 500)) db.commit() print("processed " + str((s * 500))) except Exception, e: print(str(e)) pass
@author: Shih-Chi """ from pg import DB import numpy as np import datetime db = DB(dbname='Pingtung', host='localhost', port=5432, user='******', passwd='husky') import os for filename in os.listdir('Pingtung/Well'): site = filename[0:8] year = filename[9:13] print(filename) sites = db.query('select site, starttime, endtime from "Well_Stations" where site like \'{site}\''.format(site=site)).getresult() if sites != []: for line in open('Pingtung/Well/{FN}'.format(FN=filename), 'r'): #檢查時間是否相符 data = line.split() if len(data[1]) == 8: if (data[0][0:4] != year) and (data[0][5:10] != '01-01'): print('檔案有誤 日期不對') break else: print('檔案有誤 時間不對') break db.insert('Well', site=site, date='{date} {time}'.format(date=data[0], time=data[1]), value=data[2]) else: print('查無此站')
class vol_utils(object): def __init__(self): self.logger = logging.getLogger('volume_project.sql_utilities') self.db_connect() def db_connect(self): CONFIG = configparser.ConfigParser() CONFIG.read('db.cfg') dbset = CONFIG['DBSETTINGS'] self.db = DB(dbname=dbset['database'], host=dbset['host'], user=dbset['user'], passwd=dbset['password']) self.logger.info('Database connected.') def exec_file(self, filename): try: f = open(filename) exec(filename) except: for root_f, folders, files in os.walk('.'): if filename in files: f = root_f + '/' + filename break self.logger.info('Running ', f) exec(f) if f is None: self.logger.error('File %s not found!', filename) raise Exception('File %s not found!', filename) def execute_sql(self, filename): f = None try: f = open(filename) except: for root_f, folders, files in os.walk('.'): if filename in files: f = open(root_f + '/' + filename) if f is None: self.logger.error('File %s not found!', filename) raise Exception('File not found!') sql = f.read() reconnect = 0 while True: try: self.db.query(sql) self.db.commit() return except ProgrammingError as pe: self.logger.error('Error in SQL', exc_info=True) self.db_connect() reconnect += 1 if reconnect > 5: raise Exception('Check DB connection. Cannot connect') def get_sql_results(self, filename, columns, replace_columns=None, parameters=None): ''' Input: filename columns: a list of column names replace_columns: a dictionary of {placeholders:real strings} parameters: list of parameter values Output: dataframe of results ''' f = None try: f = open(filename) except: for root_f, folders, files in os.walk('.'): if filename in files: f = open(root_f + '/' + filename) if f is None: if filename[: 6] == 'SELECT': # Also accepts sql queries directly in string form sql = filename else: self.logger.error('File %s not found!', filename) raise Exception('File not found!') else: sql = f.read() if replace_columns is not None: for key, value in replace_columns.items(): sql = sql.replace(key, str(value)) reconnect = 0 while True: try: if parameters is not None: return pd.DataFrame(self.db.query(sql, parameters).getresult(), columns=columns) else: return pd.DataFrame(self.db.query(sql).getresult(), columns=columns) except ProgrammingError as pe: self.logger.error('Error in SQL', exc_info=True) self.db_connect() reconnect += 1 if reconnect > 5: raise Exception('Check Error Message') def load_pkl(self, filename): f = None try: f = open(filename, "rb") except: for root_f, folders, files in os.walk('.'): if filename in files: f = open(root_f + '/' + filename) if f is None: self.logger.error('File %s not found!', filename) raise Exception('File not found!') return pickle.load(f) def truncatetable(self, tablename): reconnect = 0 while True: try: self.db.truncate(tablename) self.db.commit() self.logger.info('%s truncated', tablename) return except ProgrammingError as pe: print(pe) self.db_connect() reconnect += 1 if reconnect > 5: self.logger.error('Error in SQL', exc_info=True) raise Exception('Check Error Message') def inserttable(self, tablename, content): reconnect = 0 while True: try: self.db.inserttable(tablename, content) self.db.commit() self.logger.info('Inserted table: %s', tablename) break except ProgrammingError: self.db_connect() reconnect += 1 if reconnect > 5: self.logger.error('Error in SQL', exc_info=True) raise Exception('Check Error Message') def __exit__(self): self.db.close()
from pg import DB PG=DB(dbname='VM', host='192.168.1.112' ,port=5432,user='******',passwd='123456') sql="select A.sample_time,A.stat_name,B.stat_rollup,B.unit,A.stat_group,A.entity,A.stat_value from hist_stat_daily A ,stat_counters B where A.stat_id=B.id " q=PG.query(sql) rows=q.getresult() for row in rows: PG.insert("vm_state",time=row[0],stat_name=row[1],stat_rollup_type=row[2],unit=row[3],stat_group=row[4],entity=row[5],stat_value=row[6]) print("成功") PG.close()
class pgcon(object): def __init__(self): self.pgsql = {} self.result = None self.status = "Unknown" self.db = None def query_consul(self): ''' at first try to get the consul coordinates from the environment. these should be set by docker due to container linking. then fetch postgresql container coordinates. ''' try: g.consul_server = os.environ['CONSUL_PORT_8500_TCP_ADDR'] g.consul_port = os.environ['CONSUL_PORT_8500_TCP_PORT'] except: if app.debug: g.consul_server = '172.17.0.2' g.consul_port = '8500' else: raise EnvironmentError( 'No consul environment variables available') try: c = consul.Consul(host=g.consul_server, port=g.consul_port) cresponse = c.kv.get('postgresql', recurse=True)[1] except: raise LookupError('Error in connecting to the Consul server') try: for d in cresponse: v = d['Value'] k = d['Key'].split('/')[-1] self.pgsql[k] = v except: raise AttributeError('Something is wrong with Consuls response') def connect(self): if not self.pgsql: raise ValueError('No coordinates to connect to the db.') try: self.db = DB(dbname=self.pgsql['user'], host=self.pgsql['host'], port=int(self.pgsql['port']), user=self.pgsql['user'], passwd=self.pgsql['password']) except: raise IOError('Could not connect to the db.') self.status = "Connected" def query(self, p=None): ''' connect to the db and retrieve something ''' if not self.db: self.connect() if not self.db: self.result = {'postgresql db: ': 'not connected'} return if p: self.result = self.db.query(p) def call(self, m): if not self.db: self.connect() self.result = getattr(self.db, m)() def disconnect(self): try: self.db.close() except: pass
# db.query("ALTER TABLE "+config.get('summary_stats', 'shp_table_name')+" ADD column attributes TEXT") # # print "importing raster" # db.query("DROP TABLE IF EXISTS "+config.get('summary_stats', 'raster_table_name')) # p1 = subprocess.Popen(['/usr/bin/raster2pgsql', '-d', '-s', config.get('summary_stats', 'raster_file_srs'), '-t', '100x100', config.get('summary_stats', 'raster_file'), # config.get('summary_stats', 'raster_table_name')], stdout=subprocess.PIPE) # p2 = subprocess.Popen(['/usr/bin/psql', '-p', '5432', config.get('database', 'dbname'), '-U', config.get('database', 'user'), '-h', '127.0.0.1'], # stdin=p1.stdout, stdout=subprocess.PIPE, env=psql_env) # p1.stdout.close() # Allow p1 to receive a SIGPIPE if p2 exits. # output,err = p2.communicate() # # db.query('CREATE INDEX dem_st_convexhull_idx ON '+config.get('summary_stats', 'raster_table_name')+' ' # 'USING gist ((st_convexhull(rast)) public.gist_geometry_ops_2d)') print("updating attributes") q = db.query("SELECT count(*) as count_all from "+config.get('summary_stats', 'shp_table_name')) count_all = q.dictresult()[0]["count_all"] steps = count_all/500 for s in range(0, steps): try: db.begin() db.query('select raster.updtae_attributes($1, $2, $3, $4)', (config.get('summary_stats', 'raster_table_name'), config.get('summary_stats', 'shp_table_name'), 500, s*500)) db.commit() print("processed "+str((s * 500))) except Exception, e: print(str(e)) pass