def db():
    dburl = os.environ['DATABASE_URL']
    maxconn = int(os.environ['DATABASE_MAXCONN'])
    db = Postgres(dburl, maxconn=maxconn)

    # register the hstore type so hstore columns come back as dicts
    with db.get_cursor() as cursor:
        psycopg2.extras.register_hstore(cursor, globally=True, unicode=True)

    db.register_model(Community)
    db.register_model(Participant)

    return db
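A minimal usage sketch for the factory above, assuming both environment variables are set and that Community and Participant are model classes defined elsewhere in the project; the DSN below is a placeholder:

import os

os.environ.setdefault('DATABASE_URL', 'postgres://user@localhost/example')  # placeholder DSN
os.environ.setdefault('DATABASE_MAXCONN', '10')

database = db()
# Queries that cast rows to a registered type can now return model instances,
# e.g. database.all("SELECT c.*::communities FROM communities c"), assuming the
# Community model's registered type name is 'communities'.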
class PgDatabaseClient(DatabaseClient):
    """Postgres database client implementation."""

    def __init__(self, config):
        super().__init__(config)

    def configure(self):
        """Configure the db client and create the table."""
        self.db_client = Postgres(url=self._config.get(CONSTANTS.DB_HOST))
        self.create_table_if_no_exist()

    def create_table_if_no_exist(self):
        """Create the table if it does not exist."""
        with self.db_client.get_cursor() as cursor:
            cursor.run(SQLQueries.CREATE_TABLE)

    def insert_data(self, data: str):
        """Insert data into the table."""
        try:
            msg = json.loads(data)
            with self.db_client.get_cursor() as cursor:
                cursor.run(SQLQueries.INSERT_DATA, msg)
            logger.info("Data saved successfully")
        except Exception as e:
            logger.error(f"Cannot save data to db, the following error occurred: {e}")
class PostgresHelper:
    """Helper class for PostgreSQL, mainly for running stored procedures."""

    def __init__(self, server, dbname, username):
        self.server = server
        self.dbname = dbname
        self.username = username
        self.db = Postgres("postgres://" + self.username + "@" + self.server + "/" + self.dbname)

    def executeCommand(self, commandStr):
        self.db.run(sql=commandStr)

    def getMultipleCursor(self, spname, parameters, refcursors):
        paramstr = ""
        cursorstr = ""
        for refcursor in refcursors:
            cursorstr += "'" + refcursor + "',"
        for param in parameters:
            paramstr += param + ","
        if paramstr.endswith(','):
            paramstr = paramstr[:-1]
        if cursorstr.endswith(','):
            cursorstr = cursorstr[:-1]

        data = {}
        with self.db.get_cursor() as cursor:
            # Call the stored procedure, then fetch every refcursor it opened.
            cursor.run("select " + spname + "(" + paramstr + "," + cursorstr + ");")
            for refcursor in refcursors:
                fetchstr = 'FETCH ALL IN "' + refcursor + '";'
                data[refcursor] = cursor.all(fetchstr)
        return data

    def getSingleCursor(self, spname, parameters):
        return self.getMultipleCursor(spname=spname, parameters=parameters, refcursors=["rc1"])
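A brief usage sketch for the helper above, assuming a reachable local database and a stored procedure (the name get_orders and its signature are hypothetical) that opens a refcursor named rc1:

helper = PostgresHelper(server="localhost", dbname="salesdb", username="app")

# Hypothetical procedure: get_orders(customer_id integer, rc refcursor)
result = helper.getSingleCursor(spname="get_orders", parameters=["42"])
for row in result["rc1"]:
    print(row)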
class Service_DB:
    def __init__(self, reprocess_failures_hours):
        self.db = Postgres(config.get_local_settings())
        self.REPROCESS_FAILURES_HOURS = reprocess_failures_hours
        self.COMMIT_LOG_TOOL_COLUMNS = frozenset(["artifacts_archived"])

    def get_unprocessed_commits(self):
        cursor = self.db.get_cursor()
        if config.REPO_TO_ANALYSE:
            # Determine whether REPO_TO_ANALYSE is a single repository or a list.
            if isinstance(config.REPO_TO_ANALYSE, (list, tuple)) and not isinstance(config.REPO_TO_ANALYSE, str):
                equality = "in"
                value = tuple(config.REPO_TO_ANALYSE)
            else:
                equality = "="
                value = config.REPO_TO_ANALYSE
            query = """
                SELECT repository_id, commit_hash, author_date
                FROM COMMITS
                WHERE COMMITS.COMMIT_HASH NOT IN
                    (SELECT COMMIT FROM STATIC_COMMIT_PROCESSED AS PROCESSED)
                AND repository_id {equality} %s
                ORDER BY author_date_unix_timestamp DESC
                LIMIT 10;
                """.format(equality=equality)
            cursor.execute(query, (value, ))
        else:
            query = """
                SELECT repository_id, commit_hash, author_date
                FROM COMMITS
                WHERE COMMITS.COMMIT_HASH NOT IN
                    (SELECT COMMIT FROM STATIC_COMMIT_PROCESSED AS PROCESSED)
                ORDER BY author_date_unix_timestamp DESC
                LIMIT 10;
                """
            cursor.execute(query)

        rows = cursor.fetchall()

        REPO = 0
        COMMIT = 1
        AUTHOR_DATE = 2

        commits = []
        for row in rows:
            raw_author_date = row[AUTHOR_DATE]
            # Strip off the timezone so we don't have to deal with it.
            author_datetime = datetime.strptime(
                raw_author_date[:len(raw_author_date) - 6],
                '%a %b %d %H:%M:%S %Y')
            commits.append({
                "repo": row[REPO],
                "commit": row[COMMIT],
                "author_date": author_datetime
            })

        self.queued_commit(commits)
        return commits

    def truncate_commit_processing(self):
        """
        Delete any records of previous runs that have not finished running
        within the configured interval.
        """
        cursor = self.db.get_cursor()
        cursor.execute("""
            DELETE FROM STATIC_COMMIT_PROCESSED
            WHERE STATUS <> 'PROCESSED'
            AND modified < NOW() - INTERVAL '%s hour';
            """ % self.REPROCESS_FAILURES_HOURS)
        self.db.db.commit()

    def queued_commit(self, commits):
        cursor = self.db.get_cursor()
        cursor.executemany("""
            INSERT INTO STATIC_COMMIT_PROCESSED (REPO, COMMIT, STATUS)
            VALUES (%(repo)s, %(commit)s, 'QUEUED');
            """, commits)
        self.db.db.commit()

    def processing_commit(self, repo, commit):
        cursor = self.db.get_cursor()
        cursor.execute("""
            UPDATE STATIC_COMMIT_PROCESSED
            SET STATUS = 'PROCESSING', MODIFIED = NOW()
            WHERE REPO = %s AND COMMIT = %s;
            """, (repo, commit))
        self.db.db.commit()

    def processed_commit(self, repo, commit, build, log=""):
        cursor = self.db.get_cursor()
        cursor.execute("""
            UPDATE STATIC_COMMIT_PROCESSED
            SET STATUS = 'PROCESSED', BUILD = %s, BUILD_LOG = %s, MODIFIED = NOW()
            WHERE REPO = %s AND COMMIT = %s;
            """, (build, log, repo, commit))
        self.db.db.commit()

    def processing_commit_sql_failed(self, repo, commit, error_message):
        # Cancel the current transaction
        self.db.db.reset()
        # Log the error in the database
        self.processed_commit(repo, commit, "SQL_ERROR", error_message)

    def commit_log_tool(self, repo, commit, column, value):
        if column not in self.COMMIT_LOG_TOOL_COLUMNS:
            logger.error("Column %s is not a log tool column" % column)
        else:
            cursor = self.db.get_cursor()
            cursor.execute("""
                UPDATE STATIC_COMMIT_PROCESSED
                SET {column} = %s
                WHERE REPO = %s AND COMMIT = %s;
                """.format(column=column), (value, repo, commit))

    def add_commit_warning_lines(self, warnings):
        cursor = self.db.get_cursor()
        cursor.executemany("""
            INSERT INTO STATIC_COMMIT_LINE_WARNING
                (REPO, COMMIT, RESOURCE, LINE, SFP, CWE, GENERATOR_TOOL, WEAKNESS)
            VALUES (%(repo_id)s, %(commit_id)s, %(resource)s, %(line_number)s,
                    %(SFP)s, %(CWE)s, %(generator_tool)s, %(description)s)
            """, warnings)

    def add_commit_warning_blames(self, blames):
        cursor = self.db.get_cursor()
        cursor.executemany("""
            INSERT INTO static_commit_line_blame
                (REPO, COMMIT, RESOURCE, LINE, ORIGIN_COMMIT, ORIGIN_RESOURCE,
                 ORIGIN_LINE, IS_NEW_LINE)
            VALUES (%(repo_id)s, %(commit_id)s, %(resource)s, %(line)s,
                    %(origin_commit)s, %(origin_resource)s, %(origin_line)s,
                    %(is_new_line)s)
            """, blames)

    def add_commit_history_graph(self, relations):
        cursor = self.db.get_cursor()
        cursor.executemany("""
            INSERT INTO commit_history_graph (REPO, COMMIT, PARENT_COMMIT)
            VALUES (%(repo_id)s, %(commit_id)s, %(parent_commit)s)
            """, relations)

    def clear_commit_data(self, repo, commit):
        cursor = self.db.get_cursor()
        cursor.execute(
            """DELETE FROM STATIC_COMMIT_LINE_WARNING WHERE REPO=%s AND COMMIT=%s;""",
            (repo, commit))
        cursor.execute(
            """DELETE FROM STATIC_COMMIT_LINE_BLAME WHERE REPO=%s AND COMMIT=%s;""",
            (repo, commit))
        cursor.execute(
            """DELETE FROM COMMIT_HISTORY_GRAPH WHERE REPO=%s AND COMMIT=%s;""",
            (repo, commit))
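A rough usage sketch of the processing loop implied by the class above, assuming `config` and `logger` are provided elsewhere; `run_static_analysis` is a hypothetical placeholder for the actual analysis step:

service_db = Service_DB(reprocess_failures_hours=24)
service_db.truncate_commit_processing()            # re-queue stale, unfinished runs

for item in service_db.get_unprocessed_commits():  # also marks the commits 'QUEUED'
    repo, commit = item["repo"], item["commit"]
    service_db.processing_commit(repo, commit)
    try:
        build, log = run_static_analysis(repo, commit)   # hypothetical analysis step
        service_db.processed_commit(repo, commit, build, log=log)
    except Exception as e:
        service_db.processing_commit_sql_failed(repo, commit, str(e))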
class PostgreSQLDB(object):
    FIELD_SONG_ID = 'song_id'
    FIELD_SONGNAME = 'song_name'
    FIELD_OFFSET = 'time'
    FIELD_HASH = 'hash'

    SONG_ID = 'song_id'
    SONG_NAME = 'song_name'
    CONFIDENCE = 'confidence'
    MATCH_TIME = 'match_time'
    OFFSET = 'time'
    OFFSET_SECS = 'offset_seconds'

    db = None

    # tables
    FINGERPRINTS_TABLENAME = 'fingerprints'
    SONGS_TABLENAME = 'songs'

    # creates
    CREATE_FINGERPRINTS_TABLE = """
        CREATE TABLE IF NOT EXISTS "%s"(
            "%s" INT PRIMARY KEY NOT NULL,
            "%s" INT NOT NULL,
            "%s" INT NOT NULL);""" % (
        FINGERPRINTS_TABLENAME, FIELD_HASH, FIELD_SONG_ID, FIELD_OFFSET)

    CREATE_SONGS_TABLE = """
        CREATE TABLE IF NOT EXISTS "%s"(
            "%s" SERIAL PRIMARY KEY,
            "%s" varchar(250) NOT NULL);""" % (
        SONGS_TABLENAME, FIELD_SONG_ID, FIELD_SONGNAME)

    SELECT_SONG = """SELECT %s FROM %s WHERE %s = %%s;""" % (
        FIELD_SONGNAME, SONGS_TABLENAME, FIELD_SONG_ID)

    # Insert a fingerprint; update it if it already exists.
    INSERT_FINGERPRINT = """
        INSERT INTO %s VALUES (%%s, %%s, %%s)
        ON CONFLICT (%s) DO UPDATE
        SET %s = EXCLUDED.%s, %s = EXCLUDED.%s;""" % (
        FINGERPRINTS_TABLENAME, FIELD_HASH,
        FIELD_SONG_ID, FIELD_SONG_ID, FIELD_OFFSET, FIELD_OFFSET)

    INSERT_SONG = "INSERT INTO %s (%s) VALUES (%%s);" % (
        SONGS_TABLENAME, FIELD_SONGNAME)

    SELECT_MULTIPLE = """SELECT %s, %s, %s FROM %s WHERE %s IN (%%s);""" % (
        FIELD_HASH, FIELD_SONG_ID, FIELD_OFFSET,
        FINGERPRINTS_TABLENAME, FIELD_HASH)

    def __init__(self, drop_tables=False):
        super(PostgreSQLDB, self).__init__()
        if os.environ.get('DOCKERCLOUD_SERVICE_HOSTNAME', None) is not None:
            self.db = Postgres(u"postgres://*****:*****@postgres/hashes")
        else:
            # self.db = Postgres(u"postgres://*****:*****@localhost/postgres")
            self.db = Postgres(u"postgres://*****:*****@pervasivesounds.com/hashes")

        if drop_tables:
            self.db.run("DROP TABLE IF EXISTS %s CASCADE" % self.SONGS_TABLENAME)
            self.db.run("DROP TABLE IF EXISTS %s CASCADE" % self.FINGERPRINTS_TABLENAME)

        self.db.run(self.CREATE_SONGS_TABLE)
        self.db.run(self.CREATE_FINGERPRINTS_TABLE)

    def store(self, name, hashes):
        sid = self.insert_song(name)
        self.insert_hashes(sid, hashes)

    def insert_hash(self, song_id, hash, offset):
        self.db.run(self.INSERT_FINGERPRINT, (hash, song_id, offset))

    def insert_hashes(self, sid, hashes):
        values = []
        for time_, hash_ in hashes:
            values.append((int(hash_), sid, time_))

        with self.db.get_cursor() as cur:
            for split_values in batch(values, 1000):
                cur.executemany(self.INSERT_FINGERPRINT, split_values)

    def insert_song(self, songname):
        """Insert a song into the database and return the ID of the inserted record."""
        self.db.run(self.INSERT_SONG, (songname, ))
        return self.db.one("SELECT %s FROM %s ORDER BY %s DESC LIMIT 1" % (
            self.FIELD_SONG_ID, self.SONGS_TABLENAME, self.FIELD_SONG_ID))

    def get_song_by_id(self, sid):
        """Return a song by its ID."""
        return self.db.one(self.SELECT_SONG, (int(sid), ))

    def return_matches(self, hashes):
        mapper = {}
        for offset, hash in hashes:
            mapper[int(hash)] = offset

        # Get an iterable of all the hashes we need
        values = list(mapper.keys())

        res = []
        if hashes is not None:
            for split_values in batch(values, 100):
                query = self.SELECT_MULTIPLE
                query %= ', '.join(["%s"] * len(split_values))
                for r in self.db.all(query, split_values, back_as=tuple):
                    res.append(r)

        return np.asarray([(sid, offset - mapper[hash])
                           for (hash, sid, offset) in res])

    def get_best_sids(self, matches):
        unique, counts = np.unique(matches[:, 0], return_counts=True)
        return unique[np.argsort(counts)[::-1][:np.minimum(len(counts), 20)]]

    def align_matches(self, matches, sids):
        """
        Find hash matches that align in time with other matches and reach a
        consensus about which hashes are "true" signal from the audio.

        Returns a list of dictionaries with match information.
        """
        # align by diffs
        diff_counter = {}
        largest = 0
        largest_count = 0
        song_id = -1
        for sid in sids:
            for sid, diff in matches[matches[:, 0] == sid]:
                if sid not in diff_counter:
                    diff_counter[sid] = {}
                if diff not in diff_counter[sid]:
                    diff_counter[sid][diff] = 0
                diff_counter[sid][diff] += 1

                if diff_counter[sid][diff] > largest_count:
                    largest = diff
                    largest_count = diff_counter[sid][diff]
                    song_id = sid

        songs = []
        for sid in diff_counter.keys():
            song_name = self.get_song_by_id(sid)
            for diff in diff_counter[sid].keys():
                confidence = diff_counter[sid][diff]
                if confidence > 4:
                    songs.append({
                        'song_id': song_id,
                        'song_name': song_name,
                        'confidence': confidence,
                        'offset': diff
                    })
        return songs
class TestCache(TestCase):

    def setUp(self):
        self.db = Postgres(cache=Cache(max_size=1), cursor_factory=SimpleTupleCursor)
        self.db.run("DROP SCHEMA IF EXISTS public CASCADE")
        self.db.run("CREATE SCHEMA public")
        self.db.run("CREATE TABLE foo (key text, value int)")
        self.db.run("INSERT INTO foo VALUES ('a', 1)")
        self.db.run("INSERT INTO foo VALUES ('b', 2)")

    def test_one_returns_cached_row(self):
        query = "SELECT * FROM foo WHERE key = 'a'"
        r1 = self.db.one(query, max_age=10)
        r2 = self.db.one(query, max_age=10)
        assert r2 is r1

    def test_all_returns_cached_rows(self):
        query = "SELECT * FROM foo ORDER BY key"
        r1 = self.db.all(query, max_age=10)
        r2 = self.db.all(query, max_age=10)
        assert r2 == r1
        assert r2 is not r1
        assert r2[0] is r1[0]

    def test_back_as_is_compatible_with_caching(self):
        query = "SELECT * FROM foo WHERE key = 'a'"
        r1 = self.db.one(query, back_as=dict, max_age=10)
        r2 = self.db.one(query, back_as=namedtuple, max_age=10)
        assert r1 == r2._asdict()
        rows = self.db.all(query, back_as='Row', max_age=10)
        assert rows == [r1]

    def test_all_returns_row_cached_by_one(self):
        query = "SELECT * FROM foo WHERE key = 'a'"
        row = self.db.one(query, max_age=10)
        rows = self.db.all(query, max_age=10)
        assert rows == [row]
        assert rows[0] is row

    def test_one_raises_TooMany_when_the_cache_contains_multiple_rows(self):
        query = "SELECT * FROM foo"
        rows = self.db.all(query, max_age=10)
        assert len(rows) == 2
        with self.assertRaises(TooMany):
            self.db.one(query, max_age=10)

    def test_cache_max_size(self):
        query1 = b"SELECT * FROM foo WHERE key = 'a'"
        query2 = b"SELECT * FROM foo WHERE key = 'b'"
        self.db.all(query1, max_age=10)
        assert set(self.db.cache.entries.keys()) == {query1}
        self.db.all(query2, max_age=10)
        assert set(self.db.cache.entries.keys()) == {query2}

    def test_cache_max_age(self):
        query = b"SELECT * FROM foo WHERE key = 'a'"
        r1 = self.db.one(query, max_age=0)
        r2 = self.db.one(query, max_age=10)
        assert r2 is not r1

    def test_cache_prune(self):
        self.db.cache.max_size = 2
        query1 = b"SELECT * FROM foo WHERE key = 'a'"
        query2 = b"SELECT * FROM foo WHERE key = 'b'"
        self.db.one(query1, max_age=-1)
        self.db.one(query2, max_age=10)
        assert set(self.db.cache.entries.keys()) == {query1, query2}
        self.db.cache.prune()
        assert set(self.db.cache.entries.keys()) == {query2}

    def test_cache_prevents_concurrent_queries(self):
        with self.db.get_cursor() as cursor:
            cursor.run("LOCK TABLE foo IN EXCLUSIVE MODE")

            def insert():
                self.db.one("INSERT INTO foo VALUES ('c', 3) RETURNING *", max_age=1)

            t1 = Thread(target=insert)
            t2 = Thread(target=insert)
            t1.start()
            t2.start()
            cursor.run("COMMIT")  # this releases the table lock
            t1.join()
            t2.join()
            n = self.db.one("SELECT count(*) FROM foo WHERE key = 'c'")
            assert n == 1
def main(filename: str, raw_file: str, head_file: str, CONNECT_DB=False):
    log.info("Running on file: {}".format(filename))

    # read headers from pickled file
    with open(filename, "rb") as f:
        headers = load(f)

    # read columns from csv file
    columns = combine_tables([raw_file, head_file])

    # calculate JD
    # calculate RA DEC
    for head in headers:
        add_file_id(head)
        add_jd(head)
        add_pos(head)

    # check if any of the keys in the dict are not used in the database
    used_headers = set(col["py-name"] for col in columns)
    new_headers = set()
    update_later = []
    for head in headers:
        unused = head.keys() - used_headers
        if len(unused):
            name_str = ". File: " + head.get("FILENAME", "")
            log.warning(f"Unused FITS headers{name_str} {unused}")
            # TODO: add new column;
            # https://www.postgresql.org/docs/13/sql-altertable.html
            new_headers |= {(key, type(head[key])) for key in unused}
            update_later.append(head)

    # TODO: add check verifying no duplicate keys are in here
    # TODO: update the column_list.csv file with the new headers
    # TODO: trigger update for DaCHS (or maybe next step in the CRON job)
    if len(new_headers) > 0:
        log.info("New headers found: %s", new_headers)
        log.info("--- start column statements ---")
        for key, typ in new_headers:
            log.info(add_col_fmt.format("observations.raw", key, type_map[typ.__name__]))
        log.info("---- end column statements ----")
        log.info("")
        log.info("--- start csv statements ---")
        for key, typ in new_headers:
            log.info(csv_fmt.format(key, typ.__name__))
        log.info("---- end csv statements ----")

    if CONNECT_DB:
        db_url = "dbname=dachs"
        db = Postgres(db_url)

        unprocessed = []
        sql_stmt = prep_sql_statement(columns)

        log.info(f"Inserting {len(headers)} new headers")
        for header in headers:
            # Insert new headers if not yet in the database
            with db.get_cursor() as curs:
                # Convert the header to a defaultdict which gives None if the
                # key is not present.
                try:
                    curs.run(sql_stmt, parameters=defaultdict(lambda: None, header))
                except UniqueViolation:
                    # Handle the failed connection error as well
                    log.info(
                        "UniqueViolation. Header probably already in the database. "
                        f"Updating header of {header['FILENAME']} later")
                    unprocessed.append(header)
        log.info("Done")

        log.info(f"Updating {len(unprocessed)} headers")
        for header in unprocessed:
            # Update already existing headers
            with db.get_cursor() as curs:
                update_stmt = make_update_statement(header, columns)
                try:
                    curs.run(update_stmt, parameters=header)
                except Exception as e:
                    log.exception(
                        f"Exception in updating, {e}. "
                        f"Happened with file {header['FILENAME']}")
        log.info("Done")
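main() also refers to a few module-level templates that are not shown here (add_col_fmt, csv_fmt, type_map). A plausible minimal sketch of what they might look like, offered purely as an assumption for context, the real definitions may differ:

# Hypothetical module-level templates assumed by main().
add_col_fmt = "ALTER TABLE {} ADD COLUMN IF NOT EXISTS {} {};"
csv_fmt = "{},{}"
type_map = {
    "str": "text",
    "int": "integer",
    "float": "double precision",
    "bool": "boolean",
}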