Example 1
def db():
    dburl = os.environ['DATABASE_URL']
    maxconn = int(os.environ['DATABASE_MAXCONN'])
    db = Postgres(dburl, maxconn=maxconn)

    # register hstore type
    with db.get_cursor() as cursor:
        psycopg2.extras.register_hstore(cursor, globally=True, unicode=True)

    db.register_model(Community)
    db.register_model(Participant)

    return db
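The factory above returns a postgres.Postgres wrapper, so callers can query through its run/one/all helpers. A minimal sketch of that usage, assuming the same DATABASE_URL and DATABASE_MAXCONN environment variables and a hypothetical participants table:

import os

from postgres import Postgres

# Hypothetical environment, mirroring what db() above expects.
os.environ.setdefault("DATABASE_URL", "postgres://user:pass@localhost/app")
os.environ.setdefault("DATABASE_MAXCONN", "10")

db = Postgres(os.environ["DATABASE_URL"],
              maxconn=int(os.environ["DATABASE_MAXCONN"]))

# run() executes statements, one() fetches a single value/row, all() a list.
db.run("CREATE TABLE IF NOT EXISTS participants (id serial PRIMARY KEY, name text)")
db.run("INSERT INTO participants (name) VALUES (%(name)s)", {"name": "alice"})
print(db.one("SELECT count(*) FROM participants"))
print(db.all("SELECT name FROM participants ORDER BY name"))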
Example 3
class PgDatabaseClient(DatabaseClient):
    """ Postgres Database Client Implementation """
    def __init__(self, config):
        super().__init__(config)

    def configure(self):
        """ Configure db client and create table"""
        self.db_client = Postgres(url=self._config.get(CONSTANTS.DB_HOST))
        self.create_table_if_no_exist()

    def create_table_if_no_exist(self):
        """ Create table if not exists """
        with self.db_client.get_cursor() as cursor:
            cursor.run(SQLQueries.CREATE_TABLE)

    def insert_data(self, data: str):
        """ Insert data to the table """
        try:
            msg = json.loads(data)
            with self.db_client.get_cursor() as cursor:
                cursor.run(SQLQueries.INSERT_DATA, msg)
            logger.info(f"Data saved successfully")
        except Exception as e:
            logger.error(f"Cannot save data to db, following error occurred {e}")
Example 4
class PostgresHelper:
    # Helper class for PostgreSQL, mainly for running stored procedures
    def __init__(self, server, dbname, username):
        self.server = server
        self.dbname = dbname
        self.username = username
        self.db = Postgres("postgres://" + self.username + "@" + self.server +
                           "/" + self.dbname)

    def executeCommand(self, commandStr):
        self.db.run(sql=commandStr)

    def getMultipleCursor(self, spname, parameters, refcursors):
        # Build the call arguments: plain parameters first, then the quoted
        # refcursor names, e.g. "select sp(1,2,'rc1');"
        paramstr = ",".join(parameters)
        cursorstr = ",".join("'" + refcursor + "'" for refcursor in refcursors)

        data = {}
        with self.db.get_cursor() as cursor:
            cursor.run("select " + spname + "(" + paramstr + "," + cursorstr +
                       ");")
            # The procedure opens the named refcursors; fetch everything from
            # each one while the transaction is still open.
            for refcursor in refcursors:
                fetchstr = 'FETCH ALL IN "' + refcursor + '";'
                data[refcursor] = cursor.all(fetchstr)

        return data

    def getSingleCursor(self, spname, parameters):
        return self.getMultipleCursor(spname=spname,
                                      parameters=parameters,
                                      refcursors=["rc1"])
Example 5
class Service_DB:
    def __init__(self, reprocess_failures_hours):
        self.db = Postgres(config.get_local_settings())
        self.REPROCESS_FAILURES_HOURS = reprocess_failures_hours
        self.COMMIT_LOG_TOOL_COLUMNS = frozenset(["artifacts_archived"])

    def get_unprocessed_commits(self):

        cursor = self.db.get_cursor()

        if config.REPO_TO_ANALYSE:

            # Determine whether REPO_TO_ANALYSE is a single repository (string)
            # or a list/tuple of repositories.
            if isinstance(config.REPO_TO_ANALYSE, (list, tuple)):
                equality = "in"
                value = tuple(config.REPO_TO_ANALYSE)
            else:
                equality = "="
                value = config.REPO_TO_ANALYSE

            query = """
                    SELECT repository_id, commit_hash, author_date
                    FROM COMMITS
                    WHERE COMMITS.COMMIT_HASH NOT IN (SELECT COMMIT FROM STATIC_COMMIT_PROCESSED AS PROCESSED)
                    AND repository_id {equality} %s
                    ORDER BY author_date_unix_timestamp DESC
                    LIMIT 10;
                    """.format(equality=equality)

            cursor.execute(query, (value, ))
        else:
            query = """
                    SELECT repository_id, commit_hash
                    FROM COMMITS
                    WHERE COMMITS.COMMIT_HASH NOT IN (SELECT COMMIT FROM STATIC_COMMIT_PROCESSED AS PROCESSED)
                    ORDER BY author_date_unix_timestamp DESC
                    LIMIT 10;
                    """

            cursor.execute(query)
        rows = cursor.fetchall()

        REPO = 0
        COMMIT = 1
        AUTHOR_DATE = 2

        commits = []

        # e.g. datetime.strptime('Tue Jun 28 23:29:52 2016 -0700', '%a %b %d %H:%M:%S %Y %z')
        for row in rows:
            raw_author_date = row[AUTHOR_DATE]
            # Strip off the timezone not to have to deal with it
            author_datetime = datetime.strptime(
                raw_author_date[:len(raw_author_date) - 6],
                '%a %b %d %H:%M:%S %Y')
            commits.append({
                "repo": row[REPO],
                "commit": row[COMMIT],
                "author_date": author_datetime
            })

        self.queued_commit(commits)

        return commits

    """
    Delete any records of previous runs that have not finished running since a specified interval
    """

    def truncate_commit_processing(self):
        cursor = self.db.get_cursor()
        cursor.execute("""
            DELETE FROM STATIC_COMMIT_PROCESSED
            WHERE STATUS <> 'PROCESSED' AND modified < NOW() - INTERVAL '%s hour';
            """ % self.REPROCESS_FAILURES_HOURS)
        self.db.db.commit()

    def queued_commit(self, commits):
        cursor = self.db.get_cursor()

        cursor.executemany(
            """
            INSERT INTO STATIC_COMMIT_PROCESSED
            (REPO, COMMIT, STATUS)
            VALUES
            (%(repo)s, %(commit)s, 'QUEUED');
            """, commits)
        self.db.db.commit()

    def processing_commit(self, repo, commit):
        cursor = self.db.get_cursor()

        cursor.execute(
            """
            UPDATE STATIC_COMMIT_PROCESSED
             SET STATUS = 'PROCESSING', MODIFIED = NOW()
             WHERE REPO = %s AND COMMIT = %s;
            """, (repo, commit))
        self.db.db.commit()

    def processed_commit(self, repo, commit, build, log=""):
        cursor = self.db.get_cursor()

        cursor.execute(
            """
            UPDATE STATIC_COMMIT_PROCESSED
             SET STATUS = 'PROCESSED', BUILD = %s, BUILD_LOG = %s, MODIFIED = NOW()
             WHERE REPO = %s AND COMMIT = %s;
            """, (build, log, repo, commit))
        self.db.db.commit()

    def processing_commit_sql_failed(self, repo, commit, error_message):
        # Cancel the current transaction
        self.db.db.reset()

        # Log the error in the database
        self.processed_commit(repo, commit, "SQL_ERROR", error_message)

    def commit_log_tool(self, repo, commit, column, value):

        if column not in self.COMMIT_LOG_TOOL_COLUMNS:
            logger.error("Column %s is not a log tool column" % column)
        else:
            cursor = self.db.get_cursor()

            cursor.execute(
                """
                    UPDATE STATIC_COMMIT_PROCESSED
                     SET {column} = %s
                     WHERE REPO = %s AND COMMIT = %s;
                    """.format(column=column), (value, repo, commit))

    def add_commit_warning_lines(self, warnings):

        cursor = self.db.get_cursor()
        cursor.executemany(
            """
        INSERT INTO STATIC_COMMIT_LINE_WARNING
        (REPO, COMMIT, RESOURCE, LINE, SFP, CWE, GENERATOR_TOOL, WEAKNESS)
        VALUES
        (%(repo_id)s, %(commit_id)s, %(resource)s, %(line_number)s, %(SFP)s, %(CWE)s, %(generator_tool)s, %(description)s)
        """, warnings)

    def add_commit_warning_blames(self, blames):

        cursor = self.db.get_cursor()
        cursor.executemany(
            """
        INSERT INTO static_commit_line_blame
        (REPO, COMMIT, RESOURCE, LINE, ORIGIN_COMMIT, ORIGIN_RESOURCE, ORIGIN_LINE, IS_NEW_LINE)
        VALUES
        (%(repo_id)s, %(commit_id)s, %(resource)s, %(line)s, %(origin_commit)s, %(origin_resource)s, %(origin_line)s, %(is_new_line)s)
        """, blames)

    def add_commit_history_graph(self, relations):
        cursor = self.db.get_cursor()
        cursor.executemany(
            """
        INSERT INTO commit_history_graph
        (REPO, COMMIT, PARENT_COMMIT)
        VALUES
        (%(repo_id)s, %(commit_id)s, %(parent_commit)s)
        """, relations)

    def clear_commit_data(self, repo, commit):
        cursor = self.db.get_cursor()

        cursor.execute(
            """DELETE FROM STATIC_COMMIT_LINE_WARNING WHERE REPO=%s AND COMMIT=%s;""",
            (repo, commit))
        cursor.execute(
            """DELETE FROM STATIC_COMMIT_LINE_BLAME WHERE REPO=%s AND COMMIT=%s;""",
            (repo, commit))
        cursor.execute(
            """DELETE FROM COMMIT_HISTORY_GRAPH WHERE REPO=%s AND COMMIT=%s;""",
            (repo, commit))
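Service_DB assumes a config module (get_local_settings(), REPO_TO_ANALYSE) and the STATIC_COMMIT_* schema referenced in its queries. A hedged sketch of how a driver loop might use it, with the analysis step itself elided:

# Hypothetical driver, assuming the config module and schema used above.
service_db = Service_DB(reprocess_failures_hours=24)
service_db.truncate_commit_processing()        # requeue runs that never finished
for commit in service_db.get_unprocessed_commits():
    service_db.processing_commit(commit["repo"], commit["commit"])
    # ... run the static analysis for this commit here ...
    service_db.processed_commit(commit["repo"], commit["commit"], build="PASSED")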
Example 6
class PostgreSQLDB(object):
    FIELD_SONG_ID = 'song_id'
    FIELD_SONGNAME = 'song_name'
    FIELD_OFFSET = 'time'
    FIELD_HASH = 'hash'
    SONG_ID = 'song_id'
    SONG_NAME = 'song_name'
    CONFIDENCE = 'confidence'
    MATCH_TIME = 'match_time'
    OFFSET = 'time'
    OFFSET_SECS = 'offset_seconds'
    db = None

    # tables
    FINGERPRINTS_TABLENAME = 'fingerprints'
    SONGS_TABLENAME = 'songs'

    # creates
    CREATE_FINGERPRINTS_TABLE = """
        CREATE TABLE IF NOT EXISTS "%s"(
             "%s" INT PRIMARY KEY NOT NULL,
             "%s" INT NOT NULL,
             "%s" INT NOT NULL);""" % (FINGERPRINTS_TABLENAME, FIELD_HASH,
                                       FIELD_SONG_ID, FIELD_OFFSET)

    CREATE_SONGS_TABLE = \
        """CREATE TABLE IF NOT EXISTS "%s"(
                "%s" SERIAL PRIMARY KEY ,
                "%s" varchar(250) NOT NULL);""" % \
        (SONGS_TABLENAME, FIELD_SONG_ID, FIELD_SONGNAME)

    SELECT_SONG = """SELECT %s FROM %s WHERE %s = %%s;""" \
                  % (FIELD_SONGNAME, SONGS_TABLENAME, FIELD_SONG_ID)

    # inserts fingerprint. Update if existing
    INSERT_FINGERPRINT = \
        """INSERT INTO %s VALUES (%%s, %%s, %%s)
           ON
             CONFLICT (%s)
           DO UPDATE SET
             %s = EXCLUDED.%s,
             %s = EXCLUDED.%s;""" \
        % (FINGERPRINTS_TABLENAME, FIELD_HASH, FIELD_SONG_ID, FIELD_SONG_ID, FIELD_OFFSET, FIELD_OFFSET)

    INSERT_SONG = "INSERT INTO %s (%s) VALUES (%%s);" % (SONGS_TABLENAME,
                                                         FIELD_SONGNAME)

    SELECT_MULTIPLE = """SELECT %s, %s, %s FROM %s WHERE %s IN (%%s);""" \
                      % (FIELD_HASH, FIELD_SONG_ID, FIELD_OFFSET,
                         FINGERPRINTS_TABLENAME, FIELD_HASH)

    def __init__(self, drop_tables=False):
        super(PostgreSQLDB, self).__init__()
        if os.environ.get('DOCKERCLOUD_SERVICE_HOSTNAME', None) is not None:
            self.db = Postgres(
                u"postgres://*****:*****@postgres/hashes")
        else:
            # self.db = Postgres(u"postgres://*****:*****@localhost/postgres")
            self.db = Postgres(
                u"postgres://*****:*****@pervasivesounds.com/hashes"
            )

        if drop_tables:
            self.db.run("DROP TABLE IF EXISTS %s CASCADE" %
                        self.SONGS_TABLENAME)
            self.db.run("DROP TABLE IF EXISTS %s CASCADE" %
                        self.FINGERPRINTS_TABLENAME)

        self.db.run(self.CREATE_SONGS_TABLE)
        self.db.run(self.CREATE_FINGERPRINTS_TABLE)

    def store(self, name, hashes):
        sid = self.insert_song(name)
        self.insert_hashes(sid, hashes)

    def insert_hash(self, song_id, hash, offset):
        self.db.run(self.INSERT_FINGERPRINT, (hash, song_id, offset))

    def insert_hashes(self, sid, hashes):
        values = []
        for time_, hash_ in hashes:
            values.append((int(hash_), sid, time_))

        with self.db.get_cursor() as cur:
            for split_values in batch(values, 1000):
                cur.executemany(self.INSERT_FINGERPRINT, split_values)

    def insert_song(self, songname):
        """
        Inserts song in the database and returns the ID of the inserted record.
        """
        self.db.run(self.INSERT_SONG, (songname, ))
        return self.db.one(
            "SELECT %s FROM %s ORDER BY %s DESC LIMIT 1" %
            (self.FIELD_SONG_ID, self.SONGS_TABLENAME, self.FIELD_SONG_ID))

    def get_song_by_id(self, sid):
        """
        Returns song by its ID.
        """
        return self.db.one(self.SELECT_SONG, (int(sid), ))

    def return_matches(self, hashes):
        mapper = {}
        for offset, hash in hashes:
            mapper[int(hash)] = offset

        # Get an iterable of all the hashes we need
        values = list(mapper.keys())

        res = []
        if hashes is not None:
            for split_values in batch(values, 100):
                query = self.SELECT_MULTIPLE
                query %= ', '.join(["%s"] * len(split_values))

                res.extend(self.db.all(query, split_values, back_as=tuple))
            return np.asarray([(sid, offset - mapper[hash])
                               for (hash, sid, offset) in res])

    def get_best_sids(self, matches):
        unique, counts = np.unique(matches[:, 0], return_counts=True)
        return unique[np.argsort(counts)[::-1][:np.minimum(len(counts), 20)]]

    def align_matches(self, matches, sids):
        """
            Finds hash matches that align in time with other matches and finds
            consensus about which hashes are "true" signal from the audio.

            Returns a dictionary with match information.
        """
        # align by diffs
        diff_counter = {}
        largest = 0
        largest_count = 0
        song_id = -1
        for sid in sids:
            for sid, diff in matches[matches[:, 0] == sid]:
                if sid not in diff_counter:
                    diff_counter[sid] = {}
                if diff not in diff_counter[sid]:
                    diff_counter[sid][diff] = 0
                diff_counter[sid][diff] += 1

                if diff_counter[sid][diff] > largest_count:
                    largest = diff
                    largest_count = diff_counter[sid][diff]
                    song_id = sid

        # total_count = {}
        # for sid in diff_counter.keys():
        #     total_count[sid] = np.sum(diff_counter[sid].values)

        songs = []
        for sid in diff_counter.keys():
            song_name = self.get_song_by_id(sid)
            for diff in diff_counter[sid].keys():
                confidence = diff_counter[sid][diff]
                if confidence > 4:
                    songs.append({
                        'song_id': song_id,
                        'song_name': song_name,
                        'confidence': confidence,
                        'offset': diff
                    })
        return songs
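PostgreSQLDB hard-codes its connection URLs (masked above) and relies on a module-level batch() helper that is not shown. A hedged end-to-end sketch, with a stand-in batch() and hypothetical fingerprint data; it needs real credentials in place of the masked URLs:

# Stand-in for the unshown batch() helper used by insert_hashes/return_matches.
def batch(values, n):
    for i in range(0, len(values), n):
        yield values[i:i + n]

# Hypothetical (offset, hash) pairs, as a fingerprinting step might produce.
hashes = [(0, 111111), (1, 222222), (2, 333333)]

db = PostgreSQLDB(drop_tables=True)
db.store("example-song", hashes)
matches = db.return_matches(hashes)
if matches is not None and len(matches):
    best_sids = db.get_best_sids(matches)
    print(db.align_matches(matches, best_sids))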
Example 7
class TestCache(TestCase):
    def setUp(self):
        self.db = Postgres(cache=Cache(max_size=1),
                           cursor_factory=SimpleTupleCursor)
        self.db.run("DROP SCHEMA IF EXISTS public CASCADE")
        self.db.run("CREATE SCHEMA public")
        self.db.run("CREATE TABLE foo (key text, value int)")
        self.db.run("INSERT INTO foo VALUES ('a', 1)")
        self.db.run("INSERT INTO foo VALUES ('b', 2)")

    def test_one_returns_cached_row(self):
        query = "SELECT * FROM foo WHERE key = 'a'"
        r1 = self.db.one(query, max_age=10)
        r2 = self.db.one(query, max_age=10)
        assert r2 is r1

    def test_all_returns_cached_rows(self):
        query = "SELECT * FROM foo ORDER BY key"
        r1 = self.db.all(query, max_age=10)
        r2 = self.db.all(query, max_age=10)
        assert r2 == r1
        assert r2 is not r1
        assert r2[0] is r1[0]

    def test_back_as_is_compatible_with_caching(self):
        query = "SELECT * FROM foo WHERE key = 'a'"
        r1 = self.db.one(query, back_as=dict, max_age=10)
        r2 = self.db.one(query, back_as=namedtuple, max_age=10)
        assert r1 == r2._asdict()
        rows = self.db.all(query, back_as='Row', max_age=10)
        assert rows == [r1]

    def test_all_returns_row_cached_by_one(self):
        query = "SELECT * FROM foo WHERE key = 'a'"
        row = self.db.one(query, max_age=10)
        rows = self.db.all(query, max_age=10)
        assert rows == [row]
        assert rows[0] is row

    def test_one_raises_TooMany_when_the_cache_contains_multiple_rows(self):
        query = "SELECT * FROM foo"
        rows = self.db.all(query, max_age=10)
        assert len(rows) == 2
        with self.assertRaises(TooMany):
            self.db.one(query, max_age=10)

    def test_cache_max_size(self):
        query1 = b"SELECT * FROM foo WHERE key = 'a'"
        query2 = b"SELECT * FROM foo WHERE key = 'b'"
        self.db.all(query1, max_age=10)
        assert set(self.db.cache.entries.keys()) == {query1}
        self.db.all(query2, max_age=10)
        assert set(self.db.cache.entries.keys()) == {query2}

    def test_cache_max_age(self):
        query = b"SELECT * FROM foo WHERE key = 'a'"
        r1 = self.db.one(query, max_age=0)
        r2 = self.db.one(query, max_age=10)
        assert r2 is not r1

    def test_cache_prune(self):
        self.db.cache.max_size = 2
        query1 = b"SELECT * FROM foo WHERE key = 'a'"
        query2 = b"SELECT * FROM foo WHERE key = 'b'"
        self.db.one(query1, max_age=-1)
        self.db.one(query2, max_age=10)
        assert set(self.db.cache.entries.keys()) == {query1, query2}
        self.db.cache.prune()
        assert set(self.db.cache.entries.keys()) == {query2}

    def test_cache_prevents_concurrent_queries(self):
        with self.db.get_cursor() as cursor:
            cursor.run("LOCK TABLE foo IN EXCLUSIVE MODE")

            def insert():
                self.db.one("INSERT INTO foo VALUES ('c', 3) RETURNING *",
                            max_age=1)

            t1 = Thread(target=insert)
            t2 = Thread(target=insert)
            t1.start()
            t2.start()
            cursor.run("COMMIT")  # this releases the table lock
        t1.join()
        t2.join()
        n = self.db.one("SELECT count(*) FROM foo WHERE key = 'c'")
        assert n == 1
Example 8
def main(filename: str, raw_file: str, head_file: str, CONNECT_DB=False):

    log.info("Running on file: {}".format(filename))

    # read headers from pickled file
    with open(filename, "rb") as f:
        headers = load(f)

    # read columns from csv file
    columns = combine_tables([raw_file, head_file])

    # calculate JD
    # calculate RA DEC
    for head in headers:
        add_file_id(head)
        add_jd(head)
        add_pos(head)

    # check if any of the keys in the dict are not used in the database
    used_headers = set(col["py-name"] for col in columns)
    new_headers = set()
    update_later = []
    for head in headers:
        unused = head.keys() - used_headers
        if len(unused):
            name_str = ". File: " + head.get("FILENAME", "")
            log.warn(f"Unused FITS headers{name_str} {unused}")
            # TODO: add new column;
            # https://www.postgresql.org/docs/13/sql-altertable.html
        new_headers |= {(key, type(head[key])) for key in unused}
        update_later.append(head)

    # TODO: add check verifying no duplicate keys are in here
    # TODO: update the column_list.csv file with the new headers
    # TODO: trigger update for DaCHS (or maybe next step in the CRON job)
    if len(new_headers) > 0:
        log.info("New Headers Found: ", new_headers)
        log.info("--- start column statements ---")
        for key, typ in new_headers:
            log.info(
                add_col_fmt.format("observations.raw", key, type_map[typ.__name__])
            )
        log.info("---- end column statements ----")
        log.info("")
        log.info("--- start csv statements ---")
        for key, typ in new_headers:
            log.info(csv_fmt.format(key, typ.__name__))
        log.info("---- end csv statements ----")

    if CONNECT_DB:
        db_url = "dbname=dachs"
        db = Postgres(db_url)
        unprocessed = []

        sql_stmt = prep_sql_statement(columns)

        log.info(f"Inserting {len(headers)} new headers")
        for header in headers:
            # Insert new headers if not yet in the database
            with db.get_cursor() as curs:
                # convert the header to a defaultdict which gives None if the
                # key is not present.
                try:
                    curs.run(sql_stmt, parameters=defaultdict(lambda: None, header))
                except UniqueViolation as e:
                    # Handle the failed connection error as well
                    log.info(
                        "UniqueViolation. Header probably already in the database. "
                        f"Updating header of {header['FILENAME']} later"
                    )
                    unprocessed.append(header)
        log.info("Done")

        log.info(f"Updating {len(unprocessed)} headers")
        for header in unprocessed:
            # Update already existing headers
            with db.get_cursor() as curs:
                update_stmt = make_update_statement(header, columns)
                try:
                    curs.run(update_stmt, parameters=header)
                except Exception as e:
                    log.exception(
                        f"Exception in updating, {e}. "
                        f"Happened with file {header['FILENAME']}"
                    )
        log.info("Done")