Ejemplo n.º 1
0
    def _create_database(self, db_name):
        """ Manages database creation.

        Connects to MySQL via Database class from records library.
        Creates a new MySQL local database.

        """
        connection = Database(f'mysql+pymysql://{MYSQL_ID}:\
{MYSQL_PW}@localhost/?charset=utf8')

        connection.query(f'CREATE DATABASE {db_name} CHARACTER SET "utf8"')
def get_count_of_distinct_next_stops(db: Database, relevant_stops: List[str]) -> Dict[int, int]:
    rows = db.query("""WITH relevant_stops AS (
                            SELECT unnest(:relevant_stops) AS uic_ref
                        ),
                        next_station_mapping AS (
                            SELECT DISTINCT
                              s.stop_name,
                              t.trip_id,
                              st.stop_sequence,
                              s.uic_ref
                            FROM stops s
                              INNER JOIN stop_times st ON s.stop_id = st.stop_id
                              INNER JOIN trips t ON st.trip_id = t.trip_id
                              INNER JOIN routes r ON t.route_id = r.route_id
                            WHERE r.route_type = 2 OR r.route_type = 1
                        )
                        SELECT distinct
                          nsm1.uic_ref,
                          COUNT(nsm2.stop_name)
                          OVER (PARTITION BY nsm1.uic_ref )
                        FROM relevant_stops
                          LEFT JOIN next_station_mapping nsm1 ON relevant_stops.uic_ref = nsm1.uic_ref
                          INNER JOIN next_station_mapping nsm2 ON nsm1.trip_id = nsm2.trip_id
                        WHERE nsm1.stop_sequence = (nsm2.stop_sequence - 1)
                        GROUP BY nsm1.uic_ref, nsm2.stop_name;""",
                    relevant_stops=relevant_stops).all()

    return {int(row['uic_ref']): row['count'] for row in rows}
def _retrieve_nearest_vertex(db: Database, uic_ref: int) -> Optional[int]:
    transaction = db.transaction()
    row = db.query("""SELECT nearest_vertex_id FROM stop_vertex_mapping
                        WHERE stop_uic_ref = :uic_ref;""",
                   uic_ref=uic_ref).first()
    transaction.commit()
    if row:
        return row['nearest_vertex_id']
    return None
Ejemplo n.º 4
0
    def test_db_populate_themes(self, database: Database,
                                mock_data: LoadedDbItemsJson) -> None:
        '''Test populating themes'''
        themes = database.query('SELECT * FROM theme').as_dict()

        assert len(themes) == len(mock_data.themes)  # nosec

        db_themes = [t['name'] for t in themes]

        assert set(mock_data.themes) - set(db_themes) == set()  # nosec
def optimize_stop_vertex_mapping(db: Database):
    logger.info("Locate public transport stops on the routing graph")
    transaction = db.transaction()
    db.query("""DROP TABLE IF EXISTS edge_preselection""")
    db.query("""CREATE UNLOGGED TABLE edge_preselection (
                  id INTEGER,
                  source INTEGER,
                  target INTEGER,
                  cost DOUBLE PRECISION
                )""")
    db.query("SELECT optimize_stop_vertex_mapping()")
    db.query("DROP TABLE edge_preselection;")
    transaction.commit()
Ejemplo n.º 6
0
    def test_db_populate_items(self, database: Database,
                               mock_data: LoadedDbItemsJson) -> None:
        '''Test populating items'''
        items = database.query('SELECT * FROM item').as_dict()

        assert len(items) == len(mock_data.items)  # nosec

        db_items = [i['name'] for i in items]
        mock_items = [i.name for i in mock_data.items]

        assert set(mock_items) - set(db_items) == set()  # nosec
def calc_isochrones(db: Database, uic_ref: int, boundaries: List[int]) -> List[Isochrone]:
    logger.info(f"Calculate isochrones for {uic_ref}")

    nearest_vertex_id = _retrieve_nearest_vertex(db, uic_ref)
    if not nearest_vertex_id:
        return list()
    transaction = db.transaction()
    rows = db.query("""SELECT distance, polygon FROM isochrones(:node_id, :boundaries)""",
                    node_id=nearest_vertex_id,
                    boundaries=boundaries
                    ).all()
    isochrones = _map_isochrones(rows)
    transaction.commit()
    return isochrones
def _query_transport_stop_rows(db: Database):
    return db.query("""SELECT DISTINCT
                          s.uic_ref,
                          s.stop_name,
                          s.stop_lat,
                          s.stop_lon,
                          array_agg(r.route_type)
                          OVER (PARTITION BY s.uic_ref) AS route_types,
                          (s.uic_ref IN (SELECT uic_ref FROM intercity_stations)) AS is_intercity_station
                        FROM stops s
                          INNER JOIN stop_times st ON s.stop_id = st.stop_id
                          INNER JOIN trips t ON st.trip_id = t.trip_id
                          INNER JOIN routes r ON t.route_id = r.route_id
                        WHERE s.stop_id LIKE '85%'
                        GROUP BY s.uic_ref, s.stop_name, s.stop_lat, s.stop_lon, r.route_type;
                        """).all()
Ejemplo n.º 9
0
    def test_db_populate_names(self, database: Database,
                               mock_data: LoadedDbItemsJson) -> None:
        '''Test populating themes'''
        names_from_db = database.query('SELECT * FROM name').as_dict()

        assert len(names_from_db) == len(mock_data.names)  # nosec

        mock_full_names = [
            f'{n.firstname} {n.lastname}' for n in mock_data.names
        ]

        db_full_names = [
            f'{n["firstname"]} {n["lastname"]}' for n in names_from_db
        ]

        assert set(mock_full_names) - set(db_full_names) == set()  # nosec
Ejemplo n.º 10
0
    def test_db_populate_link_theme_items(
            self, database: Database, mock_data: LoadedDbItemsJson) -> None:
        '''Test links between names and themes'''
        sql = '''
            SELECT theme.name
            FROM item, linkitemtheme, theme
            WHERE item.id=linkitemtheme.id_item
                AND theme.id=linkitemtheme.id_theme
                AND item.name=:iname
        '''

        for item in mock_data.items:
            links = database.query(sql, iname=item.name).as_dict()
            link_themes = [l['name'] for l in links]

            assert set(link_themes) - set(item.themes) == set()  # nosec
def _query_frequency_departure_times(db: Database, due_date: datetime) -> Dict[int, List[datetime]]:
    """Get departure times for stops which have trips that are modeled in the frequencies table"""
    due_date_gtfs: str = _format_gtfs_date(due_date)
    rows = db.query("""SELECT
                    s.uic_ref,
                    array_agg(st.departure_time + (INTERVAL '1s' * intervals)) AS departure_times
                  FROM stop_times st
                    INNER JOIN frequencies f on st.trip_id = f.trip_id
                    INNER JOIN trips t on f.trip_id = t.trip_id
                    INNER JOIN stops s on st.stop_id = s.stop_id
                    LEFT JOIN calendar_dates c ON t.service_id = c.service_id,
                  generate_series(0, 86400, f.headway_secs) intervals

                  WHERE (st.departure_time + (INTERVAL '1s' * intervals)) <= f.end_time
                    AND (c.date = :date OR t.service_id = '000000')
                  GROUP BY s.uic_ref""",
                    date=due_date_gtfs).all()
    return {row['uic_ref']: _combine_departure_time(row, due_date) for row in rows}
Ejemplo n.º 12
0
    def test_db_populate_link_theme_names(
            self, database: Database, mock_data: LoadedDbItemsJson) -> None:
        '''Test links between names and themes'''
        sql = '''
            SELECT theme.name
            FROM name, linknametheme, theme
            WHERE name.id=linknametheme.id_name
                AND theme.id=linknametheme.id_theme
                AND name.firstname=:fname
                AND name.lastname=:lname
        '''

        for name in mock_data.names:
            links = database.query(sql,
                                   fname=name.firstname,
                                   lname=name.lastname).as_dict()
            link_themes = [l['name'] for l in links]

            assert set(link_themes) - set(name.themes) == set()  # nosec
Ejemplo n.º 13
0
    def test_db_populate_link_theme_features(
            self, database: Database, mock_data: LoadedDbItemsJson) -> None:
        '''Test links between names and themes'''
        sql = '''
            SELECT theme.name
            FROM feature, linkfeaturetheme, theme
            WHERE feature.id=linkfeaturetheme.id_feature
                AND theme.id=linkfeaturetheme.id_theme
                AND feature.text_masc=:tmasc
                AND feature.text_fem=:tfem
        '''

        for feat in mock_data.features:
            links = database.query(sql,
                                   tmasc=feat.text_masc,
                                   tfem=feat.text_fem).as_dict()
            link_themes = [l['name'] for l in links]

            assert set(link_themes) - set(feat.themes) == set()  # nosec
def _query_stop_times_departures(db: Database, due_date: datetime) -> Dict[int, List[datetime]]:
    due_date_gtfs: str = _format_gtfs_date(due_date)
    rows = db.query("""WITH calendar_trip_mapping AS (
                            SELECT
                              st.departure_time,
                              s.uic_ref
                            FROM stop_times st
                              INNER JOIN stops s ON st.stop_id = s.stop_id
                              INNER JOIN trips t ON st.trip_id = t.trip_id
                              LEFT JOIN calendar_dates c ON t.service_id = c.service_id
                            WHERE NOT EXISTS(SELECT 1
                                             FROM frequencies f
                                             WHERE f.trip_id = t.trip_id) 
                                  AND (c.date = :date OR t.service_id = '000000')
                        )
                        SELECT
                          uic_ref,
                          array_agg(departure_time) AS departure_times
                        FROM calendar_trip_mapping
                        GROUP BY uic_ref""",
                    date=due_date_gtfs).all()
    # service_id 000000 represents the whole schedule
    return {row['uic_ref']: _combine_departure_time(row, due_date) for row in rows}
def mark_relevant_roads(db: Database, max_relevant_distance: float):
    logger.info(f"Mark nodes that are reachable in {max_relevant_distance} metres")
    transaction = db.transaction()
    db.query("""SELECT mark_relevant_ways(:max_relevant_distance);""", max_relevant_distance=max_relevant_distance)
    transaction.commit()
Ejemplo n.º 16
0
class AudioDB:
    def __init__(self):
        cfg = load_config()

        self._db = Database("{}://{}:{}@{}/{}".format(
            cfg['sql_type'], cfg['sql_user'], cfg['sql_pass'],
            cfg['sql_host'], cfg['sql_db']))

    def _get_data(self, query, audio_id: str):
        ids = audio_id.split("-")

        if len(ids) != 3:
            return None

        rows = self._db.query(query, rel=ids[0], side=ids[1], track=ids[2])

        result = json.loads(rows.export("json"))

        if len(result) == 0:
            return None

        return result[0]

    def setup(self):
        self._db.query("""
        CREATE TABLE Audio
        (
            audio_release INT NOT NULL,
            audio_side INT NOT NULL,
            audio_track INT NOT NULL,
            bpm INT,
            bpm_confidence FLOAT,
            timbre VARCHAR(20),
            timbre_confidence FLOAT,
            relaxed VARCHAR(20),
            relaxed_confidence FLOAT,
            party VARCHAR(20),
            party_confidence FLOAT,
            aggressive VARCHAR(20),
            aggressive_confidence FLOAT,
            happy VARCHAR(20),
            happy_confidence FLOAT,
            sad VARCHAR(20),
            sad_confidence FLOAT,
            peak FLOAT,
            loudness_integrated FLOAT,
            loudness_range FLOAT,
            last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            PRIMARY KEY(audio_release, audio_side, audio_track)
        )""")

        # CREATE INDEXES

    def exists(self, audio_id: str) -> bool:
        """Check if an entry with the given audio id exists in the database

        Parameters
        ----------
        audio_id : str
            The id of the audio to search for

        Returns
        -------
        bool
            True if an entry exists, False otherwise
        """

        query = """
                SELECT audio_release
                FROM Audio
                WHERE audio_release=:rel
                    AND audio_side=:side
                    AND audio_track=:track
                """

        return self._get_data(query, audio_id) is not None

    def post_all(self, data: Dict) -> str:
        if 'audio_id' not in data:
            pass

        ids = data['audio_id'].split("-")

        if len(ids) != 3:
            return None

        query = """
        INSERT INTO Audio (
            audio_release, audio_side, audio_track,
            bpm, bpm_confidence, timbre,
            timbre_confidence, relaxed,
            relaxed_confidence, party,
            party_confidence, aggressive,
            aggressive_confidence, happy,
            happy_confidence, sad,
            sad_confidence, peak,
            loudness_integrated, loudness_range
        )
        VALUES ("""

        query = "{}{}, {}, {}, ".format(query, ids[0], ids[1], ids[2])

        if 'BPM' in data:
            query = "{}{}, {}, ".format(
                query, data['BPM']['value'], data['BPM']['confidence'])
        else:
            query = query + "NULL, NULL, "

        if 'timbre' in data:
            query = "{}'{}', {}, ".format(
                query, data['timbre']['value'], data['timbre']['confidence'])
        else:
            query = query + "NULL, NULL, "

        if 'emotions' in data:
            query = "{}'{}', {}, ".format(
                query, data['emotions']['relaxed']['value'],
                data['emotions']['relaxed']['confidence'])
            query = "{}'{}', {}, ".format(
                query, data['emotions']['party']['value'],
                data['emotions']['party']['confidence'])
            query = "{}'{}', {}, ".format(
                query, data['emotions']['aggressive']['value'],
                data['emotions']['aggressive']['confidence'])
            query = "{}'{}', {}, ".format(
                query, data['emotions']['happy']['value'],
                data['emotions']['happy']['confidence'])
            query = "{}'{}', {}, ".format(
                query, data['emotions']['sad']['value'],
                data['emotions']['sad']['confidence'])
        else:
            query = (query + "NULL, NULL, NULL, NULL, " +
                     " NULL, NULL, NULL, NULL, NULL, NULL, ")

        if 'loudness' in data:
            query = "{}{}, ".format(query, data['loudness']['peak'])
            query = "{}{}, ".format(
                query, data['loudness']['loudness_integrated'])
            query = "{}{})".format(query, data['loudness']['loudness_range'])
        else:
            query = query + "NULL, NULL, NULL)"

        self._db.query(query)

    def update_all(self, data: Dict) -> str:
        if 'audio_id' not in data:
            pass

        ids = data['audio_id'].split("-")

        if len(ids) != 3:
            return None

        query = "UPDATE Audio SET "

        if 'BPM' in data:
            query = "{}bpm={}, bpm_confidence={}, ".format(
                query, data['BPM']['value'], data['BPM']['confidence'])
        else:
            query = query + "bpm=NULL, bpm_confidence=NULL, "

        if 'timbre' in data:
            query = "{}timbre='{}', timbre_confidence={}, ".format(
                query, data['timbre']['value'], data['timbre']['confidence'])
        else:
            query = query + "timbre=NULL, timbre_confidence=NULL, "

        if 'emotions' in data:
            query = "{}relaxed='{}', relaxed_confidence={}, ".format(
                query, data['emotions']['relaxed']['value'],
                data['emotions']['relaxed']['confidence'])
            query = "{}party='{}', party_confidence={}, ".format(
                query, data['emotions']['party']['value'],
                data['emotions']['party']['confidence'])
            query = "{}aggressive='{}', aggressive_confidence={}, ".format(
                query, data['emotions']['aggressive']['value'],
                data['emotions']['aggressive']['confidence'])
            query = "{}happy='{}', happy_confidence={}, ".format(
                query, data['emotions']['happy']['value'],
                data['emotions']['happy']['confidence'])
            query = "{}sad='{}', sad_confidence={}, ".format(
                query, data['emotions']['sad']['value'],
                data['emotions']['sad']['confidence'])
        else:
            query = (query + "relaxed=NULL, relaxed_confidence=NULL," +
                     "party=NULL, party_confidence=NULL," +
                     "aggressive=NULL, aggressive_confidence=NULL," +
                     "happy=NULL, happy_confidence=NULL, sad=NULL," +
                     "sad_confidence=NULL, ")

        if 'loudness' in data:
            query = "{}peak={}, ".format(query, data['loudness']['peak'])
            query = "{}loudness_integrated={}, ".format(
                query, data['loudness']['loudness_integrated'])
            query = "{}loudness_range={}, ".format(
                query, data['loudness']['loudness_range'])
        else:
            query = (query + "peak=NULL, loudness_integrated=NULL," +
                     "loudness_range=NULL, ")

        query = query + "last_updated=CURRENT_TIMESTAMP "

        query = ("{}WHERE audio_release={} AND audio_side={}" +
                 "AND audio_track={}").format(
            query, ids[0], ids[1], ids[2])

        self._db.query(query)

    def get_all(self, audio_id: str) -> str:
        """Get all fields for the given audio_id

        Parameters
        ----------
        audio_id : str
            The id of the audio to search for

        Returns
        -------
        str
            A JSON string containing the result
        """

        query = """
                SELECT *
                FROM Audio
                WHERE audio_release=:rel
                    AND audio_side=:side
                    AND audio_track=:track
                """

        return self._get_data(query, audio_id)

    def get_rhythm(self, audio_id: str) -> str:
        """Get all rhythm fields for the given audio_id

        Parameters
        ----------
        audio_id : str
            The id of the audio to search for

        Returns
        -------
        str
            A JSON string containing the result
        """

        query = """
                SELECT bpm, bpm_confidence, last_updated
                FROM Audio
                WHERE audio_release=:rel
                    AND audio_side=:side
                    AND audio_track=:track
                """

        return self._get_data(query, audio_id)

    def get_bpm(self, audio_id: str) -> str:
        """Get all bpm fields for the given audio_id

        Parameters
        ----------
        audio_id : str
            The id of the audio to search for

        Returns
        -------
        str
            A JSON string containing the result
        """

        query = """
                SELECT bpm, bpm_confidence, last_updated
                FROM Audio
                WHERE audio_release=:rel
                    AND audio_side=:side
                    AND audio_track=:track
                """

        return self._get_data(query, audio_id)

    def get_timbre(self, audio_id: str) -> str:
        """Get all timbre fields for the given audio_id

        Parameters
        ----------
        audio_id : str
            The id of the audio to search for

        Returns
        -------
        str
            A JSON string containing the result
        """

        query = """
                SELECT timbre, timbre_confidence, last_updated
                FROM Audio
                WHERE audio_release=:rel
                    AND audio_side=:side
                    AND audio_track=:track
                """

        return self._get_data(query, audio_id)

    def get_emotions(self, audio_id: str) -> str:
        """Get all emotion fields for the given audio_id

        Parameters
        ----------
        audio_id : str
            The id of the audio to search for

        Returns
        -------
        str
            A JSON string containing the result
        """

        query = """
                SELECT relaxed, relaxed_confidence, party,
                       party_confidence, aggressive,
                       aggressive_confidence, happy,
                       happy_confidence, sad,
                       sad_confidence, last_updated
                FROM Audio
                WHERE audio_release=:rel
                    AND audio_side=:side
                    AND audio_track=:track
                """

        return self._get_data(query, audio_id)

    def get_relaxed(self, audio_id: str) -> str:
        """Get all relaxed fields for the given audio_id

        Parameters
        ----------
        audio_id : str
            The id of the audio to search for

        Returns
        -------
        str
            A JSON string containing the result
        """

        query = """
                SELECT relaxed, relaxed_confidence, last_updated
                FROM Audio
                WHERE audio_release=:rel
                    AND audio_side=:side
                    AND audio_track=:track
                """

        return self._get_data(query, audio_id)

    def get_party(self, audio_id: str) -> str:
        """Get all party fields for the given audio_id

        Parameters
        ----------
        audio_id : str
            The id of the audio to search for

        Returns
        -------
        str
            A JSON string containing the result
        """

        query = """
            SELECT party, party_confidence, last_updated
            FROM Audio
            WHERE audio_release=:rel
                AND audio_side=:side
                AND audio_track=:track
            """

        return self._get_data(query, audio_id)

    def get_aggressive(self, audio_id: str) -> str:
        """Get all aggressive fields for the given audio_id

        Parameters
        ----------
        audio_id : str
            The id of the audio to search for

        Returns
        -------
        str
            A JSON string containing the result
        """

        query = """
            SELECT aggressive, aggressive_confidence, last_updated
            FROM Audio
            WHERE audio_release=:rel
                AND audio_side=:side
                AND audio_track=:track
            """

        return self._get_data(query, audio_id)

    def get_happy(self, audio_id: str) -> str:
        """Get all happy fields for the given audio_id

        Parameters
        ----------
        audio_id : str
            The id of the audio to search for

        Returns
        -------
        str
            A JSON string containing the result
        """

        query = """
            SELECT happy, happy_confidence, last_updated
            FROM Audio
            WHERE audio_release=:rel
                AND audio_side=:side
                AND audio_track=:track
            """

        return self._get_data(query, audio_id)

    def get_sad(self, audio_id: str) -> str:
        """Get all sad fields for the given audio_id

        Parameters
        ----------
        audio_id : str
            The id of the audio to search for

        Returns
        -------
        str
            A JSON string containing the result
        """

        query = """
                SELECT sad, sad_confidence, last_updated
                FROM Audio
                WHERE audio_release=:rel
                    AND audio_side=:side
                    AND audio_track=:track
                """

        return self._get_data(query, audio_id)

    def get_level(self, audio_id: str) -> str:
        """Get all level fields for the given audio_id

        Parameters
        ----------
        audio_id : str
            The id of the audio to search for

        Returns
        -------
        str
            A JSON string containing the result
        """

        query = """
            SELECT peak, Loudness_integrated, loudness_range, last_updated
            FROM Audio
            WHERE audio_release=:rel
                AND audio_side=:side
                AND audio_track=:track
            """

        return self._get_data(query, audio_id)

    def get_peak(self, audio_id: str) -> str:
        """Get all peak fields for the given audio_id

        Parameters
        ----------
        audio_id : str
            The id of the audio to search for

        Returns
        -------
        str
            A JSON string containing the result
        """

        query = """
            SELECT peak, last_updated
            FROM Audio
            WHERE audio_release=:rel
                AND audio_side=:side
                AND audio_track=:track
            """

        return self._get_data(query, audio_id)

    def get_loudness_integrated(self, audio_id: str) -> str:
        """Get all loudness integrated fields for the given audio_id

        Parameters
        ----------
        audio_id : str
            The id of the audio to search for

        Returns
        -------
        str
            A JSON string containing the result
        """

        query = """
            SELECT Loudness_integrated, last_updated
            FROM Audio
            WHERE audio_release=:rel
                AND audio_side=:side
                AND audio_track=:track
            """

        return self._get_data(query, audio_id)

    def get_loudness_range(self, audio_id: str) -> str:
        """Get all loudness range fields for the given audio_id

        Parameters
        ----------
        audio_id : str
            The id of the audio to search for

        Returns
        -------
        str
            A JSON string containing the result
        """

        query = """
            SELECT loudness_range, last_updated
            FROM Audio
            WHERE audio_release=:rel
                AND audio_side=:side
                AND audio_track=:track
            """

        return self._get_data(query, audio_id)
Ejemplo n.º 17
0
    return os.path.exists('data/readmes/' + file_name)


def path(github):
    file_name = '__'.join(github.split('/'))
    return 'data/readmes/' + file_name


def has_file(row):
    return os.path.exists(path(row.github))


if not os.path.isfile('data/data.pkl'):

    db = Database(os.environ['DATABASE'])
    rows = db.query('select github, stars, time_alive from libs')

    filtered = filter(has_file, rows)

    data = {
        'y': [],
        'x1': [],
        'x2': [],
    }

    for lib in filtered:
        data['y'].append(lib.stars)
        data['x1'].append(parse_readme(lib.github))
        data['x2'].append(int(lib.time_alive / 1000))

    pickle.dump(data, open("data/data.pkl", "w"))