def _create_database(self, db_name): """ Manages database creation. Connects to MySQL via Database class from records library. Creates a new MySQL local database. """ connection = Database(f'mysql+pymysql://{MYSQL_ID}:\ {MYSQL_PW}@localhost/?charset=utf8') connection.query(f'CREATE DATABASE {db_name} CHARACTER SET "utf8"')
def get_count_of_distinct_next_stops(db: Database, relevant_stops: List[str]) -> Dict[int, int]: rows = db.query("""WITH relevant_stops AS ( SELECT unnest(:relevant_stops) AS uic_ref ), next_station_mapping AS ( SELECT DISTINCT s.stop_name, t.trip_id, st.stop_sequence, s.uic_ref FROM stops s INNER JOIN stop_times st ON s.stop_id = st.stop_id INNER JOIN trips t ON st.trip_id = t.trip_id INNER JOIN routes r ON t.route_id = r.route_id WHERE r.route_type = 2 OR r.route_type = 1 ) SELECT distinct nsm1.uic_ref, COUNT(nsm2.stop_name) OVER (PARTITION BY nsm1.uic_ref ) FROM relevant_stops LEFT JOIN next_station_mapping nsm1 ON relevant_stops.uic_ref = nsm1.uic_ref INNER JOIN next_station_mapping nsm2 ON nsm1.trip_id = nsm2.trip_id WHERE nsm1.stop_sequence = (nsm2.stop_sequence - 1) GROUP BY nsm1.uic_ref, nsm2.stop_name;""", relevant_stops=relevant_stops).all() return {int(row['uic_ref']): row['count'] for row in rows}
def _retrieve_nearest_vertex(db: Database, uic_ref: int) -> Optional[int]: transaction = db.transaction() row = db.query("""SELECT nearest_vertex_id FROM stop_vertex_mapping WHERE stop_uic_ref = :uic_ref;""", uic_ref=uic_ref).first() transaction.commit() if row: return row['nearest_vertex_id'] return None
def test_db_populate_themes(self, database: Database, mock_data: LoadedDbItemsJson) -> None: '''Test populating themes''' themes = database.query('SELECT * FROM theme').as_dict() assert len(themes) == len(mock_data.themes) # nosec db_themes = [t['name'] for t in themes] assert set(mock_data.themes) - set(db_themes) == set() # nosec
def optimize_stop_vertex_mapping(db: Database): logger.info("Locate public transport stops on the routing graph") transaction = db.transaction() db.query("""DROP TABLE IF EXISTS edge_preselection""") db.query("""CREATE UNLOGGED TABLE edge_preselection ( id INTEGER, source INTEGER, target INTEGER, cost DOUBLE PRECISION )""") db.query("SELECT optimize_stop_vertex_mapping()") db.query("DROP TABLE edge_preselection;") transaction.commit()
def test_db_populate_items(self, database: Database, mock_data: LoadedDbItemsJson) -> None: '''Test populating items''' items = database.query('SELECT * FROM item').as_dict() assert len(items) == len(mock_data.items) # nosec db_items = [i['name'] for i in items] mock_items = [i.name for i in mock_data.items] assert set(mock_items) - set(db_items) == set() # nosec
def calc_isochrones(db: Database, uic_ref: int, boundaries: List[int]) -> List[Isochrone]: logger.info(f"Calculate isochrones for {uic_ref}") nearest_vertex_id = _retrieve_nearest_vertex(db, uic_ref) if not nearest_vertex_id: return list() transaction = db.transaction() rows = db.query("""SELECT distance, polygon FROM isochrones(:node_id, :boundaries)""", node_id=nearest_vertex_id, boundaries=boundaries ).all() isochrones = _map_isochrones(rows) transaction.commit() return isochrones
def _query_transport_stop_rows(db: Database): return db.query("""SELECT DISTINCT s.uic_ref, s.stop_name, s.stop_lat, s.stop_lon, array_agg(r.route_type) OVER (PARTITION BY s.uic_ref) AS route_types, (s.uic_ref IN (SELECT uic_ref FROM intercity_stations)) AS is_intercity_station FROM stops s INNER JOIN stop_times st ON s.stop_id = st.stop_id INNER JOIN trips t ON st.trip_id = t.trip_id INNER JOIN routes r ON t.route_id = r.route_id WHERE s.stop_id LIKE '85%' GROUP BY s.uic_ref, s.stop_name, s.stop_lat, s.stop_lon, r.route_type; """).all()
def test_db_populate_names(self, database: Database, mock_data: LoadedDbItemsJson) -> None: '''Test populating themes''' names_from_db = database.query('SELECT * FROM name').as_dict() assert len(names_from_db) == len(mock_data.names) # nosec mock_full_names = [ f'{n.firstname} {n.lastname}' for n in mock_data.names ] db_full_names = [ f'{n["firstname"]} {n["lastname"]}' for n in names_from_db ] assert set(mock_full_names) - set(db_full_names) == set() # nosec
def test_db_populate_link_theme_items( self, database: Database, mock_data: LoadedDbItemsJson) -> None: '''Test links between names and themes''' sql = ''' SELECT theme.name FROM item, linkitemtheme, theme WHERE item.id=linkitemtheme.id_item AND theme.id=linkitemtheme.id_theme AND item.name=:iname ''' for item in mock_data.items: links = database.query(sql, iname=item.name).as_dict() link_themes = [l['name'] for l in links] assert set(link_themes) - set(item.themes) == set() # nosec
def _query_frequency_departure_times(db: Database, due_date: datetime) -> Dict[int, List[datetime]]: """Get departure times for stops which have trips that are modeled in the frequencies table""" due_date_gtfs: str = _format_gtfs_date(due_date) rows = db.query("""SELECT s.uic_ref, array_agg(st.departure_time + (INTERVAL '1s' * intervals)) AS departure_times FROM stop_times st INNER JOIN frequencies f on st.trip_id = f.trip_id INNER JOIN trips t on f.trip_id = t.trip_id INNER JOIN stops s on st.stop_id = s.stop_id LEFT JOIN calendar_dates c ON t.service_id = c.service_id, generate_series(0, 86400, f.headway_secs) intervals WHERE (st.departure_time + (INTERVAL '1s' * intervals)) <= f.end_time AND (c.date = :date OR t.service_id = '000000') GROUP BY s.uic_ref""", date=due_date_gtfs).all() return {row['uic_ref']: _combine_departure_time(row, due_date) for row in rows}
def test_db_populate_link_theme_names( self, database: Database, mock_data: LoadedDbItemsJson) -> None: '''Test links between names and themes''' sql = ''' SELECT theme.name FROM name, linknametheme, theme WHERE name.id=linknametheme.id_name AND theme.id=linknametheme.id_theme AND name.firstname=:fname AND name.lastname=:lname ''' for name in mock_data.names: links = database.query(sql, fname=name.firstname, lname=name.lastname).as_dict() link_themes = [l['name'] for l in links] assert set(link_themes) - set(name.themes) == set() # nosec
def test_db_populate_link_theme_features( self, database: Database, mock_data: LoadedDbItemsJson) -> None: '''Test links between names and themes''' sql = ''' SELECT theme.name FROM feature, linkfeaturetheme, theme WHERE feature.id=linkfeaturetheme.id_feature AND theme.id=linkfeaturetheme.id_theme AND feature.text_masc=:tmasc AND feature.text_fem=:tfem ''' for feat in mock_data.features: links = database.query(sql, tmasc=feat.text_masc, tfem=feat.text_fem).as_dict() link_themes = [l['name'] for l in links] assert set(link_themes) - set(feat.themes) == set() # nosec
def _query_stop_times_departures(db: Database, due_date: datetime) -> Dict[int, List[datetime]]: due_date_gtfs: str = _format_gtfs_date(due_date) rows = db.query("""WITH calendar_trip_mapping AS ( SELECT st.departure_time, s.uic_ref FROM stop_times st INNER JOIN stops s ON st.stop_id = s.stop_id INNER JOIN trips t ON st.trip_id = t.trip_id LEFT JOIN calendar_dates c ON t.service_id = c.service_id WHERE NOT EXISTS(SELECT 1 FROM frequencies f WHERE f.trip_id = t.trip_id) AND (c.date = :date OR t.service_id = '000000') ) SELECT uic_ref, array_agg(departure_time) AS departure_times FROM calendar_trip_mapping GROUP BY uic_ref""", date=due_date_gtfs).all() # service_id 000000 represents the whole schedule return {row['uic_ref']: _combine_departure_time(row, due_date) for row in rows}
def mark_relevant_roads(db: Database, max_relevant_distance: float): logger.info(f"Mark nodes that are reachable in {max_relevant_distance} metres") transaction = db.transaction() db.query("""SELECT mark_relevant_ways(:max_relevant_distance);""", max_relevant_distance=max_relevant_distance) transaction.commit()
class AudioDB: def __init__(self): cfg = load_config() self._db = Database("{}://{}:{}@{}/{}".format( cfg['sql_type'], cfg['sql_user'], cfg['sql_pass'], cfg['sql_host'], cfg['sql_db'])) def _get_data(self, query, audio_id: str): ids = audio_id.split("-") if len(ids) != 3: return None rows = self._db.query(query, rel=ids[0], side=ids[1], track=ids[2]) result = json.loads(rows.export("json")) if len(result) == 0: return None return result[0] def setup(self): self._db.query(""" CREATE TABLE Audio ( audio_release INT NOT NULL, audio_side INT NOT NULL, audio_track INT NOT NULL, bpm INT, bpm_confidence FLOAT, timbre VARCHAR(20), timbre_confidence FLOAT, relaxed VARCHAR(20), relaxed_confidence FLOAT, party VARCHAR(20), party_confidence FLOAT, aggressive VARCHAR(20), aggressive_confidence FLOAT, happy VARCHAR(20), happy_confidence FLOAT, sad VARCHAR(20), sad_confidence FLOAT, peak FLOAT, loudness_integrated FLOAT, loudness_range FLOAT, last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY(audio_release, audio_side, audio_track) )""") # CREATE INDEXES def exists(self, audio_id: str) -> bool: """Check if an entry with the given audio id exists in the database Parameters ---------- audio_id : str The id of the audio to search for Returns ------- bool True if an entry exists, False otherwise """ query = """ SELECT audio_release FROM Audio WHERE audio_release=:rel AND audio_side=:side AND audio_track=:track """ return self._get_data(query, audio_id) is not None def post_all(self, data: Dict) -> str: if 'audio_id' not in data: pass ids = data['audio_id'].split("-") if len(ids) != 3: return None query = """ INSERT INTO Audio ( audio_release, audio_side, audio_track, bpm, bpm_confidence, timbre, timbre_confidence, relaxed, relaxed_confidence, party, party_confidence, aggressive, aggressive_confidence, happy, happy_confidence, sad, sad_confidence, peak, loudness_integrated, loudness_range ) VALUES (""" query = "{}{}, {}, {}, ".format(query, ids[0], ids[1], ids[2]) if 'BPM' in data: query = "{}{}, {}, ".format( query, data['BPM']['value'], data['BPM']['confidence']) else: query = query + "NULL, NULL, " if 'timbre' in data: query = "{}'{}', {}, ".format( query, data['timbre']['value'], data['timbre']['confidence']) else: query = query + "NULL, NULL, " if 'emotions' in data: query = "{}'{}', {}, ".format( query, data['emotions']['relaxed']['value'], data['emotions']['relaxed']['confidence']) query = "{}'{}', {}, ".format( query, data['emotions']['party']['value'], data['emotions']['party']['confidence']) query = "{}'{}', {}, ".format( query, data['emotions']['aggressive']['value'], data['emotions']['aggressive']['confidence']) query = "{}'{}', {}, ".format( query, data['emotions']['happy']['value'], data['emotions']['happy']['confidence']) query = "{}'{}', {}, ".format( query, data['emotions']['sad']['value'], data['emotions']['sad']['confidence']) else: query = (query + "NULL, NULL, NULL, NULL, " + " NULL, NULL, NULL, NULL, NULL, NULL, ") if 'loudness' in data: query = "{}{}, ".format(query, data['loudness']['peak']) query = "{}{}, ".format( query, data['loudness']['loudness_integrated']) query = "{}{})".format(query, data['loudness']['loudness_range']) else: query = query + "NULL, NULL, NULL)" self._db.query(query) def update_all(self, data: Dict) -> str: if 'audio_id' not in data: pass ids = data['audio_id'].split("-") if len(ids) != 3: return None query = "UPDATE Audio SET " if 'BPM' in data: query = "{}bpm={}, bpm_confidence={}, ".format( query, data['BPM']['value'], data['BPM']['confidence']) else: query = query + "bpm=NULL, bpm_confidence=NULL, " if 'timbre' in data: query = "{}timbre='{}', timbre_confidence={}, ".format( query, data['timbre']['value'], data['timbre']['confidence']) else: query = query + "timbre=NULL, timbre_confidence=NULL, " if 'emotions' in data: query = "{}relaxed='{}', relaxed_confidence={}, ".format( query, data['emotions']['relaxed']['value'], data['emotions']['relaxed']['confidence']) query = "{}party='{}', party_confidence={}, ".format( query, data['emotions']['party']['value'], data['emotions']['party']['confidence']) query = "{}aggressive='{}', aggressive_confidence={}, ".format( query, data['emotions']['aggressive']['value'], data['emotions']['aggressive']['confidence']) query = "{}happy='{}', happy_confidence={}, ".format( query, data['emotions']['happy']['value'], data['emotions']['happy']['confidence']) query = "{}sad='{}', sad_confidence={}, ".format( query, data['emotions']['sad']['value'], data['emotions']['sad']['confidence']) else: query = (query + "relaxed=NULL, relaxed_confidence=NULL," + "party=NULL, party_confidence=NULL," + "aggressive=NULL, aggressive_confidence=NULL," + "happy=NULL, happy_confidence=NULL, sad=NULL," + "sad_confidence=NULL, ") if 'loudness' in data: query = "{}peak={}, ".format(query, data['loudness']['peak']) query = "{}loudness_integrated={}, ".format( query, data['loudness']['loudness_integrated']) query = "{}loudness_range={}, ".format( query, data['loudness']['loudness_range']) else: query = (query + "peak=NULL, loudness_integrated=NULL," + "loudness_range=NULL, ") query = query + "last_updated=CURRENT_TIMESTAMP " query = ("{}WHERE audio_release={} AND audio_side={}" + "AND audio_track={}").format( query, ids[0], ids[1], ids[2]) self._db.query(query) def get_all(self, audio_id: str) -> str: """Get all fields for the given audio_id Parameters ---------- audio_id : str The id of the audio to search for Returns ------- str A JSON string containing the result """ query = """ SELECT * FROM Audio WHERE audio_release=:rel AND audio_side=:side AND audio_track=:track """ return self._get_data(query, audio_id) def get_rhythm(self, audio_id: str) -> str: """Get all rhythm fields for the given audio_id Parameters ---------- audio_id : str The id of the audio to search for Returns ------- str A JSON string containing the result """ query = """ SELECT bpm, bpm_confidence, last_updated FROM Audio WHERE audio_release=:rel AND audio_side=:side AND audio_track=:track """ return self._get_data(query, audio_id) def get_bpm(self, audio_id: str) -> str: """Get all bpm fields for the given audio_id Parameters ---------- audio_id : str The id of the audio to search for Returns ------- str A JSON string containing the result """ query = """ SELECT bpm, bpm_confidence, last_updated FROM Audio WHERE audio_release=:rel AND audio_side=:side AND audio_track=:track """ return self._get_data(query, audio_id) def get_timbre(self, audio_id: str) -> str: """Get all timbre fields for the given audio_id Parameters ---------- audio_id : str The id of the audio to search for Returns ------- str A JSON string containing the result """ query = """ SELECT timbre, timbre_confidence, last_updated FROM Audio WHERE audio_release=:rel AND audio_side=:side AND audio_track=:track """ return self._get_data(query, audio_id) def get_emotions(self, audio_id: str) -> str: """Get all emotion fields for the given audio_id Parameters ---------- audio_id : str The id of the audio to search for Returns ------- str A JSON string containing the result """ query = """ SELECT relaxed, relaxed_confidence, party, party_confidence, aggressive, aggressive_confidence, happy, happy_confidence, sad, sad_confidence, last_updated FROM Audio WHERE audio_release=:rel AND audio_side=:side AND audio_track=:track """ return self._get_data(query, audio_id) def get_relaxed(self, audio_id: str) -> str: """Get all relaxed fields for the given audio_id Parameters ---------- audio_id : str The id of the audio to search for Returns ------- str A JSON string containing the result """ query = """ SELECT relaxed, relaxed_confidence, last_updated FROM Audio WHERE audio_release=:rel AND audio_side=:side AND audio_track=:track """ return self._get_data(query, audio_id) def get_party(self, audio_id: str) -> str: """Get all party fields for the given audio_id Parameters ---------- audio_id : str The id of the audio to search for Returns ------- str A JSON string containing the result """ query = """ SELECT party, party_confidence, last_updated FROM Audio WHERE audio_release=:rel AND audio_side=:side AND audio_track=:track """ return self._get_data(query, audio_id) def get_aggressive(self, audio_id: str) -> str: """Get all aggressive fields for the given audio_id Parameters ---------- audio_id : str The id of the audio to search for Returns ------- str A JSON string containing the result """ query = """ SELECT aggressive, aggressive_confidence, last_updated FROM Audio WHERE audio_release=:rel AND audio_side=:side AND audio_track=:track """ return self._get_data(query, audio_id) def get_happy(self, audio_id: str) -> str: """Get all happy fields for the given audio_id Parameters ---------- audio_id : str The id of the audio to search for Returns ------- str A JSON string containing the result """ query = """ SELECT happy, happy_confidence, last_updated FROM Audio WHERE audio_release=:rel AND audio_side=:side AND audio_track=:track """ return self._get_data(query, audio_id) def get_sad(self, audio_id: str) -> str: """Get all sad fields for the given audio_id Parameters ---------- audio_id : str The id of the audio to search for Returns ------- str A JSON string containing the result """ query = """ SELECT sad, sad_confidence, last_updated FROM Audio WHERE audio_release=:rel AND audio_side=:side AND audio_track=:track """ return self._get_data(query, audio_id) def get_level(self, audio_id: str) -> str: """Get all level fields for the given audio_id Parameters ---------- audio_id : str The id of the audio to search for Returns ------- str A JSON string containing the result """ query = """ SELECT peak, Loudness_integrated, loudness_range, last_updated FROM Audio WHERE audio_release=:rel AND audio_side=:side AND audio_track=:track """ return self._get_data(query, audio_id) def get_peak(self, audio_id: str) -> str: """Get all peak fields for the given audio_id Parameters ---------- audio_id : str The id of the audio to search for Returns ------- str A JSON string containing the result """ query = """ SELECT peak, last_updated FROM Audio WHERE audio_release=:rel AND audio_side=:side AND audio_track=:track """ return self._get_data(query, audio_id) def get_loudness_integrated(self, audio_id: str) -> str: """Get all loudness integrated fields for the given audio_id Parameters ---------- audio_id : str The id of the audio to search for Returns ------- str A JSON string containing the result """ query = """ SELECT Loudness_integrated, last_updated FROM Audio WHERE audio_release=:rel AND audio_side=:side AND audio_track=:track """ return self._get_data(query, audio_id) def get_loudness_range(self, audio_id: str) -> str: """Get all loudness range fields for the given audio_id Parameters ---------- audio_id : str The id of the audio to search for Returns ------- str A JSON string containing the result """ query = """ SELECT loudness_range, last_updated FROM Audio WHERE audio_release=:rel AND audio_side=:side AND audio_track=:track """ return self._get_data(query, audio_id)
return os.path.exists('data/readmes/' + file_name) def path(github): file_name = '__'.join(github.split('/')) return 'data/readmes/' + file_name def has_file(row): return os.path.exists(path(row.github)) if not os.path.isfile('data/data.pkl'): db = Database(os.environ['DATABASE']) rows = db.query('select github, stars, time_alive from libs') filtered = filter(has_file, rows) data = { 'y': [], 'x1': [], 'x2': [], } for lib in filtered: data['y'].append(lib.stars) data['x1'].append(parse_readme(lib.github)) data['x2'].append(int(lib.time_alive / 1000)) pickle.dump(data, open("data/data.pkl", "w"))