def execute(self, file_name: str, conn: pymssql.Connection) -> (str, bool):
    # Get the extension for the file.
    name, ext = path.splitext(file_name)
    ext = ext[1:].lower()

    # Convert the file to mp3 if it is not one already.
    if ext != 'mp3':
        new_file_name = name + '.mp3'
        tmp_name = time.strftime("%Y%m%d-%H%M%S") + '.mp3'
        (ffmpeg
         .input(file_name)
         .output(tmp_name, map_metadata='0')
         .run())

        # The file cannot be moved since it's across file systems,
        # so copy it and remove the temporary copy.
        shutil.copyfile(tmp_name, new_file_name)
        os.remove(tmp_name)

        cursor: pymssql.Cursor = conn.cursor()
        cursor.execute('EXEC sp_UpdatePath %s, %s', (file_name, new_file_name))
        cursor.close()

        self.handle_duplicate_files(file_name)

        return new_file_name, True
    return file_name, True
def execute(self, file_name: str, db_session: sessionmaker, conn: pymssql.Connection) -> (str, bool):
    # Get the secrets from the environment.
    api_key = os.environ['ACOUSTID_API_KEY']

    cursor: pymssql.Cursor = conn.cursor()

    # Fetch the fingerprint and length.
    cursor.execute('EXEC sp_SelectFileFingerprintLength %s', file_name)
    length, fingerprint = cursor.fetchone()

    # Get the results from AcoustID.
    results = acoustid.parse_lookup_result(acoustid.lookup(api_key, fingerprint, length))
    results = [r for r in results if r[3] is not None]

    # If we don't have any results for the fingerprint, that's fine.
    if len(results) == 0:
        cursor.close()
        return file_name, True

    # We want to select the most confident answer.
    max_confidence = max(r[0] for r in results)
    results = [r for r in results if r[0] == max_confidence]

    # Store all possible results.
    for song in results:
        cursor.execute(
            'EXEC sp_InsertPossibleMatches %s, %s, %s, %s',
            (file_name, song[3], song[2], song[1])
        )
    cursor.close()
    return file_name, True
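# For context: the fingerprint/length pair read back by
# sp_SelectFileFingerprintLength above could have been produced with
# pyacoustid's Chromaprint bindings. A minimal sketch; the helper name is
# hypothetical, only acoustid.fingerprint_file() is a real API.
def fingerprint_for_db(file_name: str):
    import acoustid
    # fingerprint_file() runs Chromaprint over the audio and returns
    # (duration_in_seconds, fingerprint).
    duration, fingerprint = acoustid.fingerprint_file(file_name)
    return int(duration), fingerprint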
def set_tables_metadata(self, conn: pymssql.Connection) -> None:
    """
    Enriches this TableRules instance with primary key and column metadata
    for each table.

    Requires a pymssql.Connection handle.
    """
    with closing(conn.cursor()) as cur:
        self._set_primary_keys(cur)
        self._set_cols(cur)
        self._check_metadata()
        self._set_sf_ddl()
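# A minimal usage sketch for set_tables_metadata(), assuming an existing
# TableRules instance named `rules`; the connection parameters are
# placeholders, not values from the source.
import pymssql

conn = pymssql.connect(server='localhost', user='sa',
                       password='<secret>', database='master')
try:
    rules.set_tables_metadata(conn)  # enriches `rules` in place
finally:
    conn.close()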
def execute(self, file_name: str, db_session: sessionmaker, conn: pymssql.Connection) -> (str, bool):
    # Get the folder we are moving the music to.
    base_folder = './shares/music'

    # Get the info to compute the new location for the song.
    cursor: pymssql.Cursor = conn.cursor()
    cursor.execute('EXEC sp_SelectSongLocationInfo %s', file_name)
    artist, album, track_number, title = cursor.fetchone()

    # Update the values to be path safe.
    artist = artist.replace('/', '_')
    album = album.replace('/', '')
    title = title.replace('/', '')

    # Get the folder name.
    folder = path.join(base_folder, artist + ' - ' + album)
    # Get the file name.
    new_file_name = str(track_number) + ' - ' + title

    # Create the path if it does not exist.
    if not path.isdir(folder):
        os.mkdir(folder)

    # Get the path without the extension.
    base_path = path.join(folder, new_file_name)
    new_file_path = base_path + '.mp3'
    lyrics_path = base_path + '.txt'

    # Get the lyrics.
    cursor.execute('EXEC sp_SelectLyrics %s', file_name)
    lyrics = cursor.fetchone()[0]

    # If we have lyrics, save them.
    if lyrics is not None:
        with open(lyrics_path, 'w') as f:
            f.write(lyrics)

    # Move the music file.
    os.rename(file_name, new_file_path)

    # Record the new location in the database.
    cursor.execute('EXEC sp_UpdatePath %s, %s', (file_name, new_file_path))
    cursor.close()

    return new_file_path, True
def to_pandas(rules: TableRules, conn: pymssql.Connection):
    """
    Batch out the data into pandas dataframes.

    TODO: Consider specifying the dt_types() in the pandas constructor so
    that datetime64[ns] has UTC by default. That would require a full
    SQL -> pd dtype mapping.
    """
    with closing(conn.cursor()) as cur:
        for db, sch, tbl, qry in rules.get_basic_sql():
            cur.execute(qry)
            logger.info(f"Querying table: {db.name}.{sch.name}.{tbl.name}")
            for batch, rownum in get_batch(cur):
                df = pd.DataFrame(
                    data=batch,
                    columns=[col.caps_name() for col in tbl.cols])
                logger.info(df.head(1))
                fix_date_cols(df)
                yield db, sch, tbl, df
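# get_batch() is used above and in write_parquet() below but is not shown.
# A sketch of what it might look like, assuming it yields
# (rows, running_row_count) pairs; the batch size is an invented parameter.
def get_batch(cur, batch_size: int = 10_000):
    rownum = 0
    while True:
        # fetchmany() returns at most batch_size rows, or [] once drained.
        batch = cur.fetchmany(batch_size)
        if not batch:
            break
        rownum += len(batch)
        yield batch, rownum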
def execute(self, file_name: str, db_session: sessionmaker, conn: pymssql.Connection) -> (str, bool):
    # Get the secrets from the environment.
    api_key = os.environ['LYRIC_GENIUS_API_KEY']
    genius = lyricsgenius.Genius(api_key)

    cursor: pymssql.Cursor = conn.cursor()
    cursor.execute('EXEC sp_SelectSongTitleArtist %s', file_name)
    title, artist = cursor.fetchone()

    # Get the song from the internet.
    song = genius.search_song(title, artist=artist)

    # Save the lyrics in the db.
    if song is not None:
        cursor.execute('EXEC sp_UpdateSongLyrics %s, %s', (song.lyrics, file_name))

    cursor.close()
    return file_name, True
def write_parquet(rules: TableRules, conn: pymssql.Connection):
    """
    Batch out the data into parquet files.

    Consider a buffer and passing that to boto3 s3:

        import io
        f = io.BytesIO()
        df.to_parquet(f)
        f.seek(0)
        content = f.read()
    """
    with closing(conn.cursor()) as cur:
        for db, sch, tbl, qry in rules.get_basic_sql():
            cur.execute(qry)
            logger.info(f"Querying table: {db.name}.{sch.name}.{tbl.name}")
            for batch, rownum in get_batch(cur):
                df = pd.DataFrame(
                    data=batch,
                    columns=[col.caps_name() for col in tbl.cols])
                logger.info(df.head(1))
                path = Path(f"./temp/{tbl.name}_{rownum}.parquet.gzip")
                df.to_parquet(path, compression="gzip")
                logger.info(f"File created: {path.resolve()}")
                yield db, sch, tbl, path
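# Following the docstring's suggestion, the yielded parquet files could be
# pushed to S3 with boto3. The bucket name and key layout are illustrative
# only, not from the source.
import boto3

s3 = boto3.client('s3')
for db, sch, tbl, pq_path in write_parquet(rules, conn):
    key = f"{db.name}/{sch.name}/{tbl.name}/{pq_path.name}"
    s3.upload_file(str(pq_path), 'my-export-bucket', key)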
def get_connection(conn: pymssql.Connection):
    # Factory: wrap an existing pymssql connection in a WorkersStorage
    # instance with a fresh cursor.
    obj = WorkersStorage()
    obj._conn = conn
    obj._cur = conn.cursor()
    return obj
def get_connection(conn: pymssql.Connection):
    # Factory: wrap an existing pymssql connection in a DiscountCardsStorage
    # instance with a fresh cursor.
    obj = DiscountCardsStorage()
    obj._conn = conn
    obj._cur = conn.cursor()
    return obj
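# Both storage classes expose the same factory; a usage sketch assuming
# get_connection is a @staticmethod on each class, with hypothetical
# connection parameters.
import pymssql

conn = pymssql.connect(server='localhost', user='sa',
                       password='<secret>', database='shop')
workers = WorkersStorage.get_connection(conn)
cards = DiscountCardsStorage.get_connection(conn)  # can share the connection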
def execute(self, file_name: str, db_session: sessionmaker, conn: pymssql.Connection) -> (str, bool):
    musicbrainzngs.set_useragent('music-org', '0.1')

    cursor: pymssql.Cursor = conn.cursor()

    # Get all possible matches returned from AcoustID.
    cursor.execute('EXEC sp_SelectPossibleMatches %s', file_name)

    # If there are no matches, then just continue.
    if cursor.rowcount == 0:
        cursor.close()
        return file_name, True

    # Get all rows.
    rows = list(cursor.fetchall())

    # We want the MusicBrainz IDs from the possible matches.
    music_brainz_ids = [r[2] for r in rows]

    # Get releases from MusicBrainz.
    releases = []
    for row in rows:
        result: dict = musicbrainzngs.search_recordings(artist=row[0], recording=row[1])
        releases = releases + [
            r for r in result['recording-list'] if r['id'] in music_brainz_ids
        ]

    release_group = self.get_release_group(releases)

    # If we can't find any albums, return.
    if release_group is None:
        cursor.close()
        return file_name, True

    # Get the recordings in the album.
    recordings = [
        r for r in musicbrainzngs.search_recordings(
            release=release_group['title'])['recording-list']
        if r['id'] in music_brainz_ids
    ]
    if len(recordings) != 0:
        recording = recordings[0]
    else:
        # Could not find it on MusicBrainz.
        cursor.close()
        return file_name, True

    artists = [(r['artist']['id'], r['artist']['name'], file_name)
               for r in recording['artist-credit']]
    cursor.executemany('EXEC sp_InsertIntoArtists %s, %s, %s', artists)

    release_group_countries = [
        r for r in recording['release-list']
        if r['release-group']['id'] == release_group['id']
    ]
    release_group_country = [
        r for r in release_group_countries if r['country'] == 'US'
    ][0]

    # Genre: pick the first tag alphabetically and capitalize it.
    tags = recording['tag-list']
    tags.sort(key=lambda x: x['name'])
    tag = tags[0]['name']
    tag = tag[:1].upper() + tag[1:]

    cover_art_data = musicbrainzngs.caa.get_release_group_image_front(
        release_group['id'])

    cursor.execute(
        'DECLARE @Date DATE = CAST(%s AS DATE);' +
        'DECLARE @CoverArt VARBINARY(MAX) = CAST(%s AS VARBINARY(MAX));' +
        'EXEC sp_UpdateMusic %s, %s, @Date, @CoverArt, %s, %s, %s, %d, %s, %s',
        (release_group['first-release-date'], cover_art_data,
         release_group['id'], release_group['title'], 'image/jpeg',
         recording['id'], recording['title'],
         int(release_group_country['medium-list'][0]['track-list'][0]['number']),
         tag, file_name))
    cursor.close()
    return file_name, True
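# self.get_release_group() is defined elsewhere. Purely as an illustration,
# one plausible implementation (a guess, not the author's code): tally the
# release groups attached to the candidate recordings and return the most
# common one, or None when there are no candidates.
from collections import Counter

def get_release_group(self, releases):
    groups = [rel['release-group']
              for rec in releases
              for rel in rec.get('release-list', [])]
    if not groups:
        return None
    best_id = Counter(g['id'] for g in groups).most_common(1)[0][0]
    return next(g for g in groups if g['id'] == best_id)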