Exemple #1
0
    def execute(self, file_name: str, conn: pymssql.Connection) -> (str, bool):
        # Get the extension for the file
        name, ext = path.splitext(file_name)
        ext = ext[1:].lower()

        # Convert the file to mp3 if not.
        if ext != 'mp3':
            new_file_name = name + '.mp3'
            tmp_name = time.strftime("%Y%m%d-%H%M%S") + '.mp3'

            (ffmpeg.input(file_name).output(tmp_name, map_metadata='0').run())

            # The file cannot be moved since it's across file systems.
            shutil.copyfile(tmp_name, new_file_name)
            os.remove(tmp_name)

            cursor: pymssql.Cursor = conn.cursor()
            cursor.execute('EXEC sp_UpdatePath %s, %s',
                           (file_name, new_file_name))
            cursor.close()

            self.handle_duplicate_files(file_name)

            return new_file_name, True

        return file_name, True
Exemple #2
0
    def execute(self, file_name: str, db_session: sessionmaker, conn: pymssql.Connection) -> (str, bool):
        # Get the secrets from environment.
        api_key = os.environ['ACOUSTID_API_KEY']

        cursor: pymssql.Cursor = conn.cursor()

        # Fetch the fingerprint and length
        cursor.execute('EXEC sp_SelectFileFingerprintLength %s', file_name)
        length, fingerprint = cursor.fetchone()

        # Get the results from acoustid.
        results = acoustid.parse_lookup_result(acoustid.lookup(api_key, fingerprint, length))
        results = [r for r in results if r[3] is not None]

        # If we don't want have any results from the fingerprint, that's fine.
        if len(results) == 0:
            return file_name, True

        # We want to select the most confident answer.
        max_confidence = max([r[0] for r in results])

        results = [r for r in results if r[0] == max_confidence]

        # Store all possible results.
        for song in results:
            cursor.execute(
                'EXEC sp_InsertPossibleMatches %s, %s, %s, %s', (file_name, song[3], song[2], song[1])
            )

        cursor.close()
        return (file_name, True)
Exemple #3
0
    def set_tables_metadata(self, conn: pymssql.Connection) -> None:
        """
        Enriches this TableRules instance with Primary Key and Column metadata for each table

        Requires a pymssql.Connection handle
        """
        with closing(conn.cursor()) as cur:
            self._set_primary_keys(cur)
            self._set_cols(cur)
            self._check_metadata()
            self._set_sf_ddl()
Exemple #4
0
    def execute(self, file_name: str, db_session: sessionmaker,
                conn: pymssql.Connection) -> (str, bool):
        # Get the folder we are moving the music to.
        base_folder = './shares/music'

        # Get the info to compute the new location for the song.
        cursor: pymssql.Cursor = conn.cursor()
        cursor.execute('EXEC sp_SelectSongLocationInfo %s', file_name)
        artist, album, track_number, title = cursor.fetchone()

        # Update the values to be path safe.
        artist = artist.replace('/', '_')
        album = album.replace('/', '')
        title = title.replace('/', '')

        # Get the filder name.
        folder = path.join(base_folder, artist + ' - ' + album)
        # Get the file name.
        new_file_name = str(track_number) + ' - ' + title

        # Create the path if it does nto exist.
        if not path.isdir(folder):
            os.mkdir(folder)

        # Get the path without the extension.
        base_path = path.join(folder, new_file_name)
        new_file_path = base_path + '.mp3'
        lyrics_path = base_path + '.txt'

        # Get the lyrics
        cursor.execute('EXEC sp_SelectLyrics %s', file_name)
        lyrics = cursor.fetchone()[0]

        # If we have lyrics, save them.
        if lyrics is not None:
            with open(lyrics_path, 'w') as f:
                f.write(lyrics)

        # Move the music file.
        os.rename(file_name, new_file_path)

        # Update new information.
        cursor.execute('EXEC sp_UpdatePath %s, %s', (file_name, new_file_path))

        cursor.close()

        return new_file_path, True
Exemple #5
0
def to_pandas(rules: TableRules, conn: pymssql.Connection):
    """
    Batch out the data into pandas dataframes
    TODO: Consider specifying the dt_types() in the pandas constructor
          so that datetime64[ns] has UTC by default.
          That would require a full SQL -> pd dtype mapping
    """
    with closing(conn.cursor()) as cur:
        for db, sch, tbl, qry in rules.get_basic_sql():
            cur.execute(qry)
            logger.info(f"Querying table: {db.name}.{sch.name}.{tbl.name}")

            for batch, rownum in get_batch(cur):
                df = pd.DataFrame(
                    data=batch, columns=[col.caps_name() for col in tbl.cols])
                logger.info(df.head(1))
                fix_date_cols(df)
                yield db, sch, tbl, df
Exemple #6
0
    def execute(self, file_name: str, db_session: sessionmaker, conn: pymssql.Connection) -> (str, bool):
        # Get the secrets from the environment
        api_key = os.environ['LYRIC_GENIUS_API_KEY'] 
        
        genius = lyricsgenius.Genius(api_key)

        cursor: pymssql.Cursor = conn.cursor()
        cursor.execute('EXEC sp_SelectSongTitleArtist %s', file_name)
        title, artist = cursor.fetchone()

        # Get the song from the internet.
        song = genius.search_song(title, artist=artist)

        # Save the lyrics in the db.
        if song is not None:
            cursor.execute('EXEC sp_UpdateSongLyrics %s, %s', (song.lyrics, file_name))

        return file_name, True
Exemple #7
0
def write_parquet(rules: TableRules, conn: pymssql.Connection):
    """
    Batch out the data into parquet files

    Consider a buffer and passing that to boto3 s3
    import io
    f = io.BytesIO()
    df.to_parquet(f)
    f.seek(0)
    content = f.read()
    """
    with closing(conn.cursor()) as cur:
        for db, sch, tbl, qry in rules.get_basic_sql():
            cur.execute(qry)
            logger.info(f"Querying table: {db.name}.{sch.name}.{tbl.name}")

            for batch, rownum in get_batch(cur):
                df = pd.DataFrame(
                    data=batch, columns=[col.caps_name() for col in tbl.cols])
                logger.info(df.head(1))
                path = Path(f"./temp/{tbl.name}_{str(rownum)}.parquet.gzip")
                df.to_parquet(path, compression="gzip")
                logger.info(f"File created: {path.resolve()}")
                yield db, sch, tbl, path
Exemple #8
0
 def get_connection(conn: pymssql.Connection):
     obj = WorkersStorage()
     obj._conn = conn
     obj._cur = conn.cursor()
     return obj
Exemple #9
0
 def get_connection(conn: pymssql.Connection):
     obj = DiscountCardsStorage()
     obj._conn = conn
     obj._cur = conn.cursor()
     return obj
Exemple #10
0
    def execute(self, file_name: str, db_session: sessionmaker,
                conn: pymssql.Connection) -> (str, bool):
        musicbrainzngs.set_useragent('music-org', '0.1')

        cursor: pymssql.Cursor = conn.cursor()

        # Get all possible matches returned from Acoustid
        cursor.execute('EXEC sp_SelectPossibleMatches %s', file_name)

        # If there are no matches, then just continue.
        if cursor.rowcount == 0:
            return file_name, True

        # Get all rows.
        rows = list(cursor.fetchall())

        # We want the MusicBrainz ID from the possible matches.
        musicBrainzIDs = [r[2] for r in rows]

        # Get releases from MusicBrainz
        releases = []
        for row in rows:
            result: dict = musicbrainzngs.search_recordings(artist=row[0],
                                                            recording=row[1])

            releases = releases + [
                r
                for r in result['recording-list'] if r['id'] in musicBrainzIDs
            ]

        release_group = self.get_release_group(releases)
        # If we can't find any albums, return.
        if release_group is None:
            return file_name, True

        # Get the recording in the albums
        recordings = [
            r for r in musicbrainzngs.search_recordings(
                release=release_group['title'])['recording-list']
            if r['id'] in musicBrainzIDs
        ]

        if len(recordings) != 0:
            recording = recordings[0]
        else:
            # Could not find on MusicBrainz
            return file_name, True

        artists = [(r['artist']['id'], r['artist']['name'], file_name)
                   for r in recording['artist-credit']]
        cursor.executemany('EXEC sp_InsertIntoArtists %s, %s, %s', artists)

        release_group_countries = [
            r for r in recording['release-list']
            if r['release-group']['id'] == release_group['id']
        ]
        release_group_country = [
            r for r in release_group_countries if r['country'] == 'US'
        ][0]

        # Genre
        tags = recording['tag-list']
        tags.sort(key=lambda x: x['name'])
        tag = tags[0]['name']

        tag = tag[:1].upper() + tag[1:]

        cover_art_data = musicbrainzngs.caa.get_release_group_image_front(
            release_group['id'])

        cursor.execute(
            'DECLARE @Date DATE = CAST(%s AS DATE);' +
            'DECLARE @CoverArt VARBINARY(MAX) = CAST(%s AS VARBINARY(MAX));' +
            'EXEC sp_UpdateMusic %s, %s, @Date, @CoverArt, %s, %s, %s, %d, %s, %s',
            (release_group['first-release-date'], cover_art_data,
             release_group['id'], release_group['title'], 'image/jpeg',
             recording['id'], recording['title'],
             int(release_group_country['medium-list'][0]['track-list'][0]
                 ['number']), tag, file_name))

        cursor.close()

        return (file_name, True)