Ejemplo n.º 1
0
def _read_file(cwd, csv=None):
    '''Reads a JSON (or pickled) file containing track metadata and annotations

    The file is parsed as JSON first; if that raises ValueError it is loaded
    with pickle instead. The loaded data must be an iterable of dicts, each one
    holding a 'filepath' key; every other key of the dict is stored as a tag of
    that file. 'tracknum' defaults to 0 and 'compilation' to False when the
    record does not provide them.

    String values that name an existing file which is_binary() reports as
    non-binary are replaced by the content of that file; every other string
    value is converted to unicode. Non-string values are stored untouched.

    @return: a 2D dict in the form dict[<filepath>][<tag>]

    @param cwd: complete file-path (if no <csv> sent) or path to directory to work in
    @param csv: file-name (in <cwd> dir). Defaults to None

    @raise KeyError: if a record has no 'filepath' key
    @note: errors while opening/parsing the file are logged and re-raised'''

    if csv is None:
        filename = cwd
    else:
        filename = os.path.join(cwd, csv)

    try:
        with open(filename, 'r') as handle:
            data = json.load(handle)
    except ValueError:
        # Not valid JSON: fall back to pickle (binary mode, as pickle expects).
        # NOTE(security): pickle.load executes arbitrary code stored in the
        # file -- only use this on tracklists from a trusted source.
        with open(filename, 'rb') as handle:
            data = pickle.load(handle)
    except Exception:
        # log the resolved path: <csv> is None when <cwd> is the full path
        log.error("  Couldn't open '%s'", filename)
        raise

    tags = dict()
    for line_num, line in enumerate(data):
        filepath = line.pop('filepath')

        # this deletes previous records if filepath is repeated ...
        entry = {'tracknum': 0, 'compilation': False}
        entry.update(line)
        tags[filepath] = entry

        # items() is a snapshot list in Python 2, so entries can be replaced
        for k, value in entry.items():
            if not isinstance(value, (str, unicode)):
                continue

            if os.path.isfile(value) and not is_binary(value):
                # the value points to a text file: import its content
                try:
                    with open(value) as txt:
                        entry[k] = unicode(txt.read())
                except Exception:
                    log.error('Error opening %s file %s at line %d', k, value, line_num)
                    entry[k] = unicode(value)
            else:
                # plain value (or path to a binary file): keep it as unicode text
                try:
                    entry[k] = u'%s' % value
                except UnicodeDecodeError:
                    entry[k] = value.decode('utf-8')

    return tags
Ejemplo n.º 2
0
def _store_annotations(audiofile, track, all_md=False):
    """Stores annotations found for an audio-file in the given track record

    Two sources are used:
      - if <all_md> is True and id3.isValidMP3 accepts the file, every tag
        returned by id3.getAllTags (track fields skipped) becomes an annotation
        named after the tag;
      - every file sharing the audio-file base-name (with any or no extension)
        that is_binary() reports as non-binary becomes an annotation named
        after its extension (without the dot), with the file content as value.

    Annotations are appended to track.annotations and then commit() is called.

    @param audiofile: the file (should be previously verified as an actual audio file)
    @param track: previously stored track record; must expose an <annotations> list
    @param all_md: use True to extract all tags from the audio-file (defaults to False)

    @return: number of annotations (0 to *) stored"""

    annots = 0

    if all_md and id3.isValidMP3(audiofile):
        # skipTrackFields=True leaves out the basic tags already kept in track
        for tag in id3.getAllTags(audiofile, skipTrackFields=True):
            track.annotations.append(Annotation(name=unicode(tag[0]), value=tag[1]))  # todo: value=unicode(tag[1])
            annots += 1

    # future todo: apply tagpy or other method to extract more metadata formats

    # NOTE: this is also logged when <all_md> is False and no search was done
    if annots == 0:
        log.debug('    No ID3 metadata found.')

    # collect every file sharing the base-name (with any or no extension)
    pathandbase = os.path.splitext(audiofile)[0]
    simfiles = []
    if os.path.exists(pathandbase):
        simfiles.append(pathandbase)
    simfiles.extend(glob(pathandbase + '.*'))

    for simfile in simfiles:
        # the audio-file itself is among the candidates; it is skipped here
        # because it is binary
        if is_binary(simfile):
            continue

        ext = os.path.splitext(simfile)[1]
        # 'with' closes the handle even if reading or converting fails
        with open(simfile) as txt:
            content = unicode(txt.read())
        track.annotations.append(Annotation(name=unicode(ext[1:]), value=content))
        annots += 1

    commit()  # saves all appended annotations in the track

    log.debug('    Stored %s annotations overall', annots)
    return annots
Ejemplo n.º 3
0
def _read_csv_tags(cwd, csv=None):
    '''Reads a csv file containing track metadata and annotations (v 2.0)

    Lines with fewer than 6 columns, with an empty first column or whose first
    column starts with '#' are skipped. The first remaining line must be the
    header, starting with:
    filepath, title, artist, album, tracknum, compilation, [optional1], [optional2]...
    and every following line holds the corresponding values for one file.

    Values are converted as follows:
      - tracknum: int (0 if it cannot be parsed)
      - compilation: True for 'true' (any case) or '1', False otherwise
      - a value naming an existing file that is_binary() reports as non-binary
        is replaced by the content of that file
      - anything else is stored as unicode text

    @return: a 2D dict in the form dict[<filepath>][<tag>] or False if the
    header is incorrect

    @param cwd: complete csv file-path (if no <csv> sent) or path to directory to work in
    @param csv: csv file-name (in <cwd> dir). Defaults to None

    @raise IOError: if the csv file cannot be opened (logged, then re-raised)

    @todo: csv values (after header) may include "\\embedded" to try getting it from the audio file.
    Currently only ID3 tags names understood by gordon.io.mp3_eyeD3.id3v2_getval_sub are usable in this manner.
    @todo: include other metadata formats (use tagpy?)'''

    if csv is None:
        filename = cwd
    else:
        filename = os.path.join(cwd, csv)

    try:
        handle = open(filename)
    except IOError:
        # log the resolved path: <csv> is None when <cwd> is the full path
        log.error("  Couldn't open '%s'", filename)
        raise

    tags = dict()
    headers = None
    # 'with' closes the csv file on every exit path (return False included)
    with handle:
        csvfile = reader(handle)
        for line in csvfile:  # each record (file rows)
            if len(line) < 6:
                continue  # skip bad lines (blank or too short)
            line[0] = line[0].strip()
            if not line[0] or line[0].startswith('#'):
                continue  # skip if filepath empty or comment line

            # read and validate header: it is the first valid line
            if headers is None:
                line = [cell.strip() for cell in line]
                if line[:6] != ['filepath', 'title', 'artist', 'album', 'tracknum', 'compilation']:
                    log.error('CSV headers are incorrect at line %d.',
                              csvfile.line_num)
                    return False
                headers = [unicode(x) for x in line]
                continue

            filepath = line[0]
            row = dict()
            tags[filepath] = row  # this deletes previous lines if filepath is repeated ...

            # col 0 is 'filepath' so skip it; zip stops at the shorter of
            # header and record, so extra or missing cells are ignored
            for header, cell in zip(headers[1:], line[1:]):
                value = cell.strip()

                if header == u'tracknum':  # prepare for smallint in the DB
                    try:
                        row[u'tracknum'] = int(value)
                    except ValueError:
                        row[u'tracknum'] = 0
                elif header == u'compilation':  # prepare for bool in the DB
                    row[u'compilation'] = value.lower() == 'true' or value == '1'
                elif os.path.isfile(value):
                    if is_binary(value):
                        log.debug('%s file %s at line %d appears to be binary, '
                                  'not importing', header, value,
                                  csvfile.line_num)
                        row[header] = unicode(value)
                        continue
                    # the value points to a text file: import its content
                    try:
                        with open(value) as txt:
                            row[header] = unicode(txt.read())
                    except Exception:
                        log.error('Error opening %s file %s at line %d',
                                  header, value, csvfile.line_num)
                        row[header] = unicode(value)
                else:
                    try:
                        row[header] = u'%s' % value
                    except UnicodeDecodeError:
                        row[header] = value.decode("utf-8")

    return tags
Ejemplo n.º 4
0
def _read_csv_tags(cwd, csv=None):
    '''Reads a csv file containing track metadata and annotations (v 2.0)

    Lines with fewer than 6 columns, with an empty first column or whose first
    column starts with '#' are skipped. The first remaining line must be the
    header, starting with:
    filepath, title, artist, album, tracknum, compilation, [optional1], [optional2]...
    and every following line holds the corresponding values for one file.

    Values are converted as follows:
      - tracknum: int (0 if it cannot be parsed)
      - compilation: True for 'true' (any case) or '1', False otherwise
      - a value naming an existing file that is_binary() reports as non-binary
        is replaced by the content of that file
      - anything else is stored as unicode text

    @return: a 2D dict in the form dict[<filepath>][<tag>] or False if the
    header is incorrect

    @param cwd: complete csv file-path (if no <csv> sent) or path to directory to work in
    @param csv: csv file-name (in <cwd> dir). Defaults to None

    @raise IOError: if the csv file cannot be opened (logged, then re-raised)

    @todo: csv values (after header) may include "\\embedded" to try getting it from the audio file.
    Currently only ID3 tags names understood by gordon.io.mp3_eyeD3.id3v2_getval_sub are usable in this manner.
    @todo: include other metadata formats (use tagpy?)'''

    if csv is None:
        filename = cwd
    else:
        filename = os.path.join(cwd, csv)

    try:
        handle = open(filename)
    except IOError:
        # log the resolved path: <csv> is None when <cwd> is the full path
        log.error("  Couldn't open '%s'", filename)
        raise

    tags = dict()
    headers = None
    # 'with' closes the csv file on every exit path (return False included)
    with handle:
        csvfile = reader(handle)
        for line in csvfile:  # each record (file rows)
            if len(line) < 6:
                continue  # skip bad lines (blank or too short)
            line[0] = line[0].strip()
            if not line[0] or line[0].startswith('#'):
                continue  # skip if filepath empty or comment line

            # read and validate header: it is the first valid line
            if headers is None:
                line = [cell.strip() for cell in line]
                if line[:6] != [
                        'filepath', 'title', 'artist', 'album', 'tracknum',
                        'compilation'
                ]:
                    log.error('CSV headers are incorrect at line %d.',
                              csvfile.line_num)
                    return False
                headers = [unicode(x) for x in line]
                continue

            filepath = line[0]
            row = dict()
            # this deletes previous lines if filepath is repeated ...
            tags[filepath] = row

            # col 0 is 'filepath' so skip it; zip stops at the shorter of
            # header and record, so extra or missing cells are ignored
            for header, cell in zip(headers[1:], line[1:]):
                value = cell.strip()

                if header == u'tracknum':  # prepare for smallint in the DB
                    try:
                        row[u'tracknum'] = int(value)
                    except ValueError:
                        row[u'tracknum'] = 0
                elif header == u'compilation':  # prepare for bool in the DB
                    row[u'compilation'] = (value.lower() == 'true'
                                           or value == '1')
                elif os.path.isfile(value):
                    if is_binary(value):
                        log.debug(
                            '%s file %s at line %d appears to be binary, '
                            'not importing', header, value, csvfile.line_num)
                        row[header] = unicode(value)
                        continue
                    # the value points to a text file: import its content
                    try:
                        with open(value) as txt:
                            row[header] = unicode(txt.read())
                    except Exception:
                        log.error('Error opening %s file %s at line %d',
                                  header, value, csvfile.line_num)
                        row[header] = unicode(value)
                else:
                    try:
                        row[header] = u'%s' % value
                    except UnicodeDecodeError:
                        row[header] = value.decode("utf-8")

    return tags