Ejemplo n.º 1
0
def extractAnnos(annotations,action,verbose):
    '''Extract highlights and/or notes for every annotated document.

    Args:
        annotations (dict): keys are doc ids, values are annotation objects
            exposing <path> and <filename> attributes.
        action (container of str): highlights are extracted when 'm' is in
            <action>, notes are extracted when 'n' is in <action>.
        verbose (bool): print progress messages if True.

    Returns:
        annotations2 (dict): same keys as <annotations>. Each value is the
            input object with its <highlights> and <notes> attributes set;
            an attribute is an empty list when that extraction was not
            requested or raised.
        faillist (list): filenames of the docs for which an extraction
            raised. A doc is listed once even if both extractions failed.
    '''

    faillist=[]
    annotations2={}  #keys: docid, values: extracted annotations

    #-----------Loop through documents---------------
    num=len(annotations)
    for ii,(idii,annoii) in enumerate(annotations.items()):
        fii=annoii.path
        fnameii=annoii.filename
        failed=False

        if verbose:
            printNumHeader('Processing file:',ii+1,num,3)
            printInd(fnameii,4)

        #----------------Highlights----------------
        hltexts=[]
        if 'm' in action:
            # Imported lazily: only needed when highlights are requested.
            from lib import extracthl2

            try:
                #------ Check if pdftotext is available--------
                if extracthl2.checkPdftotext():
                    if verbose:
                        printInd('Retrieving highlights using pdftotext ...',4,prefix='# <Menotexport>:')
                    hltexts=extracthl2.extractHighlights2(fii,annoii,verbose)
                else:
                    if verbose:
                        printInd('Retrieving highlights using pdfminer ...',4,prefix='# <Menotexport>:')
                    hltexts=extracthl2.extractHighlights(fii,annoii,verbose)
            except Exception:
                # Best effort: keep going with the next doc, but let
                # KeyboardInterrupt/SystemExit propagate.
                failed=True
                hltexts=[]

        #------------------Notes------------------
        nttexts=[]
        if 'n' in action:
            if verbose:
                printInd('Retrieving notes...',4,prefix='# <Menotexport>:')
            try:
                nttexts=extractnt.extractNotes(fii,annoii,verbose)
            except Exception:
                failed=True
                nttexts=[]

        if failed:
            faillist.append(fnameii)

        annoii.highlights=hltexts
        annoii.notes=nttexts
        annotations2[idii]=annoii

    return annotations2,faillist
Ejemplo n.º 2
0
def main(dbfin,outdir,action,folder,separate,iszotero,verbose=True):
    '''Process the folders (and folder-less docs) of a library database.

    Args:
        dbfin (str): path to the sqlite database file.
        outdir: output location, passed on to processFolder() and
            processCanonicals().
        action: requested actions, passed on unchanged.
        folder: folder selection given to getFolderList(). None selects all
            folders and additionally processes the docs returned by
            getCanonicals().
        separate, iszotero: passed on unchanged.
        verbose (bool): print progress messages if True.

    Returns:
        0 when processing ran to the end (individual failures are listed in
        the printed summary), 1 when the database could not be opened or no
        docs were found.
    '''

    try:
        db = sqlite3.connect(dbfin)
    except Exception:
        printHeader('Failed to connect to database:')
        printInd(dbfin)
        return 1

    if verbose:
        printHeader('Connected to database:')
        printInd(dbfin,2)

    exportfaillist=[]
    annofaillist=[]
    bibfaillist=[]
    risfaillist=[]

    # try/finally so the connection is released on the early return and on
    # unexpected errors as well as on the normal path.
    try:
        #----------------Get folder list----------------
        folderlist=getFolderList(db,folder)
        allfolders=folder is None

        #---------------Get canonical doc ids--------------
        # Always bound: when a specific folder is requested there are no
        # canonical docs to process, so the emptiness check below cannot
        # hit an unbound name.
        if allfolders:
            canonical_doc_ids=getCanonicals(db)
        else:
            canonical_doc_ids=[]

        if len(folderlist)==0 and len(canonical_doc_ids)==0:
            printHeader('It looks like no docs are found in the library. Quit.')
            return 1

        #---------------Loop through folders---------------
        for ii,folderii in enumerate(folderlist):
            fidii,fnameii=folderii
            if verbose:
                printNumHeader('Processing folder: "%s"' %fnameii,\
                        ii+1,len(folderlist),1)
            annotations={}
            exportfaillistii,annofaillistii,bibfaillistii,risfaillistii=\
                    processFolder(db,outdir,annotations,\
                    fidii,fnameii,allfolders,action,separate,iszotero,verbose)

            exportfaillist.extend(exportfaillistii)
            annofaillist.extend(annofaillistii)
            bibfaillist.extend(bibfaillistii)
            risfaillist.extend(risfaillistii)

        #---------------Process canonical docs ------------
        if len(canonical_doc_ids)>0:
            if verbose:
                printHeader('Processing docs under "My Library"')
            annotations={}
            exportfaillistii,annofaillistii,bibfaillistii,risfaillistii=\
                    processCanonicals(db,outdir,annotations,\
                    canonical_doc_ids,allfolders,action,separate,iszotero,verbose)

            exportfaillist.extend(exportfaillistii)
            annofaillist.extend(annofaillistii)
            bibfaillist.extend(bibfaillistii)
            risfaillist.extend(risfaillistii)

            printHeader('NOTE that docs not belonging to any folder is saved to directory : "Canonical-My Library"')

        #-----------------Close connection-----------------
        if verbose:
            printHeader('Drop connection to database:')
    finally:
        db.close()

    #------------------Print summary------------------
    reports=[
        ('Failed to export PDFs:',exportfaillist),
        ('Failed to extract and export highlights/notes:',annofaillist),
        ('Failed to export to .bib files:',bibfaillist),
        ('Failed to export to .ris files:',risfaillist),
        ]

    printHeader('Summary',1)
    anyfail=False
    for headerii,failsii in reports:
        # The same doc may be reported from several folders.
        failsii=list(set(failsii))
        if len(failsii)>0:
            anyfail=True
            printHeader(headerii,2)
            for failii in failsii:
                printInd(failii,2)

    if not anyfail and verbose:
        printHeader('All done.',2)

    #-----------------Remove tmp file-----------------
    if os.path.exists('tmp.txt'):
        os.remove('tmp.txt')

    return 0
Ejemplo n.º 3
0
def main(dbfile,outdir,album,verbose):
    '''Export the audio files of the selected album(s) found in a database.

    Args:
        dbfile (str): path to the sqlite database file. Its directory is
            passed to processAlbum() as the input directory.
        outdir (str): output directory, created if it does not exist.
        album: album selection given to getAlbumList().
        verbose (bool): print progress messages if True.

    Returns:
        0 when all albums were processed (individual failures are listed in
        the printed summary), 1 when the database could not be opened, no
        album was selected or <outdir> could not be created.
    '''

    try:
        db = sqlite3.connect(dbfile)
    except Exception:
        # Message: failed to open the data file
        printHeader(dgbk('无法打开数据文件'))
        printInd(dbfile)
        return 1

    if verbose:
        # Message: data file opened
        printHeader(dgbk('打开数据文件:'))
        printInd(dbfile,2)

    faillist=[]
    metafaillist=[]

    # try/finally so the connection is released on the early returns and on
    # unexpected errors as well as on the normal path.
    try:
        #--------------------Fetch data--------------------
        df=getData(db)
        indir=os.path.split(os.path.abspath(dbfile))[0]

        #----------------Get album list----------------
        albumlist=getAlbumList(df,album)
        if len(albumlist)==0:
            return 1

        #----------Create output dir if not exist----------
        if not os.path.isdir(outdir):
            try:
                os.makedirs(outdir)
            except Exception:
                printHeader('Failed to create output directory: %s' %outdir)
                return 1

        #---------------Loop through albums---------------
        for ii,albumii in enumerate(albumlist):
            idii,albumnameii=albumii
            if verbose:
                # Message: processing album "<name>"
                printNumHeader(dgbk('处理专辑: "')+albumnameii+'"',\
                    ii+1,len(albumlist),1)
            failistii,metafaillistii=processAlbum(df,indir,outdir,idii,verbose)
            faillist.extend(failistii)
            metafaillist.extend(metafaillistii)

        #-----------------Close connection-----------------
        if verbose:
            # Message: closing the data file
            printHeader(dgbk('关闭数据文件:'))
    finally:
        db.close()

    #------------------Print summary------------------
    faillist=list(set(faillist))
    metafaillist=list(set(metafaillist))

    # Message: summary
    printHeader(dgbk('总结'),1)
    if len(faillist)>0:
        # Message: failed to copy
        printHeader(dgbk('拷贝失败:'),2)
        for failii in faillist:
            printInd(failii,2)

    if len(metafaillist)>0:
        # Message: failed to write metadata
        printHeader(dgbk('元数据写入失败:'),2)
        for failii in metafaillist:
            printInd(failii,2)

    if len(faillist)==0 and len(metafaillist)==0:
        # Message: all done
        printHeader(dgbk('全部完成'),2)

    return 0
Ejemplo n.º 4
0
def processAlbum(df,indir,outdir,albumid,verbose=True):
    '''Export the audio files of one album and tag them.

    For each track of the album: if the recorded <downloaded> size is smaller
    than <totalBytes> the audio is downloaded (primary url first, then the
    alternative url); otherwise the local file under <indir>/Download is
    copied. When HAS_MUTAGEN is true, metadata is then written to the
    exported file.

    Args:
        df: table of tracks, read by attribute access. Columns used here:
            albumId, albumName, albumImage, title, artist, downloaded,
            totalBytes, downloadUrl, downloadAacUrl, filepath.
        indir (str): directory holding the 'Download' folder with the
            local audio files.
        outdir (str): output directory; a subfolder named after the album
            is created inside it.
        albumid: value of <albumId> selecting the album's rows.
        verbose (bool): print progress messages if True.

    Returns:
        faillist (list): titles that could not be downloaded or copied.
        metafaillist (list): titles whose metadata could not be written.
    '''
    seldf=df[df.albumId==albumid]
    albumname=seldf.iloc[0].albumName
    faillist=[]
    metafaillist=[]

    subfolder=os.path.join(outdir,albumname)
    subfolder=convertPath(subfolder)
    if not os.path.isdir(subfolder):
        try:
            os.makedirs(subfolder)
        except Exception:
            if verbose:
                printInd('Failed to create subfolder %s' %albumname,2)
                printInd('Skip folder %s' %albumname,2)
            # Only the tracks of this album are affected, not the whole table.
            faillist.extend(fetchField(seldf,'title'))
            return faillist,metafaillist

    #------------Download album cover image------------
    albumImage=seldf.iloc[0].albumImage
    try:
        coverimg=os.path.join(subfolder,'cover.jpg')
        imgfile=urlretrieve(albumImage,coverimg)[0]
        got_cover=True
    except Exception:
        # The cover is optional; tracks are still exported without it.
        imgfile=None
        got_cover=False

    #----------------Loop through files----------------
    for ii in range(len(seldf)):
        rowii=seldf.iloc[ii]
        title=rowii.title
        artist=rowii.artist
        downloaded=rowii.downloaded
        totalBytes=rowii.totalBytes
        downloadurl1=rowii.downloadUrl
        downloadurl2=rowii.downloadAacUrl
        filepath=rowii.filepath

        newname="%s-%s.mp4" %(title,artist)
        newname=REPATTERN.sub(' ',newname)
        newname=os.path.join(tools.deu(subfolder),newname)
        newname=convertPath(newname)

        if verbose:
            # Message: getting file <title>
            printInd(dgbk('获取文件: ')+title, 2)

        if downloaded<totalBytes:
            #-----If incomplete download, try downloading now-----
            if verbose:
                printInd('Downloading imcomplete audio:',2)
                printInd(title,2)

            # Try the primary url, fall back to the alternative one. The
            # track counts as failed only if both attempts raise.
            fetched=False
            for urlii in (downloadurl1,downloadurl2):
                try:
                    urlretrieve(urlii,newname)
                except Exception:
                    continue
                fetched=True
                break

            if not fetched:
                if verbose:
                    printInd('Failed to download %s' %title,2)
                faillist.append(title)
                continue
        else:
            #----------------------Export----------------------
            filename=os.path.join(indir,'Download')
            filename=os.path.join(filename,filepath)

            copied=False
            if os.path.exists(filename):
                try:
                    shutil.copy2(filename,newname)
                    copied=True
                except Exception:
                    copied=False

            if not copied:
                # Record the failure whether or not verbose is on; a missing
                # source file is an export failure too.
                if verbose:
                    printInd('Failed to copy file %s' %title,2)
                faillist.append(title)
                continue

        #------------Write metadata (optional)------------
        if HAS_MUTAGEN:

            if verbose:
                # Message: writing metadata for <title>
                printInd(dgbk('为音频写入元数据: ')+title, 2)

            # The exported file is named *.mp4, so mp4 tag names are used.
            meta={'\xa9nam':title, '\xa9ART': artist, '\xa9alb': albumname,\
                  'comments': 'Exported from Ximalaya by XimaExport'}
            if got_cover:
                meta['cover']=imgfile

            try:
                writeMeta(newname,meta)
            except Exception:
                metafaillist.append(title)

    return faillist,metafaillist
Ejemplo n.º 5
0
def main(dbfile, outdir, album, verbose):
    '''Export the audio files of the selected album(s) found in a database.

    Args:
        dbfile (str): path to the sqlite database file. Its directory is
            passed to processAlbum() as the input directory.
        outdir (str): output directory, created if it does not exist.
        album: album selection given to getAlbumList().
        verbose (bool): print progress messages if True.

    Returns:
        0 when all albums were processed (individual failures are listed in
        the printed summary), 1 when the database could not be opened, no
        album was selected or <outdir> could not be created.
    '''

    try:
        db = sqlite3.connect(dbfile)
    except Exception:
        # Message: failed to open the data file
        printHeader(dgbk('无法打开数据文件'))
        printInd(dbfile)
        return 1

    if verbose:
        # Message: data file opened
        printHeader(dgbk('打开数据文件:'))
        printInd(dbfile, 2)

    faillist = []
    metafaillist = []

    # try/finally so the connection is released on the early returns and on
    # unexpected errors as well as on the normal path.
    try:
        #--------------------Fetch data--------------------
        df = getData(db)
        indir = os.path.split(os.path.abspath(dbfile))[0]

        #----------------Get album list----------------
        albumlist = getAlbumList(df, album)
        if len(albumlist) == 0:
            return 1

        #----------Create output dir if not exist----------
        if not os.path.isdir(outdir):
            try:
                os.makedirs(outdir)
            except Exception:
                printHeader('Failed to create output directory: %s' % outdir)
                return 1

        #---------------Loop through albums---------------
        for ii, albumii in enumerate(albumlist):
            idii, albumnameii = albumii
            if verbose:
                # Message: processing album "<name>"
                printNumHeader(dgbk('处理专辑: "')+albumnameii+'"',\
                           ii+1,len(albumlist),1)
            failistii, metafaillistii = processAlbum(df, indir, outdir, idii,
                                                     verbose)
            faillist.extend(failistii)
            metafaillist.extend(metafaillistii)

        #-----------------Close connection-----------------
        if verbose:
            # Message: closing the data file
            printHeader(dgbk('关闭数据文件:'))
    finally:
        db.close()

    #------------------Print summary------------------
    faillist = list(set(faillist))
    metafaillist = list(set(metafaillist))

    # Message: summary
    printHeader(dgbk('总结'), 1)
    if len(faillist) > 0:
        # Message: failed to copy
        printHeader(dgbk('拷贝失败:'), 2)
        for failii in faillist:
            printInd(failii, 2)

    if len(metafaillist) > 0:
        # Message: failed to write metadata
        printHeader(dgbk('元数据写入失败:'), 2)
        for failii in metafaillist:
            printInd(failii, 2)

    if len(faillist) == 0 and len(metafaillist) == 0:
        # Message: all done
        printHeader(dgbk('全部完成'), 2)

    return 0
Ejemplo n.º 6
0
def processAlbum(df, indir, outdir, albumid, verbose=True):
    '''Export the audio files of one album and tag them.

    For each track of the album: if the recorded <downloaded> size is smaller
    than <totalBytes> the audio is downloaded (primary url first, then the
    alternative url); otherwise the local file under <indir>/Download is
    copied. When HAS_MUTAGEN is true, metadata is then written to the
    exported file.

    Args:
        df: table of tracks, read by attribute access. Columns used here:
            albumId, albumName, albumImage, title, artist, downloaded,
            totalBytes, downloadUrl, downloadAacUrl, filepath.
        indir (str): directory holding the 'Download' folder with the
            local audio files.
        outdir (str): output directory; a subfolder named after the album
            is created inside it.
        albumid: value of <albumId> selecting the album's rows.
        verbose (bool): print progress messages if True.

    Returns:
        faillist (list): titles that could not be downloaded or copied.
        metafaillist (list): titles whose metadata could not be written.
    '''
    seldf = df[df.albumId == albumid]
    albumname = seldf.iloc[0].albumName
    faillist = []
    metafaillist = []

    subfolder = os.path.join(outdir, albumname)
    subfolder = convertPath(subfolder)
    if not os.path.isdir(subfolder):
        try:
            os.makedirs(subfolder)
        except Exception:
            if verbose:
                printInd('Failed to create subfolder %s' % albumname, 2)
                printInd('Skip folder %s' % albumname, 2)
            # Only the tracks of this album are affected, not the whole table.
            faillist.extend(fetchField(seldf, 'title'))
            return faillist, metafaillist

    #------------Download album cover image------------
    albumImage = seldf.iloc[0].albumImage
    try:
        coverimg = os.path.join(subfolder, 'cover.jpg')
        imgfile = urlretrieve(albumImage, coverimg)[0]
        got_cover = True
    except Exception:
        # The cover is optional; tracks are still exported without it.
        imgfile = None
        got_cover = False

    #----------------Loop through files----------------
    for ii in range(len(seldf)):
        rowii = seldf.iloc[ii]
        title = rowii.title
        artist = rowii.artist
        downloaded = rowii.downloaded
        totalBytes = rowii.totalBytes
        downloadurl1 = rowii.downloadUrl
        downloadurl2 = rowii.downloadAacUrl
        filepath = rowii.filepath

        newname = "%s-%s.mp4" % (title, artist)
        newname = REPATTERN.sub(' ', newname)
        newname = os.path.join(tools.deu(subfolder), newname)
        newname = convertPath(newname)

        if verbose:
            # Message: getting file <title>
            printInd(dgbk('获取文件: ') + title, 2)

        if downloaded < totalBytes:
            #-----If incomplete download, try downloading now-----
            if verbose:
                printInd('Downloading imcomplete audio:', 2)
                printInd(title, 2)

            # Try the primary url, fall back to the alternative one. The
            # track counts as failed only if both attempts raise.
            fetched = False
            for urlii in (downloadurl1, downloadurl2):
                try:
                    urlretrieve(urlii, newname)
                except Exception:
                    continue
                fetched = True
                break

            if not fetched:
                if verbose:
                    printInd('Failed to download %s' % title, 2)
                faillist.append(title)
                continue
        else:
            #----------------------Export----------------------
            filename = os.path.join(indir, 'Download')
            filename = os.path.join(filename, filepath)

            copied = False
            if os.path.exists(filename):
                try:
                    shutil.copy2(filename, newname)
                    copied = True
                except Exception:
                    copied = False

            if not copied:
                # Record the failure whether or not verbose is on; a missing
                # source file is an export failure too.
                if verbose:
                    printInd('Failed to copy file %s' % title, 2)
                faillist.append(title)
                continue

        #------------Write metadata (optional)------------
        if HAS_MUTAGEN:

            if verbose:
                # Message: writing metadata for <title>
                printInd(dgbk('为音频写入元数据: ') + title, 2)

            # The exported file is named *.mp4, so mp4 tag names are used.
            meta={'\xa9nam':title, '\xa9ART': artist, '\xa9alb': albumname,\
                  'comments': 'Exported from Ximalaya by XimaExport'}
            if got_cover:
                meta['cover'] = imgfile

            try:
                writeMeta(newname, meta)
            except Exception:
                metafaillist.append(title)

    return faillist, metafaillist