def extractAnnos(annotations,action,verbose):
    '''Extract highlighted texts and notes for every document.

    Args:
        annotations (dict): keys: docid, values: annotation objects exposing
            <path> and <filename> attributes.
        action: container of action flags; 'm' requests highlights and 'n'
            requests notes (tested with the <in> operator).
        verbose (bool): print progress messages if True.

    Returns:
        annotations2 (dict): keys: docid, values: the same annotation objects
            with their <highlights> and <notes> attributes assigned. Each is
            an empty list if not requested or if its extraction failed.
        faillist (list): file names of docs for which at least one extraction
            failed, one entry per doc.
    '''

    faillist=[]
    annotations2={}  #keys: docid, values: extracted annotations

    #-----------Loop through documents---------------
    num=len(annotations)

    for ii,(idii,annoii) in enumerate(annotations.items()):

        fii=annoii.path
        fnameii=annoii.filename
        failedii=False
        hltexts=[]
        nttexts=[]

        if verbose:
            printNumHeader('Processing file:',ii+1,num,3)
            printInd(fnameii,4)

        #-----------------Get highlights-----------------
        if 'm' in action:
            # Imported lazily so that the module is only needed when
            # highlights are actually requested.
            from lib import extracthl2

            # Catch Exception rather than everything, so that Ctrl-C
            # (KeyboardInterrupt) still aborts a long extraction.
            try:
                #------ Check if pdftotext is available--------
                if extracthl2.checkPdftotext():
                    if verbose:
                        printInd('Retrieving highlights using pdftotext ...',4,prefix='# <Menotexport>:')
                    hltexts=extracthl2.extractHighlights2(fii,annoii,verbose)
                else:
                    if verbose:
                        printInd('Retrieving highlights using pdfminer ...',4,prefix='# <Menotexport>:')
                    hltexts=extracthl2.extractHighlights(fii,annoii,verbose)
            except Exception:
                failedii=True
                hltexts=[]

        #--------------------Get notes--------------------
        if 'n' in action:
            if verbose:
                printInd('Retrieving notes...',4,prefix='# <Menotexport>:')
            try:
                nttexts=extractnt.extractNotes(fii,annoii,verbose)
            except Exception:
                failedii=True
                nttexts=[]

        # Record a failed doc only once, even if both extractions failed.
        if failedii:
            faillist.append(fnameii)

        annoii.highlights=hltexts
        annoii.notes=nttexts
        annotations2[idii]=annoii

    return annotations2,faillist
def main(dbfin,outdir,action,folder,separate,iszotero,verbose=True):
    '''Run the export for the selected folder(s) of a library database.

    Args:
        dbfin (str): path to the sqlite database file.
        outdir: output directory, passed on to processFolder() and
            processCanonicals().
        action: requested actions, passed on unchanged.
        folder: folder selector given to getFolderList(). None selects all
            folders plus the docs that do not belong to any folder.
        separate, iszotero: options passed on unchanged.
        verbose (bool): print progress messages if True.

    Returns:
        1 if the database cannot be opened or no docs are found, otherwise 0.
        Individual failures are only listed in the printed summary.
    '''

    # Catch Exception rather than everything, so that Ctrl-C still works.
    try:
        db = sqlite3.connect(dbfin)
    except Exception:
        printHeader('Failed to connect to database:')
        printInd(dbfin)
        return 1

    if verbose:
        printHeader('Connected to database:')
        printInd(dbfin,2)

    exportfaillist=[]
    annofaillist=[]
    bibfaillist=[]
    risfaillist=[]

    # The try/finally makes sure the connection is released on the early
    # return below and when processing raises.
    try:
        #----------------Get folder list----------------
        folderlist=getFolderList(db,folder)
        allfolders=folder is None

        #---------------Get canonical doc ids--------------
        # Always bound, so the emptiness check below cannot hit an
        # undefined name when a specific folder is requested.
        canonical_doc_ids=getCanonicals(db) if allfolders else []

        if len(folderlist)==0 and len(canonical_doc_ids)==0:
            printHeader('It looks like no docs are found in the library. Quit.')
            return 1

        #---------------Loop through folders---------------
        for ii,(fidii,fnameii) in enumerate(folderlist):
            if verbose:
                printNumHeader('Processing folder: "%s"' %fnameii,
                        ii+1,len(folderlist),1)
            exportii,annoii,bibii,risii=processFolder(db,outdir,{},
                    fidii,fnameii,allfolders,action,separate,iszotero,verbose)

            exportfaillist.extend(exportii)
            annofaillist.extend(annoii)
            bibfaillist.extend(bibii)
            risfaillist.extend(risii)

        #---------------Process canonical docs ------------
        if len(canonical_doc_ids)>0:
            if verbose:
                printHeader('Processing docs under "My Library"')
            exportii,annoii,bibii,risii=processCanonicals(db,outdir,{},
                    canonical_doc_ids,allfolders,action,separate,iszotero,verbose)

            exportfaillist.extend(exportii)
            annofaillist.extend(annoii)
            bibfaillist.extend(bibii)
            risfaillist.extend(risii)

            printHeader('NOTE that docs not belonging to any folder is saved to directory : "Canonical-My Library"')

        #-----------------Close connection-----------------
        if verbose:
            printHeader('Drop connection to database:')
    finally:
        db.close()

    #------------------Print summary------------------
    printHeader('Summary',1)

    reports=[
            (exportfaillist,'Failed to export PDFs:'),
            (annofaillist,'Failed to extract and export highlights/notes:'),
            (bibfaillist,'Failed to export to .bib files:'),
            (risfaillist,'Failed to export to .ris files:'),
            ]

    anyfail=False
    for faillistii,msgii in reports:
        # The same doc may be reported from several folders; list it once.
        uniqueii=list(set(faillistii))
        if len(uniqueii)>0:
            anyfail=True
            printHeader(msgii,2)
            for failii in uniqueii:
                printInd(failii,2)

    if not anyfail and verbose:
        printHeader('All done.',2)

    #-----------------Remove tmp file-----------------
    if os.path.exists('tmp.txt'):
        os.remove('tmp.txt')

    return 0
def main(dbfile,outdir,album,verbose):
    '''Export the files of the selected album(s) listed in a database.

    Args:
        dbfile (str): path to the sqlite database file. Its directory is
            passed to processAlbum() as the input directory.
        outdir (str): output directory, created if it does not exist.
        album: album selector given to getAlbumList().
        verbose (bool): print progress messages if True.

    Returns:
        1 if the database cannot be opened, no album is selected or the
        output directory cannot be created, otherwise 0. Individual failures
        are only listed in the printed summary.
    '''

    # Catch Exception rather than everything, so that Ctrl-C still works.
    try:
        db = sqlite3.connect(dbfile)
    except Exception:
        # "Failed to connect to database"
        printHeader(dgbk('无法打开数据文件'))
        printInd(dbfile)
        return 1

    if verbose:
        # "Connected to database:"
        printHeader(dgbk('打开数据文件:'))
        printInd(dbfile,2)

    faillist=[]
    metafaillist=[]

    # The try/finally makes sure the connection is released on the early
    # returns below and when processing raises.
    try:
        #--------------------Fetch data--------------------
        df=getData(db)
        indir=os.path.dirname(os.path.abspath(dbfile))

        #----------------Get album list----------------
        albumlist=getAlbumList(df,album)
        if len(albumlist)==0:
            return 1

        #----------Create output dir if not exist----------
        if not os.path.isdir(outdir):
            try:
                os.makedirs(outdir)
            except Exception:
                printHeader('Failed to create output directory: %s' %outdir)
                return 1

        #---------------Loop through albums---------------
        for ii,(idii,albumnameii) in enumerate(albumlist):
            if verbose:
                # "Processing album: <name>"
                printNumHeader(dgbk('处理专辑: "')+albumnameii+'"',
                        ii+1,len(albumlist),1)
            faillistii,metafaillistii=processAlbum(df,indir,outdir,idii,verbose)
            faillist.extend(faillistii)
            metafaillist.extend(metafaillistii)

        #-----------------Close connection-----------------
        if verbose:
            # "Drop connection to database:"
            printHeader(dgbk('关闭数据文件:'))
    finally:
        db.close()

    #------------------Print summary------------------
    faillist=list(set(faillist))
    metafaillist=list(set(metafaillist))

    # "Summary"
    printHeader(dgbk('总结'),1)
    if len(faillist)>0:
        # "Failed to export:"
        printHeader(dgbk('拷贝失败:'),2)
        for failii in faillist:
            printInd(failii,2)

    if len(metafaillist)>0:
        # "Failed to write meta data in:"
        printHeader(dgbk('元数据写入失败:'),2)
        for failii in metafaillist:
            printInd(failii,2)

    if len(faillist)==0 and len(metafaillist)==0:
        # "All done."
        printHeader(dgbk('全部完成'),2)

    return 0
def processAlbum(df,indir,outdir,albumid,verbose=True):
    '''Process files in an album

    Copies (or downloads, if the local copy is incomplete) every file of the
    album into a sub-folder of <outdir> and optionally writes meta data.

    Args:
        df: data frame with one row per file. The columns read here are
            albumId, albumName, albumImage, title, artist, downloaded,
            totalBytes, downloadUrl, downloadAacUrl and filepath.
        indir (str): directory containing the 'Download' folder that holds
            the local files.
        outdir (str): output directory; a sub-folder named after the album
            is created in it.
        albumid: value of the albumId column selecting the album.
        verbose (bool): print progress messages if True.

    Returns:
        faillist (list): titles that could not be downloaded or copied.
        metafaillist (list): titles whose meta data could not be written.
        Both are empty if no row matches <albumid>.
    '''

    faillist=[]
    metafaillist=[]

    seldf=df[df.albumId==albumid]
    # Nothing to do for an unknown album (iloc[0] below would raise).
    if len(seldf)==0:
        return faillist,metafaillist

    albumname=seldf.iloc[0].albumName

    subfolder=os.path.join(outdir,albumname)
    subfolder=convertPath(subfolder)
    if not os.path.isdir(subfolder):
        try:
            os.makedirs(subfolder)
        except Exception:
            if verbose:
                printInd('Failed to create subfolder %s' %albumname,2)
                printInd('Skip folder %s' %albumname,2)
            # Only the titles of this album are affected, not the whole table.
            faillist.extend(fetchField(seldf,'title'))
            return faillist,metafaillist

    #------------Download album cover image------------
    albumImage=seldf.iloc[0].albumImage
    imgfile=None
    try:
        coverimg=os.path.join(subfolder,'cover.jpg')
        imgfile=urlretrieve(albumImage,coverimg)[0]
        got_cover=True
    except Exception:
        # The cover is optional; go on without it.
        got_cover=False

    #----------------Loop through files----------------
    for ii in range(len(seldf)):
        rowii=seldf.iloc[ii]
        title=rowii.title
        artist=rowii.artist

        newname="%s-%s.mp4" %(title,artist)
        newname=REPATTERN.sub(' ',newname)
        newname=os.path.join(tools.deu(subfolder),newname)
        newname=convertPath(newname)

        if verbose:
            # "Getting file for: <title>"
            printInd(dgbk('获取文件: ')+title, 2)

        #-----If imcomplete download, try downloading now-----
        fetched=False
        if rowii.downloaded<rowii.totalBytes:
            if verbose:
                printInd('Downloading imcomplete audio:',2)
                printInd(title,2)

            # Try the primary url first, then the fallback one. The file is
            # reported as failed only if both attempts fail.
            for urljj in (rowii.downloadUrl,rowii.downloadAacUrl):
                try:
                    urlretrieve(urljj,newname)
                except Exception:
                    pass
                else:
                    fetched=True
                    break

            if not fetched:
                if verbose:
                    printInd('Failed to download %s' %title,2)
                faillist.append(title)
                continue

        #----------------------Export----------------------
        if not fetched:
            filename=os.path.join(indir,'Download',rowii.filepath)
            # NOTE(review): a missing source file is skipped silently here,
            # as before; consider reporting it in faillist.
            if os.path.exists(filename):
                try:
                    shutil.copy2(filename,newname)
                except Exception:
                    if verbose:
                        printInd('Failed to copy file %s' %title,2)
                    faillist.append(title)
                    continue

        #------------Write metadata (optional)------------
        if HAS_MUTAGEN:
            if verbose:
                # "Writing metadata for: <title>"
                printInd(dgbk('为音频写入元数据: ')+title, 2)

            # Files are saved with an .mp4 extension, hence the mp4 tag names.
            meta={'\xa9nam':title, '\xa9ART': artist, '\xa9alb': albumname,
                    'comments': 'Exported from Ximalaya by XimaExport'}
            if got_cover:
                meta['cover']=imgfile

            try:
                writeMeta(newname,meta)
            except Exception:
                metafaillist.append(title)

    return faillist,metafaillist
def main(dbfile, outdir, album, verbose):
    '''Export the files of the selected album(s) listed in a database.

    Args:
        dbfile (str): path to the sqlite database file. Its directory is
            passed to processAlbum() as the input directory.
        outdir (str): output directory, created if it does not exist.
        album: album selector given to getAlbumList().
        verbose (bool): print progress messages if True.

    Returns:
        1 if the database cannot be opened, no album is selected or the
        output directory cannot be created, otherwise 0. Individual failures
        are only listed in the printed summary.
    '''

    # Catch Exception rather than everything, so that Ctrl-C still works.
    try:
        db = sqlite3.connect(dbfile)
    except Exception:
        # "Failed to connect to database"
        printHeader(dgbk('无法打开数据文件'))
        printInd(dbfile)
        return 1

    if verbose:
        # "Connected to database:"
        printHeader(dgbk('打开数据文件:'))
        printInd(dbfile, 2)

    faillist = []
    metafaillist = []

    # The try/finally makes sure the connection is released on the early
    # returns below and when processing raises.
    try:
        #--------------------Fetch data--------------------
        df = getData(db)
        indir = os.path.dirname(os.path.abspath(dbfile))

        #----------------Get album list----------------
        albumlist = getAlbumList(df, album)
        if len(albumlist) == 0:
            return 1

        #----------Create output dir if not exist----------
        if not os.path.isdir(outdir):
            try:
                os.makedirs(outdir)
            except Exception:
                printHeader('Failed to create output directory: %s' % outdir)
                return 1

        #---------------Loop through albums---------------
        for ii, (idii, albumnameii) in enumerate(albumlist):
            if verbose:
                # "Processing album: <name>"
                printNumHeader(dgbk('处理专辑: "')+albumnameii+'"',
                               ii+1, len(albumlist), 1)
            faillistii, metafaillistii = processAlbum(df, indir, outdir, idii,
                                                      verbose)
            faillist.extend(faillistii)
            metafaillist.extend(metafaillistii)

        #-----------------Close connection-----------------
        if verbose:
            # "Drop connection to database:"
            printHeader(dgbk('关闭数据文件:'))
    finally:
        db.close()

    #------------------Print summary------------------
    faillist = list(set(faillist))
    metafaillist = list(set(metafaillist))

    # "Summary"
    printHeader(dgbk('总结'), 1)
    if len(faillist) > 0:
        # "Failed to export:"
        printHeader(dgbk('拷贝失败:'), 2)
        for failii in faillist:
            printInd(failii, 2)

    if len(metafaillist) > 0:
        # "Failed to write meta data in:"
        printHeader(dgbk('元数据写入失败:'), 2)
        for failii in metafaillist:
            printInd(failii, 2)

    if len(faillist) == 0 and len(metafaillist) == 0:
        # "All done."
        printHeader(dgbk('全部完成'), 2)

    return 0
def processAlbum(df, indir, outdir, albumid, verbose=True):
    '''Process files in an album

    Copies (or downloads, if the local copy is incomplete) every file of the
    album into a sub-folder of <outdir> and optionally writes meta data.

    Args:
        df: data frame with one row per file. The columns read here are
            albumId, albumName, albumImage, title, artist, downloaded,
            totalBytes, downloadUrl, downloadAacUrl and filepath.
        indir (str): directory containing the 'Download' folder that holds
            the local files.
        outdir (str): output directory; a sub-folder named after the album
            is created in it.
        albumid: value of the albumId column selecting the album.
        verbose (bool): print progress messages if True.

    Returns:
        faillist (list): titles that could not be downloaded or copied.
        metafaillist (list): titles whose meta data could not be written.
        Both are empty if no row matches <albumid>.
    '''

    faillist = []
    metafaillist = []

    seldf = df[df.albumId == albumid]
    # Nothing to do for an unknown album (iloc[0] below would raise).
    if len(seldf) == 0:
        return faillist, metafaillist

    albumname = seldf.iloc[0].albumName

    subfolder = os.path.join(outdir, albumname)
    subfolder = convertPath(subfolder)
    if not os.path.isdir(subfolder):
        try:
            os.makedirs(subfolder)
        except Exception:
            if verbose:
                printInd('Failed to create subfolder %s' % albumname, 2)
                printInd('Skip folder %s' % albumname, 2)
            # Only the titles of this album are affected, not the whole table.
            faillist.extend(fetchField(seldf, 'title'))
            return faillist, metafaillist

    #------------Download album cover image------------
    albumImage = seldf.iloc[0].albumImage
    imgfile = None
    try:
        coverimg = os.path.join(subfolder, 'cover.jpg')
        imgfile = urlretrieve(albumImage, coverimg)[0]
        got_cover = True
    except Exception:
        # The cover is optional; go on without it.
        got_cover = False

    #----------------Loop through files----------------
    for ii in range(len(seldf)):
        rowii = seldf.iloc[ii]
        title = rowii.title
        artist = rowii.artist

        newname = "%s-%s.mp4" % (title, artist)
        newname = REPATTERN.sub(' ', newname)
        newname = os.path.join(tools.deu(subfolder), newname)
        newname = convertPath(newname)

        if verbose:
            # "Getting file for: <title>"
            printInd(dgbk('获取文件: ') + title, 2)

        #-----If imcomplete download, try downloading now-----
        fetched = False
        if rowii.downloaded < rowii.totalBytes:
            if verbose:
                printInd('Downloading imcomplete audio:', 2)
                printInd(title, 2)

            # Try the primary url first, then the fallback one. The file is
            # reported as failed only if both attempts fail.
            for urljj in (rowii.downloadUrl, rowii.downloadAacUrl):
                try:
                    urlretrieve(urljj, newname)
                except Exception:
                    pass
                else:
                    fetched = True
                    break

            if not fetched:
                if verbose:
                    printInd('Failed to download %s' % title, 2)
                faillist.append(title)
                continue

        #----------------------Export----------------------
        if not fetched:
            filename = os.path.join(indir, 'Download', rowii.filepath)
            # NOTE(review): a missing source file is skipped silently here,
            # as before; consider reporting it in faillist.
            if os.path.exists(filename):
                try:
                    shutil.copy2(filename, newname)
                except Exception:
                    if verbose:
                        printInd('Failed to copy file %s' % title, 2)
                    faillist.append(title)
                    continue

        #------------Write metadata (optional)------------
        if HAS_MUTAGEN:
            if verbose:
                # "Writing metadata for: <title>"
                printInd(dgbk('为音频写入元数据: ') + title, 2)

            # Files are saved with an .mp4 extension, hence the mp4 tag names.
            meta = {'\xa9nam': title, '\xa9ART': artist, '\xa9alb': albumname,
                    'comments': 'Exported from Ximalaya by XimaExport'}
            if got_cover:
                meta['cover'] = imgfile

            try:
                writeMeta(newname, meta)
            except Exception:
                metafaillist.append(title)

    return faillist, metafaillist