def processFolder(db,outdir,annotations,folderid,foldername,allfolders,action,
        separate,iszotero,verbose):
    '''Process files/docs in a folder.

    <db>: sqlite database.
    <outdir>: str, output directory path.
    <annotations>: dict, keys: documentId; values: highlights, notes and meta.
                   See doc in getHighlights().
    <folderid>: int, folder id.
    <foldername>: string, folder name corresponding to <folderid>.
    <allfolders>: bool, user chooses to process all folders or one folder.
    <action>: list, elements handled here:
              m: fetch highlights and export annotations to text,
              n: fetch notes and export annotations to text,
              p: export PDFs (highlights and notes are fetched as well),
              b: export meta-data and annotations to .bib,
              r: export meta-data and annotations to .ris.
    <separate>: bool, whether save one output for each file or all files.
    <iszotero>: bool, whether exported .bib is reformated to cater to zotero
                import or not.
    <verbose>: bool, whether to print a header before each processing step.

    Return <exportfaillist>, <annofaillist>, <bibfaillist>, <risfaillist>:
           lists of failures collected from the PDF export, the annotation
           extraction/export, the .bib export and the .ris export steps.
    '''

    exportfaillist=[]
    annofaillist=[]
    bibfaillist=[]
    risfaillist=[]

    # 'p' needs both highlights and notes
    ishighlight='m' in action or 'p' in action
    isnote='n' in action or 'p' in action

    #------------Get raw annotation data------------
    if ishighlight:
        annotations=getHighlights(db,annotations,folderid,foldername)
    if isnote:
        annotations=getNotes(db,annotations,folderid,foldername)
        annotations=getDocNotes(db,annotations,folderid,foldername)

    if len(annotations)==0:
        printHeader('No annotations found in folder: %s' %foldername,2)
        # Docs without annotations are still exported by the PDF ('p'),
        # .bib ('b') and .ris ('r') steps below, so only quit early when
        # none of those was requested.
        if 'b' not in action and 'p' not in action and 'r' not in action:
            return exportfaillist,annofaillist,bibfaillist,risfaillist
    else:
        #---------------Reformat annotations---------------
        annotations=reformatAnno(annotations)

    #------Get other docs without annotations------
    otherdocs=getOtherDocs(db,folderid,foldername,annotations.keys())

    #--------Make subdir using folder name--------
    outdir_folder=os.path.join(outdir,foldername)
    if not os.path.isdir(outdir_folder):
        os.makedirs(outdir_folder)

    #-------------------Export PDFs-------------------
    if 'p' in action:
        if len(annotations)>0:
            if verbose:
                printHeader('Exporting annotated PDFs ...',2)
            flist=exportpdf.exportAnnoPdf(annotations,outdir_folder,verbose)
            exportfaillist.extend(flist)

        #--------Copy other PDFs to target location--------
        if len(otherdocs)>0:
            if verbose:
                printHeader('Exporting un-annotated PDFs ...',2)
            flist=exportpdf.copyPdf(otherdocs,outdir_folder,verbose)
            exportfaillist.extend(flist)

    #----------Extract annotations from PDFs----------
    if len(annotations)>0:
        if verbose:
            printHeader('Extracting annotations from PDFs ...',2)
        annotations,flist=extractAnnos(annotations,action,verbose)
        annofaillist.extend(flist)

    #------------Export annotations to txt------------
    if ('m' in action or 'n' in action) and len(annotations)>0:
        if verbose:
            printHeader('Exporting annotations to text file...',2)
        flist=exportannotation.exportAnno(annotations,outdir_folder,action,
                separate,verbose)
        annofaillist.extend(flist)

        #--------Export annotations grouped by tags--------
        tagsdict=extracttags.groupByTags(annotations)
        extracttags.exportAnno(tagsdict,outdir_folder,action,verbose)

    # Passed as <isfile> to the .bib/.ris exporters: True when PDFs are
    # exported in this run.
    isfile='p' in action

    #----------Export meta and anno to bib file----------
    if 'b' in action:
        if verbose:
            printHeader('Exporting meta-data and annotations to .bib file...',2)

        # <outdir> is the base folder to save outputs, specified by user
        # <bibfolder> is the folder to save .bib file, which is <outdir>
        # if <allfolders> is True, or <outdir>/<foldername> otherwise.
        bibfolder=outdir if allfolders else outdir_folder

        #-----------Export docs with annotations-----------
        if len(annotations)>0:
            flist=export2bib.exportAnno2Bib(annotations,outdir,
                    bibfolder,allfolders,isfile,iszotero,verbose)
            bibfaillist.extend(flist)

        #------Export other docs without annotations------
        if len(otherdocs)>0:
            flist=export2bib.exportDoc2Bib(otherdocs,outdir,
                    bibfolder,allfolders,isfile,iszotero,verbose)
            bibfaillist.extend(flist)

    #----------Export meta and anno to ris file----------
    if 'r' in action:
        if verbose:
            printHeader('Exporting meta-data and annotations to .ris file...',2)

        # <risfolder> is the folder to save .ris file, which is <outdir>
        # if <allfolders> is True, or <outdir>/<foldername> otherwise.
        risfolder=outdir if allfolders else outdir_folder

        #-----------Export docs with annotations-----------
        if len(annotations)>0:
            flist=export2ris.exportAnno2Ris(annotations,outdir,
                    risfolder,allfolders,isfile,iszotero,verbose)
            risfaillist.extend(flist)

        #------Export other docs without annotations------
        if len(otherdocs)>0:
            flist=export2ris.exportDoc2Ris(otherdocs,outdir,
                    risfolder,allfolders,isfile,iszotero,verbose)
            risfaillist.extend(flist)

    return exportfaillist,annofaillist,bibfaillist,risfaillist
def main(dbfile,outdir,album,verbose):
    '''Export albums from a sqlite data file into <outdir>.

    <dbfile>: str, path to the sqlite data file.
    <outdir>: str, output directory path; created if it does not exist.
    <album>: album selection, passed on to getAlbumList().
    <verbose>: bool, whether to print progress messages.

    Return 0 when the album loop and the summary have been completed,
           1 when the data file cannot be opened, the album list is empty
           or <outdir> cannot be created.
    '''

    try:
        db=sqlite3.connect(dbfile)
    except sqlite3.Error:
        # Message: 'Failed to connect to database:'
        printHeader(dgbk('无法打开数据文件'))
        printInd(dbfile)
        return 1

    if verbose:
        # Message: 'Connected to database:'
        printHeader(dgbk('打开数据文件:'))
        printInd(dbfile,2)

    #--------------------Fetch data--------------------
    df=getData(db)
    indir=os.path.split(os.path.abspath(dbfile))[0]

    #----------------Get album list----------------
    albumlist=getAlbumList(df,album)
    if len(albumlist)==0:
        # Do not leave the connection open on the early exit
        db.close()
        return 1

    #----------Create output dir if not exist----------
    if not os.path.isdir(outdir):
        try:
            os.makedirs(outdir)
        except OSError:
            printHeader('Failed to create output directory: %s' %outdir)
            db.close()
            return 1

    #---------------Loop through albums---------------
    faillist=[]
    metafaillist=[]

    for ii,albumii in enumerate(albumlist):
        idii,albumnameii=albumii
        if verbose:
            # Message: 'Processing album: "<name>"'
            printNumHeader(dgbk('处理专辑: "')+albumnameii+'"',
                    ii+1,len(albumlist),1)
        faillistii,metafaillistii=processAlbum(df,indir,outdir,idii,verbose)
        faillist.extend(faillistii)
        metafaillist.extend(metafaillistii)

    #-----------------Close connection-----------------
    if verbose:
        # Message: 'Drop connection to database:'
        printHeader(dgbk('关闭数据文件:'))
    db.close()

    #------------------Print summary------------------
    # Report each failed item only once
    faillist=list(set(faillist))
    metafaillist=list(set(metafaillist))

    # Message: 'Summary'
    printHeader(dgbk('总结'),1)
    if len(faillist)>0:
        # Message: 'Failed to export:'
        printHeader(dgbk('拷贝失败:'),2)
        for failii in faillist:
            printInd(failii,2)

    if len(metafaillist)>0:
        # Message: 'Failed to write meta data in:'
        printHeader(dgbk('元数据写入失败:'),2)
        for failii in metafaillist:
            printInd(failii,2)

    if len(faillist)==0 and len(metafaillist)==0:
        # Message: 'All done.'
        printHeader(dgbk('全部完成'),2)

    return 0
def main(dbfin,outdir,action,folder,separate,iszotero,verbose=True):
    '''Export annotations, PDFs and meta-data from a sqlite database.

    <dbfin>: str, path to the sqlite database file.
    <outdir>: str, output directory path.
    <action>: list of action codes, passed on to processFolder() and
              processCanonicals().
    <folder>: folder selection passed to getFolderList(); None means all
              folders are processed, plus the docs returned by
              getCanonicals().
    <separate>: bool, whether save one output for each file or all files.
    <iszotero>: bool, whether exported .bib is reformated to cater to zotero
                import or not.
    <verbose>: bool, whether to print progress messages.

    Return 0 after processing and printing the summary,
           1 when the database cannot be opened, or when the whole library
           is requested but neither folders nor canonical docs are found.
    '''

    try:
        db=sqlite3.connect(dbfin)
    except sqlite3.Error:
        printHeader('Failed to connect to database:')
        printInd(dbfin)
        return 1

    if verbose:
        printHeader('Connected to database:')
        printInd(dbfin,2)

    #----------------Get folder list----------------
    folderlist=getFolderList(db,folder)
    allfolders=folder is None

    #---------------Get canonical doc ids--------------
    # Always bound, so later checks never hit an undefined name when a
    # specific <folder> is requested.
    canonical_doc_ids=[]
    if allfolders:
        canonical_doc_ids=getCanonicals(db)

        if len(folderlist)==0 and len(canonical_doc_ids)==0:
            printHeader('It looks like no docs are found in the library. Quit.')
            # Do not leave the connection open on the early exit
            db.close()
            return 1

    #---------------Process--------------------------
    exportfaillist=[]
    annofaillist=[]
    bibfaillist=[]
    risfaillist=[]

    #---------------Loop through folders---------------
    for ii,folderii in enumerate(folderlist):
        fidii,fnameii=folderii
        if verbose:
            printNumHeader('Processing folder: "%s"' %fnameii,
                    ii+1,len(folderlist),1)
        # Each folder starts from an empty annotation dict
        annotations={}
        exportfaillistii,annofaillistii,bibfaillistii,risfaillistii=\
                processFolder(db,outdir,annotations,
                fidii,fnameii,allfolders,action,separate,iszotero,verbose)
        exportfaillist.extend(exportfaillistii)
        annofaillist.extend(annofaillistii)
        bibfaillist.extend(bibfaillistii)
        risfaillist.extend(risfaillistii)

    #---------------Process canonical docs ------------
    if allfolders and len(canonical_doc_ids)>0:
        if verbose:
            printHeader('Processing docs under "My Library"')
        annotations={}
        exportfaillistii,annofaillistii,bibfaillistii,risfaillistii=\
                processCanonicals(db,outdir,annotations,
                canonical_doc_ids,allfolders,action,separate,iszotero,verbose)
        exportfaillist.extend(exportfaillistii)
        annofaillist.extend(annofaillistii)
        bibfaillist.extend(bibfaillistii)
        risfaillist.extend(risfaillistii)
        printHeader('NOTE that docs not belonging to any folder is saved to directory : "Canonical-My Library"')

    #-----------------Close connection-----------------
    if verbose:
        printHeader('Drop connection to database:')
    db.close()

    #------------------Print summary------------------
    # Report each failed item only once; one (title, failures) pair per
    # failure category.
    summaries=[
        ('Failed to export PDFs:',list(set(exportfaillist))),
        ('Failed to extract and export highlights/notes:',
            list(set(annofaillist))),
        ('Failed to export to .bib files:',list(set(bibfaillist))),
        ('Failed to export to .ris files:',list(set(risfaillist))),
        ]

    printHeader('Summary',1)
    hasfail=False
    for titleii,failsii in summaries:
        if len(failsii)>0:
            hasfail=True
            printHeader(titleii,2)
            for failii in failsii:
                printInd(failii,2)

    if not hasfail and verbose:
        printHeader('All done.',2)

    #-----------------Remove tmp file-----------------
    if os.path.exists('tmp.txt'):
        os.remove('tmp.txt')

    return 0
def main(dbfile, outdir, album, verbose):
    '''Export albums from a sqlite data file into <outdir>.

    <dbfile>: str, path to the sqlite data file.
    <outdir>: str, output directory path; created if it does not exist.
    <album>: album selection, passed on to getAlbumList().
    <verbose>: bool, whether to print progress messages.

    Return 0 when the album loop and the summary have been completed,
           1 when the data file cannot be opened, the album list is empty
           or <outdir> cannot be created.
    '''

    try:
        db = sqlite3.connect(dbfile)
    except sqlite3.Error:
        # Message: 'Failed to connect to database:'
        printHeader(dgbk('无法打开数据文件'))
        printInd(dbfile)
        return 1

    if verbose:
        # Message: 'Connected to database:'
        printHeader(dgbk('打开数据文件:'))
        printInd(dbfile, 2)

    #--------------------Fetch data--------------------
    df = getData(db)
    indir = os.path.split(os.path.abspath(dbfile))[0]

    #----------------Get album list----------------
    albumlist = getAlbumList(df, album)
    if len(albumlist) == 0:
        # Do not leave the connection open on the early exit
        db.close()
        return 1

    #----------Create output dir if not exist----------
    if not os.path.isdir(outdir):
        try:
            os.makedirs(outdir)
        except OSError:
            printHeader('Failed to create output directory: %s' % outdir)
            db.close()
            return 1

    #---------------Loop through albums---------------
    faillist = []
    metafaillist = []

    for ii, albumii in enumerate(albumlist):
        idii, albumnameii = albumii
        if verbose:
            # Message: 'Processing album: "<name>"'
            printNumHeader(dgbk('处理专辑: "')+albumnameii+'"',
                           ii+1, len(albumlist), 1)
        faillistii, metafaillistii = processAlbum(df, indir, outdir, idii,
                                                  verbose)
        faillist.extend(faillistii)
        metafaillist.extend(metafaillistii)

    #-----------------Close connection-----------------
    if verbose:
        # Message: 'Drop connection to database:'
        printHeader(dgbk('关闭数据文件:'))
    db.close()

    #------------------Print summary------------------
    # Report each failed item only once
    faillist = list(set(faillist))
    metafaillist = list(set(metafaillist))

    # Message: 'Summary'
    printHeader(dgbk('总结'), 1)
    if len(faillist) > 0:
        # Message: 'Failed to export:'
        printHeader(dgbk('拷贝失败:'), 2)
        for failii in faillist:
            printInd(failii, 2)

    if len(metafaillist) > 0:
        # Message: 'Failed to write meta data in:'
        printHeader(dgbk('元数据写入失败:'), 2)
        for failii in metafaillist:
            printInd(failii, 2)

    if len(faillist) == 0 and len(metafaillist) == 0:
        # Message: 'All done.'
        printHeader(dgbk('全部完成'), 2)

    return 0