Esempio n. 1
0
def processFolder(db,outdir,annotations,folderid,foldername,allfolders,action,\
        separate,iszotero,verbose):
    '''Process files/docs in a folder.

    <db>: sqlite database.
    <outdir>: str, output directory path.
    <annotations>: dict, keys: documentId; values: highlights, notes and meta.
                   See doc in getHighlights().
    <folderid>: int, folder id.
    <foldername>: string, folder name corresponding to <folderid>.
    <allfolders>: bool, user chooses to process all folders or one folder.
    <action>: list of action codes. Codes acted upon in this function:
              'm': collect highlights, export annotations to text,
              'n': collect notes, export annotations to text,
              'p': export PDFs (also collects highlights and notes),
              'b': export meta-data and annotations to .bib file,
              'r': export meta-data and annotations to .ris file.
              The list is also handed unchanged to extractAnnos() and to the
              exportAnno() helpers.
    <separate>: bool, whether save one output for each file or all files.
    <iszotero>: bool, whether exported .bib is reformated to cater to zotero import or not.
    <verbose>: bool, whether to print progress headers.

    Return <exportfaillist>,<annofaillist>,<bibfaillist>,<risfaillist>:
           lists of the failures reported by the PDF export, annotation
           extraction/export, .bib export and .ris export helpers.
    '''

    exportfaillist=[]
    annofaillist=[]
    bibfaillist=[]
    risfaillist=[]

    # Exporting PDFs ('p') needs both highlights and notes.
    ishighlight='m' in action or 'p' in action
    isnote='n' in action or 'p' in action

    #------------Get raw annotation data------------
    if ishighlight:
        annotations = getHighlights(db,annotations,folderid,foldername)
    if isnote:
        annotations = getNotes(db, annotations, folderid,foldername)
        annotations = getDocNotes(db, annotations, folderid,foldername)

    if len(annotations)==0:
        printHeader('No annotations found in folder: %s' %foldername,2)
        # Docs without annotations are still handled by the 'p', 'b' and 'r'
        # actions below, so only quit early when none of them is requested.
        if not any(code in action for code in ('b','p','r')):
            return exportfaillist,annofaillist,bibfaillist,risfaillist
    else:
        #---------------Reformat annotations---------------
        annotations=reformatAnno(annotations)

    #------Get other docs without annotations------
    otherdocs=getOtherDocs(db,folderid,foldername,annotations.keys())

    #--------Make subdir using folder name--------
    outdir_folder=os.path.join(outdir,foldername)
    if not os.path.isdir(outdir_folder):
        os.makedirs(outdir_folder)

    #-------------------Export PDFs-------------------
    if 'p' in action:
        if len(annotations)>0:
            if verbose:
                printHeader('Exporting annotated PDFs ...',2)
            flist=exportpdf.exportAnnoPdf(annotations,\
                    outdir_folder,verbose)
            exportfaillist.extend(flist)

        #--------Copy other PDFs to target location--------
        if len(otherdocs)>0:
            if verbose:
                printHeader('Exporting un-annotated PDFs ...',2)
            flist=exportpdf.copyPdf(otherdocs,outdir_folder,verbose)
            exportfaillist.extend(flist)

    #----------Extract annotations from PDFs----------
    if len(annotations)>0:
        if verbose:
            printHeader('Extracting annotations from PDFs ...',2)
        annotations,flist=extractAnnos(annotations,action,verbose)
        annofaillist.extend(flist)

    #------------Export annotations to txt------------
    if ('m' in action or 'n' in action) and len(annotations)>0:
        if verbose:
            printHeader('Exporting annotations to text file...',2)
        flist=exportannotation.exportAnno(annotations,outdir_folder,action,\
                separate,verbose)
        annofaillist.extend(flist)

        #--------Export annotations grouped by tags--------
        tagsdict=extracttags.groupByTags(annotations)
        extracttags.exportAnno(tagsdict,outdir_folder,action,verbose)

    # Flag handed to the .bib/.ris exporters: True when PDFs are exported
    # in this run.
    isfile='p' in action

    #----------Export meta and anno to bib file----------
    if 'b' in action:

        if verbose:
            printHeader('Exporting meta-data and annotations to .bib file...',2)

        # <outdir> is the base folder to save outputs, specified by user.
        # <bibfolder> is the folder to save .bib file, which is <outdir> if
        # <allfolders> is True, or <outdir>/<foldername> otherwise.
        bibfolder=outdir if allfolders else outdir_folder

        #-----------Export docs with annotations-----------
        if len(annotations)>0:
            flist=export2bib.exportAnno2Bib(annotations,outdir,\
                bibfolder,allfolders,isfile,iszotero,verbose)
            bibfaillist.extend(flist)

        #------Export other docs without annotations------
        if len(otherdocs)>0:
            flist=export2bib.exportDoc2Bib(otherdocs,outdir,\
                bibfolder,allfolders,isfile,iszotero,verbose)
            bibfaillist.extend(flist)

    #----------Export meta and anno to ris file----------
    if 'r' in action:

        if verbose:
            printHeader('Exporting meta-data and annotations to .ris file...',2)

        # <risfolder> is the folder to save .ris file, which is <outdir> if
        # <allfolders> is True, or <outdir>/<foldername> otherwise.
        risfolder=outdir if allfolders else outdir_folder

        #-----------Export docs with annotations-----------
        if len(annotations)>0:
            flist=export2ris.exportAnno2Ris(annotations,outdir,\
                risfolder,allfolders,isfile,iszotero,verbose)
            risfaillist.extend(flist)

        #------Export other docs without annotations------
        if len(otherdocs)>0:
            flist=export2ris.exportDoc2Ris(otherdocs,outdir,\
                risfolder,allfolders,isfile,iszotero,verbose)
            risfaillist.extend(flist)

    return exportfaillist,annofaillist,bibfaillist,risfaillist
Esempio n. 2
0
def main(dbfile,outdir,album,verbose):
    '''Process the albums found in a sqlite data file.

    <dbfile>: str, path to the sqlite data file. Its parent directory is
              passed to processAlbum() as the input directory.
    <outdir>: str, output directory path, created if it does not exist.
    <album>: album selection, passed to getAlbumList() as is.
    <verbose>: bool, whether to print progress messages.

    Return <rec>: int, 1 if the data file can not be opened, the album list
                  is empty or <outdir> can not be created; 0 otherwise.
                  Per-album failures are listed in the summary and do not
                  change the return value.
    '''

    try:
        db = sqlite3.connect(dbfile)
    except sqlite3.Error:
        # Message: "Can not open data file"
        printHeader(dgbk('无法打开数据文件'))
        printInd(dbfile)
        return 1

    if verbose:
        # Message: "Open data file:"
        printHeader(dgbk('打开数据文件:'))
        printInd(dbfile,2)

    faillist=[]
    metafaillist=[]

    # try/finally makes sure the connection is released on the early
    # returns and on unexpected errors as well.
    try:
        #--------------------Fetch data--------------------
        df=getData(db)
        indir=os.path.split(os.path.abspath(dbfile))[0]

        #----------------Get album list----------------
        albumlist=getAlbumList(df,album)
        if len(albumlist)==0:
            return 1

        #----------Create output dir if not exist----------
        if not os.path.isdir(outdir):
            try:
                os.makedirs(outdir)
            except OSError:
                printHeader('Failed to create output directory: %s' %outdir)
                return 1

        #---------------Loop through albums---------------
        for ii,albumii in enumerate(albumlist):
            idii,albumnameii=albumii
            if verbose:
                # Message: 'Processing album: "<name>"'
                printNumHeader(dgbk('处理专辑: "')+albumnameii+'"',\
                    ii+1,len(albumlist),1)
            faillistii,metafaillistii=processAlbum(df,indir,outdir,idii,verbose)
            faillist.extend(faillistii)
            metafaillist.extend(metafaillistii)

        #-----------------Close connection-----------------
        if verbose:
            # Message: "Close data file:"
            printHeader(dgbk('关闭数据文件:'))
    finally:
        db.close()

    #------------------Print summary------------------
    # Drop duplicated entries before reporting.
    faillist=list(set(faillist))
    metafaillist=list(set(metafaillist))

    # Message: "Summary"
    printHeader(dgbk('总结'),1)
    if len(faillist)>0:
        # Message: "Failed to copy:"
        printHeader(dgbk('拷贝失败:'),2)
        for failii in faillist:
            printInd(failii,2)

    if len(metafaillist)>0:
        # Message: "Failed to write meta data:"
        printHeader(dgbk('元数据写入失败:'),2)
        for failii in metafaillist:
            printInd(failii,2)

    if len(faillist)==0 and len(metafaillist)==0:
        # Message: "All done"
        printHeader(dgbk('全部完成'),2)

    return 0
Esempio n. 3
0
def main(dbfin,outdir,action,folder,separate,iszotero,verbose=True):
    '''Process the docs in a sqlite database folder by folder.

    <dbfin>: str, path to the sqlite database file.
    <outdir>: str, output directory path, passed to processFolder() and
              processCanonicals().
    <action>: list, action codes, passed to processFolder() and
              processCanonicals().
    <folder>: None to process all folders plus the docs not belonging to any
              folder; otherwise the folder selection passed to
              getFolderList().
    <separate>: bool, passed to processFolder() and processCanonicals().
    <iszotero>: bool, passed to processFolder() and processCanonicals().
    <verbose>: bool, whether to print progress messages.

    Return <rec>: int, 1 if the database can not be opened or no docs are
                  found; 0 otherwise. Failures of individual docs are listed
                  in the summary and do not change the return value.
    '''

    try:
        db = sqlite3.connect(dbfin)
    except sqlite3.Error:
        printHeader('Failed to connect to database:')
        printInd(dbfin)
        return 1

    if verbose:
        printHeader('Connected to database:')
        printInd(dbfin,2)

    exportfaillist=[]
    annofaillist=[]
    bibfaillist=[]
    risfaillist=[]

    # try/finally makes sure the connection is released on the early return
    # and on unexpected errors as well.
    try:
        #----------------Get folder list----------------
        folderlist=getFolderList(db,folder)
        allfolders=folder is None

        #---------------Get canonical doc ids--------------
        # Always bind the name: the emptiness check below reads it even when
        # a specific folder is requested.
        canonical_doc_ids=getCanonicals(db) if allfolders else []

        if len(folderlist)==0 and len(canonical_doc_ids)==0:
            printHeader('It looks like no docs are found in the library. Quit.')
            return 1

        #---------------Loop through folders---------------
        for ii,folderii in enumerate(folderlist):
            fidii,fnameii=folderii
            if verbose:
                printNumHeader('Processing folder: "%s"' %fnameii,\
                        ii+1,len(folderlist),1)
            # Each folder starts from an empty annotation dict.
            exportfaillistii,annofaillistii,bibfaillistii,risfaillistii=\
                    processFolder(db,outdir,{},\
                    fidii,fnameii,allfolders,action,separate,iszotero,verbose)

            exportfaillist.extend(exportfaillistii)
            annofaillist.extend(annofaillistii)
            bibfaillist.extend(bibfaillistii)
            risfaillist.extend(risfaillistii)

        #---------------Process canonical docs ------------
        if allfolders and len(canonical_doc_ids)>0:
            if verbose:
                printHeader('Processing docs under "My Library"')
            exportfaillistii,annofaillistii,bibfaillistii,risfaillistii=\
                    processCanonicals(db,outdir,{},\
                    canonical_doc_ids,allfolders,action,separate,iszotero,verbose)

            exportfaillist.extend(exportfaillistii)
            annofaillist.extend(annofaillistii)
            bibfaillist.extend(bibfaillistii)
            risfaillist.extend(risfaillistii)

            printHeader('NOTE that docs not belonging to any folder is saved to directory : "Canonical-My Library"')

        #-----------------Close connection-----------------
        if verbose:
            printHeader('Drop connection to database:')
    finally:
        db.close()

    #------------------Print summary------------------
    summary=(
        ('Failed to export PDFs:',exportfaillist),
        ('Failed to extract and export highlights/notes:',annofaillist),
        ('Failed to export to .bib files:',bibfaillist),
        ('Failed to export to .ris files:',risfaillist),
        )

    printHeader('Summary',1)
    allgood=True
    for titleii,faillistii in summary:
        # Drop duplicated entries before reporting.
        faillistii=list(set(faillistii))
        if len(faillistii)>0:
            allgood=False
            printHeader(titleii,2)
            for failii in faillistii:
                printInd(failii,2)

    if allgood and verbose:
        printHeader('All done.',2)

    #-----------------Remove tmp file-----------------
    if os.path.exists('tmp.txt'):
        os.remove('tmp.txt')

    return 0
Esempio n. 4
0
def main(dbfile, outdir, album, verbose):
    '''Process the albums found in a sqlite data file.

    <dbfile>: str, path to the sqlite data file. Its parent directory is
              passed to processAlbum() as the input directory.
    <outdir>: str, output directory path, created if it does not exist.
    <album>: album selection, passed to getAlbumList() as is.
    <verbose>: bool, whether to print progress messages.

    Return <rec>: int, 1 if the data file can not be opened, the album list
                  is empty or <outdir> can not be created; 0 otherwise.
                  Per-album failures are listed in the summary and do not
                  change the return value.
    '''

    try:
        db = sqlite3.connect(dbfile)
    except sqlite3.Error:
        # Message: "Can not open data file"
        printHeader(dgbk('无法打开数据文件'))
        printInd(dbfile)
        return 1

    if verbose:
        # Message: "Open data file:"
        printHeader(dgbk('打开数据文件:'))
        printInd(dbfile, 2)

    faillist = []
    metafaillist = []

    # The finally clause releases the connection on every exit path,
    # including the early returns below.
    try:
        #--------------------Fetch data--------------------
        df = getData(db)
        indir = os.path.split(os.path.abspath(dbfile))[0]

        #----------------Get album list----------------
        albumlist = getAlbumList(df, album)
        if len(albumlist) == 0:
            return 1

        #----------Create output dir if not exist----------
        if not os.path.isdir(outdir):
            try:
                os.makedirs(outdir)
            except OSError:
                printHeader('Failed to create output directory: %s' % outdir)
                return 1

        #---------------Loop through albums---------------
        for ii, albumii in enumerate(albumlist):
            idii, albumnameii = albumii
            if verbose:
                # Message: 'Processing album: "<name>"'
                printNumHeader(dgbk('处理专辑: "') + albumnameii + '"',
                               ii + 1, len(albumlist), 1)
            faillistii, metafaillistii = processAlbum(df, indir, outdir, idii,
                                                      verbose)
            faillist.extend(faillistii)
            metafaillist.extend(metafaillistii)

        #-----------------Close connection-----------------
        if verbose:
            # Message: "Close data file:"
            printHeader(dgbk('关闭数据文件:'))
    finally:
        db.close()

    #------------------Print summary------------------
    # Drop duplicated entries before reporting.
    faillist = list(set(faillist))
    metafaillist = list(set(metafaillist))

    # Message: "Summary"
    printHeader(dgbk('总结'), 1)
    if len(faillist) > 0:
        # Message: "Failed to copy:"
        printHeader(dgbk('拷贝失败:'), 2)
        for failii in faillist:
            printInd(failii, 2)

    if len(metafaillist) > 0:
        # Message: "Failed to write meta data:"
        printHeader(dgbk('元数据写入失败:'), 2)
        for failii in metafaillist:
            printInd(failii, 2)

    if len(faillist) == 0 and len(metafaillist) == 0:
        # Message: "All done"
        printHeader(dgbk('全部完成'), 2)

    return 0