コード例 #1
0
ファイル: pubAlg.py プロジェクト: floe/pubMunch
def runAnnotate(reader, alg, paramDict, outName):
    """Annotate all articles in reader and write the annotations to outName.

    reader    -- object providing iterArticlesFileList(onlyMeta, bestMain,
                 onlyMain), which yields (articleData, fileDataList) pairs
    alg       -- the annotator; if it has a "startup" attribute, that is
                 called once with paramDict before any article is processed
    paramDict -- parameters for the annotator; the optional key "addFields"
                 (default: empty list) is passed on to writeHeaders and
                 writeAnnotations
    outName   -- final output filename, or the string "stdout" to write
                 directly to sys.stdout

    When writing to a file, output first goes to a local temporary file,
    which is moved to outName only after all articles were processed without
    an exception.
    """
    toStdout = (outName == "stdout")
    if toStdout:
        tmpOutFname = None
        outFh = sys.stdout
    else:
        # only allocate a temp file when we are really going to write to one
        tmpOutFname = makeLocalTempFile()
        outFh = pubStore.utf8GzWriter(tmpOutFname)

    try:
        doSectioning = attributeTrue(alg, "sectioning")
        logging.debug("Sectioning activated: %s", doSectioning)

        if hasattr(alg, "startup"):
            logging.debug("Running startup")
            alg.startup(paramDict)

        addFields = paramDict.get("addFields", [])
        writeHeaders(alg, outFh, doSectioning, addFields)

        annotIdAdd = getAnnotId(alg, paramDict)
        onlyMain, onlyMeta, bestMain = getAlgPrefs(alg, paramDict)

        # note the argument order expected by the reader: onlyMeta, bestMain, onlyMain
        for articleData, fileDataList in reader.iterArticlesFileList(onlyMeta, bestMain, onlyMain):
            logging.debug("Annotating article %s with %d files, %s",
                articleData.articleId, len(fileDataList),
                [x.fileId for x in fileDataList])
            for fileData in fileDataList:
                writeAnnotations(alg, articleData, fileData, outFh,
                    annotIdAdd, doSectioning, addFields)
    finally:
        # always release the writer, even if the annotator raised;
        # sys.stdout is not ours to close
        if not toStdout:
            outFh.close()

    # reached only on success: a partial temp file is never moved into place
    if not toStdout:
        moveTempToFinal(tmpOutFname, outName)
コード例 #2
0
ファイル: pubAlg.py プロジェクト: Moxikai/pubMunch
def newTempOutFile(tmpFnames, outName, alg, addFields):
    """Open a new temporary output file on local disk and record its name.

    tmpFnames -- list of temporary filenames; the new temp filename is
                 appended to it in place
    outName   -- if this is the string "stdout", no temp file is created and
                 sys.stdout is returned instead
    alg       -- the annotator, passed on to writeHeaders
    addFields -- passed on to writeHeaders; if None, no headers are written

    Returns a tuple (outFh, tmpFnames): the open output file handle and the
    same tmpFnames list that was passed in.

    NOTE(review): no headers are written when outName is "stdout" -- confirm
    that callers do not rely on headers in that case.
    """
    if outName == "stdout":
        return sys.stdout, tmpFnames

    tmpOutFname = makeLocalTempFile()
    tmpFnames.append(tmpOutFname)
    outFh = pubStore.utf8GzWriter(tmpOutFname)

    # identity test: an empty addFields list must still trigger the headers
    if addFields is not None:
        writeHeaders(alg, outFh, addFields)
    return outFh, tmpFnames