def runAnnotate(reader, alg, paramDict, outName):
    """ annotate all articles in reader and write the results to outName

    reader: object providing iterArticlesFileList(onlyMeta, bestMain, onlyMain),
        which yields (articleData, fileDataList) pairs.
    alg: the annotator; if it has a "startup" attribute, alg.startup(paramDict)
        is called once before any article is processed.
    paramDict: algorithm parameters; the optional key "addFields" (default: [])
        is passed on to writeHeaders and writeAnnotations.
    outName: final output filename, or "stdout" to write to sys.stdout.

    For file output, everything is written to a temp file obtained from
    makeLocalTempFile() and moved to outName only after all articles were
    processed without an exception.
    """
    writeToStdout = (outName == "stdout")
    if writeToStdout:
        # no temp file needed: do not create one that would never be used
        tmpOutFname = None
        outFh = sys.stdout
    else:
        tmpOutFname = makeLocalTempFile()
        outFh = pubStore.utf8GzWriter(tmpOutFname)

    try:
        doSectioning = attributeTrue(alg, "sectioning")
        logging.debug("Sectioning activated: %s", doSectioning)

        if "startup" in dir(alg):
            logging.debug("Running startup")
            alg.startup(paramDict)

        addFields = paramDict.get("addFields", [])
        writeHeaders(alg, outFh, doSectioning, addFields)
        annotIdAdd = getAnnotId(alg, paramDict)
        onlyMain, onlyMeta, bestMain = getAlgPrefs(alg, paramDict)

        for articleData, fileDataList in reader.iterArticlesFileList(onlyMeta, bestMain, onlyMain):
            logging.debug("Annotating article %s with %d files, %s",
                articleData.articleId, len(fileDataList),
                [x.fileId for x in fileDataList])
            for fileData in fileDataList:
                writeAnnotations(alg, articleData, fileData, outFh,
                    annotIdAdd, doSectioning, addFields)
    finally:
        # always release the file handle, even if annotation failed;
        # sys.stdout is not ours to close
        if not writeToStdout:
            outFh.close()

    # only reached if no exception occurred: publish the finished file
    if not writeToStdout:
        moveTempToFinal(tmpOutFname, outName)
def newTempOutFile(tmpFnames, outName, alg, addFields):
    """ open a new temporary file on local disk and add its name to tmpFnames

    tmpFnames: list of temp filenames; the new temp filename is appended to it
        in place.
    outName: if this is "stdout", no temp file is created, no headers are
        written, tmpFnames is left unchanged and sys.stdout is returned.
    alg: the annotator, passed on to writeHeaders.
    addFields: passed on to writeHeaders; if None, no headers are written.

    Returns a tuple (outFh, tmpFnames) where tmpFnames is the same list object
    that was passed in.
    """
    if outName == "stdout":
        return sys.stdout, tmpFnames

    tmpOutFname = makeLocalTempFile()
    tmpFnames.append(tmpOutFname)
    outFh = pubStore.utf8GzWriter(tmpOutFname)

    if addFields is not None:
        # NOTE(review): runAnnotate calls writeHeaders(alg, outFh, doSectioning, addFields)
        # with four arguments - confirm that this three-argument call matches
        # the signature of writeHeaders.
        writeHeaders(alg, outFh, addFields)

    return outFh, tmpFnames