def findMethods(text):
    """Return the "methods" section of text, or None if it cannot be located.

    Section boundaries are obtained from pubGeneric.sectionRanges(text).
    None is returned when that call yields None or when its result has no
    "methods" entry; otherwise the slice text[start:end] described by that
    entry is returned.
    """
    # NOTE(review): an identical definition of findMethods appears to follow
    # this one directly; if both are at module level the later one is the
    # definition that ends up bound to the name - confirm and drop one.
    sections = pubGeneric.sectionRanges(text)
    # None is a singleton, so test identity rather than equality
    if sections is None or "methods" not in sections:
        return None
    methodStart, methodEnd = sections["methods"]
    return text[methodStart:methodEnd]
def findMethods(text):
    """Return the "methods" section of text, or None if it cannot be located.

    Section boundaries are obtained from pubGeneric.sectionRanges(text).
    None is returned when that call yields None or when its result has no
    "methods" entry; otherwise the slice text[start:end] described by that
    entry is returned.
    """
    # NOTE(review): an identical definition of findMethods appears to precede
    # this one directly; one of the two copies is redundant - confirm and
    # drop one.
    sections = pubGeneric.sectionRanges(text)
    # None is a singleton, so test identity rather than equality
    if sections is None or "methods" not in sections:
        return None
    methodStart, methodEnd = sections["methods"]
    return text[methodStart:methodEnd]
def writeAnnotations(alg, articleData, fileData, outFh, annotIdAdd, doSectioning, addFields):
    """Annotate fileData with alg and write one tab-separated line per annotation.

    Arguments:
    alg          -- object providing annotateFile(articleData, fileData) and a
                    headers sequence naming the columns of the rows it returns
    articleData  -- article record (namedtuple-like: externalId, _asdict());
                    may be None, in which case the external id is written as
                    "0" and every addFields column is written empty
    fileData     -- file record (namedtuple-like: fileId, fileType, content,
                    _replace())
    outFh        -- writable file-like object that receives the lines
    annotIdAdd   -- offset added to the first annotation id of this file
    doSectioning -- if true, split the text with pubGeneric.sectionRanges and
                    append the section name to every written line
    addFields    -- names of articleData fields to copy into every line, or None

    Line layout: annotation id (18 digits, zero padded), external id, the
    addFields values, the row returned by alg, then the section name (only
    if doSectioning) and a text snippet (only if alg reports coordinates and
    they are not (0, 0)).

    Returns the number of annotation lines written by this call.
    Raises AssertionError if the count reaches 10**pubConf.ANNOTDIGITS.
    """
    annotDigits = int(pubConf.ANNOTDIGITS)
    annotIdStart = (int(fileData.fileId) * (10**annotDigits)) + annotIdAdd
    logging.debug("fileId %s, annotIdStart %d, fileLen %d" %
                  (fileData.fileId, annotIdStart, len(fileData.content)))
    # "\a" characters in the stored content are turned into newlines
    text = fileData.content.replace("\a", "\n")

    # decide which (start, end) ranges of the text get annotated separately
    wholeText = (0, len(text))
    if fileData.fileType == "supp":
        sections = {"supplement": wholeText}
    else:
        sections = None
        if doSectioning:
            sections = pubGeneric.sectionRanges(text)
        if sections is None:
            # sectioning is off or did not produce a result: one single range
            sections = {"unknown": wholeText}

    # per-article values are the same for every row, so resolve them once;
    # articleData may be None, which must not break the addFields lookup
    if articleData is not None:
        extId = articleData.externalId
        artDict = articleData._asdict()
    else:
        extId = "0"
        artDict = {}

    annotCount = 0
    for section, (secStart, secEnd) in sections.iteritems():
        if section != "unknown":
            logging.debug("Annotating section %s, from %d to %d" % (section, secStart, secEnd))
        secText = text[secStart:secEnd]
        # the algorithm only gets to see the text of the current section
        secFileData = fileData._replace(content=secText)
        annots = alg.annotateFile(articleData, secFileData)
        if annots is None:
            logging.debug("No annotations received")
            continue

        for row in annots:
            logging.debug("received annotation row: %s" % str(row))
            # prefix with annotation id and external id
            fields = ["%018d" % (int(annotIdStart) + annotCount), extId]
            if addFields is not None:
                fields.extend(artDict.get(addField, "") for addField in addFields)
            fields.extend(row)

            # a snippet can only be cut out if alg actually returns coordinates;
            # reset it for every row so that it is always defined below
            snippet = None
            if list(alg.headers[:2]) == ["start", "end"]:
                start, end = row[0:2]
                if (start, end) != (0, 0):
                    snippet = getSnippet(secText, start, end)
                # NOTE(review): the coordinates written out are the ones in
                # row, unchanged. The previous code also added secStart to
                # start/end but never wrote the result - confirm whether
                # document-level coordinates were meant to be output.

            logging.debug("Got row: %s" % str(row))
            # postfix with section name and snippet
            if doSectioning:
                fields.append(section)
            if snippet is not None:
                fields.append(snippet)
            fields = [pubStore.removeTabNl(unicode(x)) for x in fields]
            outFh.write("\t".join(fields) + "\n")
            annotCount += 1
            # the id has only annotDigits digits reserved for the per-file counter
            assert annotCount < 10**annotDigits
    return annotCount