コード例 #1
0
ファイル: pubExpMatrix.py プロジェクト: Moxikai/pubMunch
def findMethods(text):
    """Return the materials & methods section of text, or None if there is none.

    The section boundaries come from pubGeneric.sectionRanges(text); the range
    stored under the "methods" key is sliced out of text. None is returned
    when sectionRanges() returns None or reports no "methods" section.
    """
    sections = pubGeneric.sectionRanges(text)
    # None is a singleton: test identity, which a custom __eq__ cannot distort
    if sections is None or "methods" not in sections:
        return None
    methodStart, methodEnd = sections["methods"]
    return text[methodStart:methodEnd]
コード例 #2
0
def findMethods(text):
    """Extract the materials & methods section from text.

    Looks up the "methods" entry in the section ranges computed by
    pubGeneric.sectionRanges(text) and returns that slice of text.
    Returns None if no section ranges are available (sectionRanges()
    returned None) or if they contain no "methods" entry.
    """
    sections = pubGeneric.sectionRanges(text)
    # compare against the None singleton by identity rather than equality
    if sections is None:
        return None
    if "methods" not in sections:
        return None
    methodStart, methodEnd = sections["methods"]
    return text[methodStart:methodEnd]
コード例 #3
0
ファイル: pubAlg.py プロジェクト: floe/pubMunch
def writeAnnotations(alg, articleData, fileData, outFh, annotIdAdd, doSectioning, addFields):
    """Annotate fileData with alg and write one tab-separated line per annotation to outFh.

    Every output line consists of, in this order:
      - an 18-digit, zero-padded annotation id, counted up from
        int(fileData.fileId) * 10**ANNOTDIGITS + annotIdAdd
      - the external article id ("0" if articleData is None)
      - one value per name in addFields, looked up in articleData
        ("" if the field does not exist or articleData is None)
      - the fields of the row returned by alg.annotateFile()
      - only if the first two entries of alg.headers are "start" and "end":
        the section name (if doSectioning is set) and a text snippet
        (unless start and end are both 0)
    All fields are converted with unicode() and cleaned with
    pubStore.removeTabNl() before they are joined.

    Arguments:
      alg          -- object with a headers sequence and an
                      annotateFile(articleData, fileData) method that returns
                      an iterable of rows, or None
      articleData  -- namedtuple-like article record (externalId, _asdict()), or None
      fileData     -- namedtuple-like file record (fileId, fileType, content, _replace())
      outFh        -- file-like object, only write() is used
      annotIdAdd   -- offset added to the first annotation id of this file
      doSectioning -- if true, text of non-supplement files is split with
                      pubGeneric.sectionRanges() and each section is annotated separately
      addFields    -- names of articleData fields to copy into every line, or None

    Returns the number of annotations written (not the next free annotation id).
    """
    annotDigits = int(pubConf.ANNOTDIGITS)
    annotIdStart = (int(fileData.fileId) * (10**annotDigits)) + annotIdAdd
    logging.debug("fileId %s, annotIdStart %d, fileLen %d",
                  fileData.fileId, annotIdStart, len(fileData.content))

    # NOTE(review): "\a" presumably stands in for newlines in stored content - confirm
    text = fileData.content.replace("\a", "\n")

    # decide which ranges of the text are annotated separately
    wholeText = {"unknown": (0, len(text))}
    if fileData.fileType == "supp":
        sections = {"supplement": (0, len(text))}
    elif doSectioning:
        sections = pubGeneric.sectionRanges(text)
        if sections is None:
            sections = wholeText
    else:
        sections = wholeText

    # article-level values are identical for all rows, so look them up once.
    # articleData may be None: fall back to "0" / empty values instead of
    # failing on None._asdict()
    if articleData is not None:
        extId = articleData.externalId
        artDict = articleData._asdict()
    else:
        extId = "0"
        artDict = {}

    annotCount = 0
    for section, (secStart, secEnd) in sections.iteritems():
        if section != "unknown":
            logging.debug("Annotating section %s, from %d to %d", section, secStart, secEnd)
        secText = text[secStart:secEnd]
        # the algorithm only gets to see the text of the current section
        secFileData = fileData._replace(content=secText)
        annots = alg.annotateFile(articleData, secFileData)
        if annots is None:
            logging.debug("No annotations received")
            continue

        for row in annots:
            logging.debug("received annotation row: %s", str(row))
            # prefix with annotation id and external id
            fields = ["%018d" % (int(annotIdStart) + annotCount), extId]
            if addFields is not None:
                fields.extend(artDict.get(addField, "") for addField in addFields)
            fields.extend(row)

            # check if alg actually returns coordinates
            if alg.headers[0] == "start" and alg.headers[1] == "end":
                start, end = row[0:2]
                # (0, 0) gets no snippet
                if (start, end) == (0, 0):
                    snippet = None
                else:
                    snippet = getSnippet(secText, start, end)
                # NOTE(review): the start/end written above are relative to
                # secText, not to the whole file. Add secStart to both if
                # consumers expect file coordinates when sectioning.

                # postfix with section name and snippet
                logging.debug("Got row: %s", str(row))
                if doSectioning:
                    fields.append(section)
                if snippet is not None:
                    fields.append(snippet)

            fields = [pubStore.removeTabNl(unicode(x)) for x in fields]
            outFh.write("\t".join(fields) + "\n")
            annotCount += 1
            # the per-file counter has to fit into annotDigits decimal digits,
            # otherwise ids would run into the range of the next fileId
            assert annotCount < 10**annotDigits, "too many annotations for one file"
    return annotCount