Esempi in Python per removeTabNl

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: pubStore

Metodo/funzione: removeTabNl

Esempi su hotexamples.com: 4

removeTabNl in Python: 4 esempi trovati. Questi sono i migliori esempi reali in Python per pubStore.removeTabNl, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Esempio n. 1

Mostra file

File: pubAlg.py Progetto: Moxikai/pubMunch

def runAnnotate(reader, alg, paramDict, outName):
    """Annotate all articles from reader with alg and write the rows to outName.

    Rows are written tab-separated, one per line, into a temporary output
    file obtained from newTempOutFile. Unless outName is "stdout", the
    collected temp files are handed to moveManyTempToFinal only after every
    row has been written. An empty row coming back from the annotator closes
    the current temp file and starts a new one (again, unless outName is
    "stdout").

    reader, alg   -- passed through unchanged to runAnnotateIter
    paramDict     -- dict; its optional "addFields" entry (default []) is
                     forwarded to newTempOutFile and runAnnotateIter
    outName       -- final output name, or the literal string "stdout"
    """
    extraFields = paramDict.get("addFields", [])
    toStdout = (outName == "stdout")

    tempNames = []
    outFh, tempNames = newTempOutFile(tempNames, outName, alg, extraFields)

    for row in runAnnotateIter(reader, alg, paramDict, extraFields):
        if len(row) == 0 and not toStdout:
            # an empty row is the signal to roll over to a fresh temp file
            outFh.close()
            outFh, tempNames = newTempOutFile(tempNames, outName, alg, extraFields)
        else:
            line = "\t".join([pubStore.removeTabNl(cell) for cell in row])
            outFh.write(line)
            outFh.write("\n")

    # cleanup is optional on the algorithm object
    if "cleanup" in dir(alg):
        logging.info("Running cleanup")
        alg.cleanup()

    if not toStdout:
        outFh.close()
        moveManyTempToFinal(tempNames, outName)

Esempio n. 2

Mostra file

File: pubAlg.py Progetto: maximilianh/pubMunch

def writeRow(row, outFh):
    """Write the values of row to outFh as a single tab-separated line.

    Each value is converted with unicode() and passed through
    pubStore.removeTabNl. Unicode results are encoded as UTF-8 byte strings,
    anything else goes through str(). The line terminator is written with a
    separate write call.
    """
    cells = []
    for value in row:
        cleaned = pubStore.removeTabNl(unicode(value))
        if isinstance(cleaned, unicode):
            cells.append(cleaned.encode('utf8'))
        else:
            cells.append(str(cleaned))

    outFh.write("\t".join(cells))
    outFh.write("\n")

Esempio n. 3

Mostra file

File: pubAlg.py Progetto: floe/pubMunch

def writeAnnotations(alg, articleData, fileData, outFh, annotIdAdd, doSectioning, addFields):
    """ Use alg to annotate fileData and write one tab-separated line per
    annotation row to outFh.

    Each output line consists of, in order:
      - an 18-digit zero-padded annotation id, counted up from
        int(fileData.fileId) * 10**ANNOTDIGITS + annotIdAdd
      - the article's externalId, or "0" if articleData is None
      - one value per name in addFields, looked up in articleData
        ("" if the field is missing or articleData is None)
      - the fields of the row returned by alg.annotateFile
      - only if alg.headers starts with "start", "end": the section name
        (when doSectioning is set) and the snippet (when there is one)
    Every field is converted with unicode() and passed through
    pubStore.removeTabNl before being joined.

    Supplementary files (fileType "supp") are annotated as one "supplement"
    section. Other files are split with pubGeneric.sectionRanges when
    doSectioning is set, falling back to a single "unknown" section.

    Returns the number of annotation rows written.
    """
    annotDigits = int(pubConf.ANNOTDIGITS)
    fileDigits = int(pubConf.FILEDIGITS)  # not used further in this function
    annotIdStart = (int(fileData.fileId) * (10**annotDigits)) + annotIdAdd
    logging.debug("fileId %s, annotIdStart %d, fileLen %d" % (fileData.fileId, annotIdStart, len(fileData.content)))

    # "\a" characters in the stored content are turned into newlines before annotating
    text = fileData.content.replace("\a", "\n")

    if fileData.fileType == "supp":
        sections = {"supplement": (0, len(text))}
    else:
        allTextSections = {"unknown": (0, len(text))}
        if doSectioning:
            sections = pubGeneric.sectionRanges(text)
            if sections is None:
                sections = allTextSections
        else:
            sections = allTextSections

    annotCount = 0
    for section, sectionRange in sections.iteritems():
        secStart, secEnd = sectionRange
        if section != "unknown":
            logging.debug("Annotating section %s, from %d to %d" % (section, secStart, secEnd))
        secText = text[secStart:secEnd]
        # the annotator only ever sees the text of the current section
        fileData = fileData._replace(content=secText)
        annots = alg.annotateFile(articleData, fileData)
        if annots is None:
            logging.debug("No annotations received")
            continue

        for row in annots:
            # prefix with fileId, extId
            logging.debug("received annotation row: %s" %  str(row))
            fields = ["%018d" % (int(annotIdStart)+annotCount)]
            # articleData may be None: fall back to a dummy external id and
            # an empty field dict instead of failing on None._asdict()
            if articleData is not None:
                extId = articleData.externalId
                artDict = articleData._asdict()
            else:
                extId = "0"
                artDict = {}
            fields.append(extId)
            # add addFields
            if addFields is not None:
                for addField in addFields:
                    fields.append(artDict.get(addField, ""))
            # add other fields
            fields.extend(row)

            # check if alg actually returns coordinates
            if alg.headers[0] == "start" and alg.headers[1] == "end":
                start, end = row[0:2]
                if (start, end) == (0, 0):
                    snippet = None
                else:
                    snippet = getSnippet(secText, start, end)
                    # lift start and end if sectioning
                    # NOTE(review): the lifted values are not written; the row
                    # was already appended to fields above with its
                    # section-relative coordinates -- confirm this is intended
                    start = secStart+int(start)
                    end = secStart+int(end)

                # postfix with snippet
                logging.debug("Got row: %s" % str(row))
                if doSectioning:
                    fields.append(section)
                if snippet is not None:
                    fields.append(snippet)
            fields = [pubStore.removeTabNl(unicode(x)) for x in fields]

            line = "\t".join(fields)
            outFh.write(line+"\n")
            annotCount += 1
            # the id layout only leaves room for 10**annotDigits annotations per file
            assert(annotCount < 10**annotDigits)
    return annotCount

Esempio n. 4

Mostra file

File: pubAlg.py Progetto: Moxikai/pubMunch

def writeRow(row, outFh):
    """Write the values of row to outFh as a single tab-separated line.

    Each value is converted with unicode() and passed through
    pubStore.removeTabNl (presumably stripping tabs and newlines, going by
    its name -- confirm against pubStore) so that a value cannot break the
    one-line-per-row, tab-separated layout.
    """
    newRow = [pubStore.removeTabNl(unicode(x)) for x in row]
    # join the cleaned values, not the raw row: joining row would drop the
    # cleanup above and raise TypeError for any non-string value
    outFh.write("\t".join(newRow))
    outFh.write("\n")