Esempio n. 1
0
def batchStartup(paramDict):
    """Compile the word list named in paramDict into a fastFind file.

    paramDict must provide the key "wordFname"; its value is handed to
    fastFind.compileDict together with fastFind.DASHWORDRE as the word regex.
    """
    logging.info("Compiling wordlist to fastFind-file")
    wordListFname = paramDict["wordFname"]
    fastFind.compileDict(wordListFname, wordRe=fastFind.DASHWORDRE)
Esempio n. 2
0
    #accs = list(set(accs))
    #for delChar in ["*", ",", ".", "/", "(", ")"]: 
        #accs = [acc.replace(delChar," ").replace("  ", " ") for acc in accs]
    #dictFh.write("\t".join( ("*"+row.acc, "|".join(accs)) )+"\n")

    names = set()
    names = appendAll(names,prepNames(row.protFullNames.split("|")))
    names = appendAll(names,prepNames(row.protShortNames.split("|")))
    names = appendAll(names,prepNames(row.protAltNames.split("|")))

    #names = appendAll(names,prepSymbols(row.hugo.split("|"), bncWords))
    #names = appendAll(names,prepSymbols(row.geneName.split("|"), bncWords))
    #names = appendAll(names,prepSymbols(row.geneSynonyms.split("|"), bncWords))

    #names = appendAll(names,row.isoNames.split("|"))
    #names = appendAll(names,row.geneOrdLocus.split("|"))
    #names = appendAll(names,row.geneOrf.split("|"))
    # certain characters cannot be part of a word, replace them with a space
    for delChar in ["*", ",", ".", "/", "(", ")"]:
        names = [name.replace(delChar," ").replace("  ", " ") for name in names if len(name)>2]
    names = list(set(names))
    dictFh.write("\t".join( (row.acc, "|".join(names)) )+"\n")

# Report where the raw dictionary was written. Each print below takes exactly
# one parenthesized argument, which produces the same output under Python 2
# and is also valid syntax under Python 3.
print("Wrote to %s" % (dictFh.name))
# Disabled alternative kept for reference:
#fastFind.compileDict(dictFh.name, toLower=True)
print("Compiling dict to gzipped marshal file")
fastFind.compileDict(dictFh.name)
# De-duplicate and sort the ignored symbols so the final report is stable.
ignoredWords = sorted(set(ignoredWords))
print("Ignored these symbols: " + ",".join(ignoredWords))
Esempio n. 3
0
def batchStartup(paramDict):
    """Compile the word list named in paramDict into a fastFind file.

    paramDict must provide the key "wordFname"; its value is handed to
    fastFind.compileDict together with fastFind.DASHWORDRE as the word regex.
    """
    logging.info("Compiling wordlist to fastFind-file")
    wordListFname = paramDict["wordFname"]
    fastFind.compileDict(wordListFname, wordRe=fastFind.DASHWORDRE)