def batchStartup(paramDict):
    """Compile the configured word list with fastFind.

    Logs an info-level message, then passes the file name stored under
    paramDict["wordFname"] to fastFind.compileDict, using
    fastFind.DASHWORDRE as the word regular expression.

    paramDict: mapping that must contain the key "wordFname".
    """
    logging.info("Compiling wordlist to fastFind-file")
    # NOTE(review): presumably the path of the plain-text word list -- confirm
    # against the caller that fills in paramDict.
    wordlistFname = paramDict["wordFname"]
    fastFind.compileDict(wordlistFname, wordRe=fastFind.DASHWORDRE)
#accs = list(set(accs)) #for delChar in ["*", ",", ".", "/", "(", ")"]: #accs = [acc.replace(delChar," ").replace(" ", " ") for acc in accs] #dictFh.write("\t".join( ("*"+row.acc, "|".join(accs)) )+"\n") names = set() names = appendAll(names,prepNames(row.protFullNames.split("|"))) names = appendAll(names,prepNames(row.protShortNames.split("|"))) names = appendAll(names,prepNames(row.protAltNames.split("|"))) #names = appendAll(names,prepSymbols(row.hugo.split("|"), bncWords)) #names = appendAll(names,prepSymbols(row.geneName.split("|"), bncWords)) #names = appendAll(names,prepSymbols(row.geneSynonyms.split("|"), bncWords)) #names = appendAll(names,row.isoNames.split("|")) #names = appendAll(names,row.geneOrdLocus.split("|")) #names = appendAll(names,row.geneOrf.split("|")) # certain characters cannot be part of a word, replace them with a space for delChar in ["*", ",", ".", "/", "(", ")"]: names = [name.replace(delChar," ").replace(" ", " ") for name in names if len(name)>2] names = list(set(names)) dictFh.write("\t".join( (row.acc, "|".join(names)) )+"\n") print "Wrote to %s" % (dictFh.name) #fastFind.compileDict(dictFh.name, toLower=True) print "Compiling dict to gzipped marshal file" fastFind.compileDict(dictFh.name) ignoredWords = list(set(ignoredWords)) ignoredWords.sort() print "Ignored these symbols:", ",".join(ignoredWords)