def Write_SeqFile(seq, id):
    """Write one masked sequence to a temporary FASTA-style file.

    The sequence is passed through ``Pick70_mask.mask_longAT(seq, "lower")``
    and the result is written beneath a ``>id`` header line.

    Parameters:
        seq: the sequence; handed unchanged to ``Pick70_mask.mask_longAT``.
        id:  record identifier (a string). It is written verbatim in the
             header line and is also used to build the output file name.

    Returns:
        The name of the file that was written: ``'~temp'`` followed by the
        first 100 characters of ``id`` with every ``'/'`` removed.
    """
    # Mask before opening the file so a failure here leaves nothing on disk.
    masked_seq = Pick70_mask.mask_longAT(seq, "lower")

    # The id is truncated first and only then stripped of '/', so the name
    # suffix can be shorter than 100 characters.  The str method replaces the
    # deprecated string.replace() function and behaves identically.
    # NOTE(review): only '/' is removed; other characters that are awkward in
    # file names (e.g. '\\', spaces) are passed through -- confirm acceptable.
    outfile = '~temp' + id[:100].replace('/', '')

    out = open(outfile, 'w')
    try:
        out.write('>' + id + '\n')
        out.write(masked_seq + '\n')
    finally:
        # Always release the handle, even if one of the writes fails.
        out.close()
    return outfile
def filter_mask(data_dic, SYMBOL_LIST, TOLERANT, MASK):
    """Keep the entries of ``data_dic`` whose mask score does not exceed MASK.

    For every entry, the element at index 4 of its value is taken as the
    sequence, exported to the process environment under the name ``seq``,
    and scored with ``Pick70_mask.mask(seq, SYMBOL_LIST, TOLERANT)``.
    Entries whose score is ``<= MASK`` are copied (``value[:]``) into the
    result.  A line ``mask: <input count> <kept count>`` is printed.

    Parameters:
        data_dic:    mapping of key -> indexable record; record[4] is used
                     as the sequence.
        SYMBOL_LIST: forwarded unchanged to ``Pick70_mask.mask``.
        TOLERANT:    forwarded unchanged to ``Pick70_mask.mask``.
        MASK:        upper bound (inclusive) on the accepted mask score.

    Returns:
        A new dict holding slice-copies of the accepted records.
    """
    kept = {}
    for key, record in data_dic.items():
        sequence = record[4]
        # NOTE(review): the sequence is published as environment variable
        # "seq" before scoring; presumably something downstream reads it --
        # confirm before removing or reordering.
        os.environ["seq"] = sequence
        score = Pick70_mask.mask(sequence, SYMBOL_LIST, TOLERANT)
        if not (score <= MASK):
            continue
        kept[key] = record[:]

    # Built as one string so the emitted line is the same whether ``print``
    # is parsed as a statement or called as a function.
    print(" ".join(["mask:", str(len(data_dic)), str(len(kept))]))
    return kept
if (os.access(genome+".nsq", os.R_OK)) and (os.access(genome+".nin", os.R_OK)) and (os.access(genome+".nhr", os.R_OK)): pass else: fr, fw, fe = os.popen3('formatdb -i $genome -p F') errormessage = fe.read() fr.close() fw.close() fe.close() if errormessage!="": print errormessage print "program terminated" sys.exit(1) maskinput =open('~maskedinput','w') for id in index: masked_seq = Pick70_mask.mask_longAT(seq_dic[id],"lower") maskinput.write('>'+id+'\n') maskinput.write(masked_seq+'\n') maskinput.close() os.environ['input'] = '~maskedinput' fr, fw, fe = os.popen3('blastall -i "$input" -d $genome -a $cpunum -p blastn -v 50 -b 50 -e 1 -o ~tempblatout -F "m D" -U') elif (VERSION =="BLAT"): Pick70_mask.mask_file(tempinput,input+".mask","dust", "lower") #blat version fr,fw,fe = os.popen3('blat "$genome" "$input".mask -out=blast -qMask=lower ~tempblatout') elif (VERSION =="GFCLIENT"): Pick70_mask.mask_file(tempinput,input+".mask","dust", "lower") #gfCleint version