def mask_file(infile, outfile, type, case):
    """Mask every sequence of a FASTA file and write the result to a new file.

    The input is parsed with ``fasta.fasta_dic``, each sequence is replaced
    by the value returned from the selected masking function, and the whole
    collection is written with ``fasta.generate_fasta``.

    Args:
        infile: Path of the FASTA file to read.
        outfile: Path of the FASTA file to write (opened in ``'w'`` mode).
        type: Masking method: ``"longAT"`` selects ``mask_longAT`` and
            ``"dust"`` selects ``mask_dust``.  The name shadows the builtin
            but is kept because it is part of the public signature.
        case: Passed through unchanged as the second argument of the
            masking function.

    Raises:
        ValueError: If ``type`` is not one of the supported methods.
            Previously this surfaced as an ``UnboundLocalError`` on the
            first sequence.
    """
    # Resolve the masking routine once, and reject unknown methods before
    # any file is touched.
    if type == "longAT":
        mask = mask_longAT
    elif type == "dust":
        mask = mask_dust
    else:
        raise ValueError(
            "unknown mask type %r (expected 'longAT' or 'dust')" % (type,)
        )

    # Close the input handle as soon as parsing is done.
    # NOTE(review): assumes fasta.fasta_dic reads the file eagerly and
    # returns a dict-like id -> sequence mapping; confirm against fasta.
    with open(infile, 'r') as fin:
        seq_dic = fasta.fasta_dic(fin)

    # Snapshot the ids so the mapping can be updated while looping.
    for seq_id in list(seq_dic.keys()):
        seq_dic[seq_id] = mask(seq_dic[seq_id], case)

    # The context manager guarantees the output is flushed and closed.
    with open(outfile, 'w') as fout:
        fasta.generate_fasta(seq_dic, fout)
def Generate_Group(fastafile, outfile):
    """Write one group record per FASTA sequence id to ``outfile``.

    For every id returned by ``fasta.fasta_dic`` three lines are written:
    ``[id]``, ``id`` and ``+ 1 1 1 1``.

    Args:
        fastafile: Either a file name (string) or an already opened file
            object holding FASTA data.
        outfile: Writable file object.  It is closed before this function
            returns, so the caller must not write to it afterwards.
    """
    import fasta

    # A file name can be concatenated with a string, an open file cannot:
    # this probe tells the two accepted argument kinds apart.
    try:
        fastafile + 'a'
    except TypeError:
        opened_here = False
    else:
        fastafile = open(fastafile, 'r')
        opened_here = True

    try:
        id_seq_dic = fasta.fasta_dic(fastafile)
    finally:
        # Only close a handle this function created; a handle passed in by
        # the caller stays under the caller's control.
        if opened_here:
            fastafile.close()

    for seq_id in id_seq_dic.keys():
        outfile.write('[%s]\n' % (seq_id,))
        outfile.write(seq_id + '\n')
        outfile.write('+ 1 1 1 1\n')
    outfile.close()
if len(sys.argv[:]) != 6: print "USAGE: python Pick70_genome_itself.py inputfasta genomefasta groupfile(fout) version(BLAST/BLAT/GFCLIENT) masklower(yes, no)" else: print "Identifying the input sequences' genomic target ... " input = sys.argv[1] genome = sys.argv[2] fout =open( sys.argv[3],'w') VERSION = string.upper(sys.argv[4]) os.environ['genome'] = genome os.environ['input'] = input gfdir ="" #convert to upper case if no masking user lower case finput = open(input,'r') seq_dic= fasta.fasta_dic(finput) index= seq_dic.keys() if sys.argv[5] !="yes": for id in index: seq_dic[id] = string.upper(seq_dic[id]) tempinput = '~ftempinput' ftempinput =open(tempinput,'w') for id in index: ftempinput.write('>'+id+'\n') ftempinput.write(seq_dic[id]+'\n') ftempinput.close() #blast version if (VERSION =="BLAST"): #generate the blast db if the db files do not exist if (os.access(genome+".nsq", os.R_OK)) and (os.access(genome+".nin", os.R_OK)) and (os.access(genome+".nhr", os.R_OK)):
# OLIGOLEN = 70

# Publish the command-line arguments through the process environment.
# The os.popen command at the bottom reads $file and $len this way;
# inputf / genomef / STRAND are presumably consumed the same way by
# commands outside this fragment -- TODO confirm.
os.environ["inputf"] = sys.argv[1]
os.environ["genomef"] = genomefile = sys.argv[2]
out = open(sys.argv[3], "w")
os.environ["STRAND"] = sys.argv[4]

# Stop when the genome's .nhr file is readable but not writable; the
# message below treats that state as another instance being active.
nhr_file = genomefile + ".nhr"
if os.access(nhr_file, os.R_OK) and not os.access(nhr_file, os.W_OK):
    print("The same program is running. the current program can not procede")
    sys.exit()

# Redo the sw_score / TM part: parse the input FASTA and prepare the
# result tables (not filled anywhere in this fragment).
inputf = open(sys.argv[1], "r")
input_dic = fasta.fasta_dic(inputf)
sw_dic, gc_dic, repeat_dic = {}, {}, {}

for key, seq in input_dic.items():
    # Hand the sequence to ./code/SW through the scratch file "~temp",
    # passing the file name and sequence length via the environment.
    os.environ["len"] = str(len(seq))
    fout = open("~temp", "w")
    fout.write(seq)
    fout.close()
    os.environ["file"] = "~temp"
    f = os.popen("./code/SW $file 0 $len")
elif __name__ == "__main__": print "OLIGO program in progress ... " OLIGOLEN = int(sys.argv[6]) DB = sys.argv[3] TRACEFLAG = int(sys.argv[4]) # print to stdio STRAND = sys.argv[5] inputfile_name = sys.argv[1] infile = open(inputfile_name, "r") id_seq_dic = {} # process input seqfasta file { id:seq,.......} id_seq_dic = fasta.fasta_dic(infile) ids = id_seq_dic.keys() # only input ids # does this need to mask lower case in the furthur blast if sys.argv[7] != "yes": for id in ids: id_seq_dic[id] = string.upper(id_seq_dic[id]) # clean up redundant sequence in input file seq_id_dic = all_id_seq_dic(id_seq_dic) input_dup_list = [] print "the following sequences are duplicated in the input file:", # error log ferror = open("error.log", "a") for values in seq_id_dic.values(): if len(values) > 1: for id in values[1:]: