def parsing_each_query(content, fout):
    """Write the full-length, 100%-identity hits of one query to *fout*.

    *content* is a sequence of strings that is joined and stripped; when the
    result is empty the function returns without writing anything.  The text
    is handed to ``blast_parse.parse_query_alignment`` and the first key of
    the returned dict is taken as the query id.  ``parse_dic[query][0]`` is
    used as the query length and ``parse_dic[query][1]`` as a dict mapping
    each hit id to a list of
    ``(strand, qstart, qend, hstart, hend, percentage)`` segments.

    A segment is kept only when
      1. its percentage is at least 100, and
      2. it spans the whole query (``abs(qend - qstart) + 1 >= query_len``).

    Output written to *fout*::

        [<query>]
        <query>
        + 1 <query_len> 1 <query_len>
        <hit id>
        <strand> <qstart> <qend> <hstart> <hend>
        ...
        <blank line>

    The module-level name ``gfdir`` (defined outside this block) controls how
    hit ids are shortened: when it is non-empty, the first
    ``len(gfdir) + 1`` characters are removed and the id is cut at ``.nib:``.
    (Presumably ``gfdir`` is the directory holding the .nib files -- confirm
    against the caller.)

    NOTE(review): this file contains another definition with the same name
    further down; if both live at module level the later one shadows this
    one -- confirm which is intended.
    """
    import sys  # local import: only needed for the progress output

    content = ''.join(content).strip()
    if content == '':
        return

    parse_dic = blast_parse.parse_query_alignment(content)
    query = list(parse_dic)[0]
    # Progress output: "<query> " now, "done" at the end, on a single line.
    sys.stdout.write('%s ' % (query,))

    query_len = parse_dic[query][0]
    hit_dic = parse_dic[query][1]

    # [[hitid, strand, qstart, qend, hstart, hend], ...]
    hit_candidate_segment = []
    for hitid, hit_align_list in hit_dic.items():
        for strand, qstart, qend, hstart, hend, percentage in hit_align_list:
            if percentage < 100:
                continue
            if abs(qend - qstart) + 1 < query_len:
                continue
            hit_candidate_segment.append(
                [hitid, strand, qstart, qend, hstart, hend])

    # Header, then the query itself as the first member of its own group.
    fout.write('[' + query + ']\n')
    fout.write(query + '\n')
    fout.write("+ 1 %s 1 %s\n" % (query_len, query_len))

    strip_gfdir = gfdir != ""
    if strip_gfdir:
        string_start = len(gfdir) + 1
    else:
        string_start = 0

    for hitid, hitstrand, qstart, qend, hstart, hend in hit_candidate_segment:
        hitid = hitid[string_start:]
        if strip_gfdir:
            nib_pos = hitid.find(".nib:")
            # find() returns -1 when the marker is missing; slicing with -1
            # would silently drop the last character of the id, so only cut
            # when the marker is really there.
            if nib_pos != -1:
                hitid = hitid[:nib_pos]
        fout.write(hitid + '\n')
        fout.write("%s %s %s %s %s\n"
                   % (hitstrand, qstart, qend, hstart, hend))
    fout.write('\n')
    sys.stdout.write('done\n')
def parsing_each_query(content, fout):
    """Group the 100%-identity segments of one query and write them to *fout*.

    *content* is a sequence of strings that is joined and stripped; when the
    result is empty the function returns without writing anything.  The text
    is handed to ``blast_parse.parse_query_alignment`` and the first key of
    the returned dict is taken as the query id.  ``parse_dic[query][0]`` is
    used as the query length and ``parse_dic[query][1]`` as a dict mapping
    each hit id to a list of
    ``(strand, qstart, qend, hstart, hend, percentage)`` segments.

    Selection of candidate segments, per hit:
      * segments with percentage below 100 are skipped;
      * if the first 100% segment of a hit spans fewer than 50 positions
        (``abs(qend - qstart) < 50``) the rest of that hit is abandoned;
      * every other 100% segment becomes a candidate.

    The candidates are then passed to ``rules``, ``generate_combination``
    and ``constrains`` (all defined outside this block).  Going by the
    original comments they are meant to enforce that the segments of a
    combination add up to the complete query, respect strand and do not
    overlap -- TODO confirm against those helpers.  Each element of the
    combination list is treated as a sequence of indices into the candidate
    list; the segments of every combination for which ``constrains`` returns
    a true value are written out.

    NOTE(review): a segment that belongs to several passing combinations is
    written once per combination -- confirm that this is intended.

    Output written to *fout*::

        [<query>]
        <query>
        + 1 <query_len> 1 <query_len>
        <hit id>
        <strand> <qstart> <qend> <hstart> <hend>
        ...
        <blank line>

    The module-level name ``gfdir`` (defined outside this block) controls how
    hit ids are shortened: when it is non-empty, the first
    ``len(gfdir) + 1`` characters are removed and the id is cut at ``.nib:``.
    (Presumably ``gfdir`` is the directory holding the .nib files -- confirm
    against the caller.)
    """
    import sys  # local import: only needed for the progress output

    content = ''.join(content).strip()
    if content == '':
        return

    parse_dic = blast_parse.parse_query_alignment(content)
    query = list(parse_dic)[0]
    # Progress output: "<query> " now, "done" at the end, on a single line.
    sys.stdout.write('%s ' % (query,))

    query_len = parse_dic[query][0]
    hit_dic = parse_dic[query][1]

    # [[hitid, strand, qstart, qend, hstart, hend], ...]
    hit_candidate_segment = []
    for hitid, hit_align_list in hit_dic.items():
        seen_first_100 = False
        for strand, qstart, qend, hstart, hend, percentage in hit_align_list:
            if percentage < 100:
                continue
            if not seen_first_100:
                seen_first_100 = True
                # Initial plausibility test: the first 100% HSP of a real
                # hit is expected to be large; otherwise drop the whole hit.
                if abs(qend - qstart) < 50:
                    break
            hit_candidate_segment.append(
                [hitid, strand, qstart, qend, hstart, hend])

    # Save memory: the parsed alignment is no longer needed.  Both names
    # must be released, since hit_dic references the same data as parse_dic.
    parse_dic = hit_dic = None

    # Build the combination constraints, using each segment's position in
    # hit_candidate_segment as its id (id -> list of compatible ids).
    rule_dic, must_cover_list = rules(hit_candidate_segment, query_len)
    # All combinations that do not conflict with rule_dic.
    combo_list = generate_combination(
        rule_dic, must_cover_list, hit_candidate_segment, query_len)

    # Rebuild each combination from its segment positions and keep the
    # segments of those that pass the constraint check.
    group_list = []
    for combo in combo_list:
        candidate_list = [hit_candidate_segment[pos] for pos in combo]
        if constrains(query_len, candidate_list):
            group_list.extend(candidate_list)

    # Header, then the query itself as the first member of its own group.
    fout.write('[' + query + ']\n')
    fout.write(query + '\n')
    fout.write("+ 1 %s 1 %s\n" % (query_len, query_len))

    strip_gfdir = gfdir != ""
    if strip_gfdir:
        string_start = len(gfdir) + 1
    else:
        string_start = 0

    for hitid, hitstrand, qstart, qend, hstart, hend in group_list:
        hitid = hitid[string_start:]
        if strip_gfdir:
            nib_pos = hitid.find(".nib:")
            # find() returns -1 when the marker is missing; slicing with -1
            # would silently drop the last character of the id, so only cut
            # when the marker is really there.
            if nib_pos != -1:
                hitid = hitid[:nib_pos]
        fout.write(hitid + '\n')
        fout.write("%s %s %s %s %s\n"
                   % (hitstrand, qstart, qend, hstart, hend))
    fout.write('\n')
    sys.stdout.write('done\n')