def _strand_agreement(gene_strand, junction_dir):
    """Compare one junction side's direction with the strand of its gene.

    Returns one of:
        "none"  -- no gene strand is available (empty string),
        "same"  -- the junction direction equals the gene strand,
        "flip"  -- the junction direction is the opposite strand,
        "other" -- the gene strand is not "+"/"-", or the junction direction
                   is neither the gene strand nor its opposite.
    """
    if gene_strand == "":
        return "none"
    if gene_strand == "+":
        flipped = "-"
    elif gene_strand == "-":
        flipped = "+"
    else:
        return "other"
    if junction_dir == gene_strand:
        return "same"
    if junction_dir == flipped:
        return "flip"
    return "other"


def _orientation_action(best1dir, best2dir, dir1, dir2):
    """Decide whether a junction has to be reversed to match its genes.

    Sides without a gene strand are ignored.  Returns "no reverse" when every
    annotated side already agrees with its gene strand, "reverse" when every
    annotated side is on the opposite strand, and "unknown" otherwise (no
    annotated side, sides that disagree with each other, or unrecognised
    strand values).
    """
    states = [
        state
        for state in (
            _strand_agreement(best1dir, dir1),
            _strand_agreement(best2dir, dir2),
        )
        if state != "none"
    ]
    if not states:
        return "unknown"
    if all(state == "same" for state in states):
        return "no reverse"
    if all(state == "flip" for state in states):
        return "reverse"
    return "unknown"


def annotate_junctions(junctions, reference_genepred_filename, splicebases):
    """Build one tab-delimited annotation line per junction.

    Args:
        junctions: mapping of junction name -> record.  Each record is indexed
            with the keys chr1, coo1, dir1, chr2, coo2, dir2, lr_count,
            type1nr, type1, type2, type3, type4, max_min_side_lengths,
            max_min_unique_side_counts and max_min_flanking_distance.
        reference_genepred_filename: passed unchanged to the genepred_basics
            loaders (presumably a genePred annotation file -- confirm against
            genepred_basics).
        splicebases: mapping of junction name -> splice motif string; only the
            values "GT AG" and "CT AC" are treated as informative.

    Returns:
        dict mapping each junction name to a tab-delimited string with the
        columns: junction name, side A (chromosome, coordinate, direction,
        exon-edge flag), side B (same four columns), "A-B" gene/locus label,
        "A/B" gene strands, transcript evidence flag, splice evidence flag,
        gene name A, gene name B, followed by lr_count, type1nr, type1..type4,
        max_min_side_lengths, max_min_unique_side_counts and
        max_min_flanking_distance.  When the junction is reversed, sides A and
        B are swapped and their directions are passed through opposite().

    Raises:
        KeyError: if a junction name is missing from splicebases or a record
            lacks one of the keys listed above.
    """
    annot_struct = genepred_basics.get_gene_annotation_data_structure(
        reference_genepred_filename
    )
    genepred = genepred_basics.get_per_chromosome_array(reference_genepred_filename)

    result = {}
    for junc_name in junctions:
        splice = splicebases[junc_name]
        junc = junctions[junc_name]
        chr1, dir1 = junc["chr1"], junc["dir1"]
        chr2, dir2 = junc["chr2"], junc["dir2"]
        pos1 = int(junc["coo1"])
        pos2 = int(junc["coo2"])

        # Left side: a "+" side is checked against exon ends, a "-" side
        # against exon starts.
        entries = genepred[chr1] if chr1 in genepred else []
        if dir1 == "+":
            on_edge = genepred_basics.is_exon_end(chr1, pos1, entries)
        elif dir1 == "-":
            on_edge = genepred_basics.is_exon_start(chr1, pos1, entries)
        else:
            on_edge = False
        leftend = "Y" if on_edge else "N"

        # Right side: mirror image of the left-side check.
        entries = genepred[chr2] if chr2 in genepred else []
        if dir2 == "+":
            on_edge = genepred_basics.is_exon_start(chr2, pos2, entries)
        elif dir2 == "-":
            on_edge = genepred_basics.is_exon_end(chr2, pos2, entries)
        else:
            on_edge = False
        rightend = "Y" if on_edge else "N"

        annot1 = genepred_basics.gene_annotate_by_coordinates(
            chr1, pos1 - 1, pos1, annot_struct
        )
        annot2 = genepred_basics.gene_annotate_by_coordinates(
            chr2, pos2 - 1, pos2, annot_struct
        )

        # Without an annotation hit the label falls back to "chr:coordinate"
        # and the gene name / strand stay empty.
        best1, best1dir, name1 = chr1 + ":" + str(junc["coo1"]), "", ""
        best2, best2dir, name2 = chr2 + ":" + str(junc["coo2"]), "", ""
        if annot1:
            best1 = name1 = annot1[0][0]
            best1dir = annot1[0][1]
        if annot2:
            best2 = name2 = annot2[0][0]
            best2dir = annot2[0][1]

        action = _orientation_action(best1dir, best2dir, dir1, dir2)
        evidence_transcript = "N" if action == "unknown" else "Y"

        # Orientation suggested by the splice motif alone, if any.
        if splice == "GT AG":
            splice_action = "no reverse"
        elif splice == "CT AC":
            splice_action = "reverse"
        else:
            splice_action = None

        evidence_splice = "N"
        if action == "unknown":
            # No usable transcript evidence: let the splice motif decide.
            if splice_action is not None:
                evidence_splice = "Y"
                action = splice_action
        elif action == splice_action:
            evidence_splice = "Y"

        # str() on the coordinates keeps integer-valued records from raising
        # TypeError; string-valued records are emitted unchanged.
        if action == "reverse":
            # The exon-edge flags follow their sides when the sides swap.
            leftend, rightend = rightend, leftend
            columns = [
                chr2, str(junc["coo2"]), opposite(dir2), leftend,
                chr1, str(junc["coo1"]), opposite(dir1), rightend,
                best2 + "-" + best1,
                best2dir + "/" + best1dir,
                evidence_transcript, evidence_splice,
                name2, name1,
            ]
        else:
            # "no reverse" and still-"unknown" junctions share one layout.
            columns = [
                chr1, str(junc["coo1"]), dir1, leftend,
                chr2, str(junc["coo2"]), dir2, rightend,
                best1 + "-" + best2,
                best1dir + "/" + best2dir,
                evidence_transcript, evidence_splice,
                name1, name2,
            ]

        trailer = [
            str(junc["lr_count"]),
            str(junc["type1nr"]),
            str(junc["type1"]),
            str(junc["type2"]),
            str(junc["type3"]),
            str(junc["type4"]),
            str(junc["max_min_side_lengths"]),
            str(junc["max_min_unique_side_counts"]),
            str(junc["max_min_flanking_distance"]),
        ]
        result[junc_name] = "\t".join([junc_name] + columns + trailer)
    return result
def _strand_agreement(gene_strand, junction_dir):
    """Compare one junction side's direction with the strand of its gene.

    Returns one of:
        "none"  -- no gene strand is available (empty string),
        "same"  -- the junction direction equals the gene strand,
        "flip"  -- the junction direction is the opposite strand,
        "other" -- the gene strand is not "+"/"-", or the junction direction
                   is neither the gene strand nor its opposite.
    """
    if gene_strand == "":
        return "none"
    if gene_strand == "+":
        flipped = "-"
    elif gene_strand == "-":
        flipped = "+"
    else:
        return "other"
    if junction_dir == gene_strand:
        return "same"
    if junction_dir == flipped:
        return "flip"
    return "other"


def _orientation_action(best1dir, best2dir, dir1, dir2):
    """Decide whether a junction has to be reversed to match its genes.

    Sides without a gene strand are ignored.  Returns "no reverse" when every
    annotated side already agrees with its gene strand, "reverse" when every
    annotated side is on the opposite strand, and "unknown" otherwise (no
    annotated side, sides that disagree with each other, or unrecognised
    strand values).
    """
    states = [
        state
        for state in (
            _strand_agreement(best1dir, dir1),
            _strand_agreement(best2dir, dir2),
        )
        if state != "none"
    ]
    if not states:
        return "unknown"
    if all(state == "same" for state in states):
        return "no reverse"
    if all(state == "flip" for state in states):
        return "reverse"
    return "unknown"


def annotate_junctions(junctions, reference_genepred_filename):
    """Fill in missing gene annotation on junction entries and print them.

    Entries that already name both genes, and entries for which no new gene
    name is found, are printed unchanged by tab-joining their original fields.
    Every other entry is re-oriented against the newly found gene strand(s)
    and printed as a rebuilt tab-delimited line.

    Args:
        junctions: iterable of dicts, each with an 'entries' value (the
            original fields, a sequence of strings) and a 'dictionary' value
            (the same record keyed by column name, e.g. 'Gene_1',
            'Chromosome_1', 'Coordinate_1', 'Direction_1',
            'Transcript_strands', ...).
        reference_genepred_filename: passed unchanged to the genepred_basics
            loaders (presumably a genePred annotation file -- confirm against
            genepred_basics).

    Returns:
        None.  One line per entry is written to standard output.

    Raises:
        ValueError: if 'Transcript_strands' does not split into exactly two
            parts on "/".
    """
    annot_struct = genepred_basics.get_gene_annotation_data_structure(
        reference_genepred_filename
    )
    genepred = genepred_basics.get_per_chromosome_array(reference_genepred_filename)

    for entry_set in junctions:
        fields = entry_set['entries']
        record = entry_set['dictionary']

        if record['Gene_1'] != '' and record['Gene_2'] != '':
            # Already annotated on both sides: pass the entry through.
            # print(...) with one argument emits the same text on Python 2
            # and also parses on Python 3.
            print("\t".join(fields))
            continue

        best1dir, best2dir = record['Transcript_strands'].split("/")
        chr1, dir1 = record['Chromosome_1'], record['Direction_1']
        chr2, dir2 = record['Chromosome_2'], record['Direction_2']

        # Side 1: only looked up when the gene name is missing.
        leftend = record['End_of_reference_exon_1']
        name1 = record['Gene_1']
        best1 = name1
        side1_updated = False
        if name1 == '':
            best1 = chr1 + ":" + str(record['Coordinate_1'])
            pos1 = int(record['Coordinate_1'])
            entries = genepred[chr1] if chr1 in genepred else []
            if dir1 == '+':
                on_edge = genepred_basics.is_exon_end(chr1, pos1, entries)
            elif dir1 == '-':
                on_edge = genepred_basics.is_exon_start(chr1, pos1, entries)
            else:
                on_edge = False
            if on_edge:
                leftend = 'Y'
            annot1 = genepred_basics.gene_annotate_by_coordinates(
                chr1, pos1 - 1, pos1, annot_struct
            )
            if annot1:
                side1_updated = True
                best1 = name1 = annot1[0][0]
                best1dir = annot1[0][1]

        # Side 2: mirror image of side 1 (exon start for "+", end for "-").
        rightend = record['Start_of_reference_exon_2']
        name2 = record['Gene_2']
        best2 = name2
        side2_updated = False
        if name2 == '':
            best2 = chr2 + ":" + str(record['Coordinate_2'])
            pos2 = int(record['Coordinate_2'])
            entries = genepred[chr2] if chr2 in genepred else []
            if dir2 == '+':
                on_edge = genepred_basics.is_exon_start(chr2, pos2, entries)
            elif dir2 == '-':
                on_edge = genepred_basics.is_exon_end(chr2, pos2, entries)
            else:
                on_edge = False
            if on_edge:
                rightend = 'Y'
            annot2 = genepred_basics.gene_annotate_by_coordinates(
                chr2, pos2 - 1, pos2, annot_struct
            )
            if annot2:
                side2_updated = True
                best2 = name2 = annot2[0][0]
                best2dir = annot2[0][1]

        if not (side1_updated or side2_updated):
            # Didn't find any additional name information.
            print("\t".join(fields))
            continue

        action = _orientation_action(best1dir, best2dir, dir1, dir2)
        evidence_transcript = 'N' if action == "unknown" else 'Y'
        # NOTE(review): splice evidence is always emitted as 'N' here; no
        # splice motif is consulted in this function -- confirm this is
        # intended for re-annotated entries.
        evidence_splice = 'N'

        if action == "reverse":
            # The exon-edge flags follow their sides when the sides swap.
            leftend, rightend = rightend, leftend
            columns = [
                chr2, str(record['Coordinate_2']), opposite(dir2), leftend,
                chr1, str(record['Coordinate_1']), opposite(dir1), rightend,
                best2 + "-" + best1,
                best2dir + "/" + best1dir,
                evidence_transcript, evidence_splice,
                name2, name1,
            ]
        else:
            # "no reverse" and "unknown" entries share one layout.
            columns = [
                chr1, str(record['Coordinate_1']), dir1, leftend,
                chr2, str(record['Coordinate_2']), dir2, rightend,
                best1 + "-" + best2,
                best1dir + "/" + best2dir,
                evidence_transcript, evidence_splice,
                name1, name2,
            ]

        # Every trailing column goes through str(), including the flank
        # value, so numeric fields cannot break the concatenation.
        trailer = [
            str(record['LR_count']),
            str(record['nr_Type1_count']),
            str(record['Type1_count']),
            str(record['Type2_count']),
            str(record['Type3_count']),
            str(record['Type4_count']),
            str(record['Max_min_side_length']),
            str(record['Max_min_side_unique']),
            str(record['Max_min_side_flank']),
        ]
        print("\t".join([record['Alphabetized_junction_name']] + columns + trailer))
def _strand_agreement(gene_strand, junction_dir):
    """Compare one junction side's direction with the strand of its gene.

    Returns one of:
        "none"  -- no gene strand is available (empty string),
        "same"  -- the junction direction equals the gene strand,
        "flip"  -- the junction direction is the opposite strand,
        "other" -- the gene strand is not "+"/"-", or the junction direction
                   is neither the gene strand nor its opposite.
    """
    if gene_strand == "":
        return "none"
    if gene_strand == "+":
        flipped = "-"
    elif gene_strand == "-":
        flipped = "+"
    else:
        return "other"
    if junction_dir == gene_strand:
        return "same"
    if junction_dir == flipped:
        return "flip"
    return "other"


def _orientation_action(best1dir, best2dir, dir1, dir2):
    """Decide whether a junction has to be reversed to match its genes.

    Sides without a gene strand are ignored.  Returns "no reverse" when every
    annotated side already agrees with its gene strand, "reverse" when every
    annotated side is on the opposite strand, and "unknown" otherwise (no
    annotated side, sides that disagree with each other, or unrecognised
    strand values).
    """
    states = [
        state
        for state in (
            _strand_agreement(best1dir, dir1),
            _strand_agreement(best2dir, dir2),
        )
        if state != "none"
    ]
    if not states:
        return "unknown"
    if all(state == "same" for state in states):
        return "no reverse"
    if all(state == "flip" for state in states):
        return "reverse"
    return "unknown"


def annotate_junctions(junctions, reference_genepred_filename, splicebases):
    """Build one tab-delimited annotation line per junction.

    Args:
        junctions: mapping of junction name -> record.  Each record is indexed
            with the keys chr1, coo1, dir1, chr2, coo2, dir2, lr_count,
            type1nr, type1, type2, type3, type4, max_min_side_lengths,
            max_min_unique_side_counts and max_min_flanking_distance.
        reference_genepred_filename: passed unchanged to the genepred_basics
            loaders (presumably a genePred annotation file -- confirm against
            genepred_basics).
        splicebases: mapping of junction name -> splice motif string; only the
            values 'GT AG' and 'CT AC' are treated as informative.

    Returns:
        dict mapping each junction name to a tab-delimited string with the
        columns: junction name, side A (chromosome, coordinate, direction,
        exon-edge flag), side B (same four columns), "A-B" gene/locus label,
        "A/B" gene strands, transcript evidence flag, splice evidence flag,
        gene name A, gene name B, followed by lr_count, type1nr, type1..type4,
        max_min_side_lengths, max_min_unique_side_counts and
        max_min_flanking_distance.  When the junction is reversed, sides A and
        B are swapped and their directions are passed through opposite().

    Raises:
        KeyError: if a junction name is missing from splicebases or a record
            lacks one of the keys listed above.
    """
    annot_struct = genepred_basics.get_gene_annotation_data_structure(
        reference_genepred_filename
    )
    genepred = genepred_basics.get_per_chromosome_array(reference_genepred_filename)

    result = {}
    for junc_name in junctions:
        splice = splicebases[junc_name]
        junc = junctions[junc_name]
        chr1, dir1 = junc['chr1'], junc['dir1']
        chr2, dir2 = junc['chr2'], junc['dir2']
        pos1 = int(junc['coo1'])
        pos2 = int(junc['coo2'])

        # Left side: a "+" side is checked against exon ends, a "-" side
        # against exon starts.
        entries = genepred[chr1] if chr1 in genepred else []
        if dir1 == '+':
            on_edge = genepred_basics.is_exon_end(chr1, pos1, entries)
        elif dir1 == '-':
            on_edge = genepred_basics.is_exon_start(chr1, pos1, entries)
        else:
            on_edge = False
        leftend = 'Y' if on_edge else 'N'

        # Right side: mirror image of the left-side check.
        entries = genepred[chr2] if chr2 in genepred else []
        if dir2 == '+':
            on_edge = genepred_basics.is_exon_start(chr2, pos2, entries)
        elif dir2 == '-':
            on_edge = genepred_basics.is_exon_end(chr2, pos2, entries)
        else:
            on_edge = False
        rightend = 'Y' if on_edge else 'N'

        annot1 = genepred_basics.gene_annotate_by_coordinates(
            chr1, pos1 - 1, pos1, annot_struct
        )
        annot2 = genepred_basics.gene_annotate_by_coordinates(
            chr2, pos2 - 1, pos2, annot_struct
        )

        # Without an annotation hit the label falls back to "chr:coordinate"
        # and the gene name / strand stay empty.
        best1, best1dir, name1 = chr1 + ":" + str(junc['coo1']), '', ''
        best2, best2dir, name2 = chr2 + ":" + str(junc['coo2']), '', ''
        if annot1:
            best1 = name1 = annot1[0][0]
            best1dir = annot1[0][1]
        if annot2:
            best2 = name2 = annot2[0][0]
            best2dir = annot2[0][1]

        action = _orientation_action(best1dir, best2dir, dir1, dir2)
        evidence_transcript = 'N' if action == "unknown" else 'Y'

        # Orientation suggested by the splice motif alone, if any.
        if splice == 'GT AG':
            splice_action = "no reverse"
        elif splice == 'CT AC':
            splice_action = "reverse"
        else:
            splice_action = None

        evidence_splice = 'N'
        if action == "unknown":
            # No usable transcript evidence: let the splice motif decide.
            if splice_action is not None:
                evidence_splice = 'Y'
                action = splice_action
        elif action == splice_action:
            evidence_splice = 'Y'

        # str() on the coordinates keeps integer-valued records from raising
        # TypeError; string-valued records are emitted unchanged.
        if action == "reverse":
            # The exon-edge flags follow their sides when the sides swap.
            leftend, rightend = rightend, leftend
            columns = [
                chr2, str(junc['coo2']), opposite(dir2), leftend,
                chr1, str(junc['coo1']), opposite(dir1), rightend,
                best2 + "-" + best1,
                best2dir + "/" + best1dir,
                evidence_transcript, evidence_splice,
                name2, name1,
            ]
        else:
            # "no reverse" and still-"unknown" junctions share one layout.
            columns = [
                chr1, str(junc['coo1']), dir1, leftend,
                chr2, str(junc['coo2']), dir2, rightend,
                best1 + "-" + best2,
                best1dir + "/" + best2dir,
                evidence_transcript, evidence_splice,
                name1, name2,
            ]

        trailer = [
            str(junc['lr_count']),
            str(junc['type1nr']),
            str(junc['type1']),
            str(junc['type2']),
            str(junc['type3']),
            str(junc['type4']),
            str(junc['max_min_side_lengths']),
            str(junc['max_min_unique_side_counts']),
            str(junc['max_min_flanking_distance']),
        ]
        result[junc_name] = "\t".join([junc_name] + columns + trailer)
    return result