def _orient_by_transcripts(transcript_strands, junction_dirs):
    """Decide whether a junction has to be reversed to agree with its genes.

    Both arguments are ``(side 1, side 2)`` pairs.  A transcript strand of
    ``""`` means that side carries no annotation and is ignored.

    Returns:
        ``"no reverse"`` when every annotated side's junction direction equals
        its transcript strand, ``"reverse"`` when every annotated side's
        direction is the opposite strand, and ``"unknown"`` otherwise (no side
        annotated, the sides disagree, or a strand other than "+"/"-").
    """
    flipped = {"+": "-", "-": "+"}
    annotated = [
        (strand, direction)
        for strand, direction in zip(transcript_strands, junction_dirs)
        if strand != ""
    ]
    if not annotated:
        return "unknown"
    if any(strand not in flipped for strand, _ in annotated):
        return "unknown"
    if all(direction == strand for strand, direction in annotated):
        return "no reverse"
    if all(direction == flipped[strand] for strand, direction in annotated):
        return "reverse"
    return "unknown"


def annotate_junctions(junctions, reference_genepred_filename, splicebases):
    """Build one tab-separated annotation line per junction.

    Args:
        junctions: mapping of junction name to a dict with the keys ``chr1``,
            ``coo1``, ``dir1``, ``chr2``, ``coo2``, ``dir2``, ``lr_count``,
            ``type1nr``, ``type1`` .. ``type4``, ``max_min_side_lengths``,
            ``max_min_unique_side_counts`` and ``max_min_flanking_distance``.
        reference_genepred_filename: handed unchanged to the two
            ``genepred_basics`` loaders.
        splicebases: mapping of junction name to its splice-site string; only
            ``"GT AG"`` and ``"CT AC"`` are recognised.

    Returns:
        dict mapping each junction name to a line with these columns: name,
        chr/coordinate/direction/exon-boundary flag for the first side, the
        same four for the second side, ``best1-best2``, ``strand1/strand2``,
        transcript-evidence flag, splice-evidence flag, the two gene names,
        then the nine count/length values from the junction dict.

    Raises:
        KeyError: if a junction name is missing from ``splicebases`` or a
            junction dict lacks one of the keys listed above.

    Coordinates and counts are passed through ``str()`` so integer values are
    accepted as well as strings.  Relies on ``opposite`` (defined outside this
    block; presumably flips "+"/"-" -- confirm).
    """
    annot_struct = genepred_basics.get_gene_annotation_data_structure(reference_genepred_filename)
    genepred = genepred_basics.get_per_chromosome_array(reference_genepred_filename)
    trailing_keys = (
        "lr_count",
        "type1nr",
        "type1",
        "type2",
        "type3",
        "type4",
        "max_min_side_lengths",
        "max_min_unique_side_counts",
        "max_min_flanking_distance",
    )
    result = {}
    for junc_name in junctions:
        splice = splicebases[junc_name]
        junc = junctions[junc_name]
        left = {"chr": junc["chr1"], "coo": junc["coo1"], "dir": junc["dir1"]}
        right = {"chr": junc["chr2"], "coo": junc["coo2"], "dir": junc["dir2"]}

        # Side 1 is flagged when it sits on an exon end while pointing "+", or
        # on an exon start while pointing "-"; side 2 uses the mirrored checks.
        boundary_checks = (
            (left, genepred_basics.is_exon_end, genepred_basics.is_exon_start),
            (right, genepred_basics.is_exon_start, genepred_basics.is_exon_end),
        )
        for side, plus_check, minus_check in boundary_checks:
            # A chromosome missing from the reference is checked against [].
            entries = genepred[side["chr"]] if side["chr"] in genepred else []
            if side["dir"] == "+":
                hit = plus_check(side["chr"], int(side["coo"]), entries)
            elif side["dir"] == "-":
                hit = minus_check(side["chr"], int(side["coo"]), entries)
            else:
                hit = False
            side["end"] = "Y" if hit else "N"

        # Gene lookup over the interval [coordinate - 1, coordinate]; without
        # a hit the side is labelled "chr:coordinate" and has no strand/name.
        for side in (left, right):
            coordinate = int(side["coo"])
            annotation = genepred_basics.gene_annotate_by_coordinates(
                side["chr"], coordinate - 1, coordinate, annot_struct
            )
            if annotation:
                side["best"] = side["name"] = annotation[0][0]
                side["strand"] = annotation[0][1]
            else:
                side["best"] = side["chr"] + ":" + str(side["coo"])
                side["strand"] = ""
                side["name"] = ""

        action = _orient_by_transcripts(
            (left["strand"], right["strand"]), (left["dir"], right["dir"])
        )
        evidence_transcript = "N" if action == "unknown" else "Y"

        # Orientation implied by the splice-site bases, if they are canonical.
        if splice == "GT AG":
            splice_action = "no reverse"
        elif splice == "CT AC":
            splice_action = "reverse"
        else:
            splice_action = None

        if action == "unknown":
            # No transcript evidence: let the splice site decide, if it can.
            evidence_splice = "Y" if splice_action else "N"
            if splice_action:
                action = splice_action
        else:
            # Splice evidence only counts when it agrees with the transcripts.
            evidence_splice = "Y" if splice_action == action else "N"

        if action == "reverse":
            # Swap the sides (each keeps its own boundary flag, gene and
            # strand) and flip both directions; side 2 is flipped first.
            first, second = (
                dict(right, dir=opposite(right["dir"])),
                dict(left, dir=opposite(left["dir"])),
            )
        else:
            first, second = left, right

        fields = [junc_name]
        for side in (first, second):
            fields += [side["chr"], str(side["coo"]), side["dir"], side["end"]]
        fields += [
            first["best"] + "-" + second["best"],
            first["strand"] + "/" + second["strand"],
            evidence_transcript,
            evidence_splice,
            first["name"],
            second["name"],
        ]
        fields += [str(junc[key]) for key in trailing_keys]
        result[junc_name] = "\t".join(fields)
    return result
예제 #2
0
def _orient_by_transcripts(transcript_strands, junction_dirs):
    """Decide whether a junction has to be reversed to agree with its genes.

    Both arguments are ``(side 1, side 2)`` pairs.  A transcript strand of
    ``""`` means that side carries no annotation and is ignored.

    Returns:
        ``"no reverse"`` when every annotated side's junction direction equals
        its transcript strand, ``"reverse"`` when every annotated side's
        direction is the opposite strand, and ``"unknown"`` otherwise (no side
        annotated, the sides disagree, or a strand other than "+"/"-").
    """
    flipped = {"+": "-", "-": "+"}
    annotated = [
        (strand, direction)
        for strand, direction in zip(transcript_strands, junction_dirs)
        if strand != ""
    ]
    if not annotated:
        return "unknown"
    if any(strand not in flipped for strand, _ in annotated):
        return "unknown"
    if all(direction == strand for strand, direction in annotated):
        return "no reverse"
    if all(direction == flipped[strand] for strand, direction in annotated):
        return "reverse"
    return "unknown"


def annotate_junctions(junctions, reference_genepred_filename):
    """Fill in missing gene names on junction records and print every record.

    Args:
        junctions: iterable of dicts, each with ``'entries'`` (the record's
            original columns, a sequence of strings) and ``'dictionary'``
            (the same record keyed by column name, e.g. ``'Gene_1'``,
            ``'Chromosome_1'``, ``'Transcript_strands'``).
        reference_genepred_filename: handed unchanged to the two
            ``genepred_basics`` loaders.

    Returns:
        None.  One tab-separated line per record is written to stdout:
        the original columns when both genes were already named or when no
        new gene name was found, otherwise a rebuilt line.

    Raises:
        KeyError: if a record lacks one of the columns that are read.
        ValueError: if ``'Transcript_strands'`` does not split into exactly
            two parts on "/".

    ``print`` is always called with one pre-built string, so the output is
    the same under Python 2 and Python 3.  Relies on ``opposite`` (defined
    outside this block; presumably flips "+"/"-" -- confirm).
    """
    annot_struct = genepred_basics.get_gene_annotation_data_structure(
        reference_genepred_filename)
    genepred = genepred_basics.get_per_chromosome_array(
        reference_genepred_filename)
    for entry_set in junctions:
        f = entry_set['entries']
        d = entry_set['dictionary']
        if d['Gene_1'] != '' and d['Gene_2'] != '':
            # Both sides already carry a gene name: echo the record as is.
            print("\t".join(f))
            continue

        best1dir, best2dir = d['Transcript_strands'].split("/")

        # Side 1: keep the recorded exon-boundary flag unless a fresh lookup
        # on an unnamed side upgrades it to 'Y'.
        leftend = d['End_of_reference_exon_1']
        name1 = d['Gene_1']
        best1 = name1
        name1change = False
        if name1 == '':
            chrom = d['Chromosome_1']
            coord = d['Coordinate_1']
            best1 = chrom + ":" + str(coord)
            # Chromosomes absent from the reference are not looked up at all.
            if chrom in genepred:
                entries = genepred[chrom]
                direction = d['Direction_1']
                if direction == '+' and genepred_basics.is_exon_end(
                        chrom, int(coord), entries):
                    leftend = 'Y'
                elif direction == '-' and genepred_basics.is_exon_start(
                        chrom, int(coord), entries):
                    leftend = 'Y'
                annot1 = genepred_basics.gene_annotate_by_coordinates(
                    chrom, int(coord) - 1, int(coord), annot_struct)
                if annot1:
                    name1change = True
                    best1 = name1 = annot1[0][0]
                    best1dir = annot1[0][1]

        # Side 2: same procedure with the mirrored boundary checks.
        rightend = d['Start_of_reference_exon_2']
        name2 = d['Gene_2']
        best2 = name2
        name2change = False
        if name2 == '':
            chrom = d['Chromosome_2']
            coord = d['Coordinate_2']
            best2 = chrom + ":" + str(coord)
            if chrom in genepred:
                entries = genepred[chrom]
                direction = d['Direction_2']
                if direction == '+' and genepred_basics.is_exon_start(
                        chrom, int(coord), entries):
                    rightend = 'Y'
                elif direction == '-' and genepred_basics.is_exon_end(
                        chrom, int(coord), entries):
                    rightend = 'Y'
                annot2 = genepred_basics.gene_annotate_by_coordinates(
                    chrom, int(coord) - 1, int(coord), annot_struct)
                if annot2:
                    name2change = True
                    best2 = name2 = annot2[0][0]
                    best2dir = annot2[0][1]

        if not (name1change or name2change):
            # No additional name information was found: echo the record.
            print("\t".join(f))
            continue

        left = {
            'chr': d['Chromosome_1'], 'coo': d['Coordinate_1'],
            'dir': d['Direction_1'], 'end': leftend,
            'best': best1, 'strand': best1dir, 'name': name1,
        }
        right = {
            'chr': d['Chromosome_2'], 'coo': d['Coordinate_2'],
            'dir': d['Direction_2'], 'end': rightend,
            'best': best2, 'strand': best2dir, 'name': name2,
        }

        action = _orient_by_transcripts(
            (best1dir, best2dir), (left['dir'], right['dir']))
        evidence_transcript = 'N' if action == "unknown" else 'Y'
        # No splice-site information is consulted here, so the splice
        # evidence column is always 'N' on a rebuilt line.
        evidence_splice = 'N'

        if action == "reverse":
            # Swap the sides (each keeps its own boundary flag, gene and
            # strand) and flip both directions; side 2 is flipped first.
            first, second = (
                dict(right, dir=opposite(right['dir'])),
                dict(left, dir=opposite(left['dir'])),
            )
        else:
            first, second = left, right

        fields = [d['Alphabetized_junction_name']]
        for side in (first, second):
            fields += [side['chr'], str(side['coo']), side['dir'], side['end']]
        fields += [
            first['best'] + "-" + second['best'],
            first['strand'] + "/" + second['strand'],
            evidence_transcript,
            evidence_splice,
            first['name'],
            second['name'],
            str(d['LR_count']),
            str(d['nr_Type1_count']),
            str(d['Type1_count']),
            str(d['Type2_count']),
            str(d['Type3_count']),
            str(d['Type4_count']),
            str(d['Max_min_side_length']),
            str(d['Max_min_side_unique']),
            d['Max_min_side_flank'],
        ]
        print("\t".join(fields))
예제 #3
0
def _orient_by_transcripts(transcript_strands, junction_dirs):
    """Decide whether a junction has to be reversed to agree with its genes.

    Both arguments are ``(side 1, side 2)`` pairs.  A transcript strand of
    ``""`` means that side carries no annotation and is ignored.

    Returns:
        ``"no reverse"`` when every annotated side's junction direction equals
        its transcript strand, ``"reverse"`` when every annotated side's
        direction is the opposite strand, and ``"unknown"`` otherwise (no side
        annotated, the sides disagree, or a strand other than "+"/"-").
    """
    flipped = {"+": "-", "-": "+"}
    annotated = [
        (strand, direction)
        for strand, direction in zip(transcript_strands, junction_dirs)
        if strand != ""
    ]
    if not annotated:
        return "unknown"
    if any(strand not in flipped for strand, _ in annotated):
        return "unknown"
    if all(direction == strand for strand, direction in annotated):
        return "no reverse"
    if all(direction == flipped[strand] for strand, direction in annotated):
        return "reverse"
    return "unknown"


def annotate_junctions(junctions,reference_genepred_filename, splicebases):
    """Build one tab-separated annotation line per junction.

    Args:
        junctions: mapping of junction name to a dict with the keys ``chr1``,
            ``coo1``, ``dir1``, ``chr2``, ``coo2``, ``dir2``, ``lr_count``,
            ``type1nr``, ``type1`` .. ``type4``, ``max_min_side_lengths``,
            ``max_min_unique_side_counts`` and ``max_min_flanking_distance``.
        reference_genepred_filename: handed unchanged to the two
            ``genepred_basics`` loaders.
        splicebases: mapping of junction name to its splice-site string; only
            ``'GT AG'`` and ``'CT AC'`` are recognised.

    Returns:
        dict mapping each junction name to a line with these columns: name,
        chr/coordinate/direction/exon-boundary flag for the first side, the
        same four for the second side, ``best1-best2``, ``strand1/strand2``,
        transcript-evidence flag, splice-evidence flag, the two gene names,
        then the nine count/length values from the junction dict.

    Raises:
        KeyError: if a junction name is missing from ``splicebases`` or a
            junction dict lacks one of the keys listed above.

    Coordinates and counts are passed through ``str()`` so integer values are
    accepted as well as strings.  Relies on ``opposite`` (defined outside this
    block; presumably flips "+"/"-" -- confirm).
    """
    annot_struct = genepred_basics.get_gene_annotation_data_structure(reference_genepred_filename)
    genepred = genepred_basics.get_per_chromosome_array(reference_genepred_filename)
    trailing_keys = (
        'lr_count', 'type1nr', 'type1', 'type2', 'type3', 'type4',
        'max_min_side_lengths', 'max_min_unique_side_counts',
        'max_min_flanking_distance',
    )
    result = {}
    for junc_name in junctions:
        splice = splicebases[junc_name]
        junc = junctions[junc_name]
        left = {'chr': junc['chr1'], 'coo': junc['coo1'], 'dir': junc['dir1']}
        right = {'chr': junc['chr2'], 'coo': junc['coo2'], 'dir': junc['dir2']}

        # Side 1 is flagged when it sits on an exon end while pointing '+', or
        # on an exon start while pointing '-'; side 2 uses the mirrored checks.
        boundary_checks = (
            (left, genepred_basics.is_exon_end, genepred_basics.is_exon_start),
            (right, genepred_basics.is_exon_start, genepred_basics.is_exon_end),
        )
        for side, plus_check, minus_check in boundary_checks:
            # A chromosome missing from the reference is checked against [].
            entries = genepred[side['chr']] if side['chr'] in genepred else []
            if side['dir'] == '+':
                hit = plus_check(side['chr'], int(side['coo']), entries)
            elif side['dir'] == '-':
                hit = minus_check(side['chr'], int(side['coo']), entries)
            else:
                hit = False
            side['end'] = 'Y' if hit else 'N'

        # Gene lookup over the interval [coordinate - 1, coordinate]; without
        # a hit the side is labelled "chr:coordinate" and has no strand/name.
        for side in (left, right):
            coordinate = int(side['coo'])
            annotation = genepred_basics.gene_annotate_by_coordinates(
                side['chr'], coordinate - 1, coordinate, annot_struct)
            if annotation:
                side['best'] = side['name'] = annotation[0][0]
                side['strand'] = annotation[0][1]
            else:
                side['best'] = side['chr'] + ":" + str(side['coo'])
                side['strand'] = ''
                side['name'] = ''

        action = _orient_by_transcripts(
            (left['strand'], right['strand']), (left['dir'], right['dir']))
        evidence_transcript = 'N' if action == "unknown" else 'Y'

        # Orientation implied by the splice-site bases, if they are canonical.
        if splice == 'GT AG':
            splice_action = "no reverse"
        elif splice == 'CT AC':
            splice_action = "reverse"
        else:
            splice_action = None

        if action == "unknown":
            # No transcript evidence: let the splice site decide, if it can.
            evidence_splice = 'Y' if splice_action else 'N'
            if splice_action:
                action = splice_action
        else:
            # Splice evidence only counts when it agrees with the transcripts.
            evidence_splice = 'Y' if splice_action == action else 'N'

        if action == "reverse":
            # Swap the sides (each keeps its own boundary flag, gene and
            # strand) and flip both directions; side 2 is flipped first.
            first, second = (
                dict(right, dir=opposite(right['dir'])),
                dict(left, dir=opposite(left['dir'])),
            )
        else:
            first, second = left, right

        fields = [junc_name]
        for side in (first, second):
            fields += [side['chr'], str(side['coo']), side['dir'], side['end']]
        fields += [
            first['best'] + "-" + second['best'],
            first['strand'] + "/" + second['strand'],
            evidence_transcript,
            evidence_splice,
            first['name'],
            second['name'],
        ]
        fields += [str(junc[key]) for key in trailing_keys]
        result[junc_name] = "\t".join(fields)
    return result