Ejemplo n.º 1
0
def add_known(x_L, x_R, y_L, y_R, gap, genbank, ref, seq, temp, cds, trna, rrna, region, feature_count, results, features, feature_list, removed_results, line, file_loc):
    '''
    Record a hit that falls on a known IS position.

    Both ends of the hit are appended to ``genbank.features`` and the
    sequence between them is scored with ``check_seq_between``. Depending on
    those scores, the region is written to ``results`` as a 'Known' call or
    a 'Possible related IS' call, or, when no scores come back at all,
    the raw input line is stored in ``removed_results`` instead.

    x_L, x_R, y_L, y_R -- coordinates of the left and right ends of the hit
    gap                -- gap value, copied into the table row unchanged
    genbank            -- record whose ``features`` list receives both ends
    ref, seq, temp     -- forwarded to ``check_seq_between``
    cds, trna, rrna    -- forwarded to ``get_flanking_genes``
    region             -- region number; the table key is 'region_<region>'
    feature_count      -- not used by this function
    results            -- dict of table rows, updated in place
    features, feature_list -- forwarded to ``get_flanking_genes``
    removed_results    -- dict of discarded regions, updated in place
    line               -- raw input line, kept when the region is discarded
    file_loc           -- source file path; if it contains 'unpaired' the
                          call gets a trailing '?'

    Returns None; all output is through the mutated arguments.
    '''
    # Orientation decides which pair of coordinates bounds the region.
    orient, start, end = ('F', y_L, x_R) if y_L < x_R else ('R', y_R, x_L)

    # Annotate both ends of the hit on the genbank record.
    first_end, second_end = createFeature([x_L, y_L, x_R, y_R], orient, 'Known hit')
    for hit_end in (first_end, second_end):
        genbank.features.append(hit_end)

    # Score the sequence between the two ends against the IS query.
    region_key = 'region_' + str(region)
    hit_stats = check_seq_between(ref, seq, start, end, region_key, temp)
    has_stats = len(hit_stats) != 0
    # Both scores must reach 80 for a confident known call (the original
    # comment describes them as coverage and %ID).
    confirmed = has_stats and hit_stats[0] >= 80 and hit_stats[1] >= 80

    # Flanking genes are looked up for every outcome, including regions that
    # end up in removed_results.
    flank_left, flank_right = get_flanking_genes(features, feature_list, start, end, cds, trna, rrna)

    if confirmed:
        if flank_left[0] == flank_right[0]:
            # Same gene on both sides: drop the trailing character of each
            # distance (the original comment says this removes the + / -).
            flank_right[1] = flank_right[1][:-1]
            flank_left[1] = flank_left[1][:-1]
        unpaired_label, paired_label = 'Known?', 'Known'
    else:
        # NOTE(review): 'releated' is misspelled, but the label is written to
        # the output table, so it is reproduced exactly; confirm that no
        # consumer depends on it before correcting.
        unpaired_label, paired_label = 'Possible related IS?', 'Possible releated IS'

    call = unpaired_label if 'unpaired' in file_loc else paired_label

    if not has_stats:
        # Nothing was scored for this region: keep the raw line for reference.
        removed_results[region_key] = line.strip() + '\t' + file_loc + '\n'
        return

    # The last column is always empty: the original set 'Gene interrupted'
    # for a shared gene and then unconditionally reset it to ''.
    func_pred = ''
    results[region_key] = [
        orient,
        str(start),
        str(end),
        gap,
        call,
        str(hit_stats[0]),
        '%.2f' % hit_stats[1],
        flank_left[-1][:-1],
        flank_left[-1][-1],
        flank_left[1],
        flank_right[-1][:-1],
        flank_right[-1][-1],
        flank_right[1],
        func_pred,
    ]
Ejemplo n.º 2
0
def novel_hit(x_L, y_L, x_R, y_R, x, y, genbank, ref, cds, trna, rrna, gap, orient, feature_count, region, results, features, feature_list, unpaired=False, star=False):
    '''
    Record a novel hit and its flanking gene information.

    Both ends of the hit are appended to ``genbank.features`` and a table
    row for the region is stored in ``results``.

    x_L, y_L, x_R, y_R -- coordinates passed to ``createFeature``
    x, y               -- hit coordinates used for the flanking-gene lookup
                          and written to the table row
    genbank            -- record whose ``features`` list receives both ends
    ref                -- not used by this function
    cds, trna, rrna    -- forwarded to ``get_flanking_genes``
    gap, orient        -- copied into the table row unchanged
    feature_count      -- not used by this function
    region             -- region number; the table key is 'region_<region>'
    results            -- dict of table rows, updated in place
    features, feature_list -- forwarded to ``get_flanking_genes``
    unpaired           -- truthy when the hit is paired with a low coverage
                          end; the call becomes 'Novel?'
    star               -- truthy when the hit is imprecise (gap larger than
                          expected); the call becomes 'Novel*' and takes
                          precedence over ``unpaired``

    Returns None; all output is through the mutated arguments.
    '''
    # Build the genbank note; both qualifiers may apply at once.
    note = 'Novel hit'
    if unpaired:
        note += ' , unpaired hit'
    if star:
        note += ' , imprecise hit'

    # Annotate both ends of the hit on the genbank record.
    left_feature, right_feature = createFeature([x_L, y_L, x_R, y_R], orient, note)
    genbank.features.append(left_feature)
    genbank.features.append(right_feature)

    # Get the genes flanking the left and right ends.
    gene_left, gene_right = get_flanking_genes(features, feature_list, x, y, cds, trna, rrna)

    # NOTE(review): the previous code set 'Gene interrupted' when both flanks
    # resolved to the same gene and then unconditionally reset the value to
    # ''. The column is kept blank to preserve the existing table output;
    # confirm whether the label was meant to be re-enabled.
    func_pred = ''

    # Exhaustive branches so that `call` is always bound, whatever value is
    # passed for `unpaired`.
    if star:
        call = 'Novel*'
    elif unpaired:
        call = 'Novel?'
    else:
        call = 'Novel'

    # Store all information for final table output.
    results['region_' + str(region)] = [
        orient,
        str(x),
        str(y),
        gap,
        call,
        '',
        '',
        gene_left[-1][:-1],
        gene_left[-1][-1],
        gene_left[1],
        gene_right[-1][:-1],
        gene_right[-1][-1],
        gene_right[1],
        func_pred,
    ]
Ejemplo n.º 3
0
def add_known(x_L, x_R, y_L, y_R, gap, genbank, ref, seq, temp, cds, trna,
              rrna, region, feature_count, results, features, feature_list,
              removed_results, line, file_loc):
    '''
    Record a hit that falls on a known IS position.

    Both ends of the hit are appended to ``genbank.features`` and the
    sequence between them is scored with ``check_seq_between``. Depending on
    those scores, the region is written to ``results`` as a 'Known' call or
    a 'Possible related IS' call, or, when no scores come back at all,
    the raw input line is stored in ``removed_results`` instead.

    x_L, x_R, y_L, y_R -- coordinates of the left and right ends of the hit
    gap                -- gap value, copied into the table row unchanged
    genbank            -- record whose ``features`` list receives both ends
    ref, seq, temp     -- forwarded to ``check_seq_between``
    cds, trna, rrna    -- forwarded to ``get_flanking_genes``
    region             -- region number; the table key is 'region_<region>'
    feature_count      -- not used by this function
    results            -- dict of table rows, updated in place
    features, feature_list -- forwarded to ``get_flanking_genes``
    removed_results    -- dict of discarded regions, updated in place
    line               -- raw input line, kept when the region is discarded
    file_loc           -- source file path; if it contains 'unpaired' the
                          call gets a trailing '?'

    Returns None; all output is through the mutated arguments.
    '''
    # Orientation decides which pair of coordinates bounds the region.
    if y_L < x_R:
        orient, start, end = 'F', y_L, x_R
    else:
        orient, start, end = 'R', y_R, x_L

    # Annotate both ends of the hit on the genbank record.
    first_end, second_end = createFeature([x_L, y_L, x_R, y_R], orient,
                                          'Known hit')
    for hit_end in (first_end, second_end):
        genbank.features.append(hit_end)

    # Score the sequence between the two ends against the IS query.
    region_key = 'region_' + str(region)
    hit_stats = check_seq_between(ref, seq, start, end, region_key, temp)
    has_stats = len(hit_stats) != 0
    # Both scores must reach 80 for a confident known call (the original
    # comment describes them as coverage and %ID).
    confirmed = (has_stats and hit_stats[0] >= 80 and hit_stats[1] >= 80)

    # Flanking genes are looked up for every outcome, including regions that
    # end up in removed_results.
    flank_left, flank_right = get_flanking_genes(features, feature_list,
                                                 start, end, cds, trna, rrna)

    if confirmed:
        if flank_left[0] == flank_right[0]:
            # Same gene on both sides: drop the trailing character of each
            # distance (the original comment says this removes the + / -).
            flank_right[1] = flank_right[1][:-1]
            flank_left[1] = flank_left[1][:-1]
        unpaired_label = 'Known?'
        paired_label = 'Known'
    else:
        # NOTE(review): 'releated' is misspelled, but the label is written to
        # the output table, so it is reproduced exactly; confirm that no
        # consumer depends on it before correcting.
        unpaired_label = 'Possible related IS?'
        paired_label = 'Possible releated IS'

    if 'unpaired' in file_loc:
        call = unpaired_label
    else:
        call = paired_label

    if not has_stats:
        # Nothing was scored for this region: keep the raw line for reference.
        removed_results[region_key] = line.strip() + '\t' + file_loc + '\n'
        return

    # The last column is always empty: the original set 'Gene interrupted'
    # for a shared gene and then unconditionally reset it to ''.
    func_pred = ''
    results[region_key] = [
        orient,
        str(start),
        str(end),
        gap,
        call,
        str(hit_stats[0]),
        '%.2f' % hit_stats[1],
        flank_left[-1][:-1],
        flank_left[-1][-1],
        flank_left[1],
        flank_right[-1][:-1],
        flank_right[-1][-1],
        flank_right[1],
        func_pred,
    ]
Ejemplo n.º 4
0
def novel_hit(x_L,
              y_L,
              x_R,
              y_R,
              x,
              y,
              genbank,
              ref,
              cds,
              trna,
              rrna,
              gap,
              orient,
              feature_count,
              region,
              results,
              features,
              feature_list,
              unpaired=False,
              star=False):
    '''
    Record a novel hit and its flanking gene information.

    Both ends of the hit are appended to ``genbank.features`` and a table
    row for the region is stored in ``results``.

    x_L, y_L, x_R, y_R -- coordinates passed to ``createFeature``
    x, y               -- hit coordinates used for the flanking-gene lookup
                          and written to the table row
    genbank            -- record whose ``features`` list receives both ends
    ref                -- not used by this function
    cds, trna, rrna    -- forwarded to ``get_flanking_genes``
    gap, orient        -- copied into the table row unchanged
    feature_count      -- not used by this function
    region             -- region number; the table key is 'region_<region>'
    results            -- dict of table rows, updated in place
    features, feature_list -- forwarded to ``get_flanking_genes``
    unpaired           -- truthy when the hit is paired with a low coverage
                          end; the call becomes 'Novel?'
    star               -- truthy when the hit is imprecise (gap larger than
                          expected); the call becomes 'Novel*' and takes
                          precedence over ``unpaired``

    Returns None; all output is through the mutated arguments.
    '''
    # Build the genbank note; both qualifiers may apply at once.
    note = 'Novel hit'
    if unpaired:
        note += ' , unpaired hit'
    if star:
        note += ' , imprecise hit'

    # Annotate both ends of the hit on the genbank record.
    left_feature, right_feature = createFeature([x_L, y_L, x_R, y_R], orient,
                                                note)
    genbank.features.append(left_feature)
    genbank.features.append(right_feature)

    # Get the genes flanking the left and right ends.
    gene_left, gene_right = get_flanking_genes(features, feature_list, x, y,
                                               cds, trna, rrna)

    # NOTE(review): the previous code set 'Gene interrupted' when both flanks
    # resolved to the same gene and then unconditionally reset the value to
    # ''. The column is kept blank to preserve the existing table output;
    # confirm whether the label was meant to be re-enabled.
    func_pred = ''

    # Exhaustive branches so that `call` is always bound, whatever value is
    # passed for `unpaired`.
    if star:
        call = 'Novel*'
    elif unpaired:
        call = 'Novel?'
    else:
        call = 'Novel'

    # Store all information for final table output.
    results['region_' + str(region)] = [
        orient,
        str(x),
        str(y),
        gap,
        call,
        '',
        '',
        gene_left[-1][:-1],
        gene_left[-1][-1],
        gene_left[1],
        gene_right[-1][:-1],
        gene_right[-1][-1],
        gene_right[1],
        func_pred,
    ]