Python get_fasta_from_file Beispiele, BioinformaticsToolkit.utils.get_fasta_from_file Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: views.py Projekt: guikarist/bioinformatics-toolkit

def algorithm(request):
    """Run the sensing-matrix analysis on the uploaded sets and render it.

    Reads ``method``, ``threshold-pos`` and ``threshold-neg`` from the POST
    data (all converted with ``int``) and the ``train-set-pos``,
    ``train-set-neg`` and ``test-set`` uploads, which are parsed with
    ``my_utils.get_fasta_from_file``.

    ``method == 0`` delegates to ``__normal_sensing_matrix`` and exposes its
    first return value as ``matrix``; any other value delegates to
    ``__probability_sensing_matrix`` and exposes it as ``matrices``.

    Returns the response produced by rendering
    ``sensingmatrix/result.html``.
    """
    # Form parameters first, then the uploaded FASTA files.
    method = int(request.POST['method'])
    threshold_pos = int(request.POST['threshold-pos'])
    threshold_neg = int(request.POST['threshold-neg'])
    train_set_pos = my_utils.get_fasta_from_file(
        request.FILES['train-set-pos'])
    train_set_neg = my_utils.get_fasta_from_file(
        request.FILES['train-set-neg'])
    test_set = my_utils.get_fasta_from_file(request.FILES['test-set'])

    # Both back ends take the same arguments and return a 3-tuple whose
    # first element is stored under a method-specific key.
    call_args = (test_set, train_set_pos, train_set_neg, threshold_pos,
                 threshold_neg)
    if method == 0:
        payload_key = 'matrix'
        payload, length, test_results = __normal_sensing_matrix(*call_args)
    else:
        payload_key = 'matrices'
        payload, length, test_results = __probability_sensing_matrix(
            *call_args)

    context = {
        payload_key: payload,
        # 1-based position labels for the template.
        'length': range(1, length + 1),
        'test_results': test_results,
        'method': method,
    }
    return render(request, 'sensingmatrix/result.html', context)

Beispiel #2

0

Datei anzeigen

Datei: views.py Projekt: guikarist/bioinformatics-toolkit

def algorithm(request):
    """Compute k-tuple offsets between two uploaded sequences and render them.

    Reads the uploads ``file_1`` and ``file_2`` through
    ``utils.get_fasta_from_file`` and uses element ``[1]`` of the last record
    of each file as the sequence. The tuple length comes from the POST field
    ``k_tuple`` and is converted with ``int`` when it is non-empty.

    The template context contains:

    * ``positions_table``: every k-tuple of the first sequence mapped to the
      list of indices where it starts.
    * ``offsets_table``: one ``[k_tuple, offsets]`` entry per k-tuple of the
      second sequence, where each offset is ``index_in_2 - index_in_1``.
    * ``result_offsets``: the offset(s) that occur most often (empty when no
      k-tuple is shared).
    * ``matched_base_number``: the k parameter.

    Returns the response produced by rendering ``fasta/result.html``.
    """
    # Read in fasta files; with several records per file the last one wins.
    records_1 = utils.get_fasta_from_file(request.FILES['file_1'])
    records_2 = utils.get_fasta_from_file(request.FILES['file_2'])
    string_1 = None
    string_2 = None
    for record in records_1:
        string_1 = record[1]
    for record in records_2:
        string_2 = record[1]

    # Read in K parameter (left untouched when the field is empty).
    k_parameter = request.POST['k_tuple']
    if k_parameter:
        k_parameter = int(k_parameter)

    # Build position table. A plain dict is used on purpose: it is handed to
    # the template, so it must not grow entries on lookup.
    positions_table = {}
    for index in range(len(string_1)):
        if index + k_parameter > len(string_1):
            break
        k_tuple = string_1[index:index + k_parameter]
        positions_table.setdefault(k_tuple, []).append(index)

    # Calculate the offsets and count how often each one occurs.
    offsets_frequency = {}
    offsets_table = []
    for index in range(len(string_2)):
        if index + k_parameter > len(string_2):
            break
        k_tuple = string_2[index:index + k_parameter]
        offsets_single_tuple = [
            index - pos for pos in positions_table.get(k_tuple, ())
        ]
        for offset in offsets_single_tuple:
            offsets_frequency[offset] = offsets_frequency.get(offset, 0) + 1
        offsets_table.append([k_tuple, offsets_single_tuple])

    # Get the most occurring offset(s). The maximum is computed once instead
    # of once per candidate; max() is skipped when nothing matched because it
    # raises on an empty sequence.
    result_offsets = []
    if offsets_frequency:
        highest_count = max(offsets_frequency.values())
        result_offsets = [
            offset for offset, count in offsets_frequency.items()
            if count == highest_count
        ]

    result = {
        'positions_table': positions_table,
        'offsets_table': offsets_table,
        'result_offsets': result_offsets,
        'matched_base_number': k_parameter,
    }

    return render(request, 'fasta/result.html', result)

Beispiel #3

0

Datei anzeigen

def algorithm(request):
    """Align two uploaded sequences and render the alignment result.

    The sequences are element ``[1]`` of the last record of the uploads
    ``file_1`` and ``file_2`` (parsed by ``utils.get_fasta_from_file``).
    POST field ``mode`` picks the aligner (``0`` -> ``__global_mode``,
    ``1`` -> ``__local_mode``) and ``seq_type`` picks the scoring
    (``0`` -> ``__BLAST`` with ``__NC_ALIGN_GAP``, ``1`` -> ``__BLOSUM_62``
    with ``__PRO_ALIGN_GAP``).

    Raises:
        RuntimeError: if ``mode`` or ``seq_type`` is neither 0 nor 1.

    Returns the response produced by rendering
    ``smithwaterman/result.html``.
    """
    # Read in fasta files; with several records per file the last one wins.
    records_1 = utils.get_fasta_from_file(request.FILES['file_1'])
    records_2 = utils.get_fasta_from_file(request.FILES['file_2'])
    seq_a = None
    seq_b = None
    for entry in records_1:
        seq_a = entry[1]
    for entry in records_2:
        seq_b = entry[1]

    # Decode the requested mode and sequence type.
    mode = int(request.POST['mode'])
    seq_type = int(request.POST['seq_type'])
    if mode not in (0, 1) or seq_type not in (0, 1):
        raise RuntimeError("Unexpected mode code in SW Algorithm!")

    if seq_type == 0:
        scoring, gap = __BLAST, __NC_ALIGN_GAP
    else:
        scoring, gap = __BLOSUM_62, __PRO_ALIGN_GAP

    if mode == 0:
        alignments, colored_matrix = __global_mode(seq_a, seq_b, scoring, gap)
    else:
        alignments, colored_matrix = __local_mode(seq_a, seq_b, scoring, gap)

    def classify(ch1, ch2):
        # Equality is tested first, so two identical characters always count
        # as a match; a gap is any remaining pair containing '-'.
        if ch1 == ch2:
            return __AlignType.MATCH
        if ch1 == '-' or ch2 == '-':
            return __AlignType.GAP
        return __AlignType.MISMATCH

    # Tag every aligned column with its alignment type.
    colored_alignments = [
        [(ch1, ch2, classify(ch1, ch2))
         for ch1, ch2 in zip(alignment[0], alignment[1])]
        for alignment in alignments
    ]

    # Shape the data for the django template: both sequences get a leading
    # '*', and every matrix row is prefixed with its character of sequence 2.
    seq_a = '*' + seq_a
    seq_b = '*' + seq_b
    processed_matrix = [
        (ch, *row) for row, ch in zip(colored_matrix, seq_b)
    ]

    context = {
        'string_1': seq_a,
        'string_2': seq_b,
        'processed_matrix': processed_matrix,
        'colored_alignments': colored_alignments
    }
    return render(request, 'smithwaterman/result.html', context)

Beispiel #4

0

Datei anzeigen

Datei: views.py Projekt: guikarist/bioinformatics-toolkit

def _collect_diagonal_motifs(box_plot_graph, string_1, string_2, row_start,
                             col_start, align_offset):
    """Return the motifs found along one diagonal of the dot matrix.

    The diagonal starts at ``box_plot_graph[row_start][col_start]`` and runs
    until either sequence ends. Every maximal run of truthy cells yields one
    ``[substring_of_string_1, substring_of_string_2]`` pair; each substring
    is widened by ``align_offset`` characters on both sides of the run.
    """
    motifs = []
    loop_length = min(len(string_1) - row_start, len(string_2) - col_start)
    motif_length = 0
    # One extra iteration past the end acts as a sentinel "no match", so a
    # run that touches the last cell of the diagonal is flushed as well.
    for offset in range(loop_length + 1):
        is_hit = (offset < loop_length and
                  box_plot_graph[row_start + offset][col_start + offset])
        if is_hit:
            motif_length += 1
        elif motif_length:
            run_start = offset - motif_length - align_offset
            run_end = offset + align_offset
            motifs.append([
                string_1[row_start + run_start:row_start + run_end],
                string_2[col_start + run_start:col_start + run_end],
            ])
            motif_length = 0
    return motifs


def algorithm(request):
    """Build a dot matrix for two uploaded sequences and render its motifs.

    The sequences are element ``[1]`` of the last record of the uploads
    ``file_1`` and ``file_2`` (parsed by ``utils.get_fasta_from_file``).
    POST fields ``window_size``, ``move_step`` and ``stringency`` are
    converted with ``int`` when non-empty.

    Side effects: the sequences, the three parameters and the computed matrix
    are stored in ``request.session`` under ``string_1``, ``string_2``,
    ``window_size``, ``move_step``, ``stringency`` and ``box_plot_graph``.

    A matrix cell is set to 1 when ``__compare_string`` returns a truthy
    value for the two windows centred on it. Motifs are the runs of 1-cells
    along every diagonal, including runs that reach the end of a diagonal,
    sorted by the length of their first substring, longest first.

    Returns the response produced by rendering ``boxplot/result.html``.
    """
    # Read in fasta files; with several records per file the last one wins.
    records_1 = utils.get_fasta_from_file(request.FILES['file_1'])
    records_2 = utils.get_fasta_from_file(request.FILES['file_2'])
    string_1 = None
    string_2 = None
    for record in records_1:
        string_1 = record[1]
    for record in records_2:
        string_2 = record[1]

    window_size = request.POST['window_size']
    if window_size is not None and window_size != '':
        window_size = int(window_size)
    move_step = request.POST['move_step']
    if move_step is not None and move_step != '':
        move_step = int(move_step)
    stringency = request.POST['stringency']
    if stringency is not None and stringency != '':
        stringency = int(stringency)

    # Use session to store data
    request.session['string_1'] = string_1
    request.session['string_2'] = string_2
    request.session['window_size'] = window_size
    request.session['move_step'] = move_step
    request.session['stringency'] = stringency

    # Rows follow string_1, columns follow string_2; cells start at 0, so
    # only matching windows need to be written.
    box_plot_graph = [[0] * len(string_2) for _ in range(len(string_1))]
    # A window's result is recorded at (roughly) the window's centre.
    align_offset = int((window_size - 1) / 2)
    for index_1 in range(0, len(string_1) - (window_size - 1), move_step):
        temp_1 = string_1[index_1:index_1 + window_size]
        for index_2 in range(0, len(string_2) - (window_size - 1), move_step):
            temp_2 = string_2[index_2:index_2 + window_size]
            if __compare_string(temp_1, temp_2, stringency):
                box_plot_graph[index_1 + align_offset][index_2 +
                                                       align_offset] = 1
    request.session['box_plot_graph'] = box_plot_graph

    # Get a list of motifs: first the diagonals starting in column 0, then
    # those starting in row 0 (the main diagonal is only scanned once).
    motifs = []
    for index_1 in range(len(string_1)):
        motifs.extend(_collect_diagonal_motifs(
            box_plot_graph, string_1, string_2, index_1, 0, align_offset))
    for index_2 in range(1, len(string_2)):
        motifs.extend(_collect_diagonal_motifs(
            box_plot_graph, string_1, string_2, 0, index_2, align_offset))

    # Sort motifs by length.
    motifs.sort(key=lambda item: len(item[0]), reverse=True)
    result = {
        'motifs': motifs,
    }
    return render(request, 'boxplot/result.html', result)

Beispiel #5

0

Datei anzeigen

def algorithm(request):
    """Globally align two uploaded sequences and render every best alignment.

    The sequences are element ``[1]`` of the last record of the uploads
    ``file_1`` and ``file_2`` (parsed by ``utils.get_fasta_from_file``).
    POST field ``seq_type`` selects the scoring: ``0`` uses ``__BLAST`` with
    ``__NC_ALIGN_GAP``; any other value uses ``__BLOSUM_62`` with
    ``__PRO_ALIGN_GAP``.

    A score matrix with one row per character of sequence 2 and one column
    per character of sequence 1 (plus a leading row/column) is filled, then
    all paths from the bottom-right cell back to ``(0, 0)`` that reproduce
    the cell scores are enumerated breadth-first.

    The template context contains ``string_1`` and ``string_2`` (each
    prefixed with ``'*'``), ``processed_matrix`` (rows of ``(score,
    on_path)`` cells, each prefixed with its character of sequence 2) and
    ``colored_alignments`` (per alignment, ``(ch1, ch2, align_type)``
    columns).

    Returns the response produced by rendering
    ``needlemanwunsch/result.html``.
    """
    # Local import keeps the block self-contained; deque gives O(1) pops
    # from the left, unlike list.pop(0).
    from collections import deque

    # Read in fasta files; with several records per file the last one wins.
    records_1 = utils.get_fasta_from_file(request.FILES['file_1'])
    records_2 = utils.get_fasta_from_file(request.FILES['file_2'])
    string_1 = None
    string_2 = None
    for record in records_1:
        string_1 = record[1]
    for record in records_2:
        string_2 = record[1]

    # Select which sequence type to be process with.
    seq_type = int(request.POST['seq_type'])
    if seq_type == 0:
        gap_penalty = __NC_ALIGN_GAP
        sub_matrix = __BLAST
    else:
        gap_penalty = __PRO_ALIGN_GAP
        sub_matrix = __BLOSUM_62

    # Initialize score matrix: rows follow string_2, columns follow string_1.
    row_count = len(string_2) + 1
    col_count = len(string_1) + 1
    score_matrix = [[None] * col_count for _ in range(row_count)]
    score_matrix[0][0] = 0

    # First row and first column are pure gap runs.
    for col in range(1, col_count):
        score_matrix[0][col] = score_matrix[0][col - 1] + gap_penalty
    for row in range(1, row_count):
        score_matrix[row][0] = score_matrix[row - 1][0] + gap_penalty

    # Calculate the score matrix.
    for row in range(1, row_count):
        for col in range(1, col_count):
            match_score = (sub_matrix[string_2[row - 1]][string_1[col - 1]] +
                           score_matrix[row - 1][col - 1])
            gap_from_above = score_matrix[row - 1][col] + gap_penalty
            gap_from_left = score_matrix[row][col - 1] + gap_penalty
            score_matrix[row][col] = max(match_score, gap_from_above,
                                         gap_from_left)

    # Traceback to find the best alignments (using BFS). Each cell of
    # colored_matrix is (score, lies_on_some_best_path).
    colored_matrix = [[(score, False) for score in row]
                      for row in score_matrix]
    last_row, last_col = len(string_2), len(string_1)
    colored_matrix[last_row][last_col] = (
        colored_matrix[last_row][last_col][0], True)
    # Queue items are ((row, col), aligned_part_of_1, aligned_part_of_2).
    paths_queue = deque([((last_row, last_col), '', '')])
    alignments = []
    while paths_queue:
        (row, col), temp_string_1, temp_string_2 = paths_queue.popleft()

        # End: reached the top-left corner, the alignment is complete.
        if (row, col) == (0, 0):
            alignments.append((temp_string_1, temp_string_2))
            continue

        current_score = score_matrix[row][col]

        # Case 1: came from above, i.e. a gap in string_1.
        if row >= 1 and score_matrix[row - 1][col] == \
                current_score - gap_penalty:
            colored_matrix[row - 1][col] = (
                colored_matrix[row - 1][col][0], True)
            paths_queue.append(((row - 1, col),
                                '-' + temp_string_1,
                                string_2[row - 1] + temp_string_2))

        # Case 2: came from the left, i.e. a gap in string_2.
        if col >= 1 and score_matrix[row][col - 1] == \
                current_score - gap_penalty:
            colored_matrix[row][col - 1] = (
                colored_matrix[row][col - 1][0], True)
            paths_queue.append(((row, col - 1),
                                string_1[col - 1] + temp_string_1,
                                '-' + temp_string_2))

        # Case 3: came from the diagonal, i.e. both characters are aligned.
        if row >= 1 and col >= 1 and score_matrix[row - 1][col - 1] == \
                current_score - \
                sub_matrix[string_2[row - 1]][string_1[col - 1]]:
            colored_matrix[row - 1][col - 1] = (
                colored_matrix[row - 1][col - 1][0], True)
            paths_queue.append(((row - 1, col - 1),
                                string_1[col - 1] + temp_string_1,
                                string_2[row - 1] + temp_string_2))

    # Color alignments.
    colored_alignments = []
    for alignment in alignments:
        colored_alignment = []
        for ch1, ch2 in zip(alignment[0], alignment[1]):
            if ch1 == ch2:
                colored_alignment.append((ch1, ch2, __AlignType.MATCH))
            elif ch1 == '-' or ch2 == '-':
                colored_alignment.append((ch1, ch2, __AlignType.GAP))
            else:
                colored_alignment.append((ch1, ch2, __AlignType.MISMATCH))
        colored_alignments.append(colored_alignment)

    # Procession for django template: prefix both sequences with '*' so they
    # line up with the extra leading row/column, and label every matrix row
    # with its character of string_2.
    string_1 = '*' + string_1
    string_2 = '*' + string_2
    processed_matrix = [
        (ch, *row) for row, ch in zip(colored_matrix, string_2)
    ]
    result = {
        'string_1': string_1,
        'string_2': string_2,
        'processed_matrix': processed_matrix,
        'colored_alignments': colored_alignments
    }

    return render(request, 'needlemanwunsch/result.html', result)