Example 1
    def convert_repeat_matrix_to_counts(self, x_pileup_batch, x_repeat_batch):
        """Collapse a batch of repeat matrices into per-column average repeat counts."""
        caller = ConsensusCaller(sequence_to_float=sequence_to_float,
                                 sequence_to_index=sequence_to_index)

        batch_size, height, width = x_repeat_batch.shape

        repeat_matrices = list()
        for b in range(batch_size):
            x_repeat = x_repeat_batch[b, :, :]
            x_pileup = x_pileup_batch[b, :, :]

            # drop pileup rows that contain only the background ("-") value
            x_pileup = trim_empty_rows(x_pileup,
                                       background_value=sequence_to_float["-"])
            x_repeat = trim_empty_rows(x_repeat,
                                       background_value=sequence_to_float["-"])

            # per-column average repeat counts, with a leading batch axis added
            # so the per-sample results can be concatenated below
            repeat_counts = caller.get_avg_repeat_counts(
                pileup_matrix=x_pileup, repeat_matrix=x_repeat)
            repeat_counts = numpy.expand_dims(repeat_counts, axis=0)

            repeat_matrices.append(repeat_counts)

        x_batch = numpy.concatenate(repeat_matrices, axis=0)

        return x_batch
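The averaging inside get_avg_repeat_counts is not shown in this excerpt. Below is a minimal standalone sketch of the presumed operation, averaging the repeat counts of the reads that cover each column while ignoring cells whose pileup entry equals the background ("-") code; the function name avg_repeat_counts_per_column and the assumption that the background is encoded as 0.0 are illustrative, not taken from the source.

import numpy

def avg_repeat_counts_per_column(pileup_matrix, repeat_matrix, background_value=0.0):
    # Sketch only: average repeat counts over the reads covering each column,
    # ignoring cells whose pileup entry equals the background ("-") code.
    coverage_mask = (pileup_matrix != background_value)
    column_coverage = numpy.maximum(coverage_mask.sum(axis=0), 1)  # avoid divide-by-zero
    column_sums = (repeat_matrix * coverage_mask).sum(axis=0)
    return column_sums / column_coverage

# Example: 3 reads x 4 columns, background assumed to be encoded as 0.0
pileup = numpy.array([[0.2, 0.4, 0.0, 0.6],
                      [0.2, 0.4, 0.8, 0.6],
                      [0.0, 0.4, 0.8, 0.6]])
repeats = numpy.array([[1, 2, 0, 3],
                       [1, 2, 2, 3],
                       [0, 2, 2, 3]])
print(avg_repeat_counts_per_column(pileup, repeats))  # -> [1. 2. 2. 3.]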
Example 2
    def convert_pileup_to_frequency(self, x_batch):
        """Convert a batch of pileup matrices into per-column normalized base frequencies."""
        caller = ConsensusCaller(sequence_to_float=sequence_to_float,
                                 sequence_to_index=sequence_to_index)

        batch_size, height, width = x_batch.shape

        frequency_matrices = list()
        for b in range(batch_size):
            x_pileup = x_batch[b, :, :]

            # drop pileup rows that contain only the background ("-") value
            x_pileup = trim_empty_rows(x_pileup,
                                       background_value=sequence_to_float["-"])

            # per-column normalized base frequencies for this pileup
            normalized_frequencies = caller.get_normalized_frequencies(
                x_pileup)
            normalized_frequencies = numpy.expand_dims(normalized_frequencies,
                                                       axis=0)

            frequency_matrices.append(normalized_frequencies)

        x_batch = numpy.concatenate(frequency_matrices, axis=0)

        return x_batch
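As with the previous example, get_normalized_frequencies is only invoked here. A minimal sketch of the kind of per-column frequency normalization it presumably performs is shown below; the integer base encoding, the class count of five, and the function name normalized_base_frequencies are assumptions for illustration, not taken from the source.

import numpy

def normalized_base_frequencies(pileup_indices, n_classes=5):
    # Sketch only: pileup_indices is an integer-encoded pileup (rows = reads,
    # columns = positions); index 0 is assumed to be the gap symbol "-".
    _, width = pileup_indices.shape
    frequencies = numpy.zeros((n_classes, width))
    for column in range(width):
        counts = numpy.bincount(pileup_indices[:, column], minlength=n_classes)
        total = counts.sum()
        if total > 0:
            frequencies[:, column] = counts / total
    return frequencies

# Example: 3 reads x 2 positions, assumed encoding 0="-", 1="A", 2="C", 3="G", 4="T"
pileup = numpy.array([[1, 2],
                      [1, 2],
                      [0, 3]])
print(normalized_base_frequencies(pileup))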
Example 3
def test_consensus(consensus_caller, data_loader, n_batches=None):
    """Evaluate consensus calls against reference sequences and report confusion-based accuracy."""
    if n_batches is None:
        n_batches = len(data_loader)

    total_confusion = None
    total_realigned_confusion = None

    for b, batch in enumerate(data_loader):
        sys.stdout.write("\r %.2f%% COMPLETED  " % (100 * b / n_batches))

        paths, x, y = batch

        # (n,h,w) shape
        batch_size, height, width = x.shape

        for n in range(batch_size):
            x_n = x[n, :, :].data.numpy()
            y_n = y[n, :, :].data.numpy()

            # drop pileup rows that contain only the background ("-") value
            x_n = trim_empty_rows(x_n, background_value=sequence_to_float["-"])

            y_predict_n = consensus_caller.call_consensus_as_one_hot(x_n)

            consensus_sequence = consensus_caller.decode_one_hot_to_string(
                y_predict_n)
            reference_sequence = consensus_caller.decode_one_hot_to_string(y_n)

            # visualize degenerate cases where the consensus call comes back empty
            if consensus_sequence == '':
                pyplot.imshow(y_predict_n)
                pyplot.show()
                pyplot.close()
                pyplot.imshow(x_n)
                pyplot.show()
                pyplot.close()

            y_predict_n = torch.FloatTensor(y_predict_n)
            y_n = torch.FloatTensor(y_n)
            confusion = sequential_confusion(y_predict=y_predict_n, y=y_n)

            # realign strings to each other and convert to one hot
            y_pileup_predict_expanded, y_pileup_expanded = \
                realign_consensus_to_reference(consensus_sequence=consensus_sequence,
                                               ref_sequence=reference_sequence, print_alignment=False)

            y_pileup_predict_expanded = torch.FloatTensor(
                y_pileup_predict_expanded)
            y_pileup_expanded = torch.FloatTensor(y_pileup_expanded)
            realigned_confusion = sequential_confusion(
                y_predict=y_pileup_predict_expanded, y=y_pileup_expanded)

            # normalized_frequencies = consensus_caller.call_consensus_as_normalized_frequencies(x_n)
            # plot_consensus_prediction(x=x_n,y=y_n,y_predict=normalized_frequencies)

            if total_confusion is None:
                total_confusion = confusion
                total_realigned_confusion = realigned_confusion
            else:
                total_confusion += confusion
                total_realigned_confusion += realigned_confusion

        # stop once n_batches batches have been processed (b is zero-based)
        if b + 1 == n_batches:
            break

    print()

    plot_confusion(total_confusion)
    plot_confusion(total_realigned_confusion)

    # total_confusion = normalize_confusion_matrix(total_confusion)
    # total_realigned_confusion = normalize_confusion_matrix(total_realigned_confusion)
    #
    # plot_confusion(total_confusion)
    # plot_confusion(total_realigned_confusion)

    accuracy = calculate_accuracy_from_confusion(total_realigned_confusion)

    print("Total accuracy", accuracy)