Exemple #1
0
 def __init__(self, maf_file, blat_alignment_file, corrected_error_rate,
              uncorrected_error_rate):
     """Keep the inputs on the instance and load the MAF alignment.

     Args:
         maf_file: Stored as ``self.maf_file`` and handed to
             ``AssessUtil.read_maf_align``.
         blat_alignment_file: Stored as ``self.blat_align``.
         corrected_error_rate: Stored unchanged.
         uncorrected_error_rate: Stored unchanged.
     """
     self.maf_file = maf_file
     self.uncorrected_error_rate = uncorrected_error_rate
     self.corrected_error_rate = corrected_error_rate
     # The alignment is loaded eagerly, from the path stored above.
     self.maf_dictionary = AssessUtil.read_maf_align(self.maf_file)
     self.blat_align = blat_alignment_file
    def get_corrected_read_coordinate(self):
        """Gather shifted alignment intervals for each original read.

        Every line of ``self.blat_result`` is split on whitespace. Fields 9
        and 13 are read as the corrected and original sequence names, and
        fields 15 and 16 as integer begin/end positions (presumably the PSL
        qName/tName/tStart/tEnd columns -- confirm against the BLAT output).
        A line is used only when the corrected name, cut at its first ``.``,
        equals the original name.

        Returns:
            The result of ``AssessUtil.sort_dictionary_values`` applied to a
            dict mapping original read name to a list of ``[begin, end]``
            pairs.
        """
        intervals_by_read = {}
        with open(self.blat_result, 'r') as alignment_rows:
            for row in alignment_rows:
                fields = row.split()
                query_name = fields[9]
                target_name = fields[13]
                start = int(fields[15])
                stop = int(fields[16])
                if query_name.split('.', 1)[0] != target_name:
                    continue
                # Shift by the first element stored for this read so the
                # interval is in original chromosome coordinates.
                offset = self.original_read_coordinate[target_name][0]
                intervals_by_read.setdefault(target_name, []).append(
                    [start + offset, stop + offset])

        return AssessUtil.sort_dictionary_values(
            unsorted_dictionary=intervals_by_read)
 def __init__(self, maf_file, working_dir, corrected_file=None):
     """Prepare the working directory and load the original reads.

     Args:
         maf_file: MAF file path, stored as ``self.maf``.
         working_dir: Directory for intermediate files; created when it
             does not exist.
         corrected_file: Optional corrected-reads path. Any falsy value is
             stored as an empty string.
     """
     self.maf = maf_file
     self.corrected_file = corrected_file if corrected_file else ""
     self.work_dir = working_dir
     if not os.path.exists(self.work_dir):
         os.makedirs(self.work_dir)
     self.original_seq_file = self.work_dir + "/original_seq.fa"
     # The helper returns the name of a file holding a pickled dictionary
     # of reads and their coordinates.
     coordinate_pickle = AssessUtil.extract_original_read_maf(
         maf_file=self.maf, maf_fasta=self.original_seq_file)
     # Use a context manager so the handle is closed once the dictionary
     # is loaded.
     # NOTE(review): pickle.load is only safe on trusted files; this one is
     # presumably written by the helper call above -- confirm.
     with open(coordinate_pickle, "rb") as pickled_coordinates:
         self.original_coordinate = pickle.load(pickled_coordinates)
     self.original_sequence = Fasta(self.original_seq_file)
Exemple #4
0
    def get_corrected_read_coordinate(self):
        """Collect alignment intervals per original read from ``self.output``.

        Every line is split on whitespace. Fields 9 and 13 are read as the
        corrected and original sequence names, and fields 15 and 16 as
        integer begin/end positions (presumably the PSL
        qName/tName/tStart/tEnd columns -- confirm against the BLAT output).
        When the corrected name, cut at its first ``.``, equals the original
        name, both positions are shifted by the first element of
        ``self.original_coordinate[original_seq_name]``.

        Returns:
            The result of ``AssessUtil.sort_dictionary_values`` applied to a
            dict mapping original read name to a list of ``[begin, end]``
            pairs.

        Raises:
            ValueError: If field 15 or 16 of a line is not an integer.
        """
        corrected_read_dictionary = {}
        # NOTE(review): looks like a leftover debugging aid; kept so the
        # method's output on stdout is unchanged.
        print(os.path.isfile(self.output))
        with open(self.output, 'r') as input_data:
            for line in input_data:
                line_split = line.split()
                cor_seq_name = line_split[9]
                original_seq_name = line_split[13]
                # The fields are text; without int() the offset addition
                # below raises TypeError (str += int).
                begin = int(line_split[15])
                end = int(line_split[16])
                if cor_seq_name.split('.', 1)[0] == original_seq_name:
                    offset = self.original_coordinate[original_seq_name][0]
                    begin += offset
                    end += offset
                # NOTE(review): the interval is recorded for every line,
                # including lines whose names do not match (those keep
                # unshifted positions) -- confirm this is intended.
                corrected_read_dictionary.setdefault(
                    original_seq_name, []).append([begin, end])

        return AssessUtil.sort_dictionary_values(
            unsorted_dictionary=corrected_read_dictionary)
Exemple #5
0
    def get_correction_coordinate(self):
        """Collect alignment intervals and target lengths per original read.

        Every line of ``self.blat_align`` is split on whitespace. A line is
        used only when field 9 (corrected name), cut at its first ``.``,
        equals field 13 (original name). Fields 15 and 16 give the integer
        interval. Field 14 is recorded as the read's length, taken from the
        first matching line seen for that read.

        Returns:
            A 2-tuple: the result of ``AssessUtil.sort_dictionary_values``
            applied to the ``{name: [[begin, end], ...]}`` dict, and a dict
            mapping each name to ``[0, length]``.
        """
        intervals_by_read = {}
        length_by_read = {}
        with open(self.blat_align, 'r') as alignment_rows:
            for row in alignment_rows:
                fields = row.split()
                query_name, target_name = fields[9], fields[13]
                if query_name.split('.', 1)[0] != target_name:
                    continue
                interval = [int(fields[15]), int(fields[16])]
                if target_name not in intervals_by_read:
                    # First hit for this read: open its list and remember
                    # its length.
                    intervals_by_read[target_name] = []
                    length_by_read[target_name] = [0, int(fields[14])]
                intervals_by_read[target_name].append(interval)

        sorted_intervals = AssessUtil.sort_dictionary_values(
            unsorted_dictionary=intervals_by_read)
        return sorted_intervals, length_by_read
Exemple #6
0
 def get_uncorrected_coordinate(cls, corrected_coordinate_dict,
                                original_coordinate_dict):
     """Forward both dictionaries to ``AssessUtil.uncorrected_interval``.

     Args:
         corrected_coordinate_dict: Passed as ``corrected_coordinate``.
         original_coordinate_dict: Passed as ``original_seq_length``.

     Returns:
         Whatever ``AssessUtil.uncorrected_interval`` returns.
     """
     uncorrected = AssessUtil.uncorrected_interval(
         corrected_coordinate=corrected_coordinate_dict,
         original_seq_length=original_coordinate_dict)
     return uncorrected
                    "--fullLength",
                    help="Proovread untrimmed unsplitted file",
                    type=str,
                    required=True)
parser.add_argument("-o",
                    "--outputFile",
                    help="A file that contains uncorrected PacBio reads",
                    type=str,
                    required=True)
# Parse the minimum length as an integer with an integer default, so the
# threshold passed below has the same type whether or not -m is given.
parser.add_argument(
    "-m",
    "--minimumLength",
    help="The minimum length of uncorrected PacBio reads default 50",
    type=int,
    default=50,
    required=False)

args = parser.parse_args()

corrected = args.correctedReads
full_reads = args.fullLength
output = args.outputFile
minimum = args.minimumLength

if __name__ == '__main__':
    # Hand the parsed command-line values to the extraction helper.
    AssessUtil.extract_uncorrected_reads_proovread(
        trimmed_corrected_reads=corrected,
        full_corrected_reads=full_reads,
        output_file=output,
        threshold=minimum)
Exemple #8
0
 def get_maf_sign_dictionary(self):
     """Return ``read_maf_sign(self.maf_file)`` from a new ``AssessUtil``."""
     return AssessUtil().read_maf_sign(self.maf_file)
Exemple #9
0
from AssessUtil import AssessUtil
import argparse
import time
from pyfaidx import Fasta


# Command-line interface: one FASTA input and one tab-separated report.
parser = argparse.ArgumentParser(
    description='Script to calculate complexity and GC content of read')
parser.add_argument('-v', '--version', action='version',
                    version='%(prog)s 0.01')
parser.add_argument("-i", "--input", type=str, required=True,
                    help="Fatsa reads file")
parser.add_argument("-o", "--outputFile", type=str, required=True,
                    help="output reads file")

args = parser.parse_args()

input_file = args.input
output_file = args.outputFile

if __name__ == '__main__':
    with Fasta(input_file) as data_in, open(output_file, 'w') as data_out:
        # Header row, then one row per record in the FASTA file.
        data_out.write("read_name\tGC\tcomplexity\n")
        for record in data_in:
            read_name = record.name
            sequence = str(record)
            gc_value = AssessUtil.gc_content(sequence)
            complexity_value = AssessUtil.complexity(sequence)
            row = "{}\t{}\t{}\n".format(read_name, gc_value,
                                        complexity_value)
            data_out.write(row)
    # Report completion with the current date and 12-hour clock time.
    finished_date = time.strftime("%d/%m/%Y")
    finished_time = time.strftime("%I:%M:%S")
    print('Done {} {}'.format(finished_date, finished_time))
Exemple #10
0
 def get_uncorrected_coordinate(self):
     """Return the uncorrected intervals for this instance's reads.

     Calls ``AssessUtil.uncorrected_interval`` with the result of
     ``self.get_corrected_read_coordinate()`` as ``corrected_coordinate``
     and ``self.original_read_coordinate`` as ``original_seq_length``.
     """
     return AssessUtil.uncorrected_interval(
         corrected_coordinate=self.get_corrected_read_coordinate(),
         original_seq_length=self.original_read_coordinate)
 def get_uncorrected_coordinate(self, dictionary_of_corrected_reads):
     """Return the uncorrected intervals for the given corrected reads.

     Args:
         dictionary_of_corrected_reads: Passed to
             ``AssessUtil.uncorrected_interval`` as ``corrected_coordinate``;
             ``self.original_coordinate`` is passed as
             ``original_seq_length``.

     Returns:
         Whatever ``AssessUtil.uncorrected_interval`` returns.
     """
     return AssessUtil.uncorrected_interval(
         corrected_coordinate=dictionary_of_corrected_reads,
         original_seq_length=self.original_coordinate)
Exemple #12
0
from AssessUtil import AssessUtil

# Command-line interface: -m is the MAF input, and -o is the path passed to
# the extraction helper as maf_fasta.
parser = argparse.ArgumentParser(
    description='Script to extract original read  from maf file')
parser.add_argument('-v', '--version', action='version',
                    version='%(prog)s 0.01')
parser.add_argument(
    "-m", "--mafFile", type=str, required=True,
    help="A maf file that contains the alignment of original and artificial data")
parser.add_argument(
    "-o", "--outputFile", type=str, required=True,
    help="A file that contains assessment of the corrected PacBio reads")

args = parser.parse_args()
maf = args.mafFile
result = args.outputFile

if __name__ == "__main__":
    # Print the value returned by the helper.
    original_coordinate = AssessUtil.extract_original_read_maf(
        maf_fasta=result, maf_file=maf)
    print("Dictionary with the coordinate in: {}".format(original_coordinate))