Ejemplo n.º 1
0
def RunModule(fasta_file, fulllist_file, blast_file, report_file, tcsfile):
    '''Run the module.

    Opens the BLAST file for reading and the report for writing, parses the
    two FASTA inputs with FASTA_parser() and the BLAST handle with
    BLASTparser(), hands everything to Characterize(), and finally calls
    FASTAsplitter() with the report's path.

    Args:
        fasta_file: path passed to FASTA_parser().
        fulllist_file: path passed to FASTA_parser().
        blast_file: path opened in read mode; the handle goes to BLASTparser().
        report_file: path opened in write mode (truncating any existing
            file); the handle goes to Characterize() and the path itself is
            later given to FASTAsplitter().
        tcsfile: forwarded unchanged to Characterize().
    '''
    # Both handles stay open until Characterize() has returned; whether
    # BLASTparser() consumes its handle eagerly is not visible here, so the
    # BLAST file is deliberately not closed any earlier.
    with open(blast_file, 'r') as blast, open(report_file, 'w') as report:
        Dict = FASTA_parser(fasta_file)
        LargeDict = FASTA_parser(fulllist_file)
        Blasts = BLASTparser(blast)
        Characterize(Dict, Blasts, LargeDict, report, tcsfile)

    # The report is closed (and therefore flushed to disk) before its path
    # is handed on, so nothing written above can be missing from the file.
    FASTAsplitter(Dict, LargeDict, report_file)
Ejemplo n.º 2
0
def Read_metrics(fasta_file):
    '''Compute length statistics for the entries of a FASTA file.

    The lengths measured are those of the values returned by FASTA_parser().

    Returns:
        Tuple (avg_len, max_len, median): the mean length formatted as a
        string with two decimals, the largest length, and the median length.
    '''
    lengths = [len(entry) for entry in FASTA_parser(fasta_file).values()]
    longest = max(lengths)
    count = len(lengths)
    mean_text = "%.2f" % (sum(lengths) / count)

    ordered = sorted(lengths)
    middle = count // 2
    if count % 2:
        # Odd number of entries: the central element is the median.
        median = ordered[middle]
    else:
        # Even number of entries: average the two central elements.
        median = (ordered[middle] + ordered[middle - 1]) / 2

    return (mean_text, longest, median)
Ejemplo n.º 3
0
def Read_metrics(fasta_file):
    '''Return (average, maximum, median) of the entry lengths in a FASTA file.

    Lengths are taken from the values of the mapping FASTA_parser() returns.
    The average is a string formatted with two decimals; the maximum and the
    median are numbers.
    '''
    parsed = FASTA_parser(fasta_file)
    sizes = list(map(len, parsed.values()))
    biggest = max(sizes)
    average = "%.2f" % (sum(sizes) / len(sizes))

    sizes.sort()
    half, is_odd = divmod(len(sizes), 2)
    # With an odd count the middle element is the median; otherwise it is
    # the mean of the two elements around the midpoint.
    if is_odd:
        median = sizes[half]
    else:
        median = (sizes[half] + sizes[half - 1]) / 2

    return (average, biggest, median)
Ejemplo n.º 4
0
def SNP_gather(snp_file, orf_file):
    '''Gather SNP data from the fasta files.

    Args:
        snp_file: path handed to FASTA_parser(); each "#" found in a title
            is counted as one SNP and each entry as one contig.
        orf_file: path handed to FASTA_parser(); its titles are scanned for
            a "[<digits>" start value and for every "<digits>:" offset.

    Returns:
        Tuple (amount, num_contigs, avg_snps_per_contig, max_contig_size,
        min_contig_size, avg_contig_size, snps_in_orfs). The two averages
        are strings formatted with two decimals.

    Raises:
        ZeroDivisionError: if snp_file yields no entries.
        AttributeError: if an ORF title contains no "[".
        ValueError: if the "[" of an ORF title is not followed by digits.
    '''
    snps = FASTA_parser(snp_file)
    amount = sum(title.count("#") for title in snps)
    num_contigs = len(snps)
    avg_snps_per_contig = "%.2f" % (amount / num_contigs)
    contig_sizes = [len(sequence) for sequence in snps.values()]
    max_contig_size = max(contig_sizes)
    min_contig_size = min(contig_sizes)
    avg_contig_size = "%.2f" % (sum(contig_sizes) / len(contig_sizes))

    # This part counts how many SNPs are inside ORFs.
    # Raw strings are required: "\[" and "\d" are invalid escape sequences
    # in ordinary string literals. Compiled once, outside the loop.
    start_pattern = re.compile(r'\[\d*')
    offset_pattern = re.compile(r'\d+(?=:)')
    tail_pattern = re.compile(r' .*$')

    orfs = FASTA_parser(orf_file)
    contig_names = set()
    for title in orfs:
        # Drop the leading "[" from the match before converting.
        start = int(start_pattern.search(title).group()[1:])
        # Title with everything from the first space onwards removed.
        name = tail_pattern.sub('', title)
        is_reverse = "REVERSE" in title
        for offset in map(int, offset_pattern.findall(title)):
            # Reverse titles count backwards from the start value.
            if is_reverse:
                position = start - offset + 2
            else:
                position = start + offset
            contig_names.add(name + ';' + str(position))
    # Identical name;position pairs are only counted once.
    snps_in_orfs = len(contig_names)

    return (amount, num_contigs, avg_snps_per_contig, max_contig_size,
            min_contig_size, avg_contig_size, snps_in_orfs)
Ejemplo n.º 5
0
def SNP_gather(snp_file, orf_file):
    '''Gather SNP data from the fasta files.

    Args:
        snp_file: path handed to FASTA_parser(); each "#" found in a title
            is counted as one SNP and each entry as one contig.
        orf_file: path handed to FASTA_parser(); its titles are scanned for
            a "[<digits>" start value and for every "<digits>:" offset.

    Returns:
        Tuple (amount, num_contigs, avg_snps_per_contig, max_contig_size,
        min_contig_size, avg_contig_size, snps_in_orfs). The two averages
        are strings formatted with two decimals.

    Raises:
        ZeroDivisionError: if snp_file yields no entries.
        AttributeError: if an ORF title contains no "[".
        ValueError: if the "[" of an ORF title is not followed by digits.
    '''
    snps = FASTA_parser(snp_file)
    amount = sum(title.count("#") for title in snps)
    num_contigs = len(snps)
    avg_snps_per_contig = "%.2f" % (amount / num_contigs)
    contig_sizes = [len(sequence) for sequence in snps.values()]
    max_contig_size = max(contig_sizes)
    min_contig_size = min(contig_sizes)
    avg_contig_size = "%.2f" % (sum(contig_sizes) / len(contig_sizes))

    # This part counts how many SNPs are inside ORFs.
    # Raw strings are required: "\[" and "\d" are invalid escape sequences
    # in ordinary string literals. Compiled once, outside the loop.
    start_pattern = re.compile(r'\[\d*')
    offset_pattern = re.compile(r'\d+(?=:)')
    tail_pattern = re.compile(r' .*$')

    orfs = FASTA_parser(orf_file)
    contig_names = set()
    for title in orfs:
        # Drop the leading "[" from the match before converting.
        start = int(start_pattern.search(title).group()[1:])
        # Title with everything from the first space onwards removed.
        name = tail_pattern.sub('', title)
        is_reverse = "REVERSE" in title
        for offset in map(int, offset_pattern.findall(title)):
            # Reverse titles count backwards from the start value.
            if is_reverse:
                position = start - offset + 2
            else:
                position = start + offset
            contig_names.add(name + ';' + str(position))
    # Identical name;position pairs are only counted once.
    snps_in_orfs = len(contig_names)

    return (amount, num_contigs, avg_snps_per_contig, max_contig_size,
            min_contig_size, avg_contig_size, snps_in_orfs)
Ejemplo n.º 6
0
def RunModule(fasta_file, fasta_qual_file, endreport_file, etandem, minqual):
    '''Run the module.

    Parses both inputs with FASTA_parser() and hands the results to
    SmallFiles() together with an open, writable handle on the end report.

    Args:
        fasta_file: path passed to FASTA_parser().
        fasta_qual_file: path passed to FASTA_parser().
        endreport_file: path opened in write mode (truncating any existing
            file); the handle is given to SmallFiles().
        etandem: forwarded unchanged to SmallFiles().
        minqual: forwarded unchanged to SmallFiles().
    '''
    # The context manager guarantees the report is flushed and closed even
    # if parsing or SmallFiles() raises.
    with open(endreport_file, 'w') as endreport:
        FDict = FASTA_parser(fasta_file)
        QDict = FASTA_parser(fasta_qual_file)
        SmallFiles(FDict, QDict, etandem, endreport, minqual)
Ejemplo n.º 7
0
def RunModule(tcs_file, fasta_file, snp_fasta, minqual):
    '''Run the module.

    Builds the name collection with TCStoDict(tcs_file, minqual), parses
    fasta_file with FASTA_parser(), and passes both, along with tcs_file and
    snp_fasta, to ShortListFASTA().
    '''
    # Arguments are evaluated left to right: TCS first, then the FASTA file.
    ShortListFASTA(
        TCStoDict(tcs_file, minqual),
        FASTA_parser(fasta_file),
        tcs_file,
        snp_fasta,
    )
Ejemplo n.º 8
0
def RunModule(bamfile_name, padded_fasta_name):
    """Run the module.

    Parses padded_fasta_name with FASTA_parser() and passes the result,
    together with bamfile_name, to TCSwriter().
    """
    padded_entries = FASTA_parser(padded_fasta_name)
    TCSwriter(bamfile_name, padded_entries)