def RunModule(fasta_file, fulllist_file, blast_file, report_file, tcsfile):
    '''Runs the Module.

    Opens ``blast_file`` for reading and ``report_file`` for writing, parses
    both FASTA inputs with ``FASTA_parser``, feeds the open BLAST handle to
    ``BLASTparser`` and passes everything to ``Characterize`` together with
    the open report handle and ``tcsfile``. After both handles have been
    closed, ``FASTAsplitter`` is called with the two parsed FASTA objects and
    the report file *name*.

    Parameters:
        fasta_file: path handed to ``FASTA_parser``.
        fulllist_file: path handed to ``FASTA_parser``.
        blast_file: path of the BLAST result file, opened for reading.
        report_file: path of the report, opened for writing (truncated).
        tcsfile: passed through unchanged to ``Characterize``.
    '''
    # Context managers guarantee both handles are closed, even on error.
    with open(blast_file, 'r') as blast, open(report_file, 'w') as report:
        fasta_dict = FASTA_parser(fasta_file)
        full_dict = FASTA_parser(fulllist_file)
        # The BLAST handle stays open until Characterize is done, in case
        # BLASTparser consumes it lazily.
        blasts = BLASTparser(blast)
        Characterize(fasta_dict, blasts, full_dict, report, tcsfile)
    # FASTAsplitter only gets the report path; closing the write handle first
    # ensures everything written to the report is flushed to disk by now.
    # NOTE(review): presumably FASTAsplitter re-opens the report - confirm.
    FASTAsplitter(fasta_dict, full_dict, report_file)
def Read_metrics(fasta_file):
    '''Compute length statistics for the entries of a fasta file.

    The file is parsed with ``FASTA_parser`` and the length of every value
    in the result is measured.

    Returns a tuple ``(avg_len, max_len, median)``:
        avg_len: mean length, as a string formatted with two decimals.
        max_len: the largest length.
        median: the middle length for an odd number of entries, otherwise
            the mean of the two middle lengths (a float).
    '''
    lengths = sorted(len(seq) for seq in FASTA_parser(fasta_file).values())
    longest = max(lengths)
    mean_text = "%.2f" % (sum(lengths) / len(lengths))

    # half is the upper-middle index; is_odd tells whether a single middle
    # element exists.
    half, is_odd = divmod(len(lengths), 2)
    if is_odd:
        median = lengths[half]
    else:
        median = (lengths[half] + lengths[half - 1]) / 2

    return (mean_text, longest, median)
def SNP_gather(snp_file, orf_file):
    '''Gather SNP data from the fasta files.

    Both files are parsed with ``FASTA_parser``. From ``snp_file`` the
    function counts the "#" characters in the titles (used as the SNP count)
    and computes contig size statistics. From ``orf_file`` it derives one
    "<name>;<position>" string per "<digits>:" occurrence in each title and
    counts the distinct strings.

    Returns a tuple:
        (ammount, num_contigs, avg_snps_per_contig, max_contig_size,
         min_contig_size, avg_contig_size, snps_in_orfs)
    where the two averages are strings formatted with two decimals.
    '''
    snps = FASTA_parser(snp_file)

    # Every "#" found in a title is counted once.
    ammount = sum(title.count("#") for title in snps)
    num_contigs = len(snps)
    avg_snps_per_contig = "%.2f" % (ammount/num_contigs)

    contig_sizes = [len(sequence) for sequence in snps.values()]
    max_contig_size = max(contig_sizes)
    min_contig_size = min(contig_sizes)
    avg_contig_size = "%.2f" % (sum(contig_sizes)/len(contig_sizes))

    # This part counts how many SNPs are inside ORFs
    orfs = FASTA_parser(orf_file)
    contig_names = set()  # a set, since only distinct names are counted
    for header in orfs:
        # Number that directly follows the first "[" in the title.
        start = int(re.search(r'\[\d*', header).group()[1:])
        # Every run of digits that is immediately followed by ":".
        offsets = [int(digits) for digits in re.findall(r'\d+(?=:)', header)]
        # Title with everything from the first space onwards removed.
        base_name = re.sub(' .*$', '', header)
        is_reverse = "REVERSE" in header
        for offset in offsets:
            # Titles containing "REVERSE" subtract the offset from the start
            # (plus 2); all other titles add the offset to the start.
            if is_reverse:
                position = start - offset + 2
            else:
                position = start + offset
            contig_names.add(base_name + ';' + str(position))
    snps_in_orfs = len(contig_names)

    return(ammount, num_contigs, avg_snps_per_contig, max_contig_size,
           min_contig_size, avg_contig_size, snps_in_orfs)
def RunModule(fasta_file, fasta_qual_file, endreport_file, etandem, minqual):
    '''Runs the module.

    Opens ``endreport_file`` for writing, parses ``fasta_file`` and
    ``fasta_qual_file`` with ``FASTA_parser`` and hands both results, together
    with ``etandem``, the open report handle and ``minqual``, to
    ``SmallFiles``.

    Parameters:
        fasta_file: path handed to ``FASTA_parser``.
        fasta_qual_file: path handed to ``FASTA_parser``.
        endreport_file: path of the report, opened for writing (truncated).
        etandem: passed through unchanged to ``SmallFiles``.
        minqual: passed through unchanged to ``SmallFiles``.
    '''
    # The context manager closes (and flushes) the report even if parsing or
    # SmallFiles raises; the original left the handle open.
    with open(endreport_file, 'w') as endreport:
        fasta_dict = FASTA_parser(fasta_file)
        qual_dict = FASTA_parser(fasta_qual_file)
        SmallFiles(fasta_dict, qual_dict, etandem, endreport, minqual)
def RunModule(tcs_file, fasta_file, snp_fasta, minqual):
    '''Runs the module.

    Builds the names object with ``TCStoDict(tcs_file, minqual)``, parses
    ``fasta_file`` with ``FASTA_parser`` and forwards both, along with
    ``tcs_file`` and ``snp_fasta``, to ``ShortListFASTA``.
    '''
    ShortListFASTA(
        TCStoDict(tcs_file, minqual),
        FASTA_parser(fasta_file),
        tcs_file,
        snp_fasta,
    )
def RunModule(bamfile_name, padded_fasta_name):
    """Run the module.

    Parses ``padded_fasta_name`` with ``FASTA_parser`` and passes the result,
    preceded by ``bamfile_name``, to ``TCSwriter``.
    """
    padded_sequences = FASTA_parser(padded_fasta_name)
    TCSwriter(bamfile_name, padded_sequences)