def snp_count_table(options):
    """Write a TSV of per-graph average SNP counts.

    For every GAM in ``options.in_gams`` three counts are collected:
    ``count_vcf_snps`` on the gzipped linear VCF, and ``count_vg_paths`` on
    the sample and on the augmented vg file.  Counts are summed per graph
    (keyed by ``graph_path(gam, options)``) and divided by the number of
    GAMs that map to that graph.  The result is written to
    ``count_tsv_path(options)``.

    Rows are written in sorted graph-path order so that the output is
    reproducible from run to run.

    There are serious problems with this now:
      1) don't have snp count for baseline (as it's not gam or vcf)
      2) snps counted differently for gam/vcf (multiple alternates at same
         site counted in former but not latter)

    Args:
        options: object providing ``baseline`` and ``in_gams``; it is also
            passed through unchanged to the path and counting helpers.
    """
    # tsv header
    rows = [
        "#\t{}\t\n".format(options.baseline),
        "#graph\tlinear_snp_count\tsample_snp_count\taugmented_snp_count\n",
    ]

    # Per-graph running totals: [linear vcf, sample, augmented].
    totals = defaultdict(lambda: [0, 0, 0])
    # Number of GAMs that contributed to each graph's totals.
    gam_counts = defaultdict(int)

    for gam in options.in_gams:
        linear_vcf = linear_vcf_path(gam, options) + ".gz"
        vg_sample = sample_vg_path(gam, options)
        vg_augmented = augmented_vg_path(gam, options)
        per_gam = (
            count_vcf_snps(linear_vcf, options),
            count_vg_paths(vg_sample, options),
            count_vg_paths(vg_augmented, options),
        )

        name = graph_path(gam, options)
        running = totals[name]
        for column, value in enumerate(per_gam):
            running[column] += value
        gam_counts[name] += 1

    # Every graph seen above is a key of ``totals``; sorting the keys gives
    # a stable row order instead of arbitrary set iteration order.
    for name in sorted(totals):
        denominator = float(gam_counts[name])
        avg_vcf, avg_sam, avg_aug = [
            float(total) / denominator for total in totals[name]
        ]
        rows.append("{}\t{}\t{}\t{}\n".format(
            os.path.splitext(os.path.basename(name))[0],
            avg_vcf, avg_sam, avg_aug))

    with open(count_tsv_path(options), "w") as ofile:
        ofile.write("".join(rows))
def graph_size_table(options):
    """Write a TSV of per-graph average lengths for the vg call outputs.

    For every GAM in ``options.in_gams`` this takes ``vg_length`` of the
    sample vg file, the augmented vg file and the original graph
    (``graph_path(gam, options)``).  Values are summed per original graph
    and divided by the number of GAMs that map to that graph.  The result
    is written to ``size_tsv_path(options)``.

    Rows are written in sorted graph-path order so that the output is
    reproducible from run to run.

    Args:
        options: object providing ``baseline`` and ``in_gams``; it is also
            passed through unchanged to the path and length helpers.
    """
    # tsv header
    rows = [
        "#\t{}\t\n".format(options.baseline),
        "#graph\tsample_snp_length\taugmented_snp_length\toriginal_length\n",
    ]

    # Per-graph running totals: [sample, augmented, original].
    totals = defaultdict(lambda: [0, 0, 0])
    # Number of GAMs that contributed to each graph's totals.
    gam_counts = defaultdict(int)

    for gam in options.in_gams:
        vg_sample = sample_vg_path(gam, options)
        vg_augmented = augmented_vg_path(gam, options)
        sample_length = vg_length(vg_sample, options)
        augmented_length = vg_length(vg_augmented, options)

        # The original graph's path doubles as the grouping key.
        name = graph_path(gam, options)
        original_length = vg_length(name, options)

        running = totals[name]
        running[0] += sample_length
        running[1] += augmented_length
        running[2] += original_length
        gam_counts[name] += 1

    # Every graph seen above is a key of ``totals``; sorting the keys gives
    # a stable row order instead of arbitrary set iteration order.
    for name in sorted(totals):
        denominator = float(gam_counts[name])
        avg_sam, avg_aug, avg_ori = [
            float(total) / denominator for total in totals[name]
        ]
        rows.append("{}\t{}\t{}\t{}\n".format(
            os.path.splitext(os.path.basename(name))[0],
            avg_sam, avg_aug, avg_ori))

    with open(size_tsv_path(options), "w") as ofile:
        ofile.write("".join(rows))