def sample_variants_csv(self, sample, type):
    """Export the variants of one sample to ``<output_files_dir>/<sample>.csv``.

    Each variant returned by ``variants_mongo.get_sample_vars`` is joined
    with its record from ``hotspot_mongo.get_variant`` and written as one
    CSV row using the columns listed in ``header``.

    Args:
        sample: Sample identifier; also used as the base name of the CSV.
        type: Variant-set selector handed through to ``variants_mongo``
            (shadows the builtin; name kept so existing callers still work).

    Returns:
        The path of the written CSV file, or ``None`` (after logging) when
        ``sampleinfo_mongo.is_sample`` or ``variants_mongo.is_sample_loaded``
        rejects the sample. No file is created in that case.
    """
    if not sampleinfo_mongo.is_sample(sample) or not variants_mongo.is_sample_loaded(sample, type):
        self.__log_sample_doesnt_exist()
        return

    out_path = "%s/%s.csv" % (self.output_files_dir, sample)
    # Single-argument call form prints identically under Python 2 and 3.
    print(out_path)

    header = ['CHROM', 'POS', 'REF', 'ALT', 'GT', 'RSID', 'Gene', 'ExonicFunc',
              'AAChange', 'FREQ', 'QC_Final', 'QC_Cov', 'QC_AF', 'In_Hotspot']

    # The context manager guarantees the file is flushed and closed before
    # the path is handed back to the caller, even if a lookup below raises.
    with open(out_path, "w") as out_file:
        csv_writer = csv.writer(out_file, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_NONNUMERIC)
        csv_writer.writerow(header)

        client, db = mongo.get_connection()
        try:
            total_loaded_samples = variants_mongo.count_samples()

            for var in variants_mongo.get_sample_vars(sample, type, db):
                chrom, pos, ref, alt = var['CHROM'], var['POS'], var['REF'], var['ALT']
                al1, al2 = genotypetools.get_genotype_alleles(ref, alt, var['GT_calc'])

                hotspot = hotspot_mongo.get_variant(chrom, pos, ref, alt, db)
                # Only the first annotation entry is exported.
                annot = hotspot['ANNOTATION'][0]

                aa_change = annot['AAChange_refGene']
                if 'p.' in aa_change:
                    # Keep only the text between the first 'p.' and the next comma.
                    aa_change = aa_change.split('p.')[1].split(",")[0]

                # Fraction of loaded samples counted in any zygosity bucket.
                zygosity = hotspot['orig_stats']['zygosity']
                carriers = sum([zygosity['het_count'],
                                zygosity['het_alt_count'],
                                zygosity['hom_count']])
                freq = carriers / float(total_loaded_samples)

                if hotspot['orig_stats']['qc']['final_qc_count'] > 0:
                    in_hotspot = "TRUE"
                else:
                    in_hotspot = "FALSE"

                new_variant = {
                    'CHROM': chrom,
                    'POS': pos,
                    'REF': ref,
                    'ALT': ",".join(alt),
                    'GT': "/".join([al1, al2]),
                    'RSID': annot['snp137NonFlagged'],
                    'Gene': annot['Gene_refGene'],
                    'ExonicFunc': annot['ExonicFunc_refGene'],
                    'AAChange': aa_change,
                    'FREQ': freq,
                    'QC_Final': var['FINAL_QC'],
                    'QC_Cov': var['COV_QC'],
                    'QC_AF': var['AF_QC'],
                    'In_Hotspot': in_hotspot,
                }

                # Every value is stringified, so QUOTE_NONNUMERIC emits all
                # fields quoted; kept as-is to preserve the output format.
                csv_writer.writerow([str(new_variant[field]) for field in header])
        finally:
            # Release the connection on both the success and the error path.
            client.close()

    return out_path
def create_vcf_gt_orig_no_qc(sample, out_dir, db):
    """Dump the sample's 'hotspot' variants to ``<out_dir>/<sample>.vcf``.

    One tab-separated record is written per variant returned by
    ``variants_mongo.get_sample_vars(sample, 'hotspot', db)``. ID, QUAL and
    FILTER are always '.', INFO carries ``DP=<READ_DEPTH>`` and the sample
    column holds ``GT_orig`` ('./.' when that value is ``None``).

    Args:
        sample: Sample identifier; used for the file name and the header's
            sample column.
        out_dir: Directory the VCF file is written into.
        db: Database handle forwarded to ``variants_mongo.get_sample_vars``.
    """
    records = variants_mongo.get_sample_vars(sample, 'hotspot', db)
    out_vcf = '%s/%s.vcf' % (out_dir, sample)

    with open(out_vcf, "w") as vcf:
        vcf.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t%s\n" % sample)

        for record in records:
            chromosome = str(record['CHROM'])
            position = str(record['POS'])
            ref_allele = record['REF']
            alt_alleles = ",".join(record['ALT'])

            genotype = record['GT_orig']
            if genotype is None:
                # Missing call: use the './.' placeholder.
                genotype = './.'

            fields = [
                chromosome, position, '.', str(ref_allele), str(alt_alleles),
                '.', '.', 'DP=%s' % record['READ_DEPTH'], 'GT', genotype,
            ]
            vcf.write("\t".join(map(str, fields)) + "\n")
def create_vcf_for_annotation(sample, type, out_dir):
    """Write a VCF holding the sample's variants that lack an annotation.

    Variants come from ``variants_mongo.get_sample_vars(sample, type)``;
    those for which ``hotspot_mongo.has_annotation`` is true are skipped.
    Each remaining variant becomes one tab-separated record with '.' for
    ID, QUAL and FILTER, ``DP=<READ_DEPTH>`` as INFO and ``GT_orig`` as the
    sample column ('./.' when that value is ``None``).

    Args:
        sample: Sample identifier; used for the file name and the header's
            sample column.
        type: Variant-set selector handed through to ``variants_mongo``
            (shadows the builtin; name kept so existing callers still work).
        out_dir: Directory the VCF file is written into.

    Returns:
        The path of the written VCF file (``<out_dir>/<sample>.vcf``).
    """
    sample_vars = variants_mongo.get_sample_vars(sample, type)
    out_vcf = '%s/%s.vcf' % (out_dir, sample)

    with open(out_vcf, "w") as out_file:
        vcf_header = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t%s\n" % sample
        out_file.write(vcf_header)

        client, db = mongo.get_connection()
        try:
            for var in sample_vars:
                chrom, pos, ref, alt = var['CHROM'], var['POS'], var['REF'], var['ALT']
                if hotspot_mongo.has_annotation(chrom, pos, ref, alt, db):
                    # Already annotated: leave it out of the file.
                    continue

                gt = var['GT_orig']
                if gt is None:
                    # Missing call: use the './.' placeholder.
                    gt = './.'

                variant = [chrom, pos, '.', ref, ",".join(alt),
                           '.', '.', 'DP=%s' % var['READ_DEPTH'], 'GT', gt]
                out_file.write("\t".join([str(val) for val in variant]) + "\n")
        finally:
            # Close the connection even when a lookup or write fails.
            client.close()

    return out_vcf