def sample_variants_csv(self, sample, type):
    """Export the variants of one sample to ``<output_files_dir>/<sample>.csv``.

    One row is written per variant returned by
    ``variants_mongo.get_sample_vars``; each row combines the variant's own
    fields with the first annotation entry and the ``orig_stats`` of the
    matching hotspot record.

    Args:
        sample: Sample identifier; also used as the CSV file's base name.
        type: Variant type forwarded to the ``variants_mongo`` lookups.
            (Shadows the builtin, but the name is part of the public
            signature and is kept for keyword-argument callers.)

    Returns:
        The path of the written CSV file, or ``None`` when the sample is
        unknown or has no variants loaded for ``type`` (a message is logged
        in that case).
    """
    if not sampleinfo_mongo.is_sample(sample) or not variants_mongo.is_sample_loaded(sample, type):
        self.__log_sample_doesnt_exist()
        return

    out_path = "%s/%s.csv" % (self.output_files_dir, sample)
    # Parenthesised single-argument form prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print(out_path)

    header = ['CHROM', 'POS', 'REF', 'ALT', 'GT', 'RSID', 'Gene', 'ExonicFunc',
              'AAChange', 'FREQ', 'QC_Final', 'QC_Cov', 'QC_AF', 'In_Hotspot']

    # The context manager guarantees the file is flushed and closed before the
    # path is handed back to the caller, even if a lookup below raises.
    with open(out_path, "w") as out_file:
        csv_writer = csv.writer(out_file, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_NONNUMERIC)
        csv_writer.writerow(header)

        client, db = mongo.get_connection()
        try:
            total_loaded_samples = variants_mongo.count_samples()

            for var in variants_mongo.get_sample_vars(sample, type, db):
                chrom, pos, ref, alt = var['CHROM'], var['POS'], var['REF'], var['ALT']
                al1, al2 = genotypetools.get_genotype_alleles(ref, alt, var['GT_calc'])
                new_variant = {
                    'CHROM': chrom,
                    'POS': pos,
                    'REF': ref,
                    'ALT': ",".join(alt),
                    'GT': "/".join([al1, al2]),
                }

                hotspot = hotspot_mongo.get_variant(chrom, pos, ref, alt, db)
                # Only the first annotation entry is exported.
                annot = hotspot['ANNOTATION'][0]
                aa_change = annot['AAChange_refGene']
                if 'p.' in aa_change:
                    # Keep only the text after the first "p." marker, up to
                    # the next comma.
                    aa_change = aa_change.split('p.')[1].split(",")[0]
                new_variant.update({
                    'RSID': annot['snp137NonFlagged'],
                    'Gene': annot['Gene_refGene'],
                    'ExonicFunc': annot['ExonicFunc_refGene'],
                    'AAChange': aa_change,
                })

                # Fraction of loaded samples counted in any zygosity bucket
                # of the hotspot record.
                zygosity = hotspot['orig_stats']['zygosity']
                carriers = sum([zygosity['het_count'],
                                zygosity['het_alt_count'],
                                zygosity['hom_count']])
                freq = carriers / float(total_loaded_samples)

                if hotspot['orig_stats']['qc']['final_qc_count'] > 0:
                    in_hotspot = "TRUE"
                else:
                    in_hotspot = "FALSE"

                new_variant.update({
                    "FREQ": freq,
                    "QC_Final": var['FINAL_QC'],
                    "QC_Cov": var['COV_QC'],
                    "QC_AF": var['AF_QC'],
                    "In_Hotspot": in_hotspot,
                })

                # Every cell is stringified, so QUOTE_NONNUMERIC quotes all
                # of them (numbers included).
                csv_writer.writerow([str(new_variant[field]) for field in header])
        finally:
            # Release the connection on every exit path.
            client.close()

    return out_path
def __get_unsaved_hotspot_vcf_files(self):
    """Collect hotspot VCF files whose samples still need to be loaded.

    Scans ``<hotspot_dir>/hotspot_output/*.vcf`` (``hotspot_dir`` comes from
    ``self.project_config``). The sample name is the file's base name up to
    the first ``"."``. A file is selected when its sample is known to
    ``sampleinfo_mongo`` and has not yet been loaded for
    ``self.variant_type``.

    Returns:
        A list of ``(sample, vcf_file)`` tuples, in glob order.
    """
    hotspot_dir = self.project_config['hotspot_dir']
    output_dir = hotspot_dir + "/hotspot_output"
    vcf_files = glob(output_dir + "/*.vcf")

    final_vcf_files = []
    client, db = mongo.get_connection()
    try:
        for vcf_file in vcf_files:
            sample = os.path.basename(vcf_file).split(".")[0]
            if sampleinfo_mongo.is_sample(sample, db) and not \
                    variants_mongo.is_sample_loaded(sample, self.variant_type, db):
                self.__log_adding_hotspot_sample_to_queue(sample, vcf_file)
                final_vcf_files.append((sample, vcf_file))
            else:
                # NOTE(review): this branch is also taken for samples that
                # are not known to sampleinfo_mongo, not only for samples
                # that are already loaded.
                self.__log_hotspot_sample_already_loaded(sample)
    finally:
        # Close the connection even when a lookup or log call raises.
        client.close()

    return final_vcf_files