Example #1
0
    def sample_variants_csv(self, sample, type):
        """Export the variants of ``sample`` to ``<output_files_dir>/<sample>.csv``.

        One row is written per variant returned by
        ``variants_mongo.get_sample_vars(sample, type, db)``.  Each row combines
        the variant's own fields with the annotation and statistics of the
        matching hotspot record.

        Args:
            sample: Sample identifier; also used as the CSV file name.
            type: Variant type passed through to the ``variants_mongo`` queries.
                (The name shadows the builtin but is kept for caller
                compatibility.)

        Returns:
            The path of the written CSV file, or ``None`` when
            ``sampleinfo_mongo.is_sample`` or ``variants_mongo.is_sample_loaded``
            rejects the sample (the condition is logged first).
        """
        if not sampleinfo_mongo.is_sample(sample) or not variants_mongo.is_sample_loaded(sample, type):
            self.__log_sample_doesnt_exist()
            return None

        out_path = "%s/%s.csv" % (self.output_files_dir, sample)
        # Parenthesised form prints the same text under Python 2 and also
        # parses under Python 3.
        print(out_path)

        header = ['CHROM', 'POS', 'REF', 'ALT', 'GT', 'RSID', 'Gene',
                  'ExonicFunc', 'AAChange', 'FREQ', 'QC_Final', 'QC_Cov', 'QC_AF', 'In_Hotspot']

        # The file is closed (and therefore flushed) before the path is
        # returned, so callers never see a partially written CSV.
        with open(out_path, "w") as out_file:
            csv_writer = csv.writer(out_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
            csv_writer.writerow(header)

            client, db = mongo.get_connection()
            try:
                # Loop-invariant denominator for the FREQ column.
                total_loaded_samples = float(variants_mongo.count_samples())

                for var in variants_mongo.get_sample_vars(sample, type, db):
                    chrom, pos, ref, alt = var['CHROM'], var['POS'], var['REF'], var['ALT']
                    al1, al2 = genotypetools.get_genotype_alleles(ref, alt, var['GT_calc'])

                    hotspot = hotspot_mongo.get_variant(chrom, pos, ref, alt, db)
                    # Only the first annotation entry is exported.
                    annot = hotspot['ANNOTATION'][0]

                    aa_change = annot['AAChange_refGene']
                    if 'p.' in aa_change:
                        # Keep only the text after the first 'p.' up to the
                        # next comma.
                        aa_change = aa_change.split('p.')[1].split(",")[0]

                    # FREQ = samples carrying the variant in any zygosity,
                    # divided by the number of loaded samples.
                    zygosity = hotspot['orig_stats']['zygosity']
                    carriers = sum([zygosity['het_count'], zygosity['het_alt_count'], zygosity['hom_count']])
                    freq = carriers / total_loaded_samples

                    if hotspot['orig_stats']['qc']['final_qc_count'] > 0:
                        in_hotspot = "TRUE"
                    else:
                        in_hotspot = "FALSE"

                    new_variant = {
                        'CHROM': chrom,
                        'POS': pos,
                        'REF': ref,
                        'ALT': ",".join(alt),
                        'GT': "/".join([al1, al2]),
                        'RSID': annot['snp137NonFlagged'],
                        'Gene': annot['Gene_refGene'],
                        'ExonicFunc': annot['ExonicFunc_refGene'],
                        'AAChange': aa_change,
                        'FREQ': freq,
                        'QC_Final': var['FINAL_QC'],
                        'QC_Cov': var['COV_QC'],
                        'QC_AF': var['AF_QC'],
                        'In_Hotspot': in_hotspot,
                    }

                    # Every value is stringified, so QUOTE_NONNUMERIC quotes
                    # all fields, exactly as before.
                    csv_writer.writerow([str(new_variant[field]) for field in header])
            finally:
                # Release the connection even if a query or a row fails.
                client.close()

        return out_path
Example #2
0
    def __get_unsaved_hotspot_vcf_files(self):
        """Return the hotspot VCF files whose samples still have to be loaded.

        Scans ``<hotspot_dir>/hotspot_output`` for ``*.vcf`` files, where
        ``hotspot_dir`` comes from ``self.project_config``.  The sample name is
        the file's base name up to the first dot.  A file is queued when
        ``sampleinfo_mongo.is_sample`` accepts the sample and
        ``variants_mongo.is_sample_loaded`` reports it as not loaded for
        ``self.variant_type``.

        Returns:
            A list of ``(sample, vcf_file)`` tuples, in the order ``glob``
            yielded the files.
        """
        hotspot_dir = self.project_config['hotspot_dir']
        output_dir = hotspot_dir + "/hotspot_output"
        vcf_files = glob(output_dir + "/*.vcf")

        final_vcf_files = []

        client, db = mongo.get_connection()
        try:
            for vcf_file in vcf_files:
                sample = os.path.basename(vcf_file).split(".")[0]

                known = sampleinfo_mongo.is_sample(sample, db)
                if known and not variants_mongo.is_sample_loaded(sample, self.variant_type, db):
                    self.__log_adding_hotspot_sample_to_queue(sample, vcf_file)
                    final_vcf_files.append((sample, vcf_file))
                else:
                    # NOTE(review): this branch is also taken when the sample
                    # is rejected by is_sample, not only when it is already
                    # loaded -- confirm the log message is appropriate.
                    self.__log_hotspot_sample_already_loaded(sample)
        finally:
            # Close the connection even if a query or a log call raises.
            client.close()

        return final_vcf_files