Ejemplo n.º 1
0
    def sample_variants_csv(self, sample, type):
        """Export the variants of ``sample`` to ``<output_files_dir>/<sample>.csv``.

        One row is written per variant returned by
        ``variants_mongo.get_sample_vars(sample, type, db)``, enriched with the
        first annotation entry and the zygosity/QC statistics of the matching
        document returned by ``hotspot_mongo.get_variant``.

        Args:
            sample: Sample identifier; also used as the CSV file name.
            type: Variant-set selector, forwarded unchanged to
                ``variants_mongo`` (name kept for caller compatibility even
                though it shadows the builtin).

        Returns:
            The path of the written CSV file, or ``None`` when the sample is
            unknown or has no variants loaded for ``type`` (in which case only
            the "sample doesn't exist" log call is made).
        """
        if not sampleinfo_mongo.is_sample(sample) or not variants_mongo.is_sample_loaded(sample, type):
            self.__log_sample_doesnt_exist()
            return

        out_path = "%s/%s.csv" % (self.output_files_dir, sample)
        # Parenthesised single-argument form prints the same on Python 2 and 3.
        print(out_path)

        header = ['CHROM', 'POS', 'REF', 'ALT', 'GT', 'RSID', 'Gene',
                  'ExonicFunc', 'AAChange', 'FREQ', 'QC_Final', 'QC_Cov', 'QC_AF', 'In_Hotspot']

        # The context manager guarantees the CSV is flushed and closed before
        # the path is handed back to the caller (the handle used to be leaked).
        # NOTE(review): mode "w" is kept from the original; the csv docs
        # recommend "wb" on Python 2 / newline='' on Python 3 -- confirm target.
        with open(out_path, "w") as out_file:
            csv_writer = csv.writer(out_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
            csv_writer.writerow(header)

            client, db = mongo.get_connection()
            try:
                total_loaded_samples = variants_mongo.count_samples()

                for var in variants_mongo.get_sample_vars(sample, type, db):
                    chrom, pos, ref, alt = var['CHROM'], var['POS'], var['REF'], var['ALT']
                    al1, al2 = genotypetools.get_genotype_alleles(ref, alt, var['GT_calc'])
                    new_variant = {'CHROM': chrom, 'POS': pos, 'REF': ref, 'ALT': ",".join(alt),
                                   'GT': "/".join([al1, al2])}

                    hotspot = hotspot_mongo.get_variant(chrom, pos, ref, alt, db)

                    # Only the first annotation entry is exported.
                    annot = hotspot['ANNOTATION'][0]
                    new_variant.update({'RSID': annot['snp137NonFlagged'],
                                        'Gene': annot['Gene_refGene'],
                                        'ExonicFunc': annot['ExonicFunc_refGene'],
                                        'AAChange': annot['AAChange_refGene']})
                    # Keep only the text between the first "p." and the next comma.
                    if 'p.' in new_variant['AAChange']:
                        new_variant['AAChange'] = new_variant['AAChange'].split('p.')[1].split(",")[0]

                    # FREQ = (het + het_alt + hom counts) / number of loaded samples.
                    zygosity = hotspot['orig_stats']['zygosity']
                    freq = sum([zygosity['het_count'], zygosity['het_alt_count'],
                                zygosity['hom_count']]) / float(total_loaded_samples)
                    final_qc, qc_cov, qc_af = var['FINAL_QC'], var['COV_QC'], var['AF_QC']

                    if hotspot['orig_stats']['qc']['final_qc_count'] > 0:
                        in_hotspot = "TRUE"
                    else:
                        in_hotspot = "FALSE"

                    new_variant.update({"FREQ": freq, "QC_Final": final_qc, "QC_Cov": qc_cov,
                                        "QC_AF": qc_af, "In_Hotspot": in_hotspot})

                    # Every value is stringified, so QUOTE_NONNUMERIC quotes all fields.
                    out_row = [str(new_variant[field]) for field in header]
                    csv_writer.writerow(out_row)
            finally:
                # Release the connection even if a lookup or write fails mid-export.
                client.close()

        return out_path
Ejemplo n.º 2
0
def create_vcf_gt_orig_no_qc(sample, out_dir, db):
    """Write every 'hotspot' variant of ``sample`` to ``<out_dir>/<sample>.vcf``.

    The file starts with a single tab-separated column header line whose last
    column is the sample name. Each variant becomes one line with ID, QUAL and
    FILTER set to '.', INFO set to ``DP=<READ_DEPTH>``, FORMAT set to ``GT``
    and the sample column set to the variant's ``GT_orig`` value ('./.' when
    that value is None).

    Args:
        sample: Sample identifier; used for the query and the file name.
        out_dir: Directory in which the VCF file is created.
        db: Database handle forwarded to ``variants_mongo.get_sample_vars``.

    Returns:
        None.
    """
    records = variants_mongo.get_sample_vars(sample, 'hotspot', db)

    vcf_path = '%s/%s.vcf' % (out_dir, sample)
    with open(vcf_path, "w") as vcf_file:
        vcf_file.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t%s\n" % sample)

        for record in records:
            chrom_text = str(record['CHROM'])
            pos_text = str(record['POS'])
            ref_allele = record['REF']
            alt_alleles = ",".join(record['ALT'])

            # A missing original genotype is emitted as the VCF "no call" value.
            genotype = record['GT_orig']
            if genotype is None:
                genotype = './.'

            columns = [
                chrom_text, pos_text, '.', str(ref_allele), str(alt_alleles),
                '.', '.', 'DP=%s' % record['READ_DEPTH'], 'GT', genotype,
            ]
            vcf_file.write("\t".join(str(column) for column in columns) + "\n")
Ejemplo n.º 3
0
def create_vcf_for_annotation(sample, type, out_dir):
    """Write the not-yet-annotated variants of ``sample`` to ``<out_dir>/<sample>.vcf``.

    Variants are read with ``variants_mongo.get_sample_vars(sample, type)``;
    only those for which ``hotspot_mongo.has_annotation`` is falsy are written.
    Each written line has ID, QUAL and FILTER set to '.', INFO set to
    ``DP=<READ_DEPTH>``, FORMAT set to ``GT`` and the sample column set to the
    variant's ``GT_orig`` value ('./.' when that value is None).

    Args:
        sample: Sample identifier; used for the query and the file name.
        type: Variant-set selector forwarded to ``variants_mongo`` (name kept
            for caller compatibility even though it shadows the builtin).
        out_dir: Directory in which the VCF file is created.

    Returns:
        The path of the written VCF file.
    """
    sample_vars = variants_mongo.get_sample_vars(sample, type)

    out_vcf = '%s/%s.vcf' % (out_dir, sample)
    with open(out_vcf, "w") as out_file:
        vcf_header = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t%s\n" % sample
        out_file.write(vcf_header)

        client, db = mongo.get_connection()
        try:
            for var in sample_vars:
                chrom, pos, ref, alt = var['CHROM'], var['POS'], var['REF'], var['ALT']

                # Variants that already have an annotation are skipped.
                if hotspot_mongo.has_annotation(chrom, pos, ref, alt, db):
                    continue

                gt = var['GT_orig']
                if gt is None:
                    gt = './.'

                variant = [str(chrom), str(pos), '.', str(ref), str(",".join(alt)),
                           '.', '.', 'DP=%s' % var['READ_DEPTH'], 'GT', gt]
                out_file.write("\t".join([str(val) for val in variant]) + "\n")
        finally:
            # Previously the connection leaked whenever the loop raised.
            client.close()
    return out_vcf