Beispiel #1
0
    def sample_variants_csv(self, sample, type):
        """Export the variants of ``sample`` to ``<output_files_dir>/<sample>.csv``.

        One row is written for every variant yielded by
        ``variants_mongo.get_sample_vars``. Each row combines fields of the
        variant itself (position, alleles, genotype, QC flags) with the first
        annotation entry and the ``orig_stats`` of the hotspot document looked
        up for the same CHROM/POS/REF/ALT.

        Args:
            sample: Sample name; also used as the CSV file name.
            type: Variant type forwarded to the ``variants_mongo`` helpers.
                The name shadows the builtin but is kept so existing callers
                (including keyword callers) continue to work.

        Returns:
            The path of the written CSV file, or ``None`` when the sample is
            unknown or its variants of this type are not loaded.
        """
        if not sampleinfo_mongo.is_sample(sample) or not variants_mongo.is_sample_loaded(sample, type):
            self.__log_sample_doesnt_exist()
            return

        out_path = "%s/%s.csv" % (self.output_files_dir, sample)
        # The parenthesised single-argument form prints the same text whether
        # it is parsed as a Python 2 statement or a Python 3 function call.
        print(out_path)

        header = ['CHROM', 'POS', 'REF', 'ALT', 'GT', 'RSID', 'Gene',
                  'ExonicFunc', 'AAChange', 'FREQ', 'QC_Final', 'QC_Cov', 'QC_AF', 'In_Hotspot']

        # The context manager closes (and therefore flushes) the file before
        # the path is handed back, and also when a lookup below raises.
        with open(out_path, "w") as out_file:
            csv_writer = csv.writer(out_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
            csv_writer.writerow(header)

            # NOTE(review): the client handle is neither used nor closed here;
            # confirm who owns the connection.
            _client, db = mongo.get_connection()
            total_loaded_samples = variants_mongo.count_samples()

            for var in variants_mongo.get_sample_vars(sample, type, db):
                chrom, pos, ref, alt = var['CHROM'], var['POS'], var['REF'], var['ALT']
                al1, al2 = genotypetools.get_genotype_alleles(ref, alt, var['GT_calc'])
                new_variant = {'CHROM': chrom, 'POS': pos, 'REF': ref, 'ALT': ",".join(alt),
                               'GT': "/".join([al1, al2])}

                hotspot = hotspot_mongo.get_variant(chrom, pos, ref, alt, db)
                annot = hotspot['ANNOTATION'][0]

                new_variant.update({'RSID': annot['snp137NonFlagged'],
                                    'Gene': annot['Gene_refGene'], 'ExonicFunc': annot['ExonicFunc_refGene'],
                                    'AAChange': annot['AAChange_refGene']})
                if 'p.' in new_variant['AAChange']:
                    # Keep only the text between the first 'p.' and the next comma.
                    new_variant['AAChange'] = new_variant['AAChange'].split('p.')[1].split(",")[0]

                # FREQ = samples carrying the variant in any zygosity, divided
                # by the number of loaded samples.
                zygosity = hotspot['orig_stats']['zygosity']
                carriers = sum([zygosity['het_count'], zygosity['het_alt_count'], zygosity['hom_count']])
                freq = carriers / float(total_loaded_samples)
                final_qc, qc_cov, qc_af = var['FINAL_QC'], var['COV_QC'], var['AF_QC']

                in_hotspot = "TRUE" if hotspot['orig_stats']['qc']['final_qc_count'] > 0 else "FALSE"

                new_variant.update({"FREQ": freq, "QC_Final": final_qc, "QC_Cov": qc_cov, "QC_AF": qc_af,
                                    "In_Hotspot": in_hotspot})

                # Every value is stringified, so QUOTE_NONNUMERIC quotes all fields.
                csv_writer.writerow([str(new_variant[field]) for field in header])

        return out_path
Beispiel #2
0
    def __reconcile(self, chrom, pos, ref, alt, db):
        """Align the stored ALT allele order of one hotspot variant with ``alt``.

        Looks up the hotspot document at CHROM/POS/REF whose ALT array
        contains every allele in ``alt``. When exactly one such document
        exists, its ALT is overwritten with ``alt`` and every ``'orig'``
        variant document matching the same criteria is rewritten: ALT is
        replaced, GT_calc and GT_orig are renumbered against ``[ref] + alt``,
        and two-element FAO / AF_calc lists are reversed when the allele order
        actually changes. Problems are reported through
        ``__log_serious_hotspot_discrepancy``; nothing is returned.

        Args:
            chrom: Value matched against the CHROM field.
            pos: Value matched against the POS field.
            ref: Reference allele.
            alt: List of alternate alleles in the order that should be stored.
            db: Database handle passed to the ``get_collection`` helpers.
        """
        hotspot_query = {'CHROM': chrom, 'POS': pos, 'REF': ref, 'ALT': {'$all': alt}}

        hotspot_coll = hotspot_mongo.get_collection(db)
        variants_coll = variants_mongo.get_collection(db)

        matched_documents = list(hotspot_coll.find(hotspot_query))
        if len(matched_documents) != 1:
            self.__log_serious_hotspot_discrepancy('THE NUMBER OF DOCUMENTS WITH MATCHING THE '
                                                   'ALLELE GROUP IS NOT 1')
            return

        self.__log_reconciling_variant(chrom, pos, ref, alt)

        # UPDATING THE ALTERNATE ALLELE FOR THE HOTSPOT VARIANT
        modified_count = hotspot_coll.update_one(hotspot_query, {'$set': {'ALT': alt}}).modified_count
        if modified_count != 1:
            self.__log_serious_hotspot_discrepancy('THERE WAS A PROBLEM MODIFYING THE OLD HOTSPOT DOCUMENT')

        # Allele numbering after reconciliation: 0 is REF, 1..n follow ``alt``.
        new_alleles = [ref] + alt

        def renumber_genotype(var_ref, var_alt, genotype):
            # Translate a genotype expressed in the variant's old allele
            # numbering into the new numbering; no-calls pass through.
            if genotype == './.':
                return './.'
            al1, al2 = genotypetools.get_genotype_alleles(var_ref, var_alt, genotype)
            allele_numbers = sorted([new_alleles.index(al1), new_alleles.index(al2)])
            return "/".join(str(val) for val in allele_numbers)

        # UPDATING ALL THE PREVIOUSLY LOADED VARIANTS FOR THE HOTSPOT VARIANT
        variant_query = {'TYPE': 'orig', 'CHROM': chrom, 'POS': pos, 'REF': ref, 'ALT': {'$all': alt}}

        # Snapshot the matches first so the updates below do not run against
        # an open cursor on the same collection.
        for var in list(variants_coll.find(variant_query)):
            var_ref, var_alt, var_gt, var_gt_orig = var['REF'], var['ALT'], var['GT_calc'], var['GT_orig']
            var_fao, var_af = var['FAO'], var['AF_calc']

            corrected_gt_calc = renumber_genotype(var_ref, var_alt, var_gt)
            corrected_gt_orig = renumber_genotype(var_ref, var_alt, var_gt_orig)

            # With two values the only possible re-ordering is a swap. Skip it
            # when the stored order already equals the target order, otherwise
            # the values would end up attached to the wrong alleles (and a
            # repeated run would flip them back and forth).
            if len(var_fao) == 2 and var_alt != alt:
                var_fao = list(reversed(var_fao))
                var_af = list(reversed(var_af))

            # TODO: AF_calc and FAO are not re-ordered when they hold more
            # than two values; this was deliberately postponed because
            # downstream analysis does not need it yet.

            sample_query = {'TYPE': 'orig', 'SAMPLE': var['SAMPLE'], 'CHROM': chrom, 'POS': pos, 'REF': ref,
                            'ALT': {'$all': alt}}
            update = {'$set': {'ALT': alt, 'GT_calc': corrected_gt_calc, 'FAO': var_fao, 'AF_calc': var_af,
                               'GT_orig': corrected_gt_orig}}

            modified_count = variants_coll.update_one(sample_query, update).modified_count
            if modified_count != 1:
                self.__log_serious_hotspot_discrepancy('THERE WAS A PROBLEM MODIFYING THE ORIGINAL VARIANTS '
                                                       'DOCUMENT')
Beispiel #3
0
    def __reconcile(self, chrom, pos, ref, alt, db):
        """Align the stored ALT allele order of one hotspot variant with ``alt``.

        Finds the hotspot document at CHROM/POS/REF whose ALT array contains
        every allele in ``alt``. If exactly one is found, its ALT is replaced
        by ``alt`` and each ``"orig"`` variant document matching the same
        criteria is updated: ALT is replaced, GT_calc and GT_orig are
        renumbered against ``[ref] + alt``, and two-element FAO / AF_calc
        lists are reversed when the allele order actually changes. Failures
        are reported through ``__log_serious_hotspot_discrepancy``; the method
        returns nothing.

        Args:
            chrom: Value matched against the CHROM field.
            pos: Value matched against the POS field.
            ref: Reference allele.
            alt: List of alternate alleles in the order that should be stored.
            db: Database handle passed to the ``get_collection`` helpers.
        """
        hotspot_query = {"CHROM": chrom, "POS": pos, "REF": ref, "ALT": {"$all": alt}}

        hotspot_coll = hotspot_mongo.get_collection(db)
        variants_coll = variants_mongo.get_collection(db)

        matched_documents = list(hotspot_coll.find(hotspot_query))
        if len(matched_documents) != 1:
            # Message restored: the previous literal was garbled
            # ("DOCUMENTSG THE").
            self.__log_serious_hotspot_discrepancy(
                "THE NUMBER OF DOCUMENTS WITH MATCHING THE ALLELE GROUP IS NOT 1"
            )
            return

        self.__log_reconciling_variant(chrom, pos, ref, alt)

        # UPDATING THE ALTERNATE ALLELE FOR THE HOTSPOT VARIANT
        modified_count = hotspot_coll.update_one(hotspot_query, {"$set": {"ALT": alt}}).modified_count
        if modified_count != 1:
            self.__log_serious_hotspot_discrepancy("THERE WAS A PROBLEM MODIFYING THE OLD HOTSPOT DOCUMENT")

        # Allele numbering after reconciliation: 0 is REF, 1..n follow ``alt``.
        new_alleles = [ref] + alt

        def renumber_genotype(var_ref, var_alt, genotype):
            # Translate a genotype expressed in the variant's old allele
            # numbering into the new numbering; no-calls pass through.
            if genotype == "./.":
                return "./."
            al1, al2 = genotypetools.get_genotype_alleles(var_ref, var_alt, genotype)
            allele_numbers = sorted([new_alleles.index(al1), new_alleles.index(al2)])
            return "/".join(str(val) for val in allele_numbers)

        # UPDATING ALL THE PREVIOUSLY LOADED VARIANTS FOR THE HOTSPOT VARIANT
        variant_query = {"TYPE": "orig", "CHROM": chrom, "POS": pos, "REF": ref, "ALT": {"$all": alt}}

        # Snapshot the matches first so the updates below do not run against
        # an open cursor on the same collection.
        for var in list(variants_coll.find(variant_query)):
            var_ref, var_alt, var_gt, var_gt_orig = var["REF"], var["ALT"], var["GT_calc"], var["GT_orig"]
            var_fao, var_af = var["FAO"], var["AF_calc"]

            corrected_gt_calc = renumber_genotype(var_ref, var_alt, var_gt)
            corrected_gt_orig = renumber_genotype(var_ref, var_alt, var_gt_orig)

            # With two values the only possible re-ordering is a swap. Skip it
            # when the stored order already equals the target order, otherwise
            # the values would end up attached to the wrong alleles (and a
            # repeated run would flip them back and forth).
            if len(var_fao) == 2 and var_alt != alt:
                var_fao = list(reversed(var_fao))
                var_af = list(reversed(var_af))

            # TODO: AF_calc and FAO are not re-ordered when they hold more
            # than two values; this was deliberately postponed because
            # downstream analysis does not need it yet.

            sample_query = {
                "TYPE": "orig",
                "SAMPLE": var["SAMPLE"],
                "CHROM": chrom,
                "POS": pos,
                "REF": ref,
                "ALT": {"$all": alt},
            }
            update = {
                "$set": {
                    "ALT": alt,
                    "GT_calc": corrected_gt_calc,
                    "FAO": var_fao,
                    "AF_calc": var_af,
                    "GT_orig": corrected_gt_orig,
                }
            }

            modified_count = variants_coll.update_one(sample_query, update).modified_count
            if modified_count != 1:
                self.__log_serious_hotspot_discrepancy(
                    "THERE WAS A PROBLEM MODIFYING THE ORIGINAL VARIANTS DOCUMENT"
                )