Ejemplo n.º 1
0
Archivo: add.py Proyecto: jlaw9/TRI_Dev
    def add_one_sample(self, new_sample_dict):
        """Register a new sample and load its variants if they are missing.

        The entry is tagged with this project's name (the caller's dict is
        updated in place, as before), stored through
        ``sampleinfo_mongo.add_new_sample``, and its "orig" variants are
        loaded unless ``variants_mongo.is_sample_loaded`` reports them as
        already present.

        Args:
            new_sample_dict: sample entry; must contain a ``SAMPLE`` key.

        Raises:
            SystemExit: with status 1 when the ``SAMPLE`` key is missing.
        """
        if 'SAMPLE' not in new_sample_dict:
            # This is a failure: log it as an error and exit non-zero so
            # calling scripts do not mistake it for success.
            self.logger.error("The new sample entry does not contain a SAMPLE key.")
            sys.exit(1)

        new_sample_dict.update({"PROJECT": self.project_config['project_name']})
        sampleinfo_mongo.add_new_sample(new_sample_dict)

        sample = new_sample_dict["SAMPLE"]
        if not variants_mongo.is_sample_loaded(sample, "orig"):
            self.__load_variants(sample)
Ejemplo n.º 2
0
    def sample_variants_csv(self, sample, type):
        """Write one sample's variants to ``<output_files_dir>/<sample>.csv``.

        Each variant returned by ``variants_mongo.get_sample_vars`` is joined
        with its record from ``hotspot_mongo.get_variant`` and written as one
        row. The columns are CHROM, POS, REF, ALT, GT, RSID, Gene,
        ExonicFunc, AAChange, FREQ, QC_Final, QC_Cov, QC_AF and In_Hotspot.
        Every value is converted to ``str`` before writing, so every field is
        quoted under ``csv.QUOTE_NONNUMERIC``.

        Args:
            sample: sample name; also used as the CSV file name.
            type: variant type passed through to ``variants_mongo``
                (the name shadows the builtin but is kept for callers).

        Returns:
            The path of the written CSV, or ``None`` when the sample is not
            known or its variants of the given type are not loaded.
        """
        if not sampleinfo_mongo.is_sample(sample) or not variants_mongo.is_sample_loaded(sample, type):
            self.__log_sample_doesnt_exist()
            return None

        out_path = "%s/%s.csv" % (self.output_files_dir, sample)
        print(out_path)

        header = ['CHROM', 'POS', 'REF', 'ALT', 'GT', 'RSID', 'Gene',
                  'ExonicFunc', 'AAChange', 'FREQ', 'QC_Final', 'QC_Cov', 'QC_AF', 'In_Hotspot']

        # The context manager guarantees the file is flushed and closed before
        # the path is handed back to the caller.
        with open(out_path, "w") as out_file:
            csv_writer = csv.writer(out_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
            csv_writer.writerow(header)

            client, db = mongo.get_connection()
            try:
                total_loaded_samples = variants_mongo.count_samples()

                for var in variants_mongo.get_sample_vars(sample, type, db):
                    chrom, pos, ref, alt = var['CHROM'], var['POS'], var['REF'], var['ALT']
                    al1, al2 = genotypetools.get_genotype_alleles(ref, alt, var['GT_calc'])
                    new_variant = {'CHROM': chrom, 'POS': pos, 'REF': ref, 'ALT': ",".join(alt),
                                   'GT': "/".join([al1, al2])}

                    hotspot = hotspot_mongo.get_variant(chrom, pos, ref, alt, db)

                    # Only the first annotation entry is reported.
                    annot = hotspot['ANNOTATION'][0]

                    aa_change = annot['AAChange_refGene']
                    if 'p.' in aa_change:
                        # Keep the text after the first "p." up to the next comma.
                        aa_change = aa_change.split('p.')[1].split(",")[0]

                    new_variant.update({'RSID': annot['snp137NonFlagged'],
                                        'Gene': annot['Gene_refGene'],
                                        'ExonicFunc': annot['ExonicFunc_refGene'],
                                        'AAChange': aa_change})

                    # FREQ is the sum of the het, het_alt and hom counts
                    # divided by the number of loaded samples.
                    zygosity = hotspot['orig_stats']['zygosity']
                    carriers = sum([zygosity['het_count'], zygosity['het_alt_count'], zygosity['hom_count']])
                    freq = carriers / float(total_loaded_samples)

                    in_hotspot = "TRUE" if hotspot['orig_stats']['qc']['final_qc_count'] > 0 else "FALSE"

                    new_variant.update({"FREQ": freq, "QC_Final": var['FINAL_QC'],
                                        "QC_Cov": var['COV_QC'], "QC_AF": var['AF_QC'],
                                        "In_Hotspot": in_hotspot})

                    csv_writer.writerow([str(new_variant[field]) for field in header])
            finally:
                # Release the Mongo connection even if a variant fails.
                client.close()

        return out_path
Ejemplo n.º 3
0
    def __get_unsaved_hotspot_vcf_files(self):
        """Collect the hotspot VCF files that still have to be loaded.

        Every ``*.vcf`` file under ``<hotspot_dir>/hotspot_output`` is
        examined; the sample name is the part of the file name before the
        first dot. A file is queued when the sample exists in
        ``sampleinfo_mongo`` and its variants of ``self.variant_type`` are
        not loaded yet. Every other file is reported through
        ``__log_hotspot_sample_already_loaded``.

        Returns:
            A list of ``(sample, vcf_file)`` tuples, in glob order.
        """
        search_dir = self.project_config['hotspot_dir'] + "/hotspot_output"
        candidates = glob(search_dir + "/*.vcf")

        queued = []
        client, db = mongo.get_connection()

        for vcf_path in candidates:
            file_name = os.path.basename(vcf_path)
            sample_name = file_name.split(".")[0]

            # The loaded-check is only made for samples that are known.
            needs_loading = False
            if sampleinfo_mongo.is_sample(sample_name, db):
                needs_loading = not variants_mongo.is_sample_loaded(sample_name, self.variant_type, db)

            if not needs_loading:
                self.__log_hotspot_sample_already_loaded(sample_name)
                continue

            self.__log_adding_hotspot_sample_to_queue(sample_name, vcf_path)
            queued.append((sample_name, vcf_path))

        client.close()
        return queued
Ejemplo n.º 4
0
    def load_all(self, num_processors=10):
        """Load every VCF file of this variant type that is not loaded yet.

        For ``variant_type == 'orig'`` the VCF paths come from
        ``sampleinfo_mongo.get_vcf_files()``. All paths are checked before
        anything is queued, and samples already loaded are skipped. For
        ``'hotspot'`` the queue comes from
        ``__get_unsaved_hotspot_vcf_files``. The queue is then passed to
        ``__parallel_process_vcf_files``.

        Args:
            num_processors: value forwarded to
                ``__parallel_process_vcf_files`` (default 10, the previously
                hard-coded value).

        Raises:
            SystemExit: with status 1 when a listed VCF path is not a file.
            ValueError: when ``self.variant_type`` is neither ``'orig'`` nor
                ``'hotspot'``.
        """
        if self.variant_type == 'orig':
            client, db = mongo.get_connection()
            try:
                vcf_files = sampleinfo_mongo.get_vcf_files()

                # Check that all VCFs are valid before starting, so a bad
                # path aborts the run before any sample is queued.
                for sample in vcf_files:
                    vcf_file = vcf_files[sample]
                    if not os.path.isfile(vcf_file):
                        self.__log_invalid_vcf_file(vcf_file)
                        sys.exit(1)

                pending_vcf_files = []
                for sample in vcf_files:
                    print(sample)
                    vcf_file = vcf_files[sample]

                    if variants_mongo.is_sample_loaded(sample, self.variant_type, db):
                        self.__log_sample_already_loaded(sample)
                        continue

                    self.__log_adding_sample_to_queue(sample, vcf_file)
                    pending_vcf_files.append((sample, vcf_file))
            finally:
                # Also runs on the sys.exit(1) path above, so the connection
                # is never leaked.
                client.close()

        elif self.variant_type == 'hotspot':
            pending_vcf_files = self.__get_unsaved_hotspot_vcf_files()

        else:
            # Without this branch the code below would fail with an
            # UnboundLocalError on pending_vcf_files.
            raise ValueError("Unsupported variant type: %r" % (self.variant_type,))

        self.__parallel_process_vcf_files(pending_vcf_files, num_processors)

        self.__log_successfully_loaded()