Esempio n. 1
0
    def load_single(self, sample):
        """Load the variants of one sample from its VCF file.

        Calls ``variants_mongo.index_variants()`` first, then looks up the
        sample's VCF path in the mapping returned by
        ``sampleinfo_mongo.get_vcf_files()``, loads it, and logs success.

        Args:
            sample: key into the mapping returned by
                ``sampleinfo_mongo.get_vcf_files()``.

        Raises:
            KeyError: presumably, when ``sample`` is not in that mapping
                (assumes the mapping behaves like a dict -- TODO confirm).
        """
        variants_mongo.index_variants()

        # Fetch the sample -> VCF path mapping and pick this sample's entry.
        sample_vcf_path = sampleinfo_mongo.get_vcf_files()[sample]

        self.__load_sample_variants(sample, sample_vcf_path)
        self.__log_single_successfully_loaded(sample)
Esempio n. 2
0
    def load_all(self, num_processors=10):
        """Queue every not-yet-loaded VCF file and load the queue in parallel.

        For ``variant_type == 'orig'`` the sample -> VCF path mapping comes
        from ``sampleinfo_mongo.get_vcf_files()``. Every path is checked with
        ``os.path.isfile`` before anything is queued, and samples that
        ``variants_mongo.is_sample_loaded`` reports as loaded are skipped.
        For ``variant_type == 'hotspot'`` the queue is whatever
        ``self.__get_unsaved_hotspot_vcf_files()`` returns.

        Args:
            num_processors: passed through to
                ``self.__parallel_process_vcf_files``. Defaults to 10, the
                value that used to be hard-coded here.

        Raises:
            ValueError: if ``self.variant_type`` is neither ``'orig'`` nor
                ``'hotspot'``.
            SystemExit: with status 1, if any VCF path is not an existing
                file (raised by ``sys.exit`` after logging the bad path).
        """
        if self.variant_type == 'orig':
            client, db = mongo.get_connection()
            try:
                vcf_files = sampleinfo_mongo.get_vcf_files()

                # Check that the VCFs are all valid before starting, so a
                # bad path aborts the run before any sample is queued.
                for sample in vcf_files:
                    vcf_file = vcf_files[sample]
                    if not os.path.isfile(vcf_file):
                        self.__log_invalid_vcf_file(vcf_file)
                        sys.exit(1)

                pending_vcf_files = []
                for sample in vcf_files:
                    # Parenthesised single-argument form prints the same
                    # text on Python 2 and also parses on Python 3.
                    print(sample)
                    vcf_file = vcf_files[sample]

                    is_loaded = variants_mongo.is_sample_loaded(
                        sample, self.variant_type, db)
                    if is_loaded:
                        self.__log_sample_already_loaded(sample)
                        continue

                    self.__log_adding_sample_to_queue(sample, vcf_file)
                    pending_vcf_files.append((sample, vcf_file))
            finally:
                # Release the connection even when sys.exit() or a failed
                # query unwinds out of the block above.
                client.close()

        elif self.variant_type == 'hotspot':
            pending_vcf_files = self.__get_unsaved_hotspot_vcf_files()

        else:
            # Without this branch the queue would be unbound below and the
            # call would die with an opaque UnboundLocalError.
            raise ValueError(
                "Unsupported variant_type: %r (expected 'orig' or 'hotspot')"
                % (self.variant_type,))

        self.__parallel_process_vcf_files(pending_vcf_files, num_processors)

        self.__log_successfully_loaded()