def processIMPUTEFiles(self):
    """Run a filtered dosage file builder over every name found in the dosage folder.

    People are loaded from ``self.samples_input`` and ``self.samples_output``,
    the snp list is loaded from ``self.snp_list``, and then one
    ``IMPUTEFilteredDosageFileBuilder`` is configured per name returned by
    ``Utilities.hapNamesFromFolder(self.dosage_folder)``.

    Raises:
        Exceptions.InvalidOutputFormat: when ``self.output_format`` is neither
            ``Formats.IMPUTE`` nor ``Formats.PrediXcan`` (only reached if at
            least one name was found).
    """
    logging.info("Loading people")
    names = Utilities.hapNamesFromFolder(self.dosage_folder)
    all_people = Person.Person.loadPeople(self.samples_input)
    selected_people = Person.Person.loadPeople(self.samples_output, delim=" ")
    selected_people_by_id = {person.id: person for person in selected_people}

    logging.info("Loading snps")
    snp_data_set = DataSet.DataSetFileUtilities.loadFromCompressedFile(self.snp_list)
    # Every rsid maps to True; presumably the builder uses this as a
    # membership lookup -- confirm against the builder.
    snp_dict = dict.fromkeys(snp_data_set.data, True)

    for name in names:
        builder = ThousandGenomesUtilities.IMPUTEFilteredDosageFileBuilder()
        builder.base_path = self.dosage_folder
        builder.name = name
        builder.output_pattern = os.path.join(self.output_folder, name)
        builder.snp_dict = snp_dict
        builder.all_people = all_people
        builder.selected_people_by_id = selected_people_by_id

        if self.output_format == Formats.IMPUTE:
            builder.buildIMPUTE()
            continue

        if self.output_format == Formats.PrediXcan:
            # The chromosome name is the first capture group of the
            # configured regex, matched against the file set name.
            match = self.chromosome_in_name_regex.search(name)
            message = "No files found in '%s' that match the pattern, '%s'" \
                % (self.dosage_folder, self.chromosome_in_name_regex.pattern)
            # exitIf receives the "no match" condition plus the exception
            # type and message; presumably it aborts when the condition holds.
            exitIf(match is None, Exceptions.InvalidInputFormat, message)
            builder.chromosome_name = match.group(1)
            builder.buildPrediXcan()
            continue

        raise Exceptions.InvalidOutputFormat(self.output_format)
def processIMPUTEFiles(self):
    """Build filtered dosage output for each file set name in the dosage folder.

    Loads all people (``self.samples_input``), the selected people
    (``self.samples_output``, space-delimited) and the snp list
    (``self.snp_list``), then configures and runs one
    ``IMPUTEFilteredDosageFileBuilder`` per name, choosing the build method
    from ``self.output_format``.

    Raises:
        Exceptions.InvalidOutputFormat: if ``self.output_format`` is neither
            ``Formats.IMPUTE`` nor ``Formats.PrediXcan``; this is checked
            inside the per-name loop.
    """
    logging.info("Loading people")
    names = Utilities.hapNamesFromFolder(self.dosage_folder)
    all_people = Person.Person.loadPeople(self.samples_input)
    selected_people = Person.Person.loadPeople(self.samples_output, delim=" ")

    # Index the selected people by their id attribute.
    selected_people_by_id = {}
    for person in selected_people:
        selected_people_by_id[person.id] = person

    logging.info("Loading snps")
    loader = DataSet.DataSetFileUtilities
    snp_data_set = loader.loadFromCompressedFile(self.snp_list)
    snp_dict = {}
    for rsid in snp_data_set.data:
        snp_dict[rsid] = True

    for name in names:
        output_pattern = os.path.join(self.output_folder, name)

        dosage_builder = ThousandGenomesUtilities.IMPUTEFilteredDosageFileBuilder()
        dosage_builder.base_path = self.dosage_folder
        dosage_builder.name = name
        dosage_builder.output_pattern = output_pattern
        dosage_builder.snp_dict = snp_dict
        dosage_builder.all_people = all_people
        dosage_builder.selected_people_by_id = selected_people_by_id

        if self.output_format == Formats.IMPUTE:
            dosage_builder.buildIMPUTE()
            continue

        if self.output_format == Formats.PrediXcan:
            regex = self.chromosome_in_name_regex
            found = regex.search(name)
            # NOTE(review): exitIf is presumably expected to stop execution
            # when the name does not match; otherwise found.group fails below.
            exitIf(found is None, Exceptions.InvalidInputFormat,
                   "No files found in '%s' that match the pattern, '%s'"
                   % (self.dosage_folder, regex.pattern))
            chromosome = found.group(1)
            dosage_builder.chromosome_name = chromosome
            dosage_builder.buildPrediXcan()
            continue

        raise Exceptions.InvalidOutputFormat(self.output_format)
def testHapNamesFromFolder(self):
    """hapNamesFromFolder on the dosage_set_1 fixture yields only "set_chr1"."""
    expected_names = ["set_chr1"]
    found_names = Utilities.hapNamesFromFolder("tests/_td/dosage_set_1")
    self.assertEqual(found_names, expected_names)
def testHapNamesFromFolder(self):
    """Check the names reported for the tests/_td/dosage_set_1 fixture folder."""
    fixture_folder = "tests/_td/dosage_set_1"
    # The fixture is expected to produce a single name.
    self.assertEqual(
        Utilities.hapNamesFromFolder(fixture_folder),
        ["set_chr1"],
    )