コード例 #1
0
    def processIMPUTEFiles(self):
        """Configure and run one filtered-dosage builder per name in the dosage folder.

        Names come from ``Utilities.hapNamesFromFolder(self.dosage_folder)``.
        People are loaded from ``self.samples_input`` and ``self.samples_output``
        and the snp data set from ``self.snp_list``; the same loaded objects are
        handed to every builder. Each builder's ``output_pattern`` is the name
        joined onto ``self.output_folder``.

        Raises:
            Exceptions.InvalidOutputFormat: when ``self.output_format`` is
                neither ``Formats.IMPUTE`` nor ``Formats.PrediXcan``. The check
                runs inside the per-name loop, so it only fires if at least
                one name was found.
        """
        logging.info("Loading people")
        names = Utilities.hapNamesFromFolder(self.dosage_folder)
        all_people = Person.Person.loadPeople(self.samples_input)

        # Index the selected (output) people by id for the builders.
        selected_people_by_id = {}
        for person in Person.Person.loadPeople(self.samples_output, delim=" "):
            selected_people_by_id[person.id] = person

        logging.info("Loading snps")
        snp_data_set = DataSet.DataSetFileUtilities.loadFromCompressedFile(self.snp_list)
        # Dict used as a membership table: every rsid maps to True.
        snp_dict = {}
        for rsid in snp_data_set.data:
            snp_dict[rsid] = True

        for name in names:
            output_pattern = os.path.join(self.output_folder, name)
            builder = ThousandGenomesUtilities.IMPUTEFilteredDosageFileBuilder()
            builder.base_path = self.dosage_folder
            builder.name = name
            builder.output_pattern = output_pattern
            builder.snp_dict = snp_dict
            builder.all_people = all_people
            builder.selected_people_by_id = selected_people_by_id

            if self.output_format == Formats.IMPUTE:
                builder.buildIMPUTE()
                continue

            if self.output_format == Formats.PrediXcan:
                # The chromosome is taken from the first capture group of the
                # regex applied to the name.
                name_match = self.chromosome_in_name_regex.search(name)
                # NOTE(review): exitIf is presumably expected to abort when the
                # condition is true; otherwise .group(1) below fails on None.
                exitIf(name_match is None, Exceptions.InvalidInputFormat,
                       "No files found in '%s' that match the pattern, '%s'"
                       % (self.dosage_folder, self.chromosome_in_name_regex.pattern))
                builder.chromosome_name = name_match.group(1)
                builder.buildPrediXcan()
                continue

            raise Exceptions.InvalidOutputFormat(self.output_format)
コード例 #2
0
    def processIMPUTEFiles(self):
        """Build filtered dosage output for each name found in ``self.dosage_folder``.

        Loads all people (``self.samples_input``), the selected people
        (``self.samples_output``, space-delimited) and the snp data set
        (``self.snp_list``) once, then configures a fresh
        ``IMPUTEFilteredDosageFileBuilder`` per name and calls the build
        method that corresponds to ``self.output_format``.

        Raises:
            Exceptions.InvalidOutputFormat: if ``self.output_format`` is not
                ``Formats.IMPUTE`` or ``Formats.PrediXcan`` (evaluated per
                name, inside the loop).
        """
        logging.info("Loading people")
        names = Utilities.hapNamesFromFolder(self.dosage_folder)
        all_people = Person.Person.loadPeople(self.samples_input)

        chosen = Person.Person.loadPeople(self.samples_output, delim=" ")
        selected_people_by_id = dict((person.id, person) for person in chosen)

        logging.info("Loading snps")
        loader = DataSet.DataSetFileUtilities
        snp_data_set = loader.loadFromCompressedFile(self.snp_list)
        # Membership table: every rsid in the data set maps to True.
        snp_dict = dict.fromkeys(snp_data_set.data, True)

        for name in names:
            target = os.path.join(self.output_folder, name)
            builder = ThousandGenomesUtilities.IMPUTEFilteredDosageFileBuilder()
            builder.base_path = self.dosage_folder
            builder.name = name
            builder.output_pattern = target
            builder.snp_dict = snp_dict
            builder.all_people = all_people
            builder.selected_people_by_id = selected_people_by_id

            if self.output_format == Formats.IMPUTE:
                builder.buildIMPUTE()
            elif self.output_format == Formats.PrediXcan:
                regex = self.chromosome_in_name_regex
                found = regex.search(name)
                # NOTE(review): relies on exitIf stopping execution when the
                # name does not match; confirm against its definition.
                exitIf(
                    found is None,
                    Exceptions.InvalidInputFormat,
                    "No files found in '%s' that match the pattern, '%s'"
                    % (self.dosage_folder, self.chromosome_in_name_regex.pattern))
                # First capture group of the regex is used as the chromosome name.
                chromosome = found.group(1)
                builder.chromosome_name = chromosome
                builder.buildPrediXcan()
            else:
                raise Exceptions.InvalidOutputFormat(self.output_format)
コード例 #3
0
 def testHapNamesFromFolder(self):
     # The dosage_set_1 fixture folder is expected to yield exactly one name.
     fixture_folder = "tests/_td/dosage_set_1"
     found = Utilities.hapNamesFromFolder(fixture_folder)
     self.assertEqual(found, ["set_chr1"])
コード例 #4
0
ファイル: test_utilities.py プロジェクト: Varoona/MetaXcan
 def testHapNamesFromFolder(self):
     # hapNamesFromFolder on the dosage_set_1 fixture must return ["set_chr1"].
     self.assertEqual(
         Utilities.hapNamesFromFolder("tests/_td/dosage_set_1"),
         ["set_chr1"])