def test_open_cnv_regions(self):
     """ check that open_cnv_regions() turns a regions table into a dict
     """
     
     header = 'id_syndrome_feature\tid_syndrome\tcopy_number\tchr_start\tchr_end\tchr\n'
     row = '20\t1\t1\t1569197\t2110236\t4\tNA\t2650330\t149066\t1t\n'
     
     # the lines are encoded to bytes before being written to the temp file
     encoded = [text.encode('utf8') for text in (header, row)]
     self.temp.writelines(encoded)
     self.temp.flush()
     
     # a single entry: the key holds the row's chr, chr_start and chr_end
     # values, and the value is the row's copy_number
     expected = {('4', '1569197', '2110236'): '1'}
     self.assertEqual(open_cnv_regions(self.temp.name), expected)
Example #2
0
    def test_open_cnv_regions(self):
        """ open_cnv_regions() should map a region's coordinates to its copy number
        """

        table = (
            'id_syndrome_feature\tid_syndrome\tcopy_number\tchr_start\tchr_end\tchr\n',
            '20\t1\t1\t1569197\t2110236\t4\tNA\t2650330\t149066\t1t\n',
        )

        # write the table out as utf8-encoded bytes, then flush so the
        # contents are on disk before the file is opened by name
        raw = []
        for entry in table:
            raw.append(entry.encode('utf8'))
        self.temp.writelines(raw)
        self.temp.flush()

        observed = open_cnv_regions(self.temp.name)
        self.assertEqual(observed, {('4', '1569197', '2110236'): '1'})
Example #3
0
    def __init__(
            self,
            population_tags=None,
            count=0,
            known_genes=None,
            date=None,
            regions=None,
            lof_sites=None,
            pp_filter=0.0,
            sum_x_lr2_file=None,
            output_path=None,
            export_vcf=None,
            debug_chrom=None,
            debug_pos=None):
        """ store the analysis settings and load the reference datasets
        
        Args:
            population_tags: list of population ID tags that could exist
                within the INFO field, or None.
            count: number of probands to analyse, helpful for tracking
                progress in output logs.
            known_genes: path to a table of genes known to be associated
                with genetic disorders, or None.
            date: date of the known_genes file, or None if not using/unknown.
            regions: path to a table of regions for DECIPHER CNV syndromes.
            lof_sites: path to a json file of [chrom, position] coordinates
                in the genome, for modifying to a loss-of-function
                consequence if required. Can be None if unneeded.
            pp_filter: threshold from 0 to 1 for the pp_dnm value; candidate
                DNMs which fall below this value are filtered out.
            sum_x_lr2_file: file containing the sum of l2r values on the X
                chromosome for each person.
            output_path: path to write the output tab-separated file to.
            export_vcf: path to a file or folder to write VCFs to.
            debug_chrom: chromosome for debugging purposes.
            debug_pos: position for debugging variant filtering at.
        """
        
        # progress bookkeeping: the expected total, and a counter starting at 0
        self.total = count
        self.count = 0
        
        # plain settings, stored as given
        self.populations = population_tags
        self.pp_filter = pp_filter
        self.debug_pos = debug_pos
        self.debug_chrom = debug_chrom
        
        # reference datasets; these loaders return None if the path is None
        self.known_genes = open_known_genes(known_genes)
        self.cnv_regions = open_cnv_regions(regions)
        self.last_base = open_last_base_sites(lof_sites)
        
        # sum of mean log 2 ratios on X; an empty dict is returned if the
        # path is None
        self.sum_x_lr2 = open_x_lr2_file(sum_x_lr2_file)
        
        # the reporter receives the output locations and the known genes date
        self.reporter = Report(output_path, export_vcf, date)
 def load_definitions_files(self):
     """load the optional reference files named in the script options
     
     Sets self.known_genes, self.ID_mapper and self.cnv_regions. Each one is
     left as None when its option is None.
     """
     
     # a named gene file gives a dictionary of genes known to be involved
     # with disorders, so variants can be screened for being in those genes
     self.known_genes = None
     genes_path = self.options.genes
     if genes_path is not None:
         self.known_genes = open_known_genes(genes_path)
     
     # a named ID mapping file gives a dictionary of IDs and alternate IDs,
     # for converting between different ID schemes
     self.ID_mapper = None
     ids_path = self.options.alternate_ids
     if ids_path is not None:
         self.ID_mapper = create_person_ID_mapper(ids_path)
     
     # a named regions file gives the regions associated with DECIPHER
     # syndromes
     self.cnv_regions = None
     regions_path = self.options.regions
     if regions_path is not None:
         self.cnv_regions = open_cnv_regions(regions_path)
    def load_definitions_files(self):
        """read in whichever config files the script options point at

        Each of self.known_genes, self.ID_mapper and self.cnv_regions is reset
        to None, then replaced with the loaded data if its option is not None.
        """

        # genes known to be involved with disorders: loaded into a dictionary
        # so that variants can be screened for being in those genes
        self.known_genes = None
        gene_file = self.options.genes
        if gene_file is not None:
            self.known_genes = open_known_genes(gene_file)

        # IDs and alternate IDs: loaded into a dictionary so that we can
        # convert between different ID schemes
        self.ID_mapper = None
        id_file = self.options.alternate_ids
        if id_file is not None:
            self.ID_mapper = create_person_ID_mapper(id_file)

        # regions associated with DECIPHER syndromes
        self.cnv_regions = None
        region_file = self.options.regions
        if region_file is not None:
            self.cnv_regions = open_cnv_regions(region_file)