def test_open_known_genes_missing_lines(self):
     ''' check open_known_genes() raises an error for a file without any genes
     '''

     columns = ['gene', 'chr', 'start', 'stop', 'type', 'mode', 'mech', 'hgnc_id']
     header_row = '\t'.join(columns) + '\n'

     # the temporary file only ever receives the header line, so there are no
     # gene lines for open_known_genes() to pick up
     self.temp.write(header_row.encode('utf8'))
     self.temp.flush()

     # a gene file without any genes is treated as an error: the likeliest
     # cause is a damaged data file (probably its line-endings), so we
     # expect a ValueError rather than an empty result
     self.assertRaises(ValueError, open_known_genes, self.temp.name)
Esempio n. 2
0
    def test_open_known_genes_missing_lines(self):
        ''' check that open_known_genes() rejects a file holding only a header
        '''

        columns = (
            'gene', 'chr', 'start', 'stop', 'type', 'mode', 'mech', 'hgnc_id',
        )
        contents = '\t'.join(columns) + '\n'

        # write the header by itself, so the file has no gene lines at all
        self.temp.write(contents.encode('utf8'))
        self.temp.flush()

        gene_path = self.temp.name

        # finding no genes in a file we have read is reported as an error,
        # because the most probable cause is a problem with the data file
        # itself (likely the line-endings)
        with self.assertRaises(ValueError):
            open_known_genes(gene_path)
Esempio n. 3
0
    def test_open_known_genes_wrong_status(self):
        ''' check open_known_genes() drops genes which lack a suitable status
        '''

        rows = [
            ['gene', 'chr', 'start', 'stop', 'type', 'mode', 'mech',
                'hgnc_id'],
            # this gene is only a 'possible dd gene', and is absent from the
            # expected result below
            ['TEST', '1', '1000', '2000', 'possible dd gene', 'Monoallelic',
                'Loss-of-function', '1001'],
            # this gene is a 'confirmed dd gene', and is the only one expected
            ['TEST2', '1', '3000', '4000', 'confirmed dd gene', 'Monoallelic',
                'Loss-of-function', '2001'],
        ]

        # write the header and gene lines out as tab-separated utf8 text
        for row in rows:
            self.temp.write(('\t'.join(row) + '\n').encode('utf8'))
        self.temp.flush()

        # entries are keyed by the hgnc_id column, with integer coordinates
        expected = {
            '2001': {
                'chrom': '1',
                'start': 3000,
                'end': 4000,
                'symbol': 'TEST2',
                'status': {'confirmed dd gene'},
                'inh': {'Monoallelic': {'Loss-of-function'}},
            },
        }

        self.assertEqual(open_known_genes(self.temp.name), expected)
Esempio n. 4
0
    def test_open_known_genes_multimechs(self):
        ''' check open_known_genes() collects every mechanism listed for a gene
        '''

        # the two gene lines are identical apart from the mechanism column
        rows = [
            ['gene', 'chr', 'start', 'stop', 'type', 'mode', 'mech',
                'hgnc_id'],
            ['TEST', '1', '1000', '2000', 'confirmed dd gene', 'Monoallelic',
                'Loss-of-function', '1001'],
            ['TEST', '1', '1000', '2000', 'confirmed dd gene', 'Monoallelic',
                'Activating', '1001'],
        ]

        # write the header and gene lines out as tab-separated utf8 text
        for row in rows:
            self.temp.write(('\t'.join(row) + '\n').encode('utf8'))
        self.temp.flush()

        # a single entry is expected, where both mechanisms sit under the
        # shared inheritance mode
        expected = {
            '1001': {
                'chrom': '1',
                'start': 1000,
                'end': 2000,
                'symbol': 'TEST',
                'status': {'confirmed dd gene'},
                'inh': {'Monoallelic': {'Loss-of-function', 'Activating'}},
            },
        }

        self.assertEqual(open_known_genes(self.temp.name), expected)
 def test_open_known_genes(self):
     ''' check open_known_genes() parses a file containing a single gene
     '''

     rows = [
         ['gene', 'chr', 'start', 'stop', 'type', 'mode', 'mech', 'hgnc_id'],
         ['TEST', '1', '1000', '2000', 'confirmed dd gene',
             'Monoallelic', 'Loss-of-function', '1001'],
     ]

     # write the header and the gene line out as tab-separated utf8 text
     for row in rows:
         self.temp.write(('\t'.join(row) + '\n').encode('utf8'))
     self.temp.flush()

     # the entry is keyed by the hgnc_id column, and the start and stop
     # columns are expected back as integers
     expected = {
         '1001': {
             'chrom': '1',
             'start': 1000,
             'end': 2000,
             'symbol': 'TEST',
             'status': {'confirmed dd gene'},
             'inh': {'Monoallelic': {'Loss-of-function'}},
         },
     }

     self.assertEqual(open_known_genes(self.temp.name), expected)
Esempio n. 6
0
    def __init__(self, population_tags=None, count=0, known_genes=None, date=None,
            regions=None, lof_sites=None, pp_filter=0.0, sum_x_lr2_file=None,
            output_path=None, export_vcf=None, debug_chrom=None, debug_pos=None):
        """ set up the filtering options, reference datasets and reporter

        Args:
            population_tags: list of population ID tags which might be present
                in the INFO field, or None.
            count: number of probands to analyse, which helps when tracking
                progress in the output logs.
            known_genes: path to a table of genes known to be associated with
                genetic disorders, or None.
            date: date of the known_genes file, or None if not using/unknown.
            regions: path to a table of regions for DECIPHER CNV syndromes.
            lof_sites: path to a json file of [chrom, position] coordinates in
                the genome, for modifying to a loss-of-function consequence
                if required. Can be None if unneeded.
            pp_filter: threshold from 0 to 1 for the pp_dnm value, where
                candidate DNMs which fall below this value are filtered out.
            sum_x_lr2_file: file containing the sum of l2r values on the
                X chromosome for each person.
            output_path: path to write the output tab-separated file to.
            export_vcf: path to a file or folder to write VCFs to.
            debug_chrom: chromosome for debugging purposes.
            debug_pos: position to debug variant filtering at.
        """

        # progress tracking: 'total' is the number of probands we were told to
        # expect, while 'count' starts from zero (presumably incremented as
        # probands are processed elsewhere in the class - confirm)
        self.total = count
        self.count = 0

        # options applied while filtering variants
        self.pp_filter = pp_filter
        self.populations = population_tags

        # site for debugging the variant filtering
        self.debug_chrom = debug_chrom
        self.debug_pos = debug_pos

        # load the reference datasets. According to the original notes, these
        # loaders give None when their path is None.
        self.known_genes = open_known_genes(known_genes)
        self.cnv_regions = open_cnv_regions(regions)
        self.last_base = open_last_base_sites(lof_sites)

        # load the per-person sums of mean log2 ratios on chrX. According to
        # the original notes, this gives an empty dict when the path is None.
        self.sum_x_lr2 = open_x_lr2_file(sum_x_lr2_file)

        # the reporter is given the output locations and the gene file date
        self.reporter = Report(output_path, export_vcf, date)
 def test_open_known_genes_wrong_status(self):
     ''' check open_known_genes() excludes genes without an accepted status
     '''

     columns = ['gene', 'chr', 'start', 'stop', 'type', 'mode', 'mech', 'hgnc_id']
     # only a 'possible dd gene', so it is missing from the expected result
     possible = ['TEST', '1', '1000', '2000', 'possible dd gene',
         'Monoallelic', 'Loss-of-function', '1001']
     # a 'confirmed dd gene', which is the only entry we expect back
     confirmed = ['TEST2', '1', '3000', '4000', 'confirmed dd gene',
         'Monoallelic', 'Loss-of-function', '2001']

     # write the header and gene lines out as tab-separated utf8 text
     for row in (columns, possible, confirmed):
         self.temp.write(('\t'.join(row) + '\n').encode('utf8'))
     self.temp.flush()

     expected = {
         '2001': {
             'chrom': '1',
             'start': 3000,
             'end': 4000,
             'symbol': 'TEST2',
             'status': {'confirmed dd gene'},
             'inh': {'Monoallelic': {'Loss-of-function'}},
         },
     }

     self.assertEqual(open_known_genes(self.temp.name), expected)
Esempio n. 8
0
 def test_open_known_genes_multimechs(self):
     ''' check open_known_genes() collects every mechanism listed for a gene
     '''

     # NOTE(review): this fixture has no 'hgnc_id' column, and the expected
     # dict is keyed by the gene symbol without a 'symbol' field - confirm
     # that this matches the open_known_genes() version under test.
     columns = ['gene', 'chr', 'start', 'stop', 'type', 'mode', 'mech']

     # the two gene lines are identical apart from the mechanism column
     lof = ['TEST', '1', '1000', '2000', 'confirmed dd gene',
         'Monoallelic', 'Loss-of-function']
     activating = ['TEST', '1', '1000', '2000', 'confirmed dd gene',
         'Monoallelic', 'Activating']

     # write the header and gene lines out as tab-separated utf8 text
     for row in (columns, lof, activating):
         self.temp.write(('\t'.join(row) + '\n').encode('utf8'))
     self.temp.flush()

     # a single entry is expected, where both mechanisms sit under the
     # shared inheritance mode
     expected = {
         'TEST': {
             'chrom': '1',
             'start': 1000,
             'end': 2000,
             'status': {'confirmed dd gene'},
             'inh': {'Monoallelic': {'Loss-of-function', 'Activating'}},
         },
     }

     self.assertEqual(open_known_genes(self.temp.name), expected)
Esempio n. 9
0
 def load_definitions_files(self):
     """load the optional reference files named in the script options

     Each attribute is first set to None, and is only replaced with the loaded
     data when the matching option holds a path.
     """

     # genes known to be involved with disorders, loaded as a dictionary so
     # that variants can be screened for falling within those genes
     self.known_genes = None
     genes_path = self.options.genes
     if genes_path is not None:
         self.known_genes = open_known_genes(genes_path)

     # dictionary of IDs and their alternate IDs, so that we can convert
     # between different ID schemes
     self.ID_mapper = None
     ids_path = self.options.alternate_ids
     if ids_path is not None:
         self.ID_mapper = create_person_ID_mapper(ids_path)

     # regions associated with DECIPHER syndromes
     self.cnv_regions = None
     regions_path = self.options.regions
     if regions_path is not None:
         self.cnv_regions = open_cnv_regions(regions_path)
Esempio n. 10
0
    def load_definitions_files(self):
        """read in whichever definition files the options have named

        The known genes, ID mapper and CNV regions attributes each start out
        as None, and are only filled in when their option gives a path.
        """

        # a named gene file gives us a dictionary of genes known to be
        # involved with disorders, which lets variants be screened by gene
        self.known_genes = None
        gene_file = self.options.genes
        if gene_file is not None:
            self.known_genes = open_known_genes(gene_file)

        # a named ID mapping file gives us a dictionary of IDs and alternate
        # IDs, for converting between different ID schemes
        self.ID_mapper = None
        mapping_file = self.options.alternate_ids
        if mapping_file is not None:
            self.ID_mapper = create_person_ID_mapper(mapping_file)

        # a named regions file gives us the regions associated with DECIPHER
        # syndromes
        self.cnv_regions = None
        regions_file = self.options.regions
        if regions_file is not None:
            self.cnv_regions = open_cnv_regions(regions_file)