Ejemplo n.º 1
def find_transcripts(ensembl, mut_dict, output, args):
    """Write per-transcript de novo counts for every gene to ``output``.

    Args:
        ensembl: passed through unchanged to the transcript-counting helpers.
        mut_dict: accepted for interface compatibility; not used in this
            function.
        output: writable text handle; receives one tab-separated line per
            transcript (gene symbol, transcript key, and that transcript's
            "len" and "n" entries).
        args: options object providing ``de_novos`` (handed to
            ``load_de_novos``) and the ``all_transcripts`` /
            ``minimal_transcripts`` flags that select the counting mode.
    """

    de_novos = load_de_novos(args.de_novos)

    for symbol in sorted(de_novos):
        func_events = de_novos[symbol]["missense"] + de_novos[symbol]["nonsense"]

        # Start every gene with an empty result so a failure (or neither mode
        # flag being set) can never reuse the previous gene's transcripts.
        counts = {}

        # find the counts per transcript, depending on whether we want to count
        # for all transcripts containing one or more de novos, or to find the
        # minimum set of transcripts to contain the de novos
        try:
            if args.all_transcripts:
                counts = count_de_novos_per_transcript(ensembl, symbol,
                    func_events)
            elif args.minimal_transcripts:
                counts = minimise_transcripts(ensembl, symbol, func_events)
        except (ValueError, IndexError):
            print("error occured with {0}".format(symbol))
            # nothing usable for this gene, move on to the next one
            continue

        # write the transcript details to a file
        for key in counts:
            line = "{}\t{}\t{}\t{}\n".format(symbol, key, counts[key]["len"],
                counts[key]["n"])
            output.write(line)
Ejemplo n.º 2
def find_transcripts(ensembl, mut_dict, output, args):
    """Write per-transcript de novo counts for every gene to ``output``.

    Args:
        ensembl: passed through unchanged to the transcript-counting helpers.
        mut_dict: accepted for interface compatibility; not used in this
            function.
        output: writable text handle; receives one tab-separated line per
            transcript (gene symbol, transcript key, and that transcript's
            "len" and "n" entries).
        args: options object providing ``de_novos`` (handed to
            ``load_de_novos``) and the ``all_transcripts`` /
            ``minimal_transcripts`` flags that select the counting mode.
    """
    de_novos = load_de_novos(args.de_novos)
    for symbol in sorted(de_novos):
        func_events = de_novos[symbol]["missense"] + de_novos[symbol]["nonsense"]
        # Start every gene with an empty result so a failure (or neither mode
        # flag being set) can never reuse the previous gene's transcripts.
        counts = {}
        # find the counts per transcript, depending on whether we want to count
        # for all transcripts containing one or more de novos, or to find the
        # minimum set of transcripts to contain the de novos
        try:
            if args.all_transcripts:
                counts = count_de_novos_per_transcript(ensembl, symbol, func_events)
            elif args.minimal_transcripts:
                counts = minimise_transcripts(ensembl, symbol, func_events)
        except (ValueError, IndexError):
            print("error occured with {0}".format(symbol))
            # nothing usable for this gene, move on to the next one
            continue
        # write the transcript details to a file
        for key in counts:
            line = "{}\t{}\t{}\t{}\n".format(symbol, key, counts[key]["len"],
                counts[key]["n"])
            output.write(line)
Ejemplo n.º 3
 def test_minimise_transcripts(self):
     """ test that minimise_transcripts() works correctly
     """
     # run through a test case for a single gene: both sites are expected to
     # be counted (n == 2) in each of the five listed transcripts
     hgnc = "DYNLL1"
     sites = [120934226, 120936012]
     counts = minimise_transcripts(self.ensembl, hgnc, sites)
     expected = {'ENST00000242577': {'len': 89, 'n': 2},
         'ENST00000392508': {'len': 89, 'n': 2},
         'ENST00000392509': {'len': 89, 'n': 2},
         'ENST00000549989': {'len': 89, 'n': 2},
         'ENST00000548342': {'len': 89, 'n': 2}}
     self.assertEqual(counts, expected)
     # check that when we don't have any de novos, we return an empty dict
     self.assertEqual(minimise_transcripts(self.ensembl, hgnc, []), {})
     # check that when none of the de novos are in a transcript, we return
     # an empty dict.
     self.assertEqual(minimise_transcripts(self.ensembl, hgnc, [100]), {})
Ejemplo n.º 4
 def test_minimise_transcripts(self):
     """ test that minimise_transcripts() works correctly
     """
     # run through a test case for a single gene: both sites are expected to
     # be counted (n == 2) in each of the five listed transcripts
     hgnc = "DYNLL1"
     sites = [120934226, 120936012]
     counts = minimise_transcripts(self.ensembl, hgnc, sites)
     expected = {'ENST00000242577': {'len': 89, 'n': 2},
         'ENST00000392508': {'len': 89, 'n': 2},
         'ENST00000392509': {'len': 89, 'n': 2},
         'ENST00000549989': {'len': 89, 'n': 2},
         'ENST00000548342': {'len': 89, 'n': 2}}
     self.assertEqual(counts, expected)
     # check that when we don't have any de novos, we return an empty dict
     self.assertEqual(minimise_transcripts(self.ensembl, hgnc, []), {})
     # check that when none of the de novos are in a transcript, we return
     # an empty dict.
     self.assertEqual(minimise_transcripts(self.ensembl, hgnc, [100]), {})
Ejemplo n.º 5
def main():
    """Count de novos per transcript for every gene and write them to a file.

    Options come from ``get_options()``. For each gene in the loaded de novo
    table, the missense and nonsense events are combined, the gene ID is
    swapped for its replacement when it appears in the deprecated-ID table,
    and one tab-separated line per transcript (gene ID, transcript key, and
    the transcript's "len" and "n" entries) is written to the output file.
    """
    (input_file, output_file, old_gene_id_file, cache_dir, genome_build,
        all_transcripts, minimal_transcripts) = get_options()

    # load all the data
    ensembl = EnsemblRequest(cache_dir, genome_build)

    old_gene_ids = {}
    if old_gene_id_file is not None:
        old_gene_ids = get_deprecated_gene_ids(old_gene_id_file)

    known_de_novos = load_de_novos(input_file, exclude_indels=False)

    # the context manager guarantees the output file is flushed and closed,
    # even if one of the helpers raises an unexpected exception
    with open(output_file, "w") as output:
        for gene_id in sorted(known_de_novos):
            de_novos = known_de_novos[gene_id]
            func_events = de_novos["missense"] + de_novos["nonsense"]

            # fix HGNC IDs that have been discontinued in favour of other
            # gene IDs
            if gene_id in old_gene_ids:
                gene_id = old_gene_ids[gene_id]

            # Start every gene with an empty result so a failure (or neither
            # mode flag being set) can never reuse the previous gene's
            # transcripts.
            counts = {}

            # find the counts per transcript, depending on whether we want to
            # count for all transcripts containing one or more de novos, or to
            # find the minimum set of transcripts to contain the de novos
            try:
                if all_transcripts:
                    counts = count_de_novos_per_transcript(ensembl, gene_id,
                        func_events)
                elif minimal_transcripts:
                    counts = minimise_transcripts(ensembl, gene_id, func_events)
            except (ValueError, IndexError):
                print("error occured with {0}".format(gene_id))
                # nothing usable for this gene, move on to the next one
                continue

            # write the transcript details to a file
            for key in counts:
                line = "{0}\t{1}\t{2}\t{3}\n".format(gene_id, key,
                    counts[key]["len"], counts[key]["n"])
                output.write(line)