Example #1
0
def find_transcripts(ensembl, mut_dict, output, args):
    """Write per-transcript de novo counts for every gene to ``output``.

    The de novos are loaded from ``args.de_novos``. For each gene symbol (in
    sorted order) the "missense" and "nonsense" events are combined and handed
    to the selected counting function; one tab-separated row is written per
    transcript returned.

    Args:
        ensembl: object passed straight through to the counting functions.
        mut_dict: not used by this function; kept so the signature remains
            compatible with existing callers.
        output: writable file-like object receiving the header and the rows.
        args: options object providing ``de_novos``, ``all_transcripts`` and
            ``minimal_transcripts``.

    Raises:
        ValueError: if neither ``args.all_transcripts`` nor
            ``args.minimal_transcripts`` is set.
    """
    # Resolve the counting strategy before doing any work: either count for
    # all transcripts containing one or more de novos, or find the minimum set
    # of transcripts to contain the de novos. Without a selected mode there is
    # nothing to count, so report that clearly rather than failing later on an
    # unassigned `counts` variable.
    if args.all_transcripts:
        count_transcripts = count_de_novos_per_transcript
    elif args.minimal_transcripts:
        count_transcripts = minimise_transcripts
    else:
        raise ValueError("one of all_transcripts or minimal_transcripts "
                         "must be selected")

    de_novos = load_de_novos(args.de_novos)

    output.write("hgnc_symbol\ttranscript_id\tlength\tde_novos\n")

    for symbol in sorted(de_novos):
        print(symbol)
        func_events = de_novos[symbol]["missense"] + de_novos[symbol][
            "nonsense"]

        # Genes whose lookup fails are reported and skipped so that one bad
        # gene does not abort the whole run.
        try:
            counts = count_transcripts(ensembl, symbol, func_events)
        except (ValueError, IndexError):
            print("error occured with {0}".format(symbol))
            continue

        # write the transcript details to a file
        for key in counts:
            line = "{}\t{}\t{}\t{}\n".format(symbol, key, counts[key]["len"],
                                             counts[key]["n"])
            output.write(line)
Example #2
0
def find_transcripts(ensembl, mut_dict, output, args):
    """Write per-transcript de novo counts for every gene to ``output``.

    The de novos are loaded from ``args.de_novos``. For each gene symbol (in
    sorted order) the "missense" and "nonsense" events are combined and handed
    to the selected counting function; one tab-separated row is written per
    transcript returned.

    Args:
        ensembl: object passed straight through to the counting functions.
        mut_dict: not used by this function; kept so the signature remains
            compatible with existing callers.
        output: writable file-like object receiving the header and the rows.
        args: options object providing ``de_novos``, ``all_transcripts`` and
            ``minimal_transcripts``.

    Raises:
        ValueError: if neither ``args.all_transcripts`` nor
            ``args.minimal_transcripts`` is set.
    """
    # Choose the counting mode once, up front: count for all transcripts
    # containing one or more de novos, or find the minimum set of transcripts
    # to contain the de novos. If no mode is selected, fail with an explicit
    # message instead of hitting an unassigned `counts` inside the loop.
    if args.all_transcripts:
        count_transcripts = count_de_novos_per_transcript
    elif args.minimal_transcripts:
        count_transcripts = minimise_transcripts
    else:
        raise ValueError("one of all_transcripts or minimal_transcripts "
                         "must be selected")

    de_novos = load_de_novos(args.de_novos)

    output.write("hgnc_symbol\ttranscript_id\tlength\tde_novos\n")

    for symbol in sorted(de_novos):
        print(symbol)
        func_events = de_novos[symbol]["missense"] + de_novos[symbol]["nonsense"]

        # A failure for one gene is reported and skipped rather than
        # stopping the run.
        try:
            counts = count_transcripts(ensembl, symbol, func_events)
        except (ValueError, IndexError):
            print("error occured with {0}".format(symbol))
            continue

        # write the transcript details to a file
        for key in counts:
            line = "{}\t{}\t{}\t{}\n".format(symbol, key, counts[key]["len"],
                counts[key]["n"])
            output.write(line)
Example #3
0
def main():
    """Count de novos per transcript for each gene and write a TSV report.

    Options come from ``get_options()``. De novos are loaded from the input
    file (with ``exclude_indels=False``); for each gene, in sorted order, the
    "missense" and "nonsense" events are combined and counted per transcript
    with the selected counting function. Gene IDs found in the optional
    deprecated-ID mapping are replaced by their mapped value before the lookup
    and in the output.

    Raises:
        ValueError: if neither the all-transcripts nor the minimal-transcripts
            option is set.
    """
    input_file, output_file, old_gene_id_file, cache_dir, genome_build, \
        all_transcripts, minimal_transcripts = get_options()

    # Choose the counting mode before any expensive work or before the output
    # file is created/truncated: count for all transcripts containing one or
    # more de novos, or find the minimum set of transcripts to contain the de
    # novos. With no mode selected there is nothing to compute, so fail with
    # an explicit message rather than on an unassigned `counts` in the loop.
    if all_transcripts:
        count_transcripts = count_de_novos_per_transcript
    elif minimal_transcripts:
        count_transcripts = minimise_transcripts
    else:
        raise ValueError("one of all_transcripts or minimal_transcripts "
                         "must be selected")

    # load all the data
    ensembl = EnsemblRequest(cache_dir, genome_build)

    old_gene_ids = {}
    if old_gene_id_file is not None:
        old_gene_ids = get_deprecated_gene_ids(old_gene_id_file)

    known_de_novos = load_de_novos(input_file, exclude_indels=False)

    # The context manager guarantees the output file is flushed and closed
    # even if an unexpected exception escapes the loop.
    with open(output_file, "w") as output:
        output.write("hgnc_symbol\ttranscript_id\tlength\tde_novos\n")

        for gene_id in sorted(known_de_novos):
            de_novos = known_de_novos[gene_id]
            func_events = de_novos["missense"] + de_novos["nonsense"]

            # fix HGNC IDs that have been discontinued in favour of other
            # gene IDs
            if gene_id in old_gene_ids:
                gene_id = old_gene_ids[gene_id]

            # A failure for one gene is reported and skipped rather than
            # stopping the run.
            try:
                counts = count_transcripts(ensembl, gene_id, func_events)
            except (ValueError, IndexError):
                print("error occured with {0}".format(gene_id))
                continue

            # write the transcript details to a file
            for key in counts:
                line = "{0}\t{1}\t{2}\t{3}\n".format(
                    gene_id, key, counts[key]["len"], counts[key]["n"])
                output.write(line)
Example #4
0
 def test_count_de_novos_per_transcript(self):
     """Check the 'len' and 'n' values reported for each DYNLL1 transcript.

     Two sites are passed to count_de_novos_per_transcript() and the
     returned mapping must equal the expected per-transcript table exactly.
     """
     symbol = "DYNLL1"
     positions = [120934226, 120936012]

     # One row per transcript: (transcript ID, 'len' value, 'n' value).
     rows = [
         ('ENST00000549649', 42, 1),
         ('ENST00000548214', 67, 1),
         ('ENST00000242577', 89, 2),
         ('ENST00000392508', 89, 2),
         ('ENST00000392509', 89, 2),
         ('ENST00000549989', 89, 2),
         ('ENST00000550178', 67, 1),
         ('ENST00000552870', 47, 1),
         ('ENST00000550845', 67, 1),
         ('ENST00000548342', 89, 2),
     ]
     expected = {tx_id: {'len': length, 'n': n_sites}
                 for tx_id, length, n_sites in rows}

     observed = count_de_novos_per_transcript(self.ensembl, symbol, positions)

     self.assertEqual(observed, expected)
Example #5
0
 def test_count_de_novos_per_transcript(self):
     """Check the 'len' and 'n' values reported for each DYNLL1 transcript.

     Two sites are passed to count_de_novos_per_transcript() and the
     returned mapping must equal the expected per-transcript table exactly.
     """
     symbol = "DYNLL1"
     positions = [120934226, 120936012]

     # One row per transcript: (transcript ID, 'len' value, 'n' value).
     rows = [
         ('ENST00000549649', 42, 1),
         ('ENST00000548214', 67, 1),
         ('ENST00000242577', 89, 2),
         ('ENST00000392508', 89, 2),
         ('ENST00000392509', 89, 2),
         ('ENST00000549989', 89, 2),
         ('ENST00000550178', 67, 1),
         ('ENST00000552870', 47, 1),
         ('ENST00000550845', 67, 1),
         ('ENST00000548342', 89, 2),
     ]
     expected = {tx_id: {'len': length, 'n': n_sites}
                 for tx_id, length, n_sites in rows}

     observed = count_de_novos_per_transcript(self.ensembl, symbol, positions)

     self.assertEqual(observed, expected)