Ejemplo n.º 1
0
    def test_divide_muts_by_sites(self):
        """Exercise divide_muts_by_sites on empty, one-site and multi-site input."""
        from views.network import divide_muts_by_sites

        # the degenerate call (no mutations, no sites) only has to not raise
        divide_muts_by_sites([], [])

        # a single site with no mutations maps to an empty list
        s_1 = Site(position=1)
        muts_by_sites = divide_muts_by_sites([], [s_1])
        assert muts_by_sites[s_1] == []

        # full scenario: three sites with mutations scattered around them
        s_2 = Site(position=10)
        s_3 = Site(position=20)

        muts_by_pos = {}
        for pos in (1, 2, 8, 14, 16, 30):
            muts_by_pos[pos] = [Mutation(position=pos)]

        # position 16 carries two separate mutation objects
        muts_by_pos[16].append(Mutation(position=16))

        def get_muts_from_pos(*positions):
            """Return the mutations stored under `positions` as one flat list."""
            collected = []
            for p in positions:
                collected.extend(muts_by_pos[p])
            return collected

        # flatten every mutation, keeping the dict's iteration order
        all_muts = []
        for muts_on_pos_x in muts_by_pos.values():
            all_muts.extend(muts_on_pos_x)

        muts_by_sites = divide_muts_by_sites(all_muts, [s_1, s_2, s_3])

        # a mutation may be listed under more than one site (e.g. 8, 14, 16),
        # while the one at position 30 is expected under none of them
        expected_positions = [
            (s_1, (1, 2, 8)),
            (s_2, (8, 14, 16)),
            (s_3, (14, 16)),
        ]
        for site, positions in expected_positions:
            assert muts_by_sites[site] == get_muts_from_pos(*positions)
Ejemplo n.º 2
0
def create_network():
    """Store a small protein / kinase / drug network fixture in the database.

    Builds a test protein with two phosphorylation sites, a known interactor
    ('Kinase Y') targeted by an approved drug, a predicted interactor
    ('Kinase Z') referenced by MIMP records, then adds the protein, the drug
    and the predicted interactor to the session, commits, and reloads the
    cached filter queries.
    """
    p = create_test_protein()
    cancer = Cancer(name='Ovarian', code='OV')

    known_interactor_of_x = create_test_kinase('Kinase Y', 'NM_0009')

    # the known interactor carries one MC3 mutation of its own
    kinase_mc3 = [MC3Mutation(cancer=cancer)]
    known_interactor_of_x.protein.mutations = [
        Mutation(position=1, alt='T', meta_MC3=kinase_mc3)
    ]

    # by default only approved drugs are shown
    approved = {DrugGroup(name='approved')}
    drug = Drug(
        name='Drug targeting ' + known_interactor_of_x.name,
        drug_bank_id='DB01',
        target_genes=[known_interactor_of_x.protein.gene],
        groups=approved
    )

    group = KinaseGroup(name='Group of kinases')
    s = Site(
        position=1,
        type='phosphorylation',
        residue='T',
        kinases=[known_interactor_of_x],
        kinase_groups=[group]
    )
    s2 = Site(
        position=2,
        type='phosphorylation',
        residue='R',
        kinase_groups=[group]
    )
    p.sites = [s, s2]

    predicted_interactor = create_test_kinase('Kinase Z', 'NM_0002')

    # the protein mutation loses the known interactor's motif and gains
    # the predicted interactor's one, both at the first site
    protein_mc3 = [MC3Mutation(cancer=cancer)]
    mimp_records = [
        MIMPMutation(
            pwm=kinase_name,
            effect=effect,
            site=s,
            probability=0.1,
            position_in_motif=1
        )
        for kinase_name, effect in (
            (known_interactor_of_x.name, 'loss'),
            (predicted_interactor.name, 'gain'),
        )
    ]
    protein_mutation = Mutation(
        position=2,
        alt='T',
        meta_MC3=protein_mc3,
        meta_MIMP=mimp_records
    )

    p.mutations = [protein_mutation]
    db.session.add_all([p, drug, predicted_interactor])
    db.session.commit()

    # a new cancer was added, so a reload is necessary
    # (this should not happen during normal app usage)
    from website.views.filters import cached_queries
    cached_queries.reload()
Ejemplo n.º 3
0
    def test_search_mutations(self):
        """Search mutations by genomic coordinates: text query, repeated query, VCF upload."""

        ptm_site = Site(position=13, types={SiteType(name='methylation')})
        p = Protein(refseq='NM_007', id=7, sites=[ptm_site], sequence='XXXXXXXXXXXXV')

        m_in_site = Mutation(protein=p, position=13, alt='V')
        m_out_site = Mutation(protein=p, position=50, alt='K')

        db.session.add(p)

        def table_cell(mutation):
            """Return the encoded table cell markup holding the mutation's alt residue."""
            return '<td>{0}</td>'.format(mutation.alt).encode()

        # points to the same location as the first record in VCF_FILE_CONTENT
        test_query = 'chr20 14370 G A'

        from database import bdb

        # map the first genomic mutation from VCF_FILE_CONTENT
        # to some (mocked) protein mutation
        bdb.add_genomic_mut('20', 14370, 'G', 'A', m_in_site, is_ptm=True)

        # --- basic test: is the appropriate mutation in the results? ---
        response = self.search_mutations(mutations=test_query)
        assert response.status_code == 200

        page = response.data
        # this mutation is exactly at a PTM site and should be included in results
        assert table_cell(m_in_site) in page
        # this mutation lies outside of a PTM site - by default it should be filtered out
        assert table_cell(m_out_site) not in page

        # --- count test: is a query given twice annotated as seen twice? ---
        doubled_query = '{0}\n{0}'.format(test_query)
        response = self.search_mutations(mutations=doubled_query)

        assert response.status_code == 200
        assert b'<td>2</td>' in response.data

        # --- VCF file test ---
        upload = {
            'vcf-file': (BytesIO(VCF_FILE_CONTENT), 'exemplar_vcf.vcf')
        }
        response = self.client.post(
            '/search/mutations',
            content_type='multipart/form-data',
            data=upload
        )

        assert response.status_code == 200
        assert b'NM_007' in response.data
Ejemplo n.º 4
0
def create_test_models():
    """Build a set of related model instances and return them by local name.

    Returns:
        dict: the result of ``locals()``, i.e. a mapping with the keys
        ``protein``, ``mutation``, ``protein_kinase``, ``kinase`` and ``site``.
        Because of that, renaming or adding a local variable here changes
        the returned mapping.
    """
    protein = Protein(refseq='NM_0001',
                      gene=Gene(name='SOMEGENE'),
                      sequence='ABCD')
    mutation = Mutation(protein=protein, position=1, alt='E')
    protein.gene.preferred_isoform = protein

    # The detail objects below are not bound to a name, so they are absent
    # from the returned mapping; they are tied to `mutation` only through the
    # `mutation=` argument (presumably an ORM relationship - verify in models).
    MC3Mutation(mutation=mutation,
                cancer=Cancer(code='CAN'),
                samples='Sample A,Sample B',
                count=2)
    InheritedMutation(mutation=mutation,
                      clin_data=[
                          ClinicalData(disease=Disease(name='Some disease'),
                                       sig_code=5),
                          ClinicalData(disease=Disease(name='Other disease'),
                                       sig_code=2)
                      ])

    protein_kinase = Protein(refseq='NM_0002',
                             gene=Gene(name='OTHERGENE'),
                             sequence='ABCD')
    kinase = Kinase(name='Kinase name', protein=protein_kinase)
    site = Site(protein=protein,
                position=1,
                residue='A',
                kinases={kinase},
                pmid={1, 2},
                types={SiteType(name='glycosylation')})
    protein.sites = [site]

    # exposes every local defined above under its variable name
    return locals()
    def test_prepare_dataset(self):
        """prepare_datasets should report the MC3 details and mark other sources absent."""
        from views.mutation import prepare_datasets

        gene = Gene(name='TP53')
        p = Protein(refseq='NM_000123', sequence='TRAN', gene=gene)
        mutation = Mutation(protein=p, position=2, alt='K')
        details = MC3Mutation(mutation=mutation, count=2)

        db.session.add(mutation)

        datasets, user_datasets = prepare_datasets(mutation)

        # one entry per confirmed source; only MC3 has details for this mutation
        expected_datasets = []
        for source in source_manager.confirmed:
            if source is MC3Mutation:
                presence = [details]
            else:
                presence = False
            expected_datasets.append({
                'filter': 'Mutation.sources:in:' + source.name,
                'name': source.display_name,
                'mutation_present': presence
            })

        assert datasets == expected_datasets
        assert not user_datasets
Ejemplo n.º 6
0
    def test_sites(self):
        """Check how many sites find_closest_sites and get_affected_ptm_sites return."""

        mutations = [Mutation(position=x) for x in (0, 5, 12, 57)]
        sites = [Site(position=x) for x in (10, 14, 15, 57)]

        protein = Protein(refseq='NM_00002', mutations=mutations, sites=sites)

        db.session.add(protein)
        db.session.commit()

        def assert_site_counts(find_sites, expected_counts):
            """Assert that `find_sites(mutation)` yields the expected number of sites."""
            expected_by_mutation = dict(zip(mutations, expected_counts))
            for mutation, expected_sites_cnt in expected_by_mutation.items():
                assert len(find_sites(mutation)) == expected_sites_cnt

        # ==test_find_closest_sites==

        # for mutation at position 0 there is no closest site;
        # for mutation at position 5 there should be 1 closest site
        assert_site_counts(lambda m: m.find_closest_sites(), [0, 1, 2, 1])

        # ==test_get_affected_ptm_sites==

        assert_site_counts(lambda m: m.get_affected_ptm_sites(), [0, 1, 3, 1])
Ejemplo n.º 7
0
    def test_mutate(self):
        """mutate_sequence should substitute the alt residue at the mutated position."""

        p = Protein(sequence='ABCDE')
        s = Site(protein=p, position=3, residue='C')

        # (mutation position, sequence expected after the mutation is applied)
        cases = [
            (1, 'XBCDE'),
            (3, 'ABXDE'),
        ]

        for position, expected_seq in cases:
            m = Mutation(protein=p, position=position, alt='X')
            mutated = mutate_sequence(s, m, offset=2)

            assert mutated == expected_seq
Ejemplo n.º 8
0
    def test_impact_on_ptm(self):
        """A mutation located exactly on a site should be reported as 'direct'."""

        mutation = Mutation(position=61)
        protein = Protein(refseq='NM_00001', mutations=[mutation])
        db.session.add(protein)

        # one site right at the mutation's position, two others further away
        protein.sites = [Site(position=pos) for pos in (61, 54, 51)]

        assert mutation.impact_on_ptm() == 'direct'
Ejemplo n.º 9
0
    def test_show(self):
        """The mutation page should respond with 200 and mention the gene and refseq."""

        gene = Gene(name='TP53')
        p = Protein(refseq='NM_000123', sequence='TRAN', gene=gene)
        mutation = Mutation(protein=p, position=2, alt='K')

        db.session.add(mutation)

        response = self.client.get('/mutation/show/NM_000123/2/K')

        assert response.status_code == 200
        for fragment in (b'TP53', b'NM_000123'):
            assert fragment in response.data
Ejemplo n.º 10
0
    def test_browse_list(self):
        """Browse the TCGA gene list: static page first, then the JSON data endpoint."""

        from miscellaneous import make_named_temp_file
        from test_imports.test_gene_list import raw_gene_list
        from imports.protein_data import active_driver_gene_lists as load_active_driver_gene_lists

        filename = make_named_temp_file(raw_gene_list)

        # create gene list and genes
        with self.app.app_context():
            from imports.protein_data import ListData
            tcga_list = ListData(name='TCGA', path=filename, mutations_source=TCGAMutation)
            gene_lists = load_active_driver_gene_lists(lists=(tcga_list,))
        db.session.add_all(gene_lists)

        # create preferred isoforms for genes
        for i, gene in enumerate(Gene.query.all()):
            # at least one mutation is required for gene on a gene list to be displayed
            mut = Mutation()
            MC3Mutation(mutation=mut)

            isoform = Protein(refseq='NM_000%s' % i, mutations=[mut])
            gene.isoforms = [isoform]
            gene.preferred_isoform = isoform

        # check the static template
        response = self.client.get('/gene/list/TCGA')
        assert response.status_code == 200
        assert b'TCGA' in response.data

        # check the dynamic data
        response = self.client.get('/gene/list_data/TCGA?order=asc')
        assert response.status_code == 200

        gene_list = GeneList.query.filter_by(name='TCGA').one()
        payload = response.json

        # all results retrieved
        assert payload['total'] == len(gene_list.entries)

        # properly sorted by fdr
        fdrs = []
        for row in payload['rows']:
            fdrs.append(row['fdr'])
        assert fdrs == sorted(fdrs)
Ejemplo n.º 11
0
def create_test_models():
    """Build a set of related model instances and return them by local name.

    Returns:
        dict: the result of ``locals()``, i.e. a mapping with the keys
        ``protein``, ``mutation``, ``protein_kinase``, ``kinase`` and ``site``.
        Because of that, renaming or adding a local variable here changes
        the returned mapping.
    """
    protein = Protein(refseq='NM_0001',
                      gene=Gene(name='SOMEGENE'),
                      sequence='ABCD')
    mutation = Mutation(protein=protein, position=1, alt='E')

    # The detail objects below are not bound to a name, so they are absent
    # from the returned mapping; they are tied to `mutation` only through the
    # `mutation=` argument (presumably an ORM relationship - verify in models).
    MC3Mutation(mutation=mutation,
                cancer=Cancer(code='CAN'),
                samples='Some sample')
    InheritedMutation(
        mutation=mutation,
        clin_data=[ClinicalData(disease=Disease(name='Some disease'))])

    protein_kinase = Protein(refseq='NM_0002',
                             gene=Gene(name='OTHERGENE'),
                             sequence='ABCD')
    kinase = Kinase(name='Kinase name', protein=protein_kinase)
    site = Site(protein=protein, position=1, residue='A', kinases=[kinase])
    protein.sites = [site]

    # exposes every local defined above under its variable name
    return locals()
Ejemplo n.º 12
0
    def test_impact_on_specific_ptm(self):
        """Check PTM impact without sites, with sites, and with an all-rejecting filter."""

        # impact expected for each mutation once a site exists at position 1
        expected_impacts = {
            Mutation(position=10): 'none',  # too far away
            Mutation(position=9): 'none',
            Mutation(position=8): 'distal',
            Mutation(position=4): 'distal',
            Mutation(position=3): 'proximal',
            Mutation(position=2): 'proximal',
            Mutation(position=1): 'direct'
        }

        protein = Protein(refseq='NM_00001', mutations=expected_impacts.keys())

        db.session.add(protein)

        # case 0: there are no sites in the protein

        for mutation in expected_impacts:
            assert mutation.impact_on_ptm() == 'none'

        # case 1: there are some sites in the protein

        protein.sites = [Site(position=1), Site(position=50)]
        site = protein.sites[0]

        for mutation, impact in expected_impacts.items():
            print(mutation)
            assert mutation.impact_on_ptm() == impact
            assert mutation.impact_on_specific_ptm(site) == impact

        # case 2: there are some sites but all will be excluded by a site filter

        def reject_all_sites(sites):
            """Site filter which discards every site it is given."""
            return []

        for mutation in expected_impacts:
            assert mutation.impact_on_ptm(site_filter=reject_all_sites) == 'none'
Ejemplo n.º 13
0
 def mut(pos):
     """Create a mutation to 'X' at `pos`, attached to the protein `p` from the outer scope."""
     mutation = Mutation(position=pos, alt='X', protein=p)
     return mutation
Ejemplo n.º 14
0
    def test_autocomplete_all(self):
        """Exercise the '/search/autocomplete_all/' endpoint for every entry type.

        Covers, in order: amino-acid mutations and their "awaiting" prompts,
        genes, genomic (nucleotide) mutations including the complementary
        strand fallback, pathways (Reactome / Gene Ontology / free text),
        diseases, and the "disease in protein" suggestions.
        """

        # MC3 GeneList is required as a target (a href for links) where users will be pointed
        # after clicking of cancer autocomplete suggestion. Likewise with the ClinVar list.
        db.session.add_all([
            GeneList(name=name, mutation_source_name=detail_class.name)
            for name, detail_class in [
                ('TCGA', MC3Mutation), ('ClinVar', InheritedMutation)
            ]
        ])

        g = Gene(name='BR')
        p = Protein(id=1, refseq='NM_007', gene=g, sequence='XXXXXV')
        g.preferred_isoform = p     # required for gene search to work - genes without preferred isoforms are ignored
        mut = Mutation(protein=p, position=6, alt='E')
        db.session.add_all([mut, p, g])

        def autocomplete(query):
            """Query the endpoint and pass the response to self.visit_returned_urls."""
            r = self.client.get('/search/autocomplete_all/?q=' + query)
            self.visit_returned_urls(r)
            return r

        from database import bdb_refseq, bdb
        bdb_refseq['BR V6E'] = [p.id]  # required for mutation search
        bdb.add_genomic_mut('1', 10000, 'T', 'C', mut)

        # Gene and mutations

        response = autocomplete('BR V6E')
        entry = get_entry_and_check_type(response, 'aminoacid mutation')
        assert entry

        response = autocomplete('BR V6')
        entry = get_entry_and_check_type(response, 'message')
        assert 'Awaiting for <code>{alt}</code>' in entry['name']

        response = autocomplete('BR V')
        entry = get_entry_and_check_type(response, 'message')
        assert 'Awaiting for <code>{pos}{alt}</code>' in entry['name']

        response = autocomplete('B')
        entry = get_entry_and_check_type(response, 'gene')
        assert 'BR' == entry['name']

        # genomic mutation
        response = autocomplete('chr1 10000 T C')
        entry = get_entry_and_check_type(response, 'nucleotide mutation')
        assert entry and entry['input'] == 'CHR1 10000 T C'

        # is the search falling back to the other strand?
        response = autocomplete('chr1 10000 A G')
        entry = get_entry_and_check_type(response, 'nucleotide mutation')
        assert entry and entry['input'] == 'complement of CHR1 10000 A G'

        prompt = 'Awaiting for mutation in <code>{chrom} {pos} {ref} {alt}</code> format'

        # every incomplete genomic query should yield the same prompt
        for prompt_invoking_query in ['chr1', 'chr1 ', 'chr1 40', 'chr1 40 ', 'chr1 40 T']:
            response = autocomplete(prompt_invoking_query)
            entry = get_entry_and_check_type(response, 'message')
            assert entry['name'] == prompt

        # Pathways

        pathways = [
            Pathway(description='Activation of RAS in B cells', reactome=1169092),
            Pathway(description='abortive mitotic cell cycle', gene_ontology=33277),
            Pathway(description='amacrine cell differentiation', gene_ontology=35881),
            Pathway(description='amniotic stem cell differentiation', gene_ontology=97086)
        ]

        db.session.add_all(pathways)

        # test partial matching and Reactome id pathways search
        for ras_activation_query in ['Activation', 'REAC:1', 'REAC:1169092']:
            response = autocomplete(ras_activation_query)
            entry = get_entry_and_check_type(response, 'pathway')
            assert entry['name'].startswith('Activation of RAS in B cells')

        # test Gene Ontology search:
        response = autocomplete('GO:33')
        go_pathway = get_entry_and_check_type(response, 'pathway')
        assert go_pathway['name'] == 'abortive mitotic cell cycle (GO:33277)'

        # check if multiple pathways are returned
        response = autocomplete('differentiation')
        assert len(response.json['entries']) == 2

        # check if both genes and pathways are returned simultaneously
        # there should be: a pathway ('a>b<ortive...') and the >B<R gene
        response = autocomplete('b')
        entries = response.json['entries']
        names = [entry['name'] for entry in entries]
        assert all(name in names for name in ['BR', 'abortive mitotic cell cycle'])

        # check if "search more pathways" is displayed
        response = autocomplete('cell')    # cell occurs in all four of added pathways;
        # as a limit of pathways shown is 3, we should get a "show more" link
        links = entries_with_type(response, 'see_more')
        assert len(links) == 1
        assert links[0]['name'] == 'Show all pathways matching <i>cell</i>'

        # test case insensitive text search
        response = autocomplete('AMNIOTIC STEM')
        # kept under its own name so the list of Pathway models above is not rebound
        matching_pathways = entries_with_type(response, 'pathway')
        assert len(matching_pathways) == 1
        assert matching_pathways[0]['name'] == 'amniotic stem cell differentiation'

        # Disease
        disease_names = [
            'Cystic fibrosis', 'Polycystic kidney disease 2',
            'Frontotemporal dementia', 'Cataract, nuclear total'
        ]
        diseases = {name: Disease(name=name) for name in disease_names}
        db.session.add_all(diseases.values())

        response = autocomplete('cystic')
        cystic_matching = entries_with_type(response, 'disease')
        # both 'Cystic fibrosis' and PKD2 should match
        assert len(cystic_matching) == 2

        # is comma containing disease name properly linked?
        response = autocomplete('Cataract')
        cataract = get_entry_and_check_type(response, 'disease')
        assert cataract['name'] == 'Cataract, nuclear total'

        # Gene mutation in disease

        # test suggestions
        response = autocomplete('cystic ')
        entry = entries_with_type(response, 'message')[0]
        # raw string: '\?' is not a valid escape sequence in a plain string literal
        assert re.match(r'Do you wish to search for (.*?) mutations\?', entry['name'])

        # currently there are no mutations associated with any disease
        # so the auto-completion should not return any results
        response = autocomplete('cystic in ')
        assert not response.json['entries']

        # let's add a mutation
        m = Mutation(protein=p, position=1, alt='Y')
        # NOTE(review): the 'BR V6E' entry above stores a protein id while this
        # one stores a refseq string - confirm which form bdb_refseq expects
        bdb_refseq['BR X1Y'] = ['NM_007']
        # note: sig_code is required here
        data = ClinicalData(disease=diseases['Cystic fibrosis'], sig_code=1)
        disease_mutation = InheritedMutation(mutation=m, clin_data=[data])
        db.session.add_all([m, data, disease_mutation])

        # should return '.. in BR' suggestion now.
        for query in ['cystic in', 'cystic in ']:
            response = autocomplete(query)
            result = get_entry_and_check_type(response, 'disease_in_protein')
            assert result['gene'] == 'BR'
            assert result['name'] == 'Cystic fibrosis'

        # both gene search and refseq search should yield the same, non-empty results
        results = []

        for query in ['cystic in BR', 'cystic in NM_007', 'cystic in 007']:
            response = autocomplete(query)
            result = get_entry_and_check_type(response, 'disease_in_protein')
            results.append(result)

        # compare against the last entry explicitly rather than a leaked loop variable
        reference = results[-1]
        assert reference
        assert all(r == reference for r in results)
Ejemplo n.º 15
0
    def test_counting(self):
        """Count and gather mutations/sites per motif for an artificial 'xation' site type."""

        # xation happens whenever there is X which is not preceded with or followed by another X
        x_motifs = {
            'canonical': '.{6}[^X]X[^X].{6}',
            'non-canonical': 'XXY'
        }
        motifs_db = {'xation': x_motifs}

        p = Protein(refseq='NM_007', id=1, sequence='_X_X_______X________XXY')

        # the mutations are attached to the protein through `protein=p`;
        # (position, alt) pairs:
        #   (1, 'X') - proximal, breaking
        #   (1, 'o') - proximal, non-breaking
        #   (2, 'Y') - direct, breaking
        #   (3, 'X') - proximal for two sites, breaking
        mutations = [
            Mutation(protein=p, position=position, alt=alt)
            for position, alt in ((1, 'X'), (1, 'o'), (2, 'Y'), (3, 'X'))
        ]

        xation = SiteType(name='xation')

        # canonical motif sites:
        #   2  - seriously mutated and broken
        #   4  - mutated
        #   12 - not mutated
        canonical_sites = [
            Site(protein=p, position=position, types={xation})
            for position in (2, 4, 12)
        ]
        # non-canonical motif, not mutated
        other_sites = [Site(protein=p, position=22, types={xation})]
        all_sites = canonical_sites + other_sites

        db.session.add(p)
        db.session.commit()

        counter = MotifsCounter(xation, motifs_db=motifs_db)

        # numeric summary
        counts = counter.count_muts_and_sites(Mutation.query, Site.query)

        assert counts.muts_around_sites_with_motif['canonical'] == 4
        assert counts.muts_breaking_sites_motif['canonical'] == 3
        assert counts.sites_with_broken_motif['canonical'] == 2
        assert counts.sites_with_motif['canonical'] == len(canonical_sites)

        assert counts.sites_with_broken_motif['non-canonical'] == 0
        assert counts.muts_around_sites_with_motif['non-canonical'] == 0

        # site selection should agree for a query and for a plain list of sites
        selection = select_sites_with_motifs(Site.query, x_motifs)

        assert selection['canonical'] == set(canonical_sites)
        assert select_sites_with_motifs(all_sites, x_motifs) == selection

        # the gathered objects (instead of bare counts)
        data = counter.gather_muts_and_sites(Mutation.query, Site.query)

        broken_sites = {canonical_sites[0], canonical_sites[1]}
        assert data.sites_with_broken_motif['canonical'] == broken_sites
        assert data.sites_with_motif['canonical'] == set(canonical_sites)
Ejemplo n.º 16
0
    def test_mutation(self):
        """Query '/chromosome/mutation/...' for a novel and for a known mutation."""

        s = Site(position=13, types={SiteType(name='methylation')})
        gene = Gene(name='SomeGene')
        p = Protein(
            refseq='NM_007',
            id=1,
            sites=[s],
            sequence='A' * 15,
            gene=gene
        )

        db.session.add(p)

        from database import bdb

        # amino acid position -> genomic position of the mocked mapping
        dna_positions = {13: 14370, 15: 14376}
        muts = {}

        for aa_pos, dna_pos in dna_positions.items():
            protein_mutation = Mutation(protein=p, position=aa_pos, alt='V')
            muts[aa_pos] = protein_mutation
            bdb.add_genomic_mut('20', dna_pos, 'G', 'A', protein_mutation, is_ptm=True)

        query_url = '/chromosome/mutation/{chrom}/{pos}/{ref}/{alt}'

        # query as a novel mutation
        novel_query = query_url.format(chrom='chr20', pos=14370, ref='G', alt='A')
        response = self.client.get(novel_query)

        assert response.status_code == 200

        expected_site = {
            'kinases': [],
            'position': 13,
            'residue': 'A',
            'kinase_groups': [],
            'type': 'methylation'
        }
        expected_record = {
            'alt': 'V',
            'gene': 'SomeGene',
            'in_datasets': {},
            'pos': 13,
            'ptm_impact': 'direct',
            'cnt_ptm': 1,
            'closest_sites': ['13 A'],
            'protein': 'NM_007',
            'sites': [expected_site],
            'ref': 'A'
        }
        assert response.json == [expected_record]

        # well let's look on a known mutation:
        m = muts[15]
        brca = Cancer(name='Breast invasive carcinoma', code='BRCA')
        mc3 = MC3Mutation(mutation=m, cancer=brca, count=1)
        esp = ExomeSequencingMutation(mutation=m, maf_all=0.02, maf_aa=0.02)

        db.session.add_all([m, mc3, esp])
        db.session.commit()

        mutation_a15v_query = query_url.format(
            chrom='chr20', pos=14376, ref='G', alt='A'
        )
        response = self.client.get(mutation_a15v_query)

        metadata = {
            'MC3': {
                'Cancers': [
                    {'Cancer': 'Breast invasive carcinoma', 'Value': 1}
                ]
            },
            'ESP6500': {
                'MAF': 0.02,
                'MAF AA': 0.02,
                'MAF EA': None
            }
        }

        assert response.json[0]['in_datasets'] == metadata

        expected_values = {'MC3': 1, 'ESP6500': 0.02}

        # if user does not want to download data for all datasets he may use:
        source_filter = '?filters=Mutation.sources:in:'
        for source, meta in metadata.items():
            response = self.client.get(mutation_a15v_query + source_filter + source)
            record = response.json[0]
            assert record['in_datasets'] == {source: meta}
            assert record['value'] == expected_values[source]

        # filters matching the stored data should keep the mutation in results
        matching_filters = (
            '?filters=Mutation.sources:in:MC3;Mutation.mc3_cancer_code:in:BRCA',
            '?filters=Mutation.sources:in:ESP6500;Mutation.populations_ESP6500:in:African American',
        )
        for filters in matching_filters:
            response = self.client.get(mutation_a15v_query + filters)
            assert response.json

        # no European American frequency was stored, so nothing should be returned
        response = self.client.get(
            mutation_a15v_query +
            '?filters=Mutation.sources:in:ESP6500;Mutation.populations_ESP6500:in:European American'
        )
        assert not response.json
Ejemplo n.º 17
0
 def test_default_ref(self):
     """After a commit, `ref` should be the residue found at the mutation's position."""
     protein = Protein(sequence='ABC')
     mutation = Mutation(position=1, protein=protein)

     session = db.session
     session.add(protein)
     session.commit()

     assert mutation.ref == 'A'