Example #1
0
def create_test_kinase(name, refseq):
    """Build a Kinase (for tests) wired to a fresh protein and gene.

    The kinase gets the given `name`; its protein is a new Protein with
    the given `refseq`, which belongs to a new Gene named
    'Gene of <kinase name>'.

    Returns the created Kinase.
    """
    kinase = Kinase(name=name)

    # the gene name is derived from the name as stored on the kinase object
    kinase.protein = Protein(
        refseq=refseq,
        gene=Gene(name='Gene of ' + kinase.name)
    )

    return kinase
Example #2
0
def get_or_create_kinases(
        chosen_kinases_names, known_kinases,
        known_kinase_groups) -> 'Tuple[Set[Kinase], Set[KinaseGroup]]':
    """Create a subset of known kinases and known kinase groups based on given
    list of kinases names ('chosen_kinases_names'). If no kinase or kinase group
    of given name is known, it will be created.

    Names ending with '_GROUP' denote kinase groups (the suffix is stripped
    before lookup/creation); any other name denotes a kinase. Both mappings
    ('known_kinases', 'known_kinase_groups') are keyed by lower-cased name
    and are updated in place with every newly created object. Duplicated
    names in 'chosen_kinases_names' are processed only once.

    Returns a tuple of sets:
        kinases, groups
    """
    # NOTE: the return annotation is quoted, so it is not evaluated when the
    # function is defined; static checkers still need `Tuple` (typing) to be
    # importable in this module.
    group_suffix = '_GROUP'
    kinases, groups = set(), set()

    for name in set(chosen_kinases_names):

        # handle kinases group
        if name.endswith(group_suffix):
            name = name[:-len(group_suffix)]
            key = name.lower()
            if key not in known_kinase_groups:
                known_kinase_groups[key] = KinaseGroup(name=name)
            groups.add(known_kinase_groups[key])
        # if it's not a group, it surely is a kinase:
        else:
            key = name.lower()
            if key not in known_kinases:
                # the isoform lookup happens only for kinases not seen before
                known_kinases[key] = Kinase(
                    name=name, protein=get_preferred_gene_isoform(name))
            kinases.add(known_kinases[key])

    return kinases, groups
Example #3
0
    def parser(line):
        """Process one row of the kinase classification file.

        Makes sure that both the kinase named in the row and the group
        (the 'family' column) exist in `known_kinases` / `known_groups`
        (both keyed by lower-cased name), creating them when missing, and
        then appends the kinase to the group's `kinases`. Newly created
        groups are additionally collected in `new_groups`.
        """
        # columns 2-4 hold group, family and subfamily;
        # note that the subfamily is often absent
        _classification_group, family, subfamily = line[2:5]

        # the 'gene.clean' [6] fits better to the names
        # of kinases used in all other data files
        kinase_name = line[6]

        # 'group.clean' [8] is not atomic and is redundant with respect to
        # family and subfamily. Verifying it here lets the maintainer spot
        # an inconsistency easily, should the file format ever change
        if subfamily:
            expected_clean = family + '_' + subfamily
        else:
            expected_clean = family
        assert line[8] == expected_clean

        kinase_key = kinase_name.lower()
        family_key = family.lower()

        if kinase_key not in known_kinases:
            known_kinases[kinase_key] = Kinase(
                name=kinase_name,
                protein=get_preferred_gene_isoform(kinase_name)
            )

        # the 'family' corresponds to 'group' in the all other files
        if family_key not in known_groups:
            created_group = KinaseGroup(name=family)
            known_groups[family_key] = created_group
            new_groups.append(created_group)

        members = known_groups[family_key].kinases
        members.append(known_kinases[kinase_key])
Example #4
0
def create_test_models():
    """Build a small graph of interlinked model objects for use in tests.

    Creates a protein (with its gene), a mutation of that protein, an
    MC3Mutation and an InheritedMutation referring to the mutation, and
    a kinase (with its own protein) assigned to a site of the first protein.

    Returns the function's local namespace (`locals()`), i.e. a dict with
    the keys: 'protein', 'mutation', 'protein_kinase', 'kinase', 'site'.
    Because of that, the local variable names below are part of the
    returned value and must not be renamed.
    """
    protein = Protein(refseq='NM_0001',
                      gene=Gene(name='SOMEGENE'),
                      sequence='ABCD')
    mutation = Mutation(protein=protein, position=1, alt='E')
    # make the protein the preferred isoform of its own gene
    protein.gene.preferred_isoform = protein

    # The two objects below are not bound to any local name, so they are not
    # included in the returned dict; presumably they stay reachable through
    # relationships of `mutation` - TODO confirm against the models.
    MC3Mutation(mutation=mutation,
                cancer=Cancer(code='CAN'),
                samples='Sample A,Sample B',
                count=2)
    InheritedMutation(mutation=mutation,
                      clin_data=[
                          ClinicalData(disease=Disease(name='Some disease'),
                                       sig_code=5),
                          ClinicalData(disease=Disease(name='Other disease'),
                                       sig_code=2)
                      ])

    # a separate protein (and gene) representing the kinase itself
    protein_kinase = Protein(refseq='NM_0002',
                             gene=Gene(name='OTHERGENE'),
                             sequence='ABCD')
    kinase = Kinase(name='Kinase name', protein=protein_kinase)
    # a site on the first protein, associated with the kinase created above
    site = Site(protein=protein,
                position=1,
                residue='A',
                kinases={kinase},
                pmid={1, 2},
                types={SiteType(name='glycosylation')})
    protein.sites = [site]

    # expose every local created above, keyed by variable name
    return locals()
Example #5
0
    def test_train_model(self):
        """Train a model for phosphorylation sites and check its basics.

        Two proteins are stored: one without any sites and one with two
        sites (positions 9 and 24, residue 'S') assigned to the CDK1 kinase.
        The trained model is expected to contain exactly one entry ('CDK1')
        with a non-empty position-specific weight matrix ('pwm') in which
        'S' has the highest weight in the inspected column.
        """
        phosphorylation = SiteType(name='phosphorylation')

        # non-phosphorylated serine residues are needed to generate negative sites
        background_protein = Protein(
            refseq='NM_007',
            sequence='--------SLPA-----------SVIT-------'
        )
        db.session.add(
            Gene(isoforms=[background_protein],
                 preferred_isoform=background_protein)
        )

        # phosphorylated, with sites
        substrate = Protein(
            refseq='NM_001',
            sequence='--------SPAK-----------SPAR-------'
        )
        db.session.add(
            Gene(isoforms=[substrate], preferred_isoform=substrate)
        )

        cdk1 = Kinase(name='CDK1', is_involved_in={phosphorylation})

        for position in [9, 24]:
            db.session.add(
                Site(position=position,
                     types={phosphorylation},
                     residue='S',
                     protein=substrate,
                     kinases={cdk1})
            )

        db.session.commit()

        # the sequences directory is only needed for the time of training
        with TemporaryDirectory() as sequences_dir:
            model = train_model(phosphorylation,
                                sequences_dir=sequences_dir,
                                sampling_n=2,
                                threshold=2)

        # the model should have one set of params - for CDK1 kinase
        assert len(model) == 1

        pwm = model.rx2('CDK1').rx2('pwm')

        # and the position-specific weight matrix should be created
        assert pwm

        # the very detailed testing should be performed by rMIMP,
        # but why not test the basics?

        # map each row name (amino acid) to its weight in column 8,
        # which the test treats as the central position
        central_weights = dict(zip(pwm.rownames, pwm.rx(True, 8)))
        highest_weight = max(central_weights.values())
        assert central_weights['S'] == highest_weight
0
    def test_interactions(self):
        """Compare interaction statistics against counts computed by hand.

        Stores two proteins with sites (some of them having kinases and
        kinase groups), counts interactions and covered kinases, kinase
        groups and proteins directly from the queried proteins, and then
        checks that `Statistics` reports the same numbers.
        """
        from models import Protein, Site, Kinase, KinaseGroup

        first_protein = Protein(sites=[
            Site(),
            Site(kinases=[Kinase()], kinase_groups=[KinaseGroup()])
        ])
        db.session.add(first_protein)
        second_protein = Protein(sites=[Site(kinases=[Kinase()])])
        db.session.add(second_protein)

        # reference values, computed independently of the Statistics class
        expected_interactions = 0
        expected_kinases = set()
        expected_groups = set()
        expected_proteins = set()

        for protein in models.Protein.query.all():
            for site in protein.sites:
                site_kinases = site.kinases
                site_groups = site.kinase_groups

                expected_interactions += len(site_kinases) + len(site_groups)
                expected_kinases.update(site_kinases)
                expected_groups.update(site_groups)

                # a protein is covered only if some site has an interactor
                if not (site_kinases or site_groups):
                    continue
                expected_proteins.add(protein)

        from stats import Statistics
        statistics = Statistics()

        # gather all the reported values before asserting anything
        reported_interactions = statistics.interactions()
        reported_kinases = statistics.kinases_covered()
        reported_groups = statistics.kinase_groups_covered()
        reported_proteins = statistics.proteins_covered()

        assert reported_interactions == expected_interactions
        assert reported_kinases == len(expected_kinases)
        assert reported_groups == len(expected_groups)
        assert reported_proteins == len(expected_proteins)
Example #7
0
    def parser(line):
        """Process one (kinase name, gene name) row of a mapping file.

        Looks up the preferred isoform of the gene; when there is none,
        a message is printed and the row is skipped. Otherwise the isoform
        is assigned to the already known kinase (printing a notice when a
        different, previously assigned isoform gets overridden) or a new
        Kinase is created and appended to `new_kinases`.
        """
        kinase_name, gene_name = line
        isoform = get_preferred_gene_isoform(gene_name)

        if not isoform:
            print('No isoform for %s kinase mapped to %s gene!' %
                  (kinase_name, gene_name))
            return

        # unknown kinase: just create it with the isoform found above
        if kinase_name not in known_kinases:
            new_kinases.append(Kinase(name=kinase_name, protein=isoform))
            return

        known = known_kinases[kinase_name]
        previous = known.protein

        # report only real changes of an existing association
        if previous and previous != isoform:
            print('Overriding kinase-protein association for '
                  '%s kinase. Old isoform: %s; new isoform: %s.' %
                  (kinase_name, known.protein.refseq, isoform.refseq))

        known.protein = isoform
Example #8
0
def create_test_models():
    """Build a small graph of interlinked model objects for use in tests.

    Creates a protein (with its gene), a mutation of that protein, an
    MC3Mutation and an InheritedMutation referring to the mutation, and
    a kinase (with its own protein) assigned to a site of the first protein.

    Returns the function's local namespace (`locals()`), i.e. a dict with
    the keys: 'protein', 'mutation', 'protein_kinase', 'kinase', 'site'.
    Because of that, the local variable names below are part of the
    returned value and must not be renamed.
    """
    protein = Protein(refseq='NM_0001',
                      gene=Gene(name='SOMEGENE'),
                      sequence='ABCD')
    mutation = Mutation(protein=protein, position=1, alt='E')

    # The two objects below are not bound to any local name, so they are not
    # included in the returned dict; presumably they stay reachable through
    # relationships of `mutation` - TODO confirm against the models.
    MC3Mutation(mutation=mutation,
                cancer=Cancer(code='CAN'),
                samples='Some sample')
    InheritedMutation(
        mutation=mutation,
        clin_data=[ClinicalData(disease=Disease(name='Some disease'))])

    # a separate protein (and gene) representing the kinase itself
    protein_kinase = Protein(refseq='NM_0002',
                             gene=Gene(name='OTHERGENE'),
                             sequence='ABCD')
    kinase = Kinase(name='Kinase name', protein=protein_kinase)
    # a site on the first protein, associated with the kinase created above
    site = Site(protein=protein, position=1, residue='A', kinases=[kinase])
    protein.sites = [site]

    # expose every local created above, keyed by variable name
    return locals()
    def test_classification(self):
        """Check that the kinase classification file is loaded correctly.

        The following assumptions about the data file hold:
            - 'family' fits better to our 'group' than any other column
            - 'gene.clean', not 'Kinase', is used as the kinase name,
              as it fits much better.
        """
        # kinases and groups which exist before the file is loaded
        kinases_by_name = {}
        for kinase_name in ('AKT1', 'Akt2', 'CIT'):
            kinases_by_name[kinase_name] = Kinase(name=kinase_name)

        groups_by_name = {'Akt': KinaseGroup(name='Akt')}

        def add_to_session():
            for fixtures in (kinases_by_name, groups_by_name):
                db.session.add_all(fixtures.values())

        filename = make_named_temp_file(raw_gene_list)

        add_to_session()

        with self.app.app_context():
            new_groups = load_kinase_classification(filename)

        # exactly one group was not known beforehand
        assert len(new_groups) == 1
        created_group = new_groups[0]

        assert created_group.name == 'DMPK'

        add_to_session()

        assert len(created_group.kinases) == 2
        assert kinases_by_name['CIT'] in created_group.kinases

        # the pre-existing group should have received its kinases too
        akt_group = groups_by_name['Akt']
        assert len(akt_group.kinases) == 3

        for member_name in ('AKT1', 'Akt2'):
            assert kinases_by_name[member_name] in akt_group.kinases