def processPathwayBKF(self, exhaustiveOr=False):
    """Build one BKB fragment per pathway and append it to ``self.bkfs``.

    For every pathway in ``self.pathways`` a BKB named after
    ``pathway.pathwayID`` is created with a ``'<pathwayID>_active='``
    component (state ``'True'``).  Unless ``exhaustiveOr`` is set, a
    ``'Gene_combo='`` selector component gets one state per gene, and each
    gene contributes three S-nodes, all with probability 1.0:

    * a prior on ``'mu-STD<=<gene>_<=mu+STD='`` being ``'True'``,
    * the selector state for that gene, given the statistical condition,
    * the pathway being active, given that selector state.

    Args:
        exhaustiveOr: When true the fragment construction is not
            implemented: a message is printed for each pathway and nothing
            is appended to ``self.bkfs``.

    Raises:
        AssertionError: If ``self.pathways`` is empty.
    """
    assert len(self.pathways) > 0, "Have not processed pathways yet"

    for pathway in tqdm.tqdm(self.pathways,
                             desc='Processing pathway BKFs'):
        bkf = BKB(name=pathway.pathwayID)

        active_comp_idx = bkf.addComponent(
            '{}_active='.format(pathway.pathwayID))
        active_true_idx = bkf.addComponentState(active_comp_idx, 'True')

        if exhaustiveOr:
            # The half-built fragment is deliberately dropped, matching the
            # previous behaviour of this branch.
            print("not implemented")
            continue

        selector_comp_idx = bkf.addComponent("Gene_combo=")
        # Each gene is a sequence: gene[0] = gene name, gene[1] = low value,
        # gene[2] = high value.  Only the name is used here.
        for gene in pathway.genes:
            gene_name = gene[0]
            selector_state_idx = bkf.addComponentState(
                selector_comp_idx, gene_name)

            condition_comp_idx = bkf.addComponent(
                'mu-STD<={}<=mu+STD='.format(gene_name))
            condition_true_idx = bkf.addComponentState(
                condition_comp_idx, 'True')

            # Prior: the statistical condition holds.
            bkf.addSNode(
                BKB_S_node(condition_comp_idx, condition_true_idx, 1.0))

            # Statistical condition -> this gene's selector state.
            bkf.addSNode(
                BKB_S_node(selector_comp_idx, selector_state_idx, 1.0,
                           [(condition_comp_idx, condition_true_idx)]))

            # Selector state -> pathway active.
            bkf.addSNode(
                BKB_S_node(active_comp_idx, active_true_idx, 1.0,
                           [(selector_comp_idx, selector_state_idx)]))

        self.bkfs.append(bkf)
Example #2
0
            query = self.reasoner.analyze_query(
                copy.deepcopy(query), preprocessed_bkb=self.processed_bkb)
            summary = query.result.summary()
            #query.result.summary()
            for update, prob in query.result.updates.items():
                comp_idx, state_idx = update
                comp_name = query.bkb.getComponentName(comp_idx)
                state_name = query.bkb.getComponentINodeName(
                    comp_idx, state_idx)
                probs.append((comp_name, state_name, prob))

        return (probs[0], probs[1]), summary


if __name__ == '__main__':
    #-- Load the fused BKB and point at the matching patient data files.
    fused_bkb = BKB()

    fused_bkb.load('/home/public/data/ncats/663Pats6Holdouts/fusion.bkb')
    patient_data_file = '/home/public/data/ncats/663Pats6Holdouts/patient_data.pk'
    withheld_patients_file = '/home/public/data/ncats/663Pats6Holdouts/withheldPatients.csv'

    #-- Alternative data set (swap with the three lines above to use it).
    #fused_bkb.load('/home/public/data/ncats/90PERCENTValidation50Patients10Validation/set1/fusion.bkb')
    #patient_data_file = '/home/public/data/ncats/90PERCENTValidation50Patients10Validation/set1/patient_data.pk'
    #withheld_patients_file = '/home/public/data/ncats/90PERCENTValidation50Patients10Validation/set1/withheldPatients.csv'

    compNames = fused_bkb.getAllComponentNames()

    #-- The withheld-patient file is read as one comma separated string.
    #-- Context managers make sure both files are closed again.
    with open(withheld_patients_file, 'r') as f:
        withheldPatientHashes = f.read().split(',')

    #-- NOTE(review): pickle executes arbitrary code while loading; only
    #-- use this with patient data files from a trusted source.
    with open(patient_data_file, 'rb') as patient_file:
        patientDict = pickle.load(patient_file)
    patientsDynamicEvidence = []
Example #3
0
    priors[gene] = float(count / len(PATIENTS))

#--Pairs Calculate Correlations
#-- For every ordered gene pair (gene1, gene2): the fraction of patients that
#-- have both genes flagged, divided by priors[gene2].
#-- NOTE(review): raises ZeroDivisionError when priors[gene2] is 0 -- confirm
#-- that every gene occurs in at least one patient.
counts = dict()
for gene1 in GENES:
    for gene2 in GENES:
        count = 0
        for patient in PATIENTS:
            if patient_data[patient]['Genes'][gene1] and patient_data[patient][
                    'Genes'][gene2]:
                count += 1
        #-- Store inside the gene2 loop so every pair is recorded; one level
        #-- further out only the last gene2 of each gene1 would be kept.
        counts[(gene1, gene2)] = float(count / len(PATIENTS)) / priors[gene2]

#-- Make BKB Patient frags
#-- One BKB fragment per patient, appended to patient_bkfs in PATIENTS order.
for patient in PATIENTS:
    bkf = BKB()

    #-- NOTE(review): this iterates every entry of the patient's 'Genes'
    #-- container.  If that container maps gene -> flag (it is indexed by
    #-- gene and used as a truth value in the correlation step), genes whose
    #-- flag is falsy also get a 'True' component here -- confirm intent.
    for gene in patient_data[patient]['Genes']:
        #-- Setup Gene i component.
        geneComp_idx = bkf.addComponent(gene)
        geneStateTrue_idx = bkf.addComponentState(geneComp_idx, 'True')

        #-- S-node giving the gene's 'True' state probability 1 (no tail).
        s_node_prior = BKB_S_node(init_component_index=geneComp_idx,
                                  init_state_index=geneStateTrue_idx,
                                  init_probability=1)
        bkf.addSNode(s_node_prior)

    patient_bkfs.append(bkf)

#-- Define Sample Pathway
#-- range(1) yields a single name: 'Pathway0'.
PATHWAYS = ['Pathway{}'.format(i) for i in range(1)]
Example #4
0
#-- (start, stop) bounds handed to random.randrange below; randrange excludes
#-- the stop value, so ages are 20..89 and survival is 0..9.
AGE_RANGE_YRS = (20, 90)
SURIVAL_YRS = (0, 10)

#-- Random demographic record for every patient.
patient_data = {
    patient: {
        'Gender': random.choice(GENDER_OPTIONS),
        'Age': random.randrange(AGE_RANGE_YRS[0], AGE_RANGE_YRS[1]),
        'Survival': random.randrange(SURIVAL_YRS[0], SURIVAL_YRS[1])
    }
    for patient in PATIENTS
}
bkfs = list()

#-- Make BKB frags
for patient in PATIENTS:
    bkf = BKB()

    for gene in GENES:
        #-- Each gene is included for this patient on a random coin flip.
        if random.choice([True, False]):
            #-- Setup Gene i component.
            comp_idx = bkf.addComponent('mut_{}'.format(gene))
            stateTrue_idx = bkf.addComponentState(comp_idx, 'True')

            #-- S-node giving 'mut_<gene>' = 'True' probability 1 (no tail).
            bkf.addSNode(
                BKB_S_node(init_component_index=comp_idx,
                           init_state_index=stateTrue_idx,
                           init_probability=1))
            #-- Variant component: its single state is a variant picked at
            #-- random from GENE_VARIANTS.
            variant_comp_idx = bkf.addComponent('mut-var_{}'.format(gene))
            random_variant = random.choice(GENE_VARIANTS)
            variant_state_idx = bkf.addComponentState(variant_comp_idx,
                                                      random_variant)
Example #5
0
import os
import sys

from pybkb import bayesianKnowledgeBase as BKB

#-- Change to your local data folder
NCATS_DIR = '/home/cyakaboski/src/python/pojects/bkb-pathway-provider'

sys.path.append(os.path.join(NCATS_DIR, 'core'))

from query import Query
from reasoner import Reasoner

if __name__ == '__main__':
    #-- Load the fused patient BKB and wrap it in a reasoner.
    bkb = BKB()
    bkb.load('/home/public/data/ncats/fusedPatientBKBSmall/fusion.bkb')

    reasoner = Reasoner(bkb)

    #-- Updating query: evidence maps a component name to a state name,
    #-- targets lists the component names to be updated.
    #-- NOTE(review): the evidence key and state look specific to this fused
    #-- BKB file (fusion timestamp / hash) -- regenerate when the file changes.
    query0 = Query(evidence={
        '_sourceFusion_14_02_2020_16:35:04__mut_CACNA1H=':
        '[0]_-7750956075882572850'
    },
                   targets=['mut_CACNA1H=', 'mu-STD>=CACNA1H<=mu+STD='],
                   type='updating')

    result = reasoner.analyze_query(query0)
Example #6
0
    def read_expression_snodes(self, expression_levels_dict, tcga_expression_file, react_gene_file, pathways_file):
        """Build reaction and pathway BKFs from two CSV files.

        Every fragment built here is appended to ``self.bkfs``: one per data
        row of ``react_gene_file`` and a single one for all of
        ``pathways_file``.  All S-node probabilities are drawn with
        ``random.random()``.

        Args:
            expression_levels_dict: Maps a gene name to the number of points
                passed to ``np.linspace`` between the gene's low and high
                value; adjacent points become one expression state.
            tcga_expression_file: Not used by this method.
            react_gene_file: CSV with a header row.  Column 0 is the reaction
                name, followed by (low, high) column pairs per gene.  The
                gene name is the header of the low column minus its last
                four characters.
            pathways_file: CSV whose rows are ``tail, head`` component names.
        """
        #-- Load the whole reaction/gene table; the first row is the header.
        with open(react_gene_file, 'r') as react_file:
            table = list(csv.reader(react_file))
        header, reaction_rows = table[0], table[1:]

        #-- Gene names come from every second header column after the first.
        print(header)
        genes = [column[:-4] for column in header[1::2]]
        print(genes)

        for row in reaction_rows:
            reaction_name = row[0]
            bkf = BKB(name=reaction_name + ' BKF')

            #-- Reaction component with a True and a False I-node.
            react_component = BKB_component(reaction_name)
            true_node = BKB_I_node('True', react_component)
            false_node = BKB_I_node('False', react_component)
            react_component.addINode(true_node)
            react_component.addINode(false_node)
            bkf.addComponent(react_component)

            #-- Walk the (low, high) column pairs of this reaction.
            for gene_pos, low_col in enumerate(range(1, len(row), 2)):
                gene_name = genes[gene_pos]
                low_text = row[low_col]
                high_text = row[low_col + 1]
                if low_text == '':
                    #-- Empty low value: gene has no entry for this reaction.
                    continue
                print(low_text, high_text)

                level_count = expression_levels_dict[gene_name]
                levels = list(
                    np.linspace(int(low_text), int(high_text), level_count))

                #-- One I-node per interval between neighbouring levels.
                expression_comp = BKB_component(gene_name + ' ExpressionLv')
                for lower, upper in zip(levels, levels[1:]):
                    expression_comp.addINode(
                        BKB_I_node('{} | {}'.format(lower, upper),
                                   expression_comp))
                bkf.addComponent(expression_comp)

                #-- Per state: a prior, then reaction True / False given it.
                state_total = bkf.findComponent(
                    expression_comp.name).getNumberStates()
                for state_idx in range(state_total):
                    level_node = expression_comp.getState(state_idx)
                    bkf.addSNode(
                        BKB_S_node(expression_comp, level_node,
                                   random.random()))
                    bkf.addSNode(
                        BKB_S_node(react_component, true_node,
                                   random.random(),
                                   [(expression_comp, level_node)]))
                    bkf.addSNode(
                        BKB_S_node(react_component, false_node,
                                   random.random(),
                                   [(expression_comp, level_node)]))
            self.bkfs.append(bkf)
        print('Done')

        #-- Process Pathway BKFs
        with open(pathways_file, 'r') as pathway_file:
            pathway_rows = csv.reader(pathway_file)
            bkf = BKB()
            states = ['True', 'False']
            for row in pathway_rows:
                tail, head = row[0], row[1]

                #-- Look up (or create with True/False states) tail, then head.
                endpoints = []
                for name in (tail, head):
                    found = bkf.findComponent(name)
                    if found == -1:
                        found = BKB_component(name)
                        for state in states:
                            found.addINode(BKB_I_node(state, found))
                        bkf.addComponent(found)
                    endpoints.append(found)
                component_tail, component_head = endpoints

                #-- One S-node for every (head state, tail state) combination.
                for head_state in states:
                    for tail_state in states:
                        bkf.addSNode(
                            BKB_S_node(
                                component_head,
                                component_head.findState(head_state),
                                random.random(),
                                [(component_tail,
                                  component_tail.findState(tail_state))]))
        self.bkfs.append(bkf)
        print('Complete')
    def processPathwayBKF(self, exhaustiveOr=False):
        """Build one BKB fragment per pathway and append it to ``self.bkfs``.

        Each pathway yields a BKB named ``pathway.ID``; its content depends
        on the kind of pathway:

        * ``"_hier"`` in ``pathway.ID``: a ``'<tails[0]>_active='`` component
          with a probability-1.0 prior on ``'True'`` and a
          ``'<head>_active='`` component that is ``'True'`` with probability
          1.0 given the child.  Only the first tail is used.
        * otherwise, with ``exhaustiveOr`` false: a ``'<head>_active='``
          component, a ``'Gene_combo='`` selector with one state per tail and,
          per tail, a ``'mu-STD>=<tail><=mu+STD='`` component.  Each tail
          adds three probability-1.0 S-nodes: a prior on the statistical
          condition, the selector state given that condition, and the
          pathway being active given the selector state.
        * otherwise, with ``exhaustiveOr`` true: not implemented; a message
          is printed and the (empty) fragment is still appended.

        Args:
            exhaustiveOr: Selects the unimplemented exhaustive-or
                construction for non-hierarchy pathways.

        Raises:
            AssertionError: If ``self.pathways`` is empty.
        """
        assert len(self.pathways) > 0, "Have not processed pathways yet"

        for pathway in tqdm.tqdm(self.pathways,
                                 desc='Processing pathway BKFs'):
            bkf = BKB(name=pathway.ID)
            if "_hier" in pathway.ID:
                # head
                parent_comp_idx = bkf.addComponent(
                    '{}_active='.format(pathway.head))
                parent_true_idx = bkf.addComponentState(
                    parent_comp_idx, 'True')

                # tail - should only be 1 for _hier bkfs
                child_comp_idx = bkf.addComponent(
                    '{}_active='.format(pathway.tails[0]))
                child_true_idx = bkf.addComponentState(
                    child_comp_idx, 'True')

                # S-nodes: prior on the child, then child -> parent.
                bkf.addSNode(
                    BKB_S_node(child_comp_idx, child_true_idx, 1.0))
                bkf.addSNode(
                    BKB_S_node(parent_comp_idx, parent_true_idx, 1.0,
                               [(child_comp_idx, child_true_idx)]))
            elif exhaustiveOr:
                print("not implemented")
            else:
                # pathway
                reaction_comp_idx = bkf.addComponent(
                    '{}_active='.format(pathway.head))
                reaction_true_idx = bkf.addComponentState(
                    reaction_comp_idx, 'True')

                # gene selector
                selector_comp_idx = bkf.addComponent("Gene_combo=")

                # tails
                for tail in pathway.tails:
                    selector_state_idx = bkf.addComponentState(
                        selector_comp_idx, tail)

                    condition_comp_idx = bkf.addComponent(
                        'mu-STD>={}<=mu+STD='.format(tail))
                    condition_true_idx = bkf.addComponentState(
                        condition_comp_idx, 'True')

                    # Prior: the statistical condition holds.
                    bkf.addSNode(
                        BKB_S_node(condition_comp_idx, condition_true_idx,
                                   1.0))

                    # Statistical condition -> this tail's selector state.
                    bkf.addSNode(
                        BKB_S_node(selector_comp_idx, selector_state_idx,
                                   1.0,
                                   [(condition_comp_idx,
                                     condition_true_idx)]))

                    # Selector state -> pathway active.
                    bkf.addSNode(
                        BKB_S_node(reaction_comp_idx, reaction_true_idx,
                                   1.0,
                                   [(selector_comp_idx,
                                     selector_state_idx)]))

            self.bkfs.append(bkf)
    def processPatientBKF(self):
        """Build one BKB fragment per patient and append it to ``self.bkfs``.

        For every patient a BKB named ``pat.patientID`` is created.  Each
        entry ``idx`` of ``pat.mutatedGenes`` contributes:

        * ``'mut_<gene>='`` = ``'True'`` with a probability-1.0 prior,
        * ``'var_<variant>='`` = ``'True'`` with a probability-1.0 prior,
          created only the first time ``pat.variants[idx]`` is seen for this
          patient and reused afterwards,
        * ``'mut-var_<geneVariant>='`` = ``'True'`` with probability 1.0,
          given both the gene and the variant I-nodes, where the name comes
          from ``pat.mutatedGeneVariants[idx]``.

        Raises:
            AssertionError: If ``self.patients`` is empty.
        """
        assert len(
            self.patients) > 0, "Have not processed Patient and gene data yet."

        # NOTE(review): accumulated but never read in the visible part of
        # this method -- confirm whether it can be dropped.
        allGenes = list()
        for pat in tqdm.tqdm(self.patients, desc='Forming patient BKFs'):
            bkf = BKB(name=pat.patientID)
            # Variant component name -> (component index, 'True' state index)
            # for the variant components already added to this patient's BKF.
            variant_nodes = dict()
            for idx, gene in enumerate(pat.mutatedGenes):
                allGenes.append(gene)

                # gene: S-node  o---->[mut_<genename>=True]
                mut_comp_idx = bkf.addComponent('mut_{}='.format(gene))
                mut_true_idx = bkf.addComponentState(mut_comp_idx, 'True')
                bkf.addSNode(BKB_S_node(mut_comp_idx, mut_true_idx, 1.0))

                # var: add the component and its prior only once per patient.
                variant_name = 'var_{}='.format(pat.variants[idx])
                if variant_name not in variant_nodes:
                    var_comp_idx = bkf.addComponent(variant_name)
                    var_true_idx = bkf.addComponentState(var_comp_idx, 'True')
                    bkf.addSNode(
                        BKB_S_node(var_comp_idx, var_true_idx, 1.0))
                    # Remember the variant's own state index; the gene's
                    # state index would pair the wrong I-node with the
                    # variant component when the variant is reused.
                    variant_nodes[variant_name] = (var_comp_idx, var_true_idx)
                variant_node = variant_nodes[variant_name]

                # mut_var combo, conditioned on the gene and the variant.
                mut_var_comp_idx = bkf.addComponent('mut-var_{}='.format(
                    pat.mutatedGeneVariants[idx]))
                mut_var_true_idx = bkf.addComponentState(
                    mut_var_comp_idx, 'True')
                bkf.addSNode(
                    BKB_S_node(mut_var_comp_idx, mut_var_true_idx, 1.0,
                               [(mut_comp_idx, mut_true_idx),
                                variant_node]))
            self.bkfs.append(bkf)