def make_bkb(self, train_patients):
    """Fuse the BKFs of the training patients into a single BKB.

    Every hash in ``train_patients`` is mapped to the file
    ``<self.bkf_folder>/<hash>.bkf``. Loading is best effort: a fragment
    that cannot be loaded is logged and skipped. The extra fragment
    ``PatientX.bkf`` is added as well when it can be loaded.

    Args:
        train_patients: Iterable of patient hashes; each one names a BKF
            file inside ``self.bkf_folder``.

    Returns:
        Whatever ``fuse`` returns when called with the loaded BKFs, a list
        of ones (one entry per BKF) and the matching source names.
    """
    t1 = time.time()

    #-- Make Fused BKB with training patients
    #-- Collect all patient BKFs
    bkfs = list()
    hashes = list()
    for patient_hash in tqdm.tqdm(train_patients, leave=False, desc='Preparing BKFs for Fusion'):
        bkf_file = os.path.join(self.bkf_folder, '{}.bkf'.format(patient_hash))
        _bkf = BKB()
        try:
            _bkf.load(bkf_file)
        except Exception:
            # Best effort: an unreadable fragment must not abort the fusion,
            # but it should not disappear without a trace either.
            logging.warning('Could not load BKF %s; skipping.', bkf_file)
            continue
        bkfs.append(_bkf)
        hashes.append(str(patient_hash))

    #-- Try to add PatientX
    patient_x_file = os.path.join(self.bkf_folder, 'PatientX.bkf')
    patient_x_bkf = BKB()
    try:
        patient_x_bkf.load(patient_x_file)
    except Exception:
        # PatientX is optional, so its absence is only worth a debug note.
        logging.debug('PatientX BKF not loaded from %s.', patient_x_file)
    else:
        bkfs.append(patient_x_bkf)
        hashes.append('PatientX')

    fused_bkb = fuse(bkfs, [1] * len(bkfs), hashes)
    logging.info('Make BKB Ok.')
    logging.info('Elapsed Time: {} sec'.format(time.time() - t1))
    return fused_bkb
def __init__(self, config_file):
    """Load the configuration, the fused BKB and set up the reasoner.

    The configuration file is read as CSV; column 0 of each row is the
    setting name and column 1 its value. Blank lines are ignored.

    Args:
        config_file: Path to the CSV configuration file. It must define
            ``fused_bkb_path``, ``ncats_dir``, ``src_metadata_path`` and
            ``gene_var_direct``.

    Raises:
        KeyError: If one of the required settings is missing.
        IndexError: If a non-blank row has fewer than two columns.
    """
    #-- Read in configuration from config file.
    self.config = dict()
    # newline='' lets the csv module do its own newline handling.
    with open(config_file, 'r', newline='') as csv_file:
        for row in csv.reader(csv_file):
            if not row:
                # csv.reader yields [] for blank lines; nothing to store.
                continue
            self.config[row[0]] = row[1]

    self.fused_bkb_path = self.config['fused_bkb_path']
    self.ncats_dir = self.config['ncats_dir']
    self.src_metadata_path = self.config['src_metadata_path']
    self.gene_var_direct = self.config['gene_var_direct']

    #-- Load the fused BKB and hand it to the reasoner.
    self.fused_bkb = BKB()
    self.fused_bkb.load(self.fused_bkb_path)
    self.reasoner = Reasoner(self.fused_bkb,
                             gene_var_direct=self.gene_var_direct,
                             max_new_ev=MAX_NEW_EV)
    self.reasoner.set_src_metadata(self.src_metadata_path)
def processPathwayBKF(self, exhaustiveOr=False):
    """Build one BKF per entry of ``self.pathways`` and append it to ``self.bkfs``.

    Each BKF gets a ``<pathwayID>_active=`` component with a ``True``
    state. Unless ``exhaustiveOr`` is set, every gene of the pathway adds:

    * a state named after the gene on the shared ``Gene_combo=`` component,
    * a ``mu-STD<=<gene><=mu+STD=`` component with a ``True`` state and a
      prior S-node of probability 1.0,
    * an S-node (1.0) for the gene's ``Gene_combo=`` state given that
      condition, and
    * an S-node (1.0) for ``<pathwayID>_active= True`` given the gene's
      ``Gene_combo=`` state.

    Args:
        exhaustiveOr: When true, only "not implemented" is printed and the
            BKF is appended without any gene structure.

    Raises:
        AssertionError: If ``self.pathways`` is empty.
    """
    assert len(self.pathways) > 0, "Have not processed pathways yet"
    for pathway in tqdm.tqdm(self.pathways, desc='Processing pathway BKFs'):
        bkf = BKB(name=pathway.pathwayID)
        pathwayActiveComp_idx = bkf.addComponent(
            '{}_active='.format(pathway.pathwayID))
        pathwayActiveTrue_idx = bkf.addComponentState(
            pathwayActiveComp_idx, 'True')

        if exhaustiveOr:
            print("not implemented")
        else:
            geneSelectorComp_idx = bkf.addComponent("Gene_combo=")
            for gene in pathway.genes:
                #-- gene[0] = geneName, gene[1] = low value, gene[2] = high value
                gene_name = gene[0]
                geneCombo_idx = bkf.addComponentState(
                    geneSelectorComp_idx, gene_name)
                statConditionComp_idx = bkf.addComponent(
                    'mu-STD<={}<=mu+STD='.format(gene_name))
                statConditionTrue_idx = bkf.addComponentState(
                    statConditionComp_idx, 'True')

                #-- Prior on the statistical condition.
                bkf.addSNode(
                    BKB_S_node(statConditionComp_idx,
                               statConditionTrue_idx, 1.0))
                #-- Gene selector state given the statistical condition.
                bkf.addSNode(
                    BKB_S_node(geneSelectorComp_idx, geneCombo_idx, 1.0,
                               [(statConditionComp_idx,
                                 statConditionTrue_idx)]))
                #-- Pathway activity given the gene selector state.
                bkf.addSNode(
                    BKB_S_node(pathwayActiveComp_idx,
                               pathwayActiveTrue_idx, 1.0,
                               [(geneSelectorComp_idx, geneCombo_idx)]))

        self.bkfs.append(bkf)
def processPathwayBKF(self, exhaustiveOr=False):
    """Build one BKF per entry of ``self.pathways`` and append it to ``self.bkfs``.

    Two kinds of pathway entries are handled:

    * IDs containing ``"_hier"``: the BKF links ``<tails[0]>_active= True``
      (prior S-node, 1.0) to ``<head>_active= True`` (S-node, 1.0,
      conditioned on the child). Only the first tail is used.
    * All other IDs: for every tail a state is added to the shared
      ``Gene_combo=`` component, a statistical-condition component with a
      ``True`` state and a prior S-node (1.0) is created, and S-nodes
      (1.0) chain condition -> gene selector state -> ``<head>_active=``.

    Args:
        exhaustiveOr: For non-hierarchy entries, when true only
            "not implemented" is printed and the BKF is appended without
            any structure.

    Raises:
        AssertionError: If ``self.pathways`` is empty.
    """
    assert len(self.pathways) > 0, "Have not processed pathways yet"
    for pathway in tqdm.tqdm(self.pathways, desc='Processing pathway BKFs'):
        bkf = BKB(name=pathway.ID)

        if "_hier" in pathway.ID:
            #-- Head
            pathwayParentComp_idx = bkf.addComponent(
                '{}_active='.format(pathway.head))
            pathwayParentTrue_idx = bkf.addComponentState(
                pathwayParentComp_idx, 'True')
            #-- Tail - should only be 1 for _hier bkfs
            pathwayChildComp_idx = bkf.addComponent(
                '{}_active='.format(pathway.tails[0]))
            pathwayChildTrue_idx = bkf.addComponentState(
                pathwayChildComp_idx, 'True')
            #-- S-nodes: prior on the child, then parent given child.
            bkf.addSNode(
                BKB_S_node(pathwayChildComp_idx, pathwayChildTrue_idx, 1.0))
            bkf.addSNode(
                BKB_S_node(pathwayParentComp_idx, pathwayParentTrue_idx, 1.0,
                           [(pathwayChildComp_idx, pathwayChildTrue_idx)]))
        elif exhaustiveOr:
            print("not implemented")
        else:
            #-- Pathway
            pathwayReactionComp_idx = bkf.addComponent(
                pathway.head + "_active=")
            pathwayReactionTrue_idx = bkf.addComponentState(
                pathwayReactionComp_idx, 'True')
            #-- Gene selector
            geneSelectorComp_idx = bkf.addComponent("Gene_combo=")
            #-- Tails
            for tail in pathway.tails:
                geneCombo_idx = bkf.addComponentState(
                    geneSelectorComp_idx, tail)
                # NOTE(review): the ">=" in this component name looks like a
                # typo for "<=" (mu-STD <= x <= mu+STD). Confirm before
                # changing it, since the name may be matched elsewhere.
                statConditionComp_idx = bkf.addComponent(
                    "mu-STD>=" + tail + "<=mu+STD=")
                statConditionTrue_idx = bkf.addComponentState(
                    statConditionComp_idx, 'True')

                #-- Prior on the statistical condition.
                bkf.addSNode(
                    BKB_S_node(statConditionComp_idx,
                               statConditionTrue_idx, 1.0))
                #-- Gene selector state given the statistical condition.
                bkf.addSNode(
                    BKB_S_node(geneSelectorComp_idx, geneCombo_idx, 1.0,
                               [(statConditionComp_idx,
                                 statConditionTrue_idx)]))
                #-- Pathway activity given the gene selector state.
                bkf.addSNode(
                    BKB_S_node(pathwayReactionComp_idx,
                               pathwayReactionTrue_idx, 1.0,
                               [(geneSelectorComp_idx, geneCombo_idx)]))

        self.bkfs.append(bkf)
Thayer School of Engineering at Dartmouth College Authors: Dr. Eugene Santos, Jr Mr. Chase Yakaboski, Mr. Gregory Hyde, Dr. Keum Joo Kim ''' #-- This test case took 35,000 seconds = 9.7 hours #-- Import your version of BKB and reasoning appropriately. Below is how I do it with pybkb. from pybkb.common.bayesianKnowledgeBase import bayesianKnowledgeBase as BKB from pybkb.python_base.reasoning import updating #-- Other imports import pickle #-- Load in evidence and targets with open('query_evid_targs.pk', 'rb') as f_: data = pickle.load(f_) #-- Pickled as dictionary. evidence = data['evidence'] targets = data['targets'] #-- Load query BKB bkb = BKB() bkb.load('query_bkb_137.bkb') #-- Run Updating res = updating(bkb, evidence, targets)
import sys
from pybkb.common.bayesianKnowledgeBase import bayesianKnowledgeBase as BKB
from pybkb.python_base.reasoning import checkMutex

#!!!!!!!!!! Change to your local bkb-pathway-core directory.
#sys.path.append('/home/ghyde/bkb-pathway-provider/core')
#sys.path.append('/home/cyakaboski/src/python/projects/bkb-pathway-provider/core')
from chp.reasoner import Reasoner
from chp.query import Query

'''
#-- Load in the fused bkb from our datafiles
fused_bkb.load('/home/public/data/ncats/AxleBKBS/660Pats6HoldoutSTAGING/fusion.bkb')
#-- Here are the associated patient data files
patient_data_file = '/home/public/data/ncats/AxleBKBS/660Pats6HoldoutSTAGING/patient_data.pk'
withheld_patients_file = '/home/public/data/ncats/AxleBKBS/660Pats6HoldoutSTAGING/withheldPatients.csv'
'''

#-- Patient data files that belong to the small-problem fused BKB below.
patient_data_file = '/home/public/data/ncats/BabelBKBs/smallProblem/patient_data.pk'
withheld_patients_file = '/home/public/data/ncats/BabelBKBs/smallProblem/withheldPatients.csv'

#-- Initialize a BKB and load the fused BKB from our datafiles.
fused_bkb = BKB()
fused_bkb.load('/home/public/data/ncats/BabelBKBs/smallProblem/fusion.bkb')

#-- Instantiate the reasoner on top of the fused BKB.
reasoner = Reasoner(fused_bkb=fused_bkb)
def read_expression_snodes(self, expression_levels_dict, tcga_expression_file, react_gene_file, pathways_file):
    """Build reaction BKFs and one pathway BKF from CSV input; append all to ``self.bkfs``.

    Reaction table (``react_gene_file``): the first row is a header whose
    columns after the first come in pairs per gene; the gene name is the
    first column of each pair with its last four characters removed. Every
    further row describes one reaction: column 0 is the reaction name,
    followed by (low, high) value pairs per gene. A pair whose low value is
    the empty string is skipped. For each remaining pair the range is split
    with ``np.linspace`` into ``expression_levels_dict[gene]`` points and
    the consecutive intervals become the states of a
    ``"<gene> ExpressionLv"`` component. All S-node probabilities are drawn
    from ``random.random()``.

    Pathway table (``pathways_file``): each row is ``tail, head``. Both get
    a ``True``/``False`` component (created on first use) inside one shared
    BKF, and an S-node with a ``random.random()`` probability is added for
    every head-state/tail-state combination.

    Args:
        expression_levels_dict: Maps gene name to the number of linspace
            points used for that gene.
        tcga_expression_file: Not used by this method.
        react_gene_file: Path of the gene/reaction CSV file.
        pathways_file: Path of the pathway CSV file.
    """
    #-- Process all gene_reaction s_nodes and assume there is a header with gene names.
    with open(react_gene_file, 'r') as csv_file:
        rows = list(csv.reader(csv_file))

    #-- Process gene names from the header: one name per column pair.
    header, reaction_rows = rows[0], rows[1:]
    print(header)
    genes = [column[:-4] for column in header[1::2]]
    print(genes)

    for row in reaction_rows:
        reaction_head_name = row[0]
        bkf = BKB(name=reaction_head_name + ' BKF')
        react_component = BKB_component(reaction_head_name)
        i_node_true = BKB_I_node('True', react_component)
        i_node_false = BKB_I_node('False', react_component)
        react_component.addINode(i_node_true)
        react_component.addINode(i_node_false)
        bkf.addComponent(react_component)

        #-- Extract and make I-nodes, one (low, high) column pair per gene.
        for pair_idx, low_col in enumerate(range(1, len(row), 2)):
            gene_name = genes[pair_idx]
            gene_low = row[low_col]
            gene_high = row[low_col + 1]
            if gene_low == '':
                # No expression range given for this gene in this reaction.
                continue
            print(gene_low, gene_high)

            num_levels = expression_levels_dict[gene_name]
            levels = list(np.linspace(int(gene_low), int(gene_high), num_levels))
            component_states = [
                '{} | {}'.format(lower, upper)
                for lower, upper in zip(levels, levels[1:])
            ]

            component = BKB_component(gene_name + ' ExpressionLv')
            for state in component_states:
                component.addINode(BKB_I_node(state, component))
            bkf.addComponent(component)

            #-- One prior plus two conditional S-nodes per expression state.
            num_states = bkf.findComponent(component.name).getNumberStates()
            for idx in range(num_states):
                i_node = component.getState(idx)
                bkf.addSNode(
                    BKB_S_node(component, i_node, random.random()))
                bkf.addSNode(
                    BKB_S_node(react_component, i_node_true, random.random(),
                               [(component, i_node)]))
                bkf.addSNode(
                    BKB_S_node(react_component, i_node_false, random.random(),
                               [(component, i_node)]))

        self.bkfs.append(bkf)
    print('Done')

    states = ['True', 'False']

    def _find_or_add_component(target_bkf, name):
        # Return the component called `name`, creating it with True/False
        # states when the BKF does not report one yet.
        if target_bkf.findComponent(name) == -1:
            new_component = BKB_component(name)
            for state in states:
                new_component.addINode(BKB_I_node(state, new_component))
            target_bkf.addComponent(new_component)
            return new_component
        return target_bkf.findComponent(name)

    #-- Process Pathway BKFs
    with open(pathways_file, 'r') as csv_file:
        reader = csv.reader(csv_file)
        bkf = BKB()
        for row in reader:
            tail = row[0]
            head = row[1]
            component_tail = _find_or_add_component(bkf, tail)
            component_head = _find_or_add_component(bkf, head)
            for head_state in states:
                for tail_state in states:
                    bkf.addSNode(
                        BKB_S_node(component_head,
                                   component_head.findState(head_state),
                                   random.random(),
                                   [(component_tail,
                                     component_tail.findState(tail_state))]))
    self.bkfs.append(bkf)
    print('Complete')