def _addDemographicOption(option, bkb, src_population, src_population_data, option_dependencies=None):
    """Add a demographic option component to ``bkb`` and collect the sources matching it.

    A component named ``'<prop> <op_str> <val>'`` is added with a ``'True'`` and a
    ``'False'`` state. When there are no dependencies, two prior S-nodes are added
    carrying the matched fraction of the population (``prob``) and its complement
    (``1 - prob``). Otherwise S-node construction is delegated to
    ``_processOptionDependency``.

    Args:
        option: ``(prop, op_str, val)`` triple; ``op_str`` is resolved to a callable
            by ``_process_operator``.
        bkb: BKB object that is modified in place.
        src_population: Mapping of entity name -> source name, where the source name
            is a key of ``src_population_data``.
        src_population_data: Mapping of source name -> record indexable by ``prop``.
        option_dependencies: Options this one depends on; passed through unchanged to
            ``_processOptionDependency``. ``None`` (the default) or an empty
            sequence means "no dependencies".

    Returns:
        Tuple ``(bkb, options_dict, matched_srcs)`` where ``options_dict`` maps the
        new component's name to the name of its ``'True'`` state and ``matched_srcs``
        is the set of entity names whose record satisfied the option.

    Raises:
        ZeroDivisionError: If ``src_population`` is empty.
    """
    #-- A None sentinel replaces the former shared mutable default (``list()``).
    if option_dependencies is None:
        option_dependencies = []

    prop, op_str, val = option
    op = _process_operator(op_str)

    matched_srcs = set()
    for entity_name, src_name in src_population.items():
        src_val = src_population_data[src_name][prop]
        if isinstance(src_val, tuple):
            #-- Tuple-valued properties are compared as sets, with the option value first.
            satisfied = op(set(val), set(src_val))
        else:
            satisfied = op(src_val, val)
        if satisfied:
            matched_srcs.add(entity_name)

    #-- Entity names are dict keys (unique), so the set size equals the match count.
    #-- Convert before dividing so the ratio is never truncated by integer division.
    prob = float(len(matched_srcs)) / len(src_population)

    comp_idx = bkb.addComponent('{} {} {}'.format(prop, op_str, val))
    inode_true_idx = bkb.addComponentState(comp_idx, 'True')
    inode_false_idx = bkb.addComponentState(comp_idx, 'False')

    #-- Create option dictionary
    options_dict = {
        bkb.getComponentName(comp_idx): bkb.getComponentINodeName(comp_idx, inode_true_idx)
    }

    if not option_dependencies:
        #-- If no chain rule dependencies, just add prior s-nodes
        snode_1 = BKB_S_node(init_component_index=comp_idx,
                             init_state_index=inode_true_idx,
                             init_probability=prob)
        snode_2 = BKB_S_node(init_component_index=comp_idx,
                             init_state_index=inode_false_idx,
                             init_probability=1 - prob)
        bkb.addSNode(snode_1)
        bkb.addSNode(snode_2)
    else:
        #-- Process Dependencies
        bkb = _processOptionDependency(option, option_dependencies, bkb,
                                       src_population, src_population_data)

    return bkb, options_dict, matched_srcs
def impose_structure(bkb, structure):
    """Return a deep copy of ``bkb`` with the S-nodes of selected genes rebuilt.

    For every ``gene1`` key of ``structure`` the existing S-nodes heading the
    ``'True'`` state of ``gene1`` (as indexed by ``_constructSNodesByHead``) are
    removed and their tails collected. Then, for every ``gene2`` entry flagged
    ``add``, one new S-node is created per collected tail, supported by that tail
    plus the ``'True'`` state of ``gene2``.

    Args:
        bkb: BKB to start from; it is deep-copied and not modified.
        structure: Mapping ``gene1 -> {gene2: (add, prob)}``.

    Returns:
        The modified copy.
    """
    working_bkb = copy.deepcopy(bkb)
    snodes_by_head = _constructSNodesByHead(working_bkb)

    for head_gene, dependencies in structure.items():
        head_comp = working_bkb.getComponentIndex(head_gene)
        head_state = working_bkb.getComponentINodeIndex(head_comp, 'True')

        #-- Gather every tail of the S-nodes currently on this head, dropping each S-node
        inherited_tails = []
        for old_snode in snodes_by_head[head_comp][head_state]:
            inherited_tails.extend(
                old_snode.getTail(position)
                for position in range(old_snode.getNumberTail()))
            working_bkb.removeSNode(old_snode)

        #-- Re-attach the head, pairing each inherited tail with the new dependency
        for tail_gene, (add, prob) in dependencies.items():
            tail_comp = working_bkb.getComponentIndex(tail_gene)
            tail_state = working_bkb.getComponentINodeIndex(tail_comp, 'True')
            if not add:
                continue
            for src_comp_idx, src_state_idx in inherited_tails:
                new_snode = BKB_S_node(
                    init_component_index=head_comp,
                    init_state_index=head_state,
                    init_probability=prob,
                    init_tail=[(src_comp_idx, src_state_idx),
                               (tail_comp, tail_state)])
                working_bkb.addSNode(new_snode)

    return working_bkb
if patient_data[patient]['Genes'][gene1] and patient_data[patient][ 'Genes'][gene2]: count += 1 counts[(gene1, gene2)] = float(count / len(PATIENTS)) / priors[gene2] #-- Make BKB Patient frags for patient in PATIENTS: bkf = BKB() for gene in patient_data[patient]['Genes']: #-- Setup Gene i component. geneComp_idx = bkf.addComponent(gene) geneStateTrue_idx = bkf.addComponentState(geneComp_idx, 'True') s_node_prior = BKB_S_node(init_component_index=geneComp_idx, init_state_index=geneStateTrue_idx, init_probability=1) bkf.addSNode(s_node_prior) patient_bkfs.append(bkf) #-- Define Sample Pathway PATHWAYS = ['Pathway{}'.format(i) for i in range(1)] pathway_data = { pathway: { gene: random.random() / 10 for gene in set([random.choice(GENES) for _ in range(len(GENES))]) } for pathway in PATHWAYS }
} bkfs = list() #-- Make BKB frags for patient in PATIENTS: bkf = BKB() for gene in GENES: if random.choice([True, False]): #-- Setup Gene i component. comp_idx = bkf.addComponent('mut_{}'.format(gene)) stateTrue_idx = bkf.addComponentState(comp_idx, 'True') bkf.addSNode( BKB_S_node(init_component_index=comp_idx, init_state_index=stateTrue_idx, init_probability=1)) variant_comp_idx = bkf.addComponent('mut-var_{}'.format(gene)) random_variant = random.choice(GENE_VARIANTS) variant_state_idx = bkf.addComponentState(variant_comp_idx, random_variant) bkf.addSNode( BKB_S_node(init_component_index=variant_comp_idx, init_state_index=variant_state_idx, init_probability=1, init_tail=[(comp_idx, stateTrue_idx)])) if 'Patient_Genes' not in patient_data[patient]: patient_data[patient]['Patient_Genes'] = [gene] else: patient_data[patient]['Patient_Genes'].append(gene) if 'Patient_Gene_Variants' not in patient_data[patient]:
def _linkSource(src_comp, src_state, non_src_comp, non_src_state,
                other_non_src_tails, prob, comp_idx, inode_true_idx,
                inode_false_idx, bkb, matched_srcs, src_population):
    """Split a source-collection I-node into matched and unmatched halves.

    The name of state ``src_state`` of component ``src_comp`` is parsed as
    ``'[n1,n2,...]_name1,name2,...'``. Sources whose number is in ``matched_srcs``
    form the "true" collection and the rest form the "false" collection. For each
    non-empty collection:

    * a new state named in the same ``'[nums]_names'`` format is added to ``src_comp``;
    * an S-node with probability 1 is added on ``(non_src_comp, non_src_state)``,
      supported by the new state plus ``other_non_src_tails``;
    * an S-node on the new state is added with probability ``prob`` times the
      collection's share of the sources, supported by ``(comp_idx, inode_true_idx)``
      for the true collection or ``(comp_idx, inode_false_idx)`` for the false one.

    The original source state is left in the BKB.

    Args:
        src_comp, src_state: Component/state indices of the source collection I-node.
        non_src_comp, non_src_state: Component/state indices of the I-node that the
            source collection supports.
        other_non_src_tails: List of further ``(component, state)`` tails for the
            non-source S-node.
        prob: Probability that is divided between the two new source states.
        comp_idx, inode_true_idx, inode_false_idx: Component index and its two state
            indices used as tails of the new source S-nodes.
        bkb: BKB object that is modified in place.
        matched_srcs: Container of source numbers counted as matching.
        src_population: Unused; kept so the signature stays unchanged.

    Returns:
        The same ``bkb`` object.

    Raises:
        ValueError: If the bracketed part of the state name is not a comma-separated
            list of integers.
    """
    #-- Process the source name collection
    src_state_name = bkb.getComponentINodeName(src_comp, src_state)
    #-- Split on the FIRST underscore only: splitting on all of them and re-joining
    #-- with '' silently removed underscores that belong to the source names.
    src_nums_part, _sep, src_names_part = src_state_name.partition('_')
    src_nums = [int(num) for num in src_nums_part[1:-1].split(',')]
    src_names = src_names_part.split(',')

    #-- The old source state is intentionally left in the BKB (faster than removing it).
    true_srcs = []
    false_srcs = []
    for src in zip(src_nums, src_names):
        if src[0] in matched_srcs:
            true_srcs.append(src)
        else:
            false_srcs.append(src)
    true_prob = float(len(true_srcs)) / (len(true_srcs) + len(false_srcs))

    #-- True collection first, then false, each linked to the matching option state
    collections = (
        (true_srcs, true_prob, inode_true_idx),
        (false_srcs, 1 - true_prob, inode_false_idx),
    )
    for srcs, share, option_state_idx in collections:
        if not srcs:
            #-- An empty collection has zero share; no I-node or S-nodes are created
            continue
        state_name = '[{}]_{}'.format(
            ','.join(str(src_num) for src_num, _name in srcs),
            ','.join(src_name for _num, src_name in srcs))
        #-- Now add the I node
        new_state_idx = bkb.addComponentState(src_comp, state_name)
        #-- Now connect with S nodes
        bkb.addSNode(
            BKB_S_node(init_component_index=non_src_comp,
                       init_state_index=non_src_state,
                       init_probability=1,
                       init_tail=[(src_comp, new_state_idx)] + other_non_src_tails))
        bkb.addSNode(
            BKB_S_node(init_component_index=src_comp,
                       init_state_index=new_state_idx,
                       init_probability=prob * share,
                       init_tail=[(comp_idx, option_state_idx)]))
    return bkb
def _countTargetOutcomes(src_population_data, is_carrier, op, prop, val):
    """Count how the target resolves among records accepted by ``is_carrier``.

    Returns ``(prior_count, joint_count, neg_joint_count)``: the number of accepted
    records, and how many of those make ``op(record[prop], val)`` truthy / falsy.
    """
    prior_count = 0
    joint_count = 0
    neg_joint_count = 0
    for record in src_population_data.values():
        try:
            if not is_carrier(record):
                continue
            prior_count += 1
            if op(record[prop], val):
                joint_count += 1
            else:
                neg_joint_count += 1
        except Exception:
            #-- Best effort: a record that lacks the needed fields (or cannot be
            #-- compared) is skipped. This was a bare ``except:``, which also trapped
            #-- KeyboardInterrupt and SystemExit.
            continue
    return prior_count, joint_count, neg_joint_count


def _attachTargetStates(bkb, target_comp_idx, tail, prob_true_cond, prob_false_cond):
    """Add the 'True'/'False' target states whose probability is positive, each with
    one S-node supported by ``tail``."""
    for state_name, cond_prob in (('True', prob_true_cond), ('False', prob_false_cond)):
        if cond_prob > 0:
            target_state_idx = bkb.addComponentState(target_comp_idx, state_name)
            bkb.addSNode(
                BKB_S_node(init_component_index=target_comp_idx,
                           init_state_index=target_state_idx,
                           init_probability=cond_prob,
                           init_tail=[tail]))


def _addTargetToLastTopologVariables(target, bkb, src_population, src_population_data):
    """Attach a target component to every mutation / variant I-node at the bottom of ``bkb``.

    For each I-node returned by ``_collectBkbBottomINodes`` whose component name
    starts with ``'mut-var_'`` or ``'mut_'``, the records of ``src_population_data``
    that carry that variant / gene are counted. A new target component is then
    added, with ``'True'`` and ``'False'`` states weighted by the fraction of those
    records for which the target holds / does not hold. Each state is supported by
    the bottom I-node.

    Bottom I-nodes of any other component are skipped. So are I-nodes that no record
    matches: the conditional probability is undefined there, and the division used
    to raise ``ZeroDivisionError``.

    Args:
        target: ``(prop, op_str, val)`` triple; ``op_str`` is resolved to a callable
            by ``_process_operator``.
        bkb: BKB object that is modified in place.
        src_population: Unused; kept so the signature stays unchanged.
        src_population_data: Mapping of source key -> record. Records are read via
            ``record['Patient_Gene_Variant'][gene]``, ``record['Patient_Genes']``
            and ``record[prop]``.

    Returns:
        Tuple ``(bkb, transformed_meta)`` where ``transformed_meta`` maps the name of
        every target component added here to the string ``'True'``.
    """
    bottom_inodes = _collectBkbBottomINodes(bkb)
    prop, op_str, val = target
    op = _process_operator(op_str)
    transformed_meta = dict()

    for comp_idx, state_idx in bottom_inodes:
        comp_name = bkb.getComponentName(comp_idx)

        if comp_name.startswith('mut-var_'):
            #-- If this is a variant component
            gene = comp_name.split('_')[1]
            variant = bkb.getComponentINodeName(comp_idx, state_idx)

            # NOTE(review): the key is singular ('Patient_Gene_Variant'); confirm it
            # matches the population data schema. A missing key is swallowed during
            # counting, so a mismatch silently produces no variant targets.
            def is_carrier(record, gene=gene, variant=variant):
                return record['Patient_Gene_Variant'][gene] == variant

            target_name = '{} {} {}_mut-var_{}_{}'.format(
                prop, op_str, val, gene, variant)
        elif comp_name.startswith('mut_'):
            #-- If this is a mutation component
            gene = comp_name.split('_')[1]

            def is_carrier(record, gene=gene):
                return gene in record['Patient_Genes']

            target_name = '{} {} {}_mut_{}'.format(prop, op_str, val, gene)
        else:
            #-- Not a mutation/variant I-node: nothing to attach
            continue

        prior_count, joint_count, neg_joint_count = _countTargetOutcomes(
            src_population_data, is_carrier, op, prop, val)
        if prior_count == 0:
            #-- No supporting record, so P(target | I-node) is undefined
            continue
        prob_true_cond = float(joint_count) / prior_count
        prob_false_cond = float(neg_joint_count) / prior_count

        #-- Add target Inode and attach s-node
        target_comp_idx = bkb.addComponent(target_name)
        _attachTargetStates(bkb, target_comp_idx, (comp_idx, state_idx),
                            prob_true_cond, prob_false_cond)
        transformed_meta[bkb.getComponentName(target_comp_idx)] = 'True'

    return bkb, transformed_meta
def _processOptionDependency(option, option_dependencies, bkb, src_population, src_population_data):
    """Add S-nodes linking ``option`` to the options it depends on.

    Every dependency is paired with both truth values. Only selections of
    ``len(option_dependencies)`` pairs that name each property once are kept as
    tails. Each tail is combined with ``option`` being True and being False. For
    every such combination, the fraction of ``src_population`` whose data agrees
    with all of its (option, truth value) pairs is computed, and an S-node with that
    probability is added when the fraction is positive.

    Args:
        option: ``(prop, op_str, val)`` triple used as the S-node head.
        option_dependencies: Sized collection of ``(prop, op_str, val)`` triples used
            as S-node tails.
        bkb: BKB object that is modified in place.
        src_population: Mapping of entity name -> source name (key of
            ``src_population_data``).
        src_population_data: Mapping of source name -> record indexable by property.

    Returns:
        The same ``bkb`` object.
    """
    #-- Get consistent option combinations
    dependency_states = list(itertools.product(option_dependencies, [True, False]))
    tail_combos = []
    for selection in itertools.combinations(dependency_states, r=len(option_dependencies)):
        selection = list(selection)
        distinct_props = {
            dep_prop for (dep_prop, _dep_op, _dep_val), _dep_state in selection
        }
        #-- A property appearing twice means a dependency was picked as both True and False
        if len(distinct_props) == len(selection):
            tail_combos.append(selection)

    head_states = [(option, True), (option, False)]
    combos = list(itertools.product(head_states, tail_combos))

    def _agrees(src_name, event, expected):
        """Tell whether the record of ``src_name`` gives ``event`` the truth value ``expected``."""
        ev_prop, ev_op_str, ev_val = event
        ev_op = _process_operator(ev_op_str)
        observed = src_population_data[src_name][ev_prop]
        #-- Check if the src_population item is a list of items (useful for drugs):
        if type(observed) == tuple:
            outcome = True if ev_op(set(ev_val), set(observed)) else False
        else:
            outcome = ev_op(observed, ev_val)
        return outcome == expected

    #-- Calculate joint probabilities from data
    counts = []
    for head, tail in combos:
        #-- Put head and tail in one list
        events = [head] + tail
        agreeing = 0
        for src_name in src_population.values():
            #-- Build the whole list first so every event is evaluated (no short-circuit)
            verdicts = [_agrees(src_name, event, expected) for event, expected in events]
            if all(verdicts):
                agreeing += 1
        counts.append(agreeing)
    probs = [float(agreeing) / len(src_population) for agreeing in counts]

    #-- Setup each S-node
    for (head, tail), joint_prob in zip(combos, probs):
        #-- Head and tail are processed even when no S-node ends up being added
        comp_head_idx, i_node_head_idx = _processDependencyHead(head, bkb)
        processed_tail = _processDependecyTail(tail, bkb)
        #-- Add Snode
        if joint_prob > 0:
            bkb.addSNode(
                BKB_S_node(init_component_index=comp_head_idx,
                           init_state_index=i_node_head_idx,
                           init_probability=joint_prob,
                           init_tail=processed_tail))
    return bkb
bkfs = list() #-- Make BKB frags for j, _ in enumerate(PATIENTS): bkf = BKB() for i in range(NUM_GENES): #-- Setup Gene i component. comp_idx = bkf.addComponent('Gene{}_mutated'.format(i)) stateTrue_idx = bkf.addComponentState(comp_idx, 'True') if random.choice([True, False]): bkf.addSNode( BKB_S_node(init_component_index=comp_idx, init_state_index=stateTrue_idx, init_probability=1)) bkfs.append(bkf) #-- Fuse patients together. fused_bkb = fuse(bkfs, [1 for _ in range(len(PATIENTS))], [str(hash(patient)) for patient in PATIENTS], working_dir=os.getcwd()) #-- Impose Structure while maintiaining mutual exclusivity. def impose_structure(bkb, structure): bkb_ = copy.deepcopy(bkb) S_nodes_by_head = _constructSNodesByHead(bkb_) for gene1, dependecies in structure.items(): gene1_comp_idx = bkb_.getComponentIndex(gene1)