def _addDemographicOption(option,
                          bkb,
                          src_population,
                          src_population_data,
                          option_dependencies=None):
    """Add a True/False component for a demographic option to ``bkb``.

    The option is evaluated against every source in ``src_population``; the
    fraction of sources for which it holds becomes the prior of the 'True'
    state when the option has no dependencies.

    Args:
        option: ``(prop, op_str, val)`` triple. ``op_str`` is turned into a
            callable by ``_process_operator``.
        bkb: Knowledge base that receives the new component, its two states
            and the S-nodes.
        src_population: Mapping of entity name -> source name.
        src_population_data: Mapping of source name -> mapping of property
            name -> value. A value whose exact type is ``tuple`` is compared
            as a set, with the operands in the order ``op(set(val),
            set(value))``; every other value is compared as
            ``op(value, val)``.
        option_dependencies: Options this one is conditioned on. ``None`` or
            an empty sequence means "no dependencies".

    Returns:
        ``(bkb, options_dict, matched_srcs)`` where ``options_dict`` maps the
        new component's name to the name of its 'True' state and
        ``matched_srcs`` is the set of entity names for which the option
        evaluated truthy.

    Raises:
        ZeroDivisionError: if ``src_population`` is empty.
    """
    # A fresh list per call instead of one list object shared by every call.
    if option_dependencies is None:
        option_dependencies = []

    prop, op_str, val = option
    op = _process_operator(op_str)
    matched_srcs = set()
    pop_count_true = 0
    for entity_name, src_name in src_population.items():
        value = src_population_data[src_name][prop]
        # Exact-type check on purpose: it mirrors the test used when the
        # dependency probabilities are computed for the same data.
        if type(value) is tuple:
            matched = op(set(val), set(value))
        else:
            matched = op(value, val)
        if matched:
            matched_srcs.add(entity_name)
            pop_count_true += 1
    # Convert before dividing so the ratio is a true fraction even where
    # ``/`` on two ints performs integer division.
    prob = float(pop_count_true) / len(src_population)

    comp_idx = bkb.addComponent('{} {} {}'.format(prop, op_str, val))
    inode_true_idx = bkb.addComponentState(comp_idx, 'True')
    inode_false_idx = bkb.addComponentState(comp_idx, 'False')

    #-- Create option dictionary
    options_dict = {
        bkb.getComponentName(comp_idx):
        bkb.getComponentINodeName(comp_idx, inode_true_idx)
    }

    #-- If no chain rule dependencies, just add prior s-nodes
    if len(option_dependencies) == 0:
        bkb.addSNode(
            BKB_S_node(init_component_index=comp_idx,
                       init_state_index=inode_true_idx,
                       init_probability=prob))
        bkb.addSNode(
            BKB_S_node(init_component_index=comp_idx,
                       init_state_index=inode_false_idx,
                       init_probability=1 - prob))

    #-- Process Dependencies
    else:
        bkb = _processOptionDependency(option, option_dependencies, bkb,
                                       src_population, src_population_data)

    return bkb, options_dict, matched_srcs
def impose_structure(bkb, structure):
    """Return a deep copy of ``bkb`` rewired according to ``structure``.

    ``structure`` maps a head gene name to a mapping of
    ``other gene name -> (add, prob)``. For every head gene, all S-nodes
    currently supporting its 'True' state are removed from the copy and
    their tails are remembered. Then, for each ``other gene`` flagged with a
    truthy ``add``, one S-node per remembered tail is created whose tail is
    that remembered I-node plus the other gene's 'True' state, carrying
    probability ``prob``. The input ``bkb`` itself is not modified.
    """
    working_bkb = copy.deepcopy(bkb)
    snodes_by_head = _constructSNodesByHead(working_bkb)

    for head_gene, dependencies in structure.items():
        head_comp = working_bkb.getComponentIndex(head_gene)
        head_state = working_bkb.getComponentINodeIndex(head_comp, 'True')

        #-- Collect the tails of every S-node supporting the head, then
        #-- drop that S-node from the copy
        collected_tails = []
        for old_snode in snodes_by_head[head_comp][head_state]:
            collected_tails.extend(
                old_snode.getTail(position)
                for position in range(old_snode.getNumberTail()))
            working_bkb.removeSNode(old_snode)

        #-- Re-attach the head, once per collected tail, to every
        #-- dependency that was requested
        for dep_gene, (add, prob) in dependencies.items():
            dep_comp = working_bkb.getComponentIndex(dep_gene)
            dep_state = working_bkb.getComponentINodeIndex(dep_comp, 'True')
            if not add:
                continue
            for tail_comp, tail_state in collected_tails:
                new_tail = [(tail_comp, tail_state), (dep_comp, dep_state)]
                working_bkb.addSNode(
                    BKB_S_node(init_component_index=head_comp,
                               init_state_index=head_state,
                               init_probability=prob,
                               init_tail=new_tail))
    return working_bkb
Esempio n. 3
0
            if patient_data[patient]['Genes'][gene1] and patient_data[patient][
                    'Genes'][gene2]:
                count += 1
    counts[(gene1, gene2)] = float(count / len(PATIENTS)) / priors[gene2]

#-- Make BKB Patient frags
# One BKB fragment is built per patient and appended to ``patient_bkfs``.
for patient in PATIENTS:
    bkf = BKB()

    # NOTE(review): this iterates every key of ['Genes'] regardless of the
    # value stored under it, while the counting code above treats those
    # values as truthy/falsy flags -- confirm that every listed gene should
    # receive a probability-1 'True' prior.
    for gene in patient_data[patient]['Genes']:
        #-- Setup Gene i component.
        # The component is named after the gene and gets a single 'True'
        # state.
        geneComp_idx = bkf.addComponent(gene)
        geneStateTrue_idx = bkf.addComponentState(geneComp_idx, 'True')

        # Prior S-node (no tail) asserting the 'True' state with
        # probability 1.
        s_node_prior = BKB_S_node(init_component_index=geneComp_idx,
                                  init_state_index=geneStateTrue_idx,
                                  init_probability=1)
        bkf.addSNode(s_node_prior)

    patient_bkfs.append(bkf)

#-- Define Sample Pathway
# range(1) yields a single pathway name: 'Pathway0'.
PATHWAYS = ['Pathway{}'.format(i) for i in range(1)]
# Maps each pathway name to {gene: weight}. The genes are the distinct
# results of len(GENES) random draws (with replacement) from GENES, so a
# pathway may hold fewer than len(GENES) genes. Each weight is
# random.random() / 10, i.e. a float in [0.0, 0.1).
pathway_data = {
    pathway: {
        gene: random.random() / 10
        for gene in set([random.choice(GENES) for _ in range(len(GENES))])
    }
    for pathway in PATHWAYS
}
Esempio n. 4
0
}
bkfs = list()

#-- Make BKB frags
for patient in PATIENTS:
    bkf = BKB()

    for gene in GENES:
        if random.choice([True, False]):
            #-- Setup Gene i component.
            comp_idx = bkf.addComponent('mut_{}'.format(gene))
            stateTrue_idx = bkf.addComponentState(comp_idx, 'True')

            bkf.addSNode(
                BKB_S_node(init_component_index=comp_idx,
                           init_state_index=stateTrue_idx,
                           init_probability=1))
            variant_comp_idx = bkf.addComponent('mut-var_{}'.format(gene))
            random_variant = random.choice(GENE_VARIANTS)
            variant_state_idx = bkf.addComponentState(variant_comp_idx,
                                                      random_variant)
            bkf.addSNode(
                BKB_S_node(init_component_index=variant_comp_idx,
                           init_state_index=variant_state_idx,
                           init_probability=1,
                           init_tail=[(comp_idx, stateTrue_idx)]))
            if 'Patient_Genes' not in patient_data[patient]:
                patient_data[patient]['Patient_Genes'] = [gene]
            else:
                patient_data[patient]['Patient_Genes'].append(gene)
            if 'Patient_Gene_Variants' not in patient_data[patient]:
def _linkSource(src_comp, src_state, non_src_comp, non_src_state,
                other_non_src_tails, prob, comp_idx, inode_true_idx,
                inode_false_idx, bkb, matched_srcs, src_population):
    #-- Process the source name collection
    src_state_name = bkb.getComponentINodeName(src_comp, src_state)
    src_split = src_state_name.split('_')
    src_nums_str = src_split[0][1:-1]
    src_name_str = ''.join(src_split[1:])
    src_nums = [int(num) for num in src_nums_str.split(',')]
    src_names = src_name_str.split(',')

    #-- Delete the source node and snodes
    #-- Actually leaving states in for speed.
    #bkb.removeComponentState(src_comp, src_state)

    true_srcs = list()
    false_srcs = list()
    for src_num, src_name in zip(src_nums, src_names):
        if src_num in matched_srcs:
            true_srcs.append((src_num, src_name))
        else:
            false_srcs.append((src_num, src_name))

    true_prob = len(true_srcs) / (len(true_srcs) + len(false_srcs))

    #-- Make new true and false collections
    true_state_name = '[{}]_{}'.format(
        ','.join([str(src[0]) for src in true_srcs]),
        ','.join([src[1] for src in true_srcs]))
    false_state_name = '[{}]_{}'.format(
        ','.join([str(src[0]) for src in false_srcs]),
        ','.join([src[1] for src in false_srcs]))

    #-- Now add the I nodes
    if true_prob > 0:
        src_true_idx = bkb.addComponentState(src_comp, true_state_name)
        #-- Now connect with S nodes
        bkb.addSNode(
            BKB_S_node(init_component_index=non_src_comp,
                       init_state_index=non_src_state,
                       init_probability=1,
                       init_tail=[(src_comp, src_true_idx)] +
                       other_non_src_tails))
        bkb.addSNode(
            BKB_S_node(init_component_index=src_comp,
                       init_state_index=src_true_idx,
                       init_probability=prob * true_prob,
                       init_tail=[(comp_idx, inode_true_idx)]))
    if (1 - true_prob) > 0:
        src_false_idx = bkb.addComponentState(src_comp, false_state_name)
        #-- Now connect with S nodes
        bkb.addSNode(
            BKB_S_node(init_component_index=non_src_comp,
                       init_state_index=non_src_state,
                       init_probability=1,
                       init_tail=[(src_comp, src_false_idx)] +
                       other_non_src_tails))
        bkb.addSNode(
            BKB_S_node(init_component_index=src_comp,
                       init_state_index=src_false_idx,
                       init_probability=prob * (1 - true_prob),
                       init_tail=[(comp_idx, inode_false_idx)]))

    return bkb
def _addTargetToLastTopologVariables(target, bkb, src_population,
                                     src_population_data):
    """Attach a target component below every bottom mutation I-node.

    For each I-node returned by ``_collectBkbBottomINodes(bkb)`` whose
    component name starts with ``'mut-var_'`` or ``'mut_'``, the relative
    frequency of the target holding / not holding is estimated over the
    records of ``src_population_data`` that carry that mutation. A new
    target component is added with a 'True' and/or 'False' state (only the
    ones with non-zero frequency), each supported by an S-node whose tail
    is the mutation I-node.

    I-nodes of any other component are skipped, as are mutations carried by
    no record (the conditional frequency is undefined there).

    Args:
        target: ``(prop, op_str, val)`` triple. ``op_str`` is turned into a
            callable by ``_process_operator``.
        bkb: Knowledge base that is modified in place.
        src_population: Not used by this function; kept for interface
            compatibility.
        src_population_data: Mapping of source hash -> record (a mapping of
            property name -> value).

    Returns:
        ``(bkb, transformed_meta)`` where ``transformed_meta`` maps the name
        of every target component that was added to the string ``'True'``.
    """
    bottom_inodes = _collectBkbBottomINodes(bkb)

    prop, op_str, val = target
    op = _process_operator(op_str)

    transformed_meta = dict()
    for comp_idx, state_idx in bottom_inodes:
        comp_name = bkb.getComponentName(comp_idx)
        if comp_name.startswith('mut-var_'):
            #-- If this is a variant component
            gene = comp_name.split('_')[1]
            variant = bkb.getComponentINodeName(comp_idx, state_idx)
            target_name = '{} {} {}_mut-var_{}_{}'.format(
                prop, op_str, val, gene, variant)
        elif comp_name.startswith('mut_'):
            #-- If this is a mutation component
            gene = comp_name.split('_')[1]
            variant = None
            target_name = '{} {} {}_mut_{}'.format(prop, op_str, val, gene)
        else:
            # Not a mutation component: there is nothing to attach. Without
            # this guard the bookkeeping below would hit an unbound (or a
            # stale, previous-iteration) target component index.
            continue

        prior_count, joint_count, neg_joint_count = _countTargetGivenMutation(
            src_population_data, prop, op, val, gene, variant)
        if prior_count == 0:
            # No record carries this mutation, so there is no conditional
            # frequency to attach (this used to raise ZeroDivisionError).
            continue
        # The two ratios may sum to less than 1 when evaluating the target
        # failed for some of the carrying records.
        prob_true_cond = float(joint_count) / prior_count
        prob_false_cond = float(neg_joint_count) / prior_count

        #-- Add target Inode and attach s-node
        target_comp_idx = bkb.addComponent(target_name)
        for state_name, cond_prob in (('True', prob_true_cond),
                                      ('False', prob_false_cond)):
            if cond_prob > 0:
                target_state_idx = bkb.addComponentState(
                    target_comp_idx, state_name)
                bkb.addSNode(
                    BKB_S_node(init_component_index=target_comp_idx,
                               init_state_index=target_state_idx,
                               init_probability=cond_prob,
                               init_tail=[(comp_idx, state_idx)]))
        transformed_meta[bkb.getComponentName(target_comp_idx)] = 'True'
    return bkb, transformed_meta


def _countTargetGivenMutation(src_population_data, prop, op, val, gene,
                              variant=None):
    """Count records carrying a mutation, split by whether the target holds.

    With ``variant`` set, a record carries the mutation when
    ``record['Patient_Gene_Variant'][gene] == variant``; with ``variant``
    left as ``None`` it carries it when ``gene in record['Patient_Genes']``.

    Returns ``(prior_count, joint_count, neg_joint_count)``: the number of
    carrying records, and how many of those have ``op(record[prop], val)``
    truthy / falsy.
    """
    # NOTE(review): the key read here is 'Patient_Gene_Variant' (singular);
    # verify it matches the key the population data is actually built with.
    prior_count = 0
    joint_count = 0
    neg_joint_count = 0
    for record in src_population_data.values():
        try:
            if variant is None:
                carries = gene in record['Patient_Genes']
            else:
                carries = record['Patient_Gene_Variant'][gene] == variant
            if carries:
                prior_count += 1
                if op(record[prop], val):
                    joint_count += 1
                else:
                    neg_joint_count += 1
        except (KeyError, TypeError):
            # Best effort: a record that lacks the field, or holds a value
            # the operator cannot compare, contributes nothing further.
            continue
    return prior_count, joint_count, neg_joint_count
def _processOptionDependency(option, option_dependencies, bkb, src_population,
                             src_population_data):
    #-- Get consistent option combinations
    tail_product = [
        combo
        for combo in itertools.product(option_dependencies, [True, False])
    ]
    tail_combos = list()
    for combo in itertools.combinations(tail_product,
                                        r=len(option_dependencies)):
        combo = list(combo)
        prop_set = set()
        for ev_state in combo:
            ev_, state = ev_state
            prop_, op_, val_ = ev_
            prop_set.add(prop_)
        if len(prop_set) == len(combo):
            tail_combos.append(combo)
    combos = [
        combo
        for combo in itertools.product([(option, True), (option,
                                                         False)], tail_combos)
    ]

    #-- Calculate joint probabilities from data
    counts = list()
    for combo in combos:
        head, tail = combo
        #-- Put head and tail in one list
        combo = [head] + tail
        count = 0
        for entity_name, src_name in src_population.items():
            truth = list()
            for ev_state in combo:
                ev_, state = ev_state
                prop_, op_str_, val_ = ev_
                op_ = _process_operator(op_str_)
                #-- Check if the src_population item is a list of items (useful for drugs):
                if type(src_population_data[src_name][prop_]) == tuple:
                    if op_(set(val_),
                           set(src_population_data[src_name][prop_])):
                        res = True
                    else:
                        res = False
                else:
                    res = op_(src_population_data[src_name][prop_], val_)
                truth.append(res == state)
            if all(truth):
                count += 1
        counts.append(count)

    probs = [float(count) / len(src_population) for count in counts]

    #-- Setup each S-node
    for j, combo in enumerate(combos):
        head, tail = combo
        #-- Process head
        comp_head_idx, i_node_head_idx = _processDependencyHead(head, bkb)
        #-- Process Tail
        processed_tail = _processDependecyTail(tail, bkb)
        #-- Add Snode
        if probs[j] > 0:
            bkb.addSNode(
                BKB_S_node(init_component_index=comp_head_idx,
                           init_state_index=i_node_head_idx,
                           init_probability=probs[j],
                           init_tail=processed_tail))

    return bkb
# Collects one BKB fragment per patient.
bkfs = list()

#-- Make BKB frags
# Only the number of patients matters in this loop; neither the index nor
# the patient itself is used in the body.
for j, _ in enumerate(PATIENTS):
    bkf = BKB()

    for i in range(NUM_GENES):
        #-- Setup Gene i component.
        # The component and its 'True' state are created for every gene.
        comp_idx = bkf.addComponent('Gene{}_mutated'.format(i))
        stateTrue_idx = bkf.addComponentState(comp_idx, 'True')

        # Coin flip: only about half of the genes get a probability-1 prior
        # S-node; the rest keep a 'True' state with no supporting S-node.
        if random.choice([True, False]):
            bkf.addSNode(
                BKB_S_node(init_component_index=comp_idx,
                           init_state_index=stateTrue_idx,
                           init_probability=1))
    bkfs.append(bkf)

#-- Fuse patients together.
# Arguments: the fragments, a list holding a 1 per patient (presumably the
# per-fragment reliabilities -- TODO confirm against fuse()), and one
# string per patient built from hash(patient) (presumably the source
# names -- TODO confirm).
# NOTE(review): if PATIENTS holds strings, hash() differs between
# interpreter runs unless PYTHONHASHSEED is fixed, so these strings would
# not be reproducible across runs.
fused_bkb = fuse(bkfs, [1 for _ in range(len(PATIENTS))],
                 [str(hash(patient)) for patient in PATIENTS],
                 working_dir=os.getcwd())


#-- Impose Structure while maintaining mutual exclusivity.
def impose_structure(bkb, structure):
    bkb_ = copy.deepcopy(bkb)
    S_nodes_by_head = _constructSNodesByHead(bkb_)
    for gene1, dependecies in structure.items():
        gene1_comp_idx = bkb_.getComponentIndex(gene1)