def _evaluate_node(self, node):
    raw_identified = self._bayes.identify(node.genome, node,
                                          self._ibd_detector)
    sibling_group, ln_ratio, identified_node = raw_identified
    node_generation = self._population.node_to_generation[node]
    print("Confidence score: {}".format(ln_ratio))
    if node in sibling_group:
        self.generation_error[node_generation]["correct"] += 1
        self.correct += 1
        print("correct")
    else:
        self.generation_error[node_generation]["incorrect"] += 1
        print("incorrect")
        self.incorrect += 1
    write_log("evaluate", {"target node": node._id,
                           "log ratio": ln_ratio,
                           "identified": set(x._id for x in sibling_group),
                           "run_number": self._run_number})
    stdout.flush()
    return IdentifyResult(node, sibling_group, identified_node, ln_ratio,
                          node in sibling_group, self._run_number)
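# A minimal sketch (not part of the original module) of how the
# IdentifyResult records returned above might be aggregated after a run.
# `results` is a hypothetical stand-in for self.identify_results; the
# field names follow the IdentifyResult namedtuple defined in this file.
def summarize_results(results):
    """Return (accuracy, mean confidence) over IdentifyResult records."""
    correct = sum(1 for r in results if r.correct)
    mean_ratio = sum(r.ln_ratio for r in results) / len(results)
    return correct / len(results), mean_ratio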
def run_evaluation(self, unlabeled, expansion=None):
    # generation_map = population.node_to_generation
    # write_log("labeled_nodes", [node._id for node in labeled_nodes])
    # write_log("target_nodes", [node._id for node in unlabeled])
    if expansion is None:
        print("Attempting to identify {} random nodes.".format(
            len(unlabeled)), flush=True)
    write_log("start time", datetime.now())
    for i, node in enumerate(unlabeled):
        # Compare against None explicitly: an expansion index of 0 is a
        # valid value and must not be treated as "no expansion".
        if expansion is not None:
            i = expansion
        print("Iteration: {}, actual node ID: {}".format(i + 1, node._id))
        start_time = perf_counter()
        if expansion is None and self._randomize_labeled:
            potential = self._original_labeled.copy()
            if node._id in potential:
                potential.remove(node._id)
            assert len(potential) > self._randomize_labeled
            new_anchors = sample(potential, self._randomize_labeled)
            self.labeled_nodes = new_anchors
        if self._out_of_genealogy > 0:
            self._reset_out_of_genealogy()
            self._remove_node_from_genealogy(node)
        self.identify_results.append(self._evaluate_node(node))
        end_time = perf_counter()
        print("It took {} seconds".format(end_time - start_time))
    write_log("end time", datetime.now())
    self._run_number += 1
    return self.identify_results
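# A minimal illustration of the pitfall the `is None` checks above avoid:
# expansion indices start at 0, and a bare truthiness test would wrongly
# treat index 0 as "no expansion".
expansion = 0
assert not expansion          # 0 is falsy...
assert expansion is not None  # ...but it is still a real index value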
def run_evaluation(self, unlabeled):
    # generation_map = population.node_to_generation
    # write_log("labeled_nodes", [node._id for node in labeled_nodes])
    # write_log("target_nodes", [node._id for node in unlabeled])
    print("Attempting to identify {} random nodes.".format(len(unlabeled)),
          flush=True)
    write_log("start time", datetime.now())
    for i, node in enumerate(unlabeled):
        print("Iteration: {}, actual node ID: {}".format(i + 1, node._id))
        self.identify_results.append(self._evaluate_node(node))
    write_log("end time", datetime.now())
    self._run_number += 1
    return self.identify_results
def _evaluate_node(self, node):
    identified, ln_ratio = self._bayes.identify(node.genome, node,
                                                self._ibd_threshold)
    assert len(identified) > 0
    node_generation = self._population.node_to_generation[node]
    if node in identified:
        self.generation_error[node_generation]["correct"] += 1
        self.correct += 1
        print("correct")
    else:
        self.generation_error[node_generation]["incorrect"] += 1
        print("incorrect")
        self.incorrect += 1
    write_log("evaluate", {"target node": node._id,
                           "log ratio": ln_ratio,
                           "identified": set(x._id for x in identified),
                           "run_number": self._run_number})
    stdout.flush()
    return IdentifyResult(node, identified, ln_ratio,
                          node in identified, self._run_number)
def run_expansion_round(self, identify_candidates, confidence_ratio,
                        expansion_data=None, expansion_filename=None):
    print("Running expansion round.")
    to_evaluate = list(identify_candidates)
    shuffle(to_evaluate)
    added = []
    correct_add_count = 0
    write_log("expansion_confidence_ratio", confidence_ratio)
    for i, node in enumerate(to_evaluate):
        self.run_evaluation([node])
        result = self.identify_results[-1]
        print("Ratio: {}".format(result.ln_ratio))
        if result.ln_ratio > confidence_ratio:
            print("Adding node.")
            added.append(result)
            self._bayes.add_labeled_node_id(result.identified_node._id)
            if result.correct:
                correct_add_count += 1
            else:
                # Wrong identification: the misidentified node now
                # carries the target's genome as its suspected genome.
                result.identified_node.suspected_genome = result.target_node.genome
        if i % 20 == 0:
            self.print_metrics()
            print("Nodes added this round: {}".format(len(added)))
            print("Correct nodes added: {}".format(correct_add_count))
        if expansion_data and expansion_filename and i % 500 == 0:
            remaining = set(node._id for node in to_evaluate[i:])
            expansion_data.extend_round(added, remaining)
            with open(expansion_filename, "wb") as expansion_file:
                dump(expansion_data, expansion_file)
            write_log("expansion_data_written", {"current_node": node._id,
                                                 "complete": False})
    write_log("expansion_round", {"added": len(added),
                                  "correct_added": correct_add_count,
                                  "accuracy": self.accuracy})
    self.print_metrics()
    print("Added {} nodes this round.".format(len(added)))
    return added
def print_metrics(self):
    total = self.correct + self.incorrect
    print("{} correct, {} incorrect, {} total.".format(
        self.correct, self.incorrect, total))
    stdout.flush()
    write_log("correct", self.correct)
    write_log("incorrect", self.incorrect)
    write_log("total", total)
    percent_accurate = self.accuracy
    # Standard error of a binomial proportion: sqrt(p * (1 - p) / n).
    std_dev = sqrt(percent_accurate * (1 - percent_accurate) * total) / total
    print("{}±{:0.3} percent accurate.".format(percent_accurate, std_dev))
    for generation, counter in self.generation_error.items():
        gen_correct = counter["correct"]
        gen_incorrect = counter["incorrect"]
        gen_total = gen_correct + gen_incorrect
        format_string = "For generation {}: {} accuracy, {} total."
        print(format_string.format(generation, gen_correct / gen_total,
                                   gen_total))
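# The ± value printed above is the standard error of a binomial
# proportion, sqrt(p * (1 - p) / n). A quick sanity check of the
# expression used in print_metrics (a sketch, not module code):
from math import sqrt, isclose

def _proportion_std_error(correct, incorrect):
    total = correct + incorrect
    p = correct / total
    # Algebraically identical to sqrt(p * (1 - p) / total).
    return sqrt(p * (1 - p) * total) / total

assert isclose(_proportion_std_error(90, 10), 0.03)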
parser.add_argument("--ibd-threshold", type = int, default = 5000000, help = "IBD segments smaller than this value will " "go undetected") parser.add_argument("--deterministic_random", "-d", action = "store_true", help = "Seed the random number generator such that the same labeled nodes will be chosen on runs with the same number of nodes.") parser.add_argument("--deterministic_labeled", "-ds", action = "store_true", help = "Seed the random number generator to ensure labeled node subset is deterministic.") args = parser.parse_args() if args.data_logfile: change_logfile_name(args.data_logfile) start_logging() else: stop_logging() write_log("args", args) print("Loading population.", flush = True) with open(args.population, "rb") as pickle_file: population = PopulationUnpickler(pickle_file).load() print("Loading classifier", flush = True) with open(args.classifier, "rb") as pickle_file: classifier = load(pickle_file) nodes = set(member for member in population.members if member.genome is not None) # nodes = set(member for member in population.generations[-1].members # if member.genome is not None) if args.subset_labeled:
def identify(self, genome, actual_node, ibd_threshold=5000000):
    id_map = self._population.id_mapping
    length_classifier = self._length_classifier
    shared_list = []
    for labeled_node_id in length_classifier._labeled_nodes:
        labeled_node = id_map[labeled_node_id]
        s = shared_segment_length_genomes(genome, labeled_node.genome,
                                          ibd_threshold)
        shared_list.append((labeled_node_id, s))

    node_data = dict()
    batch_node_id = []
    batch_labeled_node_id = []
    batch_lengths = []
    batch_cryptic_lengths = []
    # This is done for performance reasons, as appending to this
    # list is the hottest part of the loop.
    append_cryptic = batch_cryptic_lengths.append
    distributions = length_classifier._distributions
    # Testing membership against a local set avoids repeated attribute
    # lookups in the hot loop.
    distribution_members = set(distributions.keys())
    nodes = self._to_search(shared_list)
    for node in nodes:
        node_start_i = len(batch_node_id)
        node_id = node._id
        cryptic_start_i = len(batch_cryptic_lengths)
        for labeled_node_id, shared in shared_list:
            if (node_id, labeled_node_id) not in distribution_members:
                append_cryptic(shared)
            else:
                batch_node_id.append(node_id)
                batch_labeled_node_id.append(labeled_node_id)
                batch_lengths.append(shared)
        cryptic_stop_i = len(batch_cryptic_lengths)
        node_stop_i = len(batch_node_id)
        node_data[node] = ProbabilityData(node_start_i, node_stop_i,
                                          cryptic_start_i, cryptic_stop_i)

    calc_prob = length_classifier.get_batch_probability(
        batch_lengths, batch_node_id, batch_labeled_node_id)
    cryptic_prob = length_classifier.get_batch_smoothing(
        batch_cryptic_lengths)

    node_probabilities = dict()  # Probability that a node is a match
    for node, prob_data in node_data.items():
        start_i, stop_i, cryptic_start_i, cryptic_stop_i = prob_data
        node_calc = calc_prob[start_i:stop_i]
        node_cryptic = cryptic_prob[cryptic_start_i:cryptic_stop_i]
        log_prob = (np.sum(np.log(node_calc)) +
                    np.sum(np.log(node_cryptic)))
        node_probabilities[node] = log_prob
    write_log("identify",
              {"node": actual_node._id,
               "probs": {node._id: prob
                         for node, prob in node_probabilities.items()}})
    potential_nodes = nlargest(8, node_probabilities.items(),
                               key=lambda x: x[1])
    top, top_log_prob = potential_nodes[0]
    sibling_group = get_sibling_group(top)
    for node, log_prob in potential_nodes[1:]:
        if node in sibling_group:
            continue
        next_node = node
        next_log_prob = log_prob
        break
    else:
        next_node, next_log_prob = potential_nodes[1]
    log_ratio = top_log_prob - next_log_prob
    return (sibling_group, log_ratio)
def identify(self, genome, actual_node, segment_detector):
    id_map = self._population.id_mapping
    length_classifier = self._length_classifier
    # TODO: Eliminate shared_list and use shared_dict everywhere.
    shared_list = []
    for labeled_node_id in length_classifier._labeled_nodes:
        labeled_node = id_map[labeled_node_id]
        s = segment_detector.shared_segment_length(
            genome, labeled_node.suspected_genome)
        shared_list.append((labeled_node_id, s))
    shared_dict = dict(shared_list)
    labeled_nodes = set(length_classifier._labeled_nodes)
    labeled_nodes_cryptic, all_lengths = list(zip(*shared_dict.items()))
    # We convert to python floats, as summing them is faster.
    all_cryptic_possibilities = [
        float(x) for x
        in np.log(length_classifier.get_batch_smoothing_gamma(all_lengths))]
    # Maps labeled nodes to the log cryptic value of the IBD detected.
    cryptic_lookup = dict(zip(labeled_nodes_cryptic,
                              all_cryptic_possibilities))

    node_data = dict()
    batch_node_id = []
    batch_labeled_node_id = []
    batch_lengths = []
    node_cryptic_log_probs = dict()
    by_unlabeled = length_classifier.group_by_unlabeled
    nodes = self._to_search(shared_list, actual_node.sex)
    if len(nodes) == 0:
        # We have no idea which node it is.
        return RawIdentified(set(), float("-inf"), None)
    empty = set()
    for node in nodes:
        node_start_i = len(batch_node_id)
        node_id = node._id
        node_cryptic_log_probs[node] = 0
        cryptic_nodes = labeled_nodes - by_unlabeled.get(node_id, empty)
        if len(cryptic_nodes) > 0:
            node_cryptic_log_probs[node] = sum(
                cryptic_lookup[labeled_node_id]
                for labeled_node_id in cryptic_nodes)
        non_cryptic_nodes = list(labeled_nodes - cryptic_nodes)
        if len(non_cryptic_nodes) > 0:
            batch_node_id.extend([node_id] * len(non_cryptic_nodes))
            batch_labeled_node_id.extend(non_cryptic_nodes)
            batch_lengths.extend(shared_dict[labeled_node_id]
                                 for labeled_node_id in non_cryptic_nodes)
        node_stop_i = len(batch_node_id)
        node_data[node] = ProbabilityData(node_start_i, node_stop_i, -1, -1)
    assert len(node_data) > 0

    if len(batch_lengths) > 0:
        calc_prob, zero_replace = length_classifier.get_batch_pdf(
            batch_lengths, batch_node_id, batch_labeled_node_id)
    else:
        calc_prob = []
    node_probabilities = dict()
    for node, prob_data in node_data.items():
        start_i, stop_i, _, _ = prob_data
        node_calc = calc_prob[start_i:stop_i]
        log_prob = (np.sum(np.log(node_calc)) +
                    node_cryptic_log_probs[node])
        node_probabilities[node] = log_prob
    assert len(node_probabilities) > 0
    write_log("identify",
              {"node": actual_node._id,
               "probs": {node._id: prob
                         for node, prob in node_probabilities.items()}})
    potential_nodes = nlargest(8, node_probabilities.items(),
                               key=lambda x: x[1])
    top, top_log_prob = potential_nodes[0]
    sibling_group = get_suspected_sibling_group(top)
    for node, log_prob in potential_nodes[1:]:
        if node in sibling_group:
            continue
        next_node = node
        next_log_prob = log_prob
        break
    else:
        next_node, next_log_prob = potential_nodes[1]
    log_ratio = top_log_prob - next_log_prob
    return RawIdentified(get_sibling_group(top), log_ratio, top)
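# The confidence score returned by identify is a log-likelihood ratio:
# top_log_prob - next_log_prob == ln(P(top) / P(runner-up)). A small
# numeric check of that identity (the values are hypothetical):
import math
top_log_prob, next_log_prob = -120.0, -125.0
assert math.isclose(math.exp(top_log_prob - next_log_prob),
                    math.exp(top_log_prob) / math.exp(next_log_prob))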
    parser.error(
        "A subset of labeled nodes is necessary for expansion rounds "
        "when the expansion rounds data file does not already exist.")
if expansion_file_exists:
    with open(args.expansion_rounds_data, "rb") as expansion_file:
        expansion_data = load(expansion_file)
else:
    expansion_data = None

if args.data_logfile:
    change_logfile_name(args.data_logfile)
    start_logging()
else:
    stop_logging()
write_log("args", args)

print("Loading population.", flush=True)
with open(args.population, "rb") as pickle_file:
    population = PopulationUnpickler(pickle_file).load()

print("Loading classifier", flush=True)
with open(args.classifier, "rb") as pickle_file:
    classifier = load(pickle_file)

if args.cm_ibd_threshold > 0:
    cur_path = realpath(__file__)
    parent = split(split(cur_path)[0])[0]
    rates_dir = join(parent, "data", "recombination_rates")
    print("Loading recombination data for centimorgan cutoff.", flush=True)
    recomb_data = centimorgan_data_from_directory(rates_dir)
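# The realpath/split/join sequence above resolves to
# <repository root>/data/recombination_rates. An equivalent pathlib
# spelling (a sketch; the project code uses os.path as written):
from pathlib import Path
rates_dir_pathlib = (Path(__file__).resolve().parent.parent
                     / "data" / "recombination_rates")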
def run_expansion_round(self, identify_candidates, confidence_ratio,
                        expansion_data=None, expansion_filename=None,
                        revisit=True):
    print("Running expansion round.")
    if revisit:
        to_evaluate = list(identify_candidates)
    else:
        id_map = self._population.id_mapping
        labeled_genomes = set(id_map[x].suspected_genome
                              for x in self.labeled_nodes)
        to_evaluate = [x for x in identify_candidates
                       if x.suspected_genome not in labeled_genomes]
    shuffle(to_evaluate)
    added = []
    correct_add_count = 0
    new_added = 0
    self._bayes.probability_logging = False
    write_log("expansion_confidence_ratio", confidence_ratio)
    for i, node in enumerate(to_evaluate):
        self.run_evaluation([node], expansion=i)
        result = self.identify_results[-1]
        if result.ln_ratio > confidence_ratio:
            print("Adding node.")
            identified_node = result.identified_node
            added.append(result)
            if not expansion_data.identified_before(result.target_node):
                new_added += 1
            if result.correct:
                correct_add_count += 1
            # add_node returns the id of the node this target was
            # previously identified as, or None on first identification.
            prev_added = expansion_data.add_node(result)
            if prev_added != identified_node._id:
                self._bayes.add_labeled_node_id(identified_node._id)
                if prev_added is not None:
                    self._bayes.remove_labeled_node_id(prev_added)
        if 0 < i and i % 20 == 0:
            self.print_metrics()
            print("Nodes added this round: {}, New nodes added: {}".format(
                len(added), new_added))
            print("Correct nodes added: {}".format(correct_add_count))
        if expansion_data and expansion_filename and i % 500 == 0:
            remaining = set(node._id for node in to_evaluate[i:])
            expansion_data.remaining = remaining
            with open(expansion_filename, "wb") as expansion_file:
                dump(expansion_data, expansion_file)
            write_log("expansion_data_written", {"current_node": node._id,
                                                 "complete": False})
    expansion_data.remaining = None
    write_log("expansion_round", {"added": len(added),
                                  "evaluated": len(to_evaluate),
                                  "correct_added": correct_add_count,
                                  "accuracy": self.accuracy})
    self.print_metrics()
    print("Added {} nodes this round.".format(len(added)))
    if len(added) == 0:
        input("No nodes added. Press enter to continue")
    return added
def identify(self, genome, actual_node, segment_detector):
    id_map = self._population.id_mapping
    length_classifier = self._length_classifier
    # TODO: Eliminate shared_list and use shared_dict everywhere.
    shared_list = []
    anchors = set(length_classifier._labeled_nodes) - self.exclude_anchors
    sorted_labeled = sorted(anchors)
    np_sorted_labeled = np.array(sorted_labeled, dtype=np.uint32)
    sorted_shared = []
    for labeled_node_id in sorted_labeled:
        labeled_node = id_map[labeled_node_id]
        s = segment_detector.shared_segment_length(
            genome, labeled_node.suspected_genome)
        shared_list.append((labeled_node_id, s))
        sorted_shared.append(s)
    write_log("positive ibd count", sum(0.0 < x for x in sorted_shared))
    shared_dict = dict(shared_list)
    sorted_shared = np.array(sorted_shared, dtype=np.float64)
    np_cryptic = np.log(
        length_classifier.get_batch_smoothing_gamma(sorted_shared))

    node_data = []
    batch_shape = []
    batch_scale = []
    batch_zero_prob = []
    batch_lengths = []
    nodes = self._to_search(shared_list, actual_node.sex)
    if len(nodes) == 0:
        # We have no idea which node it is.
        return RawIdentified(set(), float("-inf"), None)
    for node in nodes:
        node_start_i = len(batch_shape)
        node_id = node._id
        if node_id in length_classifier._distributions:
            (labeled_ids, shape,
             scale, zero_prob) = length_classifier._distributions[node_id]
        else:
            labeled_ids = np.array([], dtype=np.uint32)
            shape = scale = zero_prob = np.array([], dtype=np.float64)
        calc_data = calculate_probabilities(labeled_ids, shape, scale,
                                            zero_prob, sorted_shared,
                                            np_sorted_labeled, np_cryptic,
                                            node_id)
        (cur_lengths, cur_shapes, cur_scales,
         cur_zero_prob, cur_cryptic) = calc_data
        batch_lengths.extend(cur_lengths)
        batch_shape.extend(cur_shapes)
        batch_scale.extend(cur_scales)
        batch_zero_prob.extend(cur_zero_prob)
        node_stop_i = len(batch_shape)
        node_data.append(ProbabilityData(node, node_start_i, node_stop_i,
                                         cur_cryptic))
    assert len(node_data) > 0

    if len(batch_lengths) > 0:
        calc_prob, zero_replace = length_classifier.batch_pdf_distributions(
            batch_lengths, batch_shape, batch_scale, batch_zero_prob)
    else:
        calc_prob = []
    # Cumulative log probabilities let us score each node's contiguous
    # slice of the batch with a single subtraction.
    log_calc_prob_cum = np.cumsum(np.log(calc_prob))
    del calc_prob
    log_calc_prob_cum = np.concatenate(([0.0], log_calc_prob_cum))
    node_probabilities = dict()
    for node, start_i, stop_i, cryptic_prob in node_data:
        log_prob = (log_calc_prob_cum[stop_i]
                    - log_calc_prob_cum[start_i]) + cryptic_prob
        node_probabilities[node] = log_prob
    assert len(node_probabilities) > 0
    if self.probability_logging:
        write_log("identify",
                  {"node": actual_node._id,
                   "probs": {node._id: prob
                             for node, prob in node_probabilities.items()}})
    if len(node_probabilities) == 0:
        return RawIdentified(set(), -INF, None)
    # The value 8 is somewhat arbitrary. We are always able to
    # generate our confidence value with the top 8, as sibships
    # tend to be small. This number may need to be larger for
    # populations with large sibships.
    potential_nodes = nlargest(8, node_probabilities.items(),
                               key=lambda x: x[1])
    top, top_log_prob = potential_nodes[0]
    sibling_group = get_suspected_sibling_group(top)
    for node, log_prob in potential_nodes[1:]:
        if node in sibling_group:
            continue
        next_node = node
        next_log_prob = log_prob
        break
    else:
        if len(potential_nodes) > 1:
            next_node, next_log_prob = potential_nodes[1]
    if len(potential_nodes) > 1:
        log_ratio = top_log_prob - next_log_prob
    else:
        log_ratio = -INF
    return RawIdentified(get_sibling_group(top), log_ratio, top)
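# The identify variant above avoids a per-node np.sum(np.log(...)) by
# taking one cumulative sum over the whole batch: the log-probability of
# any contiguous slice [start, stop) then costs a single subtraction.
# A small demonstration of the identity (a sketch, not module code):
import numpy as np
probs = np.array([0.5, 0.25, 0.1, 0.8])
cum = np.concatenate(([0.0], np.cumsum(np.log(probs))))
start, stop = 1, 3
assert np.isclose(cum[stop] - cum[start], np.sum(np.log(probs[start:stop])))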
def identify(self, genome, actual_node, segment_detector):
    id_map = self._population.id_mapping
    length_classifier = self._length_classifier
    # TODO: Eliminate shared_list and use shared_dict everywhere.
    shared_list = []
    for labeled_node_id in length_classifier._labeled_nodes:
        labeled_node = id_map[labeled_node_id]
        s = segment_detector.shared_segment_length(
            genome, labeled_node.suspected_genome)
        shared_list.append((labeled_node_id, s))
    shared_dict = dict(shared_list)
    labeled_nodes = set(length_classifier._labeled_nodes)

    node_data = dict()
    batch_node_id = []
    batch_labeled_node_id = []
    batch_lengths = []
    batch_cryptic_lengths = []
    by_unlabeled = length_classifier.group_by_unlabeled
    nodes = self._to_search(shared_list)
    if len(nodes) == 0:
        # We have no idea which node it is.
        return RawIdentified(set(), float("-inf"), None)
    empty = set()
    for node in nodes:
        node_start_i = len(batch_node_id)
        node_id = node._id
        cryptic_start_i = len(batch_cryptic_lengths)
        cryptic_nodes = labeled_nodes - by_unlabeled.get(node_id, empty)
        if len(cryptic_nodes) > 0:
            batch_cryptic_lengths.extend(shared_dict[labeled_node_id]
                                         for labeled_node_id
                                         in cryptic_nodes)
        non_cryptic_nodes = list(labeled_nodes - cryptic_nodes)
        if len(non_cryptic_nodes) > 0:
            batch_node_id.extend([node_id] * len(non_cryptic_nodes))
            batch_labeled_node_id.extend(non_cryptic_nodes)
            batch_lengths.extend(shared_dict[labeled_node_id]
                                 for labeled_node_id in non_cryptic_nodes)
        cryptic_stop_i = len(batch_cryptic_lengths)
        node_stop_i = len(batch_node_id)
        node_data[node] = ProbabilityData(node_start_i, node_stop_i,
                                          cryptic_start_i, cryptic_stop_i)
    assert len(node_data) > 0

    if len(batch_lengths) > 0:
        calc_prob = length_classifier.get_batch_probability(
            batch_lengths, batch_node_id, batch_labeled_node_id)
    else:
        calc_prob = []
    cryptic_prob = length_classifier.get_batch_smoothing(
        batch_cryptic_lengths)

    node_probabilities = dict()  # Probability that a node is a match
    for node, prob_data in node_data.items():
        start_i, stop_i, cryptic_start_i, cryptic_stop_i = prob_data
        node_calc = calc_prob[start_i:stop_i]
        node_cryptic = cryptic_prob[cryptic_start_i:cryptic_stop_i]
        log_prob = (np.sum(np.log(node_calc)) +
                    np.sum(np.log(node_cryptic)))
        node_probabilities[node] = log_prob
    assert len(node_probabilities) > 0
    write_log("identify",
              {"node": actual_node._id,
               "probs": {node._id: prob
                         for node, prob in node_probabilities.items()}})
    potential_nodes = nlargest(8, node_probabilities.items(),
                               key=lambda x: x[1])
    top, top_log_prob = potential_nodes[0]
    sibling_group = get_suspected_sibling_group(top)
    for node, log_prob in potential_nodes[1:]:
        if node in sibling_group:
            continue
        next_node = node
        next_log_prob = log_prob
        break
    else:
        next_node, next_log_prob = potential_nodes[1]
    log_ratio = top_log_prob - next_log_prob
    return RawIdentified(sibling_group, log_ratio, top)
help= "Search only nodes that are related to labeled nodes for which there is nonzero ibd." ) args = parser.parse_args() if args.expansion_rounds > 1 and args.subset_labeled is None: parser.error( "A subset of labeled nodes is necessary for expansion rounds.") if args.data_logfile: change_logfile_name(args.data_logfile) start_logging() else: stop_logging() write_log("args", args) IdentifyResult = namedtuple( "IdentifyResult", ["target_node", "identified_node", "ln_ratio", "correct", "run_number"]) class Evaluation: def __init__(self, population, classifier, labeled_nodes=None, ibd_threshold=0, search_related=False): self._population = population self._classifier = classifier