def compute_heuristic(self, gate, gate_data_axes):
    """Score a candidate gate as (positive fraction inside) - (negative fraction inside).

    Args:
        gate: indexable with four entries that are forwarded, in order, as the
            rectangle bounds to ``dh.filter_rectangle``.
        gate_data_axes: indexable whose first two entries select the two data
            axes the rectangle is applied to.

    Returns:
        The difference between the two fractions. Both fractions are
        normalised by the size of the *full* ``self.pos_data`` /
        ``self.neg_data``, even when greedy filtering restricts the
        candidates to the data that survived the parent gates.
    """
    if self.greedy_filtering:
        pos_candidates = self.cur_data_from_parent_pos
        neg_candidates = self.cur_data_from_parent_neg
    else:
        pos_candidates = self.pos_data
        neg_candidates = self.neg_data

    axes = (gate_data_axes[0], gate_data_axes[1])
    bounds = (gate[0], gate[1], gate[2], gate[3])

    # Ask for the membership mask of each population before counting.
    pos_inside, neg_inside = [
        dh.filter_rectangle(candidates, *axes, *bounds, return_idx=True)
        for candidates in (pos_candidates, neg_candidates)
    ]

    pos_fraction = np.sum(pos_inside) / self.pos_data.shape[0]
    neg_fraction = np.sum(neg_inside) / self.neg_data.shape[0]
    return pos_fraction - neg_fraction
def construct_heuristic_gates(self):
    """Pick the best gate for every gate index and store them on the instance.

    Gates are chosen in index order via ``self.find_best_gate``. When
    ``self.greedy_filtering`` is set, the running positive/negative data
    (``self.cur_data_from_parent_pos`` / ``..._neg``) is narrowed to the
    chosen gate after each step, so later gates are selected on the
    already-filtered data. The result is stored in ``self.heuristic_gates``
    and printed.
    """
    # Every run starts from the complete datasets.
    self.cur_data_from_parent_pos = self.pos_data
    self.cur_data_from_parent_neg = self.neg_data

    chosen_gates = []
    for gate_idx in range(self.num_gates):
        best_gate = self.find_best_gate(gate_idx)
        chosen_gates.append(best_gate)

        if not self.greedy_filtering:
            continue

        axes = self.gate_data_axes_ids[gate_idx]
        rectangle = (
            axes[0], axes[1],
            best_gate[0], best_gate[1], best_gate[2], best_gate[3],
        )
        self.cur_data_from_parent_pos = dh.filter_rectangle(
            self.cur_data_from_parent_pos, *rectangle
        )
        self.cur_data_from_parent_neg = dh.filter_rectangle(
            self.cur_data_from_parent_neg, *rectangle
        )

    self.heuristic_gates = chosen_gates
    print(self.heuristic_gates)
def filter_data_inside_first_model_gate(self, model):
    """Restrict every training sample to the cells inside the model's first gate.

    Side effects on the instance:
        * ``unfiltered_by_model_gates_x_tr`` keeps the original ``x_tr``.
        * ``x_tr`` becomes a list of NumPy arrays, one per sample, holding
          only the rows selected by the gate mask.
        * ``y_tr`` is detached and converted to a NumPy array.
        * ``filtered_idxs_per_sample`` stores the per-sample gate masks.

    Args:
        model: object whose ``get_gates()`` returns a sequence; only its
            first element is used.
    """
    first_gate = model.get_gates()[0]
    # Non-positive bounds are replaced by 0 before filtering.
    bounds = [bound if bound > 0 else 0 for bound in first_gate]

    masks_per_sample = [
        dh.filter_rectangle(
            sample, 0, 1,
            bounds[0], bounds[1], bounds[2], bounds[3],
            return_idx=True,
        )
        for sample in self.x_tr
    ]

    self.unfiltered_by_model_gates_x_tr = self.x_tr

    filtered_samples = []
    for sample, mask in zip(self.x_tr, masks_per_sample):
        inside = sample[mask].detach().numpy()
        if inside.shape[0] > 0:
            filtered_samples.append(inside)
        else:
            # Put in one dummy data point so umap can work on empty samples.
            filtered_samples.append(np.array([[0, 0]]))
    self.x_tr = filtered_samples

    self.y_tr = self.y_tr.detach().numpy()
    self.filtered_idxs_per_sample = masks_per_sample
def get_idxs_in_gate4(self, data, dim_to_shift):
    """Return the mask of rows inside gate 4 and all of its ancestor gates.

    The ancestor mask comes from ``get_idxs_in_gate3`` (evaluated on the
    axes registered under ``'gate3'``); it is AND-ed with this gate's own
    rectangle on the two axes given by ``dim_to_shift``.
    """
    ancestors_mask = self.get_idxs_in_gate3(data, GATE_NAME_TO_DIMS['gate3'])
    first_axis, second_axis = dim_to_shift[0], dim_to_shift[1]
    own_mask = dh.filter_rectangle(
        data, first_axis, second_axis, 0., 1228, 0, 1843, return_idx=True
    )
    return ancestors_mask & own_mask
def get_idxs_in_gate3(self, data, dim_to_shift):
    """Return the mask of rows inside gate 3 and all of its ancestor gates.

    The ancestor mask comes from ``get_idxs_in_gate2`` (evaluated on the
    axes registered under ``'gate2'``); it is AND-ed with this gate's own
    rectangle on the two axes given by ``dim_to_shift``.
    """
    ancestors_mask = self.get_idxs_in_gate2(data, GATE_NAME_TO_DIMS['gate2'])
    first_axis, second_axis = dim_to_shift[0], dim_to_shift[1]
    own_mask = dh.filter_rectangle(
        data, first_axis, second_axis, 1638, 3891, 2150, 3891, return_idx=True
    )
    return ancestors_mask & own_mask
def get_idxs_in_gate2(self, data, dim_to_shift):
    """Return the mask of rows inside gate 2 and all of its ancestor gates.

    The ancestor mask comes from ``get_idxs_in_gate1`` (evaluated on the
    axes registered under ``'gate1'``); it is AND-ed with this gate's own
    rectangle on the two axes given by ``dim_to_shift``.
    """
    ancestors_mask = self.get_idxs_in_gate1(data, GATE_NAME_TO_DIMS['gate1'])
    first_axis, second_axis = dim_to_shift[0], dim_to_shift[1]
    own_mask = dh.filter_rectangle(
        data, first_axis, second_axis, 921, 2150, 102, 921, return_idx=True
    )
    return ancestors_mask & own_mask
def get_idxs_in_gate1(self, data, dim_to_shift):
    """Return the mask of rows inside gate 1 and its parent gate 0.

    The parent mask comes from ``get_idxs_in_gate0`` (evaluated on the axes
    registered under ``'gate0'``); it is AND-ed with this gate's own
    rectangle on the two axes given by ``dim_to_shift``.
    """
    parent_mask = self.get_idxs_in_gate0(data, GATE_NAME_TO_DIMS['gate0'])
    first_axis, second_axis = dim_to_shift[0], dim_to_shift[1]
    own_mask = dh.filter_rectangle(
        data, first_axis, second_axis, 102, 921, 2048, 3891, return_idx=True
    )
    return parent_mask & own_mask
def get_idxs_in_gate1(self, data, dim_to_shift):
    """Return the mask of rows inside gate 1 and its parent gate 0.

    The parent mask comes from ``get_idxs_in_gate0`` (evaluated on the axes
    registered under ``'gate0'``); it is AND-ed with this gate's own
    rectangle on the two axes given by ``dim_to_shift``. The shapes of both
    masks are printed before they are combined.
    """
    # NOTE(review): another definition with this exact name is visible
    # nearby; if both live in the same scope the later one wins - confirm
    # which is intended.
    parent_mask = self.get_idxs_in_gate0(data, GATE_NAME_TO_DIMS['gate0'])
    first_axis, second_axis = dim_to_shift[0], dim_to_shift[1]
    own_mask = dh.filter_rectangle(
        data, first_axis, second_axis, 102, 921, 2048, 3891, return_idx=True
    )
    # Debug output: both masks must share a shape for the AND below.
    print(parent_mask.shape, own_mask.shape)
    return parent_mask & own_mask
def expand_data_past_gates_single_sample(self, sample, random_state=0):
    """Collect cells that sit outside the gate but belong to an in-gate cluster.

    The sample is clustered with KMeans (``self.k`` clusters). Every cluster
    whose centre falls inside ``self.gate`` (on axes 0 and 1) contributes
    the cells assigned to it that are themselves outside the gate.

    Args:
        sample: the cells of one sample; it is passed to ``KMeans.fit`` and
            indexed with boolean masks.
            # NOTE(review): the ``.cpu().detach()`` call below implies the
            # gate mask for ``sample`` is a torch tensor - confirm.
        random_state: seed forwarded to KMeans. Previously this argument was
            accepted but ignored (the seed was hard-coded to 0); the default
            of 0 keeps existing callers' results unchanged.

    Returns:
        A ``(cells, clusterer)`` tuple. ``cells`` is the concatenation of
        the selected cells over all in-gate clusters, or an all-zero array
        with the shape of ``sample`` when no cluster centre lies inside the
        gate. ``clusterer`` is the fitted KMeans object.
    """
    clusterer = KMeans(n_clusters=self.k, random_state=random_state).fit(sample)
    centers = clusterer.cluster_centers_
    gate_bounds = (self.gate[0], self.gate[1], self.gate[2], self.gate[3])

    centers_in_gate = dh.filter_rectangle(
        centers, 0, 1, *gate_bounds, return_idx=True
    )
    cluster_ids_in_gate = [
        idx for idx, inside in enumerate(centers_in_gate) if inside
    ]

    cells_in_gate = dh.filter_rectangle(
        sample, 0, 1, *gate_bounds, return_idx=True
    )

    if not cluster_ids_in_gate:
        # No cluster centre is inside the gate: nothing to expand into.
        return np.zeros(sample.shape), clusterer

    # Loop-invariant: convert the "outside the gate" mask to NumPy once.
    cells_outside_gate = ~cells_in_gate.cpu().detach().numpy()

    expanded_cells_per_cluster = []
    for cluster_idx in cluster_ids_in_gate:
        print(cluster_idx, centers[cluster_idx])
        cells_in_cluster = clusterer.labels_ == cluster_idx
        expanded_cells_per_cluster.append(
            sample[cells_in_cluster & cells_outside_gate]
        )

    return np.concatenate(expanded_cells_per_cluster), clusterer
def collect_expanded_cells_per_sample(self):
    """Append, per sample, the cells gained by growing the gate.

    For every sample in ``self.data`` the cells that are inside
    ``self.expanded_gate`` but not inside ``self.init_gate`` (both applied
    on axes 0 and 1) are appended to ``self.expanded_data_per_sample``.
    """
    for sample in self.data:
        init_bounds = [self.init_gate[i] for i in range(4)]
        inside_init = dh.filter_rectangle(
            sample, 0, 1, *init_bounds, return_idx=True
        )

        expanded_bounds = [self.expanded_gate[i] for i in range(4)]
        inside_expanded = dh.filter_rectangle(
            sample, 0, 1, *expanded_bounds, return_idx=True
        )

        # Newly covered cells: outside the initial gate, inside the expanded one.
        newly_covered = ~inside_init & inside_expanded
        self.expanded_data_per_sample.append(sample[newly_covered])
def get_expanded_data_new_samples(self, data):
    """Return, per sample, the cells gained by growing the gate.

    Args:
        data: iterable of samples; each is filtered on axes 0 and 1.

    Returns:
        A list with one entry per sample containing the cells that are
        inside ``self.expanded_gate`` but not inside ``self.init_gate``.
    """
    gained_cells_per_sample = []
    for sample in data:
        init_bounds = [self.init_gate[i] for i in range(4)]
        inside_init = dh.filter_rectangle(
            sample, 0, 1, *init_bounds, return_idx=True
        )

        expanded_bounds = [self.expanded_gate[i] for i in range(4)]
        inside_expanded = dh.filter_rectangle(
            sample, 0, 1, *expanded_bounds, return_idx=True
        )

        # Newly covered cells: outside the initial gate, inside the expanded one.
        newly_covered = ~inside_init & inside_expanded
        gained_cells_per_sample.append(sample[newly_covered])
    return gained_cells_per_sample
def filter_single_flat_gate(data, gate, ids):
    """Apply one rectangular gate to ``data`` and return the filter result.

    Args:
        data: passed through to ``dh.filter_rectangle``.
        gate: indexable whose first four entries are forwarded, in order, as
            the rectangle bounds.
        ids: indexable whose first two entries select the two data axes;
            printed before filtering.

    Returns:
        Whatever ``dh.filter_rectangle`` returns when called without
        ``return_idx``.
    """
    print(ids)
    first_axis, second_axis = ids[0], ids[1]
    return dh.filter_rectangle(
        data, first_axis, second_axis, gate[0], gate[1], gate[2], gate[3]
    )
def filter_data_at_single_node(self, data, node):
    """Filter ``data`` with the rectangular gate attached to ``node``.

    The gate object is obtained through ``DataAndGatesPlotter.get_gate``;
    its ``low1``/``upp1``/``low2``/``upp2`` bounds are applied on the axes
    ``node.gate_dim1`` and ``node.gate_dim2``.

    Returns:
        Whatever ``dh.filter_rectangle`` returns when called without
        ``return_idx``.
    """
    node_gate = DataAndGatesPlotter.get_gate(node)
    return dh.filter_rectangle(
        data,
        node.gate_dim1, node.gate_dim2,
        node_gate.low1, node_gate.upp1,
        node_gate.low2, node_gate.upp2,
    )