def test(self):
    """Every row kept by ``utilities.rectangular_filter`` must have blobID 1.

    The filter is applied to the default example data on ``feature0`` /
    ``feature1`` with the rectangle xmin=0, xmax=8, ymin=-2.5, ymax=6.0.
    """
    data = make_example_date()
    rect = {'xmin': 0, 'xmax': 8, 'ymin': -2.5, 'ymax': 6.0}
    filtered = utilities.rectangular_filter(data,
                                            x='feature0',
                                            y='feature1',
                                            definition=rect)
    blob_ids = filtered.blobID.values
    self.assertTrue(all(blob == 1 for blob in blob_ids))
def _build_gate(dimensions=1):
    """Build ``Gate`` fixtures over the default example data.

    :param dimensions: when equal to 1 a single gate with '+' / '-' child
        populations is built; any other value builds two gates with
        two-character ('++', '-+', ...) population definitions.
    :return: ``((gate, None), example_data)`` when ``dimensions == 1``;
        otherwise ``(gates, example_data)`` where ``gates`` is a lazy
        generator yielding the two gates.
    """
    example_data = make_example_date()

    def _gate_for(children):
        # All gates share the same data, axes and (absent) transforms;
        # only the child population collection differs.
        return Gate(data=example_data,
                    x='feature0',
                    y='feature1',
                    child_populations=children,
                    transform_x=None,
                    transform_y=None)

    if dimensions == 1:
        populations = ChildPopulationCollection()
        populations.add_population('positive', definition='+')
        populations.add_population('negative', definition='-')
        return (_gate_for(populations), None), example_data

    populations1 = ChildPopulationCollection()
    populations1.add_population('positive', definition='++')
    populations1.add_population('negative', definition=['--', '-+', '+-'])

    populations2 = ChildPopulationCollection()
    populations2.add_population('positive', definition=['++', '-+'])
    populations2.add_population('negative', definition=['--', '+-'])

    # Kept as a generator expression: gates are only constructed when the
    # caller iterates / unpacks the first element of the returned pair.
    gates = (_gate_for(p) for p in [populations1, populations2])
    return gates, example_data
def test_update_populations(self):
    """Populations registered via ``_add_population`` carry the expected state.

    For both 'positive' (blobID == 0) and 'negative' (blobID != 0) this
    checks presence, index, proportion of parent/total, parent name and
    geometry.
    """
    g = self._add_population(self._build())
    data = make_example_date(n_samples=100, centers=3, n_features=2)
    total = data.shape[0]
    expected_index = (
        ('positive', data[data.blobID == 0].index.values),
        ('negative', data[data.blobID != 0].index.values),
    )
    expected_geom = {'shape': 'threshold', 'x': 'feature0', 'y': 'feature1'}

    for name, idx in expected_index:
        # subTest labels a failure with the population it belongs to.
        with self.subTest(population=name):
            # assertIn reports the missing key and the container on failure,
            # unlike assertTrue(name in ...).
            self.assertIn(name, g.populations.keys())
            population = g.populations.get(name)
            self.assertListEqual(list(population.index), list(idx))
            self.assertEqual(population.prop_of_parent, len(idx) / total)
            self.assertEqual(population.prop_of_total, len(idx) / total)
            self.assertEqual(population.parent.name, 'root')
            self.assertDictEqual(population.geom, expected_geom)
def _build():
    """Return the default example data and an ellipse membership mask.

    The mask is whatever ``utilities.inside_ellipse`` returns for the
    ``feature0`` / ``feature1`` values and an ellipse with center
    (4.5, 2.5), width 2.3, height 3 and angle 0.

    :return: ``(data, mask)``
    """
    data = make_example_date()
    coordinates = data[['feature0', 'feature1']].values
    mask = utilities.inside_ellipse(coordinates,
                                    center=(4.5, 2.5),
                                    width=2.3,
                                    height=3,
                                    angle=0)
    return data, mask
def test_drop(self):
    """After ``drop_data(mask)`` the index must equal that of ``data[~mask]``.

    The mask selects rows with blobID == 1 in freshly generated example
    data (100 samples, 3 centers, 5 features).
    """
    data = make_example_date(n_samples=100, centers=3, n_features=5)
    mask = data.blobID == 1
    explorer = self._build()
    explorer.drop_data(mask)
    expected = list(data[~mask].index.values)
    observed = list(explorer.data.index.values)
    self.assertListEqual(expected, observed)
def test_apply_gate(self):
    """Applying gate 'test' creates both populations with the expected index.

    The 'positive' population index must match the rows of the example
    data whose blobID equals 1.
    """
    data = make_example_date(n_samples=100, centers=3, n_features=2)
    g = self._dummy_gate(self._build())
    g.apply('test', plot_output=False, feedback=False)

    # One assertIn per name: a failure names the population that is missing
    # instead of reporting a bare "False is not true".
    for name in ('positive', 'negative'):
        self.assertIn(name, g.populations.keys())

    expected = data[data.blobID == 1].index.values
    observed = g.populations.get('positive').index
    self.assertListEqual(list(expected), list(observed))
def test_merge(self):
    """Merging 'positive' and 'negative' yields 'merged' spanning all rows.

    The merged population index is compared with the full index of the
    example data (100 samples, 3 centers, 2 features).
    """
    g = self._add_population(self._build())
    g.merge(population_left='positive',
            population_right='negative',
            new_population_name='merged')
    data = make_example_date(n_samples=100, centers=3, n_features=2)
    # assertIn shows the available keys when 'merged' is absent.
    self.assertIn('merged', g.populations.keys())
    self.assertListEqual(list(g.populations.get('merged').index),
                         list(data.index.values))
def test_predict_ctrl_pop(self):
    """``_predict_ctrl_population`` stores the expected control index.

    A 5-neighbour KNN classifier is passed as the model; the control index
    stored under 'dummy_ctrl' for 'positive' must equal the index of the
    example rows with blobID == 0.
    """
    from sklearn.neighbors import KNeighborsClassifier

    classifier = KNeighborsClassifier(n_neighbors=5, n_jobs=-1)
    g = self._add_population(self._build())
    data = make_example_date(n_samples=100, centers=3, n_features=2)
    g._predict_ctrl_population(target_population='positive',
                               ctrl_id='dummy_ctrl',
                               model=classifier)
    expected = list(data[data.blobID == 0].index.values)
    positive = g.populations.get('positive')
    observed = list(positive.control_idx.get('dummy_ctrl'))
    self.assertListEqual(expected, observed)
def _build():
    """Build a 1D density curve over ``feature0`` and locate its peaks.

    Rows with blobID == 2 are down-sampled to a quarter before a Gaussian
    kernel density (bandwidth 0.5) is fitted and evaluated on 1000 evenly
    spaced points between the minimum and maximum of ``feature0``.

    :return: ``(prob, peaks, x_d)`` - exponentiated density scores, the
        indices reported by ``find_peaks`` and the evaluation grid.
    """
    data = make_example_date()
    untouched = data[data.blobID != 2]
    # NOTE(review): sample() is called without random_state, so the fixture
    # may differ between runs - confirm whether that is intended.
    thinned = data[data.blobID == 2].sample(frac=0.25)
    data = pd.concat([untouched, thinned])

    values = data['feature0'].values
    kde = KernelDensity(bandwidth=0.5, kernel='gaussian')
    kde.fit(values[:, None])

    x_d = np.linspace(min(values), max(values), 1000)
    scores = kde.score_samples(x_d[:, None])
    prob = np.exp(scores)
    peaks, _ = find_peaks(prob)
    return prob, peaks, x_d
def test_generate_poly(self):
    """``Gate.generate_polygons`` returns three items for the labelled data.

    The example data (100 samples) gets a ``labels`` column copied from
    ``blobID`` before the gate is built.
    """
    example_data = make_example_date(n_samples=100)
    example_data['labels'] = example_data['blobID']
    populations = ChildPopulationCollection()
    populations.add_population('positive', definition='+')
    populations.add_population('negative', definition='-')
    gate = Gate(data=example_data,
                x='feature0',
                y='feature1',
                child_populations=populations,
                transform_x=None,
                transform_y=None)
    # assertEqual reports the actual count on failure.
    self.assertEqual(len(gate.generate_polygons()), 3)
def test_generate_chunks(self):
    """``Gate.generate_chunks(chunksize=10)`` splits 100 rows into 10 x 10."""
    example_data = make_example_date(n_samples=100)
    populations = ChildPopulationCollection()
    populations.add_population('positive', definition='+')
    populations.add_population('negative', definition='-')
    gate = Gate(data=example_data,
                x='feature0',
                y='feature1',
                child_populations=populations,
                transform_x=None,
                transform_y=None)
    chunks = gate.generate_chunks(chunksize=10)
    # assertEqual surfaces the offending size instead of a bare False.
    self.assertEqual(len(chunks), 10)
    for chunk in chunks:
        self.assertEqual(chunk.shape[0], 10)
def _build(populations: ChildPopulationCollection,
           return_data: bool = False,
           n=1000,
           **kwargs):
    """Create a ``static.Static`` gate over freshly generated example data.

    :param populations: child populations handed to the gate
    :param return_data: when True the example data is returned as well
    :param n: number of samples to generate (3 centers)
    :param kwargs: forwarded unchanged to ``static.Static``
    :return: the gate, or ``(gate, example_data)`` if ``return_data``
    """
    example_data = make_example_date(n_samples=n, centers=3)
    gate = static.Static(data=example_data,
                         child_populations=populations,
                         x='feature0',
                         y='feature1',
                         transform_x=None,
                         transform_y=None,
                         **kwargs)
    return (gate, example_data) if return_data else gate
def _build(self, return_data: bool = False, min_pop_size=2):
    """Create a ``dbscan.DensityClustering`` gate with three target blobs.

    The example data (100 samples) gets a ``labels`` column copied from
    ``blobID``; three cluster populations are registered with fixed
    targets and weight 1.

    :param return_data: when True the example data is returned as well
    :param min_pop_size: forwarded to ``DensityClustering``
    :return: the gate, or ``(gate, example_data)`` if ``return_data``
    """
    example_data = make_example_date(n_samples=100)
    example_data['labels'] = example_data['blobID']

    populations = ChildPopulationCollection(gate_type='cluster')
    blob_targets = (
        ('blob1', (-2.5, 10)),
        ('blob2', (5, 1)),
        ('blob3', (-7.5, -7.5)),
    )
    for name, target in blob_targets:
        populations.add_population(name, target=target, weight=1)

    gate = dbscan.DensityClustering(data=example_data,
                                    child_populations=populations,
                                    x='feature0',
                                    y='feature1',
                                    transform_x=None,
                                    transform_y=None,
                                    min_pop_size=min_pop_size)
    return (gate, example_data) if return_data else gate
def _build(return_data: bool = False, blobs=3, **kwargs):
    """Create a ``mixturemodel.MixtureModel`` gate over example data.

    The example data (1000 samples, ``blobs`` centers) gets a ``labels``
    column copied from ``blobID``.

    :param return_data: when True the example data is returned as well
    :param blobs: number of centers requested from the data generator
    :param kwargs: forwarded unchanged to ``MixtureModel``
    :return: the gate, or ``(gate, example_data)`` if ``return_data``
    """
    example_data = make_example_date(n_samples=1000, centers=blobs)
    example_data['labels'] = example_data['blobID']

    populations = ChildPopulationCollection(gate_type='geom')
    for name, sign in (('positive', '+'), ('negative', '-')):
        populations.add_population(name, definition=sign)

    gate = mixturemodel.MixtureModel(data=example_data,
                                     child_populations=populations,
                                     x='feature0',
                                     y='feature1',
                                     transform_x=None,
                                     transform_y=None,
                                     **kwargs)
    return (gate, example_data) if return_data else gate
def _add_population(g):
    """Register 'positive' / 'negative' threshold populations on ``g``.

    'positive' receives the index of example rows with blobID == 0 and
    'negative' the remaining rows; both get the same threshold geometry.
    The collection is then passed to ``g.update_populations`` under the
    parent 'root'.

    :param g: object exposing ``update_populations``
    :return: the same ``g``
    """
    data = make_example_date(n_samples=100, centers=3, n_features=2)
    members = (
        ('positive', data[data.blobID == 0].index.values),
        ('negative', data[data.blobID != 0].index.values),
    )

    populations = ChildPopulationCollection(gate_type='threshold_1d')
    populations.add_population('positive', definition='+')
    populations.add_population('negative', definition='-')

    # Indexes are assigned to both populations before any geometry is set,
    # mirroring the original call order.
    for name, idx in members:
        populations.populations[name].update_index(idx)
    for name, _ in members:
        populations.populations[name].update_geom(shape='threshold',
                                                  x='feature0',
                                                  y='feature1')

    g.update_populations(output=populations,
                         parent_name='root',
                         warnings=['this is a test'])
    return g
def test_nudge_threshold(self):
    """Nudging a 2D threshold gate re-assigns the 'positive' population.

    A static 'threshold_2d' gate is created at (1, -2.5), applied, and then
    nudged to (4, 2.5); 'positive' must match the example rows whose
    rounded ``feature0`` >= 4 and rounded ``feature1`` >= 2.5.
    """
    # Make a dummy threshold gate
    g = self._build()
    populations = ChildPopulationCollection(gate_type='threshold_2d')
    populations.add_population('positive', definition='++')
    populations.add_population('negative', definition=['--', '+-', '-+'])
    gate_kwargs = dict(x='feature0',
                       y='feature1',
                       transform_x=None,
                       transform_y=None,
                       threshold_x=1,
                       threshold_y=-2.5)
    g.create_gate(gate_name='test',
                  parent='root',
                  class_='Static',
                  method='threshold_2d',
                  kwargs=gate_kwargs,
                  child_populations=populations)
    # NOTE(review): other tests in this file call apply() with
    # plot_output=False, feedback=False - confirm the defaults are wanted here.
    g.apply('test')

    data = make_example_date(n_samples=100, centers=3, n_features=2)
    g.nudge_threshold('test', new_x=4, new_y=2.5)

    above_x = data.feature0.round(2) >= 4
    above_y = data.feature1.round(2) >= 2.5
    expected = list(data[above_x & above_y].index.values)
    observed = list(g.populations.get('positive').index)
    self.assertListEqual(expected, observed)
def test_edit_gate(self):
    """``edit_gate`` re-assigns 'positive' for threshold, rect and ellipse gates.

    Each section creates a static gate named 'test', applies it, edits it
    with new geometry and compares the resulting 'positive' index with the
    rows expected from the example data.
    """
    data = make_example_date(n_samples=100, centers=3, n_features=2)
    g = self._build()
    # Axis settings shared by every gate created below.
    axes = dict(x='feature0', y='feature1',
                transform_x=None, transform_y=None)

    # Threshold 2D
    populations = ChildPopulationCollection(gate_type='threshold_2d')
    populations.add_population('positive', definition='++')
    populations.add_population('negative', definition=['--', '+-', '-+'])
    g.create_gate(gate_name='test',
                  parent='root',
                  class_='Static',
                  method='threshold_2d',
                  kwargs=dict(axes, threshold_x=2.5, threshold_y=-5),
                  child_populations=populations)
    g.apply('test', plot_output=False, feedback=False)
    threshold_geom = {'x': 'feature0',
                      'y': 'feature1',
                      'threshold_x': -2.5,
                      'threshold_y': 5,
                      'shape': '2d_threshold',
                      'transform_x': None,
                      'transform_y': None}
    new_geom = {
        'positive': dict(definition='++', **threshold_geom),
        'negative': dict(definition=['--', '+-', '-+'], **threshold_geom),
    }
    g.edit_gate('test', updated_geom=new_geom)
    above_x = data.feature0.round(2) >= -2.5
    above_y = data.feature1.round(2) >= 5
    expected = data[above_x & above_y].index.values
    observed = g.populations.get('positive').index
    self.assertListEqual(list(expected), list(observed))

    # Rectangular gate
    populations = ChildPopulationCollection(gate_type='geom')
    populations.add_population('positive', definition='+')
    populations.add_population('negative', definition='-')
    g.create_gate(gate_name='test',
                  parent='root',
                  class_='Static',
                  method='rect_gate',
                  kwargs=dict(axes, x_min=1.5, x_max=8.0,
                              y_min=-5, y_max=5.5),
                  child_populations=populations)
    g.apply('test', plot_output=False, feedback=False)
    rect_geom = {'x': 'feature0',
                 'y': 'feature1',
                 'x_min': -12,
                 'x_max': -2.5,
                 'y_min': -12,
                 'y_max': 0,
                 'shape': 'rect',
                 'transform_x': None,
                 'transform_y': None}
    new_geom = {
        'positive': dict(definition='+', **rect_geom),
        'negative': dict(definition='-', **rect_geom),
    }
    g.edit_gate('test', updated_geom=new_geom)
    expected = data[data.blobID == 2.0].index.values
    observed = g.populations.get('positive').index
    self.assertListEqual(list(expected), list(observed))

    # Ellipse gate (re-uses the 'geom' population collection from above)
    g.create_gate(gate_name='test',
                  parent='root',
                  class_='Static',
                  method='ellipse_gate',
                  kwargs=dict(axes, centroid=(5, 2.5), width=5,
                              height=8, angle=0),
                  child_populations=populations)
    g.apply('test', plot_output=False, feedback=False)
    ellipse_geom = {'x': 'feature0',
                    'y': 'feature1',
                    'centroid': (-7., -7),
                    'width': 5,
                    'height': 8,
                    'angle': 0,
                    'shape': 'ellipse',
                    'transform_x': None,
                    'transform_y': None}
    new_geom = {
        'positive': dict(definition='+', **ellipse_geom),
        'negative': dict(definition='-', **ellipse_geom),
    }
    g.edit_gate('test', updated_geom=new_geom)
    expected = data[data.blobID == 2.0].index.values
    observed = g.populations.get('positive').index
    self.assertListEqual(list(expected), list(observed))
def _build():
    """Return a ``main.Explorer`` over example data tagged with a patient id.

    The data has 100 samples, 3 centers and 5 features; every row gets
    ``pt_id`` set to 'test_pt'.
    """
    example = make_example_date(n_samples=100, centers=3, n_features=5)
    example['pt_id'] = 'test_pt'
    explorer = main.Explorer(data=example)
    return explorer
def test(self):
    """Down-sampling keeps the ratios reported by ``_equal_ratio``.

    10000 example rows are passed to
    ``utilities.density_dependent_downsample`` (features ``feature0`` /
    ``feature1``, ``mmd_sample_n=2000``); each pair yielded by
    ``self._equal_ratio(data, samples)`` must agree to one decimal place.
    """
    data = make_example_date(n_samples=10000)
    samples = utilities.density_dependent_downsample(
        data=data,
        features=['feature0', 'feature1'],
        mmd_sample_n=2000)
    ratio_pairs = self._equal_ratio(data, samples)
    for before, after in ratio_pairs:
        self.assertAlmostEqual(before, after, places=1)