def test_compute_model_performance_multitask_regressor(self): random_seed = 42 n_data_points = 20 n_features = 2 np.random.seed(seed=random_seed) X = np.random.rand(n_data_points, n_features) y1 = np.expand_dims(np.array([0.5 for x in range(n_data_points)]), axis=-1) y2 = np.expand_dims(np.array([-0.5 for x in range(n_data_points)]), axis=-1) X = NumpyDataset(X) ys = [NumpyDataset(y1), NumpyDataset(y2)] databag = Databag() features = Feature(shape=(None, n_features)) databag.add_dataset(features, X) outputs = [] losses = [] labels = [] for i in range(2): label = Label(shape=(None, 1)) dense = Dense(out_channels=1, in_layers=[features]) loss = ReduceSquareDifference(in_layers=[dense, label]) outputs.append(dense) losses.append(loss) labels.append(label) databag.add_dataset(label, ys[i]) total_loss = ReduceMean(in_layers=losses) tg = dc.models.TensorGraph(mode="regression", batch_size=20, random_seed=random_seed, learning_rate=0.1) for output in outputs: tg.add_output(output) tg.set_loss(total_loss) tg.fit_generator( databag.iterbatches(epochs=1000, batch_size=tg.batch_size, pad_batches=True)) metric = [ dc.metrics.Metric(dc.metrics.mean_absolute_error, np.mean, mode="regression"), ] scores = tg.evaluate_generator(databag.iterbatches(), metric, labels=labels, per_task_metrics=True) scores = list(scores[1].values()) assert_true(np.all(np.isclose(scores, [0.0, 0.0], atol=1.0)))
def test_compute_model_performance_multitask_classifier(self): n_data_points = 20 n_features = 2 X = np.ones(shape=(n_data_points // 2, n_features)) * -1 X1 = np.ones(shape=(n_data_points // 2, n_features)) X = np.concatenate((X, X1)) class_1 = np.array([[0.0, 1.0] for x in range(int(n_data_points / 2))]) class_0 = np.array([[1.0, 0.0] for x in range(int(n_data_points / 2))]) y1 = np.concatenate((class_0, class_1)) y2 = np.concatenate((class_1, class_0)) X = NumpyDataset(X) ys = [NumpyDataset(y1), NumpyDataset(y2)] databag = Databag() features = Feature(shape=(None, n_features)) databag.add_dataset(features, X) outputs = [] entropies = [] labels = [] for i in range(2): label = Label(shape=(None, 2)) labels.append(label) dense = Dense(out_channels=2, in_layers=[features]) output = SoftMax(in_layers=[dense]) smce = SoftMaxCrossEntropy(in_layers=[label, dense]) entropies.append(smce) outputs.append(output) databag.add_dataset(label, ys[i]) total_loss = ReduceMean(in_layers=entropies) tg = dc.models.TensorGraph(learning_rate=0.1) for output in outputs: tg.add_output(output) tg.set_loss(total_loss) tg.fit_generator( databag.iterbatches(epochs=1000, batch_size=tg.batch_size, pad_batches=True)) metric = dc.metrics.Metric(dc.metrics.roc_auc_score, np.mean, mode="classification") scores = tg.evaluate_generator(databag.iterbatches(), [metric], labels=labels, per_task_metrics=True) scores = list(scores[1].values()) # Loosening atol to see if tests stop failing sporadically assert_true(np.all(np.isclose(scores, [1.0, 1.0], atol=0.20)))
def test_compute_model_performance_singletask_regressor_ordering(self): n_data_points = 1000 n_features = 1 X = np.array(range(n_data_points)) X = np.expand_dims(X, axis=-1) y1 = X + 1 X = NumpyDataset(X) ys = [NumpyDataset(y1)] databag = Databag() features = Feature(shape=(None, n_features)) databag.add_dataset(features, X) outputs = [] losses = [] labels = [] for i in range(1): label = Label(shape=(None, 1)) dense = Dense(out_channels=1, in_layers=[features]) loss = ReduceSquareDifference(in_layers=[dense, label]) outputs.append(dense) losses.append(loss) labels.append(label) databag.add_dataset(label, ys[i]) total_loss = ReduceMean(in_layers=losses) tg = dc.models.TensorGraph(mode="regression", learning_rate=0.1) for output in outputs: tg.add_output(output) tg.set_loss(total_loss) tg.fit_generator( databag.iterbatches(epochs=1000, batch_size=tg.batch_size, pad_batches=True)) metric = [ dc.metrics.Metric(dc.metrics.mean_absolute_error, np.mean, mode="regression"), dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression") ] scores = tg.evaluate_generator(databag.iterbatches(batch_size=1), metric, labels=labels, per_task_metrics=True) print(scores) scores = list(scores[1].values()) assert_true(np.all(np.isclose(scores, [0.0], atol=0.5)))
def test_compute_model_performance_singletask_regressor(self): n_data_points = 20 n_features = 2 X = np.random.rand(n_data_points, n_features) y1 = np.expand_dims(np.array([0.5 for x in range(n_data_points)]), axis=-1) X = NumpyDataset(X) ys = [NumpyDataset(y1)] databag = Databag() features = Feature(shape=(None, n_features)) databag.add_dataset(features, X) outputs = [] losses = [] labels = [] for i in range(1): label = Label(shape=(None, 1)) dense = Dense(out_channels=1, in_layers=[features]) loss = ReduceSquareDifference(in_layers=[dense, label]) outputs.append(dense) losses.append(loss) labels.append(label) databag.add_dataset(label, ys[i]) total_loss = ReduceMean(in_layers=losses) tg = dc.models.TensorGraph(mode="regression", learning_rate=0.1) for output in outputs: tg.add_output(output) tg.set_loss(total_loss) tg.fit_generator( databag.iterbatches( epochs=1000, batch_size=tg.batch_size, pad_batches=True)) metric = [ dc.metrics.Metric( dc.metrics.mean_absolute_error, np.mean, mode="regression"), ] scores = tg.evaluate_generator( databag.iterbatches(batch_size=tg.batch_size), metric, labels=labels, per_task_metrics=True) scores = list(scores[1].values()) assert_true(np.all(np.isclose(scores, [0.0], atol=0.5)))
def test_multi_task_regressor(self): n_data_points = 20 n_features = 2 X = np.random.rand(n_data_points, n_features) y1 = np.expand_dims(np.array([0.5 for x in range(n_data_points)]), axis=-1) y2 = np.expand_dims(np.array([-0.5 for x in range(n_data_points)]), axis=-1) X = NumpyDataset(X) ys = [NumpyDataset(y1), NumpyDataset(y2)] databag = Databag() features = Feature(shape=(None, n_features)) databag.add_dataset(features, X) outputs = [] losses = [] for i in range(2): label = Label(shape=(None, 1)) dense = Dense(out_channels=1, in_layers=[features]) loss = ReduceSquareDifference(in_layers=[dense, label]) outputs.append(dense) losses.append(loss) databag.add_dataset(label, ys[i]) total_loss = ReduceMean(in_layers=losses) tg = dc.models.TensorGraph(learning_rate=0.01) for output in outputs: tg.add_output(output) tg.set_loss(total_loss) tg.fit_generator( databag.iterbatches(epochs=1000, batch_size=tg.batch_size, pad_batches=True)) predictions = tg.predict_on_generator(databag.iterbatches()) for i in range(2): y_real = ys[i].X y_pred = predictions[i] assert_true(np.all(np.isclose(y_pred, y_real, atol=1.5)))
def test_multi_task_classifier(self): n_data_points = 20 n_features = 2 X = np.random.rand(n_data_points, n_features) y1 = np.array([[0, 1] for x in range(n_data_points)]) y2 = np.array([[1, 0] for x in range(n_data_points)]) X = NumpyDataset(X) ys = [NumpyDataset(y1), NumpyDataset(y2)] databag = Databag() features = Feature(shape=(None, n_features)) databag.add_dataset(features, X) outputs = [] entropies = [] for i in range(2): label = Label(shape=(None, 2)) dense = Dense(out_channels=2, in_layers=[features]) output = SoftMax(in_layers=[dense]) smce = SoftMaxCrossEntropy(in_layers=[label, dense]) entropies.append(smce) outputs.append(output) databag.add_dataset(label, ys[i]) total_loss = ReduceMean(in_layers=entropies) tg = dc.models.TensorGraph(learning_rate=0.01) for output in outputs: tg.add_output(output) tg.set_loss(total_loss) tg.fit_generator( databag.iterbatches(epochs=1000, batch_size=tg.batch_size, pad_batches=True)) predictions = tg.predict_on_generator(databag.iterbatches()) for i in range(2): y_real = ys[i].X y_pred = predictions[i] assert_true(np.all(np.isclose(y_pred, y_real, atol=0.6)))
def test_multi_task_classifier(self): n_data_points = 20 n_features = 2 X = np.random.rand(n_data_points, n_features) y1 = np.array([[0, 1] for x in range(n_data_points)]) y2 = np.array([[1, 0] for x in range(n_data_points)]) X = NumpyDataset(X) ys = [NumpyDataset(y1), NumpyDataset(y2)] databag = Databag() features = Feature(shape=(None, n_features)) databag.add_dataset(features, X) outputs = [] entropies = [] for i in range(2): label = Label(shape=(None, 2)) dense = Dense(out_channels=2, in_layers=[features]) output = SoftMax(in_layers=[dense]) smce = SoftMaxCrossEntropy(in_layers=[label, dense]) entropies.append(smce) outputs.append(output) databag.add_dataset(label, ys[i]) total_loss = ReduceMean(in_layers=entropies) tg = dc.models.TensorGraph(learning_rate=0.01) for output in outputs: tg.add_output(output) tg.set_loss(total_loss) tg.fit_generator( databag.iterbatches( epochs=1000, batch_size=tg.batch_size, pad_batches=True)) predictions = tg.predict_on_generator(databag.iterbatches()) for i in range(2): y_real = ys[i].X y_pred = predictions[i] assert_true(np.all(np.isclose(y_pred, y_real, atol=0.6)))
def test_shared_layer(self): n_data_points = 20 n_features = 2 X = np.random.rand(n_data_points, n_features) y1 = np.array([[0, 1] for x in range(n_data_points)]) X = NumpyDataset(X) ys = [NumpyDataset(y1)] databag = Databag() features = Feature(shape=(None, n_features)) databag.add_dataset(features, X) outputs = [] label = Label(shape=(None, 2)) dense1 = Dense(out_channels=2, in_layers=[features]) dense2 = dense1.shared(in_layers=[features]) output1 = SoftMax(in_layers=[dense1]) output2 = SoftMax(in_layers=[dense2]) smce = SoftMaxCrossEntropy(in_layers=[label, dense1]) outputs.append(output1) outputs.append(output2) databag.add_dataset(label, ys[0]) total_loss = ReduceMean(in_layers=[smce]) tg = dc.models.TensorGraph(learning_rate=0.01) for output in outputs: tg.add_output(output) tg.set_loss(total_loss) tg.fit_generator( databag.iterbatches(epochs=1, batch_size=tg.batch_size, pad_batches=True)) prediction = tg.predict_on_generator(databag.iterbatches()) assert_true(np.all(np.isclose(prediction[0], prediction[1], atol=0.01)))
def test_multi_task_regressor(self): n_data_points = 20 n_features = 2 X = np.random.rand(n_data_points, n_features) y1 = np.expand_dims(np.array([0.5 for x in range(n_data_points)]), axis=-1) y2 = np.expand_dims(np.array([-0.5 for x in range(n_data_points)]), axis=-1) X = NumpyDataset(X) ys = [NumpyDataset(y1), NumpyDataset(y2)] databag = Databag() features = Feature(shape=(None, n_features)) databag.add_dataset(features, X) outputs = [] losses = [] for i in range(2): label = Label(shape=(None, 1)) dense = Dense(out_channels=1, in_layers=[features]) loss = ReduceSquareDifference(in_layers=[dense, label]) outputs.append(dense) losses.append(loss) databag.add_dataset(label, ys[i]) total_loss = ReduceMean(in_layers=losses) tg = dc.models.TensorGraph(learning_rate=0.01) for output in outputs: tg.add_output(output) tg.set_loss(total_loss) tg.fit_generator( databag.iterbatches( epochs=1000, batch_size=tg.batch_size, pad_batches=True)) predictions = tg.predict_on_generator(databag.iterbatches()) for i in range(2): y_real = ys[i].X y_pred = predictions[i] assert_true(np.all(np.isclose(y_pred, y_real, atol=1.5)))
def test_shared_layer(self): n_data_points = 20 n_features = 2 X = np.random.rand(n_data_points, n_features) y1 = np.array([[0, 1] for x in range(n_data_points)]) X = NumpyDataset(X) ys = [NumpyDataset(y1)] databag = Databag() features = Feature(shape=(None, n_features)) databag.add_dataset(features, X) outputs = [] label = Label(shape=(None, 2)) dense1 = Dense(out_channels=2, in_layers=[features]) dense2 = dense1.shared(in_layers=[features]) output1 = SoftMax(in_layers=[dense1]) output2 = SoftMax(in_layers=[dense2]) smce = SoftMaxCrossEntropy(in_layers=[label, dense1]) outputs.append(output1) outputs.append(output2) databag.add_dataset(label, ys[0]) total_loss = ReduceMean(in_layers=[smce]) tg = dc.models.TensorGraph(learning_rate=0.01) for output in outputs: tg.add_output(output) tg.set_loss(total_loss) tg.fit_generator( databag.iterbatches( epochs=1, batch_size=tg.batch_size, pad_batches=True)) prediction = tg.predict_on_generator(databag.iterbatches()) assert_true(np.all(np.isclose(prediction[0], prediction[1], atol=0.01)))