Example #1
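All of the snippets below are test methods from DeepChem's TensorGraph test suite, so each assumes a shared import preamble and is defined on a test class (hence the `self` argument). A minimal preamble that should cover every example, assuming the DeepChem 2.x tensorgraph module layout (exact paths may differ between releases):

import numpy as np
import deepchem as dc
from deepchem.data import NumpyDataset
from deepchem.data.datasets import Databag
from deepchem.models.tensorgraph.layers import (
    Dense, Feature, Flatten, Label, NeighborList, ReduceMean, ReduceSum,
    ReduceSquareDifference, SoftMax, SoftMaxCrossEntropy, ToFloat,
    WeightedLinearCombo)
from nose.tools import assert_true

To run a snippet standalone, wrap it in a unittest.TestCase subclass.
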
  def test_neighbor_list_vina(self):
    """Test under conditions closer to Vina usage."""
    N_atoms = 5
    M_nbrs = 2
    ndim = 3
    start = 0
    stop = 4
    nbr_cutoff = 1

    # Random coordinates drawn uniformly from the cube [start, stop)^ndim
    X = NumpyDataset(start + np.random.rand(N_atoms, ndim) * (stop - start))

    coords = Feature(shape=(N_atoms, ndim))

    # nbr_list is an (N_atoms, M_nbrs) tensor of neighbor indices
    nbr_list = NeighborList(
        N_atoms, M_nbrs, ndim, nbr_cutoff, start, stop, in_layers=[coords])

    nbr_list = ToFloat(in_layers=[nbr_list])
    flattened = Flatten(in_layers=[nbr_list])
    dense = Dense(out_channels=1, in_layers=[flattened])
    output = ReduceSum(in_layers=[dense])

    tg = dc.models.TensorGraph(learning_rate=0.1, use_queue=False)
    tg.set_loss(output)

    databag = Databag({coords: X})
    tg.fit_generator(databag.iterbatches(epochs=1))
Example #2
    def test_compute_model_performance_multitask_regressor(self):
        random_seed = 42
        n_data_points = 20
        n_features = 2
        np.random.seed(seed=random_seed)

        X = np.random.rand(n_data_points, n_features)
        y1 = np.expand_dims(np.array([0.5 for x in range(n_data_points)]),
                            axis=-1)
        y2 = np.expand_dims(np.array([-0.5 for x in range(n_data_points)]),
                            axis=-1)
        X = NumpyDataset(X)
        ys = [NumpyDataset(y1), NumpyDataset(y2)]

        databag = Databag()

        features = Feature(shape=(None, n_features))
        databag.add_dataset(features, X)

        outputs = []
        losses = []
        labels = []
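        # Build an independent linear head and squared-error loss per task,
        # with both heads reading from the shared input features.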
        for i in range(2):
            label = Label(shape=(None, 1))
            dense = Dense(out_channels=1, in_layers=[features])
            loss = ReduceSquareDifference(in_layers=[dense, label])

            outputs.append(dense)
            losses.append(loss)
            labels.append(label)
            databag.add_dataset(label, ys[i])

        total_loss = ReduceMean(in_layers=losses)

        tg = dc.models.TensorGraph(mode="regression",
                                   batch_size=20,
                                   random_seed=random_seed,
                                   learning_rate=0.1)
        for output in outputs:
            tg.add_output(output)
        tg.set_loss(total_loss)

        tg.fit_generator(
            databag.iterbatches(epochs=1000,
                                batch_size=tg.batch_size,
                                pad_batches=True))
        metric = [
            dc.metrics.Metric(dc.metrics.mean_absolute_error,
                              np.mean,
                              mode="regression"),
        ]
        scores = tg.evaluate_generator(databag.iterbatches(),
                                       metric,
                                       labels=labels,
                                       per_task_metrics=True)
        scores = list(scores[1].values())
        assert_true(np.all(np.isclose(scores, [0.0, 0.0], atol=1.0)))
Example #3
    def test_compute_model_performance_multitask_classifier(self):
        n_data_points = 20
        n_features = 2

        X = np.ones(shape=(n_data_points // 2, n_features)) * -1
        X1 = np.ones(shape=(n_data_points // 2, n_features))
        X = np.concatenate((X, X1))
        class_1 = np.array([[0.0, 1.0] for x in range(int(n_data_points / 2))])
        class_0 = np.array([[1.0, 0.0] for x in range(int(n_data_points / 2))])
        y1 = np.concatenate((class_0, class_1))
        y2 = np.concatenate((class_1, class_0))
        X = NumpyDataset(X)
        ys = [NumpyDataset(y1), NumpyDataset(y2)]

        databag = Databag()

        features = Feature(shape=(None, n_features))
        databag.add_dataset(features, X)

        outputs = []
        entropies = []
        labels = []
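        # One softmax head per task; the cross-entropy loss is computed on the
        # raw logits (dense), not on the softmax output.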
        for i in range(2):
            label = Label(shape=(None, 2))
            labels.append(label)
            dense = Dense(out_channels=2, in_layers=[features])
            output = SoftMax(in_layers=[dense])
            smce = SoftMaxCrossEntropy(in_layers=[label, dense])

            entropies.append(smce)
            outputs.append(output)
            databag.add_dataset(label, ys[i])

        total_loss = ReduceMean(in_layers=entropies)

        tg = dc.models.TensorGraph(learning_rate=0.1)
        for output in outputs:
            tg.add_output(output)
        tg.set_loss(total_loss)

        tg.fit_generator(
            databag.iterbatches(epochs=1000,
                                batch_size=tg.batch_size,
                                pad_batches=True))
        metric = dc.metrics.Metric(dc.metrics.roc_auc_score,
                                   np.mean,
                                   mode="classification")

        scores = tg.evaluate_generator(databag.iterbatches(), [metric],
                                       labels=labels,
                                       per_task_metrics=True)
        scores = list(scores[1].values())
        # Loosening atol to see if tests stop failing sporadically
        assert_true(np.all(np.isclose(scores, [1.0, 1.0], atol=0.20)))
Example #4
    def test_compute_model_performance_singletask_regressor_ordering(self):
        n_data_points = 1000
        n_features = 1

        X = np.array(range(n_data_points))
        X = np.expand_dims(X, axis=-1)
        y1 = X + 1
        X = NumpyDataset(X)
        ys = [NumpyDataset(y1)]

        databag = Databag()

        features = Feature(shape=(None, n_features))
        databag.add_dataset(features, X)

        outputs = []
        losses = []
        labels = []
        for i in range(1):
            label = Label(shape=(None, 1))
            dense = Dense(out_channels=1, in_layers=[features])
            loss = ReduceSquareDifference(in_layers=[dense, label])

            outputs.append(dense)
            losses.append(loss)
            labels.append(label)
            databag.add_dataset(label, ys[i])

        total_loss = ReduceMean(in_layers=losses)

        tg = dc.models.TensorGraph(mode="regression", learning_rate=0.1)
        for output in outputs:
            tg.add_output(output)
        tg.set_loss(total_loss)

        tg.fit_generator(
            databag.iterbatches(epochs=1000,
                                batch_size=tg.batch_size,
                                pad_batches=True))
        metric = [
            dc.metrics.Metric(dc.metrics.mean_absolute_error,
                              np.mean,
                              mode="regression"),
            dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
        ]
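        # Evaluating with batch_size=1 checks that per-example ordering is
        # preserved between predictions and labels.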
        scores = tg.evaluate_generator(databag.iterbatches(batch_size=1),
                                       metric,
                                       labels=labels,
                                       per_task_metrics=True)
        print(scores)
        scores = list(scores[1].values())
        assert_true(np.all(np.isclose(scores, [0.0], atol=0.5)))
Example #5
  def test_weighted_combo(self):
    """Tests that weighted linear combinations can be built"""
    N = 10
    n_features = 5

    X1 = NumpyDataset(np.random.rand(N, n_features))
    X2 = NumpyDataset(np.random.rand(N, n_features))
    y = NumpyDataset(np.random.rand(N))

    features_1 = Feature(shape=(None, n_features))
    features_2 = Feature(shape=(None, n_features))
    labels = Label(shape=(None,))

    combo = WeightedLinearCombo(in_layers=[features_1, features_2])
    out = ReduceSum(in_layers=[combo], axis=1)
    loss = ReduceSquareDifference(in_layers=[out, labels])

    databag = Databag({features_1: X1, features_2: X2, labels: y})

    tg = dc.models.TensorGraph(learning_rate=0.1, use_queue=False)
    tg.set_loss(loss)
    tg.fit_generator(databag.iterbatches(epochs=1))
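    # WeightedLinearCombo creates one trainable weight per input layer,
    # while ReduceSum introduces no variables of its own.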
    assert len(tg.get_layer_variables(combo)) >= 2
    assert len(tg.get_layer_variables(out)) == 0
Example #6
    def test_multi_task_regressor(self):
        n_data_points = 20
        n_features = 2

        X = np.random.rand(n_data_points, n_features)
        y1 = np.expand_dims(np.array([0.5 for x in range(n_data_points)]),
                            axis=-1)
        y2 = np.expand_dims(np.array([-0.5 for x in range(n_data_points)]),
                            axis=-1)
        X = NumpyDataset(X)
        ys = [NumpyDataset(y1), NumpyDataset(y2)]

        databag = Databag()

        features = Feature(shape=(None, n_features))
        databag.add_dataset(features, X)

        outputs = []
        losses = []
        for i in range(2):
            label = Label(shape=(None, 1))
            dense = Dense(out_channels=1, in_layers=[features])
            loss = ReduceSquareDifference(in_layers=[dense, label])

            outputs.append(dense)
            losses.append(loss)
            databag.add_dataset(label, ys[i])

        total_loss = ReduceMean(in_layers=losses)

        tg = dc.models.TensorGraph(learning_rate=0.01)
        for output in outputs:
            tg.add_output(output)
        tg.set_loss(total_loss)

        tg.fit_generator(
            databag.iterbatches(epochs=1000,
                                batch_size=tg.batch_size,
                                pad_batches=True))
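        # Each head should have regressed close to its constant target
        # (0.5 for the first task, -0.5 for the second).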
        predictions = tg.predict_on_generator(databag.iterbatches())
        for i in range(2):
            y_real = ys[i].X
            y_pred = predictions[i]
            assert_true(np.all(np.isclose(y_pred, y_real, atol=1.5)))
Example #7
    def test_multi_task_classifier(self):
        n_data_points = 20
        n_features = 2

        X = np.random.rand(n_data_points, n_features)
        y1 = np.array([[0, 1] for x in range(n_data_points)])
        y2 = np.array([[1, 0] for x in range(n_data_points)])
        X = NumpyDataset(X)
        ys = [NumpyDataset(y1), NumpyDataset(y2)]

        databag = Databag()

        features = Feature(shape=(None, n_features))
        databag.add_dataset(features, X)

        outputs = []
        entropies = []
        for i in range(2):
            label = Label(shape=(None, 2))
            dense = Dense(out_channels=2, in_layers=[features])
            output = SoftMax(in_layers=[dense])
            smce = SoftMaxCrossEntropy(in_layers=[label, dense])

            entropies.append(smce)
            outputs.append(output)
            databag.add_dataset(label, ys[i])

        total_loss = ReduceMean(in_layers=entropies)

        tg = dc.models.TensorGraph(learning_rate=0.01)
        for output in outputs:
            tg.add_output(output)
        tg.set_loss(total_loss)

        tg.fit_generator(
            databag.iterbatches(epochs=1000,
                                batch_size=tg.batch_size,
                                pad_batches=True))
        predictions = tg.predict_on_generator(databag.iterbatches())
        for i in range(2):
            y_real = ys[i].X
            y_pred = predictions[i]
            assert_true(np.all(np.isclose(y_pred, y_real, atol=0.6)))
Example #8
  def test_compute_model_performance_singletask_regressor(self):
    n_data_points = 20
    n_features = 2

    X = np.random.rand(n_data_points, n_features)
    y1 = np.expand_dims(np.array([0.5 for x in range(n_data_points)]), axis=-1)
    X = NumpyDataset(X)
    ys = [NumpyDataset(y1)]

    databag = Databag()

    features = Feature(shape=(None, n_features))
    databag.add_dataset(features, X)

    outputs = []
    losses = []
    labels = []
    for i in range(1):
      label = Label(shape=(None, 1))
      dense = Dense(out_channels=1, in_layers=[features])
      loss = ReduceSquareDifference(in_layers=[dense, label])

      outputs.append(dense)
      losses.append(loss)
      labels.append(label)
      databag.add_dataset(label, ys[i])

    total_loss = ReduceMean(in_layers=losses)

    tg = dc.models.TensorGraph(mode="regression", learning_rate=0.1)
    for output in outputs:
      tg.add_output(output)
    tg.set_loss(total_loss)

    tg.fit_generator(
        databag.iterbatches(
            epochs=1000, batch_size=tg.batch_size, pad_batches=True))
    metric = [
        dc.metrics.Metric(
            dc.metrics.mean_absolute_error, np.mean, mode="regression"),
    ]
    scores = tg.evaluate_generator(
        databag.iterbatches(batch_size=tg.batch_size),
        metric,
        labels=labels,
        per_task_metrics=True)
    scores = list(scores[1].values())
    assert_true(np.all(np.isclose(scores, [0.0], atol=0.5)))
Example #9
    def test_shared_layer(self):
        n_data_points = 20
        n_features = 2

        X = np.random.rand(n_data_points, n_features)
        y1 = np.array([[0, 1] for x in range(n_data_points)])
        X = NumpyDataset(X)
        ys = [NumpyDataset(y1)]

        databag = Databag()

        features = Feature(shape=(None, n_features))
        databag.add_dataset(features, X)

        outputs = []

        label = Label(shape=(None, 2))
        dense1 = Dense(out_channels=2, in_layers=[features])
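        # dense2 shares dense1's variables, so both branches compute their
        # outputs with identical weights.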
        dense2 = dense1.shared(in_layers=[features])
        output1 = SoftMax(in_layers=[dense1])
        output2 = SoftMax(in_layers=[dense2])
        smce = SoftMaxCrossEntropy(in_layers=[label, dense1])

        outputs.append(output1)
        outputs.append(output2)
        databag.add_dataset(label, ys[0])

        total_loss = ReduceMean(in_layers=[smce])

        tg = dc.models.TensorGraph(learning_rate=0.01)
        for output in outputs:
            tg.add_output(output)
        tg.set_loss(total_loss)

        tg.fit_generator(
            databag.iterbatches(epochs=1,
                                batch_size=tg.batch_size,
                                pad_batches=True))
        prediction = tg.predict_on_generator(databag.iterbatches())
        assert_true(np.all(np.isclose(prediction[0], prediction[1],
                                      atol=0.01)))