def test_train(self):
        """Checks which result attributes are populated by ``train``.

        Before training, the RMSE values and the scatter data are all
        expected to be ``None``. After training they are expected to be
        set, except for the test-related ones when the full data set is
        used for training (``train_size=1.0``).
        """
        attributes = ('_rmse_train', '_rmse_test',
                      'train_scatter_data', 'test_scatter_data')

        # (train_size, attributes expected to remain None after training)
        scenarios = [
            (0.75, ()),                                   # with test set
            (1.0, ('_rmse_test', 'test_scatter_data')),   # without testing
        ]

        for fraction, remains_none in scenarios:
            optimizer = Optimizer((self.A, self.y), train_size=fraction)

            # nothing has been computed prior to training
            for attr in attributes:
                self.assertIsNone(getattr(optimizer, attr))

            optimizer.train()

            for attr in attributes:
                value = getattr(optimizer, attr)
                if attr in remains_none:
                    self.assertIsNone(value)
                else:
                    self.assertIsNotNone(value)
    def test_get_rows_from_indices(self):
        """Checks the train/test row sets returned by _get_rows_from_indices.

        Covers three valid inputs (only the train set given, only the test
        set given, both given as partial sets) and the invalid case where
        neither set is given, which is expected to raise ``ValueError``.
        """
        optimizer = Optimizer((self.A, self.y))
        row_indices = np.arange(self.n_rows)

        def check_split(expected_train, expected_test, result):
            """Compares a returned (train, test) pair with the expectation,
            ignoring the ordering within each set."""
            got_train, got_test = result
            self.assertSequenceEqual(sorted(expected_train),
                                     sorted(got_train))
            self.assertSequenceEqual(sorted(expected_test), sorted(got_test))

        # random 80 % of the rows for training, the remainder for testing
        n_train = int(0.8 * self.n_rows)
        expected_train = np.random.choice(row_indices, n_train, replace=False)
        expected_test = sorted(np.setdiff1d(row_indices, expected_train))

        # specify only train_set, test set should default to remaining rows
        check_split(expected_train, expected_test,
                    optimizer._get_rows_from_indices(expected_train, None))

        # specify only test_set, train set should default to remaining rows
        check_split(expected_train, expected_test,
                    optimizer._get_rows_from_indices(None, expected_test))

        # specify partial sets meaning not all rows are used
        dropped = [0, 1, 2]
        expected_train = np.delete(expected_train, dropped)
        expected_test = np.delete(expected_test, dropped)
        check_split(
            expected_train, expected_test,
            optimizer._get_rows_from_indices(expected_train, expected_test))

        # test invalid input
        with self.assertRaises(ValueError):
            optimizer._get_rows_from_indices(None, None)
    def test_size_properties(self):
        """Checks the set, size and fraction properties of the optimizer.

        Exercised once with an explicit training set that spans all rows
        (no test set) and once with an explicit test set.
        """

        # test without test_set
        all_rows = np.arange(self.n_rows)
        optimizer = Optimizer((self.A, self.y), train_set=all_rows)
        self.assertSequenceEqual(optimizer.train_set.tolist(),
                                 all_rows.tolist())
        self.assertEqual(len(all_rows), optimizer.train_size)
        self.assertEqual(1.0, optimizer.train_fraction)
        self.assertIsNone(optimizer.test_set)
        self.assertEqual(optimizer.test_size, 0)
        self.assertEqual(optimizer.test_fraction, 0)

        # test with test set
        first_row = int(0.7 * self.n_rows)
        last_row = int(0.8 * self.n_rows)
        held_out = np.arange(first_row, last_row)
        optimizer = Optimizer((self.A, self.y), test_set=held_out)
        self.assertSequenceEqual(held_out.tolist(),
                                 optimizer.test_set.tolist())
        self.assertEqual(optimizer.test_size, len(held_out))
        expected_fraction = len(held_out) / self.n_rows
        self.assertAlmostEqual(optimizer.test_fraction, expected_fraction)
    def test_summary_property(self):
        """Checks that ``summary`` is a dict before and after training and
        that it contains the RMSE entries once training has been done."""
        optimizer = Optimizer((self.A, self.y))

        # without having trained
        self.assertIsInstance(optimizer.summary, dict)

        # with having trained
        optimizer.train()
        self.assertIsInstance(optimizer.summary, dict)
        for key in ('rmse_train', 'rmse_test'):
            self.assertIn(key, optimizer.summary)
    def test_zero_error_with_least_square_fit(self):
        """Checks that a least-squares fit to noise-free data is exact.

        The target vector is generated as ``A . x`` from the reference
        parameters, so both RMSE values and the largest deviation of the
        fitted parameters from the reference are expected to be zero (to
        within ``assertAlmostEqual`` precision), with and without
        standardization.
        """
        for use_standardization in (True, False):
            # set up dummy linear problem data
            targets = np.dot(self.A, self.x)
            optimizer = Optimizer(
                (self.A, targets),
                fit_method='least-squares',
                standardize=use_standardization,
            )
            optimizer.train()

            self.assertAlmostEqual(optimizer.rmse_train, 0.0)
            self.assertAlmostEqual(optimizer.rmse_test, 0.0)

            largest_deviation = np.abs(self.x - optimizer.parameters).max()
            self.assertAlmostEqual(largest_deviation, 0)
    def test_get_rows_via_sizes(self):
        """Checks the sizes of the row sets returned by _get_rows_via_sizes.

        Sizes are given as absolute row counts or as fractions; either of
        the two sizes may be left as ``None``. Invalid combinations are
        expected to raise ``ValueError``.
        """
        optimizer = Optimizer((self.A, self.y))
        total = self.n_rows

        # test with only train_size defined
        n_train = int(0.8 * total)
        train_rows, test_rows = optimizer._get_rows_via_sizes(n_train, None)
        self.assertEqual(n_train, len(train_rows))
        self.assertEqual(total - n_train, len(test_rows))

        # test with only test_size defined
        n_test = int(0.8 * total)
        train_rows, test_rows = optimizer._get_rows_via_sizes(None, n_test)
        self.assertEqual(n_test, len(test_rows))
        self.assertEqual(total - n_test, len(train_rows))

        # test with both defined
        n_train = int(0.8 * total)
        n_test = int(0.15 * total)
        train_rows, test_rows = optimizer._get_rows_via_sizes(n_train, n_test)
        self.assertEqual(n_train, len(train_rows))
        self.assertEqual(n_test, len(test_rows))

        # test with fractions
        train_fraction = 0.7
        test_fraction = 0.2
        train_rows, test_rows = optimizer._get_rows_via_sizes(
            train_fraction, test_fraction)
        train_mismatch = abs(train_fraction * total - len(train_rows))
        self.assertLess(train_mismatch, self.tol)
        test_mismatch = abs(test_fraction * total - len(test_rows))
        self.assertLess(test_mismatch, self.tol)

        # test edge case with full training set
        for full_size in [1.0, total]:
            train_rows, test_rows = optimizer._get_rows_via_sizes(
                full_size, None)
            self.assertEqual(len(train_rows), total)
            self.assertIsNone(test_rows)

        # test invalid sizes
        for bad_train_size, bad_test_size in [(None, 1.0), (None, None)]:
            with self.assertRaises(ValueError):
                optimizer._get_rows_via_sizes(bad_train_size, bad_test_size)
    def test_setup_rows(self):
        """
        Tests _setup_rows.

        Only the failure modes are covered: the call is expected to raise
        ``ValueError`` when no training data is available or when the
        training and test sets share indices.
        """
        optimizer = Optimizer((self.A, self.y))

        # no training data from train_size
        with self.assertRaises(ValueError):
            optimizer._setup_rows(0, 0.5, None, None)

        # no training data from train_set
        empty_train = []
        half_of_rows = np.arange(0, int(0.5 * self.n_rows))
        with self.assertRaises(ValueError):
            optimizer._setup_rows(None, None, empty_train, half_of_rows)

        # overlapping indices in train_set and test_set (index 5 is shared)
        overlapping_train = [1, 2, 3, 4, 5]
        overlapping_test = [5, 6, 7, 8, 9, 10]
        with self.assertRaises(ValueError):
            optimizer._setup_rows(None, None,
                                  overlapping_train, overlapping_test)
# Script fragment: fit a randomly generated linear problem through a
# Constraints transformation and check that the recovered parameters
# satisfy the imposed sum rules.
# NOTE(review): n and m are defined before this excerpt; from their use
# here n is the number of rows and m the number of columns of A.
A = np.random.random((n, m))
y = np.random.random(n)

# constraint sum eci[inds] = 0
# Two index groups; each row of M is 1 at the indices of one group and 0
# elsewhere.
inds1 = [1, 3, 4, 5]
inds2 = [2, 6, 7, 8]
M = np.zeros((2, m))
M[0, inds1] = 1
M[1, inds2] = 1

# NOTE(review): presumably Constraints(m) handles m parameters and
# add_constraint(M) registers one sum rule per row of M -- confirm against
# the Constraints implementation.
c = Constraints(m)
c.add_constraint(M)

# Train on the transformed matrix rather than on A itself.
Ac = c.transform(A)
opt = Optimizer((Ac, y), fit_method='ridge')
opt.train()

# Map the fitted parameters back and sum them over each index group.
parameters = c.inverse_transform(opt.parameters)
sum_1 = parameters[inds1].sum()
sum_2 = parameters[inds2].sum()
print('constraints 1, ', sum_1)
print('constraints 2, ', sum_2)

# Both group sums must vanish to within 1e-12.
assert abs(sum_1) < 1e-12
assert abs(sum_2) < 1e-12

# Test get_mixing_energy_constraints function
# Build the structure used for that test: an Au cell with lattice
# parameter a plus one H atom at (a/2, a/2, a/2). The call to
# get_mixing_energy_constraints itself lies beyond this excerpt.
# NOTE(review): bulk and Atom are presumably the ASE helpers -- confirm
# from the file's imports.
a = 4.0
prim = bulk('Au', a=a)
prim.append(Atom('H', position=(a / 2, a / 2, a / 2)))
 def test_repr(self):
     """Checks that ``repr`` of an Optimizer returns a string."""
     optimizer = Optimizer((self.A, self.y))
     representation = repr(optimizer)
     self.assertIsInstance(representation, str)