Example #1
0
    def test_partition_iterator(self):
        """Check ByWindow folds and est_n_folds under both window modes.

        Row indices of the fixture grouped by year:
        1999/2000 -> {1, 2, 8}, 2001/2002 -> {0, 3, 6}, 2004 -> {4},
        2005/2006 -> {5, 7}.
        """
        records = np.array(
            [(2001, 12.31), (1999, 14.32), (1999, 120.76),
             (2002, 32.12), (2004, 98.64), (2005, 32.21),
             (2002, 100.23), (2006, 123.40), (2000, 72.21)],
            dtype=[('year', int), ('fine', float)])
        years = records['year']
        train_ranges = by_window_ranges(1999, 2000, 2004, 2)
        test_ranges = by_window_ranges(2001, 2002, 2006, 2)

        # Expected (train index set, test index set) for every fold, per mode.
        expected_by_mode = {
            ByWindowMode.SLIDING: [
                ({1, 2, 8}, {0, 3, 6}),
                ({0, 3, 6}, {4}),
                ({4}, {5, 7}),
            ],
            ByWindowMode.EXPANDING: [
                ({1, 2, 8}, {0, 3, 6}),
                ({0, 1, 2, 3, 6, 8}, {4}),
                ({0, 1, 2, 3, 4, 6, 8}, {5, 7}),
            ],
        }

        for mode, expected in expected_by_mode.items():
            splitter = ByWindow(years, train_ranges, test_ranges, mode)
            # Compare as sets so index order inside a fold is irrelevant.
            folds = []
            for train_inds, test_inds in splitter:
                folds.append((set(train_inds), set(test_inds)))
            self.assertEqual(folds, expected)

            n_estimated = ByWindow.est_n_folds(
                years, train_ranges, test_ranges, mode)
            self.assertEqual(n_estimated, len(folds))
Example #2
0
 def test_by_window_ranges(self):
     """by_window_ranges yields the expected list of (start, end) tuples."""
     # Step equal to the window length: back-to-back, non-overlapping windows.
     disjoint = [(1999, 2000), (2001, 2002), (2003, 2004), (2005, 2006)]
     self.assertEqual(by_window_ranges(1999, 2000, 2006, 2), disjoint)

     # Step shorter than the window length: consecutive windows overlap.
     overlapping = [(1, 3), (2, 4), (3, 5), (4, 6), (5, 7)]
     self.assertEqual(by_window_ranges(1, 3, 7, 1), overlapping)
Example #3
0
    def test_partition_iterator(self):
        """Iterate ByWindow in SLIDING and EXPANDING mode and verify the folds.

        For each mode the (train, test) index pairs produced by iterating a
        ByWindow instance are compared, as sets, with hand-written controls,
        and ByWindow.est_n_folds must agree with the number of folds yielded.
        """
        fines = np.array(
            [(2001, 12.31), (1999, 14.32), (1999, 120.76),
             (2002, 32.12), (2004, 98.64), (2005, 32.21),
             (2002, 100.23), (2006, 123.40), (2000, 72.21)],
            dtype=[('year', int), ('fine', float)])
        year_col = fines['year']
        train_wins = by_window_ranges(1999, 2000, 2004, 2)
        test_wins = by_window_ranges(2001, 2002, 2006, 2)

        # In the sliding controls each training set is the previous test set.
        sliding_folds = [
            (set([8, 1, 2]), set([0, 3, 6])),
            (set([0, 3, 6]), set([4])),
            (set([4]), set([5, 7])),
        ]
        # In the expanding controls each training set also keeps earlier rows.
        expanding_folds = [
            (set([8, 1, 2]), set([0, 3, 6])),
            (set([0, 1, 2, 3, 6, 8]), set([4])),
            (set([0, 1, 2, 3, 4, 6, 8]), set([5, 7])),
        ]
        controls = {
            ByWindowMode.SLIDING: sliding_folds,
            ByWindowMode.EXPANDING: expanding_folds,
        }

        for mode in controls:
            partitioner = ByWindow(year_col, train_wins, test_wins, mode)
            observed = [(set(train_inds), set(test_inds))
                        for train_inds, test_inds in partitioner]
            self.assertEqual(observed, controls[mode])

            estimated = ByWindow.est_n_folds(
                year_col, train_wins, test_wins, mode)
            self.assertEqual(estimated, len(observed))
Example #4
0
 def test_by_window_ranges(self):
     """Table-driven check of by_window_ranges against known window lists."""
     # Each case pairs positional arguments with the expected return value.
     # NOTE(review): arguments look like (first start, first end, last end,
     # step) -- confirm against the by_window_ranges definition.
     cases = [
         ((1999, 2000, 2006, 2),
          [(1999, 2000), (2001, 2002), (2003, 2004), (2005, 2006)]),
         ((1, 3, 7, 1),
          [(1, 3), (2, 4), (3, 5), (4, 6), (5, 7)]),
     ]
     for args, expected in cases:
         self.assertEqual(by_window_ranges(*args), expected)
Example #5
0
    def test_wrap_cross_validation(self):
        """Run a wrapped ByWindow partitioner in a Pipeline and check every fold.

        X and y are read into the pipeline, the "year" column of X is split
        off and passed to the wrapped partitioner as ``y``, and each
        ``train{0,1}_<i>`` / ``test{0,1}_<i>`` output is captured with a
        NumpyWrite node. After the run, every captured array must equal the
        corresponding rows of X or y selected by ``ctrl_inds``.
        """
        X = np.array(
            [
                (0, 2001, 12.31),
                (1, 1999, 14.32),
                (2, 1999, 120.76),
                (3, 2002, 32.12),
                (4, 2004, 98.64),
                (5, 2005, 32.21),
                (6, 2002, 100.23),
                (7, 2006, 123.40),
                (8, 2000, 72.21),
            ],
            dtype=[("id", int), ("year", int), ("fine", float)],
        )
        y = np.array([(0,), (1,), (0,), (1,), (0,), (1,), (0,), (1,), (0,)], dtype=[("category", int)])
        # Expected (train row indices, test row indices) for each fold.
        ctrl_inds = [([1, 2, 8], [0, 3, 6]), ([0, 3, 6], [4]), ([4], [5, 7])]
        p = Pipeline()

        node_X_in = p.add(NumpyRead(X))

        node_y_in = p.add(NumpyRead(y))

        # Only the "year" column is handed to the partitioner as its ``y``.
        node_just_time = p.add(SplitColumns(["year"]))
        node_just_time(node_X_in)

        training_windows = by_window_ranges(1999, 2000, 2004, 2)
        testing_windows = by_window_ranges(2001, 2002, 2006, 2)
        mode = ByWindowMode.SLIDING
        node_cv = p.add(
            wrap_and_make_instance(
                "upsg.transform.partition_iterators.ByWindow",
                n_arrays=2,
                training_windows=training_windows,
                testing_windows=testing_windows,
                mode=mode,
            )
        )
        node_cv(input0=node_X_in, input1=node_y_in, y=node_just_time)

        # A train and a test output (2) for each of the 2 arrays, per fold.
        self.assertEqual(len(node_cv.output_keys), 2 * 2 * len(ctrl_inds))
        out_nodes = []
        # range (not xrange) so the test also runs on Python 3.
        for i in range(len(ctrl_inds)):
            train_node_X = p.add(NumpyWrite())
            train_node_X(node_cv["train0_{}".format(i)])

            train_node_y = p.add(NumpyWrite())
            train_node_y(node_cv["train1_{}".format(i)])

            test_node_X = p.add(NumpyWrite())
            test_node_X(node_cv["test0_{}".format(i)])

            test_node_y = p.add(NumpyWrite())
            test_node_y(node_cv["test1_{}".format(i)])

            out_nodes.append((train_node_X, train_node_y, test_node_X, test_node_y))
        p.run()

        for (train_inds, test_inds), fold_nodes in zip(ctrl_inds, out_nodes):
            train_node_X, train_node_y, test_node_X, test_node_y = fold_nodes
            self.assertTrue(np.array_equal(train_node_X.get_stage().result, X[train_inds]))
            self.assertTrue(np.array_equal(train_node_y.get_stage().result, y[train_inds]))
            self.assertTrue(np.array_equal(test_node_X.get_stage().result, X[test_inds]))
            self.assertTrue(np.array_equal(test_node_y.get_stage().result, y[test_inds]))
Example #6
0
    def test_wrap_cross_validation(self):
        """Check the train/test outputs of a pipeline-wrapped ByWindow stage.

        The stage is built with wrap_and_make_instance for two arrays (X and
        y) and receives the 'year' column of X as its ``y`` input. Each
        'train0_<i>', 'train1_<i>', 'test0_<i>' and 'test1_<i>' output is
        wired to a NumpyWrite node. Once the pipeline has run, the written
        arrays must match X and y indexed by the control indices.
        """
        X = np.array([(0, 2001, 12.31), (1, 1999, 14.32), (2, 1999, 120.76),
                      (3, 2002, 32.12), (4, 2004, 98.64), (5, 2005, 32.21),
                      (6, 2002, 100.23), (7, 2006, 123.40), (8, 2000, 72.21)],
                     dtype=[('id', int), ('year', int), ('fine', float)])
        y = np.array([(0, ), (1, ), (0, ), (1, ), (0, ), (1, ), (0, ), (1, ),
                      (0, )],
                     dtype=[('category', int)])
        # Expected (train row indices, test row indices) for each fold.
        ctrl_inds = [([1, 2, 8], [0, 3, 6]), ([0, 3, 6], [4]), ([4], [5, 7])]
        p = Pipeline()

        node_X_in = p.add(NumpyRead(X))

        node_y_in = p.add(NumpyRead(y))

        # The partitioner's ``y`` input is the 'year' column of X only.
        node_just_time = p.add(SplitColumns(['year']))
        node_just_time(node_X_in)

        training_windows = by_window_ranges(1999, 2000, 2004, 2)
        testing_windows = by_window_ranges(2001, 2002, 2006, 2)
        mode = ByWindowMode.SLIDING
        node_cv = p.add(
            wrap_and_make_instance(
                'upsg.transform.partition_iterators.ByWindow',
                n_arrays=2,
                training_windows=training_windows,
                testing_windows=testing_windows,
                mode=mode))
        node_cv(input0=node_X_in, input1=node_y_in, y=node_just_time)

        # 2 arrays x (train, test) x number of folds.
        self.assertEqual(len(node_cv.output_keys), 2 * 2 * len(ctrl_inds))
        out_nodes = []
        # range instead of xrange keeps the test runnable on Python 3.
        for i in range(len(ctrl_inds)):
            train_node_X = p.add(NumpyWrite())
            train_node_X(node_cv['train0_{}'.format(i)])

            train_node_y = p.add(NumpyWrite())
            train_node_y(node_cv['train1_{}'.format(i)])

            test_node_X = p.add(NumpyWrite())
            test_node_X(node_cv['test0_{}'.format(i)])

            test_node_y = p.add(NumpyWrite())
            test_node_y(node_cv['test1_{}'.format(i)])

            out_nodes.append(
                (train_node_X, train_node_y, test_node_X, test_node_y))
        p.run()

        # Pair every fold's writer nodes with its control indices.
        for fold_nodes, (train_inds, test_inds) in zip(out_nodes, ctrl_inds):
            train_node_X, train_node_y, test_node_X, test_node_y = fold_nodes
            self.assertTrue(
                np.array_equal(train_node_X.get_stage().result,
                               X[train_inds]))
            self.assertTrue(
                np.array_equal(train_node_y.get_stage().result,
                               y[train_inds]))
            self.assertTrue(
                np.array_equal(test_node_X.get_stage().result,
                               X[test_inds]))
            self.assertTrue(
                np.array_equal(test_node_y.get_stage().result,
                               y[test_inds]))