def set_data_set(self, data_set):
     """Configure this object for the data set identified by ``data_set``.

     Stores ``data_set`` on the instance, applies the named defaults through
     ``set_data_set_defaults`` and then overrides the label (and, for the
     ``bc.DATA_NG`` case, loss) settings specific to that data set.

     Args:
         data_set: One of the ``bc.DATA_*`` identifiers handled below.

     Raises:
         AssertionError: If ``data_set`` is not one of the handled
             identifiers.
     """
     self.data_set = data_set
     if data_set == bc.DATA_NG:
         self.set_data_set_defaults('20ng-2000')
         self.loss_function = loss_function.ZeroOneError()
         self.cv_loss_function = loss_function.ZeroOneError()
         # num_labels is not overridden for this data set (a
         # [5, 10, 20, 40] override was disabled in the original code).
         self.target_labels = CR[0]
         # Two rows of source labels: CR[1] stacked on top of ST[1].
         self.source_labels = np.vstack((CR[1], ST[1]))
         self.oracle_labels = CR[1]
     elif data_set == bc.DATA_SYNTHETIC_HYP_TRANS_1_1:
         self.set_data_set_defaults(
             'synthetic_hyp_trans_class500-50-1.0-0.3-1-1')
         self.num_labels = [10, 20, 40]
         self.target_labels = None
         self.source_labels = None
         self.oracle_data_set_ids = np.asarray([1])
     elif data_set == bc.DATA_SYNTHETIC_HYP_TRANS_2_2:
         self.set_data_set_defaults(
             'synthetic_hyp_trans_class500-50-1.0-0.3-2-2')
         self.num_labels = [10, 20, 40]
         self.target_labels = None
         self.source_labels = None
         self.oracle_data_set_ids = np.asarray([1, 2])
     else:
         # Raised explicitly instead of ``assert False`` so the check still
         # fires when Python runs with -O (which strips assert statements).
         raise AssertionError('unknown data set: %r' % (data_set,))
     '''
Ejemplo n.º 2
0
 def set_synthetic_classification(self):
     """Point this configuration at the synthetic classification data.

     Sets the data/result locations, the evaluation and cross-validation
     losses, and the target/source label ids.
     """
     # Data and result locations.
     self.data_name = 'synthetic_classification'
     self.data_dir = 'data_sets/synthetic_classification'
     self.data_set_file_name = 'split_data.pkl'
     self.results_dir = 'synthetic_classification'

     # Zero-one error for evaluation, log loss for cross-validation.
     self.loss_function = loss_function.ZeroOneError()
     self.cv_loss_function = loss_function.LogLoss()

     # Target labels stay a flat array; only the source labels are passed
     # through vec_to_2d and transposed.
     self.target_labels = np.asarray([1, 2])
     source_ids = np.asarray([3, 4])
     self.source_labels = array_functions.vec_to_2d(source_ids).T
Ejemplo n.º 3
0
    def set_ng_transfer(self):
        """Apply the ``set_ng`` configuration plus the transfer label setup.

        Target labels come from ``CR[0]``; the source labels are ``CR[1]``
        stacked on top of ``ST[1]``; ``CR[1]`` is also used as the oracle
        labels. Evaluation uses zero-one error and cross-validation uses
        log loss.
        """
        # The evaluation loss is assigned before set_ng() runs, so anything
        # set_ng() assigns to the same attribute takes precedence.
        self.loss_function = loss_function.ZeroOneError()
        self.set_ng()

        target_ids, oracle_ids = CR[0], CR[1]
        self.target_labels = target_ids
        self.source_labels = np.vstack((oracle_ids, ST[1]))
        self.oracle_labels = oracle_ids

        self.cv_loss_function = loss_function.LogLoss()
    def set_data_set(self, data_set):
        """Configure this object for the transfer data set ``data_set``.

        Stores ``data_set`` on the instance, applies the named defaults via
        ``set_data_set_defaults``, sets the starting number of labels from
        the module-level ``num_starting_labels`` and finally derives
        ``labels_to_not_sample`` and ``labels_to_keep`` from the source and
        target labels.

        Args:
            data_set: One of the ``bc.DATA_*`` identifiers handled below.

        Raises:
            AssertionError: If ``data_set`` is not handled, or if the
                resulting source/target label arrays are empty.
        """
        self.data_set = data_set

        if data_set == bc.DATA_SYNTHETIC_PIECEWISE:
            self.set_data_set_defaults('synthetic_piecewise')
            self.num_labels = np.asarray([num_starting_labels])
            self.target_labels = np.zeros([1])
            self.source_labels = np.ones([1])
        elif data_set == bc.DATA_MNIST:
            self.set_data_set_defaults('mnist')
            # Floor division keeps the label count an integer on Python 3,
            # where ``/`` would produce a float.
            self.num_labels = [num_starting_labels // 2]
            self.target_labels = np.asarray([1, 3])
            self.source_labels = np.asarray([7, 8])
            self.loss_function = loss_function.ZeroOneError()
            self.cv_loss_function = loss_function.ZeroOneError()
        elif data_set == bc.DATA_BOSTON_HOUSING:
            self.set_data_set_defaults('boston_housing-13(transfer)')
            self.num_labels = [num_starting_labels]
            self.target_labels = np.asarray([0])
            self.source_labels = np.asarray([1])
        elif data_set == bc.DATA_WINE:
            self.set_data_set_defaults('wine-small-11')
            self.num_labels = [num_starting_labels]
            self.target_labels = np.asarray([0])
            self.source_labels = np.asarray([1])
        elif data_set == bc.DATA_CONCRETE:
            self.set_data_set_defaults('concrete-7')
            self.num_labels = [num_starting_labels]
            self.target_labels = np.asarray([1])
            self.source_labels = np.asarray([3])
        elif data_set == bc.DATA_CLIMATE_MONTH:
            # From here on the labels are handed to set_data_set_defaults
            # as keyword arguments; presumably it stores them on self as
            # arrays -- the checks at the end of this method rely on that.
            self.set_data_set_defaults('climate-month',
                                       source_labels=[0],
                                       target_labels=[4],
                                       is_regression=True)
            self.num_labels = np.asarray([num_starting_labels])
        elif data_set == bc.DATA_IRS:
            self.set_data_set_defaults('irs-income',
                                       source_labels=[0],
                                       target_labels=[1],
                                       is_regression=True)
            self.num_labels = np.asarray([num_starting_labels])
        elif data_set == bc.DATA_KC_HOUSING:
            self.set_data_set_defaults('kc-housing-spatial-floors',
                                       source_labels=[0],
                                       target_labels=[1],
                                       is_regression=True)
            self.num_labels = np.asarray([num_starting_labels])
        elif data_set == bc.DATA_ZILLOW:
            # A plain 'zillow' variant was used here previously.
            self.set_data_set_defaults('zillow-traffic',
                                       source_labels=[1],
                                       target_labels=[0],
                                       is_regression=True)
            self.num_labels = np.asarray([num_starting_labels])
        elif data_set == bc.DATA_TAXI:
            # Other variants ('taxi2', 'taxi2-20', 'taxi2-50', 'taxi3')
            # were tried here previously.
            self.set_data_set_defaults('taxi',
                                       source_labels=[1],
                                       target_labels=[0],
                                       is_regression=True)
            self.num_labels = np.asarray([num_starting_labels])
        else:
            # Raised explicitly instead of ``assert False`` so the check
            # still fires when Python runs with -O (which strips asserts).
            raise AssertionError('unknown transfer data set')

        assert self.source_labels.size > 0
        assert self.target_labels.size > 0
        # Flatten the source labels once and reuse the result for both
        # derived attributes.
        flat_source = self.source_labels.ravel()
        self.labels_to_not_sample = flat_source
        self.labels_to_keep = np.concatenate(
            (self.target_labels, flat_source))
Ejemplo n.º 5
0
from configs import base_configs

# Generate splits for `data` from `data.y` and bundle them with the data.
data_splitter = DataSplitter()
data_splitter.data = data

splits = data_splitter.generate_splits(data.y)

split_data = data_lib.SplitData(data, splits)
# Experiment switches selecting which learner is constructed below.
use_transfer = True

use_regression = False
# Shared method configuration; every learner below gets its own deep copy,
# so per-learner tweaks do not leak back into `m`.
m = base_configs.MethodConfigs()
m.use_validation = True
if use_transfer:
    # The transfer setup is not combined with regression.
    assert not use_regression
    # Zero-one error for both evaluation and cross-validation; set before
    # the deep copies are taken so every learner inherits it.
    m.loss_function = loss_function.ZeroOneError()
    m.cv_loss_function = loss_function.ZeroOneError()
    transfer_learner = transfer_methods.StackingTransfer(deepcopy(m))
    transfer_learner.base_learner = method.SKLLogisticRegression(deepcopy(m))
    # Alternative source learner (disabled):
    #transfer_learner.source_learner = method.SKLLogisticRegression(deepcopy(m))
    transfer_learner.source_learner = method.SKLKNN(deepcopy(m))
    # Validation is switched off for the source learner only.
    transfer_learner.source_learner.configs.use_validation = False
    transfer_learner.use_all_source = True
    # Alternative target learner (disabled):
    #transfer_learner.target_learner = method.SKLLogisticRegression(deepcopy(m))
    transfer_learner.target_learner = method.SKLKNN(deepcopy(m))

# Alternative non-transfer learners (disabled):
#learner = method.SKLKNN(deepcopy(m))
#learner = method.SKLLogisticRegression(deepcopy(m))
#learner = method.SKLRidgeClassification()
# NOTE(review): `learner` is only assigned when use_regression is True in
# the code visible here -- confirm it is defined elsewhere for other paths.
if use_regression:
    learner = method.SKLKNNRegression(deepcopy(m))