def set_drosophilia(self):
    """Point this configuration at the drosophilia data set (MSE losses)."""
    mse = loss_function.MeanSquaredError
    self.loss_function = mse()
    self.cv_loss_function = mse()
    s = 'drosophilia'
    self.data_dir = 'data_sets/' + s
    self.data_name = s
    self.results_dir = s
    self.data_set_file_name = 'split_data.pkl'
 def set_boston_housing(self):
     """Point this configuration at the boston_housing data set (MSE losses)."""
     name = 'boston_housing'
     self.loss_function = loss_function.MeanSquaredError()
     self.cv_loss_function = loss_function.MeanSquaredError()
     self.data_dir = 'data_sets/' + name
     self.data_name = name
     self.results_dir = name
     self.data_set_file_name = 'split_data.pkl'
 def set_adience_aligned_cnn_1(self):
     """Point this configuration at the adience aligned CNN (1 per instance id) data set."""
     s = 'adience_aligned_cnn_1_per_instance_id'
     self.loss_function = loss_function.MeanSquaredError()
     self.cv_loss_function = loss_function.MeanSquaredError()
     self.data_dir = 'data_sets/' + s
     self.data_name = s
     self.results_dir = s
     self.data_set_file_name = 'split_data.pkl'
Example #4
0
    def __init__(self, data_set=None):
        """Initialize project-level configuration with regression defaults.

        :param data_set: optional data-set identifier; stored on the
            instance for callers/subclasses to inspect.

        Fixes relative to the original:
        - the constructor argument ``data_set`` was stored and then
          clobbered with ``None`` further down, making the parameter
          useless; the clobber is removed.
        - ``labels_to_not_sample`` was assigned twice (``None`` then
          ``{}``); only the effective ``{}`` assignment is kept.
        - a commented-out alternative ``num_labels`` range is dropped.
        """
        super(ProjectConfigs, self).__init__()
        self._num_labels = None
        self.data_set = data_set
        self.project_dir = 'base'
        self.loss_function = loss_function.MeanSquaredError()
        self.cv_loss_function = loss_function.MeanSquaredError()
        self.data_dir = ''
        self.data_name = ''
        self.data_set_file_name = ''
        self.results_dir = ''
        self.include_name_in_results = False
        self.labels_to_use = None
        # Labels that should never be sampled, keyed per data set.
        self.labels_to_not_sample = {}
        self.target_labels = None
        self.source_labels = None
        self.oracle_labels = None
        self.num_labels = range(40, 201, 40)
        # Default data set; callers typically override via set_* methods.
        self.set_boston_housing()
        self.num_splits = 30
        self.labels_to_keep = None
        self.use_pool = False
        self.pool_size = 2
        self.method_results_class = results_lib.MethodResults

        self.oracle_data_set_ids = None
 def set_synthetic_linear_reg(self):
     """Point this configuration at the synthetic linear regression data set."""
     s = 'synthetic_linear_reg500-50-1'
     self.loss_function = loss_function.MeanSquaredError()
     self.cv_loss_function = loss_function.MeanSquaredError()
     self.data_dir = 'data_sets/' + s
     self.data_name = s
     self.results_dir = s
     self.data_set_file_name = 'split_data.pkl'
 def set_data_path_results(self, name):
     """Configure data and results paths from the data-set *name*."""
     mse = loss_function.MeanSquaredError
     self.loss_function = mse()
     self.cv_loss_function = mse()
     self.data_name = name
     self.results_dir = name
     self.data_dir = 'data_sets/' + name
     self.data_set_file_name = 'split_data.pkl'
 def set_concrete(self):
     """Point this configuration at the concrete data set (MSE losses)."""
     name = 'concrete'
     self.loss_function = loss_function.MeanSquaredError()
     self.cv_loss_function = loss_function.MeanSquaredError()
     self.data_dir = 'data_sets/' + name
     self.data_name = name
     self.results_dir = name
     self.data_set_file_name = 'split_data.pkl'
 def set_wine_red(self):
     """Point this configuration at the wine-red data set (MSE losses)."""
     self.loss_function = loss_function.MeanSquaredError()
     self.cv_loss_function = loss_function.MeanSquaredError()
     self.data_name = 'wine-red'
     self.results_dir = 'wine-red'
     self.data_dir = 'data_sets/' + 'wine-red'
     self.data_set_file_name = 'split_data.pkl'
Example #9
0
 def set_synthetic_regression(self, name):
     """Configure for a synthetic regression data set named *name*.

     Sets label 0 as the target domain and label 1 as the source domain.

     Fix: the original assigned ``self.loss_function`` twice and never set
     ``self.cv_loss_function``; every sibling setter sets both, so the
     duplicate assignment is treated as the intended ``cv_loss_function``.
     """
     self.loss_function = loss_function.MeanSquaredError()
     self.cv_loss_function = loss_function.MeanSquaredError()
     self.target_labels = np.zeros(1)
     self.source_labels = np.ones(1)
     self.data_dir = 'data_sets/' + name
     self.data_name = name
     self.results_dir = name
     self.data_set_file_name = 'split_data.pkl'
Example #10
0
 def set_bike_sharing(self):
     """Configure for the 1-feature bike_sharing transfer data set.

     Target domain is label 1, source domain is label 0. Requires
     ``self.use_1d_data`` to be truthy (only the 1d variant is configured).
     """
     self.loss_function = loss_function.MeanSquaredError()
     self.cv_loss_function = loss_function.MeanSquaredError()
     # Fixed: `assert x == True` -> `assert x`; comparing to True is
     # un-idiomatic and rejects truthy non-bool values.
     assert self.use_1d_data
     self.data_dir = 'data_sets/bike_sharing-feat=1'
     self.data_name = 'bike_sharing-feat=1'
     self.results_dir = 'bike_sharing-feat=1'
     self.data_set_file_name = 'split_data.pkl'
     self.target_labels = np.asarray([1])
     self.source_labels = np.asarray([0])
 def set_pollution(self, id, size):
     """Configure for a normalized pollution transfer data set.

     :param id: identifier embedded in the data-set name.
     :param size: sample size embedded in the data-set name.
     """
     # NOTE: `id` shadows the builtin, but the name is part of the
     # public interface and is kept unchanged.
     self.loss_function = loss_function.MeanSquaredError()
     self.cv_loss_function = loss_function.MeanSquaredError()
     #assert self.use_1d_data == True
     data_name = 'pollution-%d-%d-norm' % (id, size)
     self.data_dir = 'data_sets/' + data_name
     self.data_name = data_name
     self.results_dir = data_name
     self.data_set_file_name = 'split_data.pkl'
     self.target_labels = np.asarray([0])
     self.source_labels = np.asarray([1])
Example #12
0
 def __init__(self, configs=MethodConfigs()):
     """Kernel density estimation method with MSE losses.

     NOTE(review): the mutable default ``configs=MethodConfigs()`` is
     evaluated once and shared across calls; kept for interface
     compatibility -- confirm callers always pass configs explicitly.
     """
     super(KDE, self).__init__(configs)
     # Fixed: the original built a sigma grid for cross-validation and
     # then immediately clobbered it with {}; the dead assignment is
     # removed and the effective empty dict is kept.
     self.cv_params = {}
     self.is_classifier = False
     self._estimated_error = None
     self.quiet = True
     self.best_params = None
     self.model = None
     self.configs.loss_function = loss_function.MeanSquaredError()
     self.configs.cv_loss_function = loss_function.MeanSquaredError()
Example #13
0
 def __init__(self):
     """Method-level configuration; loss functions come from project configs."""
     super(MethodConfigs, self).__init__()
     pc = create_project_configs()
     self.z_score = False
     self.quiet = False
     # Fixed: the original first assigned fresh MeanSquaredError instances
     # and then immediately overwrote both attributes from the project
     # configs; the dead assignments are removed.
     self.cv_loss_function = pc.cv_loss_function
     self.loss_function = pc.loss_function
     self.use_validation = False
     self.metric = 'euclidean'
     self.use_saved_cv_output = False
Example #14
0
    def set_data_set(self, name, target_labels, source_labels, is_regression):
        """Configure paths and target/source label splits for data set *name*.

        Only regression data sets are supported.
        """
        assert is_regression
        mse = loss_function.MeanSquaredError
        self.loss_function = mse()
        self.cv_loss_function = mse()

        self.data_name = name
        self.results_dir = name
        self.data_dir = 'data_sets/' + name

        self.data_set_file_name = 'split_data.pkl'
        self.target_labels = np.asarray(target_labels)
        self.source_labels = np.asarray(source_labels)
 def set_pollution(self):
     """Configure for the pollution-[3 4]-500-norm transfer data set.

     Target domain is label 0 (stations ordered [1, 0]); source domains
     are labels 1-3 (ordered [3, 2]).
     """
     data_name = 'pollution-[3 4]-500-norm'
     #data_name = 'pollution-[60 71]-500-norm'
     self.loss_function = loss_function.MeanSquaredError()
     self.cv_loss_function = loss_function.MeanSquaredError()
     self.data_dir = 'data_sets/' + data_name
     self.data_name = data_name
     self.results_dir = data_name
     self.data_set_file_name = 'split_data.pkl'
     self.target_labels = np.asarray([0])
     self.source_labels = np.asarray([1, 2, 3])
     self.target_domain_order = np.asarray([1, 0])
     self.source_domain_order = np.asarray([3, 2])
Example #16
0
    def set_boston_housing_transfer(self):
        """Configure the boston_housing transfer task (1d or 13-feature variant).

        Target domain is label 0, source domain is label 1.
        """
        self.loss_function = loss_function.MeanSquaredError()
        self.cv_loss_function = loss_function.MeanSquaredError()

        # Pick the variant by dimensionality; only data_dir carries the
        # '(transfer)' suffix.
        name = 'boston_housing' if self.use_1d_data else 'boston_housing-13'
        self.data_dir = 'data_sets/' + name + '(transfer)'
        self.data_name = name
        self.results_dir = name
        self.data_set_file_name = 'split_data.pkl'
        self.target_labels = np.asarray([0])
        self.source_labels = np.asarray([1])
Example #17
0
    def set_concrete_transfer(self):
        """Configure the concrete transfer task (1-feature or 7-feature variant).

        Target domain is label 1, source domain is label 3.
        """
        self.loss_function = loss_function.MeanSquaredError()
        self.cv_loss_function = loss_function.MeanSquaredError()

        name = 'concrete-feat=0' if self.use_1d_data else 'concrete-7'
        self.data_dir = 'data_sets/' + name
        self.data_name = name
        self.results_dir = name
        self.data_set_file_name = 'split_data.pkl'
        self.target_labels = np.asarray([1])
        self.source_labels = np.asarray([3])
Example #18
0
    def set_wine(self):
        """Configure the wine-small transfer task (1-feature or 11-feature variant).

        Target domain is label 0, source domain is label 1.
        """
        self.loss_function = loss_function.MeanSquaredError()
        self.cv_loss_function = loss_function.MeanSquaredError()

        name = 'wine-small-feat=1' if self.use_1d_data else 'wine-small-11'
        self.data_dir = 'data_sets/' + name
        self.data_name = name
        self.results_dir = name

        self.data_set_file_name = 'split_data.pkl'
        self.target_labels = np.asarray([0])
        self.source_labels = np.asarray([1])
Example #19
0
 def set_synthetic_step_linear_transfer(self):
     """Configure for the synthetic step/linear transfer data set.

     Target domain is label 0, source domain is label 1.
     """
     # NOTE(review): unlike the other setters this one does not assign
     # self.cv_loss_function -- confirm whether that is intentional.
     s = 'synthetic_step_linear_transfer'
     self.loss_function = loss_function.MeanSquaredError()
     self.data_dir = 'data_sets/' + s
     self.data_name = s
     self.data_set_file_name = 'split_data.pkl'
     self.results_dir = s
     self.target_labels = np.asarray([0])
     self.source_labels = np.asarray([1])
Example #20
0
 def set_data_set_defaults(self,
                           data_set_name,
                           target_labels=None,
                           source_labels=None,
                           is_regression=True):
     """Generic setter: configure paths and optional label splits.

     :param data_set_name: name used for data_dir, data_name, results_dir.
     :param target_labels: optional labels for the target domain.
     :param source_labels: optional labels for the source domain.
     :param is_regression: must be True; only regression is supported.
     """
     assert is_regression
     mse = loss_function.MeanSquaredError
     self.loss_function = mse()
     self.cv_loss_function = mse()
     self.data_dir = 'data_sets/' + data_set_name
     self.data_name = data_set_name
     self.results_dir = data_set_name
     self.data_set_file_name = 'split_data.pkl'
     # Convert to arrays only when labels were actually supplied.
     self.target_labels = \
         None if target_labels is None else np.asarray(target_labels)
     self.source_labels = \
         None if source_labels is None else np.asarray(source_labels)
Example #21
0
 def __init__(self, configs=base_configs.MethodConfigs()):
     # Hypothesis-transfer method optimized via scipy; cross-validates the
     # regularizer C over 10^-4 .. 10^3 and uses a quiet Nadaraya-Watson
     # estimator (with MSE cv loss) as the auxiliary function g.
     # NOTE(review): the mutable default `configs=...MethodConfigs()` is
     # evaluated once and shared across all calls -- confirm intentional.
     super(ScipyOptNonparametricHypothesisTransfer, self).__init__(configs)
     self.cv_params['C'] = 10**np.asarray(range(-4, 4), dtype='float64')
     self.g_nw = method.NadarayaWatsonMethod(configs)
     self.g_nw.configs.target_labels = None
     self.g_nw.configs.source_labels = None
     self.g_nw.configs.cv_loss_function = loss_function.MeanSquaredError()
     self.g_nw.quiet = True
     # k: neighborhood size; bias: additive offset used by the optimizer.
     self.k = 3
     self.metric = configs.metric
     self.bias = 0
     # NOTE(review): `use_huber` is neither a parameter nor defined in this
     # block -- this line raises NameError unless a module-level global
     # `use_huber` exists; verify against the full source file.
     self.use_huber = use_huber
    def __init__(self, data_set=None, **kwargs):
        # Visualization configuration: copies plotting-related fields from a
        # ProjectConfigs instance and selects which loss / result features
        # to plot based on `loss_to_use`.
        # NOTE(review): `max_rows`, `pc_fields_to_copy`, `bc`, `loss_to_use`,
        # `classification_data_sets` and the LOSS_* constants are not defined
        # in this block -- presumed module-level globals; verify in the file.
        super(VisualizationConfigs, self).__init__(data_set, **kwargs)
        self.max_rows = max_rows
        pc = ProjectConfigs(data_set)
        self.copy_fields(pc, pc_fields_to_copy)

        self.data_set_to_use = pc.data_set
        # Human-readable plot title, with a fallback for unknown data sets.
        self.title = bc.data_name_dict.get(self.data_set_to_use,
                                           'Unknown Data Set')
        self.show_legend_on_all = True
        self.x_axis_string = 'Number of labeled instances'
        self.ylims = None
        self.generate_file_names(pc)

        # Default plotting loss; may be replaced below depending on which
        # quantity (y, p, noise, entropy) is being visualized.
        viz_loss_function = loss_function.MeanSquaredError()
        self.always_show_y_label = True
        is_regression = not self.data_set_to_use in classification_data_sets

        instance_subset = 'is_train'
        # Allow a subclass to pre-set loss_to_use; otherwise take the
        # module-level default.
        if not hasattr(self, 'loss_to_use'):
            self.loss_to_use = loss_to_use
        if self.loss_to_use == LOSS_Y:
            if pc.use_var:
                results_features = ['y', 'true_y']
                self.y_axis_string = 'Variance Error'
            else:
                results_features = ['y', 'true_y']
                self.y_axis_string = 'Prediction Error'
        elif self.loss_to_use == LOSS_P:
            results_features = ['p', 'true_p']
            self.y_axis_string = 'P(X) Error'
        elif self.loss_to_use == LOSS_NOISY:
            results_features = ['is_noisy', 'is_selected']
            viz_loss_function = loss_function.LossAnyOverlap()
            self.y_axis_string = 'Noisy Error'
        elif self.loss_to_use == LOSS_ENTROPY:
            instance_subset = 'is_selected'
            results_features = ['y_orig', 'y_orig']
            viz_loss_function = loss_function.LossSelectedEntropy(
                is_regression=is_regression)
            self.y_axis_string = 'Selection Distribution Error'
        else:
            # Unknown loss selector: fail loudly rather than plot garbage.
            assert False

        self.instance_subset = instance_subset
        self.results_features = results_features
        self.loss_function = viz_loss_function
Example #23
0
        value_housing = pricing_data[I, 1]
        value_housing /= value_housing.max()
        if apply_log:
            value_housing = np.log(value_housing)
        data = combine_data(loc_traffic, value_traffic, loc_housing, value_housing)
    else:
        I &= np.isfinite(locations[:, 0])
        data = create_transfer_data(locations, pricing_data, I, apply_log)
    print 'n: ' + str(I.sum())
    # pricing_data[:] = 1



if run_state_tests:
    m = base_configs.MethodConfigs()
    m.cv_loss_function = loss_function.MeanSquaredError()
    m.loss_function = loss_function.MeanSquaredError()
    loss = loss_function.MeanSquaredError()
    m.use_validation = True
    m.target_labels = np.asarray([1])
    m.source_labels = np.asarray([0])
    stacking_transfer = transfer_methods.StackingTransfer(deepcopy((m)))

    m.just_target = True
    target_learner = far_transfer_methods.GraphTransfer(deepcopy(m))
    m.just_target = False
    m.just_transfer = True
    source_learner = far_transfer_methods.GraphTransfer(deepcopy(m))
    num_splits = 10
    errors = np.zeros((all_states.size, 3))
    for state_idx, s in enumerate(all_states):