Exemple #1
0
    def __init__(self,
                 X,
                 y,
                 cb_classifier,
                 label_inds_5=None,
                 unlabel_inds_5=None,
                 modelOutput_5=None):
        """Set up the instance from the data, a classifier and optional history.

        A ``DataSet`` named ``'au'`` is built from ``X`` and ``y``; its
        distance result and the second element returned by
        ``get_cluster_center()`` are cached on the instance.

        Args:
            X: Feature data, stored as-is and forwarded to ``DataSet``.
            y: Label data, stored as-is and forwarded to ``DataSet``.
            cb_classifier: Classifier object, stored without modification.
            label_inds_5: Optional labelled-index history.
            unlabel_inds_5: Optional unlabelled-index history.
            modelOutput_5: Optional model-output history.

        The three ``*_5`` arguments are only kept when *all* of them are
        supplied; if any one is ``None`` every one of them is replaced by a
        fresh empty list. ``self.flag`` records which of the two cases applied.
        NOTE(review): the ``_5`` suffix presumably refers to state from five
        rounds earlier -- confirm against the callers.
        """
        self.X = X
        self.y = y
        self.cb_classifier = cb_classifier

        # Pre-compute the dataset-level quantities once at construction time.
        dataset = DataSet('au', X=X, y=y)
        self.distacne = dataset.get_distance()
        _, center_index = dataset.get_cluster_center()
        self.cluster_center_index = center_index

        # The history is usable only when every piece of it was provided.
        history_missing = (label_inds_5 is None
                           or unlabel_inds_5 is None
                           or modelOutput_5 is None)
        self.flag = not history_missing

        if history_missing:
            # Partial history is discarded; start from empty containers.
            self.label_inds_5 = []
            self.unlabel_inds_5 = []
            self.modelOutput_5 = []
        else:
            self.label_inds_5 = label_inds_5
            self.unlabel_inds_5 = unlabel_inds_5
            self.modelOutput_5 = modelOutput_5
    split_count = [30, 50, 70, 90]
    # The number of unlabelled samples to select when generating the meta data.
    num_xjselect = 30

    diff_five_round = 20

    n_labelleds = np.arange(2, 100, 2)

    # first choose a dataset
    for datasetname in datasetnames:

        dataset = DataSet(datasetname, dataset_path)
        X = dataset.X
        y = dataset.y
        distacne = dataset.get_distance()
        _, cluster_center_index = dataset.get_cluster_center()
        print(
            datasetname +
            ' DataSet currently being processed********************************************'
        )
        # Run multiple splits on the same dataset,
        # changing the value of initial_label_rate each time.
        for split_c in split_count:
            for n_labelled in n_labelleds:
                metadata = None
                # trains, tests, label_inds, unlabel_inds = dataset.split_data_by_nlabelled(n_labelled, test_ratio=0.6, split_count=split_count, saving_path='./n_labelled_split_info')
                trains, tests, label_inds, unlabel_inds = dataset.split_data_by_nlabelled_fulldataset(
                    n_labelled, test_ratio=0.5, split_count=split_c)
                for t in range(split_c):
                    meta_data = cal_meta_data_sequence(
                        X, y, distacne, cluster_center_index, modelnames,