def __init__(self, X, y, cb_classifier, label_inds_5=None, unlabel_inds_5=None, modelOutput_5=None):
    self.X = X
    self.y = y
    self.cb_classifier = cb_classifier
    # Wrap the raw arrays in a DataSet to obtain the pairwise distances and cluster centers.
    dt = DataSet('au', X=X, y=y)
    self.distacne = dt.get_distance()
    _, self.cluster_center_index = dt.get_cluster_center()
    # Reuse the cached five-round state only when all three pieces are supplied together.
    if label_inds_5 is not None and unlabel_inds_5 is not None and modelOutput_5 is not None:
        self.label_inds_5 = label_inds_5
        self.unlabel_inds_5 = unlabel_inds_5
        self.modelOutput_5 = modelOutput_5
        self.flag = True
    else:
        self.label_inds_5 = []
        self.unlabel_inds_5 = []
        self.modelOutput_5 = []
        self.flag = False
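
# A minimal construction sketch, kept as a comment because the enclosing class
# name is not shown here; `QueryStrategy5`, `X_train`/`y_train`, the cached_*
# variables, and the RandomForest stand-in for `cb_classifier` are illustrative
# assumptions, not names taken from this file:
#
#     from sklearn.ensemble import RandomForestClassifier
#     clf = RandomForestClassifier().fit(X_train, y_train)
#     qs = QueryStrategy5(X, y, cb_classifier=clf)      # no cached state: flag stays False
#     qs = QueryStrategy5(X, y, clf,
#                         label_inds_5=cached_label_inds,
#                         unlabel_inds_5=cached_unlabel_inds,
#                         modelOutput_5=cached_outputs)  # all three must be passed to set flag=True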
split_count = [30, 50, 70, 90]
# The number of unlabelled samples to select when generating the meta data.
num_xjselect = 30
diff_five_round = 20
n_labelleds = np.arange(2, 100, 2)

# Iterate over every dataset.
for datasetname in datasetnames:
    dataset = DataSet(datasetname, dataset_path)
    X = dataset.X
    y = dataset.y
    distacne = dataset.get_distance()
    _, cluster_center_index = dataset.get_cluster_center()
    print(datasetname + ' DataSet currently being processed********************************************')
    # Run multiple splits on the same dataset,
    # varying the split count and the number of initially labelled samples.
    for split_c in split_count:
        for n_labelled in n_labelleds:
            metadata = None
            # trains, tests, label_inds, unlabel_inds = dataset.split_data_by_nlabelled(n_labelled, test_ratio=0.6, split_count=split_count, saving_path='./n_labelled_split_info')
            trains, tests, label_inds, unlabel_inds = dataset.split_data_by_nlabelled_fulldataset(
                n_labelled, test_ratio=0.5, split_count=split_c)
            for t in range(split_c):
                meta_data = cal_meta_data_sequence(
                    X, y, distacne, cluster_center_index, modelnames,