def prepare(config, env, logger, learning_class):
    """Wire up an experiment environment: batch trainer, clipping threshold,
    logger, and a fresh learner with a zero-initialized weight vector."""
    env.batch_train = batch_train
    env.clip_th = 10 / env.policy.xi0
    env.logger = logger
    model = MModel.CModel()
    model.w = np.zeros(env.experiment.data_dim)
    env.learning = learning_class(model, config,
                                  MLearning.square_loss_val,
                                  MLearning.square_loss_grad)
def gen_dist_policy(dataset, pos_corr_flag):
    """Build a logging policy Q0(x) from a warm-started linear model.

    Trains on the first 10% of `dataset.train_data`, normalizes the learned
    weight vector, and calibrates a constant `c` on the last 400 training
    points so that the returned probability grows (pos_corr_flag=True) or
    decays (pos_corr_flag=False) with the projection onto w, floored at the
    module-level constant `q0`.
    """
    model = MModel.CModel()
    d = dataset.train_data[0].x.shape[0]
    model.w = np.zeros(d)
    learn = MModel.CLearning(model, CModelParameters(0, 1))
    learn.random = random.Random()
    l = len(dataset.train_data) // 10
    learn, sum_loss = bl.batch_train(learn, dataset.train_data[:l], l)
    # Normalize the learned direction to unit length.
    w = learn.model.w
    w = w / math.sqrt(np.inner(w, w))
    # Empirical distribution of squared projections on the last 400 points.
    dst = sorted([np.square(np.inner(e.x, w)) for e in dataset.train_data[-400:]])
    threshold = len(dst) // 3
    if pos_corr_flag:
        # Probability increases with the magnitude of the projection.
        c = 0.1 / (np.square(dst[-threshold]) / math.sqrt(d))
        return lambda x: max(
            q0, min(1, c * (np.power(np.inner(x, w), 4) / math.sqrt(d))))
    else:
        # Probability decreases with the magnitude of the projection.
        c = math.log(0.1) / (dst[threshold] / d)
        return lambda x: max(q0, math.exp(c * np.square(np.inner(x, w)) / d))
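# Illustrative usage sketch (assumption: `dataset` is an MData.CDataSet whose
# `train_data` is already populated; this snippet is not part of the original
# experiments):
#
#   Q0 = gen_dist_policy(dataset, pos_corr_flag=True)
#   p = Q0(dataset.train_data[0].x)   # logging probability in [q0, 1]
#
# The returned callable is what the batch-learning routines below expect to
# find as `data_paras.Q0`.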
def generic_batch_learning(dataset, logger, data_paras, model_paras, digest):
    """Generic warm-start + online batch learning loop.

    The learner is first trained on the logged data, then processes the
    online stream in batches whose size is rescaled by
    `model_paras.batch_rate` after each round.  Each new batch is handed to
    `digest`, which returns the data to retrain on together with the
    (possibly relabeled/reweighted) batch.
    """
    assert isinstance(dataset, MData.CDataSet)
    assert isinstance(logger, MLogger.CLogger)
    assert isinstance(data_paras, experiments.CDataParameters)
    assert isinstance(model_paras, experiments.CModelParameters)
    model = MModel.CModel()
    model.w = np.zeros(dataset.all_data[0].x.shape)
    learning = CLearning(model, model_paras)
    learning.random = dataset.random
    # Warm start on the logged data.
    learning, sum_loss = batch_train(learning, dataset.log_data, data_paras.cnt_log)
    data_batches = [dataset.log_data]
    cur_online = 0
    cur_sz = model_paras.batch_sz
    logger.on_start(learning, dataset)
    logger.check_and_log(learning, dataset, cur_online)
    while (cur_online < len(dataset.online_data)
           and (model_paras.label_budget == -1
                or learning.cnt_labels <= model_paras.label_budget * 2)):
        next_batch = list(dataset.online_data[cur_online:cur_online + cur_sz])
        cur_online += len(next_batch)
        data_batches.append(next_batch)
        # `digest` decides which labels to use and how to weight the batch.
        cur_dataset, next_batch = digest(learning, data_paras, data_batches, sum_loss)
        data_batches[-1] = next_batch
        learning, sum_loss = batch_train(learning, cur_dataset,
                                         data_paras.cnt_log + cur_online)
        cur_sz = int(cur_sz * model_paras.batch_rate)
        logger.check_and_log(learning, dataset, cur_online)
    logger.on_stop(learning, dataset)
    return learning
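# A minimal sketch of a `digest` callback compatible with the call convention
# used in generic_batch_learning above.  This passthrough variant is purely
# illustrative (it is not one of the original strategies): it queries nothing,
# keeps the newest batch as-is, and retrains on everything seen so far.
def passthrough_digest(learning, data_paras, data_batches, sum_loss):
    next_batch = data_batches[-1]
    cur_dataset = [e for batch in data_batches for e in batch]
    return cur_dataset, next_batch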
def IDBAL(dataset, logger, data_paras, model_paras):
    """Active learning that warm-starts from a prefix of the logged data and
    then interleaves logged and online examples in batches.

    For each online point, the label is queried only when `test_dis`
    indicates the point is informative; otherwise the current model's own
    prediction is used as its label.
    """
    assert isinstance(dataset, MData.CDataSet)
    assert isinstance(logger, MLogger.CLogger)
    assert isinstance(data_paras, experiments.CDataParameters)
    assert isinstance(model_paras, experiments.CModelParameters)
    model = MModel.CModel()
    model.w = np.zeros(dataset.all_data[0].x.shape)
    learning = CLearning(model, model_paras)
    learning.random = dataset.random
    # Warm start on the prefix of the logged data given by init_log_prop.
    last_tot_cnt = idx = int(data_paras.cnt_log * model_paras.init_log_prop)
    tmp_len = 0
    while tmp_len < len(dataset.log_data) and dataset.log_data[tmp_len].z < last_tot_cnt:
        tmp_len += 1
    learning, sum_loss = batch_train(learning, dataset.log_data[:tmp_len], last_tot_cnt)
    opt_idx = int(sum([e.w for e in dataset.log_data[:tmp_len] if e.z > 0]))
    # alpha: ratio of the remaining logged examples to the online examples.
    alpha = data_paras.cnt_log * (1.0 - model_paras.init_log_prop) / len(dataset.online_data)
    cur_online = 0
    cur_log_cnt = idx
    cur_log_idx = tmp_len
    cur_k = model_paras.batch_sz
    # Reweight training examples by the inverse logging probability.
    train_data = [MData.CExample(e.x, e.y, 1.0 / data_paras.Q0(e.x), e.z)
                  for e in dataset.train_data]
    # xi: current threshold on the logging probability; wmaxk: weight cap.
    xi = data_paras.xi0
    wmaxk = 1 / xi
    logger.on_start(learning, dataset)
    xis = [xi]
    sum_online_z = 0
    while cur_online < len(dataset.online_data):
        # Collect the logged examples that belong to this round.
        cur_log_batch = []
        while (cur_log_idx < len(dataset.log_data)
               and dataset.log_data[cur_log_idx].z <= cur_log_cnt + int(cur_k * alpha)):
            e = dataset.log_data[cur_log_idx]
            cur_log_batch.append(e)
            cur_log_idx += 1
        eta = model_paras.learning_rate
        # Update the threshold from the examples that pass the disagreement test.
        xi_next = min([1.0 / e.w for e in train_data[:int((1 + alpha) * cur_k)]
                       if test_dis(sum_loss, model_paras.c0 * wmaxk, last_tot_cnt,
                                   learning.model, e, eta, opt_idx)] + [1])
        wmaxk_next = (alpha + 1) / (alpha * xi_next + 1)
        Qk = lambda x: 1 if data_paras.Q0(x) <= xi + 1 / alpha else 0
        last_log_cnt = cur_log_cnt
        if len(cur_log_batch) != 0:
            cur_log_cnt = cur_log_batch[-1].z
        cur_log_batch = [MData.CExample(e.x, e.y,
                                        (1.0 + alpha) / (alpha / e.w + Qk(e.x)), e.z)
                         for e in cur_log_batch]
        cur_online_batch = []
        for tmp in dataset.online_data[cur_online:cur_online + cur_k]:
            cur_data = MData.CExample(
                tmp.x, tmp.y,
                (1.0 + alpha) / (alpha * data_paras.Q0(tmp.x) + Qk(tmp.x)), 1)
            if Qk(cur_data.x) == 0:
                cur_data.z = 0
            else:
                sum_online_z += 1
                if test_dis(sum_loss, model_paras.c0 * wmaxk, last_tot_cnt,
                            learning.model, cur_data, eta, opt_idx):
                    # Query the true label.
                    learning.cnt_labels += 1
                else:
                    # Use the current model's prediction as the label.
                    cur_data.y = 1 if learning.model.predict(cur_data.x) >= 0 else -1
            cur_online_batch.append(cur_data)
        last_tot_cnt = int(len(cur_online_batch) * (1 + alpha))
        learning, sum_loss = batch_train(learning, cur_log_batch + cur_online_batch,
                                         last_tot_cnt, opt_idx)
        opt_idx = int(opt_idx
                      + sum([e.w for e in cur_log_batch if e.z > 0])
                      + sum([e.w for e in cur_online_batch if e.z > 0]))
        idx += last_tot_cnt
        cur_online += len(cur_online_batch)
        cur_k = int(cur_k * model_paras.batch_rate)
        xi = xi_next
        wmaxk = wmaxk_next
        xis.append(xi)
    # Log the fraction of online points passing the Qk filter, then the xi sequence.
    logger.log_misc_info("[" + ",".join([str((sum_online_z + 1) / (cur_online + 1))]
                                        + [str(x) for x in xis]) + "]")
    logger.on_stop(learning, dataset)
    return learning