def test_force_floatx():
    """ForceFloatX should cast float64 sources to config.floatX and leave int64 alone."""
    feats = [numpy.array(pair, dtype="float64") for pair in ([1, 2], [3, 4], [5, 6])]
    labels = [numpy.array(value, dtype="int64") for value in (1, 2, 3)]
    stream = ForceFloatX(
        DataStream(IterableDataset(OrderedDict([("x", feats), ("y", labels)]))))
    batch = next(stream.get_epoch_iterator())
    assert str(batch[0].dtype) == config.floatX
    assert str(batch[1].dtype) == "int64"
def test_force_floatx(self):
    """Features come out as floatX while int64 targets pass through untouched."""
    stream = ForceFloatX(DataStream(self.dataset))
    first_batch = next(stream.get_epoch_iterator())
    assert_equal(str(first_batch[0].dtype), config.floatX)
    assert_equal(str(first_batch[1].dtype), 'int64')
# ) # ) valid_monitor_stream = ForceFloatX(data_stream=MovieLensTransformer( data_stream=DataStream(dataset=validset, iteration_scheme=ShuffledScheme( validset.num_examples, batch_size)))) test_monitor_stream = ForceFloatX(data_stream=MovieLensTransformer( data_stream=DataStream(dataset=testset, iteration_scheme=ShuffledScheme( testset.num_examples, batch_size)))) rating_freq = np.zeros((6040, 5)) init_b = np.zeros((6040, 5)) for batch in valid_monitor_stream.get_epoch_iterator(): inp_r, out_r, inp_m, out_m = batch rating_freq += inp_r.sum(axis=0) log_rating_freq = np.log(rating_freq + 1e-8) log_rating_freq_diff = np.diff(log_rating_freq, axis=1) init_b[:, 1:] = log_rating_freq_diff init_b[:, 0] = log_rating_freq[:, 0] # init_b = np.log(rating_freq / (rating_freq.sum(axis=1)[:, None] + 1e-8) +1e-8) * (rating_freq>0) new_items = np.where(rating_freq.sum(axis=1) == 0)[0] input_ratings = T.tensor3(name='input_ratings', dtype=theano.config.floatX) output_ratings = T.tensor3(name='output_ratings', dtype=theano.config.floatX) input_masks = T.matrix(name='input_masks', dtype=theano.config.floatX)
def prediction(self, test_input: list):
    """Predict rates for the given (uid, item_id) pairs on the test set.

    :param test_input: list of (uid, item_id) index pairs
    :return: list of predicted rates, one per entry of ``test_input``

    Side effects: evaluates the best model and then its polyak-averaged
    parameters on the whole test stream, prints both RMSE figures, saves
    each polyak parameter array to ``<path_name>/<index>.npy`` and appends
    two JSON summary records to ``Reco_NADE_masked_directly_itembased.txt``.
    """
    model_manager = self.model_manager
    testset = self.load_dataset(which_set=['test'],
                                sources=('input_ratings', 'output_ratings',
                                         'input_masks', 'output_masks'))
    test_monitor_stream = ForceFloatX(data_stream=MovieLensTransformer(
        data_stream=DataStream(dataset=testset,
                               iteration_scheme=SequentialScheme(
                                   testset.num_examples, self.batch_size))))
    f_monitor_best = self.f_monitor_best
    best_valid_error = self.best_valid_error
    best_model = self.best_model
    best_polyak = self.best_polyak
    best_epoch = self.best_epoch
    rating_category = self.rating_category
    new_items = self.new_items
    print('\tTesting ...')
    start_time = t.time()
    squared_error_test = []
    n_sample_test = []
    test_time = 0
    # Scores 1..rating_category turn the per-level softmax into an
    # expected rating value.
    rate_score = np.array(list(range(1, rating_category + 1)), np.float32)
    preds = []
    for batch in test_monitor_stream.get_epoch_iterator():
        inp_r, out_r, inp_m, _ = batch
        test_t = t.time()
        pred_ratings = f_monitor_best(inp_r)
        test_time += t.time() - test_t
        # assumes out_r is one-hot over the rating axis (axis=2) — TODO confirm
        true_r = out_r.argmax(axis=2) + 1
        pred_r = (pred_ratings[0] *
                  rate_score[np.newaxis, np.newaxis, :]).sum(axis=2)
        # Rows never rated in the training data get the neutral rating 3.
        pred_r[:, new_items] = 3
        mask = out_r.sum(axis=2)
        se = np.sum(np.square(true_r - pred_r) * mask)
        n = np.sum(mask)
        squared_error_test.append(se)
        n_sample_test.append(n)
        preds.extend(pred_r)
    # preds is indexed [row][uid]; presumably item_id (x[1]) selects the
    # accumulated row — TODO(review): confirm against the iteration scheme.
    predictions = list(map(lambda x: preds[x[1]][x[0]], test_input))
    squared_error_ = np.array(squared_error_test).sum()
    n_samples = np.array(n_sample_test).sum()
    test_RMSE = np.sqrt(squared_error_ / (n_samples * 1.0 + 1e-8))
    print('Test:', " RMSE: {0:.6f}".format(test_RMSE),
          "Test Time: {0:.6f}".format(test_time),
          get_done_text(start_time))
    to_write = {
        'test_RMSE': test_RMSE,
        'best_valid_error': best_valid_error,
        'best_epoch': best_epoch
    }
    to_write.update(
        dict(filter(lambda x: type(x[1]) in [int, float, str],
                    self.__dict__.items())))
    # Fix: context manager guarantees the log file is closed even if
    # json.dump raises (the original open()/close() pair leaked on error).
    with open(os.path.join(model_manager.path_name,
                           'Reco_NADE_masked_directly_itembased.txt'),
              'a') as f:
        json.dump(to_write, f, ensure_ascii=False)
    print('\tTesting with polyak parameters...')
    best_param_list = []
    # Plain loop instead of a side-effect list comprehension.
    for p in best_model.children:
        best_param_list.extend(p.parameters)
    f_replace = polyak_replace(best_param_list, best_polyak)
    f_replace()
    # Persist each polyak-averaged parameter as <path_name>/<index>.npy.
    for cc, pp in enumerate(best_polyak):
        np.save(os.path.join(self.model_manager.path_name, str(cc)),
                pp.get_value())
    start_time = t.time()
    squared_error_test = []
    n_sample_test = []
    test_time = 0
    # Second evaluation pass, now with the polyak-replaced parameters.
    for batch in test_monitor_stream.get_epoch_iterator():
        inp_r, out_r, inp_m, _ = batch
        test_t = t.time()
        pred_ratings = f_monitor_best(inp_r)
        test_time += t.time() - test_t
        true_r = out_r.argmax(axis=2) + 1
        pred_r = (pred_ratings[0] *
                  rate_score[np.newaxis, np.newaxis, :]).sum(axis=2)
        pred_r[:, new_items] = 3
        mask = out_r.sum(axis=2)
        se = np.sum(np.square(true_r - pred_r) * mask)
        n = np.sum(mask)
        squared_error_test.append(se)
        n_sample_test.append(n)
    squared_error_ = np.array(squared_error_test).sum()
    n_samples = np.array(n_sample_test).sum()
    test_RMSE = np.sqrt(squared_error_ / (n_samples * 1.0 + 1e-8))
    print('Test:', " RMSE: {0:.6f}".format(test_RMSE),
          "Test Time: {0:.6f}".format(test_time),
          get_done_text(start_time))
    to_write = {
        'test_RMSE': test_RMSE,
        'best_valid_error': best_valid_error,
        'best_epoch': best_epoch
    }
    to_write.update(
        dict(filter(lambda x: type(x[1]) in [int, float, str],
                    self.__dict__.items())))
    with open(os.path.join(self.model_manager.path_name,
                           'Reco_NADE_masked_directly_itembased.txt'),
              'a') as f:
        json.dump(to_write, f, ensure_ascii=False)
    return predictions
def fit(self, trainset, retrain=True):
    """Train the ordinal NADE collaborative-filtering model.

    :param trainset: ignored — immediately shadowed by ``self.load_dataset``
        (kept for interface compatibility). TODO(review): confirm callers.
    :param retrain: unused in this body — TODO(review): confirm intent.
    :return: None. On exit sets ``self.f_monitor_best``,
        ``self.best_valid_error``, ``self.best_epoch``, ``self.best_model``,
        ``self.best_polyak`` and ``self.new_items``.

    Trains with early stopping (``look_ahead`` epochs without validation
    improvement), tracking a polyak-averaged copy of the parameters.
    """
    batch_size = self.batch_size
    n_iter = self.n_iter
    look_ahead = self.look_ahead
    lr = self.lr
    b1 = self.b1
    b2 = self.b2
    epsilon = self.epsilon
    hidden_size = self.hidden_size
    activation_function = self.activation_function
    drop_rate = self.drop_rate
    weight_decay = self.weight_decay
    optimizer = self.optimizer
    std = self.std
    alpha = self.alpha
    polyak_mu = self.polyak_mu
    rating_category = self.rating_category
    item_num = self.item_num
    user_num = self.user_num
    trainset = self.load_dataset(which_set=['train'],
                                 sources=('input_ratings', 'output_ratings',
                                          'input_masks', 'output_masks'))
    validset = self.load_dataset(which_set=['valid'],
                                 sources=('input_ratings', 'output_ratings',
                                          'input_masks', 'output_masks'))
    train_loop_stream = ForceFloatX(data_stream=MovieLensTransformer(
        data_stream=Trainer_MovieLensTransformer(data_stream=DataStream(
            dataset=trainset,
            iteration_scheme=ShuffledScheme(trainset.num_examples,
                                            batch_size)))))
    valid_monitor_stream = ForceFloatX(data_stream=MovieLensTransformer(
        data_stream=DataStream(dataset=validset,
                               iteration_scheme=ShuffledScheme(
                                   validset.num_examples, batch_size))))
    rating_freq = np.zeros((user_num, rating_category))
    init_b = np.zeros((user_num, rating_category))
    # Count rating-level occurrences over the validation stream to seed the
    # output bias.
    for batch in valid_monitor_stream.get_epoch_iterator():
        inp_r, out_r, inp_m, out_m = batch
        rating_freq += inp_r.sum(axis=0)
    # Ordinal bias initialisation: first column is the raw log-frequency,
    # the rest are successive log-frequency differences.
    log_rating_freq = np.log(rating_freq + 1e-8)
    log_rating_freq_diff = np.diff(log_rating_freq, axis=1)
    init_b[:, 1:] = log_rating_freq_diff
    init_b[:, 0] = log_rating_freq[:, 0]
    # init_b = np.log(rating_freq / (rating_freq.sum(axis=1)[:, None] + 1e-8) +1e-8) * (rating_freq>0)
    # Rows with no ratings at all; their predictions are pinned elsewhere.
    new_items = np.where(rating_freq.sum(axis=1) == 0)[0]
    self.new_items = new_items
    # Symbolic theano inputs.
    input_ratings = T.tensor3(name='input_ratings',
                              dtype=theano.config.floatX)
    output_ratings = T.tensor3(name='output_ratings',
                               dtype=theano.config.floatX)
    input_masks = T.matrix(name='input_masks', dtype=theano.config.floatX)
    output_masks = T.matrix(name='output_masks',
                            dtype=theano.config.floatX)
    # Reverse cumulative sum over the rating axis (ordinal encoding).
    input_ratings_cum = T.extra_ops.cumsum(input_ratings[:, :, ::-1],
                                           axis=2)[:, :, ::-1]
    # hidden_size = [256]
    if activation_function == 'reclin':
        act = Rectifier
    elif activation_function == 'tanh':
        act = Tanh
    elif activation_function == 'sigmoid':
        act = Logistic
    else:
        act = Softplus
    layers_act = [act('layer_%d' % i) for i in range(len(hidden_size))]
    NADE_CF_model = tabula_NADE(activations=layers_act,
                                input_dim0=user_num,
                                input_dim1=rating_category,
                                other_dims=hidden_size,
                                batch_size=batch_size,
                                weights_init=Uniform(std=0.05),
                                biases_init=Constant(0.0))
    NADE_CF_model.push_initialization_config()
    dims = [user_num] + hidden_size + [user_num]
    linear_layers = [
        layer for layer in NADE_CF_model.children
        if 'linear' in layer.name
    ]
    assert len(linear_layers) == len(dims) - 1
    # Glorot-style uniform re-initialisation per linear layer.
    for i in range(len(linear_layers)):
        H1 = dims[i]
        H2 = dims[i + 1]
        width = 2 * np.sqrt(6) / np.sqrt(H1 + H2)
        # std = np.sqrt(2. / dim)
        linear_layers[i].weights_init = Uniform(width=width)
    NADE_CF_model.initialize()
    # Seed the last layer's bias with the frequency-derived init_b.
    NADE_CF_model.children[-1].parameters[-1].set_value(
        init_b.astype(theano.config.floatX))
    y = NADE_CF_model.apply(input_ratings_cum)
    y_cum = T.extra_ops.cumsum(y, axis=2)
    predicted_ratings = NDimensionalSoftmax().apply(y_cum, extra_ndim=1)
    d = input_masks.sum(axis=1)
    D = (input_masks + output_masks).sum(axis=1)
    cost, nll, nll_item_ratings, cost_ordinal_1N, cost_ordinal_N1, prob_item_ratings = rating_cost(
        y, output_ratings, input_masks, output_masks, D, d,
        alpha=alpha, std=std)
    cost.name = 'cost'
    cg = ComputationGraph(cost)
    if weight_decay > 0.0:
        all_weights = VariableFilter(roles=[WEIGHT])(cg.variables)
        l2_weights = T.sum([(W**2).sum() for W in all_weights])
        l2_cost = cost + weight_decay * l2_weights
        l2_cost.name = 'l2_decay_' + cost.name
        cg = ComputationGraph(l2_cost)
    if drop_rate > 0.0:
        dropped_layer = VariableFilter(roles=[INPUT],
                                       bricks=NADE_CF_model.children)(
                                           cg.variables)
        dropped_layer = [
            layer for layer in dropped_layer if 'linear' in layer.name
        ]
        # Skip the first linear input: dropout applies to hidden layers only.
        dropped_layer = dropped_layer[1:]
        cg_dropout = apply_dropout(cg, dropped_layer, drop_rate)
    else:
        cg_dropout = cg
    training_cost = cg_dropout.outputs[0]
    lr0 = T.scalar(name='learning_rate', dtype=theano.config.floatX)
    input_list = [input_ratings, input_masks, output_ratings, output_masks]
    if optimizer == 'Adam':
        f_get_grad, f_update_parameters, shared_gradients = Adam_optimizer(
            input_list, training_cost, cg_dropout.parameters, lr0, b1, b2,
            epsilon)
    elif optimizer == 'Adadelta':
        f_get_grad, f_update_parameters, shared_gradients = Adadelta_optimizer(
            input_list, training_cost, cg_dropout.parameters, lr, epsilon)
    else:
        f_get_grad, f_update_parameters, shared_gradients = SGD_optimizer(
            input_list, training_cost, cg_dropout.parameters, lr0, b1)
    param_list = []
    # Plain loop instead of a side-effect list comprehension.
    for p in NADE_CF_model.children:
        param_list.extend(p.parameters)
    f_update_polyak, shared_polyak = polyak(param_list, mu=polyak_mu)
    f_monitor = theano.function(inputs=[input_ratings],
                                outputs=[predicted_ratings])
    nb_of_epocs_without_improvement = 0
    # Fix: np.Inf is a deprecated alias removed in NumPy 2.0 — use np.inf.
    best_valid_error = np.inf
    epoch = 0
    best_model = cp.deepcopy(NADE_CF_model)
    best_polyak = cp.deepcopy(shared_polyak)
    start_training_time = t.time()
    lr_tracer = []
    rate_score = np.array(list(range(1, rating_category + 1)), np.float32)
    best_epoch = -1
    while epoch < n_iter and nb_of_epocs_without_improvement < look_ahead:
        print('Epoch {0}'.format(epoch))
        epoch += 1
        start_time_epoch = t.time()
        cost_train = []
        squared_error_train = []
        n_sample_train = []
        cntt = 0
        train_time = 0
        for batch in train_loop_stream.get_epoch_iterator():
            inp_r, out_r, inp_m, out_m = batch
            train_t = t.time()
            cost_value = f_get_grad(inp_r, inp_m, out_r, out_m)
            train_time += t.time() - train_t
            # pred_ratings = f_monitor(inp_r)
            if optimizer == 'Adadelta':
                f_update_parameters()
            else:
                f_update_parameters(lr)
            f_update_polyak()
            pred_ratings = f_monitor(inp_r)
            # assumes out_r is one-hot over axis 2 — TODO confirm
            true_r = out_r.argmax(axis=2) + 1
            pred_r = (pred_ratings[0] *
                      rate_score[np.newaxis, np.newaxis, :]).sum(axis=2)
            pred_r[:, new_items] = 3
            mask = out_r.sum(axis=2)
            se = np.sum(np.square(true_r - pred_r) * mask)
            n = np.sum(mask)
            squared_error_train.append(se)
            n_sample_train.append(n)
            cost_train.append(cost_value)
            cntt += 1
        cost_train = np.array(cost_train).mean()
        squared_error_ = np.array(squared_error_train).sum()
        n_samples = np.array(n_sample_train).sum()
        train_RMSE = np.sqrt(squared_error_ / (n_samples * 1.0 + 1e-8))
        print('\tTraining ...')
        print('Train :', "RMSE: {0:.6f}".format(train_RMSE),
              " Cost Error: {0:.6f}".format(cost_train),
              "Train Time: {0:.6f}".format(train_time),
              get_done_text(start_time_epoch))
        print('\tValidating ...')
        start_time = t.time()
        squared_error_valid = []
        n_sample_valid = []
        valid_time = 0
        for batch in valid_monitor_stream.get_epoch_iterator():
            inp_r, out_r, inp_m, out_m = batch
            valid_t = t.time()
            pred_ratings = f_monitor(inp_r)
            valid_time += t.time() - valid_t
            true_r = out_r.argmax(axis=2) + 1
            pred_r = (pred_ratings[0] *
                      rate_score[np.newaxis, np.newaxis, :]).sum(axis=2)
            pred_r[:, new_items] = 3
            mask = out_r.sum(axis=2)
            se = np.sum(np.square(true_r - pred_r) * mask)
            n = np.sum(mask)
            squared_error_valid.append(se)
            n_sample_valid.append(n)
        squared_error_ = np.array(squared_error_valid).sum()
        n_samples = np.array(n_sample_valid).sum()
        valid_RMSE = np.sqrt(squared_error_ / (n_samples * 1.0 + 1e-8))
        print('Validation:', " RMSE: {0:.6f}".format(valid_RMSE),
              "Valid Time: {0:.6f}".format(valid_time),
              get_done_text(start_time))
        if valid_RMSE < best_valid_error:
            best_epoch = epoch
            nb_of_epocs_without_improvement = 0
            best_valid_error = valid_RMSE
            # Free the previous snapshots before deep-copying new ones.
            del best_model
            del best_polyak
            gc.collect()
            best_model = cp.deepcopy(NADE_CF_model)
            best_polyak = cp.deepcopy(shared_polyak)
            print('\n\n Got a good one')
        else:
            nb_of_epocs_without_improvement += 1
            # Learning-rate annealing does not apply to Adadelta.
            if optimizer == 'Adadelta':
                pass
            elif nb_of_epocs_without_improvement == look_ahead and lr > 1e-5:
                nb_of_epocs_without_improvement = 0
                lr /= 4
                print("learning rate is now %s" % lr)
        lr_tracer.append(lr)
    print('\n### Training, n_layers=%d' % (len(hidden_size)),
          get_done_text(start_training_time))
    # Rebuild the prediction function from the best snapshot's graph.
    best_y = best_model.apply(input_ratings_cum)
    best_y_cum = T.extra_ops.cumsum(best_y, axis=2)
    best_predicted_ratings = NDimensionalSoftmax().apply(best_y_cum,
                                                         extra_ndim=1)
    self.f_monitor_best = theano.function(inputs=[input_ratings],
                                          outputs=[best_predicted_ratings])
    self.best_valid_error = best_valid_error
    self.best_epoch = best_epoch
    self.best_model = best_model
    self.best_polyak = best_polyak
# NOTE(review): incomplete fragment — the opening call (apparently
# `test_monitor_stream = ForceFloatX(`) lies outside this chunk, so the
# leading keyword argument and the final extra `)` are unbalanced here.
data_stream=MovieLensTransformer(
    data_stream=DataStream(
        dataset=testset,
        iteration_scheme=ShuffledScheme(
            testset.num_examples, batch_size
        )
    )
)
)
# Hard-coded 6040 x 5 — presumably MovieLens-1M user count x rating levels;
# TODO confirm against the dataset loader.
rating_freq = np.zeros((6040, 5))
init_b = np.zeros((6040, 5))
# Accumulate per-row rating-level counts over the validation stream.
for batch in valid_monitor_stream.get_epoch_iterator():
    inp_r, out_r, inp_m, out_m = batch
    rating_freq += inp_r.sum(axis=0)
# Ordinal bias initialisation: first column is the raw log-frequency,
# the rest are successive log-frequency differences.
log_rating_freq = np.log(rating_freq + 1e-8)
log_rating_freq_diff = np.diff(log_rating_freq, axis=1)
init_b[:, 1:] = log_rating_freq_diff
init_b[:, 0] = log_rating_freq[:, 0]
# init_b = np.log(rating_freq / (rating_freq.sum(axis=1)[:, None] + 1e-8) +1e-8) * (rating_freq>0)
# Rows with no ratings at all in the stream above.
new_items = np.where(rating_freq.sum(axis=1) == 0)[0]
# Symbolic theano inputs.
input_ratings = T.tensor3(name='input_ratings',
                          dtype=theano.config.floatX)
output_ratings = T.tensor3(name='output_ratings',
                           dtype=theano.config.floatX)
input_masks = T.matrix(name='input_masks', dtype=theano.config.floatX)
output_masks = T.matrix(name='output_masks',
                        dtype=theano.config.floatX)