def main_1():
    sparseness = 5
    index = 1
    extend_near_num = 1
    userId, itemId, rating = load_data(csv_file_path(sparseness, index))
    result = calculate_distance(csv_file_path(sparseness, index))
    distance = convert_distance_result(result)
    file = extend_csv_file_path(sparseness, index, extend_near_num)
    print(file)
    u, i, r = extend_array(extend_near_num, distance, userId, itemId, rating)
    save_data(file, u, i, r)

def main():
    t1 = time()
    r = calculate_distance(csv_file_path(5, 1))
    c_r = convert_distance_result(r)
    print(time() - t1)
    for k, v in c_r.items():
        print(k, v[:3])

def uppc_experiment(**kwargs):
    exp_data = {}
    exp_data.update(kwargs)
    sparseness = kwargs['sparseness']
    index = kwargs['index']
    extend = kwargs['extend']
    matrix_type = kwargs['matrix_type']

    # Build the user-item matrix from the (extended) training split and load
    # the held-out test split.
    matrix = create_sparse_matrix(
        load_training_data(sparseness, index, extend, matrix_type=matrix_type))
    test_userId, test_itemId, test_rating = \
        load_data(csv_file_path(sparseness, index, training_set=False,
                                matrix_type=matrix_type))

    # User-user Pearson correlation; undefined correlations become -1.
    R = np.corrcoef(matrix)
    R = np.nan_to_num(R, nan=-1)

    def predict(user, item):
        # Copy the item column before building the normalisation mask so the
        # rating matrix itself is not mutated across predictions.
        i = matrix[:, item].copy()
        i[i >= 0] = 1
        return np.matmul(R[user], matrix[:, item]) / np.sum(
            np.abs(np.matmul(R[user], i)))

    test_predict = np.array(list(map(predict, test_userId, test_itemId)))
    mae = np.mean(np.abs(test_predict - test_rating))
    rmse = np.sqrt(np.mean(np.square(test_predict - test_rating)))
    exp_data["mae"] = float(mae)
    exp_data["rmse"] = float(rmse)
    exp_data['datetime'] = datetime.now()
    print(exp_data)
    auto_insert_database(database_remote_config, exp_data, f'uppc_{matrix_type}')

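# A hypothetical driver for uppc_experiment above. The sparseness/index/extend
# values and the 'rt' matrix type are illustrative assumptions, not values
# taken from this code base.
def _example_run_uppc():
    uppc_experiment(sparseness=5, index=1, extend=5, matrix_type='rt')
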
def save_extend_array(sparseness, index, extend_near_nums, matrix_type):
    # Accept a single int as well as a tuple/list of neighbour counts.
    if isinstance(extend_near_nums, int):
        extend_near_nums = (extend_near_nums, )
    userId, itemId, rating = load_data(
        csv_file_path(sparseness, index, matrix_type=matrix_type))
    result = calculate_distance(
        csv_file_path(sparseness, index, matrix_type=matrix_type))
    distance = convert_distance_result(result)
    for e in extend_near_nums:
        extend_array_and_save(sparseness, index, e, distance, userId, itemId,
                              rating, matrix_type=matrix_type)

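# A hypothetical call of save_extend_array. It shows that extend_near_nums may
# be a single int or a tuple; the concrete values and the 'rt' matrix type are
# assumptions for illustration only.
def _example_save_extend_arrays():
    # single neighbour count
    save_extend_array(5, 1, 3, matrix_type='rt')
    # several neighbour counts, all sharing one distance computation
    save_extend_array(5, 1, (1, 3, 5), matrix_type='rt')
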
def experiment(experiment_data: ExperimentData, last_activation, matrix_type):
    sparseness = experiment_data.sparseness  # 5
    index = experiment_data.data_index  # 3
    mf_dim = experiment_data.mf_dim  # 32
    epochs = experiment_data.epochs  # 30
    batch_size = experiment_data.batch_size  # 128
    layers = experiment_data.layers  # [32, 16]
    reg_layers = experiment_data.reg_layers  # [0, 0]
    learning_rate = experiment_data.learning_rate  # 0.007
    extend_near_num = experiment_data.extend_near_num  # 5
    learner = experiment_data.learner  # adagrad
    optimizer = {
        'adagrad': Adagrad(lr=learning_rate),
        'rmsprop': RMSprop(lr=learning_rate),
        'adam': Adam(lr=learning_rate),
        'sgd': SGD(lr=learning_rate)
    }[learner]

    dataset_name = 'sparseness%s_%s' % (sparseness, index)
    model_out_file = '%s_NeuMF_%d_%s_%s.h5' % (dataset_name, mf_dim, layers,
                                               datetime.now())

    # Load the extended training split and the held-out test split.
    userId, itemId, rating = load_training_data(sparseness, index,
                                                extend_near_num,
                                                matrix_type=matrix_type)
    test_userId, test_itemId, test_rating = \
        load_data(csv_file_path(sparseness, index, training_set=False,
                                matrix_type=matrix_type))

    early_stop = keras.callbacks.EarlyStopping(monitor='mean_absolute_error',
                                               min_delta=0.0002,
                                               patience=10)
    model = custom_model.NeuMF.get_model(num_users=user_num,
                                         num_items=ws_num,
                                         layers=layers,
                                         reg_layers=reg_layers,
                                         mf_dim=mf_dim,
                                         last_activation=last_activation)
    model.compile(optimizer=optimizer,
                  loss='mae',
                  metrics=[metrics.mae, metrics.mse])
    model.fit([userId, itemId],
              rating,
              batch_size=batch_size,
              epochs=epochs,
              callbacks=[early_stop],
              verbose=0,
              shuffle=True)

    mkdir('./Trained')
    model.save('./Trained/{}'.format(model_out_file))

    loss, mae, mse = model.evaluate([test_userId, test_itemId],
                                    test_rating,
                                    steps=1)
    # print('loss: ', loss)
    # print('mae: ', mae)
    # print('rmse', np.sqrt(mse))
    experiment_data.model = model_out_file
    experiment_data.loss = loss
    experiment_data.mae = mae
    experiment_data.rmse = np.sqrt(mse)

    exp_data = experiment_data.to_dict()
    exp_data['datetime'] = datetime.now()
    exp_data['last_activation'] = last_activation
    print(exp_data)
    auto_insert_database(database_config, exp_data, f'ncf_{matrix_type}')

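# A hypothetical way to drive the NeuMF experiment above. It assumes that
# ExperimentData can be constructed with keyword arguments matching the
# attributes read in experiment(); the values mirror the inline comments,
# while 'relu' and the 'rt' matrix type are assumptions.
def _example_run_neumf():
    exp = ExperimentData(sparseness=5, data_index=3, mf_dim=32, epochs=30,
                         batch_size=128, layers=[32, 16], reg_layers=[0, 0],
                         learning_rate=0.007, extend_near_num=5,
                         learner='adagrad')
    experiment(exp, last_activation='relu', matrix_type='rt')
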
def experiment(**kwargs):
    sparseness = kwargs['sparseness']
    index = kwargs['index']
    epochs = kwargs['epochs']
    batch_size = kwargs['batch_size']
    mf_dim = kwargs['mf_dim']
    regs = kwargs['regs']
    last_activation = kwargs['last_activation']
    learning_rate = kwargs['learning_rate']
    extend_near_num = kwargs['extend_near_num']
    learner = kwargs['learner']
    matrix_type = kwargs['matrix_type']
    exp_data = {
        'sparseness': sparseness,
        'index': index,
        'epochs': epochs,
        'batch_size': batch_size,
        'mf_dim': mf_dim,
        'regs': regs,
        'last_activation': last_activation,
        'learning_rate': learning_rate,
        'extend_near_num': extend_near_num,
        'learner': learner
    }
    optimizer = {
        'adagrad': Adagrad(lr=learning_rate),
        'rmsprop': RMSprop(lr=learning_rate),
        'adam': Adam(lr=learning_rate),
        'sgd': SGD(lr=learning_rate)
    }[learner]

    dataset_name = 'sparseness%s_%s' % (sparseness, index)
    model_out_file = '%s_GMF_%s_extend_%s_%s.h5' % (
        dataset_name, regs, extend_near_num, datetime.now())

    # Load the extended training split and the held-out test split.
    userId, itemId, rating = load_training_data(sparseness, index,
                                                extend_near_num,
                                                matrix_type=matrix_type)
    test_userId, test_itemId, test_rating = \
        load_data(csv_file_path(sparseness, index, training_set=False,
                                matrix_type=matrix_type))

    early_stop = keras.callbacks.EarlyStopping(monitor='mean_absolute_error',
                                               min_delta=0.0002,
                                               patience=10)
    model = custom_model.GMF.get_model(num_users=user_num,
                                       num_items=ws_num,
                                       latent_dim=mf_dim,
                                       regs=regs,
                                       last_activation=last_activation)
    model.compile(optimizer=optimizer,
                  loss='mae',
                  metrics=[metrics.mae, metrics.mse])
    model.fit([userId, itemId],
              rating,
              batch_size=batch_size,
              epochs=epochs,
              callbacks=[early_stop],
              verbose=0,
              shuffle=True)

    mkdir('./Trained')
    model.save('./Trained/{}'.format(model_out_file))

    loss, mae, mse = model.evaluate([test_userId, test_itemId],
                                    test_rating,
                                    steps=1)
    # print('loss: ', loss)
    # print('mae: ', mae)
    # print('rmse', np.sqrt(mse))
    exp_data['model'] = model_out_file
    exp_data['mae'] = float(mae)
    exp_data['rmse'] = float(np.sqrt(mse))
    exp_data['datetime'] = datetime.now()
    print(exp_data)
    auto_insert_database(database_config, exp_data, f'gmf_{matrix_type}')
    return exp_data

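# A hypothetical sweep over the GMF experiment defined directly above (the
# kwargs-based experiment()); every parameter value here is an illustrative
# assumption, not taken from the original code.
def _example_run_gmf_sweep():
    for extend in (1, 3, 5):
        experiment(sparseness=5, index=1, epochs=30, batch_size=128,
                   mf_dim=8, regs=[0, 0], last_activation='relu',
                   learning_rate=0.001, extend_near_num=extend,
                   learner='adam', matrix_type='rt')
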
def experiment(**kwargs):
    sparseness = kwargs['sparseness']
    index = kwargs['index']
    epochs = kwargs['epochs']
    batch_size = kwargs['batch_size']
    layers = kwargs['layers']
    reg_layers = kwargs['reg_layers']
    fake_layers = kwargs['fake_layers']
    fake_reg_layers = kwargs['fake_reg_layers']
    last_activation = kwargs['last_activation']
    fake_last_activation = kwargs['fake_last_activation']
    learning_rate = kwargs['learning_rate']
    extend_near_num = kwargs['extend_near_num']
    learner = kwargs['learner']
    exp_data = {
        'sparseness': sparseness,
        'index': index,
        'epochs': epochs,
        'batch_size': batch_size,
        'layers': layers,
        'reg_layers': reg_layers,
        'fake_layers': fake_layers,
        'fake_reg_layers': fake_reg_layers,
        'last_activation': last_activation,
        'fake_last_activation': fake_last_activation,
        'learning_rate': learning_rate,
        'extend_near_num': extend_near_num,
        'learner': learner
    }
    optimizer = {
        'adagrad': Adagrad(lr=learning_rate),
        'rmsprop': RMSprop(lr=learning_rate),
        'adam': Adam(lr=learning_rate),
        'sgd': SGD(lr=learning_rate)
    }[learner]

    dataset_name = 'sparseness%s_%s' % (sparseness, index)
    model_out_file = '%s_exMLP_%s_%s_%s.h5' % (dataset_name, layers,
                                               fake_layers, datetime.now())

    # Extend the training split with nearest-neighbour fake users on the fly,
    # then load the held-out test split.
    userId, itemId, rating = load_data(csv_file_path(sparseness, index))
    result = calculate_distance(csv_file_path(sparseness, index))
    distance = convert_distance_result(result)
    fake_user_id, userId, itemId, rating = extend_array(
        extend_near_num, distance, userId, itemId, rating)
    test_userId, test_itemId, test_rating = load_data(
        csv_file_path(sparseness, index, training_set=False))

    # early_stop = keras.callbacks.EarlyStopping(monitor='mean_absolute_error',
    #                                            min_delta=0.0002, patience=10)
    model = custom_model.extend_mlp_model.get_model(
        num_users=user_num,
        num_items=ws_num,
        layers=layers,
        reg_layers=reg_layers,
        fake_layers=fake_layers,
        fake_reg_layers=fake_reg_layers,
        last_activation=last_activation,
        fake_last_activation=fake_last_activation)
    # The custom model appears to define its own loss (the rating is fed as an
    # input and y is None), so compile() only receives the optimizer.
    model.compile(optimizer=optimizer)
    model.fit(x=[fake_user_id, userId, itemId, rating],
              y=None,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              shuffle=False)

    mkdir('./Trained')
    model.save('./Trained/{}'.format(model_out_file))

    # prediction, fake_prediction = model.predict(
    #     [np.zeros(len(test_userId)), test_userId, test_itemId])
    # mae = np.mean(np.abs(prediction - test_rating))
    # rmse = np.sqrt(np.mean(np.square(prediction - test_rating)))
    _, _, mae, rmse = model.evaluate(
        [np.zeros(len(test_rating)), test_userId, test_itemId, test_rating],
        steps=1)
    exp_data['model'] = model_out_file
    exp_data['mae'] = float(mae)
    exp_data['rmse'] = float(rmse)
    exp_data['datetime'] = datetime.now()
    exp_data['last_activation'] = last_activation
    print(exp_data)
    auto_insert_database(database_config, exp_data, 'exmlp_rt')