def load_data(self, slice_idx=None):
    """Load the data from the instance's data file.

    By default (slice_idx is None) the entire data set is returned. If
    slice_idx is a numpy.s_ slice expression, attempts to return a hyperslab
    (an HDF5 feature: a slice of the data is read without loading the
    complete data set).
    """
    self.original_data = dataio.get_data(self.data_file, slice_idx)
    self.revert_data()
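# A minimal sketch of the hyperslab behavior the docstring refers to: plain HDF5
# slice-on-read, assuming dataio.get_data forwards slice_idx to an h5py-style
# indexing expression (the file name and dataset key below are illustrative).
import numpy as np
import h5py

with h5py.File("data.h5", "r") as f:        # illustrative file name
    dset = f["measurements"]                 # illustrative dataset key
    full = dset[()]                          # slice_idx=None: read the whole dataset
    part = dset[np.s_[:100, 0]]              # hyperslab: only rows 0..99 of column 0 are read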
parser.add_argument('--item_fails', type=bool, nargs='?', const=True, default=False)
parser.add_argument('--iter', type=int, nargs='?', default=1000)
options = parser.parse_args()

DATASET_NAME = options.dataset
CSV_FOLDER, CSV_ALL, CONFIG_FILE, Q_NPZ, SKILL_WINS, SKILL_FAILS = dataio.build_new_paths(
    DATASET_NAME)
config = dataio.get_config(CONFIG_FILE)
experiment_args = vars(options)

df_train, df_val, df_test = dataio.get_data(DATASET_NAME)
try:
    skill_wins = load_npz(SKILL_WINS)
    skill_fails = load_npz(SKILL_FAILS)
except FileNotFoundError:  # skill matrices are optional
    skill_wins = None
    skill_fails = None

short_legend, full_legend, latex_legend, active_agents = dataio.get_legend(experiment_args)
EXPERIMENT_FOLDER = os.path.join(CSV_FOLDER, short_legend)
dataio.prepare_folder(EXPERIMENT_FOLDER)

def df_to_sparse(df, filename):
    SPARSE_NPZ = os.path.join(EXPERIMENT_FOLDER, filename)
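# Aside on the type=bool flag above: argparse applies bool() to whatever string it
# receives, so any non-empty value parses as True; only the bare flag (const) and
# the default behave as intended. A quick self-contained check:
import argparse

_p = argparse.ArgumentParser()
_p.add_argument('--item_fails', type=bool, nargs='?', const=True, default=False)
print(_p.parse_args([]).item_fails)                         # False (default)
print(_p.parse_args(['--item_fails']).item_fails)           # True  (const)
print(_p.parse_args(['--item_fails', 'False']).item_fails)  # True  -- bool('False') is True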
LAMBDA_REG = 0.1
LOG_STEP = 101

user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
rate_batch = tf.placeholder(tf.float32, shape=[None])
wins_batch = tf.placeholder(tf.float32, shape=[None], name="nb_wins")
fails_batch = tf.placeholder(tf.float32, shape=[None], name="nb_fails")

infer, logits, logits_cdf, logits_pdf, regularizer, user_bias, user_features, item_bias, item_features, thresholds = ops.inference_svd(
    user_batch, item_batch, wins_batch, fails_batch,
    user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM, device=DEVICE)
global_step = tf.train.get_or_create_global_step()
# Attention: only var_list = embd_user, bias_user
cost, auc, update_op, train_op = ops.optimization(
    infer, logits, logits_cdf, logits_pdf, regularizer, rate_batch,
    learning_rate=LEARNING_RATE, reg=LAMBDA_REG, device=DEVICE,
    var_list=[user_bias, user_features])

df_train, _, df_test = dataio.get_data()
saver = tf.train.Saver()

with tf.Session() as sess:
    saver.restore(sess, os.path.join(BASE_DIR, "fm.ckpt"))
    all_user_features = sess.run(user_features, feed_dict={user_batch: range(USER_NUM)})
    # Squared L2 norm of each user's embedding (diagonal of the Gram matrix).
    all_user_features_norms = np.diag(all_user_features.dot(all_user_features.T))
    all_user_bias = sess.run(user_bias, feed_dict={user_batch: range(USER_NUM)})
    # print('all_features', all_user_features.min(), 'to', all_user_features.max())
    # print('all_features_norms', all_user_features_norms.min(), 'to', all_user_features_norms.max())
    # print('all_bias', all_user_bias.min(), 'to', all_user_bias.max())
    start = time.time()
from config import *
from scipy.sparse import lil_matrix, save_npz, load_npz
import pandas as pd
import dataio
import pickle
import numpy as np

os.environ['LIBFM_PATH'] = '/Users/jin/code/libfm/bin/'

df_train, df_val, df_test = dataio.get_data()
X_train = load_npz('X_train.npz')
X_test = load_npz('X_test.npz')

with open('fm.pickle', 'rb') as f:
    bundle = pickle.load(f)
V = bundle['V']
V2 = np.power(V, 2)
W = bundle['W']
mu = bundle['mu']

def fma(x):
    # Second-order factorization-machine prediction:
    # mu + x.W + 0.5 * (||x.V||^2 - sum_f sum_i x_i * v_if^2).
    # Note: the x.dot(V2) term matches the FM formula's x_i**2 term only when
    # the features are binary (0/1), since then x_i**2 == x_i.
    return mu + x.dot(W) + 0.5 * (np.linalg.norm(x.dot(V), axis=1)**2 - x.dot(V2).sum(axis=1).A1)

print(X_train[:2])
print(fma(X_train[:2]))
print(X_train[:5])
print(fma(X_train[:5]))
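# fma() relies on the standard factorization-machine identity
#   sum_{i<j} <v_i, v_j> x_i x_j = 0.5 * sum_f [ (sum_i v_if x_i)^2 - sum_i v_if^2 x_i^2 ],
# which turns the O(n^2) pairwise term into O(n*k). A small self-contained check of
# that identity on dense toy data (the random mu_, W_, V_ below are illustrative,
# not values from fm.pickle):
import numpy as np

rng = np.random.default_rng(0)
n, k = 6, 3
mu_, W_, V_ = 0.5, rng.normal(size=n), rng.normal(size=(n, k))
x_ = rng.normal(size=(2, n))   # two toy feature rows

# O(n*k) form, as used by fma() above (written for dense x for clarity).
fast = mu_ + x_.dot(W_) + 0.5 * ((x_.dot(V_) ** 2).sum(axis=1) - (x_ ** 2).dot(V_ ** 2).sum(axis=1))

# Explicit O(n^2) pairwise-interaction form of the same model.
slow = mu_ + x_.dot(W_) + np.array([
    sum(V_[i].dot(V_[j]) * row[i] * row[j] for i in range(n) for j in range(i + 1, n))
    for row in x_
])

print(np.allclose(fast, slow))  # True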
# For more details, try:
#     python3 classifier.py -h
#     python3 classifier.py train -h
#     python3 classifier.py test -h
# Example: python3 classifier.py -b 20 -s 17845 train ../data/ ../results/ -e 100
import graph
import dataio
import argument
import procedure
import logging as log
import tensorflow as tf
import numpy as np

logger = log.getLogger("classifier")
args = argument.args
dataio.save_command_line(args)
if args.seed is not None:
    np.random.seed(seed=args.seed)
    tf.set_random_seed(args.seed)

spectrums, labels = dataio.get_data(args)
data_tensors = dataio.get_data_tensors(args, spectrums, labels)
graph = graph.get_graph(args, data_tensors)
with tf.Session() as sess:
    procedure.initialize(sess, graph, args.test_or_train == 'test')
    output_data = procedure.run(sess, args, graph)
    dataio.save(sess, args, output_data)
logger.info("Success")
def show_history():
    data = list(dataio.get_data('work_history'))
    return render_template('show_history.html', data=data)
if __name__ == "__main__":
    config = Configure()
    root = "../Random_forest_results"
    time_str = '{0:%Y-%m-%dT%H-%M-%S-}'.format(datetime.datetime.now())
    config.output_path = os.path.join(root, time_str + 'lout40-5')
    subdirs = ["model"]
    hyper_search = True
    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)
        for subdir in subdirs:
            os.makedirs(config.output_path + '/{}'.format(subdir))

    # Splitting the dataset into the training set and val set
    train_data, test_data = get_data(config)

    if hyper_search:
        # Number of trees in random forest
        n_estimators = [int(x) for x in np.linspace(start=200, stop=2000, num=10)]
        # Number of features to consider at every split
        max_features = ['auto', 'sqrt']
        # Maximum number of levels in tree
        max_depth = [int(x) for x in np.linspace(10, 110, num=11)]
        max_depth.append(None)
        # Minimum number of samples required to split a node
        min_samples_split = [2, 5, 10]
        # Minimum number of samples required at each leaf node
        min_samples_leaf = [1, 2, 4]
        # Method of selecting samples for training each tree
        bootstrap = [True, False]
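# The snippet above is truncated before the search itself; a minimal, self-contained
# sketch of how a grid like this is commonly wired into scikit-learn's
# RandomizedSearchCV (X_train/y_train and the search settings are illustrative
# assumptions, not taken from the original):
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

random_grid = {
    'n_estimators': [int(x) for x in np.linspace(200, 2000, num=10)],
    'max_features': ['auto', 'sqrt'],
    'max_depth': [int(x) for x in np.linspace(10, 110, num=11)] + [None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False],
}
# Illustrative search: 100 random combinations from the grid, 3-fold cross-validation.
search = RandomizedSearchCV(RandomForestClassifier(), param_distributions=random_grid,
                            n_iter=100, cv=3, n_jobs=-1, random_state=42)
# search.fit(X_train, y_train)   # X_train / y_train stand in for the real arrays
# print(search.best_params_)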