Example 1
    def load_data(self, slice_idx=None):
        """Load the data from the instance's data file.

        By default (slice_idx is None) the entire data set is returned. If slice_idx is a
        numpy.s_ slice expression, attempts to return a hyperslab (an HDF5 feature that
        reads only the requested slice instead of loading the complete data).
        """
        self.original_data = dataio.get_data(self.data_file, slice_idx)
        self.revert_data()
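The hyperslab behaviour mentioned in the docstring is HDF5's ability to read just a sub-region of a dataset from disk. A minimal sketch of that idea with h5py and numpy.s_ (the file name, dataset name, and shape are hypothetical, not taken from the example):

import h5py
import numpy as np

# Hypothetical HDF5 file with a 3-D dataset named "data".
with h5py.File("data.h5", "r") as f:
    full = f["data"][()]                 # loads the entire dataset into memory
    slab = f["data"][np.s_[0:10, :, 5]]  # reads only the requested hyperslab from disk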
Example 3
parser.add_argument('--item_fails',
                    type=bool,
                    nargs='?',
                    const=True,
                    default=False)
parser.add_argument('--iter', type=int, nargs='?', default=1000)
options = parser.parse_args()
DATASET_NAME = options.dataset
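# Side note on the '--item_fails' definition above: nargs='?' with const=True makes the
# option behave as an on/off flag, but because type=bool turns any non-empty string into
# True, an explicit value such as '--item_fails False' would still parse as True.
# A minimal standalone sketch (the 'demo' parser is illustrative, not part of this script):
demo = argparse.ArgumentParser()
demo.add_argument('--item_fails', type=bool, nargs='?', const=True, default=False)
assert demo.parse_args([]).item_fails is False               # flag omitted
assert demo.parse_args(['--item_fails']).item_fails is True  # flag present, no value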

CSV_FOLDER, CSV_ALL, CONFIG_FILE, Q_NPZ, SKILL_WINS, SKILL_FAILS = dataio.build_new_paths(
    DATASET_NAME)

config = dataio.get_config(CONFIG_FILE)
experiment_args = vars(options)

df_train, df_val, df_test = dataio.get_data(DATASET_NAME)
try:
    skill_wins = load_npz(SKILL_WINS)
    skill_fails = load_npz(SKILL_FAILS)
except FileNotFoundError:  # the skill win/fail matrices are optional
    skill_wins = None
    skill_fails = None

short_legend, full_legend, latex_legend, active_agents = dataio.get_legend(
    experiment_args)
EXPERIMENT_FOLDER = os.path.join(CSV_FOLDER, short_legend)
dataio.prepare_folder(EXPERIMENT_FOLDER)


def df_to_sparse(df, filename):
    SPARSE_NPZ = os.path.join(EXPERIMENT_FOLDER, filename)
LAMBDA_REG = 0.1
LOG_STEP = 101


user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
rate_batch = tf.placeholder(tf.float32, shape=[None])
wins_batch = tf.placeholder(tf.float32, shape=[None], name="nb_wins")
fails_batch = tf.placeholder(tf.float32, shape=[None], name="nb_fails")

infer, logits, logits_cdf, logits_pdf, regularizer, user_bias, user_features, item_bias, item_features, thresholds = ops.inference_svd(
    user_batch, item_batch, wins_batch, fails_batch,
    user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM, device=DEVICE)
global_step = tf.train.get_or_create_global_step()
# Attention: only var_list = embd_user, bias_user
cost, auc, update_op, train_op = ops.optimization(
    infer, logits, logits_cdf, logits_pdf, regularizer, rate_batch,
    learning_rate=LEARNING_RATE, reg=LAMBDA_REG, device=DEVICE,
    var_list=[user_bias, user_features])
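# The var_list argument above restricts the update to the user-side parameters, so the
# item embeddings and biases keep their restored values. A minimal sketch of the same
# idea with a plain TF1 optimizer ('finetune_op' is illustrative and unused below):
finetune_op = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(
    cost, var_list=[user_bias, user_features])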

df_train, _, df_test = dataio.get_data()

saver = tf.train.Saver()

with tf.Session() as sess:
    saver.restore(sess, os.path.join(BASE_DIR, "fm.ckpt"))

    all_user_features = sess.run(user_features, feed_dict={user_batch: range(USER_NUM)})
    all_user_features_norms = np.diag(all_user_features.dot(all_user_features.T))
    all_user_bias = sess.run(user_bias, feed_dict={user_batch: range(USER_NUM)})
    # print('all_features', all_user_features.min(), 'to', all_user_features.max())
    # print('all_features_norms', all_user_features_norms.min(), 'to', all_user_features_norms.max())
    # print('all_bias', all_user_bias.min(), 'to', all_user_bias.max())
    #print('item_features', all_user_bias.min(), 'to', all_user_bias.max())
    start = time.time()
Example 5
from config import *
from scipy.sparse import lil_matrix, save_npz, load_npz
import os
import pandas as pd
import dataio
import pickle
import numpy as np

os.environ['LIBFM_PATH'] = '/Users/jin/code/libfm/bin/'

df_train, df_val, df_test = dataio.get_data()
X_train = load_npz('X_train.npz')
X_test = load_npz('X_test.npz')

with open('fm.pickle', 'rb') as f:
    bundle = pickle.load(f)
    V = bundle['V']
    V2 = np.power(V, 2)
    W = bundle['W']
    mu = bundle['mu']


def fma(x):
    return mu + x.dot(W) + 0.5 * (np.linalg.norm(x.dot(V), axis=1)**2 -
                                  x.dot(V2).sum(axis=1).A1)
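# fma(x) evaluates the second-order factorization machine prediction via the usual
# identity (Rendle, 2010):
#   sum_{i<j} <v_i, v_j> x_i x_j = 0.5 * sum_f [(sum_i v_{i,f} x_i)^2 - sum_i v_{i,f}^2 x_i^2]
# np.linalg.norm(x.dot(V), axis=1)**2 is the first term per row, and x.dot(V2).sum(axis=1)
# matches the second term when the features are 0/1 indicators (x_i^2 = x_i); for
# non-binary features the exact term would be x.multiply(x).dot(V2). The .A1 flattens the
# np.matrix returned by the sparse-dense dot product into a 1-D array.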


print(X_train[:2])
print(fma(X_train[:2]))
print(X_train[:5])
print(fma(X_train[:5]))
Example 6
#
#  For more details, try:
#  python3 classifier.py -h
#  python3 classifier.py train -h
#  python3 classifier.py test -h
#  example: python3 classifier.py -b 20 -s 17845 train ../data/ ../results/ -e 100
import graph
import dataio
import argument
import procedure
import logging as log
import tensorflow as tf
import numpy as np

logger = log.getLogger("classifier")
args = argument.args
dataio.save_command_line(args)

if args.seed is not None:
    np.random.seed(seed=args.seed)
    tf.set_random_seed(args.seed)

spectrums, labels = dataio.get_data(args)
data_tensors = dataio.get_data_tensors(args, spectrums, labels)
graph = graph.get_graph(args, data_tensors)
with tf.Session() as sess:
    procedure.initialize(sess, graph, args.test_or_train == 'test')
    output_data = procedure.run(sess, args, graph)
    dataio.save(sess, args, output_data)
logger.info("Success")
Example 7
def show_history():
    data = list(dataio.get_data('work_history'))
    return render_template('show_history.html', data=data)
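The show_history view above relies on a Flask route decorator that is not shown in the snippet. A minimal sketch of how such a view is typically wired up (the app object and the '/history' URL are assumptions, not part of the example):

from flask import Flask, render_template
import dataio

app = Flask(__name__)

@app.route('/history')
def show_history():
    # list() materializes whatever iterable dataio.get_data returns before templating
    data = list(dataio.get_data('work_history'))
    return render_template('show_history.html', data=data)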
if __name__ == "__main__":
    config = Configure()

    root = "../Random_forest_results"
    time_str = '{0:%Y-%m-%dT%H-%M-%S-}'.format(datetime.datetime.now())
    config.output_path = os.path.join(root, time_str+'lout40-5')
    subdirs = ["model"]
    hyper_search = True
    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)
        for subdir in subdirs:
            os.makedirs(config.output_path + '/{}'.format(subdir))
            
    # Splitting the dataset into the training set and test set
    train_data, test_data = get_data(config)
    
    if hyper_search:
        # Number of trees in random forest
        n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
        # Number of features to consider at every split
        max_features = ['auto', 'sqrt']
        # Maximum number of levels in tree
        max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
        max_depth.append(None)
        # Minimum number of samples required to split a node
        min_samples_split = [2, 5, 10]
        # Minimum number of samples required at each leaf node
        min_samples_leaf = [1, 2, 4]
        # Method of selecting samples for training each tree
        bootstrap = [True, False]
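        # The lists above form a typical random-forest search space. A minimal sketch,
        # assuming scikit-learn's RandomizedSearchCV is the intended consumer (the names
        # random_grid and rf_random are illustrative, not taken from the source):
        from sklearn.ensemble import RandomForestClassifier
        from sklearn.model_selection import RandomizedSearchCV
        random_grid = {'n_estimators': n_estimators,
                       'max_features': max_features,
                       'max_depth': max_depth,
                       'min_samples_split': min_samples_split,
                       'min_samples_leaf': min_samples_leaf,
                       'bootstrap': bootstrap}
        rf_random = RandomizedSearchCV(estimator=RandomForestClassifier(),
                                       param_distributions=random_grid,
                                       n_iter=50, cv=3, n_jobs=-1, random_state=42)
        # rf_random.fit(features, labels) would then run the search; the feature/label
        # split of train_data is not shown in this excerpt.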