def main(opts, logfile=None, restore_point=None):
    """Train, validate and test the sparse factorized autoencoder.

    Builds an encoder/decoder pair of ``Model`` objects inside a fresh
    TensorFlow graph, trains it with the sampling scheme named by
    ``opts['sample_mode']`` and periodically evaluates validation and test
    RMSE.

    Args:
        opts: Run configuration dict. Keys read here: ``data_path``, ``maxN``,
            ``maxM``, ``verbose``, ``defaults``, ``lr``, ``encoder``,
            ``decoder``, ``sample_mode``, ``epochs``, ``validate_interval``
            and, depending on the mode, ``minibatch_size``,
            ``validation_threshold`` and ``ckpt_folder``. Optional keys:
            ``loss`` (default ``"mse"``), ``ema_decay``, ``n_hops``,
            ``n_neighbours``, ``restore_point_epoch``, ``model_name``,
            ``save_best`` and ``checkpoint_interval``.
        logfile: Path of a log file to (over)write; ``None`` logs to stdout.
        restore_point: Checkpoint path handed to ``tf.train.Saver.restore``
            before training, or ``None`` to start from fresh variables.

    Returns:
        An ``OrderedDict`` with keys ``"train"``, ``"valid"`` and ``"test"``,
        each a list of per-epoch losses (valid/test are only appended on
        validation epochs).

    Raises:
        ValueError: If ``opts['sample_mode']`` names no known sampling scheme.
    """
    # Line buffering (1) keeps the log near-real-time; buffering=0 is rejected
    # for text-mode files on Python 3.
    LOG = open(logfile, "w", 1) if logfile is not None else sys.stdout

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    path = opts['data_path']
    if 'movielens-100k' in path:
        # ml-100k uses official test set so only the valid parameter matters
        data = get_data(path, train=.75, valid=.05, test=.2, mode='sparse',
                        fold=1)
    else:
        data = get_data(path, train=.6, valid=.2, test=.2, mode='sparse',
                        fold=1)

    # build encoder and decoder and use VAE loss
    N, M, num_features = data['mat_shape']
    maxN, maxM = opts['maxN'], opts['maxM']
    if N < maxN:
        maxN = N
    if M < maxM:
        maxM = M
    lossfn = opts.get("loss", "mse")
    sample_mode = opts['sample_mode']

    if opts['verbose'] > 0:
        print('\nFactorized Autoencoder run settings:', file=LOG)
        print('dataset: ', path, file=LOG)
        print('Exchangable layer pool mode: ',
              opts['defaults']['matrix_sparse']['pool_mode'], file=LOG)
        print('Pooling layer pool mode: ',
              opts['defaults']['matrix_pool_sparse']['pool_mode'], file=LOG)
        print('learning rate: ', opts['lr'], file=LOG)
        print('activation: ',
              opts['defaults']['matrix_sparse']['activation'], file=LOG)
        print('number of latent features: ', opts['encoder'][-2]['units'],
              file=LOG)
        print('maxN: ', opts['maxN'], file=LOG)
        print('maxM: ', opts['maxM'], file=LOG)
        print('', file=LOG)

    with tf.Graph().as_default():
        mat_values_tr = tf.placeholder(tf.float32, shape=[None],
                                       name='mat_values_tr')
        mask_indices_tr = tf.placeholder(tf.int32, shape=[None, 2],
                                         name='mask_indices_tr')
        mat_values_val = tf.placeholder(tf.float32, shape=[None],
                                        name='mat_values_val')
        # Previously this placeholder reused the name 'mat_values_val'.
        mask_split = tf.placeholder(tf.float32, shape=[None],
                                    name='mask_split')
        mask_indices_val = tf.placeholder(tf.int32, shape=[None, 2],
                                          name='mask_indices_val')
        mask_indices_tr_val = tf.placeholder(tf.int32, shape=[None, 2],
                                             name='mask_indices_tr_val')

        units_in = 1 if lossfn == "mse" else 5
        tr_dict = {
            'input': mat_values_tr,
            'mask_indices': mask_indices_tr,
            'units': units_in,
            'shape': [N, M],
        }
        # The validation encoder is fed through the same "training"
        # placeholders as the training encoder.
        val_dict = {
            'input': mat_values_tr,
            'mask_indices': mask_indices_tr,
            'units': units_in,
            'shape': [N, M],
        }

        encoder = Model(layers=opts['encoder'],
                        layer_defaults=opts['defaults'],
                        scope="encoder",
                        verbose=2)  # define the encoder
        out_enc_tr = encoder.get_output(tr_dict)  # build the encoder
        enc_ema_op, enc_getter = setup_ema("encoder",
                                           opts.get("ema_decay", 1.))
        # get encoder output, reusing the neural net
        out_enc_val = encoder.get_output(val_dict, reuse=True, verbose=0,
                                         is_training=False,
                                         getter=enc_getter)

        tr_dict = {
            'nvec': out_enc_tr['nvec'],
            'mvec': out_enc_tr['mvec'],
            'units': out_enc_tr['units'],
            'mask_indices': mask_indices_tr,
            'shape': out_enc_tr['shape'],
        }
        val_dict = {
            'nvec': out_enc_val['nvec'],
            'mvec': out_enc_val['mvec'],
            'units': out_enc_val['units'],
            'mask_indices': mask_indices_tr_val,
            'shape': out_enc_val['shape'],
        }

        decoder = Model(layers=opts['decoder'],
                        layer_defaults=opts['defaults'],
                        scope="decoder",
                        verbose=2)  # define the decoder
        out_dec_tr = decoder.get_output(tr_dict)  # build it
        out_tr = out_dec_tr['input']
        dec_ema_op, dec_getter = setup_ema("decoder",
                                           opts.get("ema_decay", 1.))
        ema_op = enc_ema_op + dec_ema_op
        # reuse it for validation
        out_dec_val = decoder.get_output(val_dict, reuse=True, verbose=0,
                                         is_training=False,
                                         getter=dec_getter)
        out_val = out_dec_val['input']

        eout_val = expected_value(
            tf.nn.softmax(tf.reshape(out_val, shape=[-1, 5])))

        # loss and training
        reg_loss = sum(tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES))  # regularization
        rec_loss, rec_loss_val, total_loss = get_losses(
            lossfn, reg_loss, mat_values_tr, mat_values_val, mask_indices_tr,
            mask_indices_val, out_tr, out_val, mask_split)
        train_step = get_optimizer(total_loss, opts)

        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        sess.run(tf.global_variables_initializer())

        def encode_values(values):
            """Return values as fed to the graph (one-hot unless loss is mse)."""
            return values if lossfn == "mse" else one_hot(values)

        def train_on_batch(values, indices, split_weights):
            """Run one optimizer step; return (total_loss, rec_loss)."""
            feed = {
                mat_values_tr: encode_values(values),
                mask_indices_tr: indices,
                mask_split: split_weights,
            }
            returns = sess.run([train_step, total_loss, rec_loss] + ema_op,
                               feed_dict=feed)
            # ema_op may be empty and we only need these two outputs
            return returns[1], returns[2]

        if ('by_row_column_density' in sample_mode
                or 'conditional_sample_sparse' in sample_mode):
            # a bad heuristic: the whole matrix is in expectation covered in
            # each epoch.
            # NOTE(review): `//` floors before math.ceil is applied, so the
            # ceil has no effect; left unchanged to keep the epoch length.
            iters_per_epoch = math.ceil(N // maxN) * math.ceil(M // maxM)
        elif 'uniform_over_dense_values' in sample_mode:
            minibatch_size = np.minimum(opts['minibatch_size'],
                                        data['mask_indices_tr'].shape[0])
            iters_per_epoch = (data['mask_indices_tr'].shape[0]
                               // minibatch_size)
        elif 'neighbourhood' in sample_mode:
            minibatch_size = np.minimum(opts['minibatch_size'],
                                        data['mask_indices_tr'].shape[0])
            # Per-entry counter, starts at 1 for every training entry and is
            # incremented each time an entry is sampled.
            weights = csr_matrix(
                (np.ones_like(data['mat_values_tr']),
                 (data['mask_indices_tr'][:, 0],
                  data['mask_indices_tr'][:, 1])),
                data["mat_shape"][0:2])
            sp_mat = csr_matrix(
                (data['mat_values_all'],
                 (data['mask_indices_all'][:, 0],
                  data['mask_indices_all'][:, 1])),
                data["mat_shape"][0:2])

        min_loss = 5.
        min_train = 5.
        # Initialised so the summary print works even when no validated epoch
        # has beaten the initial min_loss yet.
        min_test = 5.
        min_loss_epoch = 0
        losses = OrderedDict()
        losses["train"] = []
        losses["valid"] = []
        losses["test"] = []
        min_ts_loss = 5.
        min_val_ts = 5.

        saver = tf.train.Saver()
        if restore_point is not None:
            saver.restore(sess, restore_point)

        best_log = "logs/best_" + opts.get("model_name", "TEST") + ".log"
        with open(best_log, "a") as best_fh:
            print("epoch,train,valid,test\n", file=best_fh)

        saved_tr_loss = []
        saved_val_loss = []
        first_epoch = opts.get('restore_point_epoch', 0)
        for ep in range(first_epoch, opts['epochs'] + first_epoch):
            begin = time.time()
            loss_tr_, rec_loss_tr_, loss_val_, loss_ts_ = 0., 0., 0., 0.

            if 'by_row_column_density' in sample_mode:
                # go over mini-batches
                for indn_, indm_ in tqdm(
                        sample_submatrix(data['mask_tr'], maxN, maxM,
                                         sample_uniform=False),
                        total=iters_per_epoch):
                    # select a sub-matrix given random indices for
                    # users/movies
                    inds_ = np.ix_(indn_, indm_, [0])
                    mat_sp = data['mat_tr_val'][inds_] * data['mask_tr'][inds_]
                    mat_values = dense_array_to_sparse(mat_sp)['values']
                    mask_indices = dense_array_to_sparse(
                        data['mask_tr'][inds_])['indices'][:, 0:2]
                    bloss_, brec_loss_ = train_on_batch(
                        mat_values, mask_indices, np.ones_like(mat_values))
                    # NOTE(review): only this mode takes the sqrt of the total
                    # loss; the other modes accumulate it as-is.
                    loss_tr_ += np.sqrt(bloss_)
                    rec_loss_tr_ += np.sqrt(brec_loss_)

            elif 'uniform_over_dense_values' in sample_mode:
                for sample_ in tqdm(
                        sample_dense_values_uniform(data['mask_indices_tr'],
                                                    minibatch_size,
                                                    iters_per_epoch),
                        total=iters_per_epoch):
                    mat_values = data['mat_values_tr'][sample_]
                    mask_indices = data['mask_indices_tr'][sample_]
                    bloss_, brec_loss_ = train_on_batch(
                        mat_values, mask_indices, np.ones_like(mat_values))
                    loss_tr_ += bloss_
                    rec_loss_tr_ += np.sqrt(brec_loss_)
                gc.collect()

            elif 'neighbourhood' in sample_mode:
                hops = opts.get("n_hops", 4)
                # Integer division keeps the iteration count an int on
                # Python 3 as well.
                iters_per_epoch = max(
                    1, data['mask_indices_tr'].shape[0] // minibatch_size)
                for sample_ in tqdm(
                        neighbourhood_sampling(data['mask_indices_tr'],
                                               minibatch_size,
                                               iters_per_epoch,
                                               hops=hops),
                        total=iters_per_epoch):
                    w = np.array(weights[sample_[:, 0],
                                         sample_[:, 1]]).flatten()
                    mat_values = np.array(sp_mat[sample_[:, 0],
                                                 sample_[:, 1]]).flatten()
                    # Weight each entry inversely to how often it has been
                    # counted so far, relative to the mean count.
                    mat_weight = weights.sum() / float(
                        data['mask_indices_tr'].shape[0]) / w
                    mask_indices = sample_
                    weights = weights + csr_matrix(
                        (np.ones(sample_.shape[0]),
                         (sample_[:, 0], sample_[:, 1])),
                        data["mat_shape"][0:2])
                    bloss_, brec_loss_ = train_on_batch(
                        mat_values, mask_indices, mat_weight)
                    loss_tr_ += bloss_
                    rec_loss_tr_ += np.sqrt(brec_loss_)
                gc.collect()

            elif 'conditional_sample_sparse' in sample_mode:
                for _, _, _, _, sample_ in tqdm(
                        conditional_sample_sparse(data['mask_indices_tr'],
                                                  data['mask_tr_val_split'],
                                                  [N, M, 1], maxN, maxM),
                        total=iters_per_epoch):
                    mat_values = data['mat_values_tr'][sample_]
                    mask_indices = data['mask_indices_tr'][sample_]
                    bloss_, brec_loss_ = train_on_batch(
                        mat_values, mask_indices, np.ones_like(mat_values))
                    loss_tr_ += bloss_
                    rec_loss_tr_ += np.sqrt(brec_loss_)
                gc.collect()

            else:
                raise ValueError(
                    '\nERROR - unknown <sample_mode> in main()\n')

            loss_tr_ /= iters_per_epoch
            rec_loss_tr_ /= iters_per_epoch
            losses['train'].append(loss_tr_)

            print(
                "Training: epoch {:d} took {:.1f} train loss {:.3f} (rec:{:.3f});"
                .format(ep + 1, time.time() - begin, loss_tr_, rec_loss_tr_))

            # Validate and test every validate_interval epochs
            if (ep + 1) % opts['validate_interval'] == 0:
                ## Validation Loss
                print("Validating: ")
                if sample_mode == "neighbourhood":
                    tf_dic = {
                        "sess": sess,
                        "mat_values_tr": mat_values_tr,
                        "mask_indices_tr": mask_indices_tr,
                        "mat_values_val": mat_values_val,
                        "mask_indices_val": mask_indices_val,
                        "mask_indices_tr_val": mask_indices_tr_val,
                        "mask_split": mask_split,
                        "rec_loss_val": rec_loss_val
                    }
                    hops = opts.get("n_hops", 4)
                    n_samp = opts.get("n_neighbours", 100)
                    # TODO: what should this be?
                    val_batch = minibatch_size // 100
                    loss_val_ = neighbourhood_validate(
                        sparse_matrix=sp_mat,
                        mat_values_val=data['mat_values_val'],
                        mask_indices_val=data['mask_indices_val'],
                        mask_indices_tr=data['mask_indices_tr'],
                        mask_indices_all=data['mask_indices_all'],
                        tf_dic=tf_dic,
                        hops=hops,
                        n_samp=n_samp,
                        lossfn=lossfn,
                        minibatch_size=val_batch)
                    loss_ts_ = neighbourhood_validate(
                        sparse_matrix=sp_mat,
                        mat_values_val=data['mat_values_test'],
                        mask_indices_val=data['mask_indices_test'],
                        mask_indices_tr=data['mask_indices_tr'],
                        mask_indices_all=data['mask_indices_all'],
                        tf_dic=tf_dic,
                        hops=hops,
                        n_samp=n_samp,
                        lossfn=lossfn,
                        minibatch_size=val_batch)
                else:
                    n_all = data['mask_indices_all'].shape[0]
                    split = data['mask_tr_val_split']

                    # Entries never covered by a sampled sub-matrix fall back
                    # to the mean training value.
                    predictions_val = (np.mean(data['mat_values_tr'])
                                       * np.ones(n_all))
                    predictions_val_count = np.zeros(n_all)
                    num_entries_val = data['mask_indices_val'].shape[0]
                    # Keep sampling until enough validation entries have
                    # received a prediction.
                    while (np.sum(predictions_val_count)
                           < opts['validation_threshold'] * num_entries_val):
                        for sample_tr_, sample_val_, sample_tr_val_, _, _ in tqdm(
                                conditional_sample_sparse(
                                    data['mask_indices_all'], split,
                                    [N, M, 1], maxN, maxM),
                                total=iters_per_epoch):
                            mat_values_tr_ = data['mat_values_all'][sample_tr_]
                            mat_values_tr_val_ = data['mat_values_all'][
                                sample_tr_val_]
                            mask_split_ = (split[sample_tr_val_] == 1) * 1.
                            val_feed = {
                                mat_values_tr:
                                    encode_values(mat_values_tr_),
                                mask_indices_tr:
                                    data['mask_indices_all'][sample_tr_],
                                mat_values_val:
                                    encode_values(mat_values_tr_val_),
                                mask_indices_val:
                                    data['mask_indices_all'][sample_val_],
                                mask_indices_tr_val:
                                    data['mask_indices_all'][sample_tr_val_],
                                mask_split: mask_split_
                            }
                            _, beout_val = sess.run(
                                [rec_loss_val, eout_val], feed_dict=val_feed)
                            predictions_val[sample_val_] = beout_val[
                                mask_split_ == 1.]
                            predictions_val_count[sample_val_] = 1
                    loss_val_ = np.sqrt(
                        np.mean((data['mat_values_all'][split == 1]
                                 - predictions_val[split == 1])**2))

                    ## Test Loss
                    print("Testing: ")
                    predictions_ts = (np.mean(data['mat_values_tr_val'])
                                      * np.ones(n_all))
                    predictions_ts_count = np.zeros(n_all)
                    num_entries_ts = data['mask_indices_test'].shape[0]
                    while (np.sum(predictions_ts_count)
                           < opts['validation_threshold'] * num_entries_ts):
                        for _, _, sample_tr_val_, sample_ts_, sample_all_ in tqdm(
                                conditional_sample_sparse(
                                    data['mask_indices_all'], split,
                                    [N, M, 1], maxN, maxM),
                                total=iters_per_epoch):
                            mat_values_tr_val_ = data['mat_values_all'][
                                sample_tr_val_]
                            mat_values_all_ = data['mat_values_all'][
                                sample_all_]
                            mask_split_ = (split[sample_all_] == 2) * 1.
                            test_feed = {
                                mat_values_tr:
                                    encode_values(mat_values_tr_val_),
                                mask_indices_tr:
                                    data['mask_indices_all'][sample_tr_val_],
                                mat_values_val:
                                    encode_values(mat_values_all_),
                                mask_indices_val:
                                    data['mask_indices_all'][sample_ts_],
                                mask_indices_tr_val:
                                    data['mask_indices_all'][sample_all_],
                                mask_split: mask_split_
                            }
                            _, beout_ts = sess.run(
                                [rec_loss_val, eout_val], feed_dict=test_feed)
                            predictions_ts[sample_ts_] = beout_ts[
                                mask_split_ == 1.]
                            predictions_ts_count[sample_ts_] = 1
                    loss_ts_ = np.sqrt(
                        np.mean((data['mat_values_all'][split == 2]
                                 - predictions_ts[split == 2])**2))

                losses['valid'].append(loss_val_)
                losses['test'].append(loss_ts_)

                if loss_val_ < min_loss:
                    # keep track of the best validation loss
                    min_loss = loss_val_
                    min_loss_epoch = ep + 1
                    min_train = rec_loss_tr_
                    min_test = loss_ts_
                    with open(best_log, "a") as best_fh:
                        print("{:d},{:4},{:4},{:4}\n".format(
                            ep, loss_tr_, loss_val_, loss_ts_), file=best_fh)
                    if opts.get("save_best", False):
                        save_path = saver.save(
                            sess, opts['ckpt_folder'] + "/%s_best.ckpt" %
                            opts.get('model_name', "test"))
                        print("Model saved in file: %s" % save_path, file=LOG)

                if loss_ts_ < min_ts_loss:
                    # keep track of the best test loss
                    min_ts_loss = loss_ts_
                    min_val_ts = loss_val_

                saved_tr_loss.append(loss_tr_)
                saved_val_loss.append(loss_val_)
                print(
                    "Validation: epoch {:d} took {:.1f} train loss {:.3f} (rec:{:.3f}); valid: {:.3f}; min valid loss: {:.3f} (train: {:.3}, test: {:.3}) at epoch: {:d}; test loss: {:.3f} (best test: {:.3f} with val {:.3f})"
                    .format(ep + 1, time.time() - begin, loss_tr_,
                            rec_loss_tr_, loss_val_, min_loss, min_train,
                            min_test, min_loss_epoch, loss_ts_, min_ts_loss,
                            min_val_ts),
                    file=LOG)
                gc.collect()

            if (ep + 1) % opts.get("checkpoint_interval", 10000000) == 0:
                save_path = saver.save(
                    sess, opts['ckpt_folder'] + "/%s_checkpt_ep_%05d.ckpt" %
                    (opts.get('model_name', "test"), ep + 1))
                print("Model saved in file: %s" % save_path, file=LOG)

            # overfitting: break if validation loss diverges. loss_val_ is 0
            # on non-validation epochs, so this can only fire after a
            # validation pass.
            if loss_val_ > min_loss * 1.075:
                break

        saved_tr_loss = np.array(saved_tr_loss)
        saved_val_loss = np.array(saved_val_loss)
        np.save(os.path.join('output', 'ml-1m_train_loss.npy'), saved_tr_loss)
        np.save(os.path.join('output', 'ml-1m_val_loss.npy'), saved_val_loss)

    if LOG is not sys.stdout:
        LOG.close()
    return losses
def main(opts, logfile=None, restore_point=None):
    """Build the self-supervised (denoising) network and report its test loss.

    The graph, losses and optimizer are constructed as for training, but no
    training step is run here: the function optionally restores a checkpoint
    and evaluates the test split (``split_id=2``) only.

    Args:
        opts: Run configuration dict. Keys read here: ``data_path``, ``maxN``,
            ``maxM``, ``verbose``, ``defaults``, ``network``, ``lr``,
            ``dae_loss_alpha`` and, when ``verbose > 0``, ``dae_noise_rate``
            and ``l2_regularization``. Optional keys: ``sample_mode``
            (default ``'conditional_sample_sparse'``), ``minibatch_size``
            (required by the uniform and neighbourhood modes), ``n_hops``,
            ``n_neighbours`` and ``model_name``.
        logfile: Path of a log file to (over)write; ``None`` logs to stdout.
        restore_point: Checkpoint path handed to ``tf.train.Saver.restore``,
            or ``None`` to evaluate freshly initialised variables.

    Returns:
        An ``OrderedDict`` with keys ``"train"``, ``"valid"`` and ``"test"``;
        only ``"test"`` receives a value (one entry).
    """
    LOG = open(logfile, "w") if logfile is not None else sys.stdout

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    path = opts['data_path']
    # The movielens-100k branch used exactly the same split arguments
    # (ml-100k uses official test set so only the valid parameter matters),
    # so a single call covers both cases.
    data = get_data(path, train=.85, valid=.05, test=.1, mode='sparse',
                    fold=1)

    # build encoder and decoder and use VAE loss
    N, M, num_features = data['mat_shape']
    maxN, maxM = opts['maxN'], opts['maxM']
    if N < maxN:
        maxN = N
    if M < maxM:
        maxM = M

    if opts['verbose'] > 0:
        print('\nSelf supervised run settings:')
        print('dataset: ', path)
        print('Exchangable layer pool mode: ',
              opts['defaults']['matrix_sparse']['pool_mode'])
        print('learning rate: ', opts['lr'])
        print('activation: ', opts['defaults']['matrix_sparse']['activation'])
        print('dae_noise_rate: ', opts['dae_noise_rate'])
        print('dae_loss_alpha: ', opts['dae_loss_alpha'])
        print('l2_regularization: ', opts['l2_regularization'])
        print('')

    with tf.Graph().as_default():
        # NOTE(review): the original indentation was lost, so the extent of
        # the device and variable scopes below is reconstructed; confirm the
        # variable names still match existing checkpoints.
        with tf.device('/gpu:0'):
            mat_values_tr = tf.placeholder(tf.float32, shape=[None],
                                           name='mat_values_tr')
            mask_split = tf.placeholder(tf.float32, shape=[None],
                                        name='mask_split')
            mat_values_tr_noisy = tf.placeholder(tf.float32, shape=[None],
                                                 name='mat_values_tr_noisy')
            mask_indices_tr = tf.placeholder(tf.int64, shape=[None, 2],
                                             name='mask_indices_tr')
            mat_shape_tr = tf.placeholder(tf.int32, shape=[3],
                                          name='mat_shape_tr')
            noise_mask_tr = tf.placeholder(tf.int64, shape=(None),
                                           name='noise_mask_tr')

            mat_values_val = tf.placeholder(tf.float32, shape=[None],
                                            name='mat_values_val')
            mat_values_val_noisy = tf.placeholder(tf.float32, shape=[None],
                                                  name='mat_values_val_noisy')
            mask_indices_val = tf.placeholder(tf.int64, shape=[None, 2],
                                              name='mask_indices_val')
            mat_shape_val = tf.placeholder(tf.int32, shape=[3],
                                           name='mat_shape_val')
            noise_mask_val = tf.placeholder(tf.int64, shape=(None),
                                            name='noise_mask_val')

            with tf.variable_scope("network"):
                tr_dict = {
                    'input': mat_values_tr_noisy,
                    'mask_indices': mask_indices_tr,
                    'units': 5,
                    'shape': [N, M]
                }
                val_dict = {
                    'input': mat_values_val_noisy,
                    'mask_indices': mask_indices_val,
                    'units': 5,
                    'shape': [N, M]
                }

                network = Model(layers=opts['network'],
                                layer_defaults=opts['defaults'],
                                verbose=2)  # define the network
                out_tr = network.get_output(tr_dict)['input']  # build it
                # get network output, reusing the neural net
                out_val = network.get_output(val_dict, reuse=True, verbose=0,
                                             is_training=False)['input']

            # a bad heuristic: the whole matrix is in expectation covered in
            # each epoch. This is the default; the uniform mode overrides it
            # below.
            iters_per_epoch = math.ceil(N // maxN) * math.ceil(M // maxM)

            # loss and training
            rec_loss = dae_loss_fn_sp(mat_values_tr, out_tr, noise_mask_tr,
                                      opts['dae_loss_alpha'], mask_split)
            reg_loss = sum(tf.get_collection(
                tf.GraphKeys.REGULARIZATION_LOSSES))  # regularization
            total_loss = rec_loss + reg_loss

            ev = expected_value(
                tf.nn.softmax(tf.reshape(out_val, shape=[-1, 5])))
            av = expected_value(tf.reshape(mat_values_val, shape=[-1, 5]))
            nm = tf.cast(noise_mask_val, tf.float32)
            # Squared error averaged over the entries selected by the noise
            # mask.
            rec_loss_val = tf.reduce_sum((av - ev)**2 * nm) / tf.reduce_sum(nm)

            # Still built although no step is run, so the optimizer's
            # variables are part of the graph the Saver sees.
            train_step = tf.train.AdamOptimizer(
                opts['lr']).minimize(total_loss)

        # device_count={'GPU': 0} together with soft placement is passed so
        # the '/gpu:0' request above does not fail when no GPU is exposed.
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                allow_soft_placement=True,
                                                device_count={'GPU': 0}))
        sess.run(tf.global_variables_initializer())

        losses = OrderedDict()
        losses["train"] = []
        losses["valid"] = []
        losses["test"] = []

        sample_mode = opts.get('sample_mode', 'conditional_sample_sparse')
        if 'conditional_sample_sparse' in sample_mode:
            pass  # iters_per_epoch already holds the heuristic value
        elif 'uniform_over_dense_values' in sample_mode:
            minibatch_size = np.minimum(opts['minibatch_size'],
                                        data['mask_indices_tr'].shape[0])
            iters_per_epoch = (data['mask_indices_tr'].shape[0]
                               // minibatch_size)
        elif 'neighbourhood' in sample_mode:
            minibatch_size = np.minimum(opts['minibatch_size'],
                                        data['mask_indices_tr'].shape[0])
            hops = opts.get("n_hops", 3)
            # None for "get all neighbours"
            n_samp = opts.get("n_neighbours", None)
            print("Using neighbourhood sampling with %d hops and %s samples"
                  % (hops, n_samp))
            sp_mat = csr_matrix(
                (data['mat_values_all'],
                 (data['mask_indices_all'][:, 0],
                  data['mask_indices_all'][:, 1])),
                data["mat_shape"][0:2])

        saver = tf.train.Saver()
        if restore_point is not None:
            saver.restore(sess, restore_point)

        best_log = "logs/best_" + opts.get("model_name", "TEST") + ".log"
        with open(best_log, "a") as best_fh:
            print("epoch,train,valid,test\n", file=best_fh)

        # Validation sampler with a common interface across modes.
        if 'conditional_sample_sparse' in sample_mode:
            def draw_sample_val(mask, split, sample_dict):
                return conditional_sample_sparse(
                    mask, split, [N, M, 1], maxN, maxM, sample_dict,
                    valid=True)
        else:
            def draw_sample_val(mask, split, sample_dict):
                return sample_dense_values_uniform_val(
                    mask, split, minibatch_size, iters_per_epoch)

        # The training loop that used to sit here was commented out; this
        # entry point only evaluates the test split.
        tf_dict = {
            "sess": sess,
            "mat_values_val": mat_values_val,
            "mat_values_val_noisy": mat_values_val_noisy,
            "mask_indices_val": mask_indices_val,
            "noise_mask_val": noise_mask_val,
            "ev": ev
        }

        loss_ts_ = 0.  # reported unchanged when sample_mode matches nothing
        if ('conditional_sample_sparse' in sample_mode
                or 'uniform_over_dense_values' in sample_mode):
            loss_ts_ = conditional_validation(
                tf_dict,
                data['mat_values_all'],
                data['mask_indices_all'],
                data['mask_tr_val_split'],
                split_id=2,
                draw_sample=draw_sample_val,
                iters_per_epoch=iters_per_epoch,
                shape=[N, M, 1])
        elif 'neighbourhood' in sample_mode:
            # Training and validation indices are passed together for the
            # test evaluation.
            loss_ts_ = neighbourhood_validation(
                tf_dict,
                data['mask_indices_all'],
                np.concatenate(
                    [data['mask_indices_tr'], data['mask_indices_val']],
                    axis=0),
                data['mat_values_all'],
                data['mask_tr_val_split'],
                sp_mat=sp_mat,
                split_id=2,
                hops=hops,
                n_samp=n_samp)

        losses['test'].append(loss_ts_)
        # Previously this formatted the (always zero) validation loss and
        # carried a stray ")" in the message.
        print("Test loss: {:.3}".format(loss_ts_), file=LOG)

    if LOG is not sys.stdout:
        LOG.close()
    return losses
def main(opts, data=None):
    """Train the topic model on a sparse matrix and return its losses.

    Args:
        opts: Run configuration dict. Keys read here: ``architecture``,
            ``defaults``, ``lr``, ``sample_mode``, ``minibatch_size`` and
            ``epochs``.
        data: Pre-loaded data dict providing ``mat_shape``,
            ``mask_indices_tr`` and ``mat_values_tr``. When ``None`` the data
            is loaded with ``read_data()``, which also supplies the
            ``top_words`` callable used for the periodic topic print-out;
            with caller-supplied data that print-out is skipped.

    Returns:
        A tuple ``(losses, handles)``. ``losses`` is an ``OrderedDict`` with
        per-epoch lists under ``"train"`` and ``"valid"`` (no validation is
        run, so ``"valid"`` holds zeros). ``handles`` maps ``"sess"``,
        ``"total_loss"``, ``"rec_loss"`` (the topic loss),
        ``"mat_values_tr"`` and ``"mask_indices_tr"`` to the session and
        graph tensors.

    Raises:
        ValueError: If ``opts['sample_mode']`` does not select
            ``uniform_over_dense_values``, the only sampler the training loop
            implements.
    """
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    top_words = None  # only available when the data is loaded here
    if data is None:
        data, top_words = read_data()
    N, M, num_features = data['mat_shape']

    # The epoch loop below always draws uniform mini-batches of observed
    # entries, so fail early and clearly for any other mode (previously this
    # ended in a NameError on maxN / minibatch_size).
    if 'uniform_over_dense_values' not in opts['sample_mode']:
        raise ValueError(
            "unsupported <sample_mode> in main(): %r; only "
            "'uniform_over_dense_values' is implemented"
            % (opts['sample_mode'],))
    n_observed = data['mask_indices_tr'].shape[0]
    minibatch_size = np.minimum(opts['minibatch_size'], n_observed)
    iters_per_epoch = n_observed // minibatch_size

    with tf.Graph().as_default():
        mat_values_tr = tf.placeholder(tf.float32, shape=[None],
                                       name='mat_values_tr')
        mask_indices_tr = tf.placeholder(tf.int32, shape=[None, 2],
                                         name='mask_indices_tr')

        with tf.variable_scope("model"):
            tr_dict = {
                'input': mat_values_tr,
                'mask_indices': mask_indices_tr,
                'units': 1,
                'shape': [N, M],
            }
            model = Model(layers=opts['architecture'],
                          layer_defaults=opts['defaults'],
                          verbose=2)  # define the model
            model_output = model.get_output(tr_dict)  # build the model

        # Log-softmax over axis 0 of 'nvec' and axis 2 of 'mvec'.
        words = tf.nn.log_softmax(model_output['nvec'], dim=0)
        docs = tf.nn.log_softmax(model_output['mvec'], dim=2)
        # Built once here instead of inside the epoch loop, which added a new
        # squeeze op to the graph on every print-out.
        words_squeezed = tf.squeeze(words)

        # Clip to (-inf, 0] so the masked inner product stays a valid log
        # probability.
        total_prob = tf.clip_by_value(
            masked_inner_product(words, docs, mask_indices_tr), -np.inf, 0.)
        topic_loss = ce_loss(total_prob, mat_values_tr)

        # loss and training
        reg_loss = sum(tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES))  # regularization
        total_loss = topic_loss + reg_loss
        train_step = tf.train.AdamOptimizer(opts['lr']).minimize(total_loss)

        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        sess.run(tf.global_variables_initializer())

        losses = OrderedDict()
        losses["train"] = []
        losses["valid"] = []

        for ep in range(opts['epochs']):
            begin = time.time()
            loss_tr_, topic_loss_tr_, loss_val_ = 0, 0, 0
            for sample_ in tqdm(
                    sample_dense_values_uniform(data['mask_indices_tr'],
                                                minibatch_size,
                                                iters_per_epoch),
                    total=iters_per_epoch):
                feed = {
                    mat_values_tr: data['mat_values_tr'][sample_],
                    mask_indices_tr: data['mask_indices_tr'][sample_],
                }
                _, bloss_, btopic_loss_ = sess.run(
                    [train_step, total_loss, topic_loss], feed_dict=feed)
                loss_tr_ += bloss_
                topic_loss_tr_ += btopic_loss_

            loss_tr_ /= iters_per_epoch
            topic_loss_tr_ /= iters_per_epoch
            losses['train'].append(loss_tr_)
            losses['valid'].append(loss_val_)

            print("epoch {:d} took {:.1f} training loss {:.3f} (rec:{:.3f})"
                  .format(ep, time.time() - begin, loss_tr_, topic_loss_tr_))

            if ep % 100 == 0 and top_words is not None:
                # Evaluated on the last mini-batch of the epoch.
                W, = sess.run([words_squeezed], feed_dict=feed)
                print("Top words for each topic:")
                for i in range(W.shape[1]):
                    print("Topic %d: %s" % (i, ', '.join(top_words(W, i))))

    # The previous return value referenced validation tensors and a rec_loss
    # that this function never defines, so it raised NameError; only handles
    # that exist are returned. "rec_loss" carries the topic loss, matching
    # the "(rec:...)" label in the epoch print-out.
    return losses, {
        "sess": sess,
        "total_loss": total_loss,
        "rec_loss": topic_loss,
        "mat_values_tr": mat_values_tr,
        "mask_indices_tr": mask_indices_tr,
    }
def main(opts, logfile=None, restore_point=None):
    """Train the sparse factorized autoencoder with periodic validation/testing.

    An encoder/decoder pair is built from ``opts['encoder']`` and
    ``opts['decoder']``, trained with the mini-batch scheme selected by
    ``opts['sample_mode']`` and, every ``opts['validate_interval']`` epochs,
    ``rec_loss_val`` is evaluated on the entries whose
    ``data['mask_tr_val_split']`` is 1 (validation) and 2 (test).

    Args:
        opts: run configuration dict. Keys read directly here: 'data_path',
            'maxN', 'maxM', 'verbose', 'defaults', 'lr', 'encoder',
            'decoder', 'sample_mode', 'minibatch_size', 'epochs',
            'validate_interval', 'ckpt_folder', and the optional 'loss',
            'ema_decay', 'model_name', 'restore_point_epoch', 'save_best'
            and 'checkpoint_interval'.
        logfile: if given, log records are written to this file (in addition
            to stdout).
        restore_point: optional checkpoint path restored into the session
            before training starts.

    Returns:
        An ``OrderedDict`` with the lists 'train', 'valid' and 'test', or
        ``None`` when an exception was raised (it is logged, not re-raised).
    """
    if logfile is not None:
        logging.basicConfig(format='%(asctime)s %(message)s',
                            filename=logfile,
                            level=logging.INFO)
    else:
        logging.basicConfig(format='%(asctime)s %(message)s',
                            level=logging.INFO)
    log = logging.getLogger()
    # Mirror records to stdout, but attach the handler only once so repeated
    # calls to main() do not duplicate every log line.
    if not any(getattr(handler, 'stream', None) is sys.stdout
               for handler in log.handlers):
        log.addHandler(logging.StreamHandler(sys.stdout))

    try:
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
        path = opts['data_path']
        data = load_data()

        # build encoder and decoder and use VAE loss
        N, M, num_features = data['mat_shape']
        maxN = min(opts['maxN'], N)
        maxM = min(opts['maxM'], M)
        lossfn = opts.get("loss", "mse")
        num_units = 1 if lossfn == "mse" else num_features

        if opts['verbose'] > 0:
            logging.info('Factorized Autoencoder run settings:')
            logging.info('dataset: %s', path)
            logging.info('Exchangable layer pool mode: %s',
                         opts['defaults']['matrix_sparse']['pool_mode'])
            logging.info('Pooling layer pool mode: %s',
                         opts['defaults']['matrix_pool_sparse']['pool_mode'])
            logging.info('learning rate: %s', opts['lr'])
            logging.info('activation: %s',
                         opts['defaults']['matrix_sparse']['activation'])
            logging.info('number of latent features: %s',
                         opts['encoder'][-2]['units'])
            logging.info('maxN: %s', opts['maxN'])
            logging.info('maxM: %s', opts['maxM'])

        with tf.Graph().as_default():
            mat_values_tr = tf.placeholder(tf.float32, shape=[None],
                                           name='mat_values_tr')
            mask_indices_tr = tf.placeholder(tf.int32, shape=[None, 2],
                                             name='mask_indices_tr')
            mat_values_val = tf.placeholder(tf.float32, shape=[None],
                                            name='mat_values_val')
            # Previously this placeholder reused the name 'mat_values_val'.
            mask_split = tf.placeholder(tf.float32, shape=[None],
                                        name='mask_split')
            mask_indices_val = tf.placeholder(tf.int32, shape=[None, 2],
                                              name='mask_indices_val')
            mask_indices_tr_val = tf.placeholder(tf.int32, shape=[None, 2],
                                                 name='mask_indices_tr_val')

            expected_value = prep_ev(data["mat_values_all"])

            # Both encoder passes read the training entries; the validation
            # pass only differs in reuse/is_training/getter below.
            tr_dict = {
                'input': mat_values_tr,
                'mask_indices': mask_indices_tr,
                'units': num_units,
                'shape': [N, M],
            }
            val_dict = {
                'input': mat_values_tr,
                'mask_indices': mask_indices_tr,
                'units': num_units,
                'shape': [N, M],
            }

            encoder = Model(layers=opts['encoder'],
                            layer_defaults=opts['defaults'],
                            scope="encoder",
                            verbose=2)  # define the encoder
            out_enc_tr = encoder.get_output(tr_dict)  # build the encoder
            enc_ema_op, enc_getter = setup_ema("encoder",
                                               opts.get("ema_decay", 1.))
            # get encoder output, reusing the neural net
            out_enc_val = encoder.get_output(val_dict,
                                             reuse=True,
                                             verbose=0,
                                             is_training=False,
                                             getter=enc_getter)

            tr_dict = {
                'nvec': out_enc_tr['nvec'],
                'mvec': out_enc_tr['mvec'],
                'units': out_enc_tr['units'],
                'mask_indices': mask_indices_tr,
                'shape': out_enc_tr['shape'],
            }
            val_dict = {
                'nvec': out_enc_val['nvec'],
                'mvec': out_enc_val['mvec'],
                'units': out_enc_val['units'],
                'mask_indices': mask_indices_tr_val,
                'shape': out_enc_val['shape'],
            }

            decoder = Model(layers=opts['decoder'],
                            layer_defaults=opts['defaults'],
                            scope="decoder",
                            verbose=2)  # define the decoder
            out_dec_tr = decoder.get_output(tr_dict)  # build it
            out_tr = out_dec_tr['input']
            dec_ema_op, dec_getter = setup_ema("decoder",
                                               opts.get("ema_decay", 1.))
            ema_op = enc_ema_op + dec_ema_op
            # reuse it for validation
            out_dec_val = decoder.get_output(val_dict,
                                             reuse=True,
                                             verbose=0,
                                             is_training=False,
                                             getter=dec_getter)
            out_val = out_dec_val['input']
            # Not fetched anywhere below; kept so the graph is unchanged.
            eout_val = expected_value(
                tf.nn.softmax(tf.reshape(out_val, shape=[-1, num_features])))

            # loss and training
            reg_loss = sum(
                tf.get_collection(
                    tf.GraphKeys.REGULARIZATION_LOSSES))  # regularization
            logging.info('number of output features: %s', num_features)
            rec_loss, rec_loss_val, total_loss = get_losses(
                lossfn, reg_loss, mat_values_tr, mat_values_val,
                mask_indices_tr, mask_indices_val, out_tr, out_val,
                mask_split, expected_value, num_outputs=num_features)
            train_step = get_optimizer(total_loss, opts)

            # Only `losses` is returned, so the session can be released when
            # training is over instead of leaking until process exit.
            with tf.Session(
                    config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
                sess.run(tf.global_variables_initializer())

                sample_mode = opts['sample_mode']
                if ('by_row_column_density' in sample_mode
                        or 'conditional_sample_sparse' in sample_mode):
                    # a bad heuristic: the whole matrix is in expectation
                    # covered in each epoch (math.ceil is a no-op here since
                    # `//` already floors)
                    iters_per_epoch = math.ceil(N // maxN) * math.ceil(
                        M // maxM)
                elif 'uniform_over_dense_values' in sample_mode:
                    minibatch_size = np.minimum(
                        opts['minibatch_size'],
                        data['mask_indices_tr'].shape[0])
                    iters_per_epoch = data['mask_indices_tr'].shape[
                        0] // minibatch_size
                elif 'neighbourhood' in sample_mode:
                    # NOTE(review): the epoch loop below has no
                    # 'neighbourhood' branch, so this mode still ends in the
                    # "unknown <sample_mode>" ValueError; these values are
                    # currently unused.
                    minibatch_size = np.minimum(
                        opts['minibatch_size'],
                        data['mask_indices_tr'].shape[0])
                    weights = csr_matrix(
                        (np.ones_like(data['mat_values_tr']),
                         (data['mask_indices_tr'][:, 0],
                          data['mask_indices_tr'][:, 1])),
                        data["mat_shape"][0:2])
                    sp_mat = csr_matrix(
                        (data['mat_values_all'],
                         (data['mask_indices_all'][:, 0],
                          data['mask_indices_all'][:, 1])),
                        data["mat_shape"][0:2])

                min_loss = np.inf
                min_train = np.inf
                # Initialised so the validation log line can never hit an
                # unbound name (e.g. when the first validation loss is NaN).
                min_test = np.inf
                min_loss_epoch = 0
                losses = OrderedDict()
                losses["train"] = []
                losses["valid"] = []
                losses["test"] = []
                min_ts_loss = np.inf
                min_val_ts = np.inf

                saver = tf.train.Saver(
                    max_to_keep=1000)  # keep all checkpoints
                if restore_point is not None:
                    saver.restore(sess, restore_point)

                # Make sure the hard-coded output folders exist so a missing
                # directory cannot discard the results of a finished run.
                for folder in ("logs", "output"):
                    if not os.path.isdir(folder):
                        os.makedirs(folder)

                best_log = ("logs/best_" + opts.get("model_name", "TEST")
                            + ".log")
                with open(best_log, "w") as best_file:
                    print("epoch,train,valid,test", file=best_file)

                saved_tr_loss = []
                saved_val_loss = []
                first_epoch = opts.get('restore_point_epoch', 0)
                for ep in range(first_epoch, opts['epochs'] + first_epoch):
                    begin = time.time()
                    loss_tr_, rec_loss_tr_, loss_val_, loss_ts_ = 0., 0., 0., 0.

                    if 'by_row_column_density' in sample_mode:
                        # go over mini-batches
                        for indn_, indm_ in tqdm(
                                sample_submatrix(data['mask_tr'], maxN, maxM,
                                                 sample_uniform=False),
                                total=iters_per_epoch):
                            # select a sub-matrix given random indices for
                            # users/movies
                            inds_ = np.ix_(indn_, indm_, [0])
                            mat_sp = (data['mat_tr_val'][inds_]
                                      * data['mask_tr'][inds_])
                            mat_values = dense_array_to_sparse(
                                mat_sp)['values']
                            mask_indices = dense_array_to_sparse(
                                data['mask_tr'][inds_])['indices'][:, 0:2]
                            tr_dict = {
                                mat_values_tr: mat_values if lossfn == "mse"
                                else one_hot(mat_values),
                                mask_indices_tr: mask_indices,
                                mask_split: np.ones_like(mat_values),
                            }
                            returns = sess.run(
                                [train_step, total_loss, rec_loss] + ema_op,
                                feed_dict=tr_dict)
                            bloss_, brec_loss_ = returns[1:3]
                            loss_tr_ += np.sqrt(bloss_)
                            rec_loss_tr_ += np.sqrt(brec_loss_)

                    elif 'uniform_over_dense_values' in sample_mode:
                        for sample_ in tqdm(
                                sample_dense_values_uniform(
                                    data['mask_indices_tr'], minibatch_size,
                                    iters_per_epoch),
                                total=iters_per_epoch):
                            if lossfn == "mse":
                                mat_values = data['mat_values_tr'][sample_]
                            else:
                                mat_values = data['mat_values_tr_one_hot'][
                                    sample_].flatten()
                            mask_indices = data['mask_indices_tr'][sample_]
                            tr_dict = {
                                mat_values_tr: mat_values.flatten(),
                                mask_indices_tr: mask_indices,
                                mask_split: np.ones_like(mat_values),
                            }
                            returns = sess.run(
                                [train_step, total_loss, rec_loss] + ema_op,
                                feed_dict=tr_dict)
                            # ema_op may be empty and we only need these two
                            # outputs
                            bloss_, brec_loss_ = returns[1:3]
                            loss_tr_ += bloss_
                            rec_loss_tr_ += np.sqrt(brec_loss_)
                            gc.collect()

                    elif 'conditional_sample_sparse' in sample_mode:
                        for _, _, _, _, sample_ in tqdm(
                                conditional_sample_sparse(
                                    data['mask_indices_tr'],
                                    data['mask_tr_val_split'], [N, M, 1],
                                    maxN, maxM),
                                total=iters_per_epoch):
                            if lossfn == "mse":
                                mat_values = data['mat_values_tr'][sample_]
                            else:
                                mat_values = data['mat_values_tr_one_hot'][
                                    sample_]
                            mask_indices = data['mask_indices_tr'][sample_]
                            tr_dict = {
                                mat_values_tr: mat_values.flatten(),
                                mask_indices_tr: reindex_mask(mask_indices),
                                mask_split: np.ones_like(mat_values[:, 0]),
                            }
                            returns = sess.run(
                                [train_step, total_loss, rec_loss] + ema_op,
                                feed_dict=tr_dict)
                            # ema_op may be empty and we only need these two
                            # outputs
                            bloss_, brec_loss_ = returns[1:3]
                            loss_tr_ += bloss_
                            rec_loss_tr_ += np.sqrt(brec_loss_)
                            gc.collect()

                    else:
                        raise ValueError(
                            '\nERROR - unknown <sample_mode> in main()\n')

                    loss_tr_ /= iters_per_epoch
                    rec_loss_tr_ /= iters_per_epoch
                    losses['train'].append(loss_tr_)
                    logging.info(
                        "Training: epoch {:d} took {:.1f} train loss {:.3f} "
                        "(rec:{:.3f});".format(ep + 1, time.time() - begin,
                                               loss_tr_, rec_loss_tr_))

                    # Validate and test every validate_interval epochs
                    if (ep + 1) % opts['validate_interval'] == 0:
                        ## Validation Loss
                        if lossfn == "mse":
                            val_ratings = data['mat_values_all'].copy()
                            val_ratings[data['mask_tr_val_split'] == 0] = 3.
                        else:
                            val_ratings = data['mat_values_all_one_hot'].copy()
                            val_ratings[data['mask_tr_val_split'] == 0, :] = 0
                            val_ratings = val_ratings.flatten()
                        if lossfn == "mse":
                            vals = data['mat_values_tr']
                        else:
                            vals = data['mat_values_tr_one_hot'].flatten()

                        # Validation and test share every feed except the
                        # split mask selecting which entries are scored.
                        val_dict = {
                            mat_values_tr: vals.flatten(),
                            mask_indices_tr: data['mask_indices_tr'],
                            mat_values_val: val_ratings.flatten(),
                            mask_indices_val: data['mask_indices_all'],
                            mask_indices_tr_val: data['mask_indices_all'],
                            mask_split: (data['mask_tr_val_split'] == 1) * 1.,
                        }
                        bloss_val, = sess.run([rec_loss_val],
                                              feed_dict=val_dict)
                        loss_val_ += np.sqrt(bloss_val)

                        ## Test Loss
                        test_dict = val_dict.copy()
                        test_dict[mask_split] = (
                            data['mask_tr_val_split'] == 2) * 1.
                        bloss_test, = sess.run([rec_loss_val],
                                               feed_dict=test_dict)
                        loss_ts_ += np.sqrt(bloss_test)

                        losses['valid'].append(loss_val_)
                        losses['test'].append(loss_ts_)

                        # keep track of the best validation loss
                        if loss_val_ < min_loss:
                            min_loss = loss_val_
                            min_loss_epoch = ep + 1
                            min_train = rec_loss_tr_
                            min_test = loss_ts_
                            with open(best_log, "a") as best_file:
                                print("{:d},{:4},{:4},{:4}".format(
                                    ep, loss_tr_, loss_val_, loss_ts_),
                                      file=best_file)
                            if opts.get("save_best", False):
                                save_path = saver.save(
                                    sess, opts['ckpt_folder'] +
                                    "/%s_best.ckpt" %
                                    opts.get('model_name', "test"))
                                logging.info("Model saved in file: %s",
                                             save_path)

                        # keep track of the best test loss
                        if loss_ts_ < min_ts_loss:
                            min_ts_loss = loss_ts_
                            min_val_ts = loss_val_

                        saved_tr_loss.append(loss_tr_)
                        saved_val_loss.append(loss_val_)
                        logging.info(
                            "Validation: epoch {:d} took {:.1f} train loss "
                            "{:.3f} (rec:{:.3f}); valid: {:.3f}; min valid "
                            "loss: {:.3f} (train: {:.3}, test: {:.3}) at "
                            "epoch: {:d}; test loss: {:.3f} (best test: "
                            "{:.3f} with val {:.3f}).".format(
                                ep + 1, time.time() - begin, loss_tr_,
                                rec_loss_tr_, loss_val_, min_loss, min_train,
                                min_test, min_loss_epoch, loss_ts_,
                                min_ts_loss, min_val_ts))
                        gc.collect()

                    if (ep + 1) % opts.get("checkpoint_interval",
                                           10000000) == 0:
                        save_path = saver.save(
                            sess, opts['ckpt_folder'] +
                            "/%s_checkpt_ep_%05d.ckpt" %
                            (opts.get('model_name', "test"), ep + 1))
                        logging.info("Model saved in file: %s", save_path)

                # Persist the loss curves recorded at validation epochs.
                saved_tr_loss = np.array(saved_tr_loss)
                saved_val_loss = np.array(saved_val_loss)
                np.save(os.path.join('output', 'yahoo_music_train_loss.npy'),
                        saved_tr_loss)
                np.save(os.path.join('output', 'yahoo_music_val_loss.npy'),
                        saved_val_loss)
                return losses
    except Exception:
        # Deliberately best-effort: the failure is logged with its traceback
        # and the caller receives None.
        logging.exception("Training failed")
def main(opts):
    """Train the dense denoising autoencoder and print per-epoch losses.

    Training runs on ``maxN x maxM`` sub-matrices produced by
    ``sample_submatrix``; before each step a random 0/1 noise mask (zeros
    drawn with probability ``opts['dae_noise_rate']``) is multiplied into the
    encoder's input mask. After every epoch ``rec_loss_val`` is evaluated on
    the full matrix and the best validation loss so far is reported.

    Args:
        opts: run configuration dict. Keys read here: 'data_path', 'maxN',
            'maxM', 'encoder', 'decoder', 'defaults', 'dae_loss_alpha',
            'dae_noise_rate', 'lr' and 'epochs'.

    Returns:
        None. Progress is only printed.
    """
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    path = opts['data_path']
    data = get_data(path, train=.8, valid=.2, test=.001)

    # build encoder and decoder and use VAE loss
    N, M, num_features = data['mat_tr_val'].shape
    maxN = min(opts['maxN'], N)
    maxM = min(opts['maxM'], M)

    with tf.Graph().as_default():
        # data matrix for training
        mat = tf.placeholder(tf.float32,
                             shape=(maxN, maxM, num_features),
                             name='mat')
        mask_tr = tf.placeholder(tf.float32,
                                 shape=(maxN, maxM, 1),
                                 name='mask_tr')
        # for validation, since we need less memory (forward pass only), we
        # are feeding the whole matrix. This is only feasible for this smaller
        # dataset. In the long term we could perform validation on CPU to
        # avoid memory problems
        # data matrix for validation (previously reused the name 'mat'):
        mat_val = tf.placeholder(tf.float32,
                                 shape=(N, M, num_features),
                                 name='mat_val')
        # the entries not present during training
        mask_val = tf.placeholder(tf.float32, shape=(N, M, 1),
                                  name='mask_val')
        # both training and validation entries
        mask_tr_val = tf.placeholder(tf.float32, shape=(N, M, 1),
                                     name='mask_tr_val')
        noise_mask = tf.placeholder(tf.float32,
                                    shape=(maxN, maxM, 1),
                                    name='noise_mask')
        mask_tr_noise = tf.placeholder(tf.float32,
                                       shape=(maxN, maxM, 1),
                                       name='mask_tr_noise')

        with tf.variable_scope("encoder"):
            tr_dict = {'input': mat, 'mask': mask_tr_noise}
            val_dict = {'input': mat_val, 'mask': mask_tr_val}
            encoder = Model(layers=opts['encoder'],
                            layer_defaults=opts['defaults'],
                            verbose=2)  # define the encoder
            out_enc_tr = encoder.get_output(tr_dict)  # build the encoder
            # get encoder output, reusing the neural net
            out_enc_val = encoder.get_output(val_dict,
                                             reuse=True,
                                             verbose=0,
                                             is_training=False)

        with tf.variable_scope("decoder"):
            tr_dict = {'input': out_enc_tr['input'], 'mask': mask_tr}
            val_dict = {'input': out_enc_val['input'], 'mask': mask_val}
            decoder = Model(layers=opts['decoder'],
                            layer_defaults=opts['defaults'],
                            verbose=2)  # define the decoder
            out_tr = decoder.get_output(tr_dict)['input']  # build it
            # reuse it for validation
            out_val = decoder.get_output(val_dict,
                                         reuse=True,
                                         verbose=0,
                                         is_training=False)['input']

        # loss and training
        rec_loss = dae_loss_fn(mat, mask_tr, noise_mask, out_tr,
                               opts['dae_loss_alpha'])
        reg_loss = sum(tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES))  # regularization
        rec_loss_val = rec_loss_fn(mat_val, mask_val, out_val)
        total_loss = rec_loss + reg_loss
        train_step = tf.train.AdamOptimizer(opts['lr']).minimize(total_loss)

        # a bad heuristic: the whole matrix is in expectation covered in each
        # epoch (math.ceil is a no-op here since `//` already floors)
        iters_per_epoch = math.ceil(N // maxN) * math.ceil(M // maxM)

        # Start from +inf rather than a hard-coded 5 so the best loss is
        # tracked correctly whatever the scale of the validation loss.
        min_loss = float('inf')
        min_loss_epoch = 0
        noise_rate = opts['dae_noise_rate']  # loop-invariant

        # Nothing is returned, so release the session (and its GPU memory)
        # as soon as training ends.
        with tf.Session(
                config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            sess.run(tf.global_variables_initializer())

            for ep in range(opts['epochs']):
                begin = time.time()
                loss_tr_, rec_loss_tr_, loss_val_ = 0, 0, 0

                # go over mini-batches
                for indn_, indm_ in tqdm(
                        sample_submatrix(data['mask_tr'], maxN, maxM),
                        total=iters_per_epoch):
                    # select a sub-matrix given random indices for
                    # users/movies
                    inds_ = np.ix_(indn_, indm_, [0])
                    noise = np.random.choice(
                        [0, 1],
                        size=[maxN, maxM, 1],
                        p=[noise_rate, 1 - noise_rate])
                    batch_mask = data['mask_tr'][inds_]
                    tr_dict = {
                        mat: data['mat_tr_val'][inds_],
                        mask_tr: batch_mask,
                        mask_tr_noise: batch_mask * noise,
                        noise_mask: noise,
                    }
                    _, bloss_, brec_loss_ = sess.run(
                        [train_step, total_loss, rec_loss],
                        feed_dict=tr_dict)
                    loss_tr_ += np.sqrt(bloss_)
                    rec_loss_tr_ += np.sqrt(brec_loss_)

                loss_tr_ /= iters_per_epoch
                rec_loss_tr_ /= iters_per_epoch

                val_dict = {
                    mat_val: data['mat_tr_val'],
                    mask_val: data['mask_val'],
                    mask_tr_val: data['mask_tr'],
                }
                bloss_, = sess.run([rec_loss_val], feed_dict=val_dict)
                loss_val_ += np.sqrt(bloss_)

                # keep track of the best validation loss
                if loss_val_ < min_loss:
                    min_loss = loss_val_
                    min_loss_epoch = ep

                print(
                    "epoch {:d} took {:.1f} training loss {:.3f} (rec:{:.3f}) "
                    "\t validation: {:.3f} \t minimum validation loss: {:.3f} "
                    "at epoch: {:d}".format(ep, time.time() - begin, loss_tr_,
                                            rec_loss_tr_, loss_val_, min_loss,
                                            min_loss_epoch),
                    flush=True)
def main(opts):
    """Train the sparse autoencoder that has learned row/column input features.

    Two trainable vectors (``nvec_feat`` of shape ``[N, 1, 1]`` and
    ``mvec_feat`` of shape ``[1, M, 1]``) are fed to the encoder together
    with the observed values; the decoder input is the masked inner product
    of the encoder's row and column embeddings. ``rec_loss_val`` is evaluated
    after every epoch.

    Args:
        opts: run configuration dict. Keys read here: 'data_path', 'maxN',
            'maxM', 'verbose', 'defaults', 'lr', 'encoder', 'decoder',
            'sample_mode', 'minibatch_size' (uniform sampling only) and
            'epochs'.

    Returns:
        An ``OrderedDict`` with the per-epoch lists 'train' and 'valid', or
        ``None`` (after printing an error) when ``opts['sample_mode']`` names
        no supported sampling scheme.
    """
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    path = opts['data_path']
    data = get_data(path, train=.8, valid=.1, test=.1)

    # build encoder and decoder and use VAE loss
    N, M, num_features = data['mat_shape']
    maxN = min(opts['maxN'], N)
    maxM = min(opts['maxM'], M)

    if opts['verbose'] > 0:
        print('\nFactorized Autoencoder run settings:')
        print('dataset: ', path)
        print('Exchangable layer pool mode: ',
              opts['defaults']['matrix_sparse']['pool_mode'])
        print('Pooling layer pool mode: ',
              opts['defaults']['matrix_pool_sparse']['pool_mode'])
        print('learning rate: ', opts['lr'])
        print('activation: ', opts['defaults']['matrix_sparse']['activation'])
        print('number of latent features: ', opts['encoder'][-2]['units'])
        print('maxN: ', opts['maxN'])
        print('maxM: ', opts['maxM'])
        print('')

    with tf.Graph().as_default():
        mat_values_tr = tf.placeholder(tf.float32, shape=[None],
                                       name='mat_values_tr')
        mask_indices_tr = tf.placeholder(tf.int32, shape=[None, 2],
                                         name='mask_indices_tr')
        mat_values_val = tf.placeholder(tf.float32, shape=[None],
                                        name='mat_values_val')
        mask_indices_val = tf.placeholder(tf.int32, shape=[None, 2],
                                          name='mask_indices_val')
        mask_indices_tr_val = tf.placeholder(tf.int32, shape=[None, 2],
                                             name='mask_indices_tr_val')

        sparse_defaults = opts['defaults']['matrix_sparse']
        with tf.variable_scope(
                None,
                default_name="input_features",
                initializer=sparse_defaults.get('kernel_initializer', None),
                regularizer=sparse_defaults.get('regularizer', None),
                reuse=False,
        ):
            mvec_feat = model_variable("mvec_feat", shape=[1, M, 1],
                                       trainable=True)
            nvec_feat = model_variable("nvec_feat", shape=[N, 1, 1],
                                       trainable=True)

        with tf.variable_scope("encoder"):
            # Both encoder passes read the training entries and the shared
            # learned feature vectors.
            tr_dict = {
                'input': mat_values_tr,
                'mask_indices': mask_indices_tr,
                'units': 1,
                'mvec': mvec_feat,
                'shape': [N, M],
                'nvec': nvec_feat,
            }
            val_dict = {
                'input': mat_values_tr,
                'mask_indices': mask_indices_tr,
                'units': 1,
                'mvec': mvec_feat,
                'nvec': nvec_feat,
                'shape': [N, M],
            }
            encoder = Model(layers=opts['encoder'],
                            layer_defaults=opts['defaults'],
                            verbose=2)  # define the encoder
            out_enc_tr = encoder.get_output(tr_dict)  # build the encoder
            # get encoder output, reusing the neural net
            out_enc_val = encoder.get_output(val_dict,
                                             reuse=True,
                                             verbose=0,
                                             is_training=False)

        with tf.variable_scope("decoder"):
            # The decoder consumes the inner product of the embeddings at the
            # observed indices rather than the embeddings themselves.
            tr_dict = {
                'input': masked_inner_product(out_enc_tr['nvec'],
                                              out_enc_tr['mvec'],
                                              mask_indices_tr),
                'mask_indices': mask_indices_tr,
                'units': 1,
                'shape': out_enc_tr['shape'],
            }
            val_dict = {
                'input': masked_inner_product(out_enc_val['nvec'],
                                              out_enc_val['mvec'],
                                              mask_indices_tr_val),
                'mask_indices': mask_indices_tr_val,
                'units': 1,
                'shape': out_enc_val['shape'],
            }
            decoder = Model(layers=opts['decoder'],
                            layer_defaults=opts['defaults'],
                            verbose=2)  # define the decoder
            out_dec_tr = decoder.get_output(tr_dict)  # build it
            out_tr = out_dec_tr['input']
            # reuse it for validation
            out_dec_val = decoder.get_output(val_dict,
                                             reuse=True,
                                             verbose=0,
                                             is_training=False)
            out_val = out_dec_val['input']

        # loss and training
        rec_loss = rec_loss_fn_sp(mat_values_tr, mask_indices_tr, out_tr,
                                  tf.ones(tf.shape(mat_values_tr)))
        reg_loss = sum(tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES))  # regularization
        rec_loss_val = rec_loss_fn_sp(mat_values_val, mask_indices_val,
                                      out_val, data['mask_tr_val_split'])
        total_loss = rec_loss + reg_loss
        train_step = tf.train.AdamOptimizer(opts['lr']).minimize(total_loss)

        sample_mode = opts['sample_mode']
        if 'by_row_column_density' in sample_mode:
            # a bad heuristic: the whole matrix is in expectation covered in
            # each epoch (math.ceil is a no-op here since `//` already floors)
            iters_per_epoch = math.ceil(N // maxN) * math.ceil(M // maxM)
        elif 'uniform_over_dense_values' in sample_mode:
            minibatch_size = np.minimum(opts['minibatch_size'],
                                        data['mask_indices_tr'].shape[0])
            iters_per_epoch = (data['mask_indices_tr'].shape[0]
                               // minibatch_size)

        # Start from +inf rather than a hard-coded 5 so the best loss is
        # tracked correctly whatever the scale of the validation loss.
        min_loss = float('inf')
        min_loss_epoch = 0
        losses = OrderedDict()
        losses["train"] = []
        losses["valid"] = []

        # Only `losses` leaves this function, so the session is closed on
        # every exit path (including the early error return below).
        with tf.Session(
                config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            sess.run(tf.global_variables_initializer())

            for ep in range(opts['epochs']):
                begin = time.time()
                # loss_ts_ is never updated in this variant; it is printed
                # as 0 for the test loss.
                loss_tr_, rec_loss_tr_, loss_val_, loss_ts_ = 0, 0, 0, 0

                if 'by_row_column_density' in sample_mode:
                    # go over mini-batches
                    for indn_, indm_ in tqdm(
                            sample_submatrix(data['mask_tr'], maxN, maxM,
                                             sample_uniform=False),
                            total=iters_per_epoch):
                        # select a sub-matrix given random indices for
                        # users/movies
                        inds_ = np.ix_(indn_, indm_, [0])
                        mat_sp = (data['mat_tr_val'][inds_]
                                  * data['mask_tr'][inds_])
                        mat_values = dense_array_to_sparse(mat_sp)['values']
                        mask_indices = dense_array_to_sparse(
                            data['mask_tr'][inds_])['indices'][:, 0:2]
                        tr_dict = {
                            mat_values_tr: mat_values,
                            mask_indices_tr: mask_indices,
                        }
                        _, bloss_, brec_loss_ = sess.run(
                            [train_step, total_loss, rec_loss],
                            feed_dict=tr_dict)
                        loss_tr_ += np.sqrt(bloss_)
                        rec_loss_tr_ += np.sqrt(brec_loss_)

                elif 'uniform_over_dense_values' in sample_mode:
                    for sample_ in tqdm(
                            sample_dense_values_uniform(
                                data['mask_indices_tr'], minibatch_size,
                                iters_per_epoch),
                            total=iters_per_epoch):
                        tr_dict = {
                            mat_values_tr: data['mat_values_tr'][sample_],
                            mask_indices_tr: data['mask_indices_tr'][sample_],
                        }
                        _, bloss_, brec_loss_ = sess.run(
                            [train_step, total_loss, rec_loss],
                            feed_dict=tr_dict)
                        loss_tr_ += np.sqrt(bloss_)
                        rec_loss_tr_ += np.sqrt(brec_loss_)

                else:
                    print('\nERROR - unknown <sample_mode> in main()\n')
                    return

                loss_tr_ /= iters_per_epoch
                rec_loss_tr_ /= iters_per_epoch

                ## Validation Loss
                val_dict = {
                    mat_values_tr: data['mat_values_tr'],
                    mask_indices_tr: data['mask_indices_tr'],
                    mat_values_val: data['mat_values_tr_val'],
                    mask_indices_val: data['mask_indices_val'],
                    mask_indices_tr_val: data['mask_indices_tr_val'],
                }
                bloss_, = sess.run([rec_loss_val], feed_dict=val_dict)
                loss_val_ += np.sqrt(bloss_)

                # keep track of the best validation loss
                if loss_val_ < min_loss:
                    min_loss = loss_val_
                    min_loss_epoch = ep

                losses['train'].append(loss_tr_)
                losses['valid'].append(loss_val_)
                print(
                    "epoch {:d} took {:.1f} training loss {:.3f} (rec:{:.3f}) "
                    "\t validation: {:.3f} \t minimum validation loss: {:.3f} "
                    "at epoch: {:d} \t test loss: {:.3f}".format(
                        ep, time.time() - begin, loss_tr_, rec_loss_tr_,
                        loss_val_, min_loss, min_loss_epoch, loss_ts_))

    return losses
def main(opts, logfile=None, restore_point=None):
    """Train the sparse factorized autoencoder, evaluating after every epoch.

    Validation is computed from ``data`` and the test loss from ``eval_data``
    (both returned by ``load_ratings()``). Training stops early once the
    validation loss exceeds 1.075 times the best value seen. If a
    ``<model_name>_best.ckpt`` checkpoint exists in ``opts['ckpt_folder']``
    when training ends, it is restored into the returned session.

    Args:
        opts: run configuration dict. Keys read here: 'data_path', 'maxN',
            'maxM', 'verbose', 'defaults', 'lr', 'encoder', 'decoder',
            'sample_mode', 'minibatch_size' (uniform sampling only),
            'epochs', 'ckpt_folder', and the optional 'loss', 'ema_decay'
            and 'model_name'.
        logfile: path of a text file receiving the progress output; stdout is
            used when omitted.
        restore_point: optional checkpoint path restored before training.

    Returns:
        A tuple ``(losses, nodes)``: ``losses`` is an ``OrderedDict`` with the
        per-epoch lists 'train', 'valid' and 'test'; ``nodes`` maps names to
        the live session ('sess'), the placeholders, the loss tensors and the
        model outputs.

    Raises:
        ValueError: if ``opts['sample_mode']`` names no supported scheme.
    """
    if logfile is not None:
        # Line-buffered: unbuffered (0) text files are rejected by Python 3,
        # while buffering=1 is accepted by both Python 2 and 3.
        LOG = open(logfile, "w", 1)
    else:
        LOG = sys.stdout

    try:
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
        path = opts['data_path']
        data, eval_data = load_ratings()

        # build encoder and decoder and use VAE loss
        N, M, num_features = data['mat_shape']
        maxN = min(opts['maxN'], N)
        maxM = min(opts['maxM'], M)
        lossfn = opts.get("loss", "mse")
        is_mse = lossfn == "mse"
        num_units = 1 if is_mse else 5

        if opts['verbose'] > 0:
            print('\nFactorized Autoencoder run settings:', file=LOG)
            print('dataset: ', path, file=LOG)
            print('Exchangable layer pool mode: ',
                  opts['defaults']['matrix_sparse']['pool_mode'], file=LOG)
            print('Pooling layer pool mode: ',
                  opts['defaults']['matrix_pool_sparse']['pool_mode'],
                  file=LOG)
            print('learning rate: ', opts['lr'], file=LOG)
            print('activation: ',
                  opts['defaults']['matrix_sparse']['activation'], file=LOG)
            print('number of latent features: ',
                  opts['encoder'][-2]['units'], file=LOG)
            print('maxN: ', opts['maxN'], file=LOG)
            print('maxM: ', opts['maxM'], file=LOG)
            print('', file=LOG)

        with tf.Graph().as_default():
            mat_values_tr = tf.placeholder(tf.float32, shape=[None],
                                           name='mat_values_tr')
            mask_indices_tr = tf.placeholder(tf.int32, shape=[None, 2],
                                             name='mask_indices_tr')
            mat_values_val = tf.placeholder(tf.float32, shape=[None],
                                            name='mat_values_val')
            # Previously this placeholder reused the name 'mat_values_val'.
            mask_split = tf.placeholder(tf.float32, shape=[None],
                                        name='mask_split')
            mask_indices_val = tf.placeholder(tf.int32, shape=[None, 2],
                                              name='mask_indices_val')
            mask_indices_tr_val = tf.placeholder(tf.int32, shape=[None, 2],
                                                 name='mask_indices_tr_val')

            # Both encoder passes read the training entries; the validation
            # pass only differs in reuse/is_training/getter below.
            tr_dict = {
                'input': mat_values_tr,
                'mask_indices': mask_indices_tr,
                'units': num_units,
                'shape': [N, M],
            }
            val_dict = {
                'input': mat_values_tr,
                'mask_indices': mask_indices_tr,
                'units': num_units,
                'shape': [N, M],
            }

            encoder = Model(layers=opts['encoder'],
                            layer_defaults=opts['defaults'],
                            scope="encoder",
                            verbose=2)  # define the encoder
            out_enc_tr = encoder.get_output(tr_dict)  # build the encoder
            enc_ema_op, enc_getter = setup_ema("encoder",
                                               opts.get("ema_decay", 1.))
            # get encoder output, reusing the neural net
            out_enc_val = encoder.get_output(val_dict,
                                             reuse=True,
                                             verbose=0,
                                             is_training=False,
                                             getter=enc_getter)

            tr_dict = {
                'nvec': out_enc_tr['nvec'],
                'mvec': out_enc_tr['mvec'],
                'units': out_enc_tr['units'],
                'mask_indices': mask_indices_tr,
                'shape': out_enc_tr['shape'],
            }
            val_dict = {
                'nvec': out_enc_val['nvec'],
                'mvec': out_enc_val['mvec'],
                'units': out_enc_val['units'],
                'mask_indices': mask_indices_tr_val,
                'shape': out_enc_val['shape'],
            }

            decoder = Model(layers=opts['decoder'],
                            layer_defaults=opts['defaults'],
                            scope="decoder",
                            verbose=2)  # define the decoder
            out_dec_tr = decoder.get_output(tr_dict)  # build it
            out_tr = out_dec_tr['input']
            dec_ema_op, dec_getter = setup_ema("decoder",
                                               opts.get("ema_decay", 1.))
            ema_op = enc_ema_op + dec_ema_op
            # reuse it for validation
            out_dec_val = decoder.get_output(val_dict,
                                             reuse=True,
                                             verbose=0,
                                             is_training=False,
                                             getter=dec_getter)
            out_val = out_dec_val['input']

            # loss and training
            reg_loss = sum(tf.get_collection(
                tf.GraphKeys.REGULARIZATION_LOSSES))  # regularization
            rec_loss, rec_loss_val, total_loss = get_losses(
                lossfn, reg_loss, mat_values_tr, mat_values_val,
                mask_indices_tr, mask_indices_val, out_tr, out_val,
                mask_split)
            train_step = get_optimizer(total_loss, opts)

            # The session is handed back to the caller, so it stays open.
            sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
            sess.run(tf.global_variables_initializer())

            sample_mode = opts['sample_mode']
            if 'by_row_column_density' in sample_mode:
                # a bad heuristic: the whole matrix is in expectation covered
                # in each epoch (math.ceil is a no-op here since `//` already
                # floors)
                iters_per_epoch = math.ceil(N // maxN) * math.ceil(M // maxM)
            elif 'uniform_over_dense_values' in sample_mode:
                minibatch_size = np.minimum(opts['minibatch_size'],
                                            data['mask_indices_tr'].shape[0])
                iters_per_epoch = (data['mask_indices_tr'].shape[0]
                                   // minibatch_size)

            # +inf sentinels instead of the hard-coded 5: a first validation
            # loss >= 5 used to leave `min_test` unbound and could trigger
            # the overfitting break immediately.
            min_loss = np.inf
            min_train = np.inf
            min_test = np.inf
            min_loss_epoch = 0
            losses = OrderedDict()
            losses["train"] = []
            losses["valid"] = []
            losses["test"] = []
            min_ts_loss = np.inf
            min_val_ts = np.inf

            saver = tf.train.Saver()
            if restore_point is not None:
                saver.restore(sess, restore_point)

            ckpt_name = opts.get('model_name', "test")
            best_ckpt = opts['ckpt_folder'] + "/%s_best.ckpt" % ckpt_name
            best_log = "logs/" + opts.get("model_name", "TEST") + "_best.log"
            with open(best_log, "a") as best_file:
                print("epoch,train,valid,test\n", file=best_file)

            for ep in range(opts['epochs']):
                begin = time.time()
                loss_tr_, rec_loss_tr_, loss_val_, loss_ts_ = 0, 0, 0, 0

                if 'by_row_column_density' in sample_mode:
                    # go over mini-batches
                    for indn_, indm_ in tqdm(
                            sample_submatrix(data['mask_tr'], maxN, maxM,
                                             sample_uniform=False),
                            total=iters_per_epoch):
                        # select a sub-matrix given random indices for
                        # users/movies
                        inds_ = np.ix_(indn_, indm_, [0])
                        mat_sp = (data['mat_tr_val'][inds_]
                                  * data['mask_tr'][inds_])
                        mat_values = dense_array_to_sparse(mat_sp)['values']
                        mask_indices = dense_array_to_sparse(
                            data['mask_tr'][inds_])['indices'][:, 0:2]
                        tr_dict = {
                            mat_values_tr: mat_values if is_mse
                            else one_hot(mat_values),
                            mask_indices_tr: mask_indices,
                        }
                        returns = sess.run(
                            [train_step, total_loss, rec_loss] + ema_op,
                            feed_dict=tr_dict)
                        bloss_, brec_loss_ = returns[1:3]
                        loss_tr_ += np.sqrt(bloss_)
                        rec_loss_tr_ += np.sqrt(brec_loss_)

                elif 'uniform_over_dense_values' in sample_mode:
                    for sample_ in tqdm(
                            sample_dense_values_uniform(
                                data['mask_indices_tr'], minibatch_size,
                                iters_per_epoch),
                            total=iters_per_epoch):
                        mat_values = data['mat_values_tr'][sample_]
                        mask_indices = data['mask_indices_tr'][sample_]
                        tr_dict = {
                            mat_values_tr: mat_values if is_mse
                            else one_hot(mat_values),
                            mask_indices_tr: mask_indices,
                        }
                        returns = sess.run(
                            [train_step, total_loss, rec_loss] + ema_op,
                            feed_dict=tr_dict)
                        # ema_op may be empty and we only need these two
                        # outputs
                        bloss_, brec_loss_ = returns[1:3]
                        loss_tr_ += bloss_
                        rec_loss_tr_ += np.sqrt(brec_loss_)
                        gc.collect()

                else:
                    raise ValueError(
                        '\nERROR - unknown <sample_mode> in main()\n')

                loss_tr_ /= iters_per_epoch
                rec_loss_tr_ /= iters_per_epoch

                ## Validation Loss
                val_dict = {
                    mat_values_tr: data['mat_values_tr'] if is_mse
                    else one_hot(data['mat_values_tr']),
                    mask_indices_tr: data['mask_indices_tr'],
                    mat_values_val: data['mat_values_tr_val'] if is_mse
                    else one_hot(data['mat_values_tr_val']),
                    mask_indices_val: data['mask_indices_val'],
                    mask_indices_tr_val: data['mask_indices_tr_val'],
                    mask_split: (data['mask_tr_val_split'] == 1) * 1.,
                }
                bloss_val, = sess.run([rec_loss_val], feed_dict=val_dict)
                loss_val_ += np.sqrt(bloss_val)

                ## Test Loss
                test_dict = {
                    mat_values_tr: eval_data['mat_values_tr'] if is_mse
                    else one_hot(eval_data['mat_values_tr']),
                    mask_indices_tr: eval_data['mask_indices_tr'],
                    mat_values_val: eval_data['mat_values_tr_val'] if is_mse
                    else one_hot(eval_data['mat_values_tr_val']),
                    mask_indices_val: eval_data['mask_indices_test'],
                    mask_indices_tr_val: eval_data['mask_indices_tr_val'],
                    mask_split: (eval_data['mask_tr_val_split'] == 2) * 1.,
                }
                bloss_test, = sess.run([rec_loss_val], feed_dict=test_dict)
                loss_ts_ += np.sqrt(bloss_test)

                # keep track of the best test loss
                if loss_ts_ < min_ts_loss:
                    min_ts_loss = loss_ts_
                    min_val_ts = loss_val_

                # keep track of the best validation loss
                if loss_val_ < min_loss:
                    min_loss = loss_val_
                    min_loss_epoch = ep
                    min_train = rec_loss_tr_
                    min_test = loss_ts_
                    with open(best_log, "a") as best_file:
                        print("{:d},{:4},{:4},{:4}\n".format(
                            ep, loss_tr_, loss_val_, loss_ts_),
                              file=best_file)
                    if ep > 1000 and (min_loss < 0.942):
                        save_path = saver.save(sess, best_ckpt)
                        print("Model saved in file: %s" % save_path,
                              file=LOG)

                if (ep + 1) % 500 == 0:
                    save_path = saver.save(
                        sess, opts['ckpt_folder'] +
                        "/%s_checkpt_ep_%05d.ckpt" % (ckpt_name, ep + 1))
                    print("Model saved in file: %s" % save_path, file=LOG)

                losses['train'].append(loss_tr_)
                losses['valid'].append(loss_val_)
                losses['test'].append(loss_ts_)
                print(
                    "epoch {:d} took {:.1f} train loss {:.3f} (rec:{:.3f}); "
                    "valid: {:.3f}; min valid loss: {:.3f} "
                    "(train: {:.3}, test: {:.3}) at epoch: {:d}; "
                    "test loss: {:.3f} (best test: {:.3f} with val {:.3f})"
                    .format(ep, time.time() - begin, loss_tr_, rec_loss_tr_,
                            loss_val_, min_loss, min_train, min_test,
                            min_loss_epoch, loss_ts_, min_ts_loss,
                            min_val_ts),
                    file=LOG)
                gc.collect()

                if loss_val_ > min_loss * 1.075:  # overfitting
                    break

            # The best checkpoint is only written after epoch 1000 and below
            # a loss threshold, so it may not exist; restoring it
            # unconditionally used to crash short or unsuccessful runs.
            if tf.train.checkpoint_exists(best_ckpt):
                saver.restore(sess, best_ckpt)
            else:
                print("No best checkpoint found at %s; returning the final "
                      "weights" % best_ckpt, file=LOG)

            return losses, {
                "sess": sess,
                "mat_values_tr": mat_values_tr,
                "mask_indices_tr": mask_indices_tr,
                "mat_values_val": mat_values_val,
                "mask_indices_val": mask_indices_val,
                "mask_indices_tr_val": mask_indices_tr_val,
                "mask_split": mask_split,
                "total_loss": total_loss,
                "rec_loss": rec_loss,
                "rec_loss_val": rec_loss_val,
                "out_tr": out_tr,
                "out_val": out_val,
            }
    finally:
        # Close the log file we opened; never close the shared stdout.
        if LOG is not sys.stdout:
            LOG.close()
def main(opts, logfile=None, restore_point=None):
    """Build the sparse factorized autoencoder on CPU and report validation/test loss.

    The graph (encoder, decoder, losses, optimizer) is constructed from
    ``opts``; if ``restore_point`` is given, variables are restored from that
    checkpoint. ``rec_loss_val`` is then evaluated once with the validation
    split mask and once with the test split mask, and the square roots of
    both values are printed.

    Args:
        opts: Configuration dict. Reads ``'data_path'``, ``'verbose'``,
            ``'encoder'``, ``'decoder'``, ``'defaults'`` and the optional
            ``'loss'`` (default ``"mse"``) and ``'ema_decay'`` (default 1.);
            it is also handed to ``get_optimizer``. ``'lr'``, ``'maxN'`` and
            ``'maxM'`` are only read for the settings log when
            ``opts['verbose'] > 0``.
        logfile: Optional path; when given, log records go to this file
            instead of the default logging stream.
        restore_point: Optional checkpoint path passed to ``Saver.restore``.

    Returns:
        None. Any exception raised while building or evaluating is logged
        with its traceback and swallowed.
    """
    log_kwargs = {'format': '%(asctime)s %(message)s', 'level': logging.INFO}
    if logfile is not None:
        log_kwargs['filename'] = logfile
    logging.basicConfig(**log_kwargs)

    try:
        # device_count {'GPU': 0} hides GPUs from the session, so this runs on CPU.
        cpu_config = tf.ConfigProto(device_count={'GPU': 0})
        path = opts['data_path']
        if 'movielens-100k' in path:
            # ml-100k uses the official test set, so only the valid parameter matters
            data = get_data(path, train=.75, valid=.05, test=.2, mode='sparse', fold=1)
        else:
            data = get_data(path, train=.6, valid=.2, test=.2, mode='sparse', fold=1)

        N, M, _ = data['mat_shape']
        lossfn = opts.get("loss", "mse")
        use_mse = lossfn == "mse"
        units = 1 if use_mse else 5

        if opts['verbose'] > 0:
            sparse_defaults = opts['defaults']['matrix_sparse']
            logging.info('Factorized Autoencoder run settings:')
            logging.info('dataset: %s', path)
            logging.info('Exchangable layer pool mode: %s', sparse_defaults['pool_mode'])
            logging.info('Pooling layer pool mode: %s',
                         opts['defaults']['matrix_pool_sparse']['pool_mode'])
            logging.info('learning rate: %s', opts['lr'])
            logging.info('activation: %s', sparse_defaults['activation'])
            logging.info('number of latent features: %s', opts['encoder'][-2]['units'])
            logging.info('maxN: %s', opts['maxN'])
            logging.info('maxM: %s', opts['maxM'])

        with tf.Graph().as_default():
            mat_values_tr = tf.placeholder(tf.float32, shape=[None], name='mat_values_tr')
            mask_indices_tr = tf.placeholder(tf.int32, shape=[None, 2], name='mask_indices_tr')
            mat_values_val = tf.placeholder(tf.float32, shape=[None], name='mat_values_val')
            # Previously this placeholder reused the name 'mat_values_val'.
            mask_split = tf.placeholder(tf.float32, shape=[None], name='mask_split')
            mask_indices_tr_val = tf.placeholder(tf.int32, shape=[None, 2],
                                                 name='mask_indices_tr_val')

            # Training and validation passes of the encoder both read the
            # training entries; two separate dicts are passed, as before.
            tr_dict = {
                'input': mat_values_tr,
                'mask_indices': mask_indices_tr,
                'units': units,
                'shape': [N, M],
            }
            val_dict = dict(tr_dict)

            # Define the encoder, build it, then reuse its variables for validation.
            encoder = Model(layers=opts['encoder'], layer_defaults=opts['defaults'],
                            scope="encoder", verbose=2)
            out_enc_tr = encoder.get_output(tr_dict)
            enc_ema_op, enc_getter = setup_ema("encoder", opts.get("ema_decay", 1.))
            out_enc_val = encoder.get_output(val_dict, reuse=True, verbose=0,
                                             is_training=False, getter=enc_getter)

            tr_dict = {
                'nvec': out_enc_tr['nvec'],
                'mvec': out_enc_tr['mvec'],
                'units': out_enc_tr['units'],
                'mask_indices': mask_indices_tr,
                'shape': out_enc_tr['shape'],
            }
            val_dict = {
                'nvec': out_enc_val['nvec'],
                'mvec': out_enc_val['mvec'],
                'units': out_enc_val['units'],
                'mask_indices': mask_indices_tr_val,
                'shape': out_enc_val['shape'],
            }

            # Define the decoder, build it, then reuse its variables for validation.
            decoder = Model(layers=opts['decoder'], layer_defaults=opts['defaults'],
                            scope="decoder", verbose=2)
            out_dec_tr = decoder.get_output(tr_dict)
            out_tr = out_dec_tr['input']
            dec_ema_op, dec_getter = setup_ema("decoder", opts.get("ema_decay", 1.))
            # ema_op and eout_val are not evaluated in this function; they are
            # kept so that graph construction is unchanged.
            ema_op = enc_ema_op + dec_ema_op
            out_dec_val = decoder.get_output(val_dict, reuse=True, verbose=0,
                                             is_training=False, getter=dec_getter)
            out_val = out_dec_val['input']
            eout_val = expected_value(tf.nn.softmax(tf.reshape(out_val, shape=[-1, 5])))

            # Losses and optimizer.
            reg_loss = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
            rec_loss, rec_loss_val, total_loss = get_losses(
                lossfn, reg_loss, mat_values_tr, mat_values_val, out_tr, out_val, mask_split)
            # The train op is never run here, but it is still built before the
            # Saver is created. NOTE(review): presumably this keeps the saved
            # variable set aligned with training checkpoints - confirm.
            train_step = get_optimizer(total_loss, opts)

            # The context manager closes the session even if evaluation raises.
            with tf.Session(config=cpu_config) as sess:
                sess.run(tf.global_variables_initializer())

                saver = tf.train.Saver()
                if restore_point is not None:
                    saver.restore(sess, restore_point)
                    logging.info("Restored successfully, running validation")
                else:
                    logging.warning(
                        "No restore_point given; evaluating freshly initialised variables")

                split = data['mask_tr_val_split']
                all_values = data['mat_values_all']
                all_indices = data['mask_indices_all']
                # Split code 0 marks the entries fed to the network as input.
                is_train = split == 0
                train_data = all_values[is_train]
                train_mask = all_indices[is_train, :]

                # Everything except mask_split is shared by both evaluations.
                base_feed = {
                    mat_values_tr: train_data if use_mse else one_hot(train_data),
                    mat_values_val: all_values if use_mse else one_hot(all_values),
                    mask_indices_tr: train_mask,
                    mask_indices_tr_val: all_indices,
                }

                def _split_loss(split_code):
                    """Return sqrt(rec_loss_val) over entries whose split equals split_code."""
                    feed = dict(base_feed)
                    feed[mask_split] = (split == split_code) * 1.
                    batch_loss, = sess.run([rec_loss_val], feed_dict=feed)
                    return np.sqrt(batch_loss)

                ## Validation Loss
                loss_val_ = _split_loss(1)
                logging.info("Validation complete. Got {:4}".format(loss_val_))

                ## Test Loss
                loss_test_ = _split_loss(2)
                print("Valid: {:4}, Test: {:4}\n".format(loss_val_, loss_test_))
    except Exception:
        # Top-level boundary: record the traceback instead of crashing the caller.
        logging.exception("Training failed")
def main(opts):
    """Train the dense factorized autoencoder and print per-epoch losses.

    Loads the data at ``opts['data_path']``, builds encoder/decoder ``Model``
    graphs from ``opts``, and trains with Adam for ``opts['epochs']`` epochs
    on sub-matrices produced by ``sample_submatrix``. After every epoch the
    whole matrix is fed for validation and a summary line is printed.

    Args:
        opts: Configuration dict. Reads ``'data_path'``, ``'maxN'``,
            ``'maxM'``, ``'verbose'``, ``'encoder'``, ``'decoder'``,
            ``'defaults'``, ``'lr'``, ``'epochs'`` and the optional ``'loss'``
            (default ``"mse"``) and ``'normalize'`` (default ``False``).
            ``opts['decoder'][-1]['units']`` is overwritten in place with the
            number of input features.

    Returns:
        None.
    """
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    path = opts['data_path']
    data = get_data(path, train=.8, valid=.2, test=.001)
    loss_name = opts.get("loss", "mse")
    use_mse = loss_name == "mse"

    standardize = inverse_trans = lambda x: x  # defaults
    # Copy the raw matrix before any normalisation or indicator encoding.
    raw_input_data = data['mat_tr_val'].copy()
    if use_mse:
        input_data = data['mat_tr_val']
        if opts.get('normalize', False):
            print("Normalizing data")
            standardize, inverse_trans = normalize(input_data)
    else:
        input_data = to_indicator(data['mat_tr_val'])
    loss_fn = get_loss_function(loss_name)

    N, M, num_features = input_data.shape
    opts['decoder'][-1]['units'] = num_features
    # Mini-batch sub-matrices cannot be larger than the matrix itself.
    maxN = min(opts['maxN'], N)
    maxM = min(opts['maxM'], M)

    if opts['verbose'] > 0:
        dense_defaults = opts['defaults']['matrix_dense']
        print('\nRun Settings:')
        print('dataset: ', path)
        print('drop mask: ', dense_defaults['drop_mask'])
        print('Exchangable layer pool mode: ', dense_defaults['pool_mode'])
        print('Pooling layer pool mode: ', opts['defaults']['matrix_pool']['pool_mode'])
        print('learning rate: ', opts['lr'])
        print('activation: ', dense_defaults['activation'])
        print('maxN: ', opts['maxN'])
        print('maxM: ', opts['maxM'])
        print('')

    with tf.Graph().as_default():
        # Training placeholders hold one (maxN, maxM) sub-matrix.
        mat_raw = tf.placeholder(tf.float32, shape=(maxN, maxM, 1), name='mat_raw')
        mat = tf.placeholder(tf.float32, shape=(maxN, maxM, num_features), name='mat')
        mask_tr = tf.placeholder(tf.float32, shape=(maxN, maxM, 1), name='mask_tr')
        # For validation, since we need less memory (forward pass only),
        # we are feeding the whole matrix. This is only feasible for this smaller dataset.
        # In the long term we could perform validation on CPU to avoid memory problems
        mat_raw_valid = tf.placeholder(tf.float32, shape=(N, M, 1), name='mat_raw_valid')
        # Previously this placeholder reused the name 'mat'.
        mat_val = tf.placeholder(tf.float32, shape=(N, M, num_features), name='mat_val')
        # the entries not present during training
        mask_val = tf.placeholder(tf.float32, shape=(N, M, 1), name='mask_val')
        # both training and validation entries
        mask_tr_val = tf.placeholder(tf.float32, shape=(N, M, 1), name='mask_tr_val')
        indn = tf.placeholder(tf.int32, shape=(None), name='indn')
        indm = tf.placeholder(tf.int32, shape=(None), name='indm')

        with tf.variable_scope("encoder"):
            tr_dict = {'input': mat, 'mask': mask_tr, 'total_shape': [N, M],
                       'indn': indn, 'indm': indm}
            val_dict = {'input': mat_val, 'mask': mask_tr_val, 'total_shape': [N, M],
                        'indn': indn, 'indm': indm}
            # Define the encoder, build it, then reuse its variables for validation.
            encoder = Model(layers=opts['encoder'], layer_defaults=opts['defaults'], verbose=2)
            out_enc_tr = encoder.get_output(tr_dict)
            out_enc_val = encoder.get_output(val_dict, reuse=True, verbose=0, is_training=False)

        with tf.variable_scope("decoder"):
            tr_dict = {'nvec': out_enc_tr['nvec'], 'mvec': out_enc_tr['mvec'],
                       'mask': out_enc_tr['mask'], 'total_shape': [N, M],
                       'indn': indn, 'indm': indm}
            val_dict = {'nvec': out_enc_val['nvec'], 'mvec': out_enc_val['mvec'],
                        'mask': out_enc_val['mask'], 'total_shape': [N, M],
                        'indn': indn, 'indm': indm}
            # Define the decoder, build it, then reuse its variables for validation.
            decoder = Model(layers=opts['decoder'], layer_defaults=opts['defaults'], verbose=2)
            out_tr = decoder.get_output(tr_dict)['input']
            out_val = decoder.get_output(val_dict, reuse=True, verbose=0,
                                         is_training=False)['input']

        # Loss and training.
        rec_loss = loss_fn(inverse_trans(mat), mask_tr, inverse_trans(out_tr))  # reconstruction
        reg_loss = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))  # regularization
        rec_loss_val = loss_fn(inverse_trans(mat_val), mask_val, inverse_trans(out_val))
        total_loss = rec_loss + reg_loss

        # Dot the softmax over the last axis with the values 1..5, then score
        # that against the raw matrix with rec_loss_fn.
        rng = tf.range(1, 6, 1, dtype=tf.float32)
        idx = tf.convert_to_tensor([[2], [0]], dtype=np.int32)
        mse_loss_train = rec_loss_fn(
            mat_raw, mask_tr,
            tf.reshape(tf.tensordot(tf.nn.softmax(out_tr), rng, idx), (maxN, maxM, 1)))
        mse_loss_valid = rec_loss_fn(
            mat_raw_valid, mask_val,
            tf.reshape(tf.tensordot(tf.nn.softmax(out_val), rng, idx), (N, M, 1)))

        train_step = tf.train.AdamOptimizer(opts['lr']).minimize(total_loss)
        merged = tf.summary.merge_all()

        # The context manager closes the session even if training raises.
        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            train_writer = tf.summary.FileWriter('logs/train', sess.graph)
            try:
                sess.run(tf.global_variables_initializer())

                # a bad heuristic: the whole matrix is in expectation covered in each epoch
                iters_per_epoch = math.ceil(N // maxN) * math.ceil(M // maxM)

                # Start from +inf so the first epoch always sets the best loss;
                # the old sentinel of 5 silently assumed the loss starts below 5.
                min_loss = float('inf')
                min_loss_epoch = 0
                batch_feed = None  # feed of the most recent mini-batch, reused for summaries

                for ep in range(opts['epochs']):
                    begin = time.time()
                    loss_tr_, rec_loss_tr_ = 0, 0

                    batches = tqdm(sample_submatrix(data['mask_tr'], maxN, maxM),
                                   total=iters_per_epoch)
                    for indn_, indm_ in batches:  # go over mini-batches
                        # select a sub-matrix given random indices for users/movies
                        inds_ = np.ix_(indn_, indm_, range(num_features))
                        inds_mask = np.ix_(indn_, indm_, [0])
                        batch_feed = {
                            mat: standardize(input_data[inds_]),
                            mask_tr: data['mask_tr'][inds_mask],
                            mat_raw: raw_input_data[inds_mask],
                            indn: indn_,
                            indm: indm_,
                        }
                        if use_mse:
                            _, bloss_, brec_loss_ = sess.run(
                                [train_step, total_loss, rec_loss], feed_dict=batch_feed)
                            loss_tr_ += np.sqrt(bloss_)
                            rec_loss_tr_ += np.sqrt(brec_loss_)
                        elif loss_name == "ce":
                            _, bloss_, brec_loss_, mse = sess.run(
                                [train_step, total_loss, rec_loss, mse_loss_train],
                                feed_dict=batch_feed)
                            loss_tr_ += np.sqrt(mse)
                            rec_loss_tr_ += brec_loss_

                    loss_tr_ /= iters_per_epoch
                    rec_loss_tr_ /= iters_per_epoch

                    valid_feed = {
                        mat_val: standardize(input_data),
                        mask_val: data['mask_val'],
                        mask_tr_val: data['mask_tr'],
                        mat_raw_valid: raw_input_data,
                        indn: np.arange(N),
                        indm: np.arange(M),
                    }

                    # Summaries are computed on the last training mini-batch;
                    # skip them if no batch has been produced yet.
                    if merged is not None and batch_feed is not None:
                        summary, = sess.run([merged], feed_dict=batch_feed)
                        train_writer.add_summary(summary, ep)

                    if use_mse:
                        bloss_, = sess.run([rec_loss_val], feed_dict=valid_feed)
                    else:
                        # Both tensors are still fetched; only mse_loss_valid is reported.
                        _, bloss_ = sess.run([rec_loss_val, mse_loss_valid],
                                             feed_dict=valid_feed)
                    loss_val_ = np.sqrt(bloss_)

                    if loss_val_ < min_loss:  # keep track of the best validation loss
                        min_loss = loss_val_
                        min_loss_epoch = ep

                    print("epoch {:d} took {:.1f} training loss {:.3f} (rec:{:.3f}) \t validation: {:.3f} \t minimum validation loss: {:.3f} at epoch: {:d}".format(ep, time.time() - begin, loss_tr_, rec_loss_tr_, loss_val_, min_loss, min_loss_epoch))
            finally:
                # Flush buffered summaries and release the event file.
                train_writer.close()