# ANTk model builders: each function assembles a TensorFlow graph from a
# .config spec via config.AntGraph and trains it with generic_model.Model.
import tensorflow as tf

from antk.core import config, generic_model, loader, node_ops


def dnn_concat(data, configfile, layers=[16, 8], activation='tanhlecun',
               initrange=1e-3, bn=True, keep_prob=.95, concat_size=24,
               uembed=32, iembed=32, learnrate=.00001, verbose=True,
               epochs=10, maxbadcount=20, mb=2000, eval_rate=500):
    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=data.dev.features,
                              marker='-',
                              graph_name='dnn_concat',
                              variable_bindings={'layers': layers,
                                                 'activation': activation,
                                                 'initrange': initrange,
                                                 'bn': bn,
                                                 'keep_prob': keep_prob,
                                                 'concat_size': concat_size,
                                                 'uembed': uembed,
                                                 'iembed': iembed})
    y = ant.tensor_out
    y_ = tf.placeholder("float", [None, None], name='Target')
    ant.placeholderdict['ratings'] = y_  # put the new placeholder in the graph for training
    with tf.name_scope('objective'):
        objective = tf.reduce_sum(tf.square(y_ - y))
    with tf.name_scope('dev_rmse'):
        dev_rmse = tf.sqrt(tf.div(tf.reduce_sum(tf.square(y - y_)),
                                  data.dev.num_examples))
    with tf.name_scope('training'):
        model = generic_model.Model(objective, ant.placeholderdict,
                                    mb=mb,
                                    learnrate=learnrate,
                                    verbose=verbose,
                                    maxbadcount=maxbadcount,
                                    epochs=epochs,
                                    evaluate=dev_rmse,
                                    predictions=y,
                                    model_name='res_concat',
                                    make_histograms=False,
                                    save=False,
                                    tensorboard=False)
        model.train(data.train, dev=data.dev, eval_schedule=eval_rate)
    return model
def mf(data, configfile, lamb=0.001, kfactors=20, learnrate=0.01,
       verbose=True, epochs=1000, maxbadcount=20, mb=500, initrange=1,
       eval_rate=500, random_seed=None, develop=False,
       train_dev_eval_factor=3):
    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=data.dev.features,
                              marker='-',
                              graph_name='basic_mf',
                              develop=develop,
                              variable_bindings={'kfactors': kfactors,
                                                 'initrange': initrange})
    y = ant.tensor_out
    y_ = tf.placeholder("float", [None, None], name='Target')
    ant.placeholderdict['ratings'] = y_
    with tf.name_scope('objective'):
        objective = tf.reduce_sum(tf.square(y_ - y))
        # l2 penalties on the user/item embeddings and bias terms
        objective += (lamb * tf.reduce_sum(tf.square(ant.tensordict['huser'])) +
                      lamb * tf.reduce_sum(tf.square(ant.tensordict['hitem'])) +
                      lamb * tf.reduce_sum(tf.square(ant.tensordict['ubias'])) +
                      lamb * tf.reduce_sum(tf.square(ant.tensordict['ibias'])))
    with tf.name_scope('dev_rmse'):
        _rmse = node_ops.rmse(y_, y)
        mae = node_ops.mae(y_, y)
    model = generic_model.Model(objective, ant.placeholderdict,
                                mb=mb,
                                learnrate=learnrate,
                                verbose=verbose,
                                maxbadcount=maxbadcount,
                                epochs=epochs,
                                evaluate=_rmse,
                                train_evaluate=_rmse,
                                predictions=y,
                                model_name='mf',
                                random_seed=random_seed,
                                save_tensors={'mae': mae})
    model.train(data.train, dev=data.dev, eval_schedule=eval_rate,
                train_dev_eval_factor=train_dev_eval_factor)
    return model
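# Usage sketch for the mf builder above. The data pipeline mirrors the
# ml100k script later in this file; 'mf.config' is a hypothetical spec name
# (the script below uses 'mf2.config').
def mf_example():
    loader.maybe_download('ml100k.tar.gz', '.',
                          'http://sw.cs.wwu.edu/~tuora/aarontuor/ml100k.tar.gz')
    loader.untar('ml100k.tar.gz')
    data = loader.read_data_sets('ml100k',
                                 folders=['dev', 'train'],
                                 hashlist=['item', 'user', 'ratings'])
    # Center the ratings so the bias terms only model deviations from the mean.
    data.train.labels['ratings'] = loader.center(data.train.labels['ratings'])
    data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'])
    return mf(data, 'mf.config', kfactors=50, lamb=0.01, learnrate=0.01)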
def tensorfactor(data, configfile, lamb=.001, kfactors=1000, learnrate=0.01,
                 verbose=True, epochs=1000, maxbadcount=20, mb=500,
                 initrange=1, eval_rate=500, random_seed=None):
    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=data.dev.features,
                              marker='-',
                              graph_name='basic_mf',
                              variable_bindings={'kfactors': kfactors,
                                                 'initrange': initrange,
                                                 'lamb': lamb})
    y = ant.tensor_out
    y_ = tf.placeholder("float", [None, None], name='Target')
    ant.placeholderdict['ratings'] = y_
    with tf.name_scope('objective'):
        objective = tf.reduce_sum(tf.square(y_ - y))
    with tf.name_scope('dev_rmse'):
        dev_rmse = node_ops.rmse(y_, y)
    model = generic_model.Model(objective, ant.placeholderdict,
                                mb=mb,
                                learnrate=learnrate,
                                verbose=verbose,
                                maxbadcount=maxbadcount,
                                epochs=epochs,
                                evaluate=dev_rmse,
                                predictions=y,
                                model_name='mf',
                                random_seed=random_seed)
    # use the eval_rate argument instead of the hardcoded 200 it shadowed
    model.train(data.train, dev=data.dev, eval_schedule=eval_rate)
    return model
def dssm(data, configfile, layers=[10, 10, 10], bn=True, keep_prob=.95,
         act='tanhlecun', initrange=1, kfactors=10, lamb=.1, mb=500,
         learnrate=0.0001, verbose=True, maxbadcount=10, epochs=100,
         model_name='dssm', random_seed=500, eval_rate=500):
    # user and item metadata is fed as a supplement during training
    datadict = data.user.features.copy()
    datadict.update(data.item.features)
    configdatadict = data.dev.features.copy()
    configdatadict.update(datadict)
    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=configdatadict,
                              marker='-',
                              graph_name='basic_mf',
                              variable_bindings={'initrange': initrange,
                                                 'kfactors': kfactors})
    y_ = tf.placeholder("float", [None, None], name='Target')
    ant.placeholderdict['ratings'] = y_
    with tf.name_scope('objective'):
        if type(ant.tensor_out) is list:
            # fit every output head to the target independently
            objective = tf.reduce_sum(tf.square(y_ - ant.tensor_out[0]))
            for i in range(1, len(ant.tensor_out)):
                objective += tf.reduce_sum(tf.square(y_ - ant.tensor_out[i]))
        objective += (lamb * tf.reduce_sum(tf.square(ant.tensordict['huser'])) +
                      lamb * tf.reduce_sum(tf.square(ant.tensordict['hitem'])) +
                      lamb * tf.reduce_sum(tf.square(ant.tensordict['ubias'])) +
                      lamb * tf.reduce_sum(tf.square(ant.tensordict['ibias'])))
    with tf.name_scope('dev_rmse'):
        dev_rmse = node_ops.rmse(ant.tensor_out[0], y_)
    model = generic_model.Model(objective, ant.placeholderdict,
                                mb=mb,
                                learnrate=learnrate,
                                verbose=verbose,
                                maxbadcount=maxbadcount,
                                epochs=epochs,
                                evaluate=dev_rmse,
                                predictions=ant.tensor_out[0],
                                model_name=model_name,
                                random_seed=random_seed)
    model.train(data.train, dev=data.dev, supplement=datadict,
                eval_schedule=eval_rate)
    return model
def tree(data, configfile, lamb=0.001, kfactors=20, learnrate=0.0001,
         verbose=True, maxbadcount=20, mb=500, initrange=0.00001, epochs=10,
         random_seed=None, eval_rate=500, keep_prob=0.95, act='tanh'):
    datadict = data.user.features.copy()
    datadict.update(data.item.features)
    configdatadict = data.dev.features.copy()
    configdatadict.update(datadict)
    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=configdatadict,
                              marker='-',
                              variable_bindings={'kfactors': kfactors,
                                                 'initrange': initrange,
                                                 'keep_prob': keep_prob,
                                                 'act': act},
                              graph_name='tree')
    y = ant.tensor_out
    y_ = tf.placeholder("float", [None, None], name='Target')
    ant.placeholderdict['ratings'] = y_  # put the new placeholder in the graph for training
    with tf.name_scope('objective'):
        objective = (tf.reduce_sum(tf.square(y_ - y)) +
                     lamb * tf.reduce_sum(tf.square(ant.tensordict['huser'])) +
                     lamb * tf.reduce_sum(tf.square(ant.tensordict['hitem'])) +
                     lamb * tf.reduce_sum(tf.square(ant.tensordict['ubias'])) +
                     lamb * tf.reduce_sum(tf.square(ant.tensordict['ibias'])))
    with tf.name_scope('dev_rmse'):
        dev_rmse = tf.sqrt(tf.div(tf.reduce_sum(tf.square(y - y_)),
                                  data.dev.num_examples))
    with tf.name_scope('training'):
        model = generic_model.Model(objective, ant.placeholderdict,
                                    mb=mb,
                                    learnrate=learnrate,
                                    verbose=verbose,
                                    maxbadcount=maxbadcount,
                                    epochs=epochs,
                                    evaluate=dev_rmse,
                                    predictions=y,
                                    model_name='tree')
        model.train(data.train, dev=data.dev, supplement=datadict,
                    eval_schedule=eval_rate)
    return model
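# Usage sketch for the tree builder above: it reads user and item metadata
# (data.user.features / data.item.features), so those folders must be loaded
# too. The age/year preprocessing mirrors the 'tree.config' script that
# follows; the folder list is an assumption about the ml100k layout.
def tree_example():
    data = loader.read_data_sets('ml100k',
                                 folders=['dev', 'train', 'user', 'item'])
    data.user.features['age'] = loader.center(data.user.features['age'], axis=None)
    data.item.features['year'] = loader.center(data.item.features['year'], axis=None)
    data.user.features['age'] = loader.maxnormalize(data.user.features['age'])
    data.item.features['year'] = loader.maxnormalize(data.item.features['year'])
    return tree(data, 'tree.config', kfactors=20, lamb=0.001)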
# Script fragment: assumes `data` was loaded with user/item folders as in the
# examples in this file. Trains a rating predictor from 'tree.config' on
# centered, max-normalized user age and item year features.
data.user.features['age'] = loader.center(data.user.features['age'], axis=None)
data.item.features['year'] = loader.center(data.item.features['year'], axis=None)
data.user.features['age'] = loader.maxnormalize(data.user.features['age'])
data.item.features['year'] = loader.maxnormalize(data.item.features['year'])
datadict = data.user.features.copy()
datadict.update(data.item.features)
configdatadict = data.dev.features.copy()
configdatadict.update(datadict)
with tf.variable_scope('mfgraph'):
    ant = config.AntGraph('tree.config',
                          data=configdatadict,
                          marker='-',
                          variable_bindings={'kfactors': 100,
                                             'initrange': 0.0001},
                          develop=False)
y = ant.tensor_out
y_ = tf.placeholder("float", [None, None], name='Target')
ant.placeholderdict['ratings'] = y_  # put the new placeholder in the graph for training
objective = tf.reduce_sum(tf.square(y_ - y))
dev_rmse = node_ops.rmse(y, y_)
model = generic_model.Model(objective, ant.placeholderdict,
                            mb=500,
                            learnrate=0.00001,
                            # remaining arguments assumed; they mirror the
                            # tree builder above
                            verbose=True,
                            maxbadcount=20,
                            epochs=100,
                            evaluate=dev_rmse,
                            predictions=y)
model.train(data.train, dev=data.dev, supplement=datadict, eval_schedule=500)
def mf(data, configfile, lamb=0.001, kfactors=20, learnrate=0.01,
       verbose=True, epochs=1000, maxbadcount=20, mb=500, initrange=1,
       eval_rate=500, random_seed=None, develop=False):
    data = loader.read_data_sets(data,
                                 hashlist=['item', 'user', 'ratings'],
                                 folders=['dev', 'train', 'item'])
    data.train.labels['ratings'] = loader.center(data.train.labels['ratings'])
    data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'])
    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=data.dev.features,
                              marker='-',
                              graph_name='basic_mf',
                              develop=develop,
                              variable_bindings={'kfactors': kfactors,
                                                 'initrange': initrange,
                                                 'lamb': lamb})
    print(ant.tensor_out)
    y = node_ops.x_dot_y(ant.tensor_out)
    y_ = tf.placeholder("float", [None, None], name='Target')
    # Experimental implicit-feedback term (SVD++ style). `utility_matrix` is
    # assumed to be defined elsewhere; the `plus` term is currently disabled
    # below, so these tensors are unused dead code.
    data.item.features['util'] = utility_matrix
    xuser = tf.placeholder(tf.int32, [None])
    xitem = tf.placeholder(tf.int32, [None])
    xutil = tf.placeholder(tf.float32, [None, None])
    wuser = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['user'].dim, kfactors]))
    witem = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['item'].dim, kfactors]))
    wplus = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['item'].dim, kfactors]))
    ubias = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['user'].dim]))
    ibias = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['item'].dim]))
    i_bias = tf.nn.embedding_lookup(ibias, xitem)
    u_bias = tf.nn.embedding_lookup(ubias, xuser)
    huser = tf.nn.embedding_lookup(wuser, xuser)
    hitem = tf.nn.embedding_lookup(witem, xitem)
    hplus = tf.nn.embedding_lookup(xutil, xuser)
    plus = tf.mul(tf.matmul(hplus, wplus, a_is_sparse=True),
                  tf.rsqrt(tf.reduce_sum(hplus, reduction_indices=1,
                                         keep_dims=True)))
    huserplus = huser  # + plus
    ant.placeholderdict['ratings'] = y_
    with tf.name_scope('objective'):
        objective = tf.reduce_sum(tf.square(y_ - y))
        objective += (lamb * tf.reduce_sum(tf.square(ant.tensordict['huser'])) +
                      lamb * tf.reduce_sum(tf.square(ant.tensordict['hitem'])) +
                      lamb * tf.reduce_sum(tf.square(ant.tensordict['ubias'])) +
                      lamb * tf.reduce_sum(tf.square(ant.tensordict['ibias'])))
    with tf.name_scope('dev_rmse'):
        dev_rmse = node_ops.rmse(y_, y)
    model = generic_model.Model(objective, ant.placeholderdict,
                                mb=mb,
                                learnrate=learnrate,
                                verbose=verbose,
                                maxbadcount=maxbadcount,
                                epochs=epochs,
                                evaluate=dev_rmse,
                                predictions=y,
                                model_name='mf',
                                random_seed=random_seed)
    model.train(data.train, dev=data.dev, eval_schedule=eval_rate)
    return model
def dsadd(data, configfile, initrange=0.1, kfactors=20, lamb=.01, mb=500,
          learnrate=0.003, verbose=True, maxbadcount=10, epochs=100,
          model_name='dssm', random_seed=500, eval_rate=500):
    datadict = data.user.features.copy()
    datadict.update(data.item.features)
    configdatadict = data.dev.features.copy()
    configdatadict.update(datadict)
    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=configdatadict,
                              marker='-',
                              graph_name='basic_mf',
                              variable_bindings={'initrange': initrange,
                                                 'kfactors': kfactors})
    y_ = tf.placeholder("float", [None, None], name='Target')
    ant.placeholderdict['ratings'] = y_
    with tf.name_scope('objective'):
        if type(ant.tensor_out) is list:
            # learned scalar weights combine the output heads additively
            scalars = tf.Variable(
                0.001 * tf.truncated_normal([len(ant.tensor_out), 1]))
            prediction = tf.mul(ant.tensor_out[0],
                                tf.slice(scalars, [0, 0], [1, 1]))
            for i in range(1, len(ant.tensor_out)):
                with tf.variable_scope('predictor%d' % i):
                    prediction = prediction + tf.mul(
                        ant.tensor_out[i], tf.slice(scalars, [i, 0], [1, 1]))
        sq_err = tf.square(y_ - prediction)
        objective = (tf.reduce_sum(sq_err) +
                     lamb * tf.reduce_sum(tf.square(ant.tensordict['huser'])) +
                     lamb * tf.reduce_sum(tf.square(ant.tensordict['hitem'])) +
                     lamb * tf.reduce_sum(tf.square(ant.tensordict['ubias'])) +
                     lamb * tf.reduce_sum(tf.square(ant.tensordict['ibias'])) +
                     lamb * tf.reduce_sum(tf.square(tf.concat(1, ant.tensor_out))))
    with tf.name_scope('dev_rmse'):
        dev_rmse = tf.sqrt(tf.div(tf.reduce_sum(sq_err),
                                  data.dev.num_examples))
    # use the signature's hyperparameters instead of the hardcoded values
    # they previously shadowed
    model = generic_model.Model(objective, ant.placeholderdict,
                                mb=mb,
                                learnrate=learnrate,
                                verbose=verbose,
                                maxbadcount=maxbadcount,
                                epochs=epochs,
                                evaluate=dev_rmse,
                                predictions=ant.tensor_out[0],
                                model_name=model_name,
                                random_seed=random_seed)
    model.train(data.train, dev=data.dev, supplement=datadict,
                eval_schedule=eval_rate)
    return model
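# Both multi-head builders take the same inputs but differ in how they use
# the output heads: dssm fits every head to the target independently, while
# dsadd learns scalar weights to combine the heads into a single prediction.
# A usage sketch; 'dssm.config' is a hypothetical multi-output spec name.
def multihead_example(data):
    independent = dssm(data, 'dssm.config', kfactors=10, lamb=.1)
    combined = dsadd(data, 'dssm.config', kfactors=10, lamb=.01)
    return independent, combined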
# Script: download ML-100k, center the ratings, and train the 'mf2.config'
# factorization model.
loader.maybe_download('ml100k.tar.gz', '.',
                      'http://sw.cs.wwu.edu/~tuora/aarontuor/ml100k.tar.gz')
loader.untar('ml100k.tar.gz')
data = loader.read_data_sets('ml100k',
                             folders=['dev', 'train'],
                             hashlist=['item', 'user', 'ratings'])
data.show()
data.train.labels['ratings'] = loader.center(data.train.labels['ratings'])
data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'])
with tf.variable_scope('mfgraph'):
    ant = config.AntGraph('mf2.config',
                          data=data.dev.features,
                          marker='-',
                          variable_bindings={'kfactors': 100,
                                             'initrange': 0.001,
                                             'l2': 0.1})
y = ant.tensor_out
y_ = tf.placeholder("float", [None, None], name='Target')
ant.placeholderdict['ratings'] = y_  # put the new placeholder in the graph for training
objective = tf.reduce_sum(tf.square(y_ - y))
dev_rmse = node_ops.rmse(y, y_)
dev_mae = node_ops.mae(y, y_)
model = generic_model.Model(objective, ant.placeholderdict,
                            mb=500,
                            # remaining arguments assumed; they mirror the mf
                            # builder above
                            learnrate=0.01,
                            verbose=True,
                            maxbadcount=20,
                            epochs=100,
                            evaluate=dev_rmse,
                            predictions=y,
                            save_tensors={'mae': dev_mae})
model.train(data.train, dev=data.dev, eval_schedule=500)
# [-nlayers NUM_HIDDEN_LAYERS]
def deep(data, configfile, epochs=10, learnrate=0.001, layers=[10, 10, 10],
         act='tanh', opt='grad', mb=500, type='r'):
    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=data.dev.features,
                              marker='-',
                              graph_name='deep',
                              variable_bindings={'layers': layers, 'act': act})
    y = ant.tensor_out
    y_ = tf.placeholder("float", [None, None], name='Target')
    ant.placeholderdict['targets'] = y_
    with tf.name_scope('objective'):
        if type == 'r':  # regression
            objective = node_ops.mse(y, y_)
        if type == 'c':  # classification
            objective = node_ops.accuracy(y, y_)
    model = generic_model.Model(objective, ant.placeholderdict,
                                # model settings assumed; they mirror the
                                # other builders in this file
                                mb=mb,
                                learnrate=learnrate,
                                epochs=epochs,
                                evaluate=objective,
                                predictions=y,
                                model_name='deep')
    model.train(data.train, dev=data.dev, eval_schedule=500)
    return model
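# Usage sketch for the deep builder above: the `type` flag picks the
# objective, so the same graph spec can be trained for regression or
# classification. 'deep.config' is a hypothetical spec name with
# 'layers'/'act' bindings.
def deep_example(data):
    regressor = deep(data, 'deep.config', layers=[64, 32, 16], type='r')  # MSE objective
    classifier = deep(data, 'deep.config', type='c')                      # accuracy objective
    return regressor, classifier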
def tree(data, configfile, lamb=0.001, kfactors=50, learnrate=0.00001,
         verbose=True, maxbadcount=20, mb=500, initrange=0.00001, epochs=10,
         random_seed=None, eval_rate=500, keep_prob=0.95, act='tanh'):
    datadict = data.user.features.copy()
    datadict.update(data.item.features)
    configdatadict = data.dev.features.copy()
    configdatadict.update(datadict)
    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=configdatadict,
                              marker='-',
                              variable_bindings={'kfactors': kfactors,
                                                 'initrange': initrange,
                                                 'keep_prob': keep_prob,
                                                 'act': act},
                              graph_name='tree')
    # ant.display_graph()
    # learned scalars weight the two output heads of the tree graph
    alpha = node_ops.weights('tnorm', [1, 1], l2=1.0)
    beta = node_ops.weights('tnorm', [1, 1], l2=1.0)
    print(ant.tensor_out)
    ubias = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['user'].dim]))
    ibias = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['item'].dim]))
    i_bias = tf.nn.embedding_lookup(ibias, ant.placeholderdict['item'])
    u_bias = tf.nn.embedding_lookup(ubias, ant.placeholderdict['user'])
    y = alpha * ant.tensor_out[0] + beta * ant.tensor_out[1] + u_bias + i_bias
    y_ = tf.placeholder("float", [None, None], name='Target')
    ant.placeholderdict['ratings'] = y_  # put the new placeholder in the graph for training
    with tf.name_scope('objective'):
        objective = (tf.reduce_sum(tf.square(y_ - y)) +
                     lamb * tf.reduce_sum(tf.square(ant.tensordict['huser'])) +
                     lamb * tf.reduce_sum(tf.square(ant.tensordict['hitem'])) +
                     lamb * tf.reduce_sum(tf.square(ant.tensordict['huser2'])) +
                     lamb * tf.reduce_sum(tf.square(ant.tensordict['hitem2'])) +
                     lamb * tf.reduce_sum(tf.square(u_bias)) +
                     lamb * tf.reduce_sum(tf.square(i_bias)))
    with tf.name_scope('dev_rmse'):
        dev_rmse = node_ops.rmse(y_, y)
    with tf.name_scope('training'):
        model = generic_model.Model(objective, ant.placeholderdict,
                                    mb=mb,
                                    learnrate=learnrate,
                                    verbose=verbose,
                                    maxbadcount=maxbadcount,
                                    epochs=epochs,
                                    evaluate=dev_rmse,
                                    predictions=y,
                                    model_name='tree',
                                    train_evaluate=dev_rmse)
        model.train(data.train, dev=data.dev, supplement=datadict,
                    eval_schedule=eval_rate, train_dev_eval_factor=5)
    return model