parser.add_argument("epochs", metavar="EPOCHS", type=int, help="The maximum number of epochs to train for.") parser.add_argument("modelID", metavar="MODEL_ID", type=int, help="A unique integer for saving model results during distributed runs model parameters.") parser.add_argument("random_seed", metavar="RANDOM_SEED", type=int, help="For reproducible results.") parser.add_argument("eval_rate", metavar="EVAL_RATE", type=int, help="How often (in terms of number of data points) to evaluate on dev.") return parser if __name__ == '__main__': args = return_parser().parse_args() data = loader.read_data_sets(args.datadir, folders=['train', 'test', 'dev', 'user', 'item']) data.train.labels['ratings'] = loader.center(data.train.labels['ratings'], axis=None) data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'], axis=None) data.user.features['age'] = loader.center(data.user.features['age'], axis=None) data.item.features['year'] = loader.center(data.item.features['year'], axis=None) data.user.features['age'] = loader.maxnormalize(data.user.features['age']) data.item.features['year'] = loader.maxnormalize(data.item.features['year']) x = tree_model.tree(data, args.config,data, args.config, initrange=args.initrange, kfactors=args.kfactors, lamb =args.lamb, mb=args.mb, learnrate=args.learnrate, verbose=args.verbose, maxbadcount=args.maxbadcount, epochs=args.epochs,
def svdplus(data,
            lamb_bias=0.005,
            lambfactor=0.015,
            kfactors=20,
            learnrate=0.01,
            verbose=True,
            epochs=1000,
            maxbadcount=20,
            mb=500,
            initrange=1,
            eval_rate=500,
            random_seed=None,
            develop=False):
    data = loader.read_data_sets(data, folders=['train', 'dev', 'item'],
                                 hashlist=['user', 'item', 'ratings'])
    data.train.labels['ratings'] = loader.center(data.train.labels['ratings'])
    data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'])

    # Binary user-item interaction matrix for the SVD++ implicit-feedback term.
    utility_matrix = sps.csr_matrix(
        (numpy.ones(data.train.features['user'].vec.shape[0]),
         (data.train.features['user'].vec, data.train.features['item'].vec)),
        shape=(data.train.features['user'].dim, data.train.features['item'].dim))
    data.item.features['util'] = utility_matrix

    xuser = tf.placeholder(tf.int32, [None])
    xitem = tf.placeholder(tf.int32, [None])
    xutil = tf.placeholder(tf.float32, [None, None])

    wuser = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['user'].dim, kfactors]))
    witem = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['item'].dim, kfactors]))
    wplus = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['item'].dim, kfactors]))
    ubias = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['user'].dim]))
    ibias = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['item'].dim]))

    i_bias = tf.nn.embedding_lookup(ibias, xitem)
    u_bias = tf.nn.embedding_lookup(ubias, xuser)
    huser = tf.nn.embedding_lookup(wuser, xuser)
    hitem = tf.nn.embedding_lookup(witem, xitem)

    # Implicit-feedback term: sum of factors for the items each user rated,
    # scaled by |N(u)|^{-1/2}.
    hplus = tf.nn.embedding_lookup(xutil, xuser)
    plus = tf.mul(
        tf.matmul(hplus, wplus, a_is_sparse=True),
        tf.rsqrt(tf.reduce_sum(hplus, reduction_indices=1, keep_dims=True)))
    huserplus = huser + plus

    y = node_ops.x_dot_y([huserplus, hitem, i_bias, u_bias])
    y_ = tf.placeholder("float", [None, None], name='Target')

    with tf.name_scope('objective'):
        objective = (tf.reduce_sum(tf.square(y_ - y)) +
                     lambfactor * tf.reduce_sum(tf.square(huser)) +
                     lambfactor * tf.reduce_sum(tf.square(hitem)) +
                     lambfactor * tf.reduce_sum(tf.square(wplus)) +
                     lamb_bias * tf.reduce_sum(tf.square(i_bias)) +
                     lamb_bias * tf.reduce_sum(tf.square(u_bias)))

    placeholderdict = {'ratings': y_, 'util': xutil, 'user': xuser, 'item': xitem}
    mae = node_ops.mae(y_, y)
    with tf.name_scope('dev_rmse'):
        dev_rmse = node_ops.rmse(y_, y)

    model = generic_model.Model(objective, placeholderdict,
                                mb=mb,
                                learnrate=learnrate,
                                verbose=verbose,
                                maxbadcount=maxbadcount,
                                epochs=epochs,
                                evaluate=dev_rmse,
                                predictions=y,
                                model_name='svdplus',
                                random_seed=random_seed,
                                decay=(500, 0.999),
                                save_tensors={'mae': mae})
    model.train(data.train, dev=data.dev, supplement=data.item.features,
                eval_schedule=eval_rate)
    return model
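# A minimal usage sketch for svdplus(), run only when this module is executed
# directly. The data directory path here is an assumption; the experiment
# scripts in this repo pass it through argparse instead.
if __name__ == '__main__':
    model = svdplus('path/to/ml100k/processed',
                    kfactors=20, mb=500, eval_rate=500)
    print('best dev error: %s' % model._best_dev_error)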
"A unique integer for saving model results during distributed runs model parameters." ) parser.add_argument("random_seed", metavar="RANDOM_SEED", type=int, help="For reproducible results.") return parser if __name__ == '__main__': args = return_parser().parse_args() data = loader.read_data_sets( args.datadir, folders=['train', 'test', 'dev', 'user', 'item']) data.train.labels['ratings'] = loader.center(data.train.labels['ratings']) data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings']) data.user.features['age'] = loader.center(data.user.features['age']) data.item.features['year'] = loader.center(data.item.features['year']) data.user.features['age'] = loader.maxnormalize(data.user.features['age']) data.item.features['year'] = loader.maxnormalize( data.item.features['year']) x = dsaddmodel.dsadd(data, args.config, initrange=args.initrange, kfactors=args.kfactors, lamb=args.lamb, mb=args.mb, learnrate=args.learnrate, verbose=args.verbose,
"-eval_rate", metavar="EVAL_RATE", type=int, default=500, help="How often (in terms of number of data points) to evaluate on dev." ) return parser if __name__ == '__main__': args = return_parser().parse_args() data = loader.read_data_sets(args.datadir, hashlist=['user', 'item', 'ratings'], folders=['train', 'test', 'dev']) data.train.labels['ratings'] = loader.center(data.train.labels['ratings']) data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings']) x = dnn_concat_model.dnn_concat(data, args.config, layers=args.layers, activation=args.act, initrange=args.initrange, bn=args.bn, keep_prob=args.kp, concat_size=args.cs, uembed=args.uembed, iembed=args.iembed, mb=args.mb, learnrate=args.learnrate, verbose=args.verbose, maxbadcount=args.maxbadcount,
def tensorfactor(data,
                 context_key='occ',
                 lamb=0.01,
                 learnrate=0.0001,
                 verbose=True,
                 epochs=5,
                 maxbadcount=20,
                 mb=500,
                 initrange=0.0001,
                 eval_rate=10000,
                 random_seed=None,
                 uembed=50,
                 iembed=50,
                 cembed=50):
    data = loader.read_data_sets(data,
                                 folders=('train', 'dev', 'item', 'user'),
                                 hashlist=('user', 'item', context_key, 'ratings'))
    data.train.labels['ratings'] = loader.center(data.train.labels['ratings'])
    data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'])

    # Broadcast the context feature from the item or user table onto each
    # rating record, whichever table it lives in.
    if context_key in data.item.features:
        data.train.features[context_key] = data.item.features[context_key][
            data.train.features['item']]
        data.dev.features[context_key] = data.item.features[context_key][
            data.dev.features['item']]
        del data.item.features[context_key]
    elif context_key in data.user.features:
        data.train.features[context_key] = data.user.features[context_key][
            data.train.features['user']]
        data.dev.features[context_key] = data.user.features[context_key][
            data.dev.features['user']]
        del data.user.features[context_key]
    data.show()

    item = tf.placeholder(tf.int32, [None])
    user = tf.placeholder(tf.int32, [None])
    context = tf.placeholder(tf.int32, [None])

    wuser = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['user'].shape[1], uembed]))
    witem = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['item'].shape[1], iembed]))
    wcontext = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features[context_key].shape[1], cembed]))
    xuser = tf.nn.embedding_lookup(wuser, user)
    xitem = tf.nn.embedding_lookup(witem, item)
    xcontext = tf.nn.embedding_lookup(wcontext, context)

    ibias = tf.Variable(
        tf.truncated_normal([data.dev.features['item'].shape[1]]))
    ubias = tf.Variable(
        tf.truncated_normal([data.dev.features['user'].shape[1]]))
    cbias = tf.Variable(
        tf.truncated_normal([data.dev.features[context_key].shape[1]]))
    i_bias = tf.nn.embedding_lookup(ibias, item)
    u_bias = tf.nn.embedding_lookup(ubias, user)
    c_bias = tf.nn.embedding_lookup(cbias, context)

    # Note: c_bias is regularized in the objective but not added to the
    # prediction.
    y = node_ops.ternary_tensor_combine([xuser, xitem, xcontext],
                                        initrange=initrange,
                                        l2=lamb) + i_bias + u_bias
    y_ = tf.placeholder("float", [None, None], name='Target')
    placeholderdict = {'user': user,
                       'item': item,
                       context_key: context,
                       'ratings': y_}

    with tf.name_scope('objective'):
        objective = (tf.reduce_sum(tf.square(y_ - y)) +
                     lamb * tf.reduce_sum(tf.square(wcontext)) +
                     lamb * tf.reduce_sum(tf.square(xuser)) +
                     lamb * tf.reduce_sum(tf.square(xitem)) +
                     lamb * tf.reduce_sum(tf.square(i_bias)) +
                     lamb * tf.reduce_sum(tf.square(u_bias)) +
                     lamb * tf.reduce_sum(tf.square(c_bias)))
    with tf.name_scope('dev_rmse'):
        dev_rmse = node_ops.rmse(y_, y)

    model = generic_model.Model(objective, placeholderdict,
                                mb=mb,
                                learnrate=learnrate,
                                verbose=verbose,
                                maxbadcount=maxbadcount,
                                epochs=epochs,
                                evaluate=dev_rmse,
                                predictions=y,
                                model_name='tensorfactor',
                                random_seed=random_seed)
    model.train(data.train, dev=data.dev, eval_schedule=eval_rate)
    return model
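# A minimal usage sketch, assuming a processed MovieLens-style directory whose
# user folder contains an 'occ' (occupation) feature; the path is hypothetical.
if __name__ == '__main__':
    model = tensorfactor('path/to/ml100k/processed',
                         context_key='occ',
                         uembed=50, iembed=50, cembed=50)
    print('best dev error: %s' % model._best_dev_error)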
help="For reproducible results.") parser.add_argument("-eval_rate", metavar="EVAL_RATE", type=int, default=500, help="How often (in terms of number of data points) to evaluate on dev.") parser.add_argument("lossfile", metavar="LOSSFILE", type=str, help="Loss file for spearmint_condor $lossfn argument.") parser.add_argument("expname", metavar="EXPNAME", type=str, help="Name of experiment (for resolving results path).") return parser if __name__ == '__main__': args = return_parser().parse_args() data = loader.read_data_sets(args.datadir, folders=['train', 'test', 'dev', 'user', 'item']) data.train.labels['ratings'] = loader.center(data.train.labels['ratings']) data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings']) data.user.features['age'] = loader.center(data.user.features['age']) #data.item.features['year'] = loader.center(data.item.features['year']) data.user.features['age'] = loader.maxnormalize(data.user.features['age']) #data.item.features['year'] = loader.maxnormalize(data.item.features['year']) x = dssm_model.dssm(data, args.config, initrange=args.initrange, kfactors=args.kfactors, lamb =args.lamb, mb=args.mb, learnrate=args.learnrate, verbose=args.verbose, maxbadcount=args.maxbadcount, epochs=args.epochs,
parser.add_argument("kfactors", metavar="KFACTORS", type=int, help="kfactors hyperparameter") parser.add_argument("learnrate", metavar="LEARNRATE", type=float, help="learn rate hyperparameter") parser.add_argument("mbsize", metavar="MBSIZE", type=int, help="minibatch size") parser.add_argument("irange", metavar="IRANGE", type=float, help="initrange hyperparameter") parser.add_argument("lossfile", metavar="LOSSFILE", type=str, help="loss file for spearmint") #if __name__ == '__main__': args = parser.parse_args() #data = loader.read_data_sets("/home/hutch_research/skomsks/prep/ydata/out", hashlist=['item', 'user', 'ratings']) data = loader.read_data_sets(args.datadir, hashlist=['item', 'user', 'ratings']) data.train.labels['ratings'] = loader.center(data.train.labels['ratings']) data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings']) x = mfmodel.mf(data, args.config, lamb=args.lamb, kfactors=args.kfactors, verbose=True, epochs=100, maxbadcount=20, mb=args.mbsize, initrange=args.irange) lfile = str(args.lossfile) out = open(lfile, 'w') x_err = x._best_dev_error if x_err > 100 or x_err == float('inf') or x_err == float('nan'): x_err = 100
"-eval_rate", metavar="EVAL_RATE", type=int, default=500, help="How often (in terms of number of data points) to evaluate on dev." ) return parser if __name__ == '__main__': args = return_parser().parse_args() data = loader.read_data_sets( args.datadir, folders=['train', 'test', 'dev', 'user', 'item']) data.train.labels['ratings'] = loader.center(data.train.labels['ratings'], axis=None) data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'], axis=None) x = tree_model.tree(data, args.config, initrange=args.initrange, kfactors=args.kfactors, lamb=args.lamb, mb=args.mb, learnrate=args.learnrate, verbose=args.verbose, maxbadcount=args.maxbadcount, epochs=args.epochs, random_seed=args.random_seed, eval_rate=args.eval_rate)
def test_center_dense_test_axis1():
    np.testing.assert_array_almost_equal(
        np.sum(loader.center(x, axis=1).mean(axis=1)), 0.0)


def test_center_sparse_test():
    np.testing.assert_array_almost_equal(
        np.sum(loader.center(y, axis=None).mean(axis=None)), 0.0)


def test_center_dense_test():
    np.testing.assert_array_almost_equal(
        loader.center(x, axis=None).mean(axis=None), 0.0)
def mf(data,
       configfile,
       lamb=0.001,
       kfactors=20,
       learnrate=0.01,
       verbose=True,
       epochs=1000,
       maxbadcount=20,
       mb=500,
       initrange=1,
       eval_rate=500,
       random_seed=None,
       develop=False):
    data = loader.read_data_sets(data,
                                 hashlist=['item', 'user', 'ratings'],
                                 folders=['dev', 'train', 'item'])
    data.train.labels['ratings'] = loader.center(data.train.labels['ratings'])
    data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'])

    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=data.dev.features,
                              marker='-',
                              graph_name='basic_mf',
                              develop=develop,
                              variable_bindings={
                                  'kfactors': kfactors,
                                  'initrange': initrange,
                                  'lamb': lamb
                              })
    print(ant.tensor_out)
    y = node_ops.x_dot_y(ant.tensor_out)
    y_ = tf.placeholder("float", [None, None], name='Target')

    # Dead code left over from the svdplus variant: `utility_matrix` is not
    # defined in this function and none of these tensors feed the objective
    # below, so the block is disabled to keep mf() runnable.
    # data.item.features['util'] = utility_matrix
    # xuser = tf.placeholder(tf.int32, [None])
    # xitem = tf.placeholder(tf.int32, [None])
    # xutil = tf.placeholder(tf.float32, [None, None])
    # wuser = initrange * tf.Variable(
    #     tf.truncated_normal([data.dev.features['user'].dim, kfactors]))
    # witem = initrange * tf.Variable(
    #     tf.truncated_normal([data.dev.features['item'].dim, kfactors]))
    # wplus = initrange * tf.Variable(
    #     tf.truncated_normal([data.dev.features['item'].dim, kfactors]))
    # ubias = initrange * tf.Variable(
    #     tf.truncated_normal([data.dev.features['user'].dim]))
    # ibias = initrange * tf.Variable(
    #     tf.truncated_normal([data.dev.features['item'].dim]))
    # i_bias = tf.nn.embedding_lookup(ibias, xitem)
    # u_bias = tf.nn.embedding_lookup(ubias, xuser)
    # huser = tf.nn.embedding_lookup(wuser, xuser)
    # hitem = tf.nn.embedding_lookup(witem, xitem)
    # hplus = tf.nn.embedding_lookup(xutil, xuser)
    # plus = tf.mul(
    #     tf.matmul(hplus, wplus, a_is_sparse=True),
    #     tf.rsqrt(tf.reduce_sum(hplus, reduction_indices=1, keep_dims=True)))
    # huserplus = huser  # + plus

    ant.placeholderdict['ratings'] = y_
    with tf.name_scope('objective'):
        objective = tf.reduce_sum(tf.square(y_ - y))
        objective += (
            lamb * tf.reduce_sum(tf.square(ant.tensordict['huser'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['hitem'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['ubias'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['ibias'])))
    with tf.name_scope('dev_rmse'):
        dev_rmse = node_ops.rmse(y_, y)
    model = generic_model.Model(objective, ant.placeholderdict,
                                mb=mb,
                                learnrate=learnrate,
                                verbose=verbose,
                                maxbadcount=maxbadcount,
                                epochs=epochs,
                                evaluate=dev_rmse,
                                predictions=y,
                                model_name='mf',
                                random_seed=random_seed)
    model.train(data.train, dev=data.dev, eval_schedule=eval_rate)
    return model
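# A minimal usage sketch for mf(); the data path and the 'mf.config' graph
# specification file name are assumptions (the spearmint script above passes
# both through argparse).
if __name__ == '__main__':
    model = mf('path/to/ml100k/processed', 'mf.config',
               lamb=0.001, kfactors=20, mb=500)
    print('best dev error: %s' % model._best_dev_error)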
parser.add_argument("-maxbadcount", metavar="MAXBADCOUNT", type=int, default=20, help="The threshold for early stopping.") parser.add_argument("-epochs", metavar="EPOCHS", type=int, default=100, help="The maximum number of epochs to train for.") parser.add_argument("-random_seed", metavar="RANDOM_SEED", type=int, default=500, help="For reproducible results.") parser.add_argument("-eval_rate", metavar="EVAL_RATE", type=int, default=500, help="How often (in terms of number of data points) to evaluate on dev.") return parser if __name__ == '__main__': args = return_parser().parse_args() data = loader.read_data_sets(args.datadir, folders=['train', 'test', 'dev', 'user', 'item']) data.train.labels['ratings'] = loader.center(data.train.labels['ratings'], axis=None) data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'], axis=None) x = tree_model.tree(data, args.config, initrange=args.initrange, kfactors=args.kfactors, lamb =args.lamb, mb=args.mb, learnrate=args.learnrate, verbose=args.verbose, maxbadcount=args.maxbadcount, epochs=args.epochs, random_seed=args.random_seed, eval_rate=args.eval_rate) #print stuff here to file.
def tensorfactor(data,
                 lamb=0.01,
                 learnrate=0.0001,
                 verbose=True,
                 epochs=100,
                 maxbadcount=20,
                 mb=500,
                 initrange=0.0001,
                 eval_rate=10000,
                 random_seed=None,
                 uembed=50,
                 iembed=50,
                 gembed=50):
    data = loader.read_data_sets(data,
                                 folders=('train', 'dev', 'item'),
                                 hashlist=('user', 'item', 'genres', 'ratings'))
    data.train.labels['ratings'] = loader.center(data.train.labels['ratings'])
    data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'])

    # Attach each rated item's genre vector to the corresponding rating record.
    data.train.features['genre'] = data.item.features['genres'][
        data.train.features['item'].vec, :]
    data.dev.features['genre'] = data.item.features['genres'][
        data.dev.features['item'].vec, :]
    data.show()

    item = tf.placeholder(tf.int32, [None])
    user = tf.placeholder(tf.int32, [None])
    genre = tf.placeholder(tf.float32, [None, data.dev.features['genre'].shape[1]])

    wuser = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['user'].shape[1], uembed]))
    witem = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['item'].shape[1], iembed]))
    wgenre = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['genre'].shape[1], gembed]))
    xuser = tf.nn.embedding_lookup(wuser, user)
    xitem = tf.nn.embedding_lookup(witem, item)
    # Genres are multi-hot, so project with a (sparse) matmul rather than an
    # embedding lookup.
    xgenre = tf.matmul(genre, wgenre, a_is_sparse=True)

    ibias = tf.Variable(tf.truncated_normal([data.dev.features['item'].shape[1]]))
    ubias = tf.Variable(tf.truncated_normal([data.dev.features['user'].shape[1]]))
    gbias = tf.Variable(tf.truncated_normal([data.dev.features['genre'].shape[1], 1]))
    i_bias = tf.nn.embedding_lookup(ibias, item)
    u_bias = tf.nn.embedding_lookup(ubias, user)
    # Note: g_bias is computed but not added to the prediction or objective.
    g_bias = tf.matmul(genre, gbias, a_is_sparse=True)

    y = node_ops.ternary_tensor_combine([xuser, xitem, xgenre],
                                        initrange=initrange,
                                        l2=lamb) + i_bias + u_bias
    y_ = tf.placeholder("float", [None, None], name='Target')
    placeholderdict = {'user': user, 'item': item, 'genre': genre, 'ratings': y_}

    with tf.name_scope('objective'):
        objective = (tf.reduce_sum(tf.square(y_ - y)) +
                     lamb * tf.reduce_sum(tf.square(wgenre)) +
                     lamb * tf.reduce_sum(tf.square(xuser)) +
                     lamb * tf.reduce_sum(tf.square(xitem)) +
                     lamb * tf.reduce_sum(tf.square(i_bias)) +
                     lamb * tf.reduce_sum(tf.square(u_bias)))
    with tf.name_scope('dev_rmse'):
        dev_rmse = node_ops.rmse(y_, y)

    model = generic_model.Model(objective, placeholderdict,
                                mb=mb,
                                learnrate=learnrate,
                                verbose=verbose,
                                maxbadcount=maxbadcount,
                                epochs=epochs,
                                evaluate=dev_rmse,
                                predictions=y,
                                model_name='tensorfactor',
                                random_seed=random_seed)
    model.train(data.train, dev=data.dev, eval_schedule=eval_rate)
    return model
help="Loss file for spearmint_condor $lossfn argument.") parser.add_argument( "expname", metavar="EXPNAME", type=str, help="Name of experiment (for resolving results path).") return parser if __name__ == '__main__': args = return_parser().parse_args() data = loader.read_data_sets( args.datadir, folders=['train', 'test', 'dev', 'user', 'item']) data.train.labels['ratings'] = loader.center(data.train.labels['ratings'], axis=None) data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'], axis=None) data.user.features['age'] = loader.center(data.user.features['age'], axis=None) #data.item.features['year'] = loader.center(data.item.features['year'], axis=None) data.user.features['age'] = loader.maxnormalize(data.user.features['age']) #data.item.features['year'] = loader.maxnormalize(data.item.features['year']) x = tree_model.tree(data, args.config, initrange=args.initrange, kfactors=args.kfactors, lamb=args.lamb, mb=args.mb, learnrate=args.learnrate,