Example #1
    parser.add_argument("epochs", metavar="EPOCHS", type=int,
                        help="The maximum number of epochs to train for.")
    parser.add_argument("modelID", metavar="MODEL_ID", type=int,
                        help="A unique integer for saving model results during distributed runs model parameters.")
    parser.add_argument("random_seed", metavar="RANDOM_SEED", type=int,
                        help="For reproducible results.")
    parser.add_argument("eval_rate", metavar="EVAL_RATE", type=int,
                        help="How often (in terms of number of data points) to evaluate on dev.")
    return parser

if __name__ == '__main__':

    args = return_parser().parse_args()

    data = loader.read_data_sets(args.datadir, folders=['train', 'test', 'dev', 'user', 'item'])
    data.train.labels['ratings'] = loader.center(data.train.labels['ratings'], axis=None)
    data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'], axis=None)
    data.user.features['age'] = loader.center(data.user.features['age'], axis=None)
    data.item.features['year'] = loader.center(data.item.features['year'], axis=None)
    data.user.features['age'] = loader.maxnormalize(data.user.features['age'])
    data.item.features['year'] = loader.maxnormalize(data.item.features['year'])

    x = tree_model.tree(data, args.config,
                        initrange=args.initrange,
                        kfactors=args.kfactors,
                        lamb=args.lamb,
                        mb=args.mb,
                        learnrate=args.learnrate,
                        verbose=args.verbose,
                        maxbadcount=args.maxbadcount,
                        epochs=args.epochs,
                        random_seed=args.random_seed,
                        eval_rate=args.eval_rate)
Example #2
def svdplus(data,
            lamb_bias=0.005,
            lambfactor=0.015,
            kfactors=20,
            learnrate=0.01,
            verbose=True,
            epochs=1000,
            maxbadcount=20,
            mb=500,
            initrange=1,
            eval_rate=500,
            random_seed=None,
            develop=False):

    data = loader.read_data_sets(data,
                                 folders=['train', 'dev', 'item'],
                                 hashlist=['user', 'item', 'ratings'])
    data.train.labels['ratings'] = loader.center(data.train.labels['ratings'])
    data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'])
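    # Sparse binary user-item interaction matrix: entry (u, i) is 1 if user u rated item i
    # in the training set; it plays the role of the implicit-feedback set N(u) in SVD++.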
    utility_matrix = sps.csr_matrix(
        (numpy.ones(data.train.features['user'].vec.shape[0]),
         (data.train.features['user'].vec, data.train.features['item'].vec)),
        shape=(data.train.features['user'].dim,
               data.train.features['item'].dim))
    data.item.features['util'] = utility_matrix

    xuser = tf.placeholder(tf.int32, [None])
    xitem = tf.placeholder(tf.int32, [None])

    xutil = tf.placeholder(tf.float32, [None, None])

    wuser = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['user'].dim, kfactors]))
    witem = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['item'].dim, kfactors]))
    wplus = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['item'].dim, kfactors]))

    ubias = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['user'].dim]))
    ibias = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['item'].dim]))

    i_bias = tf.nn.embedding_lookup(ibias, xitem)
    u_bias = tf.nn.embedding_lookup(ubias, xuser)

    huser = tf.nn.embedding_lookup(wuser, xuser)
    hitem = tf.nn.embedding_lookup(witem, xitem)
    hplus = tf.nn.embedding_lookup(xutil, xuser)

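    # Implicit-feedback ("plus") term: project each user's interaction row through wplus
    # and scale by 1/sqrt(|N(u)|), the number of items that user has interacted with.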
    plus = tf.mul(
        tf.matmul(hplus, wplus, a_is_sparse=True),
        tf.rsqrt(tf.reduce_sum(hplus, reduction_indices=1, keep_dims=True)))
    huserplus = huser + plus

    y = node_ops.x_dot_y([huserplus, hitem, i_bias, u_bias])
    y_ = tf.placeholder("float", [None, None], name='Target')

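    # Squared-error loss with L2 penalties on the latent factors and the bias embeddings.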
    with tf.name_scope('objective'):
        objective = (tf.reduce_sum(tf.square(y_ - y)) +
                     lambfactor * tf.reduce_sum(tf.square(huser)) +
                     lambfactor * tf.reduce_sum(tf.square(hitem)) +
                     lambfactor * tf.reduce_sum(tf.square(wplus)) +
                     lamb_bias * tf.reduce_sum(tf.square(i_bias)) +
                     lamb_bias * tf.reduce_sum(tf.square(u_bias)))

    placeholderdict = {
        'ratings': y_,
        'util': xutil,
        'user': xuser,
        'item': xitem
    }
    mae = node_ops.mae(y_, y)
    with tf.name_scope('dev_rmse'):
        dev_rmse = node_ops.rmse(y_, y)
    model = generic_model.Model(objective,
                                placeholderdict,
                                mb=mb,
                                learnrate=learnrate,
                                verbose=verbose,
                                maxbadcount=maxbadcount,
                                epochs=epochs,
                                evaluate=dev_rmse,
                                predictions=y,
                                model_name='svdplus',
                                random_seed=random_seed,
                                decay=(500, 0.999),
                                save_tensors={'mae': mae})
    model.train(data.train,
                dev=data.dev,
                supplement=data.item.features,
                eval_schedule=eval_rate)

    return model
Example #3
        "A unique integer for saving model results during distributed runs model parameters."
    )
    parser.add_argument("random_seed",
                        metavar="RANDOM_SEED",
                        type=int,
                        help="For reproducible results.")
    return parser


if __name__ == '__main__':

    args = return_parser().parse_args()

    data = loader.read_data_sets(
        args.datadir, folders=['train', 'test', 'dev', 'user', 'item'])
    data.train.labels['ratings'] = loader.center(data.train.labels['ratings'])
    data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'])
    data.user.features['age'] = loader.center(data.user.features['age'])
    data.item.features['year'] = loader.center(data.item.features['year'])
    data.user.features['age'] = loader.maxnormalize(data.user.features['age'])
    data.item.features['year'] = loader.maxnormalize(
        data.item.features['year'])

    x = dsaddmodel.dsadd(data,
                         args.config,
                         initrange=args.initrange,
                         kfactors=args.kfactors,
                         lamb=args.lamb,
                         mb=args.mb,
                         learnrate=args.learnrate,
                         verbose=args.verbose,
Example #4
        "-eval_rate",
        metavar="EVAL_RATE",
        type=int,
        default=500,
        help="How often (in terms of number of data points) to evaluate on dev."
    )
    return parser


if __name__ == '__main__':

    args = return_parser().parse_args()
    data = loader.read_data_sets(args.datadir,
                                 hashlist=['user', 'item', 'ratings'],
                                 folders=['train', 'test', 'dev'])
    data.train.labels['ratings'] = loader.center(data.train.labels['ratings'])
    data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'])
    x = dnn_concat_model.dnn_concat(data,
                                    args.config,
                                    layers=args.layers,
                                    activation=args.act,
                                    initrange=args.initrange,
                                    bn=args.bn,
                                    keep_prob=args.kp,
                                    concat_size=args.cs,
                                    uembed=args.uembed,
                                    iembed=args.iembed,
                                    mb=args.mb,
                                    learnrate=args.learnrate,
                                    verbose=args.verbose,
                                    maxbadcount=args.maxbadcount,
Example #5
def tensorfactor(data,
                 context_key='occ',
                 lamb=0.01,
                 learnrate=0.0001,
                 verbose=True,
                 epochs=5,
                 maxbadcount=20,
                 mb=500,
                 initrange=0.0001,
                 eval_rate=10000,
                 random_seed=None,
                 uembed=50,
                 iembed=50,
                 cembed=50):

    data = loader.read_data_sets(data,
                                 folders=('train', 'dev', 'item', 'user'),
                                 hashlist=('user', 'item', context_key,
                                           'ratings'))
    data.train.labels['ratings'] = loader.center(data.train.labels['ratings'])
    data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'])

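    # Attach the chosen context feature to every train/dev example by indexing the
    # item-level (or user-level) feature table with the per-example index vector.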
    if context_key in data.item.features:
        data.train.features[context_key] = data.item.features[context_key][
            data.train.features['item']]
        data.dev.features[context_key] = data.item.features[context_key][
            data.dev.features['item']]
        del data.item.features[context_key]
    elif context_key in data.user.features:
        data.train.features[context_key] = data.user.features[context_key][
            data.train.features['user']]
        data.dev.features[context_key] = data.user.features[context_key][
            data.dev.features['user']]
        del data.user.features[context_key]
    data.show()

    item = tf.placeholder(tf.int32, [None])
    user = tf.placeholder(tf.int32, [None])
    context = tf.placeholder(tf.int32, [None])

    wuser = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['user'].shape[1], uembed]))
    witem = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['item'].shape[1], iembed]))
    wcontext = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features[context_key].shape[1], cembed]))

    xuser = tf.nn.embedding_lookup(wuser, user)
    xitem = tf.nn.embedding_lookup(witem, item)
    xcontext = tf.nn.embedding_lookup(wcontext, context)

    ibias = tf.Variable(
        tf.truncated_normal([data.dev.features['item'].shape[1]]))
    ubias = tf.Variable(
        tf.truncated_normal([data.dev.features['user'].shape[1]]))
    cbias = tf.Variable(
        tf.truncated_normal([data.dev.features[context_key].shape[1]]))

    i_bias = tf.nn.embedding_lookup(ibias, item)
    u_bias = tf.nn.embedding_lookup(ubias, user)
    c_bias = tf.nn.embedding_lookup(cbias, context)

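    # Prediction: three-way tensor combination of the user, item, and context embeddings
    # plus item and user biases (c_bias is regularized below but not added to y).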
    y = node_ops.ternary_tensor_combine([xuser, xitem, xcontext],
                                        initrange=initrange,
                                        l2=lamb) + i_bias + u_bias
    y_ = tf.placeholder("float", [None, None], name='Target')

    placeholderdict = {
        'user': user,
        'item': item,
        context_key: context,
        'ratings': y_
    }
    with tf.name_scope('objective'):
        objective = (tf.reduce_sum(tf.square(y_ - y)) +
                     lamb * tf.reduce_sum(tf.square(wcontext)) +
                     lamb * tf.reduce_sum(tf.square(xuser)) +
                     lamb * tf.reduce_sum(tf.square(xitem)) +
                     lamb * tf.reduce_sum(tf.square(i_bias)) +
                     lamb * tf.reduce_sum(tf.square(u_bias)) +
                     lamb * tf.reduce_sum(tf.square(c_bias)))
    with tf.name_scope('dev_rmse'):
        dev_rmse = node_ops.rmse(y_, y)
    model = generic_model.Model(objective,
                                placeholderdict,
                                mb=mb,
                                learnrate=learnrate,
                                verbose=verbose,
                                maxbadcount=maxbadcount,
                                epochs=epochs,
                                evaluate=dev_rmse,
                                predictions=y,
                                model_name='tensorfactor',
                                random_seed=random_seed)
    model.train(data.train, dev=data.dev, eval_schedule=eval_rate)

    return model
Example #6
                        help="For reproducible results.")
    parser.add_argument("-eval_rate", metavar="EVAL_RATE", type=int, default=500,
                        help="How often (in terms of number of data points) to evaluate on dev.")
    parser.add_argument("lossfile", metavar="LOSSFILE", type=str,
                        help="Loss file for spearmint_condor $lossfn argument.")
    parser.add_argument("expname", metavar="EXPNAME", type=str,
                        help="Name of experiment (for resolving results path).")
    return parser

if __name__ == '__main__':

    args = return_parser().parse_args()

    data = loader.read_data_sets(args.datadir,
                                 folders=['train', 'test', 'dev', 'user', 'item'])
    data.train.labels['ratings'] = loader.center(data.train.labels['ratings'])
    data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'])
    data.user.features['age'] = loader.center(data.user.features['age'])
    #data.item.features['year'] = loader.center(data.item.features['year'])
    data.user.features['age'] = loader.maxnormalize(data.user.features['age'])
    #data.item.features['year'] = loader.maxnormalize(data.item.features['year'])

    x = dssm_model.dssm(data, args.config,
                        initrange=args.initrange,
                        kfactors=args.kfactors,
                        lamb=args.lamb,
                        mb=args.mb,
                        learnrate=args.learnrate,
                        verbose=args.verbose,
                        maxbadcount=args.maxbadcount,
                        epochs=args.epochs,
Example #7
parser.add_argument("kfactors", metavar="KFACTORS", type=int,
                    help="kfactors hyperparameter")
parser.add_argument("learnrate", metavar="LEARNRATE", type=float,
                    help="learn rate hyperparameter")
parser.add_argument("mbsize", metavar="MBSIZE", type=int,
                    help="minibatch size")
parser.add_argument("irange", metavar="IRANGE", type=float,
                    help="initrange hyperparameter")
parser.add_argument("lossfile", metavar="LOSSFILE", type=str,
                    help="loss file for spearmint")

#if __name__ == '__main__':
args = parser.parse_args()
#data = loader.read_data_sets("/home/hutch_research/skomsks/prep/ydata/out", hashlist=['item', 'user', 'ratings'])
data = loader.read_data_sets(args.datadir, hashlist=['item', 'user', 'ratings'])
data.train.labels['ratings'] = loader.center(data.train.labels['ratings'])
data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'])
x = mfmodel.mf(data,
               args.config,
               lamb=args.lamb,
               kfactors=args.kfactors,
               verbose=True,
               epochs=100,
               maxbadcount=20,
               mb=args.mbsize,
               initrange=args.irange)
lfile = str(args.lossfile)
out = open(lfile, 'w')
x_err = x._best_dev_error
# Clamp divergent losses; note that NaN never compares equal to anything (including
# itself), so detect it with a self-comparison.
if x_err > 100 or x_err == float('inf') or x_err != x_err:
    x_err = 100
Example #8
        "-eval_rate",
        metavar="EVAL_RATE",
        type=int,
        default=500,
        help="How often (in terms of number of data points) to evaluate on dev."
    )
    return parser


if __name__ == '__main__':

    args = return_parser().parse_args()

    data = loader.read_data_sets(
        args.datadir, folders=['train', 'test', 'dev', 'user', 'item'])
    data.train.labels['ratings'] = loader.center(data.train.labels['ratings'],
                                                 axis=None)
    data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'],
                                               axis=None)

    x = tree_model.tree(data,
                        args.config,
                        initrange=args.initrange,
                        kfactors=args.kfactors,
                        lamb=args.lamb,
                        mb=args.mb,
                        learnrate=args.learnrate,
                        verbose=args.verbose,
                        maxbadcount=args.maxbadcount,
                        epochs=args.epochs,
                        random_seed=args.random_seed,
                        eval_rate=args.eval_rate)
Example #9
def test_center_dense_test_axis1():
    np.testing.assert_array_almost_equal(
        np.sum(loader.center(x, axis=1).mean(axis=1)), 0.0)
Example #10
def test_center_sparse_test():
    np.testing.assert_array_almost_equal(
        np.sum(loader.center(y, axis=None).mean(axis=None)), 0.0)
Example #11
def test_center_dense_test():
    np.testing.assert_array_almost_equal(
        loader.center(x, axis=None).mean(axis=None), 0.0)
Example #12
def mf(data,
       configfile,
       lamb=0.001,
       kfactors=20,
       learnrate=0.01,
       verbose=True,
       epochs=1000,
       maxbadcount=20,
       mb=500,
       initrange=1,
       eval_rate=500,
       random_seed=None,
       develop=False):

    data = loader.read_data_sets(data,
                                 hashlist=['item', 'user', 'ratings'],
                                 folders=['dev', 'train', 'item'])
    data.train.labels['ratings'] = loader.center(data.train.labels['ratings'])
    data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'])
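    # Build the factorization graph from the .config file; kfactors, initrange, and lamb
    # are bound to the variables of the same name referenced in that config.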
    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=data.dev.features,
                              marker='-',
                              graph_name='basic_mf',
                              develop=develop,
                              variable_bindings={
                                  'kfactors': kfactors,
                                  'initrange': initrange,
                                  'lamb': lamb
                              })
        print(ant.tensor_out)
        y = node_ops.x_dot_y(ant.tensor_out)
        y_ = tf.placeholder("float", [None, None], name='Target')

        ant.placeholderdict['ratings'] = y_
        with tf.name_scope('objective'):
            objective = (tf.reduce_sum(tf.square(y_ - y)))
        objective += (
            lamb * tf.reduce_sum(tf.square(ant.tensordict['huser'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['hitem'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['ubias'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['ibias'])))
        with tf.name_scope('dev_rmse'):
            dev_rmse = node_ops.rmse(y_, y)
        model = generic_model.Model(objective,
                                    ant.placeholderdict,
                                    mb=mb,
                                    learnrate=learnrate,
                                    verbose=verbose,
                                    maxbadcount=maxbadcount,
                                    epochs=epochs,
                                    evaluate=dev_rmse,
                                    predictions=y,
                                    model_name='mf',
                                    random_seed=random_seed)
        model.train(data.train, dev=data.dev, eval_schedule=eval_rate)

        return model
Example #13
    parser.add_argument("-maxbadcount", metavar="MAXBADCOUNT", type=int, default=20,
                        help="The threshold for early stopping.")
    parser.add_argument("-epochs", metavar="EPOCHS", type=int, default=100,
                        help="The maximum number of epochs to train for.")
    parser.add_argument("-random_seed", metavar="RANDOM_SEED", type=int, default=500,
                        help="For reproducible results.")
    parser.add_argument("-eval_rate", metavar="EVAL_RATE", type=int, default=500,
                        help="How often (in terms of number of data points) to evaluate on dev.")
    return parser

if __name__ == '__main__':

    args = return_parser().parse_args()

    data = loader.read_data_sets(args.datadir, folders=['train', 'test', 'dev', 'user', 'item'])
    data.train.labels['ratings'] = loader.center(data.train.labels['ratings'], axis=None)
    data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'], axis=None)

    x = tree_model.tree(data, args.config,
                        initrange=args.initrange,
                        kfactors=args.kfactors,
                        lamb=args.lamb,
                        mb=args.mb,
                        learnrate=args.learnrate,
                        verbose=args.verbose,
                        maxbadcount=args.maxbadcount,
                        epochs=args.epochs,
                        random_seed=args.random_seed,
                        eval_rate=args.eval_rate)
    #print stuff here to file.
Example #14
def tensorfactor(data,
                 lamb=0.01,
                 learnrate=0.0001,
                 verbose=True,
                 epochs=100,
                 maxbadcount=20,
                 mb=500,
                 initrange=0.0001,
                 eval_rate=10000,
                 random_seed=None,
                 uembed=50,
                 iembed=50,
                 gembed=50):

        data = loader.read_data_sets(data, folders=('train', 'dev', 'item'),
                                     hashlist=('user', 'item', 'genres', 'ratings'))
        data.train.labels['ratings'] = loader.center(data.train.labels['ratings'])
        data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'])

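        # Attach each example's genre vector by indexing the item-level 'genres' matrix
        # with the per-example item indices.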
        data.train.features['genre'] = data.item.features['genres'][data.train.features['item'].vec, :]
        data.dev.features['genre'] = data.item.features['genres'][data.dev.features['item'].vec, :]

        data.show()


        item = tf.placeholder(tf.int32, [None])
        user = tf.placeholder(tf.int32, [None])
        genre = tf.placeholder(tf.float32, [None, data.dev.features['genre'].shape[1]])

        wuser = initrange*tf.Variable(tf.truncated_normal([data.dev.features['user'].shape[1], uembed]))
        witem = initrange*tf.Variable(tf.truncated_normal([data.dev.features['item'].shape[1], iembed]))
        wgenre = initrange*tf.Variable(tf.truncated_normal([data.dev.features['genre'].shape[1], gembed]))

        xuser = tf.nn.embedding_lookup(wuser, user)
        xitem = tf.nn.embedding_lookup(witem, item)
        xgenre = tf.matmul(genre, wgenre, a_is_sparse=True)

        ibias = tf.Variable(tf.truncated_normal([data.dev.features['item'].shape[1]]))
        ubias = tf.Variable(tf.truncated_normal([data.dev.features['user'].shape[1]]))
        gbias = tf.Variable(tf.truncated_normal([data.dev.features['genre'].shape[1], 1]))

        i_bias = tf.nn.embedding_lookup(ibias, item)
        u_bias = tf.nn.embedding_lookup(ubias, user)
        g_bias = tf.matmul(genre, gbias, a_is_sparse=True)

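        # Prediction: three-way tensor combination of the user, item, and genre representations
        # plus item and user biases (g_bias is computed above but not used in y).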
        y = node_ops.ternary_tensor_combine([xuser, xitem, xgenre],
                                            initrange=initrange,
                                            l2=lamb) + i_bias + u_bias
        y_ = tf.placeholder("float", [None, None], name='Target')

        placeholderdict = {'user': user, 'item': item, 'genre': genre, 'ratings': y_}
        with tf.name_scope('objective'):
            objective = (tf.reduce_sum(tf.square(y_ - y)) +
                         lamb*tf.reduce_sum(tf.square(wgenre)) +
                         lamb*tf.reduce_sum(tf.square(xuser)) +
                         lamb*tf.reduce_sum(tf.square(xitem)) +
                         lamb*tf.reduce_sum(tf.square(i_bias)) +
                         lamb*tf.reduce_sum(tf.square(u_bias)))
        with tf.name_scope('dev_rmse'):
            dev_rmse = node_ops.rmse(y_, y)
        model = generic_model.Model(objective, placeholderdict,
                                    mb=mb,
                                    learnrate=learnrate,
                                    verbose=verbose,
                                    maxbadcount=maxbadcount,
                                    epochs=epochs,
                                    evaluate=dev_rmse,
                                    predictions=y,
                                    model_name='tensorfactor',
                                    random_seed=random_seed)
        model.train(data.train, dev=data.dev, eval_schedule=eval_rate)

        return model
Example #15
        help="Loss file for spearmint_condor $lossfn argument.")
    parser.add_argument(
        "expname",
        metavar="EXPNAME",
        type=str,
        help="Name of experiment (for resolving results path).")
    return parser


if __name__ == '__main__':

    args = return_parser().parse_args()

    data = loader.read_data_sets(
        args.datadir, folders=['train', 'test', 'dev', 'user', 'item'])
    data.train.labels['ratings'] = loader.center(data.train.labels['ratings'],
                                                 axis=None)
    data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'],
                                               axis=None)
    data.user.features['age'] = loader.center(data.user.features['age'],
                                              axis=None)
    #data.item.features['year'] = loader.center(data.item.features['year'], axis=None)
    data.user.features['age'] = loader.maxnormalize(data.user.features['age'])
    #data.item.features['year'] = loader.maxnormalize(data.item.features['year'])

    x = tree_model.tree(data,
                        args.config,
                        initrange=args.initrange,
                        kfactors=args.kfactors,
                        lamb=args.lamb,
                        mb=args.mb,
                        learnrate=args.learnrate,