Esempio n. 1
0
def dnn_concat(data,
               configfile,
               layers=None,
               activation='tanhlecun',
               initrange=1e-3,
               bn=True,
               keep_prob=.95,
               concat_size=24,
               uembed=32,
               iembed=32,
               learnrate=.00001,
               verbose=True,
               epochs=10,
               maxbadcount=20,
               mb=2000,
               eval_rate=500):
    """Build the graph described by ``configfile`` and train it on ``data``.

    The network itself is assembled by ``config.AntGraph``. This function
    adds a ``'ratings'`` target placeholder, a summed squared-error objective
    and an RMSE-style evaluation tensor, then trains a
    ``generic_model.Model`` on it.

    Args:
        data: Dataset object. ``data.train`` and ``data.dev`` are handed to
            ``Model.train``; ``data.dev.features`` and
            ``data.dev.num_examples`` are read while building the graph.
        configfile: Graph specification passed to ``config.AntGraph``.
        layers: Value bound to ``'layers'`` in the config. Defaults to
            ``[16, 8]``; a fresh list is created on every call.
        activation, initrange, bn, keep_prob, concat_size, uembed, iembed:
            Values bound to the same-named variables in the config.
        learnrate, verbose, epochs, maxbadcount, mb: Forwarded unchanged to
            ``generic_model.Model``.
        eval_rate: Forwarded to ``Model.train`` as ``eval_schedule``.

    Returns:
        The ``generic_model.Model`` instance after ``train`` has been called.
    """
    # Use a None sentinel instead of a list literal in the signature: the list
    # is handed to external code, and a shared default could be mutated there
    # and leak into later calls.
    if layers is None:
        layers = [16, 8]

    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=data.dev.features,
                              marker='-',
                              graph_name='dnn_concat',
                              variable_bindings={
                                  'layers': layers,
                                  'activation': activation,
                                  'initrange': initrange,
                                  'bn': bn,
                                  'keep_prob': keep_prob,
                                  'concat_size': concat_size,
                                  'uembed': uembed,
                                  'iembed': iembed,
                              })

    y = ant.tensor_out
    y_ = tf.placeholder("float", [None, None], name='Target')
    ant.placeholderdict[
        'ratings'] = y_  # put the new placeholder in the graph for training
    with tf.name_scope('objective'):
        objective = tf.reduce_sum(tf.square(y_ - y))
    with tf.name_scope('dev_rmse'):
        # The squared error is divided by the size of the dev set, so this is
        # a true RMSE only when it is evaluated on the whole dev set.
        dev_rmse = tf.sqrt(
            tf.div(tf.reduce_sum(tf.square(y - y_)), data.dev.num_examples))
    with tf.name_scope('training'):
        model = generic_model.Model(objective,
                                    ant.placeholderdict,
                                    mb=mb,
                                    learnrate=learnrate,
                                    verbose=verbose,
                                    maxbadcount=maxbadcount,
                                    epochs=epochs,
                                    evaluate=dev_rmse,
                                    predictions=y,
                                    model_name='res_concat',
                                    make_histograms=False,
                                    save=False,
                                    tensorboard=False)
        model.train(data.train, dev=data.dev, eval_schedule=eval_rate)
    return model
Esempio n. 2
0
def mf(data,
       configfile,
       lamb=0.001,
       kfactors=20,
       learnrate=0.01,
       verbose=True,
       epochs=1000,
       maxbadcount=20,
       mb=500,
       initrange=1,
       eval_rate=500,
       random_seed=None,
       develop=False,
       train_dev_eval_factor=3):
    """Train the model described by ``configfile`` with L2 regularisation.

    Args:
        data: Dataset object providing ``train`` and ``dev`` splits;
            ``data.dev.features`` is given to ``config.AntGraph``.
        configfile: Graph specification passed to ``config.AntGraph``.
        lamb: Weight applied to the sum of squares of the ``'huser'``,
            ``'hitem'``, ``'ubias'`` and ``'ibias'`` tensors of the graph.
        kfactors, initrange: Values bound to the same-named config variables.
        learnrate, verbose, epochs, maxbadcount, mb, random_seed: Forwarded
            unchanged to ``generic_model.Model``.
        eval_rate: Forwarded to ``Model.train`` as ``eval_schedule``.
        develop: Forwarded to ``config.AntGraph``.
        train_dev_eval_factor: Forwarded to ``Model.train``.

    Returns:
        The ``generic_model.Model`` instance after ``train`` has been called.
    """
    bindings = {'kfactors': kfactors, 'initrange': initrange}

    with tf.name_scope('ant_graph'):
        graph = config.AntGraph(configfile,
                                data=data.dev.features,
                                marker='-',
                                graph_name='basic_mf',
                                develop=develop,
                                variable_bindings=bindings)
        predicted = graph.tensor_out
        target = tf.placeholder("float", [None, None], name='Target')
        graph.placeholderdict['ratings'] = target

        with tf.name_scope('objective'):
            objective = tf.reduce_sum(tf.square(target - predicted))

        # Accumulate the penalty separately so it is summed on its own and
        # then added to the data term in a single step.
        penalty = None
        for key in ('huser', 'hitem', 'ubias', 'ibias'):
            term = lamb * tf.reduce_sum(tf.square(graph.tensordict[key]))
            penalty = term if penalty is None else penalty + term
        objective += penalty

        with tf.name_scope('dev_rmse'):
            _rmse = node_ops.rmse(target, predicted)
        mae = node_ops.mae(target, predicted)

        model_options = dict(mb=mb,
                             learnrate=learnrate,
                             verbose=verbose,
                             maxbadcount=maxbadcount,
                             epochs=epochs,
                             evaluate=_rmse,
                             train_evaluate=_rmse,
                             predictions=predicted,
                             model_name='mf',
                             random_seed=random_seed,
                             save_tensors={'mae': mae})
        model = generic_model.Model(objective, graph.placeholderdict,
                                    **model_options)
        model.train(data.train,
                    dev=data.dev,
                    eval_schedule=eval_rate,
                    train_dev_eval_factor=train_dev_eval_factor)

    return model
Esempio n. 3
0
def tensorfactor(data,
                 configfile,
                 lamb=.001,
                 kfactors=1000,
                 learnrate=0.01,
                 verbose=True,
                 epochs=1000,
                 maxbadcount=20,
                 mb=500,
                 initrange=1,
                 eval_rate=500,
                 random_seed=None):
    """Train the factorisation graph described by ``configfile``.

    Args:
        data: Dataset object providing ``train`` and ``dev`` splits;
            ``data.dev.features`` is given to ``config.AntGraph``.
        configfile: Graph specification passed to ``config.AntGraph``.
        lamb, kfactors, initrange: Values bound to the same-named config
            variables. ``lamb`` is not used directly in the objective here.
        learnrate, verbose, epochs, maxbadcount, mb, random_seed: Forwarded
            unchanged to ``generic_model.Model``.
        eval_rate: Forwarded to ``Model.train`` as ``eval_schedule``.

    Returns:
        The ``generic_model.Model`` instance after ``train`` has been called.
    """
    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=data.dev.features,
                              marker='-',
                              graph_name='basic_mf',
                              variable_bindings={
                                  'kfactors': kfactors,
                                  'initrange': initrange,
                                  'lamb': lamb
                              })
        y = ant.tensor_out
        y_ = tf.placeholder("float", [None, None], name='Target')
        ant.placeholderdict['ratings'] = y_
        with tf.name_scope('objective'):
            objective = (tf.reduce_sum(tf.square(y_ - y)))
        with tf.name_scope('dev_rmse'):
            dev_rmse = node_ops.rmse(y_, y)
        model = generic_model.Model(objective,
                                    ant.placeholderdict,
                                    mb=mb,
                                    learnrate=learnrate,
                                    verbose=verbose,
                                    maxbadcount=maxbadcount,
                                    epochs=epochs,
                                    evaluate=dev_rmse,
                                    predictions=y,
                                    model_name='mf',
                                    random_seed=random_seed)
        # Honour the caller's eval_rate; it was previously accepted but
        # silently replaced by a hard-coded schedule of 200.
        model.train(data.train, dev=data.dev, eval_schedule=eval_rate)

        return model
Esempio n. 4
0
def svdplus(data,
            lamb_bias=0.005,
            lambfactor=0.015,
            kfactors=20,
            learnrate=0.01,
            verbose=True,
            epochs=1000,
            maxbadcount=20,
            mb=500,
            initrange=1,
            eval_rate=500,
            random_seed=None,
            develop=False):
    """Load a ratings dataset, build a factor model with an extra
    utility-matrix term, and train it.

    Args:
        data: Passed to ``loader.read_data_sets``; the name is then rebound
            to the loaded dataset object.
        lamb_bias: Weight on the squared looked-up user and item biases.
        lambfactor: Weight on the squared looked-up user/item factors and on
            the whole ``wplus`` matrix.
        kfactors: Number of columns of each factor matrix.
        learnrate, verbose, epochs, maxbadcount, mb, random_seed: Forwarded
            unchanged to ``generic_model.Model``.
        initrange: Scalar multiplied onto every randomly initialised variable.
        eval_rate: Forwarded to ``Model.train`` as ``eval_schedule``.
        develop: Not referenced anywhere in this function.

    Returns:
        The ``generic_model.Model`` instance after ``train`` has been called.
    """

    data = loader.read_data_sets(data,
                                 folders=['train', 'dev', 'item'],
                                 hashlist=['user', 'item', 'ratings'])
    data.train.labels['ratings'] = loader.center(data.train.labels['ratings'])
    data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'])
    # Sparse matrix with one entry of 1 per training example at
    # (user, item), sized user.dim x item.dim.
    # NOTE(review): `sps` is presumably scipy.sparse -- confirm at the import.
    utility_matrix = sps.csr_matrix(
        (numpy.ones(data.train.features['user'].vec.shape[0]),
         (data.train.features['user'].vec, data.train.features['item'].vec)),
        shape=(data.train.features['user'].dim,
               data.train.features['item'].dim))
    # Stored under 'util' so it reaches training through `supplement` below.
    data.item.features['util'] = utility_matrix

    xuser = tf.placeholder(tf.int32, [None])
    xitem = tf.placeholder(tf.int32, [None])

    xutil = tf.placeholder(tf.float32, [None, None])

    # Note: `initrange * tf.Variable(...)` yields the scaled tensor, not the
    # Variable itself, so every name below refers to a scaled view.
    wuser = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['user'].dim, kfactors]))
    witem = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['item'].dim, kfactors]))
    wplus = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['item'].dim, kfactors]))

    ubias = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['user'].dim]))
    ibias = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['item'].dim]))

    i_bias = tf.nn.embedding_lookup(ibias, xitem)
    u_bias = tf.nn.embedding_lookup(ubias, xuser)

    huser = tf.nn.embedding_lookup(wuser, xuser)
    hitem = tf.nn.embedding_lookup(witem, xitem)
    # Rows of the fed utility matrix selected by the user ids in the batch.
    hplus = tf.nn.embedding_lookup(xutil, xuser)

    # Product of the selected utility rows with wplus, scaled by the
    # reciprocal square root of each row's sum.
    # NOTE(review): a row of hplus that sums to 0 makes rsqrt return inf --
    # confirm every user fed here has at least one training rating.
    plus = tf.mul(
        tf.matmul(hplus, wplus, a_is_sparse=True),
        tf.rsqrt(tf.reduce_sum(hplus, reduction_indices=1, keep_dims=True)))
    huserplus = huser + plus

    # NOTE(review): presumably the dot product of the first two entries plus
    # the two biases -- confirm against node_ops.x_dot_y.
    y = node_ops.x_dot_y([huserplus, hitem, i_bias, u_bias])
    y_ = tf.placeholder("float", [None, None], name='Target')

    with tf.name_scope('objective'):
        # Squared error plus L2 penalties. The factor and bias penalties use
        # only the rows looked up for the batch; wplus is penalised in full.
        objective = (tf.reduce_sum(tf.square(y_ - y)) +
                     lambfactor * tf.reduce_sum(tf.square(huser)) +
                     lambfactor * tf.reduce_sum(tf.square(hitem)) +
                     lambfactor * tf.reduce_sum(tf.square(wplus)) +
                     lamb_bias * tf.reduce_sum(tf.square(i_bias)) +
                     lamb_bias * tf.reduce_sum(tf.square(u_bias)))

    # Maps feature/label names to the placeholders they are fed into.
    placeholderdict = {
        'ratings': y_,
        'util': xutil,
        'user': xuser,
        'item': xitem
    }
    mae = node_ops.mae(y_, y)
    with tf.name_scope('dev_rmse'):
        dev_rmse = node_ops.rmse(y_, y)
    # NOTE(review): the meaning of decay=(500, 0.999) is defined by
    # generic_model.Model and is not visible here.
    model = generic_model.Model(objective,
                                placeholderdict,
                                mb=mb,
                                learnrate=learnrate,
                                verbose=verbose,
                                maxbadcount=maxbadcount,
                                epochs=epochs,
                                evaluate=dev_rmse,
                                predictions=y,
                                model_name='svdplus',
                                random_seed=random_seed,
                                decay=(500, 0.999),
                                save_tensors={'mae': mae})
    model.train(data.train,
                dev=data.dev,
                supplement=data.item.features,
                eval_schedule=eval_rate)

    return model
Esempio n. 5
0
def dssm(data,
         configfile,
         layers=None,
         bn=True,
         keep_prob=.95,
         act='tanhlecun',
         initrange=1,
         kfactors=10,
         lamb=.1,
         mb=500,
         learnrate=0.0001,
         verbose=True,
         maxbadcount=10,
         epochs=100,
         model_name='dssm',
         random_seed=500,
         eval_rate=500):
    """Train a graph with one or more outputs against the same ratings target.

    The objective is the summed squared error of every output of the graph
    plus an L2 penalty on the ``'huser'``, ``'hitem'``, ``'ubias'`` and
    ``'ibias'`` tensors. Only the first output is used for evaluation and as
    the model's predictions.

    Args:
        data: Dataset object. ``data.user.features`` and
            ``data.item.features`` are merged and given to training as
            ``supplement``; ``data.dev.features`` is merged in as well for
            graph construction.
        configfile: Graph specification passed to ``config.AntGraph``.
        layers, bn, keep_prob, act: Accepted for API compatibility; not
            referenced in this function.
        initrange, kfactors: Values bound to the same-named config variables.
        lamb: Weight of the L2 penalty.
        mb, learnrate, verbose, maxbadcount, epochs, model_name, random_seed:
            Forwarded unchanged to ``generic_model.Model``.
        eval_rate: Forwarded to ``Model.train`` as ``eval_schedule``.

    Returns:
        The ``generic_model.Model`` instance after ``train`` has been called.
    """
    datadict = data.user.features.copy()
    datadict.update(data.item.features)

    configdatadict = data.dev.features.copy()
    configdatadict.update(datadict)
    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=configdatadict,
                              marker='-',
                              graph_name='basic_mf',
                              variable_bindings={
                                  'initrange': initrange,
                                  'kfactors': kfactors
                              })
    y_ = tf.placeholder("float", [None, None], name='Target')
    ant.placeholderdict['ratings'] = y_

    # Normalise to a list. Previously a single-tensor output left `objective`
    # undefined and the code below indexed and iterated the tensor itself.
    outputs = ant.tensor_out
    if not isinstance(outputs, (list, tuple)):
        outputs = [outputs]

    with tf.name_scope('objective'):
        objective = tf.reduce_sum(tf.square(y_ - outputs[0]))
        for extra_output in outputs[1:]:
            objective += tf.reduce_sum(tf.square(y_ - extra_output))
        objective += (
            lamb * tf.reduce_sum(tf.square(ant.tensordict['huser'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['hitem'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['ubias'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['ibias'])))
    with tf.name_scope('dev_rmse'):
        dev_rmse = node_ops.rmse(outputs[0], y_)

    model = generic_model.Model(objective,
                                ant.placeholderdict,
                                mb=mb,
                                learnrate=learnrate,
                                verbose=verbose,
                                maxbadcount=maxbadcount,
                                epochs=epochs,
                                evaluate=dev_rmse,
                                predictions=outputs[0],
                                # Was hard-coded to 'dssm', the same value as
                                # the default, so default callers are
                                # unaffected.
                                model_name=model_name,
                                random_seed=random_seed)

    model.train(data.train,
                dev=data.dev,
                supplement=datadict,
                eval_schedule=eval_rate)
    return model
Esempio n. 6
0
def tensorfactor(data,
                 context_key='occ',
                 lamb=0.01,
                 learnrate=0.0001,
                 verbose=True,
                 epochs=5,
                 maxbadcount=20,
                 mb=500,
                 initrange=0.0001,
                 eval_rate=10000,
                 random_seed=None,
                 uembed=50,
                 iembed=50,
                 cembed=50):
    """Load a ratings dataset and train a user/item/context tensor model.

    Args:
        data: Passed to ``loader.read_data_sets``; the name is then rebound
            to the loaded dataset object.
        context_key: Name of the third feature. If it is stored with the
            item (or else the user) features it is moved onto the train and
            dev examples by indexing with the example's item (or user)
            feature.
        lamb: Weight of every L2 term; also passed to
            ``node_ops.ternary_tensor_combine`` as ``l2``.
        learnrate, verbose, epochs, maxbadcount, mb, random_seed: Forwarded
            unchanged to ``generic_model.Model``.
        initrange: Scale of the embedding tables; also passed to
            ``node_ops.ternary_tensor_combine``.
        eval_rate: Forwarded to ``Model.train`` as ``eval_schedule``.
        uembed, iembed, cembed: Number of columns of the user, item and
            context embedding tables.

    Returns:
        The ``generic_model.Model`` instance after ``train`` has been called.
    """
    data = loader.read_data_sets(
        data,
        folders=('train', 'dev', 'item', 'user'),
        hashlist=('user', 'item', context_key, 'ratings'))
    for split in (data.train, data.dev):
        split.labels['ratings'] = loader.center(split.labels['ratings'])

    # Move the context feature from the item side (checked first) or the user
    # side onto the individual train/dev examples.
    for side_name in ('item', 'user'):
        side = getattr(data, side_name)
        if context_key not in side.features:
            continue
        per_entity = side.features[context_key]
        data.train.features[context_key] = per_entity[
            data.train.features[side_name]]
        data.dev.features[context_key] = per_entity[
            data.dev.features[side_name]]
        del side.features[context_key]
        break
    data.show()

    def dev_width(key):
        # Second dimension of the named dev feature.
        return data.dev.features[key].shape[1]

    item_ids = tf.placeholder(tf.int32, [None])
    user_ids = tf.placeholder(tf.int32, [None])
    context_ids = tf.placeholder(tf.int32, [None])

    user_table = initrange * tf.Variable(
        tf.truncated_normal([dev_width('user'), uembed]))
    item_table = initrange * tf.Variable(
        tf.truncated_normal([dev_width('item'), iembed]))
    context_table = initrange * tf.Variable(
        tf.truncated_normal([dev_width(context_key), cembed]))

    user_vec = tf.nn.embedding_lookup(user_table, user_ids)
    item_vec = tf.nn.embedding_lookup(item_table, item_ids)
    context_vec = tf.nn.embedding_lookup(context_table, context_ids)

    item_bias_table = tf.Variable(tf.truncated_normal([dev_width('item')]))
    user_bias_table = tf.Variable(tf.truncated_normal([dev_width('user')]))
    context_bias_table = tf.Variable(
        tf.truncated_normal([dev_width(context_key)]))

    item_bias = tf.nn.embedding_lookup(item_bias_table, item_ids)
    user_bias = tf.nn.embedding_lookup(user_bias_table, user_ids)
    context_bias = tf.nn.embedding_lookup(context_bias_table, context_ids)

    combined = node_ops.ternary_tensor_combine(
        [user_vec, item_vec, context_vec], initrange=initrange, l2=lamb)
    # Only the item and user biases enter the prediction; the context bias is
    # regularised below but not added here.
    predicted = combined + item_bias + user_bias
    target = tf.placeholder("float", [None, None], name='Target')

    placeholderdict = {
        'user': user_ids,
        'item': item_ids,
        context_key: context_ids,
        'ratings': target
    }

    with tf.name_scope('objective'):
        objective = tf.reduce_sum(tf.square(target - predicted))
        # The context term penalises the whole table; the others penalise
        # only the rows looked up for the batch.
        for regularised in (context_table, user_vec, item_vec, item_bias,
                            user_bias, context_bias):
            objective = objective + lamb * tf.reduce_sum(
                tf.square(regularised))
    with tf.name_scope('dev_rmse'):
        dev_rmse = node_ops.rmse(target, predicted)

    model = generic_model.Model(objective,
                                placeholderdict,
                                mb=mb,
                                learnrate=learnrate,
                                verbose=verbose,
                                maxbadcount=maxbadcount,
                                epochs=epochs,
                                evaluate=dev_rmse,
                                predictions=predicted,
                                model_name='tensorfactor',
                                random_seed=random_seed)
    model.train(data.train, dev=data.dev, eval_schedule=eval_rate)

    return model
Esempio n. 7
0
def tree(data,
         configfile,
         lamb=0.001,
         kfactors=20,
         learnrate=0.0001,
         verbose=True,
         maxbadcount=20,
         mb=500,
         initrange=0.00001,
         epochs=10,
         random_seed=None,
         eval_rate=500,
         keep_prob=0.95,
         act='tanh'):
    """Train the graph described by ``configfile`` with L2 regularisation.

    Args:
        data: Dataset object. ``data.user.features`` and
            ``data.item.features`` are merged and given to training as
            ``supplement``; ``data.dev.features`` is merged in as well for
            graph construction. ``data.dev.num_examples`` scales the
            evaluation tensor.
        configfile: Graph specification passed to ``config.AntGraph``.
        lamb: Weight applied to the sum of squares of the ``'huser'``,
            ``'hitem'``, ``'ubias'`` and ``'ibias'`` tensors of the graph.
        kfactors, initrange, keep_prob, act: Values bound to the same-named
            config variables.
        learnrate, verbose, maxbadcount, mb, epochs: Forwarded unchanged to
            ``generic_model.Model``.
        random_seed: Forwarded to ``generic_model.Model`` when not ``None``;
            with ``None`` the model's own default is used.
        eval_rate: Forwarded to ``Model.train`` as ``eval_schedule``.

    Returns:
        The ``generic_model.Model`` instance after ``train`` has been called.
    """
    datadict = data.user.features.copy()
    datadict.update(data.item.features)

    configdatadict = data.dev.features.copy()
    configdatadict.update(datadict)

    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=configdatadict,
                              marker='-',
                              variable_bindings={
                                  'kfactors': kfactors,
                                  'initrange': initrange,
                                  'keep_prob': keep_prob,
                                  'act': act
                              },
                              graph_name='tree')

    y = ant.tensor_out
    y_ = tf.placeholder("float", [None, None], name='Target')
    ant.placeholderdict[
        'ratings'] = y_  # put the new placeholder in the graph for training
    with tf.name_scope('objective'):

        objective = (tf.reduce_sum(tf.square(y_ - y)) +
                     lamb * tf.reduce_sum(tf.square(ant.tensordict['huser'])) +
                     lamb * tf.reduce_sum(tf.square(ant.tensordict['hitem'])) +
                     lamb * tf.reduce_sum(tf.square(ant.tensordict['ubias'])) +
                     lamb * tf.reduce_sum(tf.square(ant.tensordict['ibias'])))
    with tf.name_scope('dev_rmse'):
        # The squared error is divided by the size of the dev set, so this is
        # a true RMSE only when it is evaluated on the whole dev set.
        dev_rmse = tf.sqrt(
            tf.div(tf.reduce_sum(tf.square(y - y_)), data.dev.num_examples))

    # random_seed used to be accepted and then dropped. Forward it only when
    # the caller supplied one, so calls that rely on the default keep whatever
    # seed behaviour Model had before.
    seed_options = {}
    if random_seed is not None:
        seed_options['random_seed'] = random_seed

    with tf.name_scope('training'):
        model = generic_model.Model(objective,
                                    ant.placeholderdict,
                                    mb=mb,
                                    learnrate=learnrate,
                                    verbose=verbose,
                                    maxbadcount=maxbadcount,
                                    epochs=epochs,
                                    evaluate=dev_rmse,
                                    predictions=y,
                                    model_name='tree',
                                    **seed_options)
        model.train(data.train,
                    dev=data.dev,
                    supplement=datadict,
                    eval_schedule=eval_rate)

    return model
Esempio n. 8
0
def mf(data,
       configfile,
       lamb=0.001,
       kfactors=20,
       learnrate=0.01,
       verbose=True,
       epochs=1000,
       maxbadcount=20,
       mb=500,
       initrange=1,
       eval_rate=500,
       random_seed=None,
       develop=False):
    """Load a ratings dataset and train the model described by ``configfile``.

    Args:
        data: Passed to ``loader.read_data_sets``; the name is then rebound
            to the loaded dataset object.
        configfile: Graph specification passed to ``config.AntGraph``.
        lamb: Bound to ``'lamb'`` in the config and also used here as the
            weight on the sum of squares of the ``'huser'``, ``'hitem'``,
            ``'ubias'`` and ``'ibias'`` tensors of the graph.
        kfactors, initrange: Values bound to the same-named config variables.
        learnrate, verbose, epochs, maxbadcount, mb, random_seed: Forwarded
            unchanged to ``generic_model.Model``.
        eval_rate: Forwarded to ``Model.train`` as ``eval_schedule``.
        develop: Forwarded to ``config.AntGraph``.

    Returns:
        The ``generic_model.Model`` instance after ``train`` has been called.
    """
    data = loader.read_data_sets(data,
                                 hashlist=['item', 'user', 'ratings'],
                                 folders=['dev', 'train', 'item'])
    data.train.labels['ratings'] = loader.center(data.train.labels['ratings'])
    data.dev.labels['ratings'] = loader.center(data.dev.labels['ratings'])
    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=data.dev.features,
                              marker='-',
                              graph_name='basic_mf',
                              develop=develop,
                              variable_bindings={
                                  'kfactors': kfactors,
                                  'initrange': initrange,
                                  'lamb': lamb
                              })
        print(ant.tensor_out)
        y = node_ops.x_dot_y(ant.tensor_out)
        y_ = tf.placeholder("float", [None, None], name='Target')

        # A block copied from the SVD++ variant used to sit here. It read an
        # undefined `utility_matrix` (NameError) and built placeholders and
        # variables that never reached the objective, the predictions or the
        # placeholder dict, so it has been removed.

        ant.placeholderdict['ratings'] = y_
        with tf.name_scope('objective'):
            objective = (tf.reduce_sum(tf.square(y_ - y)))
        objective += (
            lamb * tf.reduce_sum(tf.square(ant.tensordict['huser'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['hitem'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['ubias'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['ibias'])))
        with tf.name_scope('dev_rmse'):
            dev_rmse = node_ops.rmse(y_, y)
        model = generic_model.Model(objective,
                                    ant.placeholderdict,
                                    mb=mb,
                                    learnrate=learnrate,
                                    verbose=verbose,
                                    maxbadcount=maxbadcount,
                                    epochs=epochs,
                                    evaluate=dev_rmse,
                                    predictions=y,
                                    model_name='mf',
                                    random_seed=random_seed)
        model.train(data.train, dev=data.dev, eval_schedule=eval_rate)

        return model
Esempio n. 9
0
# Script fragment: builds the graph from 'tree.config' and trains it.
# `data` and `datadict` are defined before this excerpt.

# Copy the dev features and overlay `datadict`; on a key clash the entry from
# `datadict` wins.
configdatadict = data.dev.features.copy()
configdatadict.update(datadict)

with tf.variable_scope('mfgraph'):
    ant = config.AntGraph('tree.config',
                          data=configdatadict,
                          marker='-',
                          variable_bindings={
                              'kfactors': 100,
                              'initrange': 0.0001
                          },
                          develop=False)

y = ant.tensor_out
y_ = tf.placeholder("float", [None, None], name='Target')
ant.placeholderdict[
    'ratings'] = y_  # put the new placeholder in the graph for training
# Summed squared error, with no regularisation term added here.
objective = tf.reduce_sum(tf.square(y_ - y))
dev_rmse = node_ops.rmse(y, y_)

model = generic_model.Model(objective,
                            ant.placeholderdict,
                            mb=500,
                            learnrate=0.00001,
                            verbose=True,
                            maxbadcount=20,
                            epochs=100,
                            evaluate=dev_rmse,
                            predictions=y)
# `datadict` is passed as `supplement` alongside the train and dev splits.
model.train(data.train, dev=data.dev, supplement=datadict, eval_schedule=2000)
Esempio n. 10
0
def dsadd(data,
          configfile,
          initrange=0.1,
          kfactors=20,
          lamb=.01,
          mb=500,
          learnrate=0.003,
          verbose=True,
          maxbadcount=10,
          epochs=100,
          model_name='dssm',
          random_seed=500,
          eval_rate=500):
    """Train a learned weighted sum of the outputs of the configured graph.

    Each output of the graph is multiplied by its own trainable scalar and
    the products are summed to form the prediction that enters the
    squared-error objective. The objective also carries L2 penalties on the
    ``'huser'``, ``'hitem'``, ``'ubias'`` and ``'ibias'`` tensors and on the
    concatenated raw outputs.

    Args:
        data: Dataset object. ``data.user.features`` and
            ``data.item.features`` are merged and given to training as
            ``supplement``; ``data.dev.features`` is merged in as well for
            graph construction. ``data.dev.num_examples`` scales the
            evaluation tensor.
        configfile: Graph specification passed to ``config.AntGraph``.
        initrange, kfactors: Values bound to the same-named config variables.
        lamb: Weight of every L2 penalty term.
        mb, verbose, maxbadcount, epochs, model_name, random_seed: Forwarded
            unchanged to ``generic_model.Model``.
        learnrate: Currently NOT used; see the review note at the ``Model``
            call.
        eval_rate: Forwarded to ``Model.train`` as ``eval_schedule``.

    Returns:
        The ``generic_model.Model`` instance after ``train`` has been called.
        Its ``predictions`` tensor is the first raw graph output, not the
        weighted sum.
    """
    datadict = data.user.features.copy()
    datadict.update(data.item.features)

    configdatadict = data.dev.features.copy()
    configdatadict.update(datadict)
    with tf.name_scope('ant_graph'):
        ant = config.AntGraph(configfile,
                              data=configdatadict,
                              marker='-',
                              graph_name='basic_mf',
                              variable_bindings={
                                  'initrange': initrange,
                                  'kfactors': kfactors
                              })
    y_ = tf.placeholder("float", [None, None], name='Target')
    ant.placeholderdict['ratings'] = y_

    # Normalise to a list. Previously a single-tensor output skipped the
    # branch that defined `prediction` and the function died with NameError.
    outputs = ant.tensor_out
    if isinstance(outputs, (list, tuple)):
        outputs = list(outputs)
    else:
        outputs = [outputs]

    with tf.name_scope('objective'):
        # One trainable scalar per output, stored as a column vector.
        scalars = tf.Variable(
            0.001 * tf.truncated_normal([len(outputs), 1]))
        prediction = tf.mul(outputs[0], tf.slice(scalars, [0, 0], [1, 1]))
        for i, output in enumerate(outputs[1:], start=1):
            with tf.variable_scope('predictor%d' % i):
                prediction = prediction + tf.mul(
                    output, tf.slice(scalars, [i, 0], [1, 1]))
        squared_error = tf.square(y_ - prediction)

        objective = (
            tf.reduce_sum(squared_error) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['huser'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['hitem'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['ubias'])) +
            lamb * tf.reduce_sum(tf.square(ant.tensordict['ibias'])) +
            lamb * tf.reduce_sum(tf.square(tf.concat(1, outputs))))
    with tf.name_scope('dev_rmse'):
        # The squared error is divided by the size of the dev set, so this is
        # a true RMSE only when it is evaluated on the whole dev set.
        dev_rmse = tf.sqrt(
            tf.div(tf.reduce_sum(squared_error), data.dev.num_examples))

    # mb, verbose, maxbadcount, epochs, model_name and random_seed used to be
    # hard-coded to literals equal to the parameter defaults; they now follow
    # the arguments, which leaves default calls unchanged.
    # NOTE(review): the learning rate stays at the literal 0.000001. Switching
    # to the `learnrate` argument (default 0.003) would change default
    # training by three orders of magnitude -- confirm which is intended.
    model = generic_model.Model(objective,
                                ant.placeholderdict,
                                mb=mb,
                                learnrate=0.000001,
                                verbose=verbose,
                                maxbadcount=maxbadcount,
                                epochs=epochs,
                                evaluate=dev_rmse,
                                predictions=outputs[0],
                                model_name=model_name,
                                random_seed=random_seed)

    model.train(data.train,
                dev=data.dev,
                supplement=datadict,
                eval_schedule=eval_rate)
    return model
Esempio n. 11
0
# Script fragment: sets up and trains the 'genrepred' model.
# `x`, `y`, `args` and `data` are defined before this excerpt.

# Target placeholder with 19 columns per example.
y_ = tf.placeholder(tf.float32, [None, 19])

objective = node_ops.other_cross_entropy(y, y_)
# Detections derived from `y` using the command-line threshold.
detection = node_ops.detection(y, args.threshold)

# Recall: percentage of the actual genres that were detected.
recall = node_ops.recall(y, y_, detects=detection)
# Precision: percentage of the predicted genres that were correct.
precision = node_ops.precision(y, y_, detects=detection)
fscore = node_ops.fscore(precisions=precision, recalls=recall)
# Maps feature/label names to the placeholders they are fed into.
placeholderdict = {'words': x, 'genre': y_}
# The training objective doubles as the evaluation tensor; the metrics above
# are only passed along through `save_tensors`.
model = generic_model.Model(objective,
                            placeholderdict,
                            mb=args.mb,
                            learnrate=args.learnrate,
                            verbose=args.verbose,
                            maxbadcount=args.maxbadcount,
                            epochs=100,
                            evaluate=objective,
                            predictions=detection,
                            model_name='genrepred',
                            random_seed=500,
                            save_tensors={
                                'fscore': fscore,
                                'precision': precision,
                                'recall': recall,
                                # First entry of the 'activation_layers'
                                # graph collection.
                                'dist':
                                tf.get_collection('activation_layers')[0]
                            })
z = model.train(data.train, dev=data.dev, eval_schedule=args.eval_rate)
Esempio n. 12
0
# Script fragment: builds the graph from 'mf2.config' and trains it.
# `data` is defined before this excerpt.
with tf.variable_scope('mfgraph'):
    ant = config.AntGraph('mf2.config',
                          data=data.dev.features,
                          marker='-',
                          variable_bindings={
                              'kfactors': 100,
                              'initrange': 0.001,
                              'l2': 0.1
                          })

y = ant.tensor_out
y_ = tf.placeholder("float", [None, None], name='Target')
ant.placeholderdict[
    'ratings'] = y_  # put the new placeholder in the graph for training
# Summed squared error; no regularisation term is added in this script (the
# 'l2' value is only handed to the config).
objective = tf.reduce_sum(tf.square(y_ - y))
dev_rmse = node_ops.rmse(y, y_)
dev_mae = node_ops.mae(y, y_)

# RMSE is the evaluation tensor; MAE is passed along through `save_tensors`.
model = generic_model.Model(objective,
                            ant.placeholderdict,
                            mb=500,
                            learnrate=0.01,
                            verbose=True,
                            maxbadcount=10,
                            epochs=100,
                            evaluate=dev_rmse,
                            predictions=y,
                            save_tensors={'dev_mae': dev_mae})
# No eval_schedule is given, so the default of Model.train applies.
model.train(data.train, dev=data.dev)
Esempio n. 13
0
    y_ = tf.placeholder("float", [None, None], name='Target')
    ant.placeholderdict['targets'] = y_
    if type == 'r':
        objective = node_ops.mse(y, y_)
    if type == 'c':
        objective = node_ops.accuracy(y, y_)

    with tf.name_scope('objective'):
        objective = (tf.reduce_sum(tf.square(y_ - y)) +
                     lamb*tf.reduce_sum(tf.square(ant.tensordict['huser'])) +
                     lamb*tf.reduce_sum(tf.square(ant.tensordict['hitem'])) +
                     lamb*tf.reduce_sum(tf.square(ant.tensordict['ubias'])) +
                     lamb*tf.reduce_sum(tf.square(ant.tensordict['ibias'])))
    with tf.name_scope('dev_rmse'):
        dev_rmse = node_ops.rmse(y_, y)
        #dev_rmse = tf.sqrt(tf.div(tf.reduce_sum(tf.square(y - y_)), data.dev.num_examples))

    model = generic_model.Model(objective, ant.placeholderdict,
                                mb=mb,
                                learnrate=learnrate,
                                verbose=verbose,
                                maxbadcount=maxbadcount,
                                epochs=epochs,
                                evaluate=dev_rmse,
                                predictions=y,
                                model_name='mf',
                                random_seed=500)
    model.train(data.train, dev=data.dev, eval_schedule=200)

    return model
Esempio n. 14
0
def tensorfactor(data,
                 lamb=0.01,
                 learnrate=0.0001,
                 verbose=True,
                 epochs=100,
                 maxbadcount=20,
                 mb=500,
                 initrange=0.0001,
                 eval_rate=10000,
                 random_seed=None,
                 uembed=50,
                 iembed=50,
                 gembed=50):
    """Build and train a user/item/genre factorization model for ratings.

    The prediction combines user, item and genre embeddings through
    ``node_ops.ternary_tensor_combine`` and adds per-item and per-user bias
    terms. The objective is the summed squared error plus ``lamb``-weighted
    sum-of-squares penalties.

    :param data: Passed straight to ``loader.read_data_sets`` (presumably a
        dataset location -- confirm against the loader).
    :param lamb: Weight of every sum-of-squares penalty; also passed as
        ``l2`` to ``ternary_tensor_combine``.
    :param learnrate: Forwarded to ``generic_model.Model``.
    :param verbose: Forwarded to ``generic_model.Model``.
    :param epochs: Forwarded to ``generic_model.Model``.
    :param maxbadcount: Forwarded to ``generic_model.Model``.
    :param mb: Forwarded to ``generic_model.Model``.
    :param initrange: Multiplier applied to the freshly initialised embedding
        matrices; also passed to ``ternary_tensor_combine``.
    :param eval_rate: Passed to ``Model.train`` as ``eval_schedule``.
    :param random_seed: Forwarded to ``generic_model.Model``.
    :param uembed: Width of the user embedding.
    :param iembed: Width of the item embedding.
    :param gembed: Width of the genre embedding.
    :return: The ``generic_model.Model`` after ``train`` has been called.
    """
    data = loader.read_data_sets(
        data,
        folders=('train', 'dev', 'item'),
        hashlist=('user', 'item', 'genres', 'ratings'))

    # Run the ratings of each split through loader.center.
    for split in (data.train, data.dev):
        split.labels['ratings'] = loader.center(split.labels['ratings'])

    # Attach to every example the genre row of the item it refers to.
    for split in (data.train, data.dev):
        item_rows = split.features['item'].vec
        split.features['genre'] = data.item.features['genres'][item_rows, :]

    data.show()

    # Vocabulary / feature sizes are read off the dev split.
    dev_features = data.dev.features
    n_users = dev_features['user'].shape[1]
    n_items = dev_features['item'].shape[1]
    n_genres = dev_features['genre'].shape[1]

    # Graph inputs. Creation order of TensorFlow ops is kept stable on
    # purpose, since it determines auto-generated op names.
    item_ids = tf.placeholder(tf.int32, [None])
    user_ids = tf.placeholder(tf.int32, [None])
    genre_in = tf.placeholder(tf.float32, [None, n_genres])

    # Embedding tables, scaled down by initrange.
    user_table = initrange * tf.Variable(tf.truncated_normal([n_users, uembed]))
    item_table = initrange * tf.Variable(tf.truncated_normal([n_items, iembed]))
    genre_table = initrange * tf.Variable(
        tf.truncated_normal([n_genres, gembed]))

    user_vec = tf.nn.embedding_lookup(user_table, user_ids)
    item_vec = tf.nn.embedding_lookup(item_table, item_ids)
    genre_vec = tf.matmul(genre_in, genre_table, a_is_sparse=True)

    # Bias tables (these are not scaled by initrange).
    item_bias_table = tf.Variable(tf.truncated_normal([n_items]))
    user_bias_table = tf.Variable(tf.truncated_normal([n_users]))
    genre_bias_table = tf.Variable(tf.truncated_normal([n_genres, 1]))

    item_bias = tf.nn.embedding_lookup(item_bias_table, item_ids)
    user_bias = tf.nn.embedding_lookup(user_bias_table, user_ids)
    # NOTE(review): the genre bias is built but never added to the
    # prediction below -- confirm whether that omission is intentional.
    genre_bias = tf.matmul(genre_in, genre_bias_table, a_is_sparse=True)

    interaction = node_ops.ternary_tensor_combine(
        [user_vec, item_vec, genre_vec],
        initrange=initrange,
        l2=lamb)
    prediction = interaction + item_bias + user_bias
    target = tf.placeholder("float", [None, None], name='Target')

    placeholderdict = {
        'user': user_ids,
        'item': item_ids,
        'genre': genre_in,
        'ratings': target,
    }

    with tf.name_scope('objective'):
        # Squared error first, then one penalty per tensor, accumulated left
        # to right.
        objective = tf.reduce_sum(tf.square(target - prediction))
        for penalized in (genre_table, user_vec, item_vec,
                          item_bias, user_bias):
            objective = objective + lamb * tf.reduce_sum(tf.square(penalized))

    with tf.name_scope('dev_rmse'):
        dev_rmse = node_ops.rmse(target, prediction)

    model = generic_model.Model(
        objective,
        placeholderdict,
        mb=mb,
        learnrate=learnrate,
        verbose=verbose,
        maxbadcount=maxbadcount,
        epochs=epochs,
        evaluate=dev_rmse,
        predictions=prediction,
        model_name='tensorfactor',
        random_seed=random_seed)
    model.train(data.train, dev=data.dev, eval_schedule=eval_rate)

    return model
Esempio n. 15
0
def tree(data,
         configfile,
         lamb=0.001,
         kfactors=50,
         learnrate=0.00001,
         verbose=True,
         maxbadcount=20,
         mb=500,
         initrange=0.00001,
         epochs=10,
         random_seed=None,
         eval_rate=500,
         keep_prob=0.95,
         act='tanh'):
    """Build and train the 'tree' rating model described by ``configfile``.

    The prediction is a learned weighted sum of the two outputs of the
    config-defined graph plus per-user and per-item bias terms. The objective
    is the summed squared error plus ``lamb``-weighted sum-of-squares
    penalties on the 'huser', 'hitem', 'huser2' and 'hitem2' graph tensors
    and on the looked-up biases.

    :param data: Dataset object exposing ``user.features``,
        ``item.features``, ``dev.features``, ``train`` and ``dev``.
    :param configfile: Graph specification handed to ``config.AntGraph``.
    :param lamb: Weight of every sum-of-squares penalty.
    :param kfactors: Bound to 'kfactors' in the config's variable bindings.
    :param learnrate: Forwarded to ``generic_model.Model``.
    :param verbose: Forwarded to ``generic_model.Model``.
    :param maxbadcount: Forwarded to ``generic_model.Model``.
    :param mb: Forwarded to ``generic_model.Model``.
    :param initrange: Bound to 'initrange' in the config and used to scale
        the bias tables.
    :param epochs: Forwarded to ``generic_model.Model``.
    :param random_seed: Forwarded to ``generic_model.Model``. It was
        previously accepted but never used.
    :param eval_rate: Passed to ``Model.train`` as ``eval_schedule``.
    :param keep_prob: Bound to 'keep_prob' in the config.
    :param act: Bound to 'act' in the config.
    :return: The ``generic_model.Model`` after ``train`` has been called.
    """
    # User and item side features, supplied to training as a supplement.
    datadict = data.user.features.copy()
    datadict.update(data.item.features)

    # The graph config sees the dev features overlaid with the side features.
    configdatadict = data.dev.features.copy()
    configdatadict.update(datadict)

    with tf.name_scope('ant_graph'):
        bindings = {
            'kfactors': kfactors,
            'initrange': initrange,
            'keep_prob': keep_prob,
            'act': act,
        }
        ant = config.AntGraph(configfile,
                              data=configdatadict,
                              marker='-',
                              variable_bindings=bindings,
                              graph_name='tree')
    # ant.display_graph()

    # Learned 1x1 mixing weights for the two graph outputs.
    alpha = node_ops.weights('tnorm', [1, 1], l2=1.0)
    beta = node_ops.weights('tnorm', [1, 1], l2=1.0)
    print(ant.tensor_out)

    # Bias tables, scaled down by initrange.
    ubias = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['user'].dim]))
    ibias = initrange * tf.Variable(
        tf.truncated_normal([data.dev.features['item'].dim]))

    i_bias = tf.nn.embedding_lookup(ibias, ant.placeholderdict['item'])
    u_bias = tf.nn.embedding_lookup(ubias, ant.placeholderdict['user'])

    y = alpha * ant.tensor_out[0] + beta * ant.tensor_out[1] + u_bias + i_bias
    y_ = tf.placeholder("float", [None, None], name='Target')
    # Register the target placeholder so the trainer can feed it.
    ant.placeholderdict['ratings'] = y_

    with tf.name_scope('objective'):
        # Squared error first, then one penalty per tensor, accumulated left
        # to right so the op creation order matches a single chained sum.
        objective = tf.reduce_sum(tf.square(y_ - y))
        for node_name in ('huser', 'hitem', 'huser2', 'hitem2'):
            objective = objective + lamb * tf.reduce_sum(
                tf.square(ant.tensordict[node_name]))
        for bias_term in (u_bias, i_bias):
            objective = objective + lamb * tf.reduce_sum(tf.square(bias_term))

    with tf.name_scope('dev_rmse'):
        dev_rmse = node_ops.rmse(y_, y)

    with tf.name_scope('training'):
        model = generic_model.Model(objective,
                                    ant.placeholderdict,
                                    mb=mb,
                                    learnrate=learnrate,
                                    verbose=verbose,
                                    maxbadcount=maxbadcount,
                                    epochs=epochs,
                                    evaluate=dev_rmse,
                                    predictions=y,
                                    model_name='tree',
                                    train_evaluate=dev_rmse,
                                    # Honour the caller's seed instead of
                                    # silently dropping the argument.
                                    random_seed=random_seed)
        model.train(data.train,
                    dev=data.dev,
                    supplement=datadict,
                    eval_schedule=eval_rate,
                    train_dev_eval_factor=5)
    return model
Esempio n. 16
0
# Input placeholder with 12734 columns; fed under the key 'words' below
# (presumably a bag-of-words vector per example -- confirm against the data).
x = tf.placeholder(tf.float32, [None, 12734])
# Network built by node_ops.dnn with layer spec [19] and sigmoid activation,
# so the output width matches the 19-column target placeholder.
y = node_ops.dnn(x, [19], initrange=args.initrange, activation='sigmoid')
# Target placeholder with 19 columns; fed under the key 'genre' below.
y_ = tf.placeholder(tf.float32, [None, 19])

# Training objective and thresholded detections derived from the outputs.
objective = node_ops.other_cross_entropy(y, y_)
detection = node_ops.detection(y, args.threshold)

# recall: percentage of actual genres detected
recall = node_ops.recall(y_, detects=detection)
# precision: percentage of the genres we predicted that were correct
precision = node_ops.precision(y_, detects=detection)
# F-score combined from the precision and recall tensors above.
fscore = node_ops.fscore(precisions=precision, recalls=recall)
# Maps data feature names to the placeholders they are fed into.
placeholderdict = {'words': x, 'genre': y_}
# The objective itself doubles as the evaluation tensor; the detections are
# the model's predictions; precision/recall/fscore are registered under
# save_tensors. The epoch count (100) is hard-coded, the rest comes from args.
model = generic_model.Model(objective,
                            placeholderdict,
                            mb=args.mb,
                            learnrate=args.learnrate,
                            verbose=args.verbose,
                            maxbadcount=args.maxbadcount,
                            epochs=100,
                            evaluate=objective,
                            predictions=detection,
                            model_name='genrepred',
                            save_tensors={
                                'precision': precision,
                                'recall': recall,
                                'fscore': fscore
                            },
                            random_seed=args.random_seed)
# Train on the train split with the dev split for evaluation; the return
# value of train() is kept in z.
z = model.train(data.train, dev=data.dev, eval_schedule=args.eval_rate)