def main(opts, logfile=None, restore_point=None):
    if logfile is not None:
        LOG = open(logfile, "w", 0)
    else:
        LOG = sys.stdout
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    path = opts['data_path']

    if 'movielens-100k' in path:
        data = get_data(
            path, train=.75, valid=.05, test=.2, mode='sparse', fold=1
        )  # ml-100k uses the official test set, so only the valid parameter matters
    else:
        data = get_data(path,
                        train=.6,
                        valid=.2,
                        test=.2,
                        mode='sparse',
                        fold=1)

    #build encoder and decoder and use VAE loss
    N, M, num_features = data['mat_shape']
    maxN, maxM = opts['maxN'], opts['maxM']

    if N < maxN: maxN = N
    if M < maxM: maxM = M
    lossfn = opts.get("loss", "mse")

    if opts['verbose'] > 0:
        print('\nFactorized Autoencoder run settings:', file=LOG)
        print('dataset: ', path, file=LOG)
        print('Exchangable layer pool mode: ',
              opts['defaults']['matrix_sparse']['pool_mode'],
              file=LOG)
        print('Pooling layer pool mode: ',
              opts['defaults']['matrix_pool_sparse']['pool_mode'],
              file=LOG)
        print('learning rate: ', opts['lr'], file=LOG)
        print('activation: ',
              opts['defaults']['matrix_sparse']['activation'],
              file=LOG)
        print('number of latent features: ',
              opts['encoder'][-2]['units'],
              file=LOG)
        print('maxN: ', opts['maxN'], file=LOG)
        print('maxM: ', opts['maxM'], file=LOG)
        print('', file=LOG)
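    # For reference, a hypothetical `opts` layout consistent with the keys read in
    # this function (illustrative values only, not from the source):
    # opts = {
    #     'data_path': 'data/movielens-100k', 'lr': 1e-3, 'epochs': 100,
    #     'maxN': 1000, 'maxM': 1000, 'minibatch_size': 100000,
    #     'loss': 'mse', 'sample_mode': 'uniform_over_dense_values',
    #     'validate_interval': 10, 'validation_threshold': 1., 'verbose': 1,
    #     'ckpt_folder': 'checkpoints', 'model_name': 'fac_ae',
    #     'encoder': [...], 'decoder': [...], 'defaults': {...},
    # }  # optional keys: 'ema_decay', 'save_best', 'checkpoint_interval', 'restore_point_epoch'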

    with tf.Graph().as_default():
        mat_values_tr = tf.placeholder(tf.float32,
                                       shape=[None],
                                       name='mat_values_tr')
        mask_indices_tr = tf.placeholder(tf.int32,
                                         shape=[None, 2],
                                         name='mask_indices_tr')

        mat_values_val = tf.placeholder(tf.float32,
                                        shape=[None],
                                        name='mat_values_val')
        mask_split = tf.placeholder(tf.float32,
                                    shape=[None],
                                    name='mask_split')
        mask_indices_val = tf.placeholder(tf.int32,
                                          shape=[None, 2],
                                          name='mask_indices_val')
        mask_indices_tr_val = tf.placeholder(tf.int32,
                                             shape=[None, 2],
                                             name='mask_indices_tr_val')

        tr_dict = {
            'input': mat_values_tr,
            'mask_indices': mask_indices_tr,
            'units': 1 if lossfn == "mse" else 5,
            'shape': [N, M],
        }

        val_dict = {
            'input': mat_values_tr,
            'mask_indices': mask_indices_tr,
            'units': 1 if lossfn == "mse" else 5,
            'shape': [N, M],
        }

        encoder = Model(layers=opts['encoder'],
                        layer_defaults=opts['defaults'],
                        scope="encoder",
                        verbose=2)  #define the encoder
        out_enc_tr = encoder.get_output(tr_dict)  #build the encoder
        enc_ema_op, enc_getter = setup_ema("encoder",
                                           opts.get("ema_decay", 1.))
        out_enc_val = encoder.get_output(
            val_dict,
            reuse=True,
            verbose=0,
            is_training=False,
            getter=enc_getter)  #get encoder output, reusing the neural net

        tr_dict = {
            'nvec': out_enc_tr['nvec'],
            'mvec': out_enc_tr['mvec'],
            'units': out_enc_tr['units'],
            'mask_indices': mask_indices_tr,
            'shape': out_enc_tr['shape'],
        }

        val_dict = {
            'nvec': out_enc_val['nvec'],
            'mvec': out_enc_val['mvec'],
            'units': out_enc_val['units'],
            'mask_indices': mask_indices_tr_val,
            'shape': out_enc_val['shape'],
        }

        decoder = Model(layers=opts['decoder'],
                        layer_defaults=opts['defaults'],
                        scope="decoder",
                        verbose=2)  #define the decoder
        out_dec_tr = decoder.get_output(tr_dict)  #build it
        out_tr = out_dec_tr['input']
        dec_ema_op, dec_getter = setup_ema("decoder",
                                           opts.get("ema_decay", 1.))
        ema_op = enc_ema_op + dec_ema_op

        out_dec_val = decoder.get_output(
            val_dict,
            reuse=True,
            verbose=0,
            is_training=False,
            getter=dec_getter)  #reuse it for validation
        out_val = out_dec_val['input']

        eout_val = expected_value(
            tf.nn.softmax(tf.reshape(out_val, shape=[-1, 5])))

        #loss and training
        reg_loss = sum(tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES))  # regularization

        rec_loss, rec_loss_val, total_loss = get_losses(
            lossfn, reg_loss, mat_values_tr, mat_values_val, mask_indices_tr,
            mask_indices_val, out_tr, out_val, mask_split)
        train_step = get_optimizer(total_loss, opts)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        sess.run(tf.global_variables_initializer())

        if ('by_row_column_density' in opts['sample_mode']
                or 'conditional_sample_sparse' in opts['sample_mode']):
            iters_per_epoch = math.ceil(N / maxN) * math.ceil(
                M / maxM
            )  # a rough heuristic: in expectation the whole matrix is covered once per epoch
        elif 'uniform_over_dense_values' in opts['sample_mode']:
            minibatch_size = np.minimum(opts['minibatch_size'],
                                        data['mask_indices_tr'].shape[0])
            iters_per_epoch = data['mask_indices_tr'].shape[0] // minibatch_size
        elif 'neighbourhood' in opts['sample_mode']:
            minibatch_size = np.minimum(opts['minibatch_size'],
                                        data['mask_indices_tr'].shape[0])
            weights = csr_matrix(
                (np.ones_like(data['mat_values_tr']),
                 (data['mask_indices_tr'][:, 0],
                  data['mask_indices_tr'][:, 1])),
                data["mat_shape"][0:2])

            sp_mat = csr_matrix(
                (data['mat_values_all'], (data['mask_indices_all'][:, 0],
                                          data['mask_indices_all'][:, 1])),
                data["mat_shape"][0:2])

        min_loss = 5.
        min_train = 5.
        min_loss_epoch = 0
        losses = OrderedDict()
        losses["train"] = []
        losses["valid"] = []
        losses["test"] = []
        min_ts_loss = 5.
        min_val_ts = 5.

        saver = tf.train.Saver()
        if restore_point is not None:
            saver.restore(sess, restore_point)

        best_log = "logs/best_" + opts.get("model_name", "TEST") + ".log"
        print("epoch,train,valid,test\n", file=open(best_log, "a"))

        saved_tr_loss = []
        saved_val_loss = []

        for ep in range(opts.get('restore_point_epoch', 0),
                        opts['epochs'] + opts.get('restore_point_epoch', 0)):
            begin = time.time()
            loss_tr_, rec_loss_tr_, loss_val_, loss_ts_ = 0., 0., 0., 0.

            if 'by_row_column_density' in opts['sample_mode']:
                for indn_, indm_ in tqdm(
                        sample_submatrix(data['mask_tr'],
                                         maxN,
                                         maxM,
                                         sample_uniform=False),
                        total=iters_per_epoch):  #go over mini-batches

                    inds_ = np.ix_(
                        indn_, indm_, [0]
                    )  #select a sub-matrix given random indices for users/movies
                    mat_sp = data['mat_tr_val'][inds_] * data['mask_tr'][inds_]
                    mat_values = dense_array_to_sparse(mat_sp)['values']
                    mask_indices = dense_array_to_sparse(
                        data['mask_tr'][inds_])['indices'][:, 0:2]

                    tr_dict = {
                        mat_values_tr:
                        mat_values if lossfn == "mse" else one_hot(mat_values),
                        mask_indices_tr:
                        mask_indices,
                        mask_split:
                        np.ones_like(mat_values)
                    }

                    returns = sess.run([train_step, total_loss, rec_loss] +
                                       ema_op,
                                       feed_dict=tr_dict)
                    bloss_, brec_loss_ = returns[1:3]

                    loss_tr_ += np.sqrt(bloss_)
                    rec_loss_tr_ += np.sqrt(brec_loss_)

            elif 'uniform_over_dense_values' in opts['sample_mode']:
                for sample_ in tqdm(sample_dense_values_uniform(
                        data['mask_indices_tr'], minibatch_size,
                        iters_per_epoch),
                                    total=iters_per_epoch):
                    mat_values = data['mat_values_tr'][sample_]
                    mask_indices = data['mask_indices_tr'][sample_]

                    tr_dict = {
                        mat_values_tr:
                        mat_values if lossfn == "mse" else one_hot(mat_values),
                        mask_indices_tr:
                        mask_indices,
                        mask_split:
                        np.ones_like(mat_values)
                    }

                    returns = sess.run([train_step, total_loss, rec_loss] +
                                       ema_op,
                                       feed_dict=tr_dict)
                    bloss_, brec_loss_ = returns[1:3]  # ema_op may be empty; we only need these two outputs

                    loss_tr_ += bloss_
                    rec_loss_tr_ += np.sqrt(brec_loss_)
                    gc.collect()

            elif 'neighbourhood' in opts['sample_mode']:
                hops = opts.get("n_hops", 4)
                n_samp = opts.get("n_neighbours", 100)
                iters_per_epoch = max(
                    1, data['mask_indices_tr'].shape[0] // minibatch_size)

                for sample_ in tqdm(neighbourhood_sampling(
                        data['mask_indices_tr'],
                        minibatch_size,
                        iters_per_epoch,
                        hops=hops),  # use the configured n_hops instead of a hard-coded 4
                                    total=iters_per_epoch):
                    w = np.array(weights[sample_[:, 0],
                                         sample_[:, 1]]).flatten()
                    mat_values = np.array(sp_mat[sample_[:, 0],
                                                 sample_[:, 1]]).flatten()
                    mat_weight = weights.sum() / float(
                        data['mask_indices_tr'].shape[0]) / w
                    mask_indices = sample_
                    weights = weights + csr_matrix(
                        (np.ones(sample_.shape[0]),
                         (sample_[:, 0], sample_[:, 1])),
                        data["mat_shape"][0:2])

                    tr_dict = {
                        mat_values_tr:
                        mat_values if lossfn == "mse" else one_hot(mat_values),
                        mask_indices_tr:
                        mask_indices,
                        mask_split:
                        mat_weight
                    }

                    returns = sess.run([train_step, total_loss, rec_loss] +
                                       ema_op,
                                       feed_dict=tr_dict)
                    bloss_, brec_loss_ = returns[1:3]  # ema_op may be empty; we only need these two outputs

                    loss_tr_ += bloss_
                    rec_loss_tr_ += np.sqrt(brec_loss_)
                    gc.collect()

            elif 'conditional_sample_sparse' in opts['sample_mode']:
                for _, _, _, _, sample_ in tqdm(conditional_sample_sparse(
                        data['mask_indices_tr'], data['mask_tr_val_split'],
                    [N, M, 1], maxN, maxM),
                                                total=iters_per_epoch):

                    mat_values = data['mat_values_tr'][sample_]
                    mask_indices = data['mask_indices_tr'][sample_]

                    tr_dict = {
                        mat_values_tr:
                        mat_values if lossfn == "mse" else one_hot(mat_values),
                        mask_indices_tr:
                        mask_indices,
                        mask_split:
                        np.ones_like(mat_values)
                    }

                    returns = sess.run([train_step, total_loss, rec_loss] +
                                       ema_op,
                                       feed_dict=tr_dict)
                    bloss_, brec_loss_ = returns[1:3]  # ema_op may be empty; we only need these two outputs

                    loss_tr_ += bloss_
                    rec_loss_tr_ += np.sqrt(brec_loss_)
                    gc.collect()

            else:
                raise ValueError('\nERROR - unknown <sample_mode> in main()\n')

            loss_tr_ /= iters_per_epoch
            rec_loss_tr_ /= iters_per_epoch
            losses['train'].append(loss_tr_)

            print(
                "Training: epoch {:d} took {:.1f}s; train loss {:.3f} (rec: {:.3f})"
                .format(ep + 1,
                        time.time() - begin, loss_tr_, rec_loss_tr_))

            if (ep + 1) % opts['validate_interval'] == 0:
                # Validate and test every validate_interval epochs

                ## Validation Loss
                print("Validating: ")
                if opts['sample_mode'] == "neighbourhood":
                    tf_dic = {
                        "sess": sess,
                        "mat_values_tr": mat_values_tr,
                        "mask_indices_tr": mask_indices_tr,
                        "mat_values_val": mat_values_val,
                        "mask_indices_val": mask_indices_val,
                        "mask_indices_tr_val": mask_indices_tr_val,
                        "mask_split": mask_split,
                        "rec_loss_val": rec_loss_val
                    }
                    hops = opts.get("n_hops", 4)
                    n_samp = opts.get("n_neighbours", 100)
                    loss_val_ = neighbourhood_validate(
                        sparse_matrix=sp_mat,
                        mat_values_val=data['mat_values_val'],
                        mask_indices_val=data['mask_indices_val'],
                        mask_indices_tr=data['mask_indices_tr'],
                        mask_indices_all=data['mask_indices_all'],
                        tf_dic=tf_dic,
                        hops=hops,
                        n_samp=n_samp,
                        lossfn=lossfn,
                        minibatch_size=minibatch_size // 100)  #TODO: what should this be?

                    loss_ts_ = neighbourhood_validate(
                        sparse_matrix=sp_mat,
                        mat_values_val=data['mat_values_test'],
                        mask_indices_val=data['mask_indices_test'],
                        mask_indices_tr=data['mask_indices_tr'],
                        mask_indices_all=data['mask_indices_all'],
                        tf_dic=tf_dic,
                        hops=hops,
                        n_samp=n_samp,
                        lossfn=lossfn,
                        minibatch_size=minibatch_size // 100)
                else:
                    # entries_val = np.zeros(data['mask_indices_all'].shape[0])
                    predictions_val = np.mean(data['mat_values_tr']) * np.ones(
                        data['mask_indices_all'].shape[0])

                    predictions_val_count = np.zeros(
                        data['mask_indices_all'].shape[0])
                    num_entries_val = data['mask_indices_val'].shape[0]

                    while (np.sum(predictions_val_count) <
                           opts['validation_threshold'] * num_entries_val):
                        for sample_tr_, sample_val_, sample_tr_val_, _, _ in tqdm(
                                conditional_sample_sparse(
                                    data['mask_indices_all'],
                                    data['mask_tr_val_split'], [N, M, 1], maxN,
                                    maxM),
                                total=iters_per_epoch):

                            mat_values_tr_ = data['mat_values_all'][sample_tr_]
                            mat_values_tr_val_ = data['mat_values_all'][
                                sample_tr_val_]

                            mask_indices_tr_ = data['mask_indices_all'][
                                sample_tr_]
                            mask_indices_val_ = data['mask_indices_all'][
                                sample_val_]
                            mask_indices_tr_val_ = data['mask_indices_all'][
                                sample_tr_val_]

                            mask_split_ = (data['mask_tr_val_split']
                                           [sample_tr_val_] == 1) * 1.

                            val_dict = {
                                mat_values_tr:
                                mat_values_tr_ if lossfn == "mse" else
                                one_hot(mat_values_tr_),
                                mask_indices_tr:
                                mask_indices_tr_,
                                mat_values_val:
                                mat_values_tr_val_ if lossfn == "mse" else
                                one_hot(mat_values_tr_val_),
                                mask_indices_val:
                                mask_indices_val_,
                                mask_indices_tr_val:
                                mask_indices_tr_val_,
                                mask_split:
                                mask_split_
                            }

                            bloss_val, beout_val = sess.run(
                                [rec_loss_val, eout_val], feed_dict=val_dict)
                            predictions_val[sample_val_] = beout_val[
                                mask_split_ == 1.]
                            predictions_val_count[sample_val_] = 1

                    val_mask = data['mask_tr_val_split'] == 1
                    loss_val_ = np.sqrt(np.mean(
                        (data['mat_values_all'][val_mask] -
                         predictions_val[val_mask])**2))

                    ## Test Loss
                    print("Testing: ")
                    predictions_ts = np.mean(
                        data['mat_values_tr_val']) * np.ones(
                            data['mask_indices_all'].shape[0])

                    predictions_ts_count = np.zeros(
                        data['mask_indices_all'].shape[0])
                    num_entries_ts = data['mask_indices_test'].shape[0]

                    while (np.sum(predictions_ts_count) <
                           opts['validation_threshold'] * num_entries_ts):
                        for sample_tr_, _, sample_tr_val_, sample_ts_, sample_all_ in tqdm(
                                conditional_sample_sparse(
                                    data['mask_indices_all'],
                                    data['mask_tr_val_split'], [N, M, 1], maxN,
                                    maxM),
                                total=iters_per_epoch):

                            mat_values_tr_val_ = data['mat_values_all'][
                                sample_tr_val_]
                            mat_values_all_ = data['mat_values_all'][
                                sample_all_]

                            mask_indices_tr_val_ = data['mask_indices_all'][
                                sample_tr_val_]
                            mask_indices_ts_ = data['mask_indices_all'][
                                sample_ts_]
                            mask_indices_all_ = data['mask_indices_all'][
                                sample_all_]

                            mask_split_ = (data['mask_tr_val_split']
                                           [sample_all_] == 2) * 1.

                            test_dict = {
                                mat_values_tr:
                                mat_values_tr_val_ if lossfn == "mse" else
                                one_hot(mat_values_tr_val_),
                                mask_indices_tr:
                                mask_indices_tr_val_,
                                mat_values_val:
                                mat_values_all_ if lossfn == "mse" else
                                one_hot(mat_values_all_),
                                mask_indices_val:
                                mask_indices_ts_,
                                mask_indices_tr_val:
                                mask_indices_all_,
                                mask_split:
                                mask_split_
                            }

                            bloss_test, beout_ts = sess.run(
                                [rec_loss_val, eout_val], feed_dict=test_dict)
                            predictions_ts[sample_ts_] = beout_ts[
                                mask_split_ == 1.]
                            predictions_ts_count[sample_ts_] = 1

                    ts_mask = data['mask_tr_val_split'] == 2
                    loss_ts_ = np.sqrt(np.mean(
                        (data['mat_values_all'][ts_mask] -
                         predictions_ts[ts_mask])**2))

                losses['valid'].append(loss_val_)
                losses['test'].append(loss_ts_)

                if loss_val_ < min_loss:  # keep track of the best validation loss
                    min_loss = loss_val_
                    min_loss_epoch = ep + 1
                    min_train = rec_loss_tr_
                    min_test = loss_ts_
                    print("{:d},{:4},{:4},{:4}\n".format(
                        ep, loss_tr_, loss_val_, loss_ts_),
                          file=open(best_log, "a"))
                    if opts.get("save_best", False):
                        save_path = saver.save(
                            sess, opts['ckpt_folder'] +
                            "/%s_best.ckpt" % opts.get('model_name', "test"))
                        print("Model saved in file: %s" % save_path, file=LOG)

                if loss_ts_ < min_ts_loss:  # keep track of the best test loss
                    min_ts_loss = loss_ts_
                    min_val_ts = loss_val_

                saved_tr_loss.append(loss_tr_)
                saved_val_loss.append(loss_val_)

                print(
                    "Validation: epoch {:d} took {:.1f}s; train loss {:.3f} (rec: {:.3f}); valid: {:.3f}; min valid loss: {:.3f} (train: {:.3}, test: {:.3}) at epoch {:d}; test loss: {:.3f} (best test: {:.3f} with val {:.3f})"
                    .format(ep + 1,
                            time.time() - begin, loss_tr_, rec_loss_tr_,
                            loss_val_, min_loss, min_train, min_test,
                            min_loss_epoch, loss_ts_, min_ts_loss, min_val_ts),
                    file=LOG)
                gc.collect()

            if (ep + 1) % opts.get("checkpoint_interval", 10000000) == 0:
                save_path = saver.save(
                    sess, opts['ckpt_folder'] + "/%s_checkpt_ep_%05d.ckpt" %
                    (opts.get('model_name', "test"), ep + 1))
                print("Model saved in file: %s" % save_path, file=LOG)

            if loss_val_ > min_loss * 1.075:
                # overfitting: break if validation loss diverges
                break
    saved_tr_loss = np.array(saved_tr_loss)
    saved_val_loss = np.array(saved_val_loss)
    np.save(os.path.join('output', 'ml-1m_train_loss.npy'), saved_tr_loss)
    np.save(os.path.join('output', 'ml-1m_val_loss.npy'), saved_val_loss)

    return losses
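
The `expected_value` helper used above is defined elsewhere in the repository; it is
called on a [-1, 5]-shaped softmax, so it plausibly maps class probabilities to a
scalar rating. A minimal sketch, assuming a 1-5 rating support (example #4 below
suggests the support is actually derived from the data via prep_ev):

def expected_value(probs):
    # probs: [num_entries, 5] softmax over rating classes, assumed to be 1..5
    ratings = tf.constant([1., 2., 3., 4., 5.], dtype=tf.float32)
    return tf.reduce_sum(probs * ratings[None, :], axis=1)  # E[rating] per entry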
Example #2
def main(opts, logfile=None, restore_point=None):
    if logfile is not None:
        # LOG = open(logfile, "w", 0)
        LOG = open(logfile, "w")
    else:
        LOG = sys.stdout
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    path = opts['data_path']
    if 'movielens-100k' in path:
        data = get_data(
            path, train=.85, valid=.05, test=.1, mode='sparse', fold=1
        )  # ml-100k uses the official test set, so only the valid parameter matters
    else:
        data = get_data(path,
                        train=.85,
                        valid=.05,
                        test=.1,
                        mode='sparse',
                        fold=1)

    #build encoder and decoder and use VAE loss
    N, M, num_features = data['mat_shape']
    maxN, maxM = opts['maxN'], opts['maxM']

    if N < maxN: maxN = N
    if M < maxM: maxM = M

    if opts['verbose'] > 0:
        print('\nSelf-supervised run settings:', file=LOG)
        print('dataset: ', path, file=LOG)
        print('Exchangable layer pool mode: ',
              opts['defaults']['matrix_sparse']['pool_mode'],
              file=LOG)
        print('learning rate: ', opts['lr'], file=LOG)
        print('activation: ',
              opts['defaults']['matrix_sparse']['activation'],
              file=LOG)
        print('dae_noise_rate: ', opts['dae_noise_rate'], file=LOG)
        print('dae_loss_alpha: ', opts['dae_loss_alpha'], file=LOG)
        print('l2_regularization: ', opts['l2_regularization'], file=LOG)
        print('', file=LOG)

    # with tf.device('/cpu:0'):
    with tf.Graph().as_default():
        with tf.device('/gpu:0'):
            mat_values_tr = tf.placeholder(tf.float32,
                                           shape=[None],
                                           name='mat_values_tr')
            mask_split = tf.placeholder(tf.float32,
                                        shape=[None],
                                        name='mask_split')
            mat_values_tr_noisy = tf.placeholder(tf.float32,
                                                 shape=[None],
                                                 name='mat_values_tr_noisy')
            mask_indices_tr = tf.placeholder(tf.int64,
                                             shape=[None, 2],
                                             name='mask_indices_tr')
            mat_shape_tr = tf.placeholder(tf.int32,
                                          shape=[3],
                                          name='mat_shape_tr')
            noise_mask_tr = tf.placeholder(tf.int64,
                                           shape=[None],
                                           name='noise_mask_tr')

            mat_values_val = tf.placeholder(tf.float32,
                                            shape=[None],
                                            name='mat_values_val')
            mat_values_val_noisy = tf.placeholder(tf.float32,
                                                  shape=[None],
                                                  name='mat_values_val_noisy')
            mask_indices_val = tf.placeholder(tf.int64,
                                              shape=[None, 2],
                                              name='mask_indices_val')
            mat_shape_val = tf.placeholder(tf.int32,
                                           shape=[3],
                                           name='mat_shape_val')
            noise_mask_val = tf.placeholder(tf.int64,
                                            shape=[None],
                                            name='noise_mask_val')

            with tf.variable_scope("network"):
                tr_dict = {
                    'input': mat_values_tr_noisy,
                    'mask_indices': mask_indices_tr,
                    'units': 5,
                    'shape': [N, M]
                }

                val_dict = {
                    'input': mat_values_val_noisy,
                    'mask_indices': mask_indices_val,
                    'units': 5,
                    'shape': [N, M]
                }

                network = Model(layers=opts['network'],
                                layer_defaults=opts['defaults'],
                                verbose=2)  #define the network
                out_tr = network.get_output(tr_dict)[
                    'input']  #build the network

                out_val = network.get_output(
                    val_dict, reuse=True, verbose=0, is_training=False)[
                        'input']  #get network output, reusing the neural net

            iters_per_epoch = math.ceil(N / maxN) * math.ceil(M / maxM)

            #loss and training
            rec_loss = dae_loss_fn_sp(mat_values_tr, out_tr, noise_mask_tr,
                                      opts['dae_loss_alpha'], mask_split)
            #rec_loss = ordinal_hinge_loss_fn_sp(mat_values_tr, out_tr, noise_mask_tr, opts['dae_loss_alpha'], minibatch_size)
            reg_loss = sum(
                tf.get_collection(
                    tf.GraphKeys.REGULARIZATION_LOSSES))  # regularization
            total_loss = rec_loss + reg_loss

            ev = expected_value(
                tf.nn.softmax(tf.reshape(out_val, shape=[-1, 5])))
            av = expected_value(tf.reshape(mat_values_val, shape=[-1, 5]))
            nm = tf.cast(noise_mask_val, tf.float32)
            rec_loss_val = tf.reduce_sum((av - ev)**2 * nm) / tf.reduce_sum(nm)
            # rec_loss_val = dae_loss_fn_sp(mat_values_val, out_val, noise_mask_val, 1, valid=True)

            train_step = tf.train.AdamOptimizer(
                opts['lr']).minimize(total_loss)
            # note: device_count={'GPU': 0} hides all GPUs, so with
            # allow_soft_placement=True the graph actually runs on CPU
            sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                    allow_soft_placement=True,
                                                    device_count={'GPU': 0}))
            sess.run(tf.global_variables_initializer())

            min_loss = np.inf
            min_train = np.inf
            min_loss_epoch = 0
            losses = OrderedDict()
            losses["train"] = []
            losses["valid"] = []
            losses["test"] = []
            min_ts_loss = np.inf
            min_val_ts = np.inf

            noise_rate = opts['dae_noise_rate']
            sample_mode = opts.get('sample_mode', 'conditional_sample_sparse')
            if 'conditional_sample_sparse' in sample_mode:
                iters_per_epoch = math.ceil(N / maxN) * math.ceil(
                    M / maxM
                )  # a rough heuristic: in expectation the whole matrix is covered once per epoch
            elif 'uniform_over_dense_values' in sample_mode:
                minibatch_size = np.minimum(opts['minibatch_size'],
                                            data['mask_indices_tr'].shape[0])
                iters_per_epoch = data['mask_indices_tr'].shape[
                    0] // minibatch_size
            elif 'neighbourhood' in sample_mode:
                minibatch_size = np.minimum(opts['minibatch_size'],
                                            data['mask_indices_tr'].shape[0])
                hops = opts.get("n_hops", 3)
                n_samp = opts.get("n_neighbours",
                                  None)  # None for "get all neighbours"
                print(
                    "Using neighbourhood sampling with %d hops and %s samples"
                    % (hops, n_samp))
                sp_mat = csr_matrix(
                    (data['mat_values_all'], (data['mask_indices_all'][:, 0],
                                              data['mask_indices_all'][:, 1])),
                    data["mat_shape"][0:2])

            saver = tf.train.Saver()
            if restore_point is not None:
                saver.restore(sess, restore_point)

            best_log = "logs/best_" + opts.get("model_name", "TEST") + ".log"
            print("epoch,train,valid,test\n", file=open(best_log, "a"))
            restore_point_epoch = opts.get('restore_point_epoch', 0)
            ep = 0
            # for ep in range(restore_point_epoch, opts['epochs'] + restore_point_epoch):
            begin = time.time()
            loss_tr_, rec_loss_tr_, loss_val_, loss_ts_ = 0., 0., 0., 0.
            if 'conditional_sample_sparse' in sample_mode:
                # set up helper for drawing sample with common interface so we can reuse code between
                # 'conditional_sample_sparse' and 'uniform_over_dense_values'
                draw_sample = lambda mask, split, sample_dict: conditional_sample_sparse(
                    mask, split, [N, M, 1], maxN, maxM, sample_dict)
                draw_sample_val = lambda mask, split, sample_dict: conditional_sample_sparse(
                    mask,
                    split, [N, M, 1],
                    maxN,
                    maxM,
                    sample_dict,
                    valid=True)
            else:
                draw_sample = lambda mask, split, sample_dict: sample_dense_values_uniform(
                    mask, minibatch_size, iters_per_epoch)
                draw_sample_val = lambda mask, split, sample_dict: sample_dense_values_uniform_val(
                    mask, split, minibatch_size, iters_per_epoch)

                # sample_dict = prep_conditional_sample_sparse(data['mask_indices_tr'], [N,M,1])

                #     for sample_ in tqdm(draw_sample(data['mask_indices_tr'], data['mask_tr_val_split'], sample_dict), total=iters_per_epoch):
                #         mat_values = one_hot(data['mat_values_tr'][sample_])
                #         mask_indices = data['mask_indices_tr'][sample_]

                #         # which entries to 'corrupt' by dropping out
                #         noise_mask = np.random.choice([0,1], size=mask_indices.shape[0], p=[1-noise_rate, noise_rate])
                #         no_noise_mask = np.ones_like(noise_mask) - noise_mask
                #         mat_values_noisy = (mat_values.reshape((-1, 5)) * no_noise_mask[:, None]).flatten()

                #         tr_dict = {mat_values_tr:mat_values,
                #                     mat_values_tr_noisy:mat_values_noisy,
                #                     mask_indices_tr:mask_indices,
                #                     noise_mask_tr:noise_mask,
                #                     mask_split:np.ones_like(noise_mask)
                #                     }

                #         _, bloss_, brec_loss_ = sess.run([train_step, total_loss, rec_loss], feed_dict=tr_dict)

                #         loss_tr_ += np.sqrt(bloss_)
                #         rec_loss_tr_ += np.sqrt(brec_loss_)

                # elif 'neighbourhood' in sample_mode:
                #     iters_per_epoch = max(1,data['mask_indices_tr'].shape[0] /  minibatch_size)

                #     for seed_set_idx in tqdm(sample_dense_values_uniform(data['mask_indices_tr'], minibatch_size, iters_per_epoch),
                #                         total=iters_per_epoch):
                #         seed_set = data['mask_indices_tr'][seed_set_idx]
                #         neighbours = sample_k_neighbours(seed_set, data['mask_indices_tr'], hops, n_samp)
                #         mask_indices_ = np.concatenate([seed_set, neighbours], axis=0)
                #         mask_split_ = np.concatenate([np.ones(seed_set.shape[0]), np.zeros(neighbours.shape[0])]) # only evaluate the seed set
                #         mat_values_ = np.array(sp_mat[mask_indices_[:,0], mask_indices_[:,1]]).flatten()
                #         mat_values_ = one_hot(mat_values_)

                #         # which entries to 'corrupt' by dropping out
                #         noise_mask = mask_split_
                #         no_noise_mask = np.ones_like(noise_mask) - noise_mask
                #         mat_values_noisy = (mat_values_.reshape((-1, 5)) * no_noise_mask[:, None]).flatten()

                #         tr_dict = {mat_values_tr:mat_values_,
                #                    mat_values_tr_noisy:mat_values_noisy,
                #                    noise_mask_tr:noise_mask,
                #                    mask_indices_tr:mask_indices_,
                #                    mask_split:mask_split_
                #                   }

                #         _, bloss_, brec_loss_ = sess.run([train_step, total_loss, rec_loss], feed_dict=tr_dict)

                #         loss_tr_ += bloss_
                #         rec_loss_tr_ += np.sqrt(brec_loss_)
                # else:
                #     raise KeyError("Unrecognized sample mode: %s" % sample_mode)

                # loss_tr_ /= iters_per_epoch
                # rec_loss_tr_ /= iters_per_epoch
                # losses['train'].append(loss_tr_)

                # print("epoch {:d} took {:.1f} training loss {:.3f} (rec:{:.3f})".format(ep+1, time.time() - begin, loss_tr_, rec_loss_tr_))

            # if (ep+1) % opts.get("checkpoint_interval", 10000000) == 0:
            #     save_path = saver.save(sess, opts['ckpt_folder'] + "/%s_checkpt_ep_%05d.ckpt" % (opts.get('model_name', "test"), ep + 1))
            #     print("Model saved in file: %s" % save_path, file=LOG)

            # if (ep+1) % opts['validate_interval'] == 0:
            tf_dict = {
                "sess": sess,
                "mat_values_val": mat_values_val,
                "mat_values_val_noisy": mat_values_val_noisy,
                "mask_indices_val": mask_indices_val,
                "noise_mask_val": noise_mask_val,
                "ev": ev
            }

            if 'conditional_sample_sparse' in sample_mode:
                # loss_val_ = conditional_validation(tf_dict, data['mat_values_tr_val'], data['mask_indices_tr_val'],
                #                                 data['mask_tr_val_split'], split_id=1, draw_sample=draw_sample_val,
                #                                 iters_per_epoch=iters_per_epoch, shape=[N,M,1])
                loss_ts_ = conditional_validation(
                    tf_dict,
                    data['mat_values_all'],
                    data['mask_indices_all'],
                    data['mask_tr_val_split'],
                    split_id=2,
                    draw_sample=draw_sample_val,
                    iters_per_epoch=iters_per_epoch,
                    shape=[N, M, 1])
            elif 'uniform_over_dense_values' in sample_mode:
                # loss_val_ = conditional_validation(tf_dict, data['mat_values_tr_val'], data['mask_indices_tr_val'],
                #                                 data['mask_tr_val_split'], split_id=1, draw_sample=draw_sample_val,
                #                                 iters_per_epoch=iters_per_epoch, shape=[N,M,1])
                loss_ts_ = conditional_validation(
                    tf_dict,
                    data['mat_values_all'],
                    data['mask_indices_all'],
                    data['mask_tr_val_split'],
                    split_id=2,
                    draw_sample=draw_sample_val,
                    iters_per_epoch=iters_per_epoch,
                    shape=[N, M, 1])
            elif 'neighbourhood' in sample_mode:
                # loss_val_ = neighbourhood_validation(tf_dict, data['mask_indices_all'], data['mask_indices_tr'], data['mat_values_all'],
                #                                      data['mask_tr_val_split'], sp_mat=sp_mat, split_id=1, hops=hops, n_samp=n_samp)

                loss_ts_ = neighbourhood_validation(
                    tf_dict,
                    data['mask_indices_all'],
                    np.concatenate(
                        [data['mask_indices_tr'], data['mask_indices_val']],
                        axis=0),
                    data['mat_values_all'],
                    data['mask_tr_val_split'],
                    sp_mat=sp_mat,
                    split_id=2,
                    hops=hops,
                    n_samp=n_samp)

            # losses['valid'].append(loss_val_)
            losses['test'].append(loss_ts_)

            print("Test loss: {:.3})".format(loss_val_, loss_ts_), file=LOG)
    return losses
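
The `one_hot` helper used when lossfn != "mse" is also defined outside this page. A
minimal NumPy sketch consistent with its call sites (ratings assumed to be integers
1..5; the result is flattened to match the 1-D mat_values placeholders and the
reshape((-1, 5)) calls above):

def one_hot(values, num_classes=5):
    # values: integer ratings in {1, ..., num_classes} (assumed)
    out = np.zeros((values.shape[0], num_classes), dtype=np.float32)
    out[np.arange(values.shape[0]), values.astype(int) - 1] = 1.
    return out.flatten()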
Example #3
def main(opts, data=None):
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)    
    if data is None:
        data, top_words = read_data()
    
    N, M, num_features = data['mat_shape']

    with tf.Graph().as_default():
            mat_values_tr = tf.placeholder(tf.float32, shape=[None], name='mat_values_tr')
            mask_indices_tr = tf.placeholder(tf.int32, shape=[None, 2], name='mask_indices_tr')

            with tf.variable_scope("model"):
                tr_dict = {'input':mat_values_tr,
                           'mask_indices':mask_indices_tr,
                           'units':1,
                           'shape':[N,M]}


                model = Model(layers=opts['architecture'], layer_defaults=opts['defaults'], verbose=2) #define the model
                model_output = model.get_output(tr_dict) #build the model
                #words = tf.squeeze(tf.nn.log_softmax(model_output['nvec'], dim=0))
                words = tf.nn.log_softmax(model_output['nvec'], dim=0)
                docs = tf.nn.log_softmax(model_output['mvec'], dim=2)
                #docs = tf.squeeze(tf.nn.log_softmax(model_output['mvec'], dim=-1))

            # log_prob_topics = words + docs # gather
            # take sum
            eps = 1e-16
            total_prob = tf.clip_by_value(masked_inner_product(words, docs, mask_indices_tr), -np.inf, 0.)
            #total_prob = masked_inner_product(words, docs, mask_indices_tr, log_inp=True)
            topic_loss = ce_loss(total_prob, mat_values_tr)
            #loss and training
            reg_loss = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) # regularization
            total_loss = topic_loss + reg_loss

            train_step = tf.train.AdamOptimizer(opts['lr']).minimize(total_loss)
            #train_step = tf.train.RMSPropOptimizer(opts['lr']).minimize(total_loss)
            sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
            sess.run(tf.global_variables_initializer())

            if 'by_row_column_density' in opts['sample_mode']:
                maxN, maxM = opts['maxN'], opts['maxM']  # not set earlier in this example; mirrors the other examples
                iters_per_epoch = math.ceil(N/maxN) * math.ceil(M/maxM) # a rough heuristic: in expectation the whole matrix is covered once per epoch
            elif 'uniform_over_dense_values' in opts['sample_mode']:
                minibatch_size = np.minimum(opts['minibatch_size'], data['mask_indices_tr'].shape[0])
                iters_per_epoch = data['mask_indices_tr'].shape[0] // minibatch_size
            
            
            min_loss = 5
            min_loss_epoch = 0
            losses = OrderedDict()
            losses["train"] = []
            losses["valid"] = []
            
            for ep in range(opts['epochs']):
                begin = time.time()
                loss_tr_, topic_loss_tr_, loss_val_, loss_ts_ = 0,0,0,0

                for sample_ in tqdm(sample_dense_values_uniform(data['mask_indices_tr'], minibatch_size, iters_per_epoch), total=iters_per_epoch):

                    mat_values = data['mat_values_tr'][sample_]
                    mask_indices = data['mask_indices_tr'][sample_]

                    tr_dict = {mat_values_tr: mat_values,
                               mask_indices_tr: mask_indices}
                    
                    _, bloss_, btopic_loss_ = sess.run([train_step, total_loss, topic_loss], feed_dict=tr_dict)

                    loss_tr_ += bloss_
                    topic_loss_tr_ += btopic_loss_

                loss_tr_ /= iters_per_epoch
                topic_loss_tr_ /= iters_per_epoch

                losses['train'].append(loss_tr_)
                losses['valid'].append(loss_val_)

                print("epoch {:d} took {:.1f} training loss {:.3f} (rec:{:.3f})".format(ep, time.time() - begin, loss_tr_, topic_loss_tr_)) 
                if ep % 100 == 0:
                    W, = sess.run([tf.squeeze(words)], feed_dict=tr_dict)
                    print("Top words for each topic:")
                    for i in xrange(W.shape[1]):
                        print("Topic %d: %s" % (i, ', '.join(top_words(W, i))))
    return losses, {"sess":sess, "total_loss": total_loss, "rec_loss": rec_loss, "rec_loss_val":rec_loss_val, 
                    "mat_values_tr": mat_values_tr, "mask_indices_tr": mask_indices_tr,
                    "mat_values_val":mat_values_val, "mask_indices_val":mask_indices_val,
                    "mask_indices_tr_val":mask_indices_tr_val}
Example #4
def main(opts, logfile=None, restore_point=None):
    if logfile is not None:
        logging.basicConfig(format='%(asctime)s %(message)s',
                            filename=logfile,
                            level=logging.INFO)
    else:
        logging.basicConfig(format='%(asctime)s %(message)s',
                            level=logging.INFO)
    log = logging.getLogger()
    log.addHandler(logging.StreamHandler(sys.stdout))
    try:
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
        path = opts['data_path']

        data = load_data()

        #build encoder and decoder and use VAE loss
        N, M, num_features = data['mat_shape']
        maxN, maxM = opts['maxN'], opts['maxM']

        if N < maxN: maxN = N
        if M < maxM: maxM = M
        lossfn = opts.get("loss", "mse")

        if opts['verbose'] > 0:
            logging.info('Factorized Autoencoder run settings:')
            logging.info('dataset: %s' % path)
            logging.info('Exchangable layer pool mode: %s' %
                         opts['defaults']['matrix_sparse']['pool_mode'])
            logging.info('Pooling layer pool mode: %s' %
                         opts['defaults']['matrix_pool_sparse']['pool_mode'])
            logging.info('learning rate: %s' % opts['lr'])
            logging.info('activation: %s' %
                         opts['defaults']['matrix_sparse']['activation'])
            logging.info('number of latent features: %s' %
                         opts['encoder'][-2]['units'])
            logging.info('maxN: %s' % opts['maxN'])
            logging.info('maxM: %s' % opts['maxM'])

        with tf.Graph().as_default():
            mat_values_tr = tf.placeholder(tf.float32,
                                           shape=[None],
                                           name='mat_values_tr')
            mask_indices_tr = tf.placeholder(tf.int32,
                                             shape=[None, 2],
                                             name='mask_indices_tr')

            mat_values_val = tf.placeholder(tf.float32,
                                            shape=[None],
                                            name='mat_values_val')
            mask_split = tf.placeholder(tf.float32,
                                        shape=[None],
                                        name='mask_split')
            mask_indices_val = tf.placeholder(tf.int32,
                                              shape=[None, 2],
                                              name='mask_indices_val')
            mask_indices_tr_val = tf.placeholder(tf.int32,
                                                 shape=[None, 2],
                                                 name='mask_indices_tr_val')
            expected_value = prep_ev(data["mat_values_all"])

            tr_dict = {
                'input': mat_values_tr,
                'mask_indices': mask_indices_tr,
                'units': 1 if lossfn == "mse" else num_features,
                'shape': [N, M],
            }

            val_dict = {
                'input': mat_values_tr,
                'mask_indices': mask_indices_tr,
                'units': 1 if lossfn == "mse" else num_features,
                'shape': [N, M],
            }

            encoder = Model(layers=opts['encoder'],
                            layer_defaults=opts['defaults'],
                            scope="encoder",
                            verbose=2)  #define the encoder
            out_enc_tr = encoder.get_output(tr_dict)  #build the encoder
            enc_ema_op, enc_getter = setup_ema("encoder",
                                               opts.get("ema_decay", 1.))
            out_enc_val = encoder.get_output(
                val_dict,
                reuse=True,
                verbose=0,
                is_training=False,
                getter=enc_getter)  #get encoder output, reusing the neural net
            tr_dict = {
                'nvec': out_enc_tr['nvec'],
                'mvec': out_enc_tr['mvec'],
                'units': out_enc_tr['units'],
                'mask_indices': mask_indices_tr,
                'shape': out_enc_tr['shape'],
            }

            val_dict = {
                'nvec': out_enc_val['nvec'],
                'mvec': out_enc_val['mvec'],
                'units': out_enc_val['units'],
                'mask_indices': mask_indices_tr_val,
                'shape': out_enc_val['shape'],
            }

            decoder = Model(layers=opts['decoder'],
                            layer_defaults=opts['defaults'],
                            scope="decoder",
                            verbose=2)  #define the decoder
            out_dec_tr = decoder.get_output(tr_dict)  #build it
            out_tr = out_dec_tr['input']
            dec_ema_op, dec_getter = setup_ema("decoder",
                                               opts.get("ema_decay", 1.))
            ema_op = enc_ema_op + dec_ema_op

            out_dec_val = decoder.get_output(
                val_dict,
                reuse=True,
                verbose=0,
                is_training=False,
                getter=dec_getter)  #reuse it for validation
            out_val = out_dec_val['input']

            eout_val = expected_value(
                tf.nn.softmax(tf.reshape(out_val, shape=[-1, num_features])))

            #loss and training
            reg_loss = sum(
                tf.get_collection(
                    tf.GraphKeys.REGULARIZATION_LOSSES))  # regularization
            logging.info('num_features: %s' % num_features)
            rec_loss, rec_loss_val, total_loss = get_losses(
                lossfn,
                reg_loss,
                mat_values_tr,
                mat_values_val,
                mask_indices_tr,
                mask_indices_val,
                out_tr,
                out_val,
                mask_split,
                expected_value,
                num_outputs=num_features)
            train_step = get_optimizer(total_loss, opts)
            sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
            sess.run(tf.global_variables_initializer())

            if ('by_row_column_density' in opts['sample_mode']
                    or 'conditional_sample_sparse' in opts['sample_mode']):
                iters_per_epoch = math.ceil(N / maxN) * math.ceil(
                    M / maxM
                )  # a rough heuristic: in expectation the whole matrix is covered once per epoch
            elif 'uniform_over_dense_values' in opts['sample_mode']:
                minibatch_size = np.minimum(opts['minibatch_size'],
                                            data['mask_indices_tr'].shape[0])
                iters_per_epoch = data['mask_indices_tr'].shape[
                    0] // minibatch_size
            elif 'neighbourhood' in opts['sample_mode']:
                minibatch_size = np.minimum(opts['minibatch_size'],
                                            data['mask_indices_tr'].shape[0])
                weights = csr_matrix((np.ones_like(data['mat_values_tr']),
                                      (data['mask_indices_tr'][:, 0],
                                       data['mask_indices_tr'][:, 1])),
                                     data["mat_shape"][0:2])

                sp_mat = csr_matrix(
                    (data['mat_values_all'], (data['mask_indices_all'][:, 0],
                                              data['mask_indices_all'][:, 1])),
                    data["mat_shape"][0:2])

            min_loss = np.inf
            min_train = np.inf
            min_loss_epoch = 0
            losses = OrderedDict()
            losses["train"] = []
            losses["valid"] = []
            losses["test"] = []
            min_ts_loss = np.inf
            min_val_ts = np.inf

            saver = tf.train.Saver(max_to_keep=1000)  # keep up to 1000 checkpoints (TF default is 5)
            if restore_point is not None:
                saver.restore(sess, restore_point)

            best_log = "logs/best_" + opts.get("model_name", "TEST") + ".log"
            print("epoch,train,valid,test", file=open(best_log, "w"))

            tf_nodes = {
                "sess": sess,
                "mat_values_tr": mat_values_tr,
                "mask_indices_tr": mask_indices_tr,
                "mat_values_val": mat_values_val,
                "mask_indices_val": mask_indices_val,
                "mask_indices_tr_val": mask_indices_tr_val,
                "mask_split": mask_split,
                "total_loss": total_loss,
                "rec_loss": rec_loss,
                "rec_loss_val": rec_loss_val,
                "out_tr": out_tr,
                "out_val": out_val
            }

            saved_tr_loss = []
            saved_val_loss = []

            for ep in range(
                    opts.get('restore_point_epoch', 0),
                    opts['epochs'] + opts.get('restore_point_epoch', 0)):
                begin = time.time()
                loss_tr_, rec_loss_tr_, loss_val_, loss_ts_ = 0., 0., 0., 0.

                if 'by_row_column_density' in opts['sample_mode']:
                    for indn_, indm_ in tqdm(
                            sample_submatrix(data['mask_tr'],
                                             maxN,
                                             maxM,
                                             sample_uniform=False),
                            total=iters_per_epoch):  #go over mini-batches

                        inds_ = np.ix_(
                            indn_, indm_, [0]
                        )  #select a sub-matrix given random indices for users/movies
                        mat_sp = data['mat_tr_val'][inds_] * data['mask_tr'][
                            inds_]
                        mat_values = dense_array_to_sparse(mat_sp)['values']
                        mask_indices = dense_array_to_sparse(
                            data['mask_tr'][inds_])['indices'][:, 0:2]

                        tr_dict = {
                            mat_values_tr:
                            mat_values
                            if lossfn == "mse" else one_hot(mat_values),
                            mask_indices_tr:
                            mask_indices,
                            mask_split:
                            np.ones_like(mat_values)
                        }

                        returns = sess.run([train_step, total_loss, rec_loss] +
                                           ema_op,
                                           feed_dict=tr_dict)
                        bloss_, brec_loss_ = returns[1:3]

                        loss_tr_ += np.sqrt(bloss_)
                        rec_loss_tr_ += np.sqrt(brec_loss_)

                elif 'uniform_over_dense_values' in opts['sample_mode']:
                    for sample_ in tqdm(sample_dense_values_uniform(
                            data['mask_indices_tr'], minibatch_size,
                            iters_per_epoch),
                                        total=iters_per_epoch):
                        mat_values = data['mat_values_tr'][
                            sample_] if lossfn == "mse" else data[
                                'mat_values_tr_one_hot'][sample_].flatten()
                        mask_indices = data['mask_indices_tr'][sample_]

                        tr_dict = {
                            mat_values_tr: mat_values.flatten(),
                            mask_indices_tr: mask_indices,
                            mask_split: np.ones_like(mat_values)
                        }

                        returns = sess.run([train_step, total_loss, rec_loss] +
                                           ema_op,
                                           feed_dict=tr_dict)
                        bloss_, brec_loss_ = returns[1:3]  # ema_op may be empty; we only need these two outputs

                        loss_tr_ += bloss_
                        rec_loss_tr_ += np.sqrt(brec_loss_)
                        gc.collect()

                elif 'conditional_sample_sparse' in opts['sample_mode']:
                    for _, _, _, _, sample_ in tqdm(conditional_sample_sparse(
                            data['mask_indices_tr'], data['mask_tr_val_split'],
                        [N, M, 1], maxN, maxM),
                                                    total=iters_per_epoch):
                        mat_values = data['mat_values_tr'][
                            sample_] if lossfn == "mse" else data[
                                'mat_values_tr_one_hot'][sample_]
                        mask_indices = data['mask_indices_tr'][sample_]

                        tr_dict = {
                            mat_values_tr: mat_values.flatten(),
                            mask_indices_tr: reindex_mask(mask_indices),
                            mask_split: np.ones_like(mat_values[:, 0])
                        }

                        returns = sess.run([train_step, total_loss, rec_loss] +
                                           ema_op,
                                           feed_dict=tr_dict)
                        bloss_, brec_loss_ = returns[1:3]  # ema_op may be empty; we only need these two outputs

                        loss_tr_ += bloss_
                        rec_loss_tr_ += np.sqrt(brec_loss_)
                        gc.collect()

                else:
                    raise ValueError('unknown <sample_mode> in main()')

                loss_tr_ /= iters_per_epoch
                rec_loss_tr_ /= iters_per_epoch
                losses['train'].append(loss_tr_)

                logging.info(
                    "Training: epoch {:d} took {:.1f}s; train loss {:.3f} (rec: {:.3f})"
                    .format(ep + 1,
                            time.time() - begin, loss_tr_, rec_loss_tr_))

                if (ep + 1) % opts['validate_interval'] == 0:
                    # validate and test every validate_interval epochs
                    ## Validation Loss
                    if lossfn == "mse":
                        val_ratings = data['mat_values_all'].copy()
                        val_ratings[data['mask_tr_val_split'] == 0] = 3.
                    else:
                        val_ratings = data['mat_values_all_one_hot'].copy()
                        val_ratings[data['mask_tr_val_split'] == 0, :] = 0
                        val_ratings = val_ratings.flatten()
                    vals = data['mat_values_tr'] if lossfn == "mse" else data[
                        'mat_values_tr_one_hot'].flatten()
                    val_dict = {
                        mat_values_tr: vals.flatten(),
                        mask_indices_tr: data['mask_indices_tr'],
                        mat_values_val: val_ratings.flatten(),
                        mask_indices_val: data['mask_indices_all'],
                        mask_indices_tr_val: data['mask_indices_all'],
                        mask_split: (data['mask_tr_val_split'] == 1) * 1.
                    }

                    bloss_val, = sess.run([rec_loss_val], feed_dict=val_dict)
                    loss_val_ += np.sqrt(bloss_val)

                    ## Test Loss
                    test_dict = {
                        mat_values_tr: vals.flatten(),
                        mask_indices_tr: data['mask_indices_tr'],
                        mat_values_val: val_ratings.flatten(),
                        mask_indices_val: data['mask_indices_all'],
                        mask_indices_tr_val: data['mask_indices_all'],
                        mask_split: (data['mask_tr_val_split'] == 2) * 1.
                    }

                    bloss_test, = sess.run([rec_loss_val], feed_dict=test_dict)

                    loss_ts_ += np.sqrt(bloss_test)

                    losses['valid'].append(loss_val_)
                    losses['test'].append(loss_ts_)

                    if loss_val_ < min_loss:  # keep track of the best validation loss
                        min_loss = loss_val_
                        min_loss_epoch = ep + 1
                        min_train = rec_loss_tr_
                        min_test = loss_ts_
                        print("{:d},{:4},{:4},{:4}".format(
                            ep, loss_tr_, loss_val_, loss_ts_),
                              file=open(best_log, "a"))
                        if opts.get("save_best", False):
                            save_path = saver.save(
                                sess, opts['ckpt_folder'] + "/%s_best.ckpt" %
                                opts.get('model_name', "test"))
                            logging.info("Model saved in file: %s" % save_path)

                    if loss_ts_ < min_ts_loss:  # keep track of the best test loss
                        min_ts_loss = loss_ts_
                        min_val_ts = loss_val_

                    saved_tr_loss.append(loss_tr_)
                    saved_val_loss.append(loss_val_)

                    logging.info(
                        "Validation: epoch {:d} took {:.1f}s; train loss {:.3f} (rec: {:.3f}); valid: {:.3f}; min valid loss: {:.3f} (train: {:.3}, test: {:.3}) at epoch: {:d}; test loss: {:.3f} (best test: {:.3f} with val {:.3f})."
                        .format(ep + 1,
                                time.time() - begin, loss_tr_, rec_loss_tr_,
                                loss_val_, min_loss, min_train, min_test,
                                min_loss_epoch, loss_ts_, min_ts_loss,
                                min_val_ts))
                    gc.collect()

                if (ep + 1) % opts.get("checkpoint_interval", 10000000) == 0:
                    save_path = saver.save(
                        sess,
                        opts['ckpt_folder'] + "/%s_checkpt_ep_%05d.ckpt" %
                        (opts.get('model_name', "test"), ep + 1))
                    logging.info("Model saved in file: %s" % save_path)

        saved_tr_loss = np.array(saved_tr_loss)
        saved_val_loss = np.array(saved_val_loss)
        np.save(os.path.join('output', 'yahoo_music_train_loss.npy'),
                saved_tr_loss)
        np.save(os.path.join('output', 'yahoo_music_val_loss.npy'),
                saved_val_loss)
        # Early stopping (disabled): break if validation loss diverges (overfitting)
        #if loss_val_ > min_loss * 1.075:
        #    logging.info("Overfitting... exiting")
        #    break
        return losses
    except Exception as e:
        logging.exception("Training failed")
Example #5
def main(opts):
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    path = opts['data_path']
    data = get_data(path, train=.8, valid=.2, test=.001)

    #build encoder and decoder and use VAE loss
    N, M, num_features = data['mat_tr_val'].shape
    maxN, maxM = opts['maxN'], opts['maxM']

    if N < maxN: maxN = N
    if M < maxM: maxM = M

    # if opts['verbose'] > 0:
    #     print('\nRun Settings:')
    #     print('dataset: ', path)
    #     print('drop mask: ', opts['defaults']['matrix_dense']['drop_mask'])
    #     print('Exchangable layer pool mode: ', opts['defaults']['matrix_dense']['pool_mode'])
    #     print('Pooling layer pool mode: ', opts['defaults']['matrix_pool']['pool_mode'])
    #     print('learning rate: ', opts['lr'])
    #     print('activation: ', opts['defaults']['matrix_dense']['activation'])
    #     print('maxN: ', opts['maxN'])
    #     print('maxM: ', opts['maxM'])
    #     print('')

    with tf.Graph().as_default():

        mat = tf.placeholder(tf.float32,
                             shape=(maxN, maxM, num_features),
                             name='mat')  #data matrix for training
        mask_tr = tf.placeholder(tf.float32,
                                 shape=(maxN, maxM, 1),
                                 name='mask_tr')
        # For validation, since we need less memory (forward pass only),
        # we feed the whole matrix. This is only feasible for this smaller
        # dataset; in the long term we could run validation on CPU to
        # avoid memory problems.
        mat_val = tf.placeholder(tf.float32,
                                 shape=(N, M, num_features),
                                 name='mat_val')  #data matrix for validation
        mask_val = tf.placeholder(
            tf.float32, shape=(N, M, 1),
            name='mask_val')  #the entries not present during training
        mask_tr_val = tf.placeholder(
            tf.float32, shape=(N, M, 1),
            name='mask_tr_val')  #both training and validation entries

        noise_mask = tf.placeholder(tf.float32,
                                    shape=(maxN, maxM, 1),
                                    name='noise_mask')
        mask_tr_noise = tf.placeholder(tf.float32,
                                       shape=(maxN, maxM, 1),
                                       name='mask_tr_noise')

        with tf.variable_scope("encoder"):
            tr_dict = {'input': mat, 'mask': mask_tr_noise}
            val_dict = {'input': mat_val, 'mask': mask_tr_val}

            encoder = Model(layers=opts['encoder'],
                            layer_defaults=opts['defaults'],
                            verbose=2)  #define the encoder

            out_enc_tr = encoder.get_output(tr_dict)  #build the encoder
            out_enc_val = encoder.get_output(
                val_dict, reuse=True, verbose=0,
                is_training=False)  #get encoder output, reusing the neural net

        with tf.variable_scope("decoder"):
            tr_dict = {'input': out_enc_tr['input'], 'mask': mask_tr}
            val_dict = {'input': out_enc_val['input'], 'mask': mask_val}

            decoder = Model(layers=opts['decoder'],
                            layer_defaults=opts['defaults'],
                            verbose=2)  #define the decoder

            out_tr = decoder.get_output(tr_dict)['input']  #build it
            out_val = decoder.get_output(
                val_dict, reuse=True, verbose=0,
                is_training=False)['input']  #reuse it for validation

        #loss and training
        rec_loss = dae_loss_fn(mat, mask_tr, noise_mask, out_tr,
                               opts['dae_loss_alpha'])
        reg_loss = sum(tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES))  # regularization
        rec_loss_val = rec_loss_fn(mat_val, mask_val, out_val)
        total_loss = rec_loss + reg_loss

        train_step = tf.train.AdamOptimizer(opts['lr']).minimize(total_loss)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        sess.run(tf.global_variables_initializer())

        iters_per_epoch = math.ceil(N / maxN) * math.ceil(
            M / maxM
        )  # a bad heuristic: the whole matrix is in expectation covered in each epoch
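        # Worked example (illustrative numbers, not from the dataset): with
        # N = 2000, M = 3000 and maxN = maxM = 1000, this gives
        # ceil(2000/1000) * ceil(3000/1000) = 2 * 3 = 6 sub-matrix batches per
        # epoch. Note the ceiling only matters with true division; wrapping
        # floor division as in math.ceil(N // maxN) is a no-op.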

        min_loss = 5
        min_loss_epoch = 0

        for ep in range(opts['epochs']):
            begin = time.time()
            loss_tr_, rec_loss_tr_, loss_val_ = 0, 0, 0
            for indn_, indm_ in tqdm(
                    sample_submatrix(data['mask_tr'], maxN, maxM),
                    total=iters_per_epoch):  #go over mini-batches
                inds_ = np.ix_(indn_, indm_, [
                    0
                ])  #select a sub-matrix given random indices for users/movies

                noise_rate = opts['dae_noise_rate']
                noise = np.random.choice([0, 1],
                                         size=[maxN, maxM, 1],
                                         p=[noise_rate, 1 - noise_rate])

                tr_dict = {
                    mat: data['mat_tr_val'][inds_],
                    mask_tr: data['mask_tr'][inds_],
                    mask_tr_noise: (data['mask_tr'][inds_] * noise),
                    noise_mask: noise
                }

                _, bloss_, brec_loss_ = sess.run(
                    [train_step, total_loss, rec_loss], feed_dict=tr_dict)

                loss_tr_ += np.sqrt(bloss_)
                rec_loss_tr_ += np.sqrt(brec_loss_)

            loss_tr_ /= iters_per_epoch
            rec_loss_tr_ /= iters_per_epoch

            val_dict = {
                mat_val: data['mat_tr_val'],
                mask_val: data['mask_val'],
                mask_tr_val: data['mask_tr']
            }

            bloss_, = sess.run([rec_loss_val], feed_dict=val_dict)
            loss_val_ += np.sqrt(bloss_)
            if loss_val_ < min_loss:  # keep track of the best validation loss
                min_loss = loss_val_
                min_loss_epoch = ep
            print(
                "epoch {:d} took {:.1f} training loss {:.3f} (rec:{:.3f}) \t validation: {:.3f} \t minimum validation loss: {:.3f} at epoch: {:d}"
                .format(ep,
                        time.time() - begin, loss_tr_, rec_loss_tr_, loss_val_,
                        min_loss, min_loss_epoch),
                flush=True)
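The denoising behaviour above hinges on the noise mask: a fraction noise_rate of the observed entries is hidden from the encoder input, and dae_loss_fn can weight reconstruction of the dropped entries separately. A small standalone sketch of that masking step (toy shapes, hypothetical names):

import numpy as np

noise_rate = 0.1
# toy observed-entry mask standing in for data['mask_tr'][inds_]
mask_tr = (np.random.rand(4, 6, 1) < 0.5).astype(np.float32)
noise = np.random.choice([0, 1], size=mask_tr.shape,
                         p=[noise_rate, 1 - noise_rate])
mask_tr_noise = mask_tr * noise   # entries the encoder still sees
dropped = mask_tr * (1 - noise)   # observed entries the DAE must reconstruct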
Example #6
def main(opts):
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    path = opts['data_path']
    data = get_data(path, train=.8, valid=.1, test=.1)

    #build encoder and decoder and use VAE loss
    N, M, num_features = data['mat_shape']
    maxN, maxM = opts['maxN'], opts['maxM']

    if N < maxN: maxN = N
    if M < maxM: maxM = M

    if opts['verbose'] > 0:
        print('\nFactorized Autoencoder run settings:')
        print('dataset: ', path)
        print('Exchangable layer pool mode: ',
              opts['defaults']['matrix_sparse']['pool_mode'])
        print('Pooling layer pool mode: ',
              opts['defaults']['matrix_pool_sparse']['pool_mode'])
        print('learning rate: ', opts['lr'])
        print('activation: ', opts['defaults']['matrix_sparse']['activation'])
        print('number of latent features: ', opts['encoder'][-2]['units'])
        print('maxN: ', opts['maxN'])
        print('maxM: ', opts['maxM'])
        print('')

    with tf.Graph().as_default():
        mat_values_tr = tf.placeholder(tf.float32,
                                       shape=[None],
                                       name='mat_values_tr')
        mask_indices_tr = tf.placeholder(tf.int32,
                                         shape=[None, 2],
                                         name='mask_indices_tr')

        mat_values_val = tf.placeholder(tf.float32,
                                        shape=[None],
                                        name='mat_values_val')
        mask_indices_val = tf.placeholder(tf.int32,
                                          shape=[None, 2],
                                          name='mask_indices_val')
        mask_indices_tr_val = tf.placeholder(tf.int32,
                                             shape=[None, 2],
                                             name='mask_indices_tr_val')

        with tf.variable_scope(
                None,
                default_name="input_features",
                initializer=opts['defaults']['matrix_sparse'].get(
                    'kernel_initializer', None),
                regularizer=opts['defaults']['matrix_sparse'].get(
                    'regularizer', None),
                reuse=False,
        ):
            mvec_feat = model_variable("mvec_feat",
                                       shape=[1, M, 1],
                                       trainable=True)
            nvec_feat = model_variable("nvec_feat",
                                       shape=[N, 1, 1],
                                       trainable=True)
        with tf.variable_scope("encoder"):
            tr_dict = {
                'input': mat_values_tr,
                'mask_indices': mask_indices_tr,
                'units': 1,
                'mvec': mvec_feat,
                'shape': [N, M],
                'nvec': nvec_feat
            }
            #with tf.variable_scope("encoder"):
            #    tr_dict = {'input':mat_values_tr,
            #               'mask_indices':mask_indices_tr,
            #               'units':1}

            val_dict = {
                'input': mat_values_tr,
                'mask_indices': mask_indices_tr,
                'units': 1,
                'mvec': mvec_feat,
                'nvec': nvec_feat,
                'shape': [N, M]
            }

            encoder = Model(layers=opts['encoder'],
                            layer_defaults=opts['defaults'],
                            verbose=2)  #define the encoder
            out_enc_tr = encoder.get_output(tr_dict)  #build the encoder
            out_enc_val = encoder.get_output(
                val_dict, reuse=True, verbose=0,
                is_training=False)  #get encoder output, reusing the neural net

        with tf.variable_scope("decoder"):
            tr_dict = {
                'input': masked_inner_product(out_enc_tr['nvec'],
                                              out_enc_tr['mvec'],
                                              mask_indices_tr),
                'mask_indices': mask_indices_tr,
                'units': 1,
                'shape': out_enc_tr['shape']
            }
            val_dict = {
                'input': masked_inner_product(out_enc_val['nvec'],
                                              out_enc_val['mvec'],
                                              mask_indices_tr_val),
                'mask_indices': mask_indices_tr_val,
                'units': 1,
                'shape': out_enc_val['shape']
            }

            decoder = Model(layers=opts['decoder'],
                            layer_defaults=opts['defaults'],
                            verbose=2)  #define the decoder
            out_dec_tr = decoder.get_output(tr_dict)  #build it
            out_tr = out_dec_tr['input']

            out_dec_val = decoder.get_output(
                val_dict, reuse=True, verbose=0,
                is_training=False)  #reuse it for validation
            out_val = out_dec_val['input']

        #loss and training
        rec_loss = rec_loss_fn_sp(mat_values_tr, mask_indices_tr, out_tr,
                                  tf.ones(tf.shape(mat_values_tr)))
        reg_loss = sum(tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES))  # regularization
        rec_loss_val = rec_loss_fn_sp(mat_values_val, mask_indices_val,
                                      out_val, data['mask_tr_val_split'])
        total_loss = rec_loss + reg_loss

        train_step = tf.train.AdamOptimizer(opts['lr']).minimize(total_loss)
        #train_step = tf.train.GradientDescentOptimizer(opts['lr']).minimize(total_loss)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        sess.run(tf.global_variables_initializer())

        if 'by_row_column_density' in opts['sample_mode']:
            iters_per_epoch = math.ceil(N / maxN) * math.ceil(
                M / maxM
            )  # a bad heuristic: the whole matrix is in expectation covered in each epoch
        elif 'uniform_over_dense_values' in opts['sample_mode']:
            minibatch_size = np.minimum(opts['minibatch_size'],
                                        data['mask_indices_tr'].shape[0])
            iters_per_epoch = data['mask_indices_tr'].shape[0] // minibatch_size

        min_loss = 5
        min_loss_epoch = 0
        losses = OrderedDict()
        losses["train"] = []
        losses["valid"] = []

        for ep in range(opts['epochs']):
            begin = time.time()
            loss_tr_, rec_loss_tr_, loss_val_, loss_ts_ = 0, 0, 0, 0

            if 'by_row_column_density' in opts['sample_mode']:
                for indn_, indm_ in tqdm(
                        sample_submatrix(data['mask_tr'],
                                         maxN,
                                         maxM,
                                         sample_uniform=False),
                        total=iters_per_epoch):  #go over mini-batches

                    inds_ = np.ix_(
                        indn_, indm_, [0]
                    )  #select a sub-matrix given random indices for users/movies
                    mat_sp = data['mat_tr_val'][inds_] * data['mask_tr'][inds_]
                    mat_values = dense_array_to_sparse(mat_sp)['values']
                    mask_indices = dense_array_to_sparse(
                        data['mask_tr'][inds_])['indices'][:, 0:2]

                    tr_dict = {
                        mat_values_tr: mat_values,
                        mask_indices_tr: mask_indices
                    }

                    _, bloss_, brec_loss_ = sess.run(
                        [train_step, total_loss, rec_loss], feed_dict=tr_dict)

                    loss_tr_ += np.sqrt(bloss_)
                    rec_loss_tr_ += np.sqrt(brec_loss_)

            elif 'uniform_over_dense_values' in opts['sample_mode']:
                for sample_ in tqdm(sample_dense_values_uniform(
                        data['mask_indices_tr'], minibatch_size,
                        iters_per_epoch),
                                    total=iters_per_epoch):

                    mat_values = data['mat_values_tr'][sample_]
                    mask_indices = data['mask_indices_tr'][sample_]

                    tr_dict = {
                        mat_values_tr: mat_values,
                        mask_indices_tr: mask_indices
                    }

                    _, bloss_, brec_loss_ = sess.run(
                        [train_step, total_loss, rec_loss], feed_dict=tr_dict)

                    loss_tr_ += np.sqrt(bloss_)
                    rec_loss_tr_ += np.sqrt(brec_loss_)
            else:
                raise ValueError('unknown <sample_mode> in main()')

            loss_tr_ /= iters_per_epoch
            rec_loss_tr_ /= iters_per_epoch
            new_nvec, new_mvec = sess.run([nvec_feat, mvec_feat])

            ## Validation Loss
            val_dict = {
                mat_values_tr: data['mat_values_tr'],
                mask_indices_tr: data['mask_indices_tr'],
                mat_values_val: data['mat_values_tr_val'],
                mask_indices_val: data['mask_indices_val'],
                mask_indices_tr_val: data['mask_indices_tr_val']
            }

            bloss_, = sess.run([rec_loss_val], feed_dict=val_dict)

            loss_val_ += np.sqrt(bloss_)
            if loss_val_ < min_loss:  # keep track of the best validation loss
                min_loss = loss_val_
                min_loss_epoch = ep
            losses['train'].append(loss_tr_)
            losses['valid'].append(loss_val_)

            print(
                "epoch {:d} took {:.1f} training loss {:.3f} (rec:{:.3f}) \t validation: {:.3f} \t minimum validation loss: {:.3f} at epoch: {:d} \t test loss: {:.3f}"
                .format(ep,
                        time.time() - begin, loss_tr_, rec_loss_tr_, loss_val_,
                        min_loss, min_loss_epoch, loss_ts_))
    return losses
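In this variant the decoder scores each observed entry with masked_inner_product, i.e. a dot product between the row and column embeddings produced by the encoder, evaluated only at the observed index pairs. A hedged NumPy sketch of what that helper plausibly computes (the real signature may differ):

import numpy as np

def masked_inner_product_np(nvec, mvec, mask_indices):
    # nvec: [N, 1, K] row embeddings; mvec: [1, M, K] column embeddings;
    # mask_indices: [nnz, 2] observed (row, col) pairs.
    rows = nvec[mask_indices[:, 0], 0, :]  # [nnz, K]
    cols = mvec[0, mask_indices[:, 1], :]  # [nnz, K]
    return np.sum(rows * cols, axis=1)     # one prediction per observed entry

nvec = np.random.randn(4, 1, 3)
mvec = np.random.randn(1, 5, 3)
inds = np.array([[0, 1], [2, 4]])
print(masked_inner_product_np(nvec, mvec, inds))  # two predicted entries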
Example #7
def main(opts, logfile=None, restore_point=None):
    if logfile is not None:
        LOG = open(logfile, "w", 0)
    else:
        LOG = sys.stdout
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    path = opts['data_path']
    data, eval_data = load_ratings()

    #build encoder and decoder and use VAE loss
    N, M, num_features = data['mat_shape']
    maxN, maxM = opts['maxN'], opts['maxM']

    if N < maxN: maxN = N
    if M < maxM: maxM = M
    lossfn = opts.get("loss", "mse")

    if opts['verbose'] > 0:
        print('\nFactorized Autoencoder run settings:', file=LOG)
        print('dataset: ', path, file=LOG)
        print('Exchangable layer pool mode: ',
              opts['defaults']['matrix_sparse']['pool_mode'],
              file=LOG)
        print('Pooling layer pool mode: ',
              opts['defaults']['matrix_pool_sparse']['pool_mode'],
              file=LOG)
        print('learning rate: ', opts['lr'], file=LOG)
        print('activation: ',
              opts['defaults']['matrix_sparse']['activation'],
              file=LOG)
        print('number of latent features: ',
              opts['encoder'][-2]['units'],
              file=LOG)
        print('maxN: ', opts['maxN'], file=LOG)
        print('maxM: ', opts['maxM'], file=LOG)
        print('', file=LOG)

    with tf.Graph().as_default():
        mat_values_tr = tf.placeholder(tf.float32,
                                       shape=[None],
                                       name='mat_values_tr')
        mask_indices_tr = tf.placeholder(tf.int32,
                                         shape=[None, 2],
                                         name='mask_indices_tr')

        mat_values_val = tf.placeholder(tf.float32,
                                        shape=[None],
                                        name='mat_values_val')
        mask_split = tf.placeholder(tf.float32,
                                    shape=[None],
                                    name='mask_split')
        mask_indices_val = tf.placeholder(tf.int32,
                                          shape=[None, 2],
                                          name='mask_indices_val')
        mask_indices_tr_val = tf.placeholder(tf.int32,
                                             shape=[None, 2],
                                             name='mask_indices_tr_val')

        tr_dict = {
            'input': mat_values_tr,
            'mask_indices': mask_indices_tr,
            'units': 1 if lossfn == "mse" else 5,
            'shape': [N, M],
        }

        val_dict = {
            'input': mat_values_tr,
            'mask_indices': mask_indices_tr,
            'units': 1 if lossfn == "mse" else 5,
            'shape': [N, M],
        }

        encoder = Model(layers=opts['encoder'],
                        layer_defaults=opts['defaults'],
                        scope="encoder",
                        verbose=2)  #define the encoder
        out_enc_tr = encoder.get_output(tr_dict)  #build the encoder
        enc_ema_op, enc_getter = setup_ema("encoder",
                                           opts.get("ema_decay", 1.))
        out_enc_val = encoder.get_output(
            val_dict,
            reuse=True,
            verbose=0,
            is_training=False,
            getter=enc_getter)  #get encoder output, reusing the neural net

        tr_dict = {
            'nvec': out_enc_tr['nvec'],
            'mvec': out_enc_tr['mvec'],
            'units': out_enc_tr['units'],
            'mask_indices': mask_indices_tr,
            'shape': out_enc_tr['shape'],
        }

        val_dict = {
            'nvec': out_enc_val['nvec'],
            'mvec': out_enc_val['mvec'],
            'units': out_enc_val['units'],
            'mask_indices': mask_indices_tr_val,
            'shape': out_enc_val['shape'],
        }

        decoder = Model(layers=opts['decoder'],
                        layer_defaults=opts['defaults'],
                        scope="decoder",
                        verbose=2)  #define the decoder
        out_dec_tr = decoder.get_output(tr_dict)  #build it
        out_tr = out_dec_tr['input']
        dec_ema_op, dec_getter = setup_ema("decoder",
                                           opts.get("ema_decay", 1.))
        ema_op = enc_ema_op + dec_ema_op

        out_dec_val = decoder.get_output(
            val_dict,
            reuse=True,
            verbose=0,
            is_training=False,
            getter=dec_getter)  #reuse it for validation
        out_val = out_dec_val['input']

        #loss and training
        reg_loss = sum(tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES))  # regularization

        rec_loss, rec_loss_val, total_loss = get_losses(
            lossfn, reg_loss, mat_values_tr, mat_values_val, mask_indices_tr,
            mask_indices_val, out_tr, out_val, mask_split)
        train_step = get_optimizer(total_loss, opts)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        sess.run(tf.global_variables_initializer())

        if 'by_row_column_density' in opts['sample_mode']:
            iters_per_epoch = math.ceil(N / maxN) * math.ceil(
                M / maxM
            )  # a bad heuristic: the whole matrix is in expectation covered in each epoch
        elif 'uniform_over_dense_values' in opts['sample_mode']:
            minibatch_size = np.minimum(opts['minibatch_size'],
                                        data['mask_indices_tr'].shape[0])
            iters_per_epoch = data['mask_indices_tr'].shape[0] // minibatch_size

        min_loss = 5
        min_train = 5
        min_loss_epoch = 0
        losses = OrderedDict()
        losses["train"] = []
        losses["valid"] = []
        losses["test"] = []
        min_ts_loss = 5
        min_val_ts = 5

        saver = tf.train.Saver()
        if restore_point is not None:
            saver.restore(sess, restore_point)

        best_log = "logs/" + opts.get("model_name", "TEST") + "_best.log"
        print("epoch,train,valid,test\n", file=open(best_log, "a"))
        for ep in range(opts['epochs']):
            begin = time.time()
            loss_tr_, rec_loss_tr_, loss_val_, loss_ts_ = 0, 0, 0, 0

            if 'by_row_column_density' in opts['sample_mode']:
                for indn_, indm_ in tqdm(
                        sample_submatrix(data['mask_tr'],
                                         maxN,
                                         maxM,
                                         sample_uniform=False),
                        total=iters_per_epoch):  #go over mini-batches

                    inds_ = np.ix_(
                        indn_, indm_, [0]
                    )  #select a sub-matrix given random indices for users/movies
                    mat_sp = data['mat_tr_val'][inds_] * data['mask_tr'][inds_]
                    mat_values = dense_array_to_sparse(mat_sp)['values']
                    mask_indices = dense_array_to_sparse(
                        data['mask_tr'][inds_])['indices'][:, 0:2]

                    tr_dict = {
                        mat_values_tr: mat_values if lossfn == "mse" else one_hot(mat_values),
                        mask_indices_tr: mask_indices,
                    }

                    returns = sess.run([train_step, total_loss, rec_loss] +
                                       ema_op,
                                       feed_dict=tr_dict)
                    bloss_, brec_loss_ = returns[1:3]

                    loss_tr_ += np.sqrt(bloss_)
                    rec_loss_tr_ += np.sqrt(brec_loss_)

            elif 'uniform_over_dense_values' in opts['sample_mode']:
                for sample_ in tqdm(sample_dense_values_uniform(
                        data['mask_indices_tr'], minibatch_size,
                        iters_per_epoch),
                                    total=iters_per_epoch):
                    mat_values = data['mat_values_tr'][sample_]
                    mask_indices = data['mask_indices_tr'][sample_]

                    tr_dict = {
                        mat_values_tr: mat_values if lossfn == "mse" else one_hot(mat_values),
                        mask_indices_tr: mask_indices,
                    }

                    returns = sess.run([train_step, total_loss, rec_loss] +
                                       ema_op,
                                       feed_dict=tr_dict)
                    bloss_, brec_loss_ = returns[1:3]  # ema_op may be empty; we only need these two outputs

                    loss_tr_ += bloss_
                    rec_loss_tr_ += np.sqrt(brec_loss_)
                    gc.collect()
            else:
                raise ValueError('\nERROR - unknown <sample_mode> in main()\n')

            loss_tr_ /= iters_per_epoch
            rec_loss_tr_ /= iters_per_epoch

            ## Validation Loss
            val_dict = {
                mat_values_tr: data['mat_values_tr'] if lossfn == "mse"
                else one_hot(data['mat_values_tr']),
                mask_indices_tr: data['mask_indices_tr'],
                mat_values_val: data['mat_values_tr_val'] if lossfn == "mse"
                else one_hot(data['mat_values_tr_val']),
                mask_indices_val: data['mask_indices_val'],
                mask_indices_tr_val: data['mask_indices_tr_val'],
                mask_split: (data['mask_tr_val_split'] == 1) * 1.
            }

            bloss_val, = sess.run([rec_loss_val], feed_dict=val_dict)
            loss_val_ += np.sqrt(bloss_val)

            ## Test Loss
            test_dict = {
                mat_values_tr: eval_data['mat_values_tr'] if lossfn == "mse"
                else one_hot(eval_data['mat_values_tr']),
                mask_indices_tr: eval_data['mask_indices_tr'],
                mat_values_val: eval_data['mat_values_tr_val'] if lossfn == "mse"
                else one_hot(eval_data['mat_values_tr_val']),
                mask_indices_val: eval_data['mask_indices_test'],
                mask_indices_tr_val: eval_data['mask_indices_tr_val'],
                mask_split: (eval_data['mask_tr_val_split'] == 2) * 1.
            }

            bloss_test, = sess.run([rec_loss_val], feed_dict=test_dict)

            loss_ts_ += np.sqrt(bloss_test)
            if loss_ts_ < min_ts_loss:  # keep track of the best test loss
                min_ts_loss = loss_ts_
                min_val_ts = loss_val_
            if loss_val_ < min_loss:  # keep track of the best validation loss
                min_loss = loss_val_
                min_loss_epoch = ep
                min_train = rec_loss_tr_
                min_test = loss_ts_
                print("{:d},{:4},{:4},{:4}\n".format(ep, loss_tr_, loss_val_,
                                                     loss_ts_),
                      file=open(best_log, "a"))
                if ep > 1000 and (min_loss < 0.942):  # only checkpoint mature runs past a dataset-specific RMSE threshold
                    save_path = saver.save(
                        sess, opts['ckpt_folder'] +
                        "/%s_best.ckpt" % opts.get('model_name', "test"))
                    print("Model saved in file: %s" % save_path, file=LOG)
            if (ep + 1) % 500 == 0:
                save_path = saver.save(
                    sess, opts['ckpt_folder'] + "/%s_checkpt_ep_%05d.ckpt" %
                    (opts.get('model_name', "test"), ep + 1))
                print("Model saved in file: %s" % save_path, file=LOG)

            losses['train'].append(loss_tr_)
            losses['valid'].append(loss_val_)
            losses['test'].append(loss_ts_)

            print(
                "epoch {:d} took {:.1f} train loss {:.3f} (rec:{:.3f}); valid: {:.3f}; min valid loss: {:.3f} \
(train: {:.3}, test: {:.3}) at epoch: {:d}; test loss: {:.3f} (best test: {:.3f} with val {:.3f})"
                .format(ep,
                        time.time() - begin, loss_tr_, rec_loss_tr_, loss_val_,
                        min_loss, min_train, min_test, min_loss_epoch,
                        loss_ts_, min_ts_loss, min_val_ts),
                file=LOG)
            gc.collect()
            if loss_val_ > min_loss * 1.075:
                # overfitting
                break

    saver.restore(
        sess,
        opts['ckpt_folder'] + "/%s_best.ckpt" % opts.get('model_name', "test"))
    return losses, {
        "sess": sess,
        "mat_values_tr": mat_values_tr,
        "mask_indices_tr": mask_indices_tr,
        "mat_values_val": mat_values_val,
        "mask_indices_val": mask_indices_val,
        "mask_indices_tr_val": mask_indices_tr_val,
        "mask_split": mask_split,
        "total_loss": total_loss,
        "rec_loss": rec_loss,
        "rec_loss_val": rec_loss_val,
        "out_tr": out_tr,
        "out_val": out_val
    }
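Example #7 evaluates with EMA-smoothed weights: setup_ema returns update ops to run alongside train_step plus a custom getter that substitutes the averaged variables when the encoder/decoder graphs are rebuilt for validation. A speculative sketch of such a helper using TF1's ExponentialMovingAverage (the actual setup_ema in this codebase may differ):

import tensorflow as tf

def setup_ema_sketch(scope, decay=0.999):
    # With decay >= 1 the examples expect EMA to be disabled,
    # hence the "ema_op may be empty" comments above.
    if decay >= 1.0:
        return [], None
    ema = tf.train.ExponentialMovingAverage(decay=decay)
    tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)
    ema_op = [ema.apply(tvars)]  # run this op after every training step

    def ema_getter(getter, name, *args, **kwargs):
        # Substitute the shadow (averaged) variable where one exists.
        var = getter(name, *args, **kwargs)
        avg = ema.average(var)
        return avg if avg is not None else var

    return ema_op, ema_getter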
Example #8
def main(opts, logfile=None, restore_point=None):
    if logfile is not None:
        logging.basicConfig(format='%(asctime)s %(message)s', filename=logfile, level=logging.INFO)
    else:
        logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    try:
        cpu_config = tf.ConfigProto(
            device_count = {'GPU': 0}
        )

        path = opts['data_path']

        if 'movielens-100k' in path:
            data = get_data(path, train=.75, valid=.05, test=.2, mode='sparse', fold=1) # ml-100k uses the official test set, so only the valid parameter matters
        else: 
            data = get_data(path, train=.6, valid=.2, test=.2, mode='sparse', fold=1)
        
        #build encoder and decoder and use VAE loss
        N, M, num_features = data['mat_shape']
        maxN, maxM = opts['maxN'], opts['maxM']

        if N < maxN: maxN = N
        if M < maxM: maxM = M
        lossfn = opts.get("loss", "mse")

        if opts['verbose'] > 0:
            logging.info('Factorized Autoencoder run settings:')
            logging.info('dataset: %s' % path)
            logging.info('Exchangable layer pool mode: %s' % opts['defaults']['matrix_sparse']['pool_mode'])
            logging.info('Pooling layer pool mode: %s' % opts['defaults']['matrix_pool_sparse']['pool_mode'])
            logging.info('learning rate: %s' % opts['lr'])
            logging.info('activation: %s' % opts['defaults']['matrix_sparse']['activation'])
            logging.info('number of latent features: %s' % opts['encoder'][-2]['units'])
            logging.info('maxN: %s' % opts['maxN'])
            logging.info('maxM: %s' % opts['maxM'])

        with tf.Graph().as_default():
            mat_values_tr = tf.placeholder(tf.float32, shape=[None], name='mat_values_tr')
            mask_indices_tr = tf.placeholder(tf.int32, shape=[None, 2], name='mask_indices_tr')

            mat_values_val = tf.placeholder(tf.float32, shape=[None], name='mat_values_val')
            mask_split = tf.placeholder(tf.float32, shape=[None], name='mask_split')
            mask_indices_tr_val = tf.placeholder(tf.int32, shape=[None, 2], name='mask_indices_tr_val')

            tr_dict = {'input':mat_values_tr,
                        'mask_indices':mask_indices_tr,
                        'units':1 if lossfn == "mse" else 5, 
                        'shape':[N,M],
                        }

            
            val_dict = {'input':mat_values_tr,
                        'mask_indices':mask_indices_tr,
                        'units':1 if lossfn == "mse" else 5,
                        'shape':[N,M],
                        }

            encoder = Model(layers=opts['encoder'], layer_defaults=opts['defaults'], scope="encoder", verbose=2) #define the encoder
            out_enc_tr = encoder.get_output(tr_dict) #build the encoder
            enc_ema_op, enc_getter = setup_ema("encoder", opts.get("ema_decay", 1.))
            out_enc_val = encoder.get_output(val_dict, reuse=True, verbose=0, is_training=False, getter=enc_getter)#get encoder output, reusing the neural net

            tr_dict = {'nvec':out_enc_tr['nvec'],
                        'mvec':out_enc_tr['mvec'],
                        'units':out_enc_tr['units'],
                        'mask_indices':mask_indices_tr,
                        'shape':out_enc_tr['shape'],
                        }

            val_dict = {'nvec':out_enc_val['nvec'],
                        'mvec':out_enc_val['mvec'],
                        'units':out_enc_val['units'],
                        'mask_indices':mask_indices_tr_val,
                        'shape':out_enc_val['shape'],
                        }

            decoder = Model(layers=opts['decoder'], layer_defaults=opts['defaults'], scope="decoder", verbose=2)#define the decoder
            out_dec_tr = decoder.get_output(tr_dict)#build it
            out_tr = out_dec_tr['input']
            dec_ema_op, dec_getter = setup_ema("decoder", opts.get("ema_decay", 1.))
            ema_op = enc_ema_op + dec_ema_op

            out_dec_val = decoder.get_output(val_dict, reuse=True, verbose=0, is_training=False, getter=dec_getter)#reuse it for validation
            out_val = out_dec_val['input']

            eout_val = expected_value(tf.nn.softmax(tf.reshape(out_val, shape=[-1,5])))  # expected rating under the 5-way softmax (not used below)

            #loss and training
            reg_loss = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) # regularization
            
            rec_loss, rec_loss_val, total_loss = get_losses(lossfn, reg_loss, 
                                                            mat_values_tr, 
                                                            mat_values_val,
                                                            out_tr, out_val,
                                                            mask_split)
            train_step = get_optimizer(total_loss, opts)
            sess = tf.Session(config=cpu_config)
            sess.run(tf.global_variables_initializer())

            if 'by_row_column_density' in opts['sample_mode'] or 'conditional_sample_sparse' in opts['sample_mode']:
                iters_per_epoch = math.ceil(N / maxN) * math.ceil(M / maxM) # a bad heuristic: the whole matrix is in expectation covered in each epoch
            elif 'uniform_over_dense_values' in opts['sample_mode']:
                minibatch_size = np.minimum(opts['minibatch_size'], data['mask_indices_tr'].shape[0])
                iters_per_epoch = data['mask_indices_tr'].shape[0] // minibatch_size
            elif 'neighbourhood' in opts['sample_mode']:
                minibatch_size = np.minimum(opts['minibatch_size'], data['mask_indices_tr'].shape[0])
                weights = csr_matrix((np.ones_like(data['mat_values_tr']), 
                    (data['mask_indices_tr'][:,0], 
                    data['mask_indices_tr'][:,1])),
                    data["mat_shape"][0:2])

                sp_mat = csr_matrix((data['mat_values_all'], 
                    (data['mask_indices_all'][:,0], 
                    data['mask_indices_all'][:, 1])),
                    data["mat_shape"][0:2])
        
            saver = tf.train.Saver()
            if restore_point is not None:
                saver.restore(sess, restore_point)
                logging.info("Restored successfully, running validation")
            ## Validation Loss
            train_data = data['mat_values_all'][data['mask_tr_val_split'] == 0]
            train_mask = data['mask_indices_all'][data['mask_tr_val_split'] == 0,:]
            val_dict = {mat_values_tr:train_data if lossfn =="mse" else one_hot(train_data),
                        mat_values_val:data['mat_values_all'] if lossfn =="mse" else one_hot(data['mat_values_all']),
                        mask_indices_tr:train_mask,
                        mask_indices_tr_val:data['mask_indices_all'],
                        mask_split:(data['mask_tr_val_split'] == 1) * 1.
                        }

            bloss_val, = sess.run([rec_loss_val], feed_dict=val_dict)
            loss_val_ = np.sqrt(bloss_val)
            logging.info("Validation complete. Got {:4}".format(loss_val_))

            ## Test Loss     
            test_dict = {mat_values_tr:train_data if lossfn =="mse" else one_hot(train_data),
                        mat_values_val:data['mat_values_all'] if lossfn =="mse" else one_hot(data['mat_values_all']),
                        mask_indices_tr:train_mask,
                        mask_indices_tr_val:data['mask_indices_all'],
                        mask_split:(data['mask_tr_val_split'] == 2) * 1.
                        }

            bloss_test, = sess.run([rec_loss_val], feed_dict=test_dict)
            loss_test_ = np.sqrt(bloss_test)
            print("Valid: {:4}, Test: {:4}\n".format(loss_val_, loss_test_))
    except Exception as e:
        logging.exception("Training failed")
Example #9
def main(opts):
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    path = opts['data_path']
    data = get_data(path, train=.8, valid=.2, test=.001)
    
    standardize = inverse_trans = lambda x: x # defaults
    if opts.get("loss", "mse") == "mse":
        input_data = data['mat_tr_val']
        raw_input_data = data['mat_tr_val'].copy()
        if opts.get('normalize', False):
            print("Normalizing data")
            standardize, inverse_trans = normalize(input_data)
    else:
        raw_input_data = data['mat_tr_val'].copy()
        input_data = to_indicator(data['mat_tr_val'])

    loss_fn = get_loss_function(opts.get("loss", "mse"))
    #build encoder and decoder and use VAE loss
    N, M, num_features = input_data.shape
    opts['decoder'][-1]['units'] = num_features
    maxN, maxM = opts['maxN'], opts['maxM']

    if N < maxN: maxN = N
    if M < maxM: maxM = M

    if opts['verbose'] > 0:
        print('\nRun Settings:')
        print('dataset: ', path)
        print('drop mask: ', opts['defaults']['matrix_dense']['drop_mask'])
        print('Exchangable layer pool mode: ', opts['defaults']['matrix_dense']['pool_mode'])
        print('Pooling layer pool mode: ', opts['defaults']['matrix_pool']['pool_mode'])
        print('learning rate: ', opts['lr'])
        print('activation: ', opts['defaults']['matrix_dense']['activation'])
        print('maxN: ', opts['maxN'])
        print('maxM: ', opts['maxM'])
        print('')
        

    with tf.Graph().as_default():
        mat_raw = tf.placeholder(tf.float32, shape=(maxN, maxM, 1), name='mat_raw')  # raw data matrix for training
        mat_raw_valid = tf.placeholder(tf.float32, shape=(N, M, 1), name='mat_raw_valid')  # raw data matrix for validation

        mat = tf.placeholder(tf.float32, shape=(maxN, maxM, num_features), name='mat')#data matrix for training
        mask_tr = tf.placeholder(tf.float32, shape=(maxN, maxM, 1), name='mask_tr')
        # For validation, since we need less memory (forward pass only), 
        # we are feeding the whole matrix. This is only feasible for this smaller dataset. 
        # In the long term we could perform validation on CPU to avoid memory problems
        mat_val = tf.placeholder(tf.float32, shape=(N, M, num_features), name='mat_val')  # data matrix for validation
        mask_val = tf.placeholder(tf.float32, shape=(N, M, 1), name='mask_val')#the entries not present during training
        mask_tr_val = tf.placeholder(tf.float32, shape=(N, M, 1), name='mask_tr_val')#both training and validation entries

        indn = tf.placeholder(tf.int32, shape=(None), name='indn')
        indm = tf.placeholder(tf.int32, shape=(None), name='indm')
        
        with tf.variable_scope("encoder"):
            tr_dict = {'input':mat,
                       'mask':mask_tr,
                       'total_shape':[N,M],
                       'indn':indn,
                       'indm':indm}
            val_dict = {'input':mat_val,
                        'mask':mask_tr_val,
                        'total_shape':[N,M],
                        'indn':indn,
                        'indm':indm}

            encoder = Model(layers=opts['encoder'], layer_defaults=opts['defaults'], verbose=2) #define the encoder

            out_enc_tr = encoder.get_output(tr_dict) #build the encoder
            out_enc_val = encoder.get_output(val_dict, reuse=True, verbose=0, is_training=False)#get encoder output, reusing the neural net
            

        with tf.variable_scope("decoder"):
            tr_dict = {'nvec':out_enc_tr['nvec'],
                       'mvec':out_enc_tr['mvec'],
                       'mask':out_enc_tr['mask'],
                       'total_shape':[N,M],
                       'indn':indn,
                       'indm':indm}
            val_dict = {'nvec':out_enc_val['nvec'],
                        'mvec':out_enc_val['mvec'],
                        'mask':out_enc_val['mask'],
                        'total_shape':[N,M],
                        'indn':indn,
                        'indm':indm}

            decoder = Model(layers=opts['decoder'], layer_defaults=opts['defaults'], verbose=2)#define the decoder

            out_tr = decoder.get_output(tr_dict)['input']#build it
            out_val = decoder.get_output(val_dict, reuse=True, verbose=0, is_training=False)['input']#reuse it for validation

        #loss and training
        rec_loss = loss_fn(inverse_trans(mat), mask_tr, inverse_trans(out_tr))# reconstruction loss
        reg_loss = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) # regularization
        rec_loss_val = loss_fn(inverse_trans(mat_val), mask_val, inverse_trans(out_val))
        total_loss = rec_loss + reg_loss 

        # Decode 5-way class logits into an expected rating: softmax over the
        # class axis, then take the expectation against the ratings 1..5.
        rng = tf.range(1, 6, 1, dtype=tf.float32)
        idx = tf.convert_to_tensor([[2],[0]], dtype=np.int32)  # contract class axis (2) of out with axis 0 of rng
        mse_loss_train = rec_loss_fn(mat_raw, mask_tr, tf.reshape(tf.tensordot(tf.nn.softmax(out_tr), rng, idx), (maxN,maxM,1)))
        mse_loss_valid = rec_loss_fn(mat_raw_valid, mask_val, tf.reshape(tf.tensordot(tf.nn.softmax(out_val), rng, idx), (N,M,1)))

        train_step = tf.train.AdamOptimizer(opts['lr']).minimize(total_loss)
        merged = tf.summary.merge_all()
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        train_writer = tf.summary.FileWriter('logs/train', sess.graph)
        sess.run(tf.global_variables_initializer())

        iters_per_epoch = math.ceil(N / maxN) * math.ceil(M / maxM) # a bad heuristic: the whole matrix is in expectation covered in each epoch
        
        min_loss = 5
        min_loss_epoch = 0

        for ep in range(opts['epochs']):
            begin = time.time()
            loss_tr_, rec_loss_tr_, loss_val_, mse_tr = 0,0,0,0
            for indn_, indm_ in tqdm(sample_submatrix(data['mask_tr'], maxN, maxM), total=iters_per_epoch):#go over mini-batches
                inds_ = np.ix_(indn_,indm_,range(num_features))
                inds_mask = np.ix_(indn_,indm_, [0])
                #inds_ = np.ix_(indn_,indm_,[0])#select a sub-matrix given random indices for users/movies

                tr_dict = {mat:standardize(input_data[inds_]),
                           mask_tr:data['mask_tr'][inds_mask],
                           mat_raw:raw_input_data[inds_mask],
                           indn:indn_,
                           indm:indm_}

                if opts.get("loss", "mse") == "mse":
                    _, bloss_, brec_loss_ = sess.run([train_step, total_loss, rec_loss], feed_dict=tr_dict)
                    loss_tr_ += np.sqrt(bloss_)
                    rec_loss_tr_ += np.sqrt(brec_loss_)
                elif opts.get("loss", "mse") == "ce":
                    _, bloss_, brec_loss_, mse = sess.run([train_step, total_loss, rec_loss, mse_loss_train], 
                                                          feed_dict=tr_dict)
                    loss_tr_ += np.sqrt(mse)
                    rec_loss_tr_ += brec_loss_

            loss_tr_ /= iters_per_epoch
            rec_loss_tr_ /= iters_per_epoch

            val_dict = {mat_val:standardize(input_data),
                        mask_val:data['mask_val'],
                        mask_tr_val:data['mask_tr'],
                        mat_raw_valid:raw_input_data,
                        indn:np.arange(N),
                        indm:np.arange(M)}

            if merged is not None:
                summary, = sess.run([merged], feed_dict=tr_dict)
                train_writer.add_summary(summary, ep)
            if opts.get("loss", "mse") == "mse":
                bloss_, = sess.run([rec_loss_val], feed_dict=val_dict)
            else:
                bloss_true, bloss_ = sess.run([rec_loss_val, mse_loss_valid], feed_dict=val_dict)
            loss_val_ += np.sqrt(bloss_)
            if loss_val_ < min_loss: # keep track of the best validation loss 
                min_loss = loss_val_
                min_loss_epoch = ep
            print("epoch {:d} took {:.1f} training loss {:.3f} (rec:{:.3f}) \t validation: {:.3f} \t minimum validation loss: {:.3f} at epoch: {:d}".format(ep, time.time() - begin, loss_tr_, rec_loss_tr_,  loss_val_, min_loss, min_loss_epoch))