Exemple #1
0
import numpy as np  # was missing: np.random.seed below requires it

from utils import load_data, optimizer, Accuracy

# Fix the RNG seed so shuffling / weight initialisation are reproducible.
np.random.seed(2020)

# Data generation
train_data, test_data = load_data('RedWine')
x_train, y_train = train_data[0], train_data[1]
x_test, y_test = test_data[0], test_data[1]

# Hyper-parameters
_epoch = 1000
_batch_size = 32
_lr = 0.001
_optim = 'SGD'

# Build model
# NOTE(review): LogisticRegression is not imported in this snippet — it is
# presumably defined/imported elsewhere in the full file; confirm before
# running standalone.
model = LogisticRegression(num_features=x_train.shape[1])
# Bind the optimizer *instance* to a new name instead of rebinding the
# imported `optimizer` factory (the old code made a second call impossible).
optim = optimizer(_optim)

# Solve
print('Train start!')
model.fit(x=x_train, y=y_train, epochs=_epoch, batch_size=_batch_size, lr=_lr, optim=optim)
print('Trained done.')

# Inference
print('Predict on test data')
inference = model.eval(x_test)

# Assess model (Accuracy returns the score that is printed below)
error = Accuracy(inference, y_test)
print('Accuracy on Test Data : %.4f' % error)
Exemple #2
0
        1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85,
        90, 95, 100
    ]
    _lr = 0.01

# When search_param == 'lr', batch_size is fixed at 32; when search_param == 'batch_size', lr is fixed at 0.01

# ============================================================

# Hyper-parameter sweep: vary one of (lr, batch_size) while the other stays
# fixed, re-training a fresh model for each candidate value.
train_results = []
test_results = []
# assumes _lr / _batch_size hold the candidate *list* when chosen as the
# search target (both are defined above this chunk) — TODO confirm
search_space = _lr if search_param == 'lr' else _batch_size
for i, space in enumerate(search_space):
    # Build a fresh model per candidate so runs do not share weights.
    model = LinearRegression(num_features=x_train_data.shape[1])
    optim = optimizer(_optim)

    # Train model with gradient descent
    if search_param == 'lr':
        model.numerical_solution(x=x_train_data,
                                 y=y_train_data,
                                 epochs=_epoch,
                                 batch_size=_batch_size,
                                 lr=space,
                                 optim=optim)
    else:
        # NOTE(review): this call is truncated at the end of the source
        # chunk — the remaining arguments/closing paren are not visible here.
        model.numerical_solution(x=x_train_data,
                                 y=y_train_data,
                                 epochs=_epoch,
                                 batch_size=space,
                                 lr=_lr,
Exemple #3
0
# OPTIMIZER
OPTIMIZER = 'SGD'
# ============================================================

# Fail fast on unsupported dataset / optimizer names.
# (DATA_NAME, gamma, epsilon, num_epochs, batch_size and learning_rate are
# defined earlier in the full file — not visible in this chunk.)
assert DATA_NAME in ['Titanic', 'Digit']
assert OPTIMIZER in ['SGD', 'Momentum', 'RMSProp']

# Load dataset, model and evaluation metric
train_data, test_data, logistic_regression, metric = _initialize(DATA_NAME)
train_x, train_y = train_data

num_data, num_features = train_x.shape
print('# of Training data : ', num_data)

# Make model & optimizer
model = logistic_regression(num_features)
optim = optimizer(OPTIMIZER, gamma=gamma, epsilon=epsilon)

# TRAIN — returns the loss of the final epoch
loss = model.train(train_x, train_y, num_epochs, batch_size, learning_rate,
                   optim)
print('Training Loss at last epoch: %.2f' % loss)

# EVALUATION
test_x, test_y = test_data
pred = model.eval(test_x)

# Score predictions with the metric returned by _initialize
ACC = metric(pred, test_y)

print(OPTIMIZER, ' ACC on Test Data : %.3f' % ACC)
Exemple #4
0
    def train(self,dataset_path,num_classes,batch_size,lr_base,lr_decay,step_size,\
              max_iteration,pretrained_model=None):
        '''
        @description: Build the VGG-Net16 network, train the model, emit
                      training logs, and save model checkpoints.
        @params:
            - dataset_path: path containing the txt files that list the
                            training and validation samples
            - num_classes: number of classes
            - batch_size: number of samples fed to the network per training step
            - lr_base: initial learning rate
            - lr_decay: learning-rate decay coefficient
            - step_size: decay speed   lr = lr_base * lr_decay ^ (global_step / step_size)
            - max_iteration: maximum number of iterations
            - pretrained_model: path of the pretrained model to restore from
        @return: None
        '''

        train_file_name = dataset_path + 'train_list.txt'
        valid_file_name = dataset_path + 'valid_list.txt'

        # NOTE(review): no trailing '/' here, so outputs land in paths like
        # './log/vggtrain' and './model/vggmodel.ckpt' — confirm the prefix
        # naming is intended.
        log_dir = './log/vgg'
        model_dir = './model/vgg'

        vgg = VGG(weight_decay=0.0005, keep_prob=0.5, num_classes=num_classes)

        train_summary_list = []
        valid_summary_list = []

        with tf.Graph().as_default(), tf.device('/gpu:0'):

            with tf.name_scope('input'):
                # Queue-based pipeline reading the training data
                train_image,train_label = get_batch(train_file_name,self._image_H,\
                                                    self._image_W,batch_size)
                # Validation uses fixed batches of 250 with is_train=False
                valid_image,valid_label = get_batch(valid_file_name,self._image_H,\
                                                    self._image_W,250,is_train=False)

                x = tf.placeholder(tf.float32,[None,self._image_H,self._image_W,\
                                               self._image_channels],name='x')
                y = tf.placeholder(tf.int64, [None], name='y')

            #loss, accuracy, train_op
            logits, _ = vgg.vgg16(x)
            loss = utils.calc_loss(logits, y)
            accuracy = utils.calc_accuracy(logits, y)
            train_op, learning_rate, global_step = utils.optimizer(
                lr_base, step_size, lr_decay, loss)

            #summary
            # Train and valid summary lists reuse the same loss/accuracy
            # tensors; which split they report depends on the batch fed in.
            train_summary_list.append(tf.summary.scalar('train_loss', loss))
            valid_summary_list.append(tf.summary.scalar('valid_loss', loss))
            train_summary_list.append(
                tf.summary.scalar('train_accuracy', accuracy))
            valid_summary_list.append(
                tf.summary.scalar('test_accuracy', accuracy))
            train_summary_list.append(
                tf.summary.scalar('learning rate', learning_rate))
            valid_summary_list.append(
                tf.summary.scalar('learning rate', learning_rate))
            # Histogram every trainable variable on the validation summary only
            for var in tf.trainable_variables():
                valid_summary_list.append(tf.summary.histogram(var.name, var))
            train_summary = tf.summary.merge(train_summary_list)
            valid_summary = tf.summary.merge(valid_summary_list)

            #session
            saver = tf.train.Saver(max_to_keep=50)
            with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,\
                                                  log_device_placement=True)) as sess:
                train_writer = tf.summary.FileWriter(log_dir + 'train',
                                                     sess.graph)
                test_writer = tf.summary.FileWriter(log_dir + 'valid')
                tf.global_variables_initializer().run()
                tf.local_variables_initializer().run()

                # Start the queue-runner threads that feed the input queues
                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(sess=sess, coord=coord)

                # Restore the pretrained model if one was given
                if pretrained_model != None:
                    ckpt = tf.train.get_checkpoint_state(pretrained_model)
                    print('Restoring pretrained model: %s' %
                          ckpt.model_checkpoint_path)
                    saver.restore(sess, ckpt.model_checkpoint_path)

                train_time = 0
                for step in range(max_iteration):

                    # Model-freezing (persistence) code, currently disabled:
                    #                    graph_def = tf.get_default_graph().as_graph_def()
                    #                    output_graph_def = graph_util.convert_variables_to_constants(sess,graph_def,['input/x','deepid/Relu'])
                    #                    with tf.gfile.GFile(model_dir+'deepid_model.pb','wb') as file:
                    #                        file.write(output_graph_def.SerializeToString())
                    #                    break

                    start_time = time.time()
                    image, label = sess.run([train_image, train_label])
                    _, train_loss, summary_str, train_step = sess.run(
                        [train_op, loss, train_summary, global_step],
                        feed_dict={
                            x: image,
                            y: label
                        })
                    train_writer.add_summary(summary_str,
                                             global_step=train_step)
                    train_writer.flush()
                    duration = time.time() - start_time
                    train_time += duration

                    #valid and save model
                    # Every 1000 steps (and on the last step): run one
                    # validation batch, log it, and checkpoint the model.
                    if step % 1000 == 0 or (step + 1) == max_iteration:
                        image, label = sess.run([valid_image, valid_label])
                        lr,summary_str,valid_loss,validation_accuracy,\
                        train_step = sess.run([learning_rate,
                                               valid_summary,
                                               loss,
                                               accuracy,
                                               global_step],
                                               feed_dict={x:image,y:label})
                        test_writer.add_summary(summary_str,
                                                global_step=train_step)
                        test_writer.flush()
                        print('Step %d: train loss = %.3f, valid loss = %.3f,valid accuracy = %.3f%%, lr = %.6f (%.3f sec)'%\
                              (train_step,train_loss,valid_loss,validation_accuracy,\
                               lr,train_time))
                        saver.save(sess,
                                   model_dir + 'model.ckpt',
                                   global_step=train_step)
                        with open(log_dir + 'valid_result.txt',
                                  'at') as file_writer:
                            file_writer.write('%d\t%.3f%%\t%.5f\t%d\r\n' %
                                              (train_step, validation_accuracy,
                                               lr, train_time))
                # Stop the data-loading threads
                coord.request_stop()
                coord.join(threads)
Exemple #5
0
def train_model(
    max_epochs=5,  # The maximum number of epoch to run
    decay_c=0.,  # Weight decay for weights
    lrate=1e-4,  # Learning rate for sgd (not used for adadelta and rmsprop)
    batch_size=16,  # The batch size during training
    valid_batch_size=64,  # The batch size used for test set
):
    """Train the Theano model with minibatch SGD.

    Loads the dataset, builds the computation graph, optionally adds an
    L2 penalty scaled by ``decay_c``, then loops over shuffled
    minibatches for up to ``max_epochs`` epochs. After every parameter
    update the test-set error is reported via ``pred_error``. Training
    can be stopped early with Ctrl-C.
    """
    # Capture the hyper-parameters before any other locals are created.
    model_options = locals().copy()
    dataparams, train, test = load_data.load_data()
    model_options.update(dataparams)

    print('Building model...')
    params = init_params(model_options)
    tparams = init_tparams(params)
    (u, l, q, f_score, cost) = build_model(tparams, model_options)
    args = [u, l, q]

    def _l2_regularizer(decay_c):
        """Return decay_c * sum of squared parameters as a graph expression."""
        # Shared scalar so the penalty weight lives inside the graph.
        decay_c = theano.shared(np.asarray(decay_c, dtype=config.floatX),
                                name='decay_c')
        l2r = 0.
        # Iterate the parameter values directly; the old loop unpacked
        # (kk, vv) from items() but then redundantly re-indexed tparams[kk].
        for vv in tparams.values():
            l2r += (vv ** 2).sum()
        return decay_c * l2r

    cost += _l2_regularizer(decay_c)

    grads = T.grad(cost, wrt=list(tparams.values()))
    lr = T.scalar(name='lr')
    f_grad_shared, f_update = optimizer(lr, tparams, grads, args, cost)

    kf = get_minibatches_idx(len(train[0]), batch_size,
                             shuffle=True)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size,
                                  shuffle=True)

    print('Training...')
    uidx = 0
    start = time.time()
    try:
        for eidx in range(max_epochs):
            n_samples = 0
            logging.info('Time: %s' % (time.time() - start))

            # Get new shuffled index for the training set.
            kf = get_minibatches_idx(len(train[0]), batch_size,
                                     shuffle=True)

            for _, train_index in kf:
                uidx += 1
                u = [train[0][t] for t in train_index]
                l = [train[1][t] for t in train_index]
                q = [train[2][t] for t in train_index]
                n_samples += len(u)

                cost = f_grad_shared(u, l, q)
                f_update(lrate)

                print('Epoch ', eidx, 'Update ', uidx,
                      'Cost ', cost)
                logging.info('------ Epoch: %d, Update(cls): %d -------'
                             % (eidx, uidx))
                # Report test-set error after every update.
                pred_error(f_score, test, kf_test)

                logging.info('------------------------------------')

    except KeyboardInterrupt:
        print("Training interupted")
Exemple #6
0
def train(input_tfr_pool, val_tfr_pool, out_dir, log_dir, mean, sbatch, wd):
    """Train Multi-View Network for a number of steps.

    Args:
        input_tfr_pool: list of training tfrecord files fed to the input
            pipeline.
        val_tfr_pool: list of validation tfrecord files; may be empty/None,
            in which case validation is skipped entirely.
        out_dir: directory where model checkpoints are written.
        log_dir: directory for TensorBoard summaries.
        mean: passed through to ``create_bb_pip`` (presumably a mean image
            for input normalization — confirm against that helper).
        sbatch: batch size.
        wd: weight-decay coefficient for the L2 loss.
    """
    log_freq = 100
    val_freq = 1000
    model_save_freq = 10000
    tf.logging.set_verbosity(tf.logging.ERROR)

    # maximum iterations and a 3-stage learning-rate schedule
    total_iters = 140001
    lrs = [0.01, 0.001, 0.0001]
    steps = [
        int(total_iters * 0.5),
        int(total_iters * 0.4),
        int(total_iters * 0.1)
    ]

    # set config file
    config = tf.ConfigProto(log_device_placement=False)
    with tf.Graph().as_default():
        sys.stderr.write("Building Network ... \n")
        global_step = tf.contrib.framework.get_or_create_global_step()

        images, gt_2d, gt_3d, gt_occ = create_bb_pip(input_tfr_pool,
                                                     1000,
                                                     sbatch,
                                                     mean,
                                                     shuffle=True)

        # inference model
        k2d_dim = gt_2d.get_shape().as_list()[1]
        k3d_dim = gt_3d.get_shape().as_list()[1]
        pred_key = sk_net.infer_os(images, 36, tp=True)

        # Calculate loss
        total_loss, data_loss = sk_net.L2_loss_os(pred_key,
                                                  [gt_2d, gt_3d, gt_occ],
                                                  weight_decay=wd)
        train_op, _ = optimizer(total_loss, global_step, lrs, steps)
        sys.stderr.write("Train Graph Done ... \n")
        #add_bb_summary(images, pred_key[0], gt_2d, 'train', max_out=3)

        if val_tfr_pool:
            # Build one (non-shuffled, reused-weights) eval branch per
            # validation tfrecord file.
            val_pool = []
            val_iters = []
            for ix, val_tfr in enumerate(val_tfr_pool):
                total_val_num = ndata_tfrecords(val_tfr)
                total_val_iters = int(float(total_val_num) / sbatch)
                val_iters.append(total_val_iters)
                val_images, val_gt_2d, val_gt_3d, _ = create_bb_pip(
                    [val_tfr], 1000, sbatch, mean, shuffle=False)

                val_pred_key = sk_net.infer_os(val_images,
                                               36,
                                               tp=False,
                                               reuse_=True)
                _, val_data_loss = sk_net.L2_loss_23d(val_pred_key,
                                                      [val_gt_2d, val_gt_3d],
                                                      None)
                val_pool.append(val_data_loss)
                #add_bb_summary(val_images, val_pred_key[0], val_gt_2d, 'val_c' + str(ix), max_out=3)
            sys.stderr.write("Validation Graph Done ... \n")

        # merge all summaries
        merged = tf.summary.merge_all()

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        with tf.Session(config=config) as sess:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            model_saver = tf.train.Saver(max_to_keep=15)

            sys.stderr.write("Initializing ... \n")
            # initialize graph
            sess.run(init_op)

            # initialize the queue threads to start to shovel data
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            model_prefix = os.path.join(out_dir, 'single_key')
            timer = 0
            timer_count = 0

            sys.stderr.write("Start Training --- OUT DIM: %d, %d\n" %
                             (k2d_dim, k3d_dim))
            for i in xrange(total_iters):
                ts = time.time()
                if i > 0 and i % log_freq == 0:
                    # Periodically also fetch the loss and summaries.
                    key_loss, _, summary = sess.run(
                        [data_loss, train_op, merged])

                    summary_writer.add_summary(summary, i)
                    summary_writer.flush()

                    sys.stderr.write(
                        'Training %d (%fs) --- Key L2 Loss: %f\n' %
                        (i, timer / timer_count, key_loss))
                    timer = 0
                    timer_count = 0
                else:
                    sess.run([train_op])
                    timer += time.time() - ts
                    timer_count += 1

                # BUG FIX: guard on the pool itself. `val_tfr` is only bound
                # inside the `if val_tfr_pool:` branch above, so the old
                # `if val_tfr and ...` raised NameError (and `val_pool` was
                # undefined) whenever no validation files were supplied.
                if val_tfr_pool and i > 0 and i % val_freq == 0:
                    sys.stderr.write('Validation %d\n' % i)
                    for cid, v_dl in enumerate(val_pool):
                        val_key_loss = eval_one_epoch(sess, v_dl,
                                                      val_iters[cid])
                        sys.stderr.write('Class %d --- Key L2 Loss: %f\n' %
                                         (cid, val_key_loss))

                if i > 0 and i % model_save_freq == 0:
                    model_saver.save(sess, model_prefix, global_step=i)

            # Final checkpoint after the loop completes.
            model_saver.save(sess, model_prefix, global_step=i)

            summary_writer.close()
            coord.request_stop()
            coord.join(threads, stop_grace_period_secs=5)
def main():
    """Train an NCF model on MovieLens with early stopping on NDCG.

    Parses CLI hyper-parameters, builds the NeuralCF model, then for each
    epoch regenerates negative samples, trains with BCE loss, and evaluates
    HR/NDCG. Stops early once NDCG has failed to improve for 5 epochs.
    All metrics are logged to wandb.
    """
    wandb.init(project="Multimodal")
    parser = argparse.ArgumentParser()
    parser.add_argument('--optim', type=str, default='adam', help='optimizer')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help='learning rate')
    # Fixed copy-paste defect: help text said 'learning rate'.
    parser.add_argument('--epochs', type=int, default=20,
                        help='number of training epochs')
    parser.add_argument('--batch_size',
                        type=int,
                        default=1024,
                        help='train batch size')
    parser.add_argument('--latent_dim_mf',
                        type=int,
                        default=8,
                        help='latent_dim_mf')
    parser.add_argument('--num_layers', type=int, default=3, help='num layers')
    parser.add_argument('--num_neg',
                        type=int,
                        default=4,
                        help='negative sample')
    parser.add_argument('--l2',
                        type=float,
                        default=0.0,
                        help='l2_regularization')
    parser.add_argument('--gpu', type=str, default='0', help='gpu number')
    args = parser.parse_args()
    wandb.config.update(args)

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    data = pd.read_feather("/daintlab/data/movielens/movie_3706.ftr")
    print(data)
    MD = Make_Dataset(ratings=data)
    user, item, rating = MD.trainset
    evaluate_data = MD.evaluate_data

    #NCF model
    model = NeuralCF(num_users=6040,
                     num_items=3706,
                     embedding_size=args.latent_dim_mf,
                     num_layers=args.num_layers)
    model.cuda()
    model = nn.DataParallel(model)
    print(model)
    optim = optimizer(optim=args.optim,
                      lr=args.lr,
                      model=model,
                      weight_decay=args.l2)
    criterion = nn.BCEWithLogitsLoss()
    wandb.watch(model)

    N = []
    patience = 0
    for epoch in range(args.epochs):
        print('Epoch {} starts !'.format(epoch + 1))
        print('-' * 80)
        t1 = time.time()
        model.train()
        total_loss = 0
        # Re-draw negative samples every epoch.
        sample = SampleGenerator(user=user,
                                 item=item,
                                 rating=rating,
                                 ratings=data,
                                 positive_len=MD.positive_len,
                                 num_neg=args.num_neg)
        train_loader = sample.instance_a_train_loader(args.batch_size)
        print("Train Loader 생성 완료")
        for batch_id, batch in enumerate(train_loader):
            users, items, ratings = batch[0], batch[1], batch[2]
            ratings = ratings.float()
            users, items, ratings = users.cuda(), items.cuda(), ratings.cuda()
            optim.zero_grad()
            output = model(users, items)
            loss = criterion(output, ratings)
            loss.backward()
            optim.step()
            loss = loss.item()
            wandb.log({'Batch Loss': loss})
            total_loss += loss

        t2 = time.time()
        print("train : ", t2 - t1)

        engine = Engine()
        hit_ratio, ndcg = engine.evaluate(model, evaluate_data, epoch_id=epoch)
        wandb.log({"epoch": epoch, "HR": hit_ratio, "NDCG": ndcg})
        N.append(ndcg)

        # Early stopping: stop after 5 consecutive epochs without a new
        # best NDCG.
        if N[-1] < max(N):
            if patience == 5:
                # NOTE(review): this message looks truncated — it probably
                # should include the patience value; confirm intent.
                print("Patience = ")
                print("ndcg = {:.4f}".format(max(N)))
                break
            else:
                patience += 1
                print("Patience = {} ndcg = {:.4f}".format(patience, max(N)))
        else:
            patience = 0
            print("Patience = {}".format(patience))
Exemple #8
0
def train():
    """Distributed TensorFlow training of a three-branch regression net
    using the ps/worker architecture and tf.train.Supervisor."""
    start_time = time.time()

    # Step 1: parse command-line flags to get the cluster layout
    # (ps_hosts, worker_hosts) and this node's role (job_name, task_index).
    print("\n\n\n", start_time, "\n\n")
    if FLAGS.job_name is None or FLAGS.job_name == '':
        raise ValueError('Must specify an explicit job_name !')
    else:
        print('job_name : %s' % FLAGS.job_name)
    if FLAGS.task_index is None or FLAGS.task_index == '':
        raise ValueError('Must specify an explicit task_index!')
    else:
        print('task_index : %d' % FLAGS.task_index)
    ps_spec = FLAGS.ps_hosts.split(',')
    worker_spec = FLAGS.worker_hosts.split(',')

    # Step 2: create the Server for the current task node.
    # num_worker = len(worker_spec)
    cluster = tf.train.ClusterSpec({'ps': ps_spec, 'worker': worker_spec})
    server = tf.train.Server(cluster, job_name=FLAGS.job_name, task_index=FLAGS.task_index)

    # Step 3: if this node is a parameter server, block forever in
    # server.join(); if it is a worker, continue with step 4.
    if FLAGS.job_name == 'ps':
        server.join()

    is_chief = (FLAGS.task_index == 0)
    # worker_device = '/job:worker/task%d/cpu:0' % FLAGS.task_index

    # Load the data
    train_next_element = get_data("../data/train.tfrecords")
    test_next_element = get_data("../data/test.tfrecords")

    # Define the network inputs
    with tf.name_scope('net_input'):
        p1 = tf.placeholder(tf.float32, [None, 20, 40, 3])
        p2 = tf.placeholder(tf.float32, [None, 8, 10])
        p3 = tf.placeholder(tf.float32, [None, 10])
        y_ = tf.placeholder(tf.float32,[None,1])
    x = [p1, p2, p3]


    # Assigns ops to the local worker by default.
    # tf.train.replica_device_setter() places the Variable ops inside this
    # block on the ps tasks and the other compute ops on the worker tasks;
    # the default placement strategy is round-robin.
    with tf.device(tf.train.replica_device_setter(cluster=cluster)):
        # Global optimization step counter, mainly for distributed training
        with tf.name_scope('global_step'):
            global_step = tf.Variable(0, trainable=False)
        # Step 4: build the model to train
        model = Three_branch_net()
        y_out = model.forward(x)
        model_loss = tf.losses.mean_squared_error(y_, y_out)
        train_op = optimizer(model_loss, learning_rate, global_step)

        saver = tf.train.Saver()
        # For TensorBoard:
        # merge all summary data directly
        summary_op = tf.summary.merge_all()
        # Build the local variable-initialization op init_op
        # init_op = tf.global_variables_initializer()
        init_op = tf.group(tf.global_variables_initializer(),
                   tf.local_variables_initializer())

        train_dir = tempfile.mkdtemp()

        # Step 5: create a tf.train.Supervisor to manage the training process
        # Create a "supervisor", which oversees the training process.
        sv = tf.train.Supervisor(is_chief=is_chief, logdir=train_dir, init_op=init_op, summary_op=summary_op,
                                 recovery_wait_secs=1,
                                 global_step=global_step)

        if is_chief:
            print('Worker %d: Initailizing session...' % FLAGS.task_index)
        else:
            print('Worker %d: Waiting for session to be initaialized...' % FLAGS.task_index)

        # The supervisor takes care of session initialization, restoring from
        # a checkpoint, and closing when done or an error occurs.
        gpu_options = tf.GPUOptions(allow_growth=True)
        with sv.prepare_or_wait_for_session(server.target, config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            # For TensorBoard
            train_writer = tf.summary.FileWriter(MODEL_SAVE_PATH + '/train', sess.graph)
            test_writer = tf.summary.FileWriter(MODEL_SAVE_PATH + '/test')

            ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH)
            if ckpt and ckpt.model_checkpoint_path:
                print("\n restore model\n")
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print("\n new model train \n")

            # NOTE(review): the Supervisor already ran init_op / restored a
            # checkpoint; re-running init_op here re-initializes variables —
            # confirm this is intended.
            # init_op = tf.global_variables_initializer()
            sess.run(init_op)

            i = 0
            # Every 10th iteration evaluates on the test set; all others
            # train. Loop ends when either dataset iterator is exhausted.
            while True:
                if i % 10 == 0:
                    try:
                        p1_run, p2_run, p3_run, label_run = sess.run([test_next_element[0],
                                                        test_next_element[1],
                                                        test_next_element[2],
                                                        test_next_element[3]])
                        losses, summary, step = sess.run([model_loss,summary_op, global_step], feed_dict={
                                p1: p1_run, p2: p2_run, p3: p3_run, y_: label_run})
                        test_writer.add_summary(summary, step)
                        print("test loss at step %s:(global step %s) :%s " %(i, step, losses))
                    except tf.errors.OutOfRangeError:
                        break

                else:
                    try:
                        p1_run, p2_run, p3_run, label_run = sess.run([train_next_element[0],
                                                        train_next_element[1],
                                                        train_next_element[2],
                                                        train_next_element[3]])
                        # print(p1_run.shape, p2_run.shape,  p3_run.shape, label_run.shape)
                        if i % 100 == 1:
                            pass
                            # Define TensorFlow run options.
                            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                            # Define run metadata, which can record timing and
                            # memory usage of the ops.
                            run_metadata = tf.RunMetadata()
                            # NOTE(review): run_options/run_metadata are built
                            # but never passed to sess.run, so the recorded
                            # metadata below is empty — confirm intent.
                            losses, _, summary, step = sess.run([model_loss, train_op,summary_op, global_step], feed_dict={
                                p1: p1_run, p2: p2_run, p3: p3_run, y_: label_run})
                            train_writer.add_run_metadata(run_metadata, 'step%03d'%step)
                            train_writer.add_summary(summary, step)
                            saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=step)
                            print("adding run metadata for ", step)
                        else:
                            losses, _, summary, step = sess.run([model_loss, train_op,summary_op, global_step], feed_dict={
                                p1: p1_run, p2: p2_run, p3: p3_run, y_: label_run})
                            train_writer.add_summary(summary, step)
                            # print("accuracy at step %s:(global step %s) :%s " %(i, step, losses))
                    except tf.errors.OutOfRangeError:
                        break
                i += 1
            train_writer.close()
            test_writer.close()
        print("All time {}s".format(time.time()-start_time))
def main():
    """Train and evaluate an ACF recommender on an Amazon dataset.

    Parses CLI arguments, loads the dataset/feature tensors, builds the
    ACF model (DataParallel on GPU), trains for ``--epochs`` epochs with a
    fresh random seed per epoch, and runs the test pass after the final
    epoch. Metrics/config are logged to wandb.
    """
    wandb.init(project="AttCF")
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path',
                        type=str,
                        default='/daintlab/data/recommend/Amazon-office-raw',
                        help='path')
    parser.add_argument('--top_k', type=int, default=10, help='top_k')
    parser.add_argument('--optim', type=str, default='adam', help='optimizer')
    parser.add_argument('--epochs', type=int, default=5, help='epoch')
    parser.add_argument('--batch_size',
                        type=int,
                        default=256,
                        help='batch size')
    parser.add_argument('--dim', type=int, default=128, help='dimension')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help='learning rate')
    parser.add_argument('--gpu', type=str, default='0', help='gpu number')
    parser.add_argument('--num_sam',
                        type=int,
                        default=4,
                        help='num of pos sample')

    parser.add_argument('--feature_type',
                        default='all',
                        type=str,
                        help='Type of feature to use. [all, img, txt]')
    parser.add_argument(
        '--eval_type',
        default='leave-one-out',
        type=str,
        help='Evaluation protocol. [ratio-split, leave-one-out]')

    # Shared with the module-level train()/test() helpers.
    global args
    global sd
    global train_len
    global test_len

    # Parse once (the original called parse_args() a second, redundant time).
    args = parser.parse_args()
    wandb.config.update(args)

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # Load dataset
    print("Loading Dataset")
    data_path = os.path.join(args.data_path, args.eval_type)

    train_df, test_df, train_ng_pool, test_negative, num_user, num_item, images = D.load_data(
        data_path, args.feature_type)
    train_len = len(train_df)
    test_len = num_user

    train_dataset = D.CustomDataset(train_df,
                                    test_df,
                                    images,
                                    negative=train_ng_pool,
                                    istrain=True,
                                    feature_type=args.feature_type,
                                    num_sam=args.num_sam)
    test_dataset = D.CustomDataset(train_df,
                                   test_df,
                                   images,
                                   negative=test_negative,
                                   istrain=False,
                                   feature_type=args.feature_type,
                                   num_sam=args.num_sam)

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              collate_fn=my_collate,
                              pin_memory=True)
    test_loader = DataLoader(test_dataset,
                             batch_size=1,
                             shuffle=False,
                             collate_fn=my_collate_tst,
                             pin_memory=True)

    # Model
    acf = ACF(num_user, num_item, images, args.dim)
    acf = torch.nn.DataParallel(acf)
    acf = acf.cuda()
    print(acf)

    # Optimizer
    optim = optimizer(optim=args.optim, lr=args.lr, model=acf)

    # Train & Eval
    for epoch in range(args.epochs):
        # Fresh per-epoch seed shared with train()/test() via the global.
        sd = np.random.randint(2021)
        start = time.time()
        train(acf, train_loader, epoch, optim)
        end = time.time()
        print("{}/{} Train Time : {}".format(epoch + 1, args.epochs,
                                             end - start))
        # Evaluate only after the final epoch.
        if (epoch + 1) == args.epochs:
            start = time.time()
            test(acf, test_loader, epoch)
            end = time.time()
            print("{}/{} Evaluate Time : {}".format(epoch + 1, args.epochs,
                                                    end - start))
Exemple #10
0
def train_model(
        max_epochs=5,  # The maximum number of epoch to run
        decay_c=0.,  # Weight decay for weights
        lrate=1e-4,  # Learning rate for sgd (not used for adadelta and rmsprop)
        batch_size=16,  # The batch size during training
        valid_batch_size=64,  # The batch size used for test set
):
    """Train the Theano model with minibatch SGD.

    Loads the dataset, builds the computation graph, optionally adds an
    L2 penalty scaled by ``decay_c``, then loops over shuffled
    minibatches for up to ``max_epochs`` epochs. After every parameter
    update the test-set error is reported via ``pred_error``. Training
    can be stopped early with Ctrl-C.
    """
    # Capture the hyper-parameters before any other locals are created.
    model_options = locals().copy()
    dataparams, train, test = load_data.load_data()
    model_options.update(dataparams)

    print('Building model...')
    params = init_params(model_options)
    tparams = init_tparams(params)
    (u, l, q, f_score, cost) = build_model(tparams, model_options)
    args = [u, l, q]

    def _l2_regularizer(decay_c):
        """Return decay_c * sum of squared parameters as a graph expression."""
        # Shared scalar so the penalty weight lives inside the graph.
        decay_c = theano.shared(np.asarray(decay_c, dtype=config.floatX),
                                name='decay_c')
        l2r = 0.
        # Iterate the parameter values directly; the old loop unpacked
        # (kk, vv) from items() but then redundantly re-indexed tparams[kk].
        for vv in tparams.values():
            l2r += (vv**2).sum()
        return decay_c * l2r

    cost += _l2_regularizer(decay_c)

    grads = T.grad(cost, wrt=list(tparams.values()))
    lr = T.scalar(name='lr')
    f_grad_shared, f_update = optimizer(lr, tparams, grads, args, cost)

    kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size, shuffle=True)

    print('Training...')
    uidx = 0
    start = time.time()
    try:
        for eidx in range(max_epochs):
            n_samples = 0
            logging.info('Time: %s' % (time.time() - start))

            # Get new shuffled index for the training set.
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                u = [train[0][t] for t in train_index]
                l = [train[1][t] for t in train_index]
                q = [train[2][t] for t in train_index]
                n_samples += len(u)

                cost = f_grad_shared(u, l, q)
                f_update(lrate)

                print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost)
                logging.info('------ Epoch: %d, Update(cls): %d -------' %
                             (eidx, uidx))
                # Report test-set error after every update.
                pred_error(f_score, test, kf_test)

                logging.info('------------------------------------')

    except KeyboardInterrupt:
        print("Training interupted")