Example #1
def main(_):
    FLAGS.train_input = '../input/train.small'
    FLAGS.valid_input = '../input/train.small'
    FLAGS.batch_size = 4
    FLAGS.feat_file_path = '../input/feature_index'
    FLAGS.field_file_path = '../input/feat_fields.old'
    melt.init()

    dataset = Dataset('train')
    #dataset = Dataset('valid')

    it = dataset.make_batch()
    op = it.get_next()

    print('---batch_size', dataset.batch_size, FLAGS.batch_size)

    sess = melt.get_session()

    print('----sess', sess)

    if not FLAGS.use_horovod:
        for epoch in range(2):
            for i in range(3):
                batch = sess.run(op)
                print(epoch, i, len(batch[0]['id']), batch[0]['id'])
    else:
        for epoch in range(2):
            for i in range(3):
                batch = sess.run(op)
                print(epoch, i, len(batch[0]['id']), batch[0]['id'])
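
For reference, here is a minimal sketch of the same session-based iteration written directly against tf.data (TF1 graph mode), assuming Dataset.make_batch wraps a tf.data iterator; the toy dataset and the 'id' field are illustrative only.

import tensorflow as tf

tf.compat.v1.disable_eager_execution()

# toy batched dataset standing in for Dataset('train')
ds = tf.data.Dataset.from_tensor_slices({'id': tf.range(24)}).batch(4).repeat()
it = tf.compat.v1.data.make_one_shot_iterator(ds)
op = it.get_next()

with tf.compat.v1.Session() as sess:
    for epoch in range(2):
        for i in range(3):
            batch = sess.run(op)
            print(epoch, i, len(batch['id']), batch['id'])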
Example #2
def main(_):
    FLAGS.torch_only = True
    #FLAGS.valid_input = None
    melt.init()
    fit = melt.get_fit()

    FLAGS.eval_batch_size = 512 * FLAGS.valid_multiplier

    model_name = FLAGS.model
    model = getattr(base, model_name)()

    loss_fn = nn.BCEWithLogitsLoss()

    td = text_dataset.Dataset()
    train_files = gezi.list_files(FLAGS.train_input)
    train_ds = get_dataset(train_files, td)

    ## pin_memory=True speeds things up a bit
    ## num_workers=1 is very slow, especially for validation; 4 workers seems enough.
    ## Larger values are risky: 12 sometimes works and sometimes hangs, apparently due to excessive resource usage.

    #kwargs = {'num_workers': 12, 'pin_memory': True, 'collate_fn': lele.DictPadCollate()}
    #kwargs = {'num_workers': 6, 'pin_memory': True, 'collate_fn': lele.DictPadCollate()}
    kwargs = {
        'num_workers': 8,
        'pin_memory': True,
        'collate_fn': lele.DictPadCollate()
    }
    ## for 1 GPU, setting num_workers > 8 can make startup very slow
    #num_workers = int(8 / hvd.size())
    # num_workers = 0
    # pin_memory = False
    #kwargs = {'num_workers': num_workers, 'pin_memory': pin_memory, 'collate_fn': lele.DictPadCollate()}

    train_dl = DataLoader(train_ds, FLAGS.batch_size, shuffle=True, **kwargs)

    #kwargs['num_workers'] = max(1, num_workers)
    #logging.info('num train examples', len(train_ds), len(train_dl))

    if FLAGS.valid_input:
        valid_files = gezi.list_files(FLAGS.valid_input)
        valid_ds = get_dataset(valid_files, td)
        valid_dl = DataLoader(valid_ds, FLAGS.eval_batch_size, **kwargs)

        #kwargs['num_workers'] = max(1, num_workers)
        valid_dl2 = DataLoader(valid_ds, FLAGS.batch_size, **kwargs)
        #logging.info('num valid examples', len(valid_ds), len(valid_dl))

    fit(
        model,
        loss_fn,
        dataset=train_dl,
        valid_dataset=valid_dl,
        valid_dataset2=valid_dl2,
        eval_fn=ev.evaluate,
        valid_write_fn=ev.valid_write,
        #write_valid=FLAGS.write_valid)
        write_valid=False,
    )
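
lele.DictPadCollate itself is not shown here; a hypothetical sketch of what such a dict-padding collate_fn typically does (stack scalar fields, right-pad variable-length fields to the longest example in the batch) could look like this:

import torch
import torch.nn.functional as F

def dict_pad_collate(batch):
    """Collate a list of dict examples; pad 1-D fields with zeros on the right."""
    out = {}
    for key in batch[0]:
        values = [torch.as_tensor(ex[key]) for ex in batch]
        if values[0].dim() == 0:
            out[key] = torch.stack(values)          # scalar field
        else:
            max_len = max(v.size(0) for v in values)
            out[key] = torch.stack(
                [F.pad(v, (0, max_len - v.size(0))) for v in values])
    return out

# e.g. DataLoader(train_ds, FLAGS.batch_size, shuffle=True, collate_fn=dict_pad_collate)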
Example #3
def main(_):
  FLAGS.torch_only = True
  melt.init()
  fit = melt.get_fit()

  FLAGS.eval_batch_size = 512 * FLAGS.valid_multiplier

  model_name = FLAGS.model
  model = getattr(base, model_name)() 

  model = model.cuda()

  loss_fn = nn.BCEWithLogitsLoss()

  td = text_dataset.Dataset()

  train_files = gezi.list_files('../input/train/*')
  train_ds = get_dataset(train_files, td)
  
  #kwargs = {'num_workers': 4, 'pin_memory': True, 'collate_fn': lele.DictPadCollate()}
  #kwargs = {'num_workers': 0, 'pin_memory': True, 'collate_fn': lele.DictPadCollate()}
  #kwargs = {'num_workers': 4, 'pin_memory': True, 'collate_fn': lele.DictPadCollate()}
  
  num_workers = 1
  kwargs = {'num_workers': num_workers, 'pin_memory': False, 'collate_fn': lele.DictPadCollate()}

  train_sampler = torch.utils.data.distributed.DistributedSampler(
    train_ds, num_replicas=hvd.size(), rank=hvd.rank())
  
  train_dl = DataLoader(train_ds, FLAGS.batch_size, sampler=train_sampler, **kwargs)
  
  valid_files = gezi.list_files('../input/valid/*')
  valid_ds = get_dataset(valid_files, td)

  kwargs['num_workers'] = 1
  # DistributedSampler supports shuffle=False starting from PyTorch 1.2
  valid_sampler = torch.utils.data.distributed.DistributedSampler(
      valid_ds, num_replicas=hvd.size(), rank=hvd.rank(), shuffle=False)

  valid_sampler2 = torch.utils.data.distributed.DistributedSampler(
      valid_ds, num_replicas=hvd.size(), rank=hvd.rank(), shuffle=False)
  
  valid_dl = DataLoader(valid_ds, FLAGS.eval_batch_size, sampler=valid_sampler, **kwargs)
  valid_dl2 = DataLoader(valid_ds, FLAGS.batch_size, sampler=valid_sampler2, **kwargs)

  fit(model,  
      loss_fn,
      dataset=train_dl,
      valid_dataset=valid_dl,
      valid_dataset2=valid_dl2,
      eval_fn=ev.evaluate,
      valid_write_fn=ev.valid_write,
      #write_valid=FLAGS.write_valid)   
      write_valid=False,
     )
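
The DistributedSampler pattern above gives each Horovod rank a disjoint shard of the dataset; a minimal self-contained sketch (toy data; set_epoch added so the training shard is reshuffled every epoch):

import torch
import horovod.torch as hvd
from torch.utils.data import DataLoader, TensorDataset

hvd.init()
ds = TensorDataset(torch.arange(100))
sampler = torch.utils.data.distributed.DistributedSampler(
    ds, num_replicas=hvd.size(), rank=hvd.rank())
dl = DataLoader(ds, batch_size=8, sampler=sampler)

for epoch in range(2):
    sampler.set_epoch(epoch)   # different shuffle of this rank's shard each epoch
    for (x,) in dl:
        pass                   # training step would go here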
Example #4
def main(_):
    FLAGS.torch_only = True
    melt.init()
    fit = melt.get_fit()

    FLAGS.eval_batch_size = 512 * FLAGS.valid_multiplier

    model_name = FLAGS.model
    model = getattr(base, model_name)()

    loss_fn = nn.BCEWithLogitsLoss()

    td = TextDataset()
    train_files = gezi.list_files('../input/train/*')
    train_ds = get_dataset(train_files, td)

    import multiprocessing
    #-- easily gets Killed (out of memory) if num_workers is too large
    num_threads = int(multiprocessing.cpu_count() * 0.3)
    logging.info('num_threads from multiprocessing.cpu_count:', num_threads)

    num_threads = 12
    train_dl = DataLoader(train_ds,
                          FLAGS.batch_size,
                          shuffle=True,
                          num_workers=num_threads,
                          collate_fn=lele.DictPadCollate())
    #logging.info('num train examples', len(train_ds), len(train_dl))
    valid_files = gezi.list_files('../input/valid/*')
    valid_ds = get_dataset(valid_files, td)
    valid_dl = DataLoader(valid_ds,
                          FLAGS.eval_batch_size,
                          collate_fn=lele.DictPadCollate(),
                          num_workers=num_threads)
    valid_dl2 = DataLoader(valid_ds,
                           FLAGS.batch_size,
                           collate_fn=lele.DictPadCollate(),
                           num_workers=num_threads)
    #logging.info('num valid examples', len(valid_ds), len(valid_dl))

    fit(
        model,
        loss_fn,
        dataset=train_dl,
        valid_dataset=valid_dl,
        valid_dataset2=valid_dl2,
        eval_fn=ev.evaluate,
        valid_write_fn=ev.valid_write,
        #write_valid=FLAGS.write_valid)
        write_valid=False,
    )
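
If you would rather not hard-code num_threads = 12, a simple way to keep the worker count inside a CPU budget (reusing the 0.3 fraction from the comment above; the cap of 12 is just illustrative):

import multiprocessing

# at most 12 workers, at least 1, and never more than ~30% of the available cores
num_threads = max(1, min(12, int(multiprocessing.cpu_count() * 0.3)))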
Example #5
def main(_):
    fit = mt.fit
    config.init()
    mt.init()

    strategy = mt.distributed.get_strategy()
    with strategy.scope():
        model = getattr(base, FLAGS.model)()
        loss_fn = model.get_loss()

        # If you build the model via model.get_model, you currently cannot use model.get_loss; otherwise TF2 Keras raises:
        # Inputs to eager execution function cannot be Keras symbolic tensors, but found [<tf.Tensor 'Squeeze_6:0' shape=(None,) dtype=int64>, <tf.Tensor 'Squeeze_10:0' shape=(None,) dtype=int64>]
        if not FLAGS.custom_loss:
            loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)

        # if tf.__version__ >= '2':
        #   # TODO FIXME has bug for tf1 non eager mode below but eager mode ok
        #   # TF2 may has problem eager input not ok...
        #   dataset = Dataset('train')
        #   dataset.make_batch()
        #   if not dataset.has_varlen_keys:
        #     inputs = dataset.get_inputs()
        #     model = model.get_model(inputs)

    if not FLAGS.lm_target:
        callbacks = []
        #if tf.__version__ >= '2':
        #  tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=FLAGS.log_dir, profile_batch=(5, 20))
        #  callbacks = [tensorboard_callback]
        fit(
            model,
            loss_fn=loss_fn,
            Dataset=Dataset,
            eval_fn=ev.evaluate,
            eval_keys=[
                'uid_', 'did_', 'impression_id', 'uid_in_train', 'did_in_train'
            ],  # used in evaluate.py for x 
            out_hook=out_hook,
            infer_write_fn=infer_write,
            valid_write_fn=valid_write,
            callbacks=callbacks,
        )
    else:
        fit(
            model,
            loss_fn=loss_fn,
            Dataset=Dataset,
        )
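
mt.distributed.get_strategy() is library-specific; in plain TF2 the equivalent pattern is to pick a tf.distribute strategy and create the model and loss inside its scope so that variables are mirrored across devices (sketch with a toy model):

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    model.compile(optimizer='adam', loss=loss_fn)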
Example #6
def main(_):
    fit = melt.fit
    config.init()
    melt.init()

    strategy = melt.distributed.get_strategy()
    with strategy.scope():
        model = getattr(base, FLAGS.model)()
        # loss_fn = model.get_loss()
        loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)

        if tf.__version__ >= '2':
            # TODO FIXME has bug for tf1 non eager mode below but eager mode ok
            dataset = Dataset('train')
            dataset.make_batch()
            if not dataset.has_varlen_keys:
                inputs = dataset.get_inputs()
                model = model.get_model(inputs)

    if not FLAGS.lm_target:
        callbacks = []
        # tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=FLAGS.log_dir, profile_batch=(5, 20))
        # callbacks = [tensorboard_callback]
        fit(
            model,
            loss_fn=loss_fn,
            Dataset=Dataset,
            eval_fn=ev.evaluate,
            eval_keys=[
                'uid_', 'did_', 'impression_id', 'uid_in_train', 'did_in_train'
            ],  # used in evaluate.py for x 
            out_hook=out_hook,
            infer_write_fn=infer_write,
            valid_write_fn=valid_write,
            callbacks=callbacks,
        )
    else:
        fit(
            model,
            loss_fn=loss_fn,
            Dataset=Dataset,
        )
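
dataset.get_inputs() and model.get_model(inputs) are not shown; a purely hypothetical sketch of the kind of Keras functional model they might produce from named inputs (the feature names, vocabulary size and layers here are invented for illustration):

import tensorflow as tf

inputs = {
    'uid': tf.keras.Input(shape=(), dtype=tf.int64, name='uid'),
    'did': tf.keras.Input(shape=(), dtype=tf.int64, name='did'),
}
emb = tf.keras.layers.Embedding(input_dim=10000, output_dim=32)
x = tf.keras.layers.Concatenate()([emb(inputs['uid']), emb(inputs['did'])])
logit = tf.keras.layers.Dense(1)(x)
model = tf.keras.Model(inputs=inputs, outputs=logit)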
Example #7
def main(_):
    FLAGS.torch_only = True
    melt.init()
    fit = melt.get_fit()

    FLAGS.eval_batch_size = 512 * FLAGS.valid_multiplier

    model_name = FLAGS.model
    model = getattr(base, model_name)()

    loss_fn = nn.BCEWithLogitsLoss()

    td = TextDataset()
    train_files = gezi.list_files('../input/train/*')
    train_ds = get_dataset(train_files, td)

    train_dl = DataLoader(train_ds,
                          FLAGS.batch_size,
                          shuffle=True,
                          num_workers=12)
    logging.info('num train examples', len(train_ds), len(train_dl))
    valid_files = gezi.list_files('../input/valid/*')
    valid_ds = get_dataset(valid_files, td)
    valid_dl = DataLoader(valid_ds, FLAGS.eval_batch_size)
    valid_dl2 = DataLoader(valid_ds, FLAGS.batch_size)
    logging.info('num valid examples', len(valid_ds), len(valid_dl))
    print(dir(valid_dl))

    fit(
        model,
        loss_fn,
        dataset=train_dl,
        valid_dataset=valid_dl,
        valid_dataset2=valid_dl2,
        eval_fn=ev.evaluate,
        valid_write_fn=ev.valid_write,
        #write_valid=FLAGS.write_valid)
        write_valid=False,
    )
Example #8
def main(_):
    FLAGS.torch = True
    melt.init()
    fit = melt.get_fit()

    FLAGS.eval_batch_size = 512 * FLAGS.valid_multiplier
    print('---------eval_batch_size', FLAGS.eval_batch_size)

    model_name = FLAGS.model
    model = getattr(base, model_name)()

    Dataset = TextDataset if 'tfrecord' not in FLAGS.train_input else TFRecordDataset

    loss_fn = nn.BCEWithLogitsLoss()

    fit(
        model,
        loss_fn,
        Dataset,
        eval_fn=ev.evaluate,
        valid_write_fn=ev.valid_write,
        #write_valid=FLAGS.write_valid)
        write_valid=False)
Example #9
def main(_):
  FLAGS.torch_only = True
  
  melt.init()
  #fit = melt.get_fit()

  FLAGS.eval_batch_size = 512 * FLAGS.valid_multiplier
  FLAGS.eval_batch_size = 512

  model_name = FLAGS.model
  model = getattr(base, model_name)() 

  model = model.cuda()

  loss_fn = nn.BCEWithLogitsLoss()

  td = text_dataset.Dataset()

  train_files = gezi.list_files('../input/train/*')
  train_ds = get_dataset(train_files, td)
  
  #kwargs = {'num_workers': 4, 'pin_memory': True, 'collate_fn': lele.DictPadCollate()}
  #num_workers = int(16 / hvd.size())  
  num_workers = 1  # with 1 worker startup takes ~2 min; may be safer to just use 0
  num_workers = 0  # with 0 it is much slower than 1, and startup still takes over a minute
  # pin_memory matters little: only a small speedup on a single GPU. With multiple GPUs,
  # num_workers is what drives resource usage and can prevent startup altogether.
  # With multiple GPUs, pin_memory = False is actually faster.
  #kwargs = {'num_workers': num_workers, 'pin_memory': True, 'collate_fn': lele.DictPadCollate()}  
  kwargs = {'num_workers': 1, 'pin_memory': False, 'collate_fn': lele.DictPadCollate()}

  train_sampler = torch.utils.data.distributed.DistributedSampler(
    train_ds, num_replicas=hvd.size(), rank=hvd.rank())
  
  train_dl = DataLoader(train_ds, FLAGS.batch_size, sampler=train_sampler, **kwargs)
  
  valid_files = gezi.list_files('../input/valid/*')
  valid_ds = get_dataset(valid_files, td)

  # DistributedSampler supports shuffle=False starting from PyTorch 1.2
  valid_sampler = torch.utils.data.distributed.DistributedSampler(
      valid_ds, num_replicas=hvd.size(), rank=hvd.rank(), shuffle=False)

  # valid_sampler2 = torch.utils.data.distributed.DistributedSampler(
  #     valid_ds, num_replicas=hvd.size(), rank=hvd.rank(), shuffle=False)
  
  valid_dl = DataLoader(valid_ds, FLAGS.eval_batch_size, sampler=valid_sampler, **kwargs)
  
  #valid_dl2 = DataLoader(valid_ds, FLAGS.batch_size, sampler=valid_sampler2, **kwargs)


  optimizer = optim.Adamax(model.parameters(), lr=0.1)
  #optimizer = optim.SGD(model.parameters(), lr=0.1)
  hvd.broadcast_parameters(model.state_dict(), root_rank=0)
  hvd.broadcast_optimizer_state(optimizer, root_rank=0)

  optimizer = hvd.DistributedOptimizer(optimizer,
                                       named_parameters=model.named_parameters())

  for epoch in range(2):
    train(epoch, model, loss_fn, train_dl, optimizer)
    test(model, loss_fn, valid_dl)
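
train() and test() are defined elsewhere; a hypothetical sketch of what such helpers typically look like in a Horovod loop (for brevity it assumes each batch is an (inputs, target) pair, whereas DictPadCollate actually yields dict batches):

def train(epoch, model, loss_fn, loader, optimizer):
    model.train()
    loader.sampler.set_epoch(epoch)          # reshuffle this rank's shard
    for x, y in loader:
        x, y = x.cuda(), y.cuda()
        optimizer.zero_grad()
        loss = loss_fn(model(x), y.float())
        loss.backward()
        optimizer.step()

def test(model, loss_fn, loader):
    model.eval()
    total, n = 0.0, 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.cuda(), y.cuda()
            total += loss_fn(model(x), y.float()).item() * y.size(0)
            n += y.size(0)
    print('valid loss', total / max(n, 1))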
Example #10
from __future__ import division
from __future__ import print_function

import sys
import os

import tensorflow as tf
flags = tf.app.flags
FLAGS = flags.FLAGS

from torch.utils.data import DataLoader
import gezi
import lele

import melt
melt.init()

# from pyt.dataset import *
# from text_dataset import Dataset as TD

import numpy as np

#import horovod.tensorflow as hvd
import horovod.torch as hvd
hvd.init()

for i in range(5):
    print(i)
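
One step that commonly follows hvd.init() (not shown above) is pinning each process to its own GPU before the model is built:

import torch

if torch.cuda.is_available():
    torch.cuda.set_device(hvd.local_rank())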