def main(_):
  """Train FLAGS.model with plain multi-worker PyTorch DataLoaders.

  Builds train/valid loaders over '../input/train/*' and '../input/valid/*'
  and delegates the training loop to melt's fit function.
  """
  FLAGS.torch_only = True
  melt.init()
  fit = melt.get_fit()

  # Validation runs no backward pass, so eval batches can be larger.
  FLAGS.eval_batch_size = 512 * FLAGS.valid_multiplier

  model_name = FLAGS.model
  model = getattr(base, model_name)()
  loss_fn = nn.BCEWithLogitsLoss()

  td = TextDataset()
  train_files = gezi.list_files('../input/train/*')
  train_ds = get_dataset(train_files, td)

  import multiprocessing
  #--easy to be Killed .. if large workers
  suggested_threads = int(multiprocessing.cpu_count() * 0.3)
  # Hard override: 12 workers proved stable in practice. Previously the
  # cpu_count-based suggestion was logged and then silently discarded;
  # log the value that is actually used instead.
  num_threads = 12
  logging.info('num_threads', num_threads,
               'cpu_count based suggestion', suggested_threads)

  train_dl = DataLoader(train_ds, FLAGS.batch_size, shuffle=True,
                        num_workers=num_threads,
                        collate_fn=lele.DictPadCollate())
  #logging.info('num train examples', len(train_ds), len(train_dl))

  valid_files = gezi.list_files('../input/valid/*')
  valid_ds = get_dataset(valid_files, td)
  # Two validation loaders: one at eval batch size for metrics, one at the
  # train batch size (presumably for loss tracking inside fit — confirm).
  valid_dl = DataLoader(valid_ds, FLAGS.eval_batch_size,
                        collate_fn=lele.DictPadCollate(),
                        num_workers=num_threads)
  valid_dl2 = DataLoader(valid_ds, FLAGS.batch_size,
                         collate_fn=lele.DictPadCollate(),
                         num_workers=num_threads)
  #logging.info('num valid examples', len(valid_ds), len(valid_dl))

  fit(model,
      loss_fn,
      dataset=train_dl,
      valid_dataset=valid_dl,
      valid_dataset2=valid_dl2,
      eval_fn=ev.evaluate,
      valid_write_fn=ev.valid_write,
      #write_valid=FLAGS.write_valid)
      write_valid=False,
      )
def main(_):
  """Train FLAGS.model; validation loaders are built only when configured.

  Fix: valid_dl/valid_dl2 were bound only inside the `if FLAGS.valid_input:`
  branch but were always passed to fit(), raising NameError whenever no
  validation input was configured. They now default to None.
  """
  FLAGS.torch_only = True
  #FLAGS.valid_input = None
  melt.init()
  fit = melt.get_fit()

  FLAGS.eval_batch_size = 512 * FLAGS.valid_multiplier

  model_name = FLAGS.model
  model = getattr(base, model_name)()
  loss_fn = nn.BCEWithLogitsLoss()

  td = text_dataset.Dataset()
  train_files = gezi.list_files(FLAGS.train_input)
  train_ds = get_dataset(train_files, td)

  ## speed up a bit with pin_memory==True
  ## num_workers 1 is very slow especially for validation; ~4 workers is
  ## enough. Large counts are risky: 12 sometimes works, sometimes hangs,
  ## and consumes a lot of resources. For 1 gpu, > 8 may start up very slow.
  kwargs = {
      'num_workers': 8,
      'pin_memory': True,
      'collate_fn': lele.DictPadCollate()
  }

  train_dl = DataLoader(train_ds, FLAGS.batch_size, shuffle=True, **kwargs)
  #logging.info('num train examples', len(train_ds), len(train_dl))

  # Default to None so fit() is always called with defined arguments
  # (presumably fit tolerates absent validation sets — verify in melt).
  valid_dl = None
  valid_dl2 = None
  if FLAGS.valid_input:
    valid_files = gezi.list_files(FLAGS.valid_input)
    valid_ds = get_dataset(valid_files, td)
    valid_dl = DataLoader(valid_ds, FLAGS.eval_batch_size, **kwargs)
    valid_dl2 = DataLoader(valid_ds, FLAGS.batch_size, **kwargs)
    #logging.info('num valid examples', len(valid_ds), len(valid_dl))

  fit(model,
      loss_fn,
      dataset=train_dl,
      valid_dataset=valid_dl,
      valid_dataset2=valid_dl2,
      eval_fn=ev.evaluate,
      valid_write_fn=ev.valid_write,
      #write_valid=FLAGS.write_valid)
      write_valid=False,
      )
def main(_):
  """Horovod distributed training via melt fit: one DistributedSampler per loader.

  Removed dead code: `train_sampler = train_ds` was immediately overwritten
  by the DistributedSampler, and `kwargs['num_workers'] = 1` was assigned
  twice to the value it already held.
  """
  FLAGS.torch_only = True
  melt.init()
  fit = melt.get_fit()

  FLAGS.eval_batch_size = 512 * FLAGS.valid_multiplier

  model_name = FLAGS.model
  model = getattr(base, model_name)()
  model = model.cuda()
  loss_fn = nn.BCEWithLogitsLoss()

  td = text_dataset.Dataset()
  train_files = gezi.list_files('../input/train/*')
  train_ds = get_dataset(train_files, td)

  # Keep workers low under horovod: higher counts were unstable here
  # (see the experiments commented out in the sibling entry points).
  num_workers = 1
  kwargs = {'num_workers': num_workers, 'pin_memory': False,
            'collate_fn': lele.DictPadCollate()}

  train_sampler = torch.utils.data.distributed.DistributedSampler(
      train_ds, num_replicas=hvd.size(), rank=hvd.rank())
  train_dl = DataLoader(train_ds, FLAGS.batch_size,
                        sampler=train_sampler, **kwargs)

  valid_files = gezi.list_files('../input/valid/*')
  valid_ds = get_dataset(valid_files, td)

  # DistributedSampler supports shuffle=False from torch version 1.2.
  valid_sampler = torch.utils.data.distributed.DistributedSampler(
      valid_ds, num_replicas=hvd.size(), rank=hvd.rank(), shuffle=False)
  valid_sampler2 = torch.utils.data.distributed.DistributedSampler(
      valid_ds, num_replicas=hvd.size(), rank=hvd.rank(), shuffle=False)

  valid_dl = DataLoader(valid_ds, FLAGS.eval_batch_size,
                        sampler=valid_sampler, **kwargs)
  valid_dl2 = DataLoader(valid_ds, FLAGS.batch_size,
                         sampler=valid_sampler2, **kwargs)

  fit(model,
      loss_fn,
      dataset=train_dl,
      valid_dataset=valid_dl,
      valid_dataset2=valid_dl2,
      eval_fn=ev.evaluate,
      valid_write_fn=ev.valid_write,
      #write_valid=FLAGS.write_valid)
      write_valid=False,
      )
def main(_):
  """Hand-rolled Horovod training loop (bypasses melt's fit).

  Builds distributed train/valid loaders, broadcasts initial state from
  rank 0, wraps the optimizer with hvd.DistributedOptimizer, then runs
  train()/test() for two epochs.

  Removed dead code: the valid_multiplier-scaled eval_batch_size was
  immediately clobbered by 512; num_workers was assigned 1 then 0 and never
  used (kwargs hard-codes 1); `train_sampler = train_ds` was immediately
  overwritten by the DistributedSampler.
  """
  FLAGS.torch_only = True
  melt.init()
  #fit = melt.get_fit()

  # Effective eval batch size is a flat 512 (the valid_multiplier scaling
  # was dead code — it was overwritten on the next line).
  FLAGS.eval_batch_size = 512

  model_name = FLAGS.model
  model = getattr(base, model_name)()
  model = model.cuda()
  loss_fn = nn.BCEWithLogitsLoss()

  td = text_dataset.Dataset()
  train_files = gezi.list_files('../input/train/*')
  train_ds = get_dataset(train_files, td)

  # Worker-count notes (translated from the original Chinese comments):
  # - num_workers=0 is much slower than 1; startup alone takes over a minute.
  # - pin_memory makes little difference: a slight speedup on a single GPU,
  #   but with multiple GPUs pin_memory=False was actually faster, and high
  #   worker counts can fail to even start due to resource pressure.
  kwargs = {'num_workers': 1, 'pin_memory': False,
            'collate_fn': lele.DictPadCollate()}

  train_sampler = torch.utils.data.distributed.DistributedSampler(
      train_ds, num_replicas=hvd.size(), rank=hvd.rank())
  train_dl = DataLoader(train_ds, FLAGS.batch_size,
                        sampler=train_sampler, **kwargs)

  valid_files = gezi.list_files('../input/valid/*')
  valid_ds = get_dataset(valid_files, td)

  # DistributedSampler supports shuffle=False from torch version 1.2.
  valid_sampler = torch.utils.data.distributed.DistributedSampler(
      valid_ds, num_replicas=hvd.size(), rank=hvd.rank(), shuffle=False)
  valid_dl = DataLoader(valid_ds, FLAGS.eval_batch_size,
                        sampler=valid_sampler, **kwargs)
  #valid_dl2 = DataLoader(valid_ds, FLAGS.batch_size, sampler=valid_sampler2, **kwargs)

  optimizer = optim.Adamax(model.parameters(), lr=0.1)
  #optimizer = optim.SGD(model.parameters(), lr=0.1)

  # Sync initial model/optimizer state from rank 0, then wrap the optimizer
  # so gradients are averaged across workers via allreduce.
  hvd.broadcast_parameters(model.state_dict(), root_rank=0)
  hvd.broadcast_optimizer_state(optimizer, root_rank=0)
  optimizer = hvd.DistributedOptimizer(
      optimizer, named_parameters=model.named_parameters())

  for epoch in range(2):
    train(epoch, model, loss_fn, train_dl, optimizer)
    test(model, loss_fn, valid_dl)
if not isinstance(x, dict): x = torch_(x) else: for key in x: x[key] = torch_(x[key]) if y is None: return x else: return x, y files = gezi.list_files('../input/train.small/*') td = TD() ds = get_dataset(files, td) dl = DataLoader(ds, 2, collate_fn=lele.DictPadCollate()) print(len(ds), len(dl), len(dl.dataset)) for i, (x, y) in enumerate(dl): print(i, x['id'][0], x['value'][0]) # #print('--------------', d) # print(x['index'].shape) # print(x['field'].shape) # print(x['value'].shape) # print(x['id'].shape) # print(y.shape) #print(x) # for key in x: # print(key, type(x[key][0]), type(x[key]), x[key][0].dtype) #x, y = to_torch(x, y) # if i == 2:
#import tensorflow as tf
# Smoke test for DistributedSampler iteration: each rank should print only
# its own shard of ids, and set_epoch() must be called before each epoch so
# any sampler-level shuffling differs across epochs.
# Removed dead code: `sampler = ds` was immediately overwritten below, and
# the commented-out RandomSampler / DictPadCollate2 experiments were pruned.
sampler = torch.utils.data.distributed.DistributedSampler(
    ds, num_replicas=hvd.size(), rank=hvd.rank())
collate_fn = lele.DictPadCollate()
dl = DataLoader(ds, 2, collate_fn=collate_fn, sampler=sampler)
print(len(ds), len(dl), len(dl.dataset))

for epoch in range(2):
  if dl.sampler and hasattr(dl.sampler, 'set_epoch'):
    dl.sampler.set_epoch(epoch)
  for i, (x, y) in enumerate(dl):
    for j in range(len(y)):
      print('epoch', epoch, 'i', i, 'j', j, x['id'][j])