    args.emsize,
    args.nhid,
    args.nhidlast,
    args.dropout,
    args.dropouth,
    args.dropoutx,
    args.dropouti,
    args.dropoute,
)
size = 0
for p in model.parameters():
    size += p.nelement()
logging.info("param size: {}".format(size))
logging.info("initial genotype:")
logging.info(model.genotype())

if torch.cuda.is_available():
    # if torch.cuda.device_count() > 1:
    #     parallel_model = nn.DataParallel(model, dim=1)
    #     parallel_model = parallel_model.to(device)
    # else:
    parallel_model = model.to(device)
else:
    parallel_model = model

architect = Architect(parallel_model, args)
total_params = sum(x.data.nelement() for x in model.parameters())
logging.info("Args: {}".format(args))
test_data = batchify(corpus.test, test_batch_size, args)
ntokens = len(corpus.dictionary)

if args.continue_train:
    model = torch.load(os.path.join(args.save, 'model.pt'))
else:
    model = model.RNNModelSearch(ntokens, args.emsize, args.nhid, args.nhidlast,
                                 args.dropout, args.dropouth, args.dropoutx,
                                 args.dropouti, args.dropoute)

size = 0
for p in model.parameters():
    size += p.nelement()
logging.info('param size: {}'.format(size))

# log the initial (randomly initialized) architecture genotype
logging.info('initial genotype:')
logging.info(model.genotype())

if args.cuda:
    if args.single_gpu:
        parallel_model = model.cuda()
    else:
        parallel_model = nn.DataParallel(model, dim=1).cuda()
else:
    parallel_model = model

architect = Architect(parallel_model, args)
total_params = sum(x.data.nelement() for x in model.parameters())
logging.info('Args: {}'.format(args))
logging.info('Model total parameters: {}'.format(total_params))
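# A minimal sketch of what the `batchify` helper used above is assumed to do,
# following the standard AWD-LSTM-style data pipeline; the actual helper in this
# codebase may differ. `batchify_sketch` is an illustrative name, not part of the
# repository: it trims the corpus to a multiple of the batch size and reshapes it
# into a [num_steps, batch_size] tensor of token ids.
import torch


def batchify_sketch(data, bsz, args):
    # data: 1-D LongTensor holding the token ids of an entire split
    nbatch = data.size(0) // bsz                 # full time steps per batch stream
    data = data.narrow(0, 0, nbatch * bsz)       # drop the ragged tail tokens
    data = data.view(bsz, -1).t().contiguous()   # -> shape [nbatch, bsz]
    if args.cuda:
        data = data.cuda()                       # move to GPU when requested
    return data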