def main(batch_size, baseline, reduction):
    """Train ResNet-20 (baseline) or SE-ResNet-20 on CIFAR-10 for 200 epochs.

    Args:
        batch_size: mini-batch size for both train and test loaders.
        baseline: if truthy, use plain resnet20; otherwise se_resnet20.
        reduction: SE-block channel reduction ratio (ignored for baseline).
    """
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    train_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('data/cifar10', train=True, download=True,
                         transform=transform_train),
        batch_size=batch_size, shuffle=True)
    # Fix: evaluation data does not need shuffling (was shuffle=True).
    test_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('data/cifar10', train=False,
                         transform=transform_test),
        batch_size=batch_size, shuffle=False)
    if baseline:
        model = resnet20()
    else:
        model = se_resnet20(num_classes=10, reduction=reduction)
    optimizer = optim.SGD(params=model.parameters(), lr=1e-1,
                          momentum=0.9, weight_decay=1e-4)
    # Decay LR by 10x every 80 epochs over the 200-epoch run.
    scheduler = StepLR(optimizer, 80, 0.1)
    trainer = Trainer(model, optimizer, F.cross_entropy)
    trainer.loop(200, train_loader, test_loader, scheduler)
def main(batch_size, data_root):
    """Train a 3-channel/3-class UNet on the balanced label split under data_root."""
    # Both splits share the same per-channel normalization statistics.
    normalize = transforms.Normalize(mean=[0.3301, 0.3301, 0.3301],
                                     std=[0.1938, 0.1938, 0.1938])
    to_tensor_and_norm = transforms.Compose([transforms.ToTensor(), normalize])

    train_data = MyDataset(mode='train',
                           txt=data_root + 'train_label_balance.txt',
                           transform=to_tensor_and_norm)
    test_data = MyDataset(mode='test',
                          txt=data_root + 'test_label_balance.txt',
                          transform=to_tensor_and_norm)

    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True,
                              num_workers=8, drop_last=True)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False,
                             num_workers=8)

    model = UNet(n_channels=3, n_classes=3)
    print(model)
    model = nn.DataParallel(model.cuda(), device_ids=[0])

    optimizer = optim.SGD(params=model.parameters(), lr=0.01, momentum=0.9,
                          weight_decay=1e-5)
    scheduler = StepLR(optimizer, 10, gamma=0.1)

    trainer = Trainer(model, optimizer, F.cross_entropy, save_dir=".")
    trainer.loop(50, train_loader, test_loader, scheduler)
def main():
    """Wire up model, loss, data and metrics from config.yaml and start training."""
    import sys
    import pathlib
    # Make the package importable whether run as a script or a module.
    __dir__ = pathlib.Path(os.path.abspath(__file__))
    sys.path.append(str(__dir__))
    sys.path.append(str(__dir__.parent.parent))
    from models import build_model, build_loss
    from data_loader import get_dataloader
    from utils import Trainer
    from utils import get_post_processing
    from utils import get_metric
    # Fix: close the config file handle instead of leaking it.
    with open('config.yaml', 'rb') as config_file:
        config = anyconfig.load(config_file)
    train_loader = get_dataloader(config['dataset']['train'])
    validate_loader = get_dataloader(config['dataset']['validate'])
    criterion = build_loss(config['loss']).cuda()
    model = build_model(config['arch'])
    post_p = get_post_processing(config['post_processing'])
    metric = get_metric(config['metric'])
    trainer = Trainer(config=config,
                      model=model,
                      criterion=criterion,
                      train_loader=train_loader,
                      post_process=post_p,
                      metric_cls=metric,
                      validate_loader=validate_loader)
    trainer.train()
def main(batch_size, data_root):
    """Train SE-ResNet-50 on an ImageNet-style directory tree.

    Args:
        batch_size: mini-batch size for both loaders.
        data_root: directory containing 'train' and 'val' ImageFolder trees.
    """
    transform_train = transforms.Compose([
        # Fix: RandomSizedCrop is a long-deprecated alias of RandomResizedCrop.
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    transform_test = transforms.Compose([
        # NOTE(review): the usual ImageNet eval pipeline resizes to 256 before
        # CenterCrop(224); as written, inputs must already be >= 224px — confirm.
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    traindir = os.path.join(data_root, 'train')
    valdir = os.path.join(data_root, 'val')
    train = datasets.ImageFolder(traindir, transform_train)
    val = datasets.ImageFolder(valdir, transform_test)
    train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size,
                                               shuffle=True, num_workers=8)
    # Fix: no need to shuffle the evaluation set (was shuffle=True).
    test_loader = torch.utils.data.DataLoader(val, batch_size=batch_size,
                                              shuffle=False, num_workers=8)
    se_resnet = se_resnet50(num_classes=1000)
    optimizer = optim.SGD(params=se_resnet.parameters(), lr=0.6,
                          momentum=0.9, weight_decay=1e-4)
    scheduler = StepLR(optimizer, 30, gamma=0.1)
    trainer = Trainer(se_resnet, optimizer, F.cross_entropy, save_dir=".")
    trainer.loop(100, train_loader, test_loader, scheduler)
def train(args):
    """Train a multi-agent controller on the configured environment.

    Reads an INI config, builds the environment and agent, runs the training
    loop, and saves the final model checkpoint.

    :param args: Namespace with at least `base_dir` and `config_dir`.
    """
    base_dir = args.base_dir
    dirs = init_dir(base_dir)
    init_log(dirs['log'])
    config_dir = args.config_dir
    # Keep a copy of the config next to the run's data for reproducibility.
    copy_file(config_dir, dirs['data'])
    config = configparser.ConfigParser()
    config.read(config_dir)

    # init env
    env = init_env(config['ENV_CONFIG'])
    logging.info('Training: a dim %r, agent dim: %d' % (env.n_a_ls, env.n_agent))

    # init step counter (values stored as floats in the INI, hence getfloat)
    total_step = int(config.getfloat('TRAIN_CONFIG', 'total_step'))
    test_step = int(config.getfloat('TRAIN_CONFIG', 'test_interval'))
    log_step = int(config.getfloat('TRAIN_CONFIG', 'log_interval'))
    global_counter = Counter(total_step, test_step, log_step)

    # init centralized or multi agent
    seed = config.getint('ENV_CONFIG', 'seed')
    model = init_agent(env, config['MODEL_CONFIG'], total_step, seed)

    # disable multi-threading for safe SUMO implementation
    summary_writer = tf.summary.FileWriter(dirs['log'])
    trainer = Trainer(env, model, global_counter, summary_writer, output_path=dirs['data'])
    trainer.run()

    # save model at the step the counter reached when training stopped
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...' % final_step)
    model.save(dirs['model'], final_step)
def main(batch_size, lr, momentum, epsilon, update_freq):
    """Train resnet20 for 200 epochs using the Shampoo optimizer."""
    loaders = get_dataloader(batch_size)
    train_loader, test_loader = loaders
    net = resnet20()
    shampoo_opt = Shampoo(params=net.parameters(),
                          lr=lr,
                          momentum=momentum,
                          weight_decay=1e-4,
                          epsilon=epsilon,
                          update_freq=update_freq)
    Trainer(net, shampoo_opt, F.cross_entropy).loop(200, train_loader, test_loader)
def main(batch_size, root):
    """Train SE-ResNet-50 across two GPUs with step-decayed SGD."""
    train_loader, test_loader = get_dataloader(batch_size, root)
    backbone = se_resnet50(num_classes=1000)
    parallel_net = nn.DataParallel(backbone, device_ids=[0, 1])
    sgd = optim.SGD(params=parallel_net.parameters(), lr=0.6,
                    momentum=0.9, weight_decay=1e-4)
    step_lr = StepLR(sgd, 30, gamma=0.1)
    trainer = Trainer(parallel_net, sgd, F.cross_entropy, save_dir=".")
    trainer.loop(100, train_loader, test_loader, step_lr)
def main(): logger.info('=> PyTorch Version: {}'.format(torch.__version__)) # Environment initialization device, pin_memory = init_device(args.seed, args.cpu, args.gpu, args.cpu_affinity) # Create the data loader train_loader, val_loader, test_loader = Cost2100DataLoader( root=args.data_dir, batch_size=args.batch_size, num_workers=args.workers, pin_memory=pin_memory, scenario=args.scenario)() # Define model model = init_model(args) model.to(device) # Define loss function criterion = nn.MSELoss().to(device) # Inference mode if args.evaluate: Tester(model, device, criterion)(test_loader) return # Define optimizer and scheduler lr_init = 1e-3 if args.scheduler == 'const' else 2e-3 optimizer = torch.optim.Adam(model.parameters(), lr_init) if args.scheduler == 'const': scheduler = FakeLR(optimizer=optimizer) else: scheduler = WarmUpCosineAnnealingLR(optimizer=optimizer, T_max=args.epochs * len(train_loader), T_warmup=30 * len(train_loader), eta_min=5e-5) # Define the training pipeline trainer = Trainer(model=model, device=device, optimizer=optimizer, criterion=criterion, scheduler=scheduler, resume=args.resume) # Start training trainer.loop(args.epochs, train_loader, val_loader, test_loader) # Final testing loss, rho, nmse = Tester(model, device, criterion)(test_loader) print(f"\n=! Final test loss: {loss:.3e}" f"\n test rho: {rho:.3e}" f"\n test NMSE: {nmse:.3e}\n")
def main(batch_size, root):
    """Train SE-ResNet-50 (345 classes) on every visible GPU."""
    train_loader, test_loader = get_dataloader(batch_size, root)
    device_ids = list(range(torch.cuda.device_count()))
    net = nn.DataParallel(se_resnet50(num_classes=345), device_ids=device_ids)
    # Linear LR scaling: base LR 0.6 at a reference batch size of 1024.
    sgd = optim.SGD(params=net.parameters(),
                    lr=0.6 / 1024 * batch_size,
                    momentum=0.9,
                    weight_decay=1e-4)
    step_lr = optim.lr_scheduler.StepLR(sgd, 30, gamma=0.1)
    trainer = Trainer(net, sgd, F.cross_entropy, save_dir=".")
    trainer.loop(100, train_loader, test_loader, step_lr)
def main(batch_size, root):
    """Run test-mode inference from the newest checkpoint(s) and write a
    prediction CSV ("key_id,word" header).

    Args:
        batch_size: mini-batch size for the test loader.
        root: dataset root passed to get_dataloader.
    """
    # The implementation of tensorboardX and topK accuracy is in utils.py.
    # test mode: 1=open, 0=close
    test_mode = 1
    # get checkpoint information (0 means no checkpoint was found)
    checkpoint_newest = get_checkPoint("./checkpoint/", 1)
    test_loader = get_dataloader(batch_size, root)
    gpus = list(range(torch.cuda.device_count()))
    # initialize your net/optimizer
    nameOfNet = "se_resnet_testResult.csv"
    se_resnet = nn.DataParallel(inceptionv4(num_classes=3), device_ids=gpus)
    optimizer = optim.SGD(params=se_resnet.parameters(),
                          lr=0.6 / 1024 * batch_size,
                          momentum=0.9, weight_decay=1e-4)
    # Guard clause: nothing to do without a checkpoint.
    if checkpoint_newest == 0:
        print("-------------No checkpoint available!!!!--------------")
        return
    csv_path = "./" + nameOfNet
    csv_writer = open(csv_path, "w")
    try:
        csv_writer.write("key_id,word\n")
        # Fix: idiomatic isinstance() instead of "isinstance(...) == False".
        if not isinstance(checkpoint_newest, list):
            checkpoint_newest_list = [checkpoint_newest]
        else:
            checkpoint_newest_list = checkpoint_newest
        for checkpoint_path in checkpoint_newest_list:
            print("The path of the pretrained model %s" % checkpoint_path)
            print("load pretrained model......")
            checkpoint = torch.load(checkpoint_path)
            se_resnet.load_state_dict(checkpoint['weight'])
            scheduler = optim.lr_scheduler.StepLR(optimizer, 30, gamma=0.1)
            print("The current epoch is %d" % checkpoint['epoch'])
            print("prepare to write the csv file for testing...")
            trainer = Trainer(se_resnet, optimizer, F.cross_entropy, batch_size,
                              csv_writer, save_dir="./checkpoint/", save_freq=1)
            # No training data in test mode.
            train_loader = None
            trainer.loop(checkpoint['epoch'], train_loader, test_loader,
                         checkpoint['epoch'], scheduler, test_mode)
    finally:
        # Fix: close the CSV file handle (was leaked).
        csv_writer.close()
def main(batch_size, baseline, reduction):
    """Train resnet20 (baseline) or se_resnet20 for 200 epochs with SGD + StepLR."""
    train_loader, test_loader = get_dataloader(batch_size)
    net = resnet20() if baseline else se_resnet20(num_classes=10,
                                                  reduction=reduction)
    sgd = optim.SGD(params=net.parameters(), lr=1e-1, momentum=0.9,
                    weight_decay=1e-4)
    step_lr = StepLR(sgd, 80, 0.1)
    trainer = Trainer(net, sgd, F.cross_entropy)
    trainer.loop(200, train_loader, test_loader, step_lr)
def main(batch_size):
    """Train the resnet classifier for 400 epochs with Adam.

    Args:
        batch_size: mini-batch size for both loaders.
    """
    train_loader, test_loader = get_dataloader(batch_size)
    model = resnet()
    optimizer = optim.Adam(params=model.parameters())
    # Fix: pass a loss *instance*. Passing the class meant the Trainer would
    # invoke nn.CrossEntropyLoss(input, target), i.e. call the constructor
    # with tensors instead of computing a loss.
    trainer = Trainer(model, optimizer, nn.CrossEntropyLoss(),
                      save_freq=1, save_dir=SAVE_PATH)
    trainer.loop(400, train_loader, test_loader)
def main():
    """Load COCO motivations data, build a ResNet-101 classifier, and train it."""
    # load data
    print("Loading dataset...")
    train_data = COCO_motivations_Dataset(data_root, train=True)
    val_data = COCO_motivations_Dataset(data_root, train=False)

    # Scale the per-step batch by the number of GPUs in use.
    base_batch = 2
    n_gpus = len(params.gpus)
    batch_size = base_batch if n_gpus == 0 else base_batch * n_gpus

    train_dataloader = DataLoader(train_data, batch_size=batch_size,
                                  shuffle=True, num_workers=num_workers)
    print('train dataset len: {}'.format(len(train_dataloader.dataset)))
    val_dataloader = DataLoader(val_data, batch_size=batch_size,
                                shuffle=False, num_workers=num_workers)
    print('val dataset len: {}'.format(len(val_dataloader.dataset)))

    # ResNet-101 backbone (batch_size=60, 1GPU Memory > 9000M); the head is
    # replaced with a 256-way linear layer (512 * 4 = expansion-4 feature width).
    model = resnet101(pretrained=False, modelpath=model_path, num_classes=1000)
    model.fc = nn.Linear(512 * 4, 256)

    # optimizer over trainable parameters only
    trainable_vars = [p for p in model.parameters() if p.requires_grad]
    print("Training with sgd")
    params.optimizer = torch.optim.SGD(trainable_vars,
                                       lr=init_lr,
                                       momentum=momentum,
                                       weight_decay=weight_decay,
                                       nesterov=nesterov)

    # Train with LR reduced on validation-loss plateaus.
    params.lr_scheduler = ReduceLROnPlateau(params.optimizer, 'min',
                                            factor=lr_decay, patience=10,
                                            cooldown=10, verbose=True)
    trainer = Trainer(model, params, train_dataloader, val_dataloader)
    trainer.train()
def main(batch_size, root, lrate):
    """Train SE-ResNet-50 (340 classes), resuming from the newest checkpoint
    under ./lr<lrate>/checkpoint/ when one exists.

    Args:
        batch_size: mini-batch size for both loaders.
        root: dataset root passed to get_dataloader.
        lrate: reference learning rate, scaled linearly by batch size / 1024.
    """
    #####################################################################
    "The implementation of tensorboardX and topK accuracy is in utils.py"
    #####################################################################
    # get checkpoint information (0 means none found)
    checkpoint_newest = get_checkPoint("./lr" + str(lrate) + "/checkpoint/")
    #TIMESTAMP = "{0:%Y-%m-%dT%H-%M-%S/}".format(datetime.now())
    # write log and visualize the losses of batches of training and testing
    TIMESTAMP = ""
    writer1 = SummaryWriter('./lr' + str(lrate) + '/tensorboard_log/batch/' + TIMESTAMP)
    # write log and visualize the accuracy of batches of training and testing
    writer2 = SummaryWriter('./lr' + str(lrate) + '/tensorboard_log/epoch/' + TIMESTAMP)
    train_loader, test_loader = get_dataloader(batch_size, root)
    gpus = list(range(torch.cuda.device_count()))
    # initialize your net/optimizer
    seresnet50 = nn.DataParallel(se_resnet50(num_classes=340), device_ids=gpus)
    optimizer = optim.SGD(params=seresnet50.parameters(),
                          lr=lrate / 1024 * batch_size,
                          momentum=0.9,
                          weight_decay=1e-4)
    # No existing checkpoint: train from scratch, starting at epoch 1.
    if checkpoint_newest == 0:
        scheduler = optim.lr_scheduler.StepLR(optimizer, 30, gamma=0.1)
        trainer = Trainer(seresnet50, optimizer, F.cross_entropy,
                          save_dir="./lr" + str(lrate) + "/checkpoint/",
                          writer1=writer1, writer2=writer2, save_freq=1)
        trainer.loop(50, train_loader, test_loader, 1, scheduler)
    # Load the existing checkpoint and resume from the saved epoch.
    else:
        print("The path of the pretrained model %s" % checkpoint_newest)
        print("load pretrained model......")
        checkpoint = torch.load(checkpoint_newest)
        seresnet50.load_state_dict(checkpoint['weight'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        # last_epoch keeps the LR schedule aligned with the resumed epoch.
        scheduler = optim.lr_scheduler.StepLR(optimizer, 30, gamma=0.1,
                                              last_epoch=checkpoint['epoch'])
        print("The current epoch is %d" % checkpoint['epoch'])
        trainer = Trainer(seresnet50, optimizer, F.cross_entropy,
                          save_dir="./lr" + str(lrate) + "/checkpoint/",
                          writer1=writer1, writer2=writer2, save_freq=1)
        trainer.loop(100,
                     train_loader, test_loader, checkpoint['epoch'] + 1, scheduler)
def main(batch_size, baseline, reduction):
    """Train densenet121 (baseline) or se_densenet121 for 100 epochs.

    NOTE: `reduction` is accepted for interface parity but not forwarded to
    se_densenet121 here.
    """
    train_loader, test_loader = get_dataloader(batch_size)
    net = densenet121() if baseline else se_densenet121(num_classes=10)
    sgd = optim.SGD(params=net.parameters(), lr=1e-1, momentum=0.9,
                    weight_decay=1e-4)
    step_lr = optim.lr_scheduler.StepLR(sgd, 80, 0.1)
    trainer = Trainer(net, sgd, F.cross_entropy, save_dir="weights")
    trainer.loop(100, train_loader, test_loader, step_lr)
def test_create_allows_model_instance_as_parameter_for_foreign_key_field(
        self):
    """A foreign-key field populated from a primary key resolves to the row."""
    owner = Trainer.create(name='James', age=21)
    pokemon = Pokemon.create(name='Meowth', level=19, trainer=owner.pk)
    assert owner.pk == 1
    assert pokemon.trainer.pk == owner.pk
def test_update_one_field_with_subquery_with_filter(self):
    """An UPDATE whose WHERE clause compares against a SELECT subquery only
    touches the matching row."""
    self.add_trainer(['James', 'Jessie'])
    # Subquery that resolves to Jessie's name.
    jessie_name = Trainer.select(Trainer.name).where(Trainer.name == 'Jessie')
    # Set age=42 only where name matches the subquery result.
    UpdateQuery(db=self.db).table(Trainer).fields(Trainer.age == 42).where(Trainer.name == jessie_name).execute()
    # James was inserted first, so he is the first fetched row (relies on
    # insertion order of the unordered SELECT — presumably stable in SQLite).
    james, jessie = Trainer._db._connection.execute("SELECT age FROM trainer").fetchall()
    assert jessie[0] == 42
    # James's age (21, set by add_trainer) must be untouched.
    assert james[0] == 21
def make_crf_trainer(_run, min_freq=1, c2=1.0, max_iter=2**31 - 1):
    """Build an L-BFGS CRF trainer configured with the given hyperparameters.

    :param _run: experiment run handle forwarded to Trainer.
    :param min_freq: minimum feature frequency kept ('feature.minfreq').
    :param c2: L2 regularization coefficient.
    :param max_iter: iteration cap (defaults to effectively unlimited).
    """
    return Trainer(
        _run,
        algorithm='lbfgs',
        params={
            'feature.minfreq': min_freq,
            'c2': c2,
            'max_iterations': max_iter,
        })
def __init__(self, args, data_settings):
    """Initialize accumulators for merging results across validation splits.

    :param args: Namespace of run options (reads `epochs` and `experiment`).
    :param data_settings: dict describing the dataset; reads keys
        "separate_conditions", "devices" and "conditions".
    """
    self.separated_inputs = data_settings["separate_conditions"]
    self.device_names = data_settings["devices"]
    self.conditions = data_settings["conditions"]
    # Per-split results, appended to by later methods.
    self.elbo = []
    self.elbo_list = []
    self.epoch = args.epochs
    # Both name and label default to the experiment identifier.
    self.name = args.experiment
    self.label = args.experiment
    self.log_normalized_iws = []
    self.precisions = []
    self.q_names = []
    self.q_values = []
    self.splits = []
    self.theta = []
    self.X_post_sample = []
    self.X_sample = []
    # from data_pair.val
    self.data_ids = []
    self.devices = []
    self.treatments = []
    # Trainer owns the log/output directory (timestamped).
    self.trainer = trainer = Trainer(args, add_timestamp=True)
    self.X_obs = []
    # Attributes initialized elsewhere
    self.chunk_sizes = None
    self.ids = None
    self.names = None
    self.times = None
    self.xval_writer = None
def main():
    """Run a single (non-crossvalidated) split and save the merged results."""
    cli = create_parser(True)
    args = cli.parse_args()
    # spec is a dict of dicts of dicts loaded from the YAML config.
    spec = load_config_file(args.yaml)
    trainer = Trainer(args, args.yaml, add_timestamp=True)
    merger = XvalMerge(args, spec["data"], trainer)
    data_pair, val_results = run_on_split(args, split=None, trainer=trainer)
    merger.add(1, data_pair, val_results)
    merger.finalize()
    merger.save(merger.trainer.tb_log_dir)
def train(args):
    """Train an IDDPG or MADDPG agent on the configured SUMO environment.

    Reads an INI config, builds the environment and the agent matching
    env.agent, runs the training loop, and saves the final model.

    :param args: Namespace with `base_dir`, `config_dir` and `test_mode`.
    """
    base_dir = args.base_dir
    dirs = init_dir(base_dir)
    init_log(dirs['log'])
    config_dir = args.config_dir
    # Keep a copy of the config next to the run's data for reproducibility.
    copy_file(config_dir, dirs['data'])
    config = configparser.ConfigParser()
    config.read(config_dir)
    # Flags for online/offline testing derived from the CLI test mode.
    in_test, post_test = init_test_flag(args.test_mode)

    # init env
    env = init_env(config['ENV_CONFIG'])
    logging.info('Training: s dim: %d, a dim %d, s dim ls: %r, a dim ls: %r' %
                 (env.n_s, env.n_a, env.n_s_ls, env.n_a_ls))

    # init step counter (values stored as floats in the INI, hence getfloat)
    total_step = int(config.getfloat('TRAIN_CONFIG', 'total_step'))
    test_step = int(config.getfloat('TRAIN_CONFIG', 'test_interval'))
    log_step = int(config.getfloat('TRAIN_CONFIG', 'log_interval'))
    global_counter = Counter(total_step, test_step, log_step)

    # init centralized or multi agent, selected by the environment's agent tag
    seed = config.getint('ENV_CONFIG', 'seed')
    if env.agent == 'iddpg':
        model = IDDPG(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                      config['MODEL_CONFIG'], seed=seed)
    elif env.agent == 'maddpg':
        #TODO: Add MADDPG
        model = MADDPG(env.n_s_ls, env.n_a_ls, env.n_w_ls, env.n_f_ls, total_step,
                       config['MODEL_CONFIG'], seed=seed)
    # NOTE(review): any other env.agent value leaves `model` undefined — confirm
    # the config guarantees one of the two agents above.
    summary_writer = tf.summary.FileWriter(dirs['log'])
    trainer = Trainer(env, model, global_counter, summary_writer, in_test,
                      output_path=dirs['data'])
    trainer.run()
    # NOTE(review): offline post-testing was here but is disabled:
    #if post_test:
    #    tester = Tester(env, model, global_counter, summary_writer, dirs['data'])
    #    tester.run_offline(dirs['data'])

    # save model at the step the counter reached when training stopped
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...' % final_step)
    model.save(dirs['model'], final_step)
def main(batch_size, baseline, reduction, data_path, checkpoint_path, lr,
         checkpoint_name=None):
    """Train resnet20/se_resnet20 on CIFAR-10, optionally resuming a checkpoint.

    Args:
        batch_size: mini-batch size for both loaders.
        baseline: if truthy, train plain resnet20; otherwise se_resnet20.
        reduction: SE-block channel reduction ratio (ignored for baseline).
        data_path: dataset root passed to get_dataloader.
        checkpoint_path: directory used for saving (and loading) checkpoints.
        lr: initial SGD learning rate.
        checkpoint_name: optional checkpoint filename to resume from.
    """
    train_loader, test_loader = get_dataloader(batch_size, data_path)
    if baseline:
        model = resnet20()
    else:
        model = se_resnet20(num_classes=10, reduction=reduction)
    optimizer = optim.SGD(params=model.parameters(), lr=lr,
                          momentum=0.9, weight_decay=1e-4)
    scheduler = StepLR(optimizer, 80, 0.1)
    # Fix: don't shadow the builtin `dict`.
    state = model.state_dict()
    print(state.keys())
    trainer = Trainer(model, optimizer, F.cross_entropy, save_dir=checkpoint_path)
    # Load model parameters from an existing checkpoint, if requested.
    if checkpoint_name is not None:  # Fix: identity comparison with None
        ckpt_dir = Path(checkpoint_path) / checkpoint_name
        model.load_state_dict(torch.load(ckpt_dir)["weight"])
        print("checkpoint load successfully!")
        # Seed the best-accuracy tracker with the restored model's accuracy.
        trainer.max_acc = max(trainer.test(test_loader), trainer.max_acc)
    trainer.loop(200, train_loader, test_loader, scheduler)
def test_create_many(self):
    """create_many inserts every given row with its field values intact."""
    connection = Trainer._db._connection
    count_sql = ("SELECT count(*) FROM trainer "
                 "WHERE name = 'Giovanni' OR name = 'James'")
    select_sql = ("SELECT name, age FROM trainer "
                  "WHERE name = 'Giovanni' OR name = 'James'")

    # Neither trainer exists before the bulk insert.
    assert connection.execute(count_sql).fetchone()[0] == 0

    Trainer.create_many([
        {'name': 'Giovanni', 'age': 42},
        {'name': 'James', 'age': 21},
    ])

    rows = connection.execute(select_sql).fetchall()
    first, second = rows
    assert first[0] == 'Giovanni'
    assert first[1] == 42
    assert second[0] == 'James'
    assert second[1] == 21
def train():
    """Train the classifier with Adam + cosine annealing, save it, then plot."""
    model = mdl.Classifier()
    recorder = Recorder()
    adam = torch.optim.Adam(model.parameters(),
                            weight_decay=constants.WEIGHT_DECAY)
    cosine = torch.optim.lr_scheduler.CosineAnnealingLR(adam, constants.EPOCHS)
    trainer = Trainer(model, adam, cosine, recorder)
    trainer.fit(constants.EPOCHS)
    trainer.save_model()
    recorder.plot()
def __init__(self, args, split=None, trainer=None):
    """
    :param args: a Namespace, from argparse.parse_args
    :param split: an integer between 1 and args.folds inclusive, or None
    :param trainer: a Trainer instance, or None (a new one is built from args)
    """
    self.procdata = None
    # Command-line arguments (Namespace), normalized for the given split
    self.args = self._tidy_args(args, split)
    self._fix_random_seed()
    # TODO(dacart): introduce a switch to allow non-GPU use, achieved with:
    # os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    # Utility methods for training a model
    self.trainer = trainer or Trainer(args, args.yaml, add_timestamp=True)
    # Attributes set in other methods:
    # Conditions, input signals of the data that are being modelled
    self.conditions = None
    # DatasetPair, from a training Dataset and validation Dataset
    self.dataset_pair = None
    # Decoder and encoder networks
    self.decoder = None
    self.encoder = None
    # Number of instances in a batch (int)
    self.n_batch = None
    # Number of "theta" parameters: local, global-conditional and global (int)
    self.n_theta = None
    # Collection of attributes related to training objective
    self.objective = None
    # Value of spec["params"] from YAML file (dict)
    self.params_dict = None
    # Collection of placeholder attributes, each a Tensor, fed with new values for each batch
    self.placeholders = None
    # Training feed_dict: dict from placeholder Tensor to np.array
    self.train_feed_dict = None
    # TrainingStepper object
    self.training_stepper = None
    # Validation feed_dict keys: dict from placeholder Tensor to np.array
    self.val_feed_dict = None
    # Model path for storing best weights so far
    self.model_path = os.path.join(self.trainer.tb_log_dir, 'saver', 'sess_max_elbo')
def main():
    """Train a WaveNet-style audio-to-pianoroll model with TF1 queue input.

    Loads model/training hyper-parameters from JSON files, builds the input
    pipeline, loss graph and summaries, then runs the training loop with
    periodic checkpointing and validation. Logging/evaluation cadence is
    re-read from a runtime switches file on every step.
    """
    args = get_arguments()
    with open(args.model_params, 'r') as f:
        model_params = json.load(f)
    with open(args.training_params, 'r') as f:
        train_params = json.load(f)
    try:
        directories = validate_directories(args)
    except ValueError as e:
        print('Some arguments are wrong:')
        print(str(e))
        return
    logdir = directories['logdir']
    restore_from = directories['restore_from']
    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from
    receptive_field = WaveNetModel.calculate_receptive_field(
        model_params['filter_width'],
        model_params['dilations'],
        model_params['initial_filter_width'])
    # Save arguments and model params into file
    save_run_config(args, receptive_field, STARTED_DATESTRING, logdir)
    # Create coordinator.
    coord = tf.train.Coordinator()
    # Create data loader.
    with tf.name_scope('create_inputs'):
        reader = WavMidReader(data_dir=args.data_dir_train,
                              coord=coord,
                              audio_sample_rate=model_params['audio_sr'],
                              receptive_field=receptive_field,
                              velocity=args.velocity,
                              sample_size=args.sample_size,
                              queues_size=(10, 10*args.batch_size))
        data_batch = reader.dequeue(args.batch_size)
    # Create model.
    net = WaveNetModel(
        batch_size=args.batch_size,
        dilations=model_params['dilations'],
        filter_width=model_params['filter_width'],
        residual_channels=model_params['residual_channels'],
        dilation_channels=model_params['dilation_channels'],
        skip_channels=model_params['skip_channels'],
        output_channels=model_params['output_channels'],
        use_biases=model_params['use_biases'],
        initial_filter_width=model_params['initial_filter_width'])
    # Placeholders: variable-length audio in, per-frame label matrix out.
    input_data = tf.placeholder(dtype=tf.float32,
                                shape=(args.batch_size, None, 1))
    input_labels = tf.placeholder(dtype=tf.float32,
                                  shape=(args.batch_size, None,
                                         model_params['output_channels']))
    loss, probs = net.loss(input_data=input_data,
                           input_labels=input_labels,
                           pos_weight=train_params['pos_weight'],
                           l2_reg_str=train_params['l2_reg_str'])
    optimizer = optimizer_factory[args.optimizer](
        learning_rate=train_params['learning_rate'],
        momentum=train_params['momentum'])
    trainable = tf.trainable_variables()
    optim = optimizer.minimize(loss, var_list=trainable)
    # Set up logging for TensorBoard.
    writer = tf.summary.FileWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.summary.merge_all()
    histograms = tf.summary.merge_all(key=HKEY)
    # Separate summary ops for validation, since they are
    # calculated only once per evaluation cycle.
    with tf.name_scope('validation_summaries'):
        metric_summaries = metrics_empty_dict()
        metric_value = tf.placeholder(tf.float32)
        for name in metric_summaries.keys():
            metric_summaries[name] = tf.summary.scalar(name, metric_value)
        images_buffer = tf.placeholder(tf.string)
        images_batch = tf.stack(
            [tf.image.decode_png(images_buffer[0], channels=4),
             tf.image.decode_png(images_buffer[1], channels=4),
             tf.image.decode_png(images_buffer[2], channels=4)])
        images_summary = tf.summary.image('estim', images_batch)
        audio_data = tf.placeholder(tf.float32)
        audio_summary = tf.summary.audio('input', audio_data,
                                         model_params['audio_sr'])
    # Set up session
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)
    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables(),
                           max_to_keep=args.max_checkpoints)
    # Trainer for keeping best validation-performing model
    # and optional early stopping.
    trainer = Trainer(sess, logdir, train_params['early_stop_limit'], 0.999)
    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1
    # NOTE(review): bare except re-raises, so nothing is swallowed, but it
    # should still name the expected exception types.
    except:
        print('Something went wrong while restoring checkpoint. '
              'Training will be terminated to avoid accidentally '
              'overwriting the previous model.')
        raise
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)
    step = None
    last_saved_step = saved_global_step
    try:
        for step in range(saved_global_step + 1, train_params['num_steps']):
            waveform, pianoroll = sess.run([data_batch[0], data_batch[1]])
            feed_dict = {input_data : waveform, input_labels : pianoroll}
            # Reload switches from file on each step
            with open(RUNTIME_SWITCHES, 'r') as f:
                switch = json.load(f)
            start_time = time.time()
            if switch['store_meta'] and step % switch['store_every'] == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run(
                    [summaries, loss, optim],
                    feed_dict=feed_dict,
                    options=run_options,
                    run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                summary, loss_value, _ = sess.run([summaries, loss, optim],
                                                  feed_dict=feed_dict)
                writer.add_summary(summary, step)
            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'
                  .format(step, loss_value, duration))
            if step % switch['checkpoint_every'] == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step
            # Evaluate model performance on validation data
            if step % switch['evaluate_every'] == 0:
                if switch['histograms']:
                    hist_summary = sess.run(histograms)
                    writer.add_summary(hist_summary, step)
                print('evaluating...')
                # (tp, fp, fn, ...) sums accumulated over validation chunks.
                stats = 0, 0, 0, 0, 0, 0
                est = np.empty([0, model_params['output_channels']])
                ref = np.empty([0, model_params['output_channels']])
                # Batch-cumulation buffers and countdown counter.
                b_data, b_labels, b_cntr = (
                    np.empty((0, args.sample_size + receptive_field - 1, 1)),
                    np.empty((0, model_params['output_channels'])),
                    args.batch_size)
                # if (batch_size * sample_size > valid_data) single_pass() again
                while est.size == 0:  # and ref.size == 0 and sum(stats) == 0 ...
                    for data, labels in reader.single_pass(
                            sess, args.data_dir_valid):
                        # cumulate batch
                        if b_cntr > 1:
                            b_data, b_labels, decr = cumulateBatch(
                                data, labels, b_data, b_labels)
                            b_cntr -= decr
                            continue
                        elif args.batch_size > 1:
                            b_data, b_labels, decr = cumulateBatch(
                                data, labels, b_data, b_labels)
                            if not decr:
                                continue
                            data = b_data
                            labels = b_labels
                            # reset batch cumulation variables
                            b_data, b_labels, b_cntr = (
                                np.empty((
                                    0,
                                    args.sample_size + receptive_field - 1,
                                    1)),
                                np.empty((0,
                                          model_params['output_channels'])),
                                args.batch_size)
                        predictions = sess.run(
                            probs, feed_dict={input_data : data})
                        # Aggregate sums for metrics calculation
                        stats_chunk = calc_stats(
                            predictions, labels, args.threshold)
                        stats = tuple([sum(x)
                                       for x in zip(stats, stats_chunk)])
                        est = np.append(est, predictions, axis=0)
                        ref = np.append(ref, labels, axis=0)
                metrics = calc_metrics(None, None, None, stats=stats)
                write_metrics(metrics, metric_summaries, metric_value,
                              writer, step, sess)
                # Track best validation F1 (early stopping / best-model keep).
                trainer.check(metrics['f1_measure'])
                # Render evaluation results
                if switch['log_image'] or switch['log_sound']:
                    sub_fac = int(model_params['audio_sr']/switch['midi_sr'])
                    est = roll_subsample(est.T, sub_fac)
                    ref = roll_subsample(ref.T, sub_fac)
                if switch['log_image']:
                    write_images(est, ref, switch['midi_sr'], args.threshold,
                                 (8, 6), images_summary, images_buffer,
                                 writer, step, sess)
                if switch['log_sound']:
                    write_audio(est, ref, switch['midi_sr'],
                                model_params['audio_sr'], 0.007,
                                audio_summary, audio_data, writer, step, sess)
    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        if step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
        flush_n_close(writer, sess)
def train(args):
    """Train a traffic-signal control agent according to a config file.

    Sets up output directories, builds the environment and the agent model
    named in the config, runs the training loop, optionally performs an
    offline post-training test, and saves the final checkpoint.
    """
    # Output directories (log/data/model) rooted at args.base_dir.
    out_dirs = init_dir(args.base_dir)
    init_log(out_dirs['log'])
    copy_file(args.config_dir, out_dirs['data'])

    cfg = configparser.ConfigParser()
    cfg.read(args.config_dir)
    in_test, post_test = init_test_flag(args.test_mode)

    # The environment comes first: agent dimensions are derived from it.
    env = init_env(cfg['ENV_CONFIG'])
    logging.info('Training: s dim: %d, a dim %d, s dim ls: %r, a dim ls: %r',
                 env.n_s, env.n_a, env.n_s_ls, env.n_a_ls)

    # Global step counter shared by trainer and tester.
    total_step = int(cfg.getfloat('TRAIN_CONFIG', 'total_step'))
    test_step = int(cfg.getfloat('TRAIN_CONFIG', 'test_interval'))
    log_step = int(cfg.getfloat('TRAIN_CONFIG', 'log_interval'))
    global_counter = Counter(total_step, test_step, log_step)

    # Select the centralized or multi-agent model named in the config.
    seed = cfg.getint('ENV_CONFIG', 'seed')
    agent = env.agent
    if agent == 'ia2c':
        model = IA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                     cfg['MODEL_CONFIG'], seed=seed)
    elif agent == 'ma2c':
        model = MA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, env.n_f_ls,
                     total_step, cfg['MODEL_CONFIG'], seed=seed)
    elif agent == 'iqld':
        model = IQL(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                    cfg['MODEL_CONFIG'], seed=0, model_type='dqn')
    else:
        # Fallback: independent Q-learning with a linear model.
        model = IQL(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                    cfg['MODEL_CONFIG'], seed=0, model_type='lr')

    # Multi-threaded train/test is disabled for safe SUMO operation;
    # training runs to completion before any offline test.
    summary_writer = tf.summary.FileWriter(out_dirs['log'])
    trainer = Trainer(env, model, global_counter, summary_writer, in_test,
                      output_path=out_dirs['data'])
    trainer.run()

    # Post-training offline evaluation, if requested.
    if post_test:
        tester = Tester(env, model, global_counter, summary_writer,
                        out_dirs['data'])
        tester.run_offline(out_dirs['data'])

    # Persist the final model.
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...', final_step)
    model.save(out_dirs['model'], final_step)
),batch_size=args.batch_size,shuffle=True,**kwargs ) test_loader = torch.utils.data.DataLoader( datasets.CIFAR100('../data',train=False, transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize((.5,.5,.5),(.5,.5,.5)) ]) ), batch_size = args.test_batch_size,shuffle=True,**kwargs ) model = vgg(margin=args.margin) optimizer = optim.SGD(model.parameters(),lr=args.lr,momentum=args.momentum,weight_decay=args.weight_decay) criterion = nn.CrossEntropyLoss() print('\nNormal Training \n') trainer = Trainer( model=model, optimizer=optimizer, criterion=criterion, start_epoch=args.start_epoch, epochs=args.epochs, cuda=args.cuda, log_interval=args.log_interval, train_loader=train_loader, test_loader=test_loader, root=args.root, ) trainer.start()
logger = SummaryWriter(args.log_dir) # ROIScoreWriter score_writer = ROIScoreWriter(os.path.join(args.log_dir, 'scores.csv'), ROIs) else: logger = None timer = time.time() start = timer trainer = Trainer( model, optimizer, loss_fn=loss_fn, accu_fn='dice', load_checkpoint=args.checkpoint, logger=logger, ) validator = Validator( trainer.model, threshold=config['output_threshold'], ) if args.validate_only: validator.run(data_gen['valid']) logger.close() print('Total:', time.time() - start) exit(0)
# Build the segmentation network requested by the config; any mode other
# than UNET or FPN is rejected up front.
if MODEL["mode"] not in ("UNET", "FPN"):
    raise ValueError('Model type is not correct: `{}`.'.format(MODEL["mode"]))

if MODEL["mode"] == "UNET":
    model = UnetResNet(encoder_name=MODEL["backbone"],
                       num_classes=MODEL["num_classes"],
                       input_channels=3,
                       num_filters=32,
                       Dropout=0.3,
                       res_blocks_dec=MODEL["unet_res_blocks_decoder"])
else:  # FPN
    model = FPN(encoder_name=MODEL["backbone"],
                decoder_pyramid_channels=256,
                decoder_segmentation_channels=128,
                classes=MODEL["num_classes"],
                dropout=0.3,
                activation='sigmoid',
                final_upsampling=4,
                decoder_merge_policy='add')

model_trainer = Trainer(model=model,
                        image_dataset=image_dataset,
                        optimizer=optim.Adam,
                        **TRAINING)
model_trainer.start(trainset, valset)

# Keep a copy of the training config next to the saved model artifacts.
copyfile(args.config_path,
         os.path.join(TRAINING["model_path"], "train_config.yaml"))
def train(args):
    """Train a traffic-signal control agent (extended agent set).

    Like the basic trainer, but additionally supports the 'codql' (mean-field
    Q-learning), 'dqn', and 'ddpg' agent types from the config file.
    """
    # Output directories (log/data/model) rooted at args.base_dir.
    out_dirs = init_dir(args.base_dir)
    init_log(out_dirs['log'])
    copy_file(args.config_dir, out_dirs['data'])

    cfg = configparser.ConfigParser()
    cfg.read(args.config_dir)
    # NOTE(review): post_test is computed but never used in this variant —
    # there is no post-training test phase here; confirm this is intentional.
    in_test, post_test = init_test_flag(args.test_mode)

    # The environment comes first: agent dimensions are derived from it.
    env = init_env(cfg['ENV_CONFIG'])
    logging.info('Training: s dim: %d, a dim %d, s dim ls: %r, a dim ls: %r',
                 env.n_s, env.n_a, env.n_s_ls, env.n_a_ls)

    # Global step counter shared by trainer and tester.
    total_step = int(cfg.getfloat('TRAIN_CONFIG', 'total_step'))   # e.g. 1e6
    test_step = int(cfg.getfloat('TRAIN_CONFIG', 'test_interval'))  # e.g. 2e4
    log_step = int(cfg.getfloat('TRAIN_CONFIG', 'log_interval'))    # e.g. 1e4
    global_counter = Counter(total_step, test_step, log_step)

    # Select the agent model named in the config.
    seed = cfg.getint('ENV_CONFIG', 'seed')
    agent = env.agent
    if agent == 'ia2c':
        model = IA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                     cfg['MODEL_CONFIG'], seed=seed)
    elif agent == 'ma2c':
        model = MA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, env.n_f_ls,
                     total_step, cfg['MODEL_CONFIG'], seed=seed)
    elif agent == 'codql':
        print('This is codql')
        # Per-agent dimensions are taken from the first agent's entry;
        # presumably all agents share the same dims — TODO confirm.
        num_agents = len(env.n_s_ls)
        print('num_agents:', num_agents)
        a_dim = env.n_a_ls[0]
        print('a_dim:', a_dim)
        s_dim = env.n_s_ls[0]
        print('env.n_s_ls=', s_dim)
        s_dim_wait = env.n_w_ls[0]
        print('s_dim_wait:', s_dim_wait)
        model = MFQ(nb_agent=num_agents, a_dim=a_dim, s_dim=s_dim,
                    s_dim_wave=s_dim - s_dim_wait, s_dim_wait=s_dim_wait,
                    config=cfg['MODEL_CONFIG'])
    elif agent == 'dqn':
        # doubleQ=False selects plain DQN; True would give double DQN.
        model = DQN(nb_agent=len(env.n_s_ls), a_dim=env.n_a_ls[0],
                    s_dim=env.n_s_ls[0],
                    s_dim_wave=env.n_s_ls[0] - env.n_w_ls[0],
                    s_dim_wait=env.n_w_ls[0],
                    config=cfg['MODEL_CONFIG'], doubleQ=False)
    elif agent == 'ddpg':
        model = DDPGEN(nb_agent=len(env.n_s_ls), share_params=True,
                       a_dim=env.n_a_ls[0], s_dim=env.n_s_ls[0],
                       s_dim_wave=env.n_s_ls[0] - env.n_w_ls[0],
                       s_dim_wait=env.n_w_ls[0])
    elif agent == 'iqld':
        model = IQL(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                    cfg['MODEL_CONFIG'], seed=0, model_type='dqn')
    else:
        # Fallback: independent Q-learning with a linear model.
        model = IQL(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                    cfg['MODEL_CONFIG'], seed=0, model_type='lr')

    summary_writer = tf.summary.FileWriter(out_dirs['log'])
    trainer = Trainer(env, model, global_counter, summary_writer, in_test,
                      output_path=out_dirs['data'])
    trainer.run()

    # Persist the final model.
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...', final_step)
    model.save(out_dirs['model'], final_step)
# Default training hyper-parameters; the command-line flags parsed below
# may override several of these at run time.
num_workers = 48
num_classes = 2
model_name = "shufflenet"
train_txt = ""
test_txt = ""
model_type = "s2"
model_width = 0.25
init_lr = 0.01
lr_decay = 0.8
momentum = 0.9
weight_decay = 0.000
nesterov = True

# Set Training parameters
params = Trainer.TrainParams()
params.max_epoch = 1000
params.criterion = nn.CrossEntropyLoss()
params.gpus = [2]  # set 'params.gpus=[]' to use CPU mode
# NOTE(review): model_path is not defined in this chunk — presumably set
# earlier in the file; verify before running.
params.save_dir = model_path
params.ckpt = None  # no checkpoint to resume from
params.save_freq_epoch = 2  # write a checkpoint every 2 epochs

# Command-line interface for overriding the defaults above.
parser = argparse.ArgumentParser()
parser.add_argument('--image_w', type=int, default=64)
parser.add_argument('--image_h', type=int, default=64)
parser.add_argument('--num_classes', type=int, default=2)
parser.add_argument('--batch_size', type=int, default=528)
parser.add_argument('--num_workers', type=int, default=24)
parser.add_argument('--model_name', type=str, default=None)
parser.add_argument('--train_txt', type=str, default=None)