Esempio n. 1
0
def train():
    """Train a model for ``cfg.EPOCHS`` epochs, logging per-epoch losses to CSV.

    All settings come from the module-level ``cfg`` namespace (GPU, LR,
    BATCH_SIZE, NUM_WORKERS, EPOCHS, LOG_DIR, WEIGHTS_DIR).  A checkpoint is
    written every 5 epochs under logs/weights/ and a final 'model_last.pth'
    under cfg.WEIGHTS_DIR.
    """
    device = torch.device('cuda' if cfg.GPU[0] >= 0 else 'cpu')

    # NOTE(review): start_epoch is hard-coded to 1, so the resume branches
    # below ('a' append mode / load_model) are currently dead code kept for
    # manual resuming — confirm whether resume should be wired to cfg.
    start_epoch = 1
    if start_epoch == 1:
        train_log = open(os.path.join(cfg.LOG_DIR, "train_log.csv"), 'w')
        train_log_title = "epoch,total_loss,classify_loss,angle_loss,iou_loss\n"
        train_log.write(train_log_title)
        train_log.flush()
    else:
        train_log = open(os.path.join(cfg.LOG_DIR, "train_log.csv"), 'a')

    try:
        print('Creating model...')
        model = create_model()
        if start_epoch != 1:
            model = load_model(
                model, 'logs/weights/model_epoch_{}.pth'.format(start_epoch - 1))
        optimizer = torch.optim.Adam(model.parameters(), cfg.LR)

        trainer = Trainer(model, optimizer)
        trainer.set_device(device)
        print('Setting up data...')
        train_loader = DataLoader(LatexDataset(),
                                  batch_size=cfg.BATCH_SIZE,
                                  shuffle=True,
                                  num_workers=cfg.NUM_WORKERS,
                                  pin_memory=True,
                                  drop_last=True)
        print('Starting training...')
        epoch = start_epoch  # keeps `epoch` defined even if cfg.EPOCHS == 0
        for epoch in range(start_epoch, start_epoch + cfg.EPOCHS):
            trainer.train(epoch, train_loader, train_log)
            if epoch % 5 == 0:  # periodic checkpoint
                save_model('logs/weights/model_epoch_{}.pth'.format(epoch), epoch,
                           model)

        save_model(os.path.join(cfg.WEIGHTS_DIR, 'model_last.pth'), epoch, model)
    finally:
        # Bug fix: the CSV log was never closed, risking lost buffered rows
        # on an exception or interpreter exit.
        train_log.close()
Esempio n. 2
0
def main(opt):
  """Train/evaluate driver.

  Builds the model, optimizer and data loaders from the parsed option
  namespace `opt`, then runs the epoch loop with periodic validation,
  checkpointing and step-wise learning-rate decay.  Returns early (None)
  when `opt.eval` is set.  `opt` is mutated in place (device, heads).
  """
  torch.manual_seed(opt.seed)
  # cudnn autotune unless explicitly disabled or this is an eval-only run
  torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.eval
  Dataset = get_dataset(opt.dataset)
  opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
  print(opt)
  if not opt.not_set_cuda_env:
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
  opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')
  logger = Logger(opt)

  print('Creating model...')
  model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
  optimizer = get_optimizer(opt, model)
  start_epoch = 0
  lr = opt.lr

  if opt.load_model != '':
    # Resuming also restores optimizer state and the epoch counter.
    model, optimizer, start_epoch = load_model(
      model, opt.load_model, opt, optimizer)

  trainer = Trainer(opt, model, optimizer)
  trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)
  
  # NOTE(review): if this condition is false, val_loader is never bound, yet
  # the validation branch in the epoch loop can still fire when
  # num_epochs == val_intervals — confirm option constraints upstream.
  if opt.val_intervals < opt.num_epochs or opt.eval:
    print('Setting up validation data...')
    val_loader = torch.utils.data.DataLoader(
      Dataset(opt, opt.val_split), batch_size=1, shuffle=False, 
              num_workers=1, pin_memory=True)

    if opt.eval:
      # Eval-only mode: run validation once at "epoch 0", then exit.
      _, preds = trainer.val(0, val_loader)
      val_loader.dataset.run_eval(preds, opt.save_dir, n_plots=opt.eval_n_plots, 
                                  render_curves=opt.eval_render_curves)
      return

  print('Setting up train data...')
  train_loader = torch.utils.data.DataLoader(
      Dataset(opt, opt.train_split), batch_size=opt.batch_size, 
        shuffle=opt.shuffle_train, num_workers=opt.num_workers, 
        pin_memory=True, drop_last=True
  )

  print('Starting training...')
  for epoch in range(start_epoch + 1, opt.num_epochs + 1):
    mark = epoch if opt.save_all else 'last'

    # log learning rate (only the first param group is recorded)
    for param_group in optimizer.param_groups:
      lr = param_group['lr']
      logger.scalar_summary('LR', lr, epoch)
      break
    
    # train one epoch
    log_dict_train, _ = trainer.train(epoch, train_loader)
    logger.write('epoch: {} |'.format(epoch))
    
    # log train results
    for k, v in log_dict_train.items():
      logger.scalar_summary('train_{}'.format(k), v, epoch)
      logger.write('{} {:8f} | '.format(k, v))
    
    # evaluate
    if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
      save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)), 
                 epoch, model, optimizer)
      with torch.no_grad():
        log_dict_val, preds = trainer.val(epoch, val_loader)
        
        # evaluate val set using dataset-specific evaluator
        if opt.run_dataset_eval:
          out_dir = val_loader.dataset.run_eval(preds, opt.save_dir, 
                                                n_plots=opt.eval_n_plots, 
                                                render_curves=opt.eval_render_curves)
          
          # log dataset-specific evaluation metrics
          # (metric names — mean_ap / tp_errors / nd_score — suggest a
          # nuScenes-style detection summary; confirm against the evaluator)
          with open('{}/metrics_summary.json'.format(out_dir), 'r') as f:
            metrics = json.load(f)
          logger.scalar_summary('AP/overall', metrics['mean_ap']*100.0, epoch)
          for k,v in metrics['mean_dist_aps'].items():
            logger.scalar_summary('AP/{}'.format(k), v*100.0, epoch)
          for k,v in metrics['tp_errors'].items():
            logger.scalar_summary('Scores/{}'.format(k), v, epoch)
          logger.scalar_summary('Scores/NDS', metrics['nd_score'], epoch)
      
      # log eval results
      for k, v in log_dict_val.items():
        logger.scalar_summary('val_{}'.format(k), v, epoch)
        logger.write('{} {:8f} | '.format(k, v))
    
    # save this checkpoint
    else:
      save_model(os.path.join(opt.save_dir, 'model_last.pth'), 
                 epoch, model, optimizer)
    logger.write('\n')
    if epoch in opt.save_point:
      save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)), 
                 epoch, model, optimizer)
    
    # update learning rate: decay by 10x for each milestone passed
    if epoch in opt.lr_step:
      lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
      print('Drop LR to', lr)
      for param_group in optimizer.param_groups:
          param_group['lr'] = lr

  logger.close()
Esempio n. 3
0
def main(opt):
    """Fine-tuning driver: freeze the whole network, then unfreeze only the
    BDD detection heads (hm_bdd / wh_bdd / reg_bdd) before training.

    `opt` is the parsed options namespace (mutated in place: device, heads).
    Returns early (None) when `opt.test` is set.
    """
    torch.manual_seed(opt.seed)
    torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test
    Dataset = get_dataset(opt.dataset)
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    print(opt)
    if not opt.not_set_cuda_env:
        os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')
    logger = Logger(opt)

    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    optimizer = get_optimizer(opt, model)
    start_epoch = 0
    if opt.load_model != '':
        model, optimizer, start_epoch = load_model(model, opt.load_model, opt,
                                                   optimizer)

    # Freeze the backbone and every head...
    for param in model.parameters():
        param.requires_grad = False

    # ...then unfreeze only the BDD-specific heads.
    # Fix: use getattr instead of eval() — identical attribute lookup
    # (eval("model.hm_bdd") == getattr(model, "hm_bdd")) without executing
    # arbitrary strings.
    trainable_heads = ["hm_bdd", "wh_bdd", "reg_bdd"]
    for head_name in trainable_heads:
        for layer in getattr(model, head_name):
            for weight in layer.parameters():
                weight.requires_grad = True

    trainer = Trainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    # NOTE(review): if this condition is false, val_loader is never bound,
    # but the validation branch below can still trigger when
    # num_epochs == val_intervals — confirm option constraints.
    if opt.val_intervals < opt.num_epochs or opt.test:
        print('Setting up validation data...')
        val_loader = torch.utils.data.DataLoader(Dataset(opt, 'val'),
                                                 batch_size=1,
                                                 shuffle=False,
                                                 num_workers=1,
                                                 pin_memory=True)

        if opt.test:
            # Test-only mode: one validation pass, dataset eval, then exit.
            _, preds = trainer.val(0, val_loader)
            val_loader.dataset.run_eval(preds, opt.save_dir)
            return

    print('Setting up train data...')
    train_loader = torch.utils.data.DataLoader(Dataset(opt, 'train'),
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers,
                                               pin_memory=True,
                                               drop_last=True)

    print('Starting training...')
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        mark = epoch if opt.save_all else 'last'
        log_dict_train, _ = trainer.train(epoch, train_loader)
        logger.write('epoch: {} |'.format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary('train_{}'.format(k), v, epoch)
            logger.write('{} {:8f} | '.format(k, v))
        if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)),
                       epoch, model, optimizer)
            with torch.no_grad():
                log_dict_val, preds = trainer.val(epoch, val_loader)
                if opt.eval_val:
                    val_loader.dataset.run_eval(preds, opt.save_dir)
            for k, v in log_dict_val.items():
                logger.scalar_summary('val_{}'.format(k), v, epoch)
                logger.write('{} {:8f} | '.format(k, v))
        else:
            save_model(os.path.join(opt.save_dir, 'model_last.pth'), epoch,
                       model, optimizer)
        logger.write('\n')
        # Periodic checkpoint every opt.save_point[0] epochs (the original
        # membership test `epoch in opt.save_point` was replaced upstream).
        if epoch % opt.save_point[0] == 0:
            save_model(
                os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                epoch, model, optimizer)
        if epoch in opt.lr_step:
            # Decay LR by 10x for each milestone passed.
            lr = opt.lr * (0.1**(opt.lr_step.index(epoch) + 1))
            print('Drop LR to', lr)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
    logger.close()
Esempio n. 4
0
def main(opt):
    """Train/evaluate driver.

    Creates the model, optimizer and data loaders from the parsed option
    namespace `opt`, explicitly marks all parameters trainable, then runs the
    epoch loop with periodic validation, checkpointing and step LR decay.
    Returns early (None) when `opt.test` is set.
    """
    torch.manual_seed(opt.seed)
    torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test
    Dataset = get_dataset(opt.dataset)
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    print(opt)
    if not opt.not_set_cuda_env:
        os.environ["CUDA_VISIBLE_DEVICES"] = opt.gpus_str
    opt.device = torch.device("cuda" if opt.gpus[0] >= 0 else "cpu")
    logger = Logger(opt)

    print("Creating model...")
    model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    optimizer = get_optimizer(opt, model)
    start_epoch = 0
    if opt.load_model != "":
        model, optimizer, start_epoch = load_model(
            model, opt.load_model, opt, optimizer
        )

    # Explicitly make every parameter trainable (undoes any freezing a
    # loaded checkpoint pipeline might have applied).
    for i, param in enumerate(model.parameters()):
        param.requires_grad = True
    trainer = Trainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    # NOTE(review): if this condition is false, val_loader is never bound,
    # but the validation branch below can still trigger when
    # num_epochs == val_intervals — confirm option constraints.
    if opt.val_intervals < opt.num_epochs or opt.test:
        print("Setting up validation data...")
        val_loader = torch.utils.data.DataLoader(
            Dataset(opt, "val"),
            batch_size=1,
            shuffle=False,
            num_workers=1,
            pin_memory=True,
        )

        if opt.test:
            # Test-only mode: one validation pass, dataset eval, then exit.
            _, preds = trainer.val(0, val_loader)
            val_loader.dataset.run_eval(preds, opt.save_dir)
            return

    print("Setting up train data...")
    train_loader = torch.utils.data.DataLoader(
        Dataset(opt, "train"),
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.num_workers,
        pin_memory=True,
        drop_last=True,
    )

    print("Starting training...")
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        # NOTE(review): this checkpoint is written BEFORE the epoch is
        # trained and is later overwritten by the unconditional save near the
        # bottom of the loop — the double write looks unintentional; confirm
        # which of the two should remain.
        save_model(
            os.path.join(opt.save_dir, "model_{}.pth".format(epoch)),
            epoch,
            model,
            optimizer,
        )
        mark = epoch if opt.save_all else "last"
        log_dict_train, _ = trainer.train(epoch, train_loader)
        logger.write("epoch: {} |".format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary("train_{}".format(k), v, epoch)
            logger.write("{} {:8f} | ".format(k, v))
        if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
            save_model(
                os.path.join(opt.save_dir, "model_{}.pth".format(mark)),
                epoch,
                model,
                optimizer,
            )
            with torch.no_grad():
                log_dict_val, preds = trainer.val(epoch, val_loader)
                if opt.eval_val:
                    val_loader.dataset.run_eval(preds, opt.save_dir)
            for k, v in log_dict_val.items():
                logger.scalar_summary("val_{}".format(k), v, epoch)
                logger.write("{} {:8f} | ".format(k, v))
        else:
            save_model(
                os.path.join(opt.save_dir, "model_last.pth"), epoch, model, optimizer
            )
        logger.write("\n")
        #     if epoch in opt.save_point:
        # NOTE(review): the guard above is commented out, so this now writes
        # a per-epoch checkpoint every single epoch — confirm intended.
        save_model(
            os.path.join(opt.save_dir, "model_{}.pth".format(epoch)),
            epoch,
            model,
            optimizer,
        )
        if epoch in opt.lr_step:
            # Decay LR by 10x for each milestone passed.
            lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
            print("Drop LR to", lr)
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr
    logger.close()
Esempio n. 5
0
    def run_epoch(self, phase, epoch, data_loader, model, optimizer):
        """Run one epoch of training or evaluation.

        Args:
            phase: 'train' enables gradient updates; anything else runs eval
                with grads implied off by the surrounding caller.
            epoch: current epoch number (for checkpoints / progress bar).
            data_loader: iterable of batch dicts (tensors plus a 'meta' key
                that stays on CPU).
            model, optimizer: passed through to save_model for checkpoints.
                NOTE(review): gradient steps use self.optimizer, not the
                `optimizer` argument — confirm they are the same object.

        Returns:
            (ret, results): dict of averaged loss stats plus elapsed minutes
            under 'time', and the results dict (left empty here).
        """
        model_with_loss = self.model_with_loss
        if phase == 'train':
            model_with_loss.train()
        else:
            # Multi-GPU wrappers expose the underlying module via .module
            if len(self.opt.gpus) > 1:
                model_with_loss = self.model_with_loss.module
            model_with_loss.eval()
            torch.cuda.empty_cache()

        opt = self.opt
        results = {}
        data_time, batch_time = AverageMeter(), AverageMeter()
        # Track only losses that actually contribute (weight > 0) plus 'tot'
        avg_loss_stats = {l: AverageMeter() for l in self.loss_stats \
                          if l == 'tot' or opt.weights[l] > 0}
        num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters
        # Bug fix: int(num_iters / 10) is 0 when num_iters < 10, which made
        # the periodic-checkpoint modulo below raise ZeroDivisionError.
        # Also hoisted out of the loop since it is loop-invariant.
        ckpt_interval = max(1, num_iters // 10)
        bar = Bar('{}/{}'.format(opt.task, opt.exp_id), max=num_iters)
        end = time.time()
        for iter_id, batch in enumerate(data_loader):
            if iter_id >= num_iters:
                break
            data_time.update(time.time() - end)

            # Move every tensor in the batch to the target device
            for k in batch:
                if k != 'meta':
                    batch[k] = batch[k].to(device=opt.device,
                                           non_blocking=True)
            output, loss, loss_stats = model_with_loss(batch)
            loss = loss.mean()
            if phase == 'train':
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
            batch_time.update(time.time() - end)
            end = time.time()

            # Periodically refresh the rolling 'model_last' checkpoint
            if iter_id % ckpt_interval == 0:
                save_model(os.path.join(opt.save_dir, 'model_last.pth'), epoch,
                           model, optimizer)

            Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format(
                epoch,
                iter_id,
                num_iters,
                phase=phase,
                total=bar.elapsed_td,
                eta=bar.eta_td)
            for l in avg_loss_stats:
                avg_loss_stats[l].update(loss_stats[l].mean().item(),
                                         batch['image'].size(0))
                Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(
                    l, avg_loss_stats[l].avg)
            Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \
              '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
            if opt.print_iter > 0:  # If not using progress bar
                if iter_id % opt.print_iter == 0:
                    print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix))
            else:
                bar.next()

            if opt.debug > 0:
                self.debug(batch, output, iter_id, dataset=data_loader.dataset)

            # Drop per-iteration graph references promptly to free memory
            del output, loss, loss_stats

        bar.finish()
        ret = {k: v.avg for k, v in avg_loss_stats.items()}
        ret['time'] = bar.elapsed_td.total_seconds() / 60.
        return ret, results
Esempio n. 6
0
def online_fit(num_timesteps, num_targets, num_tweets=300):
    """Incrementally refit the saved model on data gathered since the last
    run and return price predictions for the next `num_targets` steps.

    Args:
        num_timesteps: length of the input window per training sample.
        num_targets: number of future steps the model predicts.
        num_tweets: tweet count passed to get_historical.

    Returns:
        The de-normalized prediction array for the upcoming steps.
    """
    stabilize_logs()

    dir_path = os.path.dirname(os.path.abspath(__file__))

    conn = sqlite3.connect(os.path.join(dir_path, 'historical.db'))
    try:
        cursor = conn.cursor()

        #for debugging to simulate a 1 hour pass time
        #cursor.execute("DELETE FROM historical ORDER BY date DESC LIMIT 1")

        cursor.execute("SELECT * FROM historical ORDER BY date DESC LIMIT 1")
        last_record = cursor.fetchall()
        # DB stores epoch milliseconds; resume fetching one hour (3600000 ms)
        # after the newest stored row.
        from_date = arrow.get((float(last_record[0][0]) + 3600000) /
                              1000).format('YYYY-MM-DD HH:mm:ss')

        combined_length = num_timesteps + num_targets
        # Fix: parameterized query instead of str.format-built SQL.
        cursor.execute(
            "SELECT * FROM historical ORDER BY date DESC LIMIT ?",
            (combined_length - 1,)
        )  # need to fit with some data in the db as the model didn't fit itself with said data on the past fit
        precomputed_data = np.asarray(cursor.fetchall(), dtype=np.float32)
        precomputed_data = precomputed_data[::-1]  # back to chronological order

        with open(os.path.join(dir_path, "logs/context_prices.txt"), "a") as f:
            f.write(str(precomputed_data[-1][-1]) + "\n")

        conn.commit()
    finally:
        # Fix: connection was not closed if anything above raised.
        conn.close()

    unseen_data = get_historical(num_tweets,
                                 from_date=from_date,
                                 is_online=True)

    #actual price from last prediction used for logging with twitter
    actual_price = unseen_data[0][-1]
    with open(os.path.join(dir_path, "logs/actuals.txt"), "a") as f:
        f.write(str(actual_price) + "\n")

    all_data = np.concatenate((precomputed_data, unseen_data), axis=0)

    # store recent data so that we can get a live prediction
    recent_reference = []
    recent_data = all_data[-num_timesteps:, 1:]
    recent_data = normalize_timestep(recent_data, recent_reference)

    timesteps = split_into_timeseries(all_data, combined_length)

    reference = []
    for i in range(0, len(timesteps)):
        timesteps[i] = normalize_timestep(timesteps[i], reference)

    # First `split_index` steps are inputs; the remainder (last column only)
    # are the regression targets.
    split_index = len(timesteps[0]) - num_targets
    X_train = timesteps[:, :split_index]
    y_train = timesteps[:, split_index:, -1]

    model = load_model()

    #train the model
    print("TRAINING")
    model.fit(X_train,
              y_train,
              batch_size=512,
              epochs=10,
              validation_split=0,
              verbose=2)
    save_model(model)

    # One-sample prediction needs a leading batch dimension (3-D input).
    recent_data = np.asarray([recent_data.tolist()])

    future = model.predict(recent_data)
    # De-normalize: values were stored as (price / reference) - 1.
    predictions = (future[0] + 1) * recent_reference[0]
    recent_data[0] = (recent_data[0] + 1) * recent_reference[0]

    # document results in file
    print("WRITING TO LOG")
    with open(os.path.join(dir_path, "logs/log_online.txt"), "w") as f:
        for timestep in recent_data:
            f.write(str(timestep) + "\n")
        f.write(str(future[0]) + "\n")

    with open(os.path.join(dir_path, "logs/predictions.txt"), "a") as f:
        f.write(str(predictions[0]) + "\n")

    log_to_twitter(predictions)

    return predictions
Esempio n. 7
0
def initial_fit(num_timesteps, num_targets, train_percent=.93, num_tweets=300):
    """Build, train and evaluate a fresh model on full historical data, plot
    predictions vs. actuals, save the graph, and send the report email.

    Args:
        num_timesteps: input window length per sample.
        num_targets: number of future steps predicted per sample.
        train_percent: fraction of samples used for training.
        num_tweets: tweet count passed to get_historical.
    """
    print("started init fit")
    dir_path = os.path.dirname(os.path.abspath(__file__))

    #clear contents of log files
    for log_name in ('logs/context_prices.txt', 'logs/actuals.txt',
                     'logs/predictions.txt', 'logs/history.txt',
                     'logs/proxy_log.txt'):
        open(os.path.join(dir_path, log_name), 'w').close()

    data = get_historical(num_tweets, from_date="")

    X_train, y_train, X_test, y_test, ref = load_data(
        data,
        num_timesteps,
        num_targets=num_targets,
        train_percent=train_percent
    )  #TODO: make higher percentage of training when this goes into "prod"

    # store recent data so that we can get a live prediction
    recent_reference = []
    recent_data = data[-num_timesteps:, 1:]
    recent_data = normalize_timestep(recent_data, recent_reference)

    print("    X_train", X_train.shape)
    print("    y_train", y_train.shape)
    print("    X_test", X_test.shape)
    print("    y_test", y_test.shape)

    model = build_model([9, num_timesteps, num_targets])
    #train the model
    print("TRAINING")
    model.fit(X_train,
              y_train,
              batch_size=512,
              epochs=600,
              validation_split=0.1,
              verbose=2)
    save_model(model)

    trainScore = model.evaluate(X_train, y_train, verbose=100)
    print('Train Score: %.2f MSE (%.2f RMSE) (%.2f)' %
          (trainScore[0], math.sqrt(trainScore[0]), trainScore[1]))

    testScore = model.evaluate(X_test, y_test, verbose=100)
    print('Test Score: %.2f MSE (%.2f RMSE) (%.2f)' %
          (testScore[0], math.sqrt(testScore[0]), testScore[1]))

    #make predictions
    print("PREDICTING")
    p = model.predict(X_test)

    recent_data = [
        recent_data
    ]  # One-sample predictions need list wrapper. Argument must be 3d.
    recent_data = np.asarray(recent_data)
    future = model.predict(recent_data)

    # document results in file (fix: context manager guarantees the handle
    # is closed even if a write fails part-way through)
    print("WRITING TO LOG")
    with open(os.path.join(dir_path, "logs/log_initial.txt"), "w") as file:
        for i in range(0, len(X_train)):
            for s in range(0, num_timesteps):
                file.write(str(X_train[i][s]) + "\n")
            file.write("Target: " + str(y_train[i]) + "\n")
            file.write("\n")

        for i in range(0, len(X_test)):
            for s in range(0, num_timesteps):
                file.write(str(X_test[i][s]) + "\n")
            file.write("Target: " + str(y_test[i]) + "\n")
            file.write("Prediction: " + str(p[i]) + "\n")
            file.write("\n")

    # de-normalize: values were stored as (price / reference) - 1
    print("DENORMALIZING")
    # NOTE(review): the reference offset hard-codes a .9 split while
    # train_percent defaults to .93 — these look inconsistent; confirm which
    # split load_data actually applied before trusting these values.
    for i in range(0, len(p)):
        p[i] = (p[i] + 1) * ref[round(.9 * len(ref) + i)]
        y_test[i] = (y_test[i] + 1) * ref[round(.9 * len(ref) + i)]

    future[0] = (future[0] + 1) * recent_reference[0]
    recent_data[0] = (recent_data[0] + 1) * recent_reference[0]

    with open(os.path.join(dir_path, "logs/predictions.txt"), "a") as file:
        file.write(str(future[0][0]) + "\n")

    # plot historical predictions
    print("PLOTTING")
    for i in range(0, len(p)):
        if i % (num_targets * 2) == 0:
            plot_index = i  #for filling plot indexes
            plot_indexes = []
            plot_values = p[i]
            for j in range(0, num_targets):
                plot_indexes.append(plot_index)
                plot_index += 1
            plt.plot(plot_indexes, plot_values, color="red")

    # plot historical actual
    plt.plot(y_test[:, 0], color='blue',
             label='Actual')  # actual price history

    # plot recent prices (bridged from the last test point)
    plot_indexes = [len(y_test) - 1]
    plot_values = [y_test[-1, 0]]
    plot_index = None
    for i in range(0, len(recent_data[0])):
        plot_values.append(recent_data[0][i][0])
        plot_index = len(y_test) + i
        plot_indexes.append(len(y_test) + i)
    plt.plot(plot_indexes, plot_values, color='blue')

    # plot future predictions (bridged from the last recent point)
    plot_indexes = [plot_index]
    plot_values = [recent_data[0][-1][0]]
    for i in range(0, len(future[0])):
        plot_index += 1
        plot_values.append(future[0][i])
        plot_indexes.append(plot_index)
    plt.plot(plot_indexes, plot_values, color="red", label="Prediction")

    #show/save plot
    print("SENDING EMAILS")
    plt.legend(loc="upper left")
    plt.title("ETH Price Predictions")
    plt.xlabel("Hours")
    plt.ylabel("Price ($)")
    filename = str(arrow.utcnow().format("YYYY-MM-DD"))
    plt.savefig(os.path.join(dir_path, "graphs/" + filename))
    #plt.show()
    plt.close()
    send_email()

    return
Esempio n. 8
0
def main(opt):
    """Train a model and periodically evaluate tracking quality.

    Unlike the plain driver, validation here runs the external
    `prefetch_test` tracker evaluation on the MOT17 half-val split and keeps
    track of the best MOTA seen so far together with its epoch.
    """
    torch.manual_seed(opt.seed)
    torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test
    dataset_cls = get_dataset(opt.dataset)
    opt = opts().update_dataset_info_and_set_heads(opt, dataset_cls)
    print(opt)
    if not opt.not_set_cuda_env:
        os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')
    logger = Logger(opt)

    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    optimizer = get_optimizer(opt, model)
    start_epoch = 0
    if opt.load_model != '':
        model, optimizer, start_epoch = load_model(model, opt.load_model, opt,
                                                   optimizer)

    trainer = Trainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    print('Setting up train data...')
    train_loader = torch.utils.data.DataLoader(
        dataset_cls(opt, 'train'),
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.num_workers,
        pin_memory=True,
        drop_last=True)

    print('Starting training...')
    # Best tracking score (MOTA) observed so far, and when it happened.
    best_mota, best_epoch = 0, 0

    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        ckpt_tag = epoch if opt.save_all else 'last'
        train_stats, _ = trainer.train(epoch, train_loader)
        logger.write('epoch: {} |'.format(epoch))
        for stat_name, stat_val in train_stats.items():
            logger.scalar_summary('train_{}'.format(stat_name), stat_val,
                                  epoch)
            logger.write('{} {:8f} | '.format(stat_name, stat_val))
        save_model(os.path.join(opt.save_dir, 'model_last.pth'), epoch, model,
                   optimizer)
        if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
            save_model(
                os.path.join(opt.save_dir, 'model_{}.pth'.format(ckpt_tag)),
                epoch, model, optimizer)
            # Tracker-level evaluation on the MOT17 half-validation split.
            valset = '17halfval'
            mota, motp = prefetch_test(opt, valset)
            if mota > best_mota:
                best_mota, best_epoch = mota, epoch
            print('mota = {}, motp = {}, bestmota = {}, bestepoch = {}'.format(
                mota, motp, best_mota, best_epoch))

        logger.write('\n')
        if epoch in opt.save_point:
            save_model(
                os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                epoch, model, optimizer)
        if epoch in opt.lr_step:
            # Decay LR by 10x for each milestone passed.
            new_lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
            print('Drop LR to', new_lr)
            for group in optimizer.param_groups:
                group['lr'] = new_lr
    logger.close()
Esempio n. 9
0
def main(opt):
    """Train/evaluate driver with MLflow tracking and early stopping.

    Creates model, optimizer and loaders from `opt`, logs every option and
    every train/val metric to both the file logger and MLflow, keeps the best
    checkpoint by `opt.metric` (lower is better), and stops early when no
    improvement is seen for `opt.early_stopping` epochs.  Returns early when
    `opt.test` is set.
    """
    torch.manual_seed(opt.seed)
    torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test
    Dataset = get_dataset(opt.dataset)
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    print(opt)

    # Log our parameters into mlflow
    for key, value in vars(opt).items():
        mlflow.log_param(key, value)

    if not opt.not_set_cuda_env:
        os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')
    logger = Logger(opt)

    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    optimizer = get_optimizer(opt, model)
    start_epoch = 0
    if opt.load_model != '':
        model, optimizer, start_epoch = load_model(model, opt.load_model, opt,
                                                   optimizer)

    trainer = Trainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    # NOTE(review): if this condition is false, val_loader is never bound,
    # but the validation branch below can still fire when
    # num_epochs == val_intervals — confirm option constraints.
    if opt.val_intervals < opt.num_epochs or opt.test:
        print('Setting up validation data...')
        val_loader = torch.utils.data.DataLoader(Dataset(
            opt, 'val', opt.data_name),
                                                 batch_size=1,
                                                 shuffle=False,
                                                 num_workers=1,
                                                 pin_memory=True)

        if opt.test:
            # Test-only mode: one validation pass, dataset eval, then exit.
            _, preds = trainer.val(0, val_loader)
            val_loader.dataset.run_eval(preds, opt.save_dir)
            return

    print('Setting up train data...')
    train_loader = torch.utils.data.DataLoader(Dataset(opt, 'train',
                                                       opt.data_name),
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers,
                                               pin_memory=True,
                                               drop_last=True)

    print('Starting training...')
    # Best (lowest) value of opt.metric so far and the epoch it occurred.
    best = 1e10
    best_epoch = 1e10
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        mark = epoch if opt.save_all else 'last'
        log_dict_train, _ = trainer.train(epoch, train_loader)
        logger.write('epoch: {} |'.format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary('train_{}'.format(k), v, epoch)
            logger.write('{} {:8f} | '.format(k, v))
            mlflow.log_metric('train_{}'.format(k), v, step=epoch)
        if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)),
                       epoch, model, optimizer)
            with torch.no_grad():
                log_dict_val, preds = trainer.val(epoch, val_loader)
                if opt.eval_val:
                    val_loader.dataset.run_eval(preds, opt.save_dir)
            for k, v in log_dict_val.items():
                logger.scalar_summary('val_{}'.format(k), v, epoch)
                logger.write('{} {:8f} | '.format(k, v))
                mlflow.log_metric('val_{}'.format(k), v, step=epoch)
            if log_dict_val[opt.metric] < best:
                best = log_dict_val[opt.metric]
                best_epoch = epoch
                # NOTE(review): 'model_best.pth' is saved without the
                # optimizer, unlike every other checkpoint here — possibly
                # deliberate (smaller file, inference-only); confirm.
                save_model(os.path.join(opt.save_dir, 'model_best.pth'), epoch,
                           model)
        else:
            save_model(os.path.join(opt.save_dir, 'model_last.pth'), epoch,
                       model, optimizer)
        logger.write('\n')
        if epoch in opt.save_point:
            save_model(
                os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                epoch, model, optimizer)
        # early stopping: quit after opt.early_stopping epochs w/o improvement
        if isinstance(opt.early_stopping, int):
            if epoch - best_epoch > opt.early_stopping:
                msg = 'Stopped {} epoch. Best epoch is {}, score is {}.'.format(
                    epoch, best_epoch, best)
                print(msg)
                logger.write(msg)
                break
        if epoch in opt.lr_step:
            # Decay LR by 10x for each milestone passed.
            lr = opt.lr * (0.1**(opt.lr_step.index(epoch) + 1))
            print('Drop LR to', lr)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

    logger.close()
Esempio n. 10
0
def main(opt):
    """Train the trajectory-prediction DecoderRNN model.

    Builds the prediction dataset and a DecoderRNN, optionally resumes from
    a trajectory checkpoint, then runs a batch-size-1 training loop with a
    scaled SmoothL1 loss, per-epoch checkpointing and step-wise LR decay.

    Args:
        opt: parsed command-line options. Fields used here include seed,
            test, dataset, gpus / gpus_str, save_dir, num_epochs, lr,
            lr_step and load_model_traj.
    """
    torch.manual_seed(opt.seed)
    torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test
    Dataset = get_dataset(opt.dataset, prediction_model=True)
    if not opt.not_set_cuda_env:
        os.environ["CUDA_VISIBLE_DEVICES"] = opt.gpus_str
    opt.device = torch.device("cuda" if opt.gpus[0] >= 0 else "cpu")
    device = opt.device
    logger = Logger(opt)

    print("Creating model...")

    model = DecoderRNN(128, opt)
    optimizer = get_optimizer(opt, model)
    start_epoch = 0
    if opt.load_model_traj != "":
        # Bug fix: resume from the trajectory checkpoint that the guard
        # above actually checked (previously loaded opt.load_model).
        model, optimizer, start_epoch = load_model(model, opt.load_model_traj,
                                                   opt, optimizer)
    loss_function = torch.nn.SmoothL1Loss()

    # A resumed checkpoint may carry frozen parameters; unfreeze everything.
    for param in model.parameters():
        param.requires_grad = True

    train_loader = torch.utils.data.DataLoader(
        Dataset(opt, "train"),
        batch_size=1,
        shuffle=True,
        num_workers=16,
        pin_memory=True,
        drop_last=True,
    )

    # Move any optimizer state restored from the checkpoint onto the
    # training device, then the model and loss themselves.
    for state in optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.to(device=device, non_blocking=True)
    model = model.to(device)
    loss_function = loss_function.to(device)

    print("Starting training...")
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        for inputs, targets in train_loader:
            inputs = inputs.to(device=device).float()
            targets = targets.to(device=device).view(1, -1).float()
            outputs = model(inputs)
            loss = loss_function(outputs, targets)
            # Empirical piecewise scaling: amplify small losses (< 0.2) by
            # 100x, larger ones by 10x, to keep gradients meaningful.
            if 100 * loss.item() < 20:
                loss = 100 * loss
            else:
                loss = 10 * loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            del outputs, loss

        # Checkpoint both a rolling "last" snapshot and a per-epoch file.
        save_model(os.path.join(opt.save_dir, "model_last.pth"), epoch, model,
                   optimizer)
        logger.write("\n")
        save_model(
            os.path.join(opt.save_dir, "model_{}.pth".format(epoch)),
            epoch,
            model,
            optimizer,
        )
        if epoch in opt.lr_step:
            # Decay LR by 10x for each scheduled step already passed.
            lr = opt.lr * (0.1**(opt.lr_step.index(epoch) + 1))
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr
    logger.close()
Esempio n. 11
0
    # Build a timestamped result directory whose name encodes the run's
    # hyper-parameters, then record it in `hp` for downstream code.
    time_str = time.strftime("%m%d-%H%M", time.localtime(time.time()))
    rootdir = "{}/{}/{}-semi-{}-fixed-{}-ratio-{}-lr-{}/".format(
        "/data/yangy/data_prepare/result", hp['dataname'], time_str,
        str(hp['semi']), str(hp['fixed']), str(hp['ratio']), str(args.lr))
    os.makedirs(rootdir, exist_ok=True)
    hp['rootdir'] = rootdir

    # Persist the hyper-parameter dict (np.save pickles it; reload with
    # np.load(..., allow_pickle=True).item()).
    np.save('{}parameter.npy'.format(rootdir), hp)

    # Load the models
    my_models = load_model(hp)

    # Load the data
    train_data, test_data = load_data(hp)

    # Pre-train the models (currently disabled)
    #my_models = pre_train(hp, my_models, train_data, test_data)

    # Pre-training evaluation (currently disabled)
    #result = test(test_data,hp,my_models,'pretrain')

    # Train the models
    my_models = train(hp, my_models, train_data)

    # Save the trained models
    save_model(my_models, rootdir)

    # Evaluate the final models
    result = test(test_data, hp, my_models, 'final')
Esempio n. 12
0
def pre_train(hp, models, train_data, test_data):
    """Pre-train the per-view models with an MSE regression objective.

    Args:
        hp: hyper-parameter dict; uses 'pre_lr', 'pre_size', 'pre_epoch'
            and 'modelpath', and rewrites 'rootdir' each epoch so `test`
            writes into the per-epoch checkpoint directory.
        models: list of view models; models[0] consumes the image view
            (x1/bag1) and models[1] the text view (x2/bag2), per the
            branches below.
        train_data: sequence of three datasets indexed 0..2 (entries may
            be None); index 0 feeds both views, 1 text only, 2 image only.
        test_data: held-out data forwarded to `test` after every epoch.

    Returns:
        The (in-place trained) list of models.
    """
    print("----------start pre-training models----------")
    view_num = len(models)
    par = []
    for i in range(view_num):
        models[i].cuda()
        models[i].train()
        par.append({'params': models[i].parameters()})

    optimizer = optim.Adam(par, lr=hp['pre_lr'])
    scheduler = StepLR(optimizer, step_size=10, gamma=0.5)
    batch_size = hp['pre_size']
    loss_func = nn.MSELoss()

    for epoch in range(hp['pre_epoch']):
        running_loss = 0.0
        data_num = 0
        for i in range(view_num):
            models[i].train()
        for i in range(3):
            data = train_data[i]
            if data is None:  # fixed: identity check instead of == None
                continue
            bag_num = len(data)
            data_num += bag_num
            # Number of batches, rounding up for a trailing partial batch
            # (replaces the original while-loop increment).
            max_step = (bag_num + batch_size - 1) // batch_size

            for step in range(max_step):
                # Fetch the current mini-batch of bags.
                step_data = get_batch(
                    data,
                    list(
                        range(step * batch_size,
                              min((step + 1) * batch_size, bag_num))), hp)
                x1, x2, bag1, bag2, y = step_data
                # Variable() is a no-op since PyTorch 0.4; move tensors
                # to the GPU directly.
                b_y = y.cuda()
                loss = 0
                # Index 0 has both views, 2 is image-only, 1 is text-only.
                if i == 0 or i == 2:
                    x_img = x1.cuda()
                    h1, _, _ = models[0](x_img, bag1)
                    loss += loss_func(h1, b_y)
                if i == 0 or i == 1:
                    x_text = x2.cuda()
                    h2, _, _ = models[1](x_text, bag2)
                    loss += loss_func(h2, b_y)

                # .item() extracts a detached Python float (was the
                # deprecated .data, which kept a GPU tensor around).
                running_loss += loss.item() * x2.size(0)

                # backward
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
        # Step the LR schedule AFTER the epoch's optimizer updates
        # (required ordering since PyTorch 1.1; it was called first).
        scheduler.step()
        # epoch loss
        epoch_loss = running_loss / data_num
        print('epoch {}/{} | Loss: {:.9f}'.format(epoch, hp['pre_epoch'],
                                                  epoch_loss))

        # Checkpoint this epoch's models and evaluate them.
        rootpath = "{}{}/".format(hp['modelpath'], str(epoch + 1))
        os.makedirs(rootpath, exist_ok=True)
        save_model(models, rootpath)
        hp['rootdir'] = rootpath
        result = test(test_data, hp, models, 'pretrain')

    print("----------end pre-training models----------")
    return models
Esempio n. 13
0
def main(opt):
    """Main detection-training entry point.

    Builds the dataset, model and optimizer, optionally resumes from a
    checkpoint, then trains for opt.num_epochs with periodic validation,
    checkpointing and step-wise learning-rate decay.

    Args:
        opt: parsed options (see opts). Notable fields: dataset, arch,
            load_model, val_intervals, num_epochs, save_point, lr_step,
            fix_backbone / fix_dla_up / fix_ida_up, using_randomly_half,
            use_percent, save_all, eval_val, test.
    """
    torch.manual_seed(opt.seed)
    torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test
    Dataset = get_dataset(opt.dataset)
    print(Dataset)
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    print(opt)
    if not opt.not_set_cuda_env:
        os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')
    logger = Logger(opt)

    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    # Optionally freeze parts of the network for fine-tuning.
    if opt.fix_backbone:
        for param in model.backbone.parameters():
            param.requires_grad = False
    if opt.fix_dla_up:
        for param in model.neck.dla_up.parameters():
            param.requires_grad = False
    if opt.fix_ida_up:
        for param in model.neck.ida_up.parameters():
            param.requires_grad = False
    optimizer = get_optimizer(opt, model)
    start_epoch = 0
    if opt.load_model != '':
        model, optimizer, start_epoch = load_model(model, opt.load_model, opt,
                                                   optimizer)

    trainer = Trainer(opt, model, optimizer, logger)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    # Bug fix: the loader must exist for every epoch the loop below may
    # validate on. The old `val_intervals < num_epochs` guard left
    # val_loader undefined when val_intervals == num_epochs, crashing
    # with a NameError on the final epoch.
    if (0 < opt.val_intervals <= opt.num_epochs) or opt.test:
        print('Setting up validation data...')
        val_loader = torch.utils.data.DataLoader(Dataset(opt, 'val'),
                                                 batch_size=1,
                                                 shuffle=False,
                                                 num_workers=1,
                                                 pin_memory=True)

        if opt.test:
            # Test mode: run one validation pass, evaluate, and exit.
            _, preds = trainer.val(0, val_loader)
            val_loader.dataset.run_eval(preds, opt.save_dir)
            return

    print('Setting up train data...')
    if opt.using_randomly_half:
        # Train on a reproducible random subset (opt.use_percent) of the
        # training set; re-seeding pins the split across runs.
        test_data = Dataset(opt, 'train')
        length = len(test_data)
        torch.random.manual_seed(opt.seed)
        actual_dataset, _ = torch.utils.data.random_split(
            test_data, [
                int(length * opt.use_percent),
                length - int(length * opt.use_percent)
            ])
    else:
        actual_dataset = Dataset(opt, 'train')

    train_loader = torch.utils.data.DataLoader(actual_dataset,
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers,
                                               pin_memory=True,
                                               drop_last=True)

    print('Starting training...')
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        # With save_all, keep one checkpoint per epoch; otherwise
        # overwrite a single rolling "last" checkpoint.
        mark = epoch if opt.save_all else 'last'
        log_dict_train, _ = trainer.train(epoch, train_loader)
        logger.write('epoch: {} |'.format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary('train_{}'.format(k), v, epoch)
            logger.write('{} {:8f} | '.format(k, v))
        if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)),
                       epoch, model, optimizer)
            with torch.no_grad():
                log_dict_val, preds = trainer.val(epoch, val_loader)
                if opt.eval_val:
                    val_loader.dataset.run_eval(preds, opt.save_dir)
            for k, v in log_dict_val.items():
                logger.scalar_summary('val_{}'.format(k), v, epoch)
                logger.write('{} {:8f} | '.format(k, v))
        else:
            save_model(os.path.join(opt.save_dir, 'model_last.pth'), epoch,
                       model, optimizer)
        logger.write('\n')
        if epoch in opt.save_point:
            save_model(
                os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                epoch, model, optimizer)
        if epoch in opt.lr_step:
            # Decay LR by 10x for each scheduled step already passed.
            lr = opt.lr * (0.1**(opt.lr_step.index(epoch) + 1))
            print('Drop LR to', lr)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
    logger.close()