Example #1
    def train_loop(self, train_loader, test_loader,
                   model):  # called by train_cifar; trains the supernet
        best_top1 = 0.0
        for epoch in range(self.epochs):
            logging.info("Learning Rate: {:.4f}".format(
                self.optimizer.param_groups[0]["lr"]))
            self.writer.add_scalar("learning_rate/weights",
                                   self.optimizer.param_groups[0]["lr"], epoch)
            logging.info("Start to train for epoch {}".format(epoch))

            self._training_step(model,
                                train_loader,
                                epoch,
                                info_for_logger="_train_step_",
                                scratch=True)
            if self.CONFIG.lr_scheduler == "step":
                self.scheduler.step()

            top1_avg = self._validate(model, test_loader, epoch, scratch=True)
            if best_top1 < top1_avg:
                logging.info("Best top1 acc by now. Save model")
                best_top1 = top1_avg
                save(model, self.optimizer, self.CONFIG.path_to_save_scratch)

        logging.info("The Best top1 acc : {}".format(best_top1))
        return best_top1
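
Examples like #1 above call a project-specific save(model, optimizer, path) helper rather than torch.save directly. A minimal sketch of what such a helper plausibly looks like, assuming it simply bundles the two state dicts (the actual implementations vary per repository):

import os
import torch

def save(model, optimizer, path):
    """Hypothetical checkpoint helper: persist model and optimizer state."""
    d = os.path.dirname(path)
    if d:
        os.makedirs(d, exist_ok=True)
    torch.save({
        "model": model.state_dict(),
        "optimizer": optimizer.state_dict(),
    }, path)
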
Example #2
def load_pair_tvt_splits():
    save_dir = join(get_save_path(), 'pairs_tvt_split')  # avoid shadowing builtin dir()
    train_ratio = int(FLAGS.tvt_ratio[0] * 100)
    val_ratio = int(FLAGS.tvt_ratio[1] * 100)
    test_ratio = 100 - train_ratio - val_ratio
    ensure_train_connectivity_str = 'ensure_train_connectivity_{}'\
        .format(str(FLAGS.ensure_train_connectivity).lower())

    num_folds = 1 if FLAGS.cross_val is None else FLAGS.num_folds

    sfn = '{}_{}_seed_{}_folds_{}_train_{}_val_{}_test_{}_num_negative_pairs_' \
          '{}_{}_feat_size_{}_{}'.format(
            FLAGS.dataset,
            FLAGS.random_seed,
            num_folds,
            train_ratio,
            val_ratio,
            test_ratio,
            ensure_train_connectivity_str,
            FLAGS.num_negative_samples if FLAGS.negative_sample else 0,
            '_'.join(get_flags_with_prefix_as_list('node_fe', FLAGS)),
            FLAGS.feat_size,
            '_'.join([node_feat.replace('_', '') for node_feat in FLAGS.node_feats])
    )

    tp = join(save_dir, sfn)
    rtn = load(tp)
    if rtn:
        tvt_pairs_dict = rtn
    else:
        tvt_pairs_dict = _load_pair_tvt_splits_helper()
        save(tvt_pairs_dict, tp)
    return tvt_pairs_dict
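
Several of these snippets (Examples #2, #3, #21, #24) share a load-or-compute caching pattern: load(path) returns the cached object, or something falsy when nothing has been saved yet, and save(obj, path) writes the cache. A minimal sketch of that contract, assuming simple pickle-backed helpers (the real projects use their own serialization):

import os
import pickle

def save(obj, path, print_msg=True):
    """Hypothetical pickle-based cache writer."""
    d = os.path.dirname(path)
    if d:
        os.makedirs(d, exist_ok=True)
    with open(path, 'wb') as f:
        pickle.dump(obj, f)
    if print_msg:
        print('saved to {}'.format(path))

def load(path):
    """Hypothetical cache reader: returns None on a cache miss."""
    if not os.path.exists(path):
        return None
    with open(path, 'rb') as f:
        return pickle.load(f)
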
Example #3
def load_dataset(dataset_name, tvt, node_feats, edge_feats):
    if tvt not in ['train', 'val', 'test', 'all']:
        raise ValueError('Unknown tvt specifier {}'.format(tvt))
    name_list = [dataset_name, tvt]
    name_list.append('_'.join(
        [node_feat.replace('_', '') for node_feat in node_feats]))
    f_name = '_'.join(name_list)
    f_path = join(get_save_path(), 'dataset', f_name)
    ld = load(f_path)

    if ld:
        dataset = BiGNNDataset(None, None, None, None, None, None, None, None,
                               ld)
    else:
        try:
            dataset = load_raw_interaction_data(dataset_name, node_feats,
                                                edge_feats, tvt)
        except Exception as e:
            print(e)
            raise FileNotFoundError(f'Please get {f_name} from google drive')

        gc.collect()
        save(dataset.__dict__, f_path)

    return dataset
Example #4
    def save_ranking_mat(self, true_m, pred_m, info):
        p = join(self.get_log_dir(), '{}_ranking_mats'.format(info))
        print("in save_ranking_mat")
        save({
            'true_m': true_m.__dict__,
            'pred_m': pred_m.__dict__
        }, p, print_msg=False)
Example #5
    def save_pairs_with_results(self, pairs, info, set_name="validation"):
        p = join(self.get_log_dir(), '{}_pairs'.format(info))
        print("in save_pairs_with_results")
        save(
            {
                '{}_data_pairs'.format(set_name):
                    self._shrink_space_pairs(pairs),
            },
            p,
            print_msg=False)
Example #6
    def search_train_loop(self, val_loader, model, generator):
        tau = 5
        best_top1 = 0.0
        best_loss = 10000.0  # sentinel; lower is better
        for epoch in range(self.epochs):
            logging.info("Start to train for search epoch {}".format(epoch))
            logging.info("Tau: {}".format(tau))
            self._generator_training_step(generator, model, val_loader, epoch, tau, info_for_logger="_gen_train_step")

            top1_avg, _ = self.generator_validate(generator, model, val_loader, epoch, tau, sample=True, info_for_logger="_gen_val_step_")


            evaluate_metric, total_loss, kendall_tau = evaluate_generator(generator, self.backbone_pool, self.lookup_table, self.low_macs, self.high_macs, self.alpha, self.loss_penalty)

            logging.info("Total loss : {}".format(total_loss))
            if best_loss > total_loss:
                logging.info("Best loss by now: {} Tau : {}.Save model".format(total_loss, kendall_tau))
                best_loss = total_loss
                save_generator_evaluate_metric(evaluate_metric, self.path_to_generator_eval)
                save(generator, self.g_optimizer, self.path_to_save_generator)
            if top1_avg > best_top1 and total_loss < 0.4:
                logging.info("Best top1-avg by now: {}.Save model".format(top1_avg))
                best_top1 = top1_avg
                save(generator, self.g_optimizer, self.path_to_best_avg_generator)
            save(generator, self.g_optimizer, "./logs/generator/{}.pth".format(total_loss))
            tau *= self.tau_decay
        logging.info("Best loss: {}".format(best_loss))
        save(generator, self.g_optimizer, self.path_to_fianl_generator)
Example #7
    def train_loop(self, train_loader, val_loader, test_loader, model):
        best_top1 = 0.0
        for epoch in range(self.train_epochs):
            self.writer.add_scalar("learning_rate/weights", self.optimizer.param_groups[0]["lr"], epoch)

            self.logger.info("Start to train for epoch %d" % (epoch))
            self._training_step(model, train_loader, self.optimizer, epoch, info_for_logger="_train_step_")
            if val_loader is not None:
                self._training_step(model, val_loader, self.optimizer, epoch, info_for_logger="_train_step_")

            top1_avg = self._validate(model, test_loader, epoch)

            self.block_acc.append(top1_avg)

            if best_top1 < top1_avg:
                best_top1 = top1_avg
                self.logger.info("Best top1 acc by now. Save model")
                save(model, self.path_to_save_model)
Example #8
    def train_loop(self, train_loader, test_loader, model, fold):
        best_f1 = 0.0
        for epoch in range(self.epochs):
            logging.info("Learning Rate: {:.4f}".format(
                self.optimizer.param_groups[0]["lr"]))
            logging.info("Start to train for epoch {}".format(epoch))

            self._training_step(model,
                                train_loader,
                                epoch,
                                info_for_logger="_train_step_")

            f1_avg, error_index = self.validate(model, test_loader, epoch)
            if best_f1 < f1_avg:
                logging.info("Best f1 score by now. Save model")
                best_f1 = f1_avg
                save(
                    model, self.optimizer,
                    self.CONFIG.path_to_save_model[:-4] + "_{}".format(fold) +
                    self.CONFIG.path_to_save_model[-4:])

        logging.info("The Best f1 score : {}".format(best_f1))
Example #9
    def search_train_loop(self, val_loader, model, generator):
        # NOTE: val_loader and model added to the signature as an assumption;
        # the body below references both, but the original accepted only `generator`.
        self.epochs = self.warmup_epochs + self.search_epochs
        # Training generator
        best_loss = 10000.0
        best_top1 = 0
        tau = 5
        for epoch in range(self.warmup_epochs, self.search_epochs):
            logging.info("Start to train for search epoch {}".format(epoch))
            logging.info("Tau: {}".format(tau))
            self._generator_training_step(generator,
                                          val_loader,
                                          epoch,
                                          tau,
                                          info_for_logger="_gen_train_step")

            # ================ Train ============================================
            for i in range(self.CONFIG.generator_train_steps):  # NOTE: field name assumed; the original was an empty range()
                # Training generator
                arch_param, hardware_constraint = self.set_arch_param(
                    generator, tau=tau)

                # ============== evaluation flops ===============================
                gen_flops = self.flops_table.predict_arch_param_efficiency(
                    arch_param)
                hc_loss = cal_hc_loss(gen_flops.cuda(),
                                      hardware_constraint.item(),
                                      self.CONFIG.alpha,
                                      self.CONFIG.loss_penalty)
                # ===============================================================
                self.g_optimizer.zero_grad()

                # ============== predict top1 accuracy ==========================
                top1_avg = self.accuracy_predictor(arch_param)
                ce_loss = -1 * top1_avg
                # ===============================================================
                loss = ce_loss + hc_loss
                logging.info("HC loss : {}".format(hc_loss))
                loss.backward()

                self.g_optimizer.step()
                self.g_optimizer.zero_grad()
            # ====================================================================

            # ============== Valid ===============================================
            hardware_constraint, arch_param = self._get_arch_param(
                generator, hardware_constraint, valid=True)
            arch_param = self.calculate_one_hot(arch_param)
            arch_param, hardware_constraint = self.set_arch_param(
                generator,
                model,
                hardware_constraint=hardware_constraint,
                arch_param=arch_param)
            # ============== evaluation flops ===============================
            gen_flops = self.flops_table.predict_arch_param_efficiency(
                arch_param)

            hc_loss = cal_hc_loss(gen_flops.cuda(), hardware_constraint.item(),
                                  self.CONFIG.alpha, self.CONFIG.loss_penalty)
            # ===============================================================

            # ============== predict top1 accuracy ==========================
            top1_avg = self.accuracy_predictor(arch_param)
            logging.info("Valid : Top-1 avg : {}".format(top1_avg))
            # ===============================================================

            # ====================================================================

            # ============== Evaluate ============================================
            total_loss = 0
            evaluate_metric = {"gen_flops": [], "true_flops": []}
            for flops in range(self.CONFIG.low_macs, self.CONFIG.high_macs,
                               10):
                hardware_constraint = torch.tensor(flops, dtype=torch.float32)
                hardware_constraint = hardware_constraint.view(-1, 1)
                hardware_constraint = hardware_constraint.to(self.device)

                normalize_hardware_constraint = min_max_normalize(
                    self.CONFIG.high_macs, self.CONFIG.low_macs,
                    hardware_constraint)

                noise = torch.randn(*self.backbone.shape)
                noise = noise.to(self.device)
                noise *= 0

                arch_param = generator(self.backbone,
                                       normalize_hardware_constraint, noise)
                # ============== evaluation flops ===============================
                gen_flops = self.flops_table.predict_arch_param_efficiency(
                    arch_param)
                hc_loss = cal_hc_loss(gen_flops.cuda(),
                                      hardware_constraint.item(),
                                      self.CONFIG.alpha,
                                      self.CONFIG.loss_penalty)
                # ===============================================================

                evaluate_metric["gen_flops"].append(gen_flops)
                evaluate_metric["true_flops"].append(flops)

                total_loss += hc_loss.item()
            kendall_tau, _ = stats.kendalltau(evaluate_metric["gen_flops"],
                                              evaluate_metric["true_flops"])
            # ====================================================================

            logging.info("Total loss : {}".format(total_loss))
            if best_loss > total_loss:
                logging.info("Best loss by now: {} Tau : {}.Save model".format(
                    total_loss, kendall_tau))
                best_loss = total_loss
                save_generator_evaluate_metric(
                    evaluate_metric, self.CONFIG.path_to_generator_eval)
                save(generator, self.g_optimizer,
                     self.CONFIG.path_to_save_generator)
            if top1_avg > best_top1 and total_loss < 0.4:
                logging.info(
                    "Best top1-avg by now: {}.Save model".format(top1_avg))
                best_top1 = top1_avg
                save(generator, self.g_optimizer,
                     self.CONFIG.path_to_best_avg_generator)
            save(generator, self.g_optimizer,
                 "./logs/generator/{}.pth".format(total_loss))

            tau *= self.CONFIG.tau_decay
            self.noise_weight = self.noise_weight * self.CONFIG.noise_decay if self.noise_weight > 0.0001 else 0
            logging.info("Noise weight : {}".format(self.noise_weight))
        logging.info("Best loss: {}".format(best_loss))
        save(generator, self.g_optimizer, self.CONFIG.path_to_fianl_generator)
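
Example #9 (like #20) normalizes the sampled hardware constraint into [0, 1] before feeding it to the generator. A plausible sketch of min_max_normalize under that assumption; the argument order follows the call site above (high, low, value):

def min_max_normalize(max_value, min_value, value):
    """Hypothetical: scale `value` into [0, 1] given the constraint range."""
    return (value - min_value) / (max_value - min_value)
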
Example #10
def main():
  '''
  if not torch.cuda.is_available():
    logging.info('no gpu device available')
    sys.exit(1)'''

  np.random.seed(args.seed)
  if args.gpu == -1:
    device = torch.device('cpu')
  else:
    device = torch.device('cuda:{}'.format(args.gpu))
  cudnn.benchmark = True
  # seed the CPU RNG so that results are deterministic
  torch.manual_seed(args.seed)
  cudnn.enabled = True

  logging.info('gpu device = %d' % args.gpu)
  logging.info("args = %s", args)

  criterion = nn.CrossEntropyLoss()  # loss function: cross-entropy
  criterion = criterion.to(device)
  model = Network(args.gpu, args.init_channels, dataset_classes, args.layers, criterion)
  model = model.to(device)
  logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

  optimizer = torch.optim.SGD(
      model.parameters(),
      args.learning_rate,
      momentum=args.momentum,
      weight_decay=args.weight_decay)

  train_data = MyDataset(args=args, subset='train')
  valid_data = MyDataset(args=args, subset='valid')

  train_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2)

  valid_queue = torch.utils.data.DataLoader(
      valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)

  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

  architect = Architect(model, args)
  f_arch = open(os.path.join(args.save, 'arch.txt'), 'a')
  for epoch in range(args.epochs):
    scheduler.step()
    lr = scheduler.get_lr()[0]
    logging.info('epoch %d lr %e', epoch, lr)

    # derive the genotype: discretize the continuous alphas back into an architecture
    genotype = model.genotype()
    logging.info('genotype = %s', genotype)

    print(F.softmax(model.alphas_normal, dim=-1))
    print(F.softmax(model.alphas_reduce, dim=-1))

    # training
    train_acc, train_obj, train_fscores, train_MIoU = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
    logging.info('train_acc %f _fscores %f _MIoU %f', train_acc, train_fscores, train_MIoU)

    # validation
    valid_acc, valid_obj, valid_fscores, valid_MIoU = infer(valid_queue, model, criterion)
    logging.info('valid_acc %f _fscores %f _MIoU %f', valid_acc, valid_fscores, valid_MIoU)

    utils.save(model, os.path.join(args.save, 'weights.pt'))
    f_arch.write(str(F.softmax(model.arch_parameters()[0], dim=-1)))
  f_arch.close()
Example #11
def train(label, phi, t_label, t_phi, cfg):
    # writer = SummaryWriter()
    train_label, validate_label, _, _ = train_test_split(
        label.label, test_size=cfg.tv_value, random_state=20, shuffle=True)

    train_dataset = ds.SnapshotDataset(phi, train_label)
    validate_dataset = ds.SnapshotDataset(phi, validate_label)
    t_dataset = ds.SnapshotDataset(t_phi, t_label)

    phi = phi.to(cfg.device)
    model = End2end(phi, cfg)
    print(sum(p.numel() for p in model.parameters() if p.requires_grad))
    model = model.to(cfg.device)
    optimizer = util.get_optimizer(cfg.o_name, model, cfg.learning_rate)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', 0.5,
                                               cfg.scheduler)
    loss_func = get_loss(cfg)

    # with writer as w:
    #     dummy_x = torch.zeros_like(label[0].unsqueeze(0))
    #     dummy_y = torch.zeros_like(label[0, 0].unsqueeze(0))
    #     w.add_graph(model, (dummy_x, dummy_y, phi))

    losses = []
    val_losses = []
    best_val_loss = 1
    best_psnr = 0

    accumulation_steps = cfg.poor

    train_data_loader = DataLoader(train_dataset,
                                   batch_size=cfg.batch,
                                   shuffle=True,
                                   drop_last=True)
    validate_data_loader = DataLoader(validate_dataset,
                                      batch_size=math.floor(cfg.batch / 2),
                                      shuffle=False,
                                      drop_last=True)
    for ep in range(cfg.epoch):
        optimizer.zero_grad()
        for ep_i, batch in enumerate(train_data_loader):
            label, y = batch
            initial = y.repeat(args.frame, 1, 1, 1).permute(1, 0, 2, 3).mul(
                phi.cpu()).div(phi.cpu().sum(0) + 0.0001)
            initial = initial.to(cfg.device)
            y = y.to(cfg.device)
            label = label.to(cfg.device)
            model.train()
            layers, symmetric = model(initial, y, phi)
            net_output = layers[-1]
            loss = loss_func(layers, label, symmetric)
            loss.backward()
            if (ep_i + 1) % accumulation_steps == 0:
                print("ep", ep, "ep_i ", ep_i, "loss ", loss.item())
                optimizer.step()
                optimizer.zero_grad()

        with torch.no_grad():
            losses.append(loss.item())
            val_loss = torch.zeros([1])
            for v_ep_i, v_batch in enumerate(validate_data_loader):
                v_label, v_y = v_batch  # unpack the batch, mirroring the training loop above
                v_initial = v_y.repeat(args.frame, 1, 1, 1).permute(
                    1, 0, 2, 3).mul(phi.cpu()).div(phi.cpu().sum(0) + 0.0001)
                v_initial = v_initial.to(cfg.device)
                v_y = v_y.to(cfg.device)
                v_label = v_label.to(cfg.device)
                model.eval()
                v_layers, symmetric = model(v_initial, v_y, phi)
                net_output = v_layers[-1]
                val_loss += loss_func(v_layers, v_label, symmetric)
            scheduler.step(val_loss)  # step the plateau scheduler once per epoch
            val_losses.append(val_loss.item())

            print("ep ", ep, "loss ", loss.item(), "val loss ", val_loss, "lr",
                  optimizer.param_groups[0]['lr'], "time ", time())

            if ep % cfg.store == 0:
                best_val_loss = val_loss
                best_img = np.clip(net_output.detach().cpu().numpy(), 0,
                                   1).astype(np.float64)
                best_psnr = compare_psnr(v_label.cpu().numpy(), best_img)
                print("PSNR: ", np.round(best_psnr, 2))
                util.save(model, best_psnr, best_img,
                          v_label.cpu().numpy(), cfg)

    t_phi = t_phi.to(cfg.device)
    data_loader = DataLoader(t_dataset,
                             batch_size=t_label.shape[0],
                             shuffle=False)
    label, y = next(iter(data_loader))
    initial = y.repeat(args.frame, 1, 1, 1).permute(1, 0, 2, 3).mul(
        t_phi.cpu()).div(t_phi.cpu().sum(0) + 0.0001)
    initial = initial.to(cfg.device)
    y = y.to(cfg.device)
    layers, _ = model(initial, y, t_phi)
    net_output = layers[-1].detach().cpu().numpy()
    psnr = compare_psnr(label.numpy(),
                        np.clip(net_output, 0, 1).astype(np.float64))
    return model, psnr, net_output
Example #12
    parser.add_argument('--batch', type=int, default=8)
    parser.add_argument('--phase', type=int, default=2)
    parser.add_argument('--share', action='store_true')  # boolean flag; argparse's type=bool treats any non-empty string as True
    parser.add_argument('--poor', type=int, default=1)
    parser.add_argument('--scheduler', type=int, default=5)
    parser.add_argument('--tv_value', type=float, default=0.9)
    parser.add_argument('--store', type=int, default=20)
    args = parser.parse_args()

    if args.use_gpu:
        if args.device is None:
            args.device = util.getbestgpu()
    else:
        args.device = 'cpu'

    train_file, test_file, mask_file, _, _, _ = config.general(args.name)
    t_label, t_phi = ds.load_test_data(test_file, mask_file, False)
    if args.name == "Traffic":
        label, phi = ds.load_train_data(train_file, mask_file, False)
    else:
        label, phi = ds.load_train_data(train_file, mask_file, True)
    print(label.shape)

    start = time()
    model, psnr, reconstruction = train(label, phi, t_label, t_phi, args)
    end = time()
    t = end - start
    print("PSNR {}, Training Time: {}".format(psnr, t))

    util.save(model, psnr, reconstruction, t_label.cpu().numpy(), args)
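
A side note on the --share flag above: type=bool in argparse is almost never what is intended, because bool() of any non-empty string is True. A quick illustration:

import argparse

p = argparse.ArgumentParser()
p.add_argument('--share', type=bool, default=False)
print(p.parse_args(['--share', 'False']).share)  # True -- bool('False') is truthy
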
Example #13
def search(args):
    logging.info('start load dataset')
    train_data, test_data, x_shape, class_num = get_src_dataset(
        args.data_path, args.name)
    x_shape[0] = args.batch_size
    search_loader, _, _ = get_search_loader(train_data, test_data, args.name,
                                            args.split, args.workers,
                                            args.batch_size)
    logging.info('dataset loaded')

    model = Network(args.name, x_shape, class_num, args)
    model = model.cuda()
    flop, param = get_model_infos(model, x_shape)
    logging.info('Params={:.2f} MB, FLOPs={:.2f} M'.format(param, flop))

    w_optimizer, w_scheduler, criterion = get_opt_scheduler(
        model.get_weights(), args.base_optm, args.base_lr, args.base_decay,
        args.base_scheduler, args.epoch)

    criterion = criterion.cuda()

    if args.arch_optm == 'Adam':
        a_optimizer = optim.Adam(model.get_alphas(),
                                 args.arch_lr,
                                 weight_decay=args.arch_decay)
    else:
        raise ValueError('unsupported arch optimizer: {}'.format(args.arch_optm))
    logging.info('w-optimizer : {:}'.format(w_optimizer))
    logging.info('a-optimizer : {:}'.format(a_optimizer))
    logging.info('w-scheduler : {:}'.format(w_scheduler))
    logging.info('criterion   : {:}'.format(criterion))
    logging.info('classifier:\n{:}'.format(model.classifier))

    best_acc = 0
    time_str = ''
    for epoch in range(1, args.epoch + 1):
        new_tau = args.max_tau - (args.max_tau -
                                  args.min_tau) * epoch / (args.epoch - 1)
        model.set_tau(new_tau)
        logging.info('epoch:{:} LR:{:.6f} tau:{:.6f} need time {:}'.format(
            epoch,
            w_scheduler.get_lr()[0], new_tau, time_str))
        if args.name in ['cifar10', 'cifar100']:
            model.set_drop_path_prob(args.drop_path_prob * epoch / args.epoch)

        epoch_str = '[{:03d}/{:03d}]'.format(epoch, args.epoch)

        # A, B = model.show_alphas()
        # logging.info(A)
        # logging.info(B)
        s_time = time.time()
        base_top1, base_top5, base_loss, arch_top1, arch_top5, arch_loss = search_train(
            search_loader, model, criterion, w_optimizer, a_optimizer,
            epoch_str, args.print_frequency, args.grad_clip)
        batch_time = (time.time() - s_time) * (args.epoch - epoch)
        m, s = divmod(batch_time, 60)
        h, m = divmod(m, 60)
        time_str = "%d:%02d:%02d" % (h, m, s)
        train_str = 'train set - epoch:' + epoch_str + ' result  Loss:'
        val_str = ' val  set - epoch:' + epoch_str + ' result  Loss:'
        logging.info(train_str +
                     '{:.6f}  Pre@1 : {:.5f}%  Pre@5:{:.5f}%'.format(
                         base_loss, base_top1, base_top5))
        logging.info(val_str + '{:.6f}  Pre@1 : {:.5f}%  Pre@5:{:.5f}%'.format(
            arch_loss, arch_top1, arch_top5))

        if arch_top1 > best_acc:
            best_acc = arch_top1
            logging.info(
                'find the best model. best acc is {:.5f}%'.format(best_acc))
            logging.info('Save it to {:}'.format(args.save + 'best.pt'))
            save(model, os.path.join(args.save, 'best.pt'))
            model.get_genotype()

        w_scheduler.step()

    logging.info('best acc is {:.5f}%'.format(best_acc))
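
Examples #13 and #14 estimate the remaining wall time with two divmod calls. The conversion from seconds to h:mm:ss works like this:

remaining = 5025.0  # seconds left, e.g. per-epoch time * epochs remaining
m, s = divmod(remaining, 60)  # 83 minutes, 45 seconds
h, m = divmod(m, 60)          # 1 hour, 23 minutes
print("%d:%02d:%02d" % (h, m, s))  # -> 1:23:45
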
Example #14
def train(args):
    logging.info('start load dataset')
    train_data, test_data, x_shape, class_num = get_src_dataset(
        args.data_path, args.name)
    _, train_loader, valid_loader = get_search_loader(train_data, test_data,
                                                      args.name, args.split,
                                                      args.workers,
                                                      args.batch_size)
    logging.info('dataset loaded')

    model = Network(args.name, x_shape, class_num, args)
    model = model.cuda()
    flop, param = get_model_infos(model, x_shape)
    logging.info('Params={:.2f} MB, FLOPs={:.2f} M'.format(param, flop))

    optimizer, scheduler, criterion = get_opt_scheduler(
        model.get_weights(), args.optimizer, args.lr, args.weight_decay,
        args.scheduler, args.epoch)
    criterion = criterion.cuda()

    logging.info('optimizer : {:}'.format(optimizer))
    logging.info('scheduler : {:}'.format(scheduler))
    logging.info('criterion   : {:}'.format(criterion))
    logging.info('classifier:\n{:}'.format(model.classifier))

    need_time = AverageMeter()
    time_str = ''
    best_acc = 0
    for epoch in range(1, args.epoch + 1):
        logging.info('epoch:{:} LR:{:.6f} need time {:}'.format(
            epoch,
            scheduler.get_lr()[0], time_str))
        if args.name in ['cifar10', 'cifar100']:
            model.set_drop_path_prob(args.drop_path_prob * epoch / args.epoch)
        epoch_str = '[{:03d}/{:03d}]'.format(epoch, args.epoch)

        s_time = time.time()
        train_top1, train_top5, train_loss = model_train(
            train_loader, model, criterion, optimizer, epoch_str,
            args.print_frequency, args.grad_clip)
        val_top1, val_top5, val_loss = mode_val(valid_loader, model, criterion,
                                                epoch_str,
                                                args.print_frequency)
        need_time.update(time.time() - s_time)

        m, s = divmod(need_time.avg * (args.epoch - epoch), 60)
        h, m = divmod(m, 60)
        time_str = "%d:%02d:%02d" % (h, m, s)
        train_str = 'train set - epoch:' + epoch_str + ' result  Loss:'
        val_str = ' val  set - epoch:' + epoch_str + ' result  Loss:'
        logging.info(train_str +
                     '{:.6f}  Pre@1 : {:.5f}%  Pre@5:{:.5f}%'.format(
                         train_loss, train_top1, train_top5))
        logging.info(val_str + '{:.6f}  Pre@1 : {:.5f}%  Pre@5:{:.5f}%'.format(
            val_loss, val_top1, val_top5))

        if val_top1 > best_acc:
            best_acc = val_top1
            logging.info(
                'find the best model. best acc is {:.5f}%'.format(best_acc))
            logging.info('Save it to {:}'.format(args.save + 'best.pt'))
            save(model, os.path.join(args.save, 'best.pt'))

        scheduler.step()

    logging.info('best acc is {:.5f}%'.format(best_acc))
Example #15
    def save_global_eval_result_dict(self, global_result_dict):
        p = join(self.get_log_dir(), 'global_result_dict')
        print("in save_global_eval_result_dict")
        save(global_result_dict, p, print_msg=False)
Example #16
    # derive the genotype: discretize the continuous alphas back into an architecture
    genotype = model.genotype()
    logging.info('genotype = %s', genotype)

    print(F.softmax(model.alphas_normal, dim=-1))
    print(F.softmax(model.alphas_reduce, dim=-1))

    # training
    train_acc, train_obj, train_fscores, train_MIoU = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
    logging.info('train_acc %f _fscores %f _MIoU %f', train_acc, train_fscores, train_MIoU)

    # validation
    valid_acc, valid_obj, valid_fscores, valid_MIoU = infer(valid_queue, model, criterion)
    logging.info('valid_acc %f _fscores %f _MIoU %f', valid_acc, valid_fscores, valid_MIoU)

    utils.save(model, os.path.join(args.save, 'weights.pt'))
    f_arch.write(str(F.softmax(model.arch_parameters()[0], dim=-1)))
  f_arch.close()
def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr):
  objs = utils.AvgrageMeter()  # running average of the loss
  accs = utils.AvgrageMeter()
  MIoUs = utils.AvgrageMeter()
  fscores = utils.AvgrageMeter()

  # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  if args.gpu == -1:
    device = torch.device('cpu')
  else:
    device = torch.device('cuda:{}'.format(args.gpu))

  for step, (input, target) in enumerate(train_queue):  # each step draws one batch; batch size is 64 (256 data pairs)
Example #17
        ckpt = tf.train.get_checkpoint_state(snapshot_dir)
        if ckpt and ckpt.model_checkpoint_path:
            load(loader, sess, ckpt.model_checkpoint_path)
        else:
            print('No checkpoint file found.')
    else:
        load(loader, sess, snapshot_dir)

    print('Start training ...')

    _step, _loss, _summaries = 0, None, None
    while _step < iterations:
        try:
            _, _step, _g_loss, _p_psnr, _summaries = \
                sess.run([g_train_op, g_step, g_loss, train_positive_psnr, summary_op])

            if _step % 10 == 0:
                print('Iteration = {}, global loss = {:.6f}, positive psnr = {:.6f}'.format(_step, _g_loss, _p_psnr))

            if _step % 100 == 0:
                summary_writer.add_summary(_summaries, global_step=_step)
                print('Save summaries...')

            if _step % model_save_freq == 0:
                save(saver, sess, snapshot_dir, _step)

        except tf.errors.OutOfRangeError:
            print('Finish successfully!')
            save(saver, sess, snapshot_dir, _step)
            break
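
The save/load helpers in Example #17 wrap the TensorFlow 1.x tf.train.Saver API. A minimal sketch of what they presumably do (the wrapper names and messages are assumptions; only the Saver calls are standard TF):

import tensorflow as tf  # TF 1.x

def save(saver, sess, snapshot_dir, step):
    """Hypothetical wrapper: write a checkpoint tagged with the global step."""
    saver.save(sess, snapshot_dir + '/model.ckpt', global_step=step)
    print('Saved checkpoint at step {}'.format(step))

def load(loader, sess, ckpt_path):
    """Hypothetical wrapper: restore variables from a checkpoint."""
    loader.restore(sess, ckpt_path)
    print('Restored from {}'.format(ckpt_path))
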
Example #18
def main():
    seed = util.prepare(args)
    if not cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    CIFAR_CLASSES = 10
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    cuda.manual_seed(seed)
    cuda.set_device(args.gpu)
    cudnn.benchmark = False
    cudnn.deterministic = True
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)
    logging.info('hidden_layers:{:}'.format(args.hidden_layers))
    logging.info('first_neurons:{:}'.format(args.first_neurons))
    logging.info('change:{:}'.format(args.change))
    logging.info('activate_func:{:}'.format(args.activate_func))
    logging.info('opt:{:}'.format(args.opt))
    logging.info('cross_link:{:}'.format(args.cross_link))

    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype, args)
    model = model.cuda()
    logging.info("param size = %fMB", util.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = util.get_data_transforms_cifar10(args)
    train_data = datasets.CIFAR10(root=args.data,
                                  train=True,
                                  download=False,
                                  transform=train_transform)
    valid_data = datasets.CIFAR10(root=args.data,
                                  train=False,
                                  download=False,
                                  transform=valid_transform)

    train_queue = DataLoader(train_data,
                             batch_size=args.batch_size,
                             shuffle=True,
                             pin_memory=True,
                             num_workers=1)

    valid_queue = DataLoader(valid_data,
                             batch_size=args.batch_size,
                             shuffle=False,
                             pin_memory=True,
                             num_workers=1)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.epochs)

    best_acc = 0
    for epoch in range(args.epochs):
        logging.info('epoch %d lr %.6f', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        epoch_str = '[{:03d}/{:03d}]'.format(epoch, args.epochs)
        train_acc, train_obj = train(train_queue, model, criterion, optimizer,
                                     epoch_str)
        logging.info('train_acc %.2f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion, epoch_str)
        logging.info('valid_acc %.2f', valid_acc)

        if valid_acc > best_acc:
            logging.info(
                'find the best model. Save it to {:}'.format(args.save +
                                                             'best.pt'))
            util.save(model, os.path.join(args.save, 'best.pt'))
            best_acc = valid_acc
        scheduler.step()
    logging.info('best acc is {:}'.format(best_acc))
Example #19
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = get_data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=2,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=1)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=2,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=1)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, int(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        save(model, os.path.join(args.save, 'weights.pt'))
Example #20
    def search_train_loop(self, train_loader, val_loader, test_loader, model,
                          generator):
        self.epochs = self.warmup_epochs + self.search_epochs

        # Training supernet
        best_top1 = 0.0
        for epoch in range(
                self.warmup_epochs
        ):  # warmup_epochs is the number of supernet-training epochs; if 0, skip straight to the generator training below
            logging.info("Learning Rate: {:.4f}".format(
                self.optimizer.param_groups[0]["lr"]))
            self.writer.add_scalar("learning_rate/weights",
                                   self.optimizer.param_groups[0]["lr"], epoch)
            logging.info("Start to train for warmup epoch {}".format(epoch))

            self._training_step(model,
                                train_loader,
                                epoch,
                                info_for_logger="_train_step_")  # 训一个step
            if self.CONFIG.lr_scheduler == "step":
                self.scheduler.step()

            top1_avg = self._validate(model, val_loader, epoch)
            if best_top1 < top1_avg:
                logging.info("Best top1 acc by now. Save model")
                best_top1 = top1_avg
                save(model, self.optimizer, self.CONFIG.path_to_save_model)

        # Training generator
        best_loss = 10000.0
        best_top1 = 0
        tau = 5
        for epoch in range(
                self.warmup_epochs,
                self.search_epochs):  # epoch counter runs from warmup_epochs through search_epochs
            logging.info("Start to train for search epoch {}".format(epoch))
            logging.info("Tau: {}".format(tau))
            self._generator_training_step(generator,
                                          model,
                                          val_loader,
                                          epoch,
                                          tau,
                                          info_for_logger="_gen_train_step")

            top1_avg, _ = self.generator_validate(
                generator,
                model,
                val_loader,
                epoch,
                info_for_logger="_gen_val_step_",
                target_hardware_constraint=(self.CONFIG.low_flops +
                                            self.CONFIG.high_flops) / 2)
            evaluate_metric, total_loss, kendall_tau = evaluate_generator(
                generator, self.prior_pool, self.lookup_table, self.CONFIG,
                self.device)

            logging.info("Total loss : {}".format(total_loss))
            if best_loss > total_loss:
                logging.info("Best loss by now: {} Tau : {}.Save model".format(
                    total_loss, kendall_tau))
                best_loss = total_loss
                save_generator_evaluate_metric(
                    evaluate_metric, self.CONFIG.path_to_generator_eval)
                save(generator, self.g_optimizer,
                     self.CONFIG.path_to_save_generator)
            if top1_avg > best_top1 and total_loss < 0.4:
                logging.info(
                    "Best top1-avg by now: {}.Save model".format(top1_avg))
                best_top1 = top1_avg
                save(generator, self.g_optimizer,
                     self.CONFIG.path_to_best_avg_generator)

            tau *= self.CONFIG.tau_decay
        logging.info("Best loss: {}".format(best_loss))
        save(generator, self.g_optimizer, self.CONFIG.path_to_fianl_generator)
Example #21
    def _save_conf_code(self):
        with open(join(self.logdir, 'config.py'), 'w') as f:
            f.write(extract_config_code())
        p = join(self.get_log_dir(), 'FLAGS')
        print("in _save_conf_code")
        save({'FLAGS': FLAGS}, p, print_msg=False)
Example #22
def main(args):
    arg = argparse.ArgumentParser(
        description='Separate on- and off-screen audio from a video')
    arg.add_argument('vid_file', type=str, help='Video file to process')
    arg.add_argument(
        '--duration_mult',
        type=float,
        default=None,
        help=
        'Multiply the default duration of the audio (i.e. %f) by this amount. Should be a power of 2.'
        % sep_params.VidDur)
    arg.add_argument(
        '--mask',
        type=str,
        default=None,
        help=
        "set to 'l' or 'r' to visually mask the left/right half of the video before processing"
    )
    arg.add_argument('--start',
                     type=float,
                     default=0.,
                     help='How many seconds into the video to start')
    arg.add_argument(
        '--model',
        type=str,
        default='full',
        help='Which variation of the source separation model to run.')
    arg.add_argument('--gpu', type=int, default=0, help='Set to -1 for no GPU')
    arg.add_argument('--out',
                     type=str,
                     default=None,
                     help='Directory to save videos')
    arg.add_argument('--cam', dest='cam', default=False, action='store_true')

    # undocumented/deprecated options
    arg.add_argument('--clip_dur', type=float, default=None)
    arg.add_argument('--duration', type=float, default=None)
    arg.add_argument('--fullres', type=bool, default=True)
    arg.add_argument('--suffix', type=str, default='')
    arg.add_argument('--max_full_height', type=int, default=600)

    arg = arg.parse_args(args)
    arg.fullres = arg.fullres or arg.cam

    if arg.gpu < 0:
        arg.gpu = None

    print('Start time:', arg.start)
    print('GPU =', arg.gpu)

    gpus = [arg.gpu]
    gpus = mu.set_gpus(gpus)

    if arg.duration_mult is not None:
        pr = sep_params.full()
        step = 0.001 * pr.frame_step_ms
        length = 0.001 * pr.frame_length_ms
        arg.clip_dur = length + step * (0.5 + pr.spec_len) * arg.duration_mult

    fn = getattr(sep_params, arg.model)
    pr = fn(vid_dur=arg.clip_dur)

    if arg.clip_dur is None:
        arg.clip_dur = pr.vid_dur
    pr.input_rms = np.sqrt(0.1**2 + 0.1**2)
    print('Spectrogram samples:', pr.spec_len)
    pr.model_path = '../results/nets/sep/%s/net.tf-%d' % (pr.name,
                                                          pr.train_iters)

    if not os.path.exists(arg.vid_file):
        print('Does not exist:', arg.vid_file)
        sys.exit(1)

    if arg.duration is None:
        arg.duration = arg.clip_dur + 0.01

    print(arg.duration, arg.clip_dur)
    full_dur = arg.duration
    step_dur = arg.clip_dur / 2.
    filled = np.zeros(int(np.ceil(full_dur * pr.samp_sr)), 'bool')
    full_samples_fg = np.zeros(filled.shape, 'float32')
    full_samples_bg = np.zeros(filled.shape, 'float32')
    full_samples_src = np.zeros(filled.shape, 'float32')
    arg.start = ut.make_mod(arg.start, (1. / pr.fps))

    ts = np.arange(arg.start, arg.start + full_dur - arg.clip_dur, step_dur)
    full_ims = [None] * int(np.ceil(full_dur * pr.fps))

    # Process each video chunk
    for t in ut.time_est(ts):
        t = ut.make_mod(t, (1. / pr.fps))
        frame_start = int(t * pr.fps - arg.start * pr.fps)
        ret = run(arg.vid_file,
                  t,
                  arg.clip_dur,
                  pr,
                  gpus[0],
                  mask=arg.mask,
                  arg=arg)
        if ret is None:
            continue
        ims = ret['ims']
        for frame, im in zip(range(frame_start, frame_start + len(ims)), ims):
            full_ims[frame] = im

        samples_fg = ret['samples_pred_fg'][:, 0]
        samples_bg = ret['samples_pred_bg'][:, 0]
        samples_src = ret['samples_src'][:, 0]
        samples_src = samples_src[:samples_bg.shape[0]]

        sample_start = int(round((t - arg.start) * pr.samp_sr))
        n = samples_src.shape[0]
        inds = np.arange(sample_start, sample_start + n)
        ok = ~filled[inds]
        full_samples_fg[inds[ok]] = samples_fg[ok]
        full_samples_bg[inds[ok]] = samples_bg[ok]
        full_samples_src[inds[ok]] = samples_src[ok]
        filled[inds] = True

    full_samples_fg = np.clip(full_samples_fg, -1., 1.)
    full_samples_bg = np.clip(full_samples_bg, -1., 1.)
    full_samples_src = np.clip(full_samples_src, -1., 1.)
    full_ims = [x for x in full_ims if x is not None]
    table = [['start =', arg.start], 'fg:',
             imtable.Video(full_ims, pr.fps, Sound(full_samples_fg,
                                                   pr.samp_sr)), 'bg:',
             imtable.Video(full_ims, pr.fps, Sound(full_samples_bg,
                                                   pr.samp_sr)), 'src:',
             imtable.Video(full_ims, pr.fps, Sound(full_samples_src,
                                                   pr.samp_sr))]

    # Write videos
    if arg.out is not None:
        ut.mkdir(arg.out)
        vid_s = arg.vid_file.split('/')[-1].split('.mp4')[0]
        mask_s = '' if arg.mask is None else '_%s' % arg.mask
        cam_s = '' if not arg.cam else '_cam'
        suffix_s = '' if arg.suffix == '' else '_%s' % arg.suffix
        name = '%s%s%s_%s' % (suffix_s, mask_s, cam_s, vid_s)

        def snd(x):
            x = Sound(x, pr.samp_sr)
            x.samples = np.clip(x.samples, -1., 1.)
            return x

        print('Writing to:', arg.out)
        ut.save(pj(arg.out, 'ret%s.pk' % name), ret)
        ut.make_video(full_ims, pr.fps, pj(arg.out, 'fg%s.mp4' % name),
                      snd(full_samples_fg))
        ut.make_video(full_ims, pr.fps, pj(arg.out, 'bg%s.mp4' % name),
                      snd(full_samples_bg))
        ut.make_video(full_ims, pr.fps, pj(arg.out, 'src%s.mp4' % name),
                      snd(full_samples_src))
    else:
        print('Not writing, since --out was not set')

    print('Video results:')
    ig.show(table)
    return 'fg%s.mp4' % name, 'bg%s.mp4' % name
Example #23
            architecture_num = test_data["architecture_num"][i+250]
            y = test_data["avg"][i+250]
            adj_matrix = adj_matrix_table.iloc[architecture_num].values
            adj_matrix = adj_matrix.reshape(nodes_num, nodes_num)

            X = get_input_data(adj_matrix)
            edge_index = get_edge_index(adj_matrix)

            X = wrap_data(X)
            y = wrap_data([y])
            edge_index = wrap_data(edge_index, dtype=torch.long)

            outs = model(X, edge_index)
            loss = criterion(outs, y)
            test_loss += loss

            test_metric["architecture_num"].append(i+250)
            test_metric["predict_avg"].append(outs.item())
            test_metric["avg"].append(y.item())
        test_loss /= 50
        if best_loss > test_loss.item():
            save(model, "gcn_weight.pth")
            print(test_loss.item())
            df_metric = pd.DataFrame(test_metric)
            df_metric.to_csv("./test.csv", index=False)
            best_loss = test_loss.item()
Example #24
    def save_graph_embeddings_mat(self, init_x, id_map, gs_map):
        assert init_x.shape[0] == len(gs_map)
        p = join(self.get_log_dir(), "graph_embeddings")
        save({"init_x": init_x, "id_map": id_map, "gs_map": gs_map}, p)