Example #1
 def test_helper_threads(self):
     """
     Test openmp threads helper method.
     """
     rnn = vgsl.TorchVGSLModel('[1,1,0,48 Lbx10 Do O1c57]')
     rnn.set_num_threads(4)
     self.assertEqual(torch.get_num_threads(), 4)
Example #2
def evaluate(model, data_loader, device):
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Test:'

    coco = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)

    for image, targets in metric_logger.log_every(data_loader, 100, header):
        image = list(img.to(device) for img in image)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(image)

        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        res = {target["image_id"].item(): output for target, output in
               zip(targets, outputs)}
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    torch.set_num_threads(n_threads)
    return coco_evaluator
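
Examples #2 and #4 (and several more below) repeat the same save, restrict, restore dance around torch.set_num_threads. A minimal sketch of a context manager that factors the pattern out; the helper name limited_threads is hypothetical, not part of torchvision:

import contextlib

import torch


@contextlib.contextmanager
def limited_threads(n=1):
    """Temporarily cap PyTorch's intra-op thread count, then restore it."""
    saved = torch.get_num_threads()
    torch.set_num_threads(n)
    try:
        yield
    finally:
        torch.set_num_threads(saved)  # restored even if evaluation raises


# usage: with limited_threads(1): coco_evaluator = evaluate(model, loader, device)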
Example #3
def main():

    # Things that need to be parsed:
    # device, batch_size, n_epoch, num_workers, n_neg_sample
    parser = argparse.ArgumentParser(description='Process some integers.')

    parser.add_argument("--device", type=str, default="cuda:0")
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument(
        "--batch_size_train", type=int, default=1
    )  # batch size affects memory use: a larger batch size can cause out-of-memory errors
    parser.add_argument("--batch_size_eval", type=int, default=4)
    parser.add_argument("--n_epoch", type=int, default=4)
    parser.add_argument("--n_worker", type=int, default=3)
    parser.add_argument("--n_neg_sample", type=int, default=4)
    parser.add_argument("--num_dev", type=int, default=2000)
    parser.add_argument(
        "--max_seq_len", type=int, default=256
    )  # TODO: think about a way to pass this value to the collate function.
    parser.add_argument("--dataset", type=str, default="openbook")

    # parse the input arguments
    args = parser.parse_args()

    # set the random seeds
    torch.manual_seed(args.seed)  # set pytorch seed
    random.seed(args.seed)  # set python seed.
    # The Python random module is used in two places: constructing the raw dataset and constructing the training data.
    np.random.seed(args.seed)  # set numpy seed

    torch.set_num_threads(1)  # unrelated to the DataLoader's num_workers setting

    print("=" * 20)
    print("args:", args)
    print("num thread:", torch.get_num_threads())
    print("=" * 20)

    train_and_eval_model(args)

    return 0
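
As the comment in Example #3 notes, torch.set_num_threads controls intra-op parallelism inside the current process, while a DataLoader's num_workers spawns separate worker processes; the two settings are independent. A small stand-alone sketch (hypothetical toy dataset) illustrating the distinction:

import torch
from torch.utils.data import DataLoader, TensorDataset


def main():
    torch.set_num_threads(1)  # one intra-op compute thread in this process...
    ds = TensorDataset(torch.arange(8).float())
    loader = DataLoader(ds, batch_size=2, num_workers=2)  # ...but two loader processes
    for (batch,) in loader:
        pass  # each worker process has its own thread settings
    print(torch.get_num_threads())  # -> 1


if __name__ == "__main__":  # guard needed for spawned worker processes on some platforms
    main()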
Example #4
def evaluate(model,
             data_loader,
             device,
             device_ids,
             distributed,
             log_freq=1000,
             title=None,
             header='Test:'):
    model.to(device)
    if distributed:
        model = DistributedDataParallel(model, device_ids=device_ids)
    elif device.type.startswith('cuda'):
        model = DataParallel(model, device_ids=device_ids)

    if title is not None:
        logger.info(title)

    num_threads = torch.get_num_threads()
    torch.set_num_threads(1)
    model.eval()
    metric_logger = MetricLogger(delimiter='  ')
    for image, target in metric_logger.log_every(data_loader, log_freq,
                                                 header):
        image = image.to(device, non_blocking=True)
        target = target.to(device, non_blocking=True)
        output = model(image)
        acc1, acc5 = compute_accuracy(output, target, topk=(1, 5))
        # FIXME need to take into account that the datasets
        # could have been padded in distributed setup
        batch_size = image.shape[0]
        metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
        metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    top1_accuracy = metric_logger.acc1.global_avg
    top5_accuracy = metric_logger.acc5.global_avg
    logger.info(' * Acc@1 {:.4f}\tAcc@5 {:.4f}\n'.format(
        top1_accuracy, top5_accuracy))
    torch.set_num_threads(num_threads)
    return metric_logger.acc1.global_avg
Example #5
def parse_args():
    parser = argparse.ArgumentParser(
        description="Eval the achromatic pixel detector")
    a = parser.add_argument
    a("model_file", help="Model to evaluate")
    a("test_list", help="File listing test images")
    a("--image-size", type=int, default=256, help="Size of input images")
    a("--remove-gamma",
      action="store_true",
      help="Remove srgb gamma from training images.")
    a("--apply-gamma",
      action="store_true",
      help="Apply srgb gamma to output data.")
    a("--mask-clipped",
      action="store_true",
      help="Exclude clipped pixels from the estimate")
    a("--mask-black",
      action="store_true",
      help="Exclude black pixels from the estimate")
    a("--batch-size", type=int, default=16, help="Size of the minibatch")
    a("--num-workers",
      type=int,
      default=torch.get_num_threads(),
      help="Number of parallel threads")
    a("--device", default="cuda", help="Processing device")
    a("--plot-estimates",
      action="store_true",
      help="show the estimates on a plot")
    a("--filter-outliers",
      action="store_true",
      help=
      "exclude pixels outside the range of allowed illuminants (deprecated)")
    a("--filter", help="Classifier excluding unlikely rgb values.")
    a("--cv", type=int, help="Number of cross validation folds")
    a("--tex", action="store_true", help="Latex table format")
    a("--gw", action="store_true", help="Apply gray-world instead")
    a("--gt",
      action="store_true",
      help="Use the ground truth instead of the actual estimate")
    a("--output-dir", help="Directory where processed images are placed")
    return parser.parse_args()
Example #6
def dataloader(cfg, split, bs, shuffle=False):
    """Create a data loader for the specified dataset.
    """
    if cfg.dataset == "Flickr30K":
        from ml.datasets.flickr import Flickr30kEntities
        ds = Flickr30kEntities(
            split,
            path=cfg.data / "Flickr30K",
            tokenization=cfg.tok,
            max_tokens=cfg.max_tokens,
            max_entities=cfg.max_entities,
            max_rois=cfg.max_rois,
        )
    else:
        raise ValueError(f"Unsupported dataset: {cfg.dataset}")

    num_workers = cfg.num_workers or max(th.get_num_threads() // 2, 2)
    return DataLoader(ds,
                      batch_size=bs,
                      shuffle=shuffle,
                      num_workers=num_workers)
Example #7
def evaluate(model, data_loader, device='cuda'):
    n_threads = torch.get_num_threads()
    torch.set_num_threads(1)  # Is this necessary? Who knows...
    cpu_device = torch.device("cpu")
    inference_res = []
    model.eval()

    for images, targets in data_loader:
        images = list(img.to(device) for img in images)

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        outputs = model(images)

        outputs = [{k: v.to(cpu_device)
                    for k, v in t.items()} for t in outputs]
        res = targets, outputs
        inference_res.append(res)

    torch.set_num_threads(n_threads)
    return inference_res
Example #8
def set_hardware(args: argparse.Namespace) -> Optional[torch.device]:
    # set torch number of threads
    if args.torch_num_threads is None:
        LOGGER.info("Using default number of CPU threads: %s" %
                    torch.get_num_threads())
    else:
        torch.set_num_threads(args.torch_num_threads)
        LOGGER.info("Using specified number of CPU threads: %s" %
                    args.torch_num_threads)

    # specify gpu device if relevant
    if args.gpu:
        gpu_device: Optional[torch.device]
        gpu_device = torch.device(args.gpu_device)
        LOGGER.info("Using GPU device: %s" % args.gpu_device)
    else:
        gpu_device = None
        LOGGER.info("Using CPU device")

    # return device
    return gpu_device
Example #9
def setup_dist_backend(args, set_threads=False, thread_choice=None):
    """Sets up backend/environment for distributed training.

    Params:
        args: Command line args for main.py.
        set_threads: Whether to set OMP_NUM_THREADS explicitly.
        thread_choice: How to choose the number of OMP threads used.
    """
    def setup_print(s, **kwargs):
        if args.setup_verbose is True:
            print(s, **kwargs)

    # assumes all data will have (roughly) the same dimensions
    cudnn.benchmark = True

    # choose environment variable OMP_NUM_THREADS
    # see: https://github.com/pytorch/pytorch/pull/22501
    if set_threads is True:
        if thread_choice is None:
            os.environ['OMP_NUM_THREADS'] = str(1)
        elif thread_choice == 'torch_threads':
            os.environ['OMP_NUM_THREADS'] = str(torch.get_num_threads())
        elif thread_choice == 'multiproc':
            n_threads = multiprocessing.cpu_count() // int(
                os.environ['WORLD_SIZE'])
            os.environ['OMP_NUM_THREADS'] = str(n_threads)

    if args.distributed is True:
        if args.local_rank == 0:
            setup_print('Setting up distributed process group...')

        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend=args.dist_backend,
                                             init_method=args.dist_url,
                                             world_size=env_world_size())

        # make sure there's no mismatch between world sizes
        assert (env_world_size() == torch.distributed.get_world_size())
        setup_print(
            f"\tSuccess on process {args.local_rank}/{torch.distributed.get_world_size()}"
        )
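
One caveat about the OMP_NUM_THREADS logic above: the variable is read when the OpenMP runtime initializes, so it is only guaranteed to take effect if it is exported before torch is imported (or before the worker process is launched). A stand-alone sketch of the safe ordering, separate from setup_dist_backend:

import os

# set (or inherit) the variable before importing torch
os.environ.setdefault('OMP_NUM_THREADS', '1')

import torch  # noqa: E402  (deliberately imported after the env var is set)

print(torch.get_num_threads())  # reflects OMP_NUM_THREADS on OpenMP builds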
Example #10
    def __init__(self, env, policy_comm):
        """
        env is an environment object that conforms to a gym interface

        policy_comm is an mpi4py comm object to communicate with the other processes that are working to train this policy.  See docs for details.
        """
        self.env = env
        self.comm = policy_comm
        self.rank = self.comm.Get_rank()
        self.world_rank = MPI.COMM_WORLD.Get_rank()
        self.root = 0
        self.is_root = self.rank == self.root

        print(f'{self.world_rank} {self.rank} {self.comm.Get_size()}')

        # Avoid slowdowns caused by each separate process's PyTorch using more than its fair share of CPU resources.
        torch.set_num_threads(
            max(int(torch.get_num_threads() / self.comm.Get_size()), 1))

        self.model: torch.nn.Module = Model(env.observation_space,
                                            env.action_space)
        self.optimizer = torch.optim.Adam(self.model.parameters())
Example #11
def evaluate(model,
             data_loader,
             device,
             interval=1000,
             split_name='Test',
             title=None):
    if title is not None:
        print(title)

    num_threads = torch.get_num_threads()
    torch.set_num_threads(1)
    model.eval()
    metric_logger = MetricLogger(delimiter='  ')
    header = '{}:'.format(split_name)
    with torch.no_grad():
        for image, target in metric_logger.log_every(data_loader, interval,
                                                     header):
            image = image.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            output = model(image)

            acc1, acc5 = main_util.compute_accuracy(output,
                                                    target,
                                                    topk=(1, 5))
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = image.shape[0]
            metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
            metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    top1_accuracy = metric_logger.acc1.global_avg
    top5_accuracy = metric_logger.acc5.global_avg
    print(' * Acc@1 {:.4f}\tAcc@5 {:.4f}\n'.format(top1_accuracy,
                                                   top5_accuracy))
    torch.set_num_threads(num_threads)
    return metric_logger.acc1.global_avg
Example #12
    def _train(self: "Solver") -> float:
        """Perform training for one epoch.

        Returns:
            float -- train loss (averaged over batches)
        """
        threads = torch.get_num_threads()
        print("Threads:", threads)
        self.model.train()
        tr_loss = 0.0
        pbar = tqdm(self.train_loader,
                    desc="Train Batch",
                    position=0,
                    leave=True)
        for b, batch in enumerate(pbar):
            mixture, source = batch
            if self.cuda:
                mixture = mixture.cuda()
                source = source.cuda()

            estimate = self.model(mixture)
            loss = self.criterion(estimate, source)

            self.optim.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(self.model.parameters(),
                                     self.train_config.max_norm)
            self.optim.step()

            tr_loss += loss.item()
            pbar.set_postfix(tr_loss=tr_loss / (b + 1))

        tr_loss /= len(self.train_loader)

        return tr_loss
Example #13
def evaluate_cls(model, data_loader, device, use_amp=False):
    n_threads = torch.get_num_threads()
    # limit intra-op threads during evaluation (stale FIXME copied from the detection reference)
    torch.set_num_threads(1)
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Test:'

    total_loss = 0.0
    preds = []
    trues = []
    for images, targets in metric_logger.log_every(data_loader, 100, header):
        # images = torch.stack(images,0).to(device)
        # targets = [
        #     {k: v.to(device) for k, v in targ.items() if k not in ["path"]} for targ in targets]
        # targets = torch.stack([target["labels"] for target in targets], 0).to(device)

        images = images.to(device)
        targets = targets.to(device)

        # torch.cuda.synchronize()
        with torch.cuda.amp.autocast(use_amp):
            outputs = model(images, targets, False, True)

        total_loss += outputs["valid_loss"]
        preds.extend(outputs["preds"])
        trues.extend(targets)

    num_datas = len(data_loader.dataset)
    valid_loss = total_loss / num_datas
    valid_acc = (
        torch.eq(torch.tensor(preds), torch.tensor(trues)).sum().float() /
        num_datas).item()

    torch.set_num_threads(n_threads)  # restore the saved thread count
    print("\nvalid_loss:%.5f valid_acc:%.5f\n" % (valid_loss, valid_acc))

    return valid_loss, valid_acc
Example #14
def main():
    model = DCTTS(args).to(DEVICE)
    print('Model {} is working...'.format(args.model_name))
    print('{} threads are used...'.format(torch.get_num_threads()))
    ckpt_dir = os.path.join(args.logdir, args.model_name)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    # scheduler = MultiStepLR(optimizer, milestones=[50000, 150000, 300000], gamma=0.5) #
    scheduler = LambdaLR(optimizer, lr_policy)

    if not os.path.exists(ckpt_dir):
        os.makedirs(os.path.join(ckpt_dir, 'A', 'train'))
        if args.pretrained_path is not None:
            print('Train with pretrained model {}'.format(args.pretrained_path))
            state = torch.load(args.pretrained_path)
            model.custom_load_state_dict(state['model'])
    else:
        print('Checkpoint directory already exists. Resuming from the latest checkpoint.')
        ckpt = sorted(glob.glob(os.path.join(ckpt_dir, '*k.pth.tar')))[-1]
        state = torch.load(ckpt)
        model.load_state_dict(state['model'])
        args.global_step = state['global_step']
        optimizer.load_state_dict(state['optimizer'])
        # scheduler.load_state_dict(state['scheduler'])

    # model = torch.nn.DataParallel(model, device_ids=list(range(args.no_gpu))).to(DEVICE)

    dataset = SpeechDataset(args.data_path, args.meta_train, mem_mode=args.mem_mode)
    validset = SpeechDataset(args.data_path, args.meta_eval, mem_mode=args.mem_mode)
    data_loader = DataLoader(dataset=dataset, batch_size=args.batch_size,
                             shuffle=True, collate_fn=t2m_ga_collate_fn,
                             drop_last=True, pin_memory=True)
    valid_loader = DataLoader(dataset=validset, batch_size=args.test_batch,
                              shuffle=False, collate_fn=t2m_ga_collate_fn, pin_memory=True)
    
    writer = SummaryWriter(ckpt_dir)
    train(model, data_loader, valid_loader, optimizer, scheduler,
          batch_size=args.batch_size, ckpt_dir=ckpt_dir, writer=writer)
    return None
Example #15
def train_or_eval_model(i, args, raw_in_data, raw_out_data):
    # reduce number of threads as we're running FUTURE_CHUNKS parallel processes
    num_threads = max(torch.get_num_threads() // Model.FUTURE_CHUNKS, 1)
    torch.set_num_threads(num_threads)

    # create or load a model
    model = Model()
    if args.load_model:
        model_path = path.join(args.load_model, 'py-{}.pt'.format(i))
        model.load(model_path)
        sys.stderr.write('[{}] Loaded model from {}\n'.format(i, model_path))
    else:
        sys.stderr.write('[{}] Created a new model\n'.format(i))

    # normalize input data
    if args.inference:
        input_data = model.normalize_input(raw_in_data, update_obs=False)
    else:
        input_data = model.normalize_input(raw_in_data, update_obs=True)

    # discretize output data
    output_data = model.discretize_output(raw_out_data)

    # print some stats
    print_stats(i, output_data)

    if args.inference:
        model.set_model_eval()

        sys.stderr.write('[{}] test set size: {}\n'.format(i, len(input_data)))
        sys.stderr.write('[{}] loss: {:.3f}, accuracy: {:.2f}%\n'
            .format(i, model.compute_loss(input_data, output_data),
                    100 * model.compute_accuracy(input_data, output_data)))
    else:  # training
        model.set_model_train()

        # train a neural network with data
        train(i, args, model, input_data, output_data)
Example #16
def time_evaluate_cpu_all(models, number=1, stft_only=False):
    orig_threads = torch.get_num_threads()
    torch.set_num_threads(1)
    with torch.no_grad():
        inp = torch.rand(8192)
        times = {}
        for name, model in models.items():
            if model is None:
                continue

            print(f'Running {name}')
            model.eval()
            if not stft_only:
                times[name] = timeit.timeit(
                    '_ = model(inp)', number=number, globals=locals()) / number
            else:
                times[name] = timeit.timeit(
                    '_ = model.decoder(model.encoder(inp))',
                    number=number,
                    globals=locals()) / number

    torch.set_num_threads(orig_threads)
    return times
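
Pinning to a single thread as above keeps CPU timings comparable across models. As an alternative sketch, torch.utils.benchmark.Timer takes a num_threads argument and handles the pinning and restoration itself; model here stands in for any eval-mode module from the models dict:

import torch
import torch.utils.benchmark as benchmark

inp = torch.rand(8192)
timer = benchmark.Timer(
    stmt='with torch.no_grad(): model(inp)',
    globals={'torch': torch, 'model': model, 'inp': inp},
    num_threads=1,  # Timer sets and restores the intra-op thread count
)
print(timer.timeit(100))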
Example #17
def evaluate(model, loader, device):
    n_threads = torch.get_num_threads()
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = detection_util.MetricLogger(delimiter="  ")
    header = 'Test:'
    tp_total, fp_total, fn_total = torch.zeros([26, 10]), torch.zeros(
        [26, 10]), torch.zeros([26, 10])

    for images, targets in metric_logger.log_every(loader, 100, header):

        images = list(img.to(device) for img in images)

        torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(images)

        outputs = [{k: v.to(cpu_device)
                    for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        evaluator_time = time.time()
        tp, fp, fn = detection_metrics.getnum_tp_fp_fn(targets, outputs)
        tp_total += tp
        fp_total += fp
        fn_total += fn

        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time,
                             evaluator_time=evaluator_time)

    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    print("AP:", detection_metrics.get_mAP(tp_total, fp_total, fn_total))
    torch.set_num_threads(n_threads)
    return
Example #18
    def __init__(self,
                 act_limit,
                 obs_dim,
                 act_dim,
                 hidden_sizes,
                 pi_lr=1e-3,
                 q_lr=1e-3,
                 gamma=None,
                 alpha=None,
                 polyak=None,
                 load=False,
                 noise_scale=0.1,
                 target_noise=0.2,
                 noise_clip=0.5,
                 policy_delay=2,
                 exp_name='Exp1',
                 replay_buffer=None,
                 path='saved_models/'):
        self.act_limit = act_limit
        self.gamma = gamma
        self.alpha = alpha
        self.polyak = polyak
        self.load = load
        self.exp_name = exp_name
        self.path = path
        self.pi_lr = pi_lr
        self.q_lr = q_lr
        self.noise_scale = noise_scale
        self.target_noise = target_noise
        self.noise_clip = noise_clip
        self.policy_delay = policy_delay
        self.replay_buffer = replay_buffer
        self.create_networks(obs_dim, act_dim, hidden_sizes)
        self.update_timer = 0

        torch.set_num_threads(torch.get_num_threads())
Example #19
def main():
    G = SSRN().to(DEVICE)
    D = ConditionalDiscriminatorBlock().to(DEVICE)
    
    print('{} threads are used...'.format(torch.get_num_threads()))
    ckpt_dir = os.path.join(args.logdir, type(G).__name__)
    G_optim = torch.optim.Adam(G.parameters(), lr=args.lr)
    D_optim = torch.optim.Adam(D.parameters(), lr=args.lr)
    # scheduler = MultiStepLR(optimizer, milestones=[100000, 200000], gamma=0.5)

    if not os.path.exists(ckpt_dir):
        os.makedirs(os.path.join(ckpt_dir, 'A', 'train'))
    else:
        print('Checkpoint directory already exists. Resuming from the latest checkpoint.')
        ckpt = sorted(glob.glob(os.path.join(ckpt_dir, '{}-*k.pth'.format(type(G).__name__))))[-1]
        state = torch.load(ckpt)
        args.global_step = state['global_step']
        G.load_state_dict(state['G'])
        G_optim.load_state_dict(state['G_optim'])
        ckpt = sorted(glob.glob(os.path.join(ckpt_dir, '{}-*k.pth'.format(type(D).__name__))))[-1]
        state = torch.load(ckpt)
        D.load_state_dict(state['D'])
        D_optim.load_state_dict(state['D_optim'])

    dataset = SpeechDataset(args.data_path, args.meta_train, type(G).__name__, mem_mode=args.mem_mode)
    validset = SpeechDataset(args.data_path, args.meta_eval, type(G).__name__, mem_mode=args.mem_mode)
    data_loader = DataLoader(dataset=dataset, batch_size=args.batch_size,
                             shuffle=True, collate_fn=collate_fn,
                             drop_last=True, pin_memory=True)
    valid_loader = DataLoader(dataset=validset, batch_size=args.test_batch,
                              shuffle=False, collate_fn=collate_fn)
    
    writer = SummaryWriter(ckpt_dir)
    train(G, D, data_loader, valid_loader, G_optim, D_optim,
          batch_size=args.batch_size, ckpt_dir=ckpt_dir, writer=writer)
    return None
Example #20
def evaluate(model, data_loader, device, data_set=None, mAP_list=None):
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = "Test: "

    if data_set is None:
        data_set = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(data_set, iou_types)

    for images, targets in metric_logger.log_every(data_loader, 100, header):
        images = torch.stack(images, dim=0)

        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        images = images.to(device)
        # targets = {k: v.to(device) for k, v in targets.items()}

        if device != torch.device("cpu"):
            torch.cuda.synchronize(device)

        model_time = time.time()
        #  list((bboxes_out, labels_out, scores_out), ...)
        results = model(images, targets)

        outputs = []
        for index, (bboxes_out, labels_out, scores_out) in enumerate(results):
            # convert the boxes' relative (0-1) coordinates to absolute (xmin, ymin, xmax, ymax)
            height_width = targets[index]["height_width"]
            # height_width = [300, 300]
            bboxes_out[:, [0, 2]] = bboxes_out[:, [0, 2]] * height_width[1]
            bboxes_out[:, [1, 3]] = bboxes_out[:, [1, 3]] * height_width[0]

            info = {"boxes": bboxes_out.to(cpu_device),
                    "labels": labels_out.to(cpu_device),
                    "scores": scores_out.to(cpu_device)}
            outputs.append(info)

        # outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        res = dict()
        for index in range(len(outputs)):
            info = {targets[index]["image_id"].item(): outputs[index]}
            res.update(info)
        # res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}

        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    torch.set_num_threads(n_threads)

    print_txt = coco_evaluator.coco_eval[iou_types[0]].stats
    coco_mAP = print_txt[0]
    voc_mAP = print_txt[1]
    if isinstance(mAP_list, list):
        mAP_list.append(voc_mAP)
Example #21
levels_and_models = [("phylum", ConvNet(3)), ("class", ConvNet(5)),
                     ("order", ConvNet(10))]
lr_space = np.geomspace(1e-6, 1e3, num=10)
weight_decay = np.geomspace(1e-6, 1e3, num=10)

# populate parameter dicts
param_dicts = list()
for model_id, (level, m) in enumerate(levels_and_models):
    for l in lr_space:
        for w in weight_decay:
            param_dict = {
                "level": level,
                "model": copy.deepcopy(m),
                "eval_on": "val",
                "cnn_config": {
                    "model": model_id,
                    "lr": l,
                    "weight_decay": w
                }
            }
            param_dicts.append(param_dict)


def cnn_train_test_unpack(args):
    return cnn_train_eval(**args)


with Pool(max(int(cpu_count() / torch.get_num_threads()) - 1, 1)) as p:  # keep at least one worker
    experiment_logs = p.map(cnn_train_test_unpack, param_dicts)
np.save("grid_search_best_cnn_logs.npy", np.array(experiment_logs))
Example #22
def main_function(experiment_directory, continue_from, batch_split):

    logging.debug("running " + experiment_directory)

    specs = ws.load_experiment_specifications(experiment_directory)

    logging.info("Experiment description: \n" + specs["Description"])

    data_source = specs["DataSource"]
    train_split_file = specs["TrainSplit"]

    arch = __import__("networks." + specs["NetworkArch"], fromlist=["Decoder"])

    logging.debug(specs["NetworkSpecs"])

    latent_size = specs["CodeLength"]

    checkpoints = list(
        range(
            specs["SnapshotFrequency"],
            specs["NumEpochs"] + 1,
            specs["SnapshotFrequency"],
        ))

    for checkpoint in specs["AdditionalSnapshots"]:
        checkpoints.append(checkpoint)
    checkpoints.sort()

    lr_schedules = get_learning_rate_schedules(specs)

    grad_clip = get_spec_with_default(specs, "GradientClipNorm", None)
    if grad_clip is not None:
        logging.debug("clipping gradients to max norm {}".format(grad_clip))

    def save_latest(epoch):

        save_model(experiment_directory, "latest.pth", decoder, epoch)
        save_optimizer(experiment_directory, "latest.pth", optimizer_all,
                       epoch)
        save_latent_vectors(experiment_directory, "latest.pth", lat_vecs,
                            epoch)

    def save_checkpoints(epoch):

        save_model(experiment_directory, str(epoch) + ".pth", decoder, epoch)
        save_optimizer(experiment_directory,
                       str(epoch) + ".pth", optimizer_all, epoch)
        save_latent_vectors(experiment_directory,
                            str(epoch) + ".pth", lat_vecs, epoch)

    def signal_handler(sig, frame):
        logging.info("Stopping early...")
        sys.exit(0)

    def adjust_learning_rate(lr_schedules, optimizer, epoch):

        for i, param_group in enumerate(optimizer.param_groups):
            param_group["lr"] = lr_schedules[i].get_learning_rate(epoch)

    def empirical_stat(latent_vecs, indices):
        lat_mat = torch.zeros(0).cuda()
        for ind in indices:
            lat_mat = torch.cat([lat_mat, latent_vecs[ind]], 0)
        mean = torch.mean(lat_mat, 0)
        var = torch.var(lat_mat, 0)
        return mean, var

    signal.signal(signal.SIGINT, signal_handler)

    num_samp_per_scene = specs["SamplesPerScene"]
    scene_per_batch = specs["ScenesPerBatch"]
    clamp_dist = specs["ClampingDistance"]
    minT = -clamp_dist
    maxT = clamp_dist
    enforce_minmax = True

    do_code_regularization = get_spec_with_default(specs, "CodeRegularization",
                                                   True)
    code_reg_lambda = get_spec_with_default(specs, "CodeRegularizationLambda",
                                            1e-4)

    code_bound = get_spec_with_default(specs, "CodeBound", None)

    decoder = arch.Decoder(latent_size, **specs["NetworkSpecs"]).cuda()

    logging.info("training with {} GPU(s)".format(torch.cuda.device_count()))

    # if torch.cuda.device_count() > 1:
    decoder = torch.nn.DataParallel(decoder)

    num_epochs = specs["NumEpochs"]
    log_frequency = get_spec_with_default(specs, "LogFrequency", 10)

    with open(train_split_file, "r") as f:
        train_split = json.load(f)

    sdf_dataset = deep_sdf.data.SDFSamples(data_source,
                                           train_split,
                                           num_samp_per_scene,
                                           load_ram=False)
    print('[HERE: In train_deep_sdf.main_function] sdf_dataset len =',
          len(sdf_dataset))

    num_data_loader_threads = get_spec_with_default(specs, "DataLoaderThreads",
                                                    1)
    logging.debug(
        "loading data with {} threads".format(num_data_loader_threads))

    sdf_loader = data_utils.DataLoader(
        sdf_dataset,
        batch_size=scene_per_batch,
        shuffle=True,
        num_workers=num_data_loader_threads,
        drop_last=True,
    )
    print('[HERE: In train_deep_sdf.main_function] sdf_loader len =',
          len(sdf_loader))

    logging.debug("torch num_threads: {}".format(torch.get_num_threads()))

    num_scenes = len(sdf_dataset)

    logging.info("There are {} scenes".format(num_scenes))

    logging.debug(decoder)

    lat_vecs = torch.nn.Embedding(num_scenes, latent_size, max_norm=code_bound)
    torch.nn.init.normal_(
        lat_vecs.weight.data,
        0.0,
        get_spec_with_default(specs, "CodeInitStdDev", 1.0) /
        math.sqrt(latent_size),
    )

    logging.debug("initialized with mean magnitude {}".format(
        get_mean_latent_vector_magnitude(lat_vecs)))

    loss_l1 = torch.nn.L1Loss(reduction="sum")

    optimizer_all = torch.optim.Adam([
        {
            "params": decoder.parameters(),
            "lr": lr_schedules[0].get_learning_rate(0),
        },
        {
            "params": lat_vecs.parameters(),
            "lr": lr_schedules[1].get_learning_rate(0),
        },
    ])

    loss_log = []
    lr_log = []
    lat_mag_log = []
    timing_log = []
    param_mag_log = {}

    start_epoch = 1

    if continue_from is not None:

        logging.info('continuing from "{}"'.format(continue_from))

        lat_epoch = load_latent_vectors(experiment_directory,
                                        continue_from + ".pth", lat_vecs)

        model_epoch = ws.load_model_parameters(experiment_directory,
                                               continue_from, decoder)

        optimizer_epoch = load_optimizer(experiment_directory,
                                         continue_from + ".pth", optimizer_all)

        loss_log, lr_log, timing_log, lat_mag_log, param_mag_log, log_epoch = load_logs(
            experiment_directory)

        if log_epoch != model_epoch:
            loss_log, lr_log, timing_log, lat_mag_log, param_mag_log = clip_logs(
                loss_log, lr_log, timing_log, lat_mag_log, param_mag_log,
                model_epoch)

        if not (model_epoch == optimizer_epoch and model_epoch == lat_epoch):
            raise RuntimeError("epoch mismatch: {} vs {} vs {} vs {}".format(
                model_epoch, optimizer_epoch, lat_epoch, log_epoch))

        start_epoch = model_epoch + 1

        logging.debug("loaded")

    logging.info("starting from epoch {}".format(start_epoch))

    logging.info("Number of decoder parameters: {}".format(
        sum(p.data.nelement() for p in decoder.parameters())))
    logging.info(
        "Number of shape code parameters: {} (# codes {}, code dim {})".format(
            lat_vecs.num_embeddings * lat_vecs.embedding_dim,
            lat_vecs.num_embeddings,
            lat_vecs.embedding_dim,
        ))

    for epoch in range(start_epoch, num_epochs + 1):

        start = time.time()

        logging.info("epoch {}...".format(epoch))

        decoder.train()

        adjust_learning_rate(lr_schedules, optimizer_all, epoch)

        for sdf_data, indices in sdf_loader:
            #print('[HERE: In train_deep_sdf.LOOPsdf_loader] indices =', indices)

            # Process the input data
            sdf_data = sdf_data.reshape(-1, 4)

            num_sdf_samples = sdf_data.shape[0]

            sdf_data.requires_grad = False

            xyz = sdf_data[:, 0:3]
            sdf_gt = sdf_data[:, 3].unsqueeze(1)

            if enforce_minmax:
                sdf_gt = torch.clamp(sdf_gt, minT, maxT)

            xyz = torch.chunk(xyz, batch_split)
            indices = torch.chunk(
                indices.unsqueeze(-1).repeat(1, num_samp_per_scene).view(-1),
                batch_split,
            )

            sdf_gt = torch.chunk(sdf_gt, batch_split)

            batch_loss = 0.0

            optimizer_all.zero_grad()

            for i in range(batch_split):
                #print('[HERE: In train_deep_sdf.LOOPbatch_split] i/batch_split = %d/%d'%(i, batch_split))

                batch_vecs = lat_vecs(indices[i])

                input = torch.cat([batch_vecs, xyz[i]], dim=1)

                # NN optimization
                pred_sdf = decoder(input)

                if enforce_minmax:
                    pred_sdf = torch.clamp(pred_sdf, minT, maxT)

                chunk_loss = loss_l1(pred_sdf,
                                     sdf_gt[i].cuda()) / num_sdf_samples

                if do_code_regularization:
                    l2_size_loss = torch.sum(torch.norm(batch_vecs, dim=1))
                    reg_loss = (code_reg_lambda * min(1, epoch / 100) *
                                l2_size_loss) / num_sdf_samples

                    chunk_loss = chunk_loss + reg_loss.cuda()

                chunk_loss.backward()

                batch_loss += chunk_loss.item()

            logging.debug("loss = {}".format(batch_loss))
            logging.info("loss = {}".format(batch_loss))

            loss_log.append(batch_loss)

            if grad_clip is not None:

                torch.nn.utils.clip_grad_norm_(decoder.parameters(), grad_clip)

            optimizer_all.step()

        end = time.time()

        seconds_elapsed = end - start
        timing_log.append(seconds_elapsed)

        lr_log.append(
            [schedule.get_learning_rate(epoch) for schedule in lr_schedules])

        lat_mag_log.append(get_mean_latent_vector_magnitude(lat_vecs))

        append_parameter_magnitudes(param_mag_log, decoder)

        if epoch in checkpoints:
            save_checkpoints(epoch)

        if epoch % log_frequency == 0:

            save_latest(epoch)
            save_logs(
                experiment_directory,
                loss_log,
                lr_log,
                timing_log,
                lat_mag_log,
                param_mag_log,
                epoch,
            )
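
The inner loop above is a gradient-accumulation pattern: the batch is split into batch_split chunks, backward() runs per chunk, and a single optimizer step follows. Distilled to its core as a sketch with hypothetical toy shapes:

import torch

model = torch.nn.Linear(16, 1)
opt = torch.optim.Adam(model.parameters())
x, y = torch.randn(64, 16), torch.randn(64, 1)
batch_split = 4

opt.zero_grad()
for xs, ys in zip(torch.chunk(x, batch_split), torch.chunk(y, batch_split)):
    # per-chunk loss is normalized by the full batch size, as in the loop above
    loss = torch.nn.functional.l1_loss(model(xs), ys, reduction='sum') / len(x)
    loss.backward()  # gradients accumulate across chunks
opt.step()  # one update for the whole (virtual) batch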
Example #23
from joblib import Parallel, delayed
import util
import torch
import torch as T
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from collections import OrderedDict
from config_reader import config_reader
from scipy.ndimage.filters import gaussian_filter
#parser = argparse.ArgumentParser()
#parser.add_argument('--t7_file', required=True)
#parser.add_argument('--pth_file', required=True)
#args = parser.parse_args()

torch.set_num_threads(torch.get_num_threads())
weight_name = './model/pose_model.pth'

blocks = {}

# find connection in the specified sequence, center 29 is in the position 15
limbSeq = [[2,3], [2,6], [3,4], [4,5], [6,7], [7,8], [2,9], [9,10], \
           [10,11], [2,12], [12,13], [13,14], [2,1], [1,15], [15,17], \
           [1,16], [16,18], [3,17], [6,18]]
           
# the middle joints heatmap correspondence
mapIdx = [[31,32], [39,40], [33,34], [35,36], [41,42], [43,44], [19,20], [21,22], \
          [23,24], [25,26], [27,28], [29,30], [47,48], [49,50], [53,54], [51,52], \
          [55,56], [37,38], [45,46]]
          
# visualize
Example #24
def evaluate(model,
             data_loader,
             device,
             metric_logger,
             print_freq,
             file_save=None):
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()

    metric_logger.renew(epoch_size=len(data_loader),
                        delimiter="  ",
                        train=False)
    header = 'Test:'

    coco = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)

    all_results = []
    for image, targets in metric_logger.log_every(data_loader, print_freq,
                                                  header):
        image = list(img.to(device) for img in image)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(image)

        outputs = [{k: v.to(cpu_device)
                    for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        res = {
            target["image_id"].item(): output
            for target, output in zip(targets, outputs)
        }
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time,
                             evaluator_time=evaluator_time)

        # TODO: also add masks to the results
        if file_save is not None:
            for image_id in res.keys():
                for b in range(len(res[image_id]['labels'])):
                    # boxes xyxy -> xywh
                    current_box = res[image_id]['boxes'][b].numpy().tolist()
                    current_box[2] -= current_box[0]
                    current_box[3] -= current_box[1]
                    all_results.append({
                        "image_id":
                        int(image_id),
                        "category_id":
                        int(res[image_id]['labels'][b].numpy()),
                        "bbox":
                        current_box,
                        "score":
                        float(res[image_id]['scores'][b].numpy())
                    })

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate(make_print=True)
    coco_evaluator.summarize(make_print=True)

    for k in coco_evaluator.coco_eval.keys():
        acc_name = k + "_" + "mAP"
        # here I add only the main figure of merit, could be extended
        acc_val = coco_evaluator.coco_eval[k].stats[0]

        metric_logger.add_meter(
            acc_name, utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
        metric_logger.meters[acc_name].update(acc_val)

    metric_logger.end_epoch()
    metric_logger.print_out("Averaged stats: {}".format(str(metric_logger)))

    # save results
    if file_save is not None:
        with open(file_save, 'w') as outfile:
            json.dump(all_results, outfile)

    torch.set_num_threads(n_threads)
    return coco_evaluator
Example #25
def evaluate(model, data_loader, device, epoch_num=None, check_num=200):
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")

    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Test:'

    coco = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)

    idx = 0

    for images, targets in metric_logger.log_every(data_loader, 100, header):
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        torch.cuda.synchronize()
        model_time = time.time()
        outputs_set = model(images)  # a list with one output dict per image in the validation batch

        outputs_list = [{k: v.to(cpu_device)
                         for k, v in t.items()}
                        for t in outputs_set]  # for each output in the minibatch

        # outputs_list holds dicts t with keys {'boxes','labels','scores','masks'}; each value is a tensor

        model_time = time.time() - model_time

        res = {
            target["image_id"].item(): output
            for target, output in zip(targets, outputs_list)
        }
        # build a dict keyed by target["image_id"].item(), i.e. the image id;
        # each value is the model's output dict t ({'boxes','labels','scores','masks'}),
        # where each tensor's length equals the number of predicted objects

        idx += 1
        if idx % check_num == 0:  # record a result check every check_num batches
            if epoch_num is not None:
                coco_a_result_check(images, targets, res,
                                    'E' + str(epoch_num) + '_' + str(idx))
            else:
                coco_a_result_check(images, targets, res)
        '''
        for key in res:
            print(len(res[key]['boxes']))  # Mask R-CNN initially outputs 100 boxes (DETR: 200); once trained, far fewer remain.
        '''

        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time,
                             evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    torch.set_num_threads(n_threads)
    return coco_evaluator
Example #26
def main(args):
    torch.manual_seed(123)

    local_rank = int(os.environ[args.env_rank])
    world_size = int(os.environ[args.env_world_size])

    device = torch.device('cuda:%d' %
                          (local_rank) if torch.cuda.is_available() else 'cpu')

    if local_rank == 0:
        print('Number of CUDA devices: %d' % (torch.cuda.device_count()))
        print('Number of CPU threads: %d' % (torch.get_num_threads()))

    transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize((224, 224)),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406],
                                         [0.229, 0.224, 0.225])
    ])

    if world_size > 1:
        print('rank: {}/{}'.format(local_rank + 1, world_size))
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://',
                                             rank=local_rank,
                                             world_size=world_size)

    # Data loading code
    train_dataset = ImagesDataset(data_dir=args.data_dir, transform=transform)

    train_sampler = None
    if world_size > 1:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=False,
                                               num_workers=0,
                                               pin_memory=True,
                                               drop_last=False,
                                               sampler=train_sampler)

    net = torchvision.models.resnet50()
    net = net.to(device)

    if world_size > 1:
        net = torch.nn.parallel.DistributedDataParallel(net)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(),
                                lr=args.lr,
                                momentum=args.momentum)

    net.train()
    for epoch in range(args.epochs):
        epoch_start = timeit.default_timer()

        if world_size > 1:
            train_sampler.set_epoch(epoch)

        train_loss = 0
        for index, (images, labels) in enumerate(train_loader, 1):
            # forward pass
            images = images.to(device)
            labels = labels.to(device)
            outputs = net(images)
            loss = criterion(outputs, labels)

            # backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

            if local_rank == 0:
                print(
                    '\repoch %3d batch %3d/%3d train loss %6.4f' %
                    (epoch + 1, index, len(train_loader), train_loss / index),
                    end='')

        if local_rank == 0:
            print('\repoch %3d batch %3d/%3d train loss %6.4f' %
                  (epoch + 1, index, len(train_loader),
                   train_loss / len(train_loader)),
                  end='')
            print(' %5.3fsec' % (timeit.default_timer() - epoch_start))
Example #27
    def _setup(self, config):
        self.config = config
        print('NeuroCard config:')
        pprint.pprint(config)
        os.chdir(config['cwd'])
        for k, v in config.items():
            setattr(self, k, v)

        if config['__gpu'] == 0:
            torch.set_num_threads(config['__cpu'])

        # W&B.
        # Doing wandb.init() after the os.chdir() above ensures that the Git
        # diff file (diff.patch) is taken w.r.t. the directory this file is
        # in, rather than w.r.t. Ray's package dir.
        wandb_project = config['__run']
        wandb.init(name=os.path.basename(
            self.logdir if self.logdir[-1] != '/' else self.logdir[:-1]),
                   sync_tensorboard=True,
                   config=config,
                   project=wandb_project)

        self.epoch = 0

        if isinstance(self.join_tables, int):
            # Hack to support training single-model tables.
            sorted_table_names = sorted(
                list(datasets.TPC_DS.GetTDSLightJoinKeys().keys()))
            self.join_tables = [sorted_table_names[self.join_tables]]

        # Try to make all the runs the same, except for input orderings.
        torch.manual_seed(0)
        np.random.seed(0)

        # Common attributes.
        self.loader = None
        self.join_spec = None
        join_iter_dataset = None
        table_primary_index = None

        # New datasets should be loaded here.
        assert self.dataset in ['tpcds']
        if self.dataset == 'tpcds':
            print('Training on Join({})'.format(self.join_tables))
            loaded_tables = []
            for t in self.join_tables:
                print('Loading', t)
                table = datasets.LoadTds(t, use_cols=self.use_cols)
                table.data.info()
                loaded_tables.append(table)
            if len(self.join_tables) > 1:
                join_spec, join_iter_dataset, loader, table = self.MakeSamplerDatasetLoader(
                    loaded_tables)

                self.join_spec = join_spec
                self.train_data = join_iter_dataset
                self.loader = loader

                table_primary_index = [t.name
                                       for t in loaded_tables].index('title')

                table.cardinality = datasets.TPC_DS.GetFullOuterCardinalityOrFail(
                    self.join_tables)
                self.train_data.cardinality = table.cardinality

                print('rows in full join', table.cardinality,
                      'cols in full join', len(table.columns), 'cols:', table)
            else:
                # Train on a single table.
                table = loaded_tables[0]

        if self.dataset != 'tpcds' or len(self.join_tables) == 1:
            table.data.info()
            self.train_data = self.MakeTableDataset(table)

        self.table = table
        # Provide true cardinalities in a file or implement an oracle CardEst.
        self.oracle = None
        self.table_bits = 0

        # A fixed ordering?
        self.fixed_ordering = self.MakeOrdering(table)

        model = self.MakeModel(self.table,
                               self.train_data,
                               table_primary_index=table_primary_index)

        # NOTE: ReportModel()'s returned value is the true model size in
        # megabytes containing all *trainable* parameters.  As an implementation
        # convenience, the saved ckpts on disk have slightly bigger footprint
        # due to saving non-trainable constants (the masks in each layer) as
        # well.  They can be deterministically reconstructed based on RNG seeds
        # and so should not be counted as model size.
        self.mb = train_utils.ReportModel(model)
        if not isinstance(model, transformer.Transformer):
            print('applying train_utils.weight_init()')
            model.apply(train_utils.weight_init)
        self.model = model

        if self.use_data_parallel:
            self.model = DataParallelPassthrough(self.model)

        wandb.watch(model, log='all')

        if self.use_transformer:
            opt = torch.optim.Adam(
                list(model.parameters()),
                2e-4,
                # betas=(0.9, 0.98),  # B in Lingvo; in Trfmr paper.
                betas=(0.9, 0.997),  # A in Lingvo.
                eps=1e-9,
            )
        else:
            if self.optimizer == 'adam':
                opt = torch.optim.Adam(list(model.parameters()), 2e-4)
            else:
                print('Using Adagrad')
                opt = torch.optim.Adagrad(list(model.parameters()), 2e-4)
        print('Optimizer:', opt)
        self.opt = opt

        total_steps = self.epochs * self.max_steps
        if self.lr_scheduler == 'CosineAnnealingLR':
            # Starts decaying to 0 immediately.
            self.lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                opt, total_steps)
        elif self.lr_scheduler == 'OneCycleLR':
            # Warms up to max_lr, then decays to ~0.
            self.lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
                opt, max_lr=2e-3, total_steps=total_steps)
        elif self.lr_scheduler is not None and self.lr_scheduler.startswith(
                'OneCycleLR-'):
            warmup_percentage = float(self.lr_scheduler.split('-')[-1])
            # Warms up to max_lr, then decays to ~0.
            self.lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
                opt,
                max_lr=2e-3,
                total_steps=total_steps,
                pct_start=warmup_percentage)
        elif self.lr_scheduler is not None and self.lr_scheduler.startswith(
                'wd_'):
            # Warmups and decays.
            splits = self.lr_scheduler.split('_')
            assert len(splits) == 3, splits
            lr, warmup_fraction = float(splits[1]), float(splits[2])
            self.custom_lr_lambda = train_utils.get_cosine_learning_rate_fn(
                total_steps,
                learning_rate=lr,
                min_learning_rate_mult=1e-5,
                constant_fraction=0.,
                warmup_fraction=warmup_fraction)
        else:
            assert self.lr_scheduler is None, self.lr_scheduler

        self.tbx_logger = tune_logger.TBXLogger(self.config, self.logdir)

        if self.checkpoint_to_load:
            self.LoadCheckpoint()

        self.loaded_queries = None
        self.oracle_cards = None
        if self.dataset == 'tpcds' and len(self.join_tables) > 1:
            queries_job_format = utils.JobToQuery(self.queries_csv)
            self.loaded_queries, self.oracle_cards = utils.UnpackQueries(
                self.table, queries_job_format)

        if config['__gpu'] == 0:
            print('CUDA not available, using # cpu cores for intra-op:',
                  torch.get_num_threads(), '; inter-op:',
                  torch.get_num_interop_threads())
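
Example #27 reads both pools: torch.get_num_threads (intra-op) and torch.get_num_interop_threads (inter-op). If you also want to set the inter-op count, PyTorch requires doing so once, before any inter-op parallel work has started; a minimal startup sketch:

import torch

# must run before any inter-op parallel work (e.g. the JIT interpreter) starts;
# calling it later raises a RuntimeError once the inter-op pool exists
torch.set_num_interop_threads(2)
torch.set_num_threads(4)

print(torch.get_num_interop_threads(), torch.get_num_threads())  # -> 2 4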
Example #28
from bayesmark import np_util
from bayesmark.abstract_optimizer import AbstractOptimizer
from bayesmark.experiment import experiment_main
from sklearn.preprocessing import power_transform

import numpy as np
import pandas as pd
import torch
from torch.quasirandom import SobolEngine
from pyDOE2 import lhs
from hebo.design_space.design_space import DesignSpace
from hebo.models.model_factory import get_model
from hebo.acquisitions.acq import LCB, Mean, Sigma, MOMeanSigmaLCB, MACE
from hebo.optimizers.evolution_optimizer import EvolutionOpt

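# note: since torch.get_num_threads() >= 1, min(1, n) always evaluates to 1,
# so the call below pins PyTorch to a single intra-op thread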
torch.set_num_threads(min(1, torch.get_num_threads()))


class MACEBO(AbstractOptimizer):
    # Unclear what is best package to list for primary_import here.
    primary_import = "bayesmark"

    def __init__(self, api_config, model_name='gpy'):
        AbstractOptimizer.__init__(self, api_config)
        self.api_config = api_config
        self.space = self.parse_space(api_config)
        self.X = pd.DataFrame(columns=self.space.para_names)
        self.y = np.zeros((0, 1))
        self.model_name = model_name
        for k in api_config:
            print(k, api_config[k])
Example #29
    random.seed(666)
    np.random.seed(666)
    torch.cuda.manual_seed(666)
    torch.manual_seed(666)

    argparser = OptionParser()
    argparser.add_option('--config_file', type="str", default='config.cfg')
    argparser.add_option("--numthread", type="int", dest="nthread", default=4)
    argparser.add_option('--use_cuda', action='store_true', default=True)
    argparser.add_option('--parsingmodel', type="str", default='BaseParser')

    (args, extra_args) = argparser.parse_args()
    options = Configurable(args.config_file, extra_args)

    torch.set_num_threads(args.nthread)
    print("Pytorch using {} threads.".format(torch.get_num_threads()))

    if options.external_embedding:
        print('Using external embedding: {}'.format(
            options.external_embedding))

    if options.gpuFlag:
        print("Use GPU!")

    print('Preparing vocab')
    words, w2i, p2i, rels = vocab(options.conll_train)
    # proportion setting
    with open(options.conll_train, 'r') as conllFP:
        sentencesData = list(read_conll(conllFP))
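Exemple #29 seeds four RNGs by hand before configuring threads. A compact sketch of the same reproducibility setup as a reusable helper (the function name is illustrative, not from the snippet):

import random

import numpy as np
import torch

def set_global_seed(seed: int) -> None:
    # Seed Python, NumPy, and PyTorch (CPU and, when available, CUDA).
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)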
Exemple #30
0
    def generateArgs() -> argparse.Namespace:
        """
			This function parses and returns the arguments provided by the user.

			Use python main.py --help to get a full list of arguments.

			:return: Returns the parsed arguments given by the user.
			"""
        # Argument parser. Most defaults are what the original paper outlined.
        arg_parser = argparse.ArgumentParser()

        # Groups for the arguments
        systemArgs = arg_parser.add_argument_group(
            'system',
            "Args that affect the system to be used during the running of the program."
        )
        trainingArgs = arg_parser.add_argument_group(
            'training', "Args that affect training.")
        loggingArgs = arg_parser.add_argument_group(
            'logging', "Args that affect logging.")
        fileArgs = arg_parser.add_argument_group(
            'files', "Args that deal with files (saving and loading).")
        testingArgs = arg_parser.add_argument_group(
            'testing', "Args that deal with testing.")
        actionArgs = arg_parser.add_argument_group(
            'actions', "Args that deal with what the program does.")

        # Add flag argument to run with GPU or not.
        systemArgs.add_argument('-g',
                                '--gpu',
                                action='store_true',
                                required=False,
                                help="Use GPU",
                                default=False)
        systemArgs.add_argument('--version',
                                action='version',
                                dest='version',
                                version=Config.version)
        systemArgs.add_argument(
            '-t',
            '--threads',
            help="The number of threads that you want to use.",
            type=int,
            required=False,
            default=1)
        systemArgs.add_argument(
            '-T',
            '--max-threads',
            help="Use the most number of threads possible.",
            action="store_true",
            required=False,
            default=False)
        systemArgs.add_argument(
            '--force-threads',
            help=
            "Force the program not to cap the number of threads at 7 (by default the count is kept in the range 1-7)",
            action="store_true",
            required=False,
            default=False)
        systemArgs.add_argument(
            '--model-number',
            help="The number of the model. To be used when saving.",
            type=int,
            required=False,
            default=0)

        # Reduce learning rate by this rate
        # the gamma in the LR scheduler
        trainingArgs.add_argument('-F',
                                  '--factor',
                                  help="Reduce learning rate by factor",
                                  type=float,
                                  required=False,
                                  default=.1)
        # The base learning rate to start out with.
        trainingArgs.add_argument('-l',
                                  '--learning-rate',
                                  help="Standard learning rate",
                                  type=float,
                                  required=False,
                                  default=1e-05)
        # The momentum for the network.
        trainingArgs.add_argument('-m',
                                  '--momentum',
                                  help="Momentum rate",
                                  type=float,
                                  required=False,
                                  default=.9)
        # Batch size for the network.
        trainingArgs.add_argument('-b',
                                  '--batch-size',
                                  help="Batch size",
                                  type=int,
                                  required=False,
                                  default=75)
        # Beta for the loss function.
        trainingArgs.add_argument('-B',
                                  '--beta',
                                  help="Beta for loss function",
                                  type=int,
                                  required=False,
                                  default=500)
        trainingArgs.add_argument('-e',
                                  '--epochs',
                                  help="Total number of epochs for this model",
                                  type=int,
                                  required=False,
                                  default=10)
        trainingArgs.add_argument(
            '-d',
            '--database-root',
            type=str,
            help="The root folder of the database to be used.",
            required=False,
            default="KingsCollege/")
        trainingArgs.add_argument(
            '--threshold-factor',
            help=
            "When the loss is less than beta times this number, halve the threshold. Should be in (0, 1]",
            required=False,
            type=float,
            default=2.0 / 3.0)

        loggingArgs.add_argument(
            '-v',
            '--verbose',
            help="Print everything the neural network is doing.",
            action='store_true',
            required=False,
            default=False)
        # Print progress every nth batch.
        loggingArgs.add_argument('-p',
                                 '--print-every',
                                 help="Print progress every nth batch",
                                 type=int,
                                 required=False,
                                 default=4)
        loggingArgs.add_argument(
            '-L',
            '--log-config',
            help="How much the program should log.",
            type=str,
            choices=["all", "main", "min", "warn", "err", "none"],
            required=False,
            default="main")

        fileArgs.add_argument(
            '-f',
            "--model-file",
            help="Save model to this file",
            type=str,
            required=False,
            default="models/posenet-model-v{}-E{:04d}-N{:02d}.model")
        # Use the provided pretrained model.
        fileArgs.add_argument('-M',
                              '--pretrained-model',
                              help="Resume using given pretrained model",
                              type=str,
                              required=False,
                              default=None)
        fileArgs.add_argument(
            '-s',
            '--dont-save',
            help=
            "Don't save models after each epoch. Default action is to save models after each epoch.",
            required=False,
            action='store_true',
            default=False)
        fileArgs.add_argument('-r',
                              '--resume',
                              help="Resume from latest model",
                              action='store_true',
                              required=False,
                              default=False)

        testingArgs.add_argument(
            '--num-of-tests',
            help="Number of times to test the network to get the uncertainty.",
            required=False,
            default=64,
            type=int)
        testingArgs.add_argument('--test-every',
                                 help="Test every given epochs.",
                                 required=False,
                                 default=2,
                                 type=int)
        testingArgs.add_argument(
            '--test-print-lots',
            help=
            "Print testing results for every batch instead of at a frequency "
            + "based on the batch size (the default, which prints less often).",
            required=False,
            action="store_true",
            default=False)

        actionArgs.add_argument('-i',
                                '--image',
                                type=str,
                                help="A single image to test the network on.",
                                required=False,
                                default=None)
        actionArgs.add_argument(
            '--skip-training',
            help=
            "Skips training and validation and goes straight to testing. Good to use if you keep getting memory errors.",
            required=False,
            action="store_true",
            default=False)
        actionArgs.add_argument('--plot',
                                help="Plot losses of specified model.",
                                required=False,
                                action="store_true",
                                default=False)
        actionArgs.add_argument(
            '--skip-testing',
            help=
            "Skip testing (useful if you get memory errors only while testing.)",
            required=False,
            action="store_true",
            default=False)

        out: argparse.Namespace = arg_parser.parse_args()
        if out.verbose:
            out.log_config = "all"
        if out.resume and out.pretrained_model is None:
            out.pretrained_model = "models/posenet-latest-v{version}-N{num:02d}.model".format(
                version=Config.version, num=out.model_number)

        if out.max_threads or out.threads > torch.get_num_threads():
            out.threads = torch.get_num_threads()
        if out.threads >= 8 and not out.force_threads:
            out.threads = 7
        if out.threads == 0:
            out.threads = 1

        return out
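The thread handling at the end of generateArgs clamps the requested count to what torch reports and, unless --force-threads is given, caps it at 7. A hedged sketch of how the parsed value might then be applied (the helper is hypothetical; only the torch calls are real):

import torch

def apply_thread_args(requested: int, use_max: bool, force: bool) -> int:
    # Mirror the clamping logic above: fall back to the hardware count,
    # cap at 7 unless forced, and never run with zero threads.
    available = torch.get_num_threads()
    n = available if (use_max or requested > available) else requested
    if n >= 8 and not force:
        n = 7
    n = max(n, 1)
    torch.set_num_threads(n)
    return n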
Exemple #31
0
    filename = save_dir + 'exp.log'
    if not os.path.isfile(filename):
        f = open(filename, mode='w')
        f.close()
    logger = get_logger(filename)

    argument_file = save_dir + '.args'
    argsDict = args.__dict__
    with open(argument_file, 'w') as f:
        f.writelines('------------------ start ------------------' + '\n')
        for eachArg, value in argsDict.items():
            f.writelines(eachArg + ' : ' + str(value) + '\n')
        f.writelines('------------------- end -------------------' + '\n')

    # NOTE: this is a no-op (it sets the thread count to its current value);
    # pass an explicit integer to actually change it.
    torch.set_num_threads(torch.get_num_threads())

    sac(lambda: SoccerPLUS(visual=False),
        actor_critic=MLPActorCritic,
        ac_kwargs=dict(hidden_sizes=[args.hid] * args.l),
        gamma=args.gamma,
        seed=args.seed,
        epochs=args.epochs,
        policy_type=args.policy_type,
        replay_size=args.replay_size,
        lr=args.lr,
        alpha=args.alpha,
        batch_size=args.batch_size,
        start_steps=10000,
        steps_per_epoch=1000,
        polyak=0.995,
Exemple #32
0
def evaluate(
        model, data_loader, device, maxDets=None, crop_inference_to_fov=False):
    # See: https://cocodataset.org/#detection-eval

    # NOTE: The coco evaluator (and what's reported in the FasterRCNN and
    #  MaskRCNN papers) combines detection and classification by
    #  considering something to be detected only if it's from the same
    #  class. E.g. if the model places a bounding box and labels it "traffic
    #  light", but in reality that location has a "person", this is
    #  considered a false positive traffic light and a false negative
    #  person. We'd like to get this metric, sure, but we're also
    #  interested in classic detection, i.e. just "is there a nucleus?",
    #  so we get AP using both the full set of classes AS WELL AS
    #  a remapped class set where anything is considered a "nucleus".

    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()

    # iou_types = _get_iou_types(model)
    iou_types = ['bbox']  # segmAP is meaningless in my hybrid bbox/segm dataset
    maxDets = [1, 10, 100] if maxDets is None else maxDets
    cropper = tvdt.Cropper() if crop_inference_to_fov else None

    # combined detection & classification precision/recall
    dst = data_loader.dataset
    coco = get_coco_api_from_dataset(dst, crop_inference_to_fov=crop_inference_to_fov)
    coco_evaluator = CocoEvaluator(coco, iou_types, maxDets=maxDets)
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Test:'

    # precision/recall for just detection (objectness)
    classification = dst.do_classification
    if classification:

        # IMPORTANT: REVERSE ME AFTER DEFINING COCO API
        dst.do_classification = False
        dst.set_labelmaps()

        metric_logger_objectness = utils.MetricLogger(delimiter="  ")
        coco_objectness = get_coco_api_from_dataset(
            dst, crop_inference_to_fov=crop_inference_to_fov)
        coco_evaluator_objectness = CocoEvaluator(
            coco_objectness, iou_types, maxDets=maxDets)

        # IMPORTANT: THIS LINE IS CRITICAL
        dst.do_classification = True
        dst.set_labelmaps()

    else:
        metric_logger_objectness = None
        # noinspection PyUnusedLocal
        coco_objectness = None
        coco_evaluator_objectness = None

    n_true = 0
    n_pred = 0
    n_matched = 0
    cltargets = []
    clprobabs = []
    cloutlabs = []
    seg_intersects = []
    seg_sums = []

    def _get_categnames(prefix):
        if prefix == '':
            return dst.categs_names
        return dst.supercategs_names

    for images, targets in metric_logger.log_every(data_loader, 100, header):
        images = list(img.to(device) for img in images)
        targets = list(targets)

        # uncomment if GPU
        # torch.cuda.synchronize()

        model_time = time.time()
        outputs = model(images)
        outputs = [
            {k: v.to(cpu_device) for k, v in t.items() if v is not None}
            for t in outputs
        ]
        model_time = time.time() - model_time

        if crop_inference_to_fov:
            images, targets, outputs = _crop_all_to_fov(
                images=images, targets=targets, outputs=outputs,
                cropper=cropper)

        # combined detection & classification precision/recall
        res = {
            target["image_id"].item(): output
            for target, output in zip(targets, outputs)}
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(
            model_time=model_time, evaluator_time=evaluator_time)

        probabs_exist = 'probabs' in outputs[0]

        if classification:

            # IMPORTANT NOTE: The way that FasterRCNN is implemented
            # assigns each box prediction a confidence score and a label. This
            # is NOT the same as the "traditional" classifier where there is a
            # confidence score for ALL classes per object/pixel. Instead, here
            # the class logits are "flattened" so that each box-label
            # combination is considered separately, then the NMS is done
            # independently per class. Long story short, each box only has
            # one label and confidence

            # Match truth to outputs and only count matched objects for
            # classification accuracy stats
            for target, output in zip(targets, outputs):

                # Match, ignoring ambiguous nuclei. Note that the model
                #  already filters out anything predicted as ignore_label
                #  in inference mode, so we only need to do this for gtruth
                keep = target['iscrowd'] == 0
                cltrg_boxes = np.int32(target['boxes'][keep])
                cltrg_labels = np.int32(target['labels'][keep])
                keep_target, keep_output, _, _ = \
                    map_bboxes_using_hungarian_algorithm(
                        bboxes1=cltrg_boxes,
                        bboxes2=np.int32(output['boxes']),
                        min_iou=0.5)

                # classification performance
                n_true += cltrg_boxes.shape[0]
                n_pred += output['boxes'].shape[0]
                n_matched += len(keep_output)
                cltargets.extend(cltrg_labels[keep_target].tolist())
                if probabs_exist:
                    clprobabs.extend(
                        np.float32(output['probabs'])[keep_output, :].tolist()
                    )
                else:
                    cloutlabs.extend(
                        np.int32(output['labels'])[keep_output].tolist()
                    )

                # FIXME: for now, we just assess this if classification because
                #   otherwise I'll need to refactor the function output
                # segmentation performance
                if 'masks' in target:
                    ismask = np.int32(target['ismask'])[keep_target] == 1
                    tmask = np.int32(target['masks'])[keep_target, ...][ismask, ...]
                    if not model.transform.densify_mask:
                        omask = np.int32(output['masks'] > 0.5)
                        omask = omask[:, 0, :, :]
                    else:
                        omask = np.int32(output['masks'])
                        obj_ids = np.arange(1, omask.max() + 1)
                        omask = omask == obj_ids[:, None, None]
                        omask = 0 + omask
                    omask = omask[keep_output, ...][ismask, ...]
                    for i in range(tmask.shape[0]):
                        sms = tmask[i, ...].sum() + omask[i, ...].sum()
                        isc = np.sum(
                            0 + ((tmask[i, ...] + omask[i, ...]) == 2)
                        )
                        if (sms > 0) and (isc > 0):
                            seg_sums.append(sms)
                            seg_intersects.append(isc)

            # FIXME (low priority): have this use a map from the data loader
            #   labelcodes to justdetection code (eg 2 -> 1, 3 -> 1, etc)
            #   instead of hardcoding the assumption that "nucleus" will
            #   always have the code 1. Note that the model already filters
            #   out anything predicted as ignore_label.
            # remap predictions to just "nucleus". Note that the labels
            # have already been remapped during indexing of the coco API.
            # NEEDLESS TO SAY, this must happen AFTER we've assigned
            # the classifications to the classification_outputs list
            for _, output in res.items():
                output['labels'] = 1 + (0 * output['labels'])

            # precision/recall for just detection (objectness)
            evaluator_time = time.time()
            coco_evaluator_objectness.update(res)
            evaluator_time = time.time() - evaluator_time
            metric_logger_objectness.update(
                model_time=model_time, evaluator_time=evaluator_time)

    # combined detection & classification precision/recall
    # gather the stats from all processes & accumulate preds from all imgs
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    if classification:
        # Init classification results
        classification_metrics = {
            'n_true_nuclei_excl_ambiguous': n_true,
            'n_predicted_nuclei': n_pred,
            'n_matched_for_classif': n_matched,
        }
        for prefix in ['', 'superCateg_']:
            categs_names = _get_categnames(prefix)
            classification_metrics.update({
                f'{prefix}{k}': np.nan
                for k in ['accuracy', 'auroc_micro', 'auroc_macro', 'mcc']
            })
            # Class-by-class
            classification_metrics.update({
                f'{prefix}accuracy_{cls_name}': np.nan
                for cls_name in categs_names
            })
            classification_metrics.update({
                f'{prefix}mcc_{cls_name}': np.nan
                for cls_name in categs_names
            })
            if probabs_exist:
                classification_metrics.update({
                    f'{prefix}aucroc_{cls_name}': np.nan
                    for cls_name in categs_names
                })
        for prefix in ['', 'superCateg_']:
            categs_names = _get_categnames(prefix)
            classification_metrics.update({
                f'{prefix}confusion_trueClass-{tc}_predictedClass-{pc}': 0
                for tc in categs_names
                for pc in categs_names
            })
        # segmentation -- restricted to matched nuclei with available seg
        if len(seg_sums) > 0:
            seg_intersects = np.array(seg_intersects)
            seg_sums = np.array(seg_sums)
            intersect = np.sum(seg_intersects)
            sums = np.sum(seg_sums)
            ious = seg_intersects / (seg_sums - seg_intersects)
            dices = 2. * seg_intersects / seg_sums
            classification_metrics.update({
                # overall
                'seg_intersect': intersect,
                'seg_sum': sums,
                'seg_IOU': intersect / (sums - intersect),
                'seg_DICE': 2. * intersect / sums,
                # by nucleus
                'seg_n': len(ious),
                'seg_medIOU': np.median(ious),
                'seg_medDICE': np.median(dices),
            })

        metric_logger_objectness.synchronize_between_processes()
        print("\nAveraged stats (OBJECTNESS):", metric_logger_objectness)
        coco_evaluator_objectness.synchronize_between_processes()
        coco_evaluator_objectness.accumulate()
        coco_evaluator_objectness.summarize()

        # NOTE: WE MAKE SURE ALL LABELMAPS BELOW START AT ZERO SINCE THE
        # FUNCTION _update_classification_metrics DOES AN ARGMAX INTERNALLY
        # SO FIRST COLUMN CORRESPONDS TO ZERO'TH CLASS, WHICH CORRESPONDS TO
        # LABEL = 1 IN OUR DATASET AND MODEL
        # classification accuracy without remapping
        clkwargs = {
            'metrics_dict': classification_metrics,
            'all_labels': np.array(cltargets) - 1,
            'rlabelcodes': {
                k - 1: v
                for k, v in dst.rlabelcodes.items() if v != 'AMBIGUOUS'
            },
            'codemap': None,
            'prefix': 'superCateg_' if dst.use_supercategs else '',
        }
        if probabs_exist:
            clkwargs['all_scores'] = np.array(clprobabs)
        else:
            clkwargs['output_labels'] = np.array(cloutlabs)
        _update_classification_metrics(**clkwargs)

        # FIXME (low priority): this hard-codes the name of ambiguous categ
        # classification accuracy mapped to supercategs
        if not dst.use_supercategs:
            clkwargs.update({
                'rlabelcodes': {
                    k - 1: v
                    for k, v in dst.supercategs_rlabelcodes.items()
                    if v != 'AMBIGUOUS'
                },
                'codemap': {
                    k - 1: v - 1
                    for k, v in dst.main_codes_to_supercategs_codes.items()
                    if dst.supercategs_rlabelcodes[v] != 'AMBIGUOUS'
                },
                'prefix': 'superCateg_',
            })
            _update_classification_metrics(**clkwargs)
    else:
        classification_metrics = {}

    torch.set_num_threads(n_threads)

    return coco_evaluator, coco_evaluator_objectness, classification_metrics
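Both evaluate() variants in this collection wrap their work in the same save/lower/restore dance around torch.set_num_threads. A minimal sketch of that pattern as a context manager (our wrapper, not part of any snippet above):

from contextlib import contextmanager

import torch

@contextmanager
def limited_torch_threads(n: int = 1):
    # Temporarily lower the intra-op thread count, restoring the previous
    # value even if the body raises.
    saved = torch.get_num_threads()
    torch.set_num_threads(n)
    try:
        yield
    finally:
        torch.set_num_threads(saved)

With this, the body of evaluate() could run inside "with limited_torch_threads(1):" and drop the manual bookkeeping.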