Example #1
def get_train_data_source(ds_metainfo,
                          batch_size,
                          num_workers):
    if ds_metainfo.use_imgrec:
        return ds_metainfo.train_imgrec_iter(
            ds_metainfo=ds_metainfo,
            batch_size=batch_size,
            num_workers=num_workers)
    else:
        transform_train = ds_metainfo.train_transform(ds_metainfo=ds_metainfo)
        dataset = ds_metainfo.dataset_class(
            root=ds_metainfo.root_dir_path,
            mode="train",
            transform=(transform_train if ds_metainfo.do_transform else None))
        if not ds_metainfo.do_transform:
            dataset = dataset.transform_first(fn=transform_train)
        return DataLoader(
            dataset=dataset,
            batch_size=batch_size,
            shuffle=True,
            last_batch="discard",
            num_workers=num_workers)
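The branch above either lets the dataset apply the transform itself or wraps it with transform_first, which applies the transform to the data element only and leaves the label untouched. A minimal, self-contained sketch of that second path (dummy data, not part of the original pipeline):

import mxnet as mx
from mxnet.gluon.data import ArrayDataset, DataLoader
from mxnet.gluon.data.vision import transforms

transform_train = transforms.Compose([transforms.ToTensor()])

# dummy HWC uint8 images and integer labels
data = mx.nd.random.uniform(0, 255, shape=(8, 28, 28, 3)).astype('uint8')
label = mx.nd.arange(8)
# transform_first applies transform_train to the image only, not the label
dataset = ArrayDataset(data, label).transform_first(transform_train)

loader = DataLoader(dataset, batch_size=4, shuffle=True, last_batch="discard")
for x, y in loader:
    print(x.shape, y.shape)  # (4, 3, 28, 28) (4,)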
Example #2
def test_autolog_registering_model():
    registered_model_name = "test_autolog_registered_model"
    mlflow.gluon.autolog(registered_model_name=registered_model_name)

    data = DataLoader(LogsDataset(), batch_size=128, last_batch="discard")

    model = HybridSequential()
    model.add(Dense(64, activation="relu"))
    model.add(Dense(10))
    model.initialize()
    model.hybridize()

    trainer = Trainer(
        model.collect_params(), "adam", optimizer_params={"learning_rate": 0.001, "epsilon": 1e-07}
    )
    est = get_estimator(model, trainer)

    with mlflow.start_run():
        est.fit(data, epochs=3)

        registered_model = MlflowClient().get_registered_model(registered_model_name)
        assert registered_model.name == registered_model_name
Example #3
def test_autolog_ends_auto_created_run():
    mlflow.gluon.autolog()

    data = DataLoader(LogsDataset(), batch_size=128, last_batch="discard")

    model = HybridSequential()
    model.add(Dense(64, activation="relu"))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(10))
    model.initialize()
    model.hybridize()

    trainer = Trainer(model.collect_params(), "adam",
                      optimizer_params={"learning_rate": .001, "epsilon": 1e-07})
    est = estimator.Estimator(net=model, loss=SoftmaxCrossEntropyLoss(),
                              metrics=Accuracy(), trainer=trainer)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        est.fit(data, epochs=3)

    assert mlflow.active_run() is None
Example #4
    def _internal_predict(self,
                          test_data,
                          get_original_labels=True,
                          get_probabilities=False):
        assert self.net is not None
        assert self.config is not None
        if not isinstance(test_data, TabularDataset):
            if isinstance(test_data, (list, dict)):
                test_data = pd.DataFrame(test_data)
            test_data = TabularDataset(
                test_data,
                columns=self._feature_columns,
                column_properties=self._column_properties)
        processed_test = self._preprocessor.process_test(test_data)
        inference_batch_size = self.config.optimization.per_device_batch_size\
                               * self.config.optimization.val_batch_size_mult
        test_dataloader = DataLoader(
            processed_test,
            batch_size=inference_batch_size,
            shuffle=False,
            batchify_fn=self._preprocessor.batchify(is_test=True))
        test_predictions = _classification_regression_predict(
            self._net,
            dataloader=test_dataloader,
            problem_type=self._problem_types[0],
            has_label=False)
        if self._problem_types[0] == _C.CLASSIFICATION:
            if get_probabilities:
                return test_predictions
            else:
                test_predictions = test_predictions.argmax(axis=-1)
                if get_original_labels:
                    test_predictions = np.array(
                        list(
                            map(
                                self._column_properties[
                                    self._label_columns[0]].inv_transform,
                                test_predictions)))
        return test_predictions
Example #5
    def get_dataloader(dataset):
        """create data loader based on the dataset chunk"""
        t0 = time.time()
        lengths = dataset.get_field('valid_lengths')
        logging.debug('Num samples = %d', len(lengths))
        # A batch includes: input_id, masked_id, masked_position, masked_weight,
        #                   next_sentence_label, segment_id, valid_length
        batchify_fn = Tuple(Pad(), Pad(), Pad(), Pad(), Stack(), Pad(),
                            Stack())
        if args.by_token:
            # sharded data loader
            sampler = nlp.data.FixedBucketSampler(
                lengths=lengths,
                # batch_size per shard
                batch_size=batch_size,
                num_buckets=args.num_buckets,
                shuffle=is_train,
                use_average_length=True,
                num_shards=num_ctxes)
            dataloader = nlp.data.ShardedDataLoader(dataset,
                                                    batch_sampler=sampler,
                                                    batchify_fn=batchify_fn,
                                                    num_workers=num_ctxes)
            logging.debug('Batch Sampler:\n%s', sampler.stats())
        else:
            sampler = FixedBucketSampler(lengths,
                                         batch_size=batch_size * num_ctxes,
                                         num_buckets=args.num_buckets,
                                         ratio=0,
                                         shuffle=is_train)
            dataloader = DataLoader(dataset=dataset,
                                    batch_sampler=sampler,
                                    batchify_fn=batchify_fn,
                                    num_workers=1)
            logging.debug('Batch Sampler:\n%s', sampler.stats())
        t1 = time.time()
        logging.debug('Dataloader creation cost = %.2f s', t1 - t0)
        return dataloader
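For reference, a toy sketch (not from the original script) of how FixedBucketSampler groups variable-length samples into buckets; here batch_size counts samples, whereas use_average_length=True above makes it count tokens on average:

import gluonnlp as nlp

lengths = [5, 8, 12, 30, 31, 33, 60, 62]  # sequence lengths of a toy dataset
sampler = nlp.data.FixedBucketSampler(lengths=lengths,
                                      batch_size=2,
                                      num_buckets=3,
                                      shuffle=False)
print(sampler.stats())      # bucket keys, sizes and batch counts
for batch_idxs in sampler:
    print(batch_idxs)       # each batch draws indices from a single bucket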
Example #6
    def get_train_data(self, batch_size):
        """
        获取训练数据,数据扩充
        """
        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=0.4,
                                         contrast=0.4,
                                         saturation=0.4),
            transforms.RandomLighting(0.1),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])

        td = MultilabelDataset(data_folder=self.train_folder,
                               data_file=self.train_file,
                               transform=transform_train)
        train_data = DataLoader(dataset=td,
                                batch_size=batch_size,
                                shuffle=True)

        return train_data, len(td)
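To sanity-check an augmentation pipeline like the one above, you can apply it to a single image before wiring it into a DataLoader; a small sketch with a dummy image (not part of the original class):

import mxnet as mx
from mxnet.gluon.data.vision import transforms

transform_train = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomFlipLeftRight(),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

img = mx.nd.random.uniform(0, 255, shape=(256, 256, 3)).astype('uint8')  # dummy HWC image
out = transform_train(img)
print(out.shape, out.dtype)  # (3, 224, 224), float32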
Example #7
    def __call__(self, dataset, sampler):
        # A batch includes: input_id, masked_id, masked_position, masked_weight,
        #                   next_sentence_label, segment_id, valid_length
        batchify_fn = Tuple(Pad(),    # input_id
                            Pad(),    # masked_id
                            Pad(),    # masked_position
                            Pad(),    # masked_weight
                            Stack(),  # next_sentence_label
                            Pad(),    # segment_id
                            Stack())   # valid_length

        if self._use_avg_len:
            # sharded data loader
            dataloader = nlp.data.ShardedDataLoader(dataset,
                                                    batch_sampler=sampler,
                                                    batchify_fn=batchify_fn,
                                                    num_workers=self._num_ctxes)
        else:
            dataloader = DataLoader(dataset=dataset,
                                    batch_sampler=sampler,
                                    batchify_fn=batchify_fn,
                                    num_workers=self._num_ctxes)
        return dataloader
Example #8
def test_autolog_persists_manually_created_run():
    mlflow.gluon.autolog()

    data = DataLoader(LogsDataset(), batch_size=128, last_batch="discard")

    with mlflow.start_run() as run:

        model = HybridSequential()
        model.add(Dense(64, activation="relu"))
        model.add(Dense(64, activation="relu"))
        model.add(Dense(10))
        model.initialize()
        model.hybridize()
        trainer = Trainer(
            model.collect_params(),
            "adam",
            optimizer_params={"learning_rate": 0.001, "epsilon": 1e-07},
        )
        est = get_estimator(model, trainer)

        est.fit(data, epochs=3)

        assert mlflow.active_run().info.run_id == run.info.run_id
Example #9
    def evaluate_by_ckpt(ckpt_name, best_ckpt):
        classify_net.load_parameters(ckpt_name, ctx=ctx_l, cast_dtype=True)
        logging.info('Prepare dev data')

        dev_data, label = get_task_data(args,
                                        tokenizer,
                                        segment='eval',
                                        task=task)
        dev_batchify = bf.Group(bf.Group(bf.Pad(), bf.Pad(), bf.Stack()),
                                bf.Stack())
        dataloader = DataLoader(dev_data,
                                batch_size=args.batch_size,
                                batchify_fn=dev_batchify,
                                shuffle=False)

        for sample_l in grouper(dataloader, len(ctx_l)):
            for sample, ctx in zip(sample_l, ctx_l):
                if sample is None:
                    continue
                (token_ids, token_types, valid_length), label = sample
                token_ids = mx.np.array(token_ids, ctx=ctx)
                token_types = mx.np.array(token_types, ctx=ctx)
                valid_length = mx.np.array(valid_length, ctx=ctx)
                scores = classify_net(token_ids, token_types, valid_length)

                if args.task_name == 'sts':
                    label = label.reshape((-1, 1))
                for metric in metrics:
                    metric.update([label], [scores])
                #pred.append(scores)

        for metric in metrics:
            metric_name, result = metric.get()
            logging.info('checkpoint {} get result: {}:{}'.format(
                ckpt_name, metric_name, result))
            if best_ckpt.get(metric_name, [0, ''])[0] < result:
                best_ckpt[metric_name] = [result, ckpt_name]
Example #10
    def init_eval_data(self, input_dir):

        cfg = self.cfg
        eval_dataset = CollectionDataset(input_dir,
                                         cfg,
                                         max_samples=None,
                                         load_to_memory=False,
                                         output_idx=True)
        eval_n_samples = len(eval_dataset)
        if eval_n_samples <= 0:
            raise ValueError('number of eval samples should be > 0')

        eval_dataloader = DataLoader(eval_dataset,
                                     batch_size=cfg['val_batch_size'],
                                     thread_pool=True,
                                     shuffle=True,
                                     num_workers=cfg['val_loader_workers'],
                                     last_batch='discard')

        print('total eval samples: {}'.format(eval_n_samples))
        print('batch size: {}'.format(cfg['val_batch_size']))

        return eval_dataset, eval_dataloader
Example #11
def get_class_data_loader(batch_size=50, num_workers=8):
    img_map, reverse_map, img_list = get_img_map()
    # used_data_num = len(img_list) // 100
    # img_list = img_list[:used_data_num]
    random.shuffle(img_list)
    dataset = ClassDataset(img_list)
    data_loader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, \
                             last_batch='rollover')
    data_loader.dataset_size = (len(dataset), 3, 120, 100)
    get_cls_id = lambda img_name: int(img_name.split('/')[0])
    cls_num = max(map(get_cls_id, img_list))

    data_num = 0
    cls_size_list = [0]*(cls_num+1)
    for img_name in img_list:
        cls_id = int(img_name.split('/')[0])
        data_id = int(img_name.split('.')[0].split('/')[1])
        data_num = max(data_num, data_id)
        cls_size_list[cls_id] += 1

    data_loader.cls_size_list = cls_size_list
    data_loader.cls_num = cls_num
    data_loader.data_num = data_num
    return data_loader
Example #12
def gluon_model(model_data):
    train_data, train_label, _ = model_data
    dataset = mx.gluon.data.ArrayDataset(train_data, train_label)
    train_data_loader = DataLoader(dataset,
                                   batch_size=128,
                                   last_batch="discard")
    model = HybridSequential()
    model.add(Dense(128, activation="relu"))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(10))
    model.initialize()
    model.hybridize()
    trainer = Trainer(model.collect_params(),
                      "adam",
                      optimizer_params={
                          "learning_rate": 0.001,
                          "epsilon": 1e-07
                      })

    est = get_estimator(model, trainer)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        est.fit(train_data_loader, epochs=3)
    return model
Example #13
    def predict_multi(self, imgs):
        loader = DataLoader(imgs.as_in_context(self.ctx),
                            self.batch_size,
                            last_batch='keep')
        max_sims = []
        labels = []
        features = []
        cls_center = (nd.L2Normalization(self.cls_center)
                      if self.cls_center is not None else None)
        for data in loader:

            data_batch = mx.io.DataBatch(data=(data, ),
                                         pad=self.batch_size - data.shape[0])
            self.model.forward(data_batch, is_train=False)
            embeddings = self.model.get_outputs()[0]
            features.append(embeddings)
            embeddings = nd.L2Normalization(embeddings, mode='instance')

            if self.cls_center is not None:
                temp1 = embeddings.expand_dims(axis=1)
                temp2 = cls_center.expand_dims(axis=0)
                dis_mat = nd.sum(temp1 * temp2, axis=2)
                max_sim = nd.max(dis_mat, axis=1)
                label = nd.argmax(dis_mat, axis=1)

                labels += list(label.asnumpy())
                max_sims += list(max_sim.asnumpy())
            else:
                label = None

        features = nd.concatenate(features, axis=0)
        if self.label_map is not None:
            labels = [self.label_map[int(x)] for x in labels]

        return (max_sims, labels), features
Example #14
def validdataloader(path="Dataset/valid",
                    input_size=(512, 512),
                    batch_size=2,
                    num_workers=2,
                    shuffle=True,
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]):

    transform = YoloValidTransform(input_size[0],
                                   input_size[1],
                                   mean=mean,
                                   std=std)
    dataset = DetectionDataset(path=path, transform=transform, test=False)

    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        batchify_fn=Tuple(Stack(use_shared_mem=True), Pad(pad_val=-1),
                          Stack()),
        last_batch='rollover',  # or "keep", "discard"
        num_workers=num_workers,
        shuffle=shuffle)

    return dataloader, dataset
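The Tuple(Stack, Pad, Stack) batchify_fn above stacks the fixed-shape images, pads the variable-length label arrays with -1, and stacks the remaining field. A toy sketch of that behaviour using gluoncv.data.batchify, which provides equivalent Tuple/Stack/Pad classes (the Stack(use_shared_mem=True) above suggests the original imports its own batchify variant, so treat this as an approximation):

import numpy as np
from gluoncv.data.batchify import Tuple, Stack, Pad

batchify_fn = Tuple(Stack(), Pad(pad_val=-1), Stack())
samples = [
    # (image, boxes, extra) with a different number of boxes per image
    (np.zeros((3, 512, 512)), np.array([[10., 10., 50., 50., 0.]]), np.array(1.0)),
    (np.zeros((3, 512, 512)), np.array([[5., 5., 20., 20., 1.],
                                        [30., 30., 60., 60., 2.]]), np.array(2.0)),
]
imgs, labels, extras = batchify_fn(samples)
print(imgs.shape, labels.shape, extras.shape)  # (2, 3, 512, 512) (2, 2, 5) (2,)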
Example #15
    def create_loader(self):
        """
        Overwrite the data loader function
        :return: pairwised data loader, None, eval source loader, test target loader
        """
        cpus = cpu_count()

        train_tforms, eval_tforms = [transforms.Resize(self.args.resize)
                                     ], [transforms.Resize(self.args.resize)]

        if self.args.random_crop:
            train_tforms.append(
                transforms.RandomResizedCrop(self.args.size, scale=(0.8, 1.2)))
        else:
            train_tforms.append(transforms.CenterCrop(self.args.size))

        eval_tforms.append(transforms.CenterCrop(self.args.size))

        if self.args.flip:
            train_tforms.append(transforms.RandomFlipLeftRight())

        if self.args.random_color:
            train_tforms.append(
                transforms.RandomColorJitter(self.args.color_jitter,
                                             self.args.color_jitter,
                                             self.args.color_jitter, 0.1))

        train_tforms.extend([
            transforms.ToTensor(),
            transforms.Normalize(self.args.mean, self.args.std)
        ])
        eval_tforms.extend([
            transforms.ToTensor(),
            transforms.Normalize(self.args.mean, self.args.std)
        ])

        train_tforms = transforms.Compose(train_tforms)
        eval_tforms = transforms.Compose(eval_tforms)

        if 'digits' in self.args.cfg:
            trs_set, tes_set, tet_set = self.create_digits_datasets(
                train_tforms, eval_tforms)
        elif 'office' in self.args.cfg:
            trs_set, tes_set, tet_set = self.create_office_datasets(
                train_tforms, eval_tforms)
        elif 'visda' in self.args.cfg:
            trs_set, tes_set, tet_set = self.create_visda_datasets(
                train_tforms, eval_tforms)
        else:
            raise NotImplementedError

        self.train_src_loader = DataLoader(trs_set,
                                           self.args.bs,
                                           shuffle=True,
                                           num_workers=cpus)
        self.test_src_loader = DataLoader(tes_set,
                                          self.args.bs,
                                          shuffle=False,
                                          num_workers=cpus)
        self.test_tgt_loader = DataLoader(tet_set,
                                          self.args.bs,
                                          shuffle=False,
                                          num_workers=cpus)
Example #16
def data_loader(train, batch_size, num_workers):
    dataset = MNIST(train=train, transform=transform)
    return DataLoader(dataset,
                      batch_size,
                      shuffle=train,
                      num_workers=num_workers)
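The data_loader helper above relies on a module-level transform that the snippet does not show. A hedged sketch of how the pieces might fit together; the transform below is only a guess, written in the (data, label) callback style that the MNIST dataset's transform argument expects:

import mxnet as mx

def transform(data, label):
    # cast to float32 in [0, 1] and move the channel axis to the front
    return mx.nd.transpose(data.astype('float32') / 255.0, (2, 0, 1)), label

train_loader = data_loader(train=True, batch_size=64, num_workers=4)
for data, label in train_loader:
    print(data.shape, label.shape)  # (64, 1, 28, 28) (64,)
    break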
Example #17
    # center and crop an area of size (224,224)
    cropped, crop_info = mx.image.center_crop(resized, SIZE)

    # transpose the channels to be (3,224,224)
    transposed = nd.transpose(cropped, (2, 0, 1))

    return transposed, label


################################################
#  Loading Images from folders
################################################
dataset_train = ImageFolderDataset(root=train_data_dir, transform=transform)
dataset_test = ImageFolderDataset(root=validation_data_dir, transform=transform)

dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE,
                              shuffle=True, num_workers=NUM_WORKERS) # last_batch='discard' (removed for testing)
dataloader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, # last_batch='discard',
                             shuffle=True, num_workers=NUM_WORKERS)
print("Train dataset: {} images, Test dataset: {} images".format(len(dataset_train), len(dataset_test)))


################################################
#  Check categories - for debuging only
################################################

categories = dataset_train.synsets
NUM_CLASSES = len(categories)

print(categories)
print(NUM_CLASSES)
Example #18
# with mx.Context(_ctx):
exp_name = "seg_iou_bw_eval"

test_aug = Compose([
    #AdaptResize(72000),
    #AdaptResize(360000),
    ToNDArray(),
    #Normalize(nd.array([107]), nd.array([1]))
])

my_test = ReadDataSet('bw', 'val', test_aug, fixed_weight=True)

#train_loader = DataLoader(my_train, batch_size=1, shuffle=False, last_batch='rollover',num_workers=4,thread_pool=True)
test_loader = DataLoader(my_test,
                         batch_size=1,
                         shuffle=False,
                         last_batch='keep',
                         num_workers=4,
                         thread_pool=True)

model = zoo.eval_model(
    _ctx,
    symb="unext101_64_4d_deconv_bw_direct_72000-symbol.json",
    param="unext101_64_4d_deconv_bw_direct_72000-0000.params")

with mx.Context(_ctx):
    model.hybridize()

test_num_steps = len(my_test)
# print(num_steps)
criterion = WeightedBCEDICE(axis=1)
test_metrics = IoUMetric(nb_cls=6, display=False, output='')
Example #19
            T.Resize(256, keep_ratio=True),
            T.CenterCrop(224),
            T.ToTensor(),
            T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    else:
        eval_transformer = T.Compose([
            T.ToTensor(),
            T.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
        ])

    # fetch dataset and dataloader
    dataset = ImageNet if opt.dataset == 'imagenet' else CIFAR10
    eval_dataset = dataset(train=False).transform_first(eval_transformer)
    eval_loader = DataLoader(dataset=eval_dataset,
                             batch_size=opt.batch_size,
                             num_workers=opt.num_workers,
                             last_batch='keep')
    if opt.quantize_input_offline:
        train_dataset = dataset(train=True).transform_first(eval_transformer)
        if opt.dataset == 'imagenet':
            train_labels = [item[1] for item in train_dataset._data.items]
        elif opt.dataset == 'cifar10':
            train_labels = train_dataset._data._label
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=opt.batch_size,
                                  sampler=UniformSampler(
                                      classes, opt.num_sample, train_labels),
                                  num_workers=opt.num_workers,
                                  last_batch='keep')

    # calibrate for input ranges and evaluate for simulation
Example #20
def inf_train_gen(loader):
    while True:
        for batch in loader:
            yield batch


ctx = [mx.gpu(int(i)) for i in opt.ctx.split(",")]

batch_size = opt.batch_size
num_iterations = opt.niters

margin_s = opt.margin_s
margin_m = opt.margin_m

train_set = get_recognition_dataset(opt.dataset, transform=transform_train)
train_data = DataLoader(train_set, batch_size, shuffle=True, num_workers=opt.num_workers, last_batch='discard')
batch_generator = inf_train_gen(train_data)

targets = opt.target
val_sets = [get_recognition_dataset(name, transform=transform_test) for name in targets.split(",")]
val_datas = [DataLoader(dataset, batch_size, last_batch='keep') for dataset in val_sets]

dtype = opt.dtype
train_net = get_model(opt.model, classes=train_set.num_classes, weight_norm=True, feature_norm=True)
train_net.initialize(init=mx.init.MSRAPrelu(), ctx=ctx)

lr_period = [int(iter) for iter in opt.lr_decay_iter.split(",")]
lr_scheduler = IterLRScheduler(mode=opt.lr_mode, baselr=opt.lr, step=lr_period,
                               step_factor=opt.lr_decay, power=2,
                               niters=num_iterations, warmup_iters=opt.lr_warmup_iters)
optimizer = 'nag'
Example #21
    def get_loader(self):
        def batchify_fn(list_target_texts):

            input_words = []
            input_valid_lens = []
            input_segments = []
            target_words = []
            target_valid_lens = []
            target_segments = []
            target_actions = []
            target_pms = []
            list_input_texts = []
            _list_target_texts = []
            pm_error_idxs = []
            pm_add_idxs = []
            pm_remove_idxs = []

            if self.mode == 'train':  # temporary: test mode should take the else branch below

                for str_target_text in list_target_texts:

                    # if np.random.ranf() > 0.5:
                    #   str_input_text = self.structure.randomize_word_order(str_target_text)
                    # else:
                    # str_input_text = self.pinyin_sampler.errorize_sentence(str_target_text)
                    str_input_text, pm_error_idx, pm_add_idx, pm_remove_idx = self.errorize_pm(
                        str_target_text)
                    input_data = self.transformer([str_input_text])
                    target_data = self.transform_target(str_target_text)
                    if len(target_data[0]) > self.max_seq_len or input_data[
                            0].shape[0] > self.max_seq_len:  # too long, skip
                        continue
                    pm_error_idx, pm_add_idx, pm_remove_idx = self.transform_pm_error(
                        pm_error_idx, pm_add_idx, pm_remove_idx)

                    input_word, input_valid_len, input_segment = nd.array([
                        input_data[0]
                    ]), nd.array([input_data[1]]), nd.array([input_data[2]])
                    target_word, target_valid_len = nd.array(
                        [target_data[0]]), nd.array([target_data[1]])
                    target_segment = input_segment

                    _list_target_texts.append(str_target_text)
                    input_words.append(input_word.astype(np.float32))
                    input_valid_lens.append(input_valid_len.astype(np.float32))
                    input_segments.append(input_segment.astype(np.float32))
                    target_words.append(target_word.astype(np.float32))
                    target_valid_lens.append(
                        target_valid_len.astype(np.float32))
                    target_segments.append(target_segment.astype(np.float32))
                    pm_add_idxs.append(pm_add_idx)
                    pm_error_idxs.append(pm_error_idx)
                    pm_remove_idxs.append(pm_remove_idx)
                    # target_actions.append(target_action.astype(np.float32)); target_pms.append(target_pm.astype(np.float32));

                    list_input_texts.append(str_input_text)

                return nd.concat(*input_words, dim=0), nd.concat(
                    *input_valid_lens,
                    dim=0), nd.concat(*input_segments, dim=0), nd.concat(
                        *target_words, dim=0
                    ), nd.concat(*target_valid_lens, dim=0), nd.concat(
                        *target_segments,
                        dim=0), nd.concat(*pm_error_idxs, dim=0), nd.concat(
                            *pm_add_idxs, dim=0), nd.concat(
                                *pm_remove_idxs,
                                dim=0), list_input_texts, _list_target_texts
            # return nd.concat(*input_words, dim = 0), nd.concat(*input_valid_lens, dim = 0), nd.concat(*input_segments, dim = 0), nd.concat(*target_words, dim = 0), nd.concat(*target_valid_lens, dim = 0), nd.concat(*target_segments, dim = 0)#, nd.concat(*target_actions, dim = 0), nd.concat(*target_pms, dim = 0), list_input_texts, list_target_texts

            else:

                # print(list_target_texts)
                # print(len(list_target_texts))
                assert (len(list_target_texts) == 1)
                # for test_pair in list_target_texts:
                str_input_text = list_target_texts[0][0]
                str_target_text = list_target_texts[0][1]
                return str_input_text, str_target_text

        self.dataset = SimpleDataset(self.data)
        shuffle = True if self.mode == 'train' else False
        self.loader = DataLoader(self.dataset,
                                 batch_size=self.batch_size,
                                 batchify_fn=batchify_fn,
                                 shuffle=shuffle,
                                 last_batch='rollover')

        return self.loader
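A bare-bones sketch (toy data, not from the original class) of the batchify_fn contract used above: the DataLoader hands batchify_fn the raw list of samples for one batch, and the function may return whatever structure it likes:

from mxnet import nd
from mxnet.gluon.data import DataLoader, SimpleDataset

def toy_batchify_fn(samples):
    # samples is a plain Python list of whatever the dataset yields
    texts = list(samples)
    lengths = nd.array([len(s) for s in samples])
    return texts, lengths

dataset = SimpleDataset(["short", "a longer sentence", "mid size"])
loader = DataLoader(dataset, batch_size=2, batchify_fn=toy_batchify_fn,
                    shuffle=True, last_batch='rollover')
for texts, lengths in loader:
    print(texts, lengths.asnumpy())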
Example #22
        self._len = len(self._imglist)

    def __getitem__(self, idx):
        img = image.imread(self._imglist[idx])
        label = pd.read_csv(self._csvPath[idx])
        return (img, label)

    def __len__(self):
        return self._len


if __name__ == "__main__":
    batch_size = 2
    batchify_fn = Tuple(Append(), Append())
    train_dataset = DensemapDataset()
    im = train_dataset[0]

    def train_transform(*trans_data):
        img = trans_data[0]
        aug = gdata.vision.transforms.RandomFlipLeftRight()
        return (aug(img), trans_data[1])

    train_loader = DataLoader(train_dataset.transform(train_transform),
                              batch_size=2,
                              shuffle=True,
                              batchify_fn=batchify_fn)
    for ib, batch in enumerate(train_loader):
        # batch[0] is X, batch[1] is y
        # batch[0][0] is the 0th X
        print(type(batch[0][0]), type(batch[1][0]))
Example #23
def train(cfg,
          ctx_lst,
          project_name,
          log_interval=5,
          no_val=False,
          lr=None,
          wd=None):
    wandb.init(job_type='train',
               dir=my_tools.root_dir(),
               config=cfg,
               project=project_name)
    if lr and wd:
        wandb.config.lr = lr
        wandb.config.wd = wd

    ctx = my_tools.get_contexts(ctx_lst)
    wandb.config.ctx = ctx

    data_factory = DataFactory(wandb.config.data_name)
    model_factory = ModelFactory(wandb.config.model_name)

    norm_layer, norm_kwargs = my_tools.get_norm_layer(wandb.config.norm,
                                                      len(ctx))
    model_kwargs = {
        'nclass': data_factory.num_class,
        'backbone': wandb.config.backbone,
        'pretrained_base': wandb.config.backbone_init.get('manner') == 'cls',
        'aux': wandb.config.aux,
        'crop_size': wandb.config.crop_size,
        'base_size': wandb.config.base_size,
        'dilate': wandb.config.dilate,
        'norm_layer': norm_layer,
        'norm_kwargs': norm_kwargs,
    }
    net = model_factory.get_model(
        model_kwargs,
        resume=wandb.config.resume,
        lr_mult=wandb.config.lr_mult,
        backbone_init_manner=wandb.config.backbone_init.get('manner'),
        backbone_ckpt=wandb.config.backbone_init.get('backbone_ckpt'),
        prior_classes=wandb.config.backbone_init.get('prior_classes'),
        ctx=ctx)
    if net.symbolize:
        net.hybridize()

    num_worker = 0 if platform.system() == 'Windows' else 16
    train_set = data_factory.seg_dataset(
        split='train',  # sometimes would be 'trainval'
        mode='train',
        transform=my_tools.image_transform(),
        base_size=wandb.config.base_size,
        crop_size=wandb.config.crop_size)
    train_iter = DataLoader(train_set,
                            wandb.config.bs_train,
                            shuffle=True,
                            last_batch='discard',
                            num_workers=num_worker)
    val_set = data_factory.seg_dataset(split='val',
                                       mode='val',
                                       transform=my_tools.image_transform(),
                                       base_size=wandb.config.base_size,
                                       crop_size=wandb.config.crop_size)
    val_iter = DataLoader(val_set,
                          wandb.config.bs_val,
                          shuffle=False,
                          last_batch='keep',
                          num_workers=num_worker)
    wandb.config.num_train = len(train_set)
    wandb.config.num_valid = len(val_set)

    criterion = _get_criterion(wandb.config.aux, wandb.config.aux_weight)
    criterion.initialize(ctx=ctx)
    wandb.config.criterion = type(criterion)

    if wandb.config.optimizer == 'adam':
        trainer = Trainer(net.collect_params(),
                          'adam',
                          optimizer_params={
                              'learning_rate': wandb.config.lr,
                              'wd': wandb.config.wd,
                              'beta1': wandb.config.adam.get('adam_beta1'),
                              'beta2': wandb.config.adam.get('adam_beta2')
                          })
    elif wandb.config.optimizer in ('sgd', 'nag'):
        scheduler = _lr_scheduler(
            mode=wandb.config.lr_scheduler,
            base_lr=wandb.config.lr,
            target_lr=wandb.config.target_lr,
            nepochs=wandb.config.epochs,
            iters_per_epoch=len(train_iter),
            step_epoch=wandb.config.step.get('step_epoch'),
            step_factor=wandb.config.step.get('step_factor'),
            power=wandb.config.poly.get('power'))
        trainer = Trainer(net.collect_params(),
                          wandb.config.optimizer,
                          optimizer_params={
                              'lr_scheduler': scheduler,
                              'wd': wandb.config.wd,
                              'momentum': wandb.config.momentum,
                              'multi_precision': True
                          })
    else:
        raise RuntimeError(f"Unknown optimizer: {wandb.config.optimizer}")

    metric = SegmentationMetric(data_factory.num_class)

    logger = get_logger(name='train', level=10)
    t_start = my_tools.get_strftime()
    logger.info(f'Training start: {t_start}')
    for k, v in wandb.config.items():
        logger.info(f'{k}: {v}')
    logger.info('-----> end hyper-parameters <-----')
    wandb.config.start_time = t_start

    best_score = .0
    best_epoch = 0
    for epoch in range(wandb.config.epochs):
        train_loss = .0
        tbar = tqdm(train_iter)
        for i, (data, target) in enumerate(tbar):
            gpu_datas = split_and_load(data, ctx_list=ctx)
            gpu_targets = split_and_load(target, ctx_list=ctx)
            with autograd.record():
                loss_gpus = [
                    criterion(*net(gpu_data), gpu_target)
                    for gpu_data, gpu_target in zip(gpu_datas, gpu_targets)
                ]
            for loss in loss_gpus:
                autograd.backward(loss)
            trainer.step(wandb.config.bs_train)
            nd.waitall()
            train_loss += sum([loss.mean().asscalar()
                               for loss in loss_gpus]) / len(loss_gpus)
            tbar.set_description(
                'Epoch-%d [training], loss %.5f, %s' %
                (epoch, train_loss /
                 (i + 1), my_tools.get_strftime('%Y-%m-%d %H:%M:%S')))
            if (i % log_interval == 0) or (i + 1 == len(train_iter)):
                wandb.log({
                    f'train_loss_batch, interval={log_interval}':
                    train_loss / (i + 1)
                })

        wandb.log({
            'train_loss_epoch': train_loss / (len(train_iter)),
            'custom_step': epoch
        })

        if not no_val:
            val_loss = .0
            vbar = tqdm(val_iter)
            for i, (data, target) in enumerate(vbar):
                gpu_datas = split_and_load(data=data,
                                           ctx_list=ctx,
                                           even_split=False)
                gpu_targets = split_and_load(data=target,
                                             ctx_list=ctx,
                                             even_split=False)
                loss_gpus = []
                for gpu_data, gpu_target in zip(gpu_datas, gpu_targets):
                    gpu_output = net(gpu_data)
                    loss_gpus.append(criterion(*gpu_output, gpu_target))
                    metric.update(gpu_target, gpu_output[0])
                val_loss += sum([loss.mean().asscalar()
                                 for loss in loss_gpus]) / len(loss_gpus)
                vbar.set_description(
                    'Epoch-%d [validation], PA %.4f, mIoU %.4f' %
                    (epoch, metric.get()[0], metric.get()[1]))
                nd.waitall()
            pix_acc, mean_iou = metric.get()
            wandb.log({
                'val_PA': pix_acc,
                'val_mIoU': mean_iou,
                'val_loss': val_loss / len(val_iter),
                'custom_step': epoch
            })
            metric.reset()
            if mean_iou > best_score:
                my_tools.save_checkpoint(
                    model=net,
                    model_name=wandb.config.model_name.lower(),
                    backbone=wandb.config.backbone.lower(),
                    data_name=wandb.config.data_name.lower(),
                    time_stamp=wandb.config.start_time,
                    is_best=True)
                best_score = mean_iou
                best_epoch = epoch

    logger.info(
        f'Best val mIoU={round(best_score * 100, 2)} at epoch: {best_epoch}')
    wandb.config.best_epoch = best_epoch
    my_tools.save_checkpoint(model=net,
                             model_name=wandb.config.model_name.lower(),
                             backbone=wandb.config.backbone.lower(),
                             data_name=wandb.config.data_name.lower(),
                             time_stamp=wandb.config.start_time,
                             is_best=False)
Example #24
    dataset = LSUN(root=opt.dataroot,
                   classes=['bedroom_train'],
                   transform=transforms.Compose([
                       transforms.Resize(opt.imageSize,
                                         keep_ratio=True,
                                         interpolation=3),
                       transforms.CenterCrop(opt.imageSize),
                       transforms.ToTensor(),
                       transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                   ]))

    assert dataset
    dataloader = DataLoader(dataset,
                            batch_size=opt.batchSize,
                            shuffle=True,
                            last_batch='discard',
                            pin_memory=True,
                            num_workers=opt.workers)

    # Choose G and D in [MLP_G, MLP_D, DCGAN_G, DCGAN_D, DCGAN_G_NOBN, DCGAN_D_NOBN]
    if opt.noBN:
        net_G = DCGAN_G_NOBN(opt.imageSize, opt.nz, opt.ngf, opt.nc,
                             opt.n_extra_layers)
    elif opt.mlp_G:
        net_G = MLP_G(opt.imageSize, opt.nz, opt.ngf, opt.nc)
    else:
        net_G = DCGAN_G(opt.imageSize, opt.nz, opt.ngf, opt.nc,
                        opt.n_extra_layers)

    if opt.noBN:
        net_D = DCGAN_D_NOBN(opt.imageSize, opt.nc, opt.ndf,
Example #25
def train():
    """Training function."""
    trainer = gluon.Trainer(model.collect_params(), args.optimizer, {
        'learning_rate': args.lr,
        'beta2': 0.98,
        'epsilon': 1e-9
    })

    train_batchify_fn = btf.Tuple(btf.Pad(), btf.Pad(),
                                  btf.Stack(dtype='float32'),
                                  btf.Stack(dtype='float32'))
    test_batchify_fn = btf.Tuple(btf.Pad(), btf.Pad(),
                                 btf.Stack(dtype='float32'),
                                 btf.Stack(dtype='float32'), btf.Stack())
    target_val_lengths = list(map(lambda x: x[-1], data_val_lengths))
    target_test_lengths = list(map(lambda x: x[-1], data_test_lengths))
    if args.bucket_scheme == 'constant':
        bucket_scheme = ConstWidthBucket()
    elif args.bucket_scheme == 'linear':
        bucket_scheme = LinearWidthBucket()
    elif args.bucket_scheme == 'exp':
        bucket_scheme = ExpWidthBucket(bucket_len_step=1.2)
    else:
        raise NotImplementedError
    train_batch_sampler = FixedBucketSampler(lengths=data_train_lengths,
                                             batch_size=args.batch_size,
                                             num_buckets=args.num_buckets,
                                             ratio=args.bucket_ratio,
                                             shuffle=True,
                                             use_average_length=True,
                                             num_shards=len(ctx),
                                             bucket_scheme=bucket_scheme)
    logging.info('Train Batch Sampler:\n{}'.format(
        train_batch_sampler.stats()))
    train_data_loader = ShardedDataLoader(data_train,
                                          batch_sampler=train_batch_sampler,
                                          batchify_fn=train_batchify_fn,
                                          num_workers=8)

    val_batch_sampler = FixedBucketSampler(lengths=target_val_lengths,
                                           batch_size=args.test_batch_size,
                                           num_buckets=args.num_buckets,
                                           ratio=args.bucket_ratio,
                                           shuffle=False,
                                           use_average_length=True,
                                           bucket_scheme=bucket_scheme)
    logging.info('Valid Batch Sampler:\n{}'.format(val_batch_sampler.stats()))
    val_data_loader = DataLoader(data_val,
                                 batch_sampler=val_batch_sampler,
                                 batchify_fn=test_batchify_fn,
                                 num_workers=8)
    test_batch_sampler = FixedBucketSampler(lengths=target_test_lengths,
                                            batch_size=args.test_batch_size,
                                            num_buckets=args.num_buckets,
                                            ratio=args.bucket_ratio,
                                            shuffle=False,
                                            use_average_length=True,
                                            bucket_scheme=bucket_scheme)
    logging.info('Test Batch Sampler:\n{}'.format(test_batch_sampler.stats()))
    test_data_loader = DataLoader(data_test,
                                  batch_sampler=test_batch_sampler,
                                  batchify_fn=test_batchify_fn,
                                  num_workers=8)

    if args.bleu == 'tweaked':
        bpe = bool(args.dataset != 'IWSLT2015' and args.dataset != 'TOY')
        split_compound_word = bpe
        tokenized = True
    elif args.bleu == '13a' or args.bleu == 'intl':
        bpe = False
        split_compound_word = False
        tokenized = False
    else:
        raise NotImplementedError

    best_valid_bleu = 0.0
    step_num = 0
    warmup_steps = args.warmup_steps
    grad_interval = args.num_accumulated
    model.collect_params().setattr('grad_req', 'add')
    average_start = (len(train_data_loader) //
                     grad_interval) * (args.epochs - args.average_start)
    average_param_dict = None
    model.collect_params().zero_grad()
    for epoch_id in range(args.epochs):
        log_avg_loss = 0
        log_wc = 0
        loss_denom = 0
        step_loss = 0
        log_start_time = time.time()
        for batch_id, seqs \
                in enumerate(train_data_loader):
            if batch_id % grad_interval == 0:
                step_num += 1
                new_lr = args.lr / math.sqrt(args.num_units) \
                         * min(1. / math.sqrt(step_num), step_num * warmup_steps ** (-1.5))
                trainer.set_learning_rate(new_lr)
            src_wc, tgt_wc, bs = np.sum(
                [(shard[2].sum(), shard[3].sum(), shard[0].shape[0])
                 for shard in seqs],
                axis=0)
            src_wc = src_wc.asscalar()
            tgt_wc = tgt_wc.asscalar()
            loss_denom += tgt_wc - bs
            seqs = [[seq.as_in_context(context) for seq in shard]
                    for context, shard in zip(ctx, seqs)]
            Ls = []
            with mx.autograd.record():
                for src_seq, tgt_seq, src_valid_length, tgt_valid_length in seqs:
                    out, _ = model(src_seq, tgt_seq[:, :-1], src_valid_length,
                                   tgt_valid_length - 1)
                    smoothed_label = label_smoothing(tgt_seq[:, 1:])
                    ls = loss_function(out, smoothed_label,
                                       tgt_valid_length - 1).sum()
                    Ls.append((ls * (tgt_seq.shape[1] - 1)) / args.batch_size /
                              100.0)
            for L in Ls:
                L.backward()
            if batch_id % grad_interval == grad_interval - 1 or\
                    batch_id == len(train_data_loader) - 1:
                if average_param_dict is None:
                    average_param_dict = {
                        k: v.data(ctx[0]).copy()
                        for k, v in model.collect_params().items()
                    }
                trainer.step(float(loss_denom) / args.batch_size / 100.0)
                param_dict = model.collect_params()
                param_dict.zero_grad()
                if step_num > average_start:
                    alpha = 1. / max(1, step_num - average_start)
                    for name, average_param in average_param_dict.items():
                        average_param[:] += alpha * (
                            param_dict[name].data(ctx[0]) - average_param)
            step_loss += sum([L.asscalar() for L in Ls])
            if batch_id % grad_interval == grad_interval - 1 or\
                    batch_id == len(train_data_loader) - 1:
                log_avg_loss += step_loss / loss_denom * args.batch_size * 100.0
                loss_denom = 0
                step_loss = 0
            log_wc += src_wc + tgt_wc
            if (batch_id + 1) % (args.log_interval * grad_interval) == 0:
                wps = log_wc / (time.time() - log_start_time)
                logging.info('[Epoch {} Batch {}/{}] loss={:.4f}, ppl={:.4f}, '
                             'throughput={:.2f}K wps, wc={:.2f}K'.format(
                                 epoch_id, batch_id + 1,
                                 len(train_data_loader),
                                 log_avg_loss / args.log_interval,
                                 np.exp(log_avg_loss / args.log_interval),
                                 wps / 1000, log_wc / 1000))
                log_start_time = time.time()
                log_avg_loss = 0
                log_wc = 0
        mx.nd.waitall()
        valid_loss, valid_translation_out = evaluate(val_data_loader, ctx[0])
        valid_bleu_score, _, _, _, _ = compute_bleu(
            [val_tgt_sentences],
            valid_translation_out,
            tokenized=tokenized,
            tokenizer=args.bleu,
            split_compound_word=split_compound_word,
            bpe=bpe)
        logging.info(
            '[Epoch {}] valid Loss={:.4f}, valid ppl={:.4f}, valid bleu={:.2f}'
            .format(epoch_id, valid_loss, np.exp(valid_loss),
                    valid_bleu_score * 100))
        test_loss, test_translation_out = evaluate(test_data_loader, ctx[0])
        test_bleu_score, _, _, _, _ = compute_bleu(
            [test_tgt_sentences],
            test_translation_out,
            tokenized=tokenized,
            tokenizer=args.bleu,
            split_compound_word=split_compound_word,
            bpe=bpe)
        logging.info(
            '[Epoch {}] test Loss={:.4f}, test ppl={:.4f}, test bleu={:.2f}'.
            format(epoch_id, test_loss, np.exp(test_loss),
                   test_bleu_score * 100))
        write_sentences(
            valid_translation_out,
            os.path.join(args.save_dir,
                         'epoch{:d}_valid_out.txt').format(epoch_id))
        write_sentences(
            test_translation_out,
            os.path.join(args.save_dir,
                         'epoch{:d}_test_out.txt').format(epoch_id))
        if valid_bleu_score > best_valid_bleu:
            best_valid_bleu = valid_bleu_score
            save_path = os.path.join(args.save_dir, 'valid_best.params')
            logging.info('Save best parameters to {}'.format(save_path))
            model.save_parameters(save_path)
        save_path = os.path.join(args.save_dir,
                                 'epoch{:d}.params'.format(epoch_id))
        model.save_parameters(save_path)
    save_path = os.path.join(args.save_dir, 'average.params')
    mx.nd.save(save_path, average_param_dict)
    if args.average_checkpoint:
        for j in range(args.num_averages):
            params = mx.nd.load(
                os.path.join(args.save_dir,
                             'epoch{:d}.params'.format(args.epochs - j - 1)))
            alpha = 1. / (j + 1)
            for k, v in model._collect_params_with_prefix().items():
                for c in ctx:
                    v.data(c)[:] += alpha * (params[k].as_in_context(c) -
                                             v.data(c))
        save_path = os.path.join(
            args.save_dir,
            'average_checkpoint_{}.params'.format(args.num_averages))
        model.save_parameters(save_path)
    elif args.average_start > 0:
        for k, v in model.collect_params().items():
            v.set_data(average_param_dict[k])
        save_path = os.path.join(args.save_dir, 'average.params')
        model.save_parameters(save_path)
    else:
        model.load_parameters(os.path.join(args.save_dir, 'valid_best.params'),
                              ctx)
    valid_loss, valid_translation_out = evaluate(val_data_loader, ctx[0])
    valid_bleu_score, _, _, _, _ = compute_bleu(
        [val_tgt_sentences],
        valid_translation_out,
        tokenized=tokenized,
        tokenizer=args.bleu,
        bpe=bpe,
        split_compound_word=split_compound_word)
    logging.info(
        'Best model valid Loss={:.4f}, valid ppl={:.4f}, valid bleu={:.2f}'.
        format(valid_loss, np.exp(valid_loss), valid_bleu_score * 100))
    test_loss, test_translation_out = evaluate(test_data_loader, ctx[0])
    test_bleu_score, _, _, _, _ = compute_bleu(
        [test_tgt_sentences],
        test_translation_out,
        tokenized=tokenized,
        tokenizer=args.bleu,
        bpe=bpe,
        split_compound_word=split_compound_word)
    logging.info(
        'Best model test Loss={:.4f}, test ppl={:.4f}, test bleu={:.2f}'.
        format(test_loss, np.exp(test_loss), test_bleu_score * 100))
    write_sentences(valid_translation_out,
                    os.path.join(args.save_dir, 'best_valid_out.txt'))
    write_sentences(test_translation_out,
                    os.path.join(args.save_dir, 'best_test_out.txt'))
Example #26
def test_multi_worker_forked_data_loader():
    """
    Test should successfully run its course of multi-process/forked data loader without errors
    """
    class Dummy(Dataset):
        def __init__(self, random_shape):
            self.random_shape = random_shape

        def __getitem__(self, idx):
            key = idx
            if self.random_shape:
                out = np.random.uniform(size=(random.randint(1000, 1100), 40))
                labels = np.random.uniform(size=(random.randint(10, 15)))
            else:
                out = np.random.uniform(size=(1000, 40))
                labels = np.random.uniform(size=(10))
            return key, out, labels

        def __len__(self):
            return 50

        def batchify_list(self, data):
            """
            return list of ndarray without stack/concat/pad
            """
            if isinstance(data, (tuple, list)):
                return list(data)
            if isinstance(data, mx.nd.NDArray):
                return [data]
            return data

        def batchify(self, data):
            """
            Collate data into batch. Use shared memory for stacking.

            :param data: a list of arrays, with layout 'NTC'.
            :return: x and x's unpadded lengths if labels are not supplied;
                     otherwise x, x's unpadded lengths, y, and y's unpadded lengths.
            """

            # input layout is NTC
            keys, inputs, labels = [item[0] for item in data], [item[1] for item in data], \
                                   [item[2] for item in data]

            if len(data) > 1:
                max_data_len = max([seq.shape[0] for seq in inputs])
                max_labels_len = 0 if not labels else max(
                    [seq.shape[0] for seq in labels])
            else:
                max_data_len = inputs[0].shape[0]
                max_labels_len = 0 if not labels else labels[0].shape[0]

            x_lens = [item.shape[0] for item in inputs]
            y_lens = [item.shape[0] for item in labels]

            for i, seq in enumerate(inputs):
                pad_len = max_data_len - seq.shape[0]
                inputs[i] = np.pad(seq, ((0, pad_len), (0, 0)),
                                   'constant',
                                   constant_values=0)
                labels[i] = np.pad(labels[i],
                                   (0, max_labels_len - labels[i].shape[0]),
                                   'constant',
                                   constant_values=-1)

            inputs = np.asarray(inputs, dtype=np.float32)
            if labels is not None:
                labels = np.asarray(labels, dtype=np.float32)
            inputs = inputs.transpose((1, 0, 2))
            labels = labels.transpose((1, 0))

            return (nd.array(inputs, dtype=inputs.dtype, ctx=context.Context('cpu_shared', 0)),
                    nd.array(x_lens, ctx=context.Context('cpu_shared', 0))) \
                if labels is None else (
                nd.array(inputs, dtype=inputs.dtype, ctx=context.Context('cpu_shared', 0)),
                nd.array(x_lens, ctx=context.Context('cpu_shared', 0)),
                nd.array(labels, dtype=labels.dtype, ctx=context.Context('cpu_shared', 0)),
                nd.array(y_lens, ctx=context.Context('cpu_shared', 0)))

    # This test is pointless on Windows because Windows doesn't fork
    if platform.system() != 'Windows':
        data = Dummy(True)
        loader = DataLoader(data,
                            batch_size=40,
                            batchify_fn=data.batchify,
                            num_workers=2)
        for epoch in range(1):
            for i, data in enumerate(loader):
                if i % 100 == 0:
                    print(data)
                    print('{}:{}'.format(epoch, i))

        data = Dummy(True)
        loader = DataLoader(data,
                            batch_size=40,
                            batchify_fn=data.batchify_list,
                            num_workers=2)
        for epoch in range(1):
            for i, data in enumerate(loader):
                if i % 100 == 0:
                    print(data)
                    print('{}:{}'.format(epoch, i))
Example #27
#
# Because Faster-RCNN handles raw images with various aspect ratios and various shapes, we provide a
# :py:class:`gluoncv.data.batchify.Append`, which neither stacks nor pads images, but instead returns lists.
# This way, each returned image tensor and label keeps its own shape, independent of the rest of the batch.

from gluoncv.data.batchify import Tuple, Append
from mxnet.gluon.data import DataLoader

batch_size = 2  # for the tutorial we use a smaller batch size
num_workers = 0  # you can make it larger (if your CPU has more cores) to accelerate data loading

# behavior of batchify_fn: append images and labels into per-sample lists
batchify_fn = Tuple(Append(), Append())
train_loader = DataLoader(train_dataset.transform(train_transform),
                          batch_size,
                          shuffle=True,
                          batchify_fn=batchify_fn,
                          last_batch='rollover',
                          num_workers=num_workers)
val_loader = DataLoader(val_dataset.transform(val_transform),
                        batch_size,
                        shuffle=False,
                        batchify_fn=batchify_fn,
                        last_batch='keep',
                        num_workers=num_workers)

for ib, batch in enumerate(train_loader):
    if ib > 3:
        break
    print('data 0:', batch[0][0].shape, 'label 0:', batch[1][0].shape)
    print('data 1:', batch[0][1].shape, 'label 1:', batch[1][1].shape)
Example #28
    train_ann_file = os.path.join(root, "annotations\\PennFudanPed.json")
    val_ann_file = os.path.join(root, "annotations\\PennFudanPed.json")
    train_transforms = Compose([
        Resize(image_size, True),
        RandomHorizontalFlip(),
        Normalize(mean=(127, 127, 127), std=(255, 255, 255)),
        ToTensor()
    ])
    val_transforms = Compose([
        Resize(image_size, True),
        Normalize(mean=(127, 127, 127), std=(255, 255, 255)),
        ToTensor()
    ])

    train_dataset = COCODataset(root, train_ann_file, train_transforms)
    train_data_loader = DataLoader(train_dataset, batch_size, True, last_batch="discard", batchify_fn=Collator(10), num_workers=num_workers)

    val_dataset = COCODataset(root, val_ann_file, val_transforms)
    val_data_loader = DataLoader(val_dataset, batch_size, False, last_batch="discard", batchify_fn=Collator(10), num_workers=num_workers)

    ctx = cpu()
    num_devices = 1
    gluon_norm_kwargs = {"num_devices": num_devices} if num_devices >= 1 else {}
    base_network = resnet50_v1b(pretrained=True, dilated=False, use_global_stats=False,
                                norm_layer=SyncBatchNorm, norm_kwargs=gluon_norm_kwargs)
    sym_norm_kwargs = {"ndev": num_devices} if num_devices >= 1 else {}
    features = FPNFeatureExpander(
        network=base_network,
        outputs=['layers1_relu8_fwd', 'layers2_relu11_fwd', 'layers3_relu17_fwd', 'layers4_relu8_fwd'],
        num_filters=[256, 256, 256, 256], use_1x1=True, use_upsample=True, use_elewadd=True, use_p6=True,
        no_bias=True, pretrained=True, norm_layer=mx.sym.contrib.SyncBatchNorm, norm_kwargs=sym_norm_kwargs
Example #29
def train_net(train_path, val_path, anno_file, num_class, batch_size,
              pretrained, pretrained_path, epochs, ctx, learning_rate,
              weight_decay, optimizer, momentum, lr_refactor_steps,
              lr_refactor_ratio, log_file, tensorboard, num_workers,
              per_device_batch_size):
    """ Training network """
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    if log_file:
        fh = logging.FileHandler(log_file)
        logger.addHandler(fh)

    # split training dataset into training and validation dataset
    train_anno_file, val_anno_file = split_image_dataset(
        train_path, val_path, anno_file)
    # load dataset
    train_data = DataLoader(eco_dataset.ImageNpyDataset(
        train_path, train_anno_file).transform_first(get_transform('train')),
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=num_workers)
    val_data = DataLoader(eco_dataset.ImageNpyDataset(
        val_path, val_anno_file).transform_first(get_transform('test')),
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=num_workers)

    # build network
    net = eco_full.eco_full()

    # pre-train model
    if pretrained:
        logger.info(
            "Start training from pretrained model {}".format(pretrained))
        params_file = get_latest_params_file(pretrained_path)
        if not params_file:
            logger.info(
                "No params file exists; the net will be initialized with Xavier")
            net.collect_params().initialize(mx.init.Xavier(), ctx)
            net.hybridize()
        else:
            #            logger.info("Initialize network by symbol parameters.")
            #            net = gluon.SymbolBlock.imports("eco_gluon_to_symbol-symbol.json",
            #                        ["data"], "eco_gluon_to_symbol-0000.params", ctx=mx.gpu())

            logger.info("Initialize network by %s" % params_file)
            net.load_parameters(
                '/home/lijie/ECO_Full_kinetics_pretrained/model/' +
                params_file, ctx)
            net.hybridize()
    else:
        net.collect_params().initialize(mx.init.Xavier(), ctx)
        net.hybridize()

    # learning rate refactor steps
    if lr_refactor_steps is None:
        decay_interval = int(epochs / 3)
        lr_refactor_steps = [i for i in range(1, epochs, decay_interval)]
    else:
        lr_refactor_steps = [
            int(i.strip()) for i in lr_refactor_steps.split(',')
        ]

    trainer = gluon.Trainer(net.collect_params(), optimizer, {
        'learning_rate': learning_rate,
        'momentum': momentum,
        'wd': weight_decay
    })

    metric_acc = metric.Accuracy()
    L = gluon.loss.SoftmaxCrossEntropyLoss()

    lr_counter = 0
    num_batch = len(train_data)

    for epoch in range(epochs):
        epoch_start = time.time()
        if lr_counter < len(
                lr_refactor_steps) and epoch == lr_refactor_steps[lr_counter]:
            trainer.set_learning_rate(trainer.learning_rate *
                                      lr_refactor_ratio)
            lr_counter += 1
        train_loss = 0
        metric_acc.reset()

        for i, batch in enumerate(train_data):
            batch_start = time.time()
            data = gluon.utils.split_and_load(batch[0],
                                              ctx_list=ctx,
                                              batch_axis=0,
                                              even_split=False)
            label = gluon.utils.split_and_load(batch[1],
                                               ctx_list=ctx,
                                               batch_axis=0,
                                               even_split=False)
            with ag.record():
                # print('data length : {}'.format(len(data)))
                outputs = []
                data = data[0]
                label = label[0]
                for idx in range(data.shape[0]):
                    outputs.append(net(data[idx]))
                loss = 0
                for yhat, y in zip(outputs, label):
                    loss = loss + mx.nd.mean(L(yhat, y))
                loss.backward()
            # for l in loss:
            #     l.backward()

            trainer.step(batch_size, ignore_stale_grad=True)
            # train_loss += sum([l.mean().asscalar() for l in loss]) / len(loss)
            train_loss = loss.mean().asscalar() / batch_size
            metric_acc.update(label, outputs)
            _, train_acc = metric_acc.get()
            # save parameters
            if i % 100 == 0 and i != 0:
                logger.info("Save parameters")
                net.save_parameters(
                    os.path.join(pretrained_path,
                                 'eco_net_iter_{}.params'.format(str(i))))
            logger.info(
                '[Epoch %d] Iter: %d, Train-acc: %.3f, loss: %.3f | time: %.1f'
                % (epoch, i, train_acc, train_loss, time.time() - batch_start))

        _, train_acc = metric_acc.get()
        train_loss /= num_batch

        _, val_acc = test(net, val_data, ctx)

        logger.info(
            '[Epoch %d] Train-acc: %.3f, loss: %.3f | Val-acc: %.3f | time: %.1f'
            %
            (epoch, train_acc, train_loss, val_acc, time.time() - epoch_start))
Example #30
    def __call__(self, dataset, sampler):
        dataloader = DataLoader(dataset=dataset,
                                batch_sampler=sampler,
                                batchify_fn=self._batchify_fn,
                                num_workers=self._num_ctxes)
        return dataloader