def gluon_model(model_data):
    train_data, train_label, _ = model_data
    train_data_loader = DataLoader(
        list(zip(train_data, train_label)), batch_size=128, last_batch="discard"
    )
    model = HybridSequential()
    model.add(Dense(128, activation="relu"))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(10))
    model.initialize()
    model.hybridize()
    trainer = Trainer(
        model.collect_params(),
        "adam",
        optimizer_params={"learning_rate": 0.001, "epsilon": 1e-07},
    )
    # `metrics` was renamed in mxnet 1.6.0: https://github.com/apache/incubator-mxnet/pull/17048
    arg_name = (
        "metrics"
        if LooseVersion(mx.__version__) < LooseVersion("1.6.0")
        else "train_metrics"
    )
    est = estimator.Estimator(
        net=model,
        loss=SoftmaxCrossEntropyLoss(),
        trainer=trainer,
        **{arg_name: Accuracy()},
    )
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        est.fit(train_data_loader, epochs=3)
    return model
def gluon_random_data_run():
    mlflow.gluon.autolog()
    with mlflow.start_run() as run:
        data = DataLoader(LogsDataset(), batch_size=128, last_batch="discard")
        validation = DataLoader(LogsDataset(), batch_size=128, last_batch="discard")
        model = HybridSequential()
        model.add(Dense(64, activation="relu"))
        model.add(Dense(64, activation="relu"))
        model.add(Dense(10))
        model.initialize()
        model.hybridize()
        trainer = Trainer(
            model.collect_params(),
            "adam",
            optimizer_params={"learning_rate": 0.001, "epsilon": 1e-07},
        )
        est = estimator.Estimator(
            net=model,
            loss=SoftmaxCrossEntropyLoss(),
            metrics=Accuracy(),
            trainer=trainer,
        )
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            est.fit(data, epochs=3, val_data=validation)
    client = mlflow.tracking.MlflowClient()
    return client.get_run(run.info.run_id)
def test_autolog_ends_auto_created_run():
    mlflow.gluon.autolog()
    data = DataLoader(LogsDataset(), batch_size=128, last_batch="discard")
    model = HybridSequential()
    model.add(Dense(64, activation="relu"))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(10))
    model.initialize()
    model.hybridize()
    trainer = Trainer(
        model.collect_params(),
        "adam",
        optimizer_params={"learning_rate": 0.001, "epsilon": 1e-07},
    )
    est = estimator.Estimator(
        net=model, loss=SoftmaxCrossEntropyLoss(), trainer=trainer, **get_metrics()
    )
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        est.fit(data, epochs=3)
    assert mlflow.active_run() is None
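# The tests in this file call get_metrics() / get_estimator() without defining
# them here. A minimal sketch of what such helpers could look like, assuming
# their only job is to paper over the mxnet 1.6.0 rename of `metrics` to
# `train_metrics` (the same versioned kwarg used in gluon_model above); these
# are illustrative, not the canonical definitions.
def get_metrics():
    # {"metrics": Accuracy()} on mxnet < 1.6.0, {"train_metrics": Accuracy()} after
    arg_name = ("metrics" if LooseVersion(mx.__version__) < LooseVersion("1.6.0")
                else "train_metrics")
    return {arg_name: Accuracy()}


def get_estimator(model, trainer):
    # Build an Estimator with the version-appropriate metrics kwarg.
    return estimator.Estimator(net=model, loss=SoftmaxCrossEntropyLoss(),
                               trainer=trainer, **get_metrics())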
def gluon_model(model_data):
    train_data, train_label, _ = model_data
    train_data_loader = DataLoader(
        list(zip(train_data, train_label)), batch_size=128, last_batch="discard"
    )
    model = HybridSequential()
    model.add(Dense(128, activation="relu"))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(10))
    model.initialize()
    model.hybridize()
    trainer = Trainer(
        model.collect_params(),
        "adam",
        optimizer_params={"learning_rate": 0.001, "epsilon": 1e-07},
    )
    est = estimator.Estimator(
        net=model, loss=SoftmaxCrossEntropyLoss(), metrics=Accuracy(), trainer=trainer
    )
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        est.fit(train_data_loader, epochs=3)
    return model
def test_autolog_persists_manually_created_run():
    mlflow.gluon.autolog()
    data = DataLoader(LogsDataset(), batch_size=128, last_batch="discard")
    with mlflow.start_run() as run:
        model = HybridSequential()
        model.add(Dense(64, activation="relu"))
        model.add(Dense(64, activation="relu"))
        model.add(Dense(10))
        model.initialize()
        model.hybridize()
        trainer = Trainer(
            model.collect_params(),
            "adam",
            optimizer_params={"learning_rate": 0.001, "epsilon": 1e-07},
        )
        est = get_estimator(model, trainer)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            est.fit(data, epochs=3)
        assert mlflow.active_run().info.run_id == run.info.run_id
def test_autolog_registering_model():
    registered_model_name = "test_autolog_registered_model"
    mlflow.gluon.autolog(registered_model_name=registered_model_name)
    data = DataLoader(LogsDataset(), batch_size=128, last_batch="discard")
    model = HybridSequential()
    model.add(Dense(64, activation="relu"))
    model.add(Dense(10))
    model.initialize()
    model.hybridize()
    trainer = Trainer(
        model.collect_params(),
        "adam",
        optimizer_params={"learning_rate": 0.001, "epsilon": 1e-07},
    )
    est = get_estimator(model, trainer)
    with mlflow.start_run(), warnings.catch_warnings():
        warnings.simplefilter("ignore")
        est.fit(data, epochs=3)
        registered_model = MlflowClient().get_registered_model(registered_model_name)
        assert registered_model.name == registered_model_name
def get_gluon_random_data_run(log_models=True):
    mlflow.gluon.autolog(log_models)
    with mlflow.start_run() as run:
        data = DataLoader(LogsDataset(), batch_size=128, last_batch="discard")
        validation = DataLoader(LogsDataset(), batch_size=128, last_batch="discard")
        model = HybridSequential()
        model.add(Dense(64, activation="relu"))
        model.add(Dense(64, activation="relu"))
        model.add(Dense(10))
        model.initialize()
        model.hybridize()
        trainer = Trainer(
            model.collect_params(),
            "adam",
            optimizer_params={"learning_rate": 0.001, "epsilon": 1e-07},
        )
        est = get_estimator(model, trainer)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            est.fit(data, epochs=3, val_data=validation)
    client = mlflow.tracking.MlflowClient()
    return client.get_run(run.info.run_id)
def build_generator(n_filters, n_channels, mx_ctx):
    netG = HybridSequential()
    with netG.name_scope():
        # Input is Z
        netG.add(Conv2DTranspose(n_filters * 8, kernel_size=4, strides=1, padding=0, use_bias=False))
        netG.add(BatchNorm())
        netG.add(Activation("relu"))
        netG.add(Conv2DTranspose(n_filters * 4, kernel_size=4, strides=2, padding=1, use_bias=False))
        netG.add(BatchNorm())
        netG.add(Activation("relu"))
        netG.add(Conv2DTranspose(n_filters * 2, kernel_size=4, strides=2, padding=1, use_bias=False))
        netG.add(BatchNorm())
        netG.add(Activation("relu"))
        netG.add(Conv2DTranspose(n_filters, kernel_size=4, strides=2, padding=1, use_bias=False))
        netG.add(BatchNorm())
        netG.add(Activation("relu"))
        netG.add(Conv2DTranspose(n_channels, kernel_size=4, strides=2, padding=1, use_bias=False))
        netG.add(BatchNorm())
        netG.add(Activation("tanh"))
    netG.initialize(mx.init.Normal(0.02), ctx=mx_ctx)
    netG.hybridize()
    return netG
def test_autolog_persists_manually_created_run():
    kiwi.gluon.autolog()
    data = DataLoader(LogsDataset(), batch_size=128, last_batch="discard")
    with kiwi.start_run() as run:
        model = HybridSequential()
        model.add(Dense(64, activation="relu"))
        model.add(Dense(64, activation="relu"))
        model.add(Dense(10))
        model.initialize()
        model.hybridize()
        trainer = Trainer(
            model.collect_params(),
            "adam",
            optimizer_params={"learning_rate": 0.001, "epsilon": 1e-07},
        )
        est = estimator.Estimator(
            net=model,
            loss=SoftmaxCrossEntropyLoss(),
            metrics=Accuracy(),
            trainer=trainer,
        )
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            est.fit(data, epochs=3)
        assert kiwi.active_run().info.run_id == run.info.run_id
class AnchorOffstNet(HybridBlock):
    def __init__(self, net=None, version=None, anchors=None, target_size=None, ctx=mx.cpu()):
        super(AnchorOffstNet, self).__init__()
        self._net = net

        features = []
        strides = []
        darknet_output = get_darknet(version, pretrained=False, ctx=mx.cpu(), dummy=True)(
            mx.nd.random_uniform(low=0, high=1, shape=(1, 3, target_size[0], target_size[1]), ctx=mx.cpu()))
        for out in darknet_output:  # feature_14, feature_24, feature_28
            out_height, out_width = out.shape[2:]
            features.append([out_width, out_height])
            strides.append([target_size[1] // out_width, target_size[0] // out_height])

        features = features[::-1]
        strides = strides[::-1]  # order: deep -> middle -> shallow!
        anchors = OrderedDict(anchors)
        anchors = list(anchors.values())[::-1]
        self._numoffst = len(anchors)

        with self.name_scope():
            self._anchor_generators = HybridSequential()
            for i, anchor, feature, stride in zip(range(len(features)), anchors, features, strides):
                self._anchor_generators.add(YoloAnchorGenerator(i, anchor, feature, stride))

        self._anchor_generators.initialize(ctx=ctx)

    def hybrid_forward(self, F, x):
        output82, output94, output106 = self._net(x)

        anchors = []
        offsets = []
        strides = []
        for i in range(self._numoffst):
            anchor, offset, stride = self._anchor_generators[i](x)
            anchors.append(anchor)
            offsets.append(offset)
            strides.append(stride)

        return output82, output94, output106, \
               anchors[0], anchors[1], anchors[2], \
               offsets[0], offsets[1], offsets[2], \
               strides[0], strides[1], strides[2]
class AnchorNet(HybridBlock):
    def __init__(self, net=None, version=None, target_size=None,
                 anchor_sizes=[32, 64, 128, 256, 512],
                 anchor_size_ratios=[1, pow(2, 1 / 3), pow(2, 2 / 3)],
                 anchor_aspect_ratios=[0.5, 1, 2],
                 box_offset=(0.5, 0.5),
                 anchor_box_clip=True,
                 ctx=mx.cpu()):
        super(AnchorNet, self).__init__()
        self._net = net
        if version not in [0, 1, 2, 3, 4, 5, 6]:
            raise ValueError

        feature_sizes = []
        bifpn = get_bifpn(version, ctx=mx.cpu(), dummy=True)(
            mx.nd.random_uniform(low=0, high=1, shape=(1, 3, target_size[0], target_size[1]), ctx=mx.cpu()))
        for bif in bifpn:
            feature_sizes.append(bif.shape[2:])  # h, w

        # Unless get_fpn_resnet is passed in from outside, forward must be run once;
        # because the network is not fully defined up front (default/deferred init),
        # this throwaway code is unavoidable.
        with self.name_scope():
            # The next two lines must be inside self.name_scope(), since a new
            # anchor-generating network is created here. Outside self.name_scope()
            # its names would clash with the existing self._net.
            self._bifpn = get_bifpn(version, ctx=ctx, dummy=True)
            self._bifpn.forward(mx.nd.ones(shape=(1, 3) + target_size, ctx=ctx))

            self._anchor_generators = HybridSequential()
            for index, feature_size, anchor_size in zip(range(len(feature_sizes)), feature_sizes, anchor_sizes):
                self._anchor_generators.add(
                    EfficientAnchorGenerator(index=index,
                                             input_size=target_size,
                                             feature_size=feature_size,
                                             anchor_size=anchor_size,
                                             anchor_size_ratios=anchor_size_ratios,
                                             anchor_aspect_ratios=anchor_aspect_ratios,
                                             box_offset=box_offset,
                                             box_clip=anchor_box_clip))
        self._anchor_generators.initialize(ctx=ctx)

    def hybrid_forward(self, F, x):
        cls_preds, box_preds = self._net(x)
        anchors = [anchor_generator(bifpn_feature)
                   for bifpn_feature, anchor_generator in zip(self._bifpn(x), self._anchor_generators)]
        anchors = F.reshape(F.concat(*anchors, dim=0), shape=(1, -1, 4))
        return cls_preds, box_preds, anchors
def build_model(A, X):
    model = HybridSequential()
    with model.name_scope():
        features, out_units = build_features(A, X)
        model.add(features)
        logger.info("GCN Summary: \n{}".format(model))

        classifier = LogisticRegressor(out_units)
        model.add(classifier)
        logger.info("GCN + LR Summary: \n{}".format(model))

    model.hybridize()
    model.initialize(Uniform(1))
    return model, features
def build_model(A, X):
    model = HybridSequential()
    hidden_layer_specs = [(4, 'tanh'), (2, 'tanh')]
    in_units = X.shape[1]
    with model.name_scope():
        features, out_units = build_features(A, X)
        model.add(features)

        classifier = LogisticRegressor(out_units)
        model.add(classifier)

    model.hybridize()
    model.initialize(Uniform(1))
    return model, features
def create_model():
    model = HybridSequential()
    layers = [
        Conv2D(6, kernel_size=5),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
        Conv2D(16, kernel_size=5),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
        Rearrange('b c h w -> b (c h w)'),
        Dense(120),
        LeakyReLU(alpha=0.0),
        Dense(84),
        LeakyReLU(alpha=0.0),
        Dense(10),
    ]
    for layer in layers:
        model.add(layer)
    model.initialize(mxnet.init.Xavier(), ctx=mxnet.cpu())
    return model
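# A quick shape check for the einops-based LeNet above; a sketch assuming
# `import mxnet` and `from einops.layers.gluon import Rearrange, Reduce` are in
# scope. The Reduce layers implement 2x2 max pooling and Rearrange flattens.
model = create_model()
x = mxnet.nd.random.uniform(shape=(4, 1, 28, 28))  # an MNIST-sized batch
y = model(x)  # spatial size: 28 -conv5-> 24 -pool-> 12 -conv5-> 8 -pool-> 4
print(y.shape)  # (4, 10)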
def gluon_model(model_data):
    train_data, train_label, _ = model_data
    dataset = mx.gluon.data.ArrayDataset(train_data, train_label)
    train_data_loader = DataLoader(dataset, batch_size=128, last_batch="discard")
    model = HybridSequential()
    model.add(Dense(128, activation="relu"))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(10))
    model.initialize()
    model.hybridize()
    trainer = Trainer(
        model.collect_params(), "adam", optimizer_params={"learning_rate": 0.001, "epsilon": 1e-07}
    )
    est = get_estimator(model, trainer)
    est.fit(train_data_loader, epochs=3)
    return model
def test_autolog_ends_auto_created_run():
    mlflow.gluon.autolog()
    data = DataLoader(LogsDataset(), batch_size=128, last_batch="discard")
    model = HybridSequential()
    model.add(Dense(64, activation="relu"))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(10))
    model.initialize()
    model.hybridize()
    trainer = Trainer(
        model.collect_params(), "adam", optimizer_params={"learning_rate": 0.001, "epsilon": 1e-07}
    )
    est = get_estimator(model, trainer)
    est.fit(data, epochs=3)
    assert mlflow.active_run() is None
class CenterNet(HybridBlock):
    def __init__(self, base=18, heads=OrderedDict(), head_conv_channel=64,
                 pretrained=True, root=os.path.join(os.getcwd(), 'models'),
                 use_dcnv2=False, ctx=mx.cpu()):
        super(CenterNet, self).__init__()
        with self.name_scope():
            self._base_network = get_upconv_resnet(base=base, pretrained=pretrained,
                                                   root=root, use_dcnv2=use_dcnv2, ctx=ctx)
            self._heads = HybridSequential('heads')
            for name, values in heads.items():
                head = HybridSequential(name)
                num_output = values['num_output']
                bias = values.get('bias', 0.0)
                head.add(Conv2D(head_conv_channel, kernel_size=(3, 3), padding=(1, 1), use_bias=True))
                head.add(Activation('relu'))
                head.add(Conv2D(num_output, kernel_size=(1, 1), use_bias=True,
                                bias_initializer=mx.init.Constant(bias)))
                self._heads.add(head)
        self._heads.initialize(ctx=ctx)

    def hybrid_forward(self, F, x):
        feature = self._base_network(x)
        heatmap, offset, wh = [head(feature) for head in self._heads]
        heatmap = F.sigmoid(heatmap)
        return heatmap, offset, wh
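# Example heads for the CenterNet above (a sketch: the key order matches how
# hybrid_forward unpacks them - heatmap, offset, wh - while the 3-class
# num_output and the heatmap bias are illustrative). The bias value
# -2.19 ~= -log((1 - 0.1) / 0.1) is the usual CenterNet heatmap prior, so that
# sigmoid(bias) ~= 0.1 at the start of training.
heads = OrderedDict([
    ('heatmap', {'num_output': 3, 'bias': -2.19}),  # one channel per class
    ('offset', {'num_output': 2}),                  # sub-pixel x/y offsets
    ('wh', {'num_output': 2}),                      # box width/height
])
net = CenterNet(base=18, heads=heads, pretrained=False, ctx=mx.cpu())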
def build_discriminator(n_filters, n_channels, mx_ctx):
    netD = HybridSequential()
    with netD.name_scope():
        # Input is n_channels * 64 * 64
        netD.add(Conv2D(n_filters, kernel_size=4, strides=2, padding=1, use_bias=False))
        netD.add(LeakyReLU(0.2))
        netD.add(Conv2D(n_filters * 2, kernel_size=4, strides=2, padding=1, use_bias=False))
        netD.add(BatchNorm())
        netD.add(LeakyReLU(0.2))
        netD.add(Conv2D(n_filters * 4, kernel_size=4, strides=2, padding=1, use_bias=False))
        netD.add(BatchNorm())
        netD.add(LeakyReLU(0.2))
        netD.add(Conv2D(n_filters * 8, kernel_size=4, strides=2, padding=1, use_bias=False))
        netD.add(BatchNorm())
        netD.add(LeakyReLU(0.2))
        netD.add(Conv2D(1, 4, 1, 0, use_bias=False))
    netD.initialize(mx.init.Normal(0.02), ctx=mx_ctx)
    netD.hybridize()
    return netD
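# Smoke test for the DCGAN pair above; a sketch in which the latent size 100
# is the usual DCGAN choice, not something stated in the source. The first
# transposed conv maps 1x1 -> 4x4 and each following stage doubles the spatial
# size, so a (N, 100, 1, 1) latent becomes a (N, n_channels, 64, 64) image,
# which the discriminator maps back down to a (N, 1, 1, 1) score.
ctx = mx.cpu()
netG = build_generator(n_filters=64, n_channels=3, mx_ctx=ctx)
netD = build_discriminator(n_filters=64, n_channels=3, mx_ctx=ctx)
z = mx.nd.random.normal(shape=(2, 100, 1, 1), ctx=ctx)
fake = netG(z)      # (2, 3, 64, 64)
score = netD(fake)  # (2, 1, 1, 1)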
class AnchorNet(HybridBlock):
    def __init__(self, net=None, version=None, target_size=None,
                 box_sizes300=None, box_ratios300=None,
                 box_sizes512=None, box_ratios512=None,
                 anchor_box_clip=None, anchor_box_offset=(0.5, 0.5),
                 ctx=mx.cpu()):
        super(AnchorNet, self).__init__()
        self._net = net
        if version not in [300, 512]:
            raise ValueError

        if version == 300:
            box_sizes = box_sizes300
            box_ratios = box_ratios300
        elif version == 512:
            box_sizes = box_sizes512
            box_ratios = box_ratios512

        if len(box_sizes) - 1 != len(box_ratios):
            raise ValueError

        feature_sizes = []
        features_output = VGG16(version, ctx=mx.cpu(), dummy=True)(
            mx.nd.random_uniform(low=0, high=1, shape=(1, 3, target_size[0], target_size[1]), ctx=mx.cpu()))
        for f in features_output:
            feature_sizes.append(f.shape[2:])  # h, w

        sizes = list(zip(box_sizes[:-1], box_sizes[1:]))

        # Unless VGG16 is passed in from outside, forward must be run once;
        # because the network is not fully defined up front (default/deferred init),
        # this throwaway code is unavoidable.
        with self.name_scope():
            # The next two lines must be inside self.name_scope(), since a new
            # anchor-generating network is created here. Outside self.name_scope()
            # its names would clash with the existing self._net.
            self._features = VGG16(version, ctx=ctx, dummy=True)
            self._features.forward(mx.nd.ones(shape=(1, 3) + target_size, ctx=ctx))

            self._anchor_generators = HybridSequential()
            for index, size, ratio, feature_size in zip(range(len(feature_sizes)), sizes, box_ratios, feature_sizes):
                self._anchor_generators.add(
                    SSDAnchorGenerator(index=index,
                                       feature_size=feature_size,
                                       input_size=target_size,
                                       box_size=size,
                                       box_ratio=ratio,
                                       box_offset=anchor_box_offset,
                                       box_clip=anchor_box_clip))
        self._anchor_generators.initialize(ctx=ctx)

    def hybrid_forward(self, F, x):
        cls_preds, box_preds = self._net(x)
        feature_list = self._features(x)
        anchors = [anchor_generator(feature)
                   for feature, anchor_generator in zip(feature_list, self._anchor_generators)]
        anchors = F.reshape(F.concat(*anchors, dim=0), shape=(1, -1, 4))
        return cls_preds, box_preds, anchors
class Efficient(HybridBlock):
    def __init__(self,
                 version=0,
                 input_size=(512, 512),
                 anchor_sizes=[32, 64, 128, 256, 512],
                 anchor_size_ratios=[1, pow(2, 1 / 3), pow(2, 2 / 3)],
                 anchor_aspect_ratios=[0.5, 1, 2],
                 num_classes=1,  # foreground only
                 anchor_box_offset=(0.5, 0.5),
                 anchor_box_clip=True,
                 alloc_size=[256, 256],
                 ctx=mx.cpu()):
        super(Efficient, self).__init__()
        if version not in [0, 1, 2, 3, 4, 5, 6]:
            raise ValueError

        # Box / class layer settings, as given in the paper
        repeat = [3, 3, 3, 4, 4, 4, 5, 5]
        channels = [64, 88, 112, 160, 224, 288, 384, 384]

        feature_sizes = []
        bifpn_output = get_bifpn(version, ctx=mx.cpu(), dummy=True)(
            mx.nd.random_uniform(low=0, high=1, shape=(1, 3, input_size[0], input_size[1]), ctx=mx.cpu()))
        for bifpn in bifpn_output:
            feature_sizes.append(bifpn.shape[2:])  # h, w

        self._bifpn = get_bifpn(version, ctx=ctx)
        self._num_classes = num_classes

        with self.name_scope():
            self._class_subnet = HybridSequential()
            self._box_subnet = HybridSequential()
            self._anchor_generators = HybridSequential()
            for _ in range(repeat[version]):
                self._class_subnet.add(
                    ConvPredictor(num_channel=channels[version],
                                  kernel=(3, 3),
                                  pad=(1, 1),
                                  stride=(1, 1),
                                  activation='relu',
                                  use_bias=True,
                                  in_channels=0))
                self._box_subnet.add(
                    ConvPredictor(num_channel=channels[version],
                                  kernel=(3, 3),
                                  pad=(1, 1),
                                  stride=(1, 1),
                                  activation='relu',
                                  use_bias=True,
                                  in_channels=0))
            '''
            Why bias_initializer=mx.init.Constant(-np.log((1 - 0.01) / 0.01))?
            From the paper:
            For the final conv layer of the classification subnet, we set the bias
            initialization to b = -log((1 - pi)/pi), where pi specifies that at the
            start of training every anchor should be labeled as foreground with
            confidence of ~pi. We use pi = .01 in all experiments, although results
            are robust to the exact value. As explained in section 3.3, this
            initialization prevents the large number of background anchors from
            generating a large, destabilizing loss value in the first iteration
            of training.
            -> By initializing the final bias of the class subnet to
            b = -log((1 - pi)/pi), every anchor starts out as foreground with
            confidence 0.01; this keeps the overwhelming majority of background
            anchors from producing a large, destabilizing loss in the first
            training iteration.
            '''
            prior = 0.01
            self._class_subnet.add(
                ConvPredictor(num_channel=num_classes * len(anchor_size_ratios) * len(anchor_aspect_ratios),
                              kernel=(3, 3),
                              pad=(1, 1),
                              stride=(1, 1),
                              activation=None,
                              use_bias=True,
                              in_channels=0,
                              bias_initializer=mx.init.Constant(-np.log((1 - prior) / prior))))
            self._box_subnet.add(
                ConvPredictor(num_channel=4 * len(anchor_size_ratios) * len(anchor_aspect_ratios),
                              kernel=(3, 3),
                              pad=(1, 1),
                              stride=(1, 1),
                              activation=None,
                              use_bias=True,
                              in_channels=0))
            for index, feature_size, anchor_size in zip(range(len(feature_sizes)), feature_sizes, anchor_sizes):
                self._anchor_generators.add(
                    EfficientAnchorGenerator(index=index,
                                             input_size=input_size,
                                             feature_size=feature_size,
                                             anchor_size=anchor_size,
                                             anchor_size_ratios=anchor_size_ratios,
                                             anchor_aspect_ratios=anchor_aspect_ratios,
                                             box_offset=anchor_box_offset,
                                             box_clip=anchor_box_clip,
                                             alloc_size=(alloc_size[0] // (2 ** index),
                                                         alloc_size[1] // (2 ** index))))
        self._class_subnet.initialize(ctx=ctx)
        self._box_subnet.initialize(ctx=ctx)
        self._anchor_generators.initialize(ctx=ctx)
        logging.info(f"{self.__class__.__name__}_{version} Head weight init complete")

    def hybrid_forward(self, F, x):
        # class / box prediction over the BiFPN features (p3, p4, p5, p6, p7)
        bifpn_features = self._bifpn(x)
        # (batch, height, width, class) -> (batch, -1)
        cls_preds = [F.flatten(data=F.transpose(data=self._class_subnet(bifpn_feature), axes=(0, 2, 3, 1)))
                     for bifpn_feature in bifpn_features]
        # (batch, height, width, 4) -> (batch, -1)
        box_preds = [F.flatten(data=F.transpose(data=self._box_subnet(bifpn_feature), axes=(0, 2, 3, 1)))
                     for bifpn_feature in bifpn_features]
        anchors = [anchor_generator(bifpn_feature)
                   for bifpn_feature, anchor_generator in zip(bifpn_features, self._anchor_generators)]
        cls_preds = F.reshape(data=F.concat(*cls_preds, dim=-1), shape=(0, -1, self._num_classes))
        box_preds = F.reshape(data=F.concat(*box_preds, dim=-1), shape=(0, -1, 4))
        anchors = F.reshape(F.concat(*anchors, dim=0), shape=(1, -1, 4))
        return cls_preds, box_preds, anchors
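# Worked check of the focal-loss prior discussed in the docstring above: with
# pi = 0.01, initializing the final classification bias to b = -log((1 - pi)/pi)
# gives sigmoid(b) = pi, i.e. every anchor starts out as foreground with
# confidence ~0.01.
prior = 0.01
b = -np.log((1 - prior) / prior)  # ~= -4.595
assert abs(1.0 / (1.0 + np.exp(-b)) - prior) < 1e-9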
class YoloV3output(HybridBlock):
    def __init__(self, Darknetlayer=53,
                 anchors={"shallow": [(10, 13), (16, 30), (33, 23)],
                          "middle": [(30, 61), (62, 45), (59, 119)],
                          "deep": [(116, 90), (156, 198), (373, 326)]},
                 num_classes=1,  # foreground only
                 pretrained=True,
                 pretrained_path="modelparam",
                 ctx=mx.cpu()):
        super(YoloV3output, self).__init__()
        if Darknetlayer not in [53]:
            raise ValueError

        anchors = OrderedDict(anchors)
        anchors = list(anchors.values())[::-1]

        # the number of anchors produced per layer may change
        self._num_anchors = []
        for anchor in anchors:
            self._num_anchors.append(len(anchor))  # may vary

        self._darknet = get_darknet(Darknetlayer, pretrained=pretrained, ctx=ctx, root=pretrained_path)
        self._num_classes = num_classes
        self._num_pred = 5 + num_classes  # fixed

        with self.name_scope():
            head_init_num_channel = 512
            trans_init_num_channel = 256
            self._head = HybridSequential()
            self._transition = HybridSequential()
            self._upsampleconv = HybridSequential()

            # output
            for j in range(len(anchors)):
                if j == 0:
                    factor = 1
                else:
                    factor = 2
                head_init_num_channel = head_init_num_channel // factor
                for _ in range(3):
                    self._head.add(Conv2D(channels=head_init_num_channel,
                                          kernel_size=(1, 1),
                                          strides=(1, 1),
                                          padding=(0, 0),
                                          use_bias=True,
                                          in_channels=0,
                                          weight_initializer=mx.init.Normal(0.01),
                                          bias_initializer='zeros'))
                    self._head.add(BatchNorm(epsilon=1e-5, momentum=0.9))
                    self._head.add(LeakyReLU(0.1))
                    self._head.add(Conv2D(channels=head_init_num_channel * 2,
                                          kernel_size=(3, 3),
                                          strides=(1, 1),
                                          padding=(1, 1),
                                          use_bias=True,
                                          in_channels=0,
                                          weight_initializer=mx.init.Normal(0.01),
                                          bias_initializer='zeros'))
                    self._head.add(BatchNorm(epsilon=1e-5, momentum=0.9))
                    self._head.add(LeakyReLU(0.1))
                self._head.add(Conv2D(channels=len(anchors[j]) * self._num_pred,
                                      kernel_size=(1, 1),
                                      strides=(1, 1),
                                      padding=(0, 0),
                                      use_bias=True,
                                      in_channels=0,
                                      weight_initializer=mx.init.Normal(0.01),
                                      bias_initializer='zeros'))

            # for upsample - transition
            for i in range(len(anchors) - 1):
                if i == 0:
                    factor = 1
                else:
                    factor = 2
                trans_init_num_channel = trans_init_num_channel // factor
                self._transition.add(Conv2D(channels=trans_init_num_channel,
                                            kernel_size=(1, 1),
                                            strides=(1, 1),
                                            padding=(0, 0),
                                            use_bias=True,
                                            in_channels=0,
                                            weight_initializer=mx.init.Normal(0.01),
                                            bias_initializer='zeros'))
                self._transition.add(BatchNorm(epsilon=1e-5, momentum=0.9))
                self._transition.add(LeakyReLU(0.1))

            # for deconvolution upsampling
            for i in range(len(anchors) - 1):
                if i == 0:
                    factor = 1
                else:
                    factor = 2
                trans_init_num_channel = trans_init_num_channel // factor
                self._upsampleconv.add(Conv2DTranspose(trans_init_num_channel,
                                                       kernel_size=3, strides=2, padding=1,
                                                       output_padding=1, use_bias=True, in_channels=0))
                self._upsampleconv.add(BatchNorm(epsilon=1e-5, momentum=0.9))
                self._upsampleconv.add(LeakyReLU(0.1))

        self._head.initialize(ctx=ctx)
        self._transition.initialize(ctx=ctx)
        self._upsampleconv.initialize(ctx=ctx)
        print(f"{self.__class__.__name__} Head weight init complete")

    def hybrid_forward(self, F, x):
        feature_36, feature_61, feature_74 = self._darknet(x)

        # first
        transition = self._head[:15](feature_74)   # darknet layers 75 ~ 79
        output82 = self._head[15:19](transition)   # darknet layers 79 ~ 82

        # second
        transition = self._transition[0:3](transition)
        # transition = F.UpSampling(transition, scale=2, sample_type='nearest')
        # (or sample_type="bilinear"; replaced later by deconvolution)
        transition = self._upsampleconv[0:3](transition)
        transition = F.concat(transition, feature_61, dim=1)
        transition = self._head[19:34](transition)  # darknet layers 75 ~ 91
        output94 = self._head[34:38](transition)    # darknet layers 91 ~ 94

        # third
        transition = self._transition[3:](transition)
        # transition = F.UpSampling(transition, scale=2, sample_type='nearest')
        # (or sample_type="bilinear"; replaced later by deconvolution)
        transition = self._upsampleconv[3:](transition)
        transition = F.concat(transition, feature_36, dim=1)
        output106 = self._head[38:](transition)     # darknet layers 91 ~ 106

        output82 = F.transpose(output82, axes=(0, 2, 3, 1))
        output94 = F.transpose(output94, axes=(0, 2, 3, 1))
        output106 = F.transpose(output106, axes=(0, 2, 3, 1))

        # (batch size, height, width, len(anchors) * (5 + num_classes))
        return output82, output94, output106
        return loss

    def calc_con_loss(self, gt_masks, anchor_masks):
        '''
        :param gt_masks: ground-truth objectness masks
        :param anchor_masks: predicted objectness masks
        :return: weighted objectness (confidence) loss
        '''
        F = mx.symbol
        # todo: v1 --> v2
        # todo: still use mean square error
        loss_raw = F.softmax_cross_entropy(anchor_masks, gt_masks)
        loss_obj = F.sum(loss_raw * anchor_masks)
        loss_noobj = F.sum(loss_raw * (1 - anchor_masks))
        loss = 1.0 * loss_obj + self.noobj_scale * loss_noobj
        return loss


if __name__ == '__main__':
    net = HybridSequential()
    net.add(YoloHead(5, 4, prefix='yolo_head/'))
    net.initialize()
    x = mx.nd.random_uniform(-1, 1, shape=(2, 1024, 13, 13))
    y = net(x)
    print(y.shape, y.mean())
class RetinaNet_Except_Anchor(HybridBlock):
    def __init__(self, version=18,
                 input_size=(512, 512),
                 anchor_size_ratios=[1, pow(2, 1 / 3), pow(2, 2 / 3)],
                 anchor_aspect_ratios=[0.5, 1, 2],
                 num_classes=1,
                 pretrained=False,
                 pretrained_path="modelparam",
                 ctx=mx.cpu()):
        super(RetinaNet_Except_Anchor, self).__init__()
        if version not in [18, 34, 50, 101, 152]:
            raise ValueError

        feature_sizes = []
        fpn_output = get_fpn_resnet(version, ctx=mx.cpu(), dummy=True)(
            mx.nd.random_uniform(low=0, high=1, shape=(1, 3, input_size[0], input_size[1]), ctx=mx.cpu()))
        for fpn in fpn_output:
            feature_sizes.append(fpn.shape[2:])  # h, w

        self._fpn_resnet = get_fpn_resnet(version, pretrained=pretrained, ctx=ctx, root=pretrained_path)
        self._num_classes = num_classes

        with self.name_scope():
            self._class_subnet = HybridSequential()
            self._box_subnet = HybridSequential()
            for _ in range(3):
                self._class_subnet.add(
                    ConvPredictor(num_channel=256,
                                  kernel=(3, 3),
                                  pad=(1, 1),
                                  stride=(1, 1),
                                  activation='relu',
                                  use_bias=True,
                                  in_channels=0,
                                  weight_initializer=mx.init.Normal(0.01),
                                  bias_initializer='zeros'))
                self._box_subnet.add(
                    ConvPredictor(num_channel=256,
                                  kernel=3,
                                  pad=1,
                                  stride=1,
                                  activation='relu',
                                  use_bias=True,
                                  in_channels=0,
                                  weight_initializer=mx.init.Normal(0.01),
                                  bias_initializer='zeros'))
            '''
            Why bias_initializer=mx.init.Constant(-np.log((1 - 0.01) / 0.01))?
            From the paper:
            For the final conv layer of the classification subnet, we set the bias
            initialization to b = -log((1 - pi)/pi), where pi specifies that at the
            start of training every anchor should be labeled as foreground with
            confidence of ~pi. We use pi = .01 in all experiments, although results
            are robust to the exact value. As explained in section 3.3, this
            initialization prevents the large number of background anchors from
            generating a large, destabilizing loss value in the first iteration
            of training.
            -> By initializing the final bias of the class subnet to
            b = -log((1 - pi)/pi), every anchor starts out as foreground with
            confidence 0.01; this keeps the overwhelming majority of background
            anchors from producing a large, destabilizing loss in the first
            training iteration.
            '''
            prior = 0.01
            self._class_subnet.add(
                ConvPredictor(num_channel=num_classes * len(anchor_size_ratios) * len(anchor_aspect_ratios),
                              kernel=(3, 3),
                              pad=(1, 1),
                              stride=(1, 1),
                              activation=None,
                              use_bias=True,
                              in_channels=0,
                              weight_initializer=mx.init.Normal(0.01),
                              bias_initializer=mx.init.Constant(-np.log((1 - prior) / prior))))
            self._box_subnet.add(
                ConvPredictor(num_channel=4 * len(anchor_size_ratios) * len(anchor_aspect_ratios),
                              kernel=3,
                              pad=1,
                              stride=1,
                              activation=None,
                              use_bias=True,
                              in_channels=0,
                              weight_initializer=mx.init.Normal(0.01),
                              bias_initializer='zeros'))

        self._class_subnet.initialize(ctx=ctx)
        self._box_subnet.initialize(ctx=ctx)
        print(f"{self.__class__.__name__} Head weight init complete")

    def hybrid_forward(self, F, x):
        # class / box prediction over the FPN features (p3, p4, p5, p6, p7)
        fpn_features = self._fpn_resnet(x)
        # (batch, height, width, class) -> (batch, -1)
        cls_preds = [F.flatten(data=F.transpose(data=self._class_subnet(fpn_feature), axes=(0, 2, 3, 1)))
                     for fpn_feature in fpn_features]
        # (batch, height, width, 4) -> (batch, -1)
        box_preds = [F.flatten(data=F.transpose(data=self._box_subnet(fpn_feature), axes=(0, 2, 3, 1)))
                     for fpn_feature in fpn_features]
        cls_preds = F.reshape(data=F.concat(*cls_preds, dim=-1), shape=(0, -1, self._num_classes))
        box_preds = F.reshape(data=F.concat(*box_preds, dim=-1), shape=(0, -1, 4))
        return cls_preds, box_preds
def train(args: argparse.Namespace) -> HybridBlock:
    session = boto3.session.Session()
    client = session.client(service_name="secretsmanager", region_name="us-east-1")
    mlflow_secret = client.get_secret_value(SecretId=args.mlflow_secret)
    mlflowdb_conf = json.loads(mlflow_secret["SecretString"])

    converters.encoders[np.float64] = converters.escape_float
    converters.conversions = converters.encoders.copy()
    converters.conversions.update(converters.decoders)

    mlflow.set_tracking_uri(
        f"mysql+pymysql://{mlflowdb_conf['username']}:{mlflowdb_conf['password']}@{mlflowdb_conf['host']}/mlflow"
    )
    if mlflow.get_experiment_by_name(args.mlflow_experiment) is None:
        mlflow.create_experiment(args.mlflow_experiment, args.mlflow_artifacts_location)
    mlflow.set_experiment(args.mlflow_experiment)

    col_names = ["target"] + [f"kinematic_{i}" for i in range(1, 22)]
    train_df = pd.read_csv(f"{args.train_channel}/train.csv.gz", header=None, names=col_names)
    val_df = pd.read_csv(f"{args.validation_channel}/val.csv.gz", header=None, names=col_names)

    train_X = train_df.drop("target", axis=1)
    train_y = train_df["target"]
    train_dataset = ArrayDataset(train_X.to_numpy(dtype="float32"), train_y.to_numpy(dtype="float32"))
    train = DataLoader(train_dataset, batch_size=args.batch_size)

    val_X = val_df.drop("target", axis=1)
    val_y = val_df["target"]
    val_dataset = ArrayDataset(val_X.to_numpy(dtype="float32"), val_y.to_numpy(dtype="float32"))
    validation = DataLoader(val_dataset, batch_size=args.batch_size)

    ctx = [gpu(i) for i in range(args.gpus)] if args.gpus > 0 else cpu()

    mlflow.gluon.autolog()
    with mlflow.start_run():
        net = HybridSequential()
        with net.name_scope():
            net.add(Dense(256))
            net.add(Dropout(0.2))
            net.add(Dense(64))
            net.add(Dropout(0.1))
            net.add(Dense(16))
            net.add(Dense(2))
        net.initialize(Xavier(magnitude=2.24), ctx=ctx)
        net.hybridize()

        trainer = Trainer(net.collect_params(), "sgd", {"learning_rate": args.learning_rate})
        est = estimator.Estimator(net=net,
                                  loss=SoftmaxCrossEntropyLoss(),
                                  trainer=trainer,
                                  train_metrics=Accuracy(),
                                  context=ctx)
        est.fit(train, epochs=args.epochs, val_data=validation)

    return net
class UpConvResNet(HybridBlock):
    def __init__(self, base=18,
                 deconv_channels=(256, 128, 64),
                 deconv_kernels=(4, 4, 4),
                 pretrained=True,
                 root=os.path.join(os.getcwd(), 'models'),
                 use_dcnv2=False,
                 ctx=mx.cpu()):
        mxnet_version = float(mx.__version__[0:3])
        if mxnet_version < 1.5:
            logging.error("please upgrade mxnet version above 1.5.x")
            raise EnvironmentError
        super(UpConvResNet, self).__init__()
        self._use_dcnv2 = use_dcnv2
        self._resnet = get_resnet(base, pretrained=pretrained, root=root, ctx=ctx)
        self._upconv = HybridSequential('')
        with self._upconv.name_scope():
            for channel, kernel in zip(deconv_channels, deconv_kernels):
                kernel, padding, output_padding = self._get_conv_argument(kernel)
                if self._use_dcnv2:
                    '''
                    In the paper: we first change the channels of the three upsampling layers
                    to 256, 128, 64, respectively, to save computation. We then add one
                    3 x 3 deformable convolutional layer before each up-convolution layer
                    with channel 256, 128, 64.
                    '''
                    assert hasattr(contrib.cnn, 'ModulatedDeformableConvolution'), \
                        "No ModulatedDeformableConvolution found in mxnet, consider upgrade to mxnet 1.6.0..."
                    self._upconv.add(contrib.cnn.ModulatedDeformableConvolution(channels=channel,
                                                                                kernel_size=3,
                                                                                strides=1,
                                                                                padding=1,
                                                                                use_bias=False,
                                                                                num_deformable_group=1))
                else:
                    self._upconv.add(Conv2D(channels=channel,
                                            kernel_size=3,
                                            strides=1,
                                            padding=1,
                                            use_bias=False))
                self._upconv.add(BatchNorm(momentum=0.9))
                self._upconv.add(Activation('relu'))
                self._upconv.add(Conv2DTranspose(channels=channel,
                                                 kernel_size=kernel,
                                                 strides=2,
                                                 padding=padding,
                                                 output_padding=output_padding,
                                                 use_bias=False,
                                                 weight_initializer=mx.init.Bilinear()))
                self._upconv.add(BatchNorm(momentum=0.9))
                self._upconv.add(Activation('relu'))
        self._upconv.initialize(ctx=ctx)
        logging.info(f"{self.__class__.__name__} weight init complete")

    def _get_conv_argument(self, kernel):
        """Get the upconv configs using presets"""
        if kernel == 4:
            padding = 1
            output_padding = 0
        elif kernel == 3:
            padding = 1
            output_padding = 1
        elif kernel == 2:
            padding = 0
            output_padding = 0
        else:
            raise ValueError('Unsupported deconvolution kernel: {}'.format(kernel))
        return kernel, padding, output_padding

    def hybrid_forward(self, F, x):
        x = self._resnet(x)
        x = self._upconv(x)
        return x
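# Sanity check for the _get_conv_argument presets above: with stride 2, every
# preset makes the Conv2DTranspose output exactly twice the input size, per
# out = (in - 1) * stride - 2 * padding + kernel + output_padding.
def deconv_out(in_size, kernel, padding, output_padding, stride=2):
    return (in_size - 1) * stride - 2 * padding + kernel + output_padding

# presets: (kernel, padding, output_padding) for kernel 4, 3, 2
for kernel, padding, output_padding in [(4, 1, 0), (3, 1, 1), (2, 0, 0)]:
    assert deconv_out(16, kernel, padding, output_padding) == 32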
class SSD_VGG16(HybridBlock):
    def __init__(self,
                 version=512,
                 input_size=(512, 512),
                 box_sizes=[21, 51.2, 133.12, 215.04, 296.96, 378.88, 460.8, 542.72],
                 box_ratios=[[1, 2, 0.5]] + [[1, 2, 0.5, 3, 1.0 / 3]] * 4 + [[1, 2, 0.5]] * 2,
                 num_classes=1,
                 pretrained=False,
                 pretrained_path="modelparam",
                 anchor_box_offset=(0.5, 0.5),
                 anchor_box_clip=False,
                 alloc_size=[256, 256],
                 ctx=mx.cpu()):
        super(SSD_VGG16, self).__init__()
        if version not in [300, 512]:
            raise ValueError
        if len(box_sizes) - 1 != len(box_ratios):
            raise ValueError

        feature_sizes = []
        features_output = VGG16(version=version, ctx=mx.cpu(), dummy=True)(
            mx.nd.random_uniform(low=0, high=1, shape=(1, 3, input_size[0], input_size[1]), ctx=mx.cpu()))
        for feature in features_output:
            feature_sizes.append(feature.shape[2:])  # h, w

        self._features = VGG16(version=version, pretrained=pretrained, ctx=ctx, root=pretrained_path)
        self._num_classes = num_classes
        sizes = list(zip(box_sizes[:-1], box_sizes[1:]))

        with self.name_scope():
            self._class_predictors = HybridSequential()
            self._box_predictors = HybridSequential()
            self._anchor_generators = HybridSequential()
            for index, size, ratio, feature_size in zip(range(len(feature_sizes)), sizes, box_ratios, feature_sizes):
                # note that activation is None here
                self._class_predictors.add(
                    ConvPredictor(num_channel=(len(ratio) + 1) * (num_classes + 1),
                                  kernel=(3, 3),
                                  pad=(1, 1),
                                  stride=(1, 1),
                                  activation=None,
                                  use_bias=True,
                                  in_channels=0,
                                  weight_initializer=mx.init.Xavier(rnd_type="uniform",
                                                                    factor_type="avg",
                                                                    magnitude=3)))
                self._box_predictors.add(
                    ConvPredictor(num_channel=(len(ratio) + 1) * 4,
                                  kernel=(3, 3),
                                  pad=(1, 1),
                                  stride=(1, 1),
                                  activation=None,
                                  use_bias=True,
                                  in_channels=0,
                                  weight_initializer=mx.init.Xavier(rnd_type="uniform",
                                                                    factor_type="avg",
                                                                    magnitude=3)))
                self._anchor_generators.add(
                    SSDAnchorGenerator(index=index,
                                       feature_size=feature_size,
                                       input_size=input_size,
                                       box_size=size,
                                       box_ratio=ratio,
                                       box_offset=anchor_box_offset,
                                       box_clip=anchor_box_clip,
                                       alloc_size=(alloc_size[0] // (2 ** index),
                                                   alloc_size[1] // (2 ** index))))
        self._class_predictors.initialize(ctx=ctx)
        self._box_predictors.initialize(ctx=ctx)
        self._anchor_generators.initialize(ctx=ctx)
        logging.info(f"{self.__class__.__name__} Head weight init complete")

    def hybrid_forward(self, F, x):
        # 1. VGG16 features
        feature_list = self._features(x)
        # 2. class / box prediction
        cls_preds = [F.flatten(data=F.transpose(data=class_prediction(feature), axes=(0, 2, 3, 1)))
                     # (batch, height, width, class)
                     for feature, class_prediction in zip(feature_list, self._class_predictors)]
        box_preds = [F.flatten(data=F.transpose(data=box_predictor(feature), axes=(0, 2, 3, 1)))
                     # (batch, height, width, box)
                     for feature, box_predictor in zip(feature_list, self._box_predictors)]
        # the features pass through the anchor generators but are not actually consumed
        anchors = [anchor_generator(feature)
                   for feature, anchor_generator in zip(feature_list, self._anchor_generators)]
        '''
        What does the 0 in shape=(0, ...) mean? It is a feature of mxnet's reshape;
        the details are at
        https://mxnet.incubator.apache.org/api/python/ndarray/ndarray.html#mxnet.ndarray.NDArray.reshape
        In short, 0 means "copy this dimension from the input to the output shape".
        There are also explanations for -1, -2, -3, -4; refer to them when needed.
        The advantage is that the first axis does not have to be written explicitly.
        '''
        # https://github.com/apache/incubator-mxnet/issues/13998 - expand_dims() makes a
        # copy instead of simply reshaping, so prefer reshape in cases like the one below.
        cls_preds = F.reshape(data=F.concat(*cls_preds, dim=-1), shape=(0, -1, self._num_classes + 1))
        box_preds = F.reshape(data=F.concat(*box_preds, dim=-1), shape=(0, -1, 4))
        # anchors = F.concat(*anchors, dim=0).expand_dims(axis=0)
        anchors = F.reshape(F.concat(*anchors, dim=0), shape=(1, -1, 4))
        return cls_preds, box_preds, anchors
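# A tiny demonstration of the shape=(0, ...) convention discussed above:
# 0 copies that dimension from the input, -1 infers the remainder.
x = mx.nd.zeros((2, 6, 4))
print(x.reshape((0, -1)).shape)     # (2, 24): batch axis copied, rest inferred
print(x.reshape((0, -1, 4)).shape)  # (2, 6, 4)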
mnist_train = gluon.data.vision.FashionMNIST(train=True)
train_data = gluon.data.DataLoader(mnist_train.transform_first(transformer),
                                   batch_size=batch_size, shuffle=True,
                                   num_workers=4)

mnist_valid = gluon.data.vision.FashionMNIST(train=False)
valid_data = gluon.data.DataLoader(mnist_valid.transform_first(transformer),
                                   batch_size=batch_size,
                                   num_workers=4)

# Only hybrid based networks can be exported
net = HybridSequential()
net.add(
    Conv2D(channels=6, kernel_size=5, activation="relu"),
    MaxPool2D(pool_size=2, strides=2),
    Conv2D(channels=16, kernel_size=3, activation="relu"),
    MaxPool2D(pool_size=2, strides=2),
    Flatten(),
    Dense(120, activation="relu"),
    Dense(84, activation="relu"),
    Dense(10),
)
net.initialize(init=init.Xavier())
# Only after hybridization can a model be exported with its architecture included
net.hybridize()

trainer = Trainer(net.collect_params(), "sgd", {"learning_rate": 0.1})
est = estimator.Estimator(net=net,
                          loss=SoftmaxCrossEntropyLoss(),
                          train_metrics=Accuracy(),
                          trainer=trainer)
est.fit(train_data=train_data, epochs=2, val_data=valid_data)
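# Since the network above was hybridized and run through est.fit, it can now be
# exported with its architecture; a sketch using gluon's standard export API
# (the "lenet" prefix is illustrative). This writes lenet-symbol.json and
# lenet-0002.params to the working directory.
net.export("lenet", epoch=2)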
               kernel_size=(3, 3),
               padding=(3 // 2, 3 // 2),
               activation='relu'),
        BatchNorm(axis=1, momentum=0.9),
        MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
        # layer 5
        Flatten(),
        Dropout(0.3),
        Dense(128, activation='relu'),
        # layer 6
        Dense(10))

# %%
# -- Initialize parameters

net.initialize(init=init.Xavier(), ctx=mx_ctx)

for name, param in net.collect_params().items():
    print(name)

# %%
# -- Define loss function and optimizer

loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'Adam', {'learning_rate': 0.001})

# %%
# -- Custom metric function

def acc(output, label):
    # predicted class is the index of the max score; compare against the label
    return (output.argmax(axis=1) == label.astype('float32')).mean().asscalar()
    return d_iter


batch_size = 256
train_iter, test_iter = d2l.load_data_mnist(batch_size=1)
train_iter = blendData(train_iter, imgs)
test_iter = blendData(test_iter, imgs)

num_classes = 2
net = HybridSequential()
net.add(Conv2D(channels=3, kernel_size=5, activation='sigmoid'),
        MaxPool2D(pool_size=2, strides=2),
        Conv2D(channels=8, kernel_size=5, activation='sigmoid'),
        MaxPool2D(pool_size=2, strides=2),
        Conv2D(channels=120, kernel_size=4, activation='sigmoid'),
        Conv2D(channels=84, kernel_size=1, activation='sigmoid'),
        Conv2D(channels=10, kernel_size=1),
        Conv2DTranspose(num_classes, kernel_size=56, padding=14, strides=28, activation='sigmoid'))

lr, num_epochs = 0.9, 10
ctx = d2l.try_gpu()
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)