def Up(x1, x2, in_channels, out_channels, bilinear=True):
    """Build one upscaling stage: upsample ``x1``, join it with ``x2``, convolve.

    Args:
        x1: layer to be upsampled by a factor of 2.
        x2: layer concatenated (along axis 1) after the upsampled ``x1``.
        in_channels: channel count; ``in_channels // 2`` is used as the
            number of filters of the transposed convolution, or as the
            third positional argument of ``DoubleConv`` in bilinear mode.
        out_channels: second argument forwarded to ``DoubleConv``.
        bilinear: when true, upsample with ``eddl.UpSampling2D`` using
            bilinear interpolation; otherwise use ``eddl.ConvT2D``.

    Returns:
        The layer produced by ``DoubleConv`` on the concatenated input.
    """
    half_channels = in_channels // 2
    if bilinear:
        upsampled = eddl.UpSampling2D(
            x1, size=[2, 2], interpolation='bilinear'
        )
        # Only the bilinear path passes the halved channel count on.
        conv_args = (out_channels, half_channels)
    else:
        upsampled = eddl.ConvT2D(
            x1,
            half_channels,
            kernel_size=[2, 2],
            output_padding='same',
            strides=[2, 2],
        )
        conv_args = (out_channels,)
    merged = eddl.Concat([upsampled, x2], axis=1)
    return DoubleConv(merged, *conv_args)
def UNetWithPadding(layer):
    """Assemble a five-level encoder/decoder network on top of ``layer``.

    Encoder: every level applies two ``LBC`` blocks (3x3 kernel, stride 1,
    "same" padding); levels are separated by 2x2 max pooling with stride 2.
    The filter counts are 32, 64, 128, 256 and 256.

    Decoder: every step upsamples by 2, applies a 3x3 "same" convolution,
    merges the result with the encoder output of the matching level
    (``eddl.Concat`` if the module-level ``USE_CONCAT`` is truthy,
    ``eddl.Add`` otherwise) and applies two more ``LBC`` blocks.

    A final 1x1 convolution with one filter, followed by batch
    normalization, yields the output layer.

    Args:
        layer: input layer of the network.

    Returns:
        The output layer described above.
    """
    base = 32

    def conv_pair(node, filters):
        # Two consecutive LBC blocks with identical settings.
        node = LBC(node, filters, [3, 3], [1, 1], "same")
        return LBC(node, filters, [3, 3], [1, 1], "same")

    merge = eddl.Concat if USE_CONCAT else eddl.Add

    # Contracting path; keep every level's output for the skip connections.
    skips = [conv_pair(layer, base)]
    for factor in (2, 4, 8, 8):
        pooled = eddl.MaxPool(skips[-1], [2, 2], [2, 2])
        skips.append(conv_pair(pooled, factor * base))

    # Expanding path, starting from the deepest level.
    node = skips.pop()
    # NOTE(review): the 2*base stage is the only one whose upsampling
    # convolution is not batch-normalized; kept exactly as found.
    for factor, normalize in ((8, True), (4, True), (2, False), (1, True)):
        filters = factor * base
        upsampled = eddl.Conv(
            eddl.UpSampling(node, [2, 2]), filters, [3, 3], [1, 1], "same"
        )
        if normalize:
            upsampled = eddl.BatchNormalization(upsampled, True)
        node = conv_pair(merge([skips.pop(), upsampled]), filters)

    return eddl.BatchNormalization(eddl.Conv(node, 1, [1, 1]), True)
def main(args):
    """Train an image-to-text model on the Flickr data and step its decoder.

    Steps performed:

    1. Download the Flickr dataset and a ResNet18 with its top removed.
    2. Attach an embedding + LSTM decoder to the ResNet output, train the
       resulting model and save it to ``img2text.bin``.
    3. Build a CNN-only model to compute image features for one batch.
    4. Build a non-recurrent copy of the decoder, copy the trained
       parameters into it and run it word by word on one sample.

    Args:
        args: parsed command-line options. The attributes read here are
            ``small`` (use a reduced dataset and 2 epochs), ``gpu``
            (select the GPU computing service), ``mem`` (memory setting
            passed to the computing service) and ``batch_size``.
    """
    eddl.download_flickr()

    epochs = 2 if args.small else 50

    olength = 20  # time steps per target sequence (see reshape below)
    outvs = 2000  # one-hot size of the decoder input/output
    embdim = 32  # embedding dimension

    # True: remove last layers and set new top = flatten
    # new input_size: [3, 256, 256] (from [224, 224, 3])
    net = eddl.download_resnet18(True, [3, 256, 256])
    lreshape = eddl.getLayer(net, "top")

    # create a new model from input output
    image_in = eddl.getLayer(net, "input")

    # Decoder
    ldecin = eddl.Input([outvs])
    ldec = eddl.ReduceArgMax(ldecin, [0])
    ldec = eddl.RandomUniform(
        eddl.Embedding(ldec, outvs, 1, embdim, True), -0.05, 0.05
    )
    ldec = eddl.Concat([ldec, lreshape])
    layer = eddl.LSTM(ldec, 512, True)
    out = eddl.Softmax(eddl.Dense(layer, outvs))
    eddl.setDecoder(ldecin)
    net = eddl.Model([image_in], [out])

    # Build model
    eddl.build(
        net,
        eddl.adam(0.01),
        ["softmax_cross_entropy"],
        ["accuracy"],
        eddl.CS_GPU(mem=args.mem) if args.gpu else eddl.CS_CPU(mem=args.mem)
    )
    eddl.summary(net)

    # Load dataset
    x_train = Tensor.load("flickr_trX.bin", "bin")
    y_train = Tensor.load("flickr_trY.bin", "bin")
    if args.small:
        x_train = x_train.select([f"0:{2 * args.batch_size}", ":", ":", ":"])
        y_train = y_train.select([f"0:{2 * args.batch_size}", ":"])
    # Move the last axis to position 1 to match the [3, 256, 256] input.
    xtrain = Tensor.permute(x_train, [0, 3, 1, 2])
    y_train = Tensor.onehot(y_train, outvs)
    # batch x timesteps x input_dim
    y_train.reshape_([y_train.shape[0], olength, outvs])

    eddl.fit(net, [xtrain], [y_train], args.batch_size, epochs)
    eddl.save(net, "img2text.bin", "bin")

    print("\n === INFERENCE ===\n")

    # Get all the reshapes of the images. Only use the CNN
    timage = Tensor([x_train.shape[0], 512])  # images reshape
    cnn = eddl.Model([image_in], [lreshape])
    eddl.build(
        cnn,
        eddl.adam(0.001),  # not relevant
        ["mse"],  # not relevant
        ["mse"],  # not relevant
        eddl.CS_GPU(mem=args.mem) if args.gpu else eddl.CS_CPU(mem=args.mem)
    )
    eddl.summary(cnn)

    # forward images
    xbatch = Tensor([args.batch_size, 3, 256, 256])
    # numbatches = x_train.shape[0] / args.batch_size
    j = 0
    # Draw the batch from the permuted tensor: the network was trained on
    # ``xtrain`` and ``xbatch`` uses the same [3, 256, 256] per-sample
    # layout, whereas ``x_train`` still has the channel axis last.
    eddl.next_batch([xtrain], [xbatch])
    eddl.forward(cnn, [xbatch])
    ybatch = eddl.getOutput(lreshape)
    sample = f"{j * args.batch_size}:{(j + 1) * args.batch_size}"
    # NOTE(review): only rows [0, batch_size) of ``timage`` are written;
    # the sample index ``s`` used below may lie outside that range.
    timage.set_select([sample, ":"], ybatch)

    # Create Decoder non recurrent for n-best
    ldecin = eddl.Input([outvs])
    image = eddl.Input([512])
    lstate = eddl.States([2, 512])
    ldec = eddl.ReduceArgMax(ldecin, [0])
    ldec = eddl.RandomUniform(
        eddl.Embedding(ldec, outvs, 1, embdim), -0.05, 0.05
    )
    ldec = eddl.Concat([ldec, image])
    lstm = eddl.LSTM([ldec, lstate], 512, True)
    lstm.isrecurrent = False  # Important
    out = eddl.Softmax(eddl.Dense(lstm, outvs))
    decoder = eddl.Model([ldecin, image, lstate], [out])
    eddl.build(
        decoder,
        eddl.adam(0.001),  # not relevant
        ["softmax_cross_entropy"],  # not relevant
        ["accuracy"],  # not relevant
        eddl.CS_GPU(mem=args.mem) if args.gpu else eddl.CS_CPU(mem=args.mem)
    )
    eddl.summary(decoder)

    # Copy params from trained net
    eddl.copyParam(
        eddl.getLayer(net, "LSTM1"), eddl.getLayer(decoder, "LSTM2")
    )
    eddl.copyParam(
        eddl.getLayer(net, "dense1"), eddl.getLayer(decoder, "dense2")
    )
    eddl.copyParam(
        eddl.getLayer(net, "embedding1"), eddl.getLayer(decoder, "embedding2")
    )

    # N-best for sample s
    s = 1 if args.small else 100  # sample 100
    # three input tensors with batch_size = 1 (one sentence)
    treshape = timage.select([str(s), ":"])
    text = y_train.select([str(s), ":", ":"])  # 1 x olength x outvs
    for j in range(olength):
        print(f"Word: {j}")
        if j == 0:
            # No previous word at the first step: feed an all-zero vector.
            word = Tensor.zeros([1, outvs])
        else:
            word = text.select(["0", str(j - 1), ":"])
            word.reshape_([1, outvs])  # batch = 1
        treshape.reshape_([1, 512])  # batch = 1
        # NOTE(review): the state is re-created as zeros on every step, so
        # the values copied into it below are not fed to the next step.
        # Confirm whether it should be created once before the loop.
        state = Tensor.zeros([1, 2, 512])  # batch = 1
        input_ = [word, treshape, state]
        eddl.forward(decoder, input_)
        # outword = eddl.getOutput(out)
        vstates = eddl.getStates(lstm)
        for i, vstate in enumerate(vstates):
            vstate.reshape_([1, 1, 512])
            state.set_select([":", str(i), ":"], vstate)

    print("All done")
def main(args):
    """Train a segmentation network on the DRIVE data with on-the-fly augmentation.

    Two models are built:

    * a data-augmentation net that concatenates image and mask, applies a
      random crop/scale followed by a 512x512 centered crop, and exposes
      the first three channels (image) and the fourth one (mask);
    * the segmentation net, ``UNetWithPadding`` followed by a sigmoid,
      trained with MSE loss on the augmented batches.

    During the last epoch, the first sample of the network output of every
    batch is written to ``./out_<batch index>.jpg``.

    Args:
        args: parsed command-line options. The attributes read here are
            ``gpu``, ``mem``, ``batch_size``, ``epochs`` and
            ``num_batches``.
    """
    eddl.download_drive()

    # Data-augmentation graph: image and mask travel together so that both
    # receive the same random transformation.
    image_in = eddl.Input([3, 584, 584])
    mask_in = eddl.Input([1, 584, 584])
    joined = eddl.Concat([image_in, mask_in])
    joined = eddl.RandomCropScale(joined, [0.9, 1.0])
    joined = eddl.CenteredCrop(joined, [512, 512])
    img = eddl.Select(joined, ["0:3"])
    mask = eddl.Select(joined, ["3"])

    # DA net
    danet = eddl.Model([image_in, mask_in], [])
    eddl.build(danet)
    if args.gpu:
        eddl.toGPU(danet, mem="low_mem")
    eddl.summary(danet)

    # SegNet
    seg_in = eddl.Input([3, 512, 512])
    out = eddl.Sigmoid(UNetWithPadding(seg_in))
    segnet = eddl.Model([seg_in], [out])
    optimizer = eddl.adam(0.00001)
    if args.gpu:
        comp_service = eddl.CS_GPU(mem=args.mem)
    else:
        comp_service = eddl.CS_CPU(mem=args.mem)
    eddl.build(
        segnet,
        optimizer,
        ["mse"],  # Losses
        ["mse"],  # Metrics
        comp_service,
    )
    eddl.summary(segnet)

    print("Reading training data")
    raw_images = Tensor.load("drive_trX.bin")
    x_train = raw_images.permute([0, 3, 1, 2])
    x_train.info()
    x_train.div_(255.0)

    # The message below says "test", but the tensor loaded here is used as
    # the training target in the loop further down.
    print("Reading test data")
    y_train = Tensor.load("drive_trY.bin")
    y_train.info()
    y_train.reshape_([20, 1, 584, 584])
    y_train.div_(255.0)

    xbatch = Tensor([args.batch_size, 3, 584, 584])
    ybatch = Tensor([args.batch_size, 1, 584, 584])

    print("Starting training")
    for epoch in range(args.epochs):
        print("\nEpoch %d/%d" % (epoch + 1, args.epochs))
        eddl.reset_loss(segnet)
        is_last_epoch = epoch == args.epochs - 1
        for batch in range(args.num_batches):
            eddl.next_batch([x_train, y_train], [xbatch, ybatch])

            # DA net
            eddl.forward(danet, [xbatch, ybatch])
            augmented_x = eddl.getOutput(img)
            augmented_y = eddl.getOutput(mask)

            # SegNet
            eddl.train_batch(segnet, [augmented_x], [augmented_y])
            eddl.print_loss(segnet, batch)

            if is_last_epoch:
                # Dump the first prediction of the batch as an image.
                prediction = eddl.getOutput(out).select(["0"])
                prediction.save("./out_%d.jpg" % batch)
            print()
    print("All done")