def main(args):
    """Train an image-to-text model on the Flickr data and step a decoder.

    The function runs three stages:

    1. Training: a ResNet18 (top removed, input resized to
       ``[3, 256, 256]``) is concatenated with an embedded previous word
       and fed to an LSTM + softmax decoder. The trained model is saved to
       ``img2text.bin``.
    2. Feature extraction: a CNN-only model sharing the same layers is
       built and one batch of images is forwarded through it.
    3. Decoding: a non-recurrent copy of the decoder is built, the trained
       parameters are copied into it, and it is stepped word by word for
       a single sample, carrying the LSTM states between steps.

    Args:
        args: parsed command-line options. The attributes read here are
            ``small`` (use a reduced data set and fewer epochs), ``gpu``
            (select the GPU computing service), ``mem`` (memory setting
            passed to the computing service) and ``batch_size``.
    """

    def computing_service():
        # Each build() call gets its own computing-service object, exactly
        # as if the expression were written inline at every call site.
        if args.gpu:
            return eddl.CS_GPU(mem=args.mem)
        return eddl.CS_CPU(mem=args.mem)

    eddl.download_flickr()

    epochs = 2 if args.small else 50

    olength = 20
    outvs = 2000
    embdim = 32

    # True: remove last layers and set new top = flatten
    # new input_size: [3, 256, 256] (from [224, 224, 3])
    net = eddl.download_resnet18(True, [3, 256, 256])
    lreshape = eddl.getLayer(net, "top")

    # create a new model from input output
    image_in = eddl.getLayer(net, "input")

    # Decoder
    ldecin = eddl.Input([outvs])
    ldec = eddl.ReduceArgMax(ldecin, [0])
    ldec = eddl.RandomUniform(eddl.Embedding(ldec, outvs, 1, embdim, True),
                              -0.05, 0.05)

    ldec = eddl.Concat([ldec, lreshape])
    layer = eddl.LSTM(ldec, 512, True)
    out = eddl.Softmax(eddl.Dense(layer, outvs))
    eddl.setDecoder(ldecin)
    net = eddl.Model([image_in], [out])

    # Build model
    eddl.build(
        net, eddl.adam(0.01), ["softmax_cross_entropy"], ["accuracy"],
        computing_service())
    eddl.summary(net)

    # Load dataset
    x_train = Tensor.load("flickr_trX.bin", "bin")
    y_train = Tensor.load("flickr_trY.bin", "bin")
    if args.small:
        subset = f"0:{2 * args.batch_size}"
        x_train = x_train.select([subset, ":", ":", ":"])
        y_train = y_train.select([subset, ":"])
    # Move the last axis to position 1 so the images match the
    # [3, 256, 256] network input.
    xtrain = Tensor.permute(x_train, [0, 3, 1, 2])
    y_train = Tensor.onehot(y_train, outvs)
    # batch x timesteps x input_dim
    y_train.reshape_([y_train.shape[0], olength, outvs])

    eddl.fit(net, [xtrain], [y_train], args.batch_size, epochs)
    eddl.save(net, "img2text.bin", "bin")

    print("\n === INFERENCE ===\n")

    # Get all the reshapes of the images. Only use the CNN
    timage = Tensor([x_train.shape[0], 512])  # images reshape
    cnn = eddl.Model([image_in], [lreshape])
    eddl.build(
        cnn,
        eddl.adam(0.001),  # not relevant
        ["mse"],  # not relevant
        ["mse"],  # not relevant
        computing_service())
    eddl.summary(cnn)

    # Forward images. Only a single batch (index 0) is processed.
    xbatch = Tensor([args.batch_size, 3, 256, 256])
    batch_index = 0
    # Draw the batch from the permuted tensor: it has the same axis layout
    # as xbatch and as the data the network was trained on.
    eddl.next_batch([xtrain], [xbatch])
    eddl.forward(cnn, [xbatch])
    ybatch = eddl.getOutput(lreshape)
    first = batch_index * args.batch_size
    last = (batch_index + 1) * args.batch_size
    timage.set_select([f"{first}:{last}", ":"], ybatch)
    # NOTE(review): rows of timage at index >= batch_size are never written
    # in this function, yet the sample index chosen below may be 100.
    # Confirm batch_size is large enough, or forward every batch.

    # Create Decoder non recurrent for n-best
    ldecin = eddl.Input([outvs])
    image = eddl.Input([512])
    lstate = eddl.States([2, 512])
    ldec = eddl.ReduceArgMax(ldecin, [0])
    ldec = eddl.RandomUniform(eddl.Embedding(ldec, outvs, 1, embdim), -0.05,
                              0.05)
    ldec = eddl.Concat([ldec, image])
    lstm = eddl.LSTM([ldec, lstate], 512, True)
    lstm.isrecurrent = False  # Important
    out = eddl.Softmax(eddl.Dense(lstm, outvs))
    decoder = eddl.Model([ldecin, image, lstate], [out])
    eddl.build(
        decoder,
        eddl.adam(0.001),  # not relevant
        ["softmax_cross_entropy"],  # not relevant
        ["accuracy"],  # not relevant
        computing_service())
    eddl.summary(decoder)

    # Copy params from trained net
    eddl.copyParam(eddl.getLayer(net, "LSTM1"),
                   eddl.getLayer(decoder, "LSTM2"))
    eddl.copyParam(eddl.getLayer(net, "dense1"),
                   eddl.getLayer(decoder, "dense2"))
    eddl.copyParam(eddl.getLayer(net, "embedding1"),
                   eddl.getLayer(decoder, "embedding2"))

    # N-best for sample s
    s = 1 if args.small else 100  # sample 100
    # three input tensors with batch_size = 1 (one sentence)
    treshape = timage.select([str(s), ":"])
    treshape.reshape_([1, 512])  # batch = 1
    text = y_train.select([str(s), ":", ":"])  # 1 x olength x outvs
    # The state tensor is created once, before the loop, so the LSTM states
    # written back after each step are the ones fed to the next step.
    state = Tensor.zeros([1, 2, 512])  # batch = 1
    for j in range(olength):
        print(f"Word: {j}")
        if j == 0:
            # No previous word for the first step: feed an all-zero vector.
            word = Tensor.zeros([1, outvs])
        else:
            word = text.select(["0", str(j - 1), ":"])
            word.reshape_([1, outvs])  # batch = 1
        eddl.forward(decoder, [word, treshape, state])
        # outword = eddl.getOutput(out)
        for i, vstate in enumerate(eddl.getStates(lstm)):
            vstate.reshape_([1, 1, 512])
            state.set_select([":", str(i), ":"], vstate)

    print("All done")
# Example no. 2
def main(args):
    """Build and train a recurrent encoder-decoder on the EuTrans data.

    An embedding + LSTM encoder reads one word per step; its states
    initialise an embedding + LSTM decoder that ends in a dense softmax
    layer. The model is trained with repeated single-epoch ``fit`` calls.
    The test tensors are loaded and reshaped but not otherwise used here.

    Args:
        args: parsed command-line options. The attributes read here are
            ``small``, ``gpu``, ``mem`` and ``batch_size``.
    """
    eddl.download_eutrans()

    n_epochs = 1 if args.small else 5

    in_len = 30
    out_len = 30
    in_vocab = 687
    out_vocab = 514
    emb_dim = 64

    # Encoder: one word per time step
    enc_in = eddl.Input([1])
    enc_emb = eddl.Embedding(enc_in, in_vocab, 1, emb_dim, True)
    enc_emb = eddl.RandomUniform(enc_emb, -0.05, 0.05)
    encoder = eddl.LSTM(enc_emb, 128, True)
    enc_states = eddl.GetStates(encoder)

    # Decoder: starts from the encoder states
    dec_in = eddl.Input([out_vocab])
    dec_word = eddl.ReduceArgMax(dec_in, [0])
    dec_emb = eddl.Embedding(dec_word, out_vocab, 1, emb_dim)
    dec_emb = eddl.RandomUniform(dec_emb, -0.05, 0.05)
    dec_lstm = eddl.LSTM([dec_emb, enc_states], 128)
    out = eddl.Softmax(eddl.Dense(dec_lstm, out_vocab))
    eddl.setDecoder(dec_in)

    net = eddl.Model([enc_in], [out])

    # Build model
    optimizer = eddl.adam(0.01)
    losses = ["softmax_cross_entropy"]
    metrics = ["accuracy"]
    if args.gpu:
        service = eddl.CS_GPU(mem=args.mem)
    else:
        service = eddl.CS_CPU(mem=args.mem)
    eddl.build(net, optimizer, losses, metrics, service)
    eddl.summary(net)

    def load_split(x_path, y_path):
        # Load one (inputs, one-hot targets) pair and give both tensors the
        # batch x timesteps x dim layout.
        inputs = Tensor.load(x_path)
        targets = Tensor.onehot(Tensor.load(y_path), out_vocab)
        inputs.reshape_([inputs.shape[0], in_len, 1])
        targets.reshape_([targets.shape[0], out_len, out_vocab])
        return inputs, targets

    # Load dataset
    x_train, y_train = load_split("eutrans_trX.bin", "eutrans_trY.bin")
    x_test, y_test = load_split("eutrans_tsX.bin", "eutrans_tsY.bin")

    if args.small:
        # Keep only the first three batches of every tensor.
        sel = [f":{3 * args.batch_size}", ":", ":"]
        x_train, y_train, x_test, y_test = (
            tensor.select(sel)
            for tensor in (x_train, y_train, x_test, y_test)
        )

    # Train model
    # NOTE(review): ybatch is filled here but not read afterwards; the call
    # is kept so the sequence of library calls is unchanged.
    ybatch = Tensor([args.batch_size, out_len, out_vocab])
    eddl.next_batch([y_train], [ybatch])
    for _ in range(n_epochs):
        eddl.fit(net, [x_train], [y_train], args.batch_size, 1)

    print("All done")