Python Adagrad Examples

Programming Language: Python

Namespace/Package Name: optim.Adagrad

Class/Type: Adagrad

Examples at hotexamples.com: 7

Python Adagrad - 7 examples found. These are the top rated real world Python examples of optim.Adagrad.Adagrad extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Adagrad(7)

Frequently Used Methods

Adagrad (7)

Example #1

Show file

def main():
    """Build, train and/or test a short-document classifier.

    The single command-line argument is the path of a JSON configuration file,
    which is parsed with ``JsonArgParser(PARAMETERS)``. The network is
    assembled in this order: word-window embedding lookup -> flatten ->
    linear "convolution" layer (optionally followed by tanh) -> max pooling ->
    hidden linear layer + tanh -> linear layer + softmax.

    When the ``train`` argument is set, the model is trained with Adagrad or
    SGD and the layers are optionally saved. When the ``test`` argument is
    set, the model is evaluated on that dataset.

    The process exits with status 1 whenever one of the validated
    configuration values is missing or invalid.
    """
    full_path = os.path.realpath(__file__)
    path, filename = os.path.split(full_path)
    logging.config.fileConfig(os.path.join(path, 'logging.conf'), defaults={})
    log = logging.getLogger(__name__)

    if len(sys.argv) != 2:
        log.error("Missing argument: <JSON config file>")
        sys.exit(1)

    argsDict = JsonArgParser(PARAMETERS).parse(sys.argv[1])
    args = dict2obj(argsDict, 'ShortDocArguments')
    log.info(argsDict)

    if args.seed:
        random.seed(args.seed)
        np.random.seed(args.seed)

    lr = args.lr
    startSymbol = args.start_symbol
    endSymbol = args.end_symbol
    numEpochs = args.num_epochs
    shuffle = args.shuffle
    normalizeMethod = args.normalization
    wordWindowSize = args.word_window_size
    hiddenLayerSize = args.hidden_size
    convSize = args.conv_size

    # Load classes for filters. Each entry is a dotted path "module.ClassName";
    # the class is instantiated with no arguments.
    filters = []
    for filterName in args.filters:
        moduleName, className = filterName.rsplit('.', 1)
        log.info("Filtro: " + moduleName + " " + className)

        module_ = importlib.import_module(moduleName)
        filters.append(getattr(module_, className)())

    # Pre-trained parameters; they stay None unless loaded from file below.
    W1 = None
    b1 = None
    W2 = None
    b2 = None

    wordEmbedding = None
    if args.word_embedding:
        log.info("Reading W2v File")
        (wordLexicon, wordEmbedding) = Embedding.fromWord2Vec(args.word_embedding, unknownSymbol="__UNKNOWN__")
        wordLexicon.stopAdd()
    elif args.word_lexicon and args.word_emb_size:
        wordLexicon = Lexicon.fromTextFile(args.word_lexicon, hasUnknowSymbol=False)
        wordEmbedding = Embedding(wordLexicon, embeddingSize=args.word_emb_size)
        wordLexicon.stopAdd()
    else:
        log.error("You must provide argument word_embedding or word_lexicon and word_emb_size")
        # Neither the lexicon nor the embedding exists at this point, so the
        # network cannot be built: abort instead of failing later on None.
        sys.exit(1)

    # Create the lexicon of labels.
    labelLexicon = None
    if args.labels is not None:
        if args.label_lexicon is not None:
            log.error("Only one of the parameters label_lexicon and labels can be provided!")
            sys.exit(1)
        labelLexicon = Lexicon.fromList(args.labels, hasUnknowSymbol=False)
    elif args.label_lexicon is not None:
        labelLexicon = Lexicon.fromTextFile(args.label_lexicon, hasUnknowSymbol=False)
    else:
        log.error("One of the parameters label_lexicon or labels must be provided!")
        sys.exit(1)

    #
    # Build the network model (Theano graph).
    #

    # TODO: debug
    # theano.config.compute_test_value = 'warn'
    # ex = trainIterator.next()
    # inWords.tag.test_value = ex[0][0]
    # outLabel.tag.test_value = ex[1][0]

    # Input matrix. Each row represents one token of the offer. Each token is
    # represented by a window of tokens (the central token and some
    # neighbouring tokens). Each value of this matrix is an index that
    # identifies a token in the embedding.
    inWords = tensor.lmatrix("inWords")

    # Correct category of an offer.
    outLabel = tensor.lscalar("outLabel")

    # List of input tensors. One for each input layer.
    inputTensors = [inWords]

    # Whether the word embedding will be updated during training.
    embLayerTrainable = not args.fix_word_embedding

    if not embLayerTrainable:
        log.info("Not updating the word embedding!")

    # Lookup table for word features.
    embeddingLayer = EmbeddingLayer(inWords, wordEmbedding.getEmbeddingMatrix(), trainable=embLayerTrainable)

    # if not args.train and args.load_wordEmbedding:
    #     attrs = np.load(args.load_wordEmbedding)
    #     embeddingLayer.load(attrs)
    #     log.info("Loaded word embedding (shape %s) from file %s" % (
    #         str(attrs[0].shape), args.load_wordEmbedding))

    # The lookup table output has 3 dimensions (numTokens, szWindow,
    # szEmbedding). This layer flattens the last two dimensions, producing an
    # output with shape (numTokens, szWindow * szEmbedding).
    flattenInput = FlattenLayer(embeddingLayer)

    # Random weight initialization procedure.
    weightInit = GlorotUniform()

    # Convolution layer: convolution over the text of an offer.
    convW = None
    convb = None

    # Saved parameters are only loaded when not training.
    if not args.train and args.load_conv:
        convNPY = np.load(args.load_conv)
        convW = convNPY[0]
        convb = convNPY[1]
        log.info("Loaded convolutional layer (shape %s) from file %s" % (str(convW.shape), args.load_conv))

    convLinear = LinearLayer(flattenInput,
                             wordWindowSize * wordEmbedding.getEmbeddingSize(),
                             convSize, W=convW, b=convb,
                             weightInitialization=weightInit)

    if args.conv_act:
        convOut = ActivationLayer(convLinear, tanh)
    else:
        convOut = convLinear

    # Max pooling layer.
    maxPooling = MaxPoolingLayer(convOut)

    # Hidden layer.
    if not args.train and args.load_hiddenLayer:
        hiddenNPY = np.load(args.load_hiddenLayer)
        W1 = hiddenNPY[0]
        b1 = hiddenNPY[1]
        log.info("Loaded hidden layer (shape %s) from file %s" % (str(W1.shape), args.load_hiddenLayer))

    hiddenLinear = LinearLayer(maxPooling,
                               convSize,
                               hiddenLayerSize,
                               W=W1, b=b1,
                               weightInitialization=weightInit)

    hiddenAct = ActivationLayer(hiddenLinear, tanh)

    # Linear input of the softmax layer.
    if not args.train and args.load_softmax:
        softmaxNPY = np.load(args.load_softmax)
        W2 = softmaxNPY[0]
        b2 = softmaxNPY[1]
        log.info("Loaded softmax layer (shape %s) from file %s" % (str(W2.shape), args.load_softmax))

    sotmaxLinearInput = LinearLayer(hiddenAct,
                                    hiddenLayerSize,
                                    labelLexicon.getLen(),
                                    W=W2, b=b2,
                                    weightInitialization=ZeroWeightGenerator())

    # Softmax.
    # softmaxAct = ReshapeLayer(ActivationLayer(sotmaxLinearInput, softmax), (1, -1))
    softmaxAct = ActivationLayer(sotmaxLinearInput, softmax)

    # Prediction layer (argmax).
    prediction = ArgmaxPrediction(None).predict(softmaxAct.getOutput())

    # Loss function. The output layer has one unit per label, so a weight list
    # of a different length is a configuration error: abort rather than
    # building a loss with inconsistent weights.
    if args.label_weights is not None and len(args.label_weights) != labelLexicon.getLen():
        log.error("Number of label weights (%d) is different from number of labels (%d)!" % (
            len(args.label_weights), labelLexicon.getLen()))
        sys.exit(1)
    nlloe = NegativeLogLikelihoodOneExample(weights=args.label_weights)
    loss = nlloe.calculateError(softmaxAct.getOutput()[0], prediction, outLabel)

    # Input generators: word window.
    inputGenerators = [WordWindowGenerator(wordWindowSize, wordLexicon, filters, startSymbol, endSymbol)]

    # Output generator: generate one label per offer.
    outputGenerators = [TextLabelGenerator(labelLexicon)]
    # outputGenerators = [lambda label: labelLexicon.put(label)]

    evalPerIteration = None
    if args.train:
        trainDatasetReader = ShortDocReader(args.train)
        if args.load_method == "sync":
            log.info("Reading training examples...")
            trainIterator = SyncBatchIterator(trainDatasetReader,
                                              inputGenerators,
                                              outputGenerators,
                                              - 1,
                                              shuffle=shuffle)
            wordLexicon.stopAdd()
        elif args.load_method == "async":
            log.info("Examples will be asynchronously loaded.")
            trainIterator = AsyncBatchIterator(trainDatasetReader,
                                               inputGenerators,
                                               outputGenerators,
                                               - 1,
                                               shuffle=shuffle,
                                               maxqSize=1000)
        else:
            log.error("The argument 'load_method' has an invalid value: %s." % args.load_method)
            sys.exit(1)

        labelLexicon.stopAdd()

        # Get dev inputs and output
        dev = args.dev
        evalPerIteration = args.eval_per_iteration
        if not dev and evalPerIteration > 0:
            log.error("Argument eval_per_iteration cannot be used without a dev argument.")
            sys.exit(1)

        if dev:
            log.info("Reading development examples")
            devReader = ShortDocReader(args.dev)
            devIterator = SyncBatchIterator(devReader,
                                            inputGenerators,
                                            outputGenerators,
                                            - 1,
                                            shuffle=False)
        else:
            devIterator = None
    else:
        trainIterator = None
        devIterator = None

    if normalizeMethod == "minmax":
        log.info("Normalization: minmax")
        wordEmbedding.minMaxNormalization()
    elif normalizeMethod == "mean":
        log.info("Normalization: mean normalization")
        wordEmbedding.meanNormalization()
    elif normalizeMethod == "zscore":
        log.info("Normalization: zscore normalization")
        wordEmbedding.zscoreNormalization()
    elif normalizeMethod:
        log.error("Normalization: unknown value %s" % normalizeMethod)
        sys.exit(1)

    # Learning rate decay.
    decay = None
    if args.decay == "none":
        decay = 0.0
    elif args.decay == "linear":
        decay = 1.0
    else:
        log.error("Unknown decay parameter %s." % args.decay)
        sys.exit(1)

    # Learning algorithm.
    if args.alg == "adagrad":
        log.info("Using Adagrad")
        opt = Adagrad(lr=lr, decay=decay)
    elif args.alg == "sgd":
        log.info("Using SGD")
        opt = SGD(lr=lr, decay=decay)
    else:
        log.error("Unknown algorithm: %s." % args.alg)
        sys.exit(1)

    # TODO: debug
    # opt.lr.tag.test_value = 0.05

    # Printing embedding information.
    dictionarySize = wordEmbedding.getNumberOfVectors()
    embeddingSize = wordEmbedding.getEmbeddingSize()
    log.info("Dictionary size: %d" % dictionarySize)
    log.info("Embedding size: %d" % embeddingSize)
    log.info("Number of categories: %d" % labelLexicon.getLen())

    # Train metrics.
    trainMetrics = None
    if trainIterator:
        trainMetrics = [
            LossMetric("TrainLoss", loss),
            AccuracyMetric("TrainAccuracy", outLabel, prediction)
        ]

    # Evaluation metrics.
    evalMetrics = None
    if devIterator:
        evalMetrics = [
            LossMetric("EvalLoss", loss),
            AccuracyMetric("EvalAccuracy", outLabel, prediction),
            FMetric("EvalFMetric", outLabel, prediction, labels=labelLexicon.getLexiconDict().values())
        ]

    # Test metrics.
    testMetrics = None
    if args.test:
        testMetrics = [
            LossMetric("TestLoss", loss),
            AccuracyMetric("TestAccuracy", outLabel, prediction),
            FMetric("TestFMetric", outLabel, prediction, labels=labelLexicon.getLexiconDict().values())
        ]

    # TODO: debug
    # mode = theano.compile.debugmode.DebugMode(optimizer=None)
    mode = None
    model = BasicModel(x=inputTensors,
                       y=[outLabel],
                       allLayers=softmaxAct.getLayerSet(),
                       optimizer=opt,
                       prediction=prediction,
                       loss=loss,
                       trainMetrics=trainMetrics,
                       evalMetrics=evalMetrics,
                       testMetrics=testMetrics,
                       mode=mode)

    # Training
    if trainIterator:
        log.info("Training")
        model.train(trainIterator, numEpochs, devIterator, evalPerIteration=evalPerIteration)

        # Saving model after training (only done when training took place).
        if args.save_wordEmbedding:
            embeddingLayer.saveAsW2V(args.save_wordEmbedding, lexicon=wordLexicon)
            log.info("Saved word to vector to file: %s" % (args.save_wordEmbedding))
        if args.save_conv:
            convLinear.save(args.save_conv)
            log.info("Saved convolution layer to file: %s" % (args.save_conv))
        if args.save_hiddenLayer:
            hiddenLinear.save(args.save_hiddenLayer)
            log.info("Saved hidden layer to file: %s" % (args.save_hiddenLayer))
        if args.save_softmax:
            sotmaxLinearInput.save(args.save_softmax)
            log.info("Saved softmax to file: %s" % (args.save_softmax))

    # Testing
    if args.test:
        log.info("Reading test examples")
        testReader = ShortDocReader(args.test)
        testIterator = SyncBatchIterator(testReader,
                                         inputGenerators,
                                         outputGenerators,
                                         - 1,
                                         shuffle=False)

        log.info("Testing")
        model.test(testIterator)

Example #2

Show file

File: unsupervised_backpropagation.py Project: eraldoluis/lia-pln-deeplearning

def main(**kwargs):
    """Build and train a tagger with a gradient-reversal domain classifier.

    A shared feature extractor (word-window embedding, optionally combined
    with a character-level convolution) feeds two heads: a supervised tagger
    trained on the labelled source data, and a domain classifier (behind a
    ``GradientReversalLayer``) trained on both source and target data. The
    target-side layers reuse the parameters of the source-side layers.

    Keys read from ``kwargs``: ``seed``, ``filters``, ``word_window_size``,
    ``hidden_size``, ``batch_size`` (indexed with 0 for the supervised and 1
    for the unsupervised iterator), ``start_symbol``, ``end_symbol``,
    ``num_epochs``, ``lr``, ``label_file``, ``adagrad``, ``shuffle``,
    ``hidden_size_supervised_part``, ``hidden_size_unsupervised_part``,
    ``normalization``, ``activation_hidden_extractor``, ``with_charwnn``,
    ``conv_size``, ``char_emb_size``, ``char_window_size``,
    ``charwnn_with_act``, ``decay``, ``word_embedding``, ``train_source``,
    ``train_target``, ``token_label_separator``, ``dev``, ``alpha``,
    ``height`` and ``additional_dev``.

    Raises:
        ValueError: if ``decay`` is neither "normal" nor "divide_epoch".
        Exception: if ``normalization`` or ``activation_hidden_extractor``
            holds an unsupported value.
    """
    log = logging.getLogger(__name__)
    log.info(kwargs)

    if kwargs["seed"] is not None:
        random.seed(kwargs["seed"])
        np.random.seed(kwargs["seed"])

    # Each filter is a dotted path "module.ClassName"; the class is
    # instantiated with no arguments.
    filters = []

    for filterName in kwargs["filters"]:
        moduleName, className = filterName.rsplit('.', 1)
        log.info("Usando o filtro: " + moduleName + " " + className)

        module_ = importlib.import_module(moduleName)
        filters.append(getattr(module_, className)())

    wordWindowSize = kwargs["word_window_size"]
    hiddenLayerSize = kwargs["hidden_size"]
    batchSize = kwargs["batch_size"]
    startSymbol = kwargs["start_symbol"]
    endSymbol = kwargs["end_symbol"]
    numEpochs = kwargs["num_epochs"]
    lr = kwargs["lr"]
    tagLexicon = createLexiconUsingFile(kwargs["label_file"])
    # _lambda = theano.shared(kwargs["lambda"], "lambda")
    # Starts at 0.0; the ChangeLambda callback registered below receives this
    # shared variable.
    _lambda = theano.shared(0.0, "lambda")
    useAdagrad = kwargs["adagrad"]
    shuffle = kwargs["shuffle"]
    supHiddenLayerSize = kwargs["hidden_size_supervised_part"]
    unsupHiddenLayerSize = kwargs["hidden_size_unsupervised_part"]
    normalization = kwargs["normalization"]
    activationHiddenExtractor = kwargs["activation_hidden_extractor"]

    withCharWNN = kwargs["with_charwnn"]
    convSize = kwargs["conv_size"]
    charEmbeddingSize = kwargs["char_emb_size"]
    charWindowSize = kwargs["char_window_size"]
    startSymbolChar = "</s>"

    if kwargs["charwnn_with_act"]:
        charAct = tanh
    else:
        charAct = None

    # TODO: the maximum number of characters of word is fixed in 20.
    numMaxChar = 20

    decayMode = kwargs["decay"].lower()
    if decayMode == "normal":
        decay = 0.0
    elif decayMode == "divide_epoch":
        decay = 1.0
    else:
        # Fail here instead of with a NameError when the optimizer is built.
        raise ValueError("Unknown decay parameter: %s" % kwargs["decay"])

    # Add the lexicon of target
    domainLexicon = Lexicon()

    domainLexicon.put("0")
    domainLexicon.put("1")
    domainLexicon.stopAdd()

    log.info("Reading W2v File1")
    wordEmbedding = EmbeddingFactory().createFromW2V(kwargs["word_embedding"],
                                                     RandomUnknownStrategy())

    log.info("Reading training examples")
    # Generators
    # NOTE(review): only startSymbol is passed here; endSymbol is only handed
    # to the character window generator below. Confirm this is intended.
    inputGenerators = [
        WordWindowGenerator(wordWindowSize, wordEmbedding, filters,
                            startSymbol)
    ]
    outputGeneratorTag = LabelGenerator(tagLexicon)

    if withCharWNN:
        # Create the character embedding
        charEmbedding = EmbeddingFactory().createRandomEmbedding(
            charEmbeddingSize)

        # Insert the padding of the character window
        charEmbedding.put(startSymbolChar)

        # Insert the character that will be used to fill the matrix when a
        # word has fewer characters than the chosen dimension. This enables
        # the convolution to be performed by a matrix multiplication.
        artificialChar = "ART_CHAR"
        charEmbedding.put(artificialChar)

        inputGenerators.append(
            CharacterWindowGenerator(charEmbedding,
                                     numMaxChar,
                                     charWindowSize,
                                     wordWindowSize,
                                     artificialChar,
                                     startSymbolChar,
                                     startPaddingWrd=startSymbol,
                                     endPaddingWrd=endSymbol))

    # Label generator producing the constant domain label "0" (source).
    unsupervisedLabelSource = ConstantLabel(domainLexicon, "0")

    # Reading supervised and unsupervised data sets.
    trainSupervisedDatasetReader = TokenLabelReader(
        kwargs["train_source"], kwargs["token_label_separator"])
    trainSupervisedBatch = SyncBatchIterator(
        trainSupervisedDatasetReader,
        inputGenerators, [outputGeneratorTag, unsupervisedLabelSource],
        batchSize[0],
        shuffle=shuffle)

    # Get Unsupervised Input
    # Label generator producing the constant domain label "1" (target).
    unsupervisedLabelTarget = ConstantLabel(domainLexicon, "1")

    trainUnsupervisedDatasetReader = TokenReader(kwargs["train_target"])
    trainUnsupervisedDatasetBatch = SyncBatchIterator(
        trainUnsupervisedDatasetReader,
        inputGenerators, [unsupervisedLabelTarget],
        batchSize[1],
        shuffle=shuffle)

    # Stopping to add new words, labels and chars
    wordEmbedding.stopAdd()
    tagLexicon.stopAdd()
    domainLexicon.stopAdd()

    if withCharWNN:
        charEmbedding.stopAdd()

    # Printing embedding information
    dictionarySize = wordEmbedding.getNumberOfVectors()
    embeddingSize = wordEmbedding.getEmbeddingSize()
    log.info("Size of  word dictionary and word embedding size: %d and %d" %
             (dictionarySize, embeddingSize))
    # The character embedding only exists when the char-level network is on.
    if withCharWNN:
        log.info(
            "Size of  char dictionary and char embedding size: %d and %d" %
            (charEmbedding.getNumberOfVectors(),
             charEmbedding.getEmbeddingSize()))

    # Word Embedding Normalization
    if normalization == "zscore":
        wordEmbedding.zscoreNormalization()
    elif normalization == "minmax":
        wordEmbedding.minMaxNormalization()
    elif normalization == "mean":
        wordEmbedding.meanNormalization()
    elif normalization == "none" or not normalization:
        pass
    else:
        raise Exception("Unknown normalization: %s" % normalization)

    # Source input
    wordWindowSource = T.lmatrix(name="windowSource")
    sourceInput = [wordWindowSource]

    # Create the layers related with the extractor of features
    embeddingLayerSrc = EmbeddingLayer(wordWindowSource,
                                       wordEmbedding.getEmbeddingMatrix(),
                                       trainable=True)
    flattenSrc = FlattenLayer(embeddingLayerSrc)

    if withCharWNN:
        log.info("Using charwnn")

        # Create the charwn
        charWindowIdxSrc = T.ltensor4(name="char_window_idx_source")
        sourceInput.append(charWindowIdxSrc)

        charEmbeddingConvLayerSrc = EmbeddingConvolutionalLayer(
            charWindowIdxSrc, charEmbedding.getEmbeddingMatrix(), numMaxChar,
            convSize, charWindowSize, charEmbeddingSize, charAct)
        layerBeforeLinearSrc = ConcatenateLayer(
            [flattenSrc, charEmbeddingConvLayerSrc])
        sizeLayerBeforeLinearSrc = wordWindowSize * (
            wordEmbedding.getEmbeddingSize() + convSize)
    else:
        layerBeforeLinearSrc = flattenSrc
        sizeLayerBeforeLinearSrc = (wordWindowSize *
                                    wordEmbedding.getEmbeddingSize())

    if activationHiddenExtractor == "tanh":
        log.info("Using tanh in the hidden layer of extractor")

        linear1 = LinearLayer(layerBeforeLinearSrc,
                              sizeLayerBeforeLinearSrc,
                              hiddenLayerSize,
                              weightInitialization=GlorotUniform())
        act1 = ActivationLayer(linear1, tanh)
    elif activationHiddenExtractor == "sigmoid":
        log.info("Using sigmoid in the hidden layer of extractor")

        linear1 = LinearLayer(layerBeforeLinearSrc,
                              sizeLayerBeforeLinearSrc,
                              hiddenLayerSize,
                              weightInitialization=SigmoidGenerator())
        act1 = ActivationLayer(linear1, sigmoid)
    else:
        raise Exception("Unknown activation for the hidden extractor: %s" %
                        activationHiddenExtractor)

    # Create the layers with the Tagger
    if supHiddenLayerSize == 0:
        layerBeforeSupSoftmax = act1
        layerSizeBeforeSupSoftmax = hiddenLayerSize
        log.info("It didn't insert the layer before the supervised softmax.")
    else:
        linear2 = LinearLayer(act1,
                              hiddenLayerSize,
                              supHiddenLayerSize,
                              weightInitialization=GlorotUniform())
        act2 = ActivationLayer(linear2, tanh)

        layerBeforeSupSoftmax = act2
        layerSizeBeforeSupSoftmax = supHiddenLayerSize

        log.info("It inserted the layer before the supervised softmax.")

    supervisedLinear = LinearLayer(layerBeforeSupSoftmax,
                                   layerSizeBeforeSupSoftmax,
                                   tagLexicon.getLen(),
                                   weightInitialization=ZeroWeightGenerator())
    supervisedSoftmax = ActivationLayer(supervisedLinear, softmax)

    # Create the layers with the domain classifier
    gradientReversalSource = GradientReversalLayer(act1, _lambda)

    if unsupHiddenLayerSize == 0:
        layerBeforeUnsupSoftmax = gradientReversalSource
        layerSizeBeforeUnsupSoftmax = hiddenLayerSize
        log.info("It didn't insert the layer before the unsupervised softmax.")
    else:
        unsupervisedSourceLinearBf = LinearLayer(
            gradientReversalSource,
            hiddenLayerSize,
            unsupHiddenLayerSize,
            weightInitialization=GlorotUniform())
        actUnsupervisedSourceBf = ActivationLayer(unsupervisedSourceLinearBf,
                                                  tanh)

        layerBeforeUnsupSoftmax = actUnsupervisedSourceBf
        layerSizeBeforeUnsupSoftmax = unsupHiddenLayerSize

        log.info("It inserted the layer before the unsupervised softmax.")

    unsupervisedSourceLinear = LinearLayer(
        layerBeforeUnsupSoftmax,
        layerSizeBeforeUnsupSoftmax,
        domainLexicon.getLen(),
        weightInitialization=ZeroWeightGenerator())
    unsupervisedSourceSoftmax = ActivationLayer(unsupervisedSourceLinear,
                                                softmax)

    ## Target Part
    windowTarget = T.lmatrix(name="windowTarget")
    targetInput = [windowTarget]

    # Create the layers related with the extractor of features. The target
    # layers are built from the parameters of the matching source layers.
    embeddingLayerUnsuper1 = EmbeddingLayer(
        windowTarget, embeddingLayerSrc.getParameters()[0], trainable=True)
    flattenUnsuper1 = FlattenLayer(embeddingLayerUnsuper1)

    if withCharWNN:
        log.info("Using charwnn")

        # Create the charwn
        charWindowIdxTgt = T.ltensor4(name="char_window_idx_target")
        targetInput.append(charWindowIdxTgt)

        charEmbeddingConvLayerTgt = EmbeddingConvolutionalLayer(
            charWindowIdxTgt,
            charEmbeddingConvLayerSrc.getParameters()[0],
            numMaxChar,
            convSize,
            charWindowSize,
            charEmbeddingSize,
            charAct,
            trainable=True)
        layerBeforeLinearTgt = ConcatenateLayer(
            [flattenUnsuper1, charEmbeddingConvLayerTgt])
        sizeLayerBeforeLinearTgt = wordWindowSize * (
            wordEmbedding.getEmbeddingSize() + convSize)
    else:
        layerBeforeLinearTgt = flattenUnsuper1
        sizeLayerBeforeLinearTgt = (wordWindowSize *
                                    wordEmbedding.getEmbeddingSize())

    w, b = linear1.getParameters()
    linearUnsuper1 = LinearLayer(layerBeforeLinearTgt,
                                 sizeLayerBeforeLinearTgt,
                                 hiddenLayerSize,
                                 W=w,
                                 b=b,
                                 trainable=True)

    if activationHiddenExtractor == "tanh":
        log.info("Using tanh in the hidden layer of extractor")
        actUnsupervised1 = ActivationLayer(linearUnsuper1, tanh)
    elif activationHiddenExtractor == "sigmoid":
        log.info("Using sigmoid in the hidden layer of extractor")
        actUnsupervised1 = ActivationLayer(linearUnsuper1, sigmoid)
    else:
        raise Exception("Unknown activation for the hidden extractor: %s" %
                        activationHiddenExtractor)

    # Create the layers with the domain classifier
    grandientReversalTarget = GradientReversalLayer(actUnsupervised1, _lambda)

    if unsupHiddenLayerSize == 0:
        layerBeforeUnsupSoftmax = grandientReversalTarget
        layerSizeBeforeUnsupSoftmax = hiddenLayerSize
        log.info("It didn't insert the layer before the unsupervised softmax.")
    else:
        w, b = unsupervisedSourceLinearBf.getParameters()
        unsupervisedTargetLinearBf = LinearLayer(grandientReversalTarget,
                                                 hiddenLayerSize,
                                                 unsupHiddenLayerSize,
                                                 W=w,
                                                 b=b,
                                                 trainable=True)
        actUnsupervisedTargetLinearBf = ActivationLayer(
            unsupervisedTargetLinearBf, tanh)

        layerBeforeUnsupSoftmax = actUnsupervisedTargetLinearBf
        layerSizeBeforeUnsupSoftmax = unsupHiddenLayerSize

        log.info("It inserted the layer before the unsupervised softmax.")

    w, b = unsupervisedSourceLinear.getParameters()
    unsupervisedTargetLinear = LinearLayer(layerBeforeUnsupSoftmax,
                                           layerSizeBeforeUnsupSoftmax,
                                           domainLexicon.getLen(),
                                           W=w,
                                           b=b,
                                           trainable=True)
    unsupervisedTargetSoftmax = ActivationLayer(unsupervisedTargetLinear,
                                                softmax)

    # Set loss and prediction and retrieve all layers.
    # From here on the two unsupervisedLabel* names refer to symbolic label
    # vectors; the ConstantLabel generators bound to the same names above were
    # already handed to the batch iterators.
    supervisedLabel = T.lvector("supervisedLabel")
    unsupervisedLabelSource = T.lvector("unsupervisedLabelSource")
    unsupervisedLabelTarget = T.lvector("unsupervisedLabelTarget")

    supervisedOutput = supervisedSoftmax.getOutput()
    supervisedPrediction = ArgmaxPrediction(1).predict(supervisedOutput)
    supervisedLoss = NegativeLogLikelihood().calculateError(
        supervisedOutput, supervisedPrediction, supervisedLabel)

    unsupervisedOutputSource = unsupervisedSourceSoftmax.getOutput()
    unsupervisedPredSource = ArgmaxPrediction(1).predict(
        unsupervisedOutputSource)
    unsupervisedLossSource = NegativeLogLikelihood().calculateError(
        unsupervisedOutputSource, None, unsupervisedLabelSource)

    unsupervisedOutputTarget = unsupervisedTargetSoftmax.getOutput()
    unsupervisedPredTarget = ArgmaxPrediction(1).predict(
        unsupervisedOutputTarget)
    unsupervisedLossTarget = NegativeLogLikelihood().calculateError(
        unsupervisedOutputTarget, None, unsupervisedLabelTarget)

    # Creates model

    if useAdagrad:
        log.info("Using ADAGRAD")
        opt = Adagrad(lr=lr, decay=decay)
    else:
        log.info("Using SGD")
        opt = SGD(lr=lr, decay=decay)

    allLayersSource = (supervisedSoftmax.getLayerSet() |
                       unsupervisedSourceSoftmax.getLayerSet())
    allLayersTarget = unsupervisedTargetSoftmax.getLayerSet()
    # Scale the target loss by the ratio between the sizes reported by the
    # supervised and the unsupervised iterators.
    unsupervisedLossTarget *= float(
        trainSupervisedBatch.size()) / trainUnsupervisedDatasetBatch.size()

    supervisedTrainMetrics = [
        LossMetric("TrainSupervisedLoss", supervisedLoss),
        AccuracyMetric("TrainSupervisedAcc", supervisedLabel,
                       supervisedPrediction),
        LossMetric("TrainUnsupervisedLoss", unsupervisedLossSource),
        AccuracyMetric("TrainUnsupervisedAccuracy", unsupervisedLabelSource,
                       unsupervisedPredSource)
    ]
    unsupervisedTrainMetrics = [
        LossMetric("TrainUnsupervisedLoss", unsupervisedLossTarget),
        AccuracyMetric("TrainUnsupervisedAccuracy", unsupervisedLabelTarget,
                       unsupervisedPredTarget)
    ]

    evalMetrics = [
        AccuracyMetric("EvalAcc", supervisedLabel, supervisedPrediction)
    ]

    testMetrics = [
        AccuracyMetric("TestAcc", supervisedLabel, supervisedPrediction)
    ]

    # TODO: There was no time to test the code after the modifications.
    # The model must be bound to a name: it is used by the additional-dev
    # callback and by the training call below.
    model = GradientReversalModel(sourceInput,
                                  targetInput,
                                  supervisedLabel,
                                  unsupervisedLabelSource,
                                  unsupervisedLabelTarget,
                                  allLayersSource,
                                  allLayersTarget,
                                  opt,
                                  supervisedPrediction,
                                  supervisedLoss,
                                  unsupervisedLossSource,
                                  unsupervisedLossTarget,
                                  supervisedTrainMetrics,
                                  unsupervisedTrainMetrics,
                                  evalMetrics,
                                  testMetrics,
                                  mode=None)

    # Get dev inputs and output
    log.info("Reading development examples")
    devDatasetReader = TokenLabelReader(kwargs["dev"],
                                        kwargs["token_label_separator"])
    # NOTE: sys.maxint only exists on Python 2, which this script targets.
    devReader = SyncBatchIterator(devDatasetReader,
                                  inputGenerators, [outputGeneratorTag],
                                  sys.maxint,
                                  shuffle=False)

    callbacks = []
    # log.info("Usando lambda fixo: " + str(_lambda.get_value()))
    log.info("Usando lambda variado. alpha=" + str(kwargs["alpha"]) +
             " height=" + str(kwargs["height"]))
    callbacks.append(
        ChangeLambda(_lambda, kwargs["alpha"], numEpochs, kwargs["height"]))

    if kwargs["additional_dev"]:
        callbacks.append(
            AdditionalDevDataset(model, kwargs["additional_dev"],
                                 kwargs["token_label_separator"],
                                 inputGenerators, outputGeneratorTag))

    # Training Model
    model.train([trainSupervisedBatch, trainUnsupervisedDatasetBatch],
                numEpochs,
                devReader,
                callbacks=callbacks)

Example #3

Show file

def trainNetwork(args, log, trainIterator, devIterator, wordEmbedding, charEmbedding, borrow, labelLexicon):
    """Build the word/char window network and train it.

    The graph concatenates a flattened word-embedding lookup with a
    character-level convolution, feeds it through one hidden layer and a
    softmax layer, and is trained with Adagrad or SGD.

    :param args: parsed options; the attributes read here are struct_grad,
        char_struct_grad, conv_size, char_window_size, char_emb_size,
        word_window_size, hidden_activation_function, hidden_size, decay,
        adagrad, lr, l2, dev, test and num_epochs.
    :param log: logger used for progress and error messages.
    :param trainIterator: iterator over the training batches.
    :param devIterator: iterator over the development batches, passed to
        the model's train method.
    :param wordEmbedding: word embedding providing the lookup matrix and size.
    :param charEmbedding: character embedding providing the lookup matrix.
    :param borrow: forwarded as the ``borrow`` option of the embedding layers.
    :param labelLexicon: lexicon of labels; its length is the softmax size.
    """
    # Symbolic inputs: a window of word indexes and a window of char indexes.
    wordTensor = T.lmatrix("word_window")

    wordLookup = EmbeddingLayer(
        wordTensor,
        wordEmbedding.getEmbeddingMatrix(),
        borrow=borrow,
        structGrad=args.struct_grad,
        trainable=True,
        name="word_embedding_layer",
    )
    wordFeatures = FlattenLayer(wordLookup)

    charTensor = T.ltensor4(name="char_window_idx")
    networkInputs = [wordTensor, charTensor]

    charFeatures = EmbeddingConvolutionalLayer(
        charTensor,
        charEmbedding.getEmbeddingMatrix(),
        20,
        args.conv_size,
        args.char_window_size,
        args.char_emb_size,
        tanh,
        structGrad=args.char_struct_grad,
        name="char_convolution_layer",
        borrow=borrow,
    )

    # Word and char features are joined before the hidden layer.
    joinedFeatures = ConcatenateLayer([wordFeatures, charFeatures])
    joinedSize = args.word_window_size * (wordEmbedding.getEmbeddingSize() + args.conv_size)

    # The weight initializer depends on the hidden activation function.
    activation = method_name(args.hidden_activation_function)
    if activation == sigmoid:
        hiddenInit = SigmoidGlorot()
    else:
        hiddenInit = GlorotUniform()

    linearHidden = LinearLayer(joinedFeatures, joinedSize, args.hidden_size,
                               weightInitialization=hiddenInit, name="linear1")
    hiddenOutput = ActivationLayer(linearHidden, activation)

    softmaxInput = LinearLayer(hiddenOutput, args.hidden_size, labelLexicon.getLen(),
                               weightInitialization=ZeroWeightGenerator(), name="linear_softmax")
    softmaxOutput = ActivationLayer(softmaxInput, softmax)
    prediction = ArgmaxPrediction(1).predict(softmaxOutput.getOutput())

    # Symbolic variable holding the expected labels.
    y = T.lvector("y")

    # Translate the decay option into the value handed to the optimizer.
    decayByName = {"normal": 0.0, "divide_epoch": 1.0}
    decayKey = args.decay.lower()
    if decayKey not in decayByName:
        log.error("Unknown decay argument: %s" % args.decay)
        sys.exit(1)
    decay = decayByName[decayKey]

    # Training algorithm.
    if not args.adagrad:
        log.info("Training algorithm: SGD")
        opt = SGD(lr=args.lr, decay=decay)
    else:
        log.info("Training algorithm: Adagrad")
        opt = Adagrad(lr=args.lr, decay=decay)

    # Training loss.
    loss = NegativeLogLikelihood().calculateError(softmaxOutput.getOutput(), prediction, y)

    # Optional L2 penalty on the first parameter of the hidden layer.
    if args.l2:
        hiddenWeights = linearHidden.getParameters()[0]
        l2Penalty = T.sum(T.square(hiddenWeights))
        loss += args.l2 * l2Penalty

    # Metrics reported while training, evaluating and testing.
    trainMetrics = [
        LossMetric("LossTrain", loss, True),
        AccuracyMetric("AccTrain", y, prediction),
    ]

    evalMetrics = [
        LossMetric("LossDev", loss, True),
        AccuracyMetric("AccDev", y, prediction),
        CustomMetric("CustomMetricDev", y, prediction),
    ] if args.dev else None

    testMetrics = [
        CustomMetric("CustomMetricTest", y, prediction),
    ] if args.test else None

    log.info("Compiling the network...")
    wnnModel = BasicModel(
        networkInputs,
        [y],
        softmaxOutput.getLayerSet(),
        opt,
        prediction,
        loss,
        trainMetrics=trainMetrics,
        evalMetrics=evalMetrics,
        testMetrics=testMetrics,
        mode=None,
    )

    log.info("Training...")
    wnnModel.train(trainIterator, args.num_epochs, devIterator)

Example #4

Show file

def main(args):
    """Build the offer-classification network and train and/or test it.

    The network is: word-window embedding lookup -> linear "convolution"
    over each window -> max pooling -> optional categorical-feature
    embeddings -> optional hidden layer -> softmax. Weights can be loaded
    from a previously saved model (``args.load_model``) or initialized
    from the remaining arguments.

    :param args: parsed options object. Among others, this function reads
        seed, lr, start_symbol, end_symbol, num_epochs, shuffle,
        normalization, word_window_size, hidden_size, conv_size, filters,
        load_model, word_embedding, hash_lex_size, word_emb_size, labels,
        load_hidden_layer, fix_word_embedding, categorical_features,
        include_hidden_layer, labels_probs, labels_weights_log, train,
        load_method, dev, eval_per_iteration, decay, alg, test, test_probs
        and save_model.

    The process exits with status 1 when an argument has an invalid value.
    """
    log = logging.getLogger(__name__)

    if args.seed:
        random.seed(args.seed)
        np.random.seed(args.seed)

    lr = args.lr
    startSymbol = args.start_symbol
    endSymbol = args.end_symbol
    numEpochs = args.num_epochs
    shuffle = args.shuffle
    normalizeMethod = args.normalization
    wordWindowSize = args.word_window_size
    hiddenLayerSize = args.hidden_size
    convSize = args.conv_size

    # Load classes for filters.
    filters = []
    for filterName in args.filters:
        moduleName, className = filterName.rsplit('.', 1)
        log.info("Filtro: " + moduleName + " " + className)

        module_ = importlib.import_module(moduleName)
        filters.append(getattr(module_, className)())

    loadPath = args.load_model

    if loadPath:
        # The stream is already decoded by codecs.open, so json.load needs no
        # encoding argument (that argument no longer exists in Python 3.9+).
        with codecs.open(loadPath + ".param", "r",
                         encoding="utf-8") as paramsFile:
            param = json.load(paramsFile)

        hiddenActFunctionName = param['hiddenActFunction']
        hiddenActFunction = method_name(hiddenActFunctionName)

        # Loading Embedding
        log.info("Loading Model")
        wordEmbedding = EmbeddingFactory().createFromW2V(
            loadPath + ".wv", ChosenUnknownStrategy(param["unknown"]))
        labelLexicon = Lexicon()

        for l in param["labels"]:
            labelLexicon.put(l)

        labelLexicon.stopAdd()

        # Loading model
        labelWeights = np.load(loadPath + ".npy").item(0)

        W1 = labelWeights["W_Hidden"]
        b1 = labelWeights["b_Hidden"]
        W2 = labelWeights["W_Softmax"]
        b2 = labelWeights["b_Softmax"]

        hiddenLayerSize = b1.shape[0]
    else:
        W1 = None
        b1 = None
        W2 = None
        b2 = None
        hiddenActFunctionName = args.hidden_activation_function
        hiddenActFunction = method_name(hiddenActFunctionName)

        if args.word_embedding:
            log.info("Reading W2v File")
            wordEmbedding = EmbeddingFactory().createFromW2V(
                args.word_embedding, RandomUnknownStrategy())
            wordEmbedding.stopAdd()
        elif args.hash_lex_size:
            wordEmbedding = RandomEmbedding(args.word_emb_size,
                                            RandomUnknownStrategy(),
                                            HashLexicon(args.hash_lex_size))
        else:
            wordEmbedding = EmbeddingFactory().createRandomEmbedding(
                args.word_emb_size)

        # Get the inputs and output
        if args.labels:
            labelLexicon = createLexiconUsingFile(args.labels)
        else:
            labelLexicon = Lexicon()

        if args.load_hidden_layer:
            # Loading Hidden Layer
            log.info("Loading Hidden Layer")

            hl = np.load(args.load_hidden_layer).item(0)

            W1 = hl["W_Encoder"]
            b1 = hl["b_Encoder"]

            hiddenLayerSize = b1.shape[0]

    #
    # Build the network model (Theano graph).
    #

    # TODO: debug
    # theano.config.compute_test_value = 'warn'
    # ex = trainIterator.next()
    # inWords.tag.test_value = ex[0][0]
    # outLabel.tag.test_value = ex[1][0]

    # Input matrix. Each row represents one token of the offer. Each token is
    # represented by a window of tokens (the central token and some
    # neighboring tokens). Each value of this matrix is an index that
    # represents a token in the embedding.
    inWords = T.lmatrix("inWords")

    # Correct category of an offer.
    outLabel = T.lscalar("outLabel")

    # List of input tensors. One for each input layer.
    inputTensors = [inWords]

    # Whether the word embedding will be updated during training.
    embLayerTrainable = not args.fix_word_embedding

    if not embLayerTrainable:
        log.info("Not updating the word embedding!")

    # Lookup table for word features.
    embeddingLayer = EmbeddingLayer(inWords,
                                    wordEmbedding.getEmbeddingMatrix(),
                                    trainable=embLayerTrainable)

    # The lookup table output has 3 dimensions (numTokens, szWindow, szEmbedding).
    # This layer flattens the last two dimensions, producing an output with
    # shape (numTokens, szWindow * szEmbedding).
    flattenInput = FlattenLayer(embeddingLayer)

    # Random weight initialization procedure.
    weightInit = SigmoidGlorot(
    ) if hiddenActFunction == sigmoid else GlorotUniform()

    # Convolution layer. Convolution over the text of an offer.
    convLinear = LinearLayer(flattenInput,
                             wordWindowSize * wordEmbedding.getEmbeddingSize(),
                             convSize,
                             W=None,
                             b=None,
                             weightInitialization=weightInit)

    # Max pooling layer.
    maxPooling = MaxPoolingLayer(convLinear)

    # List of input layers (will be concatenated).
    inputLayers = [maxPooling]

    # Generate word windows.
    wordWindowFeatureGenerator = WordWindowGenerator(wordWindowSize,
                                                     wordEmbedding, filters,
                                                     startSymbol, endSymbol)

    # List of input generators.
    inputGenerators = [
        lambda offer: wordWindowFeatureGenerator(offer["tokens"])
    ]

    concatenatedSize = convSize

    def makeCategoricalGenerator(lexicon, fieldName):
        # Bind the lexicon and the field name of ONE feature. A lambda written
        # directly inside the loop below would look the loop variables up when
        # it is called, so every generator would use the last feature.
        return lambda offer: lexicon.put(offer[fieldName].strip().lower())

    # Additional features.
    if args.categorical_features is not None:
        log.info("Using categorical features: %s" %
                 str([ftr[0] for ftr in args.categorical_features]))
        # Each feature is indexed as (name, lexicon file, embedding size).
        for ftr in args.categorical_features:
            concatenatedSize += ftr[2]
            ftrLexicon = createLexiconUsingFile(ftr[1])
            ftrEmbedding = RandomEmbedding(
                embeddingSize=ftr[2],
                unknownGenerateStrategy=RandomUnknownStrategy(),
                lexicon=ftrLexicon,
            )
            ftrInput = T.lscalar("in_" + ftr[0])
            ftrLayer = EmbeddingLayer(ftrInput,
                                      ftrEmbedding.getEmbeddingMatrix())

            inputGenerators.append(
                makeCategoricalGenerator(ftrLexicon, ftr[0]))
            inputTensors.append(ftrInput)
            inputLayers.append(ftrLayer)

    log.info("Input layers: %s" % str(inputLayers))

    # Concatenate all input layers, when there is more than one input layer.
    concatenatedInLayers = maxPooling if len(
        inputLayers) == 1 else ConcatenateLayer(inputLayers, axis=0)

    if args.include_hidden_layer:
        # Hidden layer.
        hiddenLinear = LinearLayer(concatenatedInLayers,
                                   concatenatedSize,
                                   hiddenLayerSize,
                                   W=W1,
                                   b=b1,
                                   weightInitialization=weightInit)
        hiddenAct = ActivationLayer(hiddenLinear, hiddenActFunction)
    else:
        # Do not use a hidden layer.
        # NOTE(review): hiddenLinear is not defined on this path, yet it is
        # passed to OfertasModelWritter below when args.save_model is set.
        # Confirm how saving is supposed to work without a hidden layer.
        log.info("Not using hidden layer!")
        hiddenAct = concatenatedInLayers
        hiddenLayerSize = concatenatedSize

    # Linear input of the softmax layer.
    sotmaxLinearInput = LinearLayer(hiddenAct,
                                    hiddenLayerSize,
                                    labelLexicon.getLen(),
                                    W=W2,
                                    b=b2,
                                    weightInitialization=ZeroWeightGenerator())
    # Softmax.
    # softmaxAct = ReshapeLayer(ActivationLayer(sotmaxLinearInput, softmax), (1, -1))
    softmaxAct = ActivationLayer(sotmaxLinearInput, softmax)

    # Prediction layer (argmax).
    prediction = ArgmaxPrediction(None).predict(softmaxAct.getOutput())

    # Class weights.
    labelWeights = None
    if args.labels_probs:
        numLabels = labelLexicon.getLen()
        labelWeights = np.zeros(numLabels, dtype=theano.config.floatX)
        if args.labels_probs.startswith("@"):
            # Load the dictionary from a JSON file.
            with codecs.open(args.labels_probs[1:], mode="r",
                             encoding="utf8") as f:
                labelDistribution = json.load(f)
        else:
            # The argument value is already a JSON.
            labelDistribution = json.loads(args.labels_probs)

        for k, v in labelDistribution.items():
            # The weight of a class is inversely-proportional to its frequency.
            labelWeights[labelLexicon.getLexiconIndex(k)] = 1.0 / v

        if args.labels_weights_log:
            # Attenuate weights for highly unbalanced classes.
            labelWeights = np.log(labelWeights)

        log.info("Label weights: " + str(labelWeights))

    # Loss function.
    loss = NegativeLogLikelihoodOneExample(labelWeights).calculateError(
        softmaxAct.getOutput()[0], prediction, outLabel)

    # Output generator: generate one label per offer.
    outputGenerators = [TextLabelGenerator(labelLexicon)]

    if args.train:
        trainDatasetReader = OfertasReader(args.train)
        if args.load_method == "sync":
            log.info("Reading training examples...")
            trainIterator = SyncBatchIterator(trainDatasetReader,
                                              inputGenerators,
                                              outputGenerators,
                                              -1,
                                              shuffle=shuffle)
            wordEmbedding.stopAdd()
        elif args.load_method == "async":
            log.info("Examples will be asynchronously loaded.")
            trainIterator = AsyncBatchIterator(trainDatasetReader,
                                               inputGenerators,
                                               outputGenerators,
                                               -1,
                                               shuffle=shuffle,
                                               maxqSize=1000)
        else:
            log.error("The argument 'load_method' has an invalid value: %s." %
                      args.load_method)
            sys.exit(1)

        labelLexicon.stopAdd()

        # Get dev inputs and output
        dev = args.dev
        evalPerIteration = args.eval_per_iteration
        if not dev and evalPerIteration > 0:
            log.error(
                "Argument eval_per_iteration cannot be used without a dev argument."
            )
            sys.exit(1)

        if dev:
            log.info("Reading development examples")
            devReader = OfertasReader(args.dev)
            devIterator = SyncBatchIterator(devReader,
                                            inputGenerators,
                                            outputGenerators,
                                            -1,
                                            shuffle=False)
        else:
            devIterator = None
    else:
        trainIterator = None
        devIterator = None

    if normalizeMethod == "minmax":
        log.info("Normalization: minmax")
        wordEmbedding.minMaxNormalization()
    elif normalizeMethod == "mean":
        log.info("Normalization: mean normalization")
        wordEmbedding.meanNormalization()
    elif normalizeMethod == "zscore":
        log.info("Normalization: zscore normalization")
        wordEmbedding.zscoreNormalization()
    elif normalizeMethod:
        log.error("Normalization: unknown value %s" % normalizeMethod)
        sys.exit(1)

    if normalizeMethod is not None and loadPath is not None:
        log.warning(
            "The word embedding of model was normalized. This can change the result of test."
        )

    #     if kwargs["lambda"]:
    #         _lambda = kwargs["lambda"]
    #         log.info("Using L2 with lambda= %.2f", _lambda)
    #         loss += _lambda * (T.sum(T.square(hiddenLinear.getParameters()[0])))

    # Learning rate decay.
    decay = 0.0
    if args.decay == "linear":
        decay = 1.0

    # Learning algorithm.
    if args.alg == "adagrad":
        log.info("Using Adagrad")
        opt = Adagrad(lr=lr, decay=decay)
    elif args.alg == "sgd":
        log.info("Using SGD")
        opt = SGD(lr=lr, decay=decay)
    else:
        log.error(
            "Unknown algorithm: %s. Expected values are: adagrad or sgd." %
            args.alg)
        sys.exit(1)

    # TODO: debug
    # opt.lr.tag.test_value = 0.05

    # Printing embedding information.
    dictionarySize = wordEmbedding.getNumberOfVectors()
    embeddingSize = wordEmbedding.getEmbeddingSize()
    log.info("Dictionary size: %d" % dictionarySize)
    log.info("Embedding size: %d" % embeddingSize)
    log.info("Number of categories: %d" % labelLexicon.getLen())

    # Train metrics.
    trainMetrics = None
    if trainIterator:
        trainMetrics = [
            LossMetric("TrainLoss", loss),
            AccuracyMetric("TrainAccuracy", outLabel, prediction)
        ]

    # Evaluation metrics.
    evalMetrics = None
    if devIterator:
        evalMetrics = [
            LossMetric("EvalLoss", loss),
            AccuracyMetric("EvalAccuracy", outLabel, prediction),
            FMetric("EvalFMetric",
                    outLabel,
                    prediction,
                    labels=labelLexicon.getLexiconDict().values())
        ]

    # Test metrics.
    testMetrics = None
    if args.test:
        testMetrics = [
            LossMetric("TestLoss", loss),
            AccuracyMetric("TestAccuracy", outLabel, prediction),
            FMetric("TestFMetric",
                    outLabel,
                    prediction,
                    labels=labelLexicon.getLexiconDict().values())
        ]

        if args.test_probs:
            # Append predicted probabilities for the test set.
            testMetrics.append(
                PredictedProbabilities("TestProbs", softmaxAct.getOutput()))
    else:
        if args.test_probs:
            log.error(
                "The option test_probs requires a test dataset (option test).")
            sys.exit(1)

    # TODO: debug
    # mode = theano.compile.debugmode.DebugMode(optimizer=None)
    mode = None
    model = Model(x=inputTensors,
                  y=[outLabel],
                  allLayers=softmaxAct.getLayerSet(),
                  optimizer=opt,
                  prediction=prediction,
                  loss=loss,
                  trainMetrics=trainMetrics,
                  evalMetrics=evalMetrics,
                  testMetrics=testMetrics,
                  mode=mode)

    # Training
    if trainIterator:
        callback = []

        if args.save_model:
            savePath = args.save_model
            modelWriter = OfertasModelWritter(savePath, embeddingLayer,
                                              hiddenLinear, sotmaxLinearInput,
                                              wordEmbedding, labelLexicon,
                                              hiddenActFunctionName)
            callback.append(SaveModelCallback(modelWriter, "eval_acc", True))

        log.info("Training")
        model.train(trainIterator,
                    numEpochs,
                    devIterator,
                    evalPerIteration=evalPerIteration,
                    callbacks=callback)

    # Testing
    if args.test:
        log.info("Reading test examples")
        testReader = OfertasReader(args.test)
        testIterator = SyncBatchIterator(testReader,
                                         inputGenerators,
                                         outputGenerators,
                                         -1,
                                         shuffle=False)

        log.info("Testing")
        model.test(testIterator)

Example #5

Show file

def main():
    """Classify every line of an input file and write the result to Output.txt.

    Command line: ``<JSON config file> <Input file>``. The JSON file is
    parsed into the network arguments; the network (word-window embedding
    lookup -> linear "convolution" -> max pooling -> tanh hidden layer ->
    softmax) is built, optionally loading layer weights from .npy files
    when ``args.train`` is not set. Each line of the input file is then
    written to ``Output.txt`` followed by a tab and the label text chosen
    from the prediction returned by the graph for that line.

    The process exits with status 1 when the arguments are invalid or when
    the output file cannot be created.
    """
    full_path = os.path.realpath(__file__)
    path, filename = os.path.split(full_path)
    logging.config.fileConfig(os.path.join(path, 'logging.conf'), defaults={})
    log = logging.getLogger(__name__)

    if len(sys.argv) != 3:
        log.error("Missing argument: <JSON config file> or/and <Input file>")
        exit(1)

    argsDict = JsonArgParser(PARAMETERS).parse(sys.argv[1])
    args = dict2obj(argsDict, 'ShortDocArguments')
    logging.getLogger(__name__).info(argsDict)

    if args.seed:
        random.seed(args.seed)
        np.random.seed(args.seed)

    lr = args.lr
    startSymbol = args.start_symbol
    endSymbol = args.end_symbol
    numEpochs = args.num_epochs
    shuffle = args.shuffle
    normalizeMethod = args.normalization
    wordWindowSize = args.word_window_size
    hiddenLayerSize = args.hidden_size
    convSize = args.conv_size

    # Load classes for filters.
    filters = []
    for filterName in args.filters:
        moduleName, className = filterName.rsplit('.', 1)
        log.info("Filtro: " + moduleName + " " + className)

        module_ = importlib.import_module(moduleName)
        filters.append(getattr(module_, className)())

    W1 = None
    b1 = None
    W2 = None
    b2 = None

    wordEmbedding = None
    if args.word_embedding:
        log.info("Reading W2v File")
        (wordLexicon,
         wordEmbedding) = Embedding.fromWord2Vec(args.word_embedding,
                                                 unknownSymbol="__UNKNOWN__")
        wordLexicon.stopAdd()
    elif args.word_lexicon and args.word_emb_size:
        wordLexicon = Lexicon.fromTextFile(args.word_lexicon,
                                           hasUnknowSymbol=False)
        wordEmbedding = Embedding(wordLexicon,
                                  embeddingSize=args.word_emb_size)
        wordLexicon.stopAdd()
    else:
        log.error(
            "You must provide argument word_embedding or word_lexicon and word_emb_size"
        )
        # Without an embedding the network cannot be built: wordEmbedding is
        # still None and wordLexicon was never assigned.
        sys.exit(1)

    # Create the lexicon of labels.
    labelLexicon = None
    if args.labels is not None:
        if args.label_lexicon is not None:
            log.error(
                "Only one of the parameters label_lexicon and labels can be provided!"
            )
            exit(1)
        labelLexicon = Lexicon.fromList(args.labels, hasUnknowSymbol=False)
    elif args.label_lexicon is not None:
        labelLexicon = Lexicon.fromTextFile(args.label_lexicon,
                                            hasUnknowSymbol=False)
    else:
        log.error(
            "One of the parameters label_lexicon or labels must be provided!")
        exit(1)

    #
    # Build the network model (Theano graph).
    #

    # TODO: debug
    # theano.config.compute_test_value = 'warn'
    # ex = trainIterator.next()
    # inWords.tag.test_value = ex[0][0]
    # outLabel.tag.test_value = ex[1][0]

    # Input matrix. Each row represents one token of the offer. Each token is
    # represented by a window of tokens (the central token and some
    # neighboring tokens). Each value of this matrix is an index that
    # represents a token in the embedding.
    inWords = tensor.lmatrix("inWords")

    # Correct category of an offer.
    outLabel = tensor.lscalar("outLabel")

    # List of input tensors. One for each input layer.
    inputTensors = [inWords]

    # Whether the word embedding will be updated during training.
    embLayerTrainable = not args.fix_word_embedding

    if not embLayerTrainable:
        log.info("Not updating the word embedding!")

    # Lookup table for word features.
    embeddingLayer = EmbeddingLayer(inWords,
                                    wordEmbedding.getEmbeddingMatrix(),
                                    trainable=embLayerTrainable)

    # if not args.train and args.load_wordEmbedding:
    #     attrs = np.load(args.load_wordEmbedding)
    #     embeddingLayer.load(attrs)
    #     log.info("Loaded word embedding (shape %s) from file %s" % (
    #         str(attrs[0].shape), args.load_wordEmbedding))

    # The lookup table output has 3 dimensions (numTokens, szWindow, szEmbedding).
    # This layer flattens the last two dimensions, producing an output with
    # shape (numTokens, szWindow * szEmbedding).
    flattenInput = FlattenLayer(embeddingLayer)

    # Random weight initialization procedure.
    weightInit = GlorotUniform()

    # Convolution layer. Convolution over the text of an offer.
    convW = None
    convb = None

    if not args.train and args.load_conv:
        convNPY = np.load(args.load_conv)
        convW = convNPY[0]
        convb = convNPY[1]
        log.info("Loaded convolutional layer (shape %s) from file %s" %
                 (str(convW.shape), args.load_conv))

    convLinear = LinearLayer(flattenInput,
                             wordWindowSize * wordEmbedding.getEmbeddingSize(),
                             convSize,
                             W=convW,
                             b=convb,
                             weightInitialization=weightInit)

    # Max pooling layer.
    maxPooling = MaxPoolingLayer(convLinear)

    # Hidden layer.
    if not args.train and args.load_hiddenLayer:
        hiddenNPY = np.load(args.load_hiddenLayer)
        W1 = hiddenNPY[0]
        b1 = hiddenNPY[1]
        log.info("Loaded hidden layer (shape %s) from file %s" %
                 (str(W1.shape), args.load_hiddenLayer))

    hiddenLinear = LinearLayer(maxPooling,
                               convSize,
                               hiddenLayerSize,
                               W=W1,
                               b=b1,
                               weightInitialization=weightInit)

    hiddenAct = ActivationLayer(hiddenLinear, tanh)

    # Linear input of the softmax layer.
    if not args.train and args.load_softmax:
        hiddenNPY = np.load(args.load_softmax)
        W2 = hiddenNPY[0]
        b2 = hiddenNPY[1]
        log.info("Loaded softmax layer (shape %s) from file %s" %
                 (str(W2.shape), args.load_softmax))

    sotmaxLinearInput = LinearLayer(hiddenAct,
                                    hiddenLayerSize,
                                    labelLexicon.getLen(),
                                    W=W2,
                                    b=b2,
                                    weightInitialization=ZeroWeightGenerator())

    # Softmax.
    # softmaxAct = ReshapeLayer(ActivationLayer(sotmaxLinearInput, softmax), (1, -1))
    softmaxAct = ActivationLayer(sotmaxLinearInput, softmax)

    # Prediction layer (argmax).
    prediction = ArgmaxPrediction(None).predict(softmaxAct.getOutput())

    # Loss function.
    # NOTE(review): a mismatch between the number of weights and the number of
    # labels is only logged; execution continues. Confirm whether this should
    # abort like the other argument checks in this function.
    if args.label_weights is not None and len(
            args.label_weights) != labelLexicon.getLen():
        log.error(
            "Number of label weights (%d) is different from number of labels (%d)!"
            % (len(args.label_weights), labelLexicon.getLen()))
    nlloe = NegativeLogLikelihoodOneExample(weights=args.label_weights)
    loss = nlloe.calculateError(softmaxAct.getOutput()[0], prediction,
                                outLabel)

    # Input generators: word window.
    inputGenerators = [
        WordWindowGenerator(wordWindowSize, wordLexicon, filters, startSymbol,
                            endSymbol)
    ]

    # Output generator: generate one label per offer.
    outputGenerators = [TextLabelGenerator(labelLexicon)]
    # outputGenerators = [lambda label: labelLexicon.put(label)]

    evalPerIteration = None

    if normalizeMethod == "minmax":
        log.info("Normalization: minmax")
        wordEmbedding.minMaxNormalization()
    elif normalizeMethod == "mean":
        log.info("Normalization: mean normalization")
        wordEmbedding.meanNormalization()
    elif normalizeMethod == "zscore":
        log.info("Normalization: zscore normalization")
        wordEmbedding.zscoreNormalization()
    elif normalizeMethod:
        log.error("Normalization: unknown value %s" % normalizeMethod)
        sys.exit(1)

    # Learning rate decay.
    decay = None
    if args.decay == "none":
        decay = 0.0
    elif args.decay == "linear":
        decay = 1.0
    else:
        log.error("Unknown decay parameter %s." % args.decay)
        exit(1)

    # Learning algorithm.
    if args.alg == "adagrad":
        log.info("Using Adagrad")
        opt = Adagrad(lr=lr, decay=decay)
    elif args.alg == "sgd":
        log.info("Using SGD")
        opt = SGD(lr=lr, decay=decay)
    else:
        log.error("Unknown algorithm: %s." % args.alg)
        sys.exit(1)

    # TODO: debug
    # opt.lr.tag.test_value = 0.05

    # Printing embedding information.
    dictionarySize = wordEmbedding.getNumberOfVectors()
    embeddingSize = wordEmbedding.getEmbeddingSize()
    log.info("Dictionary size: %d" % dictionarySize)
    log.info("Embedding size: %d" % embeddingSize)
    log.info("Number of categories: %d" % labelLexicon.getLen())

    # TODO: debug
    # mode = theano.compile.debugmode.DebugMode(optimizer=None)
    mode = None
    model = BasicModel(x=inputTensors,
                       y=[outLabel],
                       allLayers=softmaxAct.getLayerSet(),
                       optimizer=opt,
                       prediction=prediction,
                       loss=loss,
                       mode=mode)

    wordWindow = WordWindowGenerator(wordWindowSize, wordLexicon, filters,
                                     startSymbol, endSymbol)

    # GETS HIDDEN LAYER:
    # graph = EmbeddingGraph([inWords], [hiddenAct.getOutput()], wordWindow)

    # GRAPH FOR PREDICTION LAYER
    graph = EmbeddingGraph(inputTensors, prediction, wordWindow, mode)

    # Label text written to the output, indexed by the predicted value.
    lblTxt = ["Sim", "Nao"]

    with open(sys.argv[2]) as inputFile:
        content = inputFile.readlines()
    # Decoding and re-encoding rejects lines that are not valid UTF-8.
    tweets = [line.decode('utf-8').encode('utf-8') for line in content]

    # graph.getResultsFor(t) returns the prediction for a given tweet t.
    try:
        output_file = open("Output.txt", "w")
    except IOError:
        log.exception("Could not create the output file Output.txt")
        print("Falha em criar o arquivo de saida\n")
        # Nothing can be written without the output file.
        sys.exit(1)

    try:
        # The with statement closes the file even when a prediction fails.
        with output_file:
            for t in tweets:
                output_file.write(
                    t.replace('\n', '').replace('\t', '') + "\t " +
                    lblTxt[graph.getResultsFor(t)] + "\n")
    except Exception:
        # Best effort, as before, but the cause is now recorded in the log.
        log.exception("Failed while generating the results")
        print("Erro na geração de resultados\n")
    else:
        print("Resultados gerados com sucesso!\n")

Example #6

Show file

File: wnn.py Project: eraldoluis/lia-pln-deeplearning

def main(args):
    """Build a word-window document classifier and optionally train/test it.

    The network assembled here is: embedding lookup -> flatten -> linear
    "convolution" layer -> max pooling -> hidden layer with tanh activation
    -> softmax. The predicted class is the argmax of the softmax output.

    Args:
        args: configuration object. The attributes read by this function are
            ``seed``, ``lr``, ``start_symbol``, ``end_symbol``, ``num_epochs``,
            ``shuffle``, ``wv_normalization``, ``word_window_size``,
            ``hidden_size``, ``conv_size``, ``filters``, ``word_embedding``,
            ``word_emb_size``, ``labels``, ``fix_word_embedding``, ``train``,
            ``dev``, ``eval_per_iteration``, ``decay``, ``alg`` and ``test``.

    The function returns nothing. It logs an error and calls ``sys.exit(1)``
    when ``eval_per_iteration`` is used without ``dev``, or when the
    normalization method, decay strategy or learning algorithm is unknown.
    """
    log = logging.getLogger(__name__)

    # Seed both the Python and the NumPy generators so runs are repeatable.
    if args.seed:
        random.seed(args.seed)
        np.random.seed(args.seed)

    lr = args.lr
    startSymbol = args.start_symbol
    endSymbol = args.end_symbol
    numEpochs = args.num_epochs
    shuffle = args.shuffle
    normalizeMethod = args.wv_normalization
    wordWindowSize = args.word_window_size
    hiddenLayerSize = args.hidden_size
    convSize = args.conv_size

    # Load classes for filters.
    # Each entry of args.filters is a dotted "module.ClassName" path; the class
    # is imported dynamically and instantiated with no arguments.
    filters = []
    for filterName in args.filters:
        moduleName, className = filterName.rsplit('.', 1)
        log.info("Filtro: " + moduleName + " " + className)
        module_ = importlib.import_module(moduleName)
        filters.append(getattr(module_, className)())

    # No pre-trained weights are supplied for the hidden and softmax layers.
    W1 = None
    b1 = None
    W2 = None
    b2 = None
    hiddenActFunction = tanh

    if args.word_embedding:
        log.info("Reading W2v File")
        (lexicon,
         wordEmbedding) = Embedding.fromWord2Vec(args.word_embedding,
                                                 unknownSymbol='unknown')
        lexicon.stopAdd()
    else:
        # NOTE(review): `lexicon` is only bound in the branch above, but it is
        # used unconditionally further down (WordWindowGenerator and
        # lexicon.stopAdd()). As written, this branch leads to a NameError --
        # confirm where the lexicon of a random embedding should come from.
        wordEmbedding = EmbeddingFactory().createRandomEmbedding(
            args.word_emb_size)

    # Get the inputs and output
    if args.labels:
        labelLexicon = Lexicon.fromTextFile(args.labels, hasUnknowSymbol=False)
    else:
        labelLexicon = Lexicon()

    #
    # Build the network model (Theano graph).
    #

    # TODO: debug
    # theano.config.compute_test_value = 'warn'
    # ex = trainIterator.next()
    # inWords.tag.test_value = ex[0][0]
    # outLabel.tag.test_value = ex[1][0]

    # Input matrix. Each row represents one token of the offer. Each token is
    # represented by a window of tokens (the central token and some
    # neighbouring tokens). Each value of this matrix is an index that
    # represents a token in the embedding.
    inWords = T.lmatrix("inWords")

    # Correct category of an offer.
    outLabel = T.lscalar("outLabel")

    # List of input tensors. One for each input layer.
    inputTensors = [inWords]

    # Whether the word embedding will be updated during training.
    embLayerTrainable = not args.fix_word_embedding

    if not embLayerTrainable:
        log.info("Not updating the word embedding!")

    # Lookup table for word features.
    # NOTE(review): the embedding matrix is read here, before the optional
    # normalization performed further below -- confirm that the layer sees the
    # normalized values.
    embeddingLayer = EmbeddingLayer(inWords,
                                    wordEmbedding.getEmbeddingMatrix(),
                                    trainable=embLayerTrainable)

    # The lookup table output has 3 dimensions (numTokens, szWindow,
    # szEmbedding). This layer flattens the last two dimensions, producing an
    # output of shape (numTokens, szWindow * szEmbedding).
    flattenInput = FlattenLayer(embeddingLayer)

    # Random weight initialization procedure.
    weightInit = SigmoidGlorot(
    ) if hiddenActFunction == sigmoid else GlorotUniform()

    # Convolution layer. Convolution over the text of a document.
    convLinear = LinearLayer(flattenInput,
                             wordWindowSize * wordEmbedding.getEmbeddingSize(),
                             convSize,
                             W=None,
                             b=None,
                             weightInitialization=weightInit)

    # Max pooling layer.
    maxPooling = MaxPoolingLayer(convLinear)

    # Generate word windows.
    wordWindowFeatureGenerator = WordWindowGenerator(wordWindowSize, lexicon,
                                                     filters, startSymbol,
                                                     endSymbol)

    # List of input generators.
    inputGenerators = [wordWindowFeatureGenerator]

    # Hidden layer.
    hiddenLinear = LinearLayer(maxPooling,
                               convSize,
                               hiddenLayerSize,
                               W=W1,
                               b=b1,
                               weightInitialization=weightInit)
    hiddenAct = ActivationLayer(hiddenLinear, hiddenActFunction)

    # Linear input of the softmax layer.
    sotmaxLinearInput = LinearLayer(hiddenAct,
                                    hiddenLayerSize,
                                    labelLexicon.getLen(),
                                    W=W2,
                                    b=b2,
                                    weightInitialization=ZeroWeightGenerator())
    # Softmax.
    # softmaxAct = ReshapeLayer(ActivationLayer(sotmaxLinearInput, softmax), (1, -1))
    softmaxAct = ActivationLayer(sotmaxLinearInput, softmax)

    # Prediction layer (argmax).
    prediction = ArgmaxPrediction(None).predict(softmaxAct.getOutput())

    # Loss function.
    loss = NegativeLogLikelihoodOneExample().calculateError(
        softmaxAct.getOutput()[0], prediction, outLabel)

    # Output generator: generate one label per offer.
    outputGenerators = [TextLabelGenerator(labelLexicon)]

    if args.train:
        trainDatasetReader = DocReader(args.train)

        log.info("Reading training examples...")
        trainIterator = SyncBatchIterator(trainDatasetReader,
                                          inputGenerators,
                                          outputGenerators,
                                          -1,
                                          shuffle=shuffle)
        # Freeze both lexicons once the training examples have been read.
        lexicon.stopAdd()
        labelLexicon.stopAdd()

        # Get dev inputs and output
        dev = args.dev
        evalPerIteration = args.eval_per_iteration
        if not dev and evalPerIteration > 0:
            log.error(
                "Argument eval_per_iteration cannot be used without a dev argument."
            )
            sys.exit(1)

        if dev:
            log.info("Reading development examples")
            devReader = DocReader(args.dev)
            devIterator = SyncBatchIterator(devReader,
                                            inputGenerators,
                                            outputGenerators,
                                            -1,
                                            shuffle=False)
        else:
            devIterator = None
    else:
        trainIterator = None
        devIterator = None

    # Optional normalization of the word embedding; any non-empty value other
    # than the three supported ones aborts the program.
    if normalizeMethod == "minmax":
        log.info("Normalization: minmax")
        wordEmbedding.minMaxNormalization()
    elif normalizeMethod == "mean":
        log.info("Normalization: mean normalization")
        wordEmbedding.meanNormalization()
    elif normalizeMethod == "zscore":
        log.info("Normalization: zscore normalization")
        wordEmbedding.zscoreNormalization()
    elif normalizeMethod:
        log.error("Normalization: unknown value %s" % normalizeMethod)
        sys.exit(1)

    # Learning rate decay.
    if args.decay == "linear":
        decay = 1.0
    elif args.decay == "none":
        decay = 0.0
    else:
        log.error("Unknown decay strategy %s. Expected: none or linear." %
                  args.decay)
        sys.exit(1)

    # Learning algorithm.
    if args.alg == "adagrad":
        log.info("Using Adagrad")
        opt = Adagrad(lr=lr, decay=decay)
    elif args.alg == "sgd":
        log.info("Using SGD")
        opt = SGD(lr=lr, decay=decay)
    else:
        log.error(
            "Unknown algorithm: %s. Expected values are: adagrad or sgd." %
            args.alg)
        sys.exit(1)

    # TODO: debug
    # opt.lr.tag.test_value = 0.05

    # Printing embedding information.
    dictionarySize = wordEmbedding.getNumberOfVectors()
    embeddingSize = wordEmbedding.getEmbeddingSize()
    log.info("Dictionary size: %d" % dictionarySize)
    log.info("Embedding size: %d" % embeddingSize)
    log.info("Number of categories: %d" % labelLexicon.getLen())

    # Train metrics.
    trainMetrics = None
    if trainIterator:
        trainMetrics = [
            LossMetric("TrainLoss", loss),
            AccuracyMetric("TrainAccuracy", outLabel, prediction)
        ]

    # Evaluation metrics.
    evalMetrics = None
    if devIterator:
        evalMetrics = [
            LossMetric("EvalLoss", loss),
            AccuracyMetric("EvalAccuracy", outLabel, prediction)
        ]

    # Test metrics.
    testMetrics = None
    if args.test:
        testMetrics = [
            LossMetric("TestLoss", loss),
            AccuracyMetric("TestAccuracy", outLabel, prediction)
        ]

    # TODO: debug
    # mode = theano.compile.debugmode.DebugMode(optimizer=None)
    mode = None
    model = BasicModel(x=inputTensors,
                       y=[outLabel],
                       allLayers=softmaxAct.getLayerSet(),
                       optimizer=opt,
                       prediction=prediction,
                       loss=loss,
                       trainMetrics=trainMetrics,
                       evalMetrics=evalMetrics,
                       testMetrics=testMetrics,
                       mode=mode)

    # Training
    # `evalPerIteration` is only bound when args.train is set, which is also
    # the only case in which trainIterator is not None.
    if trainIterator:
        log.info("Training")
        model.train(trainIterator,
                    numEpochs,
                    devIterator,
                    evalPerIteration=evalPerIteration)

    # Testing
    if args.test:
        log.info("Reading test examples")
        testReader = DocReader(args.test)
        testIterator = SyncBatchIterator(testReader,
                                         inputGenerators,
                                         outputGenerators,
                                         -1,
                                         shuffle=False)

        log.info("Testing")
        model.test(testIterator)

Example #7

Show file

File: wnn.py Project: eraldoluis/lia-pln-deeplearning

def mainWnn(args):
    """Create lexicons, or build/train/test a window-based token tagger.

    Depending on ``args`` this function either
      * only generates the missing lexicon files (``create_only_lexicon``), or
      * builds a network made of a word embedding window, optionally combined
        with a character convolution (``with_charwnn``) or with hand-crafted
        suffix/capitalization embeddings, an optional hidden layer and a
        softmax layer, and then trains it (``train``), evaluates it (``test``)
        and writes the predicted labels (``print_prediction``).

    Args:
        args: configuration object; it must support ``_replace(**kwargs)``
            because the parameters stored in a loaded model overwrite the
            corresponding attributes.

    Raises:
        Exception: if ``alg`` is not ``window_stn``/``window_word``, if
            hand-crafted features are combined with the charwnn, if a suffix
            lexicon is requested with a non-positive suffix size, or if
            ``lambda_L2`` is set while the hidden layer is disabled.
        NotImplementedError: if the sentence model (``window_stn``) is chosen.

    The function returns None. It calls ``sys.exit(1)`` for an unknown
    normalization method or decay strategy, and returns early (after logging
    an error) when a required lexicon/embedding source is missing.
    """
    ################################################
    # Initializing parameters
    ##############################################
    log = logging.getLogger(__name__)

    if args.seed:
        random.seed(args.seed)
        np.random.seed(args.seed)

    # Parameters that are stored with a saved model (handed to ModelWriter).
    parametersToSaveOrLoad = {"word_filters", "suffix_filters", "char_filters", "cap_filters",
                              "alg", "hidden_activation_function", "word_window_size", "char_window_size",
                              "hidden_size", "with_charwnn", "conv_size", "charwnn_with_act", "suffix_size",
                              "use_capitalization", "start_symbol", "end_symbol", "with_hidden"}

    # Load parameters of the saving model
    if args.load_model:
        persistentManager = H5py(args.load_model)
        savedParameters = json.loads(persistentManager.getAttribute("parameters"))

        # Saved models may still use the key "charwnn_filters"; map it to the
        # "char_filters" name read below.
        if savedParameters.get("charwnn_filters") is not None:
            savedParameters["char_filters"] = savedParameters.pop("charwnn_filters")
            # The parenthesised form prints the same text under Python 2 and 3.
            print(savedParameters)

        log.info("Loading parameters of the model")
        args = args._replace(**savedParameters)

    log.info(str(args))

    # Read the parameters
    lr = args.lr
    startSymbol = args.start_symbol
    endSymbol = args.end_symbol
    numEpochs = args.num_epochs
    shuffle = args.shuffle
    normalizeMethod = args.normalization.lower() if args.normalization is not None else None
    wordWindowSize = args.word_window_size
    hiddenLayerSize = args.hidden_size
    hiddenActFunctionName = args.hidden_activation_function
    embeddingSize = args.word_emb_size

    withCharWNN = args.with_charwnn
    charEmbeddingSize = args.char_emb_size
    charWindowSize = args.char_window_size
    startSymbolChar = "</s>"

    suffixEmbSize = args.suffix_emb_size
    capEmbSize = args.cap_emb_size

    useSuffixFeatures = args.suffix_size > 0
    useCapFeatures = args.use_capitalization

    # Insert the character that will be used to fill the matrix
    # with a dimension lesser than chosen dimension.This enables that the convolution is performed by a matrix multiplication.
    artificialChar = "ART_CHAR"

    # TODO: the maximum number of characters of word is fixed in 20.
    numMaxChar = 20

    if args.alg == "window_stn":
        isSentenceModel = True
    elif args.alg == "window_word":
        isSentenceModel = False
    else:
        raise Exception("The value of model_type isn't valid.")

    batchSize = -1 if isSentenceModel else args.batch_size

    # Reading the wnn (word) filters
    log.info("Lendo filtros básicos")
    wordFilters = getFilters(args.word_filters, log)

    # Reading the charwnn filters
    log.info("Lendo filtros do charwnn")
    charFilters = getFilters(args.char_filters, log)

    # Reading the suffix filters
    log.info("Lendo filtros do sufixo")
    suffixFilters = getFilters(args.suffix_filters, log)

    # Reading the capitalization filters
    log.info("Lendo filtros da capitalização")
    capFilters = getFilters(args.cap_filters, log)

    ################################################
    # Create the lexicon and go out after this
    ################################################
    if args.create_only_lexicon:
        inputGenerators = []
        lexiconsToSave = []

        if args.word_lexicon and not os.path.exists(args.word_lexicon):
            # NOTE(review): this word lexicon is named "labelLexicon" while the
            # training path names it "word_lexicon" -- confirm whether the
            # name matters for the saved text file.
            wordLexicon = Lexicon("UUUNKKK", "labelLexicon")

            inputGenerators.append(
                WordWindowGenerator(wordWindowSize, wordLexicon, wordFilters, startSymbol, endSymbol))
            lexiconsToSave.append((wordLexicon, args.word_lexicon))

        # Guard against an unset path like the other lexicons do;
        # os.path.exists() must not be handed a missing value.
        if args.label_file and not os.path.exists(args.label_file):
            labelLexicon = Lexicon(None, "labelLexicon")
            outputGenerator = [LabelGenerator(labelLexicon)]
            lexiconsToSave.append((labelLexicon, args.label_file))
        else:
            outputGenerator = None

        if args.char_lexicon and not os.path.exists(args.char_lexicon):
            charLexicon = Lexicon("UUUNKKK", "charLexicon")

            charLexicon.put(startSymbolChar)
            charLexicon.put(artificialChar)

            inputGenerators.append(
                CharacterWindowGenerator(charLexicon, numMaxChar, charWindowSize, wordWindowSize, artificialChar,
                                         startSymbolChar, startPaddingWrd=startSymbol, endPaddingWrd=endSymbol,
                                         filters=charFilters))

            lexiconsToSave.append((charLexicon, args.char_lexicon))

        if args.suffix_lexicon and not os.path.exists(args.suffix_lexicon):
            suffixLexicon = Lexicon("UUUNKKK", "suffixLexicon")

            if args.suffix_size <= 0:
                raise Exception(
                    "Unable to generate the suffix lexicon because the suffix is less than or equal to 0.")

            inputGenerators.append(
                SuffixFeatureGenerator(args.suffix_size, wordWindowSize, suffixLexicon, suffixFilters))

            lexiconsToSave.append((suffixLexicon, args.suffix_lexicon))

        if args.cap_lexicon and not os.path.exists(args.cap_lexicon):
            capLexicon = Lexicon("UUUNKKK", "capitalizationLexicon")

            inputGenerators.append(CapitalizationFeatureGenerator(wordWindowSize, capLexicon, capFilters))

            lexiconsToSave.append((capLexicon, args.cap_lexicon))

        if not inputGenerators:
            inputGenerators = None

        if not (inputGenerators or outputGenerator):
            log.info("All lexicons have been generated.")
            return

        # The iterator is never consumed here; presumably building it reads
        # the training set and fills the lexicons through the generators --
        # verify against SyncBatchIterator.
        trainDatasetReader = TokenLabelReader(args.train, args.token_label_separator)
        trainReader = SyncBatchIterator(trainDatasetReader, inputGenerators, outputGenerator, batchSize,
                                        shuffle=shuffle)

        for lexicon, pathToSave in lexiconsToSave:
            lexicon.save(pathToSave)

        log.info("Lexicons were generated with success!")

        return

    ################################################
    # Starting training
    ###########################################

    if withCharWNN and (useSuffixFeatures or useCapFeatures):
        raise Exception("It's impossible to use hand-crafted features with Charwnn.")

    # The L2 penalty below is computed from the hidden layer weights, so it
    # cannot be used without that layer. Fail early with a clear message
    # instead of hitting an unbound `linear1` after the data has been read.
    if args.lambda_L2 and not args.with_hidden:
        raise Exception("lambda_L2 requires the hidden layer: set with_hidden to use L2 regularization.")

    # Read word lexicon and create word embeddings
    if args.load_model:
        wordLexicon = Lexicon.fromPersistentManager(persistentManager, "word_lexicon")
        vectors = EmbeddingLayer.getEmbeddingFromPersistenceManager(persistentManager, "word_embedding_layer")

        wordEmbedding = Embedding(wordLexicon, vectors)

    elif args.word_embedding:
        wordLexicon, wordEmbedding = Embedding.fromWord2Vec(args.word_embedding, "UUUNKKK", "word_lexicon")
    elif args.word_lexicon:
        wordLexicon = Lexicon.fromTextFile(args.word_lexicon, True, "word_lexicon")
        wordEmbedding = Embedding(wordLexicon, vectors=None, embeddingSize=embeddingSize)
    else:
        log.error("You need to set one of these parameters: load_model, word_embedding or word_lexicon")
        return

    # Read char lexicon and create char embeddings
    if withCharWNN:
        if args.load_model:
            charLexicon = Lexicon.fromPersistentManager(persistentManager, "char_lexicon")
            vectors = EmbeddingConvolutionalLayer.getEmbeddingFromPersistenceManager(persistentManager,
                                                                                     "char_convolution_layer")

            charEmbedding = Embedding(charLexicon, vectors)
        elif args.char_lexicon:
            charLexicon = Lexicon.fromTextFile(args.char_lexicon, True, "char_lexicon")
            charEmbedding = Embedding(charLexicon, vectors=None, embeddingSize=charEmbeddingSize)
        else:
            log.error("You need to set one of these parameters: load_model or char_lexicon")
            return
    else:
        # Read suffix lexicon if suffix size is greater than 0
        if useSuffixFeatures:
            if args.load_model:
                suffixLexicon = Lexicon.fromPersistentManager(persistentManager, "suffix_lexicon")
                vectors = EmbeddingConvolutionalLayer.getEmbeddingFromPersistenceManager(persistentManager,
                                                                                         "suffix_embedding")

                suffixEmbedding = Embedding(suffixLexicon, vectors)
            elif args.suffix_lexicon:
                suffixLexicon = Lexicon.fromTextFile(args.suffix_lexicon, True, "suffix_lexicon")
                suffixEmbedding = Embedding(suffixLexicon, vectors=None, embeddingSize=suffixEmbSize)
            else:
                log.error("You need to set one of these parameters: load_model or suffix_lexicon")
                return

        # Read capitalization lexicon
        if useCapFeatures:
            if args.load_model:
                capLexicon = Lexicon.fromPersistentManager(persistentManager, "cap_lexicon")
                vectors = EmbeddingConvolutionalLayer.getEmbeddingFromPersistenceManager(persistentManager,
                                                                                         "cap_embedding")

                capEmbedding = Embedding(capLexicon, vectors)
            elif args.cap_lexicon:
                capLexicon = Lexicon.fromTextFile(args.cap_lexicon, True, "cap_lexicon")
                capEmbedding = Embedding(capLexicon, vectors=None, embeddingSize=capEmbSize)
            else:
                log.error("You need to set one of these parameters: load_model or cap_lexicon")
                return

    # Read labels
    if args.load_model:
        labelLexicon = Lexicon.fromPersistentManager(persistentManager, "label_lexicon")
    elif args.label_file:
        labelLexicon = Lexicon.fromTextFile(args.label_file, False, lexiconName="label_lexicon")
    else:
        # The message names the parameters this branch actually checks.
        log.error("You need to set one of these parameters: load_model or label_file")
        return

    # Normalize the word embedding
    if normalizeMethod == "minmax":
        log.info("Normalization: minmax")
        wordEmbedding.minMaxNormalization()
    elif normalizeMethod == "mean":
        log.info("Normalization: mean normalization")
        wordEmbedding.meanNormalization()
    elif normalizeMethod:
        log.error("Unknown normalization method: %s" % normalizeMethod)
        sys.exit(1)

    if normalizeMethod is not None and args.load_model is not None:
        log.warning("The word embedding of model was normalized. This can change the result of test.")

    # Build neural network
    if isSentenceModel:
        raise NotImplementedError("Sentence model is not implemented!")
    else:
        wordWindow = T.lmatrix("word_window")
        inputModel = [wordWindow]

        wordEmbeddingLayer = EmbeddingLayer(wordWindow, wordEmbedding.getEmbeddingMatrix(), trainable=True,
                                            name="word_embedding_layer")
        flatten = FlattenLayer(wordEmbeddingLayer)

        if withCharWNN:
            # Use the convolution
            log.info("Using charwnn")
            convSize = args.conv_size

            if args.charwnn_with_act:
                charAct = tanh
            else:
                charAct = None

            charWindowIdxs = T.ltensor4(name="char_window_idx")
            inputModel.append(charWindowIdxs)

            charEmbeddingConvLayer = EmbeddingConvolutionalLayer(charWindowIdxs, charEmbedding.getEmbeddingMatrix(),
                                                                 numMaxChar, convSize, charWindowSize,
                                                                 charEmbeddingSize, charAct,
                                                                 name="char_convolution_layer")
            layerBeforeLinear = ConcatenateLayer([flatten, charEmbeddingConvLayer])
            sizeLayerBeforeLinear = wordWindowSize * (wordEmbedding.getEmbeddingSize() + convSize)
        elif useSuffixFeatures or useCapFeatures:
            # Use hand-crafted features
            concatenateInputs = [flatten]
            nmFetauresByWord = wordEmbedding.getEmbeddingSize()

            if useSuffixFeatures:
                log.info("Using suffix features")

                suffixInput = T.lmatrix("suffix_input")
                suffixEmbLayer = EmbeddingLayer(suffixInput, suffixEmbedding.getEmbeddingMatrix(),
                                                name="suffix_embedding")
                suffixFlatten = FlattenLayer(suffixEmbLayer)
                concatenateInputs.append(suffixFlatten)

                nmFetauresByWord += suffixEmbedding.getEmbeddingSize()
                inputModel.append(suffixInput)

            if useCapFeatures:
                log.info("Using capitalization features")

                capInput = T.lmatrix("capitalization_input")
                capEmbLayer = EmbeddingLayer(capInput, capEmbedding.getEmbeddingMatrix(),
                                             name="cap_embedding")
                capFlatten = FlattenLayer(capEmbLayer)
                concatenateInputs.append(capFlatten)

                nmFetauresByWord += capEmbedding.getEmbeddingSize()
                inputModel.append(capInput)

            layerBeforeLinear = ConcatenateLayer(concatenateInputs)
            sizeLayerBeforeLinear = wordWindowSize * nmFetauresByWord
        else:
            # Use only the word embeddings
            layerBeforeLinear = flatten
            sizeLayerBeforeLinear = wordWindowSize * wordEmbedding.getEmbeddingSize()

        # The rest of the NN
        if args.with_hidden:
            hiddenActFunction = method_name(hiddenActFunctionName)
            weightInit = SigmoidGlorot() if hiddenActFunction == sigmoid else GlorotUniform()

            linear1 = LinearLayer(layerBeforeLinear, sizeLayerBeforeLinear, hiddenLayerSize,
                                  weightInitialization=weightInit, name="linear1")
            act1 = ActivationLayer(linear1, hiddenActFunction)

            layerBeforeSoftmax = act1
            sizeLayerBeforeSoftmax = hiddenLayerSize
            log.info("Using hidden layer")
        else:
            layerBeforeSoftmax = layerBeforeLinear
            sizeLayerBeforeSoftmax = sizeLayerBeforeLinear
            log.info("Not using hidden layer")

        linear2 = LinearLayer(layerBeforeSoftmax, sizeLayerBeforeSoftmax, labelLexicon.getLen(),
                              weightInitialization=ZeroWeightGenerator(),
                              name="linear_softmax")
        act2 = ActivationLayer(linear2, softmax)
        prediction = ArgmaxPrediction(1).predict(act2.getOutput())

    # Load the model
    if args.load_model:
        # The word embedding layer was already built from the stored vectors.
        alreadyLoaded = {wordEmbeddingLayer}

        for o in (act2.getLayerSet() - alreadyLoaded):
            if o.getName():
                persistentManager.load(o)

    # Set the input and output
    inputGenerators = [WordWindowGenerator(wordWindowSize, wordLexicon, wordFilters, startSymbol, endSymbol)]

    if withCharWNN:
        inputGenerators.append(
            CharacterWindowGenerator(charLexicon, numMaxChar, charWindowSize, wordWindowSize, artificialChar,
                                     startSymbolChar, startPaddingWrd=startSymbol, endPaddingWrd=endSymbol,
                                     filters=charFilters))
    else:
        if useSuffixFeatures:
            inputGenerators.append(
                SuffixFeatureGenerator(args.suffix_size, wordWindowSize, suffixLexicon, suffixFilters))

        if useCapFeatures:
            inputGenerators.append(CapitalizationFeatureGenerator(wordWindowSize, capLexicon, capFilters))

    outputGenerator = LabelGenerator(labelLexicon)

    if args.train:
        log.info("Reading training examples")

        trainDatasetReader = TokenLabelReader(args.train, args.token_label_separator)
        trainReader = SyncBatchIterator(trainDatasetReader, inputGenerators, [outputGenerator], batchSize,
                                        shuffle=shuffle)

        # Get dev inputs and output
        dev = args.dev

        if dev:
            log.info("Reading development examples")
            devDatasetReader = TokenLabelReader(args.dev, args.token_label_separator)
            devReader = SyncBatchIterator(devDatasetReader, inputGenerators, [outputGenerator], sys.maxint,
                                          shuffle=False)
        else:
            devReader = None
    else:
        trainReader = None
        devReader = None

    y = T.lvector("y")

    # Learning rate decay. An unknown strategy used to leave `decay` unbound
    # and fail with a NameError when the optimizer was created.
    decayStrategy = args.decay.lower()

    if decayStrategy == "normal":
        decay = 0.0
    elif decayStrategy == "divide_epoch":
        decay = 1.0
    else:
        log.error("Unknown decay strategy %s. Expected: normal or divide_epoch." % args.decay)
        sys.exit(1)

    if args.adagrad:
        log.info("Using Adagrad")
        opt = Adagrad(lr=lr, decay=decay)
    else:
        log.info("Using SGD")
        opt = SGD(lr=lr, decay=decay)

    # Printing embedding information
    dictionarySize = wordEmbedding.getNumberOfVectors()

    # Report the size of the embedding actually in use: with a loaded model or
    # a word2vec file it does not have to match args.word_emb_size.
    log.info("Size of  word dictionary and word embedding size: %d and %d" % (
        dictionarySize, wordEmbedding.getEmbeddingSize()))

    if withCharWNN:
        log.info("Size of  char dictionary and char embedding size: %d and %d" % (
            charEmbedding.getNumberOfVectors(), charEmbedding.getEmbeddingSize()))

    if useSuffixFeatures:
        log.info("Size of  suffix dictionary and suffix embedding size: %d and %d" % (
            suffixEmbedding.getNumberOfVectors(), suffixEmbedding.getEmbeddingSize()))

    if useCapFeatures:
        log.info("Size of  capitalization dictionary and capitalization embedding size: %d and %d" % (
            capEmbedding.getNumberOfVectors(), capEmbedding.getEmbeddingSize()))

    # Compiling
    loss = NegativeLogLikelihood().calculateError(act2.getOutput(), prediction, y)

    if args.lambda_L2:
        # `linear1` is guaranteed to exist here: the combination of lambda_L2
        # without a hidden layer was rejected before the network was built.
        _lambda = args.lambda_L2
        log.info("Using L2 with lambda= %.2f", _lambda)
        loss += _lambda * (T.sum(T.square(linear1.getParameters()[0])))

    trainMetrics = [
        LossMetric("LossTrain", loss, True),
        AccuracyMetric("AccTrain", y, prediction),
    ]

    evalMetrics = [
        LossMetric("LossDev", loss, True),
        AccuracyMetric("AccDev", y, prediction),
    ]

    testMetrics = [
        LossMetric("LossTest", loss, True),
        AccuracyMetric("AccTest", y, prediction),
    ]

    wnnModel = BasicModel(inputModel, [y], act2.getLayerSet(), opt, prediction, loss, trainMetrics=trainMetrics,
                          evalMetrics=evalMetrics, testMetrics=testMetrics, mode=None)
    # Training
    if trainReader:
        callback = []

        if args.save_model:
            savePath = args.save_model
            objsToSave = list(act2.getLayerSet()) + [wordLexicon, labelLexicon]

            if withCharWNN:
                objsToSave.append(charLexicon)

            if useSuffixFeatures:
                objsToSave.append(suffixLexicon)

            if useCapFeatures:
                objsToSave.append(capLexicon)

            modelWriter = ModelWriter(savePath, objsToSave, args, parametersToSaveOrLoad)

            # Save the model with best acc in dev
            if args.save_by_acc:
                callback.append(SaveModelCallback(modelWriter, evalMetrics[1], "accuracy", True))

        log.info("Training")
        wnnModel.train(trainReader, numEpochs, devReader, callbacks=callback)

        # Save the model at the end of training
        if args.save_model and not args.save_by_acc:
            modelWriter.save()

    # Testing
    if args.test:
        log.info("Reading test examples")
        testDatasetReader = TokenLabelReader(args.test, args.token_label_separator)
        testReader = SyncBatchIterator(testDatasetReader, inputGenerators, [outputGenerator], sys.maxint, shuffle=False)

        log.info("Testing")
        wnnModel.test(testReader)

        if args.print_prediction:
            # The context manager flushes and closes the output file even if
            # a prediction fails half-way through.
            with codecs.open(args.print_prediction, "w", encoding="utf-8") as f:
                for x, _ in testReader:
                    predictions = wnnModel.prediction(x)

                    # One predicted label per line.
                    for predictedLabel in predictions:
                        f.write(labelLexicon.getLexicon(predictedLabel))
                        f.write("\n")