Example no. 1
def getNamesFromParameters(red):
    # Collect the parameter name nodes from every function definition
    # in a RedBaron syntax tree.
    parameters = []

    for definition in red.find_all("def"):
        for argument in definition.arguments:
            parameters.append(argument.target)

    return parameters
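
A minimal usage sketch (assuming redbaron is installed; the input source string is illustrative):

from redbaron import RedBaron

red = RedBaron("def add(x, y=1):\n    return x + y\n")
parameters = getNamesFromParameters(red)
# Each target is a NameNode; its .value is the plain identifier string.
print([parameter.value for parameter in parameters])  # ['x', 'y']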
Example no. 2

    # Assumes: import theano; import theano.tensor as T; floatX = theano.config.floatX.
    # rnd2, asfx, and empty appear to be project helpers (random 2-D init,
    # cast to floatX, placeholder array).
    def __init__(self, fileEmbeddings, wordEmbeddings, weights=None, contextSize=None, negative=None):
        filesCount, fileEmbeddingSize = fileEmbeddings.shape
        wordsCount, wordEmbeddingSize = wordEmbeddings.shape

        trainWeights = weights is None
        if trainWeights:
            weights = rnd2(fileEmbeddingSize + contextSize * wordEmbeddingSize, wordsCount)
        else:
            featuresCount, activationsCount = weights.shape
            contextSize = (featuresCount - fileEmbeddingSize) // wordEmbeddingSize
            negative = activationsCount - 1

        self.fileEmbeddings = theano.shared(asfx(fileEmbeddings), 'fileEmbeddings', borrow=False)
        self.wordEmbeddings = theano.shared(asfx(wordEmbeddings), 'wordEmbeddings', borrow=False)
        self.weights = theano.shared(asfx(weights), 'weights', borrow=False)

        # Each context row is laid out as:
        # [file index | contextSize word indices | `negative` candidate indices,
        #  the first of which is scored as the positive example].
        fileIndexOffset = 0
        wordIndicesOffset = fileIndexOffset + 1
        indicesOffset = wordIndicesOffset + contextSize

        contexts = T.imatrix('contexts')
        fileIndices = contexts[:,fileIndexOffset:wordIndicesOffset]
        wordIndices = contexts[:,wordIndicesOffset:indicesOffset]
        indices = contexts[:,indicesOffset:indicesOffset + negative]

        files = self.fileEmbeddings[fileIndices]
        fileFeatures = T.flatten(files, outdim=2)
        words = self.wordEmbeddings[wordIndices]
        wordFeatures = T.flatten(words, outdim=2)
        features = T.concatenate([fileFeatures, wordFeatures], axis=1)

        subWeights = self.weights[:,indices].dimshuffle(1, 0, 2)

        probabilities = T.batched_dot(features, subWeights)

        parameters = [self.fileEmbeddings]
        subParameters = [files]
        consider_constant = [self.wordEmbeddings]

        if trainWeights:
            parameters.append(self.weights)
            subParameters.append(subWeights)
        else:
            consider_constant.append(self.weights)

        # Negative-sampling loss per example: -log sigmoid(positive score) minus the
        # row-wise sum of log sigmoid(-score) over the remaining candidates (axis=1,
        # so each example only accumulates its own negatives). The commented-out
        # variant below averages this loss over the batch instead.
        # cost = -T.mean(T.log(T.nnet.sigmoid(probabilities[:,0])) + T.sum(T.log(T.nnet.sigmoid(-probabilities[:,1:])), dtype=floatX, acc_dtype=floatX), dtype=floatX, acc_dtype=floatX)
        cost = -T.log(T.nnet.sigmoid(probabilities[:,0])) - T.sum(T.log(T.nnet.sigmoid(-probabilities[:,1:])), axis=1, dtype=floatX, acc_dtype=floatX)

        learningRate = T.scalar('learningRate', dtype=floatX)

        updates = []
        for p, subP in zip(parameters, subParameters):
            if subP is not None:
                # Update only the rows of p selected by this batch.
                gradient = T.jacobian(cost, wrt=subP, consider_constant=consider_constant)
                update = (p, T.inc_subtensor(subP, -learningRate * gradient))
            else:
                gradient = T.jacobian(cost, wrt=p, consider_constant=consider_constant)
                update = (p, p - learningRate * gradient)

            updates.append(update)

        contextIndex = T.iscalar('contextIndex')
        self.trainingContexts = theano.shared(empty(1,1,1), 'trainingContexts', borrow=False)

        self.trainModel = theano.function(
            inputs=[contextIndex, learningRate],
            outputs=cost,
            updates=updates,
            givens={
                contexts: self.trainingContexts[:,contextIndex]
            }
        )
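
A hypothetical training loop for this batched variant; model, trainingContexts, epochs, and the learning rate are illustrative names, and trainingContexts is assumed to match the dtype of the contexts tensor (int32):

model.trainingContexts.set_value(trainingContexts)  # shape: (batchSize, contextsCount, 1 + contextSize + negative)
for epoch in range(epochs):
    for contextIndex in range(trainingContexts.shape[1]):
        cost = model.trainModel(contextIndex, 0.025)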
Example no. 3
    # Same import and helper assumptions as Example no. 2.
    def __init__(self, fileEmbeddings, wordEmbeddings, weights=None, contextSize=None, negative=None):
        filesCount, fileEmbeddingSize = fileEmbeddings.shape
        wordsCount, wordEmbeddingSize = wordEmbeddings.shape

        trainWeights = weights is None
        if trainWeights:
            weights = rnd2(fileEmbeddingSize + contextSize * wordEmbeddingSize, wordsCount)
        else:
            featuresCount, activationsCount = weights.shape
            contextSize = (featuresCount - fileEmbeddingSize) // wordEmbeddingSize
            negative = activationsCount - 1

        self.fileEmbeddings = theano.shared(asfx(fileEmbeddings), 'fileEmbeddings', borrow=False)
        self.wordEmbeddings = theano.shared(asfx(wordEmbeddings), 'wordEmbeddings', borrow=False)
        self.weights = theano.shared(asfx(weights), 'weights', borrow=False)

        fileIndexOffset = 0
        wordIndicesOffset = fileIndexOffset + 1
        indicesOffset = wordIndicesOffset + contextSize

        contexts = T.imatrix('contexts')
        context = T.flatten(contexts)
        fileIndex = context[fileIndexOffset:wordIndicesOffset]
        wordIndices = context[wordIndicesOffset:indicesOffset]
        indices = context[indicesOffset:indicesOffset + negative]

        file = self.fileEmbeddings[fileIndex]
        fileFeatures = T.flatten(file, outdim=1)
        words = self.wordEmbeddings[wordIndices]
        wordFeatures = T.flatten(words, outdim=1)
        features = T.concatenate([fileFeatures, wordFeatures], axis=0)

        subWeights = self.weights[:,indices]

        probabilities = T.dot(features, subWeights)

        parameters = [self.fileEmbeddings]
        subParameters = [file]
        consider_constant = [self.wordEmbeddings]

        # Note: checking `weights is not None` here would always be true, since
        # weights was reassigned above; use the flag captured before that.
        if trainWeights:
            parameters.append(self.weights)
            subParameters.append(subWeights)
        else:
            consider_constant.append(self.weights)

        cost = -T.mean(T.log(T.nnet.sigmoid(probabilities[0])) + T.sum(T.log(T.nnet.sigmoid(-probabilities[1:]))))

        learningRate = T.scalar('learningRate', dtype=floatX)

        gradients = [T.grad(cost, wrt=subP, consider_constant=consider_constant) for subP in subParameters]
        updates = [(p, T.inc_subtensor(subP, -learningRate * g)) for p, subP, g in zip(parameters, subParameters, gradients)]

        contextIndex = T.iscalar('contextIndex')
        self.trainingContexts = theano.shared(empty(1,1), 'trainingContexts', borrow=False)

        self.trainModel = theano.function(
            inputs=[contextIndex, learningRate],
            outputs=cost,
            updates=updates,
            givens={
                contexts: self.trainingContexts[contextIndex:contextIndex + 1]
            }
        )
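
A similar sketch for this single-context variant, where the shared context array is 2-D and each call consumes one row (names again illustrative, int32 dtype assumed):

model.trainingContexts.set_value(trainingContexts)  # shape: (contextsCount, 1 + contextSize + negative)
for epoch in range(epochs):
    for i in range(trainingContexts.shape[0]):
        cost = model.trainModel(i, 0.025)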