예제 #1
0
    def __init__(self, size=100, window=2, min_count=1):
        """
        Set up the embedding model: special tokens, index maps, and weights.

        :param size: the required dimension of word vectors
        :param window: the length of context window
        :param min_count: minimum frequency required for a word to be considered part of vocabulary
        """
        self.size = size
        self.window = window
        self.min_count = min_count
        self.loss_history = []

        # The first three vocabulary slots are reserved for special tokens.
        self.start_token = "START_TOKEN"
        self.end_token = "END_TOKEN"
        self.unknown_token = "UNKNOWN_TOKEN"

        specials = (self.start_token, self.end_token, self.unknown_token)
        self.index_to_word = dict(enumerate(specials))
        self.word_to_index = {token: idx for idx, token in enumerate(specials)}

        # Until build_vocab runs, the vocabulary holds only the special tokens;
        # the embedding matrices are sized accordingly and rebuilt later.
        self.vocab_size = len(specials)
        self.W_inp = initializations.uniform_init(shape=(self.vocab_size, size))
        self.W_out = initializations.uniform_init(shape=(self.vocab_size, size))
예제 #2
0
 def initialize_weights(self, inputSize, hiddenSize1, hiddenSize2,
                        num_classes):
     """
     Build and return the parameter dict for a three-layer network.

     Weight matrices W1..W3 use Xavier initialization; bias vectors b1..b3
     use uniform initialization. Layer i maps dims[i-1] -> dims[i].
     """
     dims = (inputSize, hiddenSize1, hiddenSize2, num_classes)
     weights = {}
     # Walk consecutive (fan_in, fan_out) pairs; layer numbering starts at 1.
     for layer, (fan_in, fan_out) in enumerate(zip(dims, dims[1:]), start=1):
         weights['W%d' % layer] = initializations.xavier_init((fan_in, fan_out))
         weights['b%d' % layer] = initializations.uniform_init((fan_out, ))
     return weights
예제 #3
0
    def __init__(self, inputSize, hiddenSize):
        """
        Set up a single-hidden-layer autoencoder.

        :param inputSize: dimensionality of the input (and the reconstruction)
        :param hiddenSize: dimensionality of the hidden representation
        """
        self.inputSize = inputSize
        self.hiddenSize = hiddenSize
        self.params = {}
        self.reg = 1e-5  # L2 regularization strength
        self.loss_history = []

        # Encoder (input -> hidden, ReLU-scaled init) and
        # decoder (hidden -> input, sigmoid-scaled init).
        self.W1 = initializations.xavier_init(shape=(inputSize, hiddenSize), hiddenLayer='relu')
        self.b1 = initializations.uniform_init(shape=(hiddenSize,))
        self.W2 = initializations.xavier_init(shape=(hiddenSize, inputSize), hiddenLayer='sigmoid')
        self.b2 = initializations.uniform_init(shape=(inputSize,))
예제 #4
0
 def __init__(self,
              input_dim,
              hidden_dim,
              output_dim,
              non_linearity='tanh'):
     """
     Initialize a vanilla RNN: recurrent weights plus an output projection.

     :param input_dim: dimensionality of each input vector x_t
     :param hidden_dim: dimensionality of the hidden state h_t
     :param output_dim: dimensionality of the output layer
     :param non_linearity: name of the recurrent activation (default 'tanh')
     """
     self.input_dim = input_dim
     self.hidden_dim = hidden_dim
     # Bug fix: the activation name was stored only under the misspelled
     # attribute 'non_liniearity'. Store it under the correct spelling and
     # keep the old name as a backward-compatible alias for existing callers.
     self.non_linearity = non_linearity
     self.non_liniearity = non_linearity
     # Wx: input -> hidden, Wh: hidden -> hidden (recurrent).
     self.Wx = initializations.xavier_init((input_dim, hidden_dim))
     self.Wh = initializations.xavier_init((hidden_dim, hidden_dim))
     # W1/b1: hidden -> output projection; b: hidden-state bias.
     self.W1 = initializations.xavier_init((hidden_dim, output_dim))
     self.b1 = initializations.uniform_init((output_dim, ))
     self.b = initializations.uniform_init((hidden_dim, ))
     self.loss_history = []
     self.params = {}
예제 #5
0
    def __init__(self, inputSize, hiddenSize1, hiddenSize2, outputSize, hiddenLayer='relu'):
        """
        Construct a fully-connected network with two hidden layers.

        :param inputSize: dimensionality of the input
        :param hiddenSize1: width of the first hidden layer
        :param hiddenSize2: width of the second hidden layer
        :param outputSize: number of output units
        :param hiddenLayer: activation name forwarded to the Xavier initializer
        """
        # Architecture hyper-parameters.
        self.inputSize = inputSize
        self.hiddenSize1 = hiddenSize1
        self.hiddenSize2 = hiddenSize2
        self.outputSize = outputSize
        self.hiddenLayer = hiddenLayer

        # Bookkeeping containers for training diagnostics.
        self.loss_history = []
        self.gradientLayer1 = []
        self.gradientLayer2 = []
        self.params = {}

        # Per-layer parameters: weights via Xavier init (scaled for the chosen
        # activation), biases via uniform init. Layer i maps dims[i-1] -> dims[i].
        dims = (inputSize, hiddenSize1, hiddenSize2, outputSize)
        for layer, (fan_in, fan_out) in enumerate(zip(dims, dims[1:]), start=1):
            setattr(self, 'W%d' % layer,
                    initializations.xavier_init((fan_in, fan_out), hiddenLayer=hiddenLayer))
            setattr(self, 'b%d' % layer,
                    initializations.uniform_init((fan_out,)))
예제 #6
0
    def build_vocab(self, vocab):
        """
        Populate the word/index maps from a corpus and re-initialize the
        embedding matrices to match the resulting vocabulary size.

        :param vocab: list of sentences to be used for training
        """
        # Accumulate token frequencies over every sentence.
        word_counter = Counter()
        for sentence in vocab:
            word_counter.update(nltk.word_tokenize(sentence))

        # Indices 0-2 are reserved for the special tokens set in __init__.
        next_index = 3
        stop_words = set(stopwords.words('english'))
        for word, count in word_counter.items():
            # Skip stopwords and words below the frequency threshold.
            if word in stop_words or count < self.min_count:
                continue
            self.index_to_word[next_index] = word
            self.word_to_index[word] = next_index
            next_index += 1

        # Resize the input/output embedding matrices for the final vocabulary.
        self.vocab_size = len(self.index_to_word)
        self.W_inp = initializations.uniform_init(shape=(self.vocab_size, self.size))
        self.W_out = initializations.uniform_init(shape=(self.vocab_size, self.size))