def __init__(self, size=100, window=2, min_count=1):
    """
    :param size: the required dimension of word vectors
    :param window: the length of context window
    :param min_count: minimum frequency required for a word to be considered part of vocabulary
    """
    self.size = size
    self.window = window
    self.index_to_word = {}
    self.word_to_index = {}
    # The vocabulary starts with only the three special tokens below;
    # build_vocab re-sizes the embedding matrices once the corpus is known.
    vocab_size = 3
    self.vocab_size = vocab_size
    self.W_inp = initializations.uniform_init(shape=(vocab_size, size))
    self.W_out = initializations.uniform_init(shape=(vocab_size, size))
    self.min_count = min_count
    self.start_token = "START_TOKEN"
    self.end_token = "END_TOKEN"
    self.unknown_token = "UNKNOWN_TOKEN"
    self.index_to_word[0] = self.start_token
    self.index_to_word[1] = self.end_token
    self.index_to_word[2] = self.unknown_token
    self.word_to_index[self.start_token] = 0
    self.word_to_index[self.end_token] = 1
    self.word_to_index[self.unknown_token] = 2
    self.loss_history = []
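# A minimal usage sketch of the constructor above. The enclosing class name
# (Word2Vec here) is an assumption; only the attribute names come from the code.
model = Word2Vec(size=50, window=2, min_count=1)

# Before build_vocab runs, only the three special tokens are indexed.
assert model.vocab_size == 3
assert model.word_to_index[model.unknown_token] == 2

# Out-of-vocabulary words can fall back to the UNKNOWN_TOKEN index.
idx = model.word_to_index.get("hello", model.word_to_index[model.unknown_token])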
def initialize_weights(self, inputSize, hiddenSize1, hiddenSize2, num_classes):
    weights = dict()
    weights['W1'] = initializations.xavier_init((inputSize, hiddenSize1))
    weights['b1'] = initializations.uniform_init((hiddenSize1,))
    weights['W2'] = initializations.xavier_init((hiddenSize1, hiddenSize2))
    weights['b2'] = initializations.uniform_init((hiddenSize2,))
    weights['W3'] = initializations.xavier_init((hiddenSize2, num_classes))
    weights['b3'] = initializations.uniform_init((num_classes,))
    return weights
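# A hedged usage sketch of initialize_weights. The receiver name `net`, the layer
# sizes, and the assumption that the repo's initializers return NumPy arrays are
# all illustrative, not taken from the original code.
weights = net.initialize_weights(inputSize=784, hiddenSize1=128,
                                 hiddenSize2=64, num_classes=10)
assert weights['W1'].shape == (784, 128)
assert weights['b2'].shape == (64,)
assert weights['W3'].shape == (64, 10)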
def __init__(self, inputSize, hiddenSize):
    self.inputSize = inputSize
    self.hiddenSize = hiddenSize
    self.loss_history = []
    # Encoder weights (inputSize -> hiddenSize) and decoder weights (hiddenSize -> inputSize).
    self.W1 = initializations.xavier_init(shape=(inputSize, hiddenSize), hiddenLayer='relu')
    self.b1 = initializations.uniform_init(shape=(hiddenSize,))
    self.W2 = initializations.xavier_init(shape=(hiddenSize, inputSize), hiddenLayer='sigmoid')
    self.b2 = initializations.uniform_init(shape=(inputSize,))
    self.params = {}
    self.reg = 1e-5  # L2 regularization strength
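import numpy as np

# One plausible forward pass consistent with the shapes above: a relu encoder
# followed by a sigmoid decoder that reconstructs the input. This function is an
# illustrative assumption, not part of the original class.
def autoencoder_forward(ae, X):
    hidden = np.maximum(0, X @ ae.W1 + ae.b1)                          # (N, hiddenSize), relu
    reconstruction = 1.0 / (1.0 + np.exp(-(hidden @ ae.W2 + ae.b2)))   # (N, inputSize), sigmoid
    return reconstruction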
def __init__(self, input_dim, hidden_dim, output_dim, non_linearity='tanh'):
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.non_linearity = non_linearity
    # Input-to-hidden, hidden-to-hidden (recurrent), and hidden-to-output weights.
    self.Wx = initializations.xavier_init((input_dim, hidden_dim))
    self.Wh = initializations.xavier_init((hidden_dim, hidden_dim))
    self.W1 = initializations.xavier_init((hidden_dim, output_dim))
    self.b1 = initializations.uniform_init((output_dim,))
    self.b = initializations.uniform_init((hidden_dim,))
    self.loss_history = []
    self.params = {}
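import numpy as np

# A sketch of the recurrence these parameter shapes support, assuming the default
# tanh non-linearity: h_t = tanh(x_t @ Wx + h_{t-1} @ Wh + b), y_t = h_t @ W1 + b1.
# The step function itself is illustrative and not part of the original class.
def rnn_step(rnn, x_t, h_prev):
    h_t = np.tanh(x_t @ rnn.Wx + h_prev @ rnn.Wh + rnn.b)  # (N, hidden_dim)
    y_t = h_t @ rnn.W1 + rnn.b1                            # (N, output_dim)
    return h_t, y_t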
def __init__(self, inputSize, hiddenSize1, hiddenSize2, outputSize, hiddenLayer='relu'):
    self.inputSize = inputSize
    self.hiddenSize1 = hiddenSize1
    self.hiddenSize2 = hiddenSize2
    self.outputSize = outputSize
    self.hiddenLayer = hiddenLayer
    self.loss_history = []
    self.gradientLayer1 = []
    self.gradientLayer2 = []
    self.params = {}
    self.W1 = initializations.xavier_init((inputSize, hiddenSize1), hiddenLayer=hiddenLayer)
    self.b1 = initializations.uniform_init((hiddenSize1,))
    self.W2 = initializations.xavier_init((hiddenSize1, hiddenSize2), hiddenLayer=hiddenLayer)
    self.b2 = initializations.uniform_init((hiddenSize2,))
    self.W3 = initializations.xavier_init((hiddenSize2, outputSize), hiddenLayer=hiddenLayer)
    self.b3 = initializations.uniform_init((outputSize,))
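# A hedged shape sanity-check for the three-layer constructor above; the class
# name ThreeLayerNet and the layer sizes are assumptions made for illustration.
net = ThreeLayerNet(inputSize=784, hiddenSize1=256, hiddenSize2=128, outputSize=10)
assert net.W1.shape == (784, 256)
assert net.W2.shape == (256, 128)
assert net.W3.shape == (128, 10)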
def build_vocab(self, vocab):
    """
    :param vocab: list of sentences to be used for training
    """
    # Requires module-level imports:
    #   import nltk
    #   from collections import Counter
    #   from nltk.corpus import stopwords
    word_counter = Counter()
    for sentence in vocab:
        word_list = nltk.word_tokenize(sentence)
        for word in word_list:
            word_counter[word] += 1
    # Indices 0-2 are reserved for START_TOKEN, END_TOKEN and UNKNOWN_TOKEN.
    itr = 3
    stop_words = set(stopwords.words('english'))
    for key, value in word_counter.items():
        if key not in stop_words and value >= self.min_count:
            self.index_to_word[itr] = key
            self.word_to_index[key] = itr
            itr += 1
    self.vocab_size = len(self.index_to_word)
    # Re-initialize the embedding matrices now that the true vocabulary size is known.
    self.W_inp = initializations.uniform_init(shape=(self.vocab_size, self.size))
    self.W_out = initializations.uniform_init(shape=(self.vocab_size, self.size))
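# End-to-end sketch of vocabulary construction; the class name Word2Vec is an
# assumption. nltk.word_tokenize and the stopword list need the nltk data
# packages, installable via nltk.download('punkt') and nltk.download('stopwords').
model = Word2Vec(size=50, window=2, min_count=1)
model.build_vocab(["the quick brown fox jumps", "a lazy dog sleeps"])

# Special tokens keep indices 0-2; surviving corpus words are indexed from 3 on,
# and the embedding matrices are re-sized to the final vocabulary.
assert model.vocab_size >= 3
assert model.W_inp.shape == (model.vocab_size, model.size)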