def __init__(self, L0, U0=None, alpha=0.005, rseed=10):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(LH=(self.hdim, self.hdim),
                      RH=(self.hdim, self.hdim),
                      U=(self.vdim, self.hdim * 2))
    # note that only L gets sparse updates
    param_dims_sparse = dict(LL=L0.shape, RL=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    np.random.seed(rseed)  # be sure to seed this for repeatability!
    self.alpha = alpha

    # Initialize word vectors: either copy the passed L0
    # (and initialize in your notebook), or initialize
    # with gaussian noise here, e.g.
    #   self.sparams.LL = np.random.randn(*L0.shape) * np.sqrt(0.1)
    #   self.sparams.RL = np.random.randn(*L0.shape) * np.sqrt(0.1)
    self.sparams.LL = L0
    self.sparams.RL = L0
    self.params.U = np.random.randn(self.vdim, self.hdim * 2) * np.sqrt(0.1)

    # Initialize the hidden-layer matrices, as with W and U in part 1
    self.params.LH = random_weight_matrix(self.hdim, self.hdim)
    self.params.RH = random_weight_matrix(self.hdim, self.hdim)
def __init__(self, vdim, hdim, wdim, alpha=0.005, rho=0.0001, rseed=10):
    # Dimensions
    self.vdim = vdim
    self.hdim = hdim
    self.wdim = wdim

    # Parameters
    np.random.seed(rseed)
    sigma = .1
    self.params = {}
    self.params['L'] = np.random.normal(0, sigma, (wdim, vdim))  # "wide" array
    self.params['Wh'] = random_weight_matrix(hdim, hdim)
    self.params['Wx'] = random_weight_matrix(hdim, wdim)
    self.params['b1'] = np.zeros(hdim)
    # self.params['U'] = random_weight_matrix(vdim, hdim)
    # self.params['b2'] = np.zeros(vdim)

    # Learning rate
    self.alpha = alpha
    # Regularization
    self.rho = rho

    # Store hs and yhats
    self.hs = None
    self.yhats = None
    # grads
    self.grads = {}
def __init__(self, vdim, hdim, wdim, outdim=2, alpha=.005, rho=.0001, mu=0.75, rseed=10):
    # Dimensions
    self.vdim = vdim
    self.hdim = hdim
    self.wdim = wdim
    self.outdim = outdim

    # Parameters
    np.random.seed(rseed)

    # Learning rate
    self.alpha = alpha
    self.mu = mu
    self.rho = rho
    self.rseed = rseed

    ## Theano stuff
    # Params as theano.shared matrices
    self.L = shared(random_weight_matrix(wdim, vdim), name='L')
    # Wx: times character-vector, Wh: times previous-hidden-vector,
    # U: output layer (note: the (wdim, wdim) shape for Wh only matches
    # the recurrence if wdim == hdim)
    self.Wx = shared(random_weight_matrix(hdim, wdim), name='Wx')
    self.Wh = shared(random_weight_matrix(wdim, wdim), name='Wh')
    self.U = shared(random_weight_matrix(outdim, hdim), name='U')
    self.b = shared(np.zeros([outdim, 1]), name='b',
                    broadcastable=(False, True))
    self.params = [self.L, self.Wx, self.Wh, self.U, self.b]
    self.vparams = [0.0 * param.get_value() for param in self.params]

    self.prop_compiled = self.compile_self()
    self.generate_compiled = self.compile_generate()
def __init__(self, hdim, outdim, alpha=0.005, rho=0.0001, rseed=10):
    # Dimensions
    self.hdim = hdim
    self.outdim = outdim

    # Parameters
    np.random.seed(rseed)
    sigma = .1
    self.params = {}
    # self.params['L'] = np.random.normal(0, sigma, (wdim, vdim))  # "wide" array
    self.params['Wh'] = random_weight_matrix(hdim, hdim)
    # self.params['Wx'] = random_weight_matrix(hdim, wdim)  # for now, not using xs
    self.params['b1'] = np.zeros(hdim)
    self.params['U'] = random_weight_matrix(outdim, hdim)
    self.params['b2'] = np.zeros(outdim)

    # Learning rate
    self.alpha = alpha
    # Regularization
    self.rho = rho

    # Store hs and yhats
    self.hs = None
    self.yhats = None
    # grads
    self.grads = {}
def __init__(self, L0, Dy=N_ASPECTS * SENT_DIM, U0=None, alpha=0.005, rseed=10, bptt=5):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    self.ydim = Dy
    param_dims = dict(H=(self.hdim, self.hdim),
                      U=(self.ydim, self.hdim),
                      b1=(self.hdim,),
                      b2=(self.ydim,))
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    from misc import random_weight_matrix
    random.seed(rseed)

    # Initialize word vectors and weights
    self.bptt = bptt
    self.alpha = alpha
    self.params.H = random_weight_matrix(*self.params.H.shape)
    if U0 is not None:
        self.params.U = U0.copy()
    else:
        self.params.U = random_weight_matrix(*self.params.U.shape)
    self.sparams.L = L0.copy()
    self.params.b1 = zeros((self.hdim,))
    self.params.b2 = zeros((self.ydim,))
def __init__(self, vdim, hdim, wdim, alpha=.005, rho=.0001, rseed=10):
    # Dimensions
    self.vdim = vdim
    self.hdim = hdim
    self.wdim = wdim

    # Parameters
    np.random.seed(rseed)

    # Learning rate
    self.alpha = alpha
    # Regularization
    self.rho = rho

    ## Theano stuff
    # Params as theano.shared matrices
    self.L = shared(random_weight_matrix(wdim, vdim), name='L')
    self.Wx = shared(random_weight_matrix(hdim, wdim), name='Wx')
    self.Wh = shared(random_weight_matrix(hdim, hdim), name='Wh')
    self.params = [self.L, self.Wx, self.Wh]
    self.vparams = [0.0 * param.get_value() for param in self.params]
def __init__(self, L0, U0=None, alpha=0.005, lreg=0.00001, rseed=10, bptt=1):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(H=(self.hdim, self.hdim),
                      W=(self.hdim, self.hdim))  # , U=L0.shape
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)
    self.alpha = alpha
    self.lreg = lreg

    #### YOUR CODE HERE ####
    # Initialize word vectors: copy the passed L0.
    # Initialize H and W matrices, as with W and U in part 1.
    self.bptt = bptt
    random.seed(rseed)
    self.params.H = random_weight_matrix(*self.params.H.shape)
    self.params.W = random_weight_matrix(*self.params.W.shape)
    # self.params.U = 0.1 * np.random.randn(*L0.shape)
    self.sparams.L = L0.copy()
def __init__(self, hdim, outdim, alpha=.005, rho=.0001, rseed=10):
    # Dimensions
    self.hdim = hdim
    self.outdim = outdim
    self.out_end = outdim  # the end token

    # Parameters
    np.random.seed(rseed)

    # Learning rate
    self.alpha = alpha
    # Regularization
    self.rho = rho

    ## Theano stuff
    # Params as theano.shared matrices
    # U*: times previous-hidden-vector
    # i: input, f: forget, o: output, c: new-cell
    self.Ui = shared(random_weight_matrix(hdim, hdim), name='Ui')
    self.Uf = shared(random_weight_matrix(hdim, hdim), name='Uf')
    self.Uo = shared(random_weight_matrix(hdim, hdim), name='Uo')
    self.Uc = shared(random_weight_matrix(hdim, hdim), name='Uc')
    self.U = shared(random_weight_matrix(outdim, hdim), name='U')
    self.b = shared(np.zeros([outdim, 1]), name='b',
                    broadcastable=(False, True))
    self.params = [self.Ui, self.Uf, self.Uo, self.Uc, self.U, self.b]
    self.vparams = [0.0 * param.get_value() for param in self.params]
def __init__(self, dims=[100, 20, 20, 5], reg=0.1, alpha=0.001, rseed=10):
    """
    Set up classifier: parameters, hyperparameters
    """
    ##
    # Store hyperparameters
    self.lreg = reg          # regularization
    self.alpha = alpha       # default learning rate
    self.nclass = dims[-1]   # number of output classes (the last layer)
    self.dims = dims         # todo move to superclass

    param_dims = dict(
        W=(dims[1], dims[0]),
        b1=(dims[1],),
        U=(dims[2], dims[1]),
        b2=(dims[2],),
        G=(dims[3], dims[2]),
        b3=(dims[3],),
    )
    NNBase.__init__(self, param_dims)

    # self.sparams.L = wv.copy()  # store own representations
    self.params.W = random_weight_matrix(*self.params.W.shape)
    self.params.U = random_weight_matrix(*self.params.U.shape)
    self.params.G = random_weight_matrix(*self.params.G.shape)
    self.outputsize = dims[3]
def __init__(self, dims=[100, 5, 100], reg=0.1, alpha=0.001, ro=0.05, rseed=10, beta=0.2):
    """
    Set up autoencoder: parameters, hyperparameters
    """
    ##
    # Store hyperparameters
    self.lreg = reg      # regularization
    self.alpha = alpha   # default learning rate
    self.dims = dims     # todo move to superclass
    self.ro = ro         # sparsity target (desired mean hidden activation)
    self.beta = beta     # sparsity penalty weight

    param_dims = dict(W=(dims[1], dims[0]),
                      b1=(dims[1],),
                      U=(dims[2], dims[1]),
                      b2=(dims[2],))
    NNBase.__init__(self, param_dims)

    # self.sparams.L = wv.copy()  # store own representations
    self.params.W = random_weight_matrix(*self.params.W.shape)
    self.params.U = random_weight_matrix(*self.params.U.shape)
    self.outputsize = dims[2]
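# The ro/beta hyperparameters above suggest the standard KL-divergence
# sparsity penalty used in sparse autoencoders. A hedged sketch of that
# assumed cost term (the actual cost function is not shown in this snippet):
import numpy as np

def sparsity_penalty(ro, beta, ro_hat):
    # ro_hat: mean activation of each hidden unit over the batch
    kl = (ro * np.log(ro / ro_hat)
          + (1 - ro) * np.log((1 - ro) / (1 - ro_hat)))
    return beta * np.sum(kl)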
def __init__(self, L0, D0, U0=None, alpha=0.005, rseed=10, bptt=1):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    self.ddim = D0.shape[0]  # number of documents
    param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape, G=L0.shape)
    # note that only L and D get sparse updates
    param_dims_sparse = dict(L=L0.shape, D=D0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    self.bptt = bptt
    self.alpha = alpha

    # Initialize word and document vectors
    self.sparams.L = L0.copy()
    self.sparams.D = D0.copy()
    self.params.U = random.randn(self.vdim, self.hdim) * 0.1

    # Initialize H matrix, as with W and U in part 1
    self.params.H = random_weight_matrix(self.hdim, self.hdim)
    self.params.G = random_weight_matrix(self.vdim, self.hdim)
def __init__(self, hdim, outdim, alpha=.005, rho=.0001, rseed=10):
    # Dimensions
    self.hdim = hdim
    self.outdim = outdim
    self.out_end = outdim  # the end token

    # Parameters
    np.random.seed(rseed)

    # Learning rate
    self.alpha = alpha
    # Regularization
    self.rho = rho

    ## Theano stuff
    # Params as theano.shared matrices
    # U*: times previous-hidden-vector
    # z: update, r: reset, h: new memory content
    self.Uz = shared(random_weight_matrix(hdim, hdim), name='Uz')
    self.Ur = shared(random_weight_matrix(hdim, hdim), name='Ur')
    self.Uh = shared(random_weight_matrix(hdim, hdim), name='Uh')
    self.U = shared(random_weight_matrix(outdim, hdim), name='U')
    self.b = shared(np.zeros([outdim, 1]), name='b',
                    broadcastable=(False, True))
    self.params = [self.Uz, self.Ur, self.Uh, self.U, self.b]
    self.vparams = [0.0 * param.get_value() for param in self.params]
def __init__(self, hdim, outdim, alpha=.005, rho=.0001, rseed=10):
    # Dimensions
    self.hdim = hdim
    self.outdim = outdim
    self.out_end = outdim  # the end token

    # Parameters
    np.random.seed(rseed)

    # Learning rate
    self.alpha = alpha
    # Regularization
    self.rho = rho

    ## Theano stuff
    # Params as theano.shared matrices
    self.Wh = shared(random_weight_matrix(hdim, hdim), name='Wh')
    self.U = shared(random_weight_matrix(outdim, hdim), name='U')
    self.b = shared(np.zeros([outdim, 1]), name='b',
                    broadcastable=(False, True))
    self.params = [self.Wh, self.U, self.b]
    self.vparams = [0.0 * param.get_value() for param in self.params]
def __init__(self, wv, windowsize=3, dims=[None, 100, 5],
             reg=0.001, alpha=0.01, rseed=10):
    """
    Initialize classifier model.

    Arguments:
    wv : initial word vectors (array |V| x n)
        note that this is the transpose of the n x |V| matrix L
        described in the handout; you'll want to keep it in
        this |V| x n form for efficiency reasons, since numpy
        stores matrix rows contiguously.
    windowsize : int, size of context window
    dims : dimensions of [input, hidden, output]
        input dimension can be computed from wv.shape
    reg : regularization strength (lambda)
    alpha : default learning rate
    rseed : random initialization seed

    |V| = size of vocabulary
    n = length of our word vectors
    """
    # Set regularization
    self.lreg = float(reg)
    self.alpha = alpha  # default training rate

    dims[0] = windowsize * wv.shape[1]  # input dimension
    print "input size: %d" % dims[0]
    print "hidden size: %d" % dims[1]
    print "output size: %d" % dims[2]

    param_dims = dict(
        W=(dims[1], dims[0]),
        b1=(dims[1],),
        U=(dims[2], dims[1]),
        b2=(dims[2],),
    )
    param_dims_sparse = dict(L=wv.shape)

    # initialize parameters: don't change this line
    NNBase.__init__(self, param_dims, param_dims_sparse)

    random.seed(rseed)  # be sure to seed this for repeatability!

    #### YOUR CODE HERE ####
    # store the word vectors in the sparse parameter L declared above
    self.sparams.L = wv.copy()
    self.params.W = random_weight_matrix(*param_dims["W"])
    self.params.U = random_weight_matrix(*param_dims["U"])
def __init__(self, wdim, hdim=None, odim=2, alpha=0.005, rho=1e-4,
             rseed=10, bptt=1, drop_p=0.5, context=1):
    np.random.seed(rseed)
    self.rseed = rseed
    self.wdim = wdim  # word vector dimensions
    if hdim is None:
        hdim = self.wdim
    self.hdim = hdim
    self.odim = odim
    self.context = context

    param_dims = dict(W11=(self.hdim, self.wdim * (1 + self.context * 2)),
                      b11=(self.hdim,),
                      W12=(self.hdim, self.hdim),
                      b12=(self.hdim,),
                      W21=(self.hdim, self.hdim),
                      b21=(self.hdim,),
                      Ws=(self.odim, self.hdim),
                      bs=(self.odim,))
    # word embeddings are not updated: the model is passed word vectors X
    # directly, so there are no sparse parameters in this model
    NNBase.__init__(self, param_dims)

    #### YOUR CODE HERE ####
    # not recursive yet, but leaving bptt anyway
    self.bptt = bptt
    self.alpha = alpha
    self.rho = rho
    self.drop_p = drop_p  # probability of dropping a word embedding element during training

    # Initialize weight matrices
    self.params.W11 = random_weight_matrix(*self.params.W11.shape)
    self.params.W12 = random_weight_matrix(*self.params.W12.shape)
    self.params.W21 = random_weight_matrix(*self.params.W21.shape)
    self.params.Ws = random_weight_matrix(*self.params.Ws.shape)

    # Initialize bias vectors
    self.params.b11 = zeros((self.hdim))
    self.params.b12 = zeros((self.hdim))
    self.params.b21 = zeros((self.hdim))
    self.params.bs = zeros((self.odim))
def __init__(self, vdim, hdim, wdim, alpha=.005, rho=.0001, rseed=10):
    # Dimensions
    self.vdim = vdim
    self.hdim = hdim
    self.wdim = wdim

    # Parameters
    np.random.seed(rseed)

    # Learning rate
    self.alpha = alpha
    # Regularization
    self.rho = rho

    ## Theano stuff
    # Params as theano.shared matrices
    self.L = shared(random_weight_matrix(wdim, vdim), name='L')
    # W: times character-vector, U: times previous-hidden-vector
    # i: input, f: forget, o: output, c: new-cell
    self.Wi = shared(random_weight_matrix(hdim, wdim), name='Wi')
    self.Ui = shared(random_weight_matrix(hdim, hdim), name='Ui')
    self.Wf = shared(random_weight_matrix(hdim, wdim), name='Wf')
    self.Uf = shared(random_weight_matrix(hdim, hdim), name='Uf')
    self.Wo = shared(random_weight_matrix(hdim, wdim), name='Wo')
    self.Uo = shared(random_weight_matrix(hdim, hdim), name='Uo')
    self.Wc = shared(random_weight_matrix(hdim, wdim), name='Wc')
    self.Uc = shared(random_weight_matrix(hdim, hdim), name='Uc')
    self.params = [self.L, self.Wi, self.Ui, self.Wf, self.Uf,
                   self.Wo, self.Uo, self.Wc, self.Uc]
    self.vparams = [0.0 * param.get_value() for param in self.params]
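# For reference, a hedged numpy sketch of the LSTM recurrence these
# parameters define, following the i/f/o/c naming in the comments above
# (the actual Theano scan step is not shown in these snippets):
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step(x, h_prev, c_prev, Wi, Ui, Wf, Uf, Wo, Uo, Wc, Uc):
    i = sigmoid(Wi.dot(x) + Ui.dot(h_prev))      # input gate
    f = sigmoid(Wf.dot(x) + Uf.dot(h_prev))      # forget gate
    o = sigmoid(Wo.dot(x) + Uo.dot(h_prev))      # output gate
    c_new = np.tanh(Wc.dot(x) + Uc.dot(h_prev))  # new-cell candidate
    c = f * c_prev + i * c_new                   # updated cell state
    h = o * np.tanh(c)                           # updated hidden state
    return h, c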
def __init__(self, wv, windowsize=3, dims=[None, 100, 5],
             reg=0.001, alpha=0.01, rseed=10):
    """
    Initialize classifier model.

    Arguments:
    wv : initial word vectors (array |V| x n)
        note that this is the transpose of the n x |V| matrix L
        described in the handout; you'll want to keep it in
        this |V| x n form for efficiency reasons, since numpy
        stores matrix rows contiguously.
    windowsize : int, size of context window
    dims : dimensions of [input, hidden, output]
        input dimension can be computed from wv.shape
    reg : regularization strength (lambda)
    alpha : default learning rate
    rseed : random initialization seed
    """
    # Set regularization
    self.lreg = float(reg)
    self.alpha = alpha  # default training rate

    dims[0] = windowsize * wv.shape[1]  # input dimension
    param_dims = dict(W=(dims[1], dims[0]),
                      b1=(dims[1],),
                      U=(dims[2], dims[1]),
                      b2=(dims[2],))
    param_dims_sparse = dict(L=wv.shape)

    # initialize parameters: don't change this line
    NNBase.__init__(self, param_dims, param_dims_sparse)
    random.seed(rseed)  # be sure to seed this for repeatability!

    #### YOUR CODE HERE ####
    # self.params / self.sparams / self.grads / self.sgrads are created by
    # NNBase.__init__ above, which is why they can be used directly here
    self.sparams.L = wv.copy()
    self.params.U = random_weight_matrix(*param_dims["U"])
    self.params.W = random_weight_matrix(*param_dims["W"])
    # b1 and b2 are zero-initialized by NNBase

    self.windowSize = windowsize
    self.wordVecLen = wv.shape[1]
    self.wordVecNum = wv.shape[0]
def __init__(self, wv, windowsize=3, dims=[None, 100, 5],
             reg=0.001, alpha=0.01, rseed=10):
    """
    Initialize classifier model.

    Arguments:
    wv : initial word vectors (array |V| x n)
        note that this is the transpose of the n x |V| matrix L
        described in the handout; you'll want to keep it in
        this |V| x n form for efficiency reasons, since numpy
        stores matrix rows contiguously.
    windowsize : int, size of context window
    dims : dimensions of [input, hidden, output]
        input dimension can be computed from wv.shape
    reg : regularization strength (lambda)
    alpha : default learning rate
    rseed : random initialization seed
    """
    # Set regularization
    self.lreg = float(reg)
    self.alpha = alpha  # default training rate

    dims[0] = windowsize * wv.shape[1]  # input dimension
    param_dims = dict(W1=(dims[1], dims[0]),  # 100 x 150
                      b2=(dims[1],),          # 100 x 1
                      W2=(dims[2], dims[1]),  # 5 x 100
                      b3=(dims[2],),          # 5 x 1
                      )
    param_dims_sparse = dict(L=wv.shape)  # |V| x 50

    # initialize parameters: don't change this line
    NNBase.__init__(self, param_dims, param_dims_sparse)
    random.seed(rseed)  # be sure to seed this for repeatability!

    #### YOUR CODE HERE ####
    self.sparams.L = wv.copy()
    self.params.W1 = random_weight_matrix(*param_dims['W1'])
    self.params.b2 = append([], random_weight_matrix(param_dims['b2'][0], 1))
    self.params.b3 = append([], random_weight_matrix(param_dims['b3'][0], 1))
    self.params.W2 = random_weight_matrix(*param_dims['W2'])

    self.n = wv.shape[1]  # word vector length, informational
    self.windowsize = windowsize
    self.hidden_units = dims[1]
def __init__(self, wv, windowsize=3, dims=[None, 100, 5],
             reg=0.001, alpha=0.01, rseed=10):
    """
    Initialize classifier model.

    Arguments:
    wv : initial word vectors (array |V| x n, here n = 50)
        note that this is the transpose of the n x |V| matrix L
        described in the handout; you'll want to keep it in
        this |V| x n form for efficiency reasons, since numpy
        stores matrix rows contiguously.
    windowsize : int, size of context window
    dims : dimensions of [input, hidden, output]
        input dimension can be computed from wv.shape
    reg : regularization strength (lambda)
    alpha : default learning rate
    rseed : random initialization seed
    """
    # Set regularization
    self.lreg = float(reg)
    self.alpha = alpha  # default training rate
    self.nclass = dims[2]

    # input dimension: windowsize concatenated word vectors
    dims[0] = windowsize * wv.shape[1]  # 3 * 50 = 150

    param_dims = dict(W=(dims[1], dims[0]),   # 100 x 150
                      b1=(dims[1],),
                      U=(dims[2], dims[1]),   # 5 x 100
                      b2=(dims[2],))
    param_dims_sparse = dict(L=wv.shape)      # |V| x 50

    # initialize parameters: don't change this line
    NNBase.__init__(self, param_dims, param_dims_sparse)
    random.seed(rseed)  # be sure to seed this for repeatability!

    self.params.W = random_weight_matrix(*self.params.W.shape)  # 100 x 150
    self.params.U = random_weight_matrix(*self.params.U.shape)  # 5 x 100
    # b1 (100,) and b2 (5,) are zero-initialized by NNBase
    self.sparams.L = wv.copy()
def __init__(self, wv, windowsize=3, dims=[None, 100, 5],
             reg=0.001, alpha=0.01, rseed=10):
    """
    Initialize classifier model.

    Arguments:
    wv : initial word vectors (array |V| x n)
        note that this is the transpose of the n x |V| matrix L
        described in the handout; you'll want to keep it in
        this |V| x n form for efficiency reasons, since numpy
        stores matrix rows contiguously.
    windowsize : int, size of context window
    dims : dimensions of [input, hidden, output]
        input dimension can be computed from wv.shape
    reg : regularization strength (lambda)
    alpha : default learning rate
    rseed : random initialization seed
    """
    # Set regularization
    self.lreg = float(reg)
    self.alpha = alpha  # default training rate
    self.nclass = dims[2]  # number of output classes
    self.D = wv.shape[1]
    self.windowsize = windowsize

    dims[0] = windowsize * wv.shape[1]  # input dimension
    param_dims = dict(W=(dims[1], dims[0]),
                      b1=(dims[1],),
                      U=(dims[2], dims[1]),
                      b2=(dims[2],))
    param_dims_sparse = dict(L=wv.shape)

    # initialize parameters: don't change this line
    NNBase.__init__(self, param_dims, param_dims_sparse)
    random.seed(rseed)  # be sure to seed this for repeatability!

    ##
    # Now we can access the parameters using
    #   self.params.<name>  for normal parameters
    #   self.sparams.<name> for params with sparse gradients
    # and get access to normal NumPy arrays
    self.sparams.L = wv.copy()  # store own representations
    self.params.W = random_weight_matrix(*self.params.W.shape)
    # self.params.b1 = zeros(*self.params.b1.shape)  # done automatically!
    self.params.U = random_weight_matrix(*self.params.U.shape)
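# A minimal smoke test for this window classifier (toy shapes; the class
# name WindowMLP matches the import in the driver script further below):
import numpy as np

wv_toy = 0.1 * np.random.randn(200, 50)  # toy |V| x n word vectors
model = WindowMLP(wv_toy, windowsize=3, dims=[None, 100, 5])
assert model.params.W.shape == (100, 150)  # hidden x (windowsize * n)
assert model.params.U.shape == (5, 100)    # output x hidden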
def __init__(self, L0, U0=None, alpha=0.005, lreg=0.00001,
             rseed=10, bptt=1, loadData=False):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(H=(self.hdim, self.hdim),
                      W=(self.hdim, self.hdim),
                      U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)
    self.alpha = alpha
    self.lreg = lreg

    #### YOUR CODE HERE ####
    # Initialize weights: either load pickled weights from a previous run,
    # or initialize randomly below (as with W and U in part 1).
    self.bptt = bptt
    if loadData:
        with open("rnnlmWithW_hdim_150_bptt_4.H.npy") as fid:
            self.params.H = pickle.load(fid)
        with open("rnnlmWithW_hdim_150_bptt_4.W.npy") as fid:
            self.params.W = pickle.load(fid)
        with open("rnnlmWithW_hdim_150_bptt_4.U.npy") as fid:
            self.params.U = pickle.load(fid)
        with open("rnnlmWithW_hdim_150_bptt_4.L.npy") as fid:
            self.sparams.L = pickle.load(fid)
        return

    random.seed(rseed)
    self.params.H = random_weight_matrix(*self.params.H.shape)
    self.params.W = random_weight_matrix(*self.params.W.shape)
    self.params.U = 0.1 * np.random.randn(*L0.shape)
    self.sparams.L = L0.copy()
def __init__(self, vdim, hdim, wdim, outdim=2, alpha=.005, rho=.0001, mu=0.75, rseed=10):
    # Dimensions
    self.vdim = vdim
    assert (wdim == hdim)  # so it follows the form of everything else
    # self.hdim = hdim
    self.wdim = wdim
    self.outdim = outdim

    # Parameters
    np.random.seed(rseed)

    # Learning rate
    self.alpha = alpha
    self.mu = mu
    self.rho = rho
    self.rseed = rseed

    ## Theano stuff
    # Params as theano.shared matrices
    self.L = shared(random_weight_matrix(wdim, vdim), name='L')
    # no recurrent Wx/Wh here; the word vectors feed a single hidden layer
    # self.Wx = shared(random_weight_matrix(hdim, wdim), name='Wx')
    # self.Wh = shared(random_weight_matrix(wdim, wdim), name='Wh')
    self.U = shared(random_weight_matrix(hdim, wdim), name='U')
    self.b = shared(np.zeros([hdim, 1]), name='b',
                    broadcastable=(False, True))
    # second layer
    self.U2 = shared(random_weight_matrix(outdim, hdim), name='U2')
    self.b2 = shared(np.zeros([outdim, 1]), name='b2',
                     broadcastable=(False, True))
    self.params = [self.L, self.U, self.b, self.U2, self.b2]
    self.vparams = [0.0 * param.get_value() for param in self.params]

    self.prop_compiled = self.compile_self()
    self.generate_compiled = self.compile_generate()
def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):
    self.hdim = L0.shape[1]  # word vector dimensions |D|
    self.vdim = L0.shape[0]  # vocab size |V|
    param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    # Initialize word vectors with gaussian noise
    # (alternatively, copy the passed L0 and U0)
    self.sparams.L = 0.1 * random.standard_normal(self.sparams.L.shape)
    self.params.U = 0.1 * random.standard_normal(self.params.U.shape)

    # Initialize H matrix, as with W and U in part 1
    self.params.H = random_weight_matrix(*self.params.H.shape)

    self.bptt = bptt
    self.alpha = alpha
def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):
    random.seed(rseed)
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    self.sparams.L = L0.copy()
    self.params.H = random_weight_matrix(self.hdim, self.hdim)
    self.alpha = alpha
    self.bptt = bptt

    # Initialize U: either copy the passed U0,
    # or initialize with gaussian noise
    if U0 is not None:
        self.params.U = U0.copy()
    else:
        sigma = 0.1
        mu = 0
        # self.params.U = random.normal(mu, sigma, (self.vdim, self.hdim))
        self.params.U = sigma * random.randn(self.vdim, self.hdim) + mu
def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    # Initialize word vectors: copy the passed L0; copy U0 if given,
    # otherwise initialize U with gaussian noise
    self.sparams.L = L0.copy()
    if U0 is None:
        self.params.U = random.normal(0, 0.1, param_dims['U'])
    else:
        self.params.U = U0.copy()

    # Initialize H matrix, as with W and U in part 1
    self.params.H = random_weight_matrix(*param_dims['H'])

    self.alpha = alpha
    # self.rseed = rseed
    self.bptt = bptt
def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(H=(self.hdim, self.hdim),
                      U=(L0.shape if U0 is None else U0.shape))
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    self.alpha = alpha
    self.bptt = bptt

    # Initialize word vectors with gaussian noise
    # (alternatively, copy the passed L0 and U0)
    random.seed(rseed)
    sigma = sqrt(0.1)
    self.sparams.L = random.normal(0, sigma, L0.shape)
    self.params.U = random.normal(0, sigma, param_dims['U'])

    # Initialize H matrix, as with W and U in part 1
    self.params.H = random_weight_matrix(*param_dims['H'])

    self.lamb = .0001  # regularization
def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    # Initialize word vectors: either copy the passed L0 and U0,
    # or initialize with gaussian noise
    if U0 is None:
        # pass the shape tuple itself as the size argument
        self.params.U = random.normal(0, 0.1, param_dims["U"])
    else:
        self.params.U = U0.copy()
    if L0 is None:
        self.sparams.L = random.normal(0, 0.1, param_dims_sparse["L"])
    else:
        self.sparams.L = L0.copy()

    # Initialize H matrix, as with W and U in part 1
    self.params.H = random_weight_matrix(*param_dims["H"])

    self.rseed = rseed
    self.bptt = bptt
    self.alpha = alpha
def __init__(self, wv, dims=[100, 5], reg=0.1, alpha=0.001, rseed=10):
    """
    Set up classifier: parameters, hyperparameters
    """
    ##
    # Store hyperparameters
    self.lreg = reg        # regularization
    self.alpha = alpha     # default learning rate
    self.nclass = dims[1]  # number of output classes

    ##
    # NNBase stores parameters in a special format
    # for efficiency reasons, and to allow the code
    # to automatically implement gradient checks
    # and training algorithms, independent of the
    # specific model architecture
    # To initialize, give shapes as if to np.array((m,n))
    param_dims = dict(W=(dims[1], dims[0]),  # 5x100 matrix
                      b=(dims[1],))          # column vector
    # These parameters have sparse gradients,
    # which is *much* more efficient if only a row
    # at a time gets updated (e.g. word representations)
    param_dims_sparse = dict(L=wv.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    ##
    # Now we can access the parameters using
    #   self.params.<name>  for normal parameters
    #   self.sparams.<name> for params with sparse gradients
    # and get access to normal NumPy arrays
    self.sparams.L = wv.copy()  # store own representations
    self.params.W = random_weight_matrix(*self.params.W.shape)
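# A hedged usage sketch (toy shapes; SoftmaxRegression is the class name
# used in the driver script further below):
import numpy as np

wv_toy = 0.1 * np.random.randn(500, 100)  # toy |V| x n word vectors
clf = SoftmaxRegression(wv_toy, dims=[100, 5])
assert clf.params.W.shape == (5, 100)
assert clf.sparams.L.shape == wv_toy.shape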
def __init__(self, wv, windowsize=3, dims=[None, 100, 5],
             reg=0.001, alpha=0.01, rseed=10):
    """
    Initialize classifier model.

    Arguments:
    wv : initial word vectors (array |V| x n)
        note that this is the transpose of the n x |V| matrix L
        described in the handout; you'll want to keep it in
        this |V| x n form for efficiency reasons, since numpy
        stores matrix rows contiguously.
    windowsize : int, size of context window
    dims : dimensions of [input, hidden, output]
        input dimension can be computed from wv.shape
    reg : regularization strength (lambda)
    alpha : default learning rate
    rseed : random initialization seed
    """
    # Set regularization
    self.lreg = float(reg)
    self.alpha = alpha  # default training rate

    # wv.shape: (100232, 50)
    dims[0] = windowsize * wv.shape[1]  # input dimension: 3 * 50 = 150
    param_dims = dict(W=(dims[1], dims[0]),  # W: (100, 150)
                      b1=(dims[1],),         # b1: (100,)
                      U=(dims[2], dims[1]),  # U: (5, 100)
                      b2=(dims[2],))         # b2: (5,)
    param_dims_sparse = dict(L=wv.shape)     # L: (100232, 50)

    # initialize parameters: don't change this line
    NNBase.__init__(self, param_dims, param_dims_sparse)
    random.seed(rseed)  # be sure to seed this for repeatability!

    #### YOUR CODE HERE ####
    self.sparams.L = wv.copy()  # store own representations, (100232, 50)
    self.params.W = random_weight_matrix(*self.params.W.shape)
    self.params.U = random_weight_matrix(*self.params.U.shape)
    self.window_size = windowsize     # 3
    self.word_vec_size = wv.shape[1]  # 50
def __init__(self, vdim, hdim, wdim, alpha=.005, rho=.0001, rseed=10):
    # Dimensions
    self.vdim = vdim
    self.hdim = hdim
    self.wdim = wdim

    # Parameters
    np.random.seed(rseed)

    # Learning rate
    self.alpha = alpha
    # Regularization
    self.rho = rho

    ## Theano stuff
    # Params as theano.shared matrices
    self.L = shared(random_weight_matrix(wdim, vdim), name='L')
    # W: times character-vector, U: times previous-hidden-vector
    # z: update, r: reset, h: new memory content (my notation)
    self.Wz = shared(random_weight_matrix(hdim, wdim), name='Wz')
    self.Uz = shared(random_weight_matrix(hdim, hdim), name='Uz')
    self.Wr = shared(random_weight_matrix(hdim, wdim), name='Wr')
    self.Ur = shared(random_weight_matrix(hdim, hdim), name='Ur')
    self.Wh = shared(random_weight_matrix(hdim, wdim), name='Wh')
    self.Uh = shared(random_weight_matrix(hdim, hdim), name='Uh')
    self.params = [self.L, self.Wz, self.Uz, self.Wr, self.Ur,
                   self.Wh, self.Uh]
    self.vparams = [0.0 * param.get_value() for param in self.params]
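# For reference, a hedged numpy sketch of the GRU recurrence these
# parameters define, following the z/r/h naming in the comments above
# (the actual Theano scan step is not shown in these snippets):
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def gru_step(x, h_prev, Wz, Uz, Wr, Ur, Wh, Uh):
    z = sigmoid(Wz.dot(x) + Uz.dot(h_prev))          # update gate
    r = sigmoid(Wr.dot(x) + Ur.dot(h_prev))          # reset gate
    h_new = np.tanh(Wh.dot(x) + Uh.dot(r * h_prev))  # new memory content
    return (1 - z) * h_prev + z * h_new              # interpolated hidden state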
def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    self.sparams.L = random_weight_matrix(*self.sparams.L.shape)
    # U is a dense parameter, so it lives on self.params (not self.sparams)
    self.params.U = random_weight_matrix(*self.params.U.shape)
    self.params.H = random_weight_matrix(*self.params.H.shape)
    self.bptt = bptt
    self.alpha = alpha
def __init__(self, vdim, hdim, wdim, alpha=.005, rho=.0001, rseed=10):
    # Dimensions
    self.vdim = vdim
    self.hdim = hdim
    self.wdim = wdim

    # Parameters
    np.random.seed(rseed)

    # Learning rate
    self.alpha = alpha
    # Regularization
    self.rho = rho

    ## Theano stuff
    # Params as theano.shared matrices
    # W for x, V for Lxm, U for h (small), S for h (big), R for hm
    self.L = shared(random_weight_matrix(wdim, vdim), name='L')
    self.W = shared(random_weight_matrix(hdim, wdim), name='W')
    self.V = shared(random_weight_matrix(hdim, wdim), name='V')
    self.U = shared(random_weight_matrix(hdim, hdim), name='U')
    self.S = shared(random_weight_matrix(hdim, hdim), name='S')
    self.R = shared(random_weight_matrix(hdim, hdim), name='R')
    self.params = [self.L, self.W, self.V, self.U, self.S, self.R]
    self.vparams = [0.0 * param.get_value() for param in self.params]
def __init__(self, dims=[100, 30, 20, 5], reg=0.1, alpha=0.001,
             rseed=10, activation='tanh', init_weights=[]):
    """
    Set up classifier: parameters, hyperparameters
    """
    ##
    # Store hyperparameters
    self.lreg = reg           # regularization
    self.alpha = alpha        # default learning rate
    self.nclass = dims[-1]    # number of output classes
    self.dims = dims          # todo move to superclass
    self.outputsize = dims[-1]

    ## We name the parameters as follows:
    # W1, b1, W2, b2, W3, b3, ...
    param_dims = {}
    for i in range(1, len(dims)):
        param_dims['W' + str(i)] = (dims[i], dims[i - 1])
        param_dims['b' + str(i)] = (dims[i],)
    NNBase.__init__(self, param_dims)

    # Set activation function
    if activation == 'tanh':
        self.act = tanh
        self.act_grad = tanhd
    elif activation == 'sigmoid':
        self.act = sigmoid
        self.act_grad = sigmoid_grad
    else:
        raise ValueError('Unknown activation function: %s' % activation)

    # Initialize weights; layers for which init_weights aren't passed
    # are initialized randomly
    for i in range(1, len(self.dims)):
        if i - 1 < len(init_weights):
            # we have the corresponding weights passed for this layer
            cur_weight = init_weights[i - 1]
            assert cur_weight.shape == (dims[i], dims[i - 1]), (
                "passed initial weight dimensions don't match")
        else:
            cur_weight = random_weight_matrix(dims[i], dims[i - 1])
        self._set_param('W', i, cur_weight)
def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    # random.seed(rseed)
    self.params.U = 0.1 * random.randn(*self.params.U.shape)
    self.sparams.L = 0.1 * random.randn(*self.sparams.L.shape)
    self.params.H = random_weight_matrix(*self.params.H.shape)
    self.bptt = bptt
    self.alpha = alpha
def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    # Initialize word vectors with gaussian noise
    # (alternatively, copy the passed L0 and U0)
    self.sparams.L = 0.1 * random.standard_normal(self.sparams.L.shape)
    # U is a dense parameter, so it lives on self.params (not self.sparams)
    self.params.U = 0.1 * random.standard_normal(self.params.U.shape)
    self.params.H = random_weight_matrix(*self.params.H.shape)
    self.bptt = bptt
    self.alpha = alpha  # store the learning rate, as the other variants do
def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    random.seed(rseed)
    self.bptt = bptt
    self.alpha = alpha
    self.params.H = random_weight_matrix(self.hdim, self.hdim)
    self.params.U = sqrt(0.1) * random.randn(self.vdim, self.hdim)

    # Initialize word vectors
    self.sparams.L = sqrt(0.1) * random.randn(self.vdim, self.hdim)
def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):
    self.vdim = L0.shape[0]  # vocab size
    self.hdim = L0.shape[1]  # word vector dimensions
    param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    # hyperparameters
    self.bptt = bptt
    self.alpha = alpha

    # weights
    self.sparams.L = random.normal(scale=sqrt(0.1), size=(self.vdim, self.hdim))
    # U is a dense parameter, so it lives on self.params (not self.sparams)
    self.params.U = random.normal(scale=sqrt(0.1), size=(self.vdim, self.hdim))
    self.params.H = random_weight_matrix(self.hdim, self.hdim)
def __init__(self, vdim, hdim, wdim, outdim=2, alpha=.005, rho=.0001, mu=0.75, rseed=10):
    # Dimensions
    self.vdim = vdim
    self.hdim = hdim
    self.wdim = wdim
    self.outdim = outdim

    # Parameters
    np.random.seed(rseed)

    # Learning rate
    self.alpha = alpha
    self.mu = mu
    self.rho = rho
    self.rseed = rseed

    ## Theano stuff
    self.L = shared(random_weight_matrix(wdim, vdim), name='L')
    # W: times character-vector, U: times previous-hidden-vector
    # z: update, r: reset, h: new memory content (my notation)
    self.Wz = shared(random_weight_matrix(hdim, wdim), name='Wz')
    self.Uz = shared(random_weight_matrix(hdim, hdim), name='Uz')
    self.Wr = shared(random_weight_matrix(hdim, wdim), name='Wr')
    self.Ur = shared(random_weight_matrix(hdim, hdim), name='Ur')
    self.Wh = shared(random_weight_matrix(hdim, wdim), name='Wh')
    self.Uh = shared(random_weight_matrix(hdim, hdim), name='Uh')
    self.U = shared(random_weight_matrix(outdim, hdim), name='U')
    self.b = shared(np.zeros([outdim, 1]), name='b',
                    broadcastable=(False, True))
    self.params = [self.L, self.Wz, self.Uz, self.Wr, self.Ur,
                   self.Wh, self.Uh, self.U, self.b]
    self.vparams = [0.0 * param.get_value() for param in self.params]

    self.prop_compiled = self.compile_self()
    self.generate_compiled = self.compile_generate()
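# The zero-initialized vparams above are velocity buffers; together with mu
# (momentum) and alpha (learning rate) they suggest a classical-momentum SGD
# update. A hedged sketch of one such step (the training loop itself is not
# shown in these snippets):
def momentum_step(model, grads):
    # grads: list of numpy arrays, one per entry of model.params
    for param, v, g in zip(model.params, model.vparams, grads):
        v *= model.mu         # decay the old velocity in place
        v -= model.alpha * g  # accumulate the new (scaled) gradient
        param.set_value(param.get_value() + v)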
def test_random_weight_matrix():
    from misc import random_weight_matrix
    A = random_weight_matrix(100, 100)
    assert (A.shape == (100, 100))
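# misc.py is not shown in these snippets; one plausible implementation
# consistent with the calls above is Xavier/Glorot uniform initialization
# (an assumption, not necessarily the actual code in misc.py):
import numpy as np

def random_weight_matrix(m, n):
    # scale chosen so activations neither blow up nor vanish across layers
    eps = np.sqrt(6.0 / (m + n))
    return np.random.uniform(-eps, eps, size=(m, n))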
import sys, os
import numpy as np
import matplotlib
from matplotlib.pyplot import *
matplotlib.rcParams['savefig.dpi'] = 100

from misc import random_weight_matrix
import data_utils.utils as du
import data_utils.ner as ner
from softmax_example import SoftmaxRegression
from nerwindow import WindowMLP

if __name__ == '__main__':
    np.random.seed(10)
    print random_weight_matrix(3, 5)

    # Load the starter word vectors
    # wv: matrix with word vectors, N x D
    wv, word_to_num, num_to_word = ner.load_wv('data/ner/vocab.txt',
                                               'data/ner/wordVectors.txt')
    tagnames = ["O", "LOC", "MISC", "ORG", "PER"]
    num_to_tag = dict(enumerate(tagnames))
    tag_to_num = du.invert_dict(num_to_tag)
    windowsize = 3

    # Load the training set
    # docs = [[sentence1 = [w1, tag1], [w2, tag2], ...], [sentence2], ...]
    docs = du.load_dataset('data/ner/train')
    X_train, y_train = du.docs_to_windows(docs, word_to_num, tag_to_num,
                                          wsize=windowsize)
def _acc_grads(self, window, label):
    """
    Accumulate gradients, given a training point
    (window, label) of the format

    window = [x_{i-1} x_{i} x_{i+1}]  # three ints
    label = {0,1,2,3,4}               # single int, gives class

    Your code should update self.grads and self.sgrads,
    in order for gradient_check and training to work.
    So, for example:
    self.grads.U += (your gradient dJ/dU)
    self.sgrads.L[i] = (gradient dJ/dL[i])  # this adds an update for that index
    """
    #### YOUR CODE HERE ####
    ##
    # Forward propagation
    x = concatenate([self.sparams.L[i] for i in window])  # concatenated window vectors
    z1 = dot(self.params.W, x) + self.params.b1
    h = tanh(z1)  # equivalently, 2*sigmoid(2*z1) - 1
    z2 = dot(self.params.U, h) + self.params.b2
    y = softmax(z2)

    ##
    # Backpropagation
    t = zeros_like(y)
    t[label] = 1
    delta2 = y - t  # dJ/dz2
    self.grads.U += outer(delta2, h)
    self.grads.b2 += delta2

    delta1 = dot(self.params.U.T, delta2) * (1.0 - h ** 2)  # tanh'(z1) = 1 - h^2
    self.grads.W += outer(delta1, x)
    self.grads.b1 += delta1

    # Sparse updates for the word vectors in this window
    dx = dot(self.params.W.T, delta1)  # dJ/dx
    n = self.sparams.L.shape[1]        # word vector length
    for k, idx in enumerate(window):
        self.sgrads.L[idx] = dx[k * n:(k + 1) * n]
if __name__ == '__main__' and __package__ is None:
    from os import sys, path
    sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
    sys.dont_write_bytecode = True

import sys, os
from numpy import *
from matplotlib.pyplot import *
matplotlib.rcParams['savefig.dpi'] = 100

from misc import random_weight_matrix
random.seed(10)
print random_weight_matrix(3, 5)

import data_utils.utils as du
import data_utils.ner as ner

# Load the starter word vectors
wv, word_to_num, num_to_word = ner.load_wv('data/ner/vocab.txt',
                                           'data/ner/wordVectors.txt')
tagnames = ["O", "LOC", "MISC", "ORG", "PER"]
num_to_tag = dict(enumerate(tagnames))
tag_to_num = du.invert_dict(num_to_tag)

# Set window size
windowsize = 3

# Load the training set
docs = du.load_dataset('data/ner/train')
def __init__(self, L0, **kwargs):
    #### YOUR CODE HERE ####
    # Default hyperparameters, overridable via **kwargs
    isCompression = False
    isME = False
    compression_size = 0
    alpha = 0.1
    bptt = 1
    class_size = 2
    U0 = zeros((10, 10))
    Lcluster = zeros(10)
    cwords = zeros((10, 10))
    cfreq = zeros(10)
    ngram_feat = 0
    hash_size = 10000
    gradient_cutoff = 15
    rseed = 0
    rho = 1e-4  # regularization param

    for key, value in kwargs.items():
        if key == "U0":
            U0 = value.copy()
        if key == "isCompression":
            isCompression = value
        if key == "compression_size":
            compression_size = value
        if key == "isME":
            isME = value
        if key == "bptt":
            bptt = value
        if key == "alpha":
            alpha = value
        if key == "Lcluster":
            Lcluster = value
        if key == "cwords":
            cwords = value
        if key == "cfreq":
            cfreq = value
        if key == "ngram":
            ngram_feat = value
        if key == "hash_size":
            hash_size = value
        if key == "cutoff":
            gradient_cutoff = value
        if key == "rseed":
            rseed = value
        if key == "class_size":
            class_size = value
        if key == "regular":
            rho = value

    random.seed(rseed)
    self.primes = array([])

    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    self.cdim = compression_size        # compression layer size
    self.isCompression = isCompression  # True for self.cdim > 0
    self.class_size = class_size        # number of word clusters
    self.Udim = self.vdim + self.class_size
    self.cutoff = gradient_cutoff
    self.isME = isME  # max-entropy (direct n-gram) features
    self.ngram = ngram_feat
    self.hsize = self.vdim

    if self.isCompression:
        if self.isME:
            param_dims = dict(H=(self.hdim, self.hdim),
                              C=(self.cdim, self.hdim),
                              U=(self.Udim, self.cdim),
                              word_direct=(self.hsize, self.hsize),
                              cluster_direct=(self.vdim, self.class_size))
        else:
            param_dims = dict(H=(self.hdim, self.hdim),
                              C=(self.cdim, self.hdim),
                              U=(self.Udim, self.cdim))
    else:
        if self.isME:
            param_dims = dict(H=(self.hdim, self.hdim),
                              U=(self.Udim, self.hdim),
                              word_direct=(self.hsize, self.hsize),
                              cluster_direct=(self.vdim, self.class_size))
        else:
            param_dims = dict(H=(self.hdim, self.hdim),
                              U=(self.Udim, self.hdim))

    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    self.sparams.L = L0.copy()
    self.params.word_direct = zeros((self.hsize, self.hsize))
    self.params.cluster_direct = zeros((self.vdim, self.class_size))

    # word cluster information
    self.Lcluster = Lcluster.copy()  # cluster index for every word
    self.cfreq = cfreq.copy()        # number of words in each cluster
    self.cwords = cwords.copy()      # word indices in each cluster
    self.htable = zeros(self.hsize)

    self.params.H = random_weight_matrix(self.hdim, self.hdim)
    self.alpha = alpha
    self.bptt = bptt
    self.rho = rho  # regularization

    if isCompression:
        self.params.C = random_weight_matrix(self.cdim, self.hdim)
        # sigma = 0.1
        # self.params.C = random.uniform(low=-sigma, high=sigma, size=(self.cdim, self.hdim))

    if U0 is not None:
        self.params.U = U0.copy()
    else:
        sigma = 0.1
        mu = 0
        if self.isCompression:
            self.params.U = sigma * random.randn(self.Udim, self.cdim) + mu
        else:
            self.params.U = sigma * random.randn(self.Udim, self.hdim) + mu