def __init__(self, L0, U0=None, alpha=0.005, rseed=10):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(LH=(self.hdim, self.hdim),
                      RH=(self.hdim, self.hdim),
                      U=(self.vdim, self.hdim * 2))
    # note that only the L matrices get sparse updates
    param_dims_sparse = dict(LL=L0.shape, RL=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    np.random.seed(rseed)  # be sure to seed this for repeatability!
    self.alpha = alpha

    # Initialize word vectors: either copy the passed L0
    # (and initialize in your notebook) or initialize with gaussian noise.
    # self.sparams.LL = np.random.randn(*L0.shape) * np.sqrt(0.1)
    # self.sparams.RL = np.random.randn(*L0.shape) * np.sqrt(0.1)
    self.sparams.LL = L0.copy()  # copy so LL and RL don't alias the same array
    self.sparams.RL = L0.copy()
    self.params.U = np.random.randn(self.vdim, self.hdim * 2) * np.sqrt(0.1)

    # Initialize the H matrices, as with W and U in part 1
    self.params.LH = random_weight_matrix(self.hdim, self.hdim)
    self.params.RH = random_weight_matrix(self.hdim, self.hdim)
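# Nearly every initializer in this section calls random_weight_matrix, which is
# defined elsewhere (misc.py in some snippets) and not shown here. A minimal
# sketch of one common choice, assuming Xavier/Glorot-style uniform
# initialization; the exact formula in the original helper may differ.
import numpy as np

def random_weight_matrix(m, n):
    """Return an m x n matrix of small random weights."""
    eps = np.sqrt(6.0) / np.sqrt(m + n)
    return np.random.uniform(-eps, eps, size=(m, n))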
def __init__(self, L0, D0, U0=None, alpha=0.005, rseed=10, bptt=1):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    self.ddim = D0.shape[0]  # doc size
    param_dims = dict(H=(self.hdim, self.hdim),
                      U=L0.shape,
                      G=L0.shape)
    # note that only L and D get sparse updates
    param_dims_sparse = dict(L=L0.shape, D=D0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    random.seed(rseed)  # be sure to seed this for repeatability!
    self.bptt = bptt
    self.alpha = alpha

    # Initialize word and document vectors
    self.sparams.L = L0.copy()
    self.sparams.D = D0.copy()
    self.params.U = random.randn(self.vdim, self.hdim) * 0.1

    # Initialize the H and G matrices, as with W and U in part 1
    self.params.H = random_weight_matrix(self.hdim, self.hdim)
    self.params.G = random_weight_matrix(self.vdim, self.hdim)
def __init__(self, L0, Dy=N_ASPECTS * SENT_DIM, U0=None, alpha=0.005, rseed=10, bptt=5):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    self.ydim = Dy
    param_dims = dict(H=(self.hdim, self.hdim),
                      U=(self.ydim, self.hdim),
                      b1=(self.hdim,),
                      b2=(self.ydim,))
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    from misc import random_weight_matrix
    random.seed(rseed)

    # Initialize hyperparameters, word vectors, and weights
    self.bptt = bptt
    self.alpha = alpha
    self.params.H = random_weight_matrix(*self.params.H.shape)
    if U0 is not None:
        self.params.U = U0.copy()
    else:
        self.params.U = random_weight_matrix(*self.params.U.shape)
    self.sparams.L = L0.copy()
    self.params.b1 = zeros((self.hdim,))
    self.params.b2 = zeros((self.ydim,))
def __init__(self, wv, dims=[100, 5], reg=0.1, alpha=0.001, rseed=10):
    """
    Set up classifier: parameters, hyperparameters
    """
    ##
    # Store hyperparameters
    self.lreg = reg        # regularization
    self.alpha = alpha     # default learning rate
    self.nclass = dims[1]  # number of output classes

    ##
    # NNBase stores parameters in a special format
    # for efficiency reasons, and to allow the code
    # to automatically implement gradient checks
    # and training algorithms, independent of the
    # specific model architecture.
    # To initialize, give shapes as if to np.array((m,n))
    param_dims = dict(W=(dims[1], dims[0]),  # 5x100 matrix
                      b=(dims[1],))          # bias vector
    # These parameters have sparse gradients,
    # which is *much* more efficient if only a row
    # at a time gets updated (e.g. word representations)
    param_dims_sparse = dict(L=wv.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    ##
    # Now we can access the parameters using
    # self.params.<name> for normal parameters and
    # self.sparams.<name> for params with sparse gradients,
    # and get access to normal NumPy arrays
    self.sparams.L = wv.copy()  # store own representations
    self.params.W = random_weight_matrix(*self.params.W.shape)
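# A small usage sketch of the NNBase access pattern described in the comments
# above. The class name SoftmaxClassifier and the toy shapes are assumptions
# for illustration; substitute whatever the class above is actually called.
import numpy as np

wv = 0.1 * np.random.randn(1000, 100)      # |V| = 1000 word vectors of dimension 100
clf = SoftmaxClassifier(wv, dims=[100, 5])
print(clf.params.W.shape)                  # (5, 100): dense parameter
print(clf.sparams.L.shape)                 # (1000, 100): parameter with sparse gradients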
def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    # Initialize word vectors: either copy the passed L0 and U0
    # (and initialize in your notebook) or initialize with gaussian noise here
    if U0 is None:
        self.params.U = random.normal(0, 0.1, param_dims["U"])
    else:
        self.params.U = U0.copy()
    if L0 is None:
        self.sparams.L = random.normal(0, 0.1, param_dims_sparse["L"])
    else:
        self.sparams.L = L0.copy()

    # Initialize H matrix, as with W and U in part 1
    self.params.H = random_weight_matrix(*param_dims["H"])

    self.rseed = rseed
    self.bptt = bptt
    self.alpha = alpha
def __init__(self, dims=[100, 5, 100], reg=0.1, alpha=0.001, ro=0.05, rseed=10, beta=0.2):
    """
    Set up autoencoder: parameters, hyperparameters
    """
    ##
    # Store hyperparameters
    self.lreg = reg     # regularization
    self.alpha = alpha  # default learning rate
    self.dims = dims    # todo: move to superclass
    self.ro = ro        # ro, the sparsity target
    self.beta = beta    # sparsity penalty weight

    param_dims = dict(W=(dims[1], dims[0]),
                      b1=(dims[1],),
                      U=(dims[2], dims[1]),
                      b2=(dims[2],),
                      )
    NNBase.__init__(self, param_dims)

    # self.sparams.L = wv.copy()  # store own representations
    self.params.W = random_weight_matrix(*self.params.W.shape)
    self.params.U = random_weight_matrix(*self.params.U.shape)
    self.outputsize = dims[2]
def __init__(self, dims=[100, 20, 20, 5], reg=0.1, alpha=0.001, rseed=10):
    """
    Set up classifier: parameters, hyperparameters
    """
    ##
    # Store hyperparameters
    self.lreg = reg         # regularization
    self.alpha = alpha      # default learning rate
    self.nclass = dims[-1]  # number of output classes
    self.dims = dims        # todo: move to superclass

    param_dims = dict(
        W=(dims[1], dims[0]),
        b1=(dims[1],),
        U=(dims[2], dims[1]),
        b2=(dims[2],),
        G=(dims[3], dims[2]),
        b3=(dims[3],),
    )
    NNBase.__init__(self, param_dims)

    # self.sparams.L = wv.copy()  # store own representations
    self.params.W = random_weight_matrix(*self.params.W.shape)
    self.params.U = random_weight_matrix(*self.params.U.shape)
    self.params.G = random_weight_matrix(*self.params.G.shape)
    self.outputsize = dims[3]
def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):
    self.hdim = L0.shape[1]  # word vector dimensions |D|
    self.vdim = L0.shape[0]  # vocab size |V|
    param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    # Initialize word vectors: either copy the passed L0 and U0
    # (and initialize in your notebook) or initialize with gaussian noise here
    self.sparams.L = 0.1 * random.standard_normal(self.sparams.L.shape)
    self.params.U = 0.1 * random.standard_normal(self.params.U.shape)

    # Initialize H matrix, as with W and U in part 1
    self.params.H = random_weight_matrix(*self.params.H.shape)

    self.bptt = bptt
    self.alpha = alpha
def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(H=(self.hdim, self.hdim),
                      U=(L0.shape if U0 is None else U0.shape))
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    self.alpha = alpha
    self.bptt = bptt

    # Initialize word vectors: either copy the passed L0 and U0
    # (and initialize in your notebook) or initialize with gaussian noise here
    random.seed(rseed)
    sigma = sqrt(0.1)
    self.sparams.L = random.normal(0, sigma, L0.shape)
    self.params.U = random.normal(0, sigma, param_dims['U'])

    # Initialize H matrix, as with W and U in part 1
    self.params.H = random_weight_matrix(*param_dims['H'])

    self.lamb = .0001  # regularization
def __init__(self, L0, U0=None, alpha=0.005, lreg=0.00001, rseed=10, bptt=1):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(H=(self.hdim, self.hdim),
                      W=(self.hdim, self.hdim))  # , U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)
    self.alpha = alpha
    self.lreg = lreg

    #### YOUR CODE HERE ####
    # Initialize word vectors: either copy the passed L0 and U0
    # (and initialize in your notebook) or initialize with gaussian noise here.
    # Initialize the H and W matrices, as with W and U in part 1.
    self.bptt = bptt
    random.seed(rseed)
    self.params.H = random_weight_matrix(*self.params.H.shape)
    self.params.W = random_weight_matrix(*self.params.W.shape)
    # self.params.U = 0.1 * np.random.randn(*L0.shape)
    self.sparams.L = L0.copy()
def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):
    random.seed(rseed)
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    self.sparams.L = L0.copy()
    self.params.H = random_weight_matrix(self.hdim, self.hdim)
    self.alpha = alpha
    self.bptt = bptt

    # Initialize word vectors: either copy the passed U0
    # or initialize with gaussian noise here
    if U0 is not None:
        self.params.U = U0.copy()
    else:
        sigma = 0.1
        mu = 0
        # self.params.U = random.normal(mu, sigma, (self.vdim, self.hdim))
        self.params.U = sigma * random.randn(self.vdim, self.hdim) + mu
def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    # Initialize word vectors: copy the passed L0, and either copy U0
    # or initialize U with gaussian noise here
    self.sparams.L = L0.copy()
    if U0 is None:
        self.params.U = random.normal(0, 0.1, param_dims['U'])
    else:
        self.params.U = U0.copy()

    # Initialize H matrix, as with W and U in part 1
    self.params.H = random_weight_matrix(*param_dims['H'])

    self.alpha = alpha
    # self.rseed = rseed
    self.bptt = bptt
def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)
def grad_check(self, x, y, outfd=sys.stderr, **kwargs):
    bptt_old = self.bptt
    self.bptt = len(y)
    print("NOTE: temporarily setting self.bptt = len(y) = %d to compute true gradient." % self.bptt,
          file=outfd)
    NNBase.grad_check(self, x, y, outfd=outfd, **kwargs)
    self.bptt = bptt_old
    print("Reset self.bptt = %d" % self.bptt, file=outfd)
def __init__(self, wv, windowsize=3, dims=[None, 100, 5], reg=0.001, alpha=0.01, rseed=10):
    """
    Initialize classifier model.

    Arguments:
    wv : initial word vectors (array |V| x n)
        note that this is the transpose of the n x |V| matrix L
        described in the handout; you'll want to keep it in this
        |V| x n form for efficiency reasons, since numpy stores
        matrix rows contiguously.
    windowsize : int, size of context window
    dims : dimensions of [input, hidden, output]
        input dimension can be computed from wv.shape
    reg : regularization strength (lambda)
    alpha : default learning rate
    rseed : random initialization seed

    |V| = size of vocabulary
    n = length of our word vectors
    """
    # Set regularization
    self.lreg = float(reg)
    self.alpha = alpha  # default training rate

    dims[0] = windowsize * wv.shape[1]  # input dimension
    print("input size: %d" % dims[0])
    print("hidden size: %d" % dims[1])
    print("output size: %d" % dims[2])

    param_dims = dict(
        W=(dims[1], dims[0]),
        b1=(dims[1],),
        U=(dims[2], dims[1]),
        b2=(dims[2],),
    )
    param_dims_sparse = dict(L=wv.shape)

    # initialize parameters: don't change this line
    NNBase.__init__(self, param_dims, param_dims_sparse)

    random.seed(rseed)  # be sure to seed this for repeatability!

    #### YOUR CODE HERE ####
    # any other initialization you need
    self.sparams.L = wv.copy()  # the sparse parameter is named L above
    self.params.W = random_weight_matrix(*param_dims["W"])
    self.params.U = random_weight_matrix(*param_dims["U"])
def __init__(self, wdim, hdim=None, odim=2, alpha=0.005, rho=1e-4,
             rseed=10, bptt=1, drop_p=0.5, context=1):
    np.random.seed(rseed)
    self.rseed = rseed
    self.wdim = wdim
    # self.wdim = L.shape[1]  # word vector dimensions
    # self.vdim = L.shape[0]  # vocab size
    if hdim is None:
        hdim = self.wdim
    self.hdim = hdim
    self.odim = odim
    self.context = context

    param_dims = dict(W11=(self.hdim, self.wdim * (1 + self.context * 2)),
                      b11=(self.hdim,),
                      W12=(self.hdim, self.hdim),
                      b12=(self.hdim,),
                      W21=(self.hdim, self.hdim),
                      b21=(self.hdim,),
                      Ws=(self.odim, self.hdim),
                      bs=(self.odim,))
    # Word embeddings are not updated (we pass word vectors X directly),
    # so there are no sparse updates in this model.
    # self.L = L
    # param_dims_sparse = dict(L=L0.shape)
    # NNBase.__init__(self, param_dims, param_dims_sparse)
    NNBase.__init__(self, param_dims)

    #### YOUR CODE HERE ####
    # not recursive yet, but leaving bptt anyway
    self.bptt = bptt
    self.alpha = alpha
    self.rho = rho
    self.drop_p = drop_p  # probability of dropping a word-embedding element during training

    # Initialize weight matrices
    self.params.W11 = random_weight_matrix(*self.params.W11.shape)
    self.params.W12 = random_weight_matrix(*self.params.W12.shape)
    self.params.W21 = random_weight_matrix(*self.params.W21.shape)
    self.params.Ws = random_weight_matrix(*self.params.Ws.shape)

    # Initialize bias vectors
    self.params.b11 = zeros((self.hdim,))
    self.params.b12 = zeros((self.hdim,))
    self.params.b21 = zeros((self.hdim,))
    self.params.bs = zeros((self.odim,))
def grad_check(self, x, y, outfd=sys.stderr, **kwargs):
    """
    Wrapper for gradient check on RNNs;
    ensures that backprop-through-time is run to completion,
    computing the full gradient for the loss
    as summed over the input sequence and predictions.

    Do not modify this function!
    """
    NNBase.grad_check(self, x, y, outfd=outfd, **kwargs)
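# A minimal sketch of how one of the grad_check wrappers above might be
# exercised on a toy model. The class name RNNLM, the vocabulary size, and the
# index sequences are assumptions for illustration, and this presumes the
# model's forward and backward passes are implemented elsewhere.
import numpy as np

L0 = 0.1 * np.random.randn(100, 10)   # |V| = 100 words, hdim = 10
model = RNNLM(L0, alpha=0.005, rseed=10, bptt=4)
x = np.array([3, 17, 42, 7])          # input word indices
y = np.array([17, 42, 7, 5])          # next-word targets, shifted by one
model.grad_check(x, y)                # compares backprop against a numerical gradient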
def __init__(self, wv, windowsize=3, dims=[None, 100, 5], reg=0.001, alpha=0.01, rseed=10):
    """
    Initialize classifier model.

    Arguments:
    wv : initial word vectors (array |V| x n)
        note that this is the transpose of the n x |V| matrix L
        described in the handout; you'll want to keep it in this
        |V| x n form for efficiency reasons, since numpy stores
        matrix rows contiguously.
    windowsize : int, size of context window
    dims : dimensions of [input, hidden, output]
        input dimension can be computed from wv.shape
    reg : regularization strength (lambda)
    alpha : default learning rate
    rseed : random initialization seed
    """
    # Set regularization
    self.lreg = float(reg)
    self.alpha = alpha  # default training rate

    dims[0] = windowsize * wv.shape[1]  # input dimension
    param_dims = dict(W=(dims[1], dims[0]),
                      b1=(dims[1],),
                      U=(dims[2], dims[1]),
                      b2=(dims[2],),
                      )
    param_dims_sparse = dict(L=wv.shape)

    # initialize parameters: don't change this line
    NNBase.__init__(self, param_dims, param_dims_sparse)

    random.seed(rseed)  # be sure to seed this for repeatability!

    #### YOUR CODE HERE ####
    # any other initialization you need
    # self.params, self.grads, self.sparams, and self.sgrads are created by
    # NNBase.__init__ above, which is why they can be used directly here.
    self.sparams.L = wv.copy()
    self.params.U = random_weight_matrix(*param_dims["U"])
    self.params.W = random_weight_matrix(*param_dims["W"])
    # self.params.b1 = zeros(param_dims["b1"])  # biases start at zero by default
    # self.params.b2 = zeros(param_dims["b2"])
    self.windowSize = windowsize
    self.wordVecLen = wv.shape[1]
    self.wordVecNum = wv.shape[0]
def __init__(self, dims=[100, 30, 20, 5], reg=0.1, alpha=0.001, rseed=10,
             activation='tanh', init_weights=[]):
    """
    Set up classifier: parameters, hyperparameters
    """
    ##
    # Store hyperparameters
    self.lreg = reg          # regularization
    self.alpha = alpha       # default learning rate
    self.nclass = dims[-1]   # number of output classes
    self.dims = dims         # todo: move to superclass
    self.outputsize = dims[-1]

    ##
    # We name the parameters W1, b1, W2, b2, W3, b3, ...
    param_dims = {}
    for i in range(1, len(dims)):
        w_param = 'W' + str(i)
        b_param = 'b' + str(i)
        param_dims[w_param] = (dims[i], dims[i - 1])
        param_dims[b_param] = (dims[i],)
    NNBase.__init__(self, param_dims)

    # Set activation function
    if activation == 'tanh':
        self.act = tanh
        self.act_grad = tanhd
    elif activation == 'sigmoid':
        self.act = sigmoid
        self.act_grad = sigmoid_grad
    else:
        raise ValueError('Unknown activation function')

    # self.sparams.L = wv.copy()  # store own representations

    # Initialize weights; layers for which init_weights aren't passed
    # are initialized randomly.
    for i in range(1, len(self.dims)):
        if i - 1 < len(init_weights):
            # we have the corresponding weights passed for this layer
            cur_weight = init_weights[i - 1]
            assert cur_weight.shape == (dims[i], dims[i - 1]), \
                "passed initial weight dimensions don't match"
        else:
            cur_weight = random_weight_matrix(dims[i], dims[i - 1])
        self._set_param('W', i, cur_weight)
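# The loop above relies on a _set_param helper that is not shown in this
# section. A minimal sketch of what such a helper might look like, assuming the
# NNBase parameter store supports attribute assignment by name (as the other
# snippets' self.params.W = ... lines suggest); the real helper may differ.
def _set_param(self, prefix, i, value):
    """Store `value` under the name prefix + str(i), e.g. 'W2'."""
    setattr(self.params, prefix + str(i), value)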
def __init__(self, wv, windowsize=3, dims=[None, 100, 5], reg=0.001, alpha=0.01, rseed=10):
    """
    Initialize classifier model.

    Arguments:
    wv : initial word vectors (array |V| x n)
        note that this is the transpose of the n x |V| matrix L
        described in the handout; you'll want to keep it in this
        |V| x n form for efficiency reasons, since numpy stores
        matrix rows contiguously.
    windowsize : int, size of context window
    dims : dimensions of [input, hidden, output]
        input dimension can be computed from wv.shape
    reg : regularization strength (lambda)
    alpha : default learning rate
    rseed : random initialization seed
    """
    # Set regularization
    self.lreg = float(reg)
    self.alpha = alpha  # default training rate

    dims[0] = windowsize * wv.shape[1]  # input dimension
    param_dims = dict(W1=(dims[1], dims[0]),  # 100 x 150
                      b2=(dims[1],),          # 100 x 1
                      W2=(dims[2], dims[1]),  # 5 x 100
                      b3=(dims[2],),          # 5 x 1
                      )
    param_dims_sparse = dict(L=wv.shape)      # |V| x 50

    # initialize parameters: don't change this line
    NNBase.__init__(self, param_dims, param_dims_sparse)

    random.seed(rseed)  # be sure to seed this for repeatability!

    #### YOUR CODE HERE ####
    self.sparams.L = wv.copy()
    self.params.W1 = random_weight_matrix(*param_dims['W1'])
    self.params.b2 = append([], random_weight_matrix(param_dims['b2'][0], 1))
    self.params.b3 = append([], random_weight_matrix(param_dims['b3'][0], 1))
    self.params.W2 = random_weight_matrix(*param_dims['W2'])

    self.n = wv.shape[1]  # informational
    self.windowsize = windowsize
    self.hidden_units = dims[1]
def grad_check(self, x, y, outfd=sys.stderr, **kwargs):
    """
    Wrapper for gradient check on RNNs;
    ensures that backprop-through-time is run to completion,
    computing the full gradient for the loss
    as summed over the input sequence and predictions.

    Do not modify this function!
    """
    bptt_old = self.bptt
    self.bptt = len(y)
    print("NOTE: temporarily setting self.bptt = len(y) = %d to compute true gradient." % self.bptt,
          file=outfd)
    NNBase.grad_check(self, x, y, outfd=outfd, **kwargs)
    self.bptt = bptt_old
    print("Reset self.bptt = %d" % self.bptt, file=outfd)
def __init__(self, wv, windowsize=3, dims=[None, 100, 5], reg=0.001, alpha=0.01, rseed=10):
    """
    Initialize classifier model.

    Arguments:
    wv : initial word vectors (array |V| x n)
        note that this is the transpose of the n x |V| matrix L
        described in the handout; you'll want to keep it in this
        |V| x n form for efficiency reasons, since numpy stores
        matrix rows contiguously.
    windowsize : int, size of context window
    dims : dimensions of [input, hidden, output]
        input dimension can be computed from wv.shape
    reg : regularization strength (lambda)
    alpha : default learning rate
    rseed : random initialization seed
    """
    # Set regularization
    self.lreg = float(reg)
    self.alpha = alpha  # default training rate
    self.nclass = dims[2]  # number of output classes
    self.D = wv.shape[1]
    self.windowsize = windowsize

    dims[0] = windowsize * wv.shape[1]  # input dimension
    param_dims = dict(W=(dims[1], dims[0]),
                      b1=(dims[1],),
                      U=(dims[2], dims[1]),
                      b2=(dims[2],))
    param_dims_sparse = dict(L=wv.shape)

    # initialize parameters: don't change this line
    NNBase.__init__(self, param_dims, param_dims_sparse)

    random.seed(rseed)  # be sure to seed this for repeatability!

    ##
    # Now we can access the parameters using
    # self.params.<name> for normal parameters and
    # self.sparams.<name> for params with sparse gradients,
    # and get access to normal NumPy arrays
    self.sparams.L = wv.copy()  # store own representations
    self.params.W = random_weight_matrix(*self.params.W.shape)
    # self.params.b1 = zeros(*self.params.b1.shape)  # done automatically!
    self.params.U = random_weight_matrix(*self.params.U.shape)
def __init__(self, wv, windowsize=3, dims=[None, 100, 5], reg=0.001, alpha=0.01, rseed=10):
    """
    Initialize classifier model.

    Arguments:
    wv : initial word vectors (array |V| x n), n = 50
        note that this is the transpose of the n x |V| matrix L
        described in the handout; you'll want to keep it in this
        |V| x n form for efficiency reasons, since numpy stores
        matrix rows contiguously.
    windowsize : int, size of context window
    dims : dimensions of [input, hidden, output]
        input dimension can be computed from wv.shape
    reg : regularization strength (lambda)
    alpha : default learning rate
    rseed : random initialization seed
    """
    # Set regularization
    self.lreg = float(reg)
    self.alpha = alpha  # default training rate
    self.nclass = dims[2]

    # input dimension; wv.shape[1] is the dimension of each word vector
    dims[0] = windowsize * wv.shape[1]  # 3 * 50
    param_dims = dict(
        W=(dims[1], dims[0]),   # 100 x 150
        b1=(dims[1],),
        U=(dims[2], dims[1]),
        b2=(dims[2],))
    param_dims_sparse = dict(L=wv.shape)  # L.shape = (|V|, 50)

    # initialize parameters: don't change this line
    NNBase.__init__(self, param_dims, param_dims_sparse)

    random.seed(rseed)  # be sure to seed this for repeatability!

    self.params.W = random_weight_matrix(*self.params.W.shape)  # 100 x 150
    self.params.U = random_weight_matrix(*self.params.U.shape)  # 5 x 100
    # self.params.b1 = zeros((dims[1],))      # 100 x 1
    # self.params.b2 = zeros((self.nclass,))  # 5 x 1
    self.sparams.L = wv.copy()
def __init__(self, L0, U0=None, alpha=0.005, lreg=0.00001, rseed=10, bptt=1, loadData=False):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(H=(self.hdim, self.hdim),
                      W=(self.hdim, self.hdim),
                      U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)
    self.alpha = alpha
    self.lreg = lreg

    #### YOUR CODE HERE ####
    self.bptt = bptt

    # Either load previously trained parameters from disk,
    # or initialize randomly below.
    if loadData:
        with open("rnnlmWithW_hdim_150_bptt_4.H.npy", "rb") as fid:
            self.params.H = pickle.load(fid)
        with open("rnnlmWithW_hdim_150_bptt_4.W.npy", "rb") as fid:
            self.params.W = pickle.load(fid)
        with open("rnnlmWithW_hdim_150_bptt_4.U.npy", "rb") as fid:
            self.params.U = pickle.load(fid)
        with open("rnnlmWithW_hdim_150_bptt_4.L.npy", "rb") as fid:
            self.sparams.L = pickle.load(fid)
        return

    # Initialize word vectors: copy the passed L0.
    # Initialize the H and W matrices, as with W and U in part 1.
    random.seed(rseed)
    self.params.H = random_weight_matrix(*self.params.H.shape)
    self.params.W = random_weight_matrix(*self.params.W.shape)
    self.params.U = 0.1 * np.random.randn(*L0.shape)
    self.sparams.L = L0.copy()
def __init__(self, wv, windowsize=3, dims=[None, 100, 5], reg=0.001, alpha=0.01, rseed=10):
    """
    Initialize classifier model.

    Arguments:
    wv : initial word vectors (array |V| x n)
        note that this is the transpose of the n x |V| matrix L
        described in the handout; you'll want to keep it in this
        |V| x n form for efficiency reasons, since numpy stores
        matrix rows contiguously.
    windowsize : int, size of context window
    dims : dimensions of [input, hidden, output]
        input dimension can be computed from wv.shape
    reg : regularization strength (lambda)
    alpha : default learning rate
    rseed : random initialization seed
    """
    # Set regularization
    self.lreg = float(reg)
    self.alpha = alpha  # default training rate

    # wv.shape: (100232, 50)
    dims[0] = windowsize * wv.shape[1]  # input dimension 3 * 50 = 150
    param_dims = dict(W=(dims[1], dims[0]),   # W: (100, 150)
                      b1=(dims[1],),          # b1: (100,)
                      U=(dims[2], dims[1]),   # U: (5, 100)
                      b2=(dims[2],),          # b2: (5,)
                      )
    param_dims_sparse = dict(L=wv.shape)      # L: (100232, 50)

    # initialize parameters: don't change this line
    NNBase.__init__(self, param_dims, param_dims_sparse)

    random.seed(rseed)  # be sure to seed this for repeatability!

    #### YOUR CODE HERE ####
    # any other initialization you need
    self.sparams.L = wv.copy()  # store own representations: a (100232, 50) matrix
    self.params.W = random_weight_matrix(*self.params.W.shape)
    self.params.U = random_weight_matrix(*self.params.U.shape)
    self.window_size = windowsize     # 3
    self.word_vec_size = wv.shape[1]  # 50
def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    # random.seed(rseed)
    self.params.U = 0.1 * random.randn(*self.params.U.shape)
    self.sparams.L = 0.1 * random.randn(*self.sparams.L.shape)
    self.params.H = random_weight_matrix(*self.params.H.shape)
    self.bptt = bptt
    self.alpha = alpha
def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    self.sparams.L = random_weight_matrix(*self.sparams.L.shape)
    self.params.U = random_weight_matrix(*self.params.U.shape)  # U is dense, so params, not sparams
    self.params.H = random_weight_matrix(*self.params.H.shape)
    self.bptt = bptt
    self.alpha = alpha
def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    # Initialize word vectors: either copy the passed L0 and U0
    # (and initialize in your notebook) or initialize with gaussian noise here
    self.sparams.L = 0.1 * random.standard_normal(self.sparams.L.shape)
    self.params.U = 0.1 * random.standard_normal(self.params.U.shape)  # U is dense, so params, not sparams
    self.params.H = random_weight_matrix(*self.params.H.shape)
    self.bptt = bptt
    self.alpha = alpha
def __init__(self, wv, windowsize=3, dims=[None, 100, 5], reg=0.001, alpha=0.01, rseed=10):
    """
    Initialize classifier model.

    Arguments:
    wv : initial word vectors (array |V| x n); n is the input word-vector length
        note that this is the transpose of the n x |V| matrix L
        described in the handout; you'll want to keep it in this
        |V| x n form for efficiency reasons, since numpy stores
        matrix rows contiguously.
    windowsize : int, size of context window
    dims : dimensions of [input, hidden, output]
        input dimension can be computed from wv.shape
    reg : regularization strength (lambda)
    alpha : default learning rate
    rseed : random initialization seed
    """
    # Set regularization
    self.lreg = float(reg)
    self.alpha = alpha  # default training rate

    dims[0] = windowsize * wv.shape[1]  # input dimension
    param_dims = dict(W=(dims[1], dims[0]),
                      b1=(dims[1],),
                      U=(dims[2], dims[1]),
                      b2=(dims[2],),
                      )
    param_dims_sparse = dict(L=wv.shape)

    # initialize parameters
    NNBase.__init__(self, param_dims, param_dims_sparse)

    random.seed(rseed)  # be sure to seed this for repeatability!

    # random initialization
    self.sparams.L = wv.copy()  # store own representations
    self.params.U = random_weight_matrix(*self.params.U.shape)
    self.params.W = random_weight_matrix(*self.params.W.shape)
    self.window_size = windowsize
    self.word_vec_size = wv.shape[1]
def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):
    self.vdim = L0.shape[0]  # vocab size
    self.hdim = L0.shape[1]  # word vector dimensions
    param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    # hyperparameters
    self.bptt = bptt
    self.alpha = alpha

    # weights
    self.sparams.L = random.normal(scale=sqrt(0.1), size=(self.vdim, self.hdim))
    self.params.U = random.normal(scale=sqrt(0.1), size=(self.vdim, self.hdim))  # U is dense, so params, not sparams
    self.params.H = random_weight_matrix(self.hdim, self.hdim)
def grad_check(self, X, y, outfd=sys.stderr, **kwargs):
    """
    Wrapper for gradient check on RNNs;
    ensures that backprop-through-time is run to completion,
    computing the full gradient for the loss
    as summed over the input sequence and predictions.

    Do not modify this function!
    """
    # if not recursive yet, this setting of bptt does not matter
    bptt_old = self.bptt
    # single example
    if isinstance(X, ndarray):
        X = [X]
    for i in range(len(X)):
        self.bptt = X[i].shape[0]
        print("NOTE: temporarily setting self.bptt = %d to compute true gradient." % self.bptt,
              file=outfd)
        NNBase.grad_check(self, X[i], y[i], outfd=outfd, **kwargs)
    self.bptt = bptt_old
    print("Reset self.bptt = %d" % self.bptt, file=outfd)
def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):
    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    random.seed(rseed)
    self.bptt = bptt
    self.alpha = alpha
    self.params.H = random_weight_matrix(self.hdim, self.hdim)
    self.params.U = sqrt(0.1) * random.randn(self.vdim, self.hdim)

    # Initialize word vectors
    self.sparams.L = sqrt(0.1) * random.randn(self.vdim, self.hdim)
def __init__(self, L0, **kwargs):
    #### YOUR CODE HERE ####
    # Default hyperparameters; any of these can be overridden via kwargs.
    isCompression = False
    isME = False
    compression_size = 0
    alpha = 0.1
    bptt = 1
    class_size = 2
    U0 = zeros((10, 10))
    Lcluster = zeros(10)
    cwords = zeros((10, 10))
    cfreq = zeros(10)
    ngram_feat = 0
    hash_size = 10000
    gradient_cutoff = 15
    rseed = 0
    rho = 1e-4  # regularization parameter

    for key, value in kwargs.items():
        if key == "U0":
            U0 = value.copy()
        if key == "isCompression":
            isCompression = value
        if key == "compression_size":
            compression_size = value
        if key == "isME":
            isME = value
        if key == "bptt":
            bptt = value
        if key == "alpha":
            alpha = value
        if key == "Lcluster":
            Lcluster = value
        if key == "cwords":
            cwords = value
        if key == "cfreq":
            cfreq = value
        if key == "ngram":
            ngram_feat = value
        if key == "hash_size":
            hash_size = value
        if key == "cutoff":
            gradient_cutoff = value
        if key == "rseed":
            rseed = value
        if key == "class_size":
            class_size = value
        if key == "regular":
            rho = value

    random.seed(rseed)
    self.primes = array([])

    self.hdim = L0.shape[1]  # word vector dimensions
    self.vdim = L0.shape[0]  # vocab size
    self.cdim = compression_size        # compression layer size
    self.isCompression = isCompression  # True for self.cdim > 0
    self.class_size = class_size        # number of word clusters (classes)
    self.Udim = self.vdim + self.class_size
    self.cutoff = gradient_cutoff
    self.isME = isME                    # maximum entropy (direct-connection) features
    self.ngram = ngram_feat
    self.hsize = self.vdim

    param_dims = {}
    if self.isCompression is True:
        if self.isME is True:
            param_dims = dict(H=(self.hdim, self.hdim),
                              C=(self.cdim, self.hdim),
                              U=(self.Udim, self.cdim),
                              word_direct=(self.hsize, self.hsize),
                              cluster_direct=(self.vdim, self.class_size))
        else:
            param_dims = dict(H=(self.hdim, self.hdim),
                              C=(self.cdim, self.hdim),
                              U=(self.Udim, self.cdim))
    else:
        if self.isME is True:
            param_dims = dict(H=(self.hdim, self.hdim),
                              U=(self.Udim, self.hdim),
                              word_direct=(self.hsize, self.hsize),
                              cluster_direct=(self.vdim, self.class_size))
        else:
            param_dims = dict(H=(self.hdim, self.hdim),
                              U=(self.Udim, self.hdim))
    # note that only L gets sparse updates
    param_dims_sparse = dict(L=L0.shape)
    NNBase.__init__(self, param_dims, param_dims_sparse)

    #### YOUR CODE HERE ####
    self.sparams.L = L0.copy()
    self.params.word_direct = zeros((self.hsize, self.hsize))
    self.params.cluster_direct = zeros((self.vdim, self.class_size))

    # word cluster information
    self.Lcluster = Lcluster.copy()  # cluster index for every word
    self.cfreq = cfreq.copy()        # number of words in every cluster
    self.cwords = cwords.copy()      # word indices contained in every cluster
    self.htable = zeros(self.hsize)

    self.params.H = random_weight_matrix(self.hdim, self.hdim)
    self.alpha = alpha
    self.bptt = bptt
    self.rho = rho  # regularization

    if isCompression is True:
        self.params.C = random_weight_matrix(self.cdim, self.hdim)
        # sigma = 0.1
        # self.params.C = sigma * random.uniform(low=-sigma, high=sigma, size=(self.cdim, self.hdim))

    if U0 is not None:
        self.params.U = U0.copy()
    else:
        sigma = 0.1
        mu = 0
        if self.isCompression:
            self.params.U = sigma * random.randn(self.Udim, self.cdim) + mu
        else:
            self.params.U = sigma * random.randn(self.Udim, self.hdim) + mu
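# The long if-chain above scans kwargs once per option. A more compact
# equivalent (a sketch with a hypothetical helper name, not the original
# author's code) uses dict.get with the same defaults; unknown keys are
# silently ignored in both versions:
def _read_options(kwargs):
    return dict(
        isCompression=kwargs.get("isCompression", False),
        isME=kwargs.get("isME", False),
        compression_size=kwargs.get("compression_size", 0),
        alpha=kwargs.get("alpha", 0.1),
        bptt=kwargs.get("bptt", 1),
        class_size=kwargs.get("class_size", 2),
        rseed=kwargs.get("rseed", 0),
        rho=kwargs.get("regular", 1e-4),
    )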
def run(args=None):
    usage = "usage : %prog [options]"
    parser = optparse.OptionParser(usage=usage)

    # parser.add_option("--test", action="store_true", dest="test", default=False)
    # parser.add_option("--plotEpochs", action="store_true", dest="plotEpochs", default=False)
    # parser.add_option("--plotWvecDim", action="store_true", dest="plotWvecDim", default=False)

    # Optimizer
    # minibatch of 0 means no minibatches, just iterate through
    parser.add_option("--minibatch", dest="minibatch", type="int", default=0)
    # parser.add_option("--optimizer", dest="optimizer", type="string", default="adagrad")
    parser.add_option("--epochs", dest="epochs", type="int", default=50)
    parser.add_option("--printevery", dest="printevery", type="int", default=4e4)
    # anneal every this many epochs
    parser.add_option("--annealevery", dest="annealevery", type="int", default=0)
    parser.add_option("--alpha", dest="alpha", type="float", default=0.005)
    parser.add_option("--rho", dest="rho", type="float", default=1e-5)
    parser.add_option("--drop_p", dest="drop_p", type="float", default=0.5)
    parser.add_option("--wdim", dest="wdim", type="int", default=50)
    parser.add_option("--hdim", dest="hdim", type="int", default=200)
    parser.add_option("--odim", dest="odim", type="int", default=2)
    parser.add_option("--rseed", dest="rseed", type="int", default=207)
    parser.add_option("--context", dest="context", type="int", default=1)
    # parser.add_option("--outFile", dest="outFile", type="string", default="models/test.bin")
    # parser.add_option("--inFile", dest="inFile", type="string", default="models/test.bin")
    # parser.add_option("--data", dest="data", type="string", default="train")
    parser.add_option("--model", dest="model", type="string", default="NNMX")

    (opts, args) = parser.parse_args(args)

    # name of folder to store results in
    resfolder = '_'.join('{k}={v}'.format(k=k, v=v) for k, v in vars(opts).items())
    resfolder += '_timestamp={t}'.format(t=time.strftime('%Y%m%d%H%M%S'))
    resfolder = 'results/' + resfolder
    print(resfolder)
    if not os.path.exists(resfolder):
        os.makedirs(resfolder)

    # Set up the training and test data to work with throughout the notebook:
    np.random.seed(opts.rseed)
    all_train_df, y, submit_df = load_raop_data()

    # useful for sklearn scoring
    # roc_scorer = make_scorer(roc_auc_score)

    n_all = all_train_df.shape[0]

    # set up kFolds to be used in the rest of the project
    kf = KFold(n_all, n_folds=10, random_state=opts.rseed)

    body_vecs = Pipeline([
        ('body', ExtractBody()),
        ('vec', PrepAndVectorize(d=opts.wdim)),
    ]).fit_transform(X=all_train_df, y=1)

    for train, test in kf:
        nn = init_model(opts)
        if opts.minibatch == 0:
            idxiter = list(train) * opts.epochs
            annealevery = len(train) * opts.annealevery
            printevery = opts.printevery
        else:
            idxiter = NNBase.randomiter(
                N=opts.epochs * len(train) // opts.minibatch,
                pickfrom=train, batch=opts.minibatch)
            annealevery = len(train) * opts.annealevery // opts.minibatch
            printevery = opts.printevery // opts.minibatch

        nn.train_sgd(body_vecs, y,
                     idxiter=idxiter,
                     devidx=test,
                     savepath=resfolder,
                     costevery=printevery,
                     printevery=printevery,
                     annealevery=annealevery)

    save_all_results(resultpath='results', savepath='result_summary')
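# A hypothetical entry point so the training script above can be run from the
# command line; the script name in the example invocation is an assumption:
#   python run_nnmx.py --epochs 10 --minibatch 32 --hdim 100 --alpha 0.005
if __name__ == "__main__":
    run()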