Example #1
    def __init__(self, L0, U0=None,
                 alpha=0.005, rseed=10):

        self.hdim = L0.shape[1] # word vector dimensions
        self.vdim = L0.shape[0] # vocab size
        param_dims = dict(LH = (self.hdim, self.hdim),
                          RH = (self.hdim, self.hdim),
                          U = (self.vdim, self.hdim * 2))
        # note that only L gets sparse updates
        param_dims_sparse = dict(LL = L0.shape, RL = L0.shape)
        NNBase.__init__(self, param_dims, param_dims_sparse)

        #### YOUR CODE HERE ####
        np.random.seed(rseed) # be sure to seed this for repeatability!
        self.alpha = alpha

        # Initialize word vectors
        # either copy the passed L0 and U0 (and initialize in your notebook)
        # or initialize with gaussian noise here
        #self.sparams.LL = np.random.randn(*L0.shape) * np.sqrt(0.1)
        #self.sparams.RL = np.random.randn(*L0.shape) * np.sqrt(0.1)
        self.sparams.LL = L0
        self.sparams.RL = L0
        self.params.U = np.random.randn(self.vdim, self.hdim*2) * np.sqrt(0.1)

        # Initialize H matrix, as with W and U in part 1
        self.params.LH = random_weight_matrix(self.hdim, self.hdim)
        self.params.RH = random_weight_matrix(self.hdim, self.hdim)
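Nearly every example below calls random_weight_matrix without showing its definition. A minimal sketch, assuming the conventional Xavier-style uniform initialization these assignments typically use (the actual helper in misc.py may differ):

    import numpy as np

    def random_weight_matrix(m, n):
        # Xavier/Glorot-style uniform init, scaled by fan-in + fan-out (assumed)
        eps = np.sqrt(6.0) / np.sqrt(m + n)
        return np.random.uniform(-eps, eps, size=(m, n))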
Example #2
    def __init__(self, L0, D0, U0=None,
                 alpha=0.005, rseed=10, bptt=1):

        self.hdim = L0.shape[1] # word vector dimensions
        self.vdim = L0.shape[0] # vocab size
        self.ddim = D0.shape[0] # doc size
        param_dims = dict(H = (self.hdim, self.hdim),
                          U = L0.shape, G = L0.shape)
        # note that only L gets sparse updates
        param_dims_sparse = dict(L = L0.shape, D = D0.shape)
        NNBase.__init__(self, param_dims, param_dims_sparse)

        #### YOUR CODE HERE ####

        self.bptt = bptt
        self.alpha = alpha

        # Initialize word vectors
        self.sparams.L = L0.copy()
        self.sparams.D = D0.copy()

        self.params.U = random.randn(self.vdim, self.hdim)*0.1

        # Initialize H matrix, as with W and U in part 1
        self.params.H = random_weight_matrix(self.hdim, self.hdim)
        self.params.G = random_weight_matrix(self.vdim, self.hdim)
Example #3
    def __init__(self, L0, Dy=N_ASPECTS*SENT_DIM, U0=None,
                 alpha=0.005, rseed=10, bptt=5):

        self.hdim = L0.shape[1] # word vector dimensions
        self.vdim = L0.shape[0] # vocab size
        self.ydim = Dy
        param_dims = dict(H = (self.hdim, self.hdim),
                          U = (self.ydim, self.hdim),
                          b1 = (self.hdim,),
                          b2 =(self.ydim,))
        # note that only L gets sparse updates
        param_dims_sparse = dict(L = L0.shape)
        NNBase.__init__(self, param_dims, param_dims_sparse)

        #### YOUR CODE HERE ####
        var = .1
        sigma = sqrt(var)
        from misc import random_weight_matrix
        random.seed(rseed)
        # Initialize word vectors
        self.bptt = bptt
        self.alpha = alpha
        self.params.H=random_weight_matrix(*self.params.H.shape)
        if U0 is not None:
            self.params.U= U0.copy()
        else:
            self.params.U= random_weight_matrix(*self.params.U.shape)
        self.sparams.L = L0.copy()
        self.params.b1 = zeros((self.hdim,))
        self.params.b2 = zeros((self.ydim,))
Example #4
    def __init__(self, wv, dims=[100, 5],
                 reg=0.1, alpha=0.001,
                 rseed=10):
        """
        Set up classifier: parameters, hyperparameters
        """
        ##
        # Store hyperparameters
        self.lreg = reg # regularization
        self.alpha = alpha # default learning rate
        self.nclass = dims[1] # number of output classes

        ##
        # NNBase stores parameters in a special format
        # for efficiency reasons, and to allow the code
        # to automatically implement gradient checks
        # and training algorithms, independent of the
        # specific model architecture
        # To initialize, give shapes as if to np.array((m,n))
        param_dims = dict(W = (dims[1], dims[0]), # 5x100 matrix
                          b = (dims[1],)) # bias vector of length dims[1]
        # These parameters have sparse gradients,
        # which is *much* more efficient if only a row
        # at a time gets updated (e.g. word representations)
        param_dims_sparse = dict(L=wv.shape)
        NNBase.__init__(self, param_dims, param_dims_sparse)

        ##
        # Now we can access the parameters using
        # self.params.<name> for normal parameters
        # self.sparams.<name> for params with sparse gradients
        # and get access to normal NumPy arrays
        self.sparams.L = wv.copy() # store own representations
        self.params.W = random_weight_matrix(*self.params.W.shape)
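The comments above describe attribute-style access (self.params.<name>) to preallocated parameter arrays. An illustrative sketch of how such a container could work, not the actual NNBase implementation:

    import numpy as np

    class ParamPack(object):
        """Attribute-style access to a dict of preallocated NumPy arrays."""
        def __init__(self, shapes):
            for name, shape in shapes.items():
                setattr(self, name, np.zeros(shape))

    params = ParamPack(dict(W=(5, 100), b=(5,)))
    params.W[:] = 0.01 * np.random.randn(5, 100)  # in-place write keeps the shared storage
    print(params.W.shape, params.b.shape)         # (5, 100) (5,)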
Example #6
    def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):

        self.hdim = L0.shape[1]  # word vector dimensions
        self.vdim = L0.shape[0]  # vocab size
        param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
        # note that only L gets sparse updates
        param_dims_sparse = dict(L=L0.shape)
        NNBase.__init__(self, param_dims, param_dims_sparse)

        #### YOUR CODE HERE ####

        # Initialize word vectors
        # either copy the passed L0 and U0 (and initialize in your notebook)
        # or initialize with gaussian noise here

        if U0 is None:
            # note: numpy's random.normal takes the size as a tuple
            self.params.U = random.normal(0, 0.1, param_dims["U"])
        else:
            self.params.U = U0.copy()

        # L0 is always provided (its shape was used above), so just copy it
        self.sparams.L = L0.copy()

        # Initialize H matrix, as with W and U in part 1

        self.params.H = random_weight_matrix(*param_dims["H"])

        self.rseed = rseed
        self.bptt = bptt
        self.alpha = alpha
Example #7
    def __init__(self, dims=[100, 5, 100],
                 reg=0.1, alpha=0.001, ro = 0.05,
                 rseed=10, beta=0.2):
        """
        Set up autoencoder: parameters, hyperparameters
        """
        ##
        # Store hyperparameters
        self.lreg = reg # regularization
        self.alpha = alpha # default learning rate
        self.dims = dims # todo move to superclass
        self.ro = ro # ro sparsity parameter
        self.beta = beta  # sparsity penalty 


        param_dims = dict(W=(dims[1], dims[0]),
                          b1=(dims[1],),
                          U=(dims[2], dims[1]),
                          b2=(dims[2],),
                          )
        NNBase.__init__(self, param_dims)

        #self.sparams.L = wv.copy() # store own representations
        self.params.W = random_weight_matrix(*self.params.W.shape)
        self.params.U = random_weight_matrix(*self.params.U.shape)
        self.outputsize = dims[2]
Example #8
    def __init__(self,
                 L0,
                 Dy=N_ASPECTS * SENT_DIM,
                 U0=None,
                 alpha=0.005,
                 rseed=10,
                 bptt=5):

        self.hdim = L0.shape[1]  # word vector dimensions
        self.vdim = L0.shape[0]  # vocab size
        self.ydim = Dy
        param_dims = dict(H=(self.hdim, self.hdim),
                          U=(self.ydim, self.hdim),
                          b1=(self.hdim, ),
                          b2=(self.ydim, ))
        # note that only L gets sparse updates
        param_dims_sparse = dict(L=L0.shape)
        NNBase.__init__(self, param_dims, param_dims_sparse)

        #### YOUR CODE HERE ####
        var = .1
        sigma = sqrt(var)
        from misc import random_weight_matrix
        random.seed(rseed)
        # Initialize word vectors
        self.bptt = bptt
        self.alpha = alpha
        self.params.H = random_weight_matrix(*self.params.H.shape)
        if U0 is not None:
            self.params.U = U0.copy()
        else:
            self.params.U = random_weight_matrix(*self.params.U.shape)
        self.sparams.L = L0.copy()
        self.params.b1 = zeros((self.hdim, ))
        self.params.b2 = zeros((self.ydim, ))
Example #9
    def __init__(self, dims=[100, 20, 20, 5], reg=0.1, alpha=0.001, rseed=10):
        """
        Set up classifier: parameters, hyperparameters
        """
        ##
        # Store hyperparameters
        self.lreg = reg  # regularization
        self.alpha = alpha  # default learning rate
        self.nclass = dims[1]  # number of output classes
        self.dims = dims  # todo move to superclass

        param_dims = dict(
            W=(dims[1], dims[0]),
            b1=(dims[1], ),
            U=(dims[2], dims[1]),
            b2=(dims[2], ),
            G=(dims[3], dims[2]),
            b3=(dims[3], ),
        )
        NNBase.__init__(self, param_dims)

        #self.sparams.L = wv.copy() # store own representations
        self.params.W = random_weight_matrix(*self.params.W.shape)
        self.params.U = random_weight_matrix(*self.params.U.shape)
        self.params.G = random_weight_matrix(*self.params.G.shape)
        self.outputsize = dims[3]
Example #10
    def __init__(self, L0, U0=None,
                 alpha=0.005, rseed=10, bptt=1):

        self.hdim = L0.shape[1] # word vector dimensions      |D|
        self.vdim = L0.shape[0] # vocab size                  |V|
        param_dims = dict(H = (self.hdim, self.hdim),
                          U = L0.shape)
        # note that only L gets sparse updates
        param_dims_sparse = dict(L = L0.shape)
        NNBase.__init__(self, param_dims, param_dims_sparse)

        #### YOUR CODE HERE ####


        # Initialize word vectors
        # either copy the passed L0 and U0 (and initialize in your notebook)
        # or initialize with gaussian noise here
        self.sparams.L = 0.1 * random.standard_normal(self.sparams.L.shape)
        self.params.U = 0.1 * random.standard_normal(self.params.U.shape)
        # Initialize H matrix, as with W and U in part 1
        self.params.H = random_weight_matrix(*self.params.H.shape)

        self.bptt = bptt
        self.alpha = alpha
Example #11
    def __init__(self, L0, U0=None,
                 alpha=0.005, rseed=10, bptt=1):

        self.hdim = L0.shape[1] # word vector dimensions
        self.vdim = L0.shape[0] # vocab size
        param_dims = dict(H = (self.hdim, self.hdim),
                          U = (L0.shape if U0 is None else U0.shape))
        # note that only L gets sparse updates
        param_dims_sparse = dict(L = L0.shape)
        NNBase.__init__(self, param_dims, param_dims_sparse)

        #### YOUR CODE HERE ####
        self.alpha = alpha

        self.bptt = bptt

        # Initialize word vectors
        # either copy the passed L0 and U0 (and initialize in your notebook)
        # or initialize with gaussian noise here
        random.seed(rseed)
        
        sigma = sqrt(0.1)
        self.sparams.L = random.normal(0, sigma, L0.shape)
        self.params.U = random.normal(0, sigma, param_dims['U'])
        
        # Initialize H matrix, as with W and U in part 1
        self.params.H = random_weight_matrix(*param_dims['H'])

        self.lamb = .0001 # regularization
Example #12
    def __init__(self, L0, U0=None,alpha=0.005, lreg = 0.00001, rseed=10, bptt=1):

        self.hdim = L0.shape[1] # word vector dimensions
        self.vdim = L0.shape[0] # vocab size
        param_dims = dict(H = (self.hdim, self.hdim), W = (self.hdim,self.hdim))
                          #,U = L0.shape)
        # note that only L gets sparse updates
        param_dims_sparse = dict(L = L0.shape)
        NNBase.__init__(self, param_dims, param_dims_sparse)

        self.alpha = alpha
        self.lreg = lreg
        #### YOUR CODE HERE ####
        
        # Initialize word vectors
        # either copy the passed L0 and U0 (and initialize in your notebook)
        # or initialize with gaussian noise here

        # Initialize H matrix, as with W and U in part 1
        self.bptt = bptt
        random.seed(rseed)
        self.params.H = random_weight_matrix(*self.params.H.shape)
        self.params.W = random_weight_matrix(*self.params.W.shape)
        #self.params.U = 0.1*np.random.randn(*L0.shape)
        self.sparams.L = L0.copy()
Example #13
    def __init__(self, L0, U0=None,
                 alpha=0.005, rseed=10, bptt=1):
        random.seed(rseed)
        self.hdim = L0.shape[1] # word vector dimensions
        self.vdim = L0.shape[0] # vocab size
        param_dims = dict(H = (self.hdim, self.hdim),
                          U = L0.shape)
        # note that only L gets sparse updates
        param_dims_sparse = dict(L = L0.shape)
        NNBase.__init__(self, param_dims, param_dims_sparse)

        #### YOUR CODE HERE ####
        self.sparams.L = L0.copy()
        self.params.H = random_weight_matrix(self.hdim, self.hdim)
        self.alpha = alpha
        self.bptt = bptt


        # Initialize word vectors
        # either copy the passed L0 and U0 (and initialize in your notebook)
        # or initialize with gaussian noise here
        if U0 is not None:
            self.params.U = U0.copy()
        else:
            sigma = 0.1
            mu = 0
            #self.params.U = random.normal(mu, sigma, (self.vdim, self.hdim))
            self.params.U = sigma*random.randn(self.vdim, self.hdim) + mu
Example #14
    def __init__(self, L0, U0=None,
                 alpha=0.005, rseed=10, bptt=1):

        self.hdim = L0.shape[1] # word vector dimensions
        self.vdim = L0.shape[0] # vocab size
        param_dims = dict(H = (self.hdim, self.hdim),
                          U = L0.shape)
        # note that only L gets sparse updates
        param_dims_sparse = dict(L = L0.shape)
        NNBase.__init__(self, param_dims, param_dims_sparse)

        #### YOUR CODE HERE ####


        # Initialize word vectors
        # either copy the passed L0 and U0 (and initialize in your notebook)
        # or initialize with gaussian noise here
        # Initialize H matrix, as with W and U in part 1
        self.sparams.L = L0.copy()
        if U0 is None:
            self.params.U = random.normal(0, 0.1, param_dims['U'])
        else:
            self.params.U = U0.copy()
        self.params.H = random_weight_matrix(*param_dims['H'])
        self.alpha = alpha
#        self.rseed = rseed
        self.bptt = bptt
Example #15
    def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):

        self.hdim = L0.shape[1]  # word vector dimensions
        self.vdim = L0.shape[0]  # vocab size
        param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
        # note that only L gets sparse updates
        param_dims_sparse = dict(L=L0.shape)
        NNBase.__init__(self, param_dims, param_dims_sparse)
Example #16
    def grad_check(self, x, y, outfd=sys.stderr, **kwargs):

        bptt_old = self.bptt
        self.bptt = len(y)
        print >> outfd, "NOTE: temporarily setting self.bptt = len(y) = %d to compute true gradient." % self.bptt
        NNBase.grad_check(self, x, y, outfd=outfd, **kwargs)
        self.bptt = bptt_old
        print >> outfd, "Reset self.bptt = %d" % self.bptt
Example #17
    def __init__(self,
                 wv,
                 windowsize=3,
                 dims=[None, 100, 5],
                 reg=0.001,
                 alpha=0.01,
                 rseed=10):
        """
        Initialize classifier model.

        Arguments:
        wv : initial word vectors (array |V| x n)
            note that this is the transpose of the n x |V| matrix L
            described in the handout; you'll want to keep it in
            this |V| x n form for efficiency reasons, since numpy
            stores matrix rows contiguously.
        windowsize : int, size of context window
        dims : dimensions of [input, hidden, output]
            input dimension can be computed from wv.shape
        reg : regularization strength (lambda)
        alpha : default learning rate
        rseed : random initialization seed

        |V| = Size of vocabulary
        n   = length of our word vectors
        """

        # Set regularization
        self.lreg = float(reg)
        self.alpha = alpha  # default training rate

        dims[0] = windowsize * wv.shape[1]  # input dimension

        print "input size:  %d" % dims[0]
        print "hidden size: %d" % dims[1]
        print "output size: %d" % dims[2]

        param_dims = dict(
            W=(dims[1], dims[0]),
            b1=(dims[1], ),
            U=(dims[2], dims[1]),
            b2=(dims[2], ),
        )
        param_dims_sparse = dict(L=wv.shape)

        # initialize parameters: don't change this line
        NNBase.__init__(self, param_dims, param_dims_sparse)

        random.seed(rseed)  # be sure to seed this for repeatability!
        #### YOUR CODE HERE ####

        # any other initialization you need

        self.sparams.L = wv.copy()  # sparse parameter is registered as L above
        self.params.W = random_weight_matrix(param_dims["W"][0],
                                             param_dims["W"][1])
        self.params.U = random_weight_matrix(param_dims["U"][0],
                                             param_dims["U"][1])
Example #18
    def __init__(self,
                 wdim,
                 hdim=None,
                 odim=2,
                 alpha=0.005,
                 rho=1e-4,
                 rseed=10,
                 bptt=1,
                 drop_p=0.5,
                 context=1):
        np.random.seed(rseed)
        self.rseed = rseed

        self.wdim = wdim
        #self.wdim = L.shape[1] # word vector dimensions
        #self.vdim = L.shape[0] # vocab size
        if hdim is None: hdim = self.wdim
        self.hdim = hdim
        self.odim = odim
        self.context = context

        param_dims = dict(W11=(self.hdim, self.wdim * (1 + self.context * 2)),
                          b11=(self.hdim, ),
                          W12=(self.hdim, self.hdim),
                          b12=(self.hdim, ),
                          W21=(self.hdim, self.hdim),
                          b21=(self.hdim, ),
                          Ws=(self.odim, self.hdim),
                          bs=(self.odim, ))

        # word embeddings are not updated
        # no longer needed because passing word vectors X
        #self.L = L

        # no sparse updates in this model
        #param_dims_sparse = dict(L = L0.shape)
        #NNBase.__init__(self, param_dims, param_dims_sparse)
        NNBase.__init__(self, param_dims)

        #### YOUR CODE HERE ####
        # not recursive yet, but leaving bptt anyway
        self.bptt = bptt
        self.alpha = alpha
        self.rho = rho
        self.drop_p = drop_p  # probability of dropping word embedding element from training

        # Initialize weight matrices
        self.params.W11 = random_weight_matrix(*self.params.W11.shape)
        self.params.W12 = random_weight_matrix(*self.params.W12.shape)
        self.params.W21 = random_weight_matrix(*self.params.W21.shape)
        self.params.Ws = random_weight_matrix(*self.params.Ws.shape)

        # initialize bias vectors
        self.params.b11 = zeros((self.hdim))
        self.params.b12 = zeros((self.hdim))
        self.params.b21 = zeros((self.hdim))
        self.params.bs = zeros((self.odim))
Example #19
    def __init__(self, L0, U0=None,
                 alpha=0.005, rseed=10, bptt=1):

        self.hdim = L0.shape[1] # word vector dimensions
        self.vdim = L0.shape[0] # vocab size
        param_dims = dict(H = (self.hdim, self.hdim),
                          U = L0.shape)
        # note that only L gets sparse updates
        param_dims_sparse = dict(L = L0.shape)
        NNBase.__init__(self, param_dims, param_dims_sparse)
Example #20
    def grad_check(self, x, y, outfd=sys.stderr, **kwargs):
        """
        Wrapper for gradient check on RNNs;
        ensures that backprop-through-time is run to completion,
        computing the full gradient for the loss as summed over
        the input sequence and predictions.

        Do not modify this function!
        """
        NNBase.grad_check(self, x, y, outfd=outfd, **kwargs)
Example #21
    def __init__(self, wv, windowsize=3,
                 dims=[None, 100, 5],
                 reg=0.001, alpha=0.01, rseed=10):
        """
        Initialize classifier model.

        Arguments:
        wv : initial word vectors (array |V| x n)
            note that this is the transpose of the n x |V| matrix L
            described in the handout; you'll want to keep it in
            this |V| x n form for efficiency reasons, since numpy
            stores matrix rows contiguously.
        windowsize : int, size of context window
        dims : dimensions of [input, hidden, output]
            input dimension can be computed from wv.shape
        reg : regularization strength (lambda)
        alpha : default learning rate
        rseed : random initialization seed
        """

        # Set regularization
        self.lreg = float(reg)
        self.alpha = alpha # default training rate

        dims[0] = windowsize * wv.shape[1] # input dimension
        param_dims = dict(W=(dims[1], dims[0]),
                          b1=(dims[1],),
                          U=(dims[2], dims[1]),
                          b2=(dims[2],),
                          )
        param_dims_sparse = dict(L=wv.shape)

        # initialize parameters: don't change this line
        NNBase.__init__(self, param_dims, param_dims_sparse)

        random.seed(rseed) # be sure to seed this for repeatability!
        #### YOUR CODE HERE ####

        # any other initialization you need
        # self.sparams, self.grads, self.params, self.sgrads:
        # where are they defined?
        # why can they be used directly? (NNBase.__init__ sets them up)
        self.sparams.L = wv.copy()
        #self.sparam.L = wv.copy()
        self.params.U = random_weight_matrix(*param_dims["U"])
        #self.param.U = random_weight_matrix(param_dims["U"])
        self.params.W = random_weight_matrix(*param_dims["W"])
        #self.param.b1 = zeros(param_dims["b1"])
        #self.param.b2 = zeros(param_dims["b2"])
        self.windowSize = windowsize
        self.wordVecLen = wv.shape[1]
        self.wordVecNum = wv.shape[0]
Example #22
    def __init__(self,
                 dims=[100, 30, 20, 5],
                 reg=0.1,
                 alpha=0.001,
                 rseed=10,
                 activation='tanh',
                 init_weights=[]):
        """
        Set up classifier: parameters, hyperparameters
        """
        ##
        # Store hyperparameters
        self.lreg = reg  # regularization
        self.alpha = alpha  # default learning rate
        self.nclass = dims[-1]  # number of output classes
        self.dims = dims  # todo move to superclass
        self.outputsize = dims[-1]

        ## We name the parameters as following
        # W1, b1, W2, b2, W3, b3 ...
        param_dims = {}
        for i in range(1, len(dims)):
            w_param = 'W' + str(i)
            b_param = 'b' + str(i)
            param_dims[w_param] = (dims[i], dims[i - 1])
            param_dims[b_param] = (dims[i], )

        NNBase.__init__(self, param_dims)

        # set activation function
        if activation == 'tanh':
            self.act = tanh
            self.act_grad = tanhd
        elif activation == 'sigmoid':
            self.act = sigmoid
            self.act_grad = sigmoid_grad
        else:
            raise ValueError('Unknown activation function')

        #self.sparams.L = wv.copy() # store own representations
        # init weights

        # layers for which init_weights aren't passed are initialized randomly
        for i in range(1, len(self.dims)):
            if i - 1 < len(init_weights):
                # we have the corresponding weights passed for this layer
                cur_weight = init_weights[i - 1]
                assert cur_weight.shape == (dims[i], dims[i - 1]), (
                    "passed initial weight dimensions don't match")
            else:
                cur_weight = random_weight_matrix(dims[i], dims[i - 1])
            self._set_param('W', i, cur_weight)
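As a quick standalone check, the naming loop above produces the following shapes for the default dims=[100, 30, 20, 5]:

    dims = [100, 30, 20, 5]
    param_dims = {}
    for i in range(1, len(dims)):
        param_dims['W%d' % i] = (dims[i], dims[i - 1])
        param_dims['b%d' % i] = (dims[i],)
    print(param_dims)
    # {'W1': (30, 100), 'b1': (30,), 'W2': (20, 30), 'b2': (20,),
    #  'W3': (5, 20), 'b3': (5,)}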
Example #23
    def __init__(self, wv, windowsize=3,
                 dims=[None, 100, 5],
                 reg=0.001, alpha=0.01, rseed=10):
        """
        Initialize classifier model.

        Arguments:
        wv : initial word vectors (array |V| x n)
            note that this is the transpose of the n x |V| matrix L
            described in the handout; you'll want to keep it in
            this |V| x n form for efficiency reasons, since numpy
            stores matrix rows contiguously.
        windowsize : int, size of context window
        dims : dimensions of [input, hidden, output]
            input dimension can be computed from wv.shape
        reg : regularization strength (lambda)
        alpha : default learning rate
        rseed : random initialization seed
        """

        # Set regularization
        self.lreg = float(reg)
        self.alpha = alpha # default training rate

        dims[0] = windowsize * wv.shape[1]         # input dimension
        param_dims = dict(W1=(dims[1], dims[0]),   # 100 x 150
                          b2=(dims[1],),           # 100 x 1
                          W2=(dims[2], dims[1]),   # 5 X 100
                          b3=(dims[2],),           # 5 x 1
                          )

        param_dims_sparse = dict(L=wv.shape)       # |V| x 50

        # initialize parameters: don't change this line
        NNBase.__init__(self, param_dims, param_dims_sparse)

        random.seed(rseed) # be sure to seed this for repeatability!
        #### YOUR CODE HERE ####
        self.sparams.L = wv.copy()

        self.params.W1 = random_weight_matrix(param_dims['W1'][0], param_dims['W1'][1])

        self.params.b2 = append([], random_weight_matrix(param_dims['b2'][0], 1))
        self.params.b3 = append([], random_weight_matrix(param_dims['b3'][0], 1))
        self.params.W2 = random_weight_matrix(param_dims['W2'][0], param_dims['W2'][1])
        self.n = wv.shape[1]

        # informational
        self.windowsize = windowsize
        self.hidden_units = dims[1]
Example #24
    def grad_check(self, x, y, outfd=sys.stderr, **kwargs):
        """
        Wrapper for gradient check on RNNs;
        ensures that backprop-through-time is run to completion,
        computing the full gradient for the loss as summed over
        the input sequence and predictions.
        Do not modify this function!
        """
        bptt_old = self.bptt
        self.bptt = len(y)
        print >> outfd, "NOTE: temporarily setting self.bptt = len(y) = %d to compute true gradient." % self.bptt
        NNBase.grad_check(self, x, y, outfd=outfd, **kwargs)
        self.bptt = bptt_old
        print >> outfd, "Reset self.bptt = %d" % self.bptt
Example #26
    def __init__(self, wv, windowsize=3,
                 dims=[None, 100, 5],
                 reg=0.001, alpha=0.01, rseed=10):
        """
        Initialize classifier model.

        Arguments:
        wv : initial word vectors (array |V| x n)
            note that this is the transpose of the n x |V| matrix L
            described in the handout; you'll want to keep it in
            this |V| x n form for efficiency reasons, since numpy
            stores matrix rows contiguously.
        windowsize : int, size of context window
        dims : dimensions of [input, hidden, output]
            input dimension can be computed from wv.shape
        reg : regularization strength (lambda)
        alpha : default learning rate
        rseed : random initialization seed
        """

        # Set regularization
        self.lreg = float(reg)
        self.alpha = alpha # default training rate
        self.nclass = dims[2] # number of output classes

        self.D = wv.shape[1]
        self.windowsize = windowsize

        dims[0] = windowsize * wv.shape[1] # input dimension
        param_dims = dict(W=(dims[1], dims[0]),
                          b1=(dims[1],),
                          U=(dims[2], dims[1]),
                          b2=(dims[2],))
        param_dims_sparse = dict(L=wv.shape)

        # initialize parameters: don't change this line
        NNBase.__init__(self, param_dims, param_dims_sparse)

        random.seed(rseed) # be sure to seed this for repeatability!

        ##
        # Now we can access the parameters using
        # self.params.<name> for normal parameters
        # self.sparams.<name> for params with sparse gradients
        # and get access to normal NumPy arrays
        self.sparams.L = wv.copy() # store own representations
        self.params.W  = random_weight_matrix(*self.params.W.shape)
        # self.params.b1 = zeros(*self.params.b1.shape) # done automatically!
        self.params.U  = random_weight_matrix(*self.params.U.shape)
Example #27
    def __init__(self,
                 wv,
                 windowsize=3,
                 dims=[None, 100, 5],
                 reg=0.001,
                 alpha=0.01,
                 rseed=10):
        """
        Initialize classifier model.

        Arguments:
        wv : initial word vectors (array |V| x n) n=50
            note that this is the transpose of the n x |V| matrix L
            described in the handout; you'll want to keep it in
            this |V| x n form for efficiency reasons, since numpy
            stores matrix rows contiguously.
        windowsize : int, size of context window
        dims : dimensions of [input, hidden, output]
            input dimension can be computed from wv.shape
        reg : regularization strength (lambda)
        alpha : default learning rate
        rseed : random initialization seed
        """

        # Set regularization
        self.lreg = float(reg)
        self.alpha = alpha  # default training rate
        self.nclass = dims[2]

        # input dimension, wv.shape is the dimension of each word vector representation
        dims[0] = windowsize * wv.shape[1]  # 50*3
        param_dims = dict(
            W=(dims[1], dims[0]),  # 100*150
            b1=(dims[1],),
            U=(dims[2], dims[1]),
            b2=(dims[2],))
        param_dims_sparse = dict(L=wv.shape)  # L.shape = (|V|*50)

        # initialize parameters: don't change this line
        NNBase.__init__(self, param_dims, param_dims_sparse)

        random.seed(rseed)  # be sure to seed this for repeatability!

        self.params.W = random_weight_matrix(*self.params.W.shape)  # 100*150
        self.params.U = random_weight_matrix(*self.params.U.shape)  # 5*100
        #self.params.b1 = zeros((dims[1],))  # 100*1
        #self.params.b2 = zeros((self.nclass,)) # 5*1

        self.sparams.L = wv.copy()
Example #28
    def __init__(self,
                 L0,
                 U0=None,
                 alpha=0.005,
                 lreg=0.00001,
                 rseed=10,
                 bptt=1,
                 loadData=False):

        self.hdim = L0.shape[1]  # word vector dimensions
        self.vdim = L0.shape[0]  # vocab size
        param_dims = dict(H=(self.hdim, self.hdim),
                          W=(self.hdim, self.hdim),
                          U=L0.shape)
        # note that only L gets sparse updates
        param_dims_sparse = dict(L=L0.shape)
        NNBase.__init__(self, param_dims, param_dims_sparse)

        self.alpha = alpha
        self.lreg = lreg
        #### YOUR CODE HERE ####

        # Initialize word vectors
        # either copy the passed L0 and U0 (and initialize in your notebook)
        # or initialize with gaussian noise here

        # Initialize H matrix, as with W and U in part 1
        self.bptt = bptt
        if loadData == True:
            with open("rnnlmWithW_hdim_150_bptt_4.H.npy") as fid:
                H = pickle.load(fid)
                self.params.H = H
            with open("rnnlmWithW_hdim_150_bptt_4.W.npy") as fid:
                W = pickle.load(fid)
                self.params.W = W
            with open("rnnlmWithW_hdim_150_bptt_4.U.npy") as fid:
                U = pickle.load(fid)
                self.params.U = U
            with open("rnnlmWithW_hdim_150_bptt_4.L.npy") as fid:
                L = pickle.load(fid)
                self.sparams.L = L
            return

        random.seed(rseed)
        self.params.H = random_weight_matrix(*self.params.H.shape)
        self.params.W = random_weight_matrix(*self.params.W.shape)
        self.params.U = 0.1 * np.random.randn(*L0.shape)
        self.sparams.L = L0.copy()
Example #29
    def __init__(self, wv, windowsize=3,
                 dims=[None, 100, 5],
                 reg=0.001, alpha=0.01, rseed=10):
        """
        Initialize classifier model.

        Arguments:
        wv : initial word vectors (array |V| x n)
            note that this is the transpose of the n x |V| matrix L
            described in the handout; you'll want to keep it in
            this |V| x n form for efficiency reasons, since numpy
            stores matrix rows contiguously.
        windowsize : int, size of context window
        dims : dimensions of [input, hidden, output]
            input dimension can be computed from wv.shape
        reg : regularization strength (lambda)
        alpha : default learning rate
        rseed : random initialization seed
        """

        # Set regularization
        self.lreg = float(reg)
        self.alpha = alpha # default training rate

        #wv.shape: (100232,50)
        dims[0] = windowsize * wv.shape[1] # input dimension 3*50=150
        param_dims = dict(W=(dims[1], dims[0]),# W(100,150)
                          b1=(dims[1],),#b(100)
                          U=(dims[2], dims[1]),#U(5,100)
                          b2=(dims[2],),#(5,)
                          )
        param_dims_sparse = dict(L=wv.shape) #L(100232,50)

        # initialize parameters: don't change this line
        NNBase.__init__(self, param_dims, param_dims_sparse)

        random.seed(rseed) # be sure to seed this for repeatability!
        #### YOUR CODE HERE ####

        # any other initialization you need
        self.sparams.L = wv.copy() # store own representations,100232,50 matrix
        self.params.W = random_weight_matrix(*self.params.W.shape)
        self.params.U = random_weight_matrix(*self.params.U.shape)

        self.window_size = windowsize#3
        self.word_vec_size = wv.shape[1]#50
Example #30
    def __init__(self, L0, U0=None,
                 alpha=0.005, rseed=10, bptt=1):

        self.hdim = L0.shape[1] # word vector dimensions
        self.vdim = L0.shape[0] # vocab size
        param_dims = dict(H = (self.hdim, self.hdim),
                          U = L0.shape)
        # note that only L gets sparse updates
        param_dims_sparse = dict(L = L0.shape)
        NNBase.__init__(self, param_dims, param_dims_sparse)

        #### YOUR CODE HERE ####
        #random.seed(rseed)
        self.params.U = 0.1 * random.randn(*self.params.U.shape)
        self.sparams.L = 0.1 * random.randn(*self.sparams.L.shape)
        self.params.H = random_weight_matrix(*self.params.H.shape)
        self.bptt = bptt
        self.alpha = alpha
Example #31
    def __init__(self, L0, U0=None,
                 alpha=0.005, rseed=10, bptt=1):

        self.hdim = L0.shape[1] # word vector dimensions
        self.vdim = L0.shape[0] # vocab size
        param_dims = dict(H = (self.hdim, self.hdim),
                          U = L0.shape)
        # note that only L gets sparse updates
        param_dims_sparse = dict(L = L0.shape)
        NNBase.__init__(self, param_dims, param_dims_sparse)

        self.sparams.L = random_weight_matrix(*self.sparams.L.shape)
        self.params.U = random_weight_matrix(*self.params.U.shape)  # U is a dense parameter, not sparse
        
        self.params.H = random_weight_matrix(*self.params.H.shape)

        self.bptt = bptt
        self.alpha = alpha
Example #32
    def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):

        self.hdim = L0.shape[1]  # word vector dimensions
        self.vdim = L0.shape[0]  # vocab size
        param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
        # note that only L gets sparse updates
        param_dims_sparse = dict(L=L0.shape)
        NNBase.__init__(self, param_dims, param_dims_sparse)

        #### YOUR CODE HERE ####

        # Initialize word vectors
        # either copy the passed L0 and U0 (and initialize in your notebook)
        # or initialize with gaussian noise here
        self.sparams.L = 0.1 * random.standard_normal(self.sparams.L.shape)
        self.params.U = 0.1 * random.standard_normal(self.params.U.shape)  # U is a dense parameter
        self.params.H = random_weight_matrix(*self.params.H.shape)
        self.bptt = bptt
Example #33
    def __init__(self, wv, windowsize=3,
                 dims=[None, 100, 5],
                 reg=0.001, alpha=0.01, rseed=10):
        """
        Initialize classifier model.

        Arguments:
        wv : initial word vectors (array |V| x n) n=50
            note that this is the transpose of the n x |V| matrix L
            described in the handout; you'll want to keep it in
            this |V| x n form for efficiency reasons, since numpy
            stores matrix rows contiguously.
        windowsize : int, size of context window
        dims : dimensions of [input, hidden, output]
            input dimension can be computed from wv.shape
        reg : regularization strength (lambda)
        alpha : default learning rate
        rseed : random initialization seed
        """

        # Set regularization
        self.lreg = float(reg)
        self.alpha = alpha # default training rate
        self.nclass = dims[2]
        
        # input dimension, wv.shape is the dimension of each word vector representation
        dims[0] = windowsize * wv.shape[1] # 50*3
        param_dims = dict(W=(dims[1], dims[0]), # 100*150
                          b1=(dims[1],),
                          U=(dims[2], dims[1]),
                          b2=(dims[2],))
        param_dims_sparse = dict(L=wv.shape) # L.shape = (|V|*50)

        # initialize parameters: don't change this line
        NNBase.__init__(self, param_dims, param_dims_sparse)

        random.seed(rseed) # be sure to seed this for repeatability!

        self.params.W = random_weight_matrix(*self.params.W.shape) # 100*150
        self.params.U = random_weight_matrix(*self.params.U.shape) # 5*100
        #self.params.b1 = zeros((dims[1],))  # 100*1
        #self.params.b2 = zeros((self.nclass,)) # 5*1
        
        self.sparams.L = wv.copy()        
Example #34
    def __init__(self, wv, windowsize=3,
                 dims=[None, 100, 5],
                 reg=0.001, alpha=0.01, rseed=10):
        """
        Initialize classifier model.

        Arguments:
        wv : initial word vectors (array |V| x n) => n is the input dimension, length of input word-vector
            note that this is the transpose of the n x |V| matrix L
            described in the handout; you'll want to keep it in
            this |V| x n form for efficiency reasons, since numpy
            stores matrix rows contiguously.
        windowsize : int, size of context window
        dims : dimensions of [input, hidden, output]
            input dimension can be computed from wv.shape
        reg : regularization strength (lambda)
        alpha : default learning rate
        rseed : random initialization seed
        """

        # Set regularization
        self.lreg = float(reg)
        self.alpha = alpha # default training rate

        dims[0] = windowsize * wv.shape[1] # input dimension
        param_dims = dict(W=(dims[1], dims[0]),
                          b1=(dims[1],),
                          U=(dims[2], dims[1]),
                          b2=(dims[2],),
                          )
        param_dims_sparse = dict(L=wv.shape)

        # initialize parameters
        NNBase.__init__(self, param_dims, param_dims_sparse)

        random.seed(rseed) # be sure to seed for reproducibility

        # random initialization
        self.sparams.L = wv.copy() # store own representations
        self.params.U = random_weight_matrix(*self.params.U.shape)
        self.params.W = random_weight_matrix(*self.params.W.shape)

        self.window_size = windowsize
        self.word_vec_size = wv.shape[1]
Example #35
    def __init__(self, L0, U0=None,
                 alpha=0.005, lreg = 0.00001, rseed=10, bptt=1,loadData=False):
        
        self.hdim = L0.shape[1] # word vector dimensions
        self.vdim = L0.shape[0] # vocab size
        param_dims = dict(H = (self.hdim, self.hdim), W = (self.hdim,self.hdim),
                          U = L0.shape)
        # note that only L gets sparse updates
        param_dims_sparse = dict(L = L0.shape)
        NNBase.__init__(self, param_dims, param_dims_sparse)

        self.alpha = alpha
        self.lreg = lreg
        #### YOUR CODE HERE ####
        
        # Initialize word vectors
        # either copy the passed L0 and U0 (and initialize in your notebook)
        # or initialize with gaussian noise here

        # Initialize H matrix, as with W and U in part 1
        self.bptt = bptt
        if loadData == True:
            with open("rnnlmWithW_hdim_150_bptt_4.H.npy") as fid:
                H = pickle.load(fid)
                self.params.H = H
            with open("rnnlmWithW_hdim_150_bptt_4.W.npy") as fid:
                W = pickle.load(fid)
                self.params.W = W
            with open("rnnlmWithW_hdim_150_bptt_4.U.npy") as fid:
                U = pickle.load(fid)
                self.params.U = U
            with open("rnnlmWithW_hdim_150_bptt_4.L.npy") as fid:
                L = pickle.load(fid)
                self.sparams.L = L
            return

        random.seed(rseed)
        self.params.H = random_weight_matrix(*self.params.H.shape)
        self.params.W = random_weight_matrix(*self.params.W.shape)
        self.params.U = 0.1*np.random.randn(*L0.shape)
        self.sparams.L = L0.copy()
Example #36
    def __init__(self, L0, U0=None,
                 alpha=0.005, rseed=10, bptt=1):

        self.vdim = L0.shape[0] # vocab size
        self.hdim = L0.shape[1] # word vector dimensions
        param_dims = dict(H = (self.hdim, self.hdim),
                          U = L0.shape)
        # note that only L gets sparse updates
        param_dims_sparse = dict(L = L0.shape)
        NNBase.__init__(self, param_dims, param_dims_sparse)

        #### YOUR CODE HERE ####

        # hyperparameters
        self.bptt = bptt
        self.alpha = alpha

        # weights
        self.sparams.L = random.normal(scale=sqrt(0.1), size=(self.vdim, self.hdim))
        self.params.U = random.normal(scale=sqrt(0.1), size=(self.vdim, self.hdim))  # U is a dense parameter
        self.params.H = random_weight_matrix(self.hdim, self.hdim)
Example #37
    def grad_check(self, X, y, outfd=sys.stderr, **kwargs):
        """
        Wrapper for gradient check on RNNs;
        ensures that backprop-through-time is run to completion,
        computing the full gradient for the loss as summed over
        the input sequence and predictions.

        Do not modify this function!
        """
        # if the model is not recurrent yet, this setting of bptt does not matter
        bptt_old = self.bptt
        # single example
        if isinstance(X, ndarray): X = [X]

        for i in range(len(X)):
            self.bptt = X[i].shape[0]
            print("NOTE: temporarily setting self.bptt = len(y) = %d to compute true gradient." % self.bptt, file=outfd)
            NNBase.grad_check(self, X[i], y[i], outfd=outfd, **kwargs)

        self.bptt = bptt_old
        print("Reset self.bptt = %d" % self.bptt, file=outfd)
Example #38
    def __init__(self, L0, U0=None, alpha=0.005, rseed=10, bptt=1):

        self.hdim = L0.shape[1]  # word vector dimensions
        self.vdim = L0.shape[0]  # vocab size
        param_dims = dict(H=(self.hdim, self.hdim), U=L0.shape)
        # note that only L gets sparse updates
        param_dims_sparse = dict(L=L0.shape)
        NNBase.__init__(self, param_dims, param_dims_sparse)

        #### YOUR CODE HERE ####

        random.seed(rseed)

        self.bptt = bptt
        self.alpha = alpha

        self.params.H = random_weight_matrix(self.hdim, self.hdim)
        self.params.U = sqrt(0.1) * random.randn(self.vdim, self.hdim)

        # Initialize word vectors
        self.sparams.L = sqrt(0.1) * random.randn(self.vdim, self.hdim)
Example #39
    def __init__(self, L0, **kwargs):
        #### YOUR CODE HERE ####
        isCompression = False
        isME = False
        compression_size = 0
        alpha = 0.1
        bptt = 1
        class_size = 2
        U0 = zeros((10, 10))
        Lcluster = zeros(10)
        cwords = zeros((10, 10))
        cfreq = zeros(10)
        ngram_feat = 0
        hash_size = 10000
        gradient_cutoff = 15
        rseed = 0
        
        #regularization param
        rho = 1e-4
        
        for key, value in kwargs.items():
            if key == "U0":
                U0 = value.copy()
            if key == "isCompression":
                isCompression = value
            if key == "compression_size":
                compression_size = value
            if key == "isME":
                isME = value
            if key == "bptt":
                bptt = value
            if key == "alpha":
                alpha = value
            if key == "Lcluster":
                Lcluster = value
            if key == "cwords":
                cwords = value
            if key == "cfreq":
                cfreq = value
            if key == "ngram":
                ngram_feat = value
            if key == "hash_size":
                hash_size = value
            if key == "cutoff":
                gradient_cutoff = value
            if key == "rseed":
                rseed = value
            if key == "class_size":
                class_size = value
                
            if key == "regular":
                rho = value
        
        random.seed(rseed)
        self.primes = array([])
        #print L0
        self.hdim = L0.shape[1] # word vector dimensions
        self.vdim = L0.shape[0] # vocab size
        #print self.hdim
        self.cdim = compression_size # compression layer size
        self.isCompression = isCompression # True for self.cdim > 0
        #print self.isCompression
        
        self.class_size = class_size # making word clusters
        self.Udim = self.vdim + self.class_size
        self.cutoff = gradient_cutoff
        
        self.isME = isME # max entropy optimization
        self.ngram = ngram_feat
        self.hsize = self.vdim
        #print self.hsize
        param_dims = {}
        if self.isCompression is True:
            if self.isME is True:
                param_dims = dict(H = (self.hdim, self.hdim),
                                  C = (self.cdim, self.hdim),
                                  U = (self.Udim, self.cdim),
                                  word_direct = (self.hsize,self.hsize),
                                  cluster_direct = (self.vdim, self.class_size))
            else:
                param_dims = dict(H = (self.hdim, self.hdim),
                              C = (self.cdim, self.hdim),    
                              U = (self.Udim, self.cdim))
            
        else:
            if self.isME is True:
                param_dims = dict(H = (self.hdim, self.hdim),
                                 U = (self.Udim, self.hdim),
                                 word_direct = (self.hsize, self.hsize),
                                 cluster_direct=(self.vdim, self.class_size))
            else:
                param_dims = dict(H = (self.hdim, self.hdim),    
                          U = (self.Udim, self.hdim))
        # note that only L gets sparse updates
        param_dims_sparse = dict(L = L0.shape) 
        NNBase.__init__(self, param_dims, param_dims_sparse)
        #NNBase.__init__(self, param_dims, param_dims_sparse)

        #### YOUR CODE HERE ####
        self.sparams.L = L0.copy()
        self.params.word_direct = zeros((self.hsize,self.hsize))
        self.params.cluster_direct = zeros((self.vdim,self.class_size))
        #word cluster informations
        self.Lcluster = Lcluster.copy() # cluster assignment for every word
        self.cfreq = cfreq.copy()       # number of words in every cluster
        self.cwords = cwords.copy()     # word indices in every cluster
        self.htable = zeros(self.hsize)
        #print "CWORD SIZE ",cwords.shape
        
        self.params.H = random_weight_matrix(self.hdim, self.hdim)
        
        self.alpha = alpha
        self.bptt = bptt
        
        #regularization
        self.rho = rho
        if isCompression is True:
            self.params.C = random_weight_matrix(self.cdim, self.hdim)
            #sigma = 0.1
            #self.params.C = sigma*random.uniform(low=-sigma,high=sigma, size=(self.cdim, self.hdim))
        if U0 is not None:
            self.params.U = U0.copy()
        else:
            sigma = 0.1
            mu = 0
            if self.isCompression:
                self.params.U = sigma*random.randn(self.Udim, self.cdim) + mu
            else:
                self.params.U = sigma*random.randn(self.Udim, self.hdim) + mu
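The long if-chain over kwargs above can be written more compactly with dict.get; a behavior-preserving sketch for a few of the keys (note that in the original only U0 is copied):

    from numpy import zeros

    def parse_kwargs(kwargs):
        # dict.get keeps the same defaults as the if-chain above
        alpha = kwargs.get("alpha", 0.1)
        bptt = kwargs.get("bptt", 1)
        rseed = kwargs.get("rseed", 0)
        rho = kwargs.get("regular", 1e-4)
        U0 = kwargs["U0"].copy() if "U0" in kwargs else zeros((10, 10))
        return alpha, bptt, rseed, rho, U0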
Example #40
def run(args=None):
    usage = "usage : %prog [options]"
    parser = optparse.OptionParser(usage=usage)

    #parser.add_option("--test",action="store_true",dest="test",default=False)
    #parser.add_option("--plotEpochs",action="store_true",dest="plotEpochs",default=False)
    #parser.add_option("--plotWvecDim",action="store_true",dest="plotWvecDim",default=False)

    # Optimizer
    # minibatch of 0 means no minibatches, just iterate through
    parser.add_option("--minibatch",dest="minibatch",type="int",default=0)
    #parser.add_option("--optimizer",dest="optimizer",type="string",
    #    default="adagrad")
    parser.add_option("--epochs",dest="epochs",type="int",default=50)
    parser.add_option("--printevery",dest="printevery",type="int",default=4e4)
    parser.add_option("--annealevery",dest="annealevery",type="int",default=0) # anneal every this many epochs

    parser.add_option("--alpha",dest="alpha",type="float",default=0.005)
    parser.add_option("--rho",dest="rho",type="float",default=1e-5)
    parser.add_option("--drop_p",dest="drop_p",type="float",default=0.5)

    parser.add_option("--wdim",dest="wdim",type="int",default=50)
    parser.add_option("--hdim",dest="hdim",type="int",default=200)
    parser.add_option("--odim",dest="odim",type="int",default=2)
    parser.add_option("--rseed",dest="rseed",type="int",default=207)
    parser.add_option("--context",dest="context",type="int",default=1)

    #parser.add_option("--outFile",dest="outFile",type="string",
    #    default="models/test.bin")
    #parser.add_option("--inFile",dest="inFile",type="string",
    #    default="models/test.bin")
    #parser.add_option("--data",dest="data",type="string",default="train")

    parser.add_option("--model",dest="model",type="string",default="NNMX")

    (opts,args)=parser.parse_args(args)


    # name of folder to store results in
    resfolder =  '_'.join(
        ['{k}={v}'.format(k=k,v=v) for k,v in vars(opts).items()]
        )

    resfolder += '_timestamp={t}'.format(t=time.strftime('%Y%m%d%H%M%S'))
    resfolder = 'results/'+resfolder
    print(resfolder)

    if not os.path.exists(resfolder):
        os.makedirs(resfolder)

    ### Set up the training and test data to work with throughout the notebook:
    np.random.seed(opts.rseed)

    all_train_df, y, submit_df = load_raop_data()

    # useful for sklearn scoring
    #roc_scorer = make_scorer(roc_auc_score)
    n_all = all_train_df.shape[0]

    # set up kFolds to be used in the rest of the project
    kf = KFold(n_all, n_folds = 10, random_state=opts.rseed)

    body_vecs = Pipeline([
        ('body', ExtractBody()),
        ('vec', PrepAndVectorize(d=opts.wdim))
        ]).fit_transform(X=all_train_df,y=1)

    for train, test in kf:
        nn = init_model(opts)
        if opts.minibatch == 0:
            idxiter = list(train)*opts.epochs
            annealevery=len(train)*opts.annealevery
            printevery=opts.printevery
        else:
            idxiter = NNBase.randomiter(
                N=opts.epochs*len(train)/opts.minibatch,
                pickfrom=train,batch=opts.minibatch)
            annealevery=len(train)*opts.annealevery/opts.minibatch
            printevery=opts.printevery/opts.minibatch

        nn.train_sgd(body_vecs, y, idxiter=idxiter,
                       devidx=test, savepath=resfolder,
                       costevery=printevery, printevery=printevery,
                       annealevery=annealevery)

    save_all_results(resultpath = 'results', savepath = 'result_summary')
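Since run() forwards args to parse_args (which accepts a list of strings), the script can also be driven programmatically; a hypothetical invocation:

    run(args=["--minibatch=32", "--epochs=10", "--alpha=0.005", "--model=NNMX"])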