Example #1
    def __init__(self,
                 dinput,
                 nstates,
                 sigma=0.1,
                 fbias=0.0,
                 last_state_only=False):
        W = random(nstates * 4, dinput + nstates + 1) * sigma  # small random weights, scaled by sigma
        W[0 * nstates:1 * nstates,
          dinput:-1] = orthogonalize(random(nstates, nstates))
        W[1 * nstates:2 * nstates,
          dinput:-1] = orthogonalize(random(nstates, nstates))
        W[2 * nstates:3 * nstates,
          dinput:-1] = orthogonalize(random(nstates, nstates))
        W[3 * nstates:4 * nstates,
          dinput:-1] = orthogonalize(random(nstates, nstates))
        W[:, -1] = 0  # initialize all biases to zero
        W[2 * nstates:3 * nstates, -1] = fbias  # forget bias
        self.W = W

        self.c_0 = np.zeros((nstates, 1))
        self.Y_0 = np.zeros((nstates, 1))

        self.dinput = dinput
        self.nstates = nstates
        self.last_state_only = last_state_only

        self.forget()
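Both LSTM snippets call `random` and `orthogonalize` helpers that are not shown anywhere on this page. A minimal sketch of plausible implementations, assuming Gaussian sampling and SVD-based orthogonalization (the names come from the snippets, but these bodies are guesses, not the original project's code):

import numpy as np

def random(*shape):
    # Standard-normal matrix of the given shape (assumed behaviour).
    return np.random.randn(*shape)

def orthogonalize(W):
    # Return an orthonormal matrix of the same shape via SVD, a common
    # initialization for recurrent weights (assumed behaviour).
    u, _, v = np.linalg.svd(W, full_matrices=False)
    return u if u.shape == W.shape else v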
Example #2
    def __init__(self, dinput, nstates, sigma=0.1, fbias=0.0, last_state_only=False):
        W = random(nstates * 4, dinput + nstates + 1) * sigma  # small random weights, scaled by sigma
        W[0 * nstates : 1 * nstates, dinput:-1] = orthogonalize(random(nstates, nstates))
        W[1 * nstates : 2 * nstates, dinput:-1] = orthogonalize(random(nstates, nstates))
        W[2 * nstates : 3 * nstates, dinput:-1] = orthogonalize(random(nstates, nstates))
        W[3 * nstates : 4 * nstates, dinput:-1] = orthogonalize(random(nstates, nstates))
        W[:, -1] = 0  # initialize all biases to zero
        W[2 * nstates : 3 * nstates, -1] = fbias  # forget bias
        self.W = W

        self.c_0 = np.zeros((nstates, 1))
        self.Y_0 = np.zeros((nstates, 1))

        self.dinput = dinput
        self.nstates = nstates
        self.last_state_only = last_state_only

        self.forget()
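Assuming the enclosing class is called LSTM (the snippets only show __init__, so the class name is a guess), construction might look like:

lstm = LSTM(dinput=16, nstates=64, fbias=1.0)  # a positive forget bias is a common choice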
Example #3
    def __init__(self,
                 dinput,
                 nstates,
                 doutput,
                 clock_periods,
                 full_recurrence=False,
                 learn_state=True,
                 first_layer=False):
        super(CRNN, self).__init__()
        nclocks = len(clock_periods)

        Wi = random(nclocks * nstates, dinput + 1)
        Wh = random(nclocks * nstates, nclocks * nstates + 1)
        Wo = random(doutput, nclocks * nstates + 1)

        H_0 = np.zeros((nclocks * nstates, 1))

        # some fancy inits
        Wi = glorotize(Wi)
        Wh[:, :-1] = orthogonalize(Wh[:, :-1])
        Wo = glorotize(Wo)

        # mask to make Wh a block upper triangle matrix
        utri_mask = recurrent_mask(nclocks, nstates)
        if not full_recurrence:
            Wh[:, :-1] *= utri_mask

        # column vector to selectively activate rows based on time
        schedules = make_schedule(clock_periods, nstates)

        self.dinput = dinput
        self.nstates = nstates
        self.doutput = doutput
        self.clock_periods = clock_periods
        self.nclocks = nclocks
        self.Wi = nn.Parameter(torch.from_numpy(Wi).float())
        self.Wh = nn.Parameter(torch.from_numpy(Wh).float())
        self.Wo = nn.Parameter(torch.from_numpy(Wo).float())
        self.H_0 = torch.from_numpy(H_0).float()
        self.utri_mask = utri_mask
        self.schedules = schedules
        self.full_recurrence = full_recurrence
        self.learn_state = learn_state
        self.first_layer = first_layer
        self.H_last = None
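None of the examples define `recurrent_mask`. Going by the "block upper triangle matrix" comments, a minimal sketch might be (an assumption, not the original helper):

import numpy as np

def recurrent_mask(nclocks, nstates):
    # Block upper-triangular mask: block (i, j) is ones when j >= i, so module i
    # only receives recurrent input from itself and from slower modules
    # (a sketch consistent with the comments here; the real helper may differ).
    mask = np.zeros((nclocks * nstates, nclocks * nstates))
    for i in range(nclocks):
        mask[i * nstates:(i + 1) * nstates, i * nstates:] = 1.0
    return mask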
Example #4
	def __init__(self, dinput, nstates, doutput, clock_periods, full_recurrence=False, learn_state=True, first_layer=False):
		'''

			Clockwork Recurrent Neural Network
			This follows the variant described in the paper by Koutnik et al.

			dinput: 
				dimension of the input (per time step)
			
			nstates: 
				number of states per module/clock
			
			doutput: 
				required dimension of the output
			
			clock_periods: 
				the periods of clocks (order is maintained and not sorted)
			
			full_recurrence:
				True: every module can 'see' the hidden states of every other module
				False: as per the original paper - only faster modules can see slower modules

			learn_state:
				True: initial state is randomly initialized and learnt during training
				False: start with an all-zero initial state and don't learn it

			first_layer:
				True: this is the first layer of the network, so gradients w.r.t. the inputs
						are not calculated (they are not needed for training); saves time
				False: gradients w.r.t. the inputs are calculated and returned
		'''

		nclocks = len(clock_periods)
		
		Wi = random(nclocks * nstates, dinput + 1)
		Wh = random(nclocks * nstates, nclocks * nstates + 1)
		Wo = random(doutput, nclocks * nstates + 1)
		
		if learn_state:
			H_0 = random(nclocks * nstates, 1)
		else:
			H_0 = np.zeros((nclocks * nstates, 1))

		# some fancy inits
		Wi = glorotize(Wi)
		Wh[:, :-1] = orthogonalize(Wh[:, :-1])
		Wo = glorotize(Wo)
	
		# mask to make Wh a block upper triangle matrix
		utri_mask = recurrent_mask(nclocks, nstates)
		if not full_recurrence:
			Wh[:,:-1] *= utri_mask

		# column vector to selectively activate rows based on time
		schedules = make_schedule(clock_periods, nstates)
		schedules = np.array(schedules).reshape(-1, 1)

		# store it all
		self.dinput = dinput
		self.nstates = nstates
		self.doutput = doutput
		self.clock_periods = clock_periods
		self.nclocks = nclocks
		self.Wi = Wi
		self.Wh = Wh
		self.Wo = Wo
		self.H_0 = H_0
		self.utri_mask = utri_mask
		self.schedules = schedules
		self.full_recurrence = full_recurrence
		self.learn_state = learn_state
		self.first_layer = first_layer

		self.forget()
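The `glorotize` and `make_schedule` helpers are likewise left undefined. Plausible sketches, assuming Glorot-style variance scaling and a per-row clock-period schedule (both bodies are guesses inferred from the comments in the snippet above):

import numpy as np

def glorotize(W):
    # Rescale a standard-normal matrix to Glorot/Xavier variance
    # (assumed behaviour; the bias column is scaled along with the weights).
    fan_out, fan_in = W.shape
    return W * np.sqrt(2.0 / (fan_in + fan_out))

def make_schedule(clock_periods, nstates):
    # One clock period per row of Wh / H_0: each module's period is repeated
    # nstates times, so at time t the rows with t % period == 0 are updated
    # (assumed behaviour, matching the reshape(-1, 1) in the snippet above).
    schedule = []
    for period in clock_periods:
        schedule.extend([period] * nstates)
    return schedule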
Example #5
    def __init__(self,
                 dinput,
                 nstates,
                 doutput,
                 clock_periods,
                 full_recurrence=False,
                 learn_state=True,
                 first_layer=False):
        '''

			Clockwork Recurrent Neural Network
			This follows the variant described in the paper by Koutnik et al.

			dinput: 
				dimension of the input (per time step)
			
			nstates: 
				number of states per module/clock
			
			doutput: 
				required dimension of the output
			
			clock_periods: 
				the periods of clocks (order is maintained and not sorted)
			
			full_recurrence:
				True: every module can 'see' the hidden states of every other module
				False: as per the original paper - only faster modules can see slower modules

			learn_state:
				True: initial state is randomly initialized and learnt during training
				False: start with an all-zero initial state and don't learn it

			first_layer:
				True: this is the first layer of the network, so gradients w.r.t. the inputs
						are not calculated (they are not needed for training); saves time
				False: gradients w.r.t. the inputs are calculated and returned
		'''

        nclocks = len(clock_periods)

        Wi = random(nclocks * nstates, dinput + 1)
        Wh = random(nclocks * nstates, nclocks * nstates + 1)
        Wo = random(doutput, nclocks * nstates + 1)

        if learn_state:
            H_0 = random(nclocks * nstates, 1)
        else:
            H_0 = np.zeros((nclocks * nstates, 1))

        # some fancy inits
        Wi = glorotize(Wi)
        Wh[:, :-1] = orthogonalize(Wh[:, :-1])
        Wo = glorotize(Wo)

        # mask to make Wh a block upper triangle matrix
        utri_mask = recurrent_mask(nclocks, nstates)
        if not full_recurrence:
            Wh[:, :-1] *= utri_mask

        # column vector to selectively activate rows based on time
        schedules = make_schedule(clock_periods, nstates)
        schedules = np.array(schedules).reshape(-1, 1)

        # store it all
        self.dinput = dinput
        self.nstates = nstates
        self.doutput = doutput
        self.clock_periods = clock_periods
        self.nclocks = nclocks
        self.Wi = Wi
        self.Wh = Wh
        self.Wo = Wo
        self.H_0 = H_0
        self.utri_mask = utri_mask
        self.schedules = schedules
        self.full_recurrence = full_recurrence
        self.learn_state = learn_state
        self.first_layer = first_layer

        self.forget()
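For completeness, a hypothetical instantiation (the class name CRNN is taken from Example #3; the numpy variants only show __init__):

crnn = CRNN(dinput=16, nstates=32, doutput=8,
            clock_periods=[1, 2, 4, 8],  # order is preserved, fastest module first
            full_recurrence=False,
            learn_state=True,
            first_layer=True)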