def __init__(self, dinput, nstates, sigma=0.1, fbias=0.0, last_state_only=False):
    '''
    Initialize the layer's stacked gate-weight matrix and initial states.

    dinput:          dimension of the input (per time step)
    nstates:         number of hidden/cell states
    sigma:           scale of the random init for the weight matrix
                     (fix: was hard-coded to 0.1 and the parameter was ignored)
    fbias:           initial bias for the forget-gate rows
    last_state_only: if True, the layer exposes only its last state
    '''
    # W stacks 4 gate blocks of `nstates` rows each; columns are
    # [input (dinput) | recurrent (nstates) | bias (1)].
    W = random(nstates * 4, dinput + nstates + 1) * sigma
    # Orthogonal init for each gate's recurrent (hidden-to-hidden) sub-matrix.
    # Same four random() calls, in the same order, as the unrolled original.
    for gate in range(4):
        W[gate * nstates:(gate + 1) * nstates, dinput:-1] = orthogonalize(random(nstates, nstates))
    W[:, -1] = 0  # initialize all biases to zero
    W[2 * nstates:3 * nstates, -1] = fbias  # forget bias
    self.W = W
    self.c_0 = np.zeros((nstates, 1))  # initial cell state
    self.Y_0 = np.zeros((nstates, 1))  # initial output state
    self.dinput = dinput
    self.nstates = nstates
    self.last_state_only = last_state_only
    self.forget()
def __init__(self, dinput, nstates, sigma=0.1, fbias=0.0, last_state_only=False):
    '''
    Build the combined weight matrix W and zeroed initial states.

    dinput:          per-step input dimension
    nstates:         number of hidden/cell states
    sigma:           init scale for W; previously this argument was silently
                     ignored in favor of a hard-coded 0.1 — now honored
                     (default keeps the old behavior)
    fbias:           value written into the forget-gate bias column
    last_state_only: whether downstream code keeps only the final state
    '''
    ncols = dinput + nstates + 1  # input cols + recurrent cols + bias col
    W = random(nstates * 4, ncols) * sigma
    # Overwrite every gate's recurrent block with an orthogonal matrix;
    # the four random() draws happen in the same order as before.
    for g in range(4):
        rows = slice(g * nstates, (g + 1) * nstates)
        W[rows, dinput:-1] = orthogonalize(random(nstates, nstates))
    W[:, -1] = 0  # initialize all biases to zero
    W[2 * nstates:3 * nstates, -1] = fbias  # forget bias
    self.W = W
    self.c_0 = np.zeros((nstates, 1))  # initial cell state
    self.Y_0 = np.zeros((nstates, 1))  # initial output state
    self.dinput = dinput
    self.nstates = nstates
    self.last_state_only = last_state_only
    self.forget()
def __init__(self, dinput, nstates, doutput, clock_periods, full_recurrence=False, learn_state=True, first_layer=False):
    '''
    Clockwork Recurrent Neural Network (PyTorch variant).

    dinput:          dimension of the input (per time step)
    nstates:         number of states per module/clock
    doutput:         required dimension of the output
    clock_periods:   the periods of clocks (order is maintained, not sorted)
    full_recurrence: True  - all modules can see every module's hidden state
                     False - as per the original paper, only faster modules
                             can see slower modules
    learn_state:     True  - initial state is randomly initialized and learnt
                     False - start from an all-zero, fixed initial state
    first_layer:     True  - gradients w.r.t. inputs are not needed
    '''
    super(CRNN, self).__init__()
    nclocks = len(clock_periods)

    # Raw parameter matrices; the extra column holds the bias.
    Wi = random(nclocks * nstates, dinput + 1)
    Wh = random(nclocks * nstates, nclocks * nstates + 1)
    Wo = random(doutput, nclocks * nstates + 1)

    # Fix: honor learn_state (it was previously ignored and H_0 was
    # always zeros) — mirrors the numpy implementation of this class.
    if learn_state:
        H_0 = random(nclocks * nstates, 1)
    else:
        H_0 = np.zeros((nclocks * nstates, 1))

    # some fancy inits
    Wi = glorotize(Wi)
    Wh[:, :-1] = orthogonalize(Wh[:, :-1])
    Wo = glorotize(Wo)

    # mask to make Wh a block upper triangle matrix
    utri_mask = recurrent_mask(nclocks, nstates)
    if not full_recurrence:
        Wh[:, :-1] *= utri_mask

    # row-activation schedule based on time
    schedules = make_schedule(clock_periods, nstates)

    self.dinput = dinput
    self.nstates = nstates
    self.doutput = doutput
    self.clock_periods = clock_periods
    self.nclocks = nclocks
    self.Wi = nn.Parameter(torch.from_numpy(Wi).float())
    self.Wh = nn.Parameter(torch.from_numpy(Wh).float())
    self.Wo = nn.Parameter(torch.from_numpy(Wo).float())
    if learn_state:
        # Register as a Parameter so the initial state is actually trained.
        self.H_0 = nn.Parameter(torch.from_numpy(H_0).float())
    else:
        self.H_0 = torch.from_numpy(H_0).float()
    self.utri_mask = utri_mask
    self.schedules = schedules
    self.full_recurrence = full_recurrence
    self.learn_state = learn_state
    self.first_layer = first_layer
    self.H_last = None
def __init__(self, dinput, nstates, doutput, clock_periods, full_recurrence=False, learn_state=True, first_layer=False):
    '''
    Clockwork Recurrent Neural Network (variant of Koutnik et al.).

    dinput:          input dimensionality per time step
    nstates:         number of states per module/clock
    doutput:         output dimensionality
    clock_periods:   clock periods; their order is preserved, never sorted
    full_recurrence: True  - every module sees every module's hidden state
                     False - per the paper, only faster modules see slower ones
    learn_state:     True  - initial state is random and trained
                     False - initial state is fixed at all zeros
    first_layer:     True  - gradients w.r.t. the inputs are skipped (useless
                     for the network's first layer; saves time)
                     False - input gradients are computed and returned
    '''
    nclocks = len(clock_periods)

    # Raw weight matrices; the trailing column in each is the bias.
    Wi = random(nclocks * nstates, dinput + 1)
    Wh = random(nclocks * nstates, nclocks * nstates + 1)
    Wo = random(doutput, nclocks * nstates + 1)
    H_0 = random(nclocks * nstates, 1) if learn_state else np.zeros((nclocks * nstates, 1))

    # Fancy inits: glorot scaling on the projections, orthogonal recurrence.
    Wi = glorotize(Wi)
    Wh[:, :-1] = orthogonalize(Wh[:, :-1])
    Wo = glorotize(Wo)

    # Block-upper-triangular mask so faster modules only read slower ones.
    utri_mask = recurrent_mask(nclocks, nstates)
    if not full_recurrence:
        Wh[:, :-1] *= utri_mask

    # Column vector that selectively activates rows based on time.
    schedules = np.array(make_schedule(clock_periods, nstates)).reshape(-1, 1)

    # Stash everything on the instance.
    self.dinput, self.nstates, self.doutput = dinput, nstates, doutput
    self.clock_periods, self.nclocks = clock_periods, nclocks
    self.Wi, self.Wh, self.Wo, self.H_0 = Wi, Wh, Wo, H_0
    self.utri_mask = utri_mask
    self.schedules = schedules
    self.full_recurrence = full_recurrence
    self.learn_state = learn_state
    self.first_layer = first_layer
    self.forget()
def __init__(self, dinput, nstates, doutput, clock_periods, full_recurrence=False, learn_state=True, first_layer=False):
    '''
    Clockwork RNN following the Koutnik et al. formulation.

    dinput:          dimension of the input at each time step
    nstates:         states per module/clock
    doutput:         dimension of the output
    clock_periods:   clock periods (order maintained, not sorted)
    full_recurrence: when True, all modules can see all hidden states;
                     when False (the paper's scheme), only faster modules
                     can see slower ones
    learn_state:     when True, the initial state is randomly initialized
                     and learnt during training; when False it stays zero
    first_layer:     when True, input gradients are not computed (they are
                     useless for the first layer, so this saves time);
                     when False they are computed and returned
    '''
    nclocks = len(clock_periods)
    nhidden = nclocks * nstates

    # Draw the raw matrices (last column of each holds the bias term).
    W_in = random(nhidden, dinput + 1)
    W_hid = random(nhidden, nhidden + 1)
    W_out = random(doutput, nhidden + 1)

    if learn_state:
        h_init = random(nhidden, 1)
    else:
        h_init = np.zeros((nhidden, 1))

    # Initialization schemes: glorot for in/out, orthogonal for recurrence.
    W_in = glorotize(W_in)
    W_hid[:, :-1] = orthogonalize(W_hid[:, :-1])
    W_out = glorotize(W_out)

    # Upper-triangular block mask restricting inter-module visibility.
    utri_mask = recurrent_mask(nclocks, nstates)
    if not full_recurrence:
        W_hid[:, :-1] *= utri_mask

    # Per-row activation schedule, shaped as a column vector.
    sched = make_schedule(clock_periods, nstates)
    sched = np.array(sched).reshape(-1, 1)

    self.dinput = dinput
    self.nstates = nstates
    self.doutput = doutput
    self.clock_periods = clock_periods
    self.nclocks = nclocks
    self.Wi = W_in
    self.Wh = W_hid
    self.Wo = W_out
    self.H_0 = h_init
    self.utri_mask = utri_mask
    self.schedules = sched
    self.full_recurrence = full_recurrence
    self.learn_state = learn_state
    self.first_layer = first_layer
    self.forget()