def __init__(self, hidden_size, input_size, stack_size=2, celltype=LSTM):
    self.input_size = input_size
    # Modelling
    self.model = StackedCells(input_size,
                              celltype=celltype,
                              activation=T.tanh,
                              layers=[hidden_size] * stack_size)
    # disable modulation of the input layer
    self.model.layers[0].in_gate2.activation = lambda x: x
    # add an output layer
    self.model.layers.append(Layer(hidden_size, input_size, activation=softmax))
    # set up the symbolic tensor variables used in computation;
    # inputs are windows of spectrum data
    self.input = T.fvector("input")
    self.prev_input = T.fvector("prev_input")
    # create symbolic variables for prediction:
    self.prediction = self.create_prediction()
    # create gradient training functions:
    self.create_cost_fun()
    self.create_training_function()
    self.create_predict_function()
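# --- Usage sketch (not from the original source) ---
# A minimal, hypothetical driver for the constructor above. It assumes the
# theano_lstm API (StackedCells, Layer, LSTM), that the enclosing class is
# named Model, and that create_training_function()/create_predict_function()
# expose `update_fun`/`pred_fun`; all sizes below are illustrative.
import numpy as np

model = Model(hidden_size=128, input_size=64, stack_size=2, celltype=LSTM)
window = np.random.rand(64).astype(np.float32)       # one window of spectrum data
prev_window = np.random.rand(64).astype(np.float32)  # the preceding window
# Training and prediction then go through the compiled Theano functions,
# e.g. (attribute names assumed):
#   loss = model.update_fun(prev_window, window)
#   next_window = model.pred_fun(window)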
def __init__(self, t_layer_sizes, p_layer_sizes, dropout=0):
    self.t_layer_sizes = t_layer_sizes
    self.p_layer_sizes = p_layer_sizes

    # From our architecture definition, size of the notewise input
    self.t_input_size = 80

    # The time network maps from the notewise input size to the various hidden sizes.
    self.time_model = StackedCells(self.t_input_size, celltype=LSTM, layers=t_layer_sizes)
    self.time_model.layers.append(PassthroughLayer())

    # The pitch network takes the last layer of the time model plus the state of the
    # last note, moving upward, and ends with a two-element sigmoid layer.
    p_input_size = t_layer_sizes[-1] + 2
    self.pitch_model = StackedCells(p_input_size, celltype=LSTM, layers=p_layer_sizes)
    self.pitch_model.layers.append(Layer(p_layer_sizes[-1], 2, activation=T.nnet.sigmoid))

    self.dropout = dropout
    self.conservativity = T.fscalar()
    self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))

    self.setup_train()
    self.setup_predict()
    self.setup_slow_walk()
def __init__(self, t_layer_sizes, p_layer_sizes, dropout=0):
    self.t_layer_sizes = t_layer_sizes
    self.p_layer_sizes = p_layer_sizes

    # From our architecture definition, size of the notewise input
    self.t_input_size = 80

    # The time network maps from the notewise input size to the various hidden sizes.
    self.time_model = StackedCells(self.t_input_size, celltype=LSTM, layers=t_layer_sizes)
    # add the output layer of the time model
    self.time_model.layers.append(PassthroughLayer())

    # The pitch network takes the last layer of the time model plus the state of the
    # last note, moving upward, and ends with a two-element sigmoid layer.
    # The two extra input elements are:
    #   1. a value (0 or 1) for whether the previous (half-step lower) note was
    #      chosen to be played (based on the previous note-step; starts at 0)
    #   2. a value (0 or 1) for whether the previous (half-step lower) note was
    #      chosen to be articulated (based on the previous note-step; starts at 0)
    p_input_size = t_layer_sizes[-1] + 2
    self.pitch_model = StackedCells(p_input_size, celltype=LSTM, layers=p_layer_sizes)
    self.pitch_model.layers.append(Layer(p_layer_sizes[-1], 2, activation=T.nnet.sigmoid))

    self.dropout = dropout
    # symbolic float scalar controlling how conservative generation is
    self.conservativity = T.fscalar()
    # stream object used to draw random numbers inside the symbolic graph
    self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))

    self.setup_train()
    self.setup_predict()
    self.setup_slow_walk()
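# --- Usage sketch (not from the original source) ---
# Instantiating the biaxial time/pitch model defined above. The layer sizes
# are illustrative assumptions (two time-axis LSTM layers, two note-axis LSTM
# layers, 50% dropout); setup_train/setup_predict/setup_slow_walk are invoked
# automatically by __init__.
model = Model(t_layer_sizes=[300, 300], p_layer_sizes=[100, 50], dropout=0.5)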
def __init__(self, hidden_size, input_size, vocab_size, stack_size=1, celltype=LSTM):
    # declare model
    self.model = StackedCells(input_size,
                              celltype=celltype,
                              layers=[hidden_size] * stack_size)
    # add an embedding
    self.model.layers.insert(0, Embedding(vocab_size, input_size))
    # add a classifier:
    self.model.layers.append(Layer(hidden_size, vocab_size, activation=softmax))
    # inputs are matrices of indices,
    # each row is a sentence, each column a timestep
    self._stop_word = theano.shared(np.int32(999999999), name="stop word")
    self.for_how_long = T.ivector()
    self.input_mat = T.imatrix()
    self.priming_word = T.iscalar()
    self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))
    # create symbolic variables for prediction:
    self.predictions = self.create_prediction()
    # create symbolic variable for greedy search:
    self.greedy_predictions = self.create_prediction(greedy=True)
    # create gradient training functions:
    self.create_cost_fun()
    self.create_training_function()
    self.create_predict_function()
    # for saving state
    self.epochs = 0
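# --- Usage sketch (not from the original source) ---
# How this embedding language model is typically wired up, assuming the
# theano_lstm tutorial API. input_size is the embedding width and vocab_size
# the number of distinct words; the concrete numbers are assumptions.
model = Model(hidden_size=20, input_size=10, vocab_size=100,
              stack_size=2, celltype=LSTM)
# __init__ has already compiled the training and prediction functions, so
# padded index matrices (one row per sentence, one column per timestep) can
# be fed straight to the functions built by create_training_function() and
# create_predict_function().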
def __init__(self, data_manager, t_layer_sizes, p_layer_sizes, dropout=0):
    print('{:25}'.format("Initializing Model"), end='', flush=True)
    self.t_layer_sizes = t_layer_sizes
    self.p_layer_sizes = p_layer_sizes
    self.dropout = dropout

    self.data_manager = data_manager
    self.t_input_size = self.data_manager.f.feature_count
    self.output_size = self.data_manager.s.information_count

    self.time_model = StackedCells(self.t_input_size, celltype=LSTM, layers=t_layer_sizes)
    self.time_model.layers.append(PassthroughLayer())

    p_input_size = t_layer_sizes[-1] + self.output_size
    self.pitch_model = StackedCells(p_input_size, celltype=LSTM, layers=p_layer_sizes)
    self.pitch_model.layers.append(
        Layer(p_layer_sizes[-1], self.output_size, activation=T.nnet.sigmoid))

    self.conservativity = T.fscalar()
    self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))
    # smallest representable float32 increment, used to avoid taking log(0)
    self.epsilon = np.spacing(np.float32(1.0))

    print("Done")
def __init__(self, hidden_size, input_size, output_size, stack_size=1, celltype=RNN, steps=40):
    # declare model
    self.model = StackedCells(input_size,
                              celltype=celltype,
                              layers=[hidden_size] * stack_size)
    # add a classifier:
    self.model.layers.append(Layer(hidden_size, output_size, activation=T.tanh))
    self.steps = steps
    self.gfs = T.tensor3('gfs')        # input GFS data
    self.pm25in = T.tensor3('pm25in')  # initial PM2.5 data
    self.layerstatus = None
    self.results = None
    self.cnt = T.tensor3('cnt')
    # create symbolic variables for prediction
    # (one complete pass over the whole sequence; the result is `predictions`)
    self.predictions = self.create_prediction()
    self.create_predict_function()
    # The steps above only write out the symbolic formulas; nothing is computed yet.
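# --- Background sketch (not from the original source) ---
# The closing comment above ("write out the formulas first") refers to
# Theano's define-then-compile workflow: building symbolic expressions costs
# nothing until theano.function compiles them into a callable. A minimal,
# self-contained illustration:
import numpy as np
import theano
import theano.tensor as T

a = T.fvector('a')           # symbolic input, no data attached yet
b = 2 * a + 1                # symbolic formula, still nothing computed
f = theano.function([a], b)  # compile the graph into a callable
print(f(np.ones(3, dtype=np.float32)))  # -> [3. 3. 3.]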
def name_model():
    LSTM_SIZE = 300
    layer1 = LSTM(len(CHARKEY), LSTM_SIZE, activation=T.tanh)
    layer2 = Layer(LSTM_SIZE, len(CHARKEY), activation=lambda x: x)
    params = layer1.params + [layer1.initial_hidden_state] + layer2.params

    ################# Train #################
    train_data = T.ftensor3()
    n_batch = train_data.shape[0]
    # shift the input right by one step: the network sees a zero vector first,
    # then every character except the last, and must reproduce the full sequence
    train_input = T.concatenate([T.zeros([n_batch, 1, len(CHARKEY)]), train_data[:, :-1, :]], 1)
    train_output = train_data

    def _scan_train(last_out, last_state):
        new_state = layer1.activate(last_out, last_state)
        layer_out = layer1.postprocess_activation(new_state)
        layer2_out = layer2.activate(layer_out)
        new_out = T.nnet.softmax(layer2_out)
        return new_out, new_state

    outputs_info = [None, initial_state(layer1, n_batch)]
    (scan_outputs, scan_states), _ = theano.scan(_scan_train,
                                                 sequences=[train_input.dimshuffle([1, 0, 2])],
                                                 outputs_info=outputs_info)

    flat_scan_outputs = scan_outputs.dimshuffle([1, 0, 2]).reshape([-1, len(CHARKEY)])
    flat_train_output = train_output.reshape([-1, len(CHARKEY)])
    crossentropy = T.nnet.categorical_crossentropy(flat_scan_outputs, flat_train_output)
    loss = T.sum(crossentropy) / T.cast(n_batch, 'float32')

    adam_updates = Adam(loss, params)
    train_fn = theano.function([train_data], loss, updates=adam_updates)

    ################# Eval #################
    length = T.iscalar()
    srng = MRG_RandomStreams(np.random.randint(1, 1024))

    def _scan_gen(last_out, last_state):
        new_state = layer1.activate(last_out, last_state)
        layer_out = layer1.postprocess_activation(new_state)
        layer2_out = layer2.activate(layer_out)
        new_out = T.nnet.softmax(T.shape_padleft(layer2_out))
        sample = srng.multinomial(n=1, pvals=new_out)[0, :]
        sample = T.cast(sample, 'float32')
        return sample, new_state

    initial_input = np.zeros([len(CHARKEY)], np.float32)
    outputs_info = [initial_input, layer1.initial_hidden_state]
    (scan_outputs, scan_states), updates = theano.scan(_scan_gen,
                                                       n_steps=length,
                                                       outputs_info=outputs_info)
    gen_fn = theano.function([length], scan_outputs, updates=updates)

    return layer1, layer2, train_fn, gen_fn
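# --- Usage sketch (not from the original source) ---
# Driving the train/generate pair returned by name_model(). CHARKEY (the
# character vocabulary) and the one-hot batch are assumptions; training data
# must be float32 of shape (batch, time, len(CHARKEY)).
layer1, layer2, train_fn, gen_fn = name_model()
batch = np.zeros((8, 50, len(CHARKEY)), dtype=np.float32)  # one-hot sequences
loss = train_fn(batch)           # one training step over the batch
sampled = gen_fn(100)            # sample 100 one-hot steps from the model
chars = sampled.argmax(axis=-1)  # decode to vocabulary indices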
def __init__(self, input_size, output_size, time_model_layer_sizes, note_model_layer_sizes):
    # input_size and output_size were previously undefined free variables
    # (with output_size also misspelled "outptu_size"); they are now parameters
    self.time_model = StackedCells(input_size, celltype=LSTM, layers=time_model_layer_sizes)
    self.time_model.layers.append(Router())

    note_model_input_size = time_model_layer_sizes[-1] + output_size
    self.note_model = StackedCells(note_model_input_size, celltype=LSTM, layers=note_model_layer_sizes)
    self.note_model.layers.append(Layer(note_model_layer_sizes[-1], output_size, activation=T.nnet.sigmoid))

    self.time_model_layer_sizes = time_model_layer_sizes
    self.note_model_layer_sizes = note_model_layer_sizes

    self._initialize_update_function()
    self._initialize_predict_function()
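# --- Usage sketch (not from the original source) ---
# Instantiating the repaired constructor above. The concrete sizes are
# illustrative assumptions only, chosen to mirror the biaxial models earlier
# in this section.
model = Model(input_size=80, output_size=2,
              time_model_layer_sizes=[300, 300],
              note_model_layer_sizes=[100, 50])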
def __init__(self, hidden_size, input_size, output_size, celltype=Layer):
    # declare model
    self.model = StackedCells(input_size, celltype=celltype, layers=hidden_size)
    # add a regression head and a classification head:
    self.regression = Layer(hidden_size[-1], output_size[0], activation=T.tanh)
    self.classifier = Layer(hidden_size[-1], output_size[1], activation=softmax)
    self.steps = T.iscalar('steps')
    self.x = T.tensor3('x')               # input GFS data
    self.target0 = T.tensor3('target0')   # regression target (this version changes the target dimensions)
    self.target1 = T.itensor3('target1')  # classification target
    self.layerstatus = None
    self.results = None
    # create symbolic variables for prediction
    # (one complete pass over the whole sequence; the results are the predictions)
    self.predictions0, self.predictions1 = self.create_prediction()
    # create gradient training functions:
    #self.create_cost_fun()
    #self.create_valid_error()
    #self.create_training_function()
    self.create_predict_function()
    #self.create_validate_function()
    # The steps above only write out the symbolic formulas; nothing is computed yet.
def __init__(self, input_parts, layer_sizes, output_size, window_size=0, dropout=0, mode="drop", unroll_batch_num=None):
    """
    Parameters:
        input_parts: A list of InputParts
        layer_sizes: A list of the form [(indep, per_note), ...], where indep
            is the number of non-shifted cells to have, and per_note is the
            number of cells to have per window note, which shift as the
            network moves. Alternately can just be [indep, ...].
        output_size: An integer, the width of the desired output
        dropout: How much dropout to apply.
        mode: Either "drop" or "roll". If "drop", discard memory that goes
            out of range. If "roll", roll it instead.
    """
    self.input_parts = input_parts
    self.window_size = window_size

    layer_sizes = [x if isinstance(x, tuple) else (x, 0) for x in layer_sizes]
    self.layer_sizes = layer_sizes
    self.tot_layer_sizes = [(indep + per_note * self.window_size)
                            for indep, per_note in layer_sizes]

    self.output_size = output_size
    self.dropout = dropout

    self.input_size = sum(part.PART_WIDTH for part in input_parts)

    self.cells = StackedCells(self.input_size,
                              celltype=LSTM,
                              activation=T.tanh,
                              layers=self.tot_layer_sizes)
    self.cells.layers.append(Layer(self.tot_layer_sizes[-1],
                                   self.output_size,
                                   activation=lambda x: x))

    assert mode in ("drop", "roll"), "Must specify either drop or roll mode"
    self.mode = mode

    self.unroll_batch_num = unroll_batch_num
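# --- Worked example (not from the original source) ---
# How tot_layer_sizes is derived from the (indep, per_note) tuples documented
# in the docstring above: each per-note group contributes one copy of its
# cells for every note in the window, and a bare integer means "no per-note
# cells". The concrete numbers here are illustrative.
window_size = 12
layer_sizes = [x if isinstance(x, tuple) else (x, 0) for x in [(200, 10), 100]]
tot = [indep + per_note * window_size for indep, per_note in layer_sizes]
assert tot == [320, 100]  # 200 + 10 * 12 = 320; 100 + 0 * 12 = 100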
class Model:
    """
    Simple predictive model for forecasting words from a sequence using LSTMs.
    Choose how many LSTMs to stack, what size their memory should be, and how
    many words can be predicted.
    """
    def __init__(self, hidden_size, input_size, output_size, celltype=Layer):
        # declare model
        self.model = StackedCells(input_size, celltype=celltype, layers=hidden_size)
        # add a regression head and a classification head:
        self.regression = Layer(hidden_size[-1], output_size[0], activation=T.tanh)
        self.classifier = Layer(hidden_size[-1], output_size[1], activation=softmax)
        self.steps = T.iscalar('steps')
        self.x = T.tensor3('x')               # input GFS data
        self.target0 = T.tensor3('target0')   # regression target (this version changes the target dimensions)
        self.target1 = T.itensor3('target1')  # classification target
        self.layerstatus = None
        self.results = None
        # create symbolic variables for prediction
        # (one complete pass over the whole sequence; the results are the predictions)
        self.predictions0, self.predictions1 = self.create_prediction()
        # create gradient training functions:
        #self.create_cost_fun()
        #self.create_valid_error()
        #self.create_training_function()
        self.create_predict_function()
        #self.create_validate_function()
        # The steps above only write out the symbolic formulas; nothing is computed yet.

    @property
    def params(self):
        return self.model.params + self.regression.params + self.classifier.params

    def create_prediction(self):
        # perform one full prediction pass over the sequence
        def step(idx):
            new_states = self.model.forward(idx)
            output0 = self.regression.activate(new_states[-1])
            output1 = self.classifier.activate(new_states[-1])
            return [output0, output1]  # both outputs are always returned, recursive or not

        x = self.x
        num_examples = x.shape[0]
        #outputs_info = [initial_state_with_taps(layer, num_examples) for layer in self.model.layers]
        #outputs_info = [initial_state_with_taps(layer, num_examples) for layer in self.model.layers[1:]]
        [result0, result1], _ = theano.scan(fn=step,
                                            n_steps=self.steps,
                                            sequences=dict(input=x.dimshuffle((1, 0, 2)), taps=[-0]))
        return result0.dimshuffle((1, 0, 2)), result1.dimshuffle((2, 0, 1))

    def create_cost_fun(self):
        y = self.target1[:, 0, 0]
        self.cost = ((self.predictions0 - self.target0[:, :, 0:1]).norm(L=2)
                     + 100 * (-T.mean(T.log(self.predictions1)[T.arange(y.shape[0]), :, y])))

    def create_valid_error(self):
        self.valid_error0 = T.mean(T.abs_(self.predictions0 - self.target0[:, :, 0:1]), axis=0)
        #self.valid_error1 = -T.mean(T.log(self.predictions1)[T.arange(self.target1.shape[0]), :, self.target1[:, 0, 0]])
        self.valid_error1 = T.mean(T.eq(T.argmax(self.predictions1, axis=2).dimshuffle(1, 0),
                                        self.target1[:, 0, 0]))

    def create_predict_function(self):
        self.pred_fun = theano.function(inputs=[self.x, self.steps],
                                        outputs=[self.predictions0, self.predictions1],
                                        allow_input_downcast=True)

    def create_training_function(self):
        # this step performs the gradient-descent update (Adagrad)
        updates, gsums, xsums, lr, max_norm = create_optimization_updates(
            self.cost, self.params, lr=0.01, method="adagrad")
        self.update_fun = theano.function(
            inputs=[self.x, self.target0, self.target1, self.steps],
            outputs=self.cost,
            updates=updates,
            name='update_fun',
            profile=False,
            allow_input_downcast=True)

    def create_validate_function(self):
        self.valid_fun = theano.function(
            inputs=[self.x, self.target0, self.target1, self.steps],
            outputs=[self.valid_error0, self.valid_error1],
            allow_input_downcast=True)

    def __call__(self, x, steps):
        # pred_fun was compiled with two inputs, so both must be passed here
        # (the original forwarded only x, which would fail at call time)
        return self.pred_fun(x, steps)
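# --- Usage sketch (not from the original source) ---
# Calling the compiled predictor of the Model class above. pred_fun was
# compiled with inputs=[self.x, self.steps], so both the input tensor and the
# step count must be supplied; the shapes and sizes here are illustrative
# assumptions.
import numpy as np

model = Model(hidden_size=[40, 40], input_size=14, output_size=[1, 7], celltype=Layer)
x = np.random.rand(10, 40, 14).astype(np.float32)  # (batch, time, features)
preds0, preds1 = model(x, 40)  # regression and classification predictions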