def train(self, train_set, batch_size=100):
    # Greedy layer-wise training: train one dA per pair of adjacent layer sizes,
    # then feed its hidden representation to the next layer as the new training set.
    for i in xrange(len(self.layers) - 1):
        train_data = T.dmatrix('train_data')
        x = T.dmatrix('x')
        rng = numpy.random.RandomState(123)
        theano_rng = RandomStreams(rng.randint(2 ** 10))
        da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
                n_visible=self.layers[i], n_hidden=self.layers[i + 1])
        cost, updates = da.get_cost_updates(corruption_level=0., learning_rate=0.4)
        train_da = theano.function([train_data], cost, updates=updates,
                                   givens={x: train_data})
        for epoch in xrange(200):
            train_cost = []
            for index in xrange(len(train_set) / batch_size):
                train_cost.append(train_da(numpy.asarray(
                    train_set[index * batch_size: (index + 1) * batch_size])))
            print 'Training autoencoder %d, epoch %d, cost ' % (i + 1, epoch), numpy.mean(train_cost)
        train_set = da.get_hidden_values(train_set).eval()
        self.dAs.append(da)
def __init__(self, input=None, label=None,\ n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ rng=None): self.x = input self.y = label self.sigmoid_layers = [] self.dA_layers = [] self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) if rng is None: rng = numpy.random.RandomState(1234) assert self.n_layers > 0 # construct multi-layer for i in xrange(self.n_layers): # layer_size if i == 0: input_size = n_ins else: input_size = hidden_layer_sizes[i - 1] # layer_input if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].sample_h_given_v() # construct sigmoid_layer sigmoid_layer = HiddenLayer(input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], rng=rng, activation=sigmoid) self.sigmoid_layers.append(sigmoid_layer) # construct dA_layers dA_layer = dA(input=layer_input, n_visible=input_size, n_hidden=hidden_layer_sizes[i], W=sigmoid_layer.W, hbias=sigmoid_layer.b) self.dA_layers.append(dA_layer) # layer for output using Logistic Regression self.log_layer = LogisticRegression(input=self.sigmoid_layers[-1].sample_h_given_v(), label=self.y, n_in=hidden_layer_sizes[-1], n_out=n_outs) # finetune cost: the negative log likelihood of the logistic regression layer self.finetune_cost = self.log_layer.negative_log_likelihood()
def test_DimentionalReduction(learning_rate=0.1, training_epochs=15,
                              dataset="mnist.pkl.gz", batch_size=20):
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    index = T.lscalar()   # index to a [mini]batch
    x = T.matrix("x")     # the data is presented as rasterized images

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # a dA with a 2-unit hidden layer, used here for dimensionality reduction
    da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
            n_visible=28 * 28, n_hidden=2)
    cost, updates = da.get_cost_updates(corruption_level=0.0,
                                        learning_rate=learning_rate)
    train_da = theano.function(
        [index], cost, updates=updates,
        givens={x: train_set_x[index * batch_size: (index + 1) * batch_size]}
    )

    for epoch in xrange(training_epochs):
        # go through the training set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))
        print "Training epoch %d, cost " % epoch, numpy.mean(c)

    # plot the 2-D hidden representation of the first 2000 examples
    x = T.matrix("x")
    hidden_values_function = da.get_hidden_values2(x)
    result_function = theano.function(inputs=[x], outputs=hidden_values_function)
    colors = ["b", "g", "r", "c", "m", "y", "k", "w"]
    n = 0
    for x, y in zip(result_function(train_set_x.get_value()), train_set_y.eval()):
        if y < len(colors):
            plt.scatter(x[0], x[1], c=colors[y])
        n += 1
        if n > 2000:
            break
    plt.show()

    # the same examples projected onto their first two principal components, for comparison
    n = 0
    pca = PCA(n_components=2)
    for x, y in zip(pca.fit_transform(train_set_x.get_value()), train_set_y.eval()):
        if y < len(colors):
            plt.scatter(x[0], x[1], c=colors[y])
        n += 1
        if n > 2000:
            break
    plt.show()
def __init__(self, numpy_rng, theano_rng=None, n_ins=784, hidden_layers_sizes=[500, 500], n_outs=10, corruption_levels=[0.1, 0.1]): self.sigmoid_layers = [] self.dA_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) self.x = T.matrix('x') self.y = T.ivector('y') for i in xrange(self.n_layers): if i == 0: input_size = n_ins layer_input = self.x else: input_size = hidden_layers_sizes[i - 1] layer_input = self.sigmoid_layers[i - 1].output sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid) self.sigmoid_layers.append(sigmoid_layer) self.params.extend(sigmoid_layer.params) dA_layer = dA(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, bhid=sigmoid_layer.b) self.dA_layers.append(dA_layer) self.logLayer = LogisticRegression( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_outs) self.params.extend(self.logLayer.params) self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) self.errors = self.logLayer.errors(self.y)
def __init__(self, numpy_rng, theano_rng=None, n_ins=784, hidden_layers_sizes=[500, 500]): self.sigmoid_layers = [] self.dA_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) # allocate symbolic variables for the data self.x = T.matrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector of for i in xrange(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden units of # the layer below or the input size if we are on the first layer if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] # the input to this layer is either the activation of the hidden # layer below or the input of the SdA if you are on the first # layer if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[i - 1].output sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) # Construct a denoising autoencoder that shared weights with this # layer dA_layer = dA(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, bhid=sigmoid_layer.b) self.dA_layers.append(dA_layer) self.params.extend(dA_layer.params)
def pre_train(self, X, step=1, bias=True):
    structure = numpy.concatenate([[X.shape[1]], self.structure])
    self.structure = structure
    self.V = X
    for idx in xrange(len(structure) - 1):
        print "\nPre-training layer %d with %d hidden units" % (idx + 1, structure[idx + 1])
        # train the bottom sigmoid layers greedily, one at a time
        layer = dA(structure[idx], structure[idx + 1], self.V.shape[1], alpha=0.01, bias=True)
        layer.train(self.V, self.V, step)
        Z = layer.predict(self.V)
        self.V = layer.hidden
        self.W.append(layer.W)
def get_hidden(train_set=None, model_file=None, shared=1):
    if model_file is None:
        model_file = 'L1_p0_s1'
    model_w, model_b, model_b_prime = load_model_mat(model_file)
    n_visible, n_hidden = model_w.get_value(borrow=True).shape
    rng = numpy.random.RandomState(123)
    da = dA(numpy_rng=rng, input=train_set, n_visible=n_visible, n_hidden=n_hidden,
            W=model_w, bhid=model_b, bvis=model_b_prime)
    # set_trace()
    hidden_value = da.get_active().eval()
    if shared == 1:
        # wrap the activations in a Theano shared variable for downstream layers
        hidden_value = theano.shared(numpy.asarray(hidden_value, dtype=theano.config.floatX),
                                     borrow=True)
    return hidden_value
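# Illustrative only: a minimal call sketch for get_hidden() above. It assumes a model
# saved under the default name 'L1_p0_s1' exists and that load_model_mat can read it;
# the data shape (129 visible units) is borrowed from the test snippet further below.
import numpy
import theano

train_set = theano.shared(numpy.random.rand(500, 129).astype(theano.config.floatX),
                          borrow=True)
hidden = get_hidden(train_set=train_set, model_file='L1_p0_s1', shared=1)
print hidden.get_value(borrow=True).shape   # (500, n_hidden)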
def fine_tune(self, X, y, learning_layer=100, n_iters=1, alpha=0.01):
    self.n_output = len(y[0])
    self.structure[0] += int(self.bias)
    self.structure = numpy.concatenate([self.structure, [learning_layer], [self.n_output]])
    # add a softmax layer on top of the stack
    layer = dA(self.V.shape[1], learning_layer, self.n_output, alpha=alpha,
               bias=False, isSoftmax=True)
    layer.train(self.V, y, 1)
    self.W.append(layer.W)
    self.W.append(layer.K)
    # fine-tune the whole stack with backpropagation over shuffled examples
    data = numpy.arange(len(X))
    for i in numpy.arange(n_iters):
        numpy.random.shuffle(data)
        for idx in data:
            self.forwardPass(X[idx])
            self.backwardPass(y[idx])
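# Hedged note on the label format fine_tune() expects: it reads self.n_output = len(y[0]),
# so y should be a 2-D array with one target vector (e.g. one-hot) per example, and
# pre_train(X, ...) must run first so self.V holds the top-level hidden representation
# that the added softmax layer is trained on. A tiny example of building such labels:
import numpy

class_ids = numpy.array([0, 2, 1, 2])      # assumed raw integer labels
y = numpy.eye(3)[class_ids]                # one row per example, one column per class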
def __init__(self,numpy_rng, n_ins, layer_sizes, corruption_levels=[0.5, 0.5], perlin=False): self.corruption_levels = corruption_levels self.sigmoid_layers = [] self.dA_layers = [] self.params = [] self.n_layers = len(layer_sizes) self.x = T.matrix('x') self.numpy_rng = numpy_rng self.perlin = perlin theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) for i in xrange(self.n_layers): if i==0: input_size = n_ins layer_input = self.x else: input_size = layer_sizes[i-1] layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer(rng = numpy_rng, input = layer_input, n_in = input_size, n_out = layer_sizes[i], activation = T.nnet.sigmoid, perlin = self.perlin) self.sigmoid_layers.append(sigmoid_layer) self.params.extend(sigmoid_layer.params) dA_layer = dA(numpy_rng = numpy_rng, theano_rng = theano_rng, input = layer_input, n_visible = input_size, n_hidden = layer_sizes[i], W = sigmoid_layer.W, bhid = sigmoid_layer.b) self.dA_layers.append(dA_layer) print "created dA %ix%i" % (input_size, layer_sizes[i])
def sAE(U, visible_size, hid_size):
    print 'autoencoder'
    ul = T.dmatrix('ul')
    index = T.lscalar()
    rng = np.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))
    n_train_batches = U.get_value().shape[0]
    u_da = dA(numpy_rng=rng, theano_rng=theano_rng, input=ul,
              n_visible=visible_size, n_hidden=hid_size)
    # print u_da.n_visible
    # print u_da.n_hidden
    cost, updates = u_da.get_cost_updates(corruption_level=0.5, learning_rate=0.000001)
    # batch_size and max_iterations are taken from the enclosing scope
    train_da = theano.function(
        [index], cost, updates=updates,
        givens={ul: U[index * batch_size: (index + 1) * batch_size]}
    )
    # start_time = timeit.default_timer()

    ############
    # TRAINING #
    ############
    # go through training epochs
    for epoch in xrange(max_iterations):
        # go through the training set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))
    return (u_da, train_da)
def trainAndExecuteSingleDA(self, dsPath, ensembleCMeans, maxs, mins, threshold, numOfClusters): d = dA.dA(n_visible=111, n_hidden=35) encounteredAnomaly = 0 with open(dsPath, 'rt') as csvin: csvin = csv.reader(csvin, delimiter=',') with open(ensembleCMeans, 'w') as ensembleCmeansFile: for i, row in enumerate(csvin): try: if i != 0 and float(row[111]) != 0: encounteredAnomaly = 1 if i == 0: continue if i % 10000 == 0: print(i) totalScore = 0 input = (numpy.array(row[:111]).astype(float) - numpy.array(mins)) / (numpy.array( (numpy.array(maxs) - numpy.array(mins) + 1))) for m in range(len(input)): if numpy.isnan(input[m]) == True: input[m] = 0 scoresList = [] #added agg totalScore = 0 aeCount = 1 #if encounteredAnomaly==0: """ if i==threshold-1: #added mse lastTrainPacket=input #added mse """ if i < threshold: score = d.train(input=input) else: score = d.feedForward(input=input) if (score < 0): print("not match") continue """ #added mse if i>=threshold: mse=0 for me in range(len(input)): mse+=numpy.abs(float(input[me])-float(lastTrainPacket[me])) #mse/=len(input) #added mse """ totalScore = score # added aggr """ if i>=threshold: #added mse totalScore+=(mse*mse) #added mse """ #for inp in range(len(input)): # ensembleCmeansFile.write(str(input[inp]) + ",") ensembleCmeansFile.write( str(totalScore) + "," + str(row[111]) + "\n") except Exception as ex: print(ex.message) print("observation rejected") continue
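# Stripped of the commented-out experiments, the core pattern above is: min-max
# normalise each record, train the dA on the first `threshold` rows, then score the
# rest by reconstruction error. A hedged sketch of that pattern (rows, mins, maxs and
# threshold are assumed to be prepared by the caller; the dA calls match those above):
import numpy

d = dA.dA(n_visible=111, n_hidden=35)
for i, row in enumerate(rows):                              # rows: iterable of records
    x = (numpy.array(row[:111], dtype=float) - mins) / (maxs - mins + 1)
    x[numpy.isnan(x)] = 0
    if i < threshold:
        d.train(input=x)                                    # learn on presumed-benign traffic
    else:
        score = d.feedForward(input=x)                      # reconstruction error as anomaly score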
def __init__( self, numpy_rng, theano_rng=None, n_ins=784, hidden_layers_sizes=[500, 500], corruption_levels=[0.1, 0.1] ): self.sigmoid_layers = [] self.dA_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) # allocate symbolic variables for the data self.x = T.matrix('x') # the data is presented as rasterized images self.y = theano.shared(value=numpy.zeros((1,), dtype=theano.config.floatX ), name='y', borrow=True ) # the labels are presented as 1D vector of # [double] vector # end-snippet-1 # The SdA is an MLP, for which all weights of intermediate layers # are shared with a different denoising autoencoders # We will first construct the SdA as a deep multilayer perceptron, # and when constructing each sigmoidal layer we also construct a # denoising autoencoder that shares weights with that layer # During pretraining we will train these autoencoders (which will # lead to chainging the weights of the MLP as well) # During finetunining we will finish training the SdA by doing # stochastich gradient descent on the MLP # start-snippet-2 for i in xrange(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden units of # the layer below or the input size if we are on the first layer if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] # the input to this layer is either the activation of the hidden # layer below or the input of the SdA if you are on the first # layer if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) # its arguably a philosophical question... # but we are going to only declare that the parameters of the # sigmoid_layers are parameters of the StackedDAA # the visible biases in the dA are parameters of those # dA, but not the SdA self.params.extend(sigmoid_layer.params) # Construct a denoising autoencoder that shared weights with this # layer dA_layer = dA(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, bhid=sigmoid_layer.b) self.dA_layers.append(dA_layer) # end-snippet-2 # We now need to add a value function computing self.valueLayer = ValueFunction( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], ) self.params.extend(self.valueLayer.params) # construct a function that implements one step of finetunining # calculate the squared error for the value function self.finetune_cost = self.valueLayer.cost(self.y) self.error = self.valueLayer.cost(self.y)
x = T.matrix('x')    # the data is presented as rasterized images
index = T.lscalar()  # index to a [mini]batch

# da = dA.dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
#            n_visible=28 * 28, n_hidden=500)
da = dA.dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
           n_visible=4400, n_hidden=250)
cost, updates = da.get_cost_updates(corruption_level=0.7, learning_rate=learning_rate)

# datasets = load_data('mnist.pkl.gz')
# train_set_x, train_set_y = datasets[0]
train_set_x2 = shared(numpy.asarray(mat['all_target_faltten1'][:, 0:4400], config.floatX))
def __init__( self, numpy_rng, f_load_MLP=None, f_load_SDA=None, theano_rng=None, n_ins=784, hidden_layers_sizes=[1000, 1000, 1000, 15], n_outs=10, corruption_levels=[0.1, 0.1], name_appendage='', xtropy_fraction = 1, dropout_probs=[0.0,0.5,0.5,0.5,0.1] ): """ This class is made to support a variable number of layers. :type numpy_rng: numpy.random.RandomState :param numpy_rng: numpy random number generator used to draw initial weights :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams :param theano_rng: Theano random generator; if None is given one is generated based on a seed drawn from `rng` :type n_ins: int :param n_ins: dimension of the input to the sdA :type n_layers_sizes: list of ints :param n_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int :param n_outs: dimension of the output of the network :type corruption_levels: list of float :param corruption_levels: amount of corruption to use for each layer """ self.sigmoid_layers = [] self.out_sigmoid_layers = [] self.dA_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) # allocate symbolic variables for the data self.x = T.matrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels for i in xrange(self.n_layers): # the size of the input is either the number of hidden units of # the layer below or the input size if we are on the first layer if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] # the input to this layer is either the activation of the hidden # layer below or the input of the SdA if you are on the first # layer if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer_ReLU_dropout(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], dropout_rate=dropout_probs[i+1], name_appendage = name_appendage+'_sigmoid_'+str(i)) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) # its arguably a philosophical question... 
# but we are going to only declare that the parameters of the # sigmoid_layers are parameters of the StackedDAA # the visible biases in the dA are parameters of those # dA, but not the SdA self.params.extend(sigmoid_layer.params) for i in xrange(self.n_layers): all_layers = self.sigmoid_layers+self.out_sigmoid_layers input_size = all_layers[-1].n_out output_size = self.sigmoid_layers[-i-1].n_in # the input to the inverse sigmoid layer is always the activation of the # sigmoid layer behind it (forward sigmoid if its' the first inverse layer) layer_input = all_layers[-1].output out_sigmoid_layer = HiddenLayer_ReLU_dropout(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=output_size, dropout_rate=dropout_probs[-i-2], name_appendage = name_appendage+'_outsigmoid_'+str(i)) self.out_sigmoid_layers.append(out_sigmoid_layer) self.params.extend(out_sigmoid_layer.params) for i in xrange(self.n_layers): sigmoid_layer = self.sigmoid_layers[i] # Construct a denoising autoencoder that shared weights with each layer dA_layer = dA(numpy_rng=numpy_rng, theano_rng=theano_rng, input=sigmoid_layer.input, n_visible=sigmoid_layer.n_in, n_hidden=sigmoid_layer.n_out, W = sigmoid_layer.W, bhid=sigmoid_layer.b, name_appendage=name_appendage+'_dA_'+str(i) ) self.dA_layers.append(dA_layer) if f_load_MLP != None: self.predictLayer = MLP( rng = numpy_rng, input=self.out_sigmoid_layers[-1].output, f_load = f_load_MLP, name_appendage = name_appendage+'_MLPLayer' ) elif f_load_SDA != None: self.predictLayer = SdA( numpy_rng = numpy_rng, n_ins=28 * 28, hidden_layers_sizes=[1000, 1000, 1000], n_outs=10, input = self.out_sigmoid_layers[-1].output ) self.predictLayer.load(f_load_SDA) self.xtropy_cost = -T.mean(self.x*T.log(self.out_sigmoid_layers[-1].output) + (1-self.x)*T.log(1-self.out_sigmoid_layers[-1].output)) self.mse_cost = T.mean((self.x-self.out_sigmoid_layers[-1].output)**2) self.logloss_cost = self.predictLayer.logLayer.negative_log_likelihood(self.y) self.finetune_cost = xtropy_fraction*self.mse_cost + (1-xtropy_fraction)*self.logloss_cost self.errors = self.predictLayer.logLayer.errors(self.y)
def __init__( self, numpy_rng, theano_rng=None, n_ins=784, hidden_layers_sizes=[500, 500], n_outs=10, corruption_levels=[0.1, 0.1] ): self.sigmoid_layers = [] self.dA_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) # allocate symbolic variables for the data self.x = T.matrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels # end-snippet-1 # start-snippet-2 for i in xrange(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden units of # the layer below or the input size if we are on the first layer if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] # the input to this layer is either the activation of the hidden # layer below or the input of the SdA if you are on the first # layer if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) # its arguably a philosophical question... # but we are going to only declare that the parameters of the # sigmoid_layers are parameters of the StackedDAA # the visible biases in the dA are parameters of those # dA, but not the SdA self.params.extend(sigmoid_layer.params) # Construct a denoising autoencoder that shared weights with this # layer dA_layer = dA(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, bhid=sigmoid_layer.b) self.dA_layers.append(dA_layer) # end-snippet-2 # We now need to add a logistic layer on top of the MLP self.logLayer = LogisticRegression( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_outs ) self.params.extend(self.logLayer.params) # construct a function that implements one step of finetunining # compute the cost for second phase of training, # defined as the negative log likelihood self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) # compute the gradients with respect to the model parameters # symbolic variable that points to the number of errors made on the # minibatch given by self.x and self.y self.errors = self.logLayer.errors(self.y)
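# The comments in these constructors describe greedy layer-wise pretraining followed by
# finetuning. As a hedged sketch (the actual pretraining helper is not shown in these
# snippets), one Theano training function can be compiled per stacked dA via
# get_cost_updates; `sda`, `train_set_x`, `batch_size` and `corruption_levels` are assumed:
import theano
import theano.tensor as T

index = T.lscalar('index')
pretrain_fns = []
for da_layer, level in zip(sda.dA_layers, corruption_levels):
    cost, updates = da_layer.get_cost_updates(corruption_level=level, learning_rate=0.001)
    pretrain_fns.append(theano.function(
        [index], cost, updates=updates,
        givens={sda.x: train_set_x[index * batch_size: (index + 1) * batch_size]}))
# each function is then run over all minibatches for several epochs, layer by layer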
def __init__( self, numpy_rng, n_ins, hidden_layers_sizes, corruption_levels=[0.1, 0.1], theano_rng=None, n_outs=7 ): """ This class is made to support a variable number of layers. :type numpy_rng: numpy.random.RandomState :param numpy_rng: numpy random number generator used to draw initial weights :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams :param theano_rng: Theano random generator; if None is given one is generated based on a seed drawn from `rng` :type n_ins: int :param n_ins: dimension of the input to the sdA :type n_layers_sizes: list of ints :param n_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int :param n_outs: dimension of the output of the network :type corruption_levels: list of float :param corruption_levels: amount of corruption to use for each layer """ self.sigmoid_layers = [] self.dA_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) self.n_ins=n_ins self.n_outs=n_outs # allocate symbolic variables for the data self.x = T.matrix('x') self.y = T.iscalar('y') # the labels are presented as 1D vector of # [int] labels assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) # end-snippet-1 # The SdA is an MLP, for which all weights of intermediate layers # are shared with a different denoising autoencoders # We will first construct the SdA as a deep multilayer perceptron, # and when constructing each sigmoidal layer we also construct a # denoising autoencoder that shares weights with that layer # During pretraining we will train these autoencoders (which will # lead to chainging the weights of the MLP as well) # During finetunining we will finish training the SdA by doing # stochastich gradient descent on the MLP # start-snippet-2 for i in xrange(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden units of # the layer below or the input size if we are on the first layer if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] # the input to this layer is either the activation of the hidden # layer below or the input of the SdA if you are on the first # layer if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) # its arguably a philosophical question... # but we are going to only declare that the parameters of the # sigmoid_layers are parameters of the StackedDAA # the visible biases in the dA are parameters of those # dA, but not the SdA self.params.append(sigmoid_layer.theta) # Construct a denoising autoencoder that shared weights with this # layer dA_layer = dA(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], theta=sigmoid_layer.theta) self.dA_layers.append(dA_layer) # end-snippet-2 sda_input = T.matrix('sda_input') # the data is presented as rasterized images self.da_layers_output_size = hidden_layers_sizes[-1] self.get_da_output = theano.function( inputs=[sda_input], outputs=self.sigmoid_layers[-1].output.reshape((-1, self.da_layers_output_size)), givens={ self.x: sda_input } )
def test_model(batch_size=100, file_name='da.pkl'): # datasets = load_data(dataset) print '...loading data' datasets = load_TIMIT() train_set, valid_set, test_set = datasets print '...building model' pickle_lst = [1000] # , 500, 1000 # pickle_lst = [1, 10] for epoch in pickle_lst: print 'epoch: ', epoch file_name = "da_epoch_%d" % (epoch) w, b, b_prime = load_model_mat(file_name) # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # generate symbolic variables for input (x and y represent a # minibatch) x = T.matrix('x') # data, presented as rasterized images rng = numpy.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2 ** 30)) da = dA( numpy_rng=rng, theano_rng=theano_rng, input=x, n_visible=129, n_hidden=500, W=w, bhid=b, bvis=b_prime ) # test_fun = theano.function( # inputs=[index], # outputs=da.get_reconstructed_out(), # givens={ # x: test_set_x[index * batch_size:(index + 1) * batch_size] # } # ) get_outputs = theano.function( inputs=[index], outputs=da.get_active(), givens={ x: test_set[index * batch_size:(index + 1) * batch_size] } ) index = 1 hidden_value = get_outputs(index) plot_data = test_set.get_value(borrow=True)[index * batch_size:(index + 1) * batch_size] pylab.figure(); pylab.hist(plot_data.reshape(plot_data.size, 1), 50); pylab.figure();pylab.plot(numpy.mean(plot_data, axis=0), '*');pylab.xlim(0, 128);pylab.ylim(0, 1); pylab.figure();pylab.hist(hidden_value.reshape(hidden_value.size, 1), 50); pylab.figure();pylab.plot(numpy.mean(hidden_value, axis=0), '*');pylab.ylim(0, 1); pylab.show() set_trace() # pylab.title(epoch) pylab.show()
def __init__(self, numpy_rng=None, theano_rng=None, n_ins=784, hidden_layers_sizes=[500, 500], n_outs=10, corruption_levels=[0.1, 0.1]): #save for raw_dump self.n_ins = n_ins self.hidden_layers_sizes = hidden_layers_sizes self.n_outs = n_outs self.corruption_levels = corruption_levels self.sigmoid_layers = [] self.dA_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) assert self.n_layers > 0 if not numpy_rng: numpy_rng = numpy.random.RandomState(numpy.random.randint(2 ** 30)) if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) # allocate symbolic variables for the data self.x = T.matrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels for i in xrange(self.n_layers): if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid) self.sigmoid_layers.append(sigmoid_layer) self.params.extend(sigmoid_layer.params) dA_layer = dA(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, #shared weight bhid=sigmoid_layer.b) self.dA_layers.append(dA_layer) # We now need to add a logistic layer on top of the MLP self.logLayer = LogisticRegression( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_outs) self.params.extend(self.logLayer.params) # construct a function that implements one step of finetunining # compute the cost for second phase of training, # defined as the negative log likelihood self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) # compute the gradients with respect to the model parameters # symbolic variable that points to the number of errors made on the # minibatch given by self.x and self.y self.errors = self.logLayer.errors(self.y)
def __init__( self, numpy_rng, theano_rng=None, n_ins=784, hidden_layers_sizes=[500, 500], n_outs=10, corruption_levels=[0.1, 0.1] ): """ This class is made to support a variable number of layers. :type numpy_rng: numpy.random.RandomState :param numpy_rng: numpy random number generator used to draw initial weights :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams :param theano_rng: Theano random generator; if None is given one is generated based on a seed drawn from `rng` :type n_ins: int :param n_ins: dimension of the input to the sdA :type n_layers_sizes: list of ints :param n_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int :param n_outs: dimension of the output of the network :type corruption_levels: list of float :param corruption_levels: amount of corruption to use for each layer """ self.sigmoid_layers = [] self.dA_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) # allocate symbolic variables for the data self.x = T.matrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels # end-snippet-1 # The SdA is an MLP, for which all weights of intermediate layers # are shared with a different denoising autoencoders # We will first construct the SdA as a deep multilayer perceptron, # and when constructing each sigmoidal layer we also construct a # denoising autoencoder that shares weights with that layer # During pretraining we will train these autoencoders (which will # lead to chainging the weights of the MLP as well) # During finetunining we will finish training the SdA by doing # stochastich gradient descent on the MLP # start-snippet-2 for i in xrange(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden units of # the layer below or the input size if we are on the first layer if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] # the input to this layer is either the activation of the hidden # layer below or the input of the SdA if you are on the first # layer if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) # its arguably a philosophical question... 
# but we are going to only declare that the parameters of the # sigmoid_layers are parameters of the StackedDAA # the visible biases in the dA are parameters of those # dA, but not the SdA self.params.extend(sigmoid_layer.params) # Construct a denoising autoencoder that shared weights with this # layer dA_layer = dA(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, bhid=sigmoid_layer.b) self.dA_layers.append(dA_layer) # end-snippet-2 # We now need to add a logistic layer on top of the MLP self.logLayer = LogisticRegression( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_outs ) self.params.extend(self.logLayer.params) # construct a function that implements one step of finetunining # compute the cost for second phase of training, # defined as the negative log likelihood self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) # compute the gradients with respect to the model parameters # symbolic variable that points to the number of errors made on the # minibatch given by self.x and self.y self.errors = self.logLayer.errors(self.y)
def deepclustering(learning_rate=0.1, training_epochs=15, dataset='mnist.pkl.gz', batch_size=20, output_folder='dA_plots'): """ This demo is tested on MNIST :type learning_rate: float :param learning_rate: learning rate used for training the DeNosing AutoEncoder :type training_epochs: int :param training_epochs: number of epochs used for training :type dataset: string :param dataset: path to the pickled dataset """ datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] nHid = 2000 # Load the saved dA object, to initialize our model # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size label_true = train_set_y.get_value(borrow=True) # start-snippet-2 # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images center = T.matrix('center') # end-snippet-2 #if not os.path.isdir(output_folder): # os.makedirs(output_folder) # os.chdir(output_folder) #################################### # BUILDING THE MODEL NO CORRUPTION # #################################### #Train a denosing autoencoder to initialize my own network, and provide latent representation for initializing clusteing rng = numpy.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2**30)) # Instancialize a dA class # To get the initial clustering information f = open('no_corruption.save', 'rb') no_corruption = cPickle.load(f) init_da = dA( numpy_rng=rng, theano_rng=theano_rng, input=x, n_visible=28 * 28, n_hidden=nHid, ) init_da.params = no_corruption hid = init_da.get_hidden_values(x) hidden_da = theano.function( [index], outputs=hid, givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]}) # go through training epochs km = MiniBatchKMeans(n_clusters=10, batch_size=100) train_array = train_set_x.get_value() ypred = km.fit_predict(train_array) nmi_data = metrics.normalized_mutual_info_score(label_true, ypred) hidden_val = [] for batch_index in xrange(n_train_batches): hidden_val.append(hidden_da(batch_index)) hidden_array = numpy.asarray(hidden_val) hidden_size = hidden_array.shape hidden_array = numpy.reshape( hidden_array, (hidden_size[0] * hidden_size[1], hidden_size[2])) # Do a k-means clusering to get center_array ypred = km.fit_predict(hidden_array) nmi_disjoint = metrics.normalized_mutual_info_score(label_true, ypred) center_array = km.cluster_centers_[[km.labels_]] center_shared = theano.shared(numpy.asarray(center_array, dtype='float32'), borrow=True) dc = deep_clus(numpy_rng=rng, theano_rng=theano_rng, input=x, n_visible=28 * 28, n_hidden=nHid) cost, updates = dc.get_cost_updates(center, corruption_level=0., learning_rate=learning_rate) #reconst = da.get_reconstructed_input(hidden) # training a pure denoising autoencoder, without clustering, to get initial values to cluster train_dc = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], center: center_shared[index * batch_size:(index + 1) * batch_size] }) start_time = timeit.default_timer() ############ # TRAINING # ############ for epoch in xrange(training_epochs): # go through trainng set c = [] for batch_index in xrange(n_train_batches): cost_batch = train_dc(batch_index) c.append(cost_batch) print 'Training epoch %d, cost ' % epoch, numpy.mean(c) hidden_val = [] for batch_index in xrange(n_train_batches): hidden_val.append(hidden_da(batch_index)) hidden_array = 
numpy.asarray(hidden_val) hidden_size = hidden_array.shape hidden_array = numpy.reshape( hidden_array, (hidden_size[0] * hidden_size[1], hidden_size[2])) km.init = km.cluster_centers_ km.fit(hidden_array) center_array = km.cluster_centers_[[km.labels_]] center_shared = theano.shared(numpy.asarray(center_array, dtype='float32'), borrow=True) # print 'Training epoch %d, cost ' % epoch, numpy.mean(c) end_time = timeit.default_timer() ypred = km.predict(hidden_array) nmi_dc = metrics.adjusted_mutual_info_score(label_true, ypred) print 'Normalized mutual info for data KMeans: ', nmi_data print 'Normalized mutual info for disjoint clustering: ', nmi_disjoint print 'Normalized mutual info for deep clustering: ', nmi_dc training_time = (end_time - start_time) print >> sys.stderr, ('The no corruption code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((training_time) / 60.)) image = Image.fromarray( tile_raster_images(X=dc.W.get_value(borrow=True).T, img_shape=(28, 28), tile_shape=(10, 10), tile_spacing=(1, 1))) image.save('filters_corruption_0.png')
def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=10,
             corruption_levels=[0.1, 0.1]):
    """This class supports a variable number of layers.

    numpy_rng: numpy.random.RandomState, random generator used to initialise the weights
    theano_rng: theano.tensor.shared_randomstreams.RandomStreams; if None (the default),
        one is generated from a seed drawn from 'rng'
    n_ins: int, dimension of the input to the SdA
    hidden_layers_sizes: list of ints, sizes of the intermediate layers;
        must contain at least one element
    n_outs: int, dimension of the output of the network
    corruption_levels: list of float, corruption level for each layer
    """
    self.sigmoid_layers = []
    self.dA_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)

    assert self.n_layers > 0  # there must be at least one hidden layer

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    self.x = T.matrix('x')    # the data, presented as rasterized images
    self.y = T.ivector('y')   # the labels, presented as a 1-D vector of [int] labels

    # The SdA is an MLP whose intermediate-layer weights are shared with denoising
    # autoencoders. We first build the SdA as a deep MLP; while constructing each
    # sigmoid layer we also construct a denoising autoencoder that shares weights
    # with it. Pretraining trains these autoencoders (which also changes the MLP
    # weights); finetuning then finishes training the SdA by stochastic gradient
    # descent on the MLP.

    # construct the sigmoid layers
    for i in xrange(self.n_layers):
        # the size of the input is the number of hidden units of the layer
        # below, or n_ins if this is the first layer
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layers_sizes[i - 1]

        # the input to this layer is the activation of the hidden layer
        # below, or the input of the SdA if this is the first layer
        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].output

        # define the sigmoid layer
        sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=T.nnet.sigmoid)
        # add the layer to our list of layers
        self.sigmoid_layers.append(sigmoid_layer)

        # arguably a philosophical point, but we only declare the parameters of the
        # sigmoid_layers to be parameters of the SdA; the visible biases of the dA
        # belong to the dA, not to the SdA
        self.params.extend(sigmoid_layer.params)

        # construct a denoising autoencoder that shares weights with this layer
        dA_layer = dA(numpy_rng=numpy_rng, theano_rng=theano_rng,
                      input=layer_input, n_visible=input_size,
                      n_hidden=hidden_layers_sizes[i],
                      W=sigmoid_layer.W, bhid=sigmoid_layer.b)
        self.dA_layers.append(dA_layer)

    # add a logistic regression layer on top of the MLP
    self.logLayer = LogisticRegression(input=self.sigmoid_layers[-1].output,
                                       n_in=hidden_layers_sizes[-1], n_out=n_outs)
    self.params.extend(self.logLayer.params)

    # the cost for the second (finetuning) phase of training is the
    # negative log likelihood of the logistic regression layer
    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    # symbolic variable for the number of errors on the minibatch
    # given by self.x and self.y
    self.errors = self.logLayer.errors(self.y)
def __init__( self, numpy_rng, theano_rng=None, n_ins=784, hidden_layers_sizes=[1000, 1000], n_outs=10, corruption_levels=[0.1, 0.1], Py_emp = 0.5, log_layer_type=MultitaskLogisticRegression, S_matrix = None, S_type = None, lambda_S = 0.0001, lambda_O_l2 = 0.0001, lambda_O_l1 = 0.0001, lambda_H_l2 = 0.0001 ): """ This class is made to support a variable number of layers. :type numpy_rng: numpy.random.RandomState :param numpy_rng: numpy random number generator used to draw initial weights :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams :param theano_rng: Theano random generator; if None is given one is generated based on a seed drawn from `rng` :type n_ins: int :param n_ins: dimension of the input to the sdA :type n_layers_sizes: list of ints :param n_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int :param n_outs: dimension of the output of the network :type corruption_levels: list of float :param corruption_levels: amount of corruption to use for each layer """ self.sigmoid_layers = [] self.dA_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) self.corruption_levels = corruption_levels self.L2_sqr = 0 self.use_auc = False if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) # allocate symbolic variables for the data self.x = T.matrix('x') # the data is presented as rasterized images self.y = T.imatrix('y') # the labels are presented as 1D vector of # [int] labels for i in xrange(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden units of # the layer below or the input size if we are on the first layer if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] # the input to this layer is either the activation of the hidden # layer below or the input of the SdA if you are on the first # layer if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) self.params.extend(sigmoid_layer.params) self.L2_sqr = ( self.L2_sqr + (sigmoid_layer.W ** 2).sum() + (sigmoid_layer.b ** 2).sum() ) # Construct a denoising autoencoder that shared weights with this # layer dA_layer = dA(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, bhid=sigmoid_layer.b) self.dA_layers.append(dA_layer) if self.n_layers > 0: self.logLayer = log_layer_type( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_outs, Py_emp=Py_emp ) else: self.logLayer = log_layer_type( input=self.x, n_in=n_ins, n_out=n_outs, Py_emp=Py_emp ) self.params.extend(self.logLayer.params) lambda_O_l2 = lambda_O_l2 if (lambda_O_l2 is not None and lambda_O_l2 > 0) else 0 lambda_O_l1 = lambda_O_l1 if (lambda_O_l1 is not None and lambda_O_l1 > 0) else 0 lambda_H_l2 = lambda_H_l2 if (lambda_H_l2 is not None and lambda_H_l2 > 0) else 0 # compute the cost for second phase of training, # defined as the negative log likelihood self.reg_cost = 0 if S_matrix is not None: lambda_S = lambda_S if (lambda_S is not None and lambda_S > 0) else 0.0001 if S_type == 'l1': sys.stdout.write('using L1 multi-task regularizer, lambda={0}\n'.format(lambda_S)) L_loss = 0 myW = self.logLayer.W for i in range(10): for j in range(i+1,10): L_loss = L_loss + 
T.sum(T.abs_(myW[:,i]-myW[:,j])) else: sys.stdout.write('using Laplacian multi-task regularizer, lambda={0}\n'.format(lambda_S)) D_matrix = np.diag(S_matrix.sum(axis = 0)) L_matrix = D_matrix - S_matrix L_loss = (T.dot(T.dot(self.logLayer.W, L_matrix), self.logLayer.W.T)).trace() self.reg_cost = self.reg_cost + lambda_S * L_loss if lambda_H_l2 > 0: sys.stdout.write('using L2 hidden unit penalty, lambda={0}\n'.format(lambda_H_l2)) self.reg_cost = self.reg_cost + lambda_H_l2 * self.L2_sqr if lambda_O_l2 > 0: sys.stdout.write('using L2 penalty, lambda={0}\n'.format(lambda_O_l2)) myW = self.logLayer.W self.reg_cost = self.reg_cost + lambda_O_l2 * ((myW**2).sum()) # + (self.b**2).sum()) if lambda_O_l1 > 0: sys.stdout.write('using L1 penalty, lambda={0}\n'.format(lambda_O_l1)) myW = self.logLayer.W self.reg_cost = self.reg_cost + lambda_O_l1 * ((abs(myW)).sum()) #+ (abs(self.b)).sum()) self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) + self.reg_cost self.errors = self.logLayer.errors(self.y) self.errors_per_output = self.logLayer.errors_per_output(self.y)
if not os.path.isdir(output_folder):
    os.makedirs(output_folder)
os.chdir(output_folder)

####################################
# BUILDING THE MODEL NO CORRUPTION #
####################################

rng = numpy.random.RandomState(123)
theano_rng = RandomStreams(rng.randint(2 ** 30))

da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
        n_visible=10, n_hidden=500)
cost, updates = da.get_cost_updates(corruption_level=0., learning_rate=learning_rate)
train_da = theano.function(
    [index], cost, updates=updates,
    givens={x: train_set_x[index * batch_size: (index + 1) * batch_size]}
)
def trainAndExecute(i=0, ngramStop=2): scoresProb = [] scoresN2 = [] scoresN3 = [] scoresN4 = [] labels = [] for j2 in range(40): with io.open('E:/challenge/training_data/user2SegLabels.csv', 'rt', encoding="utf8") as file: file.readline() labList = [] for j in range(150): labList.append(file.readline().split(',')[j2]) labels.append(labList) globalFeatVecProb, globalFeatVecN2, globalFeatVecN3, globalFeatVecN4, perSegFeatVecProb, perSegFeatVecN2, perSegFeatVecN3, perSegFeatVecN4 = dsGen.createTrainSetForUser( i, ngramStop) userSegFeatVecProb, userSegNotExistDicProb, userSegFeatVecN2, userSegNotExistDicN2, userSegFeatVecN3, userSegNotExistDicN3, userSegFeatVecN4, userSegNotExistDicN4 = dsGen.createTestSetForUser( i, ngramStop) #train and test prob da = dA.dA(n_visible=len(perSegFeatVecProb[0]), n_hidden=len(perSegFeatVecProb[0]) / 3) #print ("starting train\n") for iter in range(15): counter = 0 for k in range(len(perSegFeatVecProb)): da.train(input=np.array(perSegFeatVecProb[k])) counter += 1 counter = 0 #print ("finished train, starts test\n") for k in range(len(userSegFeatVecProb)): nnOutputProb = da.feedForward(input=np.array(userSegFeatVecProb[k])) scoresProb.append(nnOutputProb) counter += 1 #train and test N2 da = dA.dA(n_visible=len(perSegFeatVecN2[0]), n_hidden=len(perSegFeatVecN2[0]) / 3) #print ("starting train\n") for iter in range(15): counter = 0 for k in range(len(perSegFeatVecN2)): da.train(input=np.array(perSegFeatVecN2[k])) counter += 1 counter = 0 #print ("finished train, starts test\n") for k in range(len(userSegFeatVecN2)): nnOutputN2 = da.feedForward(input=np.array(userSegFeatVecN2[k])) scoresN2.append(nnOutputN2) counter += 1 #train and test N3 da = dA.dA(n_visible=len(perSegFeatVecN3[0]), n_hidden=len(perSegFeatVecN3[0]) / 3) #print ("starting train\n") for iter in range(15): counter = 0 for k in range(len(perSegFeatVecN3)): da.train(input=np.array(perSegFeatVecN3[k])) counter += 1 counter = 0 #print ("finished train, starts test\n") for k in range(len(userSegFeatVecN3)): nnOutputN3 = da.feedForward(input=np.array(userSegFeatVecN3[k])) scoresN3.append(nnOutputN3) counter += 1 #train and test N4 da = dA.dA(n_visible=len(perSegFeatVecN4[0]), n_hidden=len(perSegFeatVecN4[0]) / 3) #print ("starting train\n") for iter in range(15): counter = 0 for k in range(len(perSegFeatVecN4)): da.train(input=np.array(perSegFeatVecN4[k])) counter += 1 counter = 0 #print ("finished train, starts test\n") for k in range(len(userSegFeatVecN4)): nnOutputN4 = da.feedForward(input=np.array(userSegFeatVecN4[k])) counter += 1 scoresN4.append(nnOutputN4) #print ("finished test\n") avgS = np.array(scoresProb).mean() #normalize mean-std prob for s in range(len(scoresProb)): scoresProb[s] = math.fabs(avgS - scoresProb[s]) # normalize mean-std n2 avgS = np.array(scoresN2).mean() for s in range(len(scoresN2)): scoresN2[s] = math.fabs(avgS - scoresN2[s]) # normalize mean-std n3 avgS = np.array(scoresN3).mean() for s in range(len(scoresN3)): scoresN3[s] = math.fabs(avgS - scoresN3[s]) # normalize mean-std n4 avgS = np.array(scoresN4).mean() for s in range(len(scoresN4)): scoresN4[s] = math.fabs(avgS - scoresN4[s]) return scoresProb, scoresN2, scoresN3, scoresN4 #trainAndExecute(7,4)
def auto_encoder_Lx(train_set=None, p=0, sigma=1, param=None, training_epochs=1000): #=========================================================================== # train_set: the training data # p: penalty; sigma: sigma for Gaussian # N: number of frame; # item_str: string for model and log file # training_epochs: training epochs #=========================================================================== batch_size = 100 output_folder = param['output_folder'] item_str = param['item_str'] pretrain_lr = param['pretrain_lr'] down_epoch = param['down_epoch'] n_hidden = param['n_hidden'] # compute number of minibatches for training, validation and testing n_train_batches = train_set.get_value(borrow=True).shape[0] / batch_size n_visible = train_set.get_value(borrow=True).shape[1] # start-snippet-2 # allocate symbolic variables for the data index = T.lscalar('index') # index to a [mini]batch learning_rate = T.scalar('lr') # learning rate to use x = T.matrix('x') # the data is presented as rasterized images # end-snippet-2 if not os.path.isdir(output_folder): os.makedirs(output_folder) # pickle_lst = [1, 5, 10, 50, 100, 500, 1000] rng = numpy.random.RandomState(123) da = dA(numpy_rng=rng, input=x, n_visible=n_visible, n_hidden=n_hidden) cost, updates = da.get_cost_updates_p(learning_rate=learning_rate, p=p, sigma=sigma) print '... building the model' train_fn = theano.function( inputs=[index, learning_rate], outputs=cost, updates=updates, givens={x: train_set[index * batch_size: (index + 1) * batch_size]} ) print '... training' start_time = strict_time() logger = mylogger(output_folder + '/' + item_str + '.log') logger.log('p:%g, sigma:%g, learning rate:%g' % (p, sigma, pretrain_lr)) for epoch in xrange(1, training_epochs + 1): # go through trainng set c = [] epoch_time_s = strict_time() for batch_index in xrange(n_train_batches): err = train_fn(batch_index, pretrain_lr) # err = 0 c.append(err) logger.log('Training epoch %d, cost %.5f, took %f seconds ' % (epoch, numpy.mean(c), (strict_time() - epoch_time_s))) if epoch % down_epoch == 0: pretrain_lr = 0.8 * pretrain_lr logger.log('learning rate: %g' % (pretrain_lr)) # if epoch in pickle_lst: # file_name = "%s_epoch_%d" % (item_str,epoch) # save_model_mat(da, file_name) # logger.info(file_name+'.mat saved') da.save_model_mat(output_folder + '/' + item_str + '.mat') # da.save_model_mat("%s_(%d)" %(item_str,training_epochs), output_folder) logger.log('ran for %.2f m ' % ((strict_time() - start_time) / 60.))
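# Illustrative call for auto_encoder_Lx() above. The dict keys are exactly the ones the
# function reads; the concrete values and the shape of train_set are assumptions.
import numpy
import theano

param = {
    'output_folder': 'ae_models',   # where the .mat model and .log file are written
    'item_str': 'L1_p0_s1',         # file-name stem for the model and log
    'pretrain_lr': 0.01,            # initial learning rate
    'down_epoch': 100,              # rate is multiplied by 0.8 every down_epoch epochs
    'n_hidden': 500,
}
train_set = theano.shared(numpy.random.rand(10000, 129).astype(theano.config.floatX),
                          borrow=True)
auto_encoder_Lx(train_set=train_set, p=0, sigma=1, param=param, training_epochs=1000)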
def __init__(self, rng, theano_rng, layer_sizes, activations, dropout_rates,
             pretrain_way, batch_size, use_bias=False):
    self.x = T.matrix('x')
    self.y = T.vector('y')

    # Ex)
    # layer_sizes = [1631, 800, 400, 1]
    # weight_matrix_sizes = [(1631, 800), (800, 400), (400, 1)]
    weight_matrix_sizes = zip(layer_sizes, layer_sizes[1:])

    self.params = []
    self.layers = []
    self.dropout_layers = []
    self.pretrain_layers = []
    self.hist_grads = []
    self.accu_grads = []
    self.accu_deltas = []

    layer_counter = 0
    next_layer_input = self.x
    next_dropout_layer_input = _dropout_from_layer(rng, self.x, p=dropout_rates[0])

    # Ex)
    # weight_matrix_sizes[:-1] = [(1631, 800), (800, 400)]
    for n_in, n_out in weight_matrix_sizes[:-1]:
        next_dropout_layer = DropoutHiddenLayer(
            rng=rng, input=next_dropout_layer_input, n_in=n_in, n_out=n_out,
            activation=activations[layer_counter],
            dropout_rate=dropout_rates[layer_counter + 1],
            W=None, b=None, use_bias=False)
        self.dropout_layers.append(next_dropout_layer)
        next_dropout_layer_input = next_dropout_layer.output

        self.params.extend(next_dropout_layer.params)
        self.hist_grads.extend(next_dropout_layer.hist_grads)
        self.accu_grads.extend(next_dropout_layer.accu_grads)
        self.accu_deltas.extend(next_dropout_layer.accu_deltas)

        # pretraining layer sharing weights with this hidden layer
        if pretrain_way == 'denoising':
            pretrain_layer = dA(numpy_rng=rng, theano_rng=theano_rng,
                                input=next_layer_input, n_visible=n_in, n_hidden=n_out,
                                W=next_dropout_layer.W, bhid=next_dropout_layer.b, bvis=None)
            self.pretrain_layers.append(pretrain_layer)
        elif pretrain_way == 'contractive':
            pretrain_layer = cA(numpy_rng=rng, input=next_layer_input,
                                n_visible=n_in, n_hidden=n_out, n_batchsize=batch_size,
                                W=next_dropout_layer.W, bhid=next_dropout_layer.b, bvis=None)
            self.pretrain_layers.append(pretrain_layer)

        next_layer = HiddenLayer(rng=rng, input=next_layer_input, n_in=n_in, n_out=n_out,
                                 activation=activations[layer_counter],
                                 W=next_dropout_layer.W * (1 - dropout_rates[layer_counter]),
                                 b=next_dropout_layer.b, use_bias=use_bias)
        self.layers.append(next_layer)
        next_layer_input = next_layer.output
        layer_counter = layer_counter + 1

    # n_out is fixed at 1 (single regression output)
    n_in = layer_sizes[-2]
    dropout_output_layer = RegressionLayer(input=next_dropout_layer_input, n_in=n_in)
    self.dropout_layers.append(dropout_output_layer)
    output_layer = RegressionLayer(input=next_layer_input,
                                   W=dropout_output_layer.W * (1 - dropout_rates[-1]),
                                   b=dropout_output_layer.b, n_in=n_in)
    self.layers.append(output_layer)

    self.params.extend(dropout_output_layer.params)
    self.hist_grads.extend(dropout_output_layer.hist_grads)
    self.accu_grads.extend(dropout_output_layer.accu_grads)
    self.accu_deltas.extend(dropout_output_layer.accu_deltas)

    # mean squared error of the dropout network
    self.dropout_mean_square_error = self.dropout_layers[-1].mean_square_error(self.y)
    # mean squared error of the rescaled (evaluation) network
    self.mean_squared_error = self.layers[-1].mean_square_error(self.y)

    self.dropout_prediction = self.dropout_layers[-1].prediction
    self.prediction = self.layers[-1].prediction
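# Hypothetical instantiation of the dropout network above, following its "Ex)" comment
# (1631 inputs, two hidden layers, one regression output). The class name
# DropoutRegressionNet is a placeholder; the activation and dropout lists are assumptions.
import numpy
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

rng = numpy.random.RandomState(1234)
net = DropoutRegressionNet(rng=rng,
                           theano_rng=RandomStreams(rng.randint(2 ** 30)),
                           layer_sizes=[1631, 800, 400, 1],
                           activations=[T.nnet.sigmoid, T.nnet.sigmoid],  # one per hidden layer
                           dropout_rates=[0.2, 0.5, 0.5, 0.5],            # input rate first
                           pretrain_way='denoising',                      # or 'contractive'
                           batch_size=100, use_bias=False)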
def __init__( self, numpy_rng, theano_rng=None, input = None, n_ins=784, hidden_layers_sizes=[500, 500], corruption_levels=[0.1, 0.1] ): self.sigmoid_layers = [] self.dA_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) if input is None: # we use a matrix because we expect a minibatch of several # examples, each example being a row self.x = T.dmatrix(name='x') else: self.x = input for i in xrange(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden units of # the layer below or the input size if we are on the first layer if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] # the input to this layer is either the activation of the hidden # layer below or the input of the SdA if you are on the first # layer if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) # its arguably a philosophical question... # but we are going to only declare that the parameters of the # sigmoid_layers are parameters of the StackedDAA # the visible biases in the dA are parameters of those # dA, but not the SdA self.params.extend(sigmoid_layer.params) # Construct a denoising autoencoder that shared weights with this # layer dA_layer = dA(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, bhid=sigmoid_layer.b) self.dA_layers.append(dA_layer)
def __init__( self, numpy_rng=None, theano_rng=None,n_inputs=None, hidden_layers_sizes = None, corruption_levels=[0.1, 0.1], dA_initiall=True, error_known=True, method=None, problem = None): self.n_layers = len(hidden_layers_sizes) self.n_inputs=n_inputs self.hidden_layers_sizes=hidden_layers_sizes self.error_known = error_known self.method=method self.problem = problem assert self.n_layers > 2 if not numpy_rng: numpy_rng = numpy.random.RandomState(123) if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) self.x = T.matrix('x') self.mask = T.matrix('mask') ### encoder_layers #### self.encoder_layers = [] self.encoder_params = [] self.dA_layers=[] for i in range(self.n_layers): if i == 0: input_size = self.n_inputs corruption=True else: input_size = self. hidden_layers_sizes[i-1] corruption=False if i == 0: layer_input = self.x else: layer_input=self.encoder_layers[-1].output act_func=T.tanh self.encoder_layer=perceptron(rng = numpy_rng, theano_rng=theano_rng, input = layer_input, n_in = input_size, n_out = self.hidden_layers_sizes[i], activation = act_func, first_layer_corrup=corruption) if dA_initiall: dA_layer = dA(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=self.encoder_layer.W, bhid=self.encoder_layer.b, method = self.method) self.dA_layers.append(dA_layer) self.encoder_layers.append(self.encoder_layer) self.encoder_params.extend(self.encoder_layer.params) ### decoder_layers #### self.decoder_layers = [] self.decoder_params = [] self.reverse_layers=self.encoder_layers[::-1] #self.reverse_da=self.dA_layers[::-1] decode_hidden_sizes=list(reversed(self.hidden_layers_sizes)) for i,j in enumerate(decode_hidden_sizes): input_size=j if i == 0: layer_input=self.reverse_layers[i].output else: layer_input=self.decoder_layers[-1].output if i==len(decode_hidden_sizes)-1: n_out= self.n_inputs else: n_out=decode_hidden_sizes[i+1] if i==len(decode_hidden_sizes)-1: if self.problem == 'regression': act_func = None else: act_func = T.nnet.sigmoid else: act_func=T.tanh self.decoder_layer=perceptron(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=n_out, W= self.reverse_layers[i].W, b= None, activation=act_func, decoder=True ) self.decoder_layers.append(self.decoder_layer) self.decoder_params.append(self.decoder_layer.b) self.network_layers= self.encoder_layers + self.decoder_layers self.params = self.encoder_params + self.decoder_params print(self.params)
def gen_autoencoder(**desc): return dA.dA(**desc)
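A possible way this factory might be called; the keyword names below mirror the dA constructors used elsewhere in this document and are assumptions about this particular dA module.

rng = numpy.random.RandomState(123)
x = T.matrix('x')
da = gen_autoencoder(numpy_rng=rng,
                     theano_rng=RandomStreams(rng.randint(2 ** 30)),
                     input=x,
                     n_visible=784,
                     n_hidden=500)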
n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

# start-snippet-2
# allocate symbolic variables for the data
index = T.lscalar()    # index to a [mini]batch
x = T.matrix('x')
# end-snippet-2

rng = numpy.random.RandomState(123)
theano_rng = RandomStreams(rng.randint(2 ** 30))

da = dA(
    numpy_rng=rng,
    theano_rng=theano_rng,
    input=x,
    n_visible=data.shape[1],
    n_hidden=20
)

cost, updates = da.get_cost_updates(
    corruption_level=0.1,
    learning_rate=learning_rate
)

train_da = theano.function(
    [index],
    cost,
    updates=updates,
    givens={
        x: train_set_x[index * batch_size: (index + 1) * batch_size]
    }
)
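The compiled train_da function is typically driven by an epoch loop like the one below; this mirrors the loops in the other snippets, and training_epochs is assumed to be defined by the surrounding function.

for epoch in xrange(training_epochs):
    c = []
    for batch_index in xrange(n_train_batches):
        c.append(train_da(batch_index))
    print 'Training epoch %d, cost ' % epoch, numpy.mean(c)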
def __init__( self, numpy_rng, f_load_MLP=None, f_load_SDA=None, theano_rng=None, n_ins=784, hidden_layers_sizes=[500, 500], n_outs=10, corruption_levels=[0.1, 0.1], name_appendage='', xtropy_fraction = 0, is_relu = True ): """ This class is made to support a variable number of layers. :type numpy_rng: numpy.random.RandomState :param numpy_rng: numpy random number generator used to draw initial weights :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams :param theano_rng: Theano random generator; if None is given one is generated based on a seed drawn from `rng` :type n_ins: int :param n_ins: dimension of the input to the sdA :type n_layers_sizes: list of ints :param n_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int :param n_outs: dimension of the output of the network :type corruption_levels: list of float :param corruption_levels: amount of corruption to use for each layer """ self.is_relu = is_relu self.sigmoid_layers = [] self.out_sigmoid_layers = [] self.dA_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) # allocate symbolic variables for the data self.x = T.matrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels for i in xrange(self.n_layers): # the size of the input is either the number of hidden units of # the layer below or the input size if we are on the first layer if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] # the input to this layer is either the activation of the hidden # layer below or the input of the SdA if you are on the first # layer if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output if self.is_relu: sigmoid_layer = HiddenLayer_ReLU(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], name_appendage = name_appendage+'_sigmoid_'+str(i)) else: sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid, name_appendage = name_appendage+'_sigmoid_'+str(i)) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) # its arguably a philosophical question... 
# but we are going to only declare that the parameters of the # sigmoid_layers are parameters of the StackedDAA # the visible biases in the dA are parameters of those # dA, but not the SdA self.params.extend(sigmoid_layer.params) for i in xrange(self.n_layers): all_layers = self.sigmoid_layers+self.out_sigmoid_layers input_size = all_layers[-1].n_out output_size = self.sigmoid_layers[-i-1].n_in # the input to the inverse sigmoid layer is always the activation of the # sigmoid layer behind it (forward sigmoid if its' the first inverse layer) layer_input = all_layers[-1].output if self.is_relu: out_sigmoid_layer = HiddenLayer_ReLU(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=output_size, name_appendage = name_appendage+'_outsigmoid_'+str(i)) else: out_sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=output_size, activation=T.nnet.sigmoid, name_appendage = name_appendage+'_outsigmoid_'+str(i)) self.out_sigmoid_layers.append(out_sigmoid_layer) self.params.extend(out_sigmoid_layer.params) for i in xrange(self.n_layers): sigmoid_layer = self.sigmoid_layers[i] # Construct a denoising autoencoder that shared weights with each layer dA_layer = dA(numpy_rng=numpy_rng, theano_rng=theano_rng, input=sigmoid_layer.input, n_visible=sigmoid_layer.n_in, n_hidden=sigmoid_layer.n_out, W = sigmoid_layer.W, bhid=sigmoid_layer.b, name_appendage=name_appendage+'_dA_'+str(i) ) self.dA_layers.append(dA_layer) if f_load_MLP != None: self.predictLayer = MLP( rng = numpy_rng, input=self.out_sigmoid_layers[-1].output, f_load = f_load_MLP, name_appendage = name_appendage+'_MLPLayer' ) elif f_load_SDA != None: self.predictLayer = SdA( numpy_rng = numpy_rng, n_ins=28 * 28, hidden_layers_sizes=[1000, 1000, 1000], n_outs=10, input = self.out_sigmoid_layers[-1].output ) self.predictLayer.load(f_load_SDA) self.xtropy_cost = -T.mean(self.x*T.log(self.out_sigmoid_layers[-1].output) + (1-self.x)*T.log(1-self.out_sigmoid_layers[-1].output)) self.mse_cost = T.mean((self.x-self.out_sigmoid_layers[-1].output)**2) self.logloss_cost = self.predictLayer.logLayer.negative_log_likelihood(self.y) if self.is_relu: self.finetune_cost = xtropy_fraction*self.mse_cost + (1-xtropy_fraction)*self.logloss_cost else: self.finetune_cost = xtropy_fraction*self.xtropy_cost + (1-xtropy_fraction)*self.logloss_cost self.errors = self.predictLayer.logLayer.errors(self.y)
def __init__( self, numpy_rng, n_ins, n_outs, hidden_layers_sizes, corruption_levels=[0.1, 0.1], theano_rng=None ): """ This class is made to support a variable number of layers. :type numpy_rng: numpy.random.RandomState :param numpy_rng: numpy random number generator used to draw initial weights :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams :param theano_rng: Theano random generator; if None is given one is generated based on a seed drawn from `rng` :type n_ins: int :param n_ins: dimension of the input to the sdA :type n_layers_sizes: list of ints :param n_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int :param n_outs: dimension of the output of the network :type corruption_levels: list of float :param corruption_levels: amount of corruption to use for each layer """ self.sigmoid_layers = [] self.dA_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) self.n_ins=n_ins self.n_outs=n_outs # allocate symbolic variables for the data self.x = T.matrix('x') self.y = T.ivector('y') assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) for i in xrange(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden units of # the layer below or the input size if we are on the first layer if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] # the input to this layer is either the activation of the hidden # layer below or the input of the SdA if you are on the first # layer if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer( rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid ) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) self.params.append(sigmoid_layer.theta) # Construct a denoising autoencoder that shared weights with this layer dA_layer = dA( numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], theta=sigmoid_layer.theta ) self.dA_layers.append(dA_layer) sda_input = T.matrix('sda_input') self.da_layers_output_size = hidden_layers_sizes[-1] self.get_da_output = theano.function( inputs=[sda_input], outputs=self.sigmoid_layers[-1].output.reshape((-1, self.da_layers_output_size)), givens={ self.x: sda_input } ) self.logLayer = LogisticRegression( rng = numpy.random.RandomState(), input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_outs ) #self.params.extend(self.logLayer.params) self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) self.errors = self.logLayer.errors(self.y)
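A hypothetical usage sketch for the compiled get_da_output function defined above; the class name SdA, the layer sizes and the train_set_x variable are illustrative assumptions, not taken from the original code.

sda = SdA(numpy_rng=numpy.random.RandomState(123),
          n_ins=784,
          n_outs=10,
          hidden_layers_sizes=[500, 100])
# returns the top hidden-layer representation, reshaped to
# (n_examples, hidden_layers_sizes[-1])
features = sda.get_da_output(train_set_x.get_value(borrow=True))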
def __init__(self, numpy_rng, theano_rng=None, layers_sizes=[129, 500, 54, 500, 129], w_list=None, b_list=None, output_folder='out', p_dict=None, sigma_dict=None): self.dA_layers = [] self.dA_train_flags = [] self.finetune_train_flag = False self.n_layers = len(layers_sizes) self.n_layers_dA = int(len(layers_sizes)/2) self.numpy_rng = numpy_rng self.output_folder = output_folder assert self.n_layers > 0 if p_dict==None: self.p_list = [0]*self.n_layers_dA self.sigma_list = [0]*self.n_layers_dA self.p = 0 self.sigma = 0 elif p_dict!=None and sigma_dict==None: assert len(p_dict['p_list']) == self.n_layers_dA self.p_list = p_dict['p_list'] self.sigma_list = [0]*self.n_layers_dA self.p = p_dict['p'] self.sigma = 0 elif p_dict!=None and sigma_dict!=None: assert len(p_dict['p_list']) == self.n_layers_dA assert len(sigma_dict['sigma_list']) == len(p_dict['p_list']) self.p_list = p_dict['p_list'] self.sigma_list = sigma_dict['sigma_list'] self.p = p_dict['p'] self.sigma = sigma_dict['sigma'] if not theano_rng: self.theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) self.x = T.matrix(name='x') for i in xrange(self.n_layers_dA): if i == 0: layer_input = self.x else: layer_input = self.dA_layers[-1].get_active() if(self.p_list[i] == 0): model_file = '%s/L%d.mat'%(output_folder, i) else: model_file = '%s/L%d_p%g_s%g.mat'%(output_folder, i, self.p_list[i], self.sigma_list[i]) if os.path.isfile(model_file): #this layer has been trained model_w, model_b, model_b_prime = load_model_mat(model_file) dA_layer = dA(numpy_rng=numpy_rng, input=layer_input, n_visible=layers_sizes[i], n_hidden=layers_sizes[i+1], W=model_w, bhid=model_b, bvis=model_b_prime) self.dA_train_flags.append(True) #set the flag else: dA_layer = dA(numpy_rng=numpy_rng, input=layer_input, n_visible=layers_sizes[i], n_hidden=layers_sizes[i+1]) self.dA_train_flags.append(False) #set the flag self.dA_layers.append(dA_layer) finetune_file = '%s/SAE%s.mat'%(output_folder, self.get_model_file()) if os.path.isfile(finetune_file): #trained self.finetune_train_flag = True
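For reference, the cache-file naming convention used by the constructor above, with illustrative values only (output_folder='out', layer index 1, p_list[1]=0.05, sigma_list[1]=0.1):

print '%s/L%d_p%g_s%g.mat' % ('out', 1, 0.05, 0.1)   # -> out/L1_p0.05_s0.1.mat
print '%s/L%d.mat' % ('out', 1)                       # used when p_list[i] == 0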
def __init__( self, numpy_rng=None, theano_rng=None,n_inputs=None, hidden_layers_sizes = None, corruption_levels=[0.1, 0.1], dA_initiall=True, error_known=True, method=None, problem = None, activ_fun = None, drop = None, regu_l1 = None, regu_l2 = None): self.activ_fun = activ_fun #T.arctan #T.tanh self.n_layers = len(hidden_layers_sizes) self.n_inputs=n_inputs self.hidden_layers_sizes=hidden_layers_sizes self.error_known = error_known self.method=method self.problem = problem self.drop = drop self.regu_l1 = regu_l1 self.regu_l2 = regu_l2 #assert self.n_layers >= 2 self.x = T.matrix('x') self.mask = T.matrix('mask') ### encoder_layers #### self.encoder_layers = [] self.encoder_params = [] self.dA_layers=[] for i in range(self.n_layers): if i == 0: input_size = self.n_inputs corruption=True else: input_size = self. hidden_layers_sizes[i-1] corruption=False if i == 0: layer_input = self.x else: layer_input=self.encoder_layers[-1].output self.encoder_layer=perceptron(input = layer_input, n_in = input_size, n_out = self.hidden_layers_sizes[i], activation = activ_fun, first_layer_corrup=corruption, drop = self.drop[i]) if dA_initiall : dA_layer = dA(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=self.encoder_layer.W, bhid=self.encoder_layer.b, method = self.method, activation=activ_fun, regu_l1=self.regu_l1, regu_l2=self.regu_l2) self.dA_layers.append(dA_layer) self.encoder_layers.append(self.encoder_layer) self.encoder_params.extend(self.encoder_layer.params) ### decoder_layers #### self.decoder_layers = [] self.decoder_params = [] self.drop.reverse() self.reverse_layers=self.encoder_layers[::-1] #self.reverse_da=self.dA_layers[::-1] decode_hidden_sizes=list(reversed(self.hidden_layers_sizes)) for i,j in enumerate(decode_hidden_sizes): input_size=j if i == 0: layer_input=self.reverse_layers[i].output else: layer_input=self.decoder_layers[-1].output if i==len(decode_hidden_sizes)-1: n_out= self.n_inputs else: n_out=decode_hidden_sizes[i+1] if i==len(decode_hidden_sizes)-1: if self.problem == 'regression': act_func = None else: act_func = T.nnet.sigmoid else: act_func=activ_fun self.decoder_layer=perceptron(input=layer_input, n_in=input_size, n_out=n_out, W= self.reverse_layers[i].W, b= None, activation=act_func, decoder=True, drop = None)#self.drop[i]) self.decoder_layers.append(self.decoder_layer) self.decoder_params.append(self.decoder_layer.b) self.network_layers= self.encoder_layers + self.decoder_layers self.params = self.encoder_params + self.decoder_params
def __init__(self,
             W_sda=[None, None],   # if saved parameters exist they are passed in here
             b_sda=[None, None],
             nn_structure=[600, 500, 500, 10],   # input size, hidden layer sizes, number of output classes
             dropout_rates=[0.5, 0.5],
             L1_reg=0, L2_reg=0,
             activation=T.nnet.sigmoid,
             non_static=False,
             wordvec=None):
    #print activation
    self.activation = activation
    self.sigmoid_layers = []   # list of hidden layers
    self.dA_layers = []        # list of dA layers
    self.params = []           # sigmoid-layer parameters only, the dA layers are excluded
    self.n_layers = len(nn_structure) - 2   # number of hidden layers

    # random num
    numpy_rng = numpy.random.RandomState(89677)
    theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    self.x = T.matrix('x')    # data
    self.y = T.ivector('y')   # label

    #print wordvec
    self.Words = theano.shared(value=wordvec, name='Words')
    layer0_input = self.Words[T.cast(self.x.flatten(), dtype="int32")].\
        reshape((self.x.shape[0], self.x.shape[1] * self.Words.shape[1]))
    #n=layer0_input.shape[1] / numpy.shape(wordvec)[0]
    '''
    # sum the word vectors
    avg_vec = numpy.zeros(numpy.shape(wordvec)[0],dtype=theano.config.floatX)
    for i in xrange(5):
        sub_vec=layer0_input[:,i*200:(i+1)*200]
        avg_vec=T.add(sub_vec,avg_vec)
    #avg_vec=avg_vec/5.
    layer0_input=avg_vec
    '''

    # the dA layers share weights with the MLP layers
    for i in xrange(self.n_layers):
        if i == 0:
            input_size = nn_structure[0]   # input_size: number of input units of each hidden layer
        else:
            input_size = nn_structure[i]

        if i == 0:
            layer_input = layer0_input  # self.x  # input data
        else:
            layer_input = self.sigmoid_layers[i - 1].output   # each layer's input is the previous layer's output

        # W and b are randomly initialised; dropout layer
        sigmoid_layer = dpLayer(rng=numpy_rng,
                                input=layer_input,
                                n_in=input_size,
                                W=W_sda[i],
                                b=b_sda[i],
                                dropout_rate=dropout_rates[i],
                                n_out=nn_structure[i + 1],
                                activation=self.activation)
        # add the layer to our list of layers
        self.sigmoid_layers.append(sigmoid_layer)   # list of hidden layers
        self.params.extend(sigmoid_layer.params)    # MLP parameters W, b

        # shares weights with the hidden layer
        dA_layer = dA(numpy_rng=numpy_rng,
                      theano_rng=theano_rng,
                      input=layer_input,
                      n_visible=input_size,
                      n_hidden=nn_structure[i + 1],
                      W=sigmoid_layer.W,   # share weights with sigmoid layer;
                                           # W is a shared variable pretrained by the dA
                      bhid=sigmoid_layer.b,
                      activation=self.activation)
        self.dA_layers.append(dA_layer)

    self.logLayer = LogisticRegression(
        input=self.sigmoid_layers[-1].output,
        n_in=nn_structure[-2],
        n_out=nn_structure[-1]
    )
    self.params.extend(self.logLayer.params)   # logistic-layer parameters W, b

    L1 = 0.
    L2 = 0.
    for layer in self.sigmoid_layers:
        L1 += abs(layer.W).sum()
        L2 += (layer.W ** 2).sum()
    self.L1 = (L1 + abs(self.logLayer.W).sum())
    self.L2_sqr = L2 + (self.logLayer.W ** 2).sum()

    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) \
        + L1_reg * self.L1 + L2_reg * self.L2_sqr
    if non_static:
        self.params.extend([self.Words])
    self.errors = self.logLayer.errors(self.y)
def main(args):
    (training_file, label_file, u_file, test_file,
     test_label, output, n, hid_size) = args

    X = load_feat(training_file)
    y = load_label(label_file)
    X = theano.shared(np.asarray(X, dtype=theano.config.floatX))
    y = np.asarray(y)
    visible_size = X.get_value().shape[1]

    test_X = load_feat(test_file)
    test_y = load_label(test_label)
    test_X = theano.shared(np.asarray(test_X, dtype=theano.config.floatX))
    test_y = np.asarray(test_y)

    U = load_feat(u_file)
    U = theano.shared(np.asarray(U[:u_size], dtype=theano.config.floatX))

    print 'autoencoder'
    ul = T.dmatrix('ul')
    index = T.lscalar()
    rng = np.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))
    n_train_batches = U.get_value().shape[0] // batch_size
    print n_train_batches

    u_da = dA(numpy_rng=rng, theano_rng=theano_rng, input=ul,
              n_visible=visible_size, n_hidden=int(hid_size))
    #print u_da.n_visible
    #print u_da.n_hidden

    cost, updates = u_da.get_cost_updates(
        corruption_level=1.0,
        learning_rate=0.00001
    )
    train_da = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            ul: U[index * batch_size: (index + 1) * batch_size]
        }
    )

    #start_time = timeit.default_timer()

    ############
    # TRAINING #
    ############
    # go through training epochs
    for epoch in xrange(max_iterations):
        # go through training set
        c = []
        for batch_index in xrange(n_train_batches):
            c_tmp = train_da(batch_index)
            c.append(c_tmp)
        #print 'Training epoch %d, cost ' % epoch, np.mean(c)

    #end_time = timeit.default_timer()
    #training_time = (end_time - start_time)

    train_features = u_da.get_hidden_values(X)
    test_features = u_da.get_hidden_values(test_X)
    print train_features.eval().shape
    #print dir(train_features)
    #print type(train_features.eval())
    #print train_features.eval()
    #train_features = np.asarray(train_features.eval())
    #test_features = np.asarray(test_features.eval())
    #kernel = GPy.kern.RBF()
    #m = GPy.models.GPRegression(X, y)
    #n = '1000'

    print train_features.eval()
    print 'model build'
    kernel = GPy.kern.RBF(input_dim=int(hid_size), variance=1., lengthscale=1.)
    m = GPy.models.SparseGPRegression(train_features.eval(), y, kernel=kernel,
                                      num_inducing=int(n))
    print 'training'
    m.optimize(optimizer='bfgs', max_iters=50, messages=True)

    print 'test'
    pred = m.predict(test_features.eval())[0]
    mae = mean_absolute_error(test_y, pred)
    mse = mean_squared_error(test_y, pred)
    print 'MAE: ', mae
    print 'RMSE: ', sqrt(mse)
    print 'pearson:', sp.stats.pearsonr(test_y, pred)[0]
    print 'resid mean:', np.mean(test_y - pred)
    print 'true: ', mquantiles(test_y, prob=[0.1, 0.9])
    print 'pred: ', mquantiles(pred, prob=[0.1, 0.9])

    with open(output, 'w') as output:
        for p in pred:
            print >>output, p[0]
    return
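A possible entry point for main(); the flag-free, positional command line shown here is an assumption, chosen only to match the tuple unpacking at the top of the function.

if __name__ == '__main__':
    import sys
    # expects exactly eight arguments:
    # training_file label_file u_file test_file test_label output n hid_size
    main(sys.argv[1:])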
def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=10,
             corruption_levels=[0.1, 0.1]):
    """ This class is made to support a variable number of layers.

    :type numpy_rng: numpy.random.RandomState
    :param numpy_rng: numpy random number generator used to draw initial
                      weights

    :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
    :param theano_rng: Theano random generator; if None is given one is
                       generated based on a seed drawn from `rng`

    :type n_ins: int
    :param n_ins: dimension of the input to the sdA

    :type hidden_layers_sizes: list of ints
    :param hidden_layers_sizes: intermediate layers sizes, must contain
                                at least one value

    :type n_outs: int
    :param n_outs: dimension of the output of the network

    :type corruption_levels: list of float
    :param corruption_levels: amount of corruption to use for each layer
    """

    self.sigmoid_layers = []
    self.dA_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    self.x = T.matrix('x')    # the data is presented as rasterized images
    self.y = T.ivector('y')   # the labels are presented as 1D vector of
                              # [int] labels

    # The SdA is an MLP, for which all weights of intermediate layers
    # are shared with a different denoising autoencoder.
    # We will first construct the SdA as a deep multilayer perceptron,
    # and when constructing each sigmoidal layer we also construct a
    # denoising autoencoder that shares weights with that layer.
    # During pretraining we will train these autoencoders (which will
    # lead to changing the weights of the MLP as well).
    # During finetuning we will finish training the SdA by doing
    # stochastic gradient descent on the MLP.
    for i in xrange(self.n_layers):
        # construct the sigmoidal layer

        # the size of the input is either the number of hidden units of
        # the layer below or the input size if we are on the first layer
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layers_sizes[i - 1]

        # the input to this layer is either the activation of the hidden
        # layer below or the input of the SdA if you are on the first
        # layer
        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].output

        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=T.nnet.sigmoid)
        # add the layer to our list of layers
        self.sigmoid_layers.append(sigmoid_layer)

        # it's arguably a philosophical question, but we are going to only
        # declare that the parameters of the sigmoid_layers are parameters
        # of the StackedDAA; the visible biases in the dA are parameters of
        # those dAs, but not of the SdA
        self.params.extend(sigmoid_layer.params)

        # Construct a denoising autoencoder that shares weights with this
        # layer
        dA_layer = dA(numpy_rng=numpy_rng,
                      theano_rng=theano_rng,
                      input=layer_input,
                      n_visible=input_size,
                      n_hidden=hidden_layers_sizes[i],
                      W=sigmoid_layer.W,
                      bhid=sigmoid_layer.b)
        self.dA_layers.append(dA_layer)

    # We now need to add a logistic layer on top of the MLP
    self.logLayer = LogisticRegression(
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layers_sizes[-1],
        n_out=n_outs)
    self.params.extend(self.logLayer.params)

    # construct a function that implements one step of finetuning

    # compute the cost for the second phase of training,
    # defined as the negative log likelihood
    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    # compute the gradients with respect to the model parameters
    # symbolic variable that points to the number of errors made on the
    # minibatch given by self.x and self.y
    self.errors = self.logLayer.errors(self.y)
    self.recall = self.logLayer.recall(self.y)
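A minimal sketch, not taken from the original code, of how the finetune_cost defined above is usually turned into a minibatch SGD step; the method name build_finetune_train and its arguments are hypothetical, and train_set_x / train_set_y are assumed to be Theano shared variables.

def build_finetune_train(self, train_set_x, train_set_y, batch_size, learning_rate):
    index = T.lscalar('index')   # minibatch index
    # gradients of the finetuning cost with respect to all MLP parameters
    gparams = T.grad(self.finetune_cost, self.params)
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(self.params, gparams)]
    return theano.function(
        inputs=[index],
        outputs=self.finetune_cost,
        updates=updates,
        givens={
            self.x: train_set_x[index * batch_size: (index + 1) * batch_size],
            self.y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )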
def __init__( self, numpy_rng, theano_rng=None, n_ins=784, hidden_layers_sizes=[500, 500], n_outs=10, corruption_levels=[0.1, 0.1] ): self.sigmoid_layers = [] self.dA_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) self.x = T.matrix('x') self.y = T.ivector('y') for i in range(self.n_layers): if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid) self.sigmoid_layers.append(sigmoid_layer) self.params.extend(sigmoid_layer.params) dA_layer = dA(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, bhid=sigmoid_layer.b) self.dA_layers.append(dA_layer) self.logLayer = LogisticRegression(input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_outs ) self.params.extend(self.logLayer.params) self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) self.errors = self.logLayer.errors(self.y)