import os
import sys
import time
import pickle

import numpy
import theano
import theano.tensor as T

# LeNetConvPoolLayer, HiddenLayer, LogisticRegression, load_initial_data,
# load_next_data and load_initial_test_data come from the project's own modules.


def train_rep(
    learning_rate=0.002,
    L1_reg=0.0002,
    L2_reg=0.005,
    n_epochs=200,
    nkerns=[20, 50],
    batch_size=25,
):
    rng = numpy.random.RandomState(23455)

    train_dir = "../out/h5/"
    valid_dir = "../out/h5/"
    weights_dir = "./weights/"

    print("... load input data")
    filename = train_dir + "rep_train_data_1.gzip.h5"
    datasets = load_initial_data(filename)
    train_set_x, train_set_y, shared_train_set_y = datasets

    filename = valid_dir + "rep_valid_data_1.gzip.h5"
    datasets = load_initial_data(filename)
    valid_set_x, valid_set_y, shared_valid_set_y = datasets

    mydatasets = load_initial_test_data()
    test_set_x, test_set_y, shared_test_set_y, valid_ds = mydatasets

    # compute number of minibatches for training, validation and testing
    # (integer division: these values are used as range() bounds)
    n_all_train_batches = 30000
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_all_train_batches //= batch_size
    n_train_batches //= batch_size
    n_valid_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix("x")  # the data is presented as rasterized images
    y = T.ivector("y")  # the labels are presented as a 1D vector of [int] labels

    # image size after each conv ('valid') + 2x2 pooling stage
    layer0_w = 50
    layer0_h = 50
    layer1_w = (layer0_w - 4) // 2
    layer1_h = (layer0_h - 4) // 2
    layer2_w = (layer1_w - 2) // 2
    layer2_h = (layer1_h - 2) // 2
    layer3_w = (layer2_w - 2) // 2
    layer3_h = (layer2_h - 2) // 2

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print("... building the model")

    # image sizes
    batchsize = batch_size
    in_channels = 20
    in_width = 50
    in_height = 50

    # filter sizes
    flt_channels = 40
    flt_time = 20
    flt_width = 5
    flt_height = 5

    signals_shape = (batchsize, in_channels, in_height, in_width)
    filters_shape = (flt_channels, in_channels, flt_height, flt_width)

    layer0_input = x.reshape(signals_shape)

    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=signals_shape,
        filter_shape=filters_shape,
        poolsize=(2, 2),
    )

    # TODO: in case of flt_time < in_time the output dimension will be different
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, flt_channels, layer1_w, layer1_h),
        filter_shape=(60, flt_channels, 3, 3),
        poolsize=(2, 2),
    )

    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, 60, layer2_w, layer2_h),
        filter_shape=(90, 60, 3, 3),
        poolsize=(2, 2),
    )

    layer3_input = layer2.output.flatten(2)

    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        n_in=90 * layer3_w * layer3_h,
        n_out=500,
        activation=T.tanh,
    )

    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

    classify = theano.function(
        [index],
        outputs=layer4.get_output_labels(y),
        givens={
            x: test_set_x[index * batch_size : (index + 1) * batch_size],
            y: test_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size : (index + 1) * batch_size],
            y: valid_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    # create a list of all model parameters to be fit by gradient descent
    params = (
        layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
    )

    # symbolic Theano variable for the L1 regularization term (weights only)
    L1 = (
        T.sum(abs(layer4.params[0]))
        + T.sum(abs(layer3.params[0]))
        + T.sum(abs(layer2.params[0]))
        + T.sum(abs(layer1.params[0]))
        + T.sum(abs(layer0.params[0]))
    )

    # symbolic Theano variable for the squared L2 term (weights only)
    L2_sqr = (
        T.sum(layer4.params[0] ** 2)
        + T.sum(layer3.params[0] ** 2)
        + T.sum(layer2.params[0] ** 2)
        + T.sum(layer1.params[0] ** 2)
        + T.sum(layer0.params[0] ** 2)
    )

    # the loss: negative log-likelihood plus both regularization terms
    cost = layer4.negative_log_likelihood(y) + L1_reg * L1 + L2_reg * L2_sqr

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # plain SGD updates
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size : (index + 1) * batch_size],
            y: train_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    ###############
    # TRAIN MODEL #
    ###############
    print("... training")

    start_time = time.time()
    epoch = 0
    done_looping = False
    cost_ij = 0
    train_files_num = 600
    val_files_num = 100

    startc = time.time()
    while (epoch < n_epochs) and (not done_looping):
        endc = time.time()
        print("epoch %i, took %.2f minutes" % (epoch, (endc - startc) / 60.0))
        startc = time.time()
        epoch = epoch + 1
        for nTrainSet in range(1, train_files_num + 1):
            # load next train data
            if nTrainSet % 50 == 0:
                print("training @ nTrainSet = ", nTrainSet, ", cost = ", cost_ij)
            filename = train_dir + "rep_train_data_" + str(nTrainSet) + ".gzip.h5"
            datasets = load_next_data(filename)
            ns_train_set_x, ns_train_set_y = datasets
            train_set_x.set_value(ns_train_set_x, borrow=True)
            shared_train_set_y.set_value(
                numpy.asarray(ns_train_set_y, dtype=theano.config.floatX), borrow=True
            )
            n_train_batches = train_set_x.get_value(borrow=True).shape[0]
            n_train_batches //= batch_size

            # train
            for minibatch_index in range(n_train_batches):
                cost_ij = train_model(minibatch_index)

        # at the end of each epoch run validation
        this_validation_loss = 0
        for nValSet in range(1, val_files_num + 1):
            filename = valid_dir + "rep_valid_data_" + str(nValSet) + ".gzip.h5"
            datasets = load_next_data(filename)
            ns_valid_set_x, ns_valid_set_y = datasets
            valid_set_x.set_value(ns_valid_set_x, borrow=True)
            shared_valid_set_y.set_value(
                numpy.asarray(ns_valid_set_y, dtype=theano.config.floatX), borrow=True
            )
            n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
            n_valid_batches //= batch_size

            # compute zero-one loss on validation set
            validation_losses = [validate_model(i) for i in range(n_valid_batches)]
            this_validation_loss += numpy.mean(validation_losses)
        this_validation_loss /= val_files_num
        print(
            "epoch %i, minibatch %i/%i, validation error %f %%"
            % (
                epoch,
                minibatch_index + 1,
                n_train_batches,
                this_validation_loss * 100.0,
            )
        )

        # save snapshots
        print("saving weights state, epoch = ", epoch)
        f = open(weights_dir + "weights_epoch" + str(epoch) + ".save", "wb")
        for layer in (layer0, layer1, layer2, layer3, layer4):
            pickle.dump(layer.__getstate__(), f, protocol=pickle.HIGHEST_PROTOCOL)
        f.close()

    end_time = time.time()
    print("Optimization complete.")
    print(
        "The code for file "
        + os.path.split(__file__)[1]
        + " ran for %.2fm" % ((end_time - start_time) / 60.0),
        file=sys.stderr,
    )
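
# The snapshot loop above relies on each layer exposing __getstate__ /
# __setstate__. A minimal sketch of that convention (the attribute names W
# and b are an assumption here; the project's layer classes define the real
# methods):
class PicklableLayer(object):
    def __getstate__(self):
        # export plain numpy arrays rather than Theano shared variables,
        # so snapshots stay readable across Theano versions
        return (self.W.get_value(), self.b.get_value())

    def __setstate__(self, state):
        W, b = state
        self.W.set_value(W)
        self.b.set_value(b)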
layer3 = HiddenLayer(
    rng,
    input=layer3_input,
    n_in=90 * layer3_w * layer3_h,
    n_out=500,
    activation=T.tanh,
)

layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

cost = layer4.negative_log_likelihood(y)

classify = theano.function(
    [index],
    outputs=layer4.get_output_labels(y),
    givens={
        x: test_set_x[index * batch_size:(index + 1) * batch_size],
        y: test_set_y[index * batch_size:(index + 1) * batch_size],
    },
)

# load weights (encoding='bytes' lets Python 3 read pickles written under Python 2)
print("loading weights state")
f = open("weights.save", "rb")
loaded_objects = []
for i in range(5):
    loaded_objects.append(pickle.load(f, encoding='bytes'))
f.close()
layer0.__setstate__(loaded_objects[0])
layer1.__setstate__(loaded_objects[1])
layer2.__setstate__(loaded_objects[2])
layer3.__setstate__(loaded_objects[3])
layer4.__setstate__(loaded_objects[4])
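
# The five pickle.load / __setstate__ lines above recur in several of these
# snippets; a compact helper (hypothetical, not part of the repo) does the
# same restore in one call:
def load_weights(path, layers):
    with open(path, 'rb') as f:
        # one pickled state per layer, in the order they were dumped
        states = [pickle.load(f, encoding='bytes') for _ in layers]
    for layer, state in zip(layers, states):
        layer.__setstate__(state)

# load_weights('weights.save', [layer0, layer1, layer2, layer3, layer4])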
def prepare_network():
    rng = numpy.random.RandomState(23455)

    print('Preparing Theano model...')

    mydatasets = load_initial_test_data()
    test_set_x, test_set_y, shared_test_set_y, valid_ds = mydatasets
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]

    # allocate symbolic variables for the data
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    # image size
    layer0_w = 50
    layer0_h = 50
    layer1_w = (layer0_w - 4) // 2
    layer1_h = (layer0_h - 4) // 2
    layer2_w = (layer1_w - 2) // 2
    layer2_h = (layer1_h - 2) // 2
    layer3_w = (layer2_w - 2) // 2
    layer3_h = (layer2_h - 2) // 2

    ######################
    #   BUILD NETWORK    #
    ######################

    # image sizes
    batchsize = 1
    in_channels = 20
    in_width = 50
    in_height = 50

    # filter sizes
    flt_channels = 40
    flt_time = 20
    flt_width = 5
    flt_height = 5

    signals_shape = (batchsize, in_channels, in_height, in_width)
    filters_shape = (flt_channels, in_channels, flt_height, flt_width)

    layer0_input = x.reshape(signals_shape)

    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=signals_shape,
                                filter_shape=filters_shape, poolsize=(2, 2))

    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(batchsize, flt_channels, layer1_w, layer1_h),
                                filter_shape=(60, flt_channels, 3, 3), poolsize=(2, 2))

    layer2 = LeNetConvPoolLayer(rng, input=layer1.output,
                                image_shape=(batchsize, 60, layer2_w, layer2_h),
                                filter_shape=(90, 60, 3, 3), poolsize=(2, 2))

    layer3_input = layer2.output.flatten(2)

    layer3 = HiddenLayer(rng, input=layer3_input, n_in=90 * layer3_w * layer3_h,
                         n_out=500, activation=T.tanh)

    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

    cost = layer4.negative_log_likelihood(y)

    classify = theano.function(
        [index],
        outputs=layer4.get_output_labels(y),
        givens={
            x: test_set_x[index * batchsize:(index + 1) * batchsize],
            y: test_set_y[index * batchsize:(index + 1) * batchsize],
        })

    print('Loading network weights...')
    weightFile = '../live_count/weights.save'
    f = open(weightFile, 'rb')
    loaded_objects = []
    for i in range(5):
        loaded_objects.append(pickle.load(f))
    f.close()
    layer0.__setstate__(loaded_objects[0])
    layer1.__setstate__(loaded_objects[1])
    layer2.__setstate__(loaded_objects[2])
    layer3.__setstate__(loaded_objects[3])
    layer4.__setstate__(loaded_objects[4])

    return test_set_x, test_set_y, shared_test_set_y, valid_ds, classify, batchsize
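
# Sanity check for the size bookkeeping above (a sketch; it assumes the
# tutorial-style LeNetConvPoolLayer, i.e. 'valid' convolutions that shrink
# each side by filter_size - 1, followed by 2x2 max-pooling with
# ignore_border=True, which floors the division):
w = 50
w = (w - 4) // 2   # layer0: 5x5 conv -> 46, pool -> 23
w = (w - 2) // 2   # layer1: 3x3 conv -> 21, pool -> 10
w = (w - 2) // 2   # layer2: 3x3 conv -> 8,  pool -> 4
assert 90 * w * w == 1440   # n_in of the hidden layer (90 maps of 4x4)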
def start(inputfile):
    global in_time, out_time, cooldown_in_time, cooldown_out_time, classify
    global global_counter, winner_stride, cur_state, in_frame_num, actions_counter
    global test_set_x, test_set_y, shared_test_set_y

    # cv2, RepDetector, detector_strides, process_single_frame, draw_str and
    # state come from the project's other modules.
    rng = numpy.random.RandomState(23455)

    # ####################### build start ########################
    # create empty shared variables to be filled later
    data_x = numpy.zeros([1, 20 * 50 * 50])
    data_y = numpy.zeros(20)
    train_set = (data_x, data_y)
    test_set_x, test_set_y, shared_test_set_y = shared_dataset(train_set)

    print('building ... ')
    batch_size = 1

    # allocate symbolic variables for the data
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    # image size
    layer0_w = 50
    layer0_h = 50
    layer1_w = (layer0_w - 4) // 2
    layer1_h = (layer0_h - 4) // 2
    layer2_w = (layer1_w - 2) // 2
    layer2_h = (layer1_h - 2) // 2
    layer3_w = (layer2_w - 2) // 2
    layer3_h = (layer2_h - 2) // 2

    # #####################
    # BUILD ACTUAL MODEL #
    # #####################

    # image sizes
    batchsize = batch_size
    in_channels = 20
    in_width = 50
    in_height = 50

    # filter sizes
    flt_channels = 40
    flt_time = 20
    flt_width = 5
    flt_height = 5

    signals_shape = (batchsize, in_channels, in_height, in_width)
    filters_shape = (flt_channels, in_channels, flt_height, flt_width)

    layer0_input = x.reshape(signals_shape)

    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=signals_shape,
                                filter_shape=filters_shape, poolsize=(2, 2))

    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(batch_size, flt_channels, layer1_w, layer1_h),
                                filter_shape=(60, flt_channels, 3, 3), poolsize=(2, 2))

    layer2 = LeNetConvPoolLayer(rng, input=layer1.output,
                                image_shape=(batch_size, 60, layer2_w, layer2_h),
                                filter_shape=(90, 60, 3, 3), poolsize=(2, 2))

    layer3_input = layer2.output.flatten(2)

    layer3 = HiddenLayer(rng, input=layer3_input, n_in=90 * layer3_w * layer3_h,
                         n_out=500, activation=T.tanh)

    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

    cost = layer4.negative_log_likelihood(y)

    classify = theano.function(
        [index],
        outputs=layer4.get_output_labels(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
        })

    # load weights
    print('loading weights state')
    f = open('weights.save', 'rb')
    loaded_objects = []
    for i in range(5):
        loaded_objects.append(pickle.load(f))
    f.close()
    layer0.__setstate__(loaded_objects[0])
    layer1.__setstate__(loaded_objects[1])
    layer2.__setstate__(loaded_objects[2])
    layer3.__setstate__(loaded_objects[3])
    layer4.__setstate__(loaded_objects[4])
    # ####################### build done ########################

    fromCam = False
    if fromCam:
        print('using camera input')
        cap = cv2.VideoCapture(0)
    else:
        print('using input file: ', inputfile)
        cap = cv2.VideoCapture(inputfile)

    # timing
    frame_rate = 5
    frame_interval_ms = 1000 // frame_rate

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    video_writer = cv2.VideoWriter('../out/live_out.avi', fourcc, frame_rate,
                                   (640, 480))

    frame_counter = 0
    ret, frame = cap.read()
    proFrame = process_single_frame(frame)

    # init detectors, one per temporal stride
    st_a_det = RepDetector(proFrame, detector_strides[0])
    st_b_det = RepDetector(proFrame, detector_strides[1])
    st_c_det = RepDetector(proFrame, detector_strides[2])

    frame_wise_counts = []
    while True:
        in_frame_num += 1
        if in_frame_num % 2 == 1:
            continue
        ret, frame = cap.read()
        if not ret:
            print('unable to read frame')
            break
        proFrame = process_single_frame(frame)
        frame_counter += 1  # frame index used for stride scheduling

        # handle stride A
        if frame_counter % st_a_det.stride_number == 0:
            st_a_det.count(proFrame)
        # handle stride B
        if frame_counter % st_b_det.stride_number == 0:
            st_b_det.count(proFrame)
        # handle stride C
        if frame_counter % st_c_det.stride_number == 0:
            st_c_det.count(proFrame)

        # display result on video
        blue_color = (130, 0, 0)
        green_color = (0, 130, 0)
        red_color = (0, 0, 130)
        orange_color = (0, 140, 0xFF)
        out_time = in_frame_num / 60
        if cur_state == state.IN_REP and (out_time - in_time < 4 or global_counter < 5):
            draw_str(frame, (20, 120), ' new hypothesis (%d) ' % global_counter,
                     orange_color, 1.5)
        if cur_state == state.IN_REP and out_time - in_time >= 4 and global_counter >= 5:
            draw_str(frame, (20, 120),
                     'action %d: counting... %d' % (actions_counter, global_counter),
                     green_color, 2)
        if cur_state == state.COOLDOWN and global_counter >= 5:
            draw_str(frame, (20, 120),
                     'action %d: done. final counting: %d' % (actions_counter, global_counter),
                     blue_color, 2)
        frame_wise_counts.append(global_counter)

    print("Dhruv", frame_wise_counts, global_counter)
    return frame_wise_counts
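
# start() builds its placeholder test set with a shared_dataset helper. The
# standard Theano-tutorial version of that helper looks like this (a sketch;
# the repo's own definition may differ in detail):
def shared_dataset(data_xy, borrow=True):
    data_x, data_y = data_xy
    shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX),
                             borrow=borrow)
    shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX),
                             borrow=borrow)
    # labels are stored as floatX (GPU-friendly) but consumed as int32;
    # the raw shared_y is returned as well so it can be refilled later
    return shared_x, T.cast(shared_y, 'int32'), shared_y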
layer2 = LeNetConvPoolLayer(rng, input=layer1.output,
                            image_shape=(batch_size, 60, layer2_w, layer2_h),
                            filter_shape=(90, 60, 3, 3), poolsize=(2, 2))

layer3_input = layer2.output.flatten(2)

layer3 = HiddenLayer(rng, input=layer3_input, n_in=90 * layer3_w * layer3_h,
                     n_out=500, activation=T.tanh)

layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

cost = layer4.negative_log_likelihood(y)

classify = theano.function(
    [index],
    outputs=layer4.get_output_labels(y),
    givens={
        x: test_set_x[index * batch_size:(index + 1) * batch_size],
        y: test_set_y[index * batch_size:(index + 1) * batch_size],
    })

# load weights
print('loading weights state')
f = open('weights.save', 'rb')
loaded_objects = []
for i in range(5):
    loaded_objects.append(pickle.load(f))
f.close()
layer0.__setstate__(loaded_objects[0])
layer1.__setstate__(loaded_objects[1])
layer2.__setstate__(loaded_objects[2])
layer3.__setstate__(loaded_objects[3])
layer4.__setstate__(loaded_objects[4])
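
# With the weights restored, per-window prediction is one call per batch
# index (a usage sketch; get_output_labels is the project's own method, so
# its exact return shape is an assumption):
labels = classify(0)   # predicted labels for samples [0, batch_size)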