print("testing_set size",len(testing_set)*(input_size/1.0e9)) print("making model") #training_set = training_set[:5000] #testing_set = testing_set[:1000] #make_X_y_convnet(images,w,h,offset=None,size=None) X_train,y_train = utils.make_X_y_convnet(training_set,w,h,offset=[offset_w,offset_h],size=[target_w,target_h]) X_test,y_test = utils.make_X_y_convnet(testing_set,w,h,offset=[offset_w,offset_h],size=[target_w,target_h]) #model = convnet.make_model_1(3,target_w,target_h,nb_filters = 16,nb_conv = 10,nb_classes=nb_classes,dropout=0.5) model = convnet.make_model_3(3,target_w,target_h,nb_classes) score,max_value, mean_value = convnet.train(model, X_train,X_test,y_train,y_test,nb_classes,batch_size,nb_epoch) score = convnet.test(model,max_value, mean_value, X_test,y_test,nb_classes) print('Cross Validation result at:', i, 'Test score:', score[0], 'Test accuracy:', score[1]) convnet.save_model(model,"model-convnet-{0}.json".format(i),"model-convnet-{0}.h5".format(i)) #for epoch in range(nb_epoch): #print("processing epoch",epoch) ##process batches #for start_index, end_index in make_batches(len(training_set), batch_size): ##print("processing batch",start_index, end_index)
import os
import time
from datetime import datetime

import numpy as np
import tensorflow as tf
from six.moves import xrange  # pylint: disable=redefined-builtin

import convnet

# FLAGS (train_dir, max_steps, batch_size, log_device_placement) are assumed
# to be defined elsewhere; see the sketch after this script.
FLAGS = tf.app.flags.FLAGS


def train():
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Get images and labels.
        images, labels = convnet.inputs()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = convnet.inference(images)

        # Calculate loss.
        loss = convnet.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = convnet.train(loss, global_step)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, loss = %.2f '
                              '(%.1f examples/sec; %.3f sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)


if __name__ == '__main__':
    train()
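The script above reads FLAGS.train_dir, FLAGS.max_steps, FLAGS.batch_size, and FLAGS.log_device_placement but never declares them. A minimal sketch of how those flags could be declared with the TF 0.x tf.app.flags API; the default values here are placeholders, not values taken from the original project:

# Hypothetical flag definitions for the training script above.
# Names follow the tf.app.flags convention; defaults are assumptions.
import tensorflow as tf

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('train_dir', '/tmp/convnet_train',
                           "Directory for checkpoints and event logs.")
tf.app.flags.DEFINE_integer('max_steps', 100000,
                            "Number of training batches to run.")
tf.app.flags.DEFINE_integer('batch_size', 128,
                            "Number of examples per batch.")
tf.app.flags.DEFINE_boolean('log_device_placement', False,
                            "Whether to log device placement.")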
# Select the labels with the precomputed index and reshape the MNIST-style arrays.
target = target[index]
n = len(data)
data.resize(n, 28 * 28)
target.resize(n, 1)

# One-hot encode the 10 digit classes.
y = np.zeros((n, 10))
for i in range(n):
    y[i][int(target[i])] = 1

alpha = 0.00001
k = 10000
batch = 600

data.resize(n, 1, 28, 28)
data = data * 1.01
#data = data / (np.std(data, axis = 0) + 1.01)
print(data)

model = convnet.modelPre()
for i in range(k):
    # Cycle through fixed 100-example mini-batches.
    pos = i % batch
    batch_index = range(pos * 100, pos * 100 + 100, 1)
    data_batch = data[batch_index]
    target_batch = target[batch_index]
    y_batch = y[batch_index]
    cost, rate = convnet.train(data_batch, target_batch, y_batch, model)
    print("iteration:", i, "rate:", rate, "cost:", cost)
while go:
    min_val_err = 20
    for epoch in range(num_epochs):
        # Full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(x_train, labels_train, oneshot_indices_train,
                                         oneshot_class, batch_size, shuffle=True):
            inputs, targets = batch
            print('\rTraining phase {:6.1f}%'.format(
                train_batches * batch_size / train_size * 100), end="")
            sys.stdout.flush()
            train_err += convnet.train(inputs, targets)
            train_batches += 1
        print('\rTraining phase {:6.1f}%'.format(100))

        # Full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(x_validate, labels_validate, oneshot_indices_validate,
                                         oneshot_class, batch_size, shuffle=False):
            inputs, targets = batch
            print('\rValidation phase {:6.1f}%'.format(
                val_batches * batch_size / validate_size * 100), end="")
            sys.stdout.flush()
            err, acc = convnet.validate(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1
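The iterate_minibatches helper is not shown in this snippet. Below is a minimal sketch of a compatible generator; the oneshot_indices and oneshot_class arguments belong to the original project's helper and are only accepted, not interpreted, here:

# Minimal mini-batch generator matching the call sites above.
# oneshot_indices / oneshot_class are accepted but unused (an assumption).
import numpy as np

def iterate_minibatches(inputs, targets, oneshot_indices, oneshot_class,
                        batch_size, shuffle=False):
    assert len(inputs) == len(targets)
    order = np.arange(len(inputs))
    if shuffle:
        np.random.shuffle(order)
    for start in range(0, len(inputs) - batch_size + 1, batch_size):
        batch = order[start:start + batch_size]
        yield inputs[batch], targets[batch]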
import os
import signal
import sys
import time
from datetime import datetime

import numpy as np
import tensorflow as tf

import convnet

# FLAGS (train_dir, max_steps, batch_size, log_device_placement) are assumed
# to be defined elsewhere.
FLAGS = tf.app.flags.FLAGS


def train():
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Get images and labels.
        images, labels = convnet.inputs(eval_data=False)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = convnet.inference(images)

        # Calculate loss.
        loss = convnet.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = convnet.train(loss, global_step)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Load a previously stored model from a checkpoint, if one exists.
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if ckpt and ckpt.model_checkpoint_path:
            # Restore from checkpoint.
            saver.restore(sess, ckpt.model_checkpoint_path)
            # Assuming model_checkpoint_path looks something like:
            #   /my-favorite-path/cifar10_train/model.ckpt-0,
            # extract global_step from it.
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            print("Loading from checkpoint. Global step %s" % global_step)
        else:
            print("No checkpoint file found... Creating a new model...")

        stepfile = "/home/soms/EmotionMusic/Model1/stepfile.txt"
        if not os.path.exists(stepfile):
            print("No step file found.")
            step = 0
        else:
            f = open(stepfile, "r")
            step = int(f.readlines()[0])
            print("Step file step %d" % step)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        # Persist the current step on Ctrl-C so training can resume later.
        # Registered once, before the loop, instead of on every iteration.
        def signal_handler(sig, frame):
            f = open(stepfile, 'w')
            f.write(str(step))
            print("Step file written to.")
            sys.exit(0)
        signal.signal(signal.SIGINT, signal_handler)

        while step < FLAGS.max_steps:
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, loss = %.2f '
                              '(%.1f examples/sec; %.3f sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 500 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

            step += 1
modelcheckpoint = SaverCallback(i + 1)
#training_set = training_set[:5000]
#testing_set = testing_set[:1000]
#make_X_y_convnet(images,w,h,offset=None,size=None)
#model = convnet.make_model_1(3,target_w,target_h,nb_filters = 16,nb_conv = 10,nb_classes=nb_classes,dropout=0.5)
print("making model")
model = convnet.make_window_model((3, target_h, target_w), nb_classes=nb_classes)
score = convnet.train(model, X_train, X_test, y_train, y_test, nb_classes,
                      batch_size, nb_epoch, callbacks=[modelcheckpoint], generator=None)
#print('After training at:', i, 'Test score:', score[0], 'Test accuracy:', score[1])
#score = convnet.test(model,max_value,mean_value,X_test,y_test,nb_classes)
print('Cross Validation result at:', i, 'Test score:', score[0], 'Test accuracy:', score[1])
convnet.save_model(model, "model-convnet-{0}.json".format(i), "model-convnet-{0}.h5".format(i))
#print("mean",mean_value,"max",max_value)
#for epoch in range(nb_epoch):
#    print("processing epoch", epoch)
#    # process batches
#    for start_index, end_index in make_batches(len(training_set), batch_size):
#        print("processing batch", start_index, end_index)
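SaverCallback is not defined in this snippet; judging from the call site it is a Keras Callback that checkpoints the model for the current cross-validation fold. A minimal sketch under that assumption (the per-epoch filename pattern and the save behaviour are guessed, only the constructor argument comes from SaverCallback(i + 1) above):

# Hypothetical SaverCallback: saves the fold's weights at the end of each epoch.
from keras.callbacks import Callback

class SaverCallback(Callback):
    def __init__(self, fold):
        super(SaverCallback, self).__init__()
        self.fold = fold  # 1-based cross-validation fold index

    def on_epoch_end(self, epoch, logs=None):
        # Write one weight file per fold and epoch.
        self.model.save_weights(
            "model-convnet-{0}-epoch-{1}.h5".format(self.fold, epoch),
            overwrite=True)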
alpha = 0.0001
k = 100000
model = convnet.modelPre()

# SGD with momentum.
mu = 0.1
v = {}
for key in model:
    v[key] = np.zeros_like(model[key])

for i in range(k):
    # The Spark driver acts as the parameter server: gather gradients from the
    # partitions, then broadcast the updated weights.
    gradientsByPartition = data.map(lambda x: convnet.train(x, i, model, alpha))
    gradients, cost, rate = gradientsByPartition.reduce(add)
    for key in model:
        if key.startswith("b"):
            model[key] -= alpha * gradients[key]
        else:
            v[key] = mu * v[key] - alpha * (gradients[key] + decayRate * model[key])
            model[key] += v[key]
    rate_test = 0.00
    # Estimate the test rate periodically.
    if i % 300 == 1:
        count = testData.map(lambda x: convnet.getTestRightCount(x, model)).reduce(addNum)
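The reduce(add) and reduce(addNum) calls above imply reducer functions that merge per-partition results on the driver. A possible sketch, assuming convnet.train returns a (gradients, cost, rate) tuple with gradients as a dict of NumPy arrays, and that cost and rate are simply summed across partitions:

# Hypothetical reducers for the Spark jobs above: 'add' merges per-partition
# (gradients, cost, rate) tuples key by key, 'addNum' sums scalar counts.
def add(a, b):
    grads_a, cost_a, rate_a = a
    grads_b, cost_b, rate_b = b
    summed = {key: grads_a[key] + grads_b[key] for key in grads_a}
    return summed, cost_a + cost_b, rate_a + rate_b

def addNum(a, b):
    return a + b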