예제 #1
0
def load_checkpoint(path):
    """Restore model weights from ``path`` and return fresh training objects.

    The checkpoint is always mapped onto the CPU. Only the ``'model'`` entry
    of the checkpoint is read; the optimizer returned here is newly created,
    so no optimizer state is restored from the file.

    Args:
        path: Location of the checkpoint, passed to ``torch.load``.

    Returns:
        A ``(model, optimizer, criterion)`` tuple: the ``LSTMModel`` with the
        saved weights, an Adam optimizer (lr=0.0001) over its parameters and
        a ``CTCLoss`` instance.
    """
    saved = torch.load(path, map_location=torch.device('cpu'))
    net = LSTMModel()
    net.load_state_dict(saved['model'])
    adam = optim.Adam(net.parameters(), lr=0.0001)
    return net, adam, nn.CTCLoss()
예제 #2
0
class InferenceWrapper(object):
  """Inference front-end that drives a single-step ``LSTMModel`` graph.

  Typical use is ``build_model()`` followed by ``load_model(path)``, then
  ``feed_visual_feature`` once and ``inference_step`` repeatedly.
  """

  def __init__(self, config, model_dir,
               ses_threads=2,
               length_normalization_factor=0.0,
               gpu_memory_fraction=0.3,
               gpu=1,
               with_image_embedding=True):
    """Record the settings; no TensorFlow object is created here.

    ``ses_threads`` and ``length_normalization_factor`` are accepted but are
    not used anywhere in this class.
    """
    # Work on a private copy so that forcing batch_size to 1 never leaks
    # back into the caller's config object.
    private_config = copy.deepcopy(config)
    private_config.batch_size = 1
    self.config = private_config
    self.model_dir = model_dir
    self.gpu = gpu
    self.gpu_memory_fraction = gpu_memory_fraction
    self.with_image_embedding = with_image_embedding
    self.flag_load_model = False

  def build_model(self):
    """Open a session and build the inference graph on the configured GPU.

    Exits the process with status -1 when ``with_image_embedding`` is false.
    """
    session_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=self.gpu_memory_fraction),
        allow_soft_placement=True)
    self.session = tf.Session(config=session_config)
    with tf.device('/gpu:%d' % self.gpu), \
        tf.variable_scope("LSTMModel", reuse=None):
      if not self.with_image_embedding:
        print('Please use with_image_embeddind=1')
        sys.exit(-1)
      self.model = LSTMModel(config=self.config,
                             mode="inference",
                             model_dir=self.model_dir,
                             flag_with_saver=True,
                             num_steps=1,
                             gpu=self.gpu)
      self.model.build()

  def load_model(self, model_path):
    """Restore the variables stored at ``model_path`` into the session."""
    self.model.saver.restore(self.session, model_path)
    self.model_path = model_path
    self.flag_load_model = True
    logger.info('Load model from %s', model_path)

  def feed_visual_feature(self, visual_feature):
    """Return the LSTM initial state computed from ``visual_feature``.

    The first dimension of ``visual_feature`` must equal ``config.vf_size``.
    """
    assert visual_feature.shape[0] == self.config.vf_size
    #assert self.flag_load_model, 'Must call local_model first'
    return self.session.run(
        fetches="LSTMModel/lstm/initial_state:0",
        feed_dict={"LSTMModel/visual_feature:0": visual_feature})

  def inference_step(self, input_feed, state_feed):
    """Run one decoding step.

    Returns:
      ``(softmax_output, state_output, None)``; the third element is always
      ``None``.
    """
    fetch_names = ["LSTMModel/softmax:0", "LSTMModel/lstm/state:0"]
    feeds = {
        "LSTMModel/input_feed:0": input_feed,
        "LSTMModel/lstm/state_feed:0": state_feed,
    }
    softmax_output, state_output = self.session.run(
        fetches=fetch_names, feed_dict=feeds)
    return softmax_output, state_output, None
예제 #3
0
def doRun(path):
    """Train an ``LSTMModel`` on the data under ``path`` and save a checkpoint.

    Two banner lines (a start marker and the received ``path``) are printed
    through the Java-side ``JavaBean`` object. The model is then trained and
    tested for 50 rounds and saved as ``path + 'models/' + '6'``.

    Args:
        path: Data location handed to ``DataGenerater``. It is also used as
            the prefix of the checkpoint directory and is concatenated as-is,
            so no path separator is inserted between ``path`` and
            ``'models/'``.
    """
    JavaBean = jclass("com.diy.edu.rd.Model.JavaBean")
    jb = JavaBean("python")
    jb.println("---------------------------->start<----------------")
    jb.println("---------------------------->" + path + "<----------------")
    # Arrays with up to 10000 elements are printed without summarisation.
    np.set_printoptions(threshold=10000)

    # The session is used as a context manager so that it is closed, and its
    # resources released, even when training or saving raises. Previously
    # the session was never closed.
    with tf.Session() as sess:
        model = LSTMModel()
        sess.run(tf.global_variables_initializer())

        data = DataGenerater.DataGenerater(path)
        for _ in range(50):
            train(sess, model, data)
            test(sess, model, data)

        saver = tf.train.Saver()
        save_dir = path + 'models/'
        saver.save(sess, save_dir + '6')
예제 #4
0
def main():
    """Train an ``LSTMModel`` for 50 rounds and save it under ``logs/models/``.

    Each round calls ``train`` and then ``test`` with the same session, model
    and data generator. The checkpoint is written as ``logs/models/6``.
    """
    # Arrays with up to 10000 elements are printed without summarisation.
    np.set_printoptions(threshold=10000)

    # The session is used as a context manager so that it is closed, and its
    # resources released, even when training or saving raises. Previously
    # the session was never closed.
    with tf.Session() as sess:
        model = LSTMModel()
        sess.run(tf.global_variables_initializer())

        data = DataGenerater.DataGenerater()
        for _ in range(50):
            train(sess, model, data)
            test(sess, model, data)

        saver = tf.train.Saver()
        save_dir = 'logs/models/'
        saver.save(sess, save_dir + '6')
def load_model(model_save_fn, model_type):
    """Load a saved model of the requested architecture.

    Args:
        model_save_fn: Location of the saved model; passed unchanged to the
            ``load`` method of the selected model class.
        model_type: One of ``'lstm'``, ``'rnn'`` or ``'cnn'``.

    Returns:
        Whatever the selected class's ``load`` returns.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    if model_type == 'lstm':
        return LSTMModel.load(model_save_fn)
    if model_type == 'rnn':
        return RNNModel.load(model_save_fn)
    if model_type == 'cnn':
        return CNNModel.load(model_save_fn)
    # An unknown type used to fall through to ``return model`` and fail with
    # an UnboundLocalError that did not mention the offending value.
    raise ValueError(
        "unknown model_type %r (expected 'lstm', 'rnn' or 'cnn')" % (model_type,))
예제 #6
0
 def build_model(self):
     """Open a TF session and build the inference-mode ``LSTMModel`` graph.

     The session is capped at ``self.gpu_memory_fraction`` of GPU memory and
     allows soft placement. The model is created under the ``"LSTMModel"``
     variable scope on GPU ``self.gpu``. When ``self.with_image_embedding``
     is false, a hint is printed and the process exits with status -1.
     """
     session_config = tf.ConfigProto(
         gpu_options=tf.GPUOptions(
             per_process_gpu_memory_fraction=self.gpu_memory_fraction),
         allow_soft_placement=True)
     self.session = tf.Session(config=session_config)
     with tf.device('/gpu:%d' % self.gpu), \
             tf.variable_scope("LSTMModel", reuse=None):
         if not self.with_image_embedding:
             print('Please use with_image_embeddind=1')
             sys.exit(-1)
         self.model = LSTMModel(config=self.config,
                                mode="inference",
                                model_dir=self.model_dir,
                                flag_with_saver=True,
                                num_steps=1,
                                gpu=self.gpu)
         self.model.build()
예제 #7
0
def train_model(train_data, train_target, word_to_idx, target_to_idx, model_file = "model.pth",
    model_type = "LSTM", embedding_dim = 32, hidden_dim = 16, epochs = 10, learning_rate = 0.1, seed = 19):
    """Train a sequence model with SGD and negative log-likelihood loss.

    Args:
        train_data: Iterable of input sentences.
        train_target: Iterable of targets, consumed in lockstep with
            ``train_data``.
        word_to_idx: Mapping used to encode sentences; its length is the
            input vocabulary size.
        target_to_idx: Mapping used to encode targets; its length is the
            number of output classes.
        model_file: Accepted for interface compatibility; not used here.
        model_type: Architecture name. Only ``"LSTM"`` is supported.
        embedding_dim: Embedding size passed to the model.
        hidden_dim: Hidden size passed to the model.
        epochs: Number of passes over the training pairs.
        learning_rate: SGD learning rate.
        seed: Seed given to ``torch.manual_seed``.

    Returns:
        The trained model.

    Raises:
        ValueError: If ``model_type`` is not ``"LSTM"``. The original code
            left ``model`` unbound in that case and failed later with an
            unrelated ``UnboundLocalError``.
    """
    # Validate first so an unsupported type fails before any work is done.
    if model_type != "LSTM":
        raise ValueError(
            "unsupported model_type %r (only 'LSTM' is available)" % (model_type,))

    torch.manual_seed(seed)

    model = LSTMModel(embedding_dim, hidden_dim, len(word_to_idx), len(target_to_idx))
    loss_function = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr = learning_rate)

    st = time.time()

    print("training model ...")

    # reference: http://pytorch.org/tutorials/beginner/nlp/sequence_models_tutorial.html
    report_every = 100
    count = 0
    loss_sum = 0.0
    for epoch in range(epochs):
        for sentence, next_word in zip(train_data, train_target):
            # PyTorch accumulates gradients, so clear them before each
            # instance.
            model.zero_grad()

            # Reset the LSTM hidden state so this instance is detached from
            # the history of the previous one.
            model.hidden = model.init_hidden()

            # Encode the inputs and targets as index sequences.
            sentence_in = prepare_sequence(sentence, word_to_idx)
            targets = prepare_sequence(next_word, target_to_idx)

            # Forward pass, loss, backward pass, parameter update.
            scores = model(sentence_in)
            loss = loss_function(scores, targets)
            loss.backward()
            optimizer.step()

            # ``loss.data[0]`` raises on 0-dim tensors in current PyTorch;
            # ``item()`` is the supported way to read the scalar.
            loss_sum += loss.item()

            # Count the sentence before reporting, so that each report
            # averages exactly ``report_every`` losses. The first report
            # used to average 101 losses over 100.
            count += 1
            if count % report_every == 0:
                print("%d sentence done. loss mean: %f" % (count, loss_sum / report_every))
                loss_sum = 0.0

        print("%d th epoch done. %f sec" % (epoch, time.time() - st))

    return model
예제 #8
0
 def build_model(self):
   """Create the session and construct the inference ``LSTMModel``.

   GPU memory use is limited to ``self.gpu_memory_fraction`` and soft device
   placement is enabled. The model is built inside the ``"LSTMModel"``
   variable scope on GPU ``self.gpu``; when ``self.with_image_embedding`` is
   false the process prints a hint and exits with status -1.
   """
   memory_cap = tf.GPUOptions(
       per_process_gpu_memory_fraction=self.gpu_memory_fraction)
   self.session = tf.Session(
       config=tf.ConfigProto(gpu_options=memory_cap,
                             allow_soft_placement=True))
   device_name = '/gpu:%d' % self.gpu
   with tf.device(device_name):
     with tf.variable_scope("LSTMModel", reuse=None):
       if not self.with_image_embedding:
         print('Please use with_image_embeddind=1')
         sys.exit(-1)
       model_kwargs = dict(config=self.config,
                           mode="inference",
                           model_dir=self.model_dir,
                           flag_with_saver=True,
                           num_steps=1,
                           gpu=self.gpu)
       self.model = LSTMModel(**model_kwargs)
       self.model.build()
예제 #9
0
# Split each partition (train / validation / test) into model inputs ``x_*``
# and targets ``y_*`` using the same ``timesteps`` setting.
x_train, y_train = DataSplitter.split_to_x_and_y(data_train, timesteps=timesteps)
x_val, y_val = DataSplitter.split_to_x_and_y(data_val, timesteps=timesteps)
x_test, y_test = DataSplitter.split_to_x_and_y(data_test, timesteps=timesteps)

# ``format(*shape)`` passes every dimension as a positional argument, but the
# single ``{}`` placeholder only consumes the first one.
# NOTE(review): presumably the first dimension is the sample count - confirm.
print("Train dataset has {} samples.".format(*x_train.shape))
# print(x_train[:3])
# print(y_train[:3])
print("Validation dataset has {} samples.".format(*x_val.shape))
# print(x_val[:3])
# print(y_val[:3])
print("Test dataset has {} samples.".format(*x_test.shape))
# print(x_test[:3])
# print(y_test[:3])

# Build & train model
# ``build()`` returns the object that is fitted and used for prediction below.
model = LSTMModel(timesteps, hidden_neurons).build()
print("Fitting the model...")
history = model.fit(x_train, y_train,
                    batch_size=batchsize, epochs=epochs, validation_data=(x_val, y_val))

# Output training history to csv
history_df = pd.DataFrame(history.history)
history_df.to_csv("training_history.csv")

print("Predicting...")
result = model.predict(x_test)
# The column assignment below requires ``result`` to yield exactly one column.
predicted = pd.DataFrame(result)
predicted.columns = ['predicted_nikkei']
# Put the ground-truth targets next to the predictions for comparison.
predicted['actual_nikkei'] = y_test

print("Completed Prediction.")
예제 #10
0
                one_hot(c),
                one_hot(b),
                one_hot(a), plus_vector, equals_vector
            ])
            y.append([
                one_hot(c),
                one_hot(b),
                one_hot(a),
                one_hot((a + b + c) % 10),
                one_hot((a + b + c) // 10)
            ])

# Convert the accumulated Python lists into arrays for the model.
x = np.array(x)
y = np.array(y)

# NOTE(review): the meaning of the positional arguments is not visible in
# this snippet - confirm against the LSTMModel definition.
model = LSTMModel(5, [10, 10], 12, 12)
# The same data is passed twice here (x, y and again x, y).
# NOTE(review): presumably the second pair is the validation set, so no
# held-out data is used - confirm.
model.train(x, y, x, y, 10, 5000)

# Predictions and per-layer activations are computed on the training inputs.
results = model.predict(x)
activations_0 = model.getActivations(0, x)
activations_1 = model.getActivations(1, x)

count = 0

for index, result in enumerate(results):
    left = argmax(result[3])
    right = argmax(result[4])
    leftTarget = argmax(y[index][3])
    rightTarget = argmax(y[index][4])

    if left == leftTarget and right == rightTarget:
예제 #11
0
def main(unused_args):
  """Train the ``LSTMModel`` described by ``FLAGS`` and its model config.

  The function exits with status 0 when the model directory already exists
  and ``FLAGS.overwrite`` is not set. Otherwise it:

  1. loads ``model_conf/<FLAGS.model_name>.py`` and fills in the vocabulary
     and visual-feature sizes,
  2. builds the model on the requested GPU,
  3. initialises all variables and optionally restores pre-trained weights
     (full model, image embedding or language model, in that priority),
  4. trains for ``config.num_epoch`` epochs, saving checkpoints every
     ``config.num_epoch_save`` epochs (default 1),
  5. records the config file and the pre-trained paths in the model
     directory.

  Args:
    unused_args: Not used by this function.
  """
  # Local import: only this entry point needs it, and the file's import
  # block is not touched.
  import shutil

  model_dir=utility.get_model_dir(FLAGS)
  if os.path.exists(model_dir) and not FLAGS.overwrite:
    logger.info('%s exists. quit', model_dir)
    sys.exit(0)

  # Load model configuration
  config_path = os.path.join(os.path.dirname(__file__), 'model_conf', FLAGS.model_name + '.py')
  config = utility.load_config(config_path)

  FLAGS.vf_dir = os.path.join(FLAGS.rootpath, FLAGS.train_collection, 'FeatureData', FLAGS.vf_name)
  vocab_file = utility.get_vocab_file(FLAGS.train_collection, FLAGS.word_cnt_thr, FLAGS.rootpath)
  textbank = TextBank(vocab_file)
  config.vocab_size = len(textbank.vocab)
  # The second whitespace-separated field of shape.txt is the feature size.
  # The file is read inside ``with`` so the handle is closed.
  with open(os.path.join(FLAGS.vf_dir, 'shape.txt')) as shape_file:
    config.vf_size = int(shape_file.read().split()[1])

  num_epoch_save = getattr(config, 'num_epoch_save', 1)

  # The image embedding is frozen only when the config asks for it AND the
  # model name contains 'freeze'. This gives the same outcome as the former
  # ``try/assert/except: pass`` construct, but without a bare ``except``,
  # and the mismatch case is now reported instead of silently ignored.
  # ``== False`` is kept on purpose so that a config value of 0 still counts.
  train_image_embedding = True
  if getattr(config, 'train_image_embedding', True) == False:
    if 'freeze' in FLAGS.model_name:
      train_image_embedding = False
      logger.info('Not training image embedding')
    else:
      logger.warning(
          "config.train_image_embedding is False but model name %s does not "
          "contain 'freeze'; image embedding will be trained", FLAGS.model_name)

  with_image_embedding = FLAGS.with_image_embedding != 0
  # Start model training
  gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
  config_proto = tf.ConfigProto(
      intra_op_parallelism_threads=FLAGS.ses_threads, gpu_options=gpu_options, allow_soft_placement=True)

  with tf.Graph().as_default(), tf.Session(config=config_proto) as session:

    # tf.train.SummaryWriter was replaced by tf.summary.FileWriter and no
    # longer exists in TF >= 1.0, so prefer the new name when it is there.
    if hasattr(tf, 'summary') and hasattr(tf.summary, 'FileWriter'):
      writer = tf.summary.FileWriter("logs/", session.graph)
    else:
      writer = tf.train.SummaryWriter("logs/", session.graph)

    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)

    assert len(config.buckets) >= 1
    assert config.buckets[-1] == config.max_num_steps
    models = []
    with tf.device('gpu:%s'%FLAGS.gpu):
      with tf.variable_scope("LSTMModel", reuse=None, initializer=initializer):
        if with_image_embedding:
          m = LSTMModel(mode='train',
              num_steps=config.buckets[0],
              config=config,
              model_dir=model_dir,
              flag_with_saver=True,
              train_image_embedding=train_image_embedding)
        else:
          # deprecating this function
          logger.info('Plz use with_image_embedding=1')
          sys.exit(-1)

        m.build()
        models.append(m)

    # The iteration number is taken from a path of the form '...model_<n>.<ext>'.
    pre_trained_iter=0
    if FLAGS.pre_trained_model_path:
      pre_trained_iter = int(FLAGS.pre_trained_model_path.split('model_')[1].split('.')[0])
    hdlr = logging.FileHandler(os.path.join(m.model_dir, 'log%d.txt'%pre_trained_iter))
    hdlr.setLevel(logging.INFO)
    hdlr.setFormatter(logging.Formatter(formatter_log))
    logger.addHandler(hdlr)

    # Every start-up path initialises all variables first. This is done
    # once, using feature detection instead of comparing version strings.
    if hasattr(tf, 'global_variables_initializer'):
      tf.global_variables_initializer().run()
    else:
      tf.initialize_all_variables().run()

    # Optionally overwrite the fresh values with pre-trained ones.
    if FLAGS.pre_trained_model_path:
      models[0].saver.restore(session, FLAGS.pre_trained_model_path)
      logger.info('Continue to train from %s', FLAGS.pre_trained_model_path)
    elif FLAGS.pre_trained_imembedding_path:
      models[0].imemb_saver.restore(session, FLAGS.pre_trained_imembedding_path)
      logger.info('Init image-embedding from %s', FLAGS.pre_trained_imembedding_path)
    elif FLAGS.pre_trained_lm_path:
      models[0].lm_saver.restore(session, FLAGS.pre_trained_lm_path)
      logger.info('Init language from %s', FLAGS.pre_trained_lm_path)

    iters_done = 0
    data_provider = BucketDataProvider(FLAGS.train_collection, vocab_file, FLAGS.vf_name,
                               language=FLAGS.language,
                               rootpath=FLAGS.rootpath)

    for i in range(config.num_epoch):
      logger.info('epoch %d', i)
      data_provider.shuffle_data_queue()
      train_cost, iters_done, result = run_epoch(session, iters_done, config, models, data_provider , verbose=True)
      logger.info("Train cost for epoch %d is %.3f" % (i, train_cost))
      writer.add_summary(result, i)

      # save the current model if necessary
      if (i+1) % num_epoch_save == 0:
        # The full-model checkpoint name includes the pre-trained offset;
        # the image-embedding checkpoint name does not (kept as before).
        models[0].saver.save(session, os.path.join(m.variable_dir,
              'model_%d.ckpt' % (iters_done+pre_trained_iter)))
        if with_image_embedding:
          models[0].imemb_saver.save(session, os.path.join(m.variable_dir,
                'imembedding_model_%d.ckpt' % (iters_done)))
        logger.info("Model saved at iteration %d", iters_done)

    # Flush pending summaries to disk before the session goes away.
    writer.close()

  # Copy the config file into the checkpoint directory and record where the
  # pre-trained weights came from. The paths come from command-line flags,
  # so this is done with file APIs instead of shell strings. A copy or
  # write failure now raises instead of being ignored.
  shutil.copy(config_path, model_dir)
  if FLAGS.pre_trained_model_path:
    with open(os.path.join(model_dir, 'pre_trained_model_path.txt'), 'w') as fw:
      fw.write('%s\n' % FLAGS.pre_trained_model_path)
  if FLAGS.pre_trained_imembedding_path:
    with open(os.path.join(model_dir, 'pre_trained_imembedding_path.txt'), 'w') as fw:
      fw.write('%s\n' % FLAGS.pre_trained_imembedding_path)
예제 #12
0
def main():
	"""Run the interactive traffic-prediction menu until the user exits.

	Menu options:
	  1. select the current intersection and reload its dataset,
	  2. list all intersections,
	  3. train a model for the current intersection (four modes),
	  4. report test accuracy for the current intersection,
	  5. route check: predict for a manually entered intersection, weekday,
	     time of day and season.

	The function returns when the main menu yields ``None``.
	"""
	intersections = read_intersections("intersections.data")
	# Names are stripped of surrounding spaces so that typed input can be
	# matched against them in the route check.
	intersection_list = [entry['name'].strip(' ') for entry in intersections]

	# Intersection that is selected when the program starts.
	sel_intersection = "4589"

	df = get_dataset(sel_intersection, intersections)
	x_train, x_test, y_train, y_test = preprocessing(df)

	model_file = search(intersections, 'name', sel_intersection)

	main_menu = [
		"CAPSTONE TRAFFIC PREDICTION",

		"Select Intersection",
		"List of Intersections",
		"Train Intersection",
		"Test Intersection (Accuracy)",
		"Route Check"
	]
	train_menu = [
		"Train Mode:",

		"Train from scratch w/ events",
		"Train from file w/ events",
		"Train from scratch",
		"Train from file",
	]

	while True:
		print("Currently Selected Intersection:", sel_intersection)
		choice = menu.do_menu(main_menu)
		model = LSTMModel(x_train.shape[1], y_train.shape[1])
		if choice is None:
			return  # Exit main() (and program).
		if choice == 1:
			# Select Intersection
			temp_menu = ["Please Select a New Intersection"]
			temp_menu.extend(
				line["name"] + ": " + line["street"] for line in intersections)

			choice = menu.do_menu(temp_menu)
			if choice is not None:
				sel_intersection = intersections[choice - 1]['name']
				print(sel_intersection, "set as current intersection.")

				df = get_dataset(sel_intersection, intersections)
				x_train, x_test, y_train, y_test = preprocessing(df)

				model_file = search(intersections, 'name', sel_intersection)
				if model_file is not None:
					model_file = intersections[model_file]['model']
					# NOTE(review): ``model`` is rebuilt at the top of the
					# next loop iteration, so the network loaded here is
					# not reused by any other menu option.
					model.load_network('./model/'+str(sel_intersection)+'.hdf')

		elif choice == 2:
			# List Intersections
			print_intersections(intersections)
		elif choice == 3:
			# Train Intersections
			# TODO test each option
			choice = menu.do_menu(train_menu)
			if choice not in (1, 2, 3, 4):
				# No training mode was selected. Falling through used to
				# reach the training call with no initialised network and
				# an unset ``intersection_idx``.
				continue

			use_events = choice in (1, 2)  # modes 1 and 2 include event features
			from_file = choice in (2, 4)   # modes 2 and 4 resume from a saved model

			x_train, x_test, y_train, y_test = preprocessing(df, events=use_events)
			model = LSTMModel(x_train.shape[1], y_train.shape[1])

			intersection_idx = search(intersections, 'name', sel_intersection)
			model_file = "model/" + sel_intersection + ".hdf"
			if from_file:
				if os.path.exists(model_file):
					model.load_network(model_file)
				else:
					print("Model does not exist, starting from scratch")
					model.init_network(hidden_size=50)
			else:
				# Only mode 3 deletes an existing model file; mode 1 leaves
				# it in place, as before.
				if choice == 3 and os.path.exists(model_file):
					os.remove(model_file)
				model.init_network(hidden_size=50)

			try:
				e = int(input("Enter Epochs to train for (Default=50): "))
				model.epochs = e
			except ValueError:
				print("Invalid number entered, using default value")
			model.train(x_train, y_train, './'+ model_file)

			intersections[intersection_idx]['model'] = model_file
			save_intersections(intersections, "intersections.data")

		elif choice == 4:
			# Test Intersections
			model_file = "model/" + sel_intersection + ".hdf"
			model = LSTMModel(x_train.shape[1], y_train.shape[1])
			if os.path.exists(model_file):
				model.load_network(model_file)
				test_output = model.get_accuracy(x_test, y_test)
				N, S, E, W = confusion_matrix(test_output, y_test, True)
			else:
				print("Please train intersection first")

		elif choice == 5:
			model = LSTMModel(x_train.shape[1], y_train.shape[1], intersection_list)
			# Route Check
			day_week = [1,2,3,4,5,6,7]
			season = [1,2,3,4]
			x_data = []
			peak = 0

			while True:
				num_inter = input("Please enter intersection number: ")
				if num_inter in intersection_list:
					num_inter = int(num_inter)
					break
				print("Intersection not found!")

			while True:
				num_day = input("Please enter the day of the week:\nOptions:\n1:Sunday\n2:Monday\n3:Tuesday\n4:Wednesday\n5:Thursday\n6:Friay\n7:Saturday\n")
				try:
					num_day = int(num_day)
				except ValueError:
					# Non-numeric input is re-prompted instead of crashing.
					num_day = None
				if num_day in day_week:
					# One-hot encode the weekday.
					week = [0,0,0,0,0,0,0]
					week[num_day-1] = 1
					x_data = x_data + week
					break
				print("Day of the week not found!")

			while True:
				time_day = input("Please enter the time of the day (ex. 17:30): ")
				temp = time_day.split(':')
				minute_of_day = None
				if len(temp) == 2:
					try:
						hour_of_day = int(temp[0])
						minute_of_day = hour_of_day * 60 + int(temp[1])
					except ValueError:
						# Non-numeric parts are reported below as bad format.
						minute_of_day = None
				if minute_of_day is None or minute_of_day < 0 or minute_of_day > 1440:
					print("Please enter time in the proper format!")
					continue
				# The peak window is tested on the hour of day. It used to
				# be tested on minutes (hour * 60), so it never matched.
				if hour_of_day > 9 and hour_of_day < 17:
					peak = 1
				# Time of day as a fraction of the 1440 minutes in a day.
				x_data.append(float(minute_of_day) / float(1440))
				break

			while True:
				season_input = input("Please enter the season:\n1:Summer\n2:Fall\n3:Winter\n4:Spring\n")
				try:
					season_input = int(season_input)
				except ValueError:
					# Non-numeric input is re-prompted instead of crashing.
					season_input = None
				if season_input in season:
					# One-hot encode the season.
					seasons = [0,0,0,0]
					seasons[season_input-1] = 1
					x_data = x_data + seasons
					break
				print("Season not found!")

			x_data.append(peak)
			# add [0,0] for events
			x_data = x_data + [0,0]
			# A separate name is used so that the ``x_test`` split needed by
			# "Test Intersection" is not overwritten.
			route_input = np.array([x_data])
			route_input = np.reshape(route_input, (route_input.shape[0], route_input.shape[1], 1))
			res = model.predict(route_input, num_inter)
			print("Prediction: " + str(res))
예제 #13
0
class InferenceWrapper(object):
    """Inference front-end that drives a single-step ``LSTMModel`` graph.

    Typical use is ``build_model()`` followed by ``load_model(path)``, then
    ``feed_visual_feature`` once and ``inference_step`` repeatedly.
    """
    def __init__(self,
                 config,
                 model_dir,
                 ses_threads=2,
                 length_normalization_factor=0.0,
                 gpu_memory_fraction=0.3,
                 gpu=1,
                 with_image_embedding=True):
        """Record the settings; no TensorFlow object is created here.

        ``ses_threads`` and ``length_normalization_factor`` are accepted but
        are not used anywhere in this class.
        """
        # Work on a private copy so that forcing batch_size to 1 never leaks
        # back into the caller's config object.
        private_config = copy.deepcopy(config)
        private_config.batch_size = 1
        self.config = private_config
        self.model_dir = model_dir
        self.gpu = gpu
        self.gpu_memory_fraction = gpu_memory_fraction
        self.with_image_embedding = with_image_embedding
        self.flag_load_model = False

    def build_model(self):
        """Open a session and build the inference graph on the configured GPU.

        Exits the process with status -1 when ``with_image_embedding`` is
        false.
        """
        session_config = tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=self.gpu_memory_fraction),
            allow_soft_placement=True)
        self.session = tf.Session(config=session_config)
        with tf.device('/gpu:%d' % self.gpu), \
                tf.variable_scope("LSTMModel", reuse=None):
            if not self.with_image_embedding:
                print('Please use with_image_embeddind=1')
                sys.exit(-1)
            self.model = LSTMModel(config=self.config,
                                   mode="inference",
                                   model_dir=self.model_dir,
                                   flag_with_saver=True,
                                   num_steps=1,
                                   gpu=self.gpu)
            self.model.build()

    def load_model(self, model_path):
        """Restore the variables stored at ``model_path`` into the session."""
        self.model.saver.restore(self.session, model_path)
        self.model_path = model_path
        self.flag_load_model = True
        logger.info('Load model from %s', model_path)

    def feed_visual_feature(self, visual_feature):
        """Return the LSTM initial state computed from ``visual_feature``.

        The first dimension of ``visual_feature`` must equal
        ``config.vf_size``.
        """
        assert visual_feature.shape[0] == self.config.vf_size
        #assert self.flag_load_model, 'Must call local_model first'
        return self.session.run(
            fetches="LSTMModel/lstm/initial_state:0",
            feed_dict={"LSTMModel/visual_feature:0": visual_feature})

    def inference_step(self, input_feed, state_feed):
        """Run one decoding step.

        Returns:
            ``(softmax_output, state_output, None)``; the third element is
            always ``None``.
        """
        fetch_names = ["LSTMModel/softmax:0", "LSTMModel/lstm/state:0"]
        feeds = {
            "LSTMModel/input_feed:0": input_feed,
            "LSTMModel/lstm/state_feed:0": state_feed,
        }
        softmax_output, state_output = self.session.run(
            fetches=fetch_names, feed_dict=feeds)
        return softmax_output, state_output, None
예제 #14
0
from torch import Tensor

from lstm_model import LSTMModel
from saa import MovieDataLoader
from tqdm import tqdm

# Load the dataset object; it exposes the three data loaders and the text
# vocabulary used below.
dataset = MovieDataLoader()

tr_dl, val_dl, test_dl = dataset.tr_dl, dataset.val_dl, dataset.test_dl
vocab_size = len(dataset.TEXT.vocab)
# NOTE(review): ``input_dim`` is not used in the visible code; the model is
# sized from ``vocab_size`` instead.
input_dim = 28
hidden_dim = 128
# NOTE(review): the original remark on this line mentions moving from one
# layer to two, but the value is 1 - confirm which is intended.
layer_dim = 1  # ONLY CHANGE IS HERE FROM ONE LAYER TO TWO LAYER
output_dim = 2

model = LSTMModel(vocab_size, hidden_dim, layer_dim, output_dim)

# Binary cross-entropy loss with plain SGD over all model parameters.
criterion = nn.BCELoss()
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)


def test(loss_fn, model, data_loader, device):
    if model.training:
        model.eval()

    total_loss = 0
    correct_count = 0

    for step, batch in tqdm(enumerate(data_loader), total=len(data_loader)):
            ])
            y.append([
                one_hot(b) + [b / 10.0],
                one_hot(a) + [a / 10.0],
                one_hot((a + b) % 10) + [((a + b) % 10) / 10.0],
                one_hot((a + b) // 10) + [((a + b) // 10) / 10.0]
            ])

# NOTE(review): this unconditional exit makes every statement below it
# unreachable - confirm whether it is a leftover debugging stop.
sys.exit(0)

# Convert the accumulated Python lists into arrays for the model.
x = np.array(x)
y = np.array(y)
test_x = np.array(test_x)
test_y = np.array(test_y)

# NOTE(review): the meaning of the positional arguments is not visible in
# this snippet - confirm against the LSTMModel definition.
model = LSTMModel(4, [100, 100], 14, 11)
model.train(x, y, test_x, test_y, 10, 5000)

# Predictions and per-layer activations are computed on the training inputs.
results = model.predict(x)
activations_0 = model.getActivations(0, x)
activations_1 = model.getActivations(1, x)

count = 0
count_ordinal = 0

for index, result in enumerate(results):
    left = argmax(result[2][:-1])
    right = argmax(result[3][:-1])
    leftTarget = argmax(y[index][2][:-1])
    rightTarget = argmax(y[index][3][:-1])
예제 #16
0
def main(unused_args):
  """Compute the validation loss of every saved checkpoint and rank them.

  The function exits with status 0 when ``loss_info.txt`` already exists in
  the output directory and ``FLAGS.overwrite`` is not set. Otherwise it
  builds the model in ``eval`` mode and restores each
  ``model_<iter>.ckpt`` found in ``<model_dir>/variables``. Each checkpoint
  is scored on the validation collection, or its cached ``loss.txt`` is
  reused. Finally ``loss_info.txt`` is written with one
  ``<iter> <loss>`` line per checkpoint, sorted by ascending loss.

  Args:
    unused_args: Not used by this function.
  """
  val_collection = FLAGS.val_collection
  overwrite = FLAGS.overwrite
  output_dir = utility.get_sim_dir(FLAGS)
  loss_info_file = os.path.join(output_dir, 'loss_info.txt')
  if os.path.exists(loss_info_file) and not overwrite:
    logger.info('%s exists. quit', loss_info_file)
    sys.exit(0)

  model_dir=utility.get_model_dir(FLAGS)
  config_path = os.path.join(os.path.dirname(__file__), 'model_conf', FLAGS.model_name + '.py')
  config = utility.load_config(config_path)

  vocab_file = utility.get_train_vocab_file(FLAGS)
  textbank = TextBank(vocab_file)
  config.vocab_size = len(textbank.vocab)
  # The second whitespace-separated field of shape.txt is the feature size.
  # The file is read inside ``with`` so the handle is closed.
  with open(os.path.join(utility.get_val_feat_dir(FLAGS), 'shape.txt')) as shape_file:
    config.vf_size = int(shape_file.read().split()[1])

  gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
  config_proto = tf.ConfigProto(
      intra_op_parallelism_threads=FLAGS.ses_threads, gpu_options=gpu_options, allow_soft_placement=True)

  with_image_embedding = FLAGS.with_image_embedding > 0
  with tf.Graph().as_default(), tf.Session(config=config_proto) as session:

    assert len(config.buckets) >= 1
    assert config.buckets[-1] == config.max_num_steps
    with tf.device('/gpu:%d'%FLAGS.gpu):
      with tf.variable_scope("LSTMModel", reuse=None):
        if with_image_embedding:
          model = LSTMModel(mode='eval',
                num_steps=config.buckets[-1],
                config=config,
                model_dir=model_dir,
                flag_with_saver=True)
        else:
          # deprecating this option
          print('Plz use image_embedding')
          sys.exit(-1)
        model.build()

    # Collect (iteration, checkpoint path) pairs from the '.ckpt.meta'
    # files. The iteration number sits between the fixed prefix and suffix.
    prefix, suffix = 'model_', '.ckpt.meta'
    model_path_list = []
    _dir = os.path.join(model_dir,'variables')
    for _file in os.listdir(_dir):
      if _file.startswith(prefix) and _file.endswith(suffix):
        iter_n = int(_file[len(prefix):-len(suffix)])
        model_path = os.path.join(_dir, 'model_%d.ckpt'%iter_n)
        model_path_list.append((iter_n, model_path))
    # os.listdir order is arbitrary; evaluate in ascending iteration order
    # so that runs and logs are reproducible.
    model_path_list.sort()

    data_provider = BucketDataProvider(val_collection, vocab_file,
          feature=FLAGS.vf_name,
          language=FLAGS.language,
          flag_shuffle=False,
          rootpath=FLAGS.rootpath)
    iter2loss = {}
    for iter_n, model_path in model_path_list:
      loss_file = os.path.join(output_dir, 'model_%d.ckpt' % iter_n, 'loss.txt')
      if os.path.exists(loss_file) and not overwrite:
        # Reuse the loss cached by an earlier run.
        logger.info('load loss from %s', loss_file)
        with open(loss_file) as fr:
          iter2loss[iter_n] = float(fr.readline().strip())
        continue
      loss_dir = os.path.split(loss_file)[0]
      if not os.path.exists(loss_dir):
        os.makedirs(loss_dir)

      model.saver.restore(session, model_path)
      # NOTE(review): this message says "train" although this function only
      # evaluates; the text is kept unchanged for log compatibility.
      logger.info('Continue to train from %s', model_path)

      val_cost = run_epoch(session, config.batch_size, config.buckets[-1], config,model, data_provider)
      logger.info("Validation cost for checkpoint model_%d.ckpt is %.3f" % (iter_n, val_cost))

      iter2loss[iter_n] = val_cost
      with open(loss_file, "w") as fw:
        fw.write('%g' % val_cost)

  # ``items()`` works on both Python 2 and 3; ``iteritems()`` exists only on
  # Python 2.
  sorted_iter2loss = sorted(iter2loss.items(), key=lambda item: item[1])
  with open(loss_info_file, 'w') as fw:
    fw.write('\n'.join(['%d %g' % (iter_n, loss) for (iter_n, loss) in sorted_iter2loss]))
예제 #17
0
    dataLoader = DataLoader(src="MNIST_data/dataset")

    [
        noisyTrain,
        noisySymbolsTrain,
        targetsTrain,
        noisyValidation,
        noisySymbolsValidation,
        targetsValidation,
        noisyTest,
        noisySymbolsTest,
        targetsTest
    ] = dataLoader.getSequencialDataNoClassification(i, 2, 2)

    model = LSTMModel(3, [512], 784, 20)
    model.train(noisyTrain, targetsTrain, noisyValidation, targetsValidation, 100, 50)
    result = model.evaluate(noisyTest, targetsTest)
    #predictions = model.predict(noisyTest)

    #utils.renderResults("results_ideal" + str(i) + ".png", noisyTest, predictions)

    results.append(result[1])
    total += result[1]

print(results)
print("SCORE: " + str(total / k))

#No Classification
#[0.26166666626930235, 0.4266666686534882, 0.4099999964237213, 0.2849999988079071, 0.28833332896232605]
#SCORE: 0.33433333182334896
예제 #18
0
from lstm_model import LSTMModel
if __name__ == "__main__":
    # Script entry point: obtain a model via LSTMModel.create_from_args()
    # (presumably configured from command-line arguments - confirm in
    # lstm_model) and hand control to its run() method.
    lstm_model = LSTMModel.create_from_args()
    lstm_model.run()
예제 #19
0
    valid_batch_size = 64
    n_conv_stack = 120
    conv_size = 5
    
    corpus = Corpus.load_from_file(r'imdb\imdb-prepared.pkl')
    (train_x, train_mask, train_y), (valid_x, valid_mask, valid_y), (test_x, test_mask, test_y) = corpus.train_valid_test()
    
    
    n_train, n_valid, n_test = len(train_x), len(valid_x), len(test_x)
    class_dim = np.max(train_y) + 1
    
    rng = np.random.RandomState(1224)
    th_rng = RandomStreams(1226)
    
    gensim_w2v = Word2Vec.load(r'w2v\enwiki.w2v')
    lstm = LSTMModel(corpus, n_emb=n_emb, n_hidden=n_hidden, pooling='max', gensim_w2v=gensim_w2v)
    
    # test whether emb values are replaced by word2vec model results
#    lstm_emb = lstm.model_layers[0]
#    idx = 1001
#    W_values = lstm_emb.W.get_value()
#    vec = w2v.wv[corpus.dic._idx2word[idx]]
#    W_values[idx]
#    print(W_values[idx]/vec)
    
    # test function compile
    f = theano.function([lstm.x, lstm.mask, lstm.y], lstm.cost)
    batch_idx_seq = np.arange(batch_size)
    print(f(train_x[batch_idx_seq], train_mask[batch_idx_seq], train_y[batch_idx_seq]))
    
#    # x: (batch size, n_words/steps)
예제 #20
0
def main(unused_args):
    """Train an ``LSTMModel`` and save checkpoints while training.

    Everything is driven by the module-level ``FLAGS`` and by the config file
    ``model_conf/<FLAGS.model_name>.py`` located next to this script.  The
    function exits with status 0 when the model directory already exists and
    ``FLAGS.overwrite`` is not set.

    Steps:
      1. Load the config and fill in ``vocab_size``, ``vf_size``,
         ``fluency_method`` and ``use_weighted_loss``.
      2. Build one ``LSTMModel`` in ``'train'`` mode.
      3. Initialise all variables, then optionally restore a pre-trained
         full model, image embedding or language model (the first flag that
         is set wins).
      4. Run ``config.num_epoch`` epochs, saving a checkpoint every
         ``num_epoch_save`` epochs (default 1).
      5. Copy the config file into the model directory and record which
         pre-trained paths were used.

    Args:
        unused_args: Ignored.
    """
    # Function-scope import: only needed for the final config copy.
    import shutil

    model_dir = utility.get_model_dir(FLAGS)
    if os.path.exists(model_dir) and not FLAGS.overwrite:
        logger.info('%s exists. quit', model_dir)
        sys.exit(0)

    # Load model configuration
    config_path = os.path.join(os.path.dirname(__file__), 'model_conf',
                               FLAGS.model_name + '.py')
    config = utility.load_config(config_path)

    FLAGS.vf_dir = os.path.join(FLAGS.rootpath, FLAGS.train_collection,
                                'FeatureData', FLAGS.vf_name)
    vocab_file = utility.get_vocab_file(FLAGS.train_collection,
                                        FLAGS.word_cnt_thr, FLAGS.rootpath)
    textbank = TextBank(vocab_file)
    config.vocab_size = len(textbank.vocab)
    # The second whitespace-separated token of shape.txt is taken as the
    # visual feature size.  Use a context manager so the handle is closed.
    with open(os.path.join(FLAGS.vf_dir, 'shape.txt')) as shape_file:
        config.vf_size = int(shape_file.read().split()[1])

    num_epoch_save = getattr(config, 'num_epoch_save', 1)

    # The flag arrives as the string 'None' when no fluency method is wanted.
    if FLAGS.fluency_method == 'None':
        FLAGS.fluency_method = None
    config.fluency_method = FLAGS.fluency_method
    config.use_weighted_loss = (config.fluency_method == 'weighted')

    # The image embedding is trained unless the config explicitly disables it
    # AND the model name contains 'freeze'.  The previous bare
    # `try/except: pass` silently ignored a mismatch between the two; the
    # outcome is kept (embedding stays trainable) but is now logged.
    # `== False` is deliberate so that 0 is treated like False, as before.
    train_image_embedding = True
    if getattr(config, 'train_image_embedding', True) == False:  # noqa: E712
        if 'freeze' in FLAGS.model_name:
            train_image_embedding = False
            logger.info('Not training image embedding')
        else:
            logger.warning(
                "config.train_image_embedding is False but model name %s "
                "does not contain 'freeze'; image embedding will be trained",
                FLAGS.model_name)

    with_image_embedding = FLAGS.with_image_embedding != 0
    # Start model training
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config_proto = tf.ConfigProto(
        intra_op_parallelism_threads=FLAGS.ses_threads,
        gpu_options=gpu_options,
        allow_soft_placement=True)

    with tf.Graph().as_default(), tf.Session(config=config_proto) as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        assert len(config.buckets) >= 1
        assert config.buckets[-1] == config.max_num_steps
        models = []
        with tf.device('gpu:%s' % FLAGS.gpu):
            with tf.variable_scope("LSTMModel",
                                   reuse=None,
                                   initializer=initializer):
                if not with_image_embedding:
                    # Training without the image embedding is deprecated.
                    logger.info('Plz use with_image_embedding=1')
                    sys.exit(-1)
                m = LSTMModel(mode='train',
                              num_steps=config.buckets[0],
                              config=config,
                              model_dir=model_dir,
                              flag_with_saver=True,
                              train_image_embedding=train_image_embedding)
                m.build()
                models.append(m)

        # When resuming, the iteration count is parsed from a checkpoint name
        # of the form '...model_<iter>.<ext>'.
        pre_trained_iter = 0
        if FLAGS.pre_trained_model_path:
            pre_trained_iter = int(
                FLAGS.pre_trained_model_path.split('model_')[1].split('.')[0])
        hdlr = logging.FileHandler(
            os.path.join(m.model_dir, 'log%d.txt' % pre_trained_iter))
        hdlr.setLevel(logging.INFO)
        hdlr.setFormatter(logging.Formatter(formatter_log))
        logger.addHandler(hdlr)

        # Initialise every variable once; a restore below then overwrites the
        # subset covered by the chosen saver.  Feature detection replaces the
        # former lexicographic comparison of tf.__version__ strings.
        if hasattr(tf, 'global_variables_initializer'):
            tf.global_variables_initializer().run()
        else:
            tf.initialize_all_variables().run()

        if FLAGS.pre_trained_model_path:
            models[0].saver.restore(session, FLAGS.pre_trained_model_path)
            logger.info('Continue to train from %s',
                        FLAGS.pre_trained_model_path)
        elif FLAGS.pre_trained_imembedding_path:
            models[0].imemb_saver.restore(session,
                                          FLAGS.pre_trained_imembedding_path)
            logger.info('Init image-embedding from %s',
                        FLAGS.pre_trained_imembedding_path)
        elif FLAGS.pre_trained_lm_path:
            models[0].lm_saver.restore(session, FLAGS.pre_trained_lm_path)
            logger.info('Init language from %s', FLAGS.pre_trained_lm_path)

        iters_done = 0
        data_provider = BucketDataProvider(FLAGS.train_collection,
                                           vocab_file,
                                           FLAGS.vf_name,
                                           language=FLAGS.language,
                                           method=config.fluency_method,
                                           rootpath=FLAGS.rootpath)

        for i in range(config.num_epoch):
            logger.info('epoch %d', i)
            data_provider.shuffle_data_queue()
            train_cost, iters_done = run_epoch(session,
                                               iters_done,
                                               config,
                                               models,
                                               data_provider,
                                               verbose=True)
            logger.info("Train cost for epoch %d is %.3f" % (i, train_cost))

            # save the current model if necessary
            if (i + 1) % num_epoch_save == 0:
                models[0].saver.save(
                    session,
                    os.path.join(
                        m.variable_dir,
                        'model_%d.ckpt' % (iters_done + pre_trained_iter)))
                if with_image_embedding:
                    # NOTE(review): unlike the full-model checkpoint, this
                    # name does not include pre_trained_iter - confirm that
                    # this is intended.
                    models[0].imemb_saver.save(
                        session,
                        os.path.join(
                            m.variable_dir,
                            'imembedding_model_%d.ckpt' % (iters_done)))
                logger.info("Model saved at iteration %d", iters_done)

    # Copy the config file into the checkpoint directory.  shutil / plain file
    # writes replace shell strings built from paths, which broke on spaces or
    # shell metacharacters and hid failures.
    shutil.copy(config_path, model_dir)
    if FLAGS.pre_trained_model_path:
        with open(os.path.join(model_dir, 'pre_trained_model_path.txt'),
                  'w') as fw:
            fw.write('%s\n' % FLAGS.pre_trained_model_path)
    if FLAGS.pre_trained_imembedding_path:
        with open(os.path.join(model_dir, 'pre_trained_imembedding_path.txt'),
                  'w') as fw:
            fw.write('%s\n' % FLAGS.pre_trained_imembedding_path)
예제 #21
0
 def test_init(self):
     """Constructing LSTMModel(1, 1, 1, 1) yields an LSTMModel instance."""
     instance = LSTMModel(1, 1, 1, 1)
     self.assertIsInstance(instance, LSTMModel)
예제 #22
0
from lstm_model import LSTMModel
if __name__ == "__main__":
    # Script entry point: obtain a model via LSTMModel.create_from_args()
    # with is_sample_mode=True, then hand control to its run() method.
    lstm_model = LSTMModel.create_from_args(is_sample_mode=True)
    lstm_model.run()
예제 #23
0
            model.fit(session, saver)

            with open(model.config.conll_output, 'w') as f:
                write_conll(f, output)
            with open(model.config.eval_output, 'w') as f:
                for sentence, labels, predictions in output:
                    print_sentence(f, sentence, labels, predictions)


if __name__ == '__main__':
    # Command-line entry point.  The flags select one of three modes:
    #   -t           build the model and call top10(...)
    #   --top10eval  build the model and call eval_test(...)
    #   (neither)    call main(...)
    # When both flags are given, -t takes precedence.
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', action='store_true')
    parser.add_argument('--top10eval', action='store_true')
    x = parser.parse_args()

    # The config is completed from the loaded word vectors.
    config = Config()
    embeddings, tokens = loadWordVectors()
    config.embed_size = embeddings.shape[1]
    config.vocab_size = len(tokens)

    if not (x.t or x.top10eval):
        main(config, embeddings, tokens)
    else:
        # Both evaluation modes build the model inside a fresh graph.
        graph = tf.Graph()
        with graph.as_default():
            model = LSTMModel(config, embeddings, tokens)
            if x.t:
                top10(config, embeddings, tokens, model)
            else:
                eval_test(embeddings, tokens, model)
예제 #24
0
test_y = []
test_result = []
for x, y, r in testSequances:
    test_x.append(x)
    test_y.append(y)
    test_result.append(r)

train_x = np.array(train_x)
train_y = np.array(train_y)
test_x = np.array(test_x)
test_y = np.array(test_y)

print(len(train_x))
print(len(test_x))

model = LSTMModel(3, [512, 512], 10, 20)
model.train(train_x, train_y, test_x, test_y, 10, 200)

results = model.predict(test_x)
count = 0
total = 0

for index, result in enumerate(results):
    left = argmax(result[2][:10])
    right = argmax(result[2][10:])

    if left * 10 + right == test_result[index]:
        count += 1
    total += 1

print("SCORE: " + str(count / float(total)))
예제 #25
0
sys.dont_write_bytecode = True

from data import DataLoader
from lstm_model import LSTMModel
from feed_forward_model import FeedForwardModel

# Repeats the experiment k times: iteration i loads split i, trains a fresh
# LSTMModel on the noisy-symbol inputs, and records element 1 of the
# evaluate() result.  Finally prints the per-run values and their mean.
k = 5
results = []
total = 0

for i in range(k):
    dataLoader = DataLoader(src="MNIST_data/dataset")

    # Nine values come back; only the noisy-symbol inputs and the targets
    # are used below.
    (noisyTrain, noisySymbolsTrain, targetsTrain,
     noisyValidation, noisySymbolsValidation, targetsValidation,
     noisyTest, noisySymbolsTest, targetsTest) = \
        dataLoader.getSequencialData(i, 2, 2)

    model = LSTMModel(2, [512, 512], 1568, 20)
    model.train(noisySymbolsTrain,
                targetsTrain,
                noisySymbolsValidation,
                targetsValidation,
                100,
                200)
    result = model.evaluate(noisySymbolsTest, targetsTest)

    results.append(result[1])
    total = total + result[1]

print(results)
print("SCORE: " + str(total / k))
예제 #26
0
k = 5
tctoScores = []
tcfoScores = []
fctoScores = []
fcfoScores = []
epochs = []

k_fold_index = 0
bestModel = None
maxAcccuracy = 0
while k_fold_index < k:

    [train_x, train_y, val_x, val_y, test_x,
     test_y] = dataLoader.getData(k_fold_index, 4, 4, num_with_symbols)

    model = LSTMModel(4, [512, 512], 784, 50)
    epoch = model.train(train_x, train_y, val_x, val_y, 100, 200)
    epochs.append(epoch)

    results = model.predict(test_x)

    tcto = 0
    tcfo = 0
    fcto = 0
    fcfo = 0
    total = 0

    for index, result in enumerate(results):

        left = argmax(result[3][-20:-10])
        right = argmax(result[3][-10:])
예제 #27
0
                temperature(0) + [0, 0, 1]
            ])
            y.append([
                temperature(b),
                temperature(a),
                temperature((a + b) % 10),
                temperature((a + b) // 10)
            ])

x = np.array(x)
y = np.array(y)

test_x = np.array(test_x)
test_y = np.array(test_y)

model = LSTMModel(4, [20, 20], 12, 9)
model.train(x, y, x, y, 10, 5000)

results = model.predict(test_x)
activations_0 = model.getActivations(0, test_x)
activations_1 = model.getActivations(1, test_x)

count = 0

for index, result in enumerate(results):
    right = temperatureToInt(result[2])
    left = temperatureToInt(result[3])
    rightTarget = temperatureToInt(test_y[index][2])
    leftTarget = temperatureToInt(test_y[index][3])

    if left == leftTarget and right == rightTarget:
예제 #28
0
        else:
            x.append([
                temperature(b) + [1, 0, 0],
                temperature(a) + [1, 0, 0],
                temperature(0) + [0, 1, 0],
                temperature(0) + [0, 0, 1]
            ])
            y.append([[b / 10.0], [a / 10.0], [((a + b) % 10) / 10.0],
                      [((a + b) // 10) / 10.0]])

x = np.array(x)
y = np.array(y)
test_x = np.array(test_x)
test_y = np.array(test_y)

model = LSTMModel(4, [512, 512], 12, 1)
model.train(x, y, test_x, test_y, 10, 5000)

results = model.predict(x)

count = 0

for index, result in enumerate(results):
    left = round(result[2][0], 1)
    right = round(result[3][0], 1)
    leftTarget = round(y[index][2][0], 1)
    rightTarget = round(y[index][3][0], 1)

    if left == leftTarget and right == rightTarget:
        count += 1
예제 #29
0
                                                    len(trainSamples[b]) - 1)]

                    val_x.append([right, left, operators[key], equals])
                    val_y.append([
                        temperature(b),
                        temperature(a), rightTarget, leftTarget
                    ])

train_x = np.array(train_x)
train_y = np.array(train_y)
val_x = np.array(val_x)
val_y = np.array(val_y)
test_x = np.array(test_x)
test_y = np.array(test_y)

model = LSTMModel(4, [10, 100], 784, 9)
model.train(train_x, train_y, val_x, val_y, 100, 5000)

results = model.predict(val_x)

count = 0

for index, result in enumerate(results):
    left = result[2]
    right = result[3]
    leftTarget = val_y[index][2]
    rightTarget = val_y[index][3]

    if is_temperature(left, leftTarget) and is_temperature(right, rightTarget):
        count += 1
def train_with_validation(train_set, valid_set, corpus, 
                          n_hidden=128, n_emb=128, batch_size=32, conv_size=5,             
                          pooling_type='mean', model_type='lstm', w2v_fn=None, 
                          model_save_fn=None, disp_proc=True):
    '''Train a model with patience-based early stopping on a validation set.

    The model is saved (via ``model.save``) every time the validation error
    improves.  Nothing is returned; progress and the final summary are
    printed.

    Args:
        train_set: (x, mask, y) triple; each element is wrapped in
            ``theano.shared`` and indexed by minibatch inside the compiled
            update function.
        valid_set: (x, mask, y) triple, used as-is and indexed with the
            minibatch index arrays.
        corpus: passed through to the model constructor.
        n_hidden, n_emb: passed through to the model constructor.
        batch_size: minibatch size for training and validation (also passed
            to the CNN model constructor).
        conv_size: passed to the CNN model constructor only.
        pooling_type: mean or max
        model_type: lstm, rnn or cnn
        w2v_fn: path of a gensim Word2Vec model to load and pass to the
            model; ``None`` means no pre-trained embeddings are used.
        model_save_fn: target passed to ``model.save``; defaults to
            ``model-res/<model_type>-<pooling_type>``.
        disp_proc: whether to print the training cost every 20 updates.

    Raises:
        Exception: if ``w2v_fn`` is given but does not exist, or if
            ``model_type`` is not one of the supported values.
    '''
    # Only train_set is converted by theano.shared
    train_x, train_mask, train_y = [theano.shared(_) for _ in train_set]
    valid_x, valid_mask, valid_y = valid_set
    n_train, n_valid = len(train_x.get_value()), len(valid_x)

    print("%d training examples" % n_train)
    print("%d validation examples" % n_valid)
    
    # Fixed seeds keep runs reproducible.
    rng = np.random.RandomState(1224)
    th_rng = RandomStreams(1224)
    
    if model_save_fn is None:
        model_save_fn = os.path.join('model-res', '%s-%s' % (model_type, pooling_type))
    
    # Load Word2Vec 
    if w2v_fn is None:
        gensim_w2v = None
    else:
        print('Loading word2vec model...')
        if not os.path.exists(w2v_fn):
            # Report the missing path (previously model_type was reported,
            # which said nothing about what could not be found).
            raise Exception("Word2Vec model doesn't exist!", w2v_fn)
        gensim_w2v = Word2Vec.load(w2v_fn)
    
    # Define Model
    if model_type == 'lstm':
        model = LSTMModel(corpus, n_emb, n_hidden, pooling_type, 
                          rng=rng, th_rng=th_rng, gensim_w2v=gensim_w2v)
    elif model_type == 'rnn':
        model = RNNModel(corpus, n_emb, n_hidden, pooling_type, 
                         rng=rng, th_rng=th_rng, gensim_w2v=gensim_w2v)
    elif model_type == 'cnn':
        model = CNNModel(corpus, n_emb, n_hidden, batch_size, conv_size, pooling_type, 
                         rng=rng, th_rng=th_rng, gensim_w2v=gensim_w2v)
    else:
        raise Exception("Invalid model type!", model_type)
    
    x, mask, y = model.x, model.mask, model.y
    batch_idx_seq, use_noise = model.batch_idx_seq, model.use_noise
    
    # One parameter update is split into four compiled functions that must be
    # called in this order; only the first one consumes a minibatch and
    # returns the cost.
    f_update_1_gr     = theano.function(inputs =[batch_idx_seq], 
                                        outputs=model.cost, 
                                        updates=model.gr_updates,
                                        givens ={x:    train_x[batch_idx_seq],
                                                 mask: train_mask[batch_idx_seq],
                                                 y:    train_y[batch_idx_seq]},
                                        on_unused_input='ignore')
    f_update_2_gr_sqr = theano.function(inputs=[], updates=model.gr_sqr_updates)
    f_update_3_dp_sqr = theano.function(inputs=[], updates=model.dp_sqr_updates)
    f_update_4_params = theano.function(inputs=[], updates=model.param_updates)
    
    # keep validation set consistent: the CNN is the only model for which the
    # tail minibatch is not kept, and valid_y is re-assembled from the same
    # batches that are used for prediction so the two line up.
    keep_tail = model_type != 'cnn'
    valid_idx_batches = get_minibatches_idx(n_valid, batch_size, keep_tail=keep_tail)
    valid_y = np.concatenate([valid_y[idx_batch] for idx_batch in valid_idx_batches])
    
    # train the model
    patience = 5000                 # stop once more updates than this were made
    patience_increase = 2           # patience grows to uidx * this on a clear improvement
    improvement_threshold = 0.995   # relative improvement that counts as "clear"
    disp_freq = 20
    validation_freq = 100
    
    max_epoch = 500
    best_iter = 0
    best_validation_err = np.inf
    
    epoch = 0
    uidx = 0
    done_looping = False
    start_time = time.time()
    
    while (epoch < max_epoch) and (not done_looping):
        epoch += 1        
        # Get new shuffled index for the training set. use rng to make result keep same with specific random-seed
        for idx_batch in get_minibatches_idx(n_train, batch_size, shuffle=True, rng=rng, keep_tail=keep_tail):
            uidx += 1
            use_noise.set_value(1.)
            
            cost = f_update_1_gr(idx_batch)
            f_update_2_gr_sqr()
            f_update_3_dp_sqr()
            f_update_4_params()
            
            if uidx % disp_freq == 0 and disp_proc:
                print('epoch %i, minibatch %i, train cost %f' % (epoch, uidx, cost))
    
            if uidx % validation_freq == 0:
                # use_noise is switched off for validation and back on at the
                # top of the next training step.
                use_noise.set_value(0.)
                valid_y_pred = [model.predict_func(valid_x[idx_batch], valid_mask[idx_batch]) for idx_batch in valid_idx_batches]
                valid_y_pred = np.concatenate(valid_y_pred)
                this_validation_err = (valid_y_pred != valid_y).mean()
                print('epoch %i, minibatch %i, validation error %f %%' % (epoch, uidx, this_validation_err*100))
                
                if this_validation_err < best_validation_err:
                    if this_validation_err < best_validation_err*improvement_threshold:
                        patience = max(patience, uidx*patience_increase)                        
                    best_validation_err = this_validation_err
                    best_iter = uidx
                    model.save(model_save_fn)
                    
            if patience < uidx:
                done_looping = True
                break
        
    end_time = time.time()
    print('Optimization complete with best validation score of %f %%, at iter %d' % (best_validation_err * 100, best_iter))
    print('The code run for %d epochs, with %f epochs/sec' % (epoch, epoch / (end_time - start_time)))
    
    
    
예제 #31
0
def main(unused_args):
    """Compute the validation loss of every saved checkpoint and rank them.

    Every ``model_<iter>.ckpt`` found in ``<model_dir>/variables`` is restored
    into one ``LSTMModel`` built in ``'eval'`` mode and run for one epoch over
    the validation collection.  Each loss is cached in
    ``<output_dir>/model_<iter>.ckpt/loss.txt``; cached values are reused
    unless ``FLAGS.overwrite`` is set.  Finally ``loss_info.txt`` is written
    with one ``<iter> <loss>`` line per checkpoint, sorted by ascending loss.

    Exits with status 0 when ``loss_info.txt`` already exists and
    ``FLAGS.overwrite`` is not set.

    Args:
        unused_args: Ignored.
    """
    val_collection = FLAGS.val_collection
    overwrite = FLAGS.overwrite
    output_dir = utility.get_sim_dir(FLAGS)
    loss_info_file = os.path.join(output_dir, 'loss_info.txt')
    if os.path.exists(loss_info_file) and not overwrite:
        logger.info('%s exists. quit', loss_info_file)
        sys.exit(0)

    model_dir = utility.get_model_dir(FLAGS)
    config_path = os.path.join(os.path.dirname(__file__), 'model_conf',
                               FLAGS.model_name + '.py')
    config = utility.load_config(config_path)

    # The flag arrives as the string 'None' when no fluency method is wanted.
    if FLAGS.fluency_method == 'None':
        FLAGS.fluency_method = None
    config.fluency_method = FLAGS.fluency_method
    config.use_weighted_loss = (config.fluency_method == 'weighted')

    textbank = TextBank(utility.get_train_vocab_file(FLAGS))
    config.vocab_size = len(textbank.vocab)
    # The second whitespace-separated token of shape.txt is taken as the
    # visual feature size.  Use a context manager so the handle is closed.
    shape_path = os.path.join(utility.get_val_feat_dir(FLAGS), 'shape.txt')
    with open(shape_path) as shape_file:
        config.vf_size = int(shape_file.read().split()[1])

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config_proto = tf.ConfigProto(
        intra_op_parallelism_threads=FLAGS.ses_threads,
        gpu_options=gpu_options,
        allow_soft_placement=True)

    with_image_embedding = FLAGS.with_image_embedding > 0
    iter2loss = {}
    with tf.Graph().as_default(), tf.Session(config=config_proto) as session:

        assert len(config.buckets) >= 1
        assert config.buckets[-1] == config.max_num_steps
        with tf.device('/gpu:%d' % FLAGS.gpu):
            with tf.variable_scope("LSTMModel", reuse=None):
                if not with_image_embedding:
                    # Evaluating without the image embedding is deprecated.
                    print('Plz use image_embedding')
                    sys.exit(-1)
                model = LSTMModel(
                    mode='eval',
                    num_steps=config.buckets[-1],
                    config=config,
                    model_dir=model_dir,
                    flag_with_saver=True)
                model.build()

        # Collect (iteration, checkpoint path) pairs from the '.ckpt.meta'
        # files; the slice strips the 'model_' prefix (6 chars) and the
        # '.ckpt.meta' suffix (10 chars).
        model_path_list = []
        _dir = os.path.join(model_dir, 'variables')
        for _file in os.listdir(_dir):
            if _file.startswith('model_') and _file.endswith('.ckpt.meta'):
                iter_n = int(_file[6:-10])
                model_path = os.path.join(_dir, 'model_%d.ckpt' % iter_n)
                model_path_list.append((iter_n, model_path))
        # os.listdir order is arbitrary; evaluate in ascending iteration
        # order so runs and logs are reproducible.
        model_path_list.sort()

        data_provider = BucketDataProvider(val_collection,
                                           utility.get_train_vocab_file(FLAGS),
                                           feature=FLAGS.vf_name,
                                           language=FLAGS.language,
                                           flag_shuffle=False,
                                           method=config.fluency_method,
                                           rootpath=FLAGS.rootpath)
        for iter_n, model_path in model_path_list:
            loss_file = os.path.join(output_dir, 'model_%d.ckpt' % iter_n,
                                     'loss.txt')
            if os.path.exists(loss_file) and not overwrite:
                # Reuse the cached loss instead of re-evaluating.
                logger.info('load loss from %s', loss_file)
                with open(loss_file) as fr:
                    iter2loss[iter_n] = float(fr.readline().strip())
                continue
            loss_dir = os.path.split(loss_file)[0]
            if not os.path.exists(loss_dir):
                os.makedirs(loss_dir)

            model.saver.restore(session, model_path)
            logger.info('Continue to train from %s', model_path)

            val_cost = run_epoch(session, config.batch_size,
                                 config.buckets[-1], config, model,
                                 data_provider)
            logger.info(
                "Validation cost for checkpoint model_%d.ckpt is %.3f" %
                (iter_n, val_cost))

            iter2loss[iter_n] = val_cost
            with open(loss_file, "w") as fw:
                fw.write('%g' % val_cost)

    # dict.items() works on both Python 2 and 3 (iteritems() is Python 2
    # only).  Lowest loss first.
    sorted_iter2loss = sorted(iter2loss.items(), key=lambda x: x[1])
    with open(loss_info_file, 'w') as fw:
        fw.write('\n'.join(
            ['%d %g' % (iter_n, loss) for (iter_n, loss) in sorted_iter2loss]))
예제 #32
0
dataLoader = DataLoader(src="MNIST_data/dataset")

[
    noisyTrain,
    noisySymbolsTrain,
    targetsTrain,
    noisyValidation,
    noisySymbolsValidation,
    targetsValidation,
    noisyTest,
    noisySymbolsTest,
    targetsTest
] = dataLoader.getSequencialDataWithDontCare(0, 2, 2)

model = LSTMModel(2, [512, 512, 512], 1568, 20)
model.train(noisyTrain, targetsTrain, noisyValidation, targetsValidation, 100, 100)
result = model.evaluate(noisyTest, targetsTest)
predictions = model.predict(noisyTest)
count = 0
for index in range(len(predictions)):
    a = utils.argmax(predictions[index][:10])
    b = utils.argmax(predictions[index][10:])
    predicted = a * 10 + b

    a = utils.argmax(targetsTest[index][:10])
    b = utils.argmax(targetsTest[index][10:])
    actual = a * 10 + b

    if actual == predicted:
        count += 1