Example #1
0
def main(_):
    """Evaluate a fixed checkpoint once and write summaries.

    Builds the attention-OCR eval graph, then runs a single
    `evaluate_once` pass against a hard-coded checkpoint under
    FLAGS.train_log_dir, logging to FLAGS.eval_log_dir.
    """
    if not tf.gfile.Exists(FLAGS.eval_log_dir):
        tf.gfile.MakeDirs(FLAGS.eval_log_dir)

    dataset = common_flags.create_dataset(split_name=FLAGS.split_name)
    model = common_flags.create_model(dataset.num_char_classes,
                                      dataset.max_sequence_length,
                                      dataset.num_of_views, dataset.null_code)
    data = data_provider.get_data(
        dataset,
        FLAGS.batch_size,
        augment=False,
        central_crop_size=common_flags.get_crop_size())
    # No ground-truth labels are fed at eval time.
    endpoints = model.create_base(data.images, labels_one_hot=None)
    model.create_loss(data, endpoints)
    eval_ops = model.create_summaries(data,
                                      endpoints,
                                      dataset.charset,
                                      is_training=False)
    slim.get_or_create_global_step()
    # Force CPU-only evaluation.
    session_config = tf.ConfigProto(device_count={"GPU": 0})
    # NOTE(review): the checkpoint step is hard-coded; consider a flag.
    checkpoint_path = "%s/model.ckpt-90482" % FLAGS.train_log_dir
    slim.evaluation.evaluate_once(
        master=FLAGS.master,
        checkpoint_path=checkpoint_path,
        logdir=FLAGS.eval_log_dir,
        eval_op=eval_ops,
        session_config=session_config)
Example #2
0
def main(_):
    """Build the attention-OCR training graph and launch training."""
    prepare_training_dir()

    ds = common_flags.create_dataset(split_name=FLAGS.split_name)
    ocr_model = common_flags.create_model(
        ds.num_char_classes, ds.max_sequence_length, ds.num_of_views,
        ds.null_code)
    train_params = get_training_hparams()

    # A zero ps_tasks keeps everything on the local device; otherwise the
    # ReplicaDeviceSetter spreads variables across the (non-local) replicas.
    setter = tf.train.replica_device_setter(FLAGS.ps_tasks,
                                            merge_devices=True)
    with tf.device(setter):
        batch = data_provider.get_data(
            ds,
            FLAGS.batch_size,
            augment=train_params.use_augment_input,
            central_crop_size=common_flags.get_crop_size())
        net_endpoints = ocr_model.create_base(batch.images,
                                              batch.labels_one_hot)
        loss = ocr_model.create_loss(batch, net_endpoints)
        ocr_model.create_summaries(
            batch, net_endpoints, ds.charset, is_training=True)
        restore_fn = ocr_model.create_init_fn_to_restore(
            FLAGS.checkpoint, FLAGS.checkpoint_inception)
        if FLAGS.show_graph_stats:
            tf.logging.info('Total number of weights in the graph: %s',
                            calculate_graph_metrics())
        train(loss, restore_fn, train_params)
Example #3
0
def main(_):
  """Run the continuous evaluation loop over training checkpoints."""
  if not tf.gfile.Exists(FLAGS.eval_log_dir):
    tf.gfile.MakeDirs(FLAGS.eval_log_dir)

  ds = common_flags.create_dataset(split_name=FLAGS.split_name)
  ocr_model = common_flags.create_model(ds.num_char_classes,
                                        ds.max_sequence_length,
                                        ds.num_of_views, ds.null_code)
  batch = data_provider.get_data(
      ds,
      FLAGS.batch_size,
      augment=False,
      central_crop_size=common_flags.get_crop_size())
  # Labels are withheld at eval time.
  net_endpoints = ocr_model.create_base(batch.images, labels_one_hot=None)
  ocr_model.create_loss(batch, net_endpoints)
  summary_ops = ocr_model.create_summaries(batch, net_endpoints, ds.charset,
                                           is_training=False)
  slim.get_or_create_global_step()
  # Evaluate on CPU only.
  config = tf.ConfigProto(device_count={"GPU": 0})
  slim.evaluation.evaluation_loop(
      master=FLAGS.master,
      checkpoint_dir=FLAGS.train_log_dir,
      logdir=FLAGS.eval_log_dir,
      eval_op=summary_ops,
      num_evals=FLAGS.num_batches,
      eval_interval_secs=FLAGS.eval_interval_secs,
      max_number_of_evaluations=FLAGS.number_of_steps,
      session_config=config)
Example #4
0
def main(_):
    """Continuously evaluate classifier checkpoints from FLAGS.train_dir."""
    if not tf.gfile.Exists(FLAGS.eval_log_dir):
        tf.gfile.MakeDirs(FLAGS.eval_log_dir)

    ds = common_flags.create_dataset(FLAGS.dataset_name,
                                     FLAGS.dataset_split_name)
    clf = common_flags.create_model(num_classes=FLAGS.num_classes)
    batch = data_provider.get_data(ds,
                                   FLAGS.model_name,
                                   FLAGS.batch_size,
                                   is_training=False,
                                   height=FLAGS.height,
                                   width=FLAGS.width)
    logits, _ = clf.create_model(batch.images,
                                 num_classes=FLAGS.num_classes,
                                 is_training=False)
    summary_ops = clf.create_summary(batch, logits, is_training=False)
    slim.get_or_create_global_step()
    config = tf.ConfigProto()
    # Grow GPU memory on demand instead of grabbing it all up front.
    config.gpu_options.allow_growth = True
    slim.evaluation.evaluation_loop(
        master=FLAGS.master,
        checkpoint_dir=FLAGS.train_dir,
        logdir=FLAGS.eval_log_dir,
        eval_op=summary_ops,
        num_evals=FLAGS.num_evals,
        eval_interval_secs=FLAGS.eval_interval_secs,
        max_number_of_evaluations=FLAGS.number_of_steps,
        session_config=config)
Example #5
0
def local_train(stemmer=data_provider.NoStemmer(),
                text_representation='bag-of-words',
                create_model=dnn_model):
    """Train a model for 40 rounds of 5 epochs on a 90/10 split,
    printing metrics after every round."""
    data_provider.init_data_provider(ngrams=False)
    input_fmt = 'hot_vector' if create_model == dnn_model else 'sequential'
    X, Y = data_provider.get_data(input_format=input_fmt,
                                  output_format='categorical',
                                  ngrams=(text_representation == 'ngrams'),
                                  all_data=True)

    model = create_model(len(X[0]), len(Y[0]))

    X_train, X_val = split(X, 0.9)
    Y_train, Y_val = split(Y, 0.9)

    # Reset provider state and drop the full copies to reclaim memory.
    data_provider.STATE = data_provider.initial_state()
    del X, Y
    gc.collect()

    run_id = get_run_id(stemmer, text_representation, create_model)
    print(run_id)
    for _ in range(40):
        model.fit(X_train,
                  Y_train,
                  n_epoch=5,
                  validation_set=(X_val, Y_val),
                  show_metric=True,
                  run_id=run_id)
        compute_metrics(Y_val, get_predictions(model, X_val))
    def initial(self):
        """Build the inference graph and a session restored from
        FLAGS.checkpoint.

        Caches the provider's batch geometry (NHWC) on the instance and
        creates a single-image placeholder plus the model endpoints.
        """
        dataset = common_flags.create_dataset(split_name=FLAGS.split_name)
        model = common_flags.create_model(dataset.num_char_classes,
                                          dataset.max_sequence_length,
                                          dataset.num_of_views,
                                          dataset.null_code,
                                          charset=dataset.charset)
        data = data_provider.get_data(
            dataset,
            FLAGS.batch_size,
            augment=False,
            central_crop_size=common_flags.get_crop_size())

        # Image geometry taken from the provider's batch tensor (NHWC).
        self.image_height = int(data.images.shape[1])
        self.image_width = int(data.images.shape[2])
        self.image_channel = int(data.images.shape[3])
        self.num_of_view = dataset.num_of_views
        placeholder_shape = (1, self.image_height, self.image_width,
                             self.image_channel)
        # Fixed: was a Python-2 `print placeholder_shape` statement.
        print(placeholder_shape)
        self.placeholder = tf.placeholder(tf.float32, shape=placeholder_shape)
        self.endpoint = model.create_base(self.placeholder,
                                          labels_one_hot=None)
        init_fn = model.create_init_fn_to_restore(FLAGS.checkpoint)

        # NOTE(review): `config` is not defined in this method -- it must
        # come from module/class scope; confirm before relying on it.
        self.sess = tf.Session(config=config)
        tf.tables_initializer().run(session=self.sess)
        init_fn(self.sess)
Example #7
0
def main(_):
    """Loop forever, evaluating new classifier checkpoints as they appear."""
    if not tf.gfile.Exists(FLAGS.eval_log_dir):
        tf.gfile.MakeDirs(FLAGS.eval_log_dir)

    dataset = common_flags.create_dataset(FLAGS.dataset_name,
                                          FLAGS.dataset_split_name)
    net = common_flags.create_model(num_classes=FLAGS.num_classes)
    eval_data = data_provider.get_data(dataset,
                                       FLAGS.model_name,
                                       FLAGS.batch_size,
                                       is_training=False,
                                       height=FLAGS.height,
                                       width=FLAGS.width)
    logits, endpoints = net.create_model(eval_data.images,
                                         num_classes=FLAGS.num_classes,
                                         is_training=False)
    eval_ops = net.create_summary(eval_data, logits, is_training=False)
    slim.get_or_create_global_step()
    sess_cfg = tf.ConfigProto()
    sess_cfg.gpu_options.allow_growth = True  # allocate GPU memory lazily
    slim.evaluation.evaluation_loop(
        master=FLAGS.master,
        checkpoint_dir=FLAGS.train_dir,
        logdir=FLAGS.eval_log_dir,
        eval_op=eval_ops,
        num_evals=FLAGS.num_evals,
        eval_interval_secs=FLAGS.eval_interval_secs,
        max_number_of_evaluations=FLAGS.number_of_steps,
        session_config=sess_cfg)
Example #8
0
def main(_):
    """Evaluate the newest attention-OCR checkpoint exactly once and exit."""
    if not tf.gfile.Exists(FLAGS.eval_log_dir):
        tf.gfile.MakeDirs(FLAGS.eval_log_dir)

    dataset = common_flags.create_dataset(split_name=FLAGS.split_name)
    model = common_flags.create_model(dataset.num_char_classes,
                                      dataset.max_sequence_length,
                                      dataset.num_of_views, dataset.null_code)
    batch = data_provider.get_data(
        dataset,
        FLAGS.batch_size,
        augment=False,
        central_crop_size=common_flags.get_crop_size())
    endpoints = model.create_base(batch.images, labels_one_hot=None)
    model.create_loss(batch, endpoints)
    eval_ops = model.create_summaries(
        batch, endpoints, dataset.charset, is_training=False)
    slim.get_or_create_global_step()
    # CPU-only session; the GPU options below are harmless with zero
    # visible GPUs and kept for parity with the original configuration.
    session_config = tf.ConfigProto(device_count={"GPU": 0})
    session_config.gpu_options.allow_growth = True
    session_config.log_device_placement = False
    slim.evaluation.evaluation_loop(
        master=FLAGS.master,
        checkpoint_dir=FLAGS.train_log_dir,
        logdir=FLAGS.eval_log_dir,
        eval_op=eval_ops,
        num_evals=FLAGS.num_batches,
        eval_interval_secs=FLAGS.eval_interval_secs,
        max_number_of_evaluations=1,  # a single evaluation pass
        session_config=session_config)
Example #9
0
def main(_):
    """Build the classifier training graph and launch training."""
    prepare_training_dir()
    logging.info('dataset_name: {}, split_name: {}'.format(
        FLAGS.dataset_name, FLAGS.dataset_split_name))
    ds = common_flags.create_dataset(FLAGS.dataset_name,
                                     FLAGS.dataset_split_name)
    clf = common_flags.create_model(num_classes=FLAGS.num_classes)

    batch = data_provider.get_data(ds,
                                   FLAGS.model_name,
                                   batch_size=FLAGS.batch_size,
                                   is_training=True,
                                   height=FLAGS.height,
                                   width=FLAGS.width)

    logits, endpoints = clf.create_model(batch.images,
                                         num_classes=ds.num_classes,
                                         weight_decay=FLAGS.weight_decay,
                                         is_training=True)
    loss = clf.create_loss(logits, endpoints, batch.labels_one_hot,
                           FLAGS.label_smoothing)
    clf.create_summary(batch, logits, is_training=True)
    restore_fn = clf.create_init_fn_to_restore(FLAGS.checkpoint_path,
                                               FLAGS.checkpoint_inception,
                                               FLAGS.checkpoint_exclude_scopes)
    trainable = clf.get_variables_to_train(FLAGS.trainable_scopes)
    if FLAGS.show_graph_state:
        logging.info('Total number of weights in the graph: %s',
                     utils.calculate_graph_metrics())
    train(loss, restore_fn, trainable)
Example #10
0
def run_svm_instance(actor, args=None):
    """Load the jokes dataset, build a model, and schedule the SVM
    instance process on the running asyncio event loop.

    Args:
        actor: handle forwarded to svm_instance_process.
        args: optional dict of per-instance options. Defaults to a fresh
            dict per call (the original used a mutable default `{}`,
            which is shared across all calls).
    """
    if args is None:
        args = {}
    X, Y = data_provider.get_data('../scrapper/out/unijokes.json',
                                  input_format='hot_vector',
                                  output_format='categorical',
                                  stemmer=nltk.stem.lancaster.LancasterStemmer())
    model = create_model(len(X[0]), len(Y[0]))
    asyncio.ensure_future(svm_instance_process(actor, args))
Example #11
0
def main(_):
  """Restore a trained model and measure recognition accuracy on
  ./real_train.

  The expected text for each image is its file name without extension.
  Per-image accuracy is 1 - levenshtein(expected, predicted)/len(expected);
  the mean over 20 batches of 32 images is printed.
  """
  prepare_training_dir()
  # Geometry fixed to match the exported checkpoint.
  num_char_classes = 74
  max_sequence_length = 37
  num_of_views = 4
  null_code = 2
  model = common_flags.create_model(num_char_classes,
                                    max_sequence_length,
                                    num_of_views, null_code)
  # (Removed an unused `hparams = get_training_hparams()` binding.)

  # If ps_tasks is zero, the local device is used. When using multiple
  # (non-local) replicas, the ReplicaDeviceSetter distributes the variables
  # across the different devices.
  device_setter = tf.train.replica_device_setter(
      FLAGS.ps_tasks, merge_devices=True)
  with tf.device(device_setter):
    images_orig = tf.placeholder(tf.float32, [32, 150, 600, 3],
                                 name='image_orig')
    labels = tf.placeholder(tf.int32, [32, 37, ], name='label')
    data = data_provider.get_data(images_orig, labels, num_of_views,
                                  num_char_classes)
    endpoints = model.create_base(data.images, data.labels_one_hot)
    saver = tf.train.Saver()

    with tf.Session() as sess:
      tf.global_variables_initializer().run()
      saver.restore(sess, './trained/my-model-240')
      filelist = os.listdir('./real_train')
      rates = list()
      for num in range(20):
        images = list()
        for i in range(32):
          image = np.resize(
              np.array(Image.open(
                  os.path.join('./real_train', filelist[i + 32 * num]))),
              (150, 600, 3))
          images.append(image)
        images = np.array(images)
        # Labels are required by the feed but ignored at inference time.
        y = np.array(np.random.randint(10, size=[32, 37, ]), dtype=np.int32)
        result = sess.run(endpoints.predicted_chars,
                          feed_dict={images_orig: images, labels: y})
        # (Removed a dead per-batch read of ./vocab.txt whose result was
        # never used.)
        rate = 0
        for i in range(32):
          words = idx_to_words(list(result[i]))
          expected = filelist[i + 32 * num][:-4]
          rate += distance.levenshtein(expected, words) / float(len(expected))
        rates.append(1 - float(rate) / 32)
      print(sum(rates) / 20)
Example #12
0
  def test_optionally_applies_central_crop(self):
    """A (width, height) central crop must dictate the output image size."""
    crop_width, crop_height = 500, 100
    n = 4
    data = data_provider.get_data(
        dataset=datasets.fsns_test.get_test_split(),
        batch_size=n,
        augment=True,
        central_crop_size=(crop_width, crop_height))

    with self.test_session() as sess, queues.QueueRunners(sess):
      images_np = sess.run(data.images)

    self.assertEqual(images_np.shape, (n, crop_height, crop_width, 3))
Example #13
0
  def test_provided_data_has_correct_shape(self):
    """Uncropped FSNS batches are 150x600x3 images with 37x134 one-hot
    labels."""
    n = 4
    data = data_provider.get_data(
        dataset=datasets.fsns_test.get_test_split(),
        batch_size=n,
        augment=True,
        central_crop_size=None)

    with self.test_session() as sess, queues.QueueRunners(sess):
      images_np, labels_np = sess.run([data.images, data.labels_one_hot])

    self.assertEqual(images_np.shape, (n, 150, 600, 3))
    self.assertEqual(labels_np.shape, (n, 37, 134))
 def test_labels_correctly_shuffled(self):
     # Visual smoke check: displays sample images and prints their decoded
     # texts / probabilities / labels. There are no assertions, so this
     # "test" only verifies the pipeline runs end to end.
     batch_size = 4
     data = data_provider.get_data(
         dataset=datasets.fsns_test.get_test_split(),
         batch_size=batch_size,
         augment=True,
         central_crop_size=None)
     with self.test_session() as sess, queues.QueueRunners(sess):
         images, labels, probs, texts = sess.run(
             [data.images, data.labels, data.probs, data.texts])
         # NOTE(review): range(batch_size * batch_size) indexes past the
         # fetched batch (only batch_size elements exist) -- this likely
         # meant range(batch_size); confirm before relying on this loop.
         for i in range(batch_size * batch_size):
             plt.imshow(images[i])
             print(texts[i], probs[i], labels[i])
Example #15
0
 def test_provided_data_has_correct_shape(self):
     """Flowers batches preprocessed for inception_v3 must be 224x224x3."""
     dataset_name = 'flowers'
     model_name = 'inception_v3'
     for split_name in ['train']:
         # Simplified from the redundant `True if ... else False` ternary.
         is_training = split_name == 'train'
         dataset = common_flags.create_dataset(dataset_name, split_name)
         batch_size = 4
         data = data_provider.get_data(dataset,
                                       model_name,
                                       batch_size=batch_size,
                                       is_training=is_training,
                                       height=224,
                                       width=224)
         with self.test_session() as sess, slim.queues.QueueRunners(sess):
             images_np, labels_np = sess.run([data.images, data.labels])
         self.assertEqual(images_np.shape, (batch_size, 224, 224, 3))
Example #16
0
def train():
    """Train the digit model on sudoku + mnist tfrecords with slim."""
    names = ["sudoku_train"] * 50 + ["mnist_train"]
    paths = ["data/{}.tfrecords".format(x) for x in names]

    # Batched input pipeline.
    images, labels = data_provider.get_data(paths,
                                            batch_size=FLAGS.batch_size,
                                            is_training=True)

    # Network in training mode (dropout and batch norm active).
    with slim.arg_scope([slim.layers.dropout, slim.batch_norm],
                        is_training=True):
        predictions = model(images)

    # A few input images for tensorboard.
    tf.image_summary('images', images, max_images=5)

    # Cross-entropy plus any registered regularization losses.
    slim.losses.softmax_cross_entropy(predictions, labels)
    total_loss = slim.losses.get_total_loss()
    tf.scalar_summary('loss', total_loss)

    # Exponentially decayed learning rate driven by the global step.
    step = slim.get_or_create_global_step()
    lr = tf.train.exponential_decay(FLAGS.initial_learning_rate,
                                    global_step=step,
                                    decay_steps=FLAGS.decay_steps,
                                    decay_rate=FLAGS.decay_rate)

    train_op = slim.learning.create_train_op(total_loss,
                                             tf.train.AdamOptimizer(lr))

    logging.set_verbosity(1)

    # Run the slim training loop.
    slim.learning.train(train_op,
                        FLAGS.log_dir,
                        save_summaries_secs=20,
                        save_interval_secs=60,
                        log_every_n_steps=100)
Example #17
0
def local_train(stemmer=data_provider.NoStemmer(),
                text_representation='bag-of-words', C=1, max_iter=10000):
    """Train and evaluate an SVM on a 90/10 split of the full dataset.

    Args:
        stemmer: stemmer instance installed into the provider state.
        text_representation: 'bag-of-words' or 'ngrams'.
        C: SVM regularization strength, passed to create_model.
        max_iter: iteration cap, passed to create_model.
    """
    data_provider.STATE['stemmer'] = stemmer
    X, Y = data_provider.get_data(input_format='hot_vector',
                                  output_format='numerical',
                                  ngrams=text_representation == 'ngrams',
                                  all_data=True)
    model = create_model(C, max_iter)
    X_train, X_val = split(X, 0.9)
    Y_train, Y_val = split(Y, 0.9)

    # Reset provider state and drop the full copies to reclaim memory.
    # (The original reset STATE twice; once is sufficient.)
    data_provider.STATE = data_provider.initial_state()
    del X, Y
    gc.collect()
    print(">>> {} {} {} {} {}".format(type(stemmer).__name__, text_representation, 'svm', C, max_iter))
    start = time.time()
    model.fit(X_train, Y_train)
    print(">>> TRAINING TIME: {}s".format(time.time() - start))
    Y_pred = model.predict(X_val)
    compute_metrics(Y_val, Y_pred)
Example #18
0
def main(_):
    """Parse -i/-o/-h options and run one evaluation loop pass.

    NOTE(review): several names used here (split_name, batch_size, master,
    train_log_dir, eval_log_dir, num_batches, eval_interval_secs,
    number_of_steps) are not defined in this function -- presumably
    module-level globals; confirm against the surrounding file.
    """
    # Default caption file next to the working directory. Bug fix: the
    # original concatenated without a separator ('/cwd' + 'caption.txt').
    tmp_capStore = os.path.join(os.path.abspath('.'), 'caption.txt')
    opts, args = getopt.getopt(sys.argv[1:], "hi:o:")
    input_file = ""
    output_file = tmp_capStore
    for op, value in opts:
        if op == "-i":
            input_file = value
        elif op == "-o":
            output_file = value
        elif op == "-h":
            usage()
            sys.exit()
    # input_file/output_file are collected but not consumed below --
    # presumably used by code outside this view; verify.
    dataset = utils.create_dataset(split_name=split_name)
    model = utils.create_model(dataset.num_char_classes,
                               dataset.max_sequence_length,
                               dataset.num_of_views, dataset.null_code)
    data = data_provider.get_data(dataset,
                                  batch_size,
                                  augment=False,
                                  central_crop_size=utils.get_crop_size())
    endpoints = model.create_base(data.images, labels_one_hot=None)
    model.create_loss(data, endpoints)
    eval_ops = model.create_summaries(data,
                                      endpoints,
                                      dataset.charset,
                                      is_training=False)
    slim.get_or_create_global_step()
    # CPU-only evaluation.
    session_config = tf.ConfigProto(device_count={"GPU": 0})
    slim.evaluation.evaluation_loop(master=master,
                                    checkpoint_dir=train_log_dir,
                                    logdir=eval_log_dir,
                                    eval_op=eval_ops,
                                    num_evals=num_batches,
                                    eval_interval_secs=eval_interval_secs,
                                    max_number_of_evaluations=number_of_steps,
                                    session_config=session_config)
Example #19
0
def main(_):
  """Run inference on FLAGS.input_image and print the predicted strings."""
  dataset = common_flags.create_dataset(split_name=FLAGS.split_name)
  model = common_flags.create_model(dataset.num_char_classes,
                                    dataset.max_sequence_length,
                                    dataset.num_of_views, dataset.null_code,
                                    charset=dataset.charset)
  data = data_provider.get_data(
      dataset,
      FLAGS.batch_size,
      augment=False,
      central_crop_size=common_flags.get_crop_size())

  # Resize the input to a single view's width. `//` keeps the width an
  # integer (the original relied on Python-2 integer division with `/`).
  input_image = Image.open(FLAGS.input_image).convert("RGB").resize(
      (data.images.shape[2] // dataset.num_of_views, data.images.shape[1]))
  input_array = np.array(input_image).astype(np.float32)
  # Add the leading batch dimension expected by the placeholder.
  input_array = np.expand_dims(input_array, axis=0)
  # Fixed: these were Python-2 print statements.
  print(input_array.shape)
  print(input_array.dtype)

  placeholder_shape = (1, data.images.shape[1], data.images.shape[2],
                       data.images.shape[3])
  print(placeholder_shape)
  image_placeholder = tf.placeholder(tf.float32, shape=placeholder_shape)
  endpoints = model.create_base(image_placeholder, labels_one_hot=None)
  init_fn = model.create_init_fn_to_restore(FLAGS.checkpoint)
  with tf.Session() as sess:
    tf.tables_initializer().run()  # required by the CharsetMapper
    init_fn(sess)
    predictions = sess.run(endpoints.predicted_text,
                           feed_dict={image_placeholder: input_array})
  print("Predicted strings:")
  for line in predictions:
    print(line)
Example #20
0
from data_provider import get_data
from makiflow.trainers import SegmentatorTrainer
import makiflow as mf
import os

if __name__ == "__main__":
    # Pin work to GPU 0 and run the segmentation trainer experiment sweep.
    mf.set_main_gpu(0)
    x_train, y_train, num_positives, x_test, y_test = get_data()
    os.makedirs('experiments', exist_ok=True)
    runner = SegmentatorTrainer('exp_params.json', 'experiments')
    runner.set_test_data(x_test, y_test)
    runner.set_train_data(x_train, y_train, num_positives)
    runner.start_experiments()
Example #21
0
def main(_):
    """Build the attention-OCR training graph and start training."""
    # Check/prepare the training directory.
    prepare_training_dir()

    # Build the dataset; split_name selects e.g. 'train' or 'test'.
    dataset = common_flags.create_dataset(split_name=FLAGS.split_name)

    # Configure the model (e.g. max_sequence_length: 37, num_of_views: 4,
    # null_code: 133). No graph is built yet; this only returns the model
    # object with its parameters initialized.
    model = common_flags.create_model(dataset.num_char_classes,
                                      dataset.max_sequence_length,
                                      dataset.num_of_views, dataset.null_code)
    hparams = get_training_hparams()

    # If ps_tasks is zero, the local device is used. When using multiple
    # (non-local) replicas, the ReplicaDeviceSetter distributes the variables
    # across the different devices.
    device_setter = tf.train.replica_device_setter(FLAGS.ps_tasks,
                                                   merge_devices=True)
    with tf.device(device_setter):
        # Fetch the training data.
        data = data_provider.get_data(
            dataset,
            FLAGS.batch_size,
            augment=hparams.use_augment_input,
            central_crop_size=common_flags.get_crop_size())

        # Debug helper (commented out): print the dataset tensors.
        # print("#######################")
        # print("images:", data.images)
        # print("labels:", data.labels)
        # print(dir(data.labels))
        # print("labels_one_hot:", data.labels_one_hot.shape)
        # print("labels_0:", data.labels[0])
        # print("labels_one_host_0:", data.labels_one_hot[0])
        # print("#######################")
        # init = tf.global_variables_initializer()
        # with tf.Session() as session:
        #     session.run(init)
        #     coord = tf.train.Coordinator()
        #     threads = tf.train.start_queue_runners(coord=coord)
        #     labels = session.run(data.labels)
        #     print(labels[0])
        #     labels = session.run(data.labels_one_hot)
        #     print(labels[0])
        #     coord.request_stop()
        #     coord.join(threads)
        #     return

        # Build the model graph.
        endpoints = model.create_base(data.images, data.labels_one_hot)
        # Build the loss.
        total_loss = model.create_loss(data, endpoints)
        model.create_summaries(data,
                               endpoints,
                               dataset.charset,
                               is_training=True)

        # Restore weights from checkpoints.
        init_fn = model.create_init_fn_to_restore(FLAGS.checkpoint,
                                                  FLAGS.checkpoint_inception)
        if FLAGS.show_graph_stats:
            logging.info('Total number of weights in the graph: %s',
                         calculate_graph_metrics())
        train(total_loss, init_fn, hparams)
def main(_):
    """Multi-GPU training: one model clone per GPU fed from a shared
    CPU-side prefetch queue; per-clone losses are gathered and passed to
    train_multigpu.
    """
    prepare_training_dir()

    dataset = common_flags.create_dataset(split_name=FLAGS.split_name)
    model = common_flags.create_model(dataset.num_char_classes,
                                      dataset.max_sequence_length,
                                      dataset.num_of_views, dataset.null_code)
    hparams = get_training_hparams()

    # The replica device setter is not used here; the input pipeline is
    # pinned to the CPU and clones are placed on explicit GPUs below.
    with tf.device("/cpu:0"):
        provider = data_provider.get_data(
            dataset,
            FLAGS.batch_size,
            augment=hparams.use_augment_input,
            central_crop_size=common_flags.get_crop_size())
        batch_queue = slim.prefetch_queue.prefetch_queue(
            [provider.images, provider.images_orig, provider.labels,
             provider.labels_one_hot],
            capacity=2 * FLAGS.num_clones)

    losses = []
    for i in xrange(FLAGS.num_clones):
        with tf.name_scope("clone_{0}".format(i)):
            with tf.device("/gpu:{0}".format(i)):
                images, images_orig, labels, labels_one_hot = (
                    batch_queue.dequeue())
                # Clone 0 creates the variables; later clones reuse them.
                if i == 0:
                    endpoints = model.create_base(images, labels_one_hot)
                else:
                    endpoints = model.create_base(images,
                                                  labels_one_hot,
                                                  reuse=True)
                # NOTE(review): rebuilt every iteration; the last clone's
                # init_fn is the one passed to train_multigpu below.
                init_fn = model.create_init_fn_to_restore(
                    FLAGS.checkpoint, FLAGS.checkpoint_inception)
                if FLAGS.show_graph_stats:
                    logging.info('Total number of weights in the graph: %s',
                                 calculate_graph_metrics())

                data = InputEndpoints(images=images,
                                      images_orig=images_orig,
                                      labels=labels,
                                      labels_one_hot=labels_one_hot)

                total_loss, single_model_loss = model.create_loss(
                    data, endpoints)
                losses.append((single_model_loss, i))
                with tf.device("/cpu:0"):
                    # Bug fix: the original used 'model_loss'.format(i),
                    # which ignored i and gave every clone the same tag.
                    tf.summary.scalar('model_loss_{0}'.format(i),
                                      single_model_loss)
                    model.create_summaries_multigpu(data,
                                                    endpoints,
                                                    dataset.charset,
                                                    i,
                                                    is_training=True)
    train_multigpu(losses, init_fn, hparams)
Example #23
0
import tensorflow as tf
import os
import model
import data_provider

# Where checkpoints are written (created on first run).
MODEL_SAVE_PATH = "path/to/model"
if not os.path.exists(MODEL_SAVE_PATH):
    os.makedirs(MODEL_SAVE_PATH)

MODEL_NAME = "face.ckpt"
# Non-trainable global step counter.
global_step = tf.Variable(0, trainable=False)

# Single (image, label) example tensors from the provider's "train" split.
image, label = data_provider.get_data("train")
num_classes = 2
learning_rate = 0.001
batch_size = 30
# NOTE(review): assumes the training set holds 40000 examples -- confirm.
TRAINING_ROUNDS = int(40000 / batch_size)

# Capacity of the batching queue.
capacity = 1000 + 3 * batch_size

image_batch, label_batch = tf.train.batch([image, label],
                                          batch_size=batch_size,
                                          capacity=capacity,
                                          allow_smaller_final_batch=True)

print(image_batch, label_batch)

# Feed placeholders: flattened 64x64 images, one-hot labels, and dropout
# keep probability -- presumably consumed by training code beyond this
# chunk of the file; verify against the rest of the script.
x = tf.placeholder(tf.float32, [None, 64 * 64])
y = tf.placeholder(tf.float32, [None, num_classes])
keep_prob = tf.placeholder(tf.float32)  #dropout (keep probability)
Example #24
0
def main(_):
    """Train the OCR model with a hand-rolled feed_dict loop.

    Builds the graph for fixed batches of 64 images (150x600x3) with
    integer label sequences of length 25, restores weights via the
    checkpoint flags, then trains for 400 epochs over the in-memory
    dataset returned by prehandle_data.get_data(). Every 200 steps the
    loss and a length-normalized Levenshtein distance are printed and
    appended to result_1.txt; a checkpoint is saved every 40 epochs.
    """
    prepare_training_dir()
    batch_size = 64  # must match the placeholder shapes below
    num_char_classes = 77
    # less than 37 (original note) -- TODO confirm vocabulary size
    max_sequence_length = 25
    num_of_views = 4
    #  null_code = 42
    null_code = 2
    model = common_flags.create_model(num_char_classes,
                                      max_sequence_length,
                                      num_of_views, null_code)
    hparams = get_training_hparams()

    def _normalized_distance(pred, target):
        """Levenshtein distance between pred and target, clipped to the
        span between the start marker (1) and end marker (2), divided by
        the target span length. Assumes both markers occur in target --
        TODO confirm; list.index() raises ValueError otherwise."""
        start = target.index(1) + 1
        end = target.index(2)
        span = float(end - start)
        if 1 in pred and 2 in pred and pred.index(1) < pred.index(2):
            clipped = pred[pred.index(1) + 1:pred.index(2)]
        elif 1 in pred and 2 not in pred:
            clipped = pred[pred.index(1) + 1:]
        elif 2 in pred and 1 not in pred:
            clipped = pred[:pred.index(2)]
        else:
            # No usable marker pair in the prediction: charge the full
            # (normalized) target span, as the original code did.
            return (end - start) / span
        return distance.levenshtein(clipped, target[start:end]) / span

    # If ps_tasks is zero, the local device is used. When using multiple
    # (non-local) replicas, the ReplicaDeviceSetter distributes the variables
    # across the different devices.
    device_setter = tf.train.replica_device_setter(FLAGS.ps_tasks,
                                                   merge_devices=True)
    with tf.device(device_setter):
        images_orig = tf.placeholder(tf.float32, [batch_size, 150, 600, 3],
                                     name='image_orig')
        labels = tf.placeholder(tf.int32, [batch_size, max_sequence_length],
                                name='label')
        data = data_provider.get_data(images_orig, labels, num_of_views,
                                      num_char_classes)
        #      augment=hparams.use_augment_input,
        #      central_crop_size=None)
        endpoints = model.create_base(data.images, data.labels_one_hot)
        total_loss = model.create_loss(labels, endpoints)
        init_fn = model.create_init_fn_to_restore(FLAGS.checkpoint,
                                                  FLAGS.checkpoint_inception)
        train_op = train(total_loss, init_fn, hparams)
        saver = tf.train.Saver()

        with tf.Session() as sess:
            tf.global_variables_initializer().run()
            images_dataset, labels_dataset = prehandle_data.get_data()
            print('data prepared')
            for epoch in range(400):
                # Reshuffle the whole dataset each epoch.
                ids = np.random.permutation(images_dataset.shape[0])
                images_permutation = images_dataset[ids]
                labels_permutation = labels_dataset[ids]
                # BUG FIX: floor division -- plain "/" yields a float under
                # Python 3 and range() would raise TypeError.
                for step in range(images_dataset.shape[0] // batch_size):
                    batch = slice(step * batch_size, (step + 1) * batch_size)
                    result, loss, _ = sess.run(
                        [endpoints.predicted_chars, total_loss, train_op],
                        feed_dict={images_orig: images_permutation[batch],
                                   labels: labels_permutation[batch]})
                    if step % 200 == 0:
                        print(loss)
                        target = labels_permutation[batch]
                        # Log a few prediction/target pairs for eyeballing.
                        # BUG FIX: context managers guarantee file handles
                        # are closed even if a write raises.
                        with open('./aaa.txt', 'a+') as f:
                            for row in range(min(5, result.shape[0])):
                                f.write(''.join(
                                    idx_to_words(list(result[row]))) + '\n')
                                f.write(''.join(
                                    idx_to_words(list(target[row]))) + '\n')
                                f.write('--------------------------------------'
                                        + '\n')
                        leven_distance = [
                            _normalized_distance(list(result[row]),
                                                 list(target[row]))
                            for row in range(result.shape[0])
                        ]
                        summary = sum(leven_distance) / batch_size
                        print(summary)
                        with open('result_1.txt', 'a+') as f:
                            f.write(str(loss) + '\n')
                            f.write(str(summary) + '\n')
                if epoch % 40 == 0:
                    saver.save(sess, './trained/my-model', global_step=epoch)
Example #25
0
import json
import re
import sys
import matplotlib.pyplot as plt
from collections import OrderedDict
from nltk.stem import *

import data_provider

# Initialize the provider with n-gram features enabled, then load the full
# dataset (features X, targets Y) and report its size.
data_provider.init_data_provider(ngrams=True)

X, Y = data_provider.get_data(all_data=True)

print(len(X), len(Y))

# Exploratory word-frequency plot, kept for reference:
# plt.title('unijokes raw dataset words (number of different words {})'.format(len(words_counter)))
# plt.bar(range(len(words_counter)), sorted(words_counter.values(), reverse=True), align='center', alpha=0.5, color='orange')
# plt.xticks(range(10), ordered.keys(), rotation='vertical')
# plt.show()
Example #26
0
def main(_):
    """Train the OCR model on the Chinese ad dataset.

    Builds the graph for fixed batches of 32 images (150x600x3) with
    integer label sequences of length 37, then trains for 10 epochs over
    the files in ./chinese_data/ad_flag. Every other step the loss and a
    length-normalized Levenshtein distance are printed and appended to
    chinese_result.txt; a checkpoint is saved every 10000 global steps.
    """
    prepare_training_dir()
    batch_size = 32  # must match the placeholder shapes below
    num_char_classes = 3215
    # less than 37 (original note) -- TODO confirm vocabulary size
    max_sequence_length = 37
    num_of_views = 4
    #  null_code = 42
    null_code = 2
    model = common_flags.create_model(num_char_classes, max_sequence_length,
                                      num_of_views, null_code)
    hparams = get_training_hparams()

    def _normalized_distance(pred, target):
        """Levenshtein distance between pred and target, clipped to the
        span between the start marker (1) and end marker (2), divided by
        the target span length. Assumes both markers occur in target --
        TODO confirm; list.index() raises ValueError otherwise."""
        start = target.index(1) + 1
        end = target.index(2)
        span = float(end - start)
        if 1 in pred and 2 in pred and pred.index(1) < pred.index(2):
            clipped = pred[pred.index(1) + 1:pred.index(2)]
        elif 1 in pred and 2 not in pred:
            clipped = pred[pred.index(1) + 1:]
        elif 2 in pred and 1 not in pred:
            clipped = pred[:pred.index(2)]
        else:
            # No usable marker pair in the prediction: charge the full
            # (normalized) target span, as the original code did.
            return (end - start) / span
        return distance.levenshtein(clipped, target[start:end]) / span

    # If ps_tasks is zero, the local device is used. When using multiple
    # (non-local) replicas, the ReplicaDeviceSetter distributes the variables
    # across the different devices.
    device_setter = tf.train.replica_device_setter(FLAGS.ps_tasks,
                                                   merge_devices=True)
    with tf.device(device_setter):
        images_orig = tf.placeholder(tf.float32, [batch_size, 150, 600, 3],
                                     name='image_orig')
        labels = tf.placeholder(tf.int32, [batch_size, max_sequence_length],
                                name='label')
        data = data_provider.get_data(images_orig, labels, num_of_views,
                                      num_char_classes)
        #      augment=hparams.use_augment_input,
        #      central_crop_size=None)
        endpoints = model.create_base(data.images, data.labels_one_hot)
        total_loss = model.create_loss(labels, endpoints)
        init_fn = model.create_init_fn_to_restore(FLAGS.checkpoint,
                                                  FLAGS.checkpoint_inception)
        train_op = train(total_loss, init_fn, hparams)
        saver = tf.train.Saver()

        with tf.Session() as sess:
            tf.global_variables_initializer().run()
            filelist = os.listdir('./chinese_data/ad_flag')
            # Single-line vocabulary mapping label indices to characters.
            # BUG FIX: context manager guarantees the handle is closed.
            with open('./chinese_data/chinese_vocab.txt') as f:
                char_list = f.readline()
            count = 0
            for epoch in range(10):
                random.shuffle(filelist)
                # BUG FIX: floor division -- plain "/" yields a float under
                # Python 3 and range() would raise TypeError.
                for step in range(len(filelist) // batch_size):
                    count += 1
                    images_permutation, labels_permutation = handle_data(
                        filelist[step * batch_size:(step + 1) * batch_size],
                        char_list)
                    result, loss, _ = sess.run(
                        [endpoints.predicted_chars, total_loss, train_op],
                        feed_dict={
                            images_orig: images_permutation,
                            labels: labels_permutation
                        })
                    if step % 2 == 0:
                        print(loss)
                        leven_distance = [
                            _normalized_distance(
                                list(result[row]),
                                list(labels_permutation[row]))
                            for row in range(result.shape[0])
                        ]
                        summary = sum(leven_distance) / batch_size
                        print(summary)
                        with open('chinese_result.txt', 'a+') as f:
                            f.write(str(loss) + '\n')
                            f.write(str(summary) + '\n')
                    if count % 10000 == 0:
                        saver.save(sess,
                                   './chinese_trained/my-model',
                                   global_step=count)
import tensorflow as tf
import os
import model
import data_provider

# Evaluation script: measures correctness of the trained 2-class face
# classifier on the "test" split. The restore/eval loop continues beyond
# this excerpt.

TEXT_EXAMPLES = 800  # test-set size -- NOTE(review): name looks like a typo for TEST_EXAMPLES
num_classes = 2

global_step = tf.Variable(0, trainable=False)

# Feed targets: flattened 64x64 images, one-hot labels, dropout keep-prob.
x = tf.placeholder(tf.float32, [None, 64 * 64])
y = tf.placeholder(tf.float32, [None, num_classes])
keep_prob = tf.placeholder(tf.float32)  # dropout (keep probability)

# One test example at a time, reshaped to match the placeholders above.
image, label = data_provider.get_data("test")
image = tf.reshape(image, shape=(1, 64 * 64))
label = tf.reshape(label, shape=(1, 2))

# Forward pass and per-example correctness predicate.
logit = model.create_model(x, num_classes, keep_prob)
correct_pred = tf.equal(tf.argmax(logit, 1), tf.argmax(y, 1))

correct_num = 0

saver = tf.train.Saver()

checkpoint_dir = "path/to/model/"

with tf.Session() as sess:

    # Start the input queue runners; the checkpoint restore and the
    # evaluation loop follow (truncated in this excerpt).
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
import tensorflow as tf
import data_provider
import numpy as np

# Smoke test for data_provider.get_data: feeds random integer "images" and
# labels through the provider graph (4 views, 134 character classes) and
# prints whatever tensors the provider returns.
image_orig = tf.placeholder(tf.int32, [None, 150, 360, 3], name='image_orig')
label = tf.placeholder(tf.int32, [
    None,
], name='label')
return_data = data_provider.get_data(image_orig, label, 4, 134)
with tf.Session() as sess:
    # Two random examples with values in [0, 10).
    x = np.array(np.random.randint(10, size=[2, 150, 360, 3]), dtype=np.int32)
    y = np.array(np.random.randint(10, size=[
        2,
    ]), dtype=np.int32)
    result = sess.run(return_data, feed_dict={image_orig: x, label: y})
    print(result)