예제 #1
0
 def make_iterator(tensors):
     """Return an endless eager iterator that yields `tensors` on every step.

     The dataset is built under the CPU:0 device scope; the iterator itself
     is created after that scope has been left.
     """
     cpu_scope = tf.device('/device:CPU:0')
     with cpu_scope:
         single_element = tf.data.Dataset.from_tensors(tensors)
         repeated = single_element.repeat()
     return tfe.Iterator(repeated)
예제 #2
0
파일: tf-test.py 프로젝트: VeggieJ/tf-tests
    parsed_line = tf.decode_csv(line, example_defaults)
    # First 4 fields are features, combine into single tensor
    features = tf.reshape(parsed_line[:-1], shape=(4, ))
    # Last field is the label
    label = tf.reshape(parsed_line[-1], shape=())
    return features, label


# Input pipeline: read the CSV as text lines, drop the header row, parse every
# remaining row, shuffle within a 1000-element buffer and batch by 32.
train_dataset = (
    tf.data.TextLineDataset(train_dataset_fp)
    .skip(1)
    .map(parse_csv)
    .shuffle(buffer_size=1000)
    .batch(32)
)

# Pull one batch and show its first example.
first_batch_iterator = tfe.Iterator(train_dataset)
features, label = first_batch_iterator.next()
print("example features:", features[0])
print("example label:", label[0])

# Two 10-unit ReLU layers followed by a 3-unit linear output layer.
model = tf.keras.Sequential()
model.add(
    tf.keras.layers.Dense(10, activation="relu",
                          input_shape=(4, )))  # input shape required
model.add(tf.keras.layers.Dense(10, activation="relu"))
model.add(tf.keras.layers.Dense(3))

optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)


def loss(model, x, y):
    y_ = model(x)
# One binary target per class name: 1 when the class type is 'ann', else 0.
# Building the int32 array directly avoids wrapping every element in its own
# tensor first.
labels = np.array(
    [
        pClasses[cn]['type'] == 'ann'
        for cn in (filtGenPClassNames + annPClassNames)
    ],
    dtype=np.int32)

dataset = tf.data.Dataset.from_tensor_slices((featDict, labels))
dataset = dataset.shuffle(1000).repeat().batch(2)

features, label = tfe.Iterator(dataset).next()

# The labels take the values 0 and 1 and the loss defined below is a sparse
# softmax cross-entropy, which needs one logit per class. A single output unit
# would put label 1 outside the valid range [0, num_classes), so the output
# layer has two units.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation="relu",
                          input_shape=(5, )),  # input shape required
    tf.keras.layers.Dense(10, activation="relu"),
    tf.keras.layers.Dense(2)
])


def loss(model, x, y):
    """Return the sparse softmax cross-entropy of `model(x)` against labels `y`."""
    logits = model(x)
    return tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=y)


def grad(model, inputs, targets):
예제 #4
0
                          shape=(basePairs, ))
    # Last field is the label
    labels = tf.reshape(parsed_line[0:statCount], shape=(statCount, ))
    return features, labels


# Build the input pipeline one stage at a time, announcing each stage.
train_dataset = train_dataset.skip(1)  # drop the header row
print("Mapping Database...")
train_dataset = train_dataset.map(parse_csv)  # one parsed example per row
print("Shuffling Database...")
train_dataset = train_dataset.shuffle(buffer_size=100)  # 100-element buffer
print("Batching Database...")
train_dataset = train_dataset.batch(batchSize)

# Pull one batch and show its first example.
first_batch_iterator = tfe.Iterator(train_dataset)
features, label = first_batch_iterator.next()
print("example features:", features[0])
print("example label:", label[0])

print("Generating Network...")
# Input layer of width basePairs, two hidden layers three times as wide, and a
# linear output layer with statCount units.
neuralNetwork = tf.keras.Sequential()
neuralNetwork.add(
    tf.keras.layers.Dense(basePairs,
                          activation="relu",
                          input_shape=(basePairs, )))  # input shape required
neuralNetwork.add(tf.keras.layers.Dense(basePairs * 3, activation="relu"))
neuralNetwork.add(tf.keras.layers.Dense(basePairs * 3, activation="relu"))
neuralNetwork.add(tf.keras.layers.Dense(statCount))


def loss(prediction, groundTruth):
예제 #5
0
# Training hyper-parameters
learning_rate = 0.001
num_steps = 1000
batch_size = 128
display_step = 100

# Network dimensions
n_hidden_1 = n_hidden_2 = 256  # neurons in the 1st and 2nd layer
num_input = 28 * 28  # MNIST data input (img shape: 28*28)
num_classes = 10  # MNIST total classes (0-9 digits)

# Slice the MNIST training arrays into a dataset and group it into batches.
train_pairs = (mnist.train.images, mnist.train.labels)
dataset = tf.data.Dataset.from_tensor_slices(train_pairs)
dataset = dataset.batch(batch_size)
dataset_iter = tfe.Iterator(dataset)


# Define the neural network. To use the eager API and the tf.layers API
# together, we must subclass tfe.Network as follows:
class NeuralNet(tfe.Network):
    def __init__(self):
        # Define each layer
        super(NeuralNet, self).__init__()
        # Hidden fully connected layer with 256 neurons

        self.layer1 = self.track_layer(
            tf.layers.Conv2D(32, 5, padding='SAME',
                             activation=tf.nn.relu))  # [n,28,28,32]
        self.pool1 = self.track_layer(
            tf.layers.AveragePooling2D(2, 2, padding='SAME'))  # [n,14,14,32]
예제 #6
0
    def train(self, epoch, dataloader):
        """Run one pass over the training data and return the last batch loss.

        Args:
            epoch: Epoch index; only used in the 'traditional' log line,
                where it is printed as ``epoch + 1``.
            dataloader: Indexable pair ``(dataset, data_len)``. ``dataset`` is
                iterated with ``tfe.Iterator`` and must yield
                ``(inputs, labels)``; ``data_len`` is used for progress
                reporting only.

        Returns:
            The loss of the final batch as a NumPy value, or ``None`` when
            the dataset yields no batches.
        """
        dataset, data_len = dataloader[0], dataloader[1]
        if self.log_type == 'progressbar':
            # Progress bar
            processed_data_len = 0
            bar = plugins.Bar('{:<10}'.format('Train'),
                              max=data_len // self.batch_size)
        end = time.time()

        # Stays None if the loop body never runs, so the return statement
        # below cannot hit an unbound local.
        loss = None

        with self.summary_writer.as_default():
            with tf.device(self.device):
                for i, (inputs, labels) in enumerate(tfe.Iterator(dataset)):
                    # keeps track of data loading time
                    data_time = time.time() - end

                    ############################
                    # Update network
                    ############################
                    inputs = tf.reshape(inputs,
                                        shape=(-1, self.nc,
                                               self.resolution_high,
                                               self.resolution_wide))
                    with tf.device('/cpu:0'):
                        tf.assign_add(self.step_counter, 1)

                    with tf.contrib.summary.always_record_summaries():
                        with tf.GradientTape() as tape:
                            # get outputs
                            outputs = self.model(inputs)

                            # compute loss
                            loss = self.criterion(outputs, labels)

                            # perform evaluation
                            accuracy = self.evaluation(outputs, labels)

                            # logging and visualization
                            tf.contrib.summary.scalar('loss', loss)
                            tf.contrib.summary.scalar('accuracy', accuracy)
                            # .numpy() is the public accessor; it copies the
                            # value to host memory before converting.
                            self.params['Loss'] = loss.numpy()
                            self.params['Accuracy'] = accuracy.numpy()

                        # compute gradients
                        grads = tape.gradient(loss, self.model.variables)

                        # optimize the network
                        self.optimizer.apply_gradients(
                            zip(grads, self.model.variables),
                            global_step=self.step_counter)

                        # print the progress
                        if self.log_type == 'traditional':
                            # print batch progress
                            print(
                                self.print_formatter %
                                tuple([epoch + 1, self.nepochs, i, data_len] +
                                      [
                                          self.params[key]
                                          for key in self.training_params
                                      ]))
                        elif self.log_type == 'progressbar':
                            # update progress bar
                            batch_time = time.time() - end
                            processed_data_len += inputs._shape_as_list()[0]

                            bar.suffix = self.print_formatter.format(*[
                                processed_data_len, data_len, data_time,
                                batch_time, bar.elapsed_td, bar.eta_td
                            ] + [
                                self.params[key]
                                for key in self.training_params
                            ] + [self.optimizer._learning_rate])
                            bar.next()
                            # The timer is only restarted in progress-bar
                            # mode, the only mode that reports timings.
                            end = time.time()

                if self.log_type == 'progressbar':
                    bar.finish()

        if loss is None:
            return None
        return loss.numpy()
예제 #7
0
                                         reduction_indices=[1]))
    return loss


@tfe.implicit_value_and_gradients
def cal_gradient(image_batch, label_batch):
    """Return the cross-entropy of the softmax model's output on one batch.

    The decorator turns this into a function that returns the loss value
    together with (gradient, variable) pairs.
    """
    predictions = softmax_model(image_batch)
    return cross_entropy(predictions, label_batch)


if __name__ == '__main__':
    data = input_data.read_data_sets("data/MNIST_data/", one_hot=True)

    # Training pipeline: cast the labels to float32, shuffle within a
    # 1000-element buffer and batch by 100.
    train_ds = (
        tf.data.Dataset.from_tensor_slices(
            (data.train.images, data.train.labels))
        .map(lambda x, y: (x, tf.cast(y, tf.float32)))
        .shuffle(buffer_size=1000)
        .batch(100)
    )

    optimizer = tf.train.GradientDescentOptimizer(0.5)

    # One optimizer step per batch, printing the loss each time.
    for step, (image_batch, label_batch) in enumerate(tfe.Iterator(train_ds)):
        loss, grads_and_vars = cal_gradient(image_batch, label_batch)
        optimizer.apply_gradients(grads_and_vars)
        print("step: {}  loss: {}".format(step, loss.numpy()))

    # Accuracy on the test split: fraction of rows whose arg-max prediction
    # matches the arg-max of the label row.
    model_test_output = softmax_model(data.test.images)
    model_test_label = data.test.labels
    predicted_class = tf.argmax(model_test_output, 1)
    true_class = tf.argmax(model_test_label, 1)
    correct_prediction = tf.equal(predicted_class, true_class)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    print("test accuracy = {}".format(accuracy.numpy()))
예제 #8
0
    def __init__(self):
        """Create the two 10-unit dense layers and register them for tracking."""
        super(MNISTModel, self).__init__()
        first_dense = tf.layers.Dense(units=10)
        self.layer1 = self.track_layer(first_dense)
        second_dense = tf.layers.Dense(units=10)
        self.layer2 = self.track_layer(second_dense)

    def call(self, input):
        """Feed `input` through layer1, then layer2, and return the result."""
        return self.layer2(self.layer1(input))


# Let's make up a blank input image
model = MNISTModel()
batch = tf.zeros([1, 1, 784])
print(batch.shape)

result = model(batch)
print(result)


def loss_function(model, x, y):
    """Return the softmax cross-entropy between labels `y` and `model(x)`."""
    y_ = model(x)
    return tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_)


optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)

# tfe.Iterator has to be given a tf.data.Dataset whose elements are (x, y)
# pairs; passing the raw `batch` tensor provides neither a dataset nor labels.
# NOTE(review): the one-hot label is a placeholder that only matches the
# model's 10-unit output for the blank image -- replace it with real labels.
dummy_labels = tf.one_hot([[0]], depth=10)
dummy_dataset = tf.data.Dataset.from_tensors((batch, dummy_labels))

for (x, y) in tfe.Iterator(dummy_dataset):
    grads = tfe.implicit_gradients(loss_function)(model, x, y)
    optimizer.apply_gradients(grads)
예제 #9
0
def train_one_epoch(generator, discriminator, generator_optimizer,
                    discriminator_optimizer, dataset, step_counter,
                    log_interval, noise_dim):
  """Trains `generator` and `discriminator` models on `dataset`.

  Every batch increments `step_counter`, computes both losses under a single
  persistent gradient tape and applies one optimizer step to each model.

  Args:
    generator: Generator model.
    discriminator: Discriminator model.
    generator_optimizer: Optimizer to use for generator.
    discriminator_optimizer: Optimizer to use for discriminator.
    dataset: Dataset of images to train on.
    step_counter: An integer variable, used to write summaries regularly.
    log_interval: How many steps to wait between logging and collecting
      summaries.
    noise_dim: Dimension of noise vector to use.
  """

  total_generator_loss = 0.0
  total_discriminator_loss = 0.0
  for (batch_index, images) in enumerate(tfe.Iterator(dataset)):
    with tf.device('/cpu:0'):
      tf.assign_add(step_counter, 1)

    with tf.contrib.summary.record_summaries_every_n_global_steps(
        log_interval, global_step=step_counter):
      current_batch_size = images.shape[0]
      noise = tf.random_uniform(
          shape=[current_batch_size, noise_dim],
          minval=-1.,
          maxval=1.,
          seed=batch_index)

      # The tape is persistent because two gradients are taken from it.
      with tfe.GradientTape(persistent=True) as g:
        generated_images = generator(noise)
        tf.contrib.summary.image(
            'generated_images',
            tf.reshape(generated_images, [-1, 28, 28, 1]),
            max_images=10)

        discriminator_gen_outputs = discriminator(generated_images)
        discriminator_real_outputs = discriminator(images)
        discriminator_loss_val = discriminator_loss(discriminator_real_outputs,
                                                    discriminator_gen_outputs)
        generator_loss_val = generator_loss(discriminator_gen_outputs)

      # Running totals are bookkeeping only, so they are accumulated outside
      # the tape rather than being recorded on it.
      total_discriminator_loss += discriminator_loss_val
      total_generator_loss += generator_loss_val

      generator_grad = g.gradient(generator_loss_val, generator.variables)
      discriminator_grad = g.gradient(discriminator_loss_val,
                                      discriminator.variables)
      # Drop the reference so the persistent tape's resources are released.
      del g

      generator_optimizer.apply_gradients(
          zip(generator_grad, generator.variables))
      discriminator_optimizer.apply_gradients(
          zip(discriminator_grad, discriminator.variables))

      if log_interval and batch_index > 0 and batch_index % log_interval == 0:
        # batch_index is zero-based, so batch_index + 1 batches have been
        # accumulated into the totals at this point.
        batches_seen = batch_index + 1
        print('Batch #%d\tAverage Generator Loss: %.6f\t'
              'Average Discriminator Loss: %.6f' %
              (batch_index, total_generator_loss / batches_seen,
               total_discriminator_loss / batches_seen))
예제 #10
0
def process_user(user_name, outfile):
    """Train and evaluate a model day by day for one user and log top losses.

    For each day ``d`` in 0..15 the model is trained on ``<d>.txt`` and then
    evaluated on ``<d + 1>.txt``, both read from the user's directory under
    ``users_indir``. The 20 largest evaluation losses of every day are written
    to ``outfile`` as ``day,loss,red`` lines, where ``red`` is ``1`` when the
    loss index equals the first field of one of the file's red events.
    Finally the model is saved under ``users_modeldir``.

    Args:
        user_name: Name of the user; selects the input directory and the
            file name of the saved model.
        outfile: Object with a ``write`` method that receives the result
            lines.
    """
    num_epochs = 2
    batch_size = 512
    # Use the function's own argument here: this line previously referenced
    # the name `u`, which is not defined anywhere in this function.
    data_dir = '{0}/{1}/'.format(users_indir, user_name)

    # keep results for plotting
    train_loss_results = []

    model = getModel()

    for d in range(16):

        # training phase
        dataset_fname = data_dir + '{0}.txt'.format(d)
        input_data, target_data, red_events = process_file(dataset_fname)
        print('processing:', dataset_fname, " - num events:", len(input_data),
              " - red events:", len(red_events))

        training_dataset = tf.data.Dataset.from_tensor_slices(
            (input_data, target_data))
        training_dataset = training_dataset.batch(batch_size)

        # train model on a day
        loss_results = train(model, training_dataset, num_epochs)
        train_loss_results.append(loss_results)
        print('loss_results:', loss_results)

        # evaluation phase: score the following day's file
        dataset_fname = data_dir + '{0}.txt'.format(d + 1)
        input_data, target_data, red_events = process_file(dataset_fname)
        print('  evaluating:', dataset_fname, " - num events:",
              len(input_data), " - red events:", len(red_events))

        eval_dataset = tf.data.Dataset.from_tensor_slices(
            (input_data, target_data))
        eval_dataset = eval_dataset.batch(batch_size)

        line_losses = np.array([])

        # eval using batches of 'batch_size'
        for X, y in tfe.Iterator(eval_dataset):
            batch_loss = loss(model, X, y)
            line_losses = np.append(line_losses, batch_loss)

        # (index, loss) pairs, largest loss first
        possible_anomalies = sorted(enumerate(line_losses),
                                    key=lambda pair: pair[1],
                                    reverse=True)

        print('    max:', possible_anomalies[:10])
        print('    red events:', [a for a, b in red_events])

        # write the top 20 losses to the file as (day, score, redevent)
        # NOTE(review): an earlier comment said "top 10" while the code has
        # always taken 20 -- confirm which count is intended.
        for i, v in possible_anomalies[:20]:
            red = '1' if any(a == i for a, _ in red_events) else '0'
            line = '{0},{1},{2}\n'.format(d, v, red)
            outfile.write(line)

    # Save model to a file
    model_filepath = '{0}/{1}_simple_lm.hdfs'.format(users_modeldir, user_name)

    tf.keras.models.save_model(model,
                               model_filepath,
                               overwrite=True,
                               include_optimizer=False)
예제 #11
0
  example_defaults = [[0.], [0.], [0.], [0.], [0]]  # sets field types
  parsed_line = tf.decode_csv(line, example_defaults)
  # First 4 fields are features, combine into single tensor
  features = tf.reshape(parsed_line[:-1], shape=(4,))
  # Last field is the label
  label = tf.reshape(parsed_line[-1], shape=())
  return features, label

# Input pipeline: read the CSV as text lines, drop the header row, parse every
# remaining row, shuffle within a 1000-element buffer and batch by 32.
train_dataset = (
    tf.data.TextLineDataset(train_dataset_fp)
    .skip(1)
    .map(parse_csv)
    .shuffle(buffer_size=1000)
    .batch(32)
)

# Pull one batch and show its first example.
first_batch_iterator = tfe.Iterator(train_dataset)
features, label = first_batch_iterator.next()
print("example features:", features[0])
print("example label:", label[0])


# "Adventures in ML" tutorial: builds a = (b + c) * (c + const)
const = tf.constant(2.0, name='const')
b = tf.placeholder(tf.float32, [None, 1], name='b')
c = tf.Variable(1.0, name='c')

d = tf.add(b, c, name='d')
e = tf.add(c, const, name='e')
a = tf.multiply(d, e, name='a')
# op that initialises the variables
init_op = tf.global_variables_initializer()
def _load_or_cache_split(data_dir, split):
    """Return (images, labels, weights) for `split`, cached in '<split>.npz'."""
    npz_path = split + '.npz'
    if os.path.isfile(npz_path):
        print("\nFound {} npz file\n".format(split))
        with np.load(npz_path) as npzfile:
            return npzfile['imgs'], npzfile['labels'], npzfile['weights']

    images, labels, weights = util.load_pascal(data_dir,
                                               class_names=CLASS_NAMES,
                                               split=split)
    np.savez(npz_path, imgs=images, labels=labels, weights=weights)
    return images, labels, weights


def main():
    """Train SimpleCNN on PASCAL data and report test average precision.

    Parses the command-line options, loads the 'trainval' and 'test' splits
    (cached as .npz files in the working directory), trains with Adam and a
    weighted sigmoid cross-entropy loss, writes summaries to a time-stamped
    directory under --log-dir, saves a checkpoint after every epoch and
    finally prints random, ground-truth and obtained AP figures.
    """
    parser = argparse.ArgumentParser(description='TensorFlow Pascal Example')
    parser.add_argument('--batch-size',
                        type=int,
                        default=20,
                        help='input batch size for training')
    parser.add_argument('--epochs',
                        type=int,
                        default=60,
                        help='number of epochs to train')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help='learning rate')
    parser.add_argument('--seed', type=int, default=1, help='random seed')
    parser.add_argument('--log-interval',
                        type=int,
                        default=50,
                        help='how many batches to wait before'
                        ' logging training status')
    parser.add_argument('--eval-interval',
                        type=int,
                        default=50,
                        help='how many batches to wait before'
                        ' evaluate the model')
    parser.add_argument('--log-dir',
                        type=str,
                        default='02_pascal_simplecnn_tb',
                        help='path for logging directory')
    parser.add_argument('--data-dir',
                        type=str,
                        default='./VOCdevkit/VOC2007',
                        help='Path to PASCAL data storage')
    args = parser.parse_args()
    util.set_random_seed(args.seed)
    # The return value is kept alive for the duration of main().
    # NOTE(review): whether dropping it would tear the session down depends on
    # util.set_session, which is not visible here.
    sess = util.set_session()

    train_images, train_labels, train_weights = _load_or_cache_split(
        args.data_dir, "trainval")
    test_images, test_labels, test_weights = _load_or_cache_split(
        args.data_dir, 'test')

    # Subtract the per-channel mean from every image.
    rgb_mean = np.array([123.68, 116.78, 103.94], dtype=np.float32) / 256.0
    train_images = (train_images - rgb_mean).astype(np.float32)
    test_images = (test_images - rgb_mean).astype(np.float32)

    # Training data: originals plus flipped copies, then cropped, shuffled and
    # batched.
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (train_images, train_labels, train_weights))
    flipped_train = train_dataset.map(flip, num_parallel_calls=4)
    train_dataset = train_dataset.concatenate(flipped_train)
    train_dataset = train_dataset.map(crop, num_parallel_calls=4)
    train_dataset = train_dataset.shuffle(10000).batch(args.batch_size)

    # Test data: centre-cropped and batched, never shuffled.
    test_dataset = tf.data.Dataset.from_tensor_slices(
        (test_images, test_labels, test_weights))
    test_dataset = test_dataset.map(center_crop, num_parallel_calls=4)
    test_dataset = test_dataset.batch(args.batch_size)

    model = SimpleCNN(num_classes=len(CLASS_NAMES))

    logdir = os.path.join(args.log_dir,
                          datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    if os.path.exists(logdir):
        shutil.rmtree(logdir)
    os.makedirs(logdir)

    writer = tf.contrib.summary.create_file_writer(logdir)
    writer.set_as_default()
    tf.contrib.summary.initialize()

    global_step = tf.train.get_or_create_global_step()

    # Only logged to the summaries; the optimizer below is given args.lr
    # directly, so no decay is applied.
    learning_rate = tf.Variable(args.lr)

    optimizer = tf.train.AdamOptimizer(learning_rate=args.lr)
    train_log = {'iter': [], 'loss': []}
    test_log = {'iter': [], 'mAP': []}
    checkpoint_directory = "./02_pascal_simplecnn/"
    if not os.path.exists(checkpoint_directory):
        os.makedirs(checkpoint_directory)
    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)

    # Resume from the newest checkpoint if one exists.
    latest = tf.train.latest_checkpoint(checkpoint_directory)
    if latest is not None:
        print("Loading checkpoint ", latest)
        checkpoint.restore(latest)

    print("\nUsing eval interval: ", args.eval_interval)
    print("\nUsing batch size: ", args.batch_size)
    for ep in range(args.epochs):
        epoch_loss_avg = tfe.metrics.Mean()
        for (images, labels, weights) in tfe.Iterator(train_dataset):

            with tf.GradientTape() as tape:
                logits = model(images, training=True)
                loss_value = tf.losses.sigmoid_cross_entropy(
                    labels, logits, weights)
            grads = tape.gradient(loss_value, model.trainable_variables)

            optimizer.apply_gradients(zip(grads, model.trainable_variables),
                                      global_step)
            epoch_loss_avg(loss_value)

            # global_step does not change again within this iteration.
            step = global_step.numpy()

            if step % args.log_interval == 0:
                print(
                    'Epoch: {0:d}/{1:d} Iteration:{2:d}  Training Loss:{3:.4f}  '
                    .format(ep, args.epochs, step, epoch_loss_avg.result()))
                train_log['iter'].append(step)
                train_log['loss'].append(epoch_loss_avg.result())

                with tf.contrib.summary.always_record_summaries():
                    tf.contrib.summary.scalar('Training loss', loss_value)
                    tf.contrib.summary.image('Training images', images)
                    tf.contrib.summary.scalar('Learning rate', learning_rate)
                    for i, variable in enumerate(model.trainable_variables):
                        tf.contrib.summary.histogram("grad_" + variable.name,
                                                     grads[i])

            if step % args.eval_interval == 0:
                print("\n **** Running Eval *****\n")
                test_AP, test_mAP = util.eval_dataset_map(model, test_dataset)
                print("Eval finished with test mAP : ", test_mAP)
                test_log['iter'].append(step)
                test_log['mAP'].append(test_mAP)
                with tf.contrib.summary.always_record_summaries():
                    tf.contrib.summary.scalar('Testing mAP', test_mAP)

        checkpoint.save(checkpoint_prefix)

    # Final report: random and ground-truth scores frame the obtained mAP.
    AP, mAP = util.eval_dataset_map(model, test_dataset)
    rand_AP = util.compute_ap(test_labels,
                              np.random.random(test_labels.shape),
                              test_weights,
                              average=None)
    print('Random AP: {} mAP'.format(np.mean(rand_AP)))
    gt_AP = util.compute_ap(test_labels,
                            test_labels,
                            test_weights,
                            average=None)
    print('GT AP: {} mAP'.format(np.mean(gt_AP)))
    print('Obtained {} mAP'.format(mAP))
    print('Per class:')
    for cid, cname in enumerate(CLASS_NAMES):
        print('{}: {}'.format(cname, util.get_el(AP, cid)))
예제 #13
0
                                                   resize_side_min=256, resize_side_max=512)
    one_hot = tf.one_hot(data['label'], FLAGS.num_classes)
    return image, one_hot


# Load a given dataset by name, along with the DatasetInfo
# Imagenet2012: train=1,281,167 / validation=50,000
# Build the training pipeline: parse, shuffle, batch and prefetch.
dataset = (
    tfds.load(name="imagenet2012", split=tfds.Split.TRAIN)
    .map(parse_fn, FLAGS.num_map_threads)
    .shuffle(FLAGS.shuffle_buffer_size)
    .batch(FLAGS.batch_size)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

print('!!!!!!!!!!!!')

# Walk the whole dataset once, printing the shape of every batch.
for batch in tfe.Iterator(dataset):
    images, labels = batch
    print(images.shape, labels.shape)

#for features in dataset.take(2):
#    print(features['image'])
    #vgg.build(features['image'])

    #print(vgg.prob)
def main(argv):
    """Write a TFRecord file of labelled image patches.

    Reads image filenames from the input dataset, derives each label from
    the filename's path components, loads (and optionally subsamples) the
    images, cuts them into patches and writes up to `num_samples` serialized
    (patch, label) examples to the output file. Unless --no_filter is given,
    patches whose total variation per pixel does not exceed the threshold are
    relabelled with the index ``len(labels)`` and sampled with weight 0.05
    against 0.95 for the remaining patches.

    Args:
        argv: Unused; the options are parsed from the process command line.
            NOTE(review): confirm callers do not expect `argv` to be parsed.
    """
    parser = argparse.ArgumentParser(
        description=
        'Create tfrecords dataset holding patches of images specified by filename in input dataset.'
    )

    parser.add_argument('input_dataset',
                        type=str,
                        help='Path to dataset holding image filenames')
    parser.add_argument('output_dataset',
                        type=str,
                        help='Path where to store the output dataset')
    parser.add_argument(
        'patch_size',
        type=int,
        help='Patch size which to use in the preprocessed dataset')
    parser.add_argument('num_samples', type=int, help='Size of output dataset')
    parser.add_argument(
        'labels',
        type=lambda s: s.split(','),
        help="Comma separated list of labels to find in filenames.")
    parser.add_argument('--image_size',
                        type=int,
                        dest='image_size',
                        help='Image size for files pointed to by filename')
    parser.add_argument(
        '--no_filter',
        dest='no_filter',
        action='store_true',
        default=False,
        help='Whether to apply total image variation filtering.')
    parser.add_argument(
        '--threshold',
        type=float,
        dest='threshold',
        help='Threshold for filtering the samples according to variation.')
    parser.add_argument('--subsampling_factor',
                        type=int,
                        dest='subsampling_factor',
                        default=1,
                        help='Subsampling factor to use to downsample images.')
    args = parser.parse_args()

    labels_table = tf.contrib.lookup.index_table_from_tensor(
        mapping=args.labels)

    filename_dataset = tf.data.TFRecordDataset(
        args.input_dataset,
        num_parallel_reads=8).map(_decode_example_filename).shuffle(100000)

    # One zero-argument callable per label (plus a default) for tf.case.
    functions = [
        tf.Variable(label, name='const_' + label).value
        for label in args.labels
    ]
    false_fn = tf.Variable('None', name='none_label').value

    def _extract_label(filename):
        """Return the label matching a '/'-separated component of `filename`.

        Falls back to the string 'None' when no label matches.
        """
        match = [
            tf.math.reduce_any(
                tf.strings.regex_full_match(
                    tf.string_split([filename], '/').values, label))
            for label in args.labels
        ]
        pred_fn_pairs = list(zip(match, functions))
        return tf.case(pred_fn_pairs, default=false_fn, exclusive=True)

    # Load images and extract the label from the filename. The explicit size
    # is only passed on when --image_size was given.
    load_kwargs = {'channels': 3}
    if args.image_size is not None:
        load_kwargs.update(width=args.image_size, height=args.image_size)

    def _load_image_and_label(feature):
        """Map a filename record to its loaded image and label index."""
        return {
            'image': ctfi.load(feature['filename'], **load_kwargs),
            'label': labels_table.lookup(_extract_label(feature['filename']))
        }

    images_dataset = filename_dataset.map(_load_image_and_label)

    if args.subsampling_factor > 1:
        images_dataset = images_dataset.map(
            lambda feature: {
                'image': ctfi.subsample(feature['image'],
                                        args.subsampling_factor),
                'label': feature['label']
            })

    def _filter_func_label(features):
        """Keep only samples whose label index is greater than -1."""
        return features['label'] > -1

    images_dataset = images_dataset.filter(_filter_func_label).shuffle(100)

    # Extract image patches

    def _split_patches(features):
        """Return the image's patches and one copy of its label per patch."""
        patches = ctfi.extract_patches(features['image'], args.patch_size)
        labels = tf.expand_dims(tf.reshape(features['label'], [1]), 0)
        labels = tf.tile(labels, tf.stack([tf.shape(patches)[0], 1]))
        return (patches, labels)

    patches_dataset = images_dataset.map(_split_patches).apply(
        tf.data.experimental.unbatch())

    patches_dataset = patches_dataset.map(lambda patch, label: {
        'patch': patch,
        'label': label
    })

    threshold = args.threshold if args.threshold is not None else 0.08

    # Marks every sample by its total image variation per pixel.
    # See: https://www.tensorflow.org/versions/r1.12/api_docs/python/tf/image/total_variation
    def add_background_info(sample):
        """Add 'no_background': True when variation per pixel > threshold."""
        variation = tf.image.total_variation(sample['patch'])
        num_pixels = sample['patch'].get_shape().num_elements()
        var_per_pixel = (variation / num_pixels)
        sample['no_background'] = var_per_pixel > threshold
        return sample

    if args.no_filter:
        dataset = patches_dataset
    else:
        dataset = patches_dataset.map(add_background_info)
        filtered_elements_dataset = dataset.filter(
            lambda sample: tf.logical_not(sample['no_background']))

        def change_label(sample):
            """Relabel a patch with the extra index len(args.labels)."""
            return {
                'patch':
                sample['patch'],
                'label':
                tf.reshape(
                    tf.convert_to_tensor(len(args.labels), dtype=tf.int64),
                    [1])
            }

        filtered_elements_dataset = filtered_elements_dataset.map(change_label)
        filtered_dataset = dataset.filter(lambda sample: sample[
            'no_background']).map(lambda sample: {
                'patch': sample['patch'],
                'label': sample['label']
            })
        dataset = tf.data.experimental.sample_from_datasets(
            [filtered_dataset, filtered_elements_dataset],
            weights=[0.95, 0.05])

    dataset = dataset.map(lambda sample: (sample['patch'], sample['label']))
    dataset = dataset.take(args.num_samples).shuffle(100000)

    def _encode_func(sample):
        """Encode one (patch, label) pair as a serializable example."""
        patch_np = sample[0].numpy().flatten()
        label_np = sample[1].numpy()
        return ctfd.encode({
            'patch': ctf.float_feature(patch_np),
            'label': ctf.int64_feature(label_np)
        })

    # The context manager closes the writer even when iteration or encoding
    # raises part-way through.
    with tf.io.TFRecordWriter(args.output_dataset) as writer:
        # Make file readable for all users
        cutil.publish(args.output_dataset)

        # Iterate over whole dataset and write serialized examples to file.
        # See: https://www.tensorflow.org/versions/r1.12/api_docs/python/tf/contrib/eager/Iterator
        for sample in tfe.Iterator(dataset):
            example = _encode_func(sample)
            writer.write(example.SerializeToString())

        # Flush before the writer is closed on leaving the block.
        writer.flush()
예제 #15
0
def train_or_infer_spinn(vocab, trans, params, train_dataset, val_dataset,
                         model_dir, embeddings_matrix):
    """Train a THANOS model in eager mode with periodic evaluation.

    The latest checkpoint found in ``model_dir`` (if any) is restored, then
    ``params.num_epochs`` passes are made over ``train_dataset``. A regular
    checkpoint is written every ``params.save_every`` iterations. Every
    ``params.dev_every`` iterations the model is evaluated on ``val_dataset``;
    when the dev accuracy is at least as high as the best seen so far, an
    additional checkpoint is written under ``<model_dir>/best_weights``.
    Otherwise, every ``params.log_every`` iterations the running training
    loss/accuracy are logged and emitted as summaries.

    Args:
        vocab: Not used by this function.
        trans: Not used by this function.
        params: Hyper-parameter object. This function reads ``train_size``,
            ``num_epochs``, ``save_every``, ``dev_every`` and ``log_every``
            and forwards the object to ``THANOS`` and ``THANOSTrainer``.
        train_dataset: Dataset iterated with ``tfe.Iterator``; each element is
            unpacked as ``(labels, document_sizes, sentence_lengths,
            sentences, transitions)``.
        val_dataset: Dataset handed to ``_evaluate_on_dataset``.
        model_dir: Directory used for summaries and checkpoints.
        embeddings_matrix: Forwarded to the ``THANOS`` constructor.

    Returns:
        The ``THANOSTrainer`` instance that was trained.
    """
    use_gpu = tfe.num_gpus() > 0
    device = "gpu:0" if use_gpu else "cpu:0"
    print("Using device: %s" % device)

    train_len = params.train_size

    log_header = (
        "  Time Epoch Iteration Progress    (%Epoch)   Loss   Dev/Loss"
        "     Accuracy  Dev/Accuracy")

    log_template = (
        "{:>6.0f} {:>5.0f} {:>9.0f} {:>5.0f}/{:<5.0f} {:>7.0f}% {:>8.6f} {} "
        "{:12.4f} {}")

    dev_log_template = (
        "{:>6.0f} {:>5.0f} {:>9.0f} {:>5.0f}/{:<5.0f} {:>7.0f}% {:>8.6f} "
        "{:8.6f} {:12.4f} {:12.4f}")

    summary_writer = tf.contrib.summary.create_file_writer(model_dir,
                                                           flush_millis=10000)

    with tf.device(device), \
            summary_writer.as_default(), \
            tf.contrib.summary.always_record_summaries():
        model = THANOS(params, embeddings_matrix)
        global_step = tf.train.get_or_create_global_step()
        trainer = THANOSTrainer(model, params)
        checkpoint = tf.train.Checkpoint(trainer=trainer,
                                         global_step=global_step)
        checkpoint.restore(tf.train.latest_checkpoint(model_dir))
        best_save_path = os.path.join(model_dir, 'best_weights')

        start = time.time()
        iterations = 0
        mean_loss = tfe.metrics.Mean()
        accuracy = tfe.metrics.Accuracy()
        logging.info(log_header)
        best_eval_acc = 0.0
        for epoch in range(params.num_epochs):
            batch_idx = 0
            for (labels, document_sizes, sentence_lengths, sentences,
                 transitions) in tfe.Iterator(train_dataset):
                if use_gpu:
                    labels = labels.gpu()
                    document_sizes = document_sizes.gpu()
                    sentence_lengths = sentence_lengths.gpu()
                    sentences = sentences.gpu()
                    transitions = transitions.gpu()

                inputs = {
                    'labels': labels,
                    'document_sizes': document_sizes,
                    'sentence_lengths': sentence_lengths,
                    'sentences': sentences,
                    'transitions': transitions
                }

                iterations += 1
                batch_train_loss, batch_train_logits = trainer.train_batch(
                    inputs)
                # Weight the batch loss by the batch size so the running mean
                # is a per-example mean even when batch sizes differ.
                batch_size = tf.shape(labels)[0]
                mean_loss(batch_train_loss.numpy(),
                          weights=batch_size.gpu() if use_gpu else batch_size)
                accuracy(tf.argmax(batch_train_logits, axis=1),
                         tf.cast(labels, tf.int64))

                if iterations % params.save_every == 0:
                    checkpoint.save(os.path.join(model_dir, "ckpt"))

                if iterations % params.dev_every == 0:
                    dev_loss, dev_frac_correct = _evaluate_on_dataset(
                        val_dataset, trainer, use_gpu)

                    logging.info(
                        dev_log_template.format(
                            time.time() - start, epoch, iterations,
                            1 + batch_idx, train_len,
                            100.0 * (1 + batch_idx) / train_len,
                            mean_loss.result(), dev_loss,
                            accuracy.result() * 100.0,
                            dev_frac_correct * 100.0))
                    tf.contrib.summary.scalar("dev/loss", dev_loss)
                    tf.contrib.summary.scalar("dev/accuracy", dev_frac_correct)

                    # ">=" means a tie with the previous best also refreshes
                    # the "best" checkpoint.
                    if dev_frac_correct >= best_eval_acc:
                        logging.info(
                            "- Found new best accuracy, saving in {}".format(
                                best_save_path))
                        checkpoint.save(
                            os.path.join(best_save_path, "ckpt_best"))
                        best_eval_acc = dev_frac_correct

                elif iterations % params.log_every == 0:
                    mean_loss_val = mean_loss.result()
                    accuracy_val = accuracy.result()
                    logging.info(
                        log_template.format(
                            time.time() - start, epoch, iterations,
                            1 + batch_idx, train_len,
                            100.0 * (1 + batch_idx) / train_len, mean_loss_val,
                            " " * 8, accuracy_val * 100.0, " " * 12))

                    tf.contrib.summary.scalar("train/loss", mean_loss_val)
                    tf.contrib.summary.scalar("train/accuracy", accuracy_val)

                    # Reset only after the training metrics have been
                    # reported, so they average over every batch since the
                    # previous training log line rather than a single batch.
                    mean_loss = tfe.metrics.Mean()
                    accuracy = tfe.metrics.Accuracy()

                batch_idx += 1

    return trainer
# Output directory name: <prefix><learning_rate>.<timestamp to the minute>.
outf += str(learning_rate) + '.' + time.strftime("%Y-%m-%d--%H-%M")
# Refuse to reuse an existing output directory.
if not os.path.exists(outf):
    os.makedirs(outf)
else:
    print('There exists a same model.')
    exit()
logfile = os.path.join(outf, 'logfile.txt')
# Copy this script into the output directory alongside the results.
shutil.copy(os.path.realpath(__file__), os.path.join(outf, filename_script))

# data
train_dataset, validation_dataset, test_dataset = dataset.uci_binary_dataset(
    filename=setting[2])
train_dataset = train_dataset.shuffle(shuffle_buffer).batch(batch_size)
dataset_iter = tfe.Iterator(train_dataset)

# model and loss
rbm = RBM(vis_dim=vis_dim, hid_dim=hid_dim)
# Compare strings by value: `is` tests object identity and only appears to
# work when the interpreter happens to intern both strings.
if method == 'cd':

    def loss_fn(v_0):
        """Return ``rbm.cd_loss`` for ``v_0`` and its ``rbm.cd_step`` result.

        ``rbm.cd_step`` is called with ``train_mc_steps`` as its second
        argument.
        """
        v_n = rbm.cd_step(v_0, train_mc_steps)
        return rbm.cd_loss(v_0, v_n)
elif method == 'pcd':
    pass
else:
    print('unknown method')
    exit()