Example #1
        def load_model(self):
            # setting gpus
            self.cpu_devices = get_cpu_devices()
            self.gpu_devices = get_gpu_devices()
            if (len(self.gpu_devices) > 0):
                os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
                os.environ["CUDA_VISIBLE_DEVICES"] = str(self.gpu_to_use)

            self.devices, _ = (self.gpu_devices, DeviceCategory.GPU) if len(
                self.gpu_devices) > 0 else (self.cpu_devices,
                                            DeviceCategory.CPU)

            # initialization
            self.sess_transfer_learner, self.end_points, self.logits, self.input_tensor = InitializeTransferLearner(
                self.model_dir, self.pretrained_model, self.classes)
            init_model = True

            # logging
            checkPathExists([self.infer_dir, self.log_dir])
            self.log_filename = self.log_dir + '/' + datetime.now().strftime(
                'prediction_logs_%H_%M_%d_%m_%Y.log')
            with open(self.log_filename, 'w') as prediction_log:
                prediction_log.write('Filename' + '\t' + 'Actual' + '\t' +
                                     'Predicted' + '\t' + 'Score' + '\t' +
                                     'Noise_Suppression' + '\t' +
                                     'Language_Model' + '\n')

            logging(
                'Inference engine has been initiated on port: ' +
                str(self.port), self.logger, 'info')

            self.initization = True
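# A minimal, standalone sketch of the GPU-pinning pattern used in load_model above:
# order devices by PCI bus ID, then expose only the chosen index to CUDA. The index
# 0 here is purely illustrative (the class reads the real value from self.gpu_to_use).
import os

gpu_to_use = 0                                    # hypothetical device index
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"    # make device ordering deterministic
os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_to_use)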
def train_model(model_name='densenet121',
                opt='Adagrad',
                dataset='iris',
                writer=None):
    train_loader, val_loader, test_loader = load_data(dataset)

    # Model selection
    model = load_model(model_name)

    # Optimizer
    if model_name == "ownnet":
        optimizer = opt_selection(model[0], opt)
    else:
        optimizer = opt_selection(model, opt)

    # Loss Criterion
    if dataset == 'mltoy' or dataset == "yeast14c" or dataset == "yeast14c_m":
        # criterion = nn.MultiLabelSoftMarginLoss()
        criterion = nn.BCEWithLogitsLoss()
    else:
        criterion = nn.CrossEntropyLoss()
    if model_name == "ownnet":
        criterion = torch.nn.MSELoss()

    best_train, best_val = 0.0, 0.0
    g = 0
    for epoch in range(1, args.epochs + 1):
        # Train and Validate
        train_stats = train_step(model, criterion, optimizer, train_loader, g,
                                 epoch)
        valid_stats = valid_step(model, criterion, val_loader)
        g += 1

        # Logging
        logging(epoch, train_stats, valid_stats, writer)

        # Keep best model
        # print(train_stats['accuracy'], valid_stats['accuracy'], best_train, best_val)
        if valid_stats['accuracy'] > best_val or (
                valid_stats['accuracy'] == best_val
                and train_stats['accuracy'] >= best_train):
            best_train = train_stats['accuracy']
            best_val = valid_stats['accuracy']
            if model_name == "ownnet":
                best_model_weights = copy.deepcopy(model[0].state_dict())
            else:
                best_model_weights = copy.deepcopy(model.state_dict())

    # Load best model and evaluate on test set
    if model_name == "ownnet":
        model[0].load_state_dict(best_model_weights)
    else:
        model.load_state_dict(best_model_weights)
    test_stats = valid_step(model, criterion, test_loader)
    # print(train_stats['accuracy'], valid_stats['accuracy'], best_train, best_val)
    print(
        '\nBest Model Accuracies: Train: {:4.2f} | Val: {:4.2f} | Test: {:4.2f}'
        .format(best_train, best_val, test_stats['accuracy']))

    return model
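# Standalone sketch (an illustration, not the project's API) of the "keep best weights"
# pattern train_model relies on: deep-copy the state_dict only when validation accuracy
# improves, then restore that snapshot before the test pass. Values are made up.
import copy
import torch.nn as nn

model = nn.Linear(4, 3)                    # stand-in for the selected model
best_val = 0.0
best_model_weights = copy.deepcopy(model.state_dict())

for val_accuracy in [0.52, 0.61, 0.58]:    # stand-ins for valid_step(...)['accuracy']
    if val_accuracy > best_val:
        best_val = val_accuracy
        best_model_weights = copy.deepcopy(model.state_dict())

model.load_state_dict(best_model_weights)  # restore the best snapshot before testing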
 def get_action(self, state):
     try:
         return self.sess.run(self.predicted_action, {self.states: state})
         # if random.random() < self.exploration:
         #     return np.array([[random.sample(range(1, 35), 4*self.max_layers)]])
         # else:
         #     return self.sess.run(self.predicted_action, {self.states: state})
     except Exception as e:
         logging("Get action failed - " + str(e), self.logger, 'error')
 def get_action(self, state, init=False):
     try:
         if random.random() < self.exploration or init:
             return np.array([[
                 random.sample(range(1, self.num_hidden),
                               self.search_space_size)
             ]],
                             dtype=np.int32)
         else:
             return self.policy_session.run(self.predicted_action,
                                            {self.states: state})
     except Exception as e:
         logging("Get action failed - " + str(e), self.logger, 'error')
    def discount_reward_computation(self):
        try:
            rewards = np.asarray(self.reward_buffer)
            discounted_rewards = np.zeros_like(rewards)
            running_add = 0
            for t in reversed(range(0, rewards.size)):
                if rewards[t] != 0.0:
                    running_add = 0
                running_add = running_add * self.discount_factor + rewards[t]
                discounted_rewards[t] = running_add

            return discounted_rewards[-1]
        except Exception as e:
            logging("Discount rewards failed - " + str(e), self.logger,
                    'error')
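# Standalone numeric sketch of the discounted-return recurrence above: walk the reward
# buffer in reverse, reset the running sum at every non-zero reward, and accumulate with
# the discount factor. The reward values here are illustrative only.
import numpy as np

rewards = np.asarray([0.0, 0.0, 1.0, 0.0, 0.5])
discount_factor = 0.99
discounted_rewards = np.zeros_like(rewards)
running_add = 0.0
for t in reversed(range(rewards.size)):
    if rewards[t] != 0.0:
        running_add = 0.0                  # reset at a reward boundary
    running_add = running_add * discount_factor + rewards[t]
    discounted_rewards[t] = running_add
print(discounted_rewards[-1])              # the method above returns only this last entry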
    def policy_network(self, state, max_layers, logger=None):
        try:
            with tf.name_scope("policy_network"):
                nas_cell = tf.contrib.rnn.NASCell(4 * max_layers)
                outputs, state = tf.nn.dynamic_rnn(nas_cell, tf.expand_dims(state, -1), dtype=tf.float32)

                bias = tf.Variable([0.05] * 4 * max_layers)
                outputs = tf.nn.bias_add(outputs, bias)

                print("outputs: ", outputs, outputs[:, -1:, :],
                      tf.slice(outputs, [0, 4 * max_layers - 1, 0], [1, 1, 4 * max_layers]))

                # return tf.slice(outputs, [0, 4*max_layers-1, 0], [1, 1, 4*max_layers]) # Returned last output of rnn
                return outputs[:, -1:, :]
        except Exception as e:
            logging("Policy network failed - " + str(e), logger, 'error')
Example #7
def train_model(model_name='densenet121',
                opt='Adagrad',
                dataset='iris',
                writer=None,
                label_col_name=''):
    # train_loader, val_loader, test_loader = load_data(dataset, label_col_name=label_col_name)
    train_loader, test_loader, nb_classes = load_data(
        dataset, label_col_name=label_col_name)

    # Model selection
    model = load_model(model_name, nb_classes=nb_classes)

    # Optimizer
    optimizer = opt_selection(model, opt)

    # Loss Criterion
    criterion = nn.CrossEntropyLoss()

    best_train, best_val = 0.0, 0.0
    for epoch in range(1, args.epochs + 1):
        # Train and Validate
        train_stats = train_step(model, criterion, optimizer, train_loader)
        # valid_stats = valid_step(model, criterion, val_loader)

        # Logging
        # logging(epoch, train_stats, valid_stats, writer)
        logging(epoch, train_stats, writer)

        # Keep best model
        if train_stats['accuracy'] >= best_train:
            best_train = train_stats['accuracy']
            # best_val    = valid_stats['accuracy']
            best_model_weights = copy.deepcopy(model.state_dict())

    # Load best model and evaluate on test set
    model.load_state_dict(best_model_weights)
    test_stats = valid_step(model, criterion, test_loader)

    # print('\nBests Model Accuracies: Train: {:4.2f} | Val: {:4.2f} | Test: {:4.2f}'.format(best_train, best_val, test_stats['accuracy']))
    print('\nBest Model Accuracies: Train: {:4.2f} | Test: {:4.2f}'.format(
        best_train, test_stats['accuracy']))

    return model
    def train_step(self, steps_count):
        try:
            for i, (grad, var) in enumerate(self.gradients):
                if grad is not None:
                    print(self.gradients[i])

            # print('prev_reward: ' + str(self.reward_buffer[-steps_count:]))
            states = np.array(
                self.state_buffer[-steps_count:]) / self.division_rate
            # reward = self.reward_buffer[-steps_count:]
            reward = np.asarray([self.discount_reward_computation()
                                 ]).astype('float32')

            # print('states: ' + str(states[0]))
            # print('rewards: ' + str(reward))

            _, loss, summary, log_probs, global_step = self.policy_session.run(
                [
                    self.train_op, self.loss, self.summaries_op,
                    self.policy_outputs, self.global_step
                ], {
                    self.states: states,
                    self.discounted_rewards: reward
                })
            log_probs = ['%.3f' % elem for elem in log_probs[0][0]]

            # print('' + str(log_probs))

            self.summary_writer.add_summary(summary, global_step)
            self.summary_writer.flush()
            self.saver.save(self.policy_session,
                            save_path=self.model_dir + 'controller/model.chkt',
                            global_step=self.global_step)

            # reduce exploration after many train steps
            if global_step != 0 and global_step % 20 == 0 and self.exploration > 0.5:
                self.exploration *= 0.99

            return loss, log_probs
        except Exception as e:
            logging("Train step failed - " + str(e), self.logger, 'error')
Example #9
def main(args):
    torch.manual_seed(422)
    log = logging(args.save_folder)

    train_set = DataLoader(
        dataset=p2Dataset(root_dir='hw2_data/p2_data/train'),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers)
    valid_set = DataLoader(
        dataset=p2Dataset(root_dir='hw2_data/p2_data/validation'),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers)

    device = args.device
    model = VGGFCN8s(pretrained=True, n_classes=7)

    # if torch.cuda.device_count() > 1:
    # model = torch.nn.DataParallel(model)

    model = model.to(device)
    # model.load_state_dict(torch.load(
    #     'weights/q2/fcn8s/epoch_149-0.660.pth', map_location=args.device))
    optimizer = optim.Adam(model.parameters(), lr=7e-5)
    criterion = CrossEntropyLoss(ignore_index=6)
    min_loss = 2

    for epoch in range(50):
        log.write('\nepoch: {}\n'.format(epoch))
        loss = train(train_set, model, optimizer, criterion, device)
        log.write('train: {}\n'.format(loss))

        loss, iou = validation(valid_set, model, criterion, device)
        log.write('validation: {}, mIoU:{:.3f}\n'.format(loss, iou))
        if iou > 0.6 and loss < min_loss:
            torch.save(
                model.state_dict(),
                '{}/epoch_{:02d}-{:.3f}.pth'.format(args.save_folder, epoch,
                                                    iou))
            min_loss = loss
            log.write('Best epoch: {}\n'.format(epoch))
def dataPreprocessing(mode,
                      data_dir,
                      features_dir,
                      batch_size,
                      pretrained_model,
                      logger=None):
    try:
        labels_dict = {}  # label dictionary

        if (not os.path.isfile(features_dir + "/" + mode + ".tfrecord")
            ):  # check whether feature file already exists
            logging("Preparing " + mode + "ing data...", logger, 'info')

            record_writer = tf.python_io.TFRecordWriter(
                path=features_dir + "/" + mode + ".tfrecord")

            labels = os.listdir(
                data_dir + "/" +
                mode)  # get all the images and labels in directory
            labels.sort(
            )  # sort the labels so that training and validation get them in the same order

            if (not os.path.isfile(features_dir + '/labels.txt')
                ):  # check whether labels file already exists
                for i, label in enumerate(
                        labels):  # preparing labels (already sorted and unique)
                    labels_dict[label] = i

                with open(features_dir + '/labels.txt',
                          'w') as writelabelDict:  # write labels file
                    for k in sorted(labels_dict, key=labels_dict.get):
                        writelabelDict.write(
                            str(labels_dict[k]) + ':' + k + '\n')
            else:
                with open(features_dir + '/labels.txt',
                          'r') as label_file:  # load labels
                    for line in re.split('\r?\n', label_file.read()):
                        line = line.split(':')
                        if len(line[0]) and len(line[1].strip(
                        )) and not line[1].strip() in labels_dict:
                            labels_dict[line[1].strip().lower()] = int(line[0])

            image_reader = ImageReader()
            with tf.Session('') as sess:
                for label in labels:
                    for filename in os.listdir(
                            os.path.join(data_dir, mode, label)):
                        if (filename != '.DS_Store'):
                            image_data = tf.gfile.FastGFile(
                                os.path.join(data_dir, mode, label, filename),
                                'rb').read()  # extract image features
                            height, width = image_reader.read_image_dims(
                                sess, image_data)

                            example =  tf.train.Example(features=tf.train.Features(feature={            # tensorflow example
                                'image/encoded': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_data])),
                                'image/format': tf.train.Feature(bytes_list=tf.train.BytesList(value=[b'jpg'])),
                                'image/class/label': tf.train.Feature(int64_list=tf.train.Int64List(value=[labels_dict[label]])),
                                'image/height': tf.train.Feature(int64_list=tf.train.Int64List(value=[height])),
                                'image/width': tf.train.Feature(int64_list=tf.train.Int64List(value=[width])),
                            }))

                            record_writer.write(example.SerializeToString())

                record_writer.flush()
                record_writer.close()

        # load dataset
        with open(features_dir + '/labels.txt',
                  'r') as label_file:  # load labels
            for line in re.split('\r?\n', label_file.read()):
                line = line.split(':')
                if len(line[0]) and len(line[1].strip(
                )) and not line[1].strip() in labels_dict:
                    labels_dict[int(line[0])] = line[1].strip().lower()

        num_samples = 0  # Count the total number of examples in all of these shard
        for _ in tf.python_io.tf_record_iterator(features_dir + "/" + mode +
                                                 ".tfrecord"):
            num_samples += 1

        reader = tf.TFRecordReader  # create a reader, which must be a TFRecord reader in this case

        # create the keys_to_features dictionary for the decoder
        keys_to_features = {
            'image/encoded':
            tf.FixedLenFeature((), tf.string, default_value=''),
            'image/format':
            tf.FixedLenFeature((), tf.string, default_value='jpg'),
            'image/class/label':
            tf.FixedLenFeature([],
                               tf.int64,
                               default_value=tf.zeros([], dtype=tf.int64)),
        }

        # create the items_to_handlers dictionary for the decoder.
        items_to_handlers = {
            'image': slim.tfexample_decoder.Image(),
            'label': slim.tfexample_decoder.Tensor('image/class/label'),
        }

        # start to create the decoder
        decoder = slim.tfexample_decoder.TFExampleDecoder(
            keys_to_features, items_to_handlers)

        # create the dataset
        dataset = slim.dataset.Dataset(data_sources=features_dir + "/" + mode +
                                       ".tfrecord",
                                       decoder=decoder,
                                       reader=reader,
                                       num_readers=4,
                                       num_samples=num_samples,
                                       num_classes=len(labels_dict),
                                       labels_to_name=labels_dict,
                                       items_to_descriptions={})

        # create the data_provider object
        data_provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            common_queue_capacity=24 + 3 * batch_size,
            common_queue_min=24)

        # obtain the raw image using the get method
        raw_image, label = data_provider.get(['image', 'label'])

        # perform the correct preprocessing for this image depending if it is training or evaluating
        if (pretrained_model == 'inceptionV3'):  # inception v3
            image_size = inception_v3.default_image_size
            image = inception_preprocessing.preprocess_image(
                raw_image,
                height=image_size,
                width=image_size,
                is_training=(mode == 'train'))
        elif (pretrained_model == 'inception_resnetV2'):  # inception_resnet v2
            image_size = inception_resnet_v2.default_image_size
            image = inception_preprocessing.preprocess_image(
                raw_image,
                height=image_size,
                width=image_size,
                is_training=(mode == 'train'))
        elif (pretrained_model == 'vgg_19'):  # vgg 19
            image_size = vgg_19.default_image_size
            image = vgg_preprocessing.preprocess_image(
                raw_image,
                output_height=image_size,
                output_width=image_size,
                is_training=(mode == 'train'))

        # as for the raw images reshape to batch it up
        # raw_image = tf.expand_dims(raw_image, 0)
        # raw_image = tf.image.resize_nearest_neighbor(raw_image, [image_size, image_size])
        # raw_image = tf.squeeze(raw_image)

        # batch up the image by enqueing the tensors internally in a FIFO queue and dequeueing many elements with tf.train.batch.
        images, labels = tf.train.batch([image, label],
                                        batch_size=batch_size,
                                        num_threads=4,
                                        capacity=4 * batch_size,
                                        allow_smaller_final_batch=True)

        logging(mode + "ing data loaded successfully", logger, 'info')

        return dataset, images, labels
    except Exception as e:
        logging("Data preprocessing failed - " + str(e), logger, 'error')
 def storeRollout(self, state, reward):
     try:
         self.reward_buffer.append(reward)
         self.state_buffer.append(state[0])
     except Exception as e:
         logging("Store rollout failed - " + str(e), self.logger, 'error')
    def create_variables(self):
        try:
            with tf.name_scope("model_inputs"):
                self.states = tf.placeholder(tf.float32,
                                             [None, self.search_space_size],
                                             name="states")

            with tf.name_scope("predict_actions"):
                # initialize policy network
                with tf.variable_scope("policy_network"):
                    self.policy_outputs = self.policy_network(
                        self.states, self.search_space_size)

                self.action_scores = tf.identity(self.policy_outputs,
                                                 name="action_scores")
                self.predicted_action = tf.cast(tf.scalar_mul(
                    self.division_rate, self.action_scores),
                                                tf.int32,
                                                name="predicted_action")

            # regularization loss
            policy_network_variables = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope="policy_network")

            # compute loss and gradients
            with tf.name_scope("compute_gradients"):
                self.discounted_rewards = tf.placeholder(
                    tf.float32, (None, ), name="discounted_rewards"
                )  # gradients for selecting action from policy network

                with tf.variable_scope("policy_network", reuse=True):
                    self.log_probs = self.policy_network(
                        self.states, self.search_space_size)

                # compute policy loss and regularization loss
                self.cross_entropy_loss = tf.nn.softmax_cross_entropy_with_logits(
                    logits=self.log_probs[:, -1, :], labels=self.states)
                self.pg_loss = tf.reduce_mean(self.cross_entropy_loss)
                self.reg_loss = tf.reduce_sum([
                    tf.reduce_sum(tf.square(x))
                    for x in policy_network_variables
                ])  # regularization
                self.loss = self.pg_loss + self.reg_param * self.reg_loss

                # compute gradients
                self.gradients = self.optimizer.compute_gradients(self.loss)

                # compute policy gradients
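                # (REINFORCE-style scaling, implemented just below: each parameter
                # gradient is multiplied by the discounted-reward placeholder, so
                # actions that led to higher returns get proportionally larger updates.)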
                for i, (grad, var) in enumerate(self.gradients):
                    if grad is not None:
                        self.gradients[i] = (grad * self.discounted_rewards,
                                             var)

                # training update
                with tf.name_scope("train_policy_network"):
                    self.train_op = self.optimizer.apply_gradients(
                        self.gradients, global_step=self.global_step
                    )  # apply gradients to update policy network

            # vars = tf.trainable_variables()
            # print(vars)

            # tf.summary.scalar("controller_cross_entropy_loss", self.pg_loss)
            # tf.summary.scalar('controller_regularizer_loss', self.reg_loss)
            tf.summary.scalar('controller_discounted_reward',
                              tf.reduce_sum(self.discounted_rewards))
            tf.summary.scalar("controller_loss", self.loss)
            # tf.summary.scalar("learning_rate", self.learning_rate)

            self.summaries_op = tf.summary.merge_all()
            filename = self.log_dir + '/controller/tb_logs/'  #%s' % time.strftime("%Y-%m-%d-%H-%M-%S")

            self.summary_writer = tf.summary.FileWriter(
                filename, graph=self.policy_session.graph)

            self.policy_session.run(tf.global_variables_initializer())
            self.saver = tf.train.Saver(max_to_keep=1)

            ckpt = tf.train.latest_checkpoint(self.model_dir +
                                              'controller/model.chkt')
            if ckpt and tf.train.checkpoint_exists(self.model_dir):
                self.saver.restore(self.policy_session, ckpt)
                logging(
                    self.model_dir + 'controller/model.chkt' +
                    " model loaded successfully", self.logger, 'info')
        except Exception as e:
            logging("Create variables failed - " + str(e), self.logger,
                    'error')
    def get_reward(self, action, prev_step, prev_accuracy):  # prev_epoch
        try:
            # action = [action[0][0][x:x + search_space_size] for x in range(0, len(action[0][0]), search_space_size)]

            # drop_rate = [self.search_space['3'][c[3] % len(self.search_space['3'])] for c in action]
            # activation = self.network_structure[0][8]
            # drop_rate = [c[0] for c in action]
            depth = action[0]
            dropout = action[1]
            learning_rate = action[2]
            momentum = action[3]
            # nesterov = action[4]

            # depth = 101
            # dropout = 0.2
            # learning_rate = 0.001
            # momentum = 0.95

            summary, global_step = None, None
            # for epoch in range(prev_epoch, self.train_num_epochs + 1):
            iterations = 0
            for step_ in range(prev_step, self.num_steps + 1):
                iterations += 1
                curr_itr = prev_step + self.num_child_steps_per_cycle
                for step in range(prev_step,
                                  (curr_itr if curr_itr < self.num_steps else
                                   self.num_steps)):

                    train_batch_xi = self.train_batch_x[step *
                                                        self.train_batch_size:
                                                        (step + 1) *
                                                        self.train_batch_size]
                    train_batch_yi = self.train_batch_y[step *
                                                        self.train_batch_size:
                                                        (step + 1) *
                                                        self.train_batch_size]

                    _, summary, global_step = self.sess.run(
                        [self.train_op, self.summaries_op, self.global_step],
                        feed_dict={
                            self.inputs: train_batch_xi,
                            self.labels: train_batch_yi,
                            self.depth: depth,
                            self.dropout_rate: dropout,
                            self.learning_rate: learning_rate,
                            self.momentum: momentum
                        })
                    if step % 10 == 0:  # calculate batch loss and accuracy
                        self.loss_value, accuracy = self.sess.run(
                            [self.loss, self.accuracy],
                            feed_dict={
                                self.inputs: train_batch_xi,
                                self.labels: train_batch_yi,
                                self.depth: depth,
                                self.dropout_rate: dropout,
                                self.learning_rate: learning_rate,
                                self.momentum: momentum
                            })

                        print("Training: | Step: " + str(step) +
                              " | Training Loss: " +
                              "{:.3f}".format(self.loss_value) +
                              " | Training Accuracy: " +
                              "{:.3f}".format(accuracy))
                        #  Epoch: " + str(epoch + 1) + "

                # validation
                validation_accuracy, probs, trainable_variables = self.sess.run(
                    [
                        self.validation_accuracy, self.probs,
                        self.all_trainable_vars
                    ],
                    feed_dict={
                        self.inputs: self.test_batch_x,
                        self.labels: self.test_batch_y,
                        self.depth: depth,
                        self.dropout_rate: dropout,
                        self.learning_rate: learning_rate,
                        self.momentum: momentum
                    })

                print("Validation | Step: " + str(step) +
                      " | Validation Accuracy: " +
                      "{:.3f}".format(validation_accuracy))

                # print gradients
                # print(g)
                # for var, grad_value in zip(self.var_list, g):
                #     grad, value = grad_value
                #     print('', var.op.name, grad.squeeze(), sep='\n')

                # difference factor
                # different_factor = self.get_different_factor(accuracy * 100)
                # if accuracy * (1 + different_factor) <= self.best_accuracy:
                #     self.best_accuracy = accuracy
                #     step = prev_step
                # else:

                # if prev_accuracy * 1.2 <= accuracy:

                # if(prev_step == 0):
                #     prev_epoch += 1
                # else:
                #     prev_epoch = epoch

                # if (accuracy + different_factor) <= self.best_accuracy:
                # compute the reward
                reward = validation_accuracy  #- self.moving_accuracy)
                # if self.moving_accuracy == 0.0 or reward == 0.0:
                #     reward = 0.01

                if self.clip_rewards:
                    reward = np.clip(reward, -0.05, 0.05)

                # update moving accuracy with bias correction for 1st update
                if self.beta > 0.0 and self.beta < 1.0:
                    self.moving_accuracy = self.beta * self.moving_accuracy + (
                        1 - self.beta) * validation_accuracy
                    self.moving_accuracy = self.moving_accuracy / (
                        1 - self.beta_bias)
                    self.beta_bias = 0

                    # reward = np.clip(reward, -0.1, 0.1)
                    if reward <= 0.0:
                        reward = 0.01

                print("Evaluation accuracy: " + str(validation_accuracy) +
                      " | moving accuracy: " +
                      str(round(self.moving_accuracy, 4)) +
                      " | previous accuracy: " + str(prev_accuracy))

                # if(self.moving_accuracy > validation_accuracy and (validation_accuracy - prev_accuracy) < 0.0): # if (validation_accuracy - prev_accuracy) < 0.0: #different_factor:
                self.summary_writer.add_summary(summary, global_step)
                self.summary_writer.flush()

                self.saver.save(self.sess,
                                save_path=self.model_dir +
                                'network/model.chkt',
                                global_step=tf.train.get_global_step())

                return reward, validation_accuracy, self.loss_value, prev_step, step, probs, iterations, self.moving_accuracy, trainable_variables
                # else:
                #     prev_accuracy = validation_accuracy
                #     prev_step = step % (self.num_steps - 1)

                # else:
                #     self.best_accuracy = accuracy

                # if accuracy - prev_accuracy <= 0.01: #and reward >= 0.0:
                #     return accuracy, accuracy, loss, prev_step, step, probs, iterations
                # else:
                #     return 0.01, accuracy, loss, prev_step, step, probs, iterations      # prev_epoch

                # if (accuracy - prev_accuracy) <= different_factor:
                # if (accuracy - prev_accuracy) <= 0.01:
                #     # # compute the reward
                #     # reward = (accuracy - self.moving_accuracy)
                #     #
                #     # # if rewards are clipped, clip them in the range -0.05 to 0.05
                #     # # if self.clip_rewards:
                #     # #     reward = np.clip(reward, -0.05, 0.05)
                #     #
                #     # # update moving accuracy with bias correction for 1st update
                #     # if self.beta > 0.0 and self.beta < 1.0:
                #     #     self.moving_accuracy = self.beta * self.moving_accuracy + (1 - self.beta) * accuracy
                #     #     self.moving_accuracy = self.moving_accuracy / (1 - self.beta_bias)
                #     #     self.beta_bias = 0
                #     #
                #     # # reward = np.clip(reward, -0.1, 0.1)
                #
                #     reward = accuracy
                #
                #     return reward, accuracy, loss, epoch, prev_step, step
                # else:
                #     return 0.01, accuracy, loss, epoch, prev_step, step

        except Exception as e:
            logging("Get reward failed - " + str(e), self.logger, 'error')
Example #14
def train(mode, dataset, images, labels, batch_size, num_epochs, optimizer_fn, learning_rate, learning_rate_decay_factor, num_epochs_per_decay,
          dropout_keep_prob, pretrained_model, model_dir, pretrained_model_dir, layer_count, logger = None):
    try:
        # find the number steps to take before decaying the learning rate and batches per epoch
        num_batches_per_epoch = int(dataset.num_samples / batch_size) + 1
        num_steps_per_epoch = num_batches_per_epoch  # one step is one batch processed
        # decay_steps = int(num_epochs_per_decay * num_steps_per_epoch)

        # initializing the model
        if (pretrained_model == 'inceptionV3'):  # inception V3
            model_file = 'inception_v3.ckpt'
            architecture_layers = inceptionV3_layers
            with slim.arg_scope(inception_v3_arg_scope()):  # create the model inference
                logits, end_points = inception_v3(images, num_classes=dataset.num_classes, dropout_keep_prob=dropout_keep_prob, is_training=(mode == 'train'))
        elif (pretrained_model == 'inception_resnetV2'):  # inception_resnetV2
            model_file = 'inception_resnet_v2_2016_08_30.ckpt'
            architecture_layers = inceptionResnetV2_layers
            with slim.arg_scope(inception_resnet_v2_arg_scope()):  # create the model inference
                logits, end_points = inception_resnet_v2(images, num_classes=dataset.num_classes, dropout_keep_prob=dropout_keep_prob, is_training=(mode == 'train'))
        elif (pretrained_model == 'vgg_19'):  # vgg 19
            model_file = 'vgg_19.ckpt'
            architecture_layers = vgg_19_layers
            with slim.arg_scope(vgg_arg_scope()):  # create the model inference
                logits, end_points = vgg_19(images, num_classes=dataset.num_classes, dropout_keep_prob=dropout_keep_prob, is_training=(mode == 'train'))

        if (pretrained_model == 'inceptionV3' or pretrained_model == 'inception_resnetV2'):  # inceptionV3 or inception_resnetV2
            logging("Transfer learning layers-" + str(layer_count) + ": " + str(architecture_layers[:(layer_count + 1)]), logger, 'info')

            # define the scopes that you want to exclude for restoration
            variables_to_restore = slim.get_variables_to_restore(exclude=architecture_layers[:(layer_count + 1)])
        elif (pretrained_model == 'vgg_19'):  # vgg 19
            logging("Transfer learning layers-" + str(layer_count) + ": " + str(architecture_layers[:(layer_count)]), logger, 'info')

            # define the scopes that you want to exclude for restoration
            variables_to_restore = slim.get_variables_to_restore(exclude=architecture_layers[:(layer_count)])

        # perform one-hot-encoding of the labels
        one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)

        # performs the equivalent to tf.nn.sparse_softmax_cross_entropy_with_logits but enhanced with checks
        loss = tf.losses.softmax_cross_entropy(onehot_labels = one_hot_labels, logits = logits)
        total_loss = tf.losses.get_total_loss()    # obtain the regularization losses as well

        # global_step = get_or_create_global_step()           # create the global step for monitoring the learning_rate and training.

        # define your exponentially decaying learning rate
        # learning_rate = tf.train.exponential_decay(
        #     learning_rate = initial_learning_rate,
        #     global_step = global_step,
        #     decay_steps = decay_steps,
        #     decay_rate = learning_rate_decay_factor,
        #     staircase = True)

        optimizer = optimizer_functions[optimizer_fn](learning_rate = learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8)              # define the optimizer that takes on the learning rate
        train_op = slim.learning.create_train_op(total_loss, optimizer)     # create the train_op

        # for the metrics, predictions must be class indices (not one-hot encoded)
        if (pretrained_model == 'inceptionV3' or pretrained_model == 'inception_resnetV2'):  # inceptionV3 or inception_resnetV2
            predictions = tf.argmax(end_points['Predictions'], 1)
            # probabilities = end_points['Predictions']
        elif (pretrained_model == 'vgg_19'):  # vgg 19
            predictions = tf.cast(tf.to_int64(tf.argmax(logits, 1)), tf.float32)

        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(predictions, labels)
        metrics_op = tf.group(accuracy_update) #, probabilities)

        # all the summaries need to monitor and group them into one summary op.
        tf.summary.scalar('losses/Total_Loss', loss)
        tf.summary.scalar('train_accuracy', accuracy)
        # tf.summary.scalar('learning_rate', learning_rate)
        summary_op = tf.summary.merge_all()

        # create a saver function that actually restores the variables from a checkpoint file in a sess
        saver = tf.train.Saver(var_list=variables_to_restore, max_to_keep=1)

        def restore_fn(sess):
            return saver.restore(sess, pretrained_model_dir + model_file)

        # define supervisor for running a managed session.
        sv = tf.train.Supervisor(logdir=model_dir + str(layer_count) + "/", summary_op=None, init_fn=restore_fn)

        # run the managed session
        with sv.managed_session() as sess:
            # sess.run(init_from_final_layer)  # initialize the last unfreez layer

            max_train_accuracy = 0.0
            for step in range(num_steps_per_epoch * num_epochs):
                loss_value, _, _ = sess.run([train_op, sv.global_step, metrics_op])

                if(step == 0):
                    logging('Step: ' + str(int(step / num_batches_per_epoch + 1)) + '/' + str(num_epochs) + ' | learning rate: ' + str(learning_rate), logger, 'info')

                if step % num_steps_per_epoch == 0 and step != 0:
                    logits_value, loss_value, accuracy_value, summary_values = sess.run([logits, total_loss, accuracy, summary_op])

                    logging('Step: ' + str(int(step / num_batches_per_epoch + 1)) + '/' + str(num_epochs) + ' | loss: ' + str(loss_value) +
                            ' | accuracy: ' + str(accuracy_value), logger, 'info')      #  + ' | learning rate: ' + str(learning_rate_value)

                    sv.summary_computed(sess, summary_values)       # log the summaries

                    if(accuracy_value > max_train_accuracy):
                        max_train_accuracy = accuracy_value

            # log the final training loss and accuracy
            total_loss_value, total_accuracy_value = sess.run([total_loss, accuracy])
            logging('Training Final loss: ' + str(total_loss_value) + ' | Training Final accuracy: ' + str(max_train_accuracy), logger, 'info')

            # once all the training has been done, save the log files and checkpoint model
            logging('Saving model of layers-' + str(layer_count), logger, 'info')
            sv.saver.save(sess, model_dir + str(layer_count) + '/', global_step=sv.global_step)

            if not str(layer_count) in train_accuracy:
                train_accuracy[str(layer_count)] = str(num_epochs) + '\t' + str(round(learning_rate, 8)) + '\t' + str(total_loss_value) + \
                                                   '\t' + str(total_accuracy_value * 100) + '\t' + str(max_train_accuracy * 100)

        logging("Transfer learning training completed successfully", logger, 'info')

        return train_accuracy
    except Exception as e:
        logging("Trasnfer learning training failed - " + str(e), logger, 'error')
    def initialize_graph(self):  # , action, step, pre_acc, search_space_size):
        try:
            # creating graph
            # self.graph = tf.Graph().as_default()
            tf.reset_default_graph()

            print('Building graph...')

            # max_depth = 18
            # image_size = 228
            # channels = 3
            # batch_size = 32
            # num_classes = 10
            # learning_rate = 0.01
            self.input_dimensions = self.input_dimensions.split('x')
            # if('mnist' in self.dataset_name):
            #     self.inputs = tf.placeholder(tf.uint8, [None, int(self.input_dimensions[0]), int(self.input_dimensions[1]), int(self.input_dimensions[2])], name="inputs")
            # elif 'cifar' in self.dataset_name:
            if self.data_format == 'channels_last':
                self.inputs = tf.placeholder(tf.float32,
                                             shape=[
                                                 None,
                                                 int(self.input_dimensions[0]),
                                                 int(self.input_dimensions[1]),
                                                 int(self.input_dimensions[2])
                                             ],
                                             name="inputs")
            else:
                self.inputs = tf.placeholder(tf.float32,
                                             shape=[
                                                 None,
                                                 int(self.input_dimensions[2]),
                                                 int(self.input_dimensions[0]),
                                                 int(self.input_dimensions[1])
                                             ],
                                             name="inputs")

            self.labels = tf.placeholder(tf.int32,
                                         shape=[None, self.num_classes],
                                         name='label')

            self.depth = tf.placeholder(tf.int32, shape=[], name='depth')
            self.dropout_rate = tf.placeholder(tf.float32,
                                               shape=[],
                                               name="dropout")
            self.learning_rate = tf.placeholder(tf.float32,
                                                shape=[],
                                                name='learning_rate')
            self.momentum = tf.placeholder(tf.float32,
                                           shape=[],
                                           name='momentum')
            # self.nesterov = tf.placeholder(tf.bool, shape=(), name='nesterov')

            # labels_onehot = tf.one_hot(self.labels, 10)

            # child network
            self.model = ChildNetwork(self.inputs,
                                      self.depth,
                                      self.dropout_rate,
                                      self.num_classes,
                                      max_depth=self.max_depth,
                                      initial_filters=self.initial_filters,
                                      data_format=self.data_format)
            logits, self.probs = self.model.stochastic_depth_conv2d(
                mode='train')
            self.loss, self.accuracy = self.model.classification_loss(
                logits=logits, label=self.labels)
            _, self.validation_accuracy = self.model.classification_loss(
                logits=logits, label=self.labels)

            # print([t.name for op in self.graph.get_operations() for t in op.values()])
            # print([t for op in self.graph.get_operations() for t in op.values()])

            self.global_step = tf.Variable(0, trainable=False)

            self.optimizer = self.activation_fn['4'](
                learning_rate=self.learning_rate,
                momentum=self.momentum,
                use_nesterov=True)
            self.var_list = tf.trainable_variables()
            self.train_op = self.optimizer.minimize(
                self.loss, global_step=self.global_step)

            # self.all_trainable_vars = [np.product(list(map(int, v.shape))) * v.dtype.size for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)]
            self.all_trainable_vars = tf.reduce_sum(
                [tf.reduce_prod(v.shape) for v in tf.trainable_variables()])
            # np.sum([np.product([xi.value for xi in x.get_shape()]) for x in tf.all_variables()])

            # if ('mnist' in self.dataset_name):
            #     self.train_batch_x, self.train_batch_y, self.test_batch_x, self.test_batch_y = self.dataset[0], self.dataset[1], self.dataset[2], self.dataset[3]
            # elif ('cifar' in self.dataset_name):
            self.train_batch_x, self.train_batch_y, self.test_batch_x, self.test_batch_y = self.dataset

            self.num_steps = int(
                math.ceil(len(self.train_batch_x) / self.train_batch_size))

            self.sess = tf.Session()
            self.sess.run(tf.global_variables_initializer())

            tf.summary.scalar("network_trainable_variables",
                              self.all_trainable_vars)
            tf.summary.scalar("network_depth", self.depth)
            tf.summary.scalar("network_loss", self.loss)
            tf.summary.scalar('network_training_accuracy', self.accuracy)
            tf.summary.scalar('network_validation_accuracy',
                              self.validation_accuracy)
            self.summaries_op = tf.summary.merge_all()

            filename = self.log_dir + '/network/tb_logs/'
            self.summary_writer = tf.summary.FileWriter(filename,
                                                        graph=self.sess.graph)

            # vars = tf.trainable_variables()
            # print(vars)  # some infos about variables...
            # vars_vals = self.sess.run(vars)
            # for var, val in zip(vars, vars_vals):
            #     print("var: {}, value: {}".format(var.name, val))

            self.saver = tf.train.Saver(max_to_keep=1)

            ckpt = tf.train.latest_checkpoint(self.model_dir +
                                              'network/model.chkt')
            if ckpt and tf.train.checkpoint_exists(self.model_dir):
                self.saver.restore(self.sess, ckpt)
                logging(
                    self.model_dir + 'network/model.chkt' +
                    " model loaded successfully", self.logger, 'info')
        except Exception as e:
            logging("Get reward failed - " + str(e), self.logger, 'error')
Example #16
def main(argv):
    try:
        if (len(argv) > 1):
            if (len(argv) > 1 or argv[1:][0] == '-h'):
                try:
                    opts, args = getopt.getopt(argv[1:], "ho:m:",
                                               ["operation=", "model="])
                    for opt, arg in opts:
                        opt = opt.lower()
                        arg = arg.lower()
                        if opt == '-h':
                            print(
                                'voicenet.py -o <train|test|infer|analysis|serve> -m <inceptionV3,inception_resnetV2,vgg_19>'
                            )
                            return
                        elif opt in ("-o", "--operation"):
                            mode = arg
                        elif opt in ("-m", "--model"):
                            pretrained_models = arg

                except getopt.GetoptError:
                    print(
                        'voicenet.py -o <train|test|infer|analysis|serve> -m <inceptionV3,inception_resnetV2,vgg_19>'
                    )  # -o <data_prep|train_test|freeze_model|infer|serve|regress_infer|analysis>')
                    return

                # map lower-cased aliases back to the canonical model names
                if pretrained_models in ('inceptionv3', 'inception'):
                    pretrained_models = 'inceptionV3'
                elif pretrained_models in ('inception_resnetv2', 'resnet'):
                    pretrained_models = 'inception_resnetV2'
                elif pretrained_models in ('vgg_19', 'vgg'):
                    pretrained_models = 'vgg_19'
        else:
            mode = ''
            pretrained_models = ''

        # if len(argv):
        #     gConfig = getConfig(main_dir + 'config/' + getConfig(argv[1]).lower() + '.ini')   # get configuration
        # else:
        gConfig = getConfig('config/metavision.ini')  # get configuration

        site = gConfig['site']
        if (not len(mode)):
            mode = gConfig['mode']

        if (not len(pretrained_models)):
            pretrained_models = gConfig['pretrained_model_dir']

        datasets = gConfig['datasets']
        data_dirs = gConfig['data_dir']
        infer_dir = gConfig['infer_dir'] + "/" + datasets + "/"
        train_num_epochs = gConfig['train_num_epochs']
        test_num_epochs = gConfig['test_num_epochs']
        layer_start = gConfig['layer_start']
        infer_layer = gConfig['infer_layer']
        learning_rate = gConfig['learning_rate']
        learning_rate_decay_factor = gConfig['learning_rate_decay_factor']
        num_epochs_per_decay = gConfig['num_epochs_per_decay']
        train_batch_size = gConfig['train_batch_size']
        test_batch_size = gConfig['test_batch_size']
        optimizer = gConfig['optimizer']
        dropout_keep_prob = gConfig['dropout_keep_prob']
        extract_features_only = gConfig['extract_features_only']
        log_dir = gConfig['log_dir']
        port = gConfig['port']
        gpu_to_use = gConfig['gpu_to_use']
        certificate = gConfig['certificate']
        resource_dir = gConfig['resources']

        # init_inception = False
        # init_inception_resnet = False
        # init_vgg = False
        #
        # logits = None

        # create logger
        _log.basicConfig(filename=log_dir + "/" + "log.txt",
                         level=_log.DEBUG,
                         format='%(asctime)s %(message)s',
                         datefmt='%m/%d/%Y %I:%M:%S %p')
        logger = _log.getLogger("VoiceNet")
        logger.setLevel(_log.DEBUG)
        console = _log.StreamHandler()
        console.setLevel(_log.DEBUG)

        formatter = _log.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
        )  # create formatter
        console.setFormatter(formatter)
        logger.addHandler(console)

        if ('train' in mode or 'test' in mode):
            # get the available GPU and CPU devices and specify which GPUs to use
            cpu_devices = get_cpu_devices()
            gpu_devices = get_gpu_devices()
            if (len(gpu_devices) > 1):
                os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
                os.environ["CUDA_VISIBLE_DEVICES"] = str(gConfig["gpu_to_use"])

                print("The available GPU devices: " + str(gpu_devices))

                # devices, device_category = (gpu_devices, DeviceCategory.GPU) if len(gpu_devices) > 1 else (cpu_devices, DeviceCategory.CPU)

            for pretrained_model in pretrained_models.split(
                    ','):  # pre-trained architecture
                pretrained_model_dir = "pretrained_models/" + pretrained_model + "/"
                for dataset in datasets.split(','):  # datasets
                    data_dir = data_dirs + "/" + dataset + "/"
                    features_dir = gConfig[
                        'features_dir'] + "/" + pretrained_model + "/" + dataset + "/"
                    model_dir = gConfig[
                        'model_dir'] + "/" + pretrained_model + "/" + dataset + "/"
                    log_dir = gConfig[
                        'log_dir'] + "/" + pretrained_model + "/"  # + dataset + "/"
                    output_dir = gConfig[
                        'output_dir'] + "/" + pretrained_model + "/" + dataset + "/"

                    checkPathExists(
                        [model_dir, features_dir, log_dir, output_dir])

                    if (pretrained_model == 'inceptionV3'):  # inception v3
                        architecture_layers = inceptionV3_layers
                    elif (pretrained_model == 'inception_resnetV2'
                          ):  # inception_resnet v2
                        architecture_layers = inceptionResnetV2_layers
                    elif (pretrained_model == 'vgg_19'):  # vgg 19
                        architecture_layers = vgg_19_layers

                    logging(
                        pretrained_model + " Transfer learning on " + dataset +
                        " dataset", logger, 'info')

                    if (pretrained_model == 'vgg_19'):
                        layer_count_len = len(architecture_layers)
                    else:
                        layer_count_len = len(architecture_layers) - 1

                    if (extract_features_only):
                        layer_count_len = 2

                    for layer_count in range(layer_start, layer_count_len):
                        # learning_rate_ = round((learning_rate / (1.05 + ((layer_count - 1) / 10))), 8)
                        train_num_epochs_ = train_num_epochs + (
                            10 * (layer_count - 1))

                        if ("train" in mode):  # training
                            with tf.Graph().as_default() as graph:
                                tf.logging.set_verbosity(
                                    tf.logging.ERROR
                                )  # set the verbosity to ERROR level

                                train_dataset, train_images, train_labels = dataPreprocessing(
                                    'train', data_dir, features_dir,
                                    train_batch_size, pretrained_model, logger)

                                if (not extract_features_only):
                                    train_accuracy = train(
                                        'train', train_dataset, train_images,
                                        train_labels, train_batch_size,
                                        train_num_epochs_, optimizer,
                                        learning_rate,
                                        learning_rate_decay_factor,
                                        num_epochs_per_decay,
                                        dropout_keep_prob, pretrained_model,
                                        model_dir, pretrained_model_dir,
                                        layer_count, logger)

                                graph.finalize()

                        if ("test" in mode):  # testing
                            with tf.Graph().as_default() as graph:
                                tf.logging.set_verbosity(
                                    tf.logging.ERROR
                                )  # set the verbosity to ERROR level

                                test_dataset, test_images, test_labels = dataPreprocessing(
                                    'test', data_dir, features_dir,
                                    test_batch_size, pretrained_model, logger)

                                if (not extract_features_only):
                                    test_accuracy = test(
                                        'test', test_dataset, test_images,
                                        test_labels, test_batch_size,
                                        test_num_epochs, pretrained_model,
                                        model_dir, layer_count, logger)
                                    # test_accuracy = test('test', test_dataset, data_dir, pretrained_model, model_dir, layer_count, logger)

                                graph.finalize()

                    if (not extract_features_only):
                        # save the results file
                        logging(
                            pretrained_model + " Writing results of " +
                            dataset + " dataset", logger, 'info')
                        if ("train" in mode):  # training
                            with open(output_dir + 'accuracy.txt',
                                      'w') as writeresultsDict:
                                writeresultsDict.write(
                                    'Architecture\tDataset\tLayer\tEpochs\tLearning\tTrain_Loss\tTrain_Accuracy\tMax_Train_Accuracy\n'
                                )
                                # for train_k, train_v in train_accuracy.items():
                                for layer_count in range(1, layer_count_len):
                                    print(train_accuracy[str(layer_count)])
                                    # accuracy = train_accuracy[layer_count].split('\t')
                                    writeresultsDict.write(
                                        pretrained_model + '\t' + dataset +
                                        '\t' + str(layer_count) + '\t' +
                                        train_accuracy[str(layer_count)] +
                                        '\n')

                        if ("test" in mode):  # testing
                            with open(output_dir + 'test_accuracy.txt',
                                      'w') as writeresultsDict:
                                writeresultsDict.write(
                                    'Architecture\tDataset\tLayer\tTest_Accuracy\n'
                                )
                                # for test_k, test_v in test_accuracy.items():
                                for layer_count in range(1, layer_count_len):
                                    # accuracy = test_accuracy[layer_count].split('\t')
                                    writeresultsDict.write(
                                        pretrained_model + '\t' + dataset +
                                        '\t' + str(layer_count) + '\t' +
                                        str(test_accuracy[str(layer_count)]) +
                                        '\n')

        elif 'infer' in mode:
            output_dir = gConfig[
                'output_dir'] + "/" + pretrained_models + "/" + datasets + "/"
            model_dir = gConfig[
                'model_dir'] + "/" + pretrained_models + "/" + datasets + "/" + str(
                    infer_layer) + "/"
            checkPathExists([output_dir, model_dir])

            inferenceResults = open(gConfig['output_dir'] + '/inference.txt',
                                    'w')
            inferenceResults.write(
                'Architecture\tFilename\tActual Class\tPredicted Class\tAccuracy\tProbability\n')
            # test image
            # image = "willy_wonka_new.jpg"

            # preprocessing
            # input_tensor, _, _ = dataPreprocessing('infer', infer_dir, dataset, train_batch_size, pretrained_model, logger)

            # inference
            # predictions = inference(mode, pretrained_model, pretrained_model_dir, infer_dir + imagefile, channels = 3, return_top_predictions=5)
            # PlotResizedImage(sess, image_path=image_path)
            # inception_prediction = ClassifyInception(sess, image_path, return_top_predictions=5)

            # print(pretrained_model + ' - network prediction: ' + str(predictions) + '\n')

            classes = []
            with open(resource_dir + '/' + datasets + '/labels.txt',
                      'r') as readfile:
                for line in readfile.readlines():
                    classes.append(line.split(':')[1].strip())

            sess_transfer_learner, end_points, logits, input_tensor = InitializeTransferLearner(
                model_dir, pretrained_models, classes)
            init_model = True

            # # initialization
            # if (pretrained_model == 'inceptionV3' and not init_inception):  # inception v3
            #     sess_inception = InitializeInception(pretrained_model_dir)
            #     init_inception = True
            # elif (pretrained_model == 'inception_resnetV2' and not init_inception_resnet):  # inception_resnet v2
            #     sess_inception_resnet, end_points, logits, input_tensor, imagenet_classes = InitializeInceptionResnet(model_dir)
            #     init_inception_resnet = True
            # elif (pretrained_model == 'vgg_19' and not init_vgg):  # vgg 19
            #     sess_vgg, prediction, input_tensor = InitializeVGG(pretrained_model_dir)
            #     init_vgg = True

            # print('Inception - Resnet network prediction: ' + str(inception_resnet_prediction[0]) + '\n')

            logging(
                datasets + " inference on " + pretrained_models + " network",
                logger, 'info')

            # inference
            count = 0
            probability = 0.0
            accuracy_ = 0.0
            # entropy = 0.0
            for subdir, dirs, files in os.walk(os.path.join(infer_dir)):
                for file in files:
                    if file.endswith('.png') or file.endswith('.jpg'):
                        # if (pretrained_model == 'inceptionV3' and init_inception):  # inception v3
                        #     probabilities, entropies = ClassifyInception(sess_inception, subdir + "/" + file)
                        # elif (pretrained_model == 'inception_resnetV2' and init_inception_resnet):  # inception_resnet v2
                        #     probabilities, entropies = ClassifyInceptionResnet(sess_inception_resnet, end_points, logits, input_tensor, subdir + "/" + file)
                        # elif (pretrained_model == 'vgg_19' and init_vgg):  # vgg 19
                        #     probabilities, entropies = ClassifyVGG(sess_vgg, prediction, input_tensor, subdir + "/" + file)

                        # if(init_model):
                        probabilities, actual_class, pred_class, accuracy, processed_image = ClassifyTransferLearner(
                            sess_transfer_learner,
                            end_points,
                            logits,
                            input_tensor,
                            subdir + "/" + file,
                            is_inference=True)

                        grad_cam(subdir + "/" + file,
                                 processed_image,
                                 input_tensor,
                                 end_points,
                                 sess_transfer_learner,
                                 classes.index(pred_class),
                                 num_classes=len(classes),
                                 output_path=subdir + "/" +
                                 file.split('.')[0] + '_cam.jpg')

                        probability += probabilities
                        accuracy_ += accuracy

                        inferenceResults.write(pretrained_models + '\t' +
                                               str(file) + '\t' +
                                               actual_class + '\t' +
                                               pred_class + '\t' +
                                               str(accuracy) + '\t' +
                                               str(round(probabilities, 2)) +
                                               '\n')

                        count += 1

            if (count):
                probability = (probability * 100) / count
                accuracy_ = (accuracy_ * 100) / count

            inferenceResults.write(pretrained_models + '\t' +
                                   "Accuracy: " + str(accuracy_) + '\t' +
                                   "Probability: " +
                                   str(round(probability, 2)) + '\n')

            print(pretrained_models + ' network predictions on ' + datasets +
                  " - Probability: " + str(probability) + " - Accuracy: " +
                  str(accuracy_))

            # if (pretrained_model == 'inceptionV3' and init_inception):  # inception v3
            #     CloseInceptionResnet(sess_inception)
            #     init_inception = False
            # elif (pretrained_model == 'inception_resnetV2' and init_inception_resnet):  # inception_resnet v2
            #     CloseInceptionResnet(sess_inception_resnet)
            #     init_inception_resnet = False
            # elif (pretrained_model == 'vgg_19' and init_vgg):  # vgg 19
            #     CloseVGG(sess_vgg)
            #     init_vgg = False

            CloseTransferLearner(sess_transfer_learner)
            inferenceResults.close()
        elif (mode == "serve"):  # serve
            output_dir = gConfig[
                'output_dir'] + "/" + pretrained_models + "/" + datasets + "/"
            model_dir = gConfig[
                'model_dir'] + "/" + pretrained_models + "/" + datasets + "/" + str(
                    infer_layer) + "/"
            checkPathExists([output_dir, model_dir])

            classes = []
            with open(resource_dir + '/' + datasets + '/labels.txt',
                      'r') as readfile:
                for line in readfile.readlines():
                    classes.append(line.split(':')[1].strip())

            model_server = Serving(site,
                                   port,
                                   model_dir,
                                   pretrained_models,
                                   infer_dir,
                                   output_dir,
                                   log_dir,
                                   gpu_to_use,
                                   classes,
                                   certificate=certificate,
                                   logger=logger)

            model_server.run()

    except Exception as ex:
        print("main function failed - " + str(ex))
        raise ex
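
The driver above persists its results as tab-separated text files (accuracy.txt, test_accuracy.txt, inference.txt). Below is a minimal sketch, assuming the header and row layout written by the training branch above, of reading one of those files back to pick the best-performing layer count; read_layer_accuracies and the path in the usage lines are illustrative names, not part of the original code.

import csv

def read_layer_accuracies(results_path):
    # parse the tab-separated results written by the training branch above;
    # column names are assumed to match the header it writes
    accuracies = {}
    with open(results_path, 'r') as results_file:
        for row in csv.DictReader(results_file, delimiter='\t'):
            accuracies[int(row['Layer'])] = float(row['Train_Accuracy'])
    return accuracies

# usage (illustrative path):
# per_layer = read_layer_accuracies(output_dir + 'accuracy.txt')
# best_layer = max(per_layer, key=per_layer.get)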
    def create_variables(self):
        try:
            with tf.name_scope("model_inputs"):
                self.states = tf.placeholder(tf.float32, [None, self.max_layers * 4], name="states")      # raw state representation

            with tf.name_scope("predict_actions"):
                with tf.variable_scope("policy_network"):       # initialize policy network
                    # state input is the first input fed into the controller RNN. the rest of the inputs are fed to the RNN internally
                    # with tf.name_scope('state_input'):
                    state_input = tf.placeholder(dtype=tf.int32, shape=(1, None), name='state_input')

                    # self.state_input = state_input

                    nas_cell = tf.nn.rnn_cell.LSTMCell(35)
                    cell_state = nas_cell.zero_state(batch_size=1, dtype=tf.float32)

                    embedding_weights = []

                    # for each possible state, create a new embedding. Reuse the weights for multiple layers.
                    with tf.variable_scope('embeddings', reuse=tf.AUTO_REUSE):
                        # for i in range(len(self.state_space)):
                        for index, (key, value) in enumerate(self.state_space.items()):
                            state_ = value
                            size = len(value)

                            # size + 1 is used so that 0th index is never updated and is "default" value
                            weights = tf.get_variable('state_embeddings_%d' % index, shape=[size + 1, self.embedding_dim], initializer=tf.initializers.random_uniform(-1., 1.))
                            embedding_weights.append(weights)

                        # initially, cell input will be 1st state input
                        embeddings = tf.nn.embedding_lookup(embedding_weights[0], state_input)

                    cell_input = embeddings

                    for i in range(self.num_layers):
                        for key, value in self.state_space.items():
                            state_id = i % len(self.state_space)
                            size = len(value)

                            with tf.name_scope('controller_output_%d' % i):
                                # feed the ith layer input (i-1 layer output) to the RNN
                                outputs, final_state = tf.nn.dynamic_rnn(nas_cell, cell_input, initial_state=cell_state, dtype=tf.float32)

                                # add a new classifier for each layers output
                                classifier = tf.layers.dense(outputs[:, -1, :], units=size, name='classifier_%d' % (i), reuse=False)
                                predictions = tf.nn.softmax(classifier)

                                # take the class label predicted for this layer and feed it as the next layer's input
                                cell_input = tf.argmax(predictions, axis=-1)
                                cell_input = tf.expand_dims(cell_input, -1, name='pred_output_%d' % (i))
                                cell_input = tf.cast(cell_input, tf.int32)
                                cell_input = tf.add(cell_input, 1)  # we avoid using 0 so as to have a "default" embedding at 0th index

                                # embedding lookup of this state using its state weights ; reuse weights
                                cell_input = tf.nn.embedding_lookup(embedding_weights[state_id], cell_input, name='cell_output_%d' % (i))
                                cell_state = final_state

                            # store the tensors for later loss computation
                            self.cell_outputs.append(cell_input)
                            self.policy_classifiers.append(classifier)
                            self.policy_actions.append(predictions)

                    # self.policy_outputs = self.policy_network(self.states, self.max_layers)

                # self.action_scores = tf.identity(self.policy_outputs, name="action_scores")
                # self.predicted_action = tf.cast(tf.scalar_mul(self.division_rate, self.action_scores), tf.int32, name="predicted_action")

            policy_network_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="policy_network")          # trainable variables of the policy network (used below for the regularization loss)

            # compute loss and gradients
            with tf.name_scope("compute_gradients"):
                self.discounted_rewards = tf.placeholder(tf.float32, (None,), name="discounted_rewards")        # rewards used below to scale the policy gradients

                with tf.variable_scope("policy_network", reuse=True):
                    self.logprobs = self.policy_network(self.states, self.max_layers)

                # compute policy loss and regularization loss
                self.cross_entropy_loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.logprobs[:, -1, :], labels=self.states)
                self.pg_loss            = tf.reduce_mean(self.cross_entropy_loss)
                self.reg_loss           = tf.reduce_sum([tf.reduce_sum(tf.square(x)) for x in policy_network_variables]) # Regularization
                self.loss               = self.pg_loss + self.reg_param * self.reg_loss

                self.gradients = self.optimizer.compute_gradients(self.loss)        # compute gradients

                # compute policy gradients
                for i, (grad, var) in enumerate(self.gradients):
                    if grad is not None:
                        self.gradients[i] = (grad * self.discounted_rewards, var)

                # training update
                with tf.name_scope("train_policy_network"):
                    # apply gradients to update policy network
                    self.train_op = self.optimizer.apply_gradients(self.gradients, global_step=self.global_step)
        except Exception as e:
            logging("Create variables failed - " + str(e), self.logger, 'error')
Example #18
0
def train(dataset, dataset_name, model_dir, num_episodes, max_depth, initial_filters, num_layers, num_hidden, initial_learning_rate, learning_rate_decay_factor, train_batch_size,
          test_batch_size, train_num_epochs, input_dimensions, num_classes, optimizer, num_steps_per_decay, num_child_steps_per_cycle, exploration, discount_factor,
          log_dir, output_dir, logger = None):
    try:
        # loading log
        # logging_dict = {}
        # csv_fields = ['dataset','policy_episode', 'policy_layers', 'policy_neurons', 'policy_loss', 'search_space', 'max depth of child network', 'lr optimizer',
        #               'child steps/episod', 'state', 'steps', 'reward', 'time taken']

        # csv_fields = ['dataset','pg_episode','pg_layers','pg_neurons','network_loss','network_search_space','network_state','network_reward','time_stamp','time_taken']
        # if (os.path.exists(output_dir + '/' + dataset_name + '_results.csv')):
        #     with open(output_dir + '/' + dataset_name + '_results.csv', mode='r') as csv_file:
        #         csv_reader = csv.DictReader(csv_file)
        #         for row in csv_reader:
        #             logging_dict[row['time_stamp']] = row['dataset'] + "|" + row['pg_episode'] + "|" + row['pg_layers'] + "|" + row['pg_neurons'] + "|" + \
        #                               row['network_loss'] + "|" + row['network_search_space'] + "|" + row['network_state'] + "|" + row['network_reward'] + "|" + \
        #                               row['time_stamp'] + "|" + row['time_taken']

        csv_fields = ['dataset', 'policy_episode', 'policy_layers', 'policy_neurons', 'policy_loss', 'policy_probs', 'lr optimizer', 'search_space', 'state',
                      'max depth of child network', 'child_steps_episode', 'steps', 'reward', 'network_accuracy', 'moving_accuracy', 'total_reward', 'gradients_entropy_avg',
                      'gradients_entropy_std', 'trainable_variables', 'time_taken']
        with open(output_dir + '/' + dataset_name + '_results.csv', 'w') as target_csv_file:
            writer = csv.DictWriter(target_csv_file, fieldnames=csv_fields)
            writer.writeheader()

        search_space_fields = ['depth','dropout','learning_rate','momentum']

        max_depth_layers, _, min_depth_layers, _ = get_residual_layer(max_depth)

        search_space = {'0': list(np.arange(min_depth_layers, sum(max_depth_layers) + 1, 1)),  # depth
                        '1': list(np.arange(0.05, 0.35, 0.05)),                     # 'dropout'
                        '2': list(np.arange(0.01, 0.1, 0.01)) + list(np.arange(0.001, 0.01, 0.001)), # list(np.arange(0.1, 0.99, 0.1)) + list(np.arange(0.0001, 0.001, 0.0001)),   # 'learning_rate'
                        '3': list(np.arange(0.7, 0.99, 0.05))}                  # 'momentum' -> 0.6-0.99
                        # '4': list(np.arange(0, 1.0, 0.01)),                     # 'total_reward' -> 0-1
                        # '5': list(np.arange(0, 1.0, 0.01)),                    # 'loss' -> 0-1
                        # '6': list(np.arange(0, 1.0, 0.01)),                     # average entropy
                        # '7': list(np.arange(0, 1.0, 0.01))}                     # std entropy

        reinforce = Reinforce(model_dir, log_dir, initial_learning_rate = initial_learning_rate, num_hidden = num_hidden, num_layers = num_layers, search_space = search_space, num_steps_per_decay=num_steps_per_decay,
                              learning_rate_decay_factor = learning_rate_decay_factor, optimizer = optimizer, exploration = exploration, discount_factor = discount_factor, logger = logger)        # manages the training and evaluation of the Controller RNN
        net_manager = NetManager(search_space, input_dimensions=input_dimensions, num_classes=num_classes, dataset = dataset, dataset_name=dataset_name, log_dir=log_dir, train_batch_size = train_batch_size,
                                 test_batch_size = test_batch_size, train_num_epochs = train_num_epochs, max_depth=max_depth, num_child_steps_per_cycle=num_child_steps_per_cycle,
                                 initial_filters=initial_filters, model_dir = model_dir, logger = logger)   # handles the training and reward computation of a model

        # print("Search Space: ", search_space)
        prev_accuracy, prev_step, total_rewards, reward, network_loss, ent_avg, ent_std, elapsed_time = 0.0, 0, 0.01, 0.01, 0.0, 0.0, 0.01, 0.01

        # get_residual_layers, _ = get_residual_layer(max_depth)
        # max_channels = get_residual_filters(sum(get_residual_layers), min_channels)
        state = np.array([[0, 0, 0, 0]], dtype=np.int32)      # max_channels, network_loss
        # state = np.array([[0.05, 0.1, 0.7, 0.0, 0.0]], dtype=np.float32)      # max_channels, network_loss
        # state = np.array([[min(search_space['0']), min(search_space['1']), min(search_space['2']), min(search_space['3']), max_channels, prev_accuracy]], dtype=np.int32) # min_filters, accuracy
        # entropy = lambda p: -np.sum(p * np.log2(p))
        total_rewards = 0.0
        action_in_search_space = []
        for i_episode in range(num_episodes):
            # state_in_search_space = get_state_search(search_space, state[0])
            # state = [abs(x) for x in state]
            if(i_episode != 0):
                action = reinforce.get_action(state, init=False)
            else:
                action = reinforce.get_action(state, init=True)
            #     action = [[list(state[0])]]

            if all(ai >= 0 for ai in action[0][0]):
                start_time = time.time()

                action_in_search_space = [get_state_search(search_space, action[0][0], network_loss, ent_avg, ent_std, total_rewards)]     # max_channels, network_loss
                print("Actions: ", action_in_search_space[0][0])
                # print(action[0][0])

                reward, prev_accuracy, network_loss, prev_step, steps, probs, iterations, moving_accuracy, trainable_variables = net_manager.get_reward(action_in_search_space[0][0], prev_step, prev_accuracy)

                ent_avg = sum(sc.stats.entropy(probs)) / num_classes #/ (train_batch_size * num_child_steps_per_cycle * iterations)
                ent_std = np.std(sc.stats.entropy(probs)) * 10

                elapsed_time += (time.time() - start_time)
                print("Reward: " + str(reward) + " | Accuracy: " + str(prev_accuracy))
            else:
                reward = 0.01

            total_rewards += reward
            print('Total Reward: ' + str(round(total_rewards, 4)))

            # state_in_search_space = get_state_search(search_space, action_in_search_space[0][0])
            # max_channels = get_residual_filters(action_in_search_space[0][0][0], min_channels)
            state = action[0]
            # state[0][0] = round(total_rewards * 10, 0) if round(network_loss * 10, 0) > 0 else 0.01
            # state[0][1] = round(network_loss * 10, 0) if round(network_loss * 10, 0) > 0 else 0.01
            # state[0][2] = round(ent_avg, 0) if round(ent_avg, 0) > 0 else 0.01
            # state[0][3] = round(ent_std * 10, 0) if round(ent_std * 10, 0) > 0 else 0.1
            reinforce.storeRollout(state, reward)

            loss, log_probs = reinforce.train_step(1)

            # logging
            log_str = "time taken: " + str(elapsed_time / 60) + " | problem: " + dataset_name + " | episode: " + str(i_episode) + " | steps: " + str(steps) + " | loss: " + str(round(loss, 3)) + \
                      " | log_probs: " + str(log_probs) + " | state: " + str(action[0]) + " | reward: " + str(round(reward, 2)) + " | network accuracy: " + str(round(prev_accuracy * 100, 2)) + "\n"
            # logging_dict[i_episode] = dataset_name + "|" + str(i_episode) + "|" + str(num_layers) + "|" + str(num_hidden) + "|" + str(loss) + "|" + str(search_space_fields) + "|" + \
            #                           str(state[0]) + "|" + str(max_depth) + "|" + optimizer + "|" + str(num_child_steps_per_cycle) + "|" + str(steps) + "|" + str(reward) + "|" + \
            #                           str(datetime.datetime.now().time()).split('.')[0] + "|" + str(elapsed_time)
            print(log_str)

            # writing logs
            with open(output_dir + '/' + dataset_name + '_results.csv', 'a') as target_csv_file:
                writer = csv.writer(target_csv_file)

                writer.writerows([[dataset_name, str(i_episode), str(num_layers), str(num_hidden), str(round(loss * 100, 3)), str(log_probs), str(optimizer),
                                   str(search_space_fields), str(action_in_search_space[0][0]), str(max_depth), str(num_child_steps_per_cycle), str(steps),
                                   str(round(reward, 2)), str(round(prev_accuracy * 100, 2)), str(round(moving_accuracy * 100, 2)), str(round(total_rewards, 2)),
                                   str(round(ent_avg * 100, 2)), str(round(ent_std * 100, 2)), str(round(trainable_variables, 2)), str(round(elapsed_time, 2))]])
    except Exception as e:
        logging("Meta-RL training failed - " + str(e), logger, 'error')
Example #19
0
def test(mode, dataset, images, labels, batch_size, num_epochs, pretrained_model, model_dir, layer_count, logger = None):
    try:
        # inception and inception_resnet only
        num_batches_per_epoch = int(dataset.num_samples / batch_size) + 1
        num_steps_per_epoch = num_batches_per_epoch

        # initializing the model
        if (pretrained_model == 'inceptionV3'):  # inception V3
            architecture_layers = inceptionV3_layers
            with slim.arg_scope(inception_v3_arg_scope()):  # create the model inference
                logits, end_points = inception_v3(images, num_classes=dataset.num_classes, is_training=False)
        elif (pretrained_model == 'inception_resnetV2'):  # inception_resnetV2
            architecture_layers = inceptionResnetV2_layers
            with slim.arg_scope(inception_resnet_v2_arg_scope()):  # create the model inference
                logits, end_points = inception_resnet_v2(images, num_classes=dataset.num_classes, is_training=False)
        elif (pretrained_model == 'vgg_19'):  # vgg 19
            architecture_layers = vgg_19_layers
            with slim.arg_scope(vgg_arg_scope()):  # create the model inference
                logits, end_points = vgg_19(images, num_classes=dataset.num_classes, is_training=True) #=(mode == 'train'))

        if (pretrained_model == 'inceptionV3' or pretrained_model == 'inception_resnetV2'):  # inceptionV3 or inception_resnetV2
            logging("Transfer learning testing layers-" + str(layer_count) + ": " + str(architecture_layers[:(layer_count + 1)]), logger, 'info')
        elif (pretrained_model == 'vgg_19'):  # vgg 19
            logging("Transfer learning testing layers-" + str(layer_count) + ": " + str(architecture_layers[:(layer_count)]), logger, 'info')

        # define the evaluation metrics; predictions are class indices, not one-hot encoded
        if (pretrained_model == 'inceptionV3' or pretrained_model == 'inception_resnetV2'):  # inceptionV3 or inception_resnetV2
            predictions = tf.argmax(end_points['Predictions'], 1) #end_points['Predictions'], 1)
            # probabilities = end_points['Predictions']
        elif (pretrained_model == 'vgg_19'):  # vgg 19
            predictions = tf.cast(tf.to_int64(tf.argmax(logits, 1)), tf.float32)

        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(predictions, labels)
        metrics_op = tf.group(accuracy_update) #, probabilities)

        # create the global step and an increment op for monitoring
        global_step = get_or_create_global_step()
        global_step_op = tf.assign(global_step, global_step + 1)  # no apply_gradient method so manually increasing the global_step

        # define some scalar quantities to monitor
        tf.summary.scalar('test_accuracy', accuracy)
        # summary_op = tf.summary.merge_all()

        # get all the variables to restore from the checkpoint file and create the saver function to restore
        variables_to_restore = slim.get_variables_to_restore()
        saver = tf.train.Saver(var_list=variables_to_restore)

        def restore_fn(sess):
            saver.restore(sess, tf.train.latest_checkpoint(model_dir + str(layer_count) + '/'))

        # get the supervisor
        sv = tf.train.Supervisor(logdir=model_dir + str(layer_count) + '/', summary_op=None, saver=None, init_fn=restore_fn)

        # run in one session
        with sv.managed_session() as sess:
            for step in range(num_steps_per_epoch * num_epochs):
                sess.run(sv.global_step)

                if step == 0:
                    sess.run(accuracy)
                else:
                    _, global_step_count, accuracy_value = sess.run([metrics_op, sv.global_step, accuracy])

                if step != 0 and step % 10 == 0:
                    logging('Step: ' + str(step) + ' | test accuracy: ' + str(accuracy_value), logger, 'info')

                    # sv.summary_computed(sess, sess.run(summary_op))


            # at the end of all the evaluation, show the final accuracy
            total_accuracy = sess.run(accuracy)
            logging('Testing Final accuracy: ' + str(total_accuracy), logger, 'info')

            if str(layer_count) not in test_accuracy:
                test_accuracy[str(layer_count)] = str(total_accuracy * 100)

            # visualize the last batch's images just to see what our model has predicted
            # raw_image, labels, predictions = sess.run([raw_image, labels, predictions])
            # for i in range(10):
            #     image, label, prediction = raw_image[i], labels[i], predictions[i]
            #     prediction_name, label_name = dataset.labels_to_name[prediction], dataset.labels_to_name[label]
            #     text = 'Prediction: %s \n Ground Truth: %s' % (prediction_name, label_name)
            #     # print(text)
            #     img_plot = plt.imshow(image)
            #
            #     # set up the plot and hide axes
            #     plt.title(text)
            #     img_plot.axes.get_yaxis().set_ticks([])
            #     img_plot.axes.get_xaxis().set_ticks([])
            #     plt.show()

        logging("Transfer learning testing completed successfully, accuracy: " + str(total_accuracy * 100), logger, 'info')

        return test_accuracy
    except Exception as e:
        logging("Transfer learning testing failed - " + str(e), logger, 'error')
        test_accuracy[str(layer_count)] = 0.0
        return test_accuracy
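
The test loop above relies on tf.contrib.metrics.streaming_accuracy, which keeps running totals that the accuracy_update op advances once per evaluated batch. Below is a plain-Python sketch, for illustration only, of the accumulation those two ops perform; StreamingAccuracy is an illustrative name and not part of the original code.

class StreamingAccuracy:
    def __init__(self):
        self.correct = 0
        self.total = 0

    def update(self, predictions, labels):
        # accumulate one batch; mirrors what the accuracy_update op does above
        self.correct += sum(int(p == l) for p, l in zip(predictions, labels))
        self.total += len(labels)
        return self.result()

    def result(self):
        # running accuracy over every batch seen so far
        return self.correct / self.total if self.total else 0.0

# usage (illustrative):
# metric = StreamingAccuracy()
# for batch_predictions, batch_labels in batches:
#     metric.update(batch_predictions, batch_labels)
# print('final accuracy:', metric.result())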