Example No. 1
def make_models(variables_params):
    models = []
    for params in variables_params:
        # Build one model per parameter dict and log its summary.
        var = make_variable(**params)
        log_message(var.summary(), logging.WARN)
        models.append(var)
    return models
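
A minimal usage sketch, assuming `make_variable` and `log_message` from the surrounding project are in scope; the parameter dicts and their keys below are hypothetical.

# Hypothetical parameter dicts; the real keys depend on make_variable's signature.
variables_params = [
    {'name': 'encoder', 'latent_dim': 32},
    {'name': 'decoder', 'latent_dim': 32},
]
models = make_models(variables_params)  # one model per dict, each summary logged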
Example No. 2
def load_models(file_name, variables_names):
    log_message('Restore old models ...', logging.DEBUG)
    models = []
    for name in variables_names:
        # Each model is expected at '<file_name>/<name>.h5'.
        variable_path = os.path.join(file_name, name + '.h5')
        variable = load_model(variable_path)
        models.append(variable)
        log_message(variable.summary(), logging.WARN)
    return models
Example No. 3
def log(file_name, message, printed=True):
    # Console header is the file's base name up to the first underscore,
    # e.g. 'TRAIN' for '.../TRAIN_<model_name>'.
    header = os.path.basename(file_name).split('_')[0]
    fieldnames = list(message)
    if printed:
        msg = header
        for k, v in message.items():
            msg += '\n    {}: {}'.format(k, v)
        log_message(msg, logging.DEBUG)

    # Append one row per call; the CSV header is written only at the first epoch.
    with open(file_name + '.csv', mode='a') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        if message['Epoch'] == 0:
            writer.writeheader()
        writer.writerow(message)
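
A short usage sketch for the CSV logger above; the metric names below are made up, but the message dict must contain an 'Epoch' key, since log() uses it to decide when to write the CSV header, and the path is hypothetical.

# Hypothetical per-epoch metrics; 'Epoch' 0 triggers writing the header row.
metrics = {'Epoch': 0, 'Total': 1.234, 'Elapsed': '12.5'}
log(file_name='./csv_log_dir/TRAIN_my_model', message=metrics, printed=True)
# Appends one row to ./csv_log_dir/TRAIN_my_model.csv and echoes it via log_message.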
def create_if_not_exist(directories):
    """Create every directory in the list if it does not already exist.

    :param directories: a single directory path or a list of directory paths
    """
    if not isinstance(directories, list):
        directories = [directories]

    for directory in directories:
        try:
            if not os.path.exists(directory):
                os.makedirs(directory)

        except Exception as err:
            log_message("Creating directory {}, error: {}".format(directory, err), logging.ERROR)
Example No. 5
def create_variables(variables_params, model_name, restore=None):
    variables_names = [variables['name'] for variables in variables_params]
    variables = None
    if restore:
        try:
            variables = load_models(
                restore, [model_name + '_' + var for var in variables_names])

        except Exception as e:
            print(str(e))
            print()
            log_message('Failed to restore old models!', logging.ERROR)

    variables = variables or create_models(variables_params)

    return variables
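
A sketch of both code paths, assuming models were previously saved as '<model_name>_<variable name>.h5' files (the convention load_models expects); the names and paths are hypothetical.

variables_params = [{'name': 'encoder'}, {'name': 'decoder'}]  # hypothetical
models = create_variables(variables_params, model_name='vae')  # build from scratch
models = create_variables(variables_params, model_name='vae', restore='./checkpoints')
# The second call tries to load 'vae_encoder.h5' and 'vae_decoder.h5' and falls
# back to freshly created models if loading fails.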
def clone_model(old_model, new_name, restore=None):
    if restore:
        log_message('Restore old models ...', logging.DEBUG)
        variable_path = os.path.join(restore, new_name + '.hdf5')
        variable = load_model(variable_path, compile=False)

    else:
        # Clone the old model's layers and append a flatten + single linear output head.
        temp_layers = tf.keras.models.clone_model(old_model).layers
        temp_layers.append(tf.keras.layers.Flatten())
        temp_layers.append(
            tf.keras.layers.Dense(units=1,
                                  activation='linear',
                                  name=new_name + '_outputs'))
        temp_model = tf.keras.Sequential(temp_layers)
        variable = tf.keras.Model(name=new_name,
                                  inputs=temp_model.inputs,
                                  outputs=temp_model.outputs)
    return variable
Example No. 7
def compute_unsupervised_metrics(ground_truth_data,
                                 representation_function,
                                 random_state,
                                 num_train,
                                 batch_size=16):
    """Computes unsupervised scores based on covariance and mutual information.
       Args:
            ground_truth_data: GroundTruthData to be sampled from.
            representation_function: Function that takes observations as input and
              outputs a dim_representation sized representation for each observation.
            random_state: Numpy random state used for randomness.
            num_train: Number of points used for training.
            batch_size: Batch size for sampling.
       Returns:
          Dictionary with scores.
    """
    scores = {}
    log_message('Generating training set ...', logging.INFO)
    mus_train, _ = generate_batch_factor_code(ground_truth_data,
                                              representation_function,
                                              num_train, random_state,
                                              batch_size)
    num_codes = mus_train.shape[0]
    cov_mus = np.cov(mus_train)
    assert num_codes == cov_mus.shape[0]

    # Gaussian total correlation.
    scores["gaussian_total_correlation"] = gaussian_total_correlation(cov_mus)

    # Gaussian Wasserstein correlation.
    scores[
        "gaussian_wasserstein_correlation"] = gaussian_wasserstein_correlation(
            cov_mus)
    scores["gaussian_wasserstein_correlation_norm"] = (
        scores["gaussian_wasserstein_correlation"] / np.sum(np.diag(cov_mus)))

    # Compute average mutual information between different latent codes.
    mus_discrete = discrete_entropy(mus_train).reshape(-1, 1)
    mutual_info_matrix = discrete_mutual_info(mus_discrete, mus_discrete)
    np.fill_diagonal(mutual_info_matrix, 0)
    mutual_info_score = np.sum(mutual_info_matrix) / (num_codes**2 - num_codes)
    scores["mutual_info_score"] = mutual_info_score
    return scores
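
For reference, a self-contained NumPy sketch of the Gaussian total correlation used above, following the standard closed form TC = 0.5 * (sum_i log Sigma_ii - log det Sigma) for a covariance matrix Sigma; this is an assumption about what the project's gaussian_total_correlation helper computes, not a copy of it.

import numpy as np

def gaussian_total_correlation_sketch(cov):
    # Sum of marginal Gaussian entropies minus the joint Gaussian entropy:
    # 0.5 * (sum of log variances - log determinant of the covariance).
    return 0.5 * (np.sum(np.log(np.diag(cov))) - np.linalg.slogdet(cov)[1])

codes = np.random.RandomState(0).normal(size=(5, 1000))  # 5 codes, 1000 samples
print(gaussian_total_correlation_sketch(np.cov(codes)))  # near 0 for independent codes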
Example No. 8
    def stop(self, loss_val):
        if self.prev_loss_val != np.inf:
            if np.abs(np.abs(self.prev_loss_val) - np.abs(loss_val)) >= self.min_delta:
                # Significant change: reset the patience counter and remember the new value.
                self.patience_cnt = 0
                self.prev_loss_val = loss_val
            else:
                self.patience_cnt += 1

                msg = '\nPrevious {}: {}'.format(self.name, self.prev_loss_val)
                msg += '\nCurrent {}: {}'.format(self.name, loss_val)
                msg += '\n{} patience count: {}'.format(self.name, self.patience_cnt)
                log_message(msg, logging.DEBUG)
        else:
            # First observed value: record it so later calls have a baseline.
            self.prev_loss_val = loss_val

        if (self.patience_cnt > self.patience):
            log_message('{} is out of patience'.format(self.name), logging.CRITICAL)
            return True
        else:
            return False
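
A minimal sketch of how the monitor above is driven, mirroring the constructor arguments used in the training loop of Example No. 10 (name, patience, min_delta) and assuming the constructor initializes prev_loss_val to infinity and patience_cnt to zero; the loss values are fabricated.

stopper = EarlyStopping(name='ELBO monitor', patience=5, min_delta=1e-6)
for epoch, loss in enumerate([10.0, 9.0] + [8.999] * 7):
    if stopper.stop(loss):
        print('out of patience at epoch', epoch)
        break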
def inspect_log(file_name):
    try:
        with open(file_name, 'r', encoding="utf-8", errors="ignore") as scraped:
            reader = csv.reader(scraped, delimiter=',')
            last_row = [0]
            for row in reader:
                if row:  # avoid blank lines
                    last_row = row
        if last_row[0] == 0:
            log_message('No former training found ... ', logging.ERROR)
        else:
            log_message('Found record for {} epochs'.format(int(last_row[0])), logging.INFO)
        return int(last_row[0])
    except (OSError, ValueError):
        log_message('No former training found ... ', logging.ERROR)
        return 0
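
A short usage sketch: the helper reads the first column of the last non-empty CSV row (the epoch counter written by log()) so an interrupted run can resume; the path is hypothetical.

start_epoch = inspect_log('./csv_log_dir/TRAIN_my_model.csv')  # 0 if no previous run exists
for epoch in range(start_epoch, start_epoch + 10):
    pass  # resume training from the recorded epoch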
Example No. 10
    def fit(self,
            train_dataset,
            test_dataset,
            instance_names=['image'],
            epochs=10,
            learning_rate=1e-3,
            random_latent=None,
            recoding_dir='./recoding',
            gray_plot=True,
            generate_epoch=5,
            save_epoch=5,
            metric_epoch=10,
            gt_epoch=10,
            gt_data=None):
        assert isinstance(train_dataset, Iterable), 'dataset must be iterable'
        assert isinstance(test_dataset, Iterable), 'dataset must be iterable'

        self.dir_setup(recoding_dir)

        # generate random latent
        latent_shape = [50, self.latent_dim]
        if random_latent is None:
            random_latent = tf.random.normal(shape=latent_shape)

        if generate_epoch:
            generated = self.generate_sample(model=self.get_varibale,
                                             inputs_shape=self.inputs_shape,
                                             latent_shape=latent_shape,
                                             eps=random_latent)
            plot_and_save_generated(generated=generated,
                                    epoch=0,
                                    path=self.image_gen_dir,
                                    gray=gray_plot)

        self.optimizer = RAdamOptimizer(learning_rate)

        file_Name = os.path.join(self.csv_log_dir,
                                 'TRAIN_' + self.model_name + '.csv')
        start_epoch = inspect_log(file_Name)

        early_stopper = EarlyStopping(name='on-Test dataset ELBO monitor',
                                      patience=5,
                                      min_delta=1e-6)
        epochs_pbar = tqdm(iterable=range(start_epoch, start_epoch + epochs),
                           position=0,
                           desc='Epochs Progress')
        for epoch in epochs_pbar:
            # training dataset
            tr_start_time = time.time()
            loss_tr = defaultdict()
            loss_tr['Epoch'] = epoch
            log_message('Training ... ', logging.INFO)
            for i, data_train in enumerate(train_dataset):
                data_train = self.cast_batch(data_train)
                total_loss = self.train_step(input=data_train,
                                             names=instance_names)
                tr_losses = self.evaluate_step(input=data_train,
                                               names=instance_names)
                loss_tr = self.reduce_sum_dict(tr_losses, loss_tr)
                epochs_pbar.set_description(
                    'Epochs Progress, Training Iterations {}'.format(i))
            tr_end_time = time.time()
            loss_tr['Elapsed'] = '{:06f}'.format(tr_end_time - tr_start_time)

            # testing dataset
            val_start_time = time.time()
            loss_val = defaultdict()
            loss_val['Epoch'] = epoch

            log_message('Testing ... ', logging.INFO)
            tbar = tqdm(iterable=range(100), position=0, desc='Testing ...')
            for i, data_test in enumerate(test_dataset):
                data_test = self.cast_batch(data_test)
                val_losses = self.evaluate_step(input=data_test,
                                                names=instance_names)
                loss_val = self.reduce_sum_dict(val_losses, loss_val)

                monitored_loss = loss_val['Total']
                tbar.update(i % 100)
            val_end_time = time.time()
            loss_val['Elapsed'] = '{:06f}'.format(val_end_time -
                                                  val_start_time)

            if metric_epoch is not None and epoch % metric_epoch == 0:
                # testing dataset
                met_start_time = time.time()
                met_values = defaultdict()
                met_values['Epoch'] = epoch

                log_message('Evaluating Metrics ... ', logging.INFO)
                tbar = tqdm(iterable=range(100),
                            position=0,
                            desc='Evaluating ...')
                for i, data_test in enumerate(test_dataset):
                    data_test = self.cast_batch(data_test)

                    inputs = {
                        'X': data_test[instance_names[0]],
                        'y': self.feedforward(data_test[instance_names[0]])
                    }
                    met_computed = compute_metrics(inputs)
                    met_values = self.reduce_sum_dict(met_computed, met_values)
                    tbar.update(i % 100)
                met_end_time = time.time()
                met_values['Elapsed'] = '{:06f}'.format(met_end_time -
                                                        met_start_time)

            if epoch % gt_epoch == 0 and gt_data is not None:
                # testing dataset
                gt_start_time = time.time()
                gt_values = defaultdict()
                gt_values['Epoch'] = epoch

                log_message('Evaluating ground truth data ... ', logging.INFO)
                tbar = tqdm(iterable=range(100),
                            position=0,
                            desc='gt Evaluating ...')

                def rep_func(x):
                    return self.feedforward(x)['latent']

                us_scores = compute_unsupervised_metrics(
                    ground_truth_data=gt_data,
                    representation_function=rep_func,
                    random_state=np.random.RandomState(0),
                    num_train=10000,
                    batch_size=32)
                s_scores = compute_supervised_metrics(
                    ground_truth_data=gt_data,
                    representation_function=rep_func,
                    random_state=np.random.RandomState(0),
                    num_train=10000,
                    num_test=2000,
                    continuous_factors=False,
                    batch_size=32)

            #############################

            display.clear_output(wait=False)
            log_message(
                "==================================================================",
                logging.INFO)
            file_Name = os.path.join(self.csv_log_dir,
                                     'TRAIN_' + self.model_name)
            log(file_name=file_Name, message=dict(loss_tr), printed=True)
            log_message(
                "==================================================================",
                logging.INFO)

            log_message(
                "==================================================================",
                logging.INFO)
            file_Name = os.path.join(self.csv_log_dir,
                                     'TEST_' + self.model_name)
            log(file_name=file_Name, message=dict(loss_val), printed=True)
            log_message(
                "==================================================================",
                logging.INFO)

            if metric_epoch is not None and epoch % metric_epoch == 0:
                log_message(
                    "==================================================================",
                    logging.INFO)
                file_Name = os.path.join(self.csv_log_dir,
                                         'Metrics_' + self.model_name)
                log(file_name=file_Name,
                    message=dict(met_values),
                    printed=True)
                log_message(
                    "==================================================================",
                    logging.INFO)

            if epoch % gt_epoch == 0 and gt_data is not None:
                gt_metrics = {**s_scores, **us_scores}
                log_message(
                    "==================================================================",
                    logging.INFO)
                file_Name = os.path.join(self.csv_log_dir,
                                         'GroundTMetrics_' + self.model_name)
                log(file_name=file_Name,
                    message=dict(gt_metrics),
                    printed=True)
                log_message(
                    "==================================================================",
                    logging.INFO)

            if generate_epoch is not None:
                if epoch % generate_epoch == 0:
                    generated = self.generate_sample(
                        model=self.get_varibale,
                        inputs_shape=self.inputs_shape,
                        latent_shape=latent_shape,
                        eps=random_latent)
                # Plot every epoch, but only save the figure on generation epochs.
                plot_and_save_generated(generated=generated,
                                        epoch=epoch,
                                        path=self.image_gen_dir,
                                        gray=gray_plot,
                                        save=epoch % generate_epoch == 0)

            if epoch % save_epoch == 0:
                log_message('Saving Status in Epoch {}'.format(epoch),
                            logging.CRITICAL)
                self.save_status()

            # Early stopping
            if early_stopper.stop(monitored_loss):
                log_message(
                    'Aborting training after epoch {} because of no progress ... '
                    .format(epoch), logging.WARN)
                break
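
A hedged sketch of how fit() might be called, assuming the surrounding class exposes a constructor and that the datasets are iterables of dicts keyed by instance_names (as the cast_batch and train_step calls above suggest); every name here is illustrative.

# Hypothetical: 'model' is an instance of the class that defines fit() above.
model.fit(train_dataset=train_ds,
          test_dataset=test_ds,
          instance_names=['image'],
          epochs=20,
          recoding_dir='./recoding',
          metric_epoch=10,
          gt_epoch=10,
          gt_data=None)  # skip ground-truth metrics when no GroundTruthData is available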
Example No. 11
    def __init__(self,
                 image_lists,
                 image_data_generator,
                 category, image_dir,
                 image_shape=(256, 256, 3),
                 color_mode='rgb',
                 class_mode='categorical',
                 batch_size=32,
                 episode_len=20,
                 episode_shift=10,
                 shuffle=True,
                 seed=None,
                 data_format=None,
                 save_to_dir=None,
                 save_prefix='',
                 save_format='jpeg',
                 dtype=np.float32
                 ):
        if data_format is None:
            data_format = tf.keras.backend.image_data_format()

        classes = list(image_lists.keys())
        self.category = category
        self.batch_size = batch_size
        self.num_class = len(classes)
        self.image_lists = image_lists
        self.image_dir = image_dir
        self.episode_len = episode_len
        self.episode_shift = episode_shift

        how_many_files = 0
        for label_name in classes:
            for _ in self.image_lists[label_name][category]:
                how_many_files += 1

        self.samples = how_many_files
        self.class2id = dict(zip(classes, range(len(classes))))
        self.id2class = dict((v, k) for k, v in self.class2id.items())
        self.classes = np.zeros((self.samples,), dtype='int32')

        self.image_data_generator = image_data_generator
        #self.target_size = tuple(target_size)
        if color_mode not in {'rgb', 'grayscale'}:
            raise ValueError('Invalid color mode:', color_mode,
                             '; expected "rgb" or "grayscale".')
        self.color_mode = color_mode
        self.data_format = data_format
        self.image_shape = image_shape
        # Choose the image decoder from the requested file format (save_format);
        # data_format holds the Keras channel ordering, not a file type.
        if save_format == 'jpeg':
            self.read_image = lambda x: tf.image.resize(
                tf.image.decode_jpeg(tf.io.read_file(x), channels=3), self.image_shape[:2])
        else:
            self.read_image = lambda x: tf.image.resize(
                tf.image.decode_png(tf.io.read_file(x), channels=3), self.image_shape[:2])

        if (class_mode not in {'categorical', 'binary', 'sparse', 'episode', 'episode_flat', None}) and (not hasattr(class_mode, '__call__')):
            raise ValueError('Invalid class_mode:', class_mode,
                             '; expected one of "categorical", '
                             '"binary", "sparse", "episode", "episode_flat", or None.')
        self.class_mode = class_mode
        self.dtype = dtype
        self.save_to_dir = save_to_dir
        self.save_prefix = save_prefix
        self.save_format = save_format

        i = 0
        self.filenames = []
        for label_name in classes:
            for j, _ in enumerate(self.image_lists[label_name][category]):
                self.classes[i] = self.class2id[label_name]
                img_path = get_file_path(self.image_lists,
                                          label_name,
                                          j,
                                          self.image_dir,
                                          self.category)
                self.filenames.append(img_path)
                i += 1
        log_message("Found {} {} files".format(len(self.filenames), category), logging.INFO)
        Iterator.__init__(self, self.samples, self.batch_size, shuffle, seed)
def create_image_lists(image_dir,
                       validation_pct,
                       valid_imgae_formats,
                       max_num_images_per_class=2**27 - 1,
                       sequenced=None,
                       verbose=1):
    """Builds a list of training images from the file system.

    Analyzes the sub folders in the image directory, splits them into stable
    training, testing, and validation sets, and returns a data structure
    describing the lists of images for each label and their paths.

    # Arguments
        image_dir: string path to a folder containing subfolders of images.
        validation_pct: integer percentage of images reserved for validation.

    # Returns
        dictionary of label subfolder, with images split into training
        and validation sets within each label.
    """
    if not os.path.isdir(image_dir):
        raise ValueError("Image directory {} not found.".format(image_dir))
    image_lists = {}
    sub_dirs = [x[0] for x in os.walk(image_dir)]

    sub_dirs_without_root = sub_dirs[1:]  # first element is root directory
    sub_dirs_without_root = sorted(sub_dirs_without_root,
                                   key=lambda x: str(x.split(os.sep)[-1]))

    for sub_dir in sub_dirs_without_root:
        file_list = []
        dir_name = os.path.basename(sub_dir)
        if dir_name == image_dir:
            continue
        if verbose == 1:
            log_message("Looking for images in '{}'".format(dir_name),
                        logging.DEBUG)

        if isinstance(valid_imgae_formats, str):
            valid_imgae_formats = [valid_imgae_formats]

        for extension in valid_imgae_formats:
            file_glob = os.path.join(image_dir, dir_name, '*.' + extension)
            file_list.extend(glob.glob(file_glob))
        if not file_list:
            msg = 'No files found'
            if verbose == 1:
                log_message(msg, logging.WARN)
            warnings.warn(msg)
            continue
        else:
            if verbose == 1:
                log_message('{} files found'.format(len(file_list)),
                            logging.INFO)
        if len(file_list) < 20:
            msg = 'Folder has fewer than 20 images, which may cause issues.'
            if verbose == 1:
                log_message(msg, logging.WARN)
            warnings.warn(msg)
        elif len(file_list) > max_num_images_per_class:
            msg = ('WARNING: Folder {} has more than {} images. Some '
                   'images will never be selected.'.format(
                       dir_name, max_num_images_per_class))
            log_message(msg, logging.WARN)
            warnings.warn(msg)
        label_name = re.sub(r'[^a-z0-9]+', ' ', dir_name.lower())
        training_images = []
        validation_images = []
        if sequenced is True:
            # Sequenced data (e.g. video frames): sort by the numeric file-name prefix
            # so the split below is based on frame position rather than a name hash.
            try:
                file_list = sorted(
                    file_list,
                    key=lambda x: int(x.split(os.sep)[-1].split('.')[0]))
            except ValueError:
                msg = 'WARNING: Sorting folder {} has failed!' \
                    .format(dir_name)
                log_message(msg, logging.WARN)
                warnings.warn(msg)

        for file_name in file_list:
            base_name = os.path.basename(file_name)
            if sequenced is True:
                hash_pct = int(
                    (int(file_name.split(os.sep)[-1].split('.')[0]) /
                     len(file_list)) * 100)
            else:
                # Get the hash of the file name and perform variant assignment.
                hash_name = hashlib.sha1(as_bytes(base_name)).hexdigest()
                hash_pct = ((int(hash_name, 16) %
                             (max_num_images_per_class + 1)) *
                            (100.0 / max_num_images_per_class))
            if hash_pct < validation_pct:
                validation_images.append(base_name)
            else:
                training_images.append(base_name)
        image_lists[label_name] = {
            'dir': dir_name,
            'training': training_images,
            'validation': validation_images,
        }
    return image_lists
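
A usage sketch with hypothetical paths: 20% of each class folder is reserved for validation, and every returned entry maps a label to its directory name plus the two file-name lists.

image_lists = create_image_lists(image_dir='./data/frames',
                                 validation_pct=20,
                                 valid_imgae_formats=['jpg', 'png'],
                                 sequenced=False)
for label, split in image_lists.items():
    print(label, split['dir'], len(split['training']), len(split['validation']))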