def plot_wrapper(data_files,
                 keep_models,
                 out_name,
                 max_samples,
                 baseline_file,
                 out_data,
                 create_subdir=True):
    """Gather, process and plot model data for one output name.

    Data files whose path contains ``out_name`` are left out before the
    data is gathered. When ``create_subdir`` is set, a directory called
    ``out_name`` is created under ``out_data`` and ``out_name`` is prefixed
    with that directory for all later outputs. ``baseline_file`` is a path
    read with ``pd.read_csv``. The values of ``keep_models`` are handed to
    ``sns.xkcd_palette`` and its keys are used as the hue order.
    """
    print('Working on %s' % out_name)
    it_data_files = [path for path in data_files if out_name not in path]
    if create_subdir:
        py_utils.make_dir(os.path.join(out_data, out_name))
        out_name = '%s%s%s' % (out_name, os.path.sep, out_name)

    # Only the dataframe halves of the returned pairs are used below.
    df, _ = gather_data(data_files=it_data_files, keep_models=keep_models)
    baseline_df = pd.read_csv(baseline_file)
    df, _ = process_data(df=df,
                         out_name=out_name,
                         out_data=out_data,
                         baseline_file=baseline_df,
                         max_samples=max_samples)

    # Palette and ordering both come from the keep_models mapping
    create_plots(df=df,
                 out_data=out_data,
                 colors=sns.xkcd_palette(keep_models.values()),
                 hue_order=keep_models.keys(),
                 out_plot='%s.pdf' % out_name)
    print('Finished on %s' % out_name)
Esempio n. 2
0
def main(f, tag):
    """Plot BSDS figs.

    Loads the ``test_dict`` entry of the npz file ``f`` and, for every
    (prediction, test file) pair, saves a three-panel PDF (image, label,
    sigmoid of the logits) plus the score array as ``.npy``.

    Args:
        f: Path to a ``.npz`` results file. The path must contain
            'portrait', 'landscape' or 'multicue_edges'; this selects the
            dataset class used to list the test files.
        tag: String inserted into the names of the output directories.

    Raises:
        NotImplementedError: If none of the known dataset names is in ``f``.
    """
    fn = f.split(os.path.sep)[-1]
    # str.strip('.npz') would remove any of the characters '.', 'n', 'p', 'z'
    # from BOTH ends (e.g. 'nets.npz' -> 'ets'), so drop the suffix explicitly.
    if fn.endswith('.npz'):
        fn = fn[:-len('.npz')]
    # NOTE: first path component only; for an absolute path this is '' and
    # the output directories end up relative to the working directory.
    dir_name = f.split(os.path.sep)[0]
    fig_dir = os.path.join(dir_name, 'trip_%s_%s' % (tag, fn))
    res_dir = os.path.join(dir_name, 'preds_%s_%s' % (tag, fn))
    py_utils.make_dir(fig_dir)
    py_utils.make_dir(res_dir)
    d = np.load(f)
    test_dict = d['test_dict']
    if 'portrait' in f:
        from datasets.BSDS500_test_portrait import data_processing as dp
    elif 'landscape' in f:
        from datasets.BSDS500_test_landscape import data_processing as dp
    elif 'multicue_edges' in f:
        from datasets.multicue_100_edges_jk_test import data_processing as dp
    else:
        raise NotImplementedError(f)
    dP = dp()
    files = dP.get_test_files()
    print('Saving files to %s and %s' % (fig_dir, res_dir))
    for td, im in tqdm(zip(test_dict, files), total=len(test_dict)):
        # Separate name so the figure does not shadow the ``f`` parameter.
        fig = plt.figure()
        im_name = im.split(os.path.sep)[-1].split('.')[0]
        score = tf_fun.sigmoid_fun(td['logits'].squeeze())
        # Reverse the channel order and add the PASCAL offset before
        # casting to uint8 for display.
        proc_im = (td['images'].squeeze()[..., [2, 1, 0]] + PASCAL).astype(
            np.uint8)
        proc_lab = td['labels'].squeeze()
        plt.subplot(131)
        plt.imshow(proc_im)
        plt.axis('off')
        plt.subplot(132)
        plt.imshow(proc_lab)
        plt.axis('off')
        plt.subplot(133)
        plt.imshow(score, cmap='Greys_r')
        np.save(os.path.join(res_dir, '%s' % im_name), score)
        plt.savefig(os.path.join(fig_dir, '%s.pdf' % im_name), dpi=150)
        plt.close(fig)
Esempio n. 3
0
def visualize_recurrence(idx,
                         image,
                         label,
                         logits,
                         ff,
                         h2s,
                         config,
                         debug=False):
    """Save a two-row figure of the input, label, prediction and timesteps.

    The top row shows the image (column 2), the label (column 3) and the
    sigmoid of ``logits`` (column 4). The bottom row shows the sigmoid of
    each entry of ``h2s``, one column per entry. The figure is written to
    ``<config.model>/<idx>.pdf`` and additionally shown when ``debug`` is
    set. ``ff`` is accepted but not used.
    """
    fig, axes = plt.subplots(2, len(h2s), figsize=(30, 15))
    image = image.squeeze()
    final_logits = logits.squeeze()

    # Three-channel images are drawn as-is, anything else in greyscale
    image_kwargs = {} if image.shape[-1] == 3 else {'cmap': 'Greys_r'}
    axes[0, 2].imshow((image).astype(np.uint8), **image_kwargs)
    axes[0, 3].imshow(label.squeeze(), cmap='Greys', vmin=0, vmax=0.5)
    axes[0, 4].imshow(sigmoid_fun(final_logits),
                      vmin=0.0,
                      vmax=1.0,
                      cmap='Greys_r')

    # One bottom-row panel per timestep
    for step, state in enumerate(h2s):
        axes[1, step].imshow(sigmoid_fun(state.squeeze()),
                             vmin=0,
                             vmax=1,
                             cmap='Greys_r')

    # Strip tick marks from every panel
    for panel in axes.ravel():
        panel.set_xticks([])
    for panel in axes.ravel():
        panel.set_yticks([])

    py_utils.make_dir(config.model)
    plt.savefig(os.path.join(config.model, '%d.pdf' % idx))
    if debug:
        plt.show()
    plt.close(fig)
Esempio n. 4
0
def plot_wrapper(data_files,
                 keep_models,
                 out_name,
                 max_samples,
                 out_data,
                 model_csv,
                 create_subdir=True):
    """Gather model outputs and save one three-panel figure per entry.

    Data files whose path contains ``out_name`` are left out. When
    ``create_subdir`` is set, ``out_name`` is turned into
    ``<out_name>/<out_name>`` after its parent directory has been created.
    Each gathered entry is drawn as image / label / thresholded prediction
    and saved under ``<out_data>/<out_name>/`` with a file name built from
    the model, train dataset and validation dataset of the matching row.
    """
    print('Working on %s' % out_name)
    it_data_files = [path for path in data_files if out_name not in path]
    if create_subdir:
        py_utils.make_dir(os.path.join(out_data, out_name))
        out_name = '%s%s%s' % (out_name, os.path.sep, out_name)
    dm, df = gather_data(data_files=it_data_files,
                         keep_models=keep_models,
                         model_csv=model_csv)
    # The returned frame is not needed for the figures below.
    process_data(df=df,
                 out_name=out_name,
                 out_data=out_data,
                 max_samples=max_samples)

    plot_dir = os.path.join(out_data, out_name)
    py_utils.make_dir(plot_dir)

    # Do the plotting
    for idx in range(len(dm)):
        entry = dm[idx]
        info = df.iloc[idx]
        it_train = info.train_dataset
        it_val = info.val_dataset
        it_model = info.model

        fig = plt.figure(dpi=450)
        im, lab, log = entry[0], entry[1], entry[2]

        # Image and label panels share the same rendering settings
        for position, panel in ((131, im), (132, lab)):
            plt.subplot(position)
            plt.imshow(panel, cmap='Greys_r')
            plt.axis('off')

        # Prediction panel: binarize at 0.5
        plt.subplot(133)
        plt.imshow((log > 0.5).astype(np.float32),
                   cmap='Greys_r',
                   vmin=0,
                   vmax=1)
        plt.axis('off')

        out_path = os.path.join(plot_dir,
                                '%s_%s_%s' % (it_model, it_train, it_val))
        plt.savefig(out_path)
        print('Saved to: %s' % out_path)
        plt.close(fig)
Esempio n. 5
0
 def get_labels(self, files):
     """Process and save label images.

     For every fold in ``files``, each image is loaded together with its
     label through ``get_label_image``. When the image shape differs from
     ``self.im_size`` the first two axes of both image and label are
     swapped. Labels and float32 images are then stored as ``.npy`` files.

     Args:
         files: Mapping of fold name to a list of image paths.

     Returns:
         A tuple ``(labels, new_files)`` of dicts keyed by fold name that
         hold the saved label paths and the saved image paths.

     Raises:
         AssertionError: If an image still does not match ``self.im_size``
             after the axis swap.
     """
     labels = {}
     new_files = {}
     # items() exists on Python 2 and 3 dicts; iteritems() is Python 2 only.
     for fold, images in files.items():
         label_vec = []
         file_vec = []
         # New label dir
         proc_dir = os.path.join(images[0].split(fold)[0], fold,
                                 self.processed_labels)
         py_utils.make_dir(proc_dir)
         # New image dir
         proc_image_dir = os.path.join(self.config.data_root, self.name,
                                       self.images_dir, fold,
                                       self.processed_images)
         py_utils.make_dir(proc_image_dir)
         for im in tqdm(images,
                        total=len(images),
                        desc='Storing %s labels and images for %s' %
                        (self.name, fold)):
             it_im_name = im.split(os.path.sep)[-1]
             # Replace extension and path with labels
             it_label_path = '%s%s' % (im.split('.')[0], self.lab_extension)
             it_label_path = it_label_path.replace(self.images_dir,
                                                   self.labels_dir)
             # Load the image and its contour label
             im_data, ip_lab = get_label_image(im,
                                               it_label_path,
                                               output_size=self.lab_size)
             transpose_labels = False
             if not np.all(self.im_size == list(im_data.shape)):
                 im_data = np.swapaxes(im_data, 0, 1)
                 transpose_labels = True
             assert np.all(self.im_size == list(
                 im_data.shape)), 'Mismatched dimensions.'
             if transpose_labels:
                 # Keep the label aligned with the swapped image
                 ip_lab = np.swapaxes(ip_lab, 0, 1)
             it_lab_name = '%s.npy' % it_im_name.split('.')[0]
             out_lab = os.path.join(proc_dir, it_lab_name)
             np.save(out_lab, ip_lab)
             label_vec += [out_lab]
             # Process images
             proc_im = os.path.join(proc_image_dir, it_im_name)
             np.save(proc_im + '.npy', im_data.astype(np.float32))
             file_vec += [proc_im + '.npy']
             # Cannot compute z-score for SBD, too many images
         labels[fold] = label_vec
         new_files[fold] = file_vec
     return labels, new_files
Esempio n. 6
0
 def create_label_image_dirs(self, sample_img_path, fold):
     """Create the processed label and image directories for a fold.

     Both directories live under ``<config.data_root>/<name>``. The paths
     are returned as ``(label_dir, image_dir)``. ``sample_img_path`` is
     accepted but not used.
     """
     dataset_root = os.path.join(self.config.data_root, self.name)

     label_dir = os.path.join(dataset_root, self.labels_dir, fold,
                              self.processed_labels)
     py_utils.make_dir(label_dir)

     image_dir = os.path.join(dataset_root, self.images_dir, fold,
                              self.processed_images)
     py_utils.make_dir(image_dir)
     return label_dir, image_dir
def main():
    """Extract the val and train splits of every configured dataset.

    For each dataset/split pair, ``<dataset>/<split>`` is created relative
    to the working directory and handed to ``extract_dataset`` as the
    output directory.
    """
    # Earlier revisions also listed "curv_contour_length_6_full" and
    # "curv_contour_length_9_full" here.
    dataset_names = ["curv_contour_length_14_full"]
    splits = ["val", "train"]
    config = Config()
    for dataset_name in dataset_names:
        for split in splits:
            py_utils.make_dir(dataset_name)
            split_dir = os.path.join(dataset_name, split)
            py_utils.make_dir(split_dir)
            extract_dataset(dataset=dataset_name,
                            config=config,
                            cv=split,
                            out_dir=split_dir)
Esempio n. 8
0
    def get_files(self):
        """Split the dataset images into test/train folds and copy them.

        Image names are read from ``self.data_file`` (entry
        ``self.file_key``), filtered against ``self.ignore_ims`` and
        shuffled. The leading ``crossval_split`` fraction becomes the test
        fold and the rest the train fold. Every image is copied into its
        fold directory. The permutation and split index are stored on
        ``self.rand_order`` and ``self.test_split``.

        Returns:
            Dict mapping each fold directory name to the list of source
            image paths assigned to that fold.
        """
        dataset_dir = os.path.join(self.config.data_root, self.name)
        all_ims = np.load(
            os.path.join(dataset_dir, self.data_file))[self.file_key]
        all_ims = np.asarray([x for x in all_ims if x not in self.ignore_ims])

        # Shuffle once, then cut at the cross-validation boundary
        self.rand_order = np.random.permutation(len(all_ims))
        self.test_split = np.round(
            len(all_ims) * self.crossval_split).astype(int)
        shuffled_ims = all_ims[self.rand_order]
        test_names = shuffled_ims[:self.test_split]
        train_names = shuffled_ims[self.test_split:]

        test_dir = os.path.join(dataset_dir, self.folds['test'])
        train_dir = os.path.join(dataset_dir, self.folds['train'])
        test_ims = [os.path.join(dataset_dir, name) for name in test_names]
        train_ims = [os.path.join(dataset_dir, name) for name in train_names]

        # Create folders for training/validation splits
        py_utils.make_dir(test_dir)
        py_utils.make_dir(train_dir)

        # Copy every source image into its fold directory
        copy_plan = (
            (test_dir, test_names, test_ims),
            (train_dir, train_names, train_ims),
        )
        for fold_dir, names, sources in copy_plan:
            for name, source in zip(names, sources):
                shutil.copyfile(source, os.path.join(fold_dir, name))

        return {
            self.folds['test']: test_ims,
            self.folds['train']: train_ims
        }
Esempio n. 9
0
    def __init__(self, **kwargs):
        """Global config file for normalization experiments.

        Sets the data and project roots, derives the project sub-directory
        paths from the project root, decides whether DB access goes through
        an SSH forward based on the machine name, and creates the output
        directories. ``kwargs`` are accepted but not used.
        """
        self.data_root = '/media/data_cifs/image_datasets/'
        self.project_directory = '/media/data_cifs/contextual_circuit/'

        # (attribute name, sub-directory of the project root)
        project_subdirs = (
            ('tf_records', 'tf_records'),
            ('checkpoints', 'checkpoints'),
            ('summaries', 'summaries'),
            ('experiment_evaluations', 'experiment_evaluations'),
            ('condition_evaluations', 'condition_evaluations'),
            ('visualizations', 'visualizations'),
            ('plots', 'plots'),
            ('log_dir', 'logs'),
        )
        for attr_name, subdir in project_subdirs:
            setattr(self, attr_name,
                    os.path.join(self.project_directory, subdir))
        self.dataset_info = 'dataset_processing'  # local dataset classes

        # DB: forward over SSH on a Docker container or master p-node
        machine_name = os.uname()[1]
        twelve_char_name = len(machine_name) == 12
        serre_node = 'serre' in machine_name and machine_name != 'serrep3'
        self.db_ssh_forward = twelve_char_name or serre_node

        # Create directories if they do not exist
        for directory in (self.tf_records, self.checkpoints,
                          self.experiment_evaluations,
                          self.condition_evaluations, self.visualizations,
                          self.plots, self.log_dir, self.dataset_info):
            py_utils.make_dir(directory)
Esempio n. 10
0
    def get_labels(self, n_counts, n_img_per_count):
        """Generate images with a random object count and save them.

        The total of ``n_counts * n_img_per_count`` images is divided 70/30
        between the 'train' and 'val' entries of ``self.n_imgs_fold``. For
        every fold in ``self.folds`` the label and image directories are
        created, and each generated image is saved as ``.npy`` with its
        sampled count embedded in the file name.

        Returns:
            A tuple ``(labels, new_files)`` of dicts keyed by fold, holding
            the sampled counts and the saved image paths respectively.
        """
        labels = {}
        new_files = {}
        counts = range(1, n_counts + 1)
        total_n_imgs = n_counts * n_img_per_count
        self.n_imgs_fold['train'] = int(0.7 * total_n_imgs)
        self.n_imgs_fold['val'] = int(0.3 * total_n_imgs)
        for fold in self.folds.keys():
            n_imgs = self.n_imgs_fold[fold]

            # New label dir
            label_dir = os.path.join(self.config.data_root, self.name,
                                     self.labels_dir, fold,
                                     self.processed_labels)
            py_utils.make_dir(label_dir)

            # New image dir
            image_dir = os.path.join(self.config.data_root, self.name,
                                     self.images_dir, fold,
                                     self.processed_images)
            py_utils.make_dir(image_dir)

            fold_labels = []
            fold_files = []
            for i in tqdm(range(n_imgs),
                          desc='Generating images for %s..' % (fold)):
                # Randomly sample a number of objects; the count is the label
                n_objects = sample(counts, 1)[0]
                im_data = psst.generate_image(n_objects)
                im_path = os.path.join(image_dir,
                                       'img_%06d_%s.npy' % (i, n_objects))
                np.save(im_path, im_data)
                fold_labels.append(n_objects)
                fold_files.append(im_path)
            labels[fold] = fold_labels
            new_files[fold] = fold_files
        return labels, new_files
Esempio n. 11
0
def plot_wrapper(
        data_files,
        keep_models,
        out_name,
        max_samples,
        out_data,
        generalization=False,
        create_subdir=True):
    """Gather, process and plot model data for one output name.

    Data files whose path contains ``out_name`` are left out. When
    ``create_subdir`` is set, a directory called ``out_name`` is created
    under ``out_data`` and ``out_name`` is prefixed with it. With
    ``generalization`` set the figure is produced by ``create_gen_plots``;
    otherwise ``create_plots`` is used with a palette built from the
    values of ``keep_models`` and its keys as the hue order.
    """
    print('Working on %s' % out_name)
    it_data_files = [path for path in data_files if out_name not in path]
    if create_subdir:
        py_utils.make_dir(os.path.join(out_data, out_name))
        out_name = '%s%s%s' % (out_name, os.path.sep, out_name)

    # Only the dataframe half of the gathered pair is used below.
    df, _ = gather_data(
        data_files=it_data_files,
        keep_models=keep_models,
        generalization=generalization)
    df = process_data(
        df=df,
        out_name=out_name,
        out_data=out_data,
        generalization=generalization,
        max_samples=max_samples)

    # The palette is built in both modes, matching the call order above.
    color_pal = sns.xkcd_palette(keep_models.values())
    hue_order = keep_models.keys()
    out_plot = '%s.pdf' % out_name
    if not generalization:
        create_plots(
            df=df,
            out_data=out_data,
            colors=color_pal,
            hue_order=hue_order,
            out_plot=out_plot)
    else:
        create_gen_plots(
            df=df,
            out_data=out_data,
            out_plot=out_plot)
    print('Finished on %s' % out_name)
Esempio n. 12
0
def main(experiment_name,
         list_experiments=False,
         load_and_evaluate_ckpt=None,
         config_file=None,
         ckpt_file=None,
         gpu_device='/gpu:0'):
    """Create a tensorflow worker to run experiments in your DB."""
    if list_experiments:
        exps = db.list_experiments()
        print '_' * 30
        print 'Initialized experiments:'
        print '_' * 30
        for l in exps:
            print l.values()[0]
        print '_' * 30
        if len(exps) == 0:
            print 'No experiments found.'
        else:
            print 'You can add to the DB with: '\
                'python prepare_experiments.py --experiment=%s' % \
                exps[0].values()[0]
        return

    if experiment_name is None:
        print 'No experiment specified. Pulling one out of the DB.'
        experiment_name = db.get_experiment_name()

    # Prepare to run the model
    config = Config()
    condition_label = '%s_%s' % (experiment_name, py_utils.get_dt_stamp())
    experiment_label = '%s' % (experiment_name)
    log = logger.get(os.path.join(config.log_dir, condition_label))
    experiment_dict = experiments.experiments()[experiment_name]()
    config = add_to_config(d=experiment_dict, config=config)  # Globals
    config.load_and_evaluate_ckpt = load_and_evaluate_ckpt
    config, exp_params = process_DB_exps(
        experiment_name=experiment_name, log=log,
        config=config)  # Update config w/ DB params
    config = np.load(config_file).item()
    dataset_module = py_utils.import_module(model_dir=config.dataset_info,
                                            dataset=config.dataset)
    dataset_module = dataset_module.data_processing()  # hardcoded class name
    train_data, train_means_image, train_means_label = get_data_pointers(
        dataset=config.dataset,
        base_dir=config.tf_records,
        cv=dataset_module.folds.keys()[1],  # TODO: SEARCH FOR INDEX.
        log=log)
    val_data, val_means_image, val_means_label = get_data_pointers(
        dataset=config.dataset,
        base_dir=config.tf_records,
        cv=dataset_module.folds.keys()[0],
        log=log)

    # Initialize output folders
    dir_list = {
        'checkpoints':
        os.path.join(config.checkpoints, condition_label),
        'summaries':
        os.path.join(config.summaries, condition_label),
        'condition_evaluations':
        os.path.join(config.condition_evaluations, condition_label),
        'experiment_evaluations':
        os.path.join(  # DEPRECIATED
            config.experiment_evaluations, experiment_label),
        'visualization':
        os.path.join(config.visualizations, condition_label),
        'weights':
        os.path.join(config.condition_evaluations, condition_label, 'weights')
    }
    [py_utils.make_dir(v) for v in dir_list.values()]

    # Prepare data loaders on the cpu
    if all(isinstance(i, list) for i in config.data_augmentations):
        if config.data_augmentations:
            config.data_augmentations = py_utils.flatten_list(
                config.data_augmentations, log)
    config.epochs = 1
    config.shuffle = False
    with tf.device('/cpu:0'):
        train_images, train_labels = data_loader.inputs(
            dataset=train_data,
            batch_size=config.batch_size,
            model_input_image_size=dataset_module.model_input_image_size,
            tf_dict=dataset_module.tf_dict,
            data_augmentations=config.data_augmentations,
            num_epochs=config.epochs,
            tf_reader_settings=dataset_module.tf_reader,
            shuffle=config.shuffle_train,
            resize_output=config.resize_output)
        if hasattr(config, 'val_augmentations'):
            val_augmentations = config.val_augmentations
        else:
            val_augmentations = config.data_augmentations
        val_images, val_labels = data_loader.inputs(
            dataset=val_data,
            batch_size=config.batch_size,
            model_input_image_size=dataset_module.model_input_image_size,
            tf_dict=dataset_module.tf_dict,
            data_augmentations=['resize_and_crop'],
            num_epochs=config.epochs,
            tf_reader_settings=dataset_module.tf_reader,
            shuffle=config.shuffle_val,
            resize_output=config.resize_output)
    log.info('Created tfrecord dataloader tensors.')

    # Load model specification
    struct_name = config.model_struct.split(os.path.sep)[-1]
    try:
        model_dict = py_utils.import_module(
            dataset=struct_name,
            model_dir=os.path.join('models', 'structs',
                                   experiment_name).replace(os.path.sep, '.'))
    except IOError:
        print 'Could not find the model structure: %s in folder %s' % (
            struct_name, experiment_name)

    # Inject model_dict with hyperparameters if requested
    model_dict.layer_structure = hp_opt_utils.inject_model_with_hps(
        layer_structure=model_dict.layer_structure, exp_params=exp_params)

    # Prepare model on GPU
    with tf.device(gpu_device):
        with tf.variable_scope('cnn') as scope:
            # Normalize labels if needed
            if 'normalize_labels' in exp_params.keys():
                if exp_params['normalize_labels'] == 'zscore':
                    train_labels -= train_means_label['mean']
                    train_labels /= train_means_label['std']
                    log.info('Z-scoring labels.')
                elif exp_params['normalize_labels'] == 'mean':
                    train_labels -= train_means_label['mean']
                    log.info('Mean-centering labels.')

            # Training model
            if len(dataset_module.output_size) == 2:
                log.warning('Found > 1 dimension for your output size.'
                            'Converting to a scalar.')
                dataset_module.output_size = np.prod(
                    dataset_module.output_size)

            if hasattr(model_dict, 'output_structure'):
                # Use specified output layer
                output_structure = model_dict.output_structure
            else:
                output_structure = None
            model = model_utils.model_class(
                mean=train_means_image,
                training=True,
                output_size=dataset_module.output_size)
            train_scores, model_summary = model.build(
                data=train_images,
                layer_structure=model_dict.layer_structure,
                output_structure=output_structure,
                log=log,
                tower_name='cnn')
            eval_graph = tf.Graph()
            with eval_graph.as_default():
                with eval_graph.gradient_override_map({'selu': 'GradLRP'}):
                    train_grad_images = tf.gradients(
                        train_scores[0] * tf.cast(train_labels, tf.float32),
                        train_images)[0]
            log.info('Built training model.')
            log.debug(json.dumps(model_summary, indent=4), verbose=0)
            print_model_architecture(model_summary)

            # Check the shapes of labels and scores
            if not isinstance(train_scores, list):
                if len(train_scores.get_shape()) != len(
                        train_labels.get_shape()):
                    train_shape = train_scores.get_shape().as_list()
                    label_shape = train_labels.get_shape().as_list()
                    if len(train_shape) == 2 and len(
                            label_shape) == 1 and train_shape[-1] == 1:
                        train_labels = tf.expand_dims(train_labels, axis=-1)
                    elif len(train_shape) == 2 and len(
                            label_shape) == 1 and train_shape[-1] == 1:
                        train_scores = tf.expand_dims(train_scores, axis=-1)

            # Prepare the loss function
            train_loss, _ = loss_utils.loss_interpreter(
                logits=train_scores,  # TODO
                labels=train_labels,
                loss_type=config.loss_function,
                weights=config.loss_weights,
                dataset_module=dataset_module)

            # Add loss tensorboard tracking
            if isinstance(train_loss, list):
                for lidx, tl in enumerate(train_loss):
                    tf.summary.scalar('training_loss_%s' % lidx, tl)
                train_loss = tf.add_n(train_loss)
            else:
                tf.summary.scalar('training_loss', train_loss)

            # Add weight decay if requested
            if len(model.regularizations) > 0:
                train_loss = loss_utils.wd_loss(
                    regularizations=model.regularizations,
                    loss=train_loss,
                    wd_penalty=config.regularization_strength)
            train_op = loss_utils.optimizer_interpreter(
                loss=train_loss,
                lr=config.lr,
                optimizer=config.optimizer,
                constraints=config.optimizer_constraints,
                model=model)
            log.info('Built training loss function.')

            # Add a score for the training set
            train_accuracy = eval_metrics.metric_interpreter(
                metric=dataset_module.score_metric,  # TODO: Attach to exp cnfg
                pred=train_scores,  # TODO
                labels=train_labels)

            # Add aux scores if requested
            train_aux = {}
            if hasattr(dataset_module, 'aux_scores'):
                for m in dataset_module.aux_scores:
                    train_aux[m] = eval_metrics.metric_interpreter(
                        metric=m, pred=train_scores,
                        labels=train_labels)[0]  # TODO: Fix for multiloss

            # Prepare remaining tensorboard summaries
            if len(train_images.get_shape()) == 4:
                tf_fun.image_summaries(train_images, tag='Training images')
            if len(train_labels.get_shape()) > 2:
                tf_fun.image_summaries(train_labels, tag='Training_targets')
                tf_fun.image_summaries(train_scores,
                                       tag='Training_predictions')
            if isinstance(train_accuracy, list):
                for tidx, ta in enumerate(train_accuracy):
                    tf.summary.scalar('training_accuracy_%s' % tidx, ta)
            else:
                tf.summary.scalar('training_accuracy', train_accuracy)
            if config.pr_curve:
                if isinstance(train_scores, list):
                    for pidx, train_score in enumerate(train_scores):
                        train_label = train_labels[:, pidx]
                        pr_summary.op(
                            tag='training_pr_%s' % pidx,
                            predictions=tf.cast(
                                tf.argmax(train_score, axis=-1), tf.float32),
                            labels=tf.cast(train_label, tf.bool),
                            display_name='training_precision_recall_%s' % pidx)
                else:
                    pr_summary.op(tag='training_pr',
                                  predictions=tf.cast(
                                      tf.argmax(train_scores, axis=-1),
                                      tf.float32),
                                  labels=tf.cast(train_labels, tf.bool),
                                  display_name='training_precision_recall')
            log.info('Added training summaries.')

            # Validation model
            scope.reuse_variables()
            val_model = model_utils.model_class(
                mean=train_means_image,  # Normalize with train data
                training=False,  # False,
                output_size=dataset_module.output_size)
            val_scores, _ = val_model.build(  # Ignore summary
                data=val_images,
                layer_structure=model_dict.layer_structure,
                output_structure=output_structure,
                log=log,
                tower_name='cnn')
            eval_graph = tf.Graph()
            with eval_graph.as_default():
                with eval_graph.gradient_override_map({'selu': 'GradLRP'}):
                    val_grad_images = tf.gradients(
                        val_scores[0] * tf.cast(val_labels, tf.float32),
                        val_images)[0]
            log.info('Built validation model.')

            # Check the shapes of labels and scores
            if not isinstance(train_scores, list):
                if len(val_scores.get_shape()) != len(val_labels.get_shape()):
                    val_shape = val_scores.get_shape().as_list()
                    val_label_shape = val_labels.get_shape().as_list()
                    if len(val_shape) == 2 and len(
                            val_label_shape) == 1 and val_shape[-1] == 1:
                        val_labels = tf.expand_dims(val_labels, axis=-1)
                    if len(val_shape) == 2 and len(
                            val_label_shape) == 1 and val_shape[-1] == 1:
                        val_scores = tf.expand_dims(val_scores, axis=-1)
            val_loss, _ = loss_utils.loss_interpreter(
                logits=val_scores,
                labels=val_labels,
                loss_type=config.loss_function,
                weights=config.loss_weights,
                dataset_module=dataset_module)

            # Add loss tensorboard tracking
            if isinstance(val_loss, list):
                for lidx, tl in enumerate(val_loss):
                    tf.summary.scalar('validation_loss_%s' % lidx, tl)
                val_loss = tf.add_n(val_loss)
            else:
                tf.summary.scalar('validation_loss', val_loss)

            # Add a score for the validation set
            val_accuracy = eval_metrics.metric_interpreter(
                metric=dataset_module.score_metric,  # TODO
                pred=val_scores,
                labels=val_labels)

            # Add aux scores if requested
            val_aux = {}
            if hasattr(dataset_module, 'aux_scores'):
                for m in dataset_module.aux_scores:
                    val_aux[m] = eval_metrics.metric_interpreter(
                        metric=m, pred=val_scores,
                        labels=val_labels)[0]  # TODO: Fix for multiloss

            # Prepare tensorboard summaries
            if len(val_images.get_shape()) == 4:
                tf_fun.image_summaries(val_images, tag='Validation')
            if len(val_labels.get_shape()) > 2:
                tf_fun.image_summaries(val_labels, tag='Validation_targets')
                tf_fun.image_summaries(val_scores,
                                       tag='Validation_predictions')
            if isinstance(val_accuracy, list):
                for vidx, va in enumerate(val_accuracy):
                    tf.summary.scalar('validation_accuracy_%s' % vidx, va)
            else:
                tf.summary.scalar('validation_accuracy', val_accuracy)
            if config.pr_curve:
                if isinstance(val_scores, list):
                    for pidx, val_score in enumerate(val_scores):
                        val_label = val_labels[:, pidx]
                        pr_summary.op(
                            tag='validation_pr_%s' % pidx,
                            predictions=tf.cast(tf.argmax(val_score, axis=-1),
                                                tf.float32),
                            labels=tf.cast(val_label, tf.bool),
                            display_name='validation_precision_recall_%s' %
                            pidx)
                else:
                    pr_summary.op(tag='validation_pr',
                                  predictions=tf.cast(
                                      tf.argmax(val_scores, axis=-1),
                                      tf.float32),
                                  labels=tf.cast(val_labels, tf.bool),
                                  display_name='validation_precision_recall')
            log.info('Added validation summaries.')

    # Set up summaries and saver
    saver = tf.train.Saver(tf.global_variables())

    # Initialize the graph
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

    # Need to initialize both of these if supplying num_epochs to inputs
    sess.run(
        tf.group(tf.global_variables_initializer(),
                 tf.local_variables_initializer()))

    # Set up exemplar threading
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # Create dictionaries of important training and validation information
    train_dict = {
        'train_loss': train_loss,
        'train_images': train_images,
        'train_labels': train_labels,
        'train_op': train_op,
        'train_scores': train_scores,
        'train_grad_images': train_grad_images
    }
    val_dict = {
        'val_loss': val_loss,
        'val_images': val_images,
        'val_labels': val_labels,
        'val_scores': val_scores,
        'val_grad_images': val_grad_images
    }
    if isinstance(train_accuracy, list):
        for tidx, (ta, va) in enumerate(zip(train_accuracy, val_accuracy)):
            train_dict['train_accuracy_%s' % tidx] = ta
            val_dict['val_accuracy_%s' % tidx] = va
    else:
        train_dict['train_accuracy_0'] = train_accuracy
        val_dict['val_accuracy_0'] = val_accuracy

    if load_and_evaluate_ckpt is not None:
        # Remove the train operation and add a ckpt pointer
        del train_dict['train_op']

    if hasattr(dataset_module, 'aux_score'):
        # Attach auxillary scores to tensor dicts
        for m in dataset_module.aux_scores:
            train_dict['train_aux_%s' % m] = train_aux[m]
            val_dict['val_aux_%s' % m] = val_aux[m]

    # Start training loop
    checkpoint_dir = dir_list['checkpoints']
    step = 0
    train_losses, train_accs, train_aux, timesteps = {}, {}, {}, {}
    val_scores, val_aux, val_labels, val_grads = {}, {}, {}, {}
    train_images, val_images = {}, {}
    train_scores, train_labels = {}, {}
    train_aux_check = np.any(['aux_score' in k for k in train_dict.keys()])
    val_aux_check = np.any(['aux_score' in k for k in val_dict.keys()])

    # Restore model
    saver.restore(sess, ckpt_file)

    # Start evaluation
    try:
        while not coord.should_stop():
            start_time = time.time()
            train_vars = sess.run(train_dict.values())
            it_train_dict = {
                k: v
                for k, v in zip(train_dict.keys(), train_vars)
            }
            duration = time.time() - start_time
            train_losses[step] = it_train_dict['train_loss']
            train_accs[step] = it_train_dict['train_accuracy_0']
            train_images[step] = it_train_dict['train_images']
            train_labels[step] = it_train_dict['train_labels']
            train_scores[step] = it_train_dict['train_scores']
            timesteps[step] = duration
            if train_aux_check:
                # Loop through to find aux scores
                it_train_aux = {
                    itk: itv
                    for itk, itv in it_train_dict.iteritems()
                    if 'aux_score' in itk
                }
                train_aux[step] = it_train_aux
            assert not np.isnan(it_train_dict['train_loss']).any(
            ), 'Model diverged with loss = NaN'
            if step % config.validation_iters == 0:
                it_val_scores, it_val_labels, it_val_aux, it_val_grads, it_val_ims = [], [], [], [], []
                for num_vals in range(config.num_validation_evals):
                    # Validation accuracy as the average of n batches
                    val_vars = sess.run(val_dict.values())
                    it_val_dict = {
                        k: v
                        for k, v in zip(val_dict.keys(), val_vars)
                    }
                    it_val_labels += [it_val_dict['val_labels']]
                    it_val_scores += [it_val_dict['val_scores']]
                    it_val_grads += [it_val_dict['val_grad_images']]
                    it_val_ims += [it_val_dict['val_images']]
                    if val_aux_check:
                        iva = {
                            itk: itv
                            for itk, itv in it_val_dict.iteritems()
                            if 'aux_score' in itk
                        }
                        it_val_aux += [iva]
                val_scores[step] = it_val_scores
                val_labels[step] = it_val_labels
                val_aux[step] = it_val_aux
                val_images[step] = it_val_grads
                val_grads[step] = it_val_ims

            # End iteration
            step += 1

    except tf.errors.OutOfRangeError:
        print 'Done with evaluation for %d epochs, %d steps.' % (config.epochs,
                                                                 step)
        print 'Saved to: %s' % checkpoint_dir
    finally:
        coord.request_stop()
    coord.join(threads)
    sess.close()

    import ipdb
    ipdb.set_trace()
    np.savez(
        'val_imgs_grads',
        val_images=val_images,  # it_val_dict['val_images'],
        val_grads=val_grads,  # it_val_dict['val_grad_images'],
        val_labels=val_labels,  # it_val_dict['val_labels'],
        val_scores=val_scores)  # it_val_dict['val_scores'][0])
Esempio n. 13
0
    def get_labels(self, files):
        """Process and save label images.

        For every fold in ``files`` this loads each image and its ``.mat``
        annotation file, writes the image and its label(s) to disk as
        ``.npy`` arrays, and stores a per-fold ``file_paths`` archive holding
        the path dictionaries plus the channel-wise image mean and std.

        Args:
            files: dict mapping a fold key (e.g. ``'train'``) to a non-empty
                list of image paths. Each image's parent directory name is
                used as the fold directory name.

        Returns:
            Tuple ``(labels, new_files)``: two dicts keyed like ``files``
            holding the saved label paths and the saved image paths.

        Raises:
            NotImplementedError: if ``self.fold_options[k]`` is neither
                ``'duplicate'`` nor ``'mean'``.
            AssertionError: if an image does not match ``self.im_size`` even
                after swapping its first two axes.
        """
        labels = {}
        new_files = {}
        # Compare shapes as tuples so a list, tuple or array im_size all work.
        expected_shape = tuple(self.im_size)

        # .items() runs on both Python 2 and 3 (iteritems is Python 2 only).
        for k, images in files.items():
            # Fail before touching the disk if the fold option is unsupported.
            fold_option = self.fold_options[k]
            if fold_option not in ('duplicate', 'mean'):
                raise NotImplementedError(
                    'Unknown fold option %r for fold %r.' % (fold_option, k))

            label_vec = []
            file_vec = []
            # Derive the fold from the directory that holds the images rather
            # than splitting the path on the fold name, which breaks when the
            # same substring appears earlier in the path.
            image_dir = os.path.dirname(images[0])
            fold = os.path.basename(image_dir)

            # New label dir
            proc_dir = os.path.join(image_dir, self.processed_labels)
            py_utils.make_dir(proc_dir)

            # New image dir
            proc_image_dir = os.path.join(
                self.config.data_root,
                self.name,
                self.images_dir,
                fold,
                self.processed_images)
            py_utils.make_dir(proc_image_dir)

            ims = []
            for im in tqdm(images, total=len(images), desc=k):
                im_name = os.path.basename(im)

                # Swap only the real file extension; splitting on the first
                # '.' corrupts paths such as './data/x.jpg' or '/v1.0/x.jpg'.
                it_label_path = '%s%s' % (
                    os.path.splitext(im)[0], self.lab_extension)
                it_label_path = it_label_path.replace(
                    self.images_dir,
                    self.labels_dir)

                # Process every label and duplicate images for each
                label_data = io.loadmat(
                    it_label_path)['groundTruth'].reshape(-1)
                im_data = misc.imread(im)

                # Images that do not match im_size are assumed to be in the
                # other orientation: swap height/width for image and labels.
                transpose_labels = tuple(im_data.shape) != expected_shape
                if transpose_labels:
                    im_data = np.swapaxes(im_data, 0, 1)
                    # im_data = im_proc.pad_square(im_data)
                assert tuple(im_data.shape) == expected_shape, \
                    'Mismatched dimensions.'
                ims += [im_data]

                # One float32 map per annotation entry.
                # NOTE(review): index 1 of each groundTruth record is
                # presumably the boundary map -- confirm against the dataset.
                annotations = []
                for lab in label_data:
                    ip_lab = lab.item()[1].astype(np.float32)
                    if transpose_labels:
                        ip_lab = np.swapaxes(ip_lab, 0, 1)
                        # ip_lab = im_proc.pad_square(ip_lab)
                    annotations.append(ip_lab)

                if fold_option == 'duplicate':
                    # Save one (image, label) pair per annotation.
                    for idx, ip_lab in enumerate(annotations):
                        it_im_name = 'image_%s_%s.npy' % (idx, im_name)
                        it_lab_name = 'label_%s.npy' % it_im_name.split('.')[0]
                        out_lab = os.path.join(proc_dir, it_lab_name)
                        np.save(out_lab, ip_lab)
                        label_vec.append(out_lab)

                        # Process images
                        proc_im = os.path.join(proc_image_dir, it_im_name)
                        np.save(proc_im, im_data)
                        file_vec.append(proc_im)
                else:
                    # 'mean': average all annotations into a single label.
                    mean_lab = np.asarray(annotations).mean(0)
                    out_lab = os.path.join(
                        proc_dir, 'label_%s.npy' % im_name.split('.')[0])
                    np.save(out_lab, mean_lab)
                    label_vec.append(out_lab)

                    # Process images
                    proc_im = os.path.join(proc_image_dir, im_name)
                    # np.save appends '.npy' because proc_im lacks it, so the
                    # recorded path adds the suffix explicitly.
                    np.save(proc_im, im_data)
                    # misc.imsave(proc_im, im_data)
                    file_vec.append('%s.npy' % proc_im)

            if k == 'train':
                label_vec = label_vec[:self.train_size]
                file_vec = file_vec[:self.train_size]
                ims = ims[:self.train_size]
            labels[k] = label_vec
            new_files[k] = file_vec
            ims = np.array(ims)

            # Written once per fold; labels/new_files hold every fold
            # processed so far, and mean/stds cover only this fold's images.
            np.savez(
                os.path.join(
                    self.config.data_root,
                    self.name,
                    self.images_dir,
                    fold,
                    'file_paths'
                ),
                labels=labels,
                files=new_files,
                mean=np.mean(ims, axis=(0, 1, 2), keepdims=True),
                stds=np.std(ims, axis=(0, 1, 2), keepdims=True))
        return labels, new_files
Esempio n. 14
0
def main(
        experiment_name,
        list_experiments=False,
        load_and_evaluate_ckpt=None,
        placeholder_data=None,
        grad_images=False,
        gpu_device='/gpu:0'):
    """Create a tensorflow worker to run experiments in your DB."""
    if list_experiments:
        exps = db.list_experiments()
        print '_' * 30
        print 'Initialized experiments:'
        print '_' * 30
        for l in exps:
            print l.values()[0]
        print '_' * 30
        if len(exps) == 0:
            print 'No experiments found.'
        else:
            print 'You can add to the DB with: '\
                'python prepare_experiments.py --experiment=%s' % \
                exps[0].values()[0]
        return

    if experiment_name is None:
        print 'No experiment specified. Pulling one out of the DB.'
        experiment_name = db.get_experiment_name()

    # Prepare to run the model
    config = Config()
    condition_label = '%s_%s' % (experiment_name, py_utils.get_dt_stamp())
    experiment_label = '%s' % (experiment_name)
    log = logger.get(os.path.join(config.log_dir, condition_label))
    assert experiment_name is not None, 'Empty experiment name.'
    experiment_dict = experiments.experiments()[experiment_name]()
    config = add_to_config(d=experiment_dict, config=config)  # Globals
    config.load_and_evaluate_ckpt = load_and_evaluate_ckpt
    if load_and_evaluate_ckpt is not None:
        # Remove the train operation and add a ckpt pointer
        from ops import evaluation
    config, exp_params = process_DB_exps(
        experiment_name=experiment_name,
        log=log,
        config=config)  # Update config w/ DB params
    dataset_module = py_utils.import_module(
        model_dir=config.dataset_info,
        dataset=config.dataset)
    dataset_module = dataset_module.data_processing()  # hardcoded class name
    train_key = [k for k in dataset_module.folds.keys() if 'train' in k]
    if not len(train_key):
        train_key = 'train'
    else:
        train_key = train_key[0]
    train_data, train_means_image, train_means_label = get_data_pointers(
        dataset=config.dataset,
        base_dir=config.tf_records,
        cv=train_key,
        log=log)
    val_key = [k for k in dataset_module.folds.keys() if 'val' in k]
    if not len(val_key):
        val_key = 'train'
    else:
        val_key = val_key[0]
    val_data, val_means_image, val_means_label = get_data_pointers(
        dataset=config.dataset,
        base_dir=config.tf_records,
        cv=val_key,
        log=log)

    # Initialize output folders
    dir_list = {
        'checkpoints': os.path.join(
            config.checkpoints, condition_label),
        'summaries': os.path.join(
            config.summaries, condition_label),
        'condition_evaluations': os.path.join(
            config.condition_evaluations, condition_label),
        'experiment_evaluations': os.path.join(  # DEPRECIATED
            config.experiment_evaluations, experiment_label),
        'visualization': os.path.join(
            config.visualizations, condition_label),
        'weights': os.path.join(
            config.condition_evaluations, condition_label, 'weights')
    }
    [py_utils.make_dir(v) for v in dir_list.values()]

    # Prepare data loaders on the cpu
    if all(isinstance(i, list) for i in config.data_augmentations):
        if config.data_augmentations:
            config.data_augmentations = py_utils.flatten_list(
                config.data_augmentations,
                log)
    if load_and_evaluate_ckpt is not None:
        config.epochs = 1
        config.train_shuffle = False
        config.val_shuffle = False
    with tf.device('/cpu:0'):
        if placeholder_data:
            placeholder_shape = placeholder_data['train_image_shape']
            placeholder_dtype = placeholder_data['train_image_dtype']
            original_train_images = tf.placeholder(
                dtype=placeholder_dtype,
                shape=placeholder_shape,
                name='train_images')
            placeholder_shape = placeholder_data['train_label_shape']
            placeholder_dtype = placeholder_data['train_label_dtype']
            original_train_labels = tf.placeholder(
                dtype=placeholder_dtype,
                shape=placeholder_shape,
                name='train_labels')
            placeholder_shape = placeholder_data['val_image_shape']
            placeholder_dtype = placeholder_data['val_image_dtype']
            original_val_images = tf.placeholder(
                dtype=placeholder_dtype,
                shape=placeholder_shape,
                name='val_images')
            placeholder_shape = placeholder_data['val_label_shape']
            placeholder_dtype = placeholder_data['val_label_dtype']
            original_val_labels = tf.placeholder(
                dtype=placeholder_dtype,
                shape=placeholder_shape,
                name='val_labels')

            # Apply augmentations
            (
                train_images,
                train_labels
            ) = data_loader.placeholder_image_augmentations(
                images=original_train_images,
                model_input_image_size=dataset_module.model_input_image_size,
                labels=original_train_labels,
                data_augmentations=config.data_augmentations,
                batch_size=config.batch_size)
            (
                val_images,
                val_labels
            ) = data_loader.placeholder_image_augmentations(
                images=original_val_images,
                model_input_image_size=dataset_module.model_input_image_size,
                labels=original_val_labels,
                data_augmentations=config.data_augmentations,
                batch_size=config.batch_size)

            # Store in the placeholder dict
            placeholder_data['train_images'] = original_train_images
            placeholder_data['train_labels'] = original_train_labels
            placeholder_data['val_images'] = original_val_images
            placeholder_data['val_labels'] = original_val_labels
        else:
            train_images, train_labels = data_loader.inputs(
                dataset=train_data,
                batch_size=config.batch_size,
                model_input_image_size=dataset_module.model_input_image_size,
                tf_dict=dataset_module.tf_dict,
                data_augmentations=config.data_augmentations,
                num_epochs=config.epochs,
                tf_reader_settings=dataset_module.tf_reader,
                shuffle=config.shuffle_train,
                resize_output=config.resize_output)
            if hasattr(config, 'val_augmentations'):
                val_augmentations = config.val_augmentations
            else:
                val_augmentations = config.data_augmentations
            val_images, val_labels = data_loader.inputs(
                dataset=val_data,
                batch_size=config.batch_size,
                model_input_image_size=dataset_module.model_input_image_size,
                tf_dict=dataset_module.tf_dict,
                data_augmentations=val_augmentations,
                num_epochs=config.epochs,
                tf_reader_settings=dataset_module.tf_reader,
                shuffle=config.shuffle_val,
                resize_output=config.resize_output)
    log.info('Created tfrecord dataloader tensors.')

    # Load model specification
    struct_name = config.model_struct.split(os.path.sep)[-1]
    try:
        model_dict = py_utils.import_module(
            dataset=struct_name,
            model_dir=os.path.join(
                'models',
                'structs',
                experiment_name).replace(os.path.sep, '.')
            )
    except IOError:
        print 'Could not find the model structure: %s in folder %s' % (
            struct_name,
            experiment_name)

    # Inject model_dict with hyperparameters if requested
    model_dict.layer_structure = hp_opt_utils.inject_model_with_hps(
        layer_structure=model_dict.layer_structure,
        exp_params=exp_params)

    # Prepare variables for the models
    if len(dataset_module.output_size) == 2:
        log.warning(
            'Found > 1 dimension for your output size.'
            'Converting to a scalar.')
        dataset_module.output_size = np.prod(
            dataset_module.output_size)

    if hasattr(model_dict, 'output_structure'):
        # Use specified output layer
        output_structure = model_dict.output_structure
    else:
        output_structure = None

    # Correct number of output neurons if needed
    if config.dataloader_override and\
            'weights' in output_structure[-1].keys():
        output_neurons = output_structure[-1]['weights'][0]
        size_check = output_neurons != dataset_module.output_size
        fc_check = output_structure[-1]['layers'][0] == 'fc'
        if size_check and fc_check:
            output_structure[-1]['weights'][0] = dataset_module.output_size
            log.warning('Adjusted output neurons from %s to %s.' % (
                output_neurons,
                dataset_module.output_size))

    # Prepare model on GPU
    if not hasattr(dataset_module, 'input_normalization'):
        dataset_module.input_normalization = None
    with tf.device(gpu_device):
        with tf.variable_scope('cnn') as scope:
            # Training model
            model = model_utils.model_class(
                mean=train_means_image,
                training=True,
                output_size=dataset_module.output_size,
                input_normalization=dataset_module.input_normalization)
            train_scores, model_summary, _ = model.build(
                data=train_images,
                layer_structure=model_dict.layer_structure,
                output_structure=output_structure,
                log=log,
                tower_name='cnn')
            if grad_images:
                oh_dims = int(train_scores.get_shape()[-1])
                target_scores = tf.one_hot(train_labels, oh_dims) * train_scores
                train_gradients = tf.gradients(target_scores, train_images)[0]
            log.info('Built training model.')
            log.debug(
                json.dumps(model_summary, indent=4),
                verbose=0)
            print_model_architecture(model_summary)

            # Normalize labels on GPU if needed
            if 'normalize_labels' in exp_params.keys():
                if exp_params['normalize_labels'] == 'zscore':
                    train_labels -= train_means_label['mean']
                    train_labels /= train_means_label['std']
                    val_labels -= train_means_label['mean']
                    val_labels /= train_means_label['std']
                    log.info('Z-scoring labels.')
                elif exp_params['normalize_labels'] == 'mean':
                    train_labels -= train_means_label['mean']
                    val_labels -= val_means_label['mean']
                    log.info('Mean-centering labels.')

            # Check the shapes of labels and scores
            if not isinstance(train_scores, list):
                if len(
                        train_scores.get_shape()) != len(
                            train_labels.get_shape()):
                    train_shape = train_scores.get_shape().as_list()
                    label_shape = train_labels.get_shape().as_list()
                    val_shape = val_scores.get_shape().as_list()
                    val_label_shape = val_labels.get_shape().as_list()

                    if len(
                        train_shape) == 2 and len(
                            label_shape) == 1 and train_shape[-1] == 1:
                        train_labels = tf.expand_dims(train_labels, axis=-1)
                        val_labels = tf.expand_dims(val_labels, axis=-1)
                    elif len(
                        train_shape) == 2 and len(
                            label_shape) == 1 and train_shape[-1] == 1:
                        train_scores = tf.expand_dims(train_scores, axis=-1)
                        val_scores = tf.expand_dims(val_scores, axis=-1)

            # Prepare the loss function
            train_loss, _ = loss_utils.loss_interpreter(
                logits=train_scores,  # TODO
                labels=train_labels,
                loss_type=config.loss_function,
                weights=config.loss_weights,
                dataset_module=dataset_module)

            # Add loss tensorboard tracking
            if isinstance(train_loss, list):
                for lidx, tl in enumerate(train_loss):
                    tf.summary.scalar('training_loss_%s' % lidx, tl)
                train_loss = tf.add_n(train_loss)
            else:
                tf.summary.scalar('training_loss', train_loss)

            # Add weight decay if requested
            if len(model.regularizations) > 0:
                train_loss = loss_utils.wd_loss(
                    regularizations=model.regularizations,
                    loss=train_loss,
                    wd_penalty=config.regularization_strength)
            assert config.lr is not None, 'No learning rate.'  # TODO: Make a QC function 
            if config.lr > 1:
                old_lr = config.lr
                config.lr = loss_utils.create_lr_schedule(
                    train_batch=config.batch_size,
                    num_training=config.lr)
                config.optimizer = 'momentum'
                log.info('Forcing momentum classifier.')
            else:
                old_lr = None
            train_op = loss_utils.optimizer_interpreter(
                loss=train_loss,
                lr=config.lr,
                optimizer=config.optimizer,
                constraints=config.optimizer_constraints,
                model=model)
            log.info('Built training loss function.')

            # Add a score for the training set
            train_accuracy = eval_metrics.metric_interpreter(
                metric=dataset_module.score_metric,  # TODO: Attach to exp cnfg
                pred=train_scores,  # TODO
                labels=train_labels)

            # Add aux scores if requested
            train_aux = {}
            if hasattr(dataset_module, 'aux_scores'):
                for m in dataset_module.aux_scores:
                    train_aux[m] = eval_metrics.metric_interpreter(
                        metric=m,
                        pred=train_scores,
                        labels=train_labels)  # [0]  # TODO: Fix for multiloss

            # Prepare remaining tensorboard summaries
            if config.tensorboard_images:
                if len(train_images.get_shape()) == 4:
                    tf_fun.image_summaries(train_images, tag='Training images')
                if (np.asarray(
                        train_labels.get_shape().as_list()) > 1).sum() > 2:
                    tf_fun.image_summaries(
                        train_labels,
                        tag='Training_targets')
                    tf_fun.image_summaries(
                        train_scores,
                        tag='Training_predictions')
            if isinstance(train_accuracy, list):
                for tidx, ta in enumerate(train_accuracy):
                    tf.summary.scalar('training_accuracy_%s' % tidx, ta)
            else:
                tf.summary.scalar('training_accuracy', train_accuracy)
            if config.pr_curve:
                if isinstance(train_scores, list):
                    for pidx, train_score in enumerate(train_scores):
                        train_label = train_labels[:, pidx]
                        pr_summary.op(
                            tag='training_pr_%s' % pidx,
                            predictions=tf.cast(
                                tf.argmax(
                                    train_score,
                                    axis=-1),
                                tf.float32),
                            labels=tf.cast(train_label, tf.bool),
                            display_name='training_precision_recall_%s' % pidx)
                else:
                    pr_summary.op(
                        tag='training_pr',
                        predictions=tf.cast(
                            tf.argmax(
                                train_scores,
                                axis=-1),
                            tf.float32),
                        labels=tf.cast(train_labels, tf.bool),
                        display_name='training_precision_recall')
            log.info('Added training summaries.')

        with tf.variable_scope('cnn', tf.AUTO_REUSE) as scope:
            # Validation model
            scope.reuse_variables()
            val_model = model_utils.model_class(
                mean=train_means_image,  # Normalize with train data
                training=False,
                output_size=dataset_module.output_size,
                input_normalization=dataset_module.input_normalization)
            val_scores, _, _ = val_model.build(  # Ignore summary
                data=val_images,
                layer_structure=model_dict.layer_structure,
                output_structure=output_structure,
                log=log,
                tower_name='cnn')
            if grad_images:
                oh_dims = int(val_scores.get_shape()[-1])
                target_scores = tf.one_hot(val_labels, oh_dims) * val_scores
                val_gradients = tf.gradients(target_scores, val_images)[0]
            log.info('Built validation model.')

            # Check the shapes of labels and scores
            val_loss, _ = loss_utils.loss_interpreter(
                logits=val_scores,
                labels=val_labels,
                loss_type=config.loss_function,
                weights=config.loss_weights,
                dataset_module=dataset_module)

            # Add loss tensorboard tracking
            if isinstance(val_loss, list):
                for lidx, tl in enumerate(val_loss):
                    tf.summary.scalar('validation_loss_%s' % lidx, tl)
                val_loss = tf.add_n(val_loss)
            else:
                tf.summary.scalar('validation_loss', val_loss)

            # Add a score for the validation set
            val_accuracy = eval_metrics.metric_interpreter(
                metric=dataset_module.score_metric,  # TODO
                pred=val_scores,
                labels=val_labels)

            # Add aux scores if requested
            val_aux = {}
            if hasattr(dataset_module, 'aux_scores'):
                for m in dataset_module.aux_scores:
                    val_aux[m] = eval_metrics.metric_interpreter(
                        metric=m,
                        pred=val_scores,
                        labels=val_labels)  # [0]  # TODO: Fix for multiloss

            # Prepare tensorboard summaries
            if config.tensorboard_images:
                if len(val_images.get_shape()) == 4:
                    tf_fun.image_summaries(
                        val_images,
                        tag='Validation')
                if (np.asarray(
                        val_labels.get_shape().as_list()) > 1).sum() > 2:
                    tf_fun.image_summaries(
                        val_labels,
                        tag='Validation_targets')
                    tf_fun.image_summaries(
                        val_scores,
                        tag='Validation_predictions')
            if isinstance(val_accuracy, list):
                for vidx, va in enumerate(val_accuracy):
                    tf.summary.scalar('validation_accuracy_%s' % vidx, va)
            else:
                tf.summary.scalar('validation_accuracy', val_accuracy)
            if config.pr_curve:
                if isinstance(val_scores, list):
                    for pidx, val_score in enumerate(val_scores):
                        val_label = val_labels[:, pidx]
                        pr_summary.op(
                            tag='validation_pr_%s' % pidx,
                            predictions=tf.cast(
                                tf.argmax(
                                    val_score,
                                    axis=-1),
                                tf.float32),
                            labels=tf.cast(val_label, tf.bool),
                            display_name='validation_precision_recall_%s' %
                            pidx)
                else:
                    pr_summary.op(
                        tag='validation_pr',
                        predictions=tf.cast(
                            tf.argmax(
                                val_scores,
                                axis=-1),
                            tf.float32),
                        labels=tf.cast(val_labels, tf.bool),
                        display_name='validation_precision_recall')
            log.info('Added validation summaries.')

    # Set up summaries and saver
    if not hasattr(config, 'max_to_keep'):
        config.max_to_keep = None
    saver = tf.train.Saver(
        var_list=tf.global_variables(),
        max_to_keep=config.max_to_keep)
    summary_op = tf.summary.merge_all()

    # Initialize the graph
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

    # Need to initialize both of these if supplying num_epochs to inputs
    sess.run(
        tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer())
        )
    summary_writer = tf.summary.FileWriter(dir_list['summaries'], sess.graph)

    # Set up exemplar threading
    if placeholder_data:
        coord, threads = None, None
    else:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # Create dictionaries of important training and validation information
    train_dict = {
        'train_loss': train_loss,
        'train_images': train_images,
        'train_labels': train_labels,
        'train_op': train_op,
        'train_scores': train_scores
    }
    val_dict = {
        'val_loss': val_loss,
        'val_images': val_images,
        'val_labels': val_labels,
        'val_scores': val_scores,
    }

    if grad_images:
        train_dict['train_gradients'] = train_gradients
        val_dict['val_gradients'] = val_gradients

    if isinstance(train_accuracy, list):
        for tidx, (ta, va) in enumerate(zip(train_accuracy, val_accuracy)):
            train_dict['train_accuracy_%s' % tidx] = ta
            val_dict['val_accuracy_%s' % tidx] = va
    else:
        train_dict['train_accuracy_0'] = train_accuracy
        val_dict['val_accuracy_0'] = val_accuracy

    if load_and_evaluate_ckpt is not None:
        # Remove the train operation and add a ckpt pointer
        del train_dict['train_op']

    if hasattr(dataset_module, 'aux_score'):
        # Attach auxillary scores to tensor dicts
        for m in dataset_module.aux_scores:
            train_dict['train_aux_%s' % m] = train_aux[m]
            val_dict['val_aux_%s' % m] = val_aux[m]

    # Start training loop
    if old_lr is not None:
        config.lr = old_lr
    np.save(
        os.path.join(
            dir_list['condition_evaluations'], 'training_config_file'),
        config)
    log.info('Starting training')
    if load_and_evaluate_ckpt is not None:
        return evaluation.evaluation_loop(
            config=config,
            db=db,
            coord=coord,
            sess=sess,
            summary_op=summary_op,
            summary_writer=summary_writer,
            saver=saver,
            threads=threads,
            summary_dir=dir_list['summaries'],
            checkpoint_dir=dir_list['checkpoints'],
            weight_dir=dir_list['weights'],
            train_dict=train_dict,
            val_dict=val_dict,
            train_model=model,
            val_model=val_model,
            exp_params=exp_params,
            placeholder_data=placeholder_data)
    else:
        output_dict = training.training_loop(
            config=config,
            db=db,
            coord=coord,
            sess=sess,
            summary_op=summary_op,
            summary_writer=summary_writer,
            saver=saver,
            threads=threads,
            summary_dir=dir_list['summaries'],
            checkpoint_dir=dir_list['checkpoints'],
            weight_dir=dir_list['weights'],
            train_dict=train_dict,
            val_dict=val_dict,
            train_model=model,
            val_model=val_model,
            exp_params=exp_params)

    log.info('Finished training.')
    model_name = config.model_struct.replace('/', '_')
    if output_dict is not None:
        py_utils.save_npys(
            data=output_dict,
            model_name=model_name,
            output_string=dir_list['experiment_evaluations'])
    def get_data(self):
        """Prepare per-trial data files, score reliability, and split folds.

        When ``self.process_data`` is set, every ``*.m4v`` in
        ``self.video_dir`` is loaded, resized, and paired with its matching
        Ca2+, ephys and dff recordings; one ``.npz`` per (video, trial,
        frame set) is written to ``self.temp_dir``. Otherwise the ``*.npz``
        files already in ``self.temp_dir`` are reused (files whose basename
        contains ``'retina'`` are ignored).

        Videos that occur more than once are then used to estimate
        trial-to-trial reliability, which is saved to
        ``<temp_dir>/<output_name>_reliabilities.npz`` and printed.

        Returns:
            dict mapping ``self.folds['train']`` and ``self.folds['val']`` to
            numpy arrays of data-file paths.

        Raises:
            AssertionError: if no data files were produced or found.
        """
        res_height = self.video_frame_size[0] / self.resize_factor
        if self.process_data:
            py_utils.make_dir(self.temp_dir)
            videos_m4v = glob(os.path.join(self.video_dir, '*.m4v'))
            ca2_npy = glob(os.path.join(self.neural_dir_ca, '*.npy'))
            # Materialise the match list: it is re-scanned once per video, so
            # it must not be a one-shot iterator (Python 3 ``filter``).
            mv_pattern = re.compile('(.+)_MV_(.+)')
            ca2_match = [p for p in ca2_npy if mv_pattern.match(p)]
            num_frame_sets = np.arange(self.total_frames /
                                       self.video_timesteps)
            # Frame-set label for every frame position.
            frame_sets_idx = num_frame_sets.reshape(1, -1).repeat(
                self.video_timesteps, axis=1).reshape(-1)
            frame_idx = np.arange(self.total_frames)
            fr = []
            res_data = []
            video_idx = []
            for idx, v in tqdm(enumerate(videos_m4v), total=len(videos_m4v)):
                vname = v.split(os.path.sep)[-1].split('.')[0]

                # Load the video, resize, and normalize to [0, 1]
                vid = mp.VideoFileClip(v)
                clip_resized = vid.resize(height=res_height)
                fr += [vid.fps]
                frames = np.asarray(
                    [frame for frame in clip_resized.iter_frames()])
                if frames[0].max() > 1:
                    frames = frames.astype(np.float32) / 255.
                frames = frames[:self.total_frames]

                # Find matching ca2+ vids
                vid_pattern = re.compile('(.+)_%s_(.+)' % vname)
                it_ca2 = [p for p in ca2_match if vid_pattern.match(p)]
                for tr, samp_path in enumerate(it_ca2):
                    # Resample, detrend, zscore
                    samp = resample_detrend_zscore(np.load(samp_path),
                                                   new_fps=False)
                    samp = samp.transpose(2, 0, 1)
                    samp = samp[:len(frames)]
                    # NOTE(review): str.strip removes a *set of characters*
                    # from both ends, not the 'full_frame.npy' suffix; kept
                    # as-is because the on-disk naming may depend on it.
                    esamp_file = '%s.npy' % samp_path.strip('full_frame.npy')
                    esamp_file = esamp_file.replace(self.ca_split,
                                                    self.ephys_split)
                    esamp = np.load(esamp_file)[:len(frames)]
                    dffsearch = os.path.join(
                        os.path.sep.join(samp_path.split(os.path.sep)[:4]),
                        self.dff_split, self.dff_suffix,
                        samp_path.split(os.path.sep)[-1])
                    dffglob = glob('%s_cell*' %
                                   dffsearch.strip('full_frame.npy'))
                    dffsamp = np.asarray(
                        [np.load(d)[:len(frames)] for d in dffglob])
                    dffsamp = resample_detrend_zscore(dffsamp, new_fps=False)
                    dffsamp = dffsamp.transpose()
                    dffsamp = dffsamp[:len(frames)]
                    for fs in np.unique(frame_sets_idx):
                        fi = frame_sets_idx == fs
                        # Trim samp and vid and save to npz in temp folder
                        fname = os.path.join(self.temp_dir,
                                             '%s_%s_fs_%s' % (vname, tr, fs))
                        video_idx += [idx]
                        # NOTE(review): np.savez appends '.npz' to fname but
                        # the path recorded here has no extension; confirm
                        # the np.load calls below can find these files.
                        res_data += [fname]
                        it_esamp = esamp[:self.model_timesteps]
                        samp = samp[:self.model_timesteps]
                        if self.smooth_ephys:
                            it_esamp = smooth(it_esamp,
                                              window='flat',
                                              mode='same',
                                              window_len=5)[5:-5]
                        # A failed save now propagates to the caller instead
                        # of dropping into an interactive debugger.
                        np.savez(
                            fname,
                            ca=samp,  # [fi],  # Make sure this is correct!
                            ephys=it_esamp,
                            frame_idx=frame_idx[fi],
                            dff=dffsamp[:self.model_timesteps],  # [fi],
                            video=frames[fi])
            np.save(os.path.join(self.temp_dir, 'video_idx'), video_idx)
        else:
            res_data = glob(os.path.join(self.temp_dir, '*.npz'))
            res_data = [
                rs for rs in res_data
                if 'retina' not in rs.split(os.path.sep)[-1]
            ]
        assert len(res_data)
        res_data = np.asarray(res_data)
        # Video ids are parsed from the file names: the text before the first
        # 'g_' in each basename, cast to int.
        video_idx = np.asarray([
            f.split(os.path.sep)[-1].split('g_')[0] for f in res_data
        ]).astype(int)

        # Calculate reliability
        unique_videos, video_counts = np.unique(video_idx, return_counts=True)
        repeated_videos = np.in1d(video_idx,
                                  unique_videos[np.where(video_counts > 1)[0]])
        repeated_videos = video_idx[repeated_videos]
        dff_reliability, ca_reliability = [], []
        e_reliability, variability = [], []
        # The mask is indexed as mask[:, :, cell] below: one 2D map per cell.
        mask = np.load(self.cell_mask)
        for video in tqdm(np.unique(repeated_videos),
                          desc='Calculating reliability',
                          total=len(repeated_videos) // 2):
            it_idx = np.in1d(video_idx, video)
            sel_data = res_data[it_idx]
            # Only the first two files of a repeated video are compared.
            data_a = np.load(sel_data[0])
            data_b = np.load(sel_data[1])
            dff_a = data_a['dff']
            dff_b = data_b['dff']
            ephys_a = data_a['ephys']
            ephys_b = data_b['ephys']
            ca_a = data_a['ca'].reshape(dff_a.shape[0], -1)
            ca_b = data_b['ca'].reshape(dff_a.shape[0], -1)
            if self.binarize_spikes:
                ephys_a = (ephys_a > 0).astype(np.int32)
                ephys_b = (ephys_b > 0).astype(np.int32)
            # Gather, per cell, the Ca2+ traces of the pixels in its mask.
            proc_ca_a, proc_ca_b = [], []
            for cell in range(mask.shape[-1]):
                it_cells = mask[:, :, cell].ravel()
                cell_locations = np.where(it_cells)
                proc_ca_a += [
                    np.asarray([
                        ca_a[idx, cell_locations]
                        for idx in range(ca_a.shape[0])
                    ])
                ]
                proc_ca_b += [
                    np.asarray([
                        ca_b[idx, cell_locations]
                        for idx in range(ca_b.shape[0])
                    ])
                ]
            ca_rs = []
            var = []
            for pa, pb in zip(proc_ca_a, proc_ca_b):
                # Loop through cells
                pa = pa.squeeze()
                pb = pb.squeeze()
                rscores = [
                    np.corrcoef(pa[:, idx], pb[:, idx])[0, 1]
                    for idx in range(pa.shape[-1])
                ]
                # Variability is taken at the best-correlated pixel.
                arg_idx = np.argmax(rscores)
                rscores = self.aggregate(rscores)
                var += [[np.std(pa[:, arg_idx]), np.std(pb[:, arg_idx])]]
                ca_rs += [rscores]
            ca_reliability += [ca_rs]
            variability += [var]
            dff_reliability += [
                np.mean([
                    np.corrcoef(dff_a[:, idx], dff_b[:, idx])[0, 1]
                    for idx in range(dff_a.shape[-1])
                ])
            ]
            e_reliability += [
                np.corrcoef(ephys_a.ravel(), ephys_b.ravel())[0, 1]
            ]
        ca_reliability = np.asarray(ca_reliability)
        dff_reliability = np.asarray(dff_reliability)
        e_reliability = np.asarray(e_reliability)
        # Average over videos, then over the two repeats: one value per cell.
        variability = np.stack([np.asarray(v)
                                for v in variability]).mean(0).mean(-1)

        # Bimodal difference at 0.8
        mask_thresh_idx = variability > 0.8
        mask = (mask * mask_thresh_idx[None, None, :]).transpose(2, 0, 1)
        np.savez(os.path.join(self.temp_dir,
                              '%s_reliabilities' % self.output_name),
                 ca_reliability=ca_reliability,
                 dff_reliability=dff_reliability,
                 proc_mask=mask,
                 mask_thresh_idx=mask_thresh_idx,
                 e_reliability=e_reliability,
                 variability=variability)
        print('Ca2+ video reliability: %s' %
              ca_reliability[:, mask_thresh_idx].mean())
        print('dff video reliability: %s' % np.mean(dff_reliability))
        print('ephys video reliability: %s' % np.nanmean(e_reliability))

        # Stimulus period is 5.15 seconds. Ca2+ is 111 frames at 15fps.
        # Means that there is 7.326 seconds of recording from Ca2+.
        # Video is variable framerate.
        # For each video resample ca2+ to 25fps then align both.
        # Trim this down to 76 for 5000 ms of ca2+ data

        # Create CV folds: the first file of every repeated video is held
        # out for validation.
        validation_idx = np.asarray([
            np.where(video_idx == reps)[0][0]
            for reps in np.unique(repeated_videos)
        ])
        # NOTE(review): validation_idx holds positions in res_data while
        # video_idx holds video ids parsed from file names; confirm that
        # comparing the two here is intended.
        train_idx = ~np.in1d(video_idx, validation_idx)
        train_files = res_data[train_idx]
        validation_files = res_data[validation_idx]
        if self.shuffle:
            rand_idx = np.random.permutation(len(train_files))
            train_files = train_files[rand_idx]
            rand_idx = np.random.permutation(len(validation_files))
            validation_files = validation_files[rand_idx]

        # Build CV dict
        cv_files = {}
        cv_files[self.folds['train']] = train_files
        cv_files[self.folds['val']] = validation_files
        return cv_files

def sigmoid_fun(x):
    """Return ``1 / (1 + exp(x))`` elementwise.

    Note the positive exponent: the result equals ``1 - sigmoid(x)``.
    ``calculate_map`` subtracts it from 1 to get the usual logistic output.
    """
    denominator = 1 + np.exp(x)
    return 1 / denominator


# Load the grouped experiment table; the code below reads its `val_dataset`,
# `model` and `file_name` columns.
df = pd.read_csv(
    os.path.join(
        'data_to_process_for_jk',
        'generalize_snemi_experiment_data',
        'generalize_snemi_experiment_data_grouped.csv'))
# `sel` picks the entry of each file's 'val_dict' that is inspected below;
# `subsel` is not used in the visible part of this script.
sel, subsel = 0, 3

out_dir = os.path.join('data_to_process_for_jk', 'generalization_membranes')
py_utils.make_dir(out_dir)
datasets = np.unique(df.val_dataset)
models = np.unique(df.model)
# One figure per validation dataset, iterating over every model inside it.
for d in tqdm(
        datasets,
        desc='Cycling through datasets',
        total=len(datasets)):
    f = plt.figure()
    plt.suptitle(d)
    for idx, m in enumerate(models):
        # Load the first result file recorded for this (model, dataset) pair.
        data = np.load(
            df[np.logical_and(
                df.model == m, df.val_dataset == d)].file_name.values[0])
        data = data['val_dict'][sel]
        logs = data['logits']
        # NOTE(review): the snippet is truncated after this condition.
        if idx == 0:
Esempio n. 17
0
def calculate_map(it_val_dict,
                  exp_label,
                  config,
                  map_dir='maps',
                  auto_adjust=False):
    """Score boundary predictions with average precision and ARAND.

    Args:
        it_val_dict: indexable collection of evaluation records, each with
            'logits' and 'labels' entries; ``None`` skips the evaluation.
        exp_label: file stem of the ``.npz`` written inside ``map_dir``.
        config: stored verbatim in the saved ``.npz``.
        map_dir: output directory (created through ``py_utils.make_dir``).
        auto_adjust: when true, labels whose mean is below 0.5 are inverted
            before scoring.

    Returns:
        Tuple ``(maps, arands)`` of per-example score lists; both are empty
        when ``it_val_dict`` is ``None``.
    """
    py_utils.make_dir(map_dir)

    def get_segments(x, threshold=0.5, comp=np.greater):
        """Segment ``x`` by watershed on its thresholded distance map."""
        foreground = comp(x, threshold).astype(float)
        distance = ndi.distance_transform_edt(foreground)
        peaks = peak_local_max(distance,
                               indices=False,
                               footprint=np.ones((10, 10)))
        seeds = ndi.label(peaks)[0]
        return watershed(-distance, seeds).astype(np.int32)

    maps, arands = [], []
    if it_val_dict is None:
        print('Received an empty validation dict.')
        return maps, arands

    for record_idx in tqdm(range(len(it_val_dict)), desc='Evaluating mAPs'):
        record = it_val_dict[record_idx]
        batch_logits = record['logits']
        batch_labels = record['labels']
        for logits, labels in zip(batch_logits, batch_labels):
            labels = labels.squeeze()
            # Multi-channel outputs: average the first two channels, negate
            # the logits, and re-binarize the labels.
            if len(logits.shape) > 2 and logits.shape[-1] > 1:
                logits = -1 * logits[..., [0, 1]].mean(-1)
                labels = (labels[..., [0, 1]].mean(-1) > 0.5).astype(
                    np.int32)
            if auto_adjust and labels.mean() < 0.5:
                labels = 1. - labels
            # sigmoid_fun uses a positive exponent, hence the ``1 -``.
            sig_pred = 1. - sigmoid_fun(logits.squeeze())

            # Average precision over all pixels
            maps.append(
                average_precision_score(y_score=sig_pred.ravel(),
                                        y_true=labels.ravel()))

            # ARAND on watershed segments, restricted to label == 0 pixels
            pred_segs = get_segments(sig_pred)
            lab_segs = get_segments(labels)
            keep = (labels == 0).astype(np.int32)
            pred_segs *= keep
            lab_segs *= keep
            arands.append(adapted_rand(pred_segs, lab_segs))

    out_path = os.path.join(map_dir, exp_label)
    print('mAP is: %s' % np.mean(maps))
    print('ARAND is: %s' % np.mean(arands))
    print('Saved to: %s.npz' % out_path)
    np.savez(out_path,
             maps=maps,
             arands=arands,
             config=config,
             val_dict=it_val_dict)
    return maps, arands
Esempio n. 18
0
def extract_vgg_features(cm_type='contextual_vector_vd',
                         layer_name='pool3',
                         output_type='sparse_pool',
                         project_name=None,
                         model_type='vgg16',
                         timesteps=5,
                         dtype=tf.float32):
    """Build a VGG16-based train/validation graph and fit it to neural data.

    Loads the preprocessed ``<project_name>.npz`` for the project, builds
    training and validation graphs that share variables under the 'cnn'
    scope, runs ``training.training_loop`` and saves the config and outputs
    as ``.npz`` files in a timestamped results directory.

    Args:
        cm_type: key into ``normalizations.normalizations()`` selecting the
            contextual model; ``None`` or ``'none'`` skips it.
        layer_name: VGG layer whose activities are read; also passed as
            ``final_layer`` when building VGG.
        output_type: ``it_neuron_op`` used for the output layer.
        project_name: key into ``config.projects``; required.
        model_type: passed to ``rf_sizes.get_eRFs``.
        timesteps: passed to the contextual model.
        dtype: dtype of the image and neural placeholders.

    Returns:
        None. Results are written to disk.

    Raises:
        AssertionError: if ``project_name`` is None.
    """
    assert project_name is not None, 'Need a project name.'

    # 1. Get file paths and load config
    config = Config()
    config.cm_type = cm_type
    project_path = config.projects[project_name]

    # 2. Assert the model is there and load neural data.
    print 'Loading preprocessed data...'
    data = np.load(os.path.join(project_path, '%s.npz' % project_name))
    neural_data = data['data_matrix']
    if config.round_neural_data:
        neural_data = np.round(neural_data)
    # TODO: across_session_data_matrix is subtracted version
    images = data['all_images'].astype(np.float32)

    # Remove zeroed columns from neural data
    channel_check = np.abs(neural_data).sum(0) > 0
    neural_data = neural_data[:, channel_check]

    # TODO: create AUX dict with each channel's X/Y
    output_aux = {'loss': config.loss_type}  # None
    rfs = rf_sizes.get_eRFs(model_type)[layer_name]

    # 3. Create a output directory if necessary and save a timestamped numpy.
    model_description = '%s_%s_%s_%s_%s_%s' % (
        cm_type, layer_name, output_type, project_name, model_type, timesteps)
    dt_stamp = '%s_%s' % (model_description, str(datetime.now()).replace(
        ' ', '_').replace(':', '_').replace('-', '_'))
    project_dir = os.path.join(config.results, project_name)
    out_dir = os.path.join(project_dir, dt_stamp)
    checkpoint_dir = os.path.join(out_dir, 'checkpoints')
    # NOTE(review): checkpoint_dir is not in this list; presumably the
    # training loop creates it -- confirm.
    dirs = [config.results, config.summaries, out_dir]
    [py_utils.make_dir(x) for x in dirs]
    print '-' * 60
    print('Training model:' + out_dir)
    print '-' * 60

    # 4. Prepare data on CPU
    neural_shape = list(neural_data.shape)
    # One output unit per remaining neural-data column.
    num_neurons = neural_shape[-1]
    with tf.device('/cpu:0'):
        train_images = tf.placeholder(dtype=dtype,
                                      name='train_images',
                                      shape=[config.train_batch_size] +
                                      config.img_shape)
        train_neural = tf.placeholder(dtype=dtype,
                                      name='train_neural',
                                      shape=[config.train_batch_size] +
                                      [num_neurons])
        val_images = tf.placeholder(dtype=dtype,
                                    name='val_images',
                                    shape=[config.val_batch_size] +
                                    config.img_shape)
        val_neural = tf.placeholder(dtype=dtype,
                                    name='val_neural',
                                    shape=[config.val_batch_size] +
                                    [num_neurons])

    # 5. Prepare model on GPU
    with tf.device('/gpu:0'):
        with tf.variable_scope('cnn') as scope:
            vgg = vgg16.Vgg16(vgg16_npy_path=config.vgg16_weight_path)
            train_mode = tf.get_variable(name='training', initializer=False)
            vgg.build(
                train_images,
                output_shape=1000,  # hardcode
                train_mode=train_mode,
                final_layer=layer_name)

            # Select a layer
            activities = vgg[layer_name]

            # Feature reduce with a 1x1 conv
            if config.reduce_features is not None:
                vgg, activities, reduce_weights = ff.pool_ff_interpreter(
                    self=vgg,
                    it_neuron_op='1x1conv',
                    act=activities,
                    it_name='feature_reduce',
                    out_channels=config.reduce_features,
                    aux=None)
            else:
                reduce_weights = None

            # Add con-model if requested
            if cm_type is not None and cm_type != 'none':
                norms = normalizations.normalizations()
                activities, cm_weights, _ = norms[cm_type](
                    x=activities,
                    r_in=rfs['r_in'],
                    j_in=rfs['j_in'],
                    timesteps=timesteps,
                    lesions=config.lesions,
                    train=True)
            else:
                cm_weights = None

            # Create output layer for N-recording channels
            # Dropout is applied only in the training graph.
            activities = tf.nn.dropout(activities, 0.5)
            vgg, output_activities, output_weights = ff.pool_ff_interpreter(
                self=vgg,
                it_neuron_op=output_type,
                act=activities,
                it_name='output',
                out_channels=num_neurons,
                aux=output_aux)

            # Prepare the loss function
            loss, _ = loss_utils.loss_interpreter(logits=output_activities,
                                                  labels=train_neural,
                                                  loss_type=config.loss_type)

            # Add WD to the feature-reduction weights
            if config.reduce_features is not None and reduce_weights is not None:
                loss += loss_utils.add_wd(weights=reduce_weights,
                                          wd_dict=config.wd_types)

            # Add contextual model WD
            if config.cm_wd_types is not None and cm_weights is not None:
                loss += loss_utils.add_wd(weights=cm_weights,
                                          wd_dict=config.cm_wd_types)

            # Add WD to output layer
            if config.wd_types is not None:
                loss += loss_utils.add_wd(weights=output_weights,
                                          wd_dict=config.wd_types)

            # Finetune the learning rates
            train_op = loss_utils.optimizer_interpreter(
                loss=loss, lr=config.lr, optimizer=config.optimizer)

            # Calculate metrics
            train_accuracy = eval_metrics.metric_interpreter(
                metric=config.metric,
                pred=output_activities,
                labels=train_neural)

            # Add summaries for debugging
            tf.summary.image('train images', train_images)
            tf.summary.image('validation images', val_images)
            tf.summary.scalar("loss", loss)
            tf.summary.scalar("training accuracy", train_accuracy)

            # Setup validation op
            # From here on, variables in the 'cnn' scope are reused rather
            # than created.
            scope.reuse_variables()

            # Validation graph is the same as training except no batchnorm
            val_vgg = vgg16.Vgg16(vgg16_npy_path=config.vgg16_weight_path)
            val_vgg.build(val_images,
                          output_shape=1000,
                          final_layer=layer_name)

            # Select a layer
            val_activities = val_vgg[layer_name]

            # Add feature reduction if requested
            # NOTE(review): the training graph hardcodes '1x1conv' for this
            # op while validation uses config.reduce_type -- confirm they
            # agree.
            if config.reduce_features is not None:
                val_vgg, val_activities, _ = ff.pool_ff_interpreter(
                    self=val_vgg,
                    it_neuron_op=config.reduce_type,
                    act=val_activities,
                    it_name='feature_reduce',
                    out_channels=config.reduce_features,
                    aux=None)
            else:
                reduce_weights = None

            # Add con-model if requested
            if cm_type is not None and cm_type != 'none':
                val_activities, _, _ = norms[cm_type](x=val_activities,
                                                      r_in=rfs['r_in'],
                                                      j_in=rfs['j_in'],
                                                      timesteps=timesteps,
                                                      lesions=config.lesions,
                                                      train=False)

            # Create output layer for N-recording channels
            val_vgg, val_output_activities, _ = ff.pool_ff_interpreter(
                self=val_vgg,
                it_neuron_op=output_type,
                act=val_activities,
                it_name='output',
                out_channels=num_neurons,
                aux=output_aux)

            # Prepare the loss function
            # NOTE(review): max_spikes is passed here but not to the
            # training loss above -- confirm this asymmetry is intended.
            val_loss, _ = loss_utils.loss_interpreter(
                logits=val_output_activities,
                labels=val_neural,
                loss_type=config.loss_type,
                max_spikes=config.max_spikes)

            # Calculate metrics
            val_accuracy = eval_metrics.metric_interpreter(
                metric=config.metric,
                pred=val_output_activities,
                labels=val_neural)
            tf.summary.scalar('validation loss', val_loss)
            tf.summary.scalar('validation accuracy', val_accuracy)

    # Set up summaries and saver
    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()

    # Initialize the graph
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

    # Need to initialize both of these if supplying num_epochs to inputs
    # NOTE(review): only global variables are initialized here.
    sess.run(tf.global_variables_initializer())
    summary_dir = os.path.join(config.summaries, dt_stamp)
    summary_writer = tf.summary.FileWriter(summary_dir, sess.graph)

    # Start training loop
    # Tensors and ops handed to the training loop.
    train_vars = {
        'images': train_images,
        'neural_data': train_neural,
        'loss': loss,
        'score': train_accuracy,
        'train_op': train_op
    }
    # Expose the contextual-model weights under their own names.
    if cm_weights is not None:
        for k, v in cm_weights.iteritems():
            train_vars[k] = v
    val_vars = {
        'images': val_images,
        'neural_data': val_neural,
        'loss': val_loss,
        'score': val_accuracy,
    }
    # Call arguments recorded next to the config for later reference.
    extra_params = {
        'cm_type': cm_type,
        'layer_name': layer_name,
        'output_type': output_type,
        'project_name': project_name,
        'model_type': model_type,
        'lesions': config.lesions,
        'timesteps': timesteps
    }
    # Save the configuration before training starts.
    np.savez(os.path.join(out_dir, 'training_config_file'),
             config=config,
             extra_params=extra_params)
    train_cv_out, val_cv_out, weights = training.training_loop(
        config=config,
        neural_data=neural_data,
        images=images,
        target_size=config.img_shape[:2],
        sess=sess,
        train_vars=train_vars,
        val_vars=val_vars,
        summary_op=summary_op,
        summary_writer=summary_writer,
        checkpoint_dir=checkpoint_dir,
        summary_dir=summary_dir,
        saver=saver)
    # Save the training outputs together with the configuration.
    np.savez(os.path.join(out_dir, 'data'),
             config=config,
             extra_params=extra_params,
             train_cv_out=train_cv_out,
             val_cv_out=val_cv_out,
             weight=weights)
def _save_group_pointplots(scores, keep_groups, y, out_path):
    """Save one row of point plots (one subplot per model group) to disk.

    Args:
        scores: DataFrame with 'model_idx', 'datasets', 'model names' and
            the column named by `y`.
        keep_groups: Iterable of 'model_idx' values; one subplot is drawn
            per entry, left to right.
        y: Name of the column plotted on the y axis.
        out_path: Destination passed to `plt.savefig`.
    """
    f = plt.figure()
    sns.set(context='paper', font='Arial', font_scale=.5)
    sns.set_style("white")
    sns.despine()
    num_groups = len(keep_groups)
    for count, idx in enumerate(keep_groups, start=1):
        plt.subplot(1, num_groups, count)
        sel_df = scores[scores['model_idx'] == idx]
        sns.pointplot(data=sel_df, x='datasets', y=y, hue='model names')
        plt.ylim([0.4, 1.1])
    plt.savefig(out_path, dpi=300)
    plt.close(f)


def main(
        experiment_name,
        im_ext='.pdf',
        transform_loss=None,  # 'log',
        colors='Paired',
        flip_axis=False,
        port_fwd=False,
        num_steps=np.inf,
        exclude=None,
        list_experiments=False,
        out_dir='analysis_data'):
    """Plot results of provided experiment name.

    Pulls performance records from the DB, aggregates them per
    (learning rate, dataset, model) and writes CSV summaries plus two
    point-plot figures into `out_dir`.

    Args:
        experiment_name: Experiment to query. A comma-separated string
            pools the records of every listed experiment; the first name is
            then used in the output file names.
        im_ext: Accepted for interface compatibility; not used here.
        transform_loss: Accepted for interface compatibility; not used here.
        colors: Accepted for interface compatibility; not used here.
        flip_axis: Accepted for interface compatibility; not used here.
        port_fwd: If truthy, sets `config.db_ssh_forward = True`.
        num_steps: Records whose 'training iteration' is >= this value are
            dropped before aggregation.
        exclude: Accepted for interface compatibility; not used here.
        list_experiments: If truthy, call `db.list_experiments()` and
            return without plotting.
        out_dir: Directory that receives the CSV and PNG outputs.

    Raises:
        RuntimeError: If the DB returns no performance records.
    """
    config = Config()
    if list_experiments:
        db.list_experiments()
        return

    if port_fwd:
        config.db_ssh_forward = True
    py_utils.make_dir(out_dir)

    # Get experiment data
    if ',' in experiment_name:
        exps = experiment_name.split(',')
        perf = []
        for exp in exps:
            perf += db.get_performance(experiment_name=exp)
        experiment_name = exps[0]
    else:
        perf = db.get_performance(experiment_name=experiment_name)
    if len(perf) == 0:
        raise RuntimeError('Could not find any results.')

    structure_names = [x['model'].split('/')[-1] for x in perf]
    datasets = [x['val_dataset'] for x in perf]
    steps = [float(x['step']) for x in perf]
    training_loss = [float(x['train_loss']) for x in perf]
    validation_loss = [float(x['val_loss']) for x in perf]
    training_score = [float(x['train_score']) for x in perf]
    validation_score = [float(x['val_score']) for x in perf]
    summary_dirs = [x['summary_path'] for x in perf]
    ckpts = [x['ckpt_path'] for x in perf]
    params = [x['num_params'] for x in perf]
    lrs = [x['lr'] for x in perf]

    # Pass data into a pandas DF. Stacking strings and floats together
    # yields a string-typed array, so numeric columns are converted back
    # explicitly below.
    df = pd.DataFrame(np.vstack(
        (structure_names, datasets, steps, params, training_loss,
         training_score, validation_loss, validation_score, summary_dirs,
         ckpts, lrs)).transpose(),
                      columns=[
                          'model names', 'datasets', 'training iteration',
                          'params', 'training loss', 'training accuracy',
                          'validation loss', 'validation accuracy',
                          'summary_dirs', 'checkpoints', 'lrs'
                      ])
    # 'validation loss' must be numeric too: its group-wise minimum is taken
    # below, and a minimum over strings would be lexicographic.
    numeric_columns = [
        'training loss', 'validation loss', 'validation accuracy',
        'training accuracy', 'training iteration', 'params', 'lrs'
    ]
    for column in numeric_columns:
        df[column] = pd.to_numeric(df[column], errors='coerce')

    # Plot TTA
    dfs = []
    print(len(df))
    uni_structure_names = np.unique(structure_names)
    max_num_steps = num_steps  # (20000 / 32) * num_epochs
    for m in tqdm(uni_structure_names, total=len(uni_structure_names)):
        it_df = df[df['model names'] == m]
        it_df = it_df[it_df['training iteration'] < max_num_steps]
        different_models = np.unique(it_df['summary_dirs'])
        num_models = len(different_models)
        # A model with no rows under the step cap contributes nothing
        # (instead of failing on an empty concat).
        for model in different_models:
            # Grab each run, sort by training iteration, and detach it from
            # the parent frame so the column assignments below are safe.
            sel_data = it_df[it_df['summary_dirs'] == model].sort_values(
                'training iteration').copy()

            # Smooth the sorted validation scores for tta
            sel_data['tta'] = ndimage.gaussian_filter1d(
                sel_data['validation accuracy'], 3)
            sel_data['num_runs'] = num_models
            dfs += [sel_data]

    # Get max scores and TTAs
    dfs = pd.concat(dfs)
    grouped = dfs.groupby(['lrs', 'datasets', 'model names'], as_index=False)
    scores = grouped.max()
    # Only one column of each of these aggregates is used, so restrict the
    # aggregation to it rather than reducing string columns as well.
    losses = grouped['validation loss'].min()
    ttas = grouped['validation accuracy'].mean()

    # Combine into a single DF
    print('Sort by val loss, then validate each (make a new dataloader)')
    # NOTE(review): 'tta' is overwritten with the mean validation accuracy,
    # not the smoothed curve computed above -- confirm this is intended.
    scores['tta'] = ttas['validation accuracy']
    scores['validation loss'] = losses['validation loss']

    # Save datasets to csv
    filename = 'raw_data_%s.csv' % experiment_name
    dfs.to_csv(os.path.join(out_dir, filename))
    filename = 'scores_%s.csv' % experiment_name
    scores.to_csv(os.path.join(out_dir, filename))

    # Save an easy-to-parse csv for test datasets and fix for automated
    # processing: each checkpoint path gets '-<suffix>' appended, where the
    # suffix is the last '_'-separated token before the first '.'.
    trim_ckpts = [
        '%s-%s' % (ckpt, ckpt.split('.')[0].split('_')[-1])
        for ckpt in scores['checkpoints']
    ]
    trim_models = list(scores['model names'])
    trimmed_ckpts = pd.DataFrame(trim_ckpts)
    trimmed_models = pd.DataFrame(trim_models)
    trimmed_ckpts.to_csv(
        os.path.join(out_dir, 'checkpoints_%s.csv' % experiment_name))
    trimmed_models.to_csv(
        os.path.join(out_dir, 'models_%s.csv' % experiment_name))

    # Add indicator variable to group different model types during plotting.
    # Names matching none of the groups keep the initial index 0; a name
    # matching several groups ends up with the last matching index.
    scores['model_idx'] = 0
    model_groups = ['fgru', 'resnet', 'unet', 'hgru']
    for idx, m in enumerate(model_groups):
        matches = scores['model names'].str.contains(m, regex=False)
        # Single .loc assignment; chained indexing may write to a temporary.
        scores.loc[matches, 'model_idx'] = idx
    keep_groups = np.where(~np.in1d(model_groups, 'hgru'))[0]
    scores = scores[scores['model_idx'].isin(keep_groups)]

    # Print scores to console
    print(scores)

    # Create max accuracy plots
    _save_group_pointplots(
        scores=scores,
        keep_groups=keep_groups,
        y='validation accuracy',
        out_path=os.path.join(out_dir, 'max_%s.png' % experiment_name))

    # Create tta plots
    _save_group_pointplots(
        scores=scores,
        keep_groups=keep_groups,
        y='tta',
        out_path=os.path.join(out_dir, 'tta_%s.png' % experiment_name))
Esempio n. 20
0
def main(experiment,
         model,
         train,
         val,
         checkpoint,
         use_db=True,
         test=False,
         reduction=0,
         random=True,
         add_config=None,
         gpu_device=['/gpu:0'],
         cpu_device='/cpu:0',
         num_gpus=False,
         transfer=False,
         placeholders=False,
         save_test_npz=True,
         num_batches=None,
         map_out='test_maps',
         out_dir=None):
    """Interpret and run a model.

    Resolves the experiment parameters (from the DB, or from the experiment
    module under `experiments`), hands them to `model_tools.build_model`,
    and optionally stores the returned results as an .npz file.

    Args:
        experiment: Experiment name; used for the log file name, the DB
            query, or the module import.
        model: Model name override (non-DB path only). If None, the first
            entry of the experiment's 'model' parameter is used.
        train: Optional override for 'train_dataset' (non-DB path only).
        val: Optional override for 'val_dataset' (non-DB path only).
        checkpoint: Forwarded to `build_model`; also stored in the saved
            results.
        use_db: If truthy, fetch parameters with `db.get_parameters`.
        test: Forwarded to `build_model`; together with `save_test_npz` it
            enables saving the results.
        reduction: Accepted for interface compatibility; not used here.
        random: Forwarded to `db.get_parameters`.
        add_config: Forwarded to `build_model`.
        gpu_device: List of GPU device strings. The default list is never
            mutated in this function; it is replaced when `num_gpus` is set.
        cpu_device: Forwarded to `build_model`.
        num_gpus: If truthy, `gpu_device` becomes
            ['/gpu:0', ..., '/gpu:<num_gpus - 1>'].
        transfer: Accepted for interface compatibility; not used here.
        placeholders: Forwarded to `build_model`.
        save_test_npz: Save results to `out_dir` when `test` is also truthy.
        num_batches: Forwarded to `build_model`.
        map_out: Forwarded to `build_model`.
        out_dir: Output directory; required when testing with
            `save_test_npz`.

    Raises:
        RuntimeError: If results should be saved but `out_dir` is None.
        AssertionError: If no model name is given and the experiment
            defines none.
    """
    main_config = Config()
    dt_string = py_utils.get_dt_stamp()
    log = logger.get(
        os.path.join(main_config.log_dir, '%s_%s' % (experiment, dt_string)))
    if num_gpus:
        gpu_device = ['/gpu:%d' % i for i in range(num_gpus)]
    if test and save_test_npz and out_dir is None:
        raise RuntimeError('You must specify an out_dir.')
    if use_db:
        exp_params = db.get_parameters(log=log,
                                       experiment=experiment,
                                       random=random)[0]
    else:
        exp = py_utils.import_module(experiment, pre_path='experiments')
        exp_params = exp.experiment_params()
        exp_params['_id'] = -1
        exp_params['experiment'] = experiment
        if model is not None:
            exp_params['model'] = model
        else:
            # A single configured model is enough: only the first entry is
            # used, so require "at least one" rather than "more than one".
            assert len(exp_params['model']) > 0, 'No model name supplied.'
            exp_params['model'] = exp_params['model'][0]
        if train is not None:
            exp_params['train_dataset'] = train
        if val is not None:
            exp_params['val_dataset'] = val
    results = model_tools.build_model(exp_params=exp_params,
                                      dt_string=dt_string,
                                      log=log,
                                      test=test,
                                      config=main_config,
                                      use_db=use_db,
                                      num_batches=num_batches,
                                      map_out=map_out,
                                      placeholders=placeholders,
                                      add_config=add_config,
                                      gpu_device=gpu_device,
                                      cpu_device=cpu_device,
                                      checkpoint=checkpoint)
    if test and save_test_npz:
        # Save results somewhere safe, tagged with what produced them
        py_utils.make_dir(out_dir)
        results['checkpoint'] = checkpoint
        results['model'] = model
        results['experiment'] = experiment
        np.savez(os.path.join(out_dir, results['exp_label']), **results)
    log.info('Finished.')
def training_loop(
        config,
        coord,
        sess,
        summary_op,
        summary_writer,
        saver,
        restore_saver,
        threads,
        directories,
        train_dict,
        val_dict,
        exp_label,
        num_params,
        use_db,
        log,
        placeholders=False,
        checkpoint=None,
        save_weights=False,
        save_checkpoints=False,
        save_activities=False,
        save_gradients=False):
    """Run the model training loop.

    Two modes are supported:
      * `placeholders` truthy: data are fed batch by batch through
        `feed_dict` for `config.epochs` epochs.
      * otherwise: training steps run until `coord.should_stop()` or a
        `tf.errors.OutOfRangeError`; afterwards the coordinator is stopped,
        `threads` are joined and `sess` is closed.

    Validation runs whenever `step % config.validation_period == 0`, with
    an early-stop counter initialised from `config.early_stop` (np.inf when
    the attribute is missing).

    Args:
        config: Experiment configuration object. Reads `early_stop`,
            `adaptive_train`, `epochs`, `validation_period`, batch sizes,
            `score_function`, `val_loss_function`, and (optionally)
            `get_map`. `early_stop` is set on it if missing.
        coord: Object providing `should_stop()`, `request_stop()` and
            `join(threads)` (presumably a tf.train.Coordinator -- confirm).
        sess: TensorFlow session used for every step.
        summary_op: Forwarded to `save_progress`.
        summary_writer: Forwarded to `save_progress`.
        saver: Forwarded to `save_progress`.
        restore_saver: Saver used to restore `checkpoint`, if given.
        threads: Passed to `coord.join` in the non-placeholder mode.
        directories: Dict; 'summaries' and 'checkpoints' are read here.
        train_dict: Dict of training ops/tensors. In placeholder mode it
            must contain 'train_images' and 'train_labels'.
        val_dict: Dict of validation ops/tensors; extended with the
            trainable variables when `save_weights` is set.
        exp_label: Experiment label forwarded to helpers.
        num_params: Parameter count forwarded to `save_progress`.
        use_db: Forwarded to `save_progress` (non-placeholder mode only).
        log: Logger with an `info` method.
        placeholders: False, or a nested dict with
            ['train'|'val']['images'|'labels'] entries.
        checkpoint: Optional checkpoint path to restore before training.
        save_weights: If truthy, add trainable variables to `val_dict`.
        save_checkpoints: Forwarded to `save_progress`.
        save_activities: Forwarded to `save_progress`.
        save_gradients: Forwarded to `save_progress`.

    Returns:
        None.

    Raises:
        RuntimeError: If collecting weights fails, or if the label rank of
            the fed data does not match the label tensor.
    """
    if checkpoint is not None:
        restore_saver.restore(sess, checkpoint)
        print('Restored checkpoint %s' % checkpoint)
    if not hasattr(config, 'early_stop'):
        config.early_stop = np.inf
    val_perf = np.asarray([np.inf])
    step = 0
    best_val_dict = None
    if save_weights:
        try:
            weight_dict = {v.name: v for v in tf.trainable_variables()}
            val_dict = dict(
                val_dict,
                **weight_dict)
        except Exception:
            raise RuntimeError('Failed to find weights to save.')
    else:
        weight_dict = None
    # config.early_stop is guaranteed to exist at this point (set above).
    it_early_stop = config.early_stop
    adaptive_train = getattr(config, 'adaptive_train', False)

    if placeholders:
        train_images = placeholders['train']['images']
        val_images = placeholders['val']['images']
        train_labels = placeholders['train']['labels']
        val_labels = placeholders['val']['labels']
        # Floor division: the batch counts are used with range() below, so
        # they must stay integral; any trailing partial batch is dropped.
        train_batches = len(train_images) // config.train_batch_size
        train_batch_idx = np.arange(
            train_batches).reshape(-1, 1).repeat(
                config.train_batch_size)
        train_images = train_images[:len(train_batch_idx)]
        train_labels = train_labels[:len(train_batch_idx)]
        val_batches = len(val_images) // config.val_batch_size
        val_batch_idx = np.arange(
            val_batches).reshape(-1, 1).repeat(
                config.val_batch_size)
        val_images = val_images[:len(val_batch_idx)]
        val_labels = val_labels[:len(val_batch_idx)]

        # Check that labels are appropriate shape
        tf_label_shape = train_dict['train_labels'].get_shape().as_list()
        np_label_shape = train_labels.shape
        if len(tf_label_shape) == 2 and len(np_label_shape) == 1:
            # Add a trailing singleton axis to match the rank-2 tensor
            train_labels = train_labels[..., None]
            val_labels = val_labels[..., None]
        elif len(tf_label_shape) == len(np_label_shape):
            pass
        else:
            raise RuntimeError(
                'Mismatch label shape np: %s vs. tf: %s' % (
                    np_label_shape,
                    tf_label_shape))

        # Start training
        train_losses = []
        train_logits = []
        for epoch in tqdm(
                range(config.epochs),
                desc='Epoch',
                total=config.epochs):
            for train_batch in range(train_batches):
                io_start_time = time.time()
                data_idx = train_batch_idx == train_batch
                it_train_images = train_images[data_idx]
                it_train_labels = train_labels[data_idx]
                # String entries are loaded through
                # data_to_tfrecords.load_image before being fed
                if isinstance(it_train_images[0], basestring):
                    it_train_images = np.asarray(
                        [
                            data_to_tfrecords.load_image(im)
                            for im in it_train_images])
                feed_dict = {
                    train_dict['train_images']: it_train_images,
                    train_dict['train_labels']: it_train_labels
                }
                (
                    train_score,
                    train_loss,
                    it_train_dict,
                    timer) = training_step(
                    sess=sess,
                    train_dict=train_dict,
                    config=config,
                    feed_dict=feed_dict)
                train_losses.append(train_loss)
                if step % config.validation_period == 0:
                    val_score, val_lo, it_val_dict, duration = validation_step(
                        sess=sess,
                        val_dict=val_dict,
                        config=config,
                        log=log,
                        val_images=val_images,
                        val_labels=val_labels,
                        val_batch_idx=val_batch_idx,
                        val_batches=val_batches)

                    # Save progress and important data
                    # NOTE(review): in this branch val_perf always retains
                    # its initial np.inf entry, so val_check is non-empty
                    # for any finite val_lo and the early-stop countdown
                    # looks like it can never trigger -- confirm intent.
                    # NOTE(review): unlike the queue-based branch, this
                    # call passes neither val_perf nor use_db and ignores
                    # the return value -- verify against save_progress.
                    try:
                        val_check = np.where(val_lo < val_perf)[0]
                        if not len(val_check):
                            it_early_stop -= 1
                            print('Deducted from early stop count.')
                        else:
                            it_early_stop = config.early_stop
                            best_val_dict = it_val_dict
                            print('Reset early stop count.')
                        if it_early_stop <= 0:
                            print('Early stop triggered. Ending early.')
                            print('Best validation loss: %s' % np.min(val_perf))
                            return
                        save_progress(
                            config=config,
                            val_check=val_check,
                            weight_dict=weight_dict,
                            it_val_dict=it_val_dict,
                            exp_label=exp_label,
                            step=step,
                            directories=directories,
                            sess=sess,
                            saver=saver,
                            val_score=val_score,
                            val_loss=val_lo,
                            train_score=train_score,
                            train_loss=train_loss,
                            timer=duration,
                            num_params=num_params,
                            log=log,
                            summary_op=summary_op,
                            summary_writer=summary_writer,
                            save_activities=save_activities,
                            save_gradients=save_gradients,
                            save_checkpoints=save_checkpoints)
                    except Exception as e:
                        log.info('Failed to save checkpoint: %s' % e)

                    # Hack to get the visualizations... clean this up later
                    if "BSDS500_test_orientation_viz" in config.experiment:
                        train_logits.append([it_train_dict["train_logits"].ravel()])
                        out_dir = "circuits_{}".format(config.out_dir)
                        py_utils.make_dir(out_dir)
                        out_target = os.path.join(out_dir, "{}_{}".format(config.model, config.train_dataset))
                        np.save("{}_optim".format(out_target), [sess.run(tf.trainable_variables())])
                        np.save("{}_perf".format(out_target), train_losses)
                        np.save("{}_curves".format(out_target), train_logits)
                        np.save("{}_label".format(out_target), it_train_dict["train_labels"])

                    # Training status and validation accuracy
                    val_status(
                        log=log,
                        dt=datetime.now(),
                        step=step,
                        train_loss=train_loss,
                        rate=config.val_batch_size / duration,
                        timer=float(duration),
                        score_function=config.score_function,
                        train_score=train_score,
                        val_score=val_score,
                        val_loss=val_lo,
                        best_val_loss=np.min(val_perf),
                        summary_dir=directories['summaries'])
                else:
                    # Training status. Report this step's own timing
                    # (`timer` from training_step), not the `duration` left
                    # over from the most recent validation pass.
                    io_duration = time.time() - io_start_time
                    train_status(
                        log=log,
                        dt=datetime.now(),
                        step=step,
                        train_loss=train_loss,
                        rate=config.val_batch_size / timer,
                        timer=float(timer),
                        io_timer=float(io_duration),
                        lr=it_train_dict['lr'],
                        score_function=config.score_function,
                        train_score=train_score)

                # End iteration
                val_perf = np.concatenate([val_perf, [val_lo]])
                step += 1

                # Adaptive ending: stop once the training loss reaches the
                # configured threshold (checked again to leave the epoch
                # loop as well)
                if adaptive_train and train_loss <= adaptive_train:
                    break
            if adaptive_train and train_loss <= adaptive_train:
                break

    else:
        try:
            while not coord.should_stop():
                (
                    train_score,
                    train_loss,
                    it_train_dict,
                    duration) = training_step(
                    sess=sess,
                    config=config,
                    train_dict=train_dict)
                io_start_time = time.time()
                if step % config.validation_period == 0:
                    val_score, val_lo, it_val_dict, duration = validation_step(
                        sess=sess,
                        val_dict=val_dict,
                        config=config,
                        log=log)

                    # Save progress and important data
                    try:
                        val_check = np.where(val_lo < val_perf)[0]
                        if not len(val_check):
                            it_early_stop -= 1
                            print('Deducted from early stop count.')
                        else:
                            it_early_stop = config.early_stop
                            best_val_dict = it_val_dict
                            print('Reset early stop count.')
                        if it_early_stop <= 0:
                            print('Early stop triggered. Ending early.')
                            print('Best validation loss: %s' % np.min(val_perf))
                            break
                        val_perf = save_progress(
                            config=config,
                            val_check=val_check,
                            weight_dict=weight_dict,
                            it_val_dict=it_val_dict,
                            exp_label=exp_label,
                            step=step,
                            directories=directories,
                            sess=sess,
                            saver=saver,
                            val_score=val_score,
                            val_loss=val_lo,
                            val_perf=val_perf,
                            train_score=train_score,
                            train_loss=train_loss,
                            timer=duration,
                            num_params=num_params,
                            log=log,
                            use_db=use_db,
                            summary_op=summary_op,
                            summary_writer=summary_writer,
                            save_activities=save_activities,
                            save_gradients=save_gradients,
                            save_checkpoints=save_checkpoints)
                    except Exception as e:
                        log.info('Failed to save checkpoint: %s' % e)

                    # Training status and validation accuracy
                    val_status(
                        log=log,
                        dt=datetime.now(),
                        step=step,
                        train_loss=train_loss,
                        rate=config.val_batch_size / duration,
                        timer=float(duration),
                        score_function=config.score_function,
                        train_score=train_score,
                        val_score=val_score,
                        val_loss=val_lo,
                        best_val_loss=np.min(val_perf),
                        summary_dir=directories['summaries'])
                else:
                    # Training status
                    io_duration = time.time() - io_start_time
                    train_status(
                        log=log,
                        dt=datetime.now(),
                        step=step,
                        train_loss=train_loss,
                        rate=config.val_batch_size / duration,
                        timer=float(duration),
                        io_timer=float(io_duration),
                        lr=it_train_dict['lr'],
                        score_function=config.score_function,
                        train_score=train_score)

                # End iteration
                step += 1
        except tf.errors.OutOfRangeError:
            log.info(
                'Done training for %d epochs, %d steps.' % (
                    config.epochs, step))
            log.info('Saved to: %s' % directories['checkpoints'])
        finally:
            coord.request_stop()
        coord.join(threads)
        sess.close()
    print('Best %s loss: %s' % (config.val_loss_function, val_perf[0]))
    if hasattr(config, 'get_map') and config.get_map:
        tf_fun.calculate_map(best_val_dict, exp_label, config)
    return
import os
import re
import argparse
import numpy as np
from matplotlib import pyplot as plt
from utils import py_utils

# Output directory name; it is created as a side effect of importing this
# module. Presumably where parsed log arrays are meant to be stored -- it is
# not referenced by the code visible here, so confirm before relying on it.
OUTDIR = 'npy_log'
py_utils.make_dir(OUTDIR)


def main(log,
         save_data=False,
         plot_data=False,
         data_check='Validation',
         val_key=r'(?<=(Validation\saccuracy\s=\s))\d\.\d+',
         train_key=r'(?<=(Training\saccuracy\s=\s))\d\.\d+'):
    """Parse a log file for training and validation accuracies.

    Every line containing `data_check` is searched with `train_key` and
    `val_key`; the matched numbers are collected in order of appearance.
    Lines on which either pattern fails to match are skipped, so the two
    series always stay aligned.

    Args:
        log: Path to the log file.
        save_data: Accepted for interface compatibility; not used in this
            function body.
        plot_data: If truthy, plot both series with matplotlib.
        data_check: Substring a line must contain to be parsed.
        val_key: Regex whose whole match is the validation accuracy.
        train_key: Regex whose whole match is the training accuracy.
    """
    # Compile once; the patterns are reused for every line of the log
    train_pattern = re.compile(train_key)
    val_pattern = re.compile(val_key)
    train_data, val_data = [], []
    # Text mode: the substring test and the str regexes need str lines
    with open(log, 'r') as f:
        for line in f:
            if data_check not in line:
                continue
            train_match = train_pattern.search(line)
            val_match = val_pattern.search(line)
            if train_match is None or val_match is None:
                # The line mentions data_check but lacks one of the metrics
                continue
            train_data.append(float(train_match.group()))
            val_data.append(float(val_match.group()))
    val_data = np.array(val_data)

    if plot_data:
        plt.figure()
        plt.plot(val_data, label='Validation')
        plt.plot(train_data, label='Training')
        plt.xlabel('Iterations of training (x2000)')
def plot_fits(
        experiment='760_cells_2017_11_04_16_29_09',
        query_db=False,
        num_models=3,
        template_exp='ALLEN_selected_cells_1',
        process_pnodes=False,
        allen_dir='/home/drew/Documents/Allen_Brain_Observatory',
        output_dir='tests/ALLEN_files',
        stimulus_dir='/media/data_cifs/AllenData/DataForTrain/all_stimulus_template',
        stimulus_type='tfrecord',
        top_n=0,
        preload_stim=False,
        target_layer='conv1_1',  # conv1_1, sep_conv1_1, dog1_1
        target_model='conv2d'):  # conv2d, sep_conv2d, dog
    """Plot fits across the RF.

    Gathers the stored validation performance for every file of `experiment`,
    keeps the best-scoring entry at each rounded receptive-field (x, y)
    location, restores the checkpoint of one `target_model` experiment and
    feeds the restored network sparse-noise images to collect its outputs.

    experiment: Name of Allen experiment you're plotting.
    query_db: Use data from DB versus data in Numpys.
    num_models: The number of architectures you're testing (not read here).
    template_exp: The name of the contextual_circuit model template used.
    process_pnodes: Import the query helpers from
        pnodes_declare_datasets_loop instead of declare_datasets_loop.
    allen_dir: Directory appended to sys.path and searched for the
        experiment's .npz files.
    output_dir: Root under which the log and the output data are written.
    stimulus_dir: Directory holding
        'locally_sparse_noise_8deg_template.pkl'.
    stimulus_type: Not read in this function.
    top_n: Rank (0 = best) of the `target_model` experiment to analyze.
    preload_stim: If True, sample frames from the pickled stimulus template;
        otherwise generate random sparse noise.
    target_layer: Key used to index the model for the 'responses' tensor and
        to name/title the filter mosaic.
    target_model: Model type to select; also the module name imported for
        the layer structure.

    NOTE(review): as written this routine does not run to completion
    unattended: it contains ipdb breakpoints, and the final np.savez call
    references `STA` and `fits`, which are never assigned in this function.
    """
    # Make allen_dir importable; the config/query modules below are presumably
    # located there -- confirm.
    sys.path.append(allen_dir)
    from allen_config import Allen_Brain_Observatory_Config
    if process_pnodes:
        from pnodes_declare_datasets_loop import query_hp_hist, sel_exp_query
    else:
        from declare_datasets_loop import query_hp_hist, sel_exp_query
    config = Config()
    main_config = Allen_Brain_Observatory_Config()
    db_config = credentials.postgresql_connection()
    # Collect the .npz files stored for this experiment.
    files = glob(
        os.path.join(
            allen_dir,
            main_config.multi_exps,
            experiment, '*.npz'))
    assert len(files), 'Couldn\'t find files.'
    out_data, xs, ys = [], [], []
    perfs, model_types, exps, arg_perf = [], [], [], []
    count = 0
    for f in files:
        data = np.load(f)
        # 'on' receptive-field center read from the file's rf_data entry.
        d = {
            'x': data['rf_data'].item()['on_center_x'],
            'y': data['rf_data'].item()['on_center_y'],
            # x: files['dataset_method'].item()['x_min'],
            # y: files['dataset_method'].item()['y_min'],
        }
        exp_name = {
            'experiment_name': data['dataset_method'].item()[
                'experiment_name']}
        if query_db:
            perf = query_hp_hist(
                exp_name['experiment_name'],
                db_config=db_config)
            if perf is None:
                print 'No fits for: %s' % exp_name['experiment_name']
            else:
                # NOTE(review): every statement after this raise is
                # unreachable; the DB path is effectively unimplemented.
                raise NotImplementedError
                d['perf'] = perf
                d['max_val'] = np.max(perf)
                out_data += [d]
                xs += [np.round(d['x'])]
                ys += [np.round(d['y'])]
                perfs += [np.max(d['perf'])]
                count += 1
        else:
            data_files = glob(
                os.path.join(
                    main_config.ccbp_exp_evals,
                    exp_name['experiment_name'],
                    '*val_losses.npy'))  # Scores has preds, labels has GT
            for gd in data_files:
                # Model type = filename text between '<template_exp>_' and
                # '_val'.
                mt = gd.split(
                    os.path.sep)[-1].split(
                        template_exp + '_')[-1].split('_' + 'val')[0]
                it_data = np.load(gd).item()
                # Values ordered by sorted key (keys are presumably training
                # steps -- confirm). Indexing .keys() relies on Python 2
                # returning a list.
                sinds = np.asarray(it_data.keys())[np.argsort(it_data.keys())]
                sit_data = [it_data[idx] for idx in sinds]
                # NOTE(review): `d` is the same dict object for every file in
                # data_files, so the entries appended to out_data alias each
                # other and all end up with the last file's values.
                d['perf'] = sit_data
                d['max_val'] = np.max(sit_data)
                d['max_idx'] = np.argmax(sit_data)
                d['mt'] = mt
                out_data += [d]
                xs += [np.round(d['x'])]
                ys += [np.round(d['y'])]
                perfs += [np.max(sit_data)]
                arg_perf += [np.argmax(sit_data)]
                exps += [gd.split(os.path.sep)[-2]]
                model_types += [mt]
                count += 1

    # Package as a df
    xs = np.round(np.asarray(xs)).astype(int)
    ys = np.round(np.asarray(ys)).astype(int)
    perfs = np.asarray(perfs)
    arg_perf = np.asarray(arg_perf)
    exps = np.asarray(exps)
    model_types = np.asarray(model_types)

    # Filter to only keep top-scoring values at each x/y (dirty trick)
    fxs, fys, fperfs, fmodel_types, fexps, fargs = [], [], [], [], [], []
    xys = np.vstack((xs, ys)).transpose()
    # View each (x, y) row as one opaque item so np.unique can deduplicate
    # whole rows.
    cxy = np.ascontiguousarray(  # Unique rows
        xys).view(
        np.dtype((np.void, xys.dtype.itemsize * xys.shape[1])))
    _, idx = np.unique(cxy, return_index=True)
    uxys = xys[idx]
    for xy in uxys:
        # Rows whose x AND y both equal this unique location.
        sel_idx = (xys == xy).sum(axis=-1) == 2
        sperfs = perfs[sel_idx]
        sexps = exps[sel_idx]
        sargs = arg_perf[sel_idx]
        sel_mts = model_types[sel_idx]
        # Keep only the best-performing entry at this location.
        bp = np.argmax(sperfs)
        fxs += [xy[0]]
        fys += [xy[1]]
        fperfs += [sperfs[bp]]
        fargs += [sargs[bp]]
        fmodel_types += [sel_mts[bp]]
        fexps += [sexps[bp]]
    xs = np.asarray(fxs)
    ys = np.asarray(fys)
    perfs = np.asarray(fperfs)
    arg_perf = np.asarray(fargs)
    exps = np.asarray(fexps)
    model_types = np.asarray(fmodel_types)
    umt, model_types_inds = np.unique(model_types, return_inverse=True)

    # Get weights for the top-n fitting models of each type
    it_perfs = perfs[model_types == target_model]
    it_exps = exps[model_types == target_model]
    # it_args = arg_perf[model_types == target_model]
    # sorted_perfs = np.argsort(it_perfs)[::-1][:top_n]
    # A single index: the entry ranked `top_n` in descending performance
    # (raises IndexError when fewer than top_n + 1 entries exist).
    sorted_perfs = [np.argsort(it_perfs)[::-1][top_n]]
    for idx in sorted_perfs:
        perf = sel_exp_query(
            experiment_name=it_exps[idx],
            model=target_model,
            db_config=db_config)
        # perf_steps = np.argsort([v['training_step'] for v in perf])[::-1]
        # Select the record with the largest 'validation_loss' value.
        perf_steps = [v['validation_loss'] for v in perf]
        max_score = np.max(perf_steps)
        arg_perf_steps = np.argmax(perf_steps)
        sel_model = perf[arg_perf_steps]  # perf_steps[it_args[idx]]]
        print 'Using %s' % sel_model
        # Checkpoint path is '<base>.ckpt-<suffix>', where <base> is the
        # ckpt_file text before its first '.' and <suffix> is the text after
        # the last '_' in the base filename.
        model_file = sel_model['ckpt_file'].split('.')[0]
        model_ckpt = '%s.ckpt-%s' % (
            model_file,
            model_file.split(os.path.sep)[-1].split('_')[-1])
        model_meta = '%s.meta' % model_ckpt

        # Pull stimuli
        stim_dir = os.path.join(
            main_config.tf_record_output,
            sel_model['experiment_name'])
        stim_files = glob(stim_dir + '*')
        stim_meta_file = [x for x in stim_files if 'meta' in x][0]
        # stim_val_data = [x for x in stim_files if 'val.tfrecords' in x][0]
        # Despite the 'val' names, the *train* records and means are selected.
        stim_val_data = [x for x in stim_files if 'train.tfrecords' in x][0]
        stim_val_mean = [x for x in stim_files if 'train_means' in x][0]
        # NOTE(review): an empty match already raises IndexError at the [0]
        # above, so these asserts can never fail.
        assert stim_meta_file is not None
        assert stim_val_data is not None
        assert stim_val_mean is not None
        stim_meta_data = np.load(stim_meta_file).item()
        rf_stim_meta_data = stim_meta_data['rf_data']
        # Indexing .items() relies on Python 2 returning a list.
        stim_mean_data = np.load(
            stim_val_mean).items()[0][1].item()['image']['mean']

        # Store sparse noise for reference
        sparse_rf_on = {
            'center_x': rf_stim_meta_data.get('on_center_x', None),
            'center_y': rf_stim_meta_data.get('on_center_y', None),
            'width_x': rf_stim_meta_data.get('on_width_x', None),
            'width_y': rf_stim_meta_data.get('on_width_y', None),
            'distance': rf_stim_meta_data.get('on_distance', None),
            'area': rf_stim_meta_data.get('on_area', None),
            'rotation': rf_stim_meta_data.get('on_rotation', None),
        }
        sparse_rf_off = {
            'center_x': rf_stim_meta_data.get('off_center_x', None),
            'center_y': rf_stim_meta_data.get('off_center_y', None),
            'width_x': rf_stim_meta_data.get('off_width_x', None),
            'width_y': rf_stim_meta_data.get('off_width_y', None),
            'distance': rf_stim_meta_data.get('off_distance', None),
            'area': rf_stim_meta_data.get('off_area', None),
            'rotation': rf_stim_meta_data.get('off_rotation', None),
        }
        # NOTE(review): sparse_rf is built but not read again in this function.
        sparse_rf = {'on': sparse_rf_on, 'off': sparse_rf_off}

        # Pull responses
        dataset_module = py_utils.import_module(
            model_dir=config.dataset_info,
            dataset=sel_model['experiment_name'])
        dataset_module = dataset_module.data_processing()
        # Placeholder for one image at a time (leading dimension of 1),
        # remaining dimensions taken from dataset_module.im_size.
        with tf.device('/cpu:0'):
            val_images = tf.placeholder(
                tf.float32,
                shape=[1] + [x for x in dataset_module.im_size])

        # Mean normalize
        log = logger.get(os.path.join(output_dir, 'sta_logs', target_model))
        data_dir = os.path.join(output_dir, 'data', target_model)
        py_utils.make_dir(data_dir)
        # The model definition is imported as a module named `target_model`
        # from models/structs/<experiment_name>.
        sys.path.append(os.path.join('models', 'structs', sel_model['experiment_name']))
        model_dict = __import__(target_model) 
        if hasattr(model_dict, 'output_structure'):
            # Use specified output layer
            output_structure = model_dict.output_structure
        else:
            output_structure = None
        model = model_utils.model_class(
            mean=stim_mean_data,
            training=True,  # FIXME
            output_size=dataset_module.output_size)
        with tf.device('/gpu:0'):
            with tf.variable_scope('cnn') as scope:
                val_scores, model_summary = model.build(
                    data=val_images,
                    layer_structure=model_dict.layer_structure,
                    output_structure=output_structure,
                    log=log,
                    tower_name='cnn')
                # Gradient of the model output with respect to the input image.
                grad_image = tf.gradients(model.output, val_images)[0]
        print(json.dumps(model_summary, indent=4))

        # Set up summaries and saver
        saver = tf.train.Saver(tf.global_variables())
        summary_op = tf.summary.merge_all()

        # Initialize the graph
        # NOTE(review): this session is never closed in this function.
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

        # Need to initialize both of these if supplying num_epochs to inputs
        sess.run(
            tf.group(
                tf.global_variables_initializer(),
                tf.local_variables_initializer())
            )
        # Overwrite the freshly initialized variables with the checkpoint.
        saver.restore(sess, model_ckpt)

        # Set up exemplar threading
        # Select the first-layer filter variable(s) by name for the chosen
        # model type.
        if target_model == 'conv2d':
            fname = [
                x for x in tf.global_variables()
                if 'conv1_1_filters:0' in x.name]
        elif target_model == 'sep_conv2d':
            fname = [
                x for x in tf.global_variables()
                if 'sep_conv1_1_filters:0' in x.name]
        elif target_model == 'dog':
            fname = [
                x for x in tf.global_variables()
                if 'dog1_1_filters:0' in x.name]
        else:
            raise NotImplementedError
        # Tensors fetched on every step; 'labels' holds the model output here.
        val_tensors = {
            'images': val_images,
        #     'labels': val_labels,
            'filts': fname,
            'responses': model[target_layer],
            'labels': model['output'],
            'grads': grad_image
        }
        all_images, all_preds, all_grads, all_responses = [], [], [], []
        num_steps = 10000
        stimuli = os.path.join(
            stimulus_dir,
            'locally_sparse_noise_8deg_template.pkl')
        # NOTE(review): the template is loaded even when preload_stim is
        # False, and the file handle is never closed explicitly.
        stimuli = pickle.load(open(stimuli, 'rb'))[:num_steps]
        ih, iw = 304, 608
        ns, sh, sw = stimuli.shape
        # sh/sw from the template shape are immediately replaced by ih/iw, so
        # the border sizes tb and lr below evaluate to 0.
        sh, sw = ih, iw
        sh = int(sh)
        sw = int(sw)
        tb = int((ih - sh) // 2)
        lr = int((iw - sw) // 2)
        gns = 32
        cnst = 127.5
        for step in range(num_steps):  # step, stim in enumerate(stimuli):  # step in range(num_steps):
            chosen_stim = np.random.permutation(ns)[0]
            if preload_stim:
                # Use a randomly chosen template frame, resized to (sh, sw).
                it_stim = stimuli[chosen_stim].astype(np.float32)
                it_stim = it_stim.astype(np.float32)
                noise_im = (misc.imresize(it_stim, [sh, sw], interp='nearest'))
                noise_im = cv2.copyMakeBorder(
                    noise_im.squeeze(), tb, tb, lr, lr, cv2.BORDER_CONSTANT, value=cnst)
            else:
                # Random sparse grid (5% density): sampled entries are
                # binarized to 255/0 around 0.5, and entries that were exactly
                # zero are set to the constant cnst.
                stim_noise = scipy.sparse.csr_matrix(scipy.sparse.random(ih // gns, iw // gns, density=0.05)).todense()
                stim_noise_mask = stim_noise == 0
                stim_noise[stim_noise > 0.5] = 255.
                stim_noise[stim_noise < 0.5] = 0.
                stim_noise[stim_noise_mask] = cnst
                noise_im = (misc.imresize(stim_noise.squeeze(), [sh, sw], interp='nearest'))
            # Random horizontal / vertical flips, each with probability 0.5.
            if np.random.rand() < 0.5:
                noise_im = np.fliplr(noise_im)
            if np.random.rand() < 0.5:
                noise_im = np.flipud(noise_im)
            # noise_im = (misc.imresize(it_stim, [ih, iw], interp='nearest'))[None, :, :, None]
            noise_im = noise_im / 255.
            # Add leading and trailing singleton axes before feeding.
            noise_im = noise_im[None, :, :, None]
            assert noise_im.max() <= 1
            # Passing .values() directly relies on Python 2 returning a list.
            val_vals = sess.run(val_tensors.values(), feed_dict={val_images: noise_im})
            val_dict = {k: v for k, v in zip(val_tensors.keys(), val_vals)}
            all_images += [noise_im]  # val_dict['images']]
            # all_responses += [val_dict['responses']]
            all_preds += [val_dict['labels'].squeeze()]
            # all_grads += [val_dict['grads'].squeeze()]
            print 'Finished step %s' % step

        # Process and save data
        all_images = np.concatenate(all_images).squeeze()
        # NOTE(review): peakdet is defined elsewhere; presumably it returns
        # peak events whose first column is a sample index -- confirm.
        ev, vals = peakdet(all_preds, np.median(all_preds))
        sp = np.zeros_like(all_preds)
        sp[ev[:, 0].astype(int)] = 1
        # Show the images weighted by the raw outputs, then by the binary
        # event vector `sp`, each reshaped to (ih, iw). plt.show() blocks.
        plt.imshow(np.matmul(all_images.reshape(all_images.shape[0], -1).transpose(), all_preds).reshape(ih, iw));plt.show()
        plt.imshow(np.matmul(all_images.reshape(all_images.shape[0], -1).transpose(), sp).reshape(ih, iw));plt.show()
        # Filters come from the val_dict of the final step only.
        filters = val_dict['filts'][0].squeeze().transpose(2, 0, 1)
        # NOTE(review): leftover interactive breakpoint.
        import ipdb;ipdb.set_trace()
        # all_grads was never appended to (the append above is commented
        # out), so this is an empty array.
        all_grads = np.asarray(all_grads)
        all_preds = np.asarray(all_preds).reshape(-1, 1)
        # NOTE(review): leftover interactive breakpoint.
        import ipdb;ipdb.set_trace()
       
    
        # res_f = all_responses.reshape(ne * h * w, k)
        # res_g = res_grads.reshape(ne, rh * rw)
        # i_cov = np.cov(res_i.transpose())
        # f_cov = np.cov(res_f.transpose())
        # g_cov = np.cov(res_g.transpose())
        # sp = (all_preds > all_preds.mean()).astype(np.float32)
        # res_i = res_g
        # ev, vals = peakdet(all_preds, 0.5)
        # sp = np.zeros_like(all_preds)
        # sp[ev[:, 0].astype(int)] = 1
        # slen = ne
        # nsp = np.sum(sp)  # number of spikes
        # swid = rh * rw
        # Msz = np.dot(np.dot(slen, swid), ne)  # Size of full stimulus matrix
        # rowlen = 1830  # np.dot(swid, ne) # Length of a single row of stimulus matrix
        
        # Compute raw mean and covariance
        # RawMu = np.mean(res_i, 0).T
        # RawCov = np.dot(res_i.T, res_i) / (slen-1.) - (RawMu*np.vstack(RawMu)*slen) / (slen-1.)
        
        # Compute spike-triggered mean and covariance
        # iisp = np.nonzero((sp > 0.))
        # spvec = sp[iisp]
        # STA = np.divide(np.dot(spvec.T, res_i[iisp[0],:]).T, nsp)
        # STC = np.dot(res_i[iisp[0],:].T, np.multiply(res_i[iisp[0],:], ml.repmat(spvec, rowlen, 1).T))/(nsp-1.) - (STA*np.vstack(STA)*nsp)/(nsp-1.)

        # res_i_cov = np.matmul(res_i.transpose(), res_i)
        # inv_res_i = np.linalg.pinv(res_i_cov)
        # sta = inv_res_i * np.matmul(res_i.transpose(), all_preds)
        # sta = inv_res_i * np.matmul(res_i.transpose(), spike_preds)

        # sti = (1. / float(ne)) * (np.linalg.pinv(i_cov) * np.matmul(res_i, all_preds))
        # sta = (1. / float(ne)) * (np.linalg.pinv(f_cov) * np.matmul(res_f, all_preds))
        # sta = sta.reshape(h, w)
        # stg = (1. / float(ne)) * np.matmul(all_grads.reshape(h * w, ne), all_preds)
        # stg = stg.reshape(h, w)
        # NOTE(review): `STA` and `fits` are not assigned anywhere in this
        # function (the STA computation above is commented out); unless they
        # exist as module globals this call raises NameError.
        np.savez(
            os.path.join(data_dir, 'data'),
            images=all_images,
            pred=all_preds,
            filters=filters,
            STA=STA,
            fits=fits,
            grads=all_grads)
        if target_model != 'dog':
            save_mosaic(
                maps=filters,  # [0].squeeze().transpose(2, 0, 1),
                output=os.path.join(data_dir, '%s_filters' % target_layer),
                rc=8,
                cc=4,
                title='%s filters' % (
                    target_layer))
        else:
            # NOTE(review): leftover interactive breakpoint for the 'dog' case.
            import ipdb;ipdb.set_trace()
        print 'Complete.'
Esempio n. 24
0
# Name of the sub-directory (under `p`) that receives the exported arrays.
e = 'for_alekh'
# Result directories to export, one per dataset / training-set size.
dirs = [
    'berson_001', 'berson_010', 'berson_100',
    'snemi_001', 'snemi_010', 'snemi_100',
    'seung_berson_001', 'seung_berson_010', 'seung_berson_100',
    'seung_snemi_001', 'seung_snemi_010', 'seung_snemi_100',
]


# For every result directory: load the first .npz found, then write each
# entry's labels and sigmoid-transformed logits as individual .npy files.
for d in tqdm(dirs, total=len(dirs)):
    npz_file = glob(os.path.join(p, d, '*.npz'))[0]
    z = np.load(npz_file)['test_dict']
    out_path = os.path.join(p, e, d)
    label_path = os.path.join(out_path, 'groundTruth')
    pred_path = os.path.join(out_path, 'predictions')
    # Parent first, then the two leaf folders.
    for folder in (out_path, label_path, pred_path):
        py_utils.make_dir(folder)
    for idx, i in enumerate(z):
        label = i['labels']
        pred = tf_fun.sigmoid_fun(i['logits'])
        file_name = '%s.npy' % idx
        np.save(os.path.join(label_path, file_name), label)
        np.save(os.path.join(pred_path, file_name), pred)
Esempio n. 25
0
def plot_fits(
        experiment='760_cells_2017_11_04_16_29_09',
        query_db=False,
        num_models=3,
        template_exp='ALLEN_selected_cells_1',
        process_pnodes=False,
        allen_dir='/home/drew/Documents/Allen_Brain_Observatory',
        output_dir='tests/ALLEN_files',
        stimulus_type='tfrecord',
        top_n=1,
        grad='lrp',
        target_layer='conv1_1',  # conv1_1, sep_conv1_1, dog1_1
        target_model='conv2d'):  # conv2d, sep_conv2d, dog
    """Plot fits across the RF.

    Finds the receptive-field location whose non-DoG models have the highest
    mean/std validation score, restores the best checkpoint(s) of
    `target_model` at that location, runs them over the stored tfrecords and
    saves the images, labels and input gradients to
    <output_dir>/data/<target_model>/data.npz.

    experiment: Name of Allen experiment you're plotting.
    query_db: Use data from DB versus data in Numpys. The DB path raises
        NotImplementedError as soon as a fit is found.
    num_models: The number of architectures you're testing (not read here).
    template_exp: The name of the contextual_circuit model template used.
    process_pnodes: Import the query helpers from
        pnodes_declare_datasets_loop instead of declare_datasets_loop.
    allen_dir: Directory appended to sys.path and searched for the
        experiment's .npz files.
    output_dir: Root under which the log and the output data are written.
    stimulus_type: Only 'tfrecord' is implemented; any other value raises
        NotImplementedError.
    top_n: Number of best `target_model` experiments to process.
    grad: Gradient flavor: 'vanilla', 'lrp' or 'cam'.
    target_layer: Not read in this function.
    target_model: Model type to select; also the module name imported for
        the layer structure.
    """
    sys.path.append(allen_dir)
    from allen_config import Allen_Brain_Observatory_Config
    if process_pnodes:
        from pnodes_declare_datasets_loop import query_hp_hist, sel_exp_query
    else:
        from declare_datasets_loop import query_hp_hist, sel_exp_query
    config = Config()
    main_config = Allen_Brain_Observatory_Config()
    db_config = credentials.postgresql_connection()
    files = glob(
        os.path.join(
            allen_dir,
            main_config.multi_exps,
            experiment, '*.npz'))
    assert len(files), 'Couldn\'t find files.'

    # Gather one (x, y, performance, experiment, model type) record per
    # stored validation-loss file.
    xs, ys = [], []
    perfs, model_types, exps = [], [], []
    for f in files:
        data = np.load(f)
        rf_data = data['rf_data'].item()
        center_x = rf_data['on_center_x']
        center_y = rf_data['on_center_y']
        experiment_name = data['dataset_method'].item()['experiment_name']
        if query_db:
            perf = query_hp_hist(
                experiment_name,
                db_config=db_config)
            if perf is None:
                print('No fits for: %s' % experiment_name)
            else:
                raise NotImplementedError(
                    'Processing fits queried from the DB is not implemented.')
        else:
            data_files = glob(
                os.path.join(
                    main_config.ccbp_exp_evals,
                    experiment_name,
                    '*val_losses.npy'))  # Scores has preds, labels has GT
            for gd in data_files:
                # Model type = filename text between '<template_exp>_' and
                # '_val'.
                mt = gd.split(
                    os.path.sep)[-1].split(
                        template_exp + '_')[-1].split('_' + 'val')[0]
                it_data = np.load(gd).item()
                xs += [np.round(center_x)]
                ys += [np.round(center_y)]
                # Only the best stored value is needed, so key order is
                # irrelevant.
                perfs += [np.max(list(it_data.values()))]
                exps += [gd.split(os.path.sep)[-2]]
                model_types += [mt]

    xs = np.round(np.asarray(xs)).astype(int)
    ys = np.round(np.asarray(ys)).astype(int)
    perfs = np.asarray(perfs)
    exps = np.asarray(exps)
    model_types = np.asarray(model_types)

    # Unique (x, y) rows: view every row as one opaque item for np.unique.
    xys = np.vstack((xs, ys)).transpose()
    cxy = np.ascontiguousarray(
        xys).view(
        np.dtype((np.void, xys.dtype.itemsize * xys.shape[1])))
    _, idx = np.unique(cxy, return_index=True)
    uxys = xys[idx]

    # Score every location by mean/std of its conv/sep (non-DoG) fits.
    # NOTE(review): a location with a single fit has std == 0 and therefore
    # an infinite score -- confirm this is the intended ranking.
    scores = []
    for xy in uxys:
        sel_idx = (xys == xy).all(axis=-1)
        sperfs = perfs[sel_idx]
        sel_mts = model_types[sel_idx]
        # Only get top conv/sep spots. Both exclusions are combined into one
        # mask; applying them one after the other indexes the already
        # shortened array with a full-length mask.
        not_dog = (sel_mts != 'dog') & (sel_mts != 'DoG')
        sperfs = sperfs[not_dog]
        scores += [sperfs.mean() / sperfs.std()]
    best_fits = np.argmax(np.asarray(scores))
    sel_idx = (xys == uxys[best_fits]).all(axis=-1)
    perfs = perfs[sel_idx]
    exps = exps[sel_idx]
    model_types = model_types[sel_idx]

    # Get weights for the top-n fitting models of each type
    target_mask = model_types == target_model
    it_perfs = perfs[target_mask]
    it_exps = exps[target_mask]
    sorted_perfs = np.argsort(it_perfs)[::-1][:top_n]
    # NOTE(review): every pass builds another model in the same default
    # graph / 'cnn' scope; top_n > 1 presumably needs a graph reset -- confirm.
    for idx in sorted_perfs:
        perf = sel_exp_query(
            experiment_name=it_exps[idx],
            model=target_model,
            db_config=db_config)
        # Select the record with the largest 'validation_loss' value.
        perf_steps = [v['validation_loss'] for v in perf]
        sel_model = perf[np.argmax(perf_steps)]
        print('Using %s' % sel_model)
        model_file = sel_model['ckpt_file'].split('.')[0]
        model_ckpt = '%s.ckpt-%s' % (
            model_file,
            model_file.split(os.path.sep)[-1].split('_')[-1])

        # Pull stimuli
        stim_dir = os.path.join(
            main_config.tf_record_output,
            sel_model['experiment_name'])
        stim_files = glob(stim_dir + '*')
        stim_meta_file = [x for x in stim_files if 'meta' in x][0]
        # Despite the 'val' names, the *train* records and means are used.
        stim_val_data = [x for x in stim_files if 'train.tfrecords' in x][0]
        stim_val_mean = [x for x in stim_files if 'train_means' in x][0]
        assert stim_meta_file is not None
        assert stim_val_data is not None
        assert stim_val_mean is not None
        stim_meta_data = np.load(stim_meta_file).item()
        rf_stim_meta_data = stim_meta_data['rf_data']
        # list(...) keeps the first-item lookup valid when .items() is a view.
        stim_mean_data = list(np.load(
            stim_val_mean).items())[0][1].item()['image']['mean']

        # Store sparse noise for reference
        sparse_rf_on = {
            'center_x': rf_stim_meta_data.get('on_center_x', None),
            'center_y': rf_stim_meta_data.get('on_center_y', None),
            'width_x': rf_stim_meta_data.get('on_width_x', None),
            'width_y': rf_stim_meta_data.get('on_width_y', None),
            'distance': rf_stim_meta_data.get('on_distance', None),
            'area': rf_stim_meta_data.get('on_area', None),
            'rotation': rf_stim_meta_data.get('on_rotation', None),
        }
        sparse_rf_off = {
            'center_x': rf_stim_meta_data.get('off_center_x', None),
            'center_y': rf_stim_meta_data.get('off_center_y', None),
            'width_x': rf_stim_meta_data.get('off_width_x', None),
            'width_y': rf_stim_meta_data.get('off_width_y', None),
            'distance': rf_stim_meta_data.get('off_distance', None),
            'area': rf_stim_meta_data.get('off_area', None),
            'rotation': rf_stim_meta_data.get('off_rotation', None),
        }
        sparse_rf = {'on': sparse_rf_on, 'off': sparse_rf_off}

        # Pull responses
        dataset_module = py_utils.import_module(
            model_dir=config.dataset_info,
            dataset=sel_model['experiment_name'])
        dataset_module = dataset_module.data_processing()
        if stimulus_type != 'tfrecord':
            # Fail here instead of with a NameError on val_images later on.
            raise NotImplementedError(
                'stimulus_type=%r is not implemented.' % (stimulus_type,))
        with tf.device('/cpu:0'):
            val_images, val_labels = data_loader.inputs(
                dataset=stim_val_data,
                batch_size=1,
                model_input_image_size=dataset_module.model_input_image_size,
                tf_dict=dataset_module.tf_dict,
                data_augmentations=[None],  # dataset_module.preprocess,
                num_epochs=1,
                tf_reader_settings=dataset_module.tf_reader,
                shuffle=False
            )

        # Mean normalize
        log = logger.get(os.path.join(output_dir, 'sta_logs', target_model))
        data_dir = os.path.join(output_dir, 'data', target_model)
        py_utils.make_dir(data_dir)
        # The model definition is imported as a module named `target_model`
        # from models/structs/<experiment_name>.
        sys.path.append(
            os.path.join('models', 'structs', sel_model['experiment_name']))
        model_dict = __import__(target_model)
        # Use the specified output layer when the module declares one.
        output_structure = getattr(model_dict, 'output_structure', None)
        model = model_utils.model_class(
            mean=stim_mean_data,
            training=True,  # FIXME
            output_size=dataset_module.output_size)
        # Relu gradient override used by each non-vanilla gradient flavor.
        grad_overrides = {'lrp': 'GradLRP', 'cam': 'GuidedRelu'}
        with tf.device('/gpu:0'):
            with tf.variable_scope('cnn') as scope:
                val_scores, model_summary = model.build(
                    data=val_images,
                    layer_structure=model_dict.layer_structure,
                    output_structure=output_structure,
                    log=log,
                    tower_name='cnn')
                if grad == 'vanilla':
                    grad_image = tf.gradients(model.output, val_images)[0]
                elif grad in grad_overrides:
                    # NOTE(review): the override map is attached to a fresh
                    # graph after the model ops were created in the default
                    # graph; it presumably has no effect on this gradient
                    # -- confirm.
                    eval_graph = tf.Graph()
                    with eval_graph.as_default():
                        with eval_graph.gradient_override_map(
                                {'Relu': grad_overrides[grad]}):
                            grad_image = tf.gradients(
                                model.output, val_images)[0]
                else:
                    raise NotImplementedError
        print(json.dumps(model_summary, indent=4))

        # Set up summaries and saver
        saver = tf.train.Saver(tf.global_variables())
        summary_op = tf.summary.merge_all()

        # Initialize the graph
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

        # Need to initialize both of these if supplying num_epochs to inputs
        sess.run(
            tf.group(
                tf.global_variables_initializer(),
                tf.local_variables_initializer())
            )
        saver.restore(sess, model_ckpt)

        # Set up exemplar threading
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # Name fragment of the first-layer filter variable per model type.
        filter_names = {
            'conv2d': 'conv1_1_filters:0',
            'sep_conv2d': 'sep_conv1_1_filters:0',
            'dog': 'dog1_1_filters:0',
            'DoG': 'dog1_1_filters:0',
        }
        if target_model not in filter_names:
            raise NotImplementedError
        fname = [
            x for x in tf.global_variables()
            if filter_names[target_model] in x.name]
        val_tensors = {
            'images': val_images,
            'labels': val_labels,
            'filts': fname,
            'responses': model.output,  # model[target_layer],
            'grads': grad_image
        }
        # Fix the fetch order once so keys and fetched values stay aligned.
        fetch_keys = list(val_tensors.keys())
        fetches = [val_tensors[k] for k in fetch_keys]
        all_images, all_preds, all_grads, all_responses = [], [], [], []
        step = 0
        try:
            while not coord.should_stop():
                val_vals = sess.run(fetches)
                val_dict = dict(zip(fetch_keys, val_vals))
                all_images += [val_dict['images']]
                all_responses += [val_dict['responses']]
                all_preds += [val_dict['labels'].squeeze()]
                all_grads += [val_dict['grads'].squeeze()]
                print('Finished step %s' % step)
                step += 1
        except tf.errors.OutOfRangeError:
            # Raised by the input queue once its single epoch is exhausted.
            # Any other error now propagates instead of being reported as a
            # normal end of data.
            print('Finished tfrecords')
        finally:
            coord.request_stop()
        coord.join(threads)
        sess.close()

        # Process and save data. The filters are fetched on every step but
        # are not written out.
        all_images = np.concatenate(all_images).squeeze()
        all_grads = np.asarray(all_grads)
        all_preds = np.asarray(all_preds).reshape(-1, 1)
        all_responses = np.asarray(all_responses).squeeze()

        np.savez(
            os.path.join(data_dir, 'data'),
            images=all_images,
            pred=all_preds,
            grads=all_grads)
        print('Complete.')
Esempio n. 26
0
def plot_fits(
    experiment='760_cells_2017_11_04_16_29_09',
    query_db=False,
    template_exp='ALLEN_selected_cells_1',
    process_pnodes=False,
    allen_dir='/home/drew/Documents/Allen_Brain_Observatory',
    output_dir='tests/ALLEN_files',
    stimulus_dir='/media/data_cifs/AllenData/DataForTrain/all_stimulus_template',
    stimulus_type='tfrecord',
    top_n=100,
    recalc=False,
    preload_stim=False,
    target_layer='conv1_1',  # conv1_1, sep_conv1_1, dog1_1
    target_model='conv2d'):  # conv2d, sep_conv2d, dog
    """Save first-layer filters of the best-fitting models across the RF.

    Steps performed:
      1. Collect the validation performance recorded for every cell file
         (*.npz) of `experiment`.
      2. Keep only the best-scoring entry at each rounded RF (x, y) position.
      3. Select the `top_n` best entries whose model type is `target_model`.
      4. Build the model graph once, then restore the checkpoint of every
         selected entry and read out its first-layer filter variable(s).
      5. Save filters, RF metadata and scores with np.savez under
         'tests/ALLEN_files/filters/' (this path is hard-coded and does not
         use `output_dir`).

    experiment: Name of Allen experiment you're plotting.
    query_db: Use data from DB versus data in Numpys. The DB branch raises
        NotImplementedError as soon as a fit is found.
    template_exp: The name of the contextual_circuit model template used;
        it is stripped from file names to recover the model type.
    process_pnodes: Import the query helpers from
        pnodes_declare_datasets_loop instead of declare_datasets_loop.
    allen_dir: Directory appended to sys.path; also the root under which
        the experiment's *.npz files are globbed.
    output_dir: Root for the logger path and the 'data/<target_model>' dir.
    top_n: Maximum number of top-scoring experiments to process.
    recalc: If True, recompute performance as the correlation between the
        saved validation labels and scores instead of loading the saved
        '*val_losses.npy' values.
    target_model: Model type to select (conv2d, sep_conv2d, dog/DoG); also
        the name of the struct module that is imported.
    stimulus_dir, stimulus_type, preload_stim, target_layer: Not read
        anywhere in this function body.

    Returns nothing; the result is the npz file written at the end.
    """
    # Project modules living in allen_dir are imported lazily from here.
    sys.path.append(allen_dir)
    from allen_config import Allen_Brain_Observatory_Config
    if process_pnodes:
        from pnodes_declare_datasets_loop import query_hp_hist, sel_exp_query
    else:
        from declare_datasets_loop import query_hp_hist, sel_exp_query
    config = Config()
    main_config = Allen_Brain_Observatory_Config()
    db_config = credentials.postgresql_connection()
    # One npz per cell/experiment; each holds RF data and the experiment name
    files = glob(
        os.path.join(allen_dir, main_config.multi_exps, experiment, '*.npz'))
    assert len(files), 'Couldn\'t find files.'
    out_data, xs, ys = [], [], []
    perfs, model_types, exps, arg_perf = [], [], [], []
    count = 0
    for f in files:
        data = np.load(f)
        # RF "on" sub-field center of this cell
        d = {
            'x': data['rf_data'].item()['on_center_x'],
            'y': data['rf_data'].item()['on_center_y'],
            # x: files['dataset_method'].item()['x_min'],
            # y: files['dataset_method'].item()['y_min'],
        }
        exp_name = {
            'experiment_name': data['dataset_method'].item()['experiment_name']
        }
        if query_db:
            perf = query_hp_hist(exp_name['experiment_name'],
                                 db_config=db_config)
            if perf is None:
                print 'No fits for: %s' % exp_name['experiment_name']
            else:
                # NOTE(review): everything below this raise is unreachable,
                # so the DB path never contributes any data.
                raise NotImplementedError
                d['perf'] = perf
                d['max_val'] = np.max(perf)
                out_data += [d]
                xs += [np.round(d['x'])]
                ys += [np.round(d['y'])]
                perfs += [np.max(d['perf'])]
                count += 1
        else:
            # Per-model evaluation files saved for this experiment
            data_files = glob(
                os.path.join(
                    main_config.ccbp_exp_evals, exp_name['experiment_name'],
                    '*val_losses.npy'))  # Scores has preds, labels has GT
            score_files = glob(
                os.path.join(
                    main_config.ccbp_exp_evals, exp_name['experiment_name'],
                    '*val_scores.npy'))  # Scores has preds, labels has GT
            lab_files = glob(
                os.path.join(
                    main_config.ccbp_exp_evals, exp_name['experiment_name'],
                    '*val_labels.npy'))  # Scores has preds, labels has GT
            # NOTE(review): the three globs are paired positionally by zip;
            # this assumes they come back in matching order -- confirm.
            for gd, sd, ld in zip(data_files, score_files, lab_files):
                # Model type = file name with the template prefix and the
                # trailing '_val...' part removed
                mt = gd.split(os.path.sep)[-1].split(template_exp +
                                                     '_')[-1].split('_' +
                                                                    'val')[0]
                if not recalc:
                    it_data = np.load(gd).item()
                else:
                    # Recompute performance as label/score correlation per key
                    lds = np.load(ld).item()
                    sds = np.load(sd).item()
                    it_data = {
                        k: np.corrcoef(lds[k], sds[k])[0, 1]
                        for k in sds.keys()
                    }
                # Order the per-key values by sorted key
                sinds = np.asarray(it_data.keys())[np.argsort(it_data.keys())]
                sit_data = [it_data[idx] for idx in sinds]
                # NOTE(review): `d` is the same dict object for every model
                # of this file, so all out_data entries for one file alias
                # the values written on the last iteration.
                d['perf'] = sit_data
                d['max_val'] = np.max(sit_data)
                d['max_idx'] = np.argmax(sit_data)
                d['mt'] = mt
                out_data += [d]
                xs += [np.round(d['x'])]
                ys += [np.round(d['y'])]
                perfs += [np.max(sit_data)]
                arg_perf += [np.argmax(sit_data)]
                exps += [gd.split(os.path.sep)[-2]]
                model_types += [mt]
                count += 1

    # Package as a df
    xs = np.round(np.asarray(xs)).astype(int)
    ys = np.round(np.asarray(ys)).astype(int)
    perfs = np.asarray(perfs)
    arg_perf = np.asarray(arg_perf)
    exps = np.asarray(exps)
    model_types = np.asarray(model_types)

    # Filter to only keep top-scoring values at each x/y (dirty trick)
    fxs, fys, fperfs, fmodel_types, fexps, fargs = [], [], [], [], [], []
    xys = np.vstack((xs, ys)).transpose()
    # View each (x, y) row as one opaque item so np.unique works on rows
    cxy = np.ascontiguousarray(  # Unique rows
        xys).view(np.dtype((np.void, xys.dtype.itemsize * xys.shape[1])))
    _, idx = np.unique(cxy, return_index=True)
    uxys = xys[idx]
    for xy in uxys:
        # Rows where both coordinates match this unique position
        sel_idx = (xys == xy).sum(axis=-1) == 2
        sperfs = perfs[sel_idx]
        sexps = exps[sel_idx]
        sargs = arg_perf[sel_idx]
        sel_mts = model_types[sel_idx]
        # Best performer at this position, regardless of model type
        bp = np.argmax(sperfs)
        fxs += [xy[0]]
        fys += [xy[1]]
        fperfs += [sperfs[bp]]
        fargs += [sargs[bp]]
        fmodel_types += [sel_mts[bp]]
        fexps += [sexps[bp]]
    xs = np.asarray(fxs)
    ys = np.asarray(fys)
    perfs = np.asarray(fperfs)
    arg_perf = np.asarray(fargs)
    exps = np.asarray(fexps)
    model_types = np.asarray(fmodel_types)
    umt, model_types_inds = np.unique(model_types, return_inverse=True)

    # Get weights for the top-n fitting models of each type
    it_perfs = perfs[model_types == target_model]
    it_exps = exps[model_types == target_model]
    # it_args = arg_perf[model_types == target_model]
    # Indices into it_perfs/it_exps, best first, truncated to top_n
    sorted_perfs = np.argsort(it_perfs)[::-1][:top_n]
    # The best experiment is used as a template to build the graph once.
    # NOTE(review): sorted_perfs[0] raises IndexError when no entry matches
    # target_model.
    perf = sel_exp_query(experiment_name=it_exps[sorted_perfs[0]],
                         model=target_model,
                         db_config=db_config)
    dummy_sel_model = perf[-1]
    print 'Using %s' % dummy_sel_model
    # Checkpoint path: '<file stem>.ckpt-<last "_"-separated token of stem>'
    model_file = dummy_sel_model['ckpt_file'].split('.')[0]
    model_ckpt = '%s.ckpt-%s' % (model_file, model_file.split(
        os.path.sep)[-1].split('_')[-1])
    model_meta = '%s.meta' % model_ckpt

    # Pull responses
    dataset_module = py_utils.import_module(
        model_dir=config.dataset_info,
        dataset=dummy_sel_model['experiment_name'])
    dataset_module = dataset_module.data_processing()
    with tf.device('/cpu:0'):
        # Single-image placeholder shaped [1] + dataset image size
        val_images = tf.placeholder(tf.float32,
                                    shape=[1] +
                                    [x for x in dataset_module.im_size])

    # Pull stimuli
    stim_dir = os.path.join(main_config.tf_record_output,
                            dummy_sel_model['experiment_name'])
    stim_files = glob(stim_dir + '*')
    # NOTE(review): the [0] lookups raise IndexError when nothing matches,
    # so the `is not None` asserts below can never fire.
    stim_meta_file = [x for x in stim_files if 'meta' in x][0]
    stim_val_data = [x for x in stim_files if 'train.tfrecords' in x][0]
    stim_val_mean = [x for x in stim_files if 'train_means' in x][0]
    assert stim_meta_file is not None
    assert stim_val_data is not None
    assert stim_val_mean is not None
    stim_meta_data = np.load(stim_meta_file).item()
    rf_stim_meta_data = stim_meta_data['rf_data']
    stim_mean_data = np.load(
        stim_val_mean).items()[0][1].item()['image']['mean']

    # Logger and output directory for this model type
    log = logger.get(os.path.join(output_dir, 'sta_logs', target_model))
    data_dir = os.path.join(output_dir, 'data', target_model)
    py_utils.make_dir(data_dir)
    # Make the experiment's struct directory importable, then import the
    # module named after target_model from it
    sys.path.append(
        os.path.join('models', 'structs', dummy_sel_model['experiment_name']))
    model_dict = __import__(target_model)
    if hasattr(model_dict, 'output_structure'):
        # Use specified output layer
        output_structure = model_dict.output_structure
    else:
        output_structure = None
    model = model_utils.model_class(
        mean=stim_mean_data,
        training=True,  # FIXME
        output_size=dataset_module.output_size)
    with tf.device('/gpu:0'):
        with tf.variable_scope('cnn') as scope:
            val_scores, model_summary = model.build(
                data=val_images,
                layer_structure=model_dict.layer_structure,
                output_structure=output_structure,
                log=log,
                tower_name='cnn')
            # Gradient of the model output w.r.t. the input image; it is not
            # used again in this function
            grad_image = tf.gradients(model.output, val_images)[0]
    print(json.dumps(model_summary, indent=4))

    # Set up summaries and saver
    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()

    # Initialize the graph
    # NOTE(review): this session is never closed in this function.
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

    # Need to initialize both of these if supplying num_epochs to inputs
    sess.run(
        tf.group(tf.global_variables_initializer(),
                 tf.local_variables_initializer()))
    all_filters = []
    all_rfs = []
    max_scores = []
    # Restore each selected experiment's checkpoint into the shared graph
    for idx in sorted_perfs:
        perf = sel_exp_query(experiment_name=it_exps[idx],
                             model=target_model,
                             db_config=db_config)
        # perf_steps = np.argsort([v['training_step'] for v in perf])[::-1]
        # Pick the DB row with the largest 'validation_loss' value.
        # NOTE(review): the maximum is taken, so this field presumably holds
        # a score where larger is better -- confirm.
        perf_steps = [v['validation_loss'] for v in perf]
        max_score = np.max(perf_steps)
        arg_perf_steps = np.argmax(perf_steps)
        sel_model = perf[arg_perf_steps]  # perf_steps[it_args[idx]]]
        print 'Using %s' % sel_model
        model_file = sel_model['ckpt_file'].split('.')[0]
        model_ckpt = '%s.ckpt-%s' % (model_file, model_file.split(
            os.path.sep)[-1].split('_')[-1])
        model_meta = '%s.meta' % model_ckpt

        # Store sparse noise for reference
        stim_dir = os.path.join(main_config.tf_record_output,
                                sel_model['experiment_name'])
        stim_files = glob(stim_dir + '*')
        stim_meta_file = [x for x in stim_files if 'meta' in x][0]
        stim_val_data = [x for x in stim_files if 'train.tfrecords' in x][0]
        stim_val_mean = [x for x in stim_files if 'train_means' in x][0]
        assert stim_meta_file is not None
        assert stim_val_data is not None
        assert stim_val_mean is not None
        stim_meta_data = np.load(stim_meta_file).item()
        rf_stim_meta_data = stim_meta_data['rf_data']
        stim_mean_data = np.load(
            stim_val_mean).items()[0][1].item()['image']['mean']

        # First element of the first value stored in the rf_data dict
        rf_stim_meta_data = rf_stim_meta_data.values()[0][0]
        # Copy the on/off sub-field descriptors; missing keys become None
        sparse_rf_on = {
            'center_x': rf_stim_meta_data.get('on_center_x', None),
            'center_y': rf_stim_meta_data.get('on_center_y', None),
            'width_x': rf_stim_meta_data.get('on_width_x', None),
            'width_y': rf_stim_meta_data.get('on_width_y', None),
            'distance': rf_stim_meta_data.get('on_distance', None),
            'area': rf_stim_meta_data.get('on_area', None),
            'rotation': rf_stim_meta_data.get('on_rotation', None),
        }
        sparse_rf_off = {
            'center_x': rf_stim_meta_data.get('off_center_x', None),
            'center_y': rf_stim_meta_data.get('off_center_y', None),
            'width_x': rf_stim_meta_data.get('off_width_x', None),
            'width_y': rf_stim_meta_data.get('off_width_y', None),
            'distance': rf_stim_meta_data.get('off_distance', None),
            'area': rf_stim_meta_data.get('off_area', None),
            'rotation': rf_stim_meta_data.get('off_rotation', None),
        }
        sparse_rf = {'on': sparse_rf_on, 'off': sparse_rf_off}

        # Select the first-layer filter variable(s) for this model type
        # (the variable names are hard-coded; target_layer is not consulted)
        if target_model == 'conv2d':
            fname = [
                x for x in tf.global_variables()
                if 'conv1_1_filters:0' in x.name
            ]
        elif target_model == 'sep_conv2d':
            fname = [
                x for x in tf.global_variables()
                if 'sep_conv1_1_filters:0' in x.name
            ]
        elif target_model == 'dog' or target_model == 'DoG':
            fname = [
                x for x in tf.global_variables()
                if 'dog1_1_filters:0' in x.name
            ]
        else:
            raise NotImplementedError

        # Recompute the checkpoint path (same value as above), load the
        # weights and read out the filter values
        print 'Using %s' % sel_model
        model_file = sel_model['ckpt_file'].split('.')[0]
        model_ckpt = '%s.ckpt-%s' % (model_file, model_file.split(
            os.path.sep)[-1].split('_')[-1])
        saver.restore(sess, model_ckpt)
        all_filters += [sess.run(fname)]
        all_rfs += [sparse_rf]
        max_scores += [max_score]
    # One entry per processed experiment in each saved array
    np.savez('tests/ALLEN_files/filters/%s_%s_recalc_%s' %
             (experiment, target_model, recalc),
             rfs=all_rfs,
             perf=max_scores,
             filters=all_filters)
    print 'SAVED'
Esempio n. 27
0
def main(experiment_name, list_experiments=False, gpu_device='/gpu:0'):
    """Create a tensorflow worker to run experiments in your DB.

    Builds a training model and a weight-sharing validation model for
    `experiment_name`, wires up losses, metrics and summaries, then hands
    everything to `training.training_loop` and saves its output.

    experiment_name: Name of the experiment to run. If None, a name is
        pulled from the DB with `db.get_experiment_name()`.
    list_experiments: If True, only print the experiments known to the DB
        and return without training.
    gpu_device: Device string used for building the model graph.

    Raises:
        IOError: if the model structure module cannot be imported (the
            error is reported and re-raised).
    """
    if list_experiments:
        exps = db.list_experiments()
        print('_' * 30)
        print('Initialized experiments:')
        print('_' * 30)
        for l in exps:
            print(list(l.values())[0])
        print('_' * 30)
        print('You can add to the DB with: '
              'python prepare_experiments.py --experiment=%s' %
              list(exps[0].values())[0])
        return
    if experiment_name is None:
        print('No experiment specified. Pulling one out of the DB.')
        experiment_name = db.get_experiment_name()

    # Prepare to run the model
    config = Config()
    # condition_label is unique per run (timestamped); experiment_label is
    # shared by every run of the same experiment
    condition_label = '%s_%s' % (experiment_name, py_utils.get_dt_stamp())
    experiment_label = '%s' % (experiment_name)
    log = logger.get(os.path.join(config.log_dir, condition_label))
    experiment_dict = experiments.experiments()[experiment_name]()
    config = add_to_config(d=experiment_dict, config=config)  # Globals
    config, exp_params = process_DB_exps(
        experiment_name=experiment_name, log=log,
        config=config)  # Update config w/ DB params
    dataset_module = py_utils.import_module(model_dir=config.dataset_info,
                                            dataset=config.dataset)
    dataset_module = dataset_module.data_processing()  # hardcoded class name
    fold_names = list(dataset_module.folds.keys())
    train_data, train_means = get_data_pointers(
        dataset=config.dataset,
        base_dir=config.tf_records,
        cv=fold_names[1],  # TODO: SEARCH FOR INDEX.
        log=log)
    val_data, val_means = get_data_pointers(dataset=config.dataset,
                                            base_dir=config.tf_records,
                                            cv=fold_names[0],
                                            log=log)

    # Initialize output folders
    dir_list = {
        'checkpoints':
        os.path.join(config.checkpoints, condition_label),
        'summaries':
        os.path.join(config.summaries, condition_label),
        'condition_evaluations':
        os.path.join(config.condition_evaluations, condition_label),
        'experiment_evaluations':
        os.path.join(  # DEPRECIATED
            config.experiment_evaluations, experiment_label),
        'visualization':
        os.path.join(config.visualizations, condition_label),
        'weights':
        os.path.join(config.condition_evaluations, condition_label, 'weights')
    }
    for out_dir in dir_list.values():
        py_utils.make_dir(out_dir)

    # Prepare data loaders on the cpu
    config.data_augmentations = py_utils.flatten_list(
        config.data_augmentations, log)
    with tf.device('/cpu:0'):
        train_images, train_labels = data_loader.inputs(
            dataset=train_data,
            batch_size=config.batch_size,
            model_input_image_size=dataset_module.model_input_image_size,
            tf_dict=dataset_module.tf_dict,
            data_augmentations=config.data_augmentations,
            num_epochs=config.epochs,
            tf_reader_settings=dataset_module.tf_reader,
            shuffle=config.shuffle)
        val_images, val_labels = data_loader.inputs(
            dataset=val_data,
            batch_size=config.batch_size,
            model_input_image_size=dataset_module.model_input_image_size,
            tf_dict=dataset_module.tf_dict,
            data_augmentations=config.data_augmentations,
            num_epochs=config.epochs,
            tf_reader_settings=dataset_module.tf_reader,
            shuffle=config.shuffle)
    log.info('Created tfrecord dataloader tensors.')

    # Load model specification
    struct_name = config.model_struct.split(os.path.sep)[-1]
    try:
        model_dict = py_utils.import_module(
            dataset=struct_name,
            model_dir=os.path.join('models', 'structs',
                                   experiment_name).replace(os.path.sep, '.'))
    except IOError:
        # Without the structure nothing below can run; report and propagate
        # instead of failing later on an unbound `model_dict`.
        print('Could not find the model structure: %s' % experiment_name)
        raise

    # Inject model_dict with hyperparameters if requested
    model_dict.layer_structure = hp_opt_utils.inject_model_with_hps(
        layer_structure=model_dict.layer_structure, exp_params=exp_params)

    # Prepare model on GPU
    with tf.device(gpu_device):
        with tf.variable_scope('cnn') as scope:

            # Training model
            if len(dataset_module.output_size) > 1:
                log.warning('Found > 1 dimension for your output size.'
                            'Converting to a scalar.')
                dataset_module.output_size = np.prod(
                    dataset_module.output_size)

            if hasattr(model_dict, 'output_structure'):
                # Use specified output layer
                output_structure = model_dict.output_structure
            else:
                output_structure = None
            model = model_utils.model_class(
                mean=train_means,
                training=True,
                output_size=dataset_module.output_size)
            train_scores, model_summary = model.build(
                data=train_images,
                layer_structure=model_dict.layer_structure,
                output_structure=output_structure,
                log=log,
                tower_name='cnn')
            log.info('Built training model.')
            log.debug(json.dumps(model_summary, indent=4), verbose=0)
            print_model_architecture(model_summary)

            # Prepare the loss function
            train_loss, _ = loss_utils.loss_interpreter(
                logits=train_scores,
                labels=train_labels,
                loss_type=config.loss_function,
                dataset_module=dataset_module)

            # Add weight decay if requested
            if len(model.regularizations) > 0:
                train_loss = loss_utils.wd_loss(
                    regularizations=model.regularizations,
                    loss=train_loss,
                    wd_penalty=config.regularization_strength)
            train_op = loss_utils.optimizer_interpreter(
                loss=train_loss,
                lr=config.lr,
                optimizer=config.optimizer,
                constraints=config.optimizer_constraints,
                model=model)
            log.info('Built training loss function.')

            train_accuracy = eval_metrics.metric_interpreter(
                metric=dataset_module.score_metric,
                pred=train_scores,
                labels=train_labels)  # training accuracy
            # Image summaries only when the last dimension is at most 3
            if int(train_images.get_shape()[-1]) <= 3:
                tf.summary.image('train images', train_images)
            tf.summary.scalar('training loss', train_loss)
            tf.summary.scalar('training accuracy', train_accuracy)
            log.info('Added training summaries.')

            # Validation model (shares the variables created above)
            scope.reuse_variables()
            val_model = model_utils.model_class(
                mean=val_means,
                training=True,
                output_size=dataset_module.output_size)
            val_scores, _ = val_model.build(  # Ignore summary
                data=val_images,
                layer_structure=model_dict.layer_structure,
                output_structure=output_structure,
                log=log,
                tower_name='cnn')
            log.info('Built validation model.')

            val_loss, _ = loss_utils.loss_interpreter(
                logits=val_scores,
                labels=val_labels,
                loss_type=config.loss_function,
                dataset_module=dataset_module)
            val_accuracy = eval_metrics.metric_interpreter(
                metric=dataset_module.score_metric,
                pred=val_scores,
                labels=val_labels)  # validation accuracy
            # Check the validation tensor itself, not the training one
            if int(val_images.get_shape()[-1]) <= 3:
                tf.summary.image('val images', val_images)
            tf.summary.scalar('validation loss', val_loss)
            tf.summary.scalar('validation accuracy', val_accuracy)
            log.info('Added validation summaries.')

    # Set up summaries and saver
    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()

    # Initialize the graph
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

    # Need to initialize both of these if supplying num_epochs to inputs
    sess.run(
        tf.group(tf.global_variables_initializer(),
                 tf.local_variables_initializer()))
    summary_writer = tf.summary.FileWriter(dir_list['summaries'], sess.graph)

    # Set up exemplar threading
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # Create dictionaries of important training and validation information
    train_dict = {
        'train_loss': train_loss,
        'train_accuracy': train_accuracy,
        'train_images': train_images,
        'train_labels': train_labels,
        'train_op': train_op,
        'train_scores': train_scores
    }
    val_dict = {
        'val_loss': val_loss,
        'val_accuracy': val_accuracy,
        'val_images': val_images,
        'val_labels': val_labels,
        'val_scores': val_scores,
    }

    # Start training loop
    np.save(
        os.path.join(dir_list['condition_evaluations'],
                     'training_config_file'), config)
    log.info('Starting training')
    output_dict = training.training_loop(
        config=config,
        db=db,
        coord=coord,
        sess=sess,
        summary_op=summary_op,
        summary_writer=summary_writer,
        saver=saver,
        threads=threads,
        summary_dir=dir_list['summaries'],
        checkpoint_dir=dir_list['checkpoints'],
        weight_dir=dir_list['weights'],
        train_dict=train_dict,
        val_dict=val_dict,
        train_model=model,
        val_model=val_model,
        exp_params=exp_params)
    log.info('Finished training.')

    model_name = config.model_struct.replace('/', '_')
    py_utils.save_npys(data=output_dict,
                       model_name=model_name,
                       output_string=dir_list['experiment_evaluations'])
Esempio n. 28
0
def encode_dataset(dataset, train_shards=0, val_shards=0, force_val=False):
    """Encode a dataset into tfrecords.

    Imports the dataset class named `dataset`, asks it for its data and
    writes that data with `data_to_tfrecords`.

    dataset: Name of the dataset module to import.
    train_shards: Number of training shards. Any truthy value selects the
        sharded path, which is currently disabled.
    val_shards: Number of validation shards (must be > 0 when sharding).
    force_val: When sharding, skip the training shards.

    Raises:
        NotImplementedError: if `get_data()` returns neither 2 nor 3 items,
            or if sharding is requested (no nhot support yet).
    """
    config = Config()
    data_class = py_utils.import_module(module=dataset,
                                        pre_path=config.dataset_classes)
    data_proc = data_class.data_processing()
    data = data_proc.get_data()
    if len(data) == 2:
        files, labels = data
        nhot = None
    elif len(data) == 3:
        files, labels, nhot = data
    else:
        raise NotImplementedError
    targets = data_proc.targets
    im_size = data_proc.im_size

    # Optional dataset attributes, each with its own fallback. `store_z`
    # used to be gated on the presence of `label_size` (copy-paste slip).
    preproc_list = getattr(data_proc, 'preprocess', [])
    label_size = getattr(data_proc, 'label_size', None)
    store_z = getattr(data_proc, 'store_z', False)
    normalize_im = getattr(data_proc, 'normalize_im', False)

    if not train_shards:
        ds_name = os.path.join(config.tf_records, data_proc.output_name)
        data_to_tfrecords(files=files,
                          labels=labels,
                          targets=targets,
                          nhot=nhot,
                          ds_name=ds_name,
                          im_size=im_size,
                          label_size=label_size,
                          preprocess=preproc_list,
                          store_z=store_z,
                          normalize_im=normalize_im)
    else:
        assert val_shards > 0, 'Choose the number of val shards.'
        # Sharding is deliberately disabled: create_shards does not receive
        # nhot. The code below the raise is kept as the intended
        # implementation and does not run until the raise is removed.
        raise NotImplementedError('Needs support for nhot.')
        shard_dir = os.path.join(config.tf_records, data_proc.output_name)
        py_utils.make_dir(shard_dir)
        if not force_val:
            create_shards(it_shards=train_shards,
                          shard_dir=shard_dir,
                          key='train',
                          files=files,
                          labels=labels,
                          targets=targets,
                          im_size=im_size,
                          label_size=label_size,
                          preprocess=preproc_list,
                          store_z=store_z,
                          normalize_im=normalize_im)
        create_shards(it_shards=val_shards,
                      shard_dir=shard_dir,
                      key='val',
                      files=files,
                      labels=labels,
                      targets=targets,
                      im_size=im_size,
                      label_size=label_size,
                      preprocess=preproc_list,
                      store_z=store_z,
                      normalize_im=normalize_im)
Esempio n. 29
0
    def get_labels(self, files):
        """Process and save label images.

        For every fold in `files`, loads each image and its ground-truth
        .mat file, writes the label array(s) as .npy files, z-scores the
        images with statistics taken from the 'train' fold and writes them
        as .npy files too.

        files: dict mapping a fold key to a list of image paths.
            `self.fold_options[key]` selects how the annotations of one
            image are handled: 'duplicate' writes one label file (and one
            image copy) per annotation, 'mean' writes their average.

        Returns (labels, new_files): two dicts keyed like `files`, holding
        the saved label paths and the image names with '.npy' appended.

        Raises:
            NotImplementedError: for an unknown fold option.
            ValueError: if a fold has to be normalized but `files` has no
                'train' fold to take the statistics from.
        """
        def _drop_suffix(name, suffix):
            """Remove `suffix` from the end of `name` when present."""
            # str.strip(suffix) would strip a *character set* from both
            # ends, which can eat parts of the real name.
            if suffix and name.endswith(suffix):
                return name[:-len(suffix)]
            return name

        labels = {}
        new_files = {}
        mean_rgb_train = None
        std_rgb_train = None
        # Visit 'train' first so its statistics exist before any other fold
        # is normalized; dict iteration order gives no such guarantee.
        for k in sorted(files, key=lambda fold_key: fold_key != 'train'):
            images = files[k]
            # Replace extension and path with labels
            label_vec = []
            file_vec = []
            fold = images[0].split(os.path.sep)[-2]

            # New label dir
            proc_dir = os.path.join(
                images[0].split(fold)[0],
                fold,
                self.processed_labels)
            py_utils.make_dir(proc_dir)

            # New image dir
            proc_image_dir = os.path.join(
                self.config.data_root,
                self.name,
                self.images_dir,
                fold,
                self.processed_images)
            py_utils.make_dir(proc_image_dir)
            all_images = []
            for im in images:
                it_label = im.split(os.path.sep)[-1]
                it_label_path = '%s%s' % (im.split('.')[0], self.lab_extension)
                it_label_path = it_label_path.replace(
                    self.images_dir,
                    self.labels_dir)

                # Process every label and duplicate images for each
                label_data = io.loadmat(
                    it_label_path)['groundTruth'].reshape(-1)
                im_data = misc.imread(im)
                # Images whose shape differs from im_size get their first
                # two axes swapped; their labels are swapped the same way
                transpose_labels = False
                if not np.all(self.im_size == list(im_data.shape)):
                    im_data = np.swapaxes(im_data, 0, 1)
                    transpose_labels = True
                assert np.all(
                    self.im_size == list(im_data.shape)
                    ), 'Mismatched dimensions.'

                if self.fold_options[k] == 'duplicate':
                    # Loop through all labels
                    for idx, lab in enumerate(label_data):

                        # Process labels
                        ip_lab = lab.item()[1].astype(np.float32)
                        if transpose_labels:
                            ip_lab = np.swapaxes(ip_lab, 0, 1)
                        it_im_name = '%s_%s' % (idx, it_label)
                        it_lab_name = '%s.npy' % it_im_name.split('.')[0]
                        out_lab = os.path.join(proc_dir, it_lab_name)
                        np.save(out_lab, ip_lab)
                        label_vec += [out_lab]

                        # Process images
                        proc_im = os.path.join(proc_image_dir, it_im_name)
                        all_images.append(im_data)
                        file_vec += [proc_im]
                elif self.fold_options[k] == 'mean':
                    mean_labs = []
                    for lab in label_data:
                        # Process labels
                        ip_lab = lab.item()[1].astype(np.float32)
                        if transpose_labels:
                            ip_lab = np.swapaxes(ip_lab, 0, 1)
                        mean_labs += [ip_lab]
                    mean_lab = np.asarray(mean_labs).mean(0)
                    # Name the label after the whole image stem. The old
                    # `strip(ext)[0]` kept only its first character, so
                    # labels of different images overwrote each other.
                    out_lab = os.path.join(
                        proc_dir, '%s.npy' % _drop_suffix(
                            it_label, self.im_extension))
                    np.save(out_lab, mean_lab)
                    label_vec += [out_lab]

                    # Process images
                    proc_im = os.path.join(proc_image_dir, it_label)
                    all_images.append(im_data)
                    file_vec += [proc_im]
                else:
                    raise NotImplementedError

            # Z-score the images with the training-fold statistics: the
            # mean is taken over the first three axes (keepdims), the std
            # over all values.
            all_images = np.array(all_images).astype(np.float32)
            if k == 'train':
                mean_rgb_train = all_images.mean(
                    0, keepdims=True).mean(
                    1, keepdims=True).mean(
                    2, keepdims=True)
                std_rgb_train = all_images.std()
            if mean_rgb_train is None or std_rgb_train is None:
                raise ValueError(
                    'Cannot normalize fold %r: no \'train\' fold was given '
                    'to compute the mean/std from.' % (k,))
            all_images_z = (all_images - mean_rgb_train) / std_rgb_train
            for i, img in enumerate(all_images_z):
                np.save(
                    '%s.npy' % _drop_suffix(file_vec[i], self.im_extension),
                    img.astype(np.float32))
            # NOTE(review): the arrays above are saved with the image
            # extension removed, while the names returned below keep it
            # ('<name><ext>.npy') -- confirm which one callers expect.
            labels[k] = label_vec
            new_files[k] = ['%s.npy' % i for i in file_vec]
        return labels, new_files

# Settings: where the saved gradients live and where movies are written
file_path = '/home/drew/Documents/hgru/results'
out_path = 'movies'
f = os.path.join(file_path, 'val_gradients.npz')

# Names of the arrays stored inside the npz archive
im_key, grad_key, lab_key = 'val_images', 'val_gradients', 'val_labels'

# Remaining settings (the two indices are not read by the code below)
path_index, nopath_index = 3, 0
interp_frames = 100

# Load the arrays and make sure the output directory exists
data = np.load(f)
images, grads, labels = data[im_key], data[grad_key], data[lab_key]
py_utils.make_dir(out_path)

# Render one movie per image index
num_images = len(images)
idx = 0
while idx < num_images:
    render_movie(images=images, grads=grads, labels=labels, idx=idx,
                 interp_frames=interp_frames, out_path=out_path)
    idx += 1