Example #1
    def generate_output_names(self, batch_index, params, results,
                              prefix_template, output_dir):

        results_and_params = combine_dicts(
            (('p.', params.__dict__), ('r.', results[batch_index].__dict__)))
        prefix = prefix_template % results_and_params

        prefix = os.path.join(output_dir, prefix)

        if os.path.isdir(prefix):
            if prefix[-1] != '/':
                prefix += '/'  # append slash for dir-only template
        else:
            dirname = os.path.dirname(prefix)
            if dirname:
                mkdir_p(dirname)

        best_X_name = '%s_best_X.jpg' % prefix
        best_Xpm_name = '%s_best_Xpm.jpg' % prefix
        majority_X_name = '%s_majority_X.jpg' % prefix
        majority_Xpm_name = '%s_majority_Xpm.jpg' % prefix
        info_name = '%s_info.txt' % prefix
        info_pkl_name = '%s_info.pkl' % prefix
        info_big_pkl_name = '%s_info_big.pkl' % prefix
        return [
            best_X_name, best_Xpm_name, majority_X_name, majority_Xpm_name,
            info_name, info_pkl_name, info_big_pkl_name
        ]
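
Note: every example on this page relies on an mkdir_p helper (sometimes namespaced as misc.mkdir_p) that is not shown here. A minimal sketch of the usual idiom such a helper follows (an assumption; the project's own implementation is not shown), equivalent to the shell's mkdir -p:

import errno
import os

def mkdir_p(path):
    """Create path and any missing parent directories; do nothing if it already exists."""
    try:
        os.makedirs(path)
    except OSError as exc:
        # ignore "already exists"; re-raise anything else (e.g. permission errors)
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise

On Python 3 the same effect is available directly via os.makedirs(path, exist_ok=True).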
Example #2
def calculate_weights_histogram_for_specific_layer(net,
                                                   layer_name,
                                                   output_dir,
                                                   fig,
                                                   ax,
                                                   force=False):
    # generate weights histogram for layer
    hist_filename = os.path.join(output_dir, 'weight_histogram.png')

    weights = net.params[layer_name][0].data.flatten()
    hist, bin_edges = np.histogram(weights, bins=50)
    if not force and os.path.isfile(hist_filename):
        print 'The weights histogram for this layer already exists. Skipping...'
    else:
        weights_num = len(weights)

        width = 0.7 * (bin_edges[1] - bin_edges[0])
        center = (bin_edges[:-1] + bin_edges[1:]) / 2
        ax.bar(center, hist, align='center', width=width, color='g')

        fig.suptitle('weights for layer %s\n %s weights used' %
                     (layer_name, weights_num))
        ax.xaxis.label.set_text('weight value')
        ax.yaxis.label.set_text('count')

        # Save layer weights histogram to image file
        mkdir_p(output_dir)
        fig.savefig(hist_filename)

        ax.cla()

    return hist, bin_edges
Example #3
 def process_channel_figure(channel_idx, fig):
     unit_dir = os.path.join(outdir, layer_name,
                             'unit_%04d' % channel_idx)
     mkdir_p(unit_dir)
     filename = os.path.join(unit_dir, 'max_histogram.png')
     fig.savefig(filename)
Example #4
def crawl_pages(subcats):
    dirpath = "data/site/%s/%s/" % (config.wiki_lang, config.start_cat)
    pages = []

    counter = 0
    for subcat in subcats:
        counter += 1
        pb.update(counter, len(subcats))

        subcat_dirpath = dirpath + subcat + "/"
        misc.mkdir_p(subcat_dirpath)

        filepath = subcat_dirpath + "pages.txt"
        if os.path.exists(filepath):
            subcat_pages = misc.read_file(filepath)
        else:
            subcat_pages = get_subcat_pages(subcat)
            misc.write_file(filepath, subcat_pages)

        pages.extend(subcat_pages)

    pages = [
        page for page in pages if not config.page_bl(page) and lang.can(page)
    ]
    pages = OrderedDict.fromkeys(pages).keys()  # unique
    return pages
Example #5
def calculate_weight_bias_histograms_for_net(net,
                                             settings,
                                             output_dir=None,
                                             scale_by_layer=False,
                                             do_print=True,
                                             force=False):
    if not output_dir:
        output_dir = settings.caffevis_outputs_dir

    layers = settings.layers_to_output_in_offline_scripts

    fig = plt.figure(figsize=(10, 10), facecolor='white', tight_layout=False)
    ax = fig.add_subplot(111)

    # collect raw weights and biases per layer so they can be saved at the end
    # (initialized here so the snippet is self-contained; the original project
    # may define these at module level)
    all_weights_per_layer = dict()
    all_biases_per_layer = dict()

    for layer_name in layers:
        if do_print:
            print "calculating weights and bias histogram for layer %s" % (
                layer_name)

        layer_output_dir = os.path.join(output_dir, layer_name)
        mkdir_p(layer_output_dir)
        try:
            all_weights_per_layer[layer_name] = net.params[layer_name][0].data
            all_biases_per_layer[layer_name] = net.params[layer_name][1].data
            layer_hist, layer_bin_edges = calculate_weights_histogram_for_specific_layer(
                net, layer_name, layer_output_dir, fig, ax, force)
            calculate_bias_histogram_for_specific_layer(
                net, layer_name, layer_output_dir, fig, ax, force)

            for channel_idx in xrange(0, net.params[layer_name][0].shape[0]):
                n_channels = net.params[layer_name][0].shape[0]

                if do_print and channel_idx % 10 == 0:
                    print "calculating weights histogram for layer %s channel %d out of %d" % (
                        layer_name, channel_idx, n_channels)

                unit_output_dir = os.path.join(layer_output_dir,
                                               'unit_%04d' % channel_idx)
                mkdir_p(unit_output_dir)
                if scale_by_layer and layer_bin_edges is not None:
                    layer_hist_range = (layer_bin_edges.min(),
                                        layer_bin_edges.max())
                else:
                    layer_hist_range = (None, None)
                calculate_weights_histogram_for_specific_unit(
                    net, layer_name, channel_idx, unit_output_dir, fig, ax,
                    layer_hist_range, force)
        except KeyError as excep:
            print 'Unable to generate weights/bias histograms for this layer: ' + str(
                excep)

    if len(all_weights_per_layer.keys()) > 0:
        np.save(os.path.join(output_dir, 'all_weights.npy'),
                all_weights_per_layer)
        np.save(os.path.join(output_dir, 'all_biases.npy'),
                all_biases_per_layer)
Example #6
def save_max_tracker_to_file(filename, net_max_tracker):

    dir_name = os.path.dirname(filename)
    mkdir_p(dir_name)

    with WithTimer('Saving maxes'):
        with open(filename, 'wb') as ff:
            pickle.dump(net_max_tracker, ff, -1)
        # save text version of pickle file for easier debugging
        pickle_to_text(filename)
Example #7
    def save_results(self,
                     params,
                     results,
                     prefix_template,
                     brave=False,
                     skipbig=False):
        if prefix_template is None:
            return

        results_and_params = combine_dicts(
            (('p.', params.__dict__), ('r.', results.__dict__)))
        prefix = prefix_template % results_and_params

        if os.path.isdir(prefix):
            if prefix[-1] != '/':
                prefix += '/'  # append slash for dir-only template
        else:
            dirname = os.path.dirname(prefix)
            if dirname:
                mkdir_p(dirname)

        # Don't overwrite previous results
        if os.path.exists('%sinfo.txt' % prefix) and not brave:
            raise Exception('Cowardly refusing to overwrite ' +
                            '%sinfo.txt' % prefix)

        output_majority = False
        if output_majority:
            if results.majority_xx is not None:
                asimg = results.majority_xx[
                    self.channel_swap_to_rgb].transpose((1, 2, 0))
                saveimagescc('%smajority_X.jpg' % prefix, asimg, 0)
                saveimagesc('%smajority_Xpm.jpg' % prefix,
                            asimg + self._data_mean_rgb_img)  # PlusMean

        if results.best_xx is not None:
            asimg = results.best_xx[self.channel_swap_to_rgb].transpose(
                (1, 2, 0))
            saveimagescc('%sbest_X.jpg' % prefix, asimg, 0)
            saveimagesc('%sbest_Xpm.jpg' % prefix,
                        asimg + self._data_mean_rgb_img)  # PlusMean

        with open('%sinfo.txt' % prefix, 'w') as ff:
            print >> ff, params
            print >> ff
            print >> ff, results
        if not skipbig:
            with open('%sinfo_big.pkl' % prefix, 'w') as ff:
                pickle.dump((params, results), ff, protocol=-1)
        results.trim_arrays()
        with open('%sinfo.pkl' % prefix, 'w') as ff:
            pickle.dump((params, results), ff, protocol=-1)
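
Note: Examples #1 and #7 above (and #12 below) build their output prefix by formatting prefix_template against a dictionary returned by a combine_dicts helper that is not shown on this page. A minimal sketch of what it is assumed to do, judging from the call sites, where each input dict is paired with a 'p.' or 'r.' key prefix:

def combine_dicts(prefix_dict_pairs):
    # merge several dicts into one, prefixing every key of each dict
    # with the string it is paired with, e.g. ('p.', params.__dict__)
    combined = {}
    for prefix, dct in prefix_dict_pairs:
        for key, value in dct.items():
            combined[prefix + key] = value
    return combined

With such a helper, prefix_template can reference fields from either object via mapping keys of the form '%(p.<param name>)s' and '%(r.<result name>)s'.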
Example #8
 def _save_weights(step, G, D, optimizers):
     cp_path = os.path.join(args.outdir, "checkpoints")
     print "Saving checkpoint..."
     checkpoint = {
         "g_weights": G.state_dict(),
         "d_weights": D.state_dict(),
         "g_optim": optimizers.g.state_dict(),
         "d_optim": optimizers.d.state_dict()
     }
     misc.mkdir_p(cp_path)
     save_path = os.path.join(cp_path, "%s.pt" % str(step))
     torch.save(checkpoint, save_path)
     print "Checkpoint saved to %s" % save_path
Example #9
def get_receptive_field(settings, net, layer_name):

    # flag which indicates whether the dictionary was changed hence we need to write it to cache
    should_save_to_cache = False

    # check if dictionary exists
    if not hasattr(settings, '_receptive_field_per_layer'):

        # if it doesn't, try load it from file
        receptive_fields_cache_filename = _get_receptive_fields_cache_filename(
            settings)
        if os.path.isfile(receptive_fields_cache_filename):
            try:
                with open(receptive_fields_cache_filename,
                          'rb') as receptive_fields_cache_file:
                    settings._receptive_field_per_layer = pickle.load(
                        receptive_fields_cache_file)
            except:
                settings._receptive_field_per_layer = dict()
                should_save_to_cache = True
        else:
            settings._receptive_field_per_layer = dict()
            should_save_to_cache = True

    # calculate lazy
    if layer_name not in settings._receptive_field_per_layer:
        print("Calculating receptive fields for layer %s" % (layer_name))
        top_name = layer_name_to_top_name(net, layer_name)
        if top_name is not None:
            blob = net.blobs[top_name].data
            is_spatial = (len(blob.shape) == 4)
            layer_receptive_field = get_max_data_extent(
                net, settings, layer_name, is_spatial)
            settings._receptive_field_per_layer[
                layer_name] = layer_receptive_field
            should_save_to_cache = True

    if should_save_to_cache:
        try:
            receptive_fields_cache_filename = _get_receptive_fields_cache_filename(
                settings)
            mkdir_p(settings.caffevis_outputs_dir)
            with open(receptive_fields_cache_filename,
                      'wb') as receptive_fields_cache_file:
                pickle.dump(settings._receptive_field_per_layer,
                            receptive_fields_cache_file, -1)
        except IOError:
            # ignore problems in cache saving
            pass

    return settings._receptive_field_per_layer[layer_name]
Example #10
    def calculate_correlation(self, layer_name, outdir):

        # convert list of arrays to numpy array
        all_max_array = np.vstack(self.all_max_vals)

        # skip layers with only one channel
        if all_max_array.shape[1] == 1:
            return

        corr = np.corrcoef(all_max_array.transpose())

        # fix possible NANs
        corr = np.nan_to_num(corr)
        np.fill_diagonal(corr, 1)

        # (debug) optionally dump the raw correlation matrix to disk:
        # import cPickle as pickle
        # with open('corr_%s.pickled' % layer_name, 'wb') as ff:
        #     pickle.dump(corr, ff, protocol=2)

        # alternative sorting:
        # values = np.dot(corr, np.arange(corr.shape[0]))
        # indexes = np.argsort(values)

        # sort correlation matrix
        indexes = np.lexsort(corr)
        sorted_corr = corr[indexes, :][:, indexes]

        # plot correlation matrix
        import matplotlib.pyplot as plt
        fig = plt.figure(figsize=(10, 10))
        plt.subplot(1, 1, 1)
        plt.imshow(sorted_corr, interpolation='nearest', vmin=-1, vmax=1)
        plt.colorbar()
        plt.title('channels activations correlation matrix for layer %s' %
                  (layer_name))
        plt.tight_layout()

        # save correlation matrix
        layer_dir = os.path.join(outdir, layer_name)
        mkdir_p(layer_dir)
        filename = os.path.join(layer_dir, 'channels_correlation.png')
        fig.savefig(filename, bbox_inches='tight')

        plt.close()

        return
Example #11
def save_max_tracker_to_file(filename, net_max_tracker):
    dir_name = os.path.dirname(filename)
    mkdir_p(dir_name)

    with WithTimer('Saving maxes'):
        # Save unpickleable list of ndarrays with all maximal values
        all_max_vals_dict = dict()
        for layer_name in net_max_tracker.layers:
            all_max_vals_dict[layer_name] = net_max_tracker.max_trackers[
                layer_name].all_max_vals
        np.save(os.path.join(dir_name, 'all_max_vals.npy'), all_max_vals_dict)
        del all_max_vals_dict

        # Pickle pickleable Net_Max_Tracker parameters
        with open(filename, 'wb') as ff:
            pickle.dump(net_max_tracker, ff, -1)
        # save text version of pickle file for easier debugging
        pickle_to_text(filename)
Example #12
    def save_results(self, params, results, prefix_template, brave=False, skipbig=False):
        if prefix_template is None:
            return

        results_and_params = combine_dicts((('p.', params.__dict__),
                                            ('r.', results.__dict__)))
        prefix = prefix_template % results_and_params
        
        if os.path.isdir(prefix):
            if prefix[-1] != '/':
                prefix += '/'   # append slash for dir-only template
        else:
            dirname = os.path.dirname(prefix)
            if dirname:
                mkdir_p(dirname)

        # Don't overwrite previous results
        if os.path.exists('%sinfo.txt' % prefix) and not brave:
            raise Exception('Cowardly refusing to overwrite ' + '%sinfo.txt' % prefix)

        output_majority = False
        if output_majority:
            if results.majority_xx is not None:
                asimg = results.majority_xx[self.channel_swap_to_rgb].transpose((1,2,0))
                saveimagescc('%smajority_X.jpg' % prefix, asimg, 0)
                saveimagesc('%smajority_Xpm.jpg' % prefix, asimg + self._data_mean_rgb_img)  # PlusMean

        if results.best_xx is not None:
            asimg = results.best_xx[self.channel_swap_to_rgb].transpose((1,2,0))
            saveimagescc('%sbest_X.jpg' % prefix, asimg, 0)
            saveimagesc('%sbest_Xpm.jpg' % prefix, asimg + self._data_mean_rgb_img)  # PlusMean

        with open('%sinfo.txt' % prefix, 'w') as ff:
            print >>ff, params
            print >>ff
            print >>ff, results
        if not skipbig:
            with open('%sinfo_big.pkl' % prefix, 'w') as ff:
                pickle.dump((params, results), ff, protocol=-1)
        results.trim_arrays()
        with open('%sinfo.pkl' % prefix, 'w') as ff:
            pickle.dump((params, results), ff, protocol=-1)
Example #13
def calculate_weights_histogram_for_specific_unit(net,
                                                  layer_name,
                                                  channel_idx,
                                                  output_dir,
                                                  fig,
                                                  ax,
                                                  hist_range=(None, None),
                                                  force=False):
    hist_filename = os.path.join(output_dir, 'weight_histogram.png')
    if not force and os.path.isfile(hist_filename):
        print 'The weights histogram for unit %d already exists. Skipping...' % (
            channel_idx)
    else:
        # get vector of weights
        weights = net.params[layer_name][0].data[channel_idx].flatten()
        bias = net.params[layer_name][1].data[channel_idx]
        weights_num = len(weights)

        # create histogram
        if hist_range == (None, None):
            hist_range = (weights.min(), weights.max())
        hist, bin_edges = np.histogram(weights, bins=50, range=hist_range)

        # generate histogram image file
        width = 0.7 * (bin_edges[1] - bin_edges[0])
        center = (bin_edges[:-1] + bin_edges[1:]) / 2

        ax.bar(center, hist, align='center', width=width, color='g')

        fig.suptitle('weights for unit %d, bias is %f\n %s weights used' %
                     (channel_idx, bias, weights_num))
        ax.xaxis.label.set_text('weight value')
        ax.yaxis.label.set_text('count')

        # save weight histogram as image file
        mkdir_p(output_dir)
        fig.savefig(hist_filename)

        ax.cla()

        return hist, bin_edges
Example #14
 def _gen_sample_images(step, G):
     print "Generating %d sample images..." % len(sample_images)
     G.eval()
     x = float_tensor(np.array([si[1] for si in sample_images]))
     z = float_tensor(np.random.uniform(-1., 1., (x.shape[0], args.Nz)))
     c = one_hot(
         long_tensor(
             np.array([misc.pose_class(args.Np, 0.)]).repeat(x.shape[0])),
         args.Np)
     gen = G(Variable(x), Variable(c), Variable(z)).x.detach().data
     for i, s in enumerate(sample_images):
         grid = vutils.make_grid(gen[i:i + 1],
                                 1,
                                 normalize=True,
                                 scale_each=True,
                                 padding=0)
         writer.add_image(s[0], grid, step)
     if args.outdir:
         misc.mkdir_p(os.path.join(args.outdir, "generated_images"))
         img_path = os.path.join(args.outdir, "generated_images",
                                 "%d.jpg" % step)
         vutils.save_image(gen, img_path, normalize=True)
     print "Sample images generated!"
Example #15
def crawl_pages(subcats):
	dirpath = "data/site/%s/" % config.start_cat
	pages = []

	counter = 0
	for subcat in subcats:
		counter += 1
		pb.update(counter, len(subcats))

		subcat_dirpath = dirpath + subcat + "/"
		misc.mkdir_p(subcat_dirpath)

		filepath = subcat_dirpath + "pages.txt"
		if os.path.exists(filepath):
			subcat_pages = misc.read_file(filepath)
		else:
			subcat_pages = get_subcat_pages(subcat)
			misc.write_file(filepath, subcat_pages)

		pages.extend(subcat_pages)

	pages = [page for page in pages if lang.can_page(page)]
	pages = OrderedDict.fromkeys(pages).keys() # unique
	return pages
Example #16
def main():
    misc.mkdir_p("data/site/%s/" % config.start_cat)
    misc.mkdir_p("data/pages/%s/" % config.start_cat)
    misc.mkdir_p("data/speling/%s/" % config.start_cat)

    # stage 1 - obtaining list of subcategories
    print("** Stage 1: Obtaining list of subcategories to crawl. **")
    subcats = crawler.crawl_subcats()

    # stage 2 - obtaining list of pages
    print("** Stage 2: Obtaining list of pages to crawl. **")
    pages = crawler.crawl_pages(subcats)

    # stage 3 - crawling all pages
    print("** Stage 3: Crawling all pages in list. **")
    crawler.crawl_all_pages(pages)

    # stage 4 - parsing (scraping) all pages
    print("** Stage 4: Parsing all pages in list. **")
    spelings = parser.parse(pages)

    # stage 5 - write to file
    print("** Stage 5: Writing final results to file. **")
    filepath = "data/%s-speling.txt" % config.lang
    f = open(filepath, "w")
    for speling in spelings:
        f.write(speling + "\n")
    f.close()

    print("")
    print("")
    print("=== STATS ===")
    print("Crawled %d pages" % len(pages))
    print("Obtained %d spelings" % len(spelings))
    print("Wrote spelings to %s" % filepath)
    print("=============")
Example #17
def main():
	misc.mkdir_p("data/site/%s/" % config.start_cat)
	misc.mkdir_p("data/pages/%s/" % config.start_cat)
	misc.mkdir_p("data/speling/%s/" % config.start_cat)

	# stage 1 - obtaining list of subcategories
	print("** Stage 1: Obtaining list of subcategories to crawl. **")
	subcats = crawler.crawl_subcats()

	# stage 2 - obtaining list of pages
	print("** Stage 2: Obtaining list of pages to crawl. **")
	pages = crawler.crawl_pages(subcats)

	# stage 3 - crawling all pages
	print("** Stage 3: Crawling all pages in list. **")
	crawler.crawl_all_pages(pages)

	# stage 4 - parsing (scraping) all pages
	print("** Stage 4: Parsing all pages in list. **")
	spelings = parser.parse(pages)

	# stage 5 - write to file
	print("** Stage 5: Writing final results to file. **")
	filepath = "data/%s-speling.txt" % config.lang
	f = open(filepath, "w")
	for speling in spelings:
		f.write(speling + "\n")
	f.close()

	print("")
	print("")
	print("=== STATS ===")
	print("Crawled %d pages" % len(pages))
	print("Obtained %d spelings" % len(spelings))
	print("Wrote spelings to %s" % filepath)
	print("=============")
Example #18
        def wrapper(*args, **kwargs):
            startHashWall = time.time()

            # Hash the function, its args, and its kwargs
            hasher = PersistentHasher()
            hasher.update(function)
            hasher.update(args)
            hasher.update(kwargs)

            # Check cache for previous result
            functionName = function.__name__    # a little more reliable than func_name
            digest = hasher.hexdigest()

            cacheFilename    = '%s.%s.pkl.gz' % (digest[:16], functionName)
            # get a unique filename that does not affect any random number generators
            cacheTmpFilename = '.%s-%06d.tmp' % (cacheFilename, datetime.now().microsecond)
            cachePath    = os.path.join(globalCacheDir, cacheFilename[:2], cacheFilename)
            cacheTmpPath = os.path.join(globalCacheDir, cacheFilename[:2], cacheTmpFilename)
            elapsedHashWall = time.time() - startHashWall

            try:
                start = time.time()
                if globalCacheVerbose >= 3:
                    print (' -> cache.py: %s: trying to load file %s'
                           % (functionName, cachePath))
                (stats,result) = loadFromPklGz(cachePath)
                elapsedWall = time.time() - start
                if globalCacheVerbose >= 1:
                    print (' -> cache.py: %s: cache hit (%.04fs hash overhead, %.04fs to load, saved %.04fs)'
                           % (functionName, elapsedHashWall, elapsedWall, stats['timeWall'] - elapsedWall))
                    if globalCacheVerbose >= 2:
                        print '   -> loaded %s' % cachePath
            except IOError:
                if globalCacheVerbose >= 3:
                    print (' -> cache.py: %s: cache miss, computing function'
                           % (functionName))
                startWall = time.time()
                startCPU  = time.clock()
                result = function(*args, **kwargs)
                elapsedWall = time.time() - startWall
                elapsedCPU  = time.clock() - startCPU
                    
                stats = {'functionName': functionName,
                         'timeWall': elapsedWall,
                         'timeCPU': elapsedCPU,
                         'saveDate': datetime.now(),
                         }

                startSave = time.time()
                mkdir_p(os.path.dirname(cachePath))
                if globalCacheVerbose >= 3:
                    print (' -> cache.py: %s: function execution finished, saving result to file %s'
                           % (functionName, cachePath))
                saveToFile(cacheTmpPath, (stats,result), quiet = True)
                os.rename(cacheTmpPath, cachePath)
                if globalCacheVerbose >= 1:
                    print (' -> cache.py: %s: cache miss (%.04fs hash overhead, %.04fs to save, %.04fs to compute)'
                           % (functionName, elapsedHashWall, time.time() - startSave, elapsedWall))
                    if globalCacheVerbose >= 2:
                        print '   -> saved to %s' % cachePath

            return result
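
Note: Example #18 shows only the inner wrapper of an on-disk memoizing decorator; the enclosing decorator and its helpers (PersistentHasher, loadFromPklGz, saveToFile) are not on this page. For orientation, a much-simplified, self-contained sketch of the same idea, using hashlib and pickle in place of the project's helpers:

import hashlib
import os
import pickle

def disk_cached(cache_dir):
    # simplified stand-in for the decorator whose wrapper is shown above
    def decorator(function):
        def wrapper(*args, **kwargs):
            # hash the function name and its arguments to form a cache key
            key = hashlib.sha1(
                pickle.dumps((function.__name__, args, sorted(kwargs.items())))
            ).hexdigest()
            path = os.path.join(cache_dir, key + '.pkl')
            if os.path.exists(path):
                with open(path, 'rb') as fh:
                    return pickle.load(fh)      # cache hit
            result = function(*args, **kwargs)  # cache miss: compute
            if not os.path.isdir(cache_dir):
                os.makedirs(cache_dir)
            with open(path, 'wb') as fh:
                pickle.dump(result, fh, -1)
            return result
        return wrapper
    return decorator

The real wrapper above additionally records timing statistics, and writes to a temporary file before renaming it into place so a partially written cache entry is never read.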
Example #19
def output_max_patches(settings, max_tracker, net, layer_name, idx_begin,
                       idx_end, num_top, datadir, outdir, search_min,
                       do_which):
    '''
    Save, for each channel in [idx_begin, idx_end), the top input image patches
    recorded by the max tracker, plus optional deconv/backprop visualizations.

    :param settings: visualization settings object
    :param max_tracker: tracker holding the recorded max (or min) values and locations
    :param net: caffe network
    :param layer_name: name of the layer to process
    :param idx_begin: first channel index to process
    :param idx_end: one past the last channel index to process
    :param num_top: number of top images to output per channel
    :param datadir: directory containing the input images
    :param outdir: root directory for the generated outputs
    :param search_min: if True, use the recorded minima instead of the maxima
    :param do_which: tuple of flags (do_maxes, do_deconv, do_deconv_norm, do_backprop, do_backprop_norm, do_info); do_info must be True
    :return:
    '''
    do_maxes, do_deconv, do_deconv_norm, do_backprop, do_backprop_norm, do_info = do_which
    assert do_maxes or do_deconv or do_deconv_norm or do_backprop or do_backprop_norm or do_info, 'nothing to do'

    sys.path.insert(0, os.path.join(settings.caffevis_caffe_root, 'python'))
    import caffe

    mt = max_tracker

    locs = mt.min_locs if search_min else mt.max_locs
    vals = mt.min_vals if search_min else mt.max_vals

    image_filenames, image_labels = get_files_list(datadir)

    print 'Loaded filenames and labels for %d files' % len(image_filenames)
    print '  First file', os.path.join(datadir, image_filenames[0])

    num_top_in_mt = locs.shape[1]
    assert num_top <= num_top_in_mt, 'Requested %d top images but MaxTracker contains only %d' % (
        num_top, num_top_in_mt)
    assert idx_end >= idx_begin, 'Range error'

    # minor fix for backwards compatibility
    if hasattr(mt, 'is_conv'):
        mt.is_spatial = mt.is_conv

    size_ii, size_jj = get_max_data_extent(net, settings, layer_name,
                                           mt.is_spatial)
    data_size_ii, data_size_jj = net.blobs['data'].data.shape[2:4]

    net_input_dims = net.blobs['data'].data.shape[2:4]

    # prepare variables used for batches
    batch = [None] * settings.max_tracker_batch_size
    for i in range(0, settings.max_tracker_batch_size):
        batch[i] = MaxTrackerCropBatchRecord()

    batch_index = 0

    channel_to_info_file = dict()

    n_total_images = (idx_end - idx_begin) * num_top
    for cc, channel_idx in enumerate(range(idx_begin, idx_end)):

        unit_dir = os.path.join(outdir, layer_name, 'unit_%04d' % channel_idx)
        mkdir_p(unit_dir)

        # check if all required outputs exist, in which case skip this iteration
        [
            info_filename, maxim_filenames, deconv_filenames,
            deconvnorm_filenames, backprop_filenames, backpropnorm_filenames
        ] = generate_output_names(unit_dir, num_top, do_info, do_maxes,
                                  do_deconv, do_deconv_norm, do_backprop,
                                  do_backprop_norm, search_min)

        relevant_outputs = info_filename + \
                           maxim_filenames + \
                           deconv_filenames + \
                           deconvnorm_filenames + \
                           backprop_filenames + \
                           backpropnorm_filenames

        # skip generation for this channel if:
        #   1. all required outputs already exist, AND
        #   2. either this is not the last channel, or it is the last channel
        #      but the batch is empty (nothing pending to flush)
        relevant_outputs_exist = [
            os.path.exists(file_name) for file_name in relevant_outputs
        ]
        if all(relevant_outputs_exist) and \
                ((channel_idx != idx_end - 1) or ((channel_idx == idx_end - 1) and (batch_index == 0))):
            print "skipped generation of channel %d in layer %s since files already exist" % (
                channel_idx, layer_name)
            continue

        if do_info:
            channel_to_info_file[channel_idx] = InfoFileMetadata()
            channel_to_info_file[channel_idx].info_file = open(
                info_filename[0], 'w')
            channel_to_info_file[channel_idx].ref_count = num_top

            print >> channel_to_info_file[
                channel_idx].info_file, '# is_spatial val image_idx selected_input_index i(if is_spatial) j(if is_spatial) filename'

        # iterate through maxes from highest (at end) to lowest
        for max_idx_0 in range(num_top):
            batch[batch_index].cc = cc
            batch[batch_index].channel_idx = channel_idx
            batch[batch_index].info_filename = info_filename
            batch[batch_index].maxim_filenames = maxim_filenames
            batch[batch_index].deconv_filenames = deconv_filenames
            batch[batch_index].deconvnorm_filenames = deconvnorm_filenames
            batch[batch_index].backprop_filenames = backprop_filenames
            batch[batch_index].backpropnorm_filenames = backpropnorm_filenames
            batch[batch_index].info_file = channel_to_info_file[
                channel_idx].info_file

            batch[batch_index].max_idx_0 = max_idx_0
            batch[batch_index].max_idx = num_top_in_mt - 1 - batch[
                batch_index].max_idx_0

            if mt.is_spatial:

                # fix for backward compatibility
                if locs.shape[2] == 5:
                    # remove second column
                    locs = np.delete(locs, 1, 2)

                batch[batch_index].im_idx, batch[
                    batch_index].selected_input_index, batch[
                        batch_index].ii, batch[batch_index].jj = locs[
                            batch[batch_index].channel_idx,
                            batch[batch_index].max_idx]
            else:
                # fix for backward compatibility
                if locs.shape[2] == 3:
                    # remove second column
                    locs = np.delete(locs, 1, 2)

                batch[batch_index].im_idx, batch[
                    batch_index].selected_input_index = locs[
                        batch[batch_index].channel_idx,
                        batch[batch_index].max_idx]
                batch[batch_index].ii, batch[batch_index].jj = 0, 0

            # if ii and jj are invalid then there is no data for this "top" image, so we can skip it
            if (batch[batch_index].ii, batch[batch_index].jj) == (-1, -1):
                continue

            batch[batch_index].recorded_val = vals[
                batch[batch_index].channel_idx, batch[batch_index].max_idx]
            batch[batch_index].filename = image_filenames[
                batch[batch_index].im_idx]
            do_print = (batch[batch_index].max_idx_0 == 0)
            if do_print:
                print '%s   Output file/image(s) %d/%d   layer %s channel %d' % (
                    datetime.now().ctime(), batch[batch_index].cc * num_top,
                    n_total_images, layer_name, batch[batch_index].channel_idx)

            # print "DEBUG: (mt.is_spatial, batch[batch_index].ii, batch[batch_index].jj, layer_name, size_ii, size_jj, data_size_ii, data_size_jj)", str((mt.is_spatial, batch[batch_index].ii, batch[batch_index].jj, rc, layer_name, size_ii, size_jj, data_size_ii, data_size_jj))

            [batch[batch_index].out_ii_start,
             batch[batch_index].out_ii_end,
             batch[batch_index].out_jj_start,
             batch[batch_index].out_jj_end,
             batch[batch_index].data_ii_start,
             batch[batch_index].data_ii_end,
             batch[batch_index].data_jj_start,
             batch[batch_index].data_jj_end] = \
                compute_data_layer_focus_area(mt.is_spatial, batch[batch_index].ii, batch[batch_index].jj, settings,
                                              layer_name,
                                              size_ii, size_jj, data_size_ii, data_size_jj)

            # print "DEBUG: channel:%d out_ii_start:%d out_ii_end:%d out_jj_start:%d out_jj_end:%d data_ii_start:%d data_ii_end:%d data_jj_start:%d data_jj_end:%d" % \
            #       (channel_idx,
            #        batch[batch_index].out_ii_start, batch[batch_index].out_ii_end,
            #        batch[batch_index].out_jj_start, batch[batch_index].out_jj_end,
            #        batch[batch_index].data_ii_start, batch[batch_index].data_ii_end,
            #        batch[batch_index].data_jj_start, batch[batch_index].data_jj_end)

            if do_info:
                print >> batch[
                    batch_index].info_file, 1 if mt.is_spatial else 0, '%.6f' % vals[
                        batch[batch_index].channel_idx,
                        batch[batch_index].max_idx],
                if mt.is_spatial:
                    print >> batch[
                        batch_index].info_file, '%d %d %d %d' % tuple(
                            locs[batch[batch_index].channel_idx,
                                 batch[batch_index].max_idx]),
                else:
                    print >> batch[batch_index].info_file, '%d %d' % tuple(
                        locs[batch[batch_index].channel_idx,
                             batch[batch_index].max_idx]),
                print >> batch[batch_index].info_file, batch[
                    batch_index].filename

            if not (do_maxes or do_deconv or do_deconv_norm or do_backprop
                    or do_backprop_norm):
                continue

            with WithTimer('Load image', quiet=not do_print):
                # load image
                batch[batch_index].im = caffe.io.load_image(os.path.join(
                    datadir, batch[batch_index].filename),
                                                            color=True)

                # resize images according to input dimension
                batch[batch_index].im = resize_without_fit(
                    batch[batch_index].im, net_input_dims)

                # convert to float to avoid caffe destroying the image in the scaling phase
                batch[batch_index].im = batch[batch_index].im.astype(
                    np.float32)

            batch_index += 1

            # if current batch is full
            if batch_index == settings.max_tracker_batch_size \
                    or ((channel_idx == idx_end - 1) and (max_idx_0 == num_top - 1)):  # or last iteration

                with WithTimer('Predict on batch  ', quiet=not do_print):
                    im_batch = [record.im for record in batch]
                    net.predict(im_batch, oversample=False)

                # go over batch and update statistics
                for i in range(0, batch_index):

                    batch[i].denormalized_layer_name = layer_name
                    batch[i].denormalized_top_name = layer_name_to_top_name(
                        net, batch[i].denormalized_layer_name)
                    batch[i].layer_format = 'normal'  # non-siamese

                    if len(net.blobs[
                            batch[i].denormalized_top_name].data.shape) == 4:
                        reproduced_val = net.blobs[
                            batch[i].denormalized_top_name].data[
                                i, batch[i].channel_idx, batch[i].ii,
                                batch[i].jj]

                    else:
                        reproduced_val = net.blobs[
                            batch[i].denormalized_top_name].data[
                                i, batch[i].channel_idx]

                    if abs(reproduced_val - batch[i].recorded_val) > .1:
                        print 'Warning: recorded value %s is suspiciously different from reproduced value %s. Is the filelist the same?' % (
                            batch[i].recorded_val, reproduced_val)

                    if do_maxes:
                        # grab image from data layer, not from im (to ensure preprocessing / center crop details match between image and deconv/backprop)

                        out_arr = extract_patch_from_image(
                            net.blobs['data'].data[i], net,
                            batch[i].selected_input_index, settings,
                            batch[i].data_ii_end, batch[i].data_ii_start,
                            batch[i].data_jj_end, batch[i].data_jj_start,
                            batch[i].out_ii_end, batch[i].out_ii_start,
                            batch[i].out_jj_end, batch[i].out_jj_start,
                            size_ii, size_jj)

                        with WithTimer('Save img  ', quiet=not do_print):
                            save_caffe_image(
                                out_arr,
                                batch[i].maxim_filenames[batch[i].max_idx_0],
                                autoscale=False,
                                autoscale_center=0,
                                channel_swap=settings.channel_swap)

                if do_deconv or do_deconv_norm:

                    # TODO: we can improve performance by doing batch of deconv_from_layer, but only if we group
                    # together instances which have the same selected_input_index, this can be done by holding two
                    # separate batches

                    for i in range(0, batch_index):
                        diffs = net.blobs[
                            batch[i].denormalized_top_name].diff * 0

                        if len(diffs.shape) == 4:
                            diffs[i, batch[i].channel_idx, batch[i].ii,
                                  batch[i].jj] = 1.0
                        else:
                            diffs[i, batch[i].channel_idx] = 1.0

                        with WithTimer('Deconv    ', quiet=not do_print):
                            net.deconv_from_layer(
                                batch[i].denormalized_layer_name,
                                diffs,
                                zero_higher=True,
                                deconv_type='Guided Backprop')

                        out_arr = extract_patch_from_image(
                            net.blobs['data'].diff[i], net,
                            batch[i].selected_input_index, settings,
                            batch[i].data_ii_end, batch[i].data_ii_start,
                            batch[i].data_jj_end, batch[i].data_jj_start,
                            batch[i].out_ii_end, batch[i].out_ii_start,
                            batch[i].out_jj_end, batch[i].out_jj_start,
                            size_ii, size_jj)

                        if out_arr.max() == 0:
                            print 'Warning: Deconv out_arr in range', out_arr.min(
                            ), 'to', out_arr.max(
                            ), 'ensure force_backward: true in prototxt'

                        if do_deconv:
                            with WithTimer('Save img  ', quiet=not do_print):
                                save_caffe_image(
                                    out_arr,
                                    batch[i].deconv_filenames[
                                        batch[i].max_idx_0],
                                    autoscale=False,
                                    autoscale_center=0,
                                    channel_swap=settings.channel_swap)
                        if do_deconv_norm:
                            out_arr = np.linalg.norm(out_arr, axis=0)
                            with WithTimer('Save img  ', quiet=not do_print):
                                save_caffe_image(
                                    out_arr,
                                    batch[i].deconvnorm_filenames[
                                        batch[i].max_idx_0],
                                    channel_swap=settings.channel_swap)

                if do_backprop or do_backprop_norm:

                    for i in range(0, batch_index):
                        diffs = net.blobs[
                            batch[i].denormalized_top_name].diff * 0

                        if len(diffs.shape) == 4:
                            diffs[i, batch[i].channel_idx, batch[i].ii,
                                  batch[i].jj] = 1.0
                        else:
                            diffs[i, batch[i].channel_idx] = 1.0

                    with WithTimer('Backward batch  ', quiet=not do_print):
                        net.backward_from_layer(
                            batch[i].denormalized_layer_name, diffs)

                    for i in range(0, batch_index):

                        out_arr = extract_patch_from_image(
                            net.blobs['data'].diff[i], net,
                            batch[i].selected_input_index, settings,
                            batch[i].data_ii_end, batch[i].data_ii_start,
                            batch[i].data_jj_end, batch[i].data_jj_start,
                            batch[i].out_ii_end, batch[i].out_ii_start,
                            batch[i].out_jj_end, batch[i].out_jj_start,
                            size_ii, size_jj)

                        if out_arr.max() == 0:
                            print 'Warning: Backprop out_arr in range', out_arr.min(
                            ), 'to', out_arr.max(
                            ), 'ensure force_backward: true in prototxt'
                        if do_backprop:
                            with WithTimer('Save img  ', quiet=not do_print):
                                save_caffe_image(
                                    out_arr,
                                    batch[i].backprop_filenames[
                                        batch[i].max_idx_0],
                                    autoscale=False,
                                    autoscale_center=0,
                                    channel_swap=settings.channel_swap)
                        if do_backprop_norm:
                            out_arr = np.linalg.norm(out_arr, axis=0)
                            with WithTimer('Save img  ', quiet=not do_print):
                                save_caffe_image(
                                    out_arr,
                                    batch[i].backpropnorm_filenames[
                                        batch[i].max_idx_0],
                                    channel_swap=settings.channel_swap)

                # close info files
                for i in range(0, batch_index):
                    channel_to_info_file[batch[i].channel_idx].ref_count -= 1
                    if channel_to_info_file[
                            batch[i].channel_idx].ref_count == 0:
                        if do_info:
                            channel_to_info_file[
                                batch[i].channel_idx].info_file.close()

                batch_index = 0
Example #20
fixture_dois = [
    '10.1371/journal.pone.0029411',
    '10.1080/17470910802083167',
]

def doi_to_fixture(doi, fname):
    """ Retrieve CSL data from DOI and save to file. """

    # Retrieve CSL data
    csl = xref.doi_to_csl(doi)
    
    # Write to file
    with open(fname, 'w') as fp:
        json.dump(csl, fp, indent=4)

if __name__ == '__main__':

    # Ensure that directory exists
    misc.mkdir_p(fixture_dir)

    # Iterate over DOIs
    for doi in fixture_dois:

        # Build file name
        fname = os.path.join(
            fixture_dir,
            '%s.json' % (misc.escape_doi(doi))
        )
        
        # Save CSL data to file
        doi_to_fixture(doi, fname)