def generate_output_names(self, batch_index, params, results, prefix_template, output_dir):

    results_and_params = combine_dicts((('p.', params.__dict__),
                                        ('r.', results[batch_index].__dict__)))
    prefix = prefix_template % results_and_params
    prefix = os.path.join(output_dir, prefix)

    if os.path.isdir(prefix):
        if prefix[-1] != '/':
            prefix += '/'  # append slash for dir-only template
    else:
        dirname = os.path.dirname(prefix)
        if dirname:
            mkdir_p(dirname)

    best_X_name = '%s_best_X.jpg' % prefix
    best_Xpm_name = '%s_best_Xpm.jpg' % prefix
    majority_X_name = '%s_majority_X.jpg' % prefix
    majority_Xpm_name = '%s_majority_Xpm.jpg' % prefix
    info_name = '%s_info.txt' % prefix
    info_pkl_name = '%s_info.pkl' % prefix
    info_big_pkl_name = '%s_info_big.pkl' % prefix

    return [best_X_name, best_Xpm_name,
            majority_X_name, majority_Xpm_name,
            info_name, info_pkl_name, info_big_pkl_name]
def calculate_weights_histogram_for_specific_layer(net, layer_name, output_dir, fig, ax, force=False):
    # generate weights histogram for layer
    hist_filename = os.path.join(output_dir, 'weight_histogram.png')

    weights = net.params[layer_name][0].data.flatten()
    hist, bin_edges = np.histogram(weights, bins=50)

    if not force and os.path.isfile(hist_filename):
        print 'The weights histogram for this layer already exists. Skipping...'
    else:
        weights_num = len(weights)
        width = 0.7 * (bin_edges[1] - bin_edges[0])
        center = (bin_edges[:-1] + bin_edges[1:]) / 2
        ax.bar(center, hist, align='center', width=width, color='g')
        fig.suptitle('weights for layer %s\n %s weights used' % (layer_name, weights_num))
        ax.xaxis.label.set_text('weight value')
        ax.yaxis.label.set_text('count')

        # save layer weights histogram to image file
        mkdir_p(output_dir)
        fig.savefig(hist_filename)
        ax.cla()

    return hist, bin_edges
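# A minimal usage sketch for the histogram helper above, not part of the original
# tool: 'net' is assumed to be an already-initialized caffe.Classifier and 'conv1'
# an existing parameterized layer. The figure/axes pair is created once by the
# caller and reused across layers, as calculate_weight_bias_histograms_for_net does.
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(10, 10), facecolor='white')
ax = fig.add_subplot(111)
hist, bin_edges = calculate_weights_histogram_for_specific_layer(
    net, 'conv1', '/tmp/histograms/conv1', fig, ax, force=True)
plt.close(fig)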
def process_channel_figure(channel_idx, fig):
    # outdir and layer_name are taken from the enclosing scope
    unit_dir = os.path.join(outdir, layer_name, 'unit_%04d' % channel_idx)
    mkdir_p(unit_dir)
    filename = os.path.join(unit_dir, 'max_histogram.png')
    fig.savefig(filename)
def crawl_pages(subcats):
    dirpath = "data/site/%s/%s/" % (config.wiki_lang, config.start_cat)
    pages = []
    counter = 0
    for subcat in subcats:
        counter += 1
        pb.update(counter, len(subcats))
        subcat_dirpath = dirpath + subcat + "/"
        misc.mkdir_p(subcat_dirpath)
        filepath = subcat_dirpath + "pages.txt"
        if os.path.exists(filepath):
            subcat_pages = misc.read_file(filepath)
        else:
            subcat_pages = get_subcat_pages(subcat)
            misc.write_file(filepath, subcat_pages)
        pages.extend(subcat_pages)
    pages = [page for page in pages
             if not config.page_bl(page) and lang.can(page)]
    pages = OrderedDict.fromkeys(pages).keys()  # unique
    return pages
def calculate_weight_bias_histograms_for_net(net, settings, output_dir=None,
                                             scale_by_layer=False, do_print=True, force=False):
    if not output_dir:
        output_dir = settings.caffevis_outputs_dir

    layers = settings.layers_to_output_in_offline_scripts

    fig = plt.figure(figsize=(10, 10), facecolor='white', tight_layout=False)
    ax = fig.add_subplot(111)

    # weights/biases collected per layer, saved to .npy files below
    all_weights_per_layer = dict()
    all_biases_per_layer = dict()

    for layer_name in layers:

        if do_print:
            print "calculating weights and bias histogram for layer %s" % layer_name

        layer_output_dir = os.path.join(output_dir, layer_name)
        mkdir_p(layer_output_dir)

        try:
            all_weights_per_layer[layer_name] = net.params[layer_name][0].data
            all_biases_per_layer[layer_name] = net.params[layer_name][1].data

            layer_hist, layer_bin_edges = calculate_weights_histogram_for_specific_layer(
                net, layer_name, layer_output_dir, fig, ax, force)
            calculate_bias_histogram_for_specific_layer(
                net, layer_name, layer_output_dir, fig, ax, force)

            n_channels = net.params[layer_name][0].shape[0]
            for channel_idx in xrange(0, n_channels):

                if do_print and channel_idx % 10 == 0:
                    print "calculating weights histogram for layer %s channel %d out of %d" % (
                        layer_name, channel_idx, n_channels)

                unit_output_dir = os.path.join(layer_output_dir, 'unit_%04d' % channel_idx)
                mkdir_p(unit_output_dir)

                if scale_by_layer and layer_bin_edges is not None:
                    layer_hist_range = (layer_bin_edges.min(), layer_bin_edges.max())
                else:
                    layer_hist_range = (None, None)

                calculate_weights_histogram_for_specific_unit(
                    net, layer_name, channel_idx, unit_output_dir, fig, ax,
                    layer_hist_range, force)

        except KeyError as excep:
            print 'Unable to generate weights/bias histograms for this layer: ' + str(excep)

    if len(all_weights_per_layer) > 0:
        np.save(os.path.join(output_dir, 'all_weights.npy'), all_weights_per_layer)
        np.save(os.path.join(output_dir, 'all_biases.npy'), all_biases_per_layer)
def save_max_tracker_to_file(filename, net_max_tracker):

    dir_name = os.path.dirname(filename)
    mkdir_p(dir_name)

    with WithTimer('Saving maxes'):
        with open(filename, 'wb') as ff:
            pickle.dump(net_max_tracker, ff, -1)

    # save text version of pickle file for easier debugging
    pickle_to_text(filename)
def save_results(self, params, results, prefix_template, brave=False, skipbig=False):
    if prefix_template is None:
        return

    results_and_params = combine_dicts((('p.', params.__dict__),
                                        ('r.', results.__dict__)))
    prefix = prefix_template % results_and_params

    if os.path.isdir(prefix):
        if prefix[-1] != '/':
            prefix += '/'  # append slash for dir-only template
    else:
        dirname = os.path.dirname(prefix)
        if dirname:
            mkdir_p(dirname)

    # don't overwrite previous results
    if os.path.exists('%sinfo.txt' % prefix) and not brave:
        raise Exception('Cowardly refusing to overwrite %sinfo.txt' % prefix)

    output_majority = False
    if output_majority:
        if results.majority_xx is not None:
            asimg = results.majority_xx[self.channel_swap_to_rgb].transpose((1, 2, 0))
            saveimagescc('%smajority_X.jpg' % prefix, asimg, 0)
            saveimagesc('%smajority_Xpm.jpg' % prefix, asimg + self._data_mean_rgb_img)  # PlusMean

    if results.best_xx is not None:
        asimg = results.best_xx[self.channel_swap_to_rgb].transpose((1, 2, 0))
        saveimagescc('%sbest_X.jpg' % prefix, asimg, 0)
        saveimagesc('%sbest_Xpm.jpg' % prefix, asimg + self._data_mean_rgb_img)  # PlusMean

    with open('%sinfo.txt' % prefix, 'w') as ff:
        print >> ff, params
        print >> ff
        print >> ff, results

    if not skipbig:
        with open('%sinfo_big.pkl' % prefix, 'w') as ff:
            pickle.dump((params, results), ff, protocol=-1)

    results.trim_arrays()
    with open('%sinfo.pkl' % prefix, 'w') as ff:
        pickle.dump((params, results), ff, protocol=-1)
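# A small demonstration of the prefix_template mechanism used above. The key names
# here ('p.push_layer', 'p.push_channel') are hypothetical; combine_dicts is assumed
# to merge the params and results dicts while prefixing their keys with 'p.' and 'r.'.
params_dict = {'p.push_layer': 'conv5', 'p.push_channel': 13}
prefix = 'opt/%(p.push_layer)s/unit_%(p.push_channel)04d_' % params_dict
print prefix  # -> opt/conv5/unit_0013_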
def _save_weights(step, G, D, optimizers):
    cp_path = os.path.join(args.outdir, "checkpoints")
    print "Saving checkpoint..."
    checkpoint = {
        "g_weights": G.state_dict(),
        "d_weights": D.state_dict(),
        "g_optim": optimizers.g.state_dict(),
        "d_optim": optimizers.d.state_dict()
    }
    misc.mkdir_p(cp_path)
    save_path = os.path.join(cp_path, "%s.pt" % str(step))
    torch.save(checkpoint, save_path)
    print "Checkpoint saved to %s" % save_path
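# A possible counterpart for restoring a checkpoint written by _save_weights. This
# loader is a sketch and not part of the original script; it assumes the same
# 'args', 'G', 'D', and 'optimizers' objects that _save_weights uses.
def _load_weights(step, G, D, optimizers):
    cp_path = os.path.join(args.outdir, "checkpoints", "%s.pt" % str(step))
    checkpoint = torch.load(cp_path)
    G.load_state_dict(checkpoint["g_weights"])
    D.load_state_dict(checkpoint["d_weights"])
    optimizers.g.load_state_dict(checkpoint["g_optim"])
    optimizers.d.load_state_dict(checkpoint["d_optim"])
    print "Checkpoint loaded from %s" % cp_path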
def get_receptive_field(settings, net, layer_name):

    # flag which indicates whether the dictionary was changed, hence we need to write it to cache
    should_save_to_cache = False

    # check if dictionary exists
    if not hasattr(settings, '_receptive_field_per_layer'):

        # if it doesn't, try to load it from file
        receptive_fields_cache_filename = _get_receptive_fields_cache_filename(settings)
        if os.path.isfile(receptive_fields_cache_filename):
            try:
                with open(receptive_fields_cache_filename, 'rb') as receptive_fields_cache_file:
                    settings._receptive_field_per_layer = pickle.load(receptive_fields_cache_file)
            except:
                settings._receptive_field_per_layer = dict()
                should_save_to_cache = True
        else:
            settings._receptive_field_per_layer = dict()
            should_save_to_cache = True

    # calculate lazily
    if layer_name not in settings._receptive_field_per_layer:
        print("Calculating receptive fields for layer %s" % layer_name)
        top_name = layer_name_to_top_name(net, layer_name)
        if top_name is not None:
            blob = net.blobs[top_name].data
            is_spatial = (len(blob.shape) == 4)
            layer_receptive_field = get_max_data_extent(net, settings, layer_name, is_spatial)
            settings._receptive_field_per_layer[layer_name] = layer_receptive_field
            should_save_to_cache = True

    if should_save_to_cache:
        try:
            receptive_fields_cache_filename = _get_receptive_fields_cache_filename(settings)
            mkdir_p(settings.caffevis_outputs_dir)
            with open(receptive_fields_cache_filename, 'wb') as receptive_fields_cache_file:
                pickle.dump(settings._receptive_field_per_layer, receptive_fields_cache_file, -1)
        except IOError:
            # ignore problems in cache saving
            pass

    return settings._receptive_field_per_layer[layer_name]
def calculate_correlation(self, layer_name, outdir):

    # convert list of arrays to numpy array
    all_max_array = np.vstack(self.all_max_vals)

    # skip layers with only one channel
    if all_max_array.shape[1] == 1:
        return

    corr = np.corrcoef(all_max_array.transpose())

    # fix possible NaNs
    corr = np.nan_to_num(corr)
    np.fill_diagonal(corr, 1)

    # sort correlation matrix
    # import cPickle as pickle
    # with open('corr_%s.pickled' % layer_name, 'wb') as ff:
    #     pickle.dump(corr, ff, protocol=2)

    # alternative sorting:
    # values = np.dot(corr, np.arange(corr.shape[0]))
    # indexes = np.argsort(values)
    indexes = np.lexsort(corr)
    sorted_corr = corr[indexes, :][:, indexes]

    # plot correlation matrix
    import matplotlib.pyplot as plt
    fig = plt.figure(figsize=(10, 10))
    plt.subplot(1, 1, 1)
    plt.imshow(sorted_corr, interpolation='nearest', vmin=-1, vmax=1)
    plt.colorbar()
    plt.title('channels activations correlation matrix for layer %s' % layer_name)
    plt.tight_layout()

    # save correlation matrix
    layer_dir = os.path.join(outdir, layer_name)
    mkdir_p(layer_dir)
    filename = os.path.join(layer_dir, 'channels_correlation.png')
    fig.savefig(filename, bbox_inches='tight')
    plt.close()

    return
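# Note on the ordering above: np.lexsort treats each row of 'corr' as a sort key,
# with the last row as the primary key, so channels with similar correlation
# profiles tend to end up adjacent. A tiny self-contained illustration:
import numpy as np

demo = np.array([[1.0, 0.9, 0.1],
                 [0.9, 1.0, 0.2],
                 [0.1, 0.2, 1.0]])
order = np.lexsort(demo)         # column order, sorted by rows (last row primary)
print(demo[order, :][:, order])  # reordered symmetric matrix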
def save_max_tracker_to_file(filename, net_max_tracker):

    dir_name = os.path.dirname(filename)
    mkdir_p(dir_name)

    with WithTimer('Saving maxes'):
        # save unpickleable list of ndarrays with all maximal values separately
        all_max_vals_dict = dict()
        for layer_name in net_max_tracker.layers:
            all_max_vals_dict[layer_name] = net_max_tracker.max_trackers[layer_name].all_max_vals
        np.save(os.path.join(dir_name, 'all_max_vals.npy'), all_max_vals_dict)
        del all_max_vals_dict

        # pickle pickleable Net_Max_Tracker parameters
        with open(filename, 'wb') as ff:
            pickle.dump(net_max_tracker, ff, -1)

        # save text version of pickle file for easier debugging
        pickle_to_text(filename)
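# Hypothetical sketch of the pickle_to_text helper used above (the real helper is
# defined elsewhere in this codebase and may differ): load the pickle back and dump
# a readable repr of the object's attributes next to it for easier debugging.
import pickle

def pickle_to_text_sketch(pickle_filename):
    with open(pickle_filename, 'rb') as ff:
        obj = pickle.load(ff)
    with open(pickle_filename + '.txt', 'w') as ff:
        ff.write(repr(vars(obj)))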
def calculate_weights_histogram_for_specific_unit(net, layer_name, channel_idx, output_dir,
                                                  fig, ax, hist_range=(None, None), force=False):
    hist_filename = os.path.join(output_dir, 'weight_histogram.png')

    if not force and os.path.isfile(hist_filename):
        print 'The weights histogram for unit %d already exists. Skipping...' % channel_idx
    else:
        # get vector of weights
        weights = net.params[layer_name][0].data[channel_idx].flatten()
        bias = net.params[layer_name][1].data[channel_idx]
        weights_num = len(weights)

        # create histogram
        if hist_range == (None, None):
            hist_range = (weights.min(), weights.max())
        hist, bin_edges = np.histogram(weights, bins=50, range=hist_range)

        # generate histogram image file
        width = 0.7 * (bin_edges[1] - bin_edges[0])
        center = (bin_edges[:-1] + bin_edges[1:]) / 2
        ax.bar(center, hist, align='center', width=width, color='g')
        fig.suptitle('weights for unit %d, bias is %f\n %s weights used' % (channel_idx, bias, weights_num))
        ax.xaxis.label.set_text('weight value')
        ax.yaxis.label.set_text('count')

        # save weight histogram as image file
        mkdir_p(output_dir)
        fig.savefig(hist_filename)
        ax.cla()

        return hist, bin_edges
def _gen_sample_images(step, G):
    print "Generating %d sample images..." % len(sample_images)
    G.eval()

    x = float_tensor(np.array([si[1] for si in sample_images]))
    z = float_tensor(np.random.uniform(-1., 1., (x.shape[0], args.Nz)))
    c = one_hot(long_tensor(np.array([misc.pose_class(args.Np, 0.)]).repeat(x.shape[0])), args.Np)
    gen = G(Variable(x), Variable(c), Variable(z)).x.detach().data

    for i, s in enumerate(sample_images):
        grid = vutils.make_grid(gen[i:i + 1], 1, normalize=True, scale_each=True, padding=0)
        writer.add_image(s[0], grid, step)

    if args.outdir:
        misc.mkdir_p(os.path.join(args.outdir, "generated_images"))
        img_path = os.path.join(args.outdir, "generated_images", "%d.jpg" % step)
        vutils.save_image(gen, img_path, normalize=True)

    print "Sample images generated!"
def crawl_pages(subcats):
    dirpath = "data/site/%s/" % config.start_cat
    pages = []
    counter = 0
    for subcat in subcats:
        counter += 1
        pb.update(counter, len(subcats))
        subcat_dirpath = dirpath + subcat + "/"
        misc.mkdir_p(subcat_dirpath)
        filepath = subcat_dirpath + "pages.txt"
        if os.path.exists(filepath):
            subcat_pages = misc.read_file(filepath)
        else:
            subcat_pages = get_subcat_pages(subcat)
            misc.write_file(filepath, subcat_pages)
        pages.extend(subcat_pages)
    pages = [page for page in pages if lang.can_page(page)]
    pages = OrderedDict.fromkeys(pages).keys()  # unique
    return pages
def main():
    misc.mkdir_p("data/site/%s/" % config.start_cat)
    misc.mkdir_p("data/pages/%s/" % config.start_cat)
    misc.mkdir_p("data/speling/%s/" % config.start_cat)

    # stage 1 - obtain the list of subcategories
    print("** Stage 1: Obtaining list of subcategories to crawl. **")
    subcats = crawler.crawl_subcats()

    # stage 2 - obtain the list of pages
    print("** Stage 2: Obtaining list of pages to crawl. **")
    pages = crawler.crawl_pages(subcats)

    # stage 3 - crawl all pages
    print("** Stage 3: Crawling all pages in list. **")
    crawler.crawl_all_pages(pages)

    # stage 4 - parse (scrape) all pages
    print("** Stage 4: Parsing all pages in list. **")
    spelings = parser.parse(pages)

    # stage 5 - write final results to file
    print("** Stage 5: Writing final results to file. **")
    filepath = "data/%s-speling.txt" % config.lang
    with open(filepath, "w") as f:
        for speling in spelings:
            f.write(speling + "\n")

    print("")
    print("")
    print("=== STATS ===")
    print("Crawled %d pages" % len(pages))
    print("Obtained %d spelings" % len(spelings))
    print("Wrote spelings to %s" % filepath)
    print("=============")
def wrapper(*args, **kwargs):
    startHashWall = time.time()

    # hash the function, its args, and its kwargs
    hasher = PersistentHasher()
    hasher.update(function)
    hasher.update(args)
    hasher.update(kwargs)

    # check cache for a previous result
    functionName = function.__name__  # a little more reliable than func_name
    digest = hasher.hexdigest()
    cacheFilename = '%s.%s.pkl.gz' % (digest[:16], functionName)
    # get a unique filename that does not affect any random number generators
    cacheTmpFilename = '.%s-%06d.tmp' % (cacheFilename, datetime.now().microsecond)
    cachePath = os.path.join(globalCacheDir, cacheFilename[:2], cacheFilename)
    cacheTmpPath = os.path.join(globalCacheDir, cacheFilename[:2], cacheTmpFilename)
    elapsedHashWall = time.time() - startHashWall

    try:
        start = time.time()
        if globalCacheVerbose >= 3:
            print (' -> cache.py: %s: trying to load file %s' % (functionName, cachePath))
        (stats, result) = loadFromPklGz(cachePath)
        elapsedWall = time.time() - start
        if globalCacheVerbose >= 1:
            print (' -> cache.py: %s: cache hit (%.04fs hash overhead, %.04fs to load, saved %.04fs)'
                   % (functionName, elapsedHashWall, elapsedWall, stats['timeWall'] - elapsedWall))
        if globalCacheVerbose >= 2:
            print ' -> loaded %s' % cachePath
    except IOError:
        if globalCacheVerbose >= 3:
            print (' -> cache.py: %s: cache miss, computing function' % functionName)
        startWall = time.time()
        startCPU = time.clock()
        result = function(*args, **kwargs)
        elapsedWall = time.time() - startWall
        elapsedCPU = time.clock() - startCPU

        stats = {'functionName': functionName,
                 'timeWall': elapsedWall,
                 'timeCPU': elapsedCPU,
                 'saveDate': datetime.now(),
                 }

        startSave = time.time()
        mkdir_p(os.path.dirname(cachePath))
        if globalCacheVerbose >= 3:
            print (' -> cache.py: %s: function execution finished, saving result to file %s'
                   % (functionName, cachePath))
        # write to a temp file first, then rename, so a partially written cache
        # entry is never visible under the final name
        saveToFile(cacheTmpPath, (stats, result), quiet=True)
        os.rename(cacheTmpPath, cachePath)
        if globalCacheVerbose >= 1:
            print (' -> cache.py: %s: cache miss (%.04fs hash overhead, %.04fs to save, %.04fs to compute)'
                   % (functionName, elapsedHashWall, time.time() - startSave, elapsedWall))
        if globalCacheVerbose >= 2:
            print ' -> saved to %s' % cachePath
    return result
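# Usage sketch for the caching wrapper above. The enclosing decorator that closes
# over 'function' is not shown in this excerpt; here it is assumed to be called
# 'cached' (hypothetical name) and to return 'wrapper'.
@cached
def expensive_sum(n):
    return sum(x * x for x in xrange(n))

first = expensive_sum(10 ** 7)   # cache miss: computes and saves the result
second = expensive_sum(10 ** 7)  # cache hit: loads the pickled result from disk
assert first == second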
def output_max_patches(settings, max_tracker, net, layer_name, idx_begin, idx_end,
                       num_top, datadir, outdir, search_min, do_which):
    '''
    :param settings:
    :param max_tracker:
    :param net:
    :param layer_name:
    :param idx_begin:
    :param idx_end:
    :param num_top:
    :param datadir:
    :param outdir:
    :param search_min:
    :param do_which: do_info must be True
    :return:
    '''
    do_maxes, do_deconv, do_deconv_norm, do_backprop, do_backprop_norm, do_info = do_which
    assert do_maxes or do_deconv or do_deconv_norm or do_backprop or do_backprop_norm or do_info, 'nothing to do'

    sys.path.insert(0, os.path.join(settings.caffevis_caffe_root, 'python'))
    import caffe

    mt = max_tracker
    locs = mt.min_locs if search_min else mt.max_locs
    vals = mt.min_vals if search_min else mt.max_vals

    image_filenames, image_labels = get_files_list(datadir)
    print 'Loaded filenames and labels for %d files' % len(image_filenames)
    print '  First file', os.path.join(datadir, image_filenames[0])

    num_top_in_mt = locs.shape[1]
    assert num_top <= num_top_in_mt, 'Requested %d top images but MaxTracker contains only %d' % (num_top, num_top_in_mt)
    assert idx_end >= idx_begin, 'Range error'

    # minor fix for backwards compatability
    if hasattr(mt, 'is_conv'):
        mt.is_spatial = mt.is_conv

    size_ii, size_jj = get_max_data_extent(net, settings, layer_name, mt.is_spatial)
    data_size_ii, data_size_jj = net.blobs['data'].data.shape[2:4]
    net_input_dims = net.blobs['data'].data.shape[2:4]

    # prepare variables used for batches
    batch = [None] * settings.max_tracker_batch_size
    for i in range(0, settings.max_tracker_batch_size):
        batch[i] = MaxTrackerCropBatchRecord()

    batch_index = 0

    channel_to_info_file = dict()

    n_total_images = (idx_end - idx_begin) * num_top
    for cc, channel_idx in enumerate(range(idx_begin, idx_end)):

        unit_dir = os.path.join(outdir, layer_name, 'unit_%04d' % channel_idx)
        mkdir_p(unit_dir)

        # check if all required outputs exist, in which case skip this iteration
        [info_filename,
         maxim_filenames,
         deconv_filenames,
         deconvnorm_filenames,
         backprop_filenames,
         backpropnorm_filenames] = generate_output_names(
            unit_dir, num_top, do_info, do_maxes, do_deconv,
            do_deconv_norm, do_backprop, do_backprop_norm, search_min)

        relevant_outputs = info_filename + \
                           maxim_filenames + \
                           deconv_filenames + \
                           deconvnorm_filenames + \
                           backprop_filenames + \
                           backpropnorm_filenames

        # we skip generation if:
        # 1. all outputs exist, AND
        # 2.1. (not last iteration OR
        # 2.2. last iteration, but batch is empty)
        relevant_outputs_exist = [os.path.exists(file_name) for file_name in relevant_outputs]
        if all(relevant_outputs_exist) and \
                ((channel_idx != idx_end - 1) or
                 ((channel_idx == idx_end - 1) and (batch_index == 0))):
            print "skipped generation of channel %d in layer %s since files already exist" % (channel_idx, layer_name)
            continue

        if do_info:
            channel_to_info_file[channel_idx] = InfoFileMetadata()
            channel_to_info_file[channel_idx].info_file = open(info_filename[0], 'w')
            channel_to_info_file[channel_idx].ref_count = num_top

            print >> channel_to_info_file[channel_idx].info_file, '# is_spatial val image_idx selected_input_index i(if is_spatial) j(if is_spatial) filename'

        # iterate through maxes from highest (at end) to lowest
        for max_idx_0 in range(num_top):
            batch[batch_index].cc = cc
            batch[batch_index].channel_idx = channel_idx
            batch[batch_index].info_filename = info_filename
            batch[batch_index].maxim_filenames = maxim_filenames
            batch[batch_index].deconv_filenames = deconv_filenames
            batch[batch_index].deconvnorm_filenames = deconvnorm_filenames
            batch[batch_index].backprop_filenames = backprop_filenames
            batch[batch_index].backpropnorm_filenames = backpropnorm_filenames
            batch[batch_index].info_file = channel_to_info_file[channel_idx].info_file
            batch[batch_index].max_idx_0 = max_idx_0
            batch[batch_index].max_idx = num_top_in_mt - 1 - batch[batch_index].max_idx_0

            if mt.is_spatial:

                # fix for backward compatability
                if locs.shape[2] == 5:
                    # remove second column
                    locs = np.delete(locs, 1, 2)

                batch[batch_index].im_idx, batch[batch_index].selected_input_index, \
                    batch[batch_index].ii, batch[batch_index].jj = \
                    locs[batch[batch_index].channel_idx, batch[batch_index].max_idx]
            else:

                # fix for backward compatability
                if locs.shape[2] == 3:
                    # remove second column
                    locs = np.delete(locs, 1, 2)

                batch[batch_index].im_idx, batch[batch_index].selected_input_index = \
                    locs[batch[batch_index].channel_idx, batch[batch_index].max_idx]
                batch[batch_index].ii, batch[batch_index].jj = 0, 0

            # if ii and jj are invalid then there is no data for this "top" image, so we can skip it
            if (batch[batch_index].ii, batch[batch_index].jj) == (-1, -1):
                continue

            batch[batch_index].recorded_val = vals[batch[batch_index].channel_idx, batch[batch_index].max_idx]
            batch[batch_index].filename = image_filenames[batch[batch_index].im_idx]
            do_print = (batch[batch_index].max_idx_0 == 0)
            if do_print:
                print '%s   Output file/image(s) %d/%d   layer %s channel %d' % (
                    datetime.now().ctime(), batch[batch_index].cc * num_top,
                    n_total_images, layer_name, batch[batch_index].channel_idx)

            # print "DEBUG: (mt.is_spatial, batch[batch_index].ii, batch[batch_index].jj, layer_name, size_ii, size_jj, data_size_ii, data_size_jj)", str((mt.is_spatial, batch[batch_index].ii, batch[batch_index].jj, rc, layer_name, size_ii, size_jj, data_size_ii, data_size_jj))

            [batch[batch_index].out_ii_start, batch[batch_index].out_ii_end,
             batch[batch_index].out_jj_start, batch[batch_index].out_jj_end,
             batch[batch_index].data_ii_start, batch[batch_index].data_ii_end,
             batch[batch_index].data_jj_start, batch[batch_index].data_jj_end] = \
                compute_data_layer_focus_area(mt.is_spatial, batch[batch_index].ii, batch[batch_index].jj,
                                              settings, layer_name,
                                              size_ii, size_jj, data_size_ii, data_size_jj)

            # print "DEBUG: channel:%d out_ii_start:%d out_ii_end:%d out_jj_start:%d out_jj_end:%d data_ii_start:%d data_ii_end:%d data_jj_start:%d data_jj_end:%d" % \
            #     (channel_idx,
            #      batch[batch_index].out_ii_start, batch[batch_index].out_ii_end,
            #      batch[batch_index].out_jj_start, batch[batch_index].out_jj_end,
            #      batch[batch_index].data_ii_start, batch[batch_index].data_ii_end,
            #      batch[batch_index].data_jj_start, batch[batch_index].data_jj_end)

            if do_info:
                print >> batch[batch_index].info_file, 1 if mt.is_spatial else 0, '%.6f' % vals[
                    batch[batch_index].channel_idx, batch[batch_index].max_idx],
                if mt.is_spatial:
                    print >> batch[batch_index].info_file, '%d %d %d %d' % tuple(
                        locs[batch[batch_index].channel_idx, batch[batch_index].max_idx]),
                else:
                    print >> batch[batch_index].info_file, '%d %d' % tuple(
                        locs[batch[batch_index].channel_idx, batch[batch_index].max_idx]),
                print >> batch[batch_index].info_file, batch[batch_index].filename

            if not (do_maxes or do_deconv or do_deconv_norm or do_backprop or do_backprop_norm):
                continue

            with WithTimer('Load image', quiet=not do_print):
                # load image
                batch[batch_index].im = caffe.io.load_image(
                    os.path.join(datadir, batch[batch_index].filename), color=True)
                # resize images according to input dimension
                batch[batch_index].im = resize_without_fit(batch[batch_index].im, net_input_dims)
                # convert to float to avoid caffe destroying the image in the scaling phase
                batch[batch_index].im = batch[batch_index].im.astype(np.float32)

            batch_index += 1

            # if current batch is full, or this is the last iteration
            if batch_index == settings.max_tracker_batch_size \
                    or ((channel_idx == idx_end - 1) and (max_idx_0 == num_top - 1)):

                with WithTimer('Predict on batch', quiet=not do_print):
                    im_batch = [record.im for record in batch]
                    net.predict(im_batch, oversample=False)

                # go over batch and update statistics
                for i in range(0, batch_index):

                    batch[i].denormalized_layer_name = layer_name
                    batch[i].denormalized_top_name = layer_name_to_top_name(net, batch[i].denormalized_layer_name)
                    batch[i].layer_format = 'normal'  # non-siamese

                    if len(net.blobs[batch[i].denormalized_top_name].data.shape) == 4:
                        reproduced_val = net.blobs[batch[i].denormalized_top_name].data[
                            i, batch[i].channel_idx, batch[i].ii, batch[i].jj]
                    else:
                        reproduced_val = net.blobs[batch[i].denormalized_top_name].data[i, batch[i].channel_idx]

                    if abs(reproduced_val - batch[i].recorded_val) > .1:
                        print 'Warning: recorded value %s is suspiciously different from reproduced value %s. Is the filelist the same?' % (
                            batch[i].recorded_val, reproduced_val)

                    if do_maxes:
                        # grab image from data layer, not from im (to ensure preprocessing / center crop
                        # details match between image and deconv/backprop)
                        out_arr = extract_patch_from_image(
                            net.blobs['data'].data[i], net, batch[i].selected_input_index, settings,
                            batch[i].data_ii_end, batch[i].data_ii_start,
                            batch[i].data_jj_end, batch[i].data_jj_start,
                            batch[i].out_ii_end, batch[i].out_ii_start,
                            batch[i].out_jj_end, batch[i].out_jj_start,
                            size_ii, size_jj)

                        with WithTimer('Save img', quiet=not do_print):
                            save_caffe_image(out_arr, batch[i].maxim_filenames[batch[i].max_idx_0],
                                             autoscale=False, autoscale_center=0,
                                             channel_swap=settings.channel_swap)

                if do_deconv or do_deconv_norm:
                    # TODO: we can improve performance by doing batch of deconv_from_layer, but only if we group
                    # together instances which have the same selected_input_index, this can be done by holding two
                    # separate batches
                    for i in range(0, batch_index):
                        diffs = net.blobs[batch[i].denormalized_top_name].diff * 0

                        if len(diffs.shape) == 4:
                            diffs[i, batch[i].channel_idx, batch[i].ii, batch[i].jj] = 1.0
                        else:
                            diffs[i, batch[i].channel_idx] = 1.0

                        with WithTimer('Deconv', quiet=not do_print):
                            net.deconv_from_layer(batch[i].denormalized_layer_name, diffs,
                                                  zero_higher=True, deconv_type='Guided Backprop')

                        out_arr = extract_patch_from_image(
                            net.blobs['data'].diff[i], net, batch[i].selected_input_index, settings,
                            batch[i].data_ii_end, batch[i].data_ii_start,
                            batch[i].data_jj_end, batch[i].data_jj_start,
                            batch[i].out_ii_end, batch[i].out_ii_start,
                            batch[i].out_jj_end, batch[i].out_jj_start,
                            size_ii, size_jj)

                        if out_arr.max() == 0:
                            print 'Warning: Deconv out_arr in range', out_arr.min(), 'to', out_arr.max(), 'ensure force_backward: true in prototxt'

                        if do_deconv:
                            with WithTimer('Save img', quiet=not do_print):
                                save_caffe_image(out_arr, batch[i].deconv_filenames[batch[i].max_idx_0],
                                                 autoscale=False, autoscale_center=0,
                                                 channel_swap=settings.channel_swap)
                        if do_deconv_norm:
                            out_arr = np.linalg.norm(out_arr, axis=0)
                            with WithTimer('Save img', quiet=not do_print):
                                save_caffe_image(out_arr, batch[i].deconvnorm_filenames[batch[i].max_idx_0],
                                                 channel_swap=settings.channel_swap)

                if do_backprop or do_backprop_norm:
                    for i in range(0, batch_index):
                        diffs = net.blobs[batch[i].denormalized_top_name].diff * 0

                        if len(diffs.shape) == 4:
                            diffs[i, batch[i].channel_idx, batch[i].ii, batch[i].jj] = 1.0
                        else:
                            diffs[i, batch[i].channel_idx] = 1.0

                        with WithTimer('Backward batch', quiet=not do_print):
                            net.backward_from_layer(batch[i].denormalized_layer_name, diffs)

                    for i in range(0, batch_index):
                        out_arr = extract_patch_from_image(
                            net.blobs['data'].diff[i], net, batch[i].selected_input_index, settings,
                            batch[i].data_ii_end, batch[i].data_ii_start,
                            batch[i].data_jj_end, batch[i].data_jj_start,
                            batch[i].out_ii_end, batch[i].out_ii_start,
                            batch[i].out_jj_end, batch[i].out_jj_start,
                            size_ii, size_jj)

                        if out_arr.max() == 0:
                            print 'Warning: Backprop out_arr in range', out_arr.min(), 'to', out_arr.max(), 'ensure force_backward: true in prototxt'

                        if do_backprop:
                            with WithTimer('Save img', quiet=not do_print):
                                save_caffe_image(out_arr, batch[i].backprop_filenames[batch[i].max_idx_0],
                                                 autoscale=False, autoscale_center=0,
                                                 channel_swap=settings.channel_swap)
                        if do_backprop_norm:
                            out_arr = np.linalg.norm(out_arr, axis=0)
                            with WithTimer('Save img', quiet=not do_print):
                                save_caffe_image(out_arr, batch[i].backpropnorm_filenames[batch[i].max_idx_0],
                                                 channel_swap=settings.channel_swap)

                # close info files
                for i in range(0, batch_index):
                    channel_to_info_file[batch[i].channel_idx].ref_count -= 1
                    if channel_to_info_file[batch[i].channel_idx].ref_count == 0:
                        if do_info:
                            channel_to_info_file[batch[i].channel_idx].info_file.close()

                batch_index = 0
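# Usage sketch for output_max_patches (illustrative values; 'settings', 'net', and
# the max tracker 'mt' are assumed to come from the surrounding toolbox setup, and
# 'conv5' is a hypothetical layer name). do_which packs the six output flags in the
# order unpacked at the top of the function; do_info must be True.
do_which = (True,   # do_maxes
            False,  # do_deconv
            False,  # do_deconv_norm
            False,  # do_backprop
            False,  # do_backprop_norm
            True)   # do_info
output_max_patches(settings, mt, net, 'conv5',
                   idx_begin=0, idx_end=256, num_top=9,
                   datadir='/path/to/images', outdir='/path/to/output',
                   search_min=False, do_which=do_which)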
    '10.1371/journal.pone.0029411',
    '10.1080/17470910802083167',
]


def doi_to_fixture(doi, fname):
    """Retrieve CSL data from DOI and save to file."""
    # retrieve CSL data
    csl = xref.doi_to_csl(doi)

    # write to file
    with open(fname, 'w') as fp:
        json.dump(csl, fp, indent=4)


if __name__ == '__main__':

    # ensure that the fixture directory exists
    misc.mkdir_p(fixture_dir)

    # iterate over DOIs
    for doi in fixture_dois:

        # build file name
        fname = os.path.join(fixture_dir, '%s.json' % misc.escape_doi(doi))

        # save CSL data to file
        doi_to_fixture(doi, fname)