def evaluate(self, n_requests, request_delay):
    """Performance benchmarking for client-pull.

    :param n_requests: number of requests to hit the server with
    :param request_delay: delay in each request
    """
    i = 0
    getScore_time = 0
    while i < n_requests:
        game = random.sample(utils.games, 1)[0]
        time.sleep(request_delay)
        t, r = utils.timeit(self.getScore, game, False)
        getScore_time += t
        i += 1

    getMedalTally_time = 0
    i = 0
    while i < n_requests:
        team = random.sample(utils.teams, 1)[0]
        time.sleep(request_delay)
        medal_type = random.sample(utils.medals, 1)[0]
        t, r = utils.timeit(self.getMedalTally, team, False)
        getMedalTally_time += t
        i += 1

    print "Time taken to perform %d getScore requests:" % n_requests, getScore_time
    print "Time taken to perform each getScore request:", float(getScore_time) / n_requests
    print "Time taken to perform %d getMedalTally requests:" % n_requests, getMedalTally_time
    print "Time taken to perform each getMedalTally request:", float(getMedalTally_time) / n_requests
def evaluate(self, n_requests, request_delay):
    """
    :param n_requests: the number of requests to hit the server with
    :param request_delay: delay in each request
    """
    self.getServer()
    i = 0
    setScore_time = 0
    while i < n_requests:
        game = random.sample(utils.games, 1)[0]
        time.sleep(request_delay)
        t, r = utils.timeit(self.setScore, game, "10", "10", to_print=False)
        setScore_time += t
        i += 1

    incrementMedalTally_time = 0
    i = 0
    while i < n_requests:
        team = random.sample(utils.teams, 1)[0]
        time.sleep(request_delay)
        medal_type = random.sample(utils.medals, 1)[0]
        t, r = utils.timeit(self.incrementMedalTally, team, medal_type, to_print=False)
        incrementMedalTally_time += t
        i += 1

    print "Time taken to perform %d setScore requests:" % n_requests, setScore_time
    print "Time taken to perform each setScore request:", float(setScore_time) / n_requests
    print "Time taken to perform %d incrementMedalTally requests:" % n_requests, incrementMedalTally_time
    print "Time taken to perform each incrementMedalTally request:", float(incrementMedalTally_time) / n_requests
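# The two evaluate() benchmarks above assume a utils.timeit helper that calls
# a function once and returns (elapsed_seconds, result), with printing
# controlled by a to_print flag. That helper is not included in this
# collection; the following is only a minimal sketch of one plausible
# implementation (name, signature, and output format are assumptions):
import time


def timeit(func, *args, **kwargs):
    to_print = kwargs.pop('to_print', True)
    start = time.time()
    result = func(*args, **kwargs)
    elapsed = time.time() - start
    if to_print:
        print('{0} took {1:.6f}s'.format(getattr(func, '__name__', 'call'), elapsed))
    return elapsed, result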
def main():
    cwd = os.path.dirname(os.path.abspath(__file__))

    info("starting")

    for folder in [
        '{}/../reports'.format(cwd),
        '{}/../reports/perf-tests'.format(cwd),
        '{}/../reports/perf-tests/logs'.format(cwd),
        '{}/../reports/perf-tests/graphs'.format(cwd),
        '{}/../reports/perf-tests/metrics'.format(cwd)
    ]:
        os.system('mkdir -p {}'.format(folder))

    for folder in [
        '{}/../reports/perf-tests/metrics/*.json'.format(cwd),
        '{}/../reports/perf-tests/logs/*.log'.format(cwd),
        '{}/../reports/perf-tests/graphs/*.png'.format(cwd),
    ]:
        os.system('rm -rf {}'.format(folder))

    info("setup")

    logs_collector = LogsCollector()
    logs_collector.start()

    info("bootstrap")

    manager = ApplianceManager()
    manager.bootstrap()

    info("start")

    messages_to_push = int(os.environ.get('MESSAGES_PUSHED', '100000'))

    i = 1_000
    while i <= messages_to_push:
        info('pushing {:,.0f} messages through ZMQ'.format(i))
        with timeit('{:,.0f} messages relay'.format(i)):
            with metrics(manager, 'count_{}'.format(i)):
                Publisher(i)
        info('generating graph for {:,.0f} messages'.format(i))
        with timeit('{:,.0f} graph plotting'.format(i)):
            Graph(Metrics('{}/../reports/perf-tests/metrics/metrics.count_{}.json'.format(cwd, i)))
        i *= 10

    info("stopping")

    logs_collector.stop()
    manager.teardown()

    info("stop")

    sys.exit(0)
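# Several drivers in this collection use timeit as a labelled context manager
# (`with timeit('...'):`), including the one above. A minimal contextlib-based
# sketch of such a helper; the real implementations in these repos may differ,
# and the name and output format here are assumptions. With a default label it
# also covers the bare `with timeit():` form seen in later snippets.
import time
from contextlib import contextmanager


@contextmanager
def timeit(label=''):
    start = time.time()
    try:
        yield
    finally:
        print('{} took {:.3f}s'.format(label or 'block', time.time() - start))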
def main():
    matrix = HT1632C()
    while True:
        for i in range(1, 9):
            matrix.fill(BLACK)
            x = (i - 1) % 4 * 8
            y = int(i > 4) * 8
            matrix.text(str(i), x, y, GREEN)
            timeit(matrix.show)
def test_ssim():
    video, calibration_image = get_capture_and_calibration_image_video2()
    frame = video.read_at_pos(286)

    true_ellipse_image1 = frame[551:551 + 67, 418:418 + 69]
    true_ellipse_image2 = video.read_at_pos(820)[269:269 + 67, 659:659 + 69]
    false_ellipse_image = frame[501:501 + 67, 745:745 + 69]
    # false_ellipse_image = cv2.resize(false_ellipse_image, (true_ellipse_image.shape[1], true_ellipse_image.shape[0]))

    detector = get_detector()

    # ind1, ind2 = calc_ssi(true_ellipse_image1, false_ellipse_image)
    # print('ssind', ind1, ind2)
    # ind1, ind2 = calc_ssi(true_ellipse_image1, true_ellipse_image2)
    # print('ssind', ind1, ind2)
    # ind1, ind2 = calc_ssi(true_ellipse_image2, false_ellipse_image)
    # print('ssind', ind1, ind2)
    # print('-------------------')
    # ind1, ind2 = calc_ssi(true_ellipse_image1, true_ellipse_image1)
    # print('ssind', ind1, ind2)
    # ind1, ind2 = calc_ssi(true_ellipse_image2, true_ellipse_image2)
    # print('ssind', ind1, ind2)
    # ind1, ind2 = calc_ssi(false_ellipse_image, false_ellipse_image)
    # print('ssind', ind1, ind2)

    i = 1000
    print(utils.timeit(lambda: calc_ssi(true_ellipse_image1, false_ellipse_image), i))
    print(utils.timeit(lambda: calc_ssi(true_ellipse_image1, true_ellipse_image2), i))
    print(utils.timeit(lambda: calc_ssi(true_ellipse_image2, false_ellipse_image), i))
    return

    # Unreachable below the early return: kept for manual visual inspection.
    true_ellipse_wnd1 = CvNamedWindow('true_ellipse1', cv2.WINDOW_NORMAL)
    detect_and_show(true_ellipse_wnd1, detector, true_ellipse_image1.copy(), None, wait=False)
    false_ellipse_wnd = CvNamedWindow('false_ellipse', cv2.WINDOW_NORMAL)
    detect_and_show(false_ellipse_wnd, detector, false_ellipse_image.copy(), None, wait=True)
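# test_ssim() calls utils.timeit(fn, n) with a zero-argument callable and a
# repetition count, and prints whatever it returns. A sketch under that
# assumption (the signature and return value are guesses from the call site):
import time


def timeit(fn, n=1):
    start = time.perf_counter()
    for _ in range(n):
        fn()
    total = time.perf_counter() - start
    return 'total {:.4f}s, avg {:.6f}s over {} runs'.format(total, total / n, n)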
def _load_data(dataset, is_training=False):
    """Load input data, target values and file names for a dataset.

    The input data is assumed to be a dataset of feature vectors. These
    feature vectors are standardized using a scaler that is either
    loaded from disk (if it exists) or computed on-the-fly. The latter
    is only possible if the input data is training data, which is
    indicated by the `is_training` parameter.

    Target values and file names are read from the metadata file.

    Args:
        dataset: Structure encapsulating dataset information.
        is_training (bool): Whether the input data is training data.

    Returns:
        x (np.ndarray): The input data.
        y (np.ndarray): The target values.
        names (list): The associated file names.
    """
    import data_augmentation as aug
    import features

    features_path = os.path.join(cfg.extraction_path, dataset.name + '.h5')
    x = utils.timeit(lambda: features.load_features(features_path),
                     'Loaded features of %s dataset' % dataset.name)

    # Clip dynamic range to 90 dB
    x = np.maximum(x, x.max() - 90.0)

    # Load scaler from file if cached, or else compute it.
    scaler_path = cfg.scaler_path
    if os.path.exists(scaler_path) or not is_training:
        with open(scaler_path, 'rb') as f:
            scaler = pickle.load(f)
    else:
        scaler = utils.timeit(lambda: utils.compute_scaler(x),
                              'Computed standard scaler')
        with open(scaler_path, 'wb') as f:
            pickle.dump(scaler, f)

    x = utils.timeit(lambda: utils.standardize(x, scaler),
                     'Standardized %s features' % dataset.name)

    names, y = utils.timeit(lambda: utils.read_metadata(dataset.metadata_path),
                            'Loaded %s metadata' % dataset.name)
    if dataset == cfg.training_set and cfg.enable_augmentation:
        names, y = aug.expand_metadata((names, y))

    return x, y, names
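# _load_data and the related loaders below rely on a utils.timeit(fn, message)
# variant that invokes the callable once, prints the given message together
# with the elapsed time, and passes the callable's return value through. A
# minimal sketch of such a helper (assumed, not the project's actual code):
import time


def timeit(fn, message):
    start = time.time()
    result = fn()
    print('%s (%.2fs)' % (message, time.time() - start))
    return result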
def benchmark(order=None):
    """Run nilearn.signal._detrend"""
    shape = (201, 200000)

    print("Running for %s order..." % order)
    rand_gen = np.random.RandomState(0)
    series = np.ndarray(shape, order=order)
    series[...] = rand_gen.randn(*shape)

    output1 = utils.timeit(profile(nilearn.signal._detrend))(series)
    time.sleep(0.5)  # For memory_profiler
    del output1

    output2 = utils.timeit(profile(scipy.signal.detrend))(series, axis=0)
    time.sleep(0.5)  # For memory_profiler
    del output2
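# The nilearn-style benchmarks in this collection wrap callables as
# utils.timeit(profile(fn))(*args): timeit acts as a decorator that prints the
# call's duration and returns the wrapped function's result. A sketch
# consistent with those call sites (an assumed implementation, not nilearn's):
import time
import functools


def timeit(fn):
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        start = time.time()
        result = fn(*args, **kwargs)
        print('%s: %.3fs' % (fn.__name__, time.time() - start))
        return result
    return wrapper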
def _load_features(dataset, data_path, block_size=128):
    """Load the features and the associated metadata for a dataset.

    The metadata is read from a CSV file and returned as a DataFrame.
    Each DataFrame entry corresponds to an instance in the dataset.

    Args:
        dataset (Dataset): Information about the dataset.
        data_path (str): Path to directory containing feature vectors.
        block_size (int): Size of the blocks the feature vectors are
            split into when loaded.

    Returns:
        tuple: Tuple containing the array of feature vectors and the
        metadata of the dataset.
    """
    import features
    import utils

    # Load feature vectors from disk
    features_path = os.path.join(data_path, dataset.name + '.h5')
    x, n_blocks = utils.timeit(
        lambda: features.load_features(features_path, block_size, block_size // 4),
        f'Loaded features of {dataset.name} dataset')

    # Reshape feature vectors: NxTxF -> NxTxFx1
    x = np.expand_dims(x, axis=-1)

    # Load metadata and duplicate entries based on number of blocks
    df = pd.read_csv(dataset.metadata_path, index_col=0)
    df = df.loc[np.repeat(df.index, n_blocks)]

    return x, df
def main():
    global cfg, logger, execute_sql

    cfg = load_config()
    log_cfg = cfg['log']
    log_file = log_cfg['file']
    log_level = log_cfg['level']
    logger = mylogger()
    logger.init_logger('task-purger', log_level, log_file,
                       SysLogHandler.LOG_LOCAL1)  # For debug
    logger.info('Starting...................')

    execute_sql = timeit(execute_sql, 'SQL', logger)

    # signal.signal(signal.SIGQUIT, stop_all)
    # signal.signal(signal.SIGTERM, stop_all)

    dbpc_cfg = cfg['dbpc']
    DBPC_HOST = dbpc_cfg['host']
    DBPC_PORT = int(dbpc_cfg['port'])
    DBPC_SERVICE = dbpc_cfg['service']
    DBPC_COMPONENT = dbpc_cfg['component']
    DBPC_INTERVAL = int(dbpc_cfg['interval'])
    t_dbpc = dbpc.dbpc(DBPC_HOST, DBPC_PORT, DBPC_SERVICE,
                       DBPC_COMPONENT, DBPC_INTERVAL)
    t_dbpc.start()

    try:
        loop()
    except KeyboardInterrupt:
        pass

    logger.info('Stopping...................')
    print 'THE END.'
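# Here timeit(func, tag, logger) rebinds a function to a wrapper that logs the
# duration of every call through the given logger under a tag. A sketch of
# that wrapper (names and log format assumed from the call sites; the actual
# helper in these services is not shown):
import time
import functools


def timeit(func, tag, logger):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start = time.time()
        result = func(*args, **kwargs)
        logger.info('[{0}] {1} took {2:.3f}s'.format(
            tag, func.__name__, time.time() - start))
        return result
    return wrapper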
def _load_dataset(dataset):
    """Load input data and the associated metadata for a dataset.

    Args:
        dataset: Structure encapsulating dataset information.

    Returns:
        tuple: Tuple containing:

            x (np.ndarray): The input data of the dataset.
            df (pd.DataFrame): The metadata of the dataset.
    """
    import features

    # Load feature vectors and reshape to 4D tensor
    features_path = os.path.join(cfg.extraction_path, dataset.name + '.h5')
    x, n_chunks = utils.timeit(lambda: features.load_features(features_path),
                               'Loaded features of %s dataset' % dataset.name)
    x = np.expand_dims(x, -1)
    assert x.ndim == 4

    # Load metadata and duplicate entries based on number of chunks
    df = io.read_metadata(dataset.metadata_path)

    return x, df
def benchmark(order=None):
    """Run nilearn.signal._mean_of_squares"""
    shape = (201, 200001)

    print("Running for %s order..." % order)
    rand_gen = np.random.RandomState(0)
    series = np.ndarray(shape, order=order)
    series[...] = rand_gen.randn(*shape)

    output1 = utils.timeit(profile(nilearn.signal._mean_of_squares))(series)
    time.sleep(0.5)  # For memory_profiler
    # del output1

    output2 = utils.timeit(profile(ref_mean_of_squares))(series)
    time.sleep(0.5)  # For memory_profiler
    # del output2

    np.testing.assert_almost_equal(output1, output2)
def inference_graph(tf_server, image, model_name):
    with open(image, "rb") as f:
        bs64_img = base64.b64encode(f.read()).decode("utf8")

    card_ = PredictCard(None)
    with timeit("tf-serving") as t:
        card = card_.tf_serving_main(bs64_img, tf_server, model_name)

    # Frozen graph implementation.
    img = Image.open(open(image, "rb"))
    image_np = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)
    object_detection = ObjectDetection(*set_graph_and_tensors())
    predict_card = PredictCard(object_detection)
    with timeit("frozen-graph") as t:
        card = predict_card.main(image=img)
def rama_pmf(data, A, C, **kw):
    pt_dd = U.get_pt_dd(C, A.property, A.plot_type, A.v)
    if not pt_dd:
        # if there is no rama_pmf, search for rama instead; this is trying to
        # reduce duplication in .xitconfig.yaml
        pt_dd = U.get_pt_dd(C, A.property, 'rama', A.v)
    logger.info(pt_dd)
    ncol, nrow = U.gen_rc(len(data.keys()), pt_dd)
    fig, axes = plt.subplots(nrows=nrow, ncols=ncol, figsize=(ncol * 7, nrow * 6))

    # to make the data type consistent for the following analysis.
    # by the way, this is a very weirdly behaved api --2013-08-07
    # http://matplotlib.org/api/pyplot_api.html?highlight=subplots#matplotlib.pyplot.subplots
    if isinstance(axes, np.ndarray):
        axes = axes.flat
    elif isinstance(axes, Subplot):
        axes = [axes]

    if 'subplots_adjust' in pt_dd:
        fig.subplots_adjust(**pt_dd['subplots_adjust'])

    if 'bins' in pt_dd:
        bins = np.arange(*pt_dd.get('bins'))
    else:
        bins = np.arange(-180, 171, 4)
    logger.info('bins:\n {0}'.format(bins))

    normed = pt_dd.get('normed', False)
    gk_xypmfs = []
    min_, max_ = get_min_max(data, bins, normed, gk_xypmfs)
    logger.info("min: {0}; max: {1}".format(min_, max_))

    for c, (gk, phi_edges, psi_edges, h_pmf) in enumerate(gk_xypmfs):
        ax = axes[c]
        cmap = getattr(cm, pt_dd.get('cmap', 'jet'))
        cmap.set_over('white')
        params = get_params(gk, pt_dd)
        logger.info('params: {0}'.format(params))
        F = U.timeit(ax.contourf)
        contour = F(phi_edges, psi_edges, h_pmf, **params)
        decorate_ax(ax, pt_dd, ncol, nrow, c, gk, A)

    cax = fig.add_axes([0.92, 0.2, 0.02, 0.6])  # left, bottom, width, height
    cbar = plt.colorbar(contour, cax=cax)
    if 'cbar_ylabel' in pt_dd:
        cbar.ax.set_ylabel(**pt_dd['cbar_ylabel'])

    plt.savefig(U.gen_output_filename(A, C), **pt_dd.get('savefig', {}))
def benchmark():
    n_regions = 1500

    print("Loading data ...")
    adhd = nilearn.datasets.fetch_adhd()
    filename = adhd["func"][0]
    img = nilearn.utils.check_niimg(filename)
    shape = img.shape[:3]
    affine = img.get_affine()
    _ = img.get_data()  # Preload data

    print("Generating regions ...")
    regions = nilearn.testing.generate_labeled_regions_large(shape, n_regions,
                                                             affine=affine)
    signals, labels = utils.timeit(profile(nilearn.region.img_to_signals_labels)
                                   )(img, regions)
    img_r = utils.timeit(profile(nilearn.region.signals_to_img_labels)
                         )(signals, regions, order='C')
def predict(dataset, fold):
    """Generate predictions for audio tagging.

    This function uses an ensemble of trained models to generate the
    predictions, with the averaging function being an arithmetic mean.
    Computed predictions are then saved to disk.

    Args:
        dataset: Dataset to generate predictions for.
        fold (int): The specific fold to generate predictions for. Only
            applicable for the training dataset.
    """
    import inference

    # Load input data and associated metadata
    x, df = _load_data(dataset)

    dataset_name = dataset.name
    if dataset.name == 'training':
        if fold == -1:
            raise ValueError('Invalid fold: %d' % fold)

        dataset_name += str(fold)
        mask = df.fold == fold
        tr_x = x[~mask]
        x = x[mask]
        df = df[mask]
    else:
        tr_x, tr_df = _load_data(cfg.to_dataset('training'))
        if fold >= 0:
            dataset_name += str(fold)
            tr_x = tr_x[tr_df.fold != fold]

    generator = utils.fit_scaler(tr_x)
    x = generator.standardize(x)

    # Predict class probabilities for each model (epoch)
    preds = []
    for epoch in _determine_epochs(cfg.prediction_epochs, fold, n=4):
        pred = utils.timeit(lambda: _load_model(fold, epoch).predict(x),
                            '[Epoch %d] Predicted class probabilities' % epoch)
        preds.append(inference.merge_predictions(pred, df.index))
    pred_mean = pd.concat(preds).groupby(level=0).mean()

    # Ensure output directory exists and set file path format
    os.makedirs(os.path.dirname(cfg.predictions_path), exist_ok=True)
    predictions_path = cfg.predictions_path.format('%s', dataset_name)

    # Save free parameters to disk
    utils.log_parameters({'prediction_epochs': cfg.prediction_epochs},
                         os.path.join(os.path.dirname(cfg.predictions_path),
                                      'parameters.json'))

    # Write predictions to disk
    pred_mean.to_csv(predictions_path % 'predictions')
    io.write_predictions(pred_mean, predictions_path % 'submission')
def search_by_frame(self, dataset_id: str, reference_frame_id: int, **kwargs) -> ResultSet:
    dataset = self.repository.load(dataset_id)
    reference_frame = dataset.get_frame_by_id(reference_frame_id)
    with timeit(f"search {len(dataset.frames)} items"):
        return SearchEngine.search(
            dataset,
            matcher=self.get_frame_matcher(reference_frame),
            **kwargs)
def benchmark(kind="gz"): data_filename, mask_filename = get_filenames(kind=kind) smooth = 2 if utils.cache_tools_available: print("Invalidating cache of input file...") utils.dontneed(data_filename) utils.dontneed(mask_filename) print("Masking data...") masked = utils.timeit(profile(nilearn.masking.apply_mask) )(data_filename, mask_filename, smooth=smooth) del masked print("Masking data...") masked = utils.timeit(profile(nilearn.masking.apply_mask) )(data_filename, mask_filename, smooth=smooth) del masked
def modified_gsc(signals, parameters, probe=None):
    """Modified group_sparse_covariance, just for joblib wrapping."""
    _, est_precs = utils.timeit(group_sparse_covariance)(
        signals, parameters['alpha'],
        max_iter=parameters['max_iter'],
        tol=parameters['tol'],
        probe_function=probe,
        precisions_init=parameters.get("precisions_init", None),
        verbose=1, debug=False)

    return est_precs, probe
def test_loadlite(litefpath):
    unet = UNetTfLite(litefpath)
    for i in range(30):
        fpath = "dataset/membrane/test/{}.png".format(i)
        fpath2 = "dataset/membrane/test/{}_pb.png".format(i)
        img = cv2.imread(fpath)
        print(fpath, img.shape)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        with utils.timeit():
            result = unet.predict(gray)
def benchmark(order=None):
    """Run nilearn.signal.high_variance_confounds"""
    shape = (201, 200000)

    if order == "C" or order is None:
        print("Running for C order...")
        rand_gen = np.random.RandomState(0)
        series = np.ndarray(shape, order="C")
        series[...] = rand_gen.randn(*shape)
        output = utils.timeit(profile(nilearn.signal.high_variance_confounds)
                              )(series)
        time.sleep(0.5)  # For memory_profiler
        del output

    if order == "F" or order is None:
        print("Running for F order...")
        rand_gen = np.random.RandomState(0)
        series = np.ndarray(shape, order="F")
        series[...] = rand_gen.randn(*shape)
        output = utils.timeit(profile(nilearn.signal.high_variance_confounds)
                              )(series)
        time.sleep(0.5)  # For memory_profiler
        del output
def lasso_gsc_comparison():
    """Check that graph lasso and group-sparse covariance give the same
    output for a single task."""
    from sklearn.covariance import graph_lasso, empirical_covariance

    parameters = {'n_tasks': 1, 'n_var': 20, 'density': 0.15,
                  'rho': .2, 'tol': 1e-4, 'max_iter': 50}

    _, _, gt = create_signals(parameters, output_dir=output_dir)
    signals = gt["signals"]

    _, gsc_precision = utils.timeit(group_sparse_covariance)(
        signals, parameters['rho'], max_iter=parameters['max_iter'],
        tol=parameters['tol'], verbose=1, debug=False)

    emp_cov = empirical_covariance(signals[0])
    _, gl_precision = utils.timeit(graph_lasso)(
        emp_cov, parameters['rho'], tol=parameters['tol'],
        max_iter=parameters['max_iter'])

    np.testing.assert_almost_equal(gl_precision, gsc_precision[..., 0],
                                   decimal=4)
def benchmark2(output_dir="_prof_group_sparse_covariance"):
    """Run GroupSparseCovarianceCV on a simple case, for benchmarking."""
    parameters = {'n_tasks': 40, 'n_var': 10, 'density': 0.15,
                  'alphas': 4, 'tol': 1e-4, 'max_iter': 50}
    parameters["tol_cv"] = parameters["tol"]
    parameters["max_iter_cv"] = parameters["max_iter"]

    _, _, gt = create_signals(parameters, output_dir=output_dir)

    gsc = GroupSparseCovarianceCV(alphas=parameters['alphas'],
                                  max_iter=parameters['max_iter'],
                                  tol=parameters['tol'],
                                  max_iter_cv=parameters['max_iter_cv'],
                                  tol_cv=parameters['tol_cv'],
                                  verbose=1, debug=False,
                                  early_stopping=True)
    utils.timeit(gsc.fit)(gt["signals"])
    print(gsc.alpha_)
    utils.cache_array(gsc.precisions_,
                      os.path.join(output_dir,
                                   "est_precs_cv_{n_var:d}.npy".format(**parameters)),
                      decimal=3)
def test_loadpb(pbfpath):
    unet = UNetPB(pbfpath, (256, 256, 1))
    for i in range(30):
        fpath = "dataset/membrane/test/{}.png".format(i)
        fpath2 = "dataset/membrane/test/{}_pb.png".format(i)
        img = cv2.imread(fpath)
        print(fpath, img.shape)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        with utils.timeit():
            result = unet.predict(gray)
        print(result.min(), result.max(), result.shape)
        cv2.imwrite(fpath2, result)
def benchmark1(output_dir="_prof_group_sparse_covariance"):
    """Run group_sparse_covariance on a simple case, for benchmarking."""
    parameters = {'n_tasks': 40, 'n_var': 30, 'density': 0.15,
                  'alpha': .01, 'tol': 1e-4, 'max_iter': 50}

    _, _, gt = create_signals(parameters, output_dir=output_dir)

    _, est_precs = utils.timeit(group_sparse_covariance)(
        gt["signals"], parameters['alpha'], max_iter=parameters['max_iter'],
        tol=parameters['tol'], verbose=1, debug=False)

    # Check that output doesn't change between invocations.
    utils.cache_array(est_precs,
                      os.path.join(output_dir, "benchmark1_est_precs.npy"),
                      decimal=4)
def predict(dataset):
    """Generate predictions for audio tagging and sound event detection.

    This function uses an ensemble of trained models to generate the
    predictions, with the averaging function being an arithmetic mean.
    Computed predictions are then saved to disk.

    Args:
        dataset: Dataset to generate predictions for.
    """
    import capsnet

    # Load (standardized) input data and associated file names
    test_x, _, names = _load_data(dataset)

    # Predict class probabilities for each model (epoch)
    at_preds, sed_preds = [], []
    for epoch in _determine_epochs(cfg.prediction_epochs):
        model = _load_model(epoch)
        at_pred, sed_pred = utils.timeit(
            lambda: capsnet.gccaps_predict(test_x, model),
            '[Epoch %d] Predicted class probabilities' % epoch)
        at_preds.append(at_pred)
        sed_preds.append(sed_pred)

    # Average predictions to give an overall output
    total_at_pred = np.mean(at_preds, axis=0)
    total_sed_pred = np.mean(sed_preds, axis=0)

    # Ensure output directory exists and set file path format
    os.makedirs(os.path.dirname(cfg.predictions_path), exist_ok=True)
    predictions_path = cfg.predictions_path.format('%s', dataset.name)

    # Save free parameters to disk
    utils.log_parameters({'prediction_epochs': cfg.prediction_epochs},
                         os.path.join(os.path.dirname(cfg.predictions_path),
                                      'parameters.json'))

    # Write predictions to disk
    utils.write_predictions(names, total_at_pred, predictions_path % 'at')
    utils.write_predictions(names, total_sed_pred, predictions_path % 'sed')
def run_etl():
    """Run the ETL for today."""
    # Get dropbox connector
    vdp = u.get_vdropbox()

    download_log(vdp)
    detect_env()

    log.info("Starting vtasks")
    result = u.timeit(flow.run)(mdate=date.today())
    log.info("End of vtasks")

    copy_log(vdp)

    if not result.is_successful():
        log.error("ETL has failed")
        raise ValueError("ETL has failed")
def brute_force_study(output_dir="_early_stopping"):
    """Loop through many values of alpha, and run a full gsc for each.

    Record information for each iteration using CostProbe, store the obtained
    values on disk.

    Plot scores on train and test sets versus wall-clock time.
    """
    parameters = {'n_tasks': 10, 'tol': 1e-3, 'max_iter': 50,
                  "fold_n": 2, "n_alphas": 20}
    mem = joblib.Memory(".")

    print("-- Extracting signals ...")
    signals = []
    for n in range(parameters["n_tasks"]):
        signals.append(mem.cache(region_signals)(n))

    signals, test_signals, emp_covs, test_emp_covs, n_samples_norm = \
        split_signals(signals, fold_n=parameters["fold_n"])

    print("-- Optimizing --")
    alpha_mx, _ = compute_alpha_max(emp_covs, n_samples_norm)
    # alphas = np.logspace(-3, -1, 10)
    alphas = np.logspace(np.log10(alpha_mx / 500), np.log10(alpha_mx),
                         parameters["n_alphas"])
    cost_probes = []
    t0 = time.time()
    for alpha in alphas:
        # Honorio-Samaras
        cost_probes.append(CostProbe(test_emp_covs))
        _, est_precs = utils.timeit(group_sparse_covariance)(
            signals, alpha, max_iter=parameters['max_iter'],
            tol=parameters['tol'], verbose=1, debug=False,
            probe_function=cost_probes[-1])
    t1 = time.time()
    print('Time spent in loop: %.2fs' % (t1 - t0))

    out_filename = os.path.join(output_dir, 'brute_force_study.pickle')
    pickle.dump([alphas, cost_probes], open(out_filename, "wb"))
    print("Use plot_early_stopping.py to analyze the generated file:\n"
          "%s" % out_filename)
def main():
    args = engine.parser.parse_args()
    filenames = utils.get_filenames(args.input)
    files_exist = len(filenames) != 0
    stopwords_exist = os.path.isfile(args.stopwords)

    if files_exist and stopwords_exist:
        used_tokenizer = engine.tokenizers[args.tokenizer]
        if used_tokenizer.has_rule(rules.stopping):
            used_tokenizer.make_rule(rules.stopping, args.stopwords)

        values = ['store_positions', 'calculate_tfidf']
        combinations = [{
            key: value
            for key, value in zip(values, option)
        } for option in product([True, False], repeat=len(values))]

        for combination in combinations:
            (indexer, max_memory), interval = utils.timeit(
                utils.profileit,
                engine.indexit,
                used_tokenizer,
                filenames,
                memory_usage=args.memory,
                **combination)
            indexer.save(args.output)
            print('Answers({}):'.format(', '.join([
                '{} = {}'.format(key, value)
                for key, value in combination.items()
            ])))
            print('Time taken: {}s'.format(interval))
            print('Max memory usage: {}'.format(utils.sizeof_fmt(max_memory)))
            print('Disk size: {}'.format(
                utils.sizeof_fmt(os.path.getsize(args.output))))
            indexer.dispose()
            del indexer
    else:
        if not files_exist:
            print('Error: File or directory (with files) to index doesn\'t exist!')
        if not stopwords_exist:
            print('Error: Stopwords\' file doesn\'t exist!')
def main():
    parser.add_argument(
        '--store_positions',
        action='store_true',
        help='Indicates if indexer stores positions of terms or not')
    parser.add_argument(
        '--tfidf',
        action='store_true',
        help='Indicates if program calculates tfidf or not')
    args = parser.parse_args()

    filenames = utils.get_filenames(args.input)
    files_exist = len(filenames) != 0
    stopwords_exist = os.path.isfile(args.stopwords)

    if files_exist and stopwords_exist:
        used_tokenizer = tokenizers[args.tokenizer]
        if used_tokenizer.has_rule(rules.stopping):
            used_tokenizer.make_rule(rules.stopping, args.stopwords)

        (index, max_memory), interval = utils.timeit(
            utils.profileit,
            indexit,
            used_tokenizer,
            filenames,
            store_positions=args.store_positions,
            calculate_tfidf=args.tfidf,
            memory_usage=args.memory)
        index.save(args.output)

        print('Answers:')
        print('Time taken: {}s'.format(interval))
        print('Max memory usage: {}'.format(utils.sizeof_fmt(max_memory)))
        print('Disk size: {}'.format(
            utils.sizeof_fmt(os.path.getsize('{}.csv'.format(args.output)))))

        shutil.rmtree('index')
    else:
        if not files_exist:
            print('Error: File or directory (with files) to index doesn\'t exist!')
        if not stopwords_exist:
            print('Error: Stopwords\' file doesn\'t exist!')
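# The two indexer drivers above compose timing with memory profiling:
# utils.timeit(utils.profileit, fn, *args, **kwargs) calls
# profileit(fn, *args, **kwargs) -- which runs fn and reports its peak memory
# -- and returns (profileit_result, interval). A sketch of the timing half;
# profileit and the exact return shapes are assumptions based on the
# unpacking at the call sites:
import time


def timeit(func, *args, **kwargs):
    start = time.time()
    result = func(*args, **kwargs)
    return result, time.time() - start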
def prof_graph_lasso_cv(random_state_seed=1):
    # Sample data from a sparse multivariate normal
    dim = 10  # 80
    n_samples = 60

    # Generate input data
    random_state = check_random_state(random_state_seed)
    prec = make_sparse_spd_matrix(dim, alpha=.96, random_state=random_state)
    cov = linalg.inv(prec)
    X = random_state.multivariate_normal(np.zeros(dim), cov, size=n_samples)
    utils.cache_value(X, "prof_graph_lasso_cv/X_%d_%d_%d" %
                      (dim, n_samples, random_state_seed))

    # Test with alphas as integer
    ## mode = 'cd'
    ## gl1 = utils.timeit(GraphLassoCV(verbose=1, alphas=3, mode=mode).fit)(X)
    ## utils.cache_value(gl1.covariance_,
    ##                   "prof_graph_lasso_cv/covariance_%d_%d_%d" %
    ##                   (dim, n_samples, random_state_seed))
    ## utils.cache_value(gl1.precision_,
    ##                   "prof_graph_lasso_cv/precision_%d_%d_%d" %
    ##                   (dim, n_samples, random_state_seed))

    # Test with alphas as list.
    # Take same alphas as were found in the first step, check the result
    # is the same.
    ## gl2 = utils.timeit(GraphLassoCV(alphas=gl1.cv_alphas_, n_jobs=1,
    ##                                 mode=mode).fit)(X)
    ## np.testing.assert_almost_equal(gl1.covariance_, gl2.covariance_,
    ##                                decimal=3)
    ## np.testing.assert_almost_equal(gl1.precision_, gl2.precision_,
    ##                                decimal=3)
    ## np.testing.assert_almost_equal(gl1.alpha_, gl2.alpha_)

    # Smoke test with an alternate cross-validation object.
    gl3 = utils.timeit(GraphLassoCV(cv=KFold(n=X.shape[0], n_folds=20),
                                    n_jobs=1).fit)(X)
def singular_cov_case():
    """Check behaviour of algorithm for singular input matrix."""
    parameters = {'n_tasks': 10, 'n_var': 40, 'density': 0.15,
                  'rho': .1, 'tol': 1e-2, 'max_iter': 50,
                  'min_samples': 10, 'max_samples': 15}

    _, _, gt = create_signals(parameters, output_dir=output_dir)
    signals = gt["signals"]

    emp_covs, _ = empirical_covariances(signals)

    # Check that all covariance matrices are singular.
    eps = np.finfo(float).eps
    for k in range(emp_covs.shape[-1]):
        eigvals = np.linalg.eigvalsh(emp_covs[..., k])
        assert(abs(eigvals.min()) <= 50 * eps)

    _, gsc_precisions = utils.timeit(group_sparse_covariance)(
        signals, parameters['rho'], max_iter=parameters['max_iter'],
        tol=parameters['tol'], verbose=1, debug=False)

    print('found sparsity: {0:.3f}'
          ''.format(1. * (gsc_precisions[..., 0] != 0).sum()
                    / gsc_precisions.shape[0] ** 2))
def main():
    global cfg, logger, flag_queue
    global execute_sql, insert_vddb, _do_insert

    cfg = load_config()
    logger = mylogger()
    p_manager = Manager()
    flag_queue = p_manager.Queue()

    log_cfg = cfg['log']
    log_file = log_cfg['file']
    log_level = log_cfg['level']
    logger.init_logger('tmp-result-generator', log_level, log_file,
                       SysLogHandler.LOG_LOCAL1)  # For debug

    execute_sql = timeit(execute_sql, 'SQL', logger)
    insert_vddb = timeit(insert_vddb, 'VDDB', logger)
    _do_insert = timeit(_do_insert, 'INSERT()', logger)

    signal.signal(signal.SIGQUIT, stop_all)
    signal.signal(signal.SIGTERM, stop_all)

    logger.info('==================[Starting....]===================')

    db_type = cfg['db-type']
    db_info = cfg['db'][db_type]
    pool_size_get = cfg['pool-size-get']
    pool_size_insert = cfg['pool-size-insert']

    dbpc_cfg = cfg['dbpc']
    DBPC_HOST = dbpc_cfg['host']
    DBPC_PORT = int(dbpc_cfg['port'])
    DBPC_SERVICE = dbpc_cfg['service']
    DBPC_COMPONENT = dbpc_cfg['component']
    DBPC_INTERVAL = int(dbpc_cfg['interval'])
    t_dbpc = dbpc.dbpc(DBPC_HOST, DBPC_PORT, DBPC_SERVICE,
                       DBPC_COMPONENT, DBPC_INTERVAL)
    t_dbpc.start()

    pool_get = Pool(pool_size_get)
    for i in range(pool_size_get):
        pool_get.apply_async(loop_get, args=(flag_queue, db_info))
    pool_get.close()

    manager = Manager()
    res_queue = manager.Queue(pool_size_insert)
    task_queue = manager.Queue(pool_size_insert)
    pool_insert = Pool(pool_size_insert)
    for i in range(pool_size_insert):
        pool_insert.apply_async(do_insert,
                                args=(flag_queue, task_queue, res_queue, db_info))
    pool_insert.close()

    try:
        loop_insert(flag_queue, task_queue, res_queue, db_info)
    except KeyboardInterrupt:
        pass

    stop_all(None, None)
    print '<THE END>'
    logger.info('======================[THE END]==================')
def main(args):
    print('Starting')
    matplotlib.use('agg')
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    checkpoints = args.checkpoint.parent.glob(args.checkpoint.name + '_*.pth')
    checkpoints = [c for c in checkpoints if extract_id(c) in args.decoders]
    assert len(checkpoints) >= 1, "No checkpoints found."

    model_args = torch.load(args.model.parent / 'args.pth')[0]
    encoder = wavenet_models.Encoder(model_args)
    encoder.load_state_dict(torch.load(checkpoints[0])['encoder_state'])
    encoder.eval()
    encoder = encoder.cuda()

    decoders = []
    decoder_ids = []
    for checkpoint in checkpoints:
        decoder = WaveNet(model_args)
        decoder.load_state_dict(torch.load(checkpoint)['decoder_state'])
        decoder.eval()
        decoder = decoder.cuda()
        if args.py:
            decoder = WavenetGenerator(decoder, args.batch_size, wav_freq=args.rate)
        else:
            decoder = NVWavenetGenerator(decoder, args.rate * (args.split_size // 20),
                                         args.batch_size, 3)
        decoders += [decoder]
        decoder_ids += [extract_id(checkpoint)]

    xs = []
    assert args.output_next_to_orig ^ (args.output is not None)

    if len(args.files) == 1 and args.files[0].is_dir():
        top = args.files[0]
        file_paths = list(top.glob('**/*.wav')) + list(top.glob('**/*.h5'))
    else:
        file_paths = args.files

    if not args.skip_filter:
        file_paths = [f for f in file_paths if not '_' in str(f.name)]

    for file_path in file_paths:
        if file_path.suffix == '.wav':
            data, rate = librosa.load(file_path, sr=16000)
            assert rate == 16000
            data = utils.mu_law(data)
        elif file_path.suffix == '.h5':
            data = utils.mu_law(h5py.File(file_path, 'r')['wav'][:] / (2 ** 15))
            if data.shape[-1] % args.rate != 0:
                data = data[:-(data.shape[-1] % args.rate)]
            assert data.shape[-1] % args.rate == 0
        else:
            raise Exception(f'Unsupported filetype {file_path}')

        if args.sample_len:
            data = data[:args.sample_len]
        else:
            args.sample_len = len(data)
        xs.append(torch.tensor(data).unsqueeze(0).float().cuda())

    xs = torch.stack(xs).contiguous()
    print(f'xs size: {xs.size()}')

    def save(x, decoder_ix, filepath):
        wav = utils.inv_mu_law(x.cpu().numpy())
        print(f'X size: {x.shape}')
        print(f'X min: {x.min()}, max: {x.max()}')

        if args.output_next_to_orig:
            save_audio(wav.squeeze(),
                       filepath.parent / f'{filepath.stem}_{decoder_ix}.wav',
                       rate=args.rate)
        else:
            save_audio(wav.squeeze(),
                       args.output / str(extract_id(args.model)) / str(args.update)
                       / filepath.with_suffix('.wav').name,
                       rate=args.rate)

    yy = {}
    with torch.no_grad():
        zz = []
        for xs_batch in torch.split(xs, args.batch_size):
            zz += [encoder(xs_batch)]
        zz = torch.cat(zz, dim=0)

        with utils.timeit("Generation timer"):
            for i, decoder_id in enumerate(decoder_ids):
                yy[decoder_id] = []
                decoder = decoders[i]
                for zz_batch in torch.split(zz, args.batch_size):
                    print(zz_batch.shape)
                    splits = torch.split(zz_batch, args.split_size, -1)
                    audio_data = []
                    decoder.reset()
                    for cond in tqdm.tqdm(splits):
                        audio_data += [decoder.generate(cond).cpu()]
                    audio_data = torch.cat(audio_data, -1)
                    yy[decoder_id] += [audio_data]
                yy[decoder_id] = torch.cat(yy[decoder_id], dim=0)
                del decoder

    for decoder_ix, decoder_result in yy.items():
        for sample_result, filepath in zip(decoder_result, file_paths):
            save(sample_result, decoder_ix, filepath)
## X = np.random.normal(size=(n_samples, n_features))
tol = 1e-4

## print("\n-- scipy.cluster.vq")
## ratio = 1.
## np.random.seed(random_state)
## sc, _ = utils.timeit(profile(kmeans))(X, n_clusters, iter=2,
##                                       thresh=tol / ratio)
## ## utils.cache_value(sc, 'prof_kmeans/scipy_kmeans_%d_%d'
## ##                   % (n_samples, n_features))
## inertia1 = _labels_inertia(X, (X ** 2).sum(axis=-1), sc)[1]
## print('scipy inertia: %.1f' % np.sqrt(inertia1))

print("\n-- sklearn.cluster")
ratio = 1.  # np.mean(np.var(X, axis=0))  # just to make the comparison fair.
np.random.seed(random_state)
sk, _, _ = utils.timeit(profile(k_means))(X, n_clusters, n_init=2,
                                          tol=tol / ratio, init="random",
                                          random_state=random_state)
## utils.cache_value(sk, 'prof_kmeans/sklearn_kmeans_%d_%d' %
##                   (n_samples, n_features))
inertia2 = _labels_inertia(X, (X ** 2).sum(axis=-1), sk)[1]
print('inertia: %.1f' % np.sqrt(inertia2))

## print('\nsklearn - scipy inertia: %.1f. Relative variation: %.1e' %
##       ((inertia2 - inertia1), (inertia2 - inertia1) / (
##        2. * (inertia1 + inertia2))))
def main():
    matrix = HT1632C()
    matrix.fill(ORANGE)
    matrix.text('abcd', 0, 0, RED)
    matrix.text('wxyz', 0, 8, GREEN)
    timeit(matrix.show)
source = [
    (Label.cslist, m.Cs, 'cs_list2'),
    (Label.objlist, m.ObjList, 'obj_list'),
    (Label.haccserv, m.HAccServ, 'haccserv'),
    (Label.servable, m.Servable, 'servable'),
    (Label.hdesc, m.HDesc, 'hdesc'),
    (Label.adesc, m.ADesc, 'adesc'),
    (Label.fdesc, m.FDesc, 'fdesc'),
    (Label.objlink_acc_og, m.ObjLinkAccOg, 'objlink_acc_og'),
    (Label.objlink_h_hg, m.ObjLinkHHg, 'objlink_h_hg'),
    (Label.atariff, m.ATariff, 'atariff'),
    (Label.gtariff, m.GTariff, 'gtariff'),
    (Label.htariff, m.HTariff, 'htariff'),
    (Label.rtariff, m.RTariff, 'rtariff'),
]

with timeit('all'):
    with timeit('generate'):
        data = {}
        for name, model, file_name in source:
            print(name)
            with open('/home/liinda/tmp/mule_csv/pull/' + file_name, 'r') as csvf:
                reader = csv.reader(csvf, delimiter=';')
                source_data = []
                for row in reader:
                    source_data.append(model(*row))
                data[name] = source_data
                if source_data:
                    print(len(source_data))
                    print(source_data[0])
parser.add_argument(
    '--workers',
    type=int,
    default=1,
)

FILES = [
    'Iris-150.txt', 'Iris-1500.txt', 'Iris-15000.txt',
    'Iris-150000.txt', 'Iris-1500000.txt', 'Iris-15000000.txt'
]

if __name__ == '__main__':
    args = parser.parse_args()
    alg = {'KM': 0, 'FCM': 1}[args.alg]

    data_lengths = []
    alg_times = []
    for file in FILES:
        data, _ = utils.read_data(os.path.join(args.data_dir_path, file))
        data_lengths.append(data.shape[0])
        if args.workers == 1:
            alg_args = (args.num_centroids, data, alg, args.max_iter)
            alg_times.append(timeit(fit, *alg_args))
        else:
            alg_args = (args.num_centroids, args.workers, data, alg, args.max_iter)
            alg_times.append(timeit(fit_multiprocess, *alg_args))

    utils.plot_time_results(data_lengths, 'Data length', alg_times)
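# The driver above appends timeit(fn, *args) directly to alg_times and later
# plots those values, so this timeit variant must return just the elapsed time
# and discard fn's result. A minimal sketch under that assumption:
import time


def timeit(fn, *args, **kwargs):
    start = time.perf_counter()
    fn(*args, **kwargs)
    return time.perf_counter() - start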
def exp_CAM_eval(dataset, classifier, out_folder):
    dataset.initialize_iterator_val(classifier.sess)
    mask_dim = dataset.shape if len(dataset.shape) == 2 else dataset.shape[0:2]

    all_probs = []
    all_std_dists = []
    counter = 0
    with timeit():
        while True:
            try:
                fd = classifier.prepare_feed(is_train=False, debug=True)

                # Calc prediction, target, conv_acts
                tensors = [
                    classifier.last_conv, classifier.softmax_weights,
                    classifier.targets, classifier.pred
                ]
                conv_acts, softmax_w, y_real, y_pred = classifier.sess.run(
                    tensors, fd)
                pred_class = np.argmax(y_pred, axis=1)
                pred_values = np.max(y_pred, axis=1)
                batch_s = conv_acts.shape[0]
                n_filters = softmax_w.shape[0]

                # Calc CAM of predictions
                # np.array_equal(softmax_w[:, pred_class][:, 2], softmax_w[:, pred_class[2]])
                predicted_soft_w = softmax_w[:, pred_class]
                predicted_soft_w = predicted_soft_w.T.reshape(
                    batch_s, 1, 1, n_filters)  # (for broadcasting in the filter h,w dimension)
                # Element-wise multiplication per channel, then sum over channels for a batch
                cam_maps = (conv_acts * predicted_soft_w).sum(axis=-1)
                # equivalent to np.array_equal((conv_acts[1, :, :, :] * softmax_w[:, pred_class[1]]).sum(axis=2), res[1])

                im_batch = fd['model_input:0']
                for ind, image in enumerate(im_batch):
                    norm_cam = (cam_maps[ind] - cam_maps[ind].min()) / (
                        cam_maps[ind].max() - cam_maps[ind].min())
                    norm_cam = (norm_cam * 2) - 1  # to -1 +1 range
                    norm_cam = resize(norm_cam, mask_dim, mode='constant')
                    binary_mask = norm_cam > 0.6
                    prob, mean, std = random_pattern_test(classifier, fd,
                                                          pred_class[ind],
                                                          image, binary_mask,
                                                          samples=25)
                    diff_prob = prob - pred_values[ind]
                    std_dist = abs(prob - mean) * 1.0 / std if std != 0 else 0
                    all_probs.append(diff_prob)
                    all_std_dists.append(std_dist)
                    counter += 1
                    if counter % 100 == 0:
                        print("It: {0}".format(counter))
            except Exception as err:
                print(str(err))

                # Filter outliers based on the IQR (drop values outside the 1.5 * IQR fence)
                all_std_dists = np.array(all_std_dists)
                q75, q25 = np.percentile(all_std_dists, [75, 25])
                iqr = q75 - q25
                min = q25 - (iqr * 1.5)
                max = q75 + (iqr * 1.5)
                filtered_std_dists = all_std_dists[np.bitwise_and(
                    all_std_dists > min, all_std_dists < max)]

                mean_diff_prob = np.array(all_probs).mean()
                mean_std_dist = filtered_std_dists.mean()
                print("Images used for calculation: {0}".format(len(all_probs)))
                print("Mean_prob_change : {0}, Mean_STD_distance: {1}".format(
                    mean_diff_prob, mean_std_dist))

                f, axes = plt.subplots(1, 2)
                axes[0].boxplot(filtered_std_dists)
                axes[0].set_title("Std distances")
                axes[1].boxplot(np.array(all_probs))
                axes[1].set_title("Diff prob")

                # write results to json
                import json
                out_path = os.path.join(out_folder, "result.json")
                with open(out_path, 'w') as file:
                    json.dump(
                        {
                            'mean_diff_prob': float(mean_diff_prob),
                            "mean_std_dist": float(mean_std_dist)
                        }, file)

                # write boxplot figures
                out_path = os.path.join(out_folder, "result.png")
                plt.savefig(out_path)
                break
# mat_idx = self.mat[i, j]
# if mat_idx > -1 and self.pieces[mat_idx] != (i, j):
#     print(
#         "Mat (%d, %d) -> %d; Pieces -> %s" % (i, j, mat_idx, str(self.pieces[mat_idx]))
#     )

if __name__ == "__main__":
    """Unit test"""
    state = State()
    # print_state(state)
    # state.push_action(0, (3, 3))
    # print_state(state)
    # state.pop_action()
    # print_state(state)
    # print("color", state.get_player_color())

    # # Is checked
    # for k in range(32):
    #     print(k, "is checked:", state.is_pieced_checked(k))
    # timeit(state.is_pieced_checked, (0,), 1000)

    # Action
    for k in range(32):
        print(k, "actions:", state.get_actions(k))
    timeit(state.get_actions, (1, ), 1000)

    # Player actions
    print("player actions", state.get_player_actions(0))
    timeit(state.get_player_actions, (1, ), 1000)
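# The unit test above calls timeit(func, args_tuple, n): run func(*args) n
# times and report the timing. A sketch of such a micro-benchmark helper
# (signature inferred from the call sites; output format is an assumption):
import time


def timeit(func, args=(), number=1000):
    start = time.perf_counter()
    for _ in range(number):
        func(*args)
    elapsed = time.perf_counter() - start
    print('{}: {:.6f}s total, {:.9f}s per call'.format(
        func.__name__, elapsed, elapsed / number))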
x_size, y_size = 50, 1500
n_var = 200

metric = "euclidean"
# metric = "chebyshev"
# metric = "mahalanobis"  # doesn't work
# metric = 'manhattan'
# metric = "minkowski"

if metric == "euclidean":
    kwargs = {"squared": False}
elif metric == "minkowski":
    kwargs = {"p": 2}
else:
    kwargs = {}

X = np.random.rand(x_size, n_var)
Y = np.random.rand(y_size, n_var)
axis = 1
batch_size = 200

# chunked_timing(X, Y, metric=metric, axis=axis, **kwargs); sys.exit(0)

dist_orig_ind, dist_orig_val = utils.timeit(profile(original))(
    X, Y, metric=metric, axis=axis, **kwargs)

dist_chunked_ind, dist_chunked_val = utils.timeit(profile(chunked))(
    X, Y, axis=axis, metric=metric, batch_size=batch_size, **kwargs)

np.testing.assert_almost_equal(dist_orig_ind, dist_chunked_ind, decimal=7)
np.testing.assert_almost_equal(dist_orig_val, dist_chunked_val, decimal=7)
def main():
    cwd = os.path.dirname(os.path.abspath(__file__))

    info("starting")

    for folder in [
        '{}/../reports'.format(cwd),
        '{}/../reports/perf-tests'.format(cwd),
        '{}/../reports/perf-tests/logs'.format(cwd),
        '{}/../reports/perf-tests/graphs'.format(cwd),
        '{}/../reports/perf-tests/metrics'.format(cwd)
    ]:
        os.system('mkdir -p {}'.format(folder))

    for folder in [
        '{}/../reports/perf-tests/metrics/*.json'.format(cwd),
        '{}/../reports/perf-tests/logs/*.log'.format(cwd),
        '{}/../reports/perf-tests/graphs/*.png'.format(cwd),
    ]:
        os.system('rm -rf {}'.format(folder))

    info("setup")

    logs_collector = LogsCollector()
    logs_collector.start()

    relay = Relay()
    relay.start()

    info("bootstrap")

    manager = ApplianceManager()
    manager.bootstrap()

    integration = Integration(manager)
    integration.wait_for_healthy()

    accounts_to_create = int(os.environ.get('ACCOUNTS_CREATED', '10000'))

    j = 0
    i = 100
    while i <= accounts_to_create:
        info('creating {:,.0f} accounts through vault'.format(i))
        with timeit('create {:,.0f} accounts'.format(i)):
            with metrics(manager, 'create_accounts_{}'.format(i)):
                integration.create_random_accounts('one', str(j), i)
        info('generating graph for {:,.0f} accounts'.format(i))
        with timeit('{:,.0f} graph plotting'.format(i)):
            Graph(Metrics(
                '{}/../reports/perf-tests/metrics/metrics.create_accounts_{}.json'
                .format(cwd, i)))
        i *= 10
        j += 1

    info("stopping")

    relay.stop()
    logs_collector.stop()
    manager.cleanup()

    info("stop")
def visualize_dataset_CAM_predicted(dataset, model, out_folder, out_pickle=False,
                                    x_class=None, use_real_label=False):
    dataset.initialize_iterator_val(model.sess)

    # make imgs folder
    imgs_f = os.path.join(out_folder, "CAM_imgs")
    os.makedirs(imgs_f, exist_ok=True)

    out_list = []
    index_list = []
    label_list = []
    counter = 0
    with timeit():
        while True:
            try:
                fd = model.prepare_feed(is_train=False, debug=False)

                # Calc prediction, target, conv_acts
                tensors = [
                    model.indexs, model.input_l, model.last_conv,
                    model.softmax_weights, model.targets, model.pred
                ]
                indexs, images, conv_acts, softmax_w, y_real, y_pred = model.sess.run(
                    tensors, fd)

                if use_real_label:
                    pred_class = np.argmax(y_real, axis=1).astype(np.int)
                    # probability of the real class for each item in the batch
                    pred_values = y_pred[np.arange(y_pred.shape[0]), pred_class]
                else:
                    pred_class = np.ones((y_pred.shape[0], 1)) * x_class \
                        if x_class else np.argmax(y_pred, axis=1)
                    pred_values = np.max(y_pred, axis=1)

                batch_s = conv_acts.shape[0]
                n_filters = softmax_w.shape[0]

                # Calc CAM of predictions
                # np.array_equal(softmax_w[:, pred_class][:, 2], softmax_w[:, pred_class[2]])
                predicted_soft_w = softmax_w[:, pred_class]
                predicted_soft_w = predicted_soft_w.T.reshape(
                    batch_s, 1, 1, n_filters)  # (for broadcasting in the filter h,w dimension)
                # Element-wise multiplication per channel, then sum over channels for a batch
                cam_maps = (conv_acts * predicted_soft_w).sum(axis=-1)
                # equivalent to np.array_equal((conv_acts[1, :, :, :] * softmax_w[:, pred_class[1]]).sum(axis=2), res[1])

                for ind, img in enumerate(images):
                    img = dataset.inverse_preprocess(img)
                    test_image_plot = imshow_util(img.reshape(dataset.vis_shape()),
                                                  dataset.get_data_range())
                    p_class = pred_class[ind]

                    out_shape = list(test_image_plot.shape)
                    if len(test_image_plot.shape) == 3:
                        out_shape = out_shape[0:2]
                    cam_img = imshow_util(cam_maps[ind],
                                          [cam_maps[ind].min(), cam_maps[ind].max()])
                    resized_map = resize(cam_img, out_shape, mode='constant')

                    if out_pickle:
                        out_list.append(resized_map.flatten())
                        index_list.append(indexs[ind])
                        label_list.append(y_real[ind])
                    else:
                        # Blend the visualization and the image with cv2
                        heatmap_jet = cv2.applyColorMap(
                            (resized_map * 255).astype(np.uint8), cv2.COLORMAP_JET)
                        if len(test_image_plot.shape) == 2 or test_image_plot.shape[2] == 1:
                            rgb_grayscale = cv2.cvtColor(
                                (test_image_plot * 255).astype(np.uint8),
                                cv2.COLOR_GRAY2RGB)
                            dest = cv2.addWeighted(rgb_grayscale, 0.7,
                                                   heatmap_jet, 0.3, 0, dtype=3)
                        else:
                            dest = cv2.addWeighted(
                                (test_image_plot * 255).astype(np.uint8), 0.7,
                                heatmap_jet, 0.3, 0, dtype=3)

                        # format name
                        f_name = indexs[ind].decode() if type(indexs[ind]) == bytes else indexs[ind]
                        f_name = f_name if type(f_name) == str else str(f_name)
                        # Sometimes the index is a file path
                        f_name = f_name.split('.')[0] if '.' in f_name else f_name

                        out_img = "index_{1}_prob_{0}.png".format(
                            int(pred_values[ind] * 100), f_name)
                        img_out_path = os.path.join(imgs_f, out_img)
                        cv2.imwrite(img_out_path, dest)

                    if (counter % 100 == 0):
                        print("Image {0}".format(counter))
                    counter += 1
            except tf.errors.OutOfRangeError as err:
                print("Ended")
                break

    if out_pickle:
        import pickle
        print("Saving Vis files")
        out_imgs = os.path.join(out_folder, "vis_img.npy")
        out_indexs = os.path.join(out_folder, "indexs.pkl")
        out_labels = os.path.join(out_folder, "labels.npy")
        np.save(out_imgs, np.vstack(out_list))
        np.save(out_labels, np.array(label_list))
        with open(out_indexs, 'wb') as f:
            pickle.dump(index_list, f)
def get_content(url):
    try:
        f = urllib2.urlopen(url, timeout=200)
        code = f.getcode()
        if code < 200 or code >= 300:
            print('get page %s failed, error code %d' % (url, code))
            return None
        return f.read()
    except Exception, e:
        if isinstance(e, urllib2.HTTPError):
            print 'http error: {0}'.format(e.code)
        elif isinstance(e, urllib2.URLError) and isinstance(e.reason, socket.timeout):
            print 'url error: socket timeout {0}'.format(e.__str__())
        else:
            print 'misc error: ' + e.__str__()
        return None


get = timeit(get_content)

count = 0
es = ES({"localhost": "9200"}, 'test', 'web')
index = timeit(es.index)


def crawl_sec(sec):
    url = root + sec
    data = get(url)
    if data is not None:
        soup = BeautifulSoup(data)
        links = soup.find_all('a')
        for link in links:
            hre = link.get('href')
            if hre.startswith(sec) and hre != sec:
                lk = root + hre
                pd = get(lk)
import inspect

from utils import timeit

modules = []
for mod_name in ['xy', 'mp_alx', 'mp_grped_along_var', 'grped_along_var_2prop']:
    # 'grped_bars', 'grped_distr', 'grped_distr_ave']:
    # http://docs.python.org/2/library/functions.html#__import__
    modules.append(__import__(mod_name, globals(), locals(), [], -1))

PLOTMP_TYPES = {}
for mod in modules:
    for fname in dir(mod):
        f = getattr(mod, fname)
        if inspect.isfunction(f) and hasattr(f, 'IS_PLOTMP_TYPE') and f.IS_PLOTMP_TYPE:
            PLOTMP_TYPES.update({f.func_name: timeit(f)})
def benchmark3():
    """Compare group_sparse_covariance result for different initializations."""
    ## parameters = {'n_tasks': 10, 'n_var': 50, 'density': 0.15,
    ##               'alpha': .001, 'tol': 1e-2, 'max_iter': 100}
    parameters = {'n_var': 40, 'n_tasks': 10, 'density': 0.15,
                  'alpha': .01, 'tol': 1e-3, 'max_iter': 100}
    mem = joblib.Memory(".")

    _, _, gt = create_signals(parameters,
                              output_dir="_prof_group_sparse_covariance")
    signals = gt["signals"]

    emp_covs, n_samples = empirical_covariances(signals)
    print("alpha max: " + str(compute_alpha_max(emp_covs, n_samples)))

    # With diagonal elements initialization
    probe1 = ScoreProbe()
    est_precs1, probe1 = mem.cache(modified_gsc)(signals, parameters, probe1)
    probe1.comment = "diagonal"  # set after execution for joblib not to see it
    probe1.plot()

    # With Ledoit-Wolf initialization
    ld = np.empty(emp_covs.shape)
    for k in range(emp_covs.shape[-1]):
        ld[..., k] = np.linalg.inv(ledoit_wolf(signals[k])[0])

    probe1 = ScoreProbe()
    est_precs1, probe1 = utils.timeit(mem.cache(modified_gsc))(
        signals, parameters, probe=probe1)
    probe1.comment = "diagonal"  # for joblib to ignore this value

    probe2 = ScoreProbe()
    parameters["precisions_init"] = ld
    est_precs2, probe2 = utils.timeit(mem.cache(modified_gsc))(
        signals, parameters, probe=probe2)
    probe2.comment = "ledoit-wolf"

    print("difference between final estimates (max norm) %.2e"
          % abs(est_precs1 - est_precs2).max())

    pl.figure()
    pl.semilogy(probe1.timings[1:], probe1.max_norm, "+-", label=probe1.comment)
    pl.semilogy(probe2.timings[1:], probe2.max_norm, "+-", label=probe2.comment)
    pl.xlabel("Time [s]")
    pl.ylabel("Max norm")
    pl.grid()
    pl.legend(loc="best")

    pl.figure()
    pl.plot(probe1.timings, probe1.objective, "+-", label=probe1.comment)
    pl.plot(probe2.timings, probe2.objective, "+-", label=probe2.comment)
    pl.xlabel("Time [s]")
    pl.ylabel("objective")
    pl.grid()
    pl.legend(loc="best")

    pl.show()
def train(self, train_file_used=None, save_model=True, eval=True, special_t=1):
    self.current_log = ''
    saver = tf.train.Saver()
    i = 0
    show_batch_dist = True
    best_eval = 0

    with timeit() as t:
        for i in range(special_t):  # todo refactor
            self.dataset.initialize_iterator_train(self.sess)
            while True:
                try:
                    fd = self.prepare_feed(is_train=True, debug=self.debug)
                    if self.use_summary:
                        l, _, acc, tgts, summary = self.sess.run([
                            self.loss, self.train_step, self.accuracy,
                            self.targets, self.all_summaries
                        ], fd)
                        self.summary_train.add_summary(summary)
                    else:
                        l, _, acc, tgts = self.sess.run([
                            self.loss, self.train_step, self.accuracy,
                            self.targets
                        ], fd)
                    if i % 100 == 0:
                        from collections import Counter
                        log = "It: {}, loss_batch: {:.3f}, batch_accuracy: {:.2f}%".format(
                            i, l, acc * 100)
                        self.current_log += '{0} \n'.format(log)
                        print(log)
                        if show_batch_dist:
                            print(Counter(tgts.argmax(axis=1).tolist()))
                    i += 1
                except tf.errors.OutOfRangeError:
                    log = 'break at2 {0}'.format(i)
                    self.current_log += '{0} \n'.format(log)
                    print(log)
                    break

            # Eval in val set
            if eval:
                print("Doing eval")
                out_string, acc_v = self.eval()
                if save_model and (best_eval < acc_v):
                    best_eval = acc_v
                    self.save_model(saver, prefix='best')
                # if best_eval > 0.78:
                #     break
            # if best_eval > 0.78:
            #     break

    # Save model
    if save_model:
        path_model_checkpoint = self.save_model(saver)

        # save accuracy in val set
        if eval:
            with open(os.path.join(path_model_checkpoint, "accuracy_val.txt"), 'w') as f:
                f.write(out_string)

        # create train_result config
        data = {
            'mask_files': [],
            'model_load_path': path_model_checkpoint,
            'train_file_used': train_file_used
        }
        out_folder = os.path.join('config_files', 'train_result')
        os.makedirs(out_folder, exist_ok=True)
        now = now_string()
        json_name = '{0}__{1}__{2}.json'.format(
            self.dataset.__class__.__name__, self.__class__.__name__, now)
        with open(os.path.join(out_folder, json_name), 'w') as f:
            json.dump(data, f)

        return path_model_checkpoint
    else:
        return None