def create_movie(self, path, size, animation_frames=list(), quality=100,
                 samples_per_pixel=1, start_frame=0, end_frame=0,
                 interpupillary_distance=0.0, export_intermediate_frames=True):
    """
    Create and export a set of PNG frames for later movie generation

    :param path: Full path of the snapshot folder
    :param size: Frame buffer size
    :param animation_frames: Optional list of animation frames
    :param quality: PNG quality
    :param samples_per_pixel: Samples per pixel
    :param start_frame: Start frame to export in the provided sequence
    :param end_frame: Last frame to export in the provided sequence
    :param interpupillary_distance: Interpupillary distance for stereo
        rendering. If set to 0, stereo is disabled
    :param export_intermediate_frames: If True, intermediate samples are
        stored to disk. Otherwise, only the final accumulation is exported
    """
    # Save the current renderer and application settings so they can be
    # restored once the export is finished
    application_params = self._client.get_application_parameters()
    renderer_params = self._client.get_renderer()
    old_image_stream_fps = application_params['image_stream_fps']
    old_viewport_size = application_params['viewport']
    old_samples_per_pixel = renderer_params['samples_per_pixel']
    old_max_accum_frames = renderer_params['max_accum_frames']

    # Accumulate the requested samples frame by frame
    self._client.set_renderer(samples_per_pixel=1,
                              max_accum_frames=samples_per_pixel)
    self._client.set_application_parameters(viewport=size)
    self._client.set_application_parameters(image_stream_fps=0)

    progress_widget = IntProgress(description='In progress...',
                                  min=0, max=100, value=0)
    display(progress_widget)

    self.export_frames(
        path=path, base_name='', animation_frames=animation_frames,
        start_frame=start_frame, end_frame=end_frame, size=size,
        samples_per_pixel=samples_per_pixel, quality=quality,
        interpupillary_distance=interpupillary_distance,
        export_intermediate_frames=export_intermediate_frames)

    # Poll the export status until all frames have been written
    done = False
    while not done:
        time.sleep(1)
        progress = self.get_export_frames_progress()['progress']
        progress_widget.value = progress * 100
        done = self.get_export_frames_progress()['done']

    # Restore the previous settings
    self._client.set_application_parameters(
        image_stream_fps=old_image_stream_fps, viewport=old_viewport_size)
    self._client.set_renderer(samples_per_pixel=old_samples_per_pixel,
                              max_accum_frames=old_max_accum_frames)

    progress_widget.description = 'Done'
    progress_widget.value = 100
def calcular_vulnerabilidad_urbana(inicio, fin, min_casos=20,
                                   min_defunciones=0):
    '''Fit one model for the urban municipalities and another one for the
    non-urban ones.'''
    inicio = pd.to_datetime(inicio, yearfirst=True)
    fin = pd.to_datetime(fin, yearfirst=True)
    fechas = pd.date_range(inicio, fin)
    resultados = []
    modelos = []
    asegura_archivos_covid_disponibles(fechas)
    f = IntProgress(min=0, max=len(fechas) - 1)  # instantiate the bar
    display(f)  # display the bar
    urbanos = municipios_urbanos()
    for count, fecha in enumerate(fechas):
        covid_municipal = tabla_covid_indicadores_municipales(
            fecha.strftime("%y%m%d"))
        covid_municipal = agregar_tasas_municipales(covid_municipal)
        caracteristicas = caracteristicas_modelos_municipios(covid_municipal)

        # Urban municipalities
        serie_urbanos = covid_municipal[
            covid_municipal.CLAVE_MUNICIPIO_RES.isin(urbanos.CLAVE_MUNICIPIO)]
        pls_urbanos = ajustar_pls_letalidad(serie_urbanos,
                                            caracteristicas,
                                            min_defunciones=min_defunciones,
                                            min_casos=min_casos)
        df_urbanos = calificar_municipios_letalidad_formato_largo(
            serie_urbanos, pls_urbanos, caracteristicas,
            modelo='PLS_URBANO', dia_ajuste=fecha)
        resultados.append(df_urbanos)
        modelo = pd.DataFrame({
            'caracteristica': caracteristicas,
            'coef': pls_urbanos.coef_
        })
        modelo['dia_ajuste'] = fecha
        modelo['modelo'] = 'PLS_URBANO'
        modelos.append(modelo)

        # Non-urban municipalities
        serie_no_urbanos = covid_municipal[
            ~covid_municipal.CLAVE_MUNICIPIO_RES.isin(urbanos.CLAVE_MUNICIPIO)]
        pls_no_urbanos = ajustar_pls_letalidad(serie_no_urbanos,
                                               caracteristicas,
                                               min_defunciones=min_defunciones,
                                               min_casos=min_casos)
        df_no_urbanos = calificar_municipios_letalidad_formato_largo(
            serie_no_urbanos, pls_no_urbanos, caracteristicas,
            modelo='PLS_NO_URBANO', dia_ajuste=fecha)
        resultados.append(df_no_urbanos)
        modelo = pd.DataFrame({
            'caracteristica': caracteristicas,
            'coef': pls_no_urbanos.coef_
        })
        modelo['dia_ajuste'] = fecha
        modelo['modelo'] = 'PLS_NO_URBANO'
        modelos.append(modelo)

        f.value = count  # advance the bar

    resultados_df = pd.concat(resultados, ignore_index=True)
    modelos_df = pd.concat(modelos, ignore_index=True)
    return modelos_df, resultados_df
from ipywidgets import IntProgress
from IPython.display import display
import time

max_count = 100

f = IntProgress(min=0, max=max_count)  # instantiate the bar
display(f)  # display the bar

count = 0
while count < max_count:
    f.value += 1  # signal to increment the progress bar
    time.sleep(.1)
    count += 1
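# The same loop pattern extends to a labelled bar with a completion style.
# A minimal sketch; `description` and `bar_style` are standard IntProgress
# traits, as used elsewhere in these snippets.
from ipywidgets import IntProgress
from IPython.display import display
import time

max_count = 100
f = IntProgress(min=0, max=max_count, description='Working...')
display(f)

for count in range(max_count):
    f.value += 1
    time.sleep(.1)

f.description = 'Done'
f.bar_style = 'success'  # green bar on completion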
def __iter__(self):
    self.index = 0
    self.bar = IntProgress(max=self.len)
    display(self.bar)
    return self
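# The __iter__ above needs a matching __next__ to form a working iterator.
# A minimal self-contained sketch of the same pattern; the class name and
# the wrapped `items` attribute are hypothetical illustrations, not part of
# the original class.
from ipywidgets import IntProgress
from IPython.display import display

class ProgressIterator:
    """Iterate over a sequence while driving an IntProgress bar."""

    def __init__(self, items):
        self.items = items
        self.len = len(items)

    def __iter__(self):
        self.index = 0
        self.bar = IntProgress(max=self.len)
        display(self.bar)
        return self

    def __next__(self):
        if self.index >= self.len:
            self.bar.bar_style = 'success'
            raise StopIteration
        item = self.items[self.index]
        self.index += 1
        self.bar.value = self.index  # advance the bar
        return item

# usage: for x in ProgressIterator(range(10)): ...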
def make_old_impl(self, in_memory=False):
    # TODO : make base class so we can reuse this with sandbox/base.py
    progress = IntProgress(description='Rendering...',
                           max=len(self._time_range) - 1)
    self._event = threading.Event()

    def _make(event):
        image_files = []
        iw = None

        if not self.skip_render:
            for i in self._time_range:
                progress.value = i
                if not event.is_set():
                    self.view.frame = i
                    self.sleep()
                    if self.perframe_hook:
                        self.perframe_hook(self.view)
                        self.sleep()
                    if not self.in_memory:
                        self.view.download_image(
                            self.prefix + '.' + str(i) + '.png',
                            **self.render_params)
                    else:
                        iw = self.view.render_image(**self.render_params)
                    self.sleep()
                    if self.in_memory:
                        rgb = self._base64_to_ndarray(self.view._image_data)
                        self._image_array.append(rgb)
            if iw:
                iw.close()  # free memory

        if not self.in_memory:
            # keep only the frames that were actually written to disk
            template = "{}/{}.{}.png"
            image_files = [
                image_dir for image_dir in
                (template.format(self.download_folder, self.prefix, str(i))
                 for i in self._time_range) if os.path.exists(image_dir)
            ]
        else:
            image_files = self._image_array

        if not self._event.is_set():
            progress.description = "Writing ..."
            clip = mpy.ImageSequenceClip(image_files, fps=self.fps)
            with Output():
                if self.output.endswith('.gif'):
                    clip.write_gif(self.output,
                                   fps=self.fps,
                                   verbose=False,
                                   **self.moviepy_params)
                else:
                    clip.write_videofile(self.output,
                                         fps=self.fps,
                                         **self.moviepy_params)
            self._image_array = []
            progress.description = 'Done'
            time.sleep(1)
            progress.close()

    self.thread = threading.Thread(target=_make, args=(self._event, ))
    self.thread.daemon = True
    self.thread.start()
    return progress
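# The function above runs the render loop on a daemon thread so the notebook
# stays responsive while the bar updates. A stripped-down sketch of that
# thread + Event + IntProgress pattern; the function name and per-step work
# are hypothetical stand-ins.
import threading
import time
from ipywidgets import IntProgress
from IPython.display import display

def run_in_background(n_steps=50):
    """Drive a progress bar from a worker thread."""
    progress = IntProgress(description='Working...', max=n_steps)
    event = threading.Event()  # lets the caller cancel the work

    def _work(stop_event):
        for i in range(n_steps):
            if stop_event.is_set():
                break
            time.sleep(0.1)  # placeholder for real per-step work
            progress.value = i + 1
        progress.description = 'Done'

    thread = threading.Thread(target=_work, args=(event,), daemon=True)
    thread.start()
    display(progress)
    return event  # call event.set() to stop early

stop = run_in_background()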
def bias_variance(datasets=[], algos=[], metrics=[], L=10, k=2):
    """
    This method computes the bias vs. variance decomposition of the error.
    The approach used here is based on the works of [Webb05]_ and [Dom05]_.

    Each instance of the dataset is scored `L` times. A single scoring is
    achieved by splitting the dataset at random into `k` folds. Each fold is
    scored by the model `M` trained on the remainder folds. [Webb05]_
    recommends the use of 2 folds.

    If metric is MSE then the standard decomposition is used. The Bias for
    an instance `x` is defined as the mean squared error of the `L` trained
    models w.r.t. the true label `y`, denoted with
    :math:`{\\sf E}_{L} [M(x) - y]^2`. The Variance for an instance `x` is
    measured across the `L` trained models:
    :math:`{\\sf E}_{L} [M(x) - {\\sf E}_{L} M(x)]^2`. Both are averaged
    over all instances in the dataset.

    If metric is any of the IR quality measures, we resort to the bias
    variance decomposition of the mean squared error of the given metric
    w.r.t. its ideal value, e.g., for the case of NDCG,
    :math:`{\\sf E}_{L} [1 - NDCG]^2`. Recall that a formal Bias/Variance
    decomposition has not been proposed yet for such measures.

    Parameters
    ----------
    datasets : list of rankeval.dataset.Dataset
        The dataset instances.
    algos : list of function
        Each function should be a wrapper of a learning algorithm. The
        function should accept four parameters: `train_X`, `train_Y`,
        `train_q`, `test_X`.

        - `train_X`: numpy.ndarray storing a 2-D matrix of size
          num_docs x num_features
        - `train_Y`: numpy.ndarray storing a vector of document's
          relevance labels
        - `train_q`: numpy.ndarray storing a vector of query lengths
        - `test_X`: numpy.ndarray as for `train_X`

        A model is trained on `train_X`, `train_Y`, `train_q`, and used to
        score `test_X`. A numpy.ndarray with such scores must be returned.
    metrics : list of "mse" or rankeval.metrics.metric.Metric
        The metrics used to compute the error.
    L : int
        Number of iterations.
    k : int
        Number of folds.

    Returns
    -------
    bias_variance : xarray.DataArray
        A DataArray containing the bias/variance decomposition of the error
        for any given dataset, algorithm and metric.

    References
    ----------
    .. [Webb05] Webb, Geoffrey I., and Paul Conilione.
        "Estimating bias and variance from data."
        Pre-publication manuscript
        (`pdf <http://www.csse.monash.edu/webb/-Files/WebbConilione06.pdf>`_)
        (2005).
    .. [Dom05] Domingos P. A unified bias-variance decomposition.
        In Proceedings of 17th International Conference on Machine Learning
        2000 (pp. 231-238).
    """
    assert k >= 2
    assert L >= 2
    assert len(datasets) > 0
    assert len(metrics) > 0
    for metric in metrics:
        assert isinstance(metric, Metric)

    progress_bar = IntProgress(min=0,
                               max=len(datasets) * len(metrics) * len(algos),
                               description="Iterating datasets and metrics")
    display(progress_bar)

    data = np.zeros(shape=(len(datasets), len(metrics), len(algos), 3),
                    dtype=np.float32)
    for idx_dataset, dataset in enumerate(datasets):
        for idx_algo, algo in enumerate(algos):
            for idx_metric, metric in enumerate(metrics):
                progress_bar.value += 1

                scores = _multi_kfold_scoring(dataset, algo=algo, L=L, k=k)

                avg_error = 0.
                avg_bias = 0.
                avg_var = 0.
                if not isinstance(metric, MSE):
                    # mse over metric, assume error is 1-metric
                    # not exactly domingos paper
                    q_scores = np.empty((dataset.n_queries, L),
                                        dtype=np.float32)
                    for i in range(L):
                        q_scores[:, i] = metric.eval(dataset=dataset,
                                                     y_pred=scores[:, i])[1]
                    avg_error = np.mean((q_scores - 1.)**2.)
                    avg_pred = np.mean(q_scores, axis=1)
                    avg_bias = np.mean((avg_pred - 1.)**2.)
                    avg_var = np.mean(
                        (q_scores - avg_pred.reshape((-1, 1)))**2.)
                else:
                    # mse
                    avg_error = np.mean(
                        (scores - dataset.y.reshape((-1, 1)))**2.)
                    avg_pred = np.mean(scores, axis=1)
                    avg_bias = np.mean((avg_pred - dataset.y)**2.)
                    avg_var = np.mean(
                        (scores - avg_pred.reshape((-1, 1)))**2.)

                data[idx_dataset][idx_metric][idx_algo][0] = avg_error
                data[idx_dataset][idx_metric][idx_algo][1] = avg_bias
                data[idx_dataset][idx_metric][idx_algo][2] = avg_var

    progress_bar.bar_style = "success"
    progress_bar.close()

    performance = xr.DataArray(
        data,
        name='Bias/Variance Decomposition',
        coords=[datasets, metrics, [a.__name__ for a in algos],
                ['Error', 'Bias', 'Variance']],
        dims=['dataset', 'metric', 'algo', 'error'])
    return performance
def sample_mcmc(model, h, x0=None, burnin=1000, n_samples=10000,
                sample_rate=10, g=None, noiseless_sample=False,
                progress_bar=False):
    """
    Sample points (theta) from either a Gaussian process model or simulator
    using the Metropolis-Hastings algorithm.

    Default proposal density, g, is a Gaussian with diagonal covariance;
    covariances set to a small value based on the range of possible
    parameter settings for each dimension.

    Args:
        (models.GP) OR (simulators.Simulator) model: GP model of the
            discrepancy, OR Simulator instance with callable f(),
            noiseless_f()
        (float) h: bandwidth for KDE.
        (np.ndarray) x0: initial starting point.
        (int) burnin: number of burn-in samples.
        (int) n_samples: number of samples to collect.
        (int) sample_rate: keep every `sample_rate`-th iteration.
        (callable) g: proposal density.
        (bool) noiseless_sample: whether to call noiseless_f or f (when
            `model' is a Simulator).
        (bool) progress_bar: whether to show progress bar in Jupyter
            notebook.
    Returns:
        (np.ndarray) samples: with shape (n_samples, input_dim).
    """
    input_dim = model.input_dim
    bounds = model.bounds

    # function proportional to predictive distribution
    if isinstance(model, GP):
        f = lambda x: norm.cdf(
            (h - model.mu(x)) / np.sqrt(model.v(x) + model.obs_noise))
    elif isinstance(model, Simulator):
        # std. dev. of obs noise is stored in simulator, so no np.sqrt
        if noiseless_sample:
            f = lambda x: norm.cdf(
                (h - model.noiseless_f(x)) / model.obs_noise)
        else:
            f = lambda x: norm.cdf((h - model.f(x)) / model.obs_noise)
    else:
        raise ValueError('pass simulator or GP model as first argument.')

    if x0 is None:
        x0 = np.array([np.random.uniform(b1, b2)
                       for (b1, b2) in bounds]).reshape(1, input_dim)

    if g is None:
        # small diagonal covariance: 2.5% of each parameter's range
        cov = []
        for (b1, b2) in bounds:
            cov.append(0.025 * (b2 - b1))
        cov = np.diag(np.array(cov)).reshape(input_dim, input_dim)
        g = lambda xt: np.random.multivariate_normal(
            xt.squeeze(), cov).reshape(1, input_dim)

    progress_bar = progress_bar and 'jupyter' in os.environ['_']

    # ================================================
    # Burn-in period =================================
    if progress_bar:
        prog = IntProgress(value=0, max=burnin, description='Burn-in')
        display(prog)
    x = np.array(x0)
    for i in range(burnin):
        cand = g(x)  # candidate point
        if not model.within_bounds(cand):
            continue
        a = f(cand) / f(x)  # acceptance ratio
        if np.random.rand() < a:  # accept/reject
            x = np.copy(cand)
        if progress_bar:
            prog.value += 1

    # ================================================
    # Begin sampling =================================
    if progress_bar:
        prog.close()
        prog = IntProgress(value=0, max=n_samples, description='Sampling')
        display(prog)
    samples = []
    i = 0
    while len(samples) < n_samples:
        cand = g(x)  # candidate point
        if not model.within_bounds(cand):
            continue
        a = f(cand) / f(x)  # acceptance ratio
        if a < 0:
            continue
        if np.random.rand() < a:  # accept/reject
            x = np.copy(cand)
        if (i % sample_rate) == 0:
            samples.append(np.copy(x))
            if progress_bar:
                prog.value += 1
        i += 1
    if progress_bar:
        prog.close()
    return np.array(samples).reshape(n_samples, input_dim)
def _status_bar_add_progress_bar(self, stretch=0):
    widget = IntProgress()
    self._layout_add_widget(self._status_bar_layout, widget)
    return _IpyWidget(widget)
def run_update_steps(self, steps, save_to_path, gif=True, keep_frames=False):
    """
    Runs the simulation for multiple timesteps and saves the result as
    individual frames and a GIF.

    For steps many times, the self.update_step() function is called. If
    gif==True, each timestep is saved as a frame (*.png), including a
    caption of the time step, to generate the *.gif. If keep_frames==False,
    the frames (*.png) will be deleted after generating the *.gif. For
    gif==False, no frames or GIF are stored. In any case, the simulation
    will run for steps many times and the result is stored in the grid.

    Args:
        steps (int): Number of timesteps of simulation.
        save_to_path (str): The path where frames and *.gif will be stored.
            Enumeration and file endings are appended automatically.
            (e.g. save_to_path="simulation1/scenario4")
        gif (bool, optional): Whether to produce a *.gif file of the
            simulation. Defaults to True.
        keep_frames (bool, optional): Whether the frames used to generate
            the *.gif should be kept. Defaults to False.
    """
    # increment steps by one, as step 0 only displays current states
    steps += 1
    # list with all file names needed for animation
    filenames = []

    # display a progressbar
    bar = IntProgress(min=0, max=steps)
    IPython.display.display(bar)  # from IPython

    # loop "steps" many times
    for i in range(steps):
        # update the progress bar
        bar.value += 1
        # calculate an update step
        self.update_step()
        # only do this, if gif is required
        if gif:
            # Caption for GIF
            caption = f"Time step {i}."
            # File name, with time step, add to list
            path = save_to_path + f"_{i}.png"
            filenames.append(path)
            # save the figure
            self.show(caption=caption, save_to_path=path)

    # only display result or create gif?
    if gif:
        # append the last image additional times, to "freeze" the GIF at
        # the end, before it restarts
        additional_frames = 4
        for i in range(additional_frames):
            filenames.append(path)

        # create file name
        path = save_to_path + ".gif"
        # use imageio to create the gif, including the duplicated final
        # frames so the GIF pauses on the result before looping
        with imageio.get_writer(path, mode='I', duration=0.5) as writer:
            for filename in filenames:
                image = imageio.imread(filename)
                writer.append_data(image)

        # display the gif
        # Problem with Jupyter Notebook: Only displays first image of frame!
        # Workaround is using IPython to display directly in the notebook.
        # plt.imshow(mpl.image.imread(path))

        # check if individual frames are supposed to be kept
        if not keep_frames:
            # Remove all the frames that were necessary for the GIF
            for filename in filenames:
                try:
                    # delete the file/frame on disk
                    os.remove(filename)
                except FileNotFoundError:
                    # The last frame was appended a few extra times at the
                    # end so the GIF freezes before it loops. The code will
                    # therefore try to delete that frame even though it has
                    # already been deleted, hence this exception.
                    # print(f"Did not find file: {filename}")
                    pass
                except Exception:
                    print("An exception occurred!")
    # if no gif, only display the result
    else:
        self.show()
    return
def log_progress(sequence: list, every=None, size=None, name='Items',
                 userProgress=None):
    '''Creates a progress bar in jupyter notebooks.

    Automatically detects the size of a list and estimates the best step
    size for progress bar updates. This function also automatically
    estimates the total time to completion of the iterations, updating the
    estimate using the time that every step takes.

    If the sequence argument is an iterator, the total number of elements
    cannot be determined. In this case, the user must define the `every`
    parameter to indicate the update frequency of the progress bar.

    If the progress bar is used in a nested loop, passing a list to the
    `userProgress` argument will force the re-utilization of `ipywidgets`
    objects, preventing the creation of a new progress bar at every
    iteration of the inner loop.

    This progress bar was based on
    https://github.com/alexanderkuk/log-progress.

    Args:
        sequence : An iterable object.
        every (int): The update frequency.
        size (int): The number of elements in the sequence.
        name (str): The name of the progress bar.
        userProgress (list): List for creation of nested progress bars.
    '''
    from ipywidgets import IntProgress, HTML, HBox, Label
    from IPython.display import display
    from numpy import mean as npmean
    from collections import deque
    from math import floor
    from datetime import datetime
    from string import Template

    is_iterator = False
    if size is None:
        try:
            size = len(sequence)
        except TypeError:
            is_iterator = True
    if size is not None:
        if every is None:
            if size <= 200:
                every = 1
            else:
                every = floor(float(size) * 0.005)  # every 0.5%, minimum is 1
    else:
        assert every is not None, 'sequence is iterator, set every'

    # For elapsed time
    initTime = datetime.now()
    totTime = "?"
    labTempl = Template(" (~ $min total time (min) ; $ell minutes elapsed)")

    # If provided, we use the objects already created.
    # If not provided, we create from scratch.
    if userProgress is None or userProgress == []:
        progress = IntProgress(min=0, max=1, value=1)
        label = HTML()
        labelTime = Label("")
        box = HBox(children=[label, progress, labelTime])
        if userProgress == []:
            userProgress.append(box)
        display(box)
    else:
        box = userProgress[0]

    if is_iterator:
        # indeterminate bar: min=0, max=1, value=1, 'info' style
        box.children[1].min = 0
        box.children[1].max = 1
        box.children[1].value = 1
        box.children[1].bar_style = 'info'
    else:
        box.children[1].min = 0
        box.children[1].max = size
        box.children[1].value = 0

    # For remaining time estimation
    deltas = deque()
    lastTime = None
    meandelta = 0
    index = 0
    try:
        for index, record in enumerate(sequence, 1):
            if index == 1 or index % every == 0:
                if is_iterator:
                    box.children[0].value = '{name}: {index} / ?'.format(
                        name=name, index=index)
                else:
                    box.children[1].value = index
                    box.children[0].value = u'{name}: {index} / {size}'.format(
                        name=name, index=index, size=size)

                # Estimates remaining time with average delta per iteration
                # Uses (at most) the last 100 update intervals
                if len(deltas) == 101:
                    deltas.popleft()
                if lastTime:
                    deltas.append(
                        (datetime.now() - lastTime).total_seconds())
                    meandelta = npmean(deltas) / 60.0  # seconds to minutes
                    # estimated total time across all updates
                    totTime = round(meandelta * size / float(every), 3)
                else:
                    totTime = "?"  # First iteration has no time

                lastTime = datetime.now()
                # All elapsed time in minutes
                elapsed = round(
                    (datetime.now() - initTime).total_seconds() / 60.0, 3)
                box.children[2].value = labTempl.safe_substitute(
                    {"min": totTime, "ell": elapsed})

            yield record
    except:
        box.children[1].bar_style = 'danger'
        raise
    else:
        box.children[1].bar_style = 'success'
        box.children[1].value = index
        box.children[0].value = "{name}: {index}".format(
            name=name, index=str(index or '?'))
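# A short usage sketch of log_progress in a nested loop, reusing the widget
# box via `userProgress` as the docstring describes; loop bounds and the
# sleep are arbitrary placeholders.
import time

inner_box = []  # holds the reused widget box across inner loops

for i in log_progress(range(5), name='Outer'):
    # the inner bar is created once and re-used on every outer iteration
    for j in log_progress(range(100), name='Inner', userProgress=inner_box):
        time.sleep(0.01)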
def explain_instance(self, instance, num_reps=50, num_features=4,
                     neighborhood_samples=10000, use_cov_matrix=False,
                     verbose=False, figure_dir=None):
    npEX = np.array(self.EX)
    cls_proba = self.bb_classifier.predict_proba
    x0 = copy.deepcopy(instance)  # instance to be explained
    mockobj = mock.Mock()

    # Neighborhood random samples
    # note: assumes the training matrix is stored as self.X
    cov_matrix = np.cov(
        ((self.X - npEX) / self.StdX).T) if use_cov_matrix else 1.0
    NormV = scipy.stats.multivariate_normal.rvs(mean=np.zeros(self.F),
                                                cov=cov_matrix,
                                                size=neighborhood_samples,
                                                random_state=10)

    # Get the output of the black-box classifier on x0
    output = cls_proba([x0])[0]
    label_x0 = 1 if output[1] >= output[0] else 0
    prob_x0 = output[label_x0]
    prob_x0_F, prob_x0_T = output[0], output[1]
    if verbose:
        print('prob_x0', prob_x0, ' label_x0', self.class_names[label_x0])

    # Prepare instance for LIME
    lime_x0 = np.divide((x0 - npEX), self.StdX,
                        where=np.logical_not(np.isclose(self.StdX, 0)))
    shap_x0 = (x0 - npEX)

    rows = None
    progbar = IntProgress(min=0, max=num_reps)
    label = Label(value="")
    display(HBox([Label("K=%d " % (num_features)), progbar, label]))

    # Explain the same instance x0 multiple times
    for rnum in range(num_reps):
        label.value = "%d/%d" % (rnum + 1, num_reps)
        R = mock.Mock()  # store all the computed metrics
        R.rnum, R.prob_x0 = rnum, prob_x0

        # Explain the instance x0 with LIME
        lime_expl = self.LIMEEXPL.explain_instance(
            np.array(x0),
            cls_proba,
            num_features=num_features,
            top_labels=1,
            num_samples=self.explanation_samples)

        # Explain x0 using SHAP
        shap_phi = self.SHAPEXPL.shap_values(x0, l1_reg="num_features(10)")
        shap_phi0 = self.SHAPEXPL.expected_value
        # Take only the top @num_features from shap_phi
        argtop = np.argsort(np.abs(shap_phi[0]))
        for k in range(len(shap_phi)):
            shap_phi[k][argtop[:(self.F - num_features)]] = 0

        # Recover both the LIME and the SHAP classifiers
        R.lime_g = get_LIME_classifier(lime_expl, label_x0, x0)
        R.shap_g = get_SHAP_classifier(label_x0, shap_phi, shap_phi0, x0,
                                       self.EX)

        # ----------------------------------------------------------
        # Evaluate the white box classifiers
        EL = eval_whitebox_classifier(R, R.lime_g, npEX, self.StdX, NormV,
                                      x0, label_x0, cls_proba, "lime",
                                      precision_recalls=True)
        ES = eval_whitebox_classifier(R, R.shap_g, npEX,
                                      np.ones(len(x0)), NormV * self.StdX,
                                      x0, label_x0, cls_proba, "shap",
                                      precision_recalls=True)
        R.lime_local_discr = np.abs(
            R.lime_g.predict([lime_x0])[0] - prob_x0)
        R.shap_local_discr = np.abs(
            R.shap_g.predict([shap_x0])[0] - prob_x0)

        # Indices of the most important features, ordered by their
        # absolute value
        R.lime_argtop = np.argsort(np.abs(R.lime_g.coef_))
        R.shap_argtop = np.argsort(np.abs(R.shap_g.coef_))

        # get the K most common features in the explanation of x0
        R.mcf_lime = tuple(
            [R.lime_argtop[-k] for k in range(num_features)])
        R.mcf_shap = tuple(
            [R.shap_argtop[-k] for k in range(num_features)])

        # Binary masks of the argtops
        R.lime_bin_expl, R.shap_bin_expl = np.zeros(self.F), np.zeros(self.F)
        R.lime_bin_expl[np.array(R.mcf_lime)] = 1
        R.shap_bin_expl[np.array(R.mcf_shap)] = 1

        # Save the Ridge regressors built by LIME and SHAP
        # lime_g_W, shap_g_W = tuple(lime_g.coef_), tuple(shap_g.coef_)
        # lime_g_w0, shap_g_w0 = lime_g.intercept_, shap_g.intercept_

        # get the appropriate R keys
        R_keys = copy.copy(R.__dict__)
        for key in copy.copy(list(R_keys.keys())):
            if key.startswith("wb_"):
                R_keys[wb_name + key[2:]] = R_keys.pop(key)
            elif key in mockobj.__dict__:
                del R_keys[key]

        rows = pd.DataFrame(columns=R_keys) if rows is None else rows
        rows = rows.append({k: R.__dict__[k] for k in R_keys},
                           ignore_index=True)
        progbar.value += 1

    label.value += " Done."

    # use the multiple explanations to compute the LEAF metrics
    # display(rows)

    # Jaccard distances between the various explanations (stability)
    lime_jaccard_mat = 1 - pdist(np.stack(rows.lime_bin_expl, axis=0),
                                 'jaccard')
    shap_jaccard_mat = 1 - pdist(np.stack(rows.shap_bin_expl, axis=0),
                                 'jaccard')
    self.lime_avg_jaccard_bin, self.lime_std_jaccard_bin = np.mean(
        lime_jaccard_mat), np.std(lime_jaccard_mat)
    self.shap_avg_jaccard_bin, self.shap_std_jaccard_bin = np.mean(
        shap_jaccard_mat), np.std(shap_jaccard_mat)

    # LIME/SHAP explanation comparisons
    lime_shap_jaccard_mat = 1 - cdist(np.stack(rows.lime_bin_expl, axis=0),
                                      np.stack(rows.shap_bin_expl, axis=0),
                                      'jaccard')
    lime_shap_avg_jaccard_bin, lime_shap_std_jaccard_bin = np.mean(
        lime_shap_jaccard_mat), np.std(lime_shap_jaccard_mat)

    # store the metrics for later use
    self.metrics = rows

    def leaf_plot(stability, method):
        fig, ax1 = plt.subplots(figsize=(6, 2.2))
        data = [
            stability.flatten(),
            1 - rows[method + '_local_discr'],
            rows[method + '_fidelity_f1'],
            # rows[method + '_prescriptivity_f1'],
            # rows[method + '_bal_prescriptivity'],
            1 - 2 * np.abs(rows[method + '_boundary_discr'])
        ]
        ax1.tick_params(axis='both', which='major', labelsize=12)
        ax1.set_xlabel('distribution')
        ax1.set_ylabel('LEAF metrics', color='black', fontsize=15)
        ax1.boxplot(data, vert=False, widths=0.7)
        ax1.tick_params(axis='y', labelcolor='#500000')
        ax1.set_yticks(np.arange(1, len(data) + 1))
        ax1.set_yticklabels([
            'Stability', 'Local Concordance', 'Fidelity', 'Prescriptivity'
        ])
        ax1.set_xlim([-0.05, 1.05])
        ax1.invert_yaxis()

        # instantiate a second axes that shares the same x-axis
        ax2 = ax1.twinx()
        ax2.tick_params(axis='both', which='major', labelsize=12)
        ax2.set_ylabel('Values', color='#000080')
        ax2.boxplot(data, vert=False, widths=0.7)
        ax2.tick_params(axis='y', labelcolor='#000080')
        ax2.set_yticks(np.arange(1, len(data) + 1))
        ax2.set_yticklabels(
            [" %.3f ± %.3f " % (np.mean(d), np.std(d)) for d in data])
        ax2.invert_yaxis()

        fig.tight_layout()  # otherwise the right y-label is slightly clipped
        if figure_dir is not None:
            imgname = figure_dir + method + "_leaf.pdf"
            print('Saving', imgname)
            plt.savefig(imgname, dpi=150, bbox_inches='tight')
        plt.show()

    # Show LIME explanation
    display(HTML("<h2>LIME</h2>"))
    lime_expl.show_in_notebook(show_table=True, show_all=False)
    leaf_plot(lime_jaccard_mat, 'lime')

    # Show SHAP explanation
    display(HTML("<h2>SHAP</h2>"))
    display(shap.force_plot(shap_phi0[label_x0], shap_phi[label_x0], x0))
    leaf_plot(shap_jaccard_mat, 'shap')

    prescription = False
    if prescription:
        print("====================================================")
        lime_x1, lime_sx1 = EL
        shap_x1, shap_sx1 = ES
        print(
            'SHAP accuracy %f balanced_accuracy %f precision %f recall %f'
            % (rows.shap_prescriptivity.mean(),
               rows.shap_bal_prescriptivity.mean(),
               rows.shap_precision_x1.mean(), rows.shap_recall_x1.mean()))
        lime_diff = (rows.iloc[-1].lime_g.coef_ != 0) * (lime_x1 - x0)
        shap_diff = (rows.iloc[-1].shap_g.coef_ != 0) * (shap_x1 - x0)
        print(np.array(rows.iloc[-1].lime_g.coef_ != 0))
        print('lime_diff\n', lime_diff)
        print('shap_diff\n', shap_diff)

        lime_output_x1 = cls_proba([lime_x1])[0]
        shap_output_x1 = cls_proba([shap_x1])[0]
        lime_label_x1 = 1 if lime_output_x1[1] >= lime_output_x1[0] else 0
        shap_label_x1 = 1 if shap_output_x1[1] >= shap_output_x1[0] else 0
        print("LIME(x1) prob =", lime_output_x1)
        print("SHAP(x1) prob =", shap_output_x1)
        # df = pd.DataFrame([x0, x0 + shap_diff], index=['x', 'x\'']).round(2)
        # display(df.T.iloc[:math.ceil(F/2),:])
        # display(df.T.iloc[math.ceil(F/2):,:])

        # Show LIME explanation
        lime_expl = self.LIMEEXPL.explain_instance(
            np.array(shap_x1),
            cls_proba,
            num_features=num_features,
            top_labels=1,
            num_samples=self.explanation_samples)
        lime_expl.show_in_notebook(show_table=True, show_all=False)
        # leaf_plot(lime_jaccard_mat, 'lime')

        # Show SHAP explanation
        shap_phi = self.SHAPEXPL.shap_values(shap_x1,
                                             l1_reg="num_features(10)")
        shap_phi0 = self.SHAPEXPL.expected_value
        argtop = np.argsort(np.abs(shap_phi[0]))
        for k in range(len(shap_phi)):
            shap_phi[k][argtop[:(self.F - num_features)]] = 0
        display(
            shap.force_plot(shap_phi0[shap_label_x1],
                            shap_phi[shap_label_x1], shap_x1))
def status_printer(file, total=None, desc=None):
    """
    Manage the printing of an IPython/Jupyter Notebook progress bar widget.
    """
    # Fallback to text bar if there's no total
    # DEPRECATED: replaced with an 'info' style bar
    # if not total:
    #     return super(tqdm_notebook, tqdm_notebook).status_printer(file)

    fp = file

    # Prepare IPython progress bar
    if total:
        pbar = IntProgress(min=0, max=total)
    else:  # No total? Show info style bar with no progress tqdm status
        pbar = IntProgress(min=0, max=1)
        pbar.value = 1
        pbar.bar_style = 'info'
    if desc:
        pbar.description = desc
    # Prepare status text
    ptext = HTML()
    # Only way to place text to the right of the bar is to use a container
    container = HBox(children=[pbar, ptext])
    display(container)

    def print_status(s='', close=False, bar_style=None):
        # Note: contrary to native tqdm, s='' does NOT clear bar
        # goal is to keep all infos if error happens so user knows
        # at which iteration the loop failed.

        # Clear previous output (really necessary?)
        # clear_output(wait=1)

        # Get current iteration value from format_meter string
        if total:
            n = None
            if s:
                npos = s.find(r'/|/')  # cause we use bar_format=r'{n}|...'
                # Check that n can be found in s (else n > total)
                if npos >= 0:
                    n = int(s[:npos])  # get n from string
                    s = s[npos + 3:]  # remove from string
            # Update bar with current n value
            if n is not None:
                pbar.value = n

        # Print stats
        if s:  # never clear the bar (signal: s='')
            s = s.replace('||', '')  # remove unaesthetic pipes
            s = escape(s)  # html escape special characters (like '?')
            ptext.value = s

        # Change bar style
        if bar_style:
            # Hack-ish way to avoid the danger bar_style being overridden
            # by success because the bar gets closed after the error...
            if not (pbar.bar_style == 'danger' and bar_style == 'success'):
                pbar.bar_style = bar_style

        # Special signal to close the bar
        if close and pbar.bar_style != 'danger':  # hide only if no error
            container.visible = False

    return print_status
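# The HBox-with-HTML trick above is the general way to place text beside an
# ipywidgets bar. A minimal standalone sketch of that pattern; the loop body
# and label text are illustrative only.
from ipywidgets import IntProgress, HTML, HBox
from IPython.display import display
from html import escape

pbar = IntProgress(min=0, max=100)
ptext = HTML()
display(HBox(children=[pbar, ptext]))  # text sits to the right of the bar

for n in range(1, 101):
    pbar.value = n
    ptext.value = escape(f"{n}/100 items")  # escape special characters
pbar.bar_style = 'success'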
def train(FLAG):
    print("Reading dataset...")
    # load data
    Xtrain, df_train = read_dataset(TRAIN_CSV, TRAIN_DIR)
    Xtest, df_test = read_dataset(TEST_CSV, TEST_DIR)

    vae = VAE()
    vae.build(lambda_KL=FLAG.lambda_KL,
              n_dim=FLAG.n_dim,
              batch_size=FLAG.batch_size,
              shape=Xtrain.shape[1:])

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    checkpoint_path = os.path.join(FLAG.save_dir, 'model.ckpt')

    def initialize_uninitialized(sess):
        global_vars = tf.global_variables()
        is_not_initialized = sess.run(
            [tf.is_variable_initialized(var) for var in global_vars])
        not_initialized_vars = [
            v for (v, f) in zip(global_vars, is_not_initialized) if not f
        ]
        if len(not_initialized_vars):
            sess.run(tf.variables_initializer(not_initialized_vars))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # hyper parameters
        batch_size = FLAG.batch_size
        epoch = 500
        early_stop_patience = 50
        min_delta = 0.0001
        opt_type = 'adam'

        # recorder
        epoch_counter = 0

        # optimizer
        global_step = tf.Variable(0, trainable=False)

        # Passing global_step to minimize() will increment it at each step.
        if opt_type == 'sgd':
            start_learning_rate = FLAG.lr
            half_cycle = 2000
            learning_rate = tf.train.exponential_decay(start_learning_rate,
                                                       global_step,
                                                       half_cycle,
                                                       0.5,
                                                       staircase=True)
            opt = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                             momentum=0.9,
                                             use_nesterov=True)
        else:
            start_learning_rate = FLAG.lr
            half_cycle = 2000
            learning_rate = tf.train.exponential_decay(start_learning_rate,
                                                       global_step,
                                                       half_cycle,
                                                       0.5,
                                                       staircase=True)
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate)

        obj = vae.train_op
        train_op = opt.minimize(obj, global_step=global_step)

        # progress bar
        ptrain = IntProgress()
        pval = IntProgress()
        display(ptrain)
        display(pval)
        ptrain.max = int(Xtrain.shape[0] / batch_size)
        pval.max = int(Xtest.shape[0] / batch_size)

        # re-initialize
        initialize_uninitialized(sess)

        # reset due to adding a new task
        patience_counter = 0
        current_best_val_loss = float('inf')

        # optimize the aggregated obj until early stopping kicks in
        while (patience_counter < early_stop_patience
               and epoch_counter < epoch):

            # start training
            stime = time.time()
            bar_train = Bar(
                'Training',
                max=int(Xtrain.shape[0] / batch_size),
                suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')
            bar_val = Bar(
                'Validation',
                max=int(Xtest.shape[0] / batch_size),
                suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')

            train_loss = 0.0
            train_reconstruction_loss = 0.0
            train_kl_loss = 0.0
            for i in range(int(Xtrain.shape[0] / batch_size)):
                st = i * batch_size
                ed = (i + 1) * batch_size
                loss, reconstruction_loss, kl_loss, _ = sess.run(
                    [
                        obj, vae.loss['reconstruction'],
                        vae.loss['KL_loss'], train_op
                    ],
                    feed_dict={
                        vae.x: Xtrain[st:ed, :],
                        vae.y: Xtrain[st:ed, :],
                        vae.is_train: True
                    })
                print(loss)
                print(reconstruction_loss)
                print(kl_loss)
                train_loss += loss
                train_reconstruction_loss += reconstruction_loss
                train_kl_loss += kl_loss
                ptrain.value += 1
                ptrain.description = "Training %s/%s" % (ptrain.value,
                                                         ptrain.max)
            output = sess.run(
                [vae.output],
                feed_dict={
                    vae.x: Xtrain[0:64, :],
                    vae.y: Xtrain[0:64, :],
                    vae.is_train: False
                })
            print("=== train data ====")
            print(output)
            # print((Xtrain[0,:]-128.0)/128.0)

            train_loss = train_loss / ptrain.value
            train_reconstruction_loss = (train_reconstruction_loss /
                                         ptrain.value)
            train_kl_loss = train_kl_loss / ptrain.value

            # validation
            val_loss = 0
            val_reconstruction_loss = 0.0
            val_kl_loss = 0.0
            for i in range(int(Xtest.shape[0] / batch_size)):
                st = i * batch_size
                ed = (i + 1) * batch_size
                loss, reconstruction_loss, kl_loss = sess.run(
                    [obj, vae.loss['reconstruction'], vae.loss['KL_loss']],
                    feed_dict={
                        vae.x: Xtest[st:ed, :],
                        vae.y: Xtest[st:ed, :],
                        vae.is_train: False
                    })
                val_loss += loss
                val_reconstruction_loss += reconstruction_loss
                val_kl_loss += kl_loss
                pval.value += 1
                pval.description = "Testing %s/%s" % (pval.value, pval.max)
            val_loss = val_loss / pval.value
            val_reconstruction_loss = val_reconstruction_loss / pval.value
            val_kl_loss = val_kl_loss / pval.value

            # plot
            # if epoch_counter % 10 == 0:
            #     Xplot = sess.run(vae.output,
            #                      feed_dict={vae.x: Xtest[:,:],
            #                                 vae.y: Xtest[:,:],
            #                                 vae.is_train: False})
            #     for i, fname in enumerate(track):
            #         imageio.imwrite(os.path.join(FLAG.save_dir, os.path.basename(fname) + "_pred_" + str(epoch_counter) + ".png"), saveimg)
            #         print(os.path.join(FLAG.save_dir, os.path.basename(fname) + "_pred_" + str(epoch_counter) + ".png"))

            # early stopping check
            if (current_best_val_loss - val_loss) > min_delta:
                current_best_val_loss = val_loss
                patience_counter = 0
                saver.save(sess, checkpoint_path, global_step=epoch_counter)
                print("save in %s" % checkpoint_path)
            else:
                patience_counter += 1

            # shuffle Xtrain and Ytrain in the next epoch
            idx = np.random.permutation(Xtrain.shape[0])
            Xtrain = Xtrain[idx, :, :, :]

            # epoch end
            epoch_counter += 1
            ptrain.value = 0
            pval.value = 0
            bar_train.finish()
            bar_val.finish()

            print(
                "Epoch %s (%s), %s sec >> train loss: %.4f, "
                "train recon loss: %.4f, train kl loss: %.4f, "
                "val loss: %.4f, val recon loss: %.4f, val kl loss: %.4f"
                % (epoch_counter, patience_counter,
                   round(time.time() - stime, 2), train_loss,
                   train_reconstruction_loss, train_kl_loss, val_loss,
                   val_reconstruction_loss, val_kl_loss))

        # para_dict = sess.run(vgg16.para_dict)
        # np.save(os.path.join(FLAG.save_dir, "para_dict.npy"), para_dict)
        # print("save in %s" % os.path.join(FLAG.save_dir, "para_dict.npy"))

        FLAG.optimizer = opt_type
        FLAG.lr = start_learning_rate
        FLAG.batch_size = batch_size
        FLAG.epoch_end = epoch_counter
        FLAG.val_loss = current_best_val_loss

        # append this run's settings to a shared CSV of experiments
        header = ''
        row = ''
        for key in sorted(vars(FLAG)):
            if header == '':
                header = key
                row = str(getattr(FLAG, key))
            else:
                header += "," + key
                row += "," + str(getattr(FLAG, key))
        row += "\n"
        if os.path.exists("/home/cmchang/DLCV2018SPRING/hw4/model.csv"):
            with open("/home/cmchang/DLCV2018SPRING/hw4/model.csv",
                      "a") as myfile:
                myfile.write(row)
        else:
            with open("/home/cmchang/DLCV2018SPRING/hw4/model.csv",
                      "w") as myfile:
                myfile.write(header + "\n")
                myfile.write(row)
def _analytics(self, b):
    """
    Uses the self.user_qa_selection OrderedDictionary to extract the
    corresponding QA values and create a mask of dimensions:
        (number of qa layers, time steps, cols(lat), rows(lon))
    Additionally computes the temporal mask and the max gap length
    """
    if not isinstance(b, QProgressBar):
        progress_bar = IntProgress(
            value=0,
            min=0,
            max=len(self.user_qa_selection),
            step=1,
            description='',
            bar_style='',  # 'success', 'info', 'warning', 'danger' or ''
            orientation='horizontal',
            style={'description_width': 'initial'},
            layout={'width': '50%'})
        display(progress_bar)

    n_qa_layers = len(self.user_qa_selection)

    # Get the name of the first data var to extract its shape
    for k, v in self.ts.data.data_vars.items():
        break

    # Create mask xarray
    _time, _latitude, _longitude = self.ts.data.data_vars[k].shape
    mask = np.zeros((n_qa_layers, _time, _latitude, _longitude), np.int8)

    qa_layer = self.qa_def.QualityLayer.unique()
    # QA layer used to create the mask
    _qa_layer = getattr(self.ts.qa, f"qa{qa_layer[0]}")

    for i, user_qa in enumerate(self.user_qa_selection):
        if isinstance(b, QProgressBar):
            b.setValue(i)
            b.setFormat(f"Masking by QA {user_qa}")
        else:
            progress_bar.value = i
            progress_bar.description = f"Masking by QA {user_qa}"

        user_qa_fieldname = user_qa.replace(" ", "_").replace("/", "_")

        for j, qa_value in enumerate(self.user_qa_selection[user_qa]):
            qa_value_field_name = qa_value.replace(" ", "_")
            qa_flag_val = self.qa_def[
                (self.qa_def.Name == user_qa)
                & (self.qa_def.Description == qa_value)].Value.iloc[0]

            # OR together all accepted flag values for this QA layer
            if j == 0:
                mask[i] = (_qa_layer[user_qa_fieldname] == qa_flag_val)
            else:
                mask[i] = np.logical_or(
                    mask[i], _qa_layer[user_qa_fieldname] == qa_flag_val)

    if isinstance(b, QProgressBar):
        b.setValue(0)
        b.setEnabled(False)
    else:
        # Remove progress bar
        progress_bar.close()
        del progress_bar

    # A pixel is kept only if every selected QA layer accepts it
    # self.__temp_mask = mask
    # mask = xr.DataArray(np.all(self.__temp_mask, axis=0),
    mask = xr.DataArray(np.all(mask, axis=0),
                        coords=[v.time.data, v.latitude.data,
                                v.longitude.data],
                        dims=['time', 'latitude', 'longitude'])
    mask.attrs = v.attrs
    self.mask = mask

    # Remove local multi-layer mask variable
    mask = None
    del mask

    # Create the percentage of data available mask
    # Get the per-pixel per-time step binary mask
    pct_data_available = (self.mask.sum(axis=0) * 100.0) / _time
    pct_data_available.latitude.data = v.latitude.data
    pct_data_available.longitude.data = v.longitude.data
    # Set the pct_data_available object
    self.pct_data_available = pct_data_available

    # Using the computed mask get the max gap length
    self.__get_max_gap_length(b)
def _log_progress(sequence: Iterable,
                  desc: Optional[Text] = None,
                  total: Optional[int] = None,
                  miniters: Optional[int] = None):
    """
    Make and display a progress bar.

    Parameters
    ----------
    sequence : iterable
        Represents a sequence of elements.
    desc : str, optional
        Represents the description of the operation, by default None.
    total : int, optional
        Represents the total number of elements in the sequence, by
        default None.
    miniters : int, optional
        Represents the steps in which the bar will be updated, by default
        None.
    """
    if desc is None:
        desc = ''

    is_iterator = False
    if total is None:
        try:
            total = len(sequence)
        except TypeError:
            is_iterator = True

    if total is not None:
        if miniters is None:
            if total <= 200:
                miniters = 1
            else:
                miniters = int(total / 200)
    else:
        if miniters is None:
            miniters = 1

    if is_iterator:
        progress = IntProgress(min=0, max=1, value=1)
        progress.bar_style = 'info'
    else:
        progress = IntProgress(min=0, max=total, value=0)
    label = HTML()
    box = VBox(children=[label, progress])
    display(box)

    index = 0
    try:
        for index, record in enumerate(sequence, 1):
            if index == 1 or index % miniters == 0:
                if is_iterator:
                    label.value = '%s: %s / ?' % (desc, index)
                else:
                    progress.value = index
                    label.value = u'%s: %s / %s' % (desc, index, total)
            yield record
    except Exception:
        progress.bar_style = 'danger'
        raise
    else:
        progress.bar_style = 'success'
        progress.value = index
        label.value = '%s: %s' % (desc, str(index or '?'))
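# Because _log_progress is a generator, it drives the bar as a thin wrapper
# around any loop. A minimal usage sketch, assuming the function is in
# scope; the loop body is an arbitrary placeholder.
import time

# wrap any iterable; the bar updates every `miniters` items
for item in _log_progress(range(500), desc='Processing', miniters=10):
    time.sleep(0.01)  # placeholder for real work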
def get_samples_from_relation_with_chunks(file, num_of_nodes=20000,
                                          chunksize=10000):
    '''Sample a subgraph from the relation file by neighborhood expansion.

    The first node in the file is used as a source and its neighbors are
    collected; then the neighbors of those neighbors, and so on. Iterating
    this way yields a subsample of the relations. This method uses chunks
    to process huge .csv files.

    Parameters:
        file (string): the .csv file path of the relation dataframe
        num_of_nodes (int): threshold representing the minimum number of
            nodes expected in the sampled dataframe
        chunksize (int): number of rows per chunk

    Returns:
        pd.DataFrame, pd.DataFrame: nodes dataframe, edgelist dataframe
    '''
    usersdata = pd.read_csv('data/usersdata.csv',
                            delimiter='\t',
                            names=['userId', 'sex', 'timePassedValidation',
                                   'ageGroup', 'label'])

    print('preparing the progress bar')
    n_rows = sum(1 for row in open(file, 'r')) - 1
    f = IntProgress(min=0, max=int(np.ceil(n_rows / chunksize)),
                    description='Process')  # instantiate the bar
    display(f)  # display the bar

    node_list = []
    # seed the expansion with the first source node in the file
    neighbors = [pd.read_csv(file, delimiter=',', nrows=1)['src'].values[0]]
    node_list.extend(neighbors)

    print('Start sampling')
    while len(node_list) < num_of_nodes:
        count = 0
        previous = neighbors
        previous_size = len(node_list)
        neighbors = []
        for chunk in pd.read_csv(file, iterator=True, delimiter=',',
                                 chunksize=chunksize):
            f.value = count  # signal to increment the progress bar
            count += 1
            # collect neighbors in both directions of the edges
            neighbors.extend(
                chunk[chunk['src'].isin(previous)]['dst'].tolist() +
                chunk[chunk['dst'].isin(previous)]['src'].tolist())
            node_list.extend(neighbors)
            node_list = list(set(node_list))
            if len(node_list) >= num_of_nodes:
                break
        print('number of users acquired: {}'.format(len(node_list)))
        if previous_size == len(node_list):
            # no new nodes were found; stop expanding
            break
        else:
            previous_size = len(node_list)

    print('number of users finally acquired: {}'.format(len(node_list)))

    print('sub sampling relations:')
    relation_df = pd.read_csv(file, iterator=True, delimiter=',',
                              chunksize=chunksize)
    subrels = pd.concat([
        chunk[(chunk['src'].isin(node_list)) &
              (chunk['dst'].isin(node_list))] for chunk in relation_df
    ])
    subrels = (subrels.rename(columns={'Unnamed: 0': 'index'})
               .set_index('index')
               .groupby(['src', 'dst'])
               .agg({'time_ms': 'sum'}))
    subrels.reset_index(inplace=True)
    subrels.loc[:, 'time_s'] = subrels['time_ms'] / 1000.

    fusers = usersdata[usersdata.userId.isin(node_list)]
    nodes = fusers
    edges = subrels[['src', 'dst', 'time_s']].rename(
        columns={'time_s': 'weight'})

    # renumber user ids into consecutive node indices
    nodes.reset_index(level=0, inplace=True)
    nodes = nodes.drop(columns={'index'})
    nodes.reset_index(level=0, inplace=True)
    nodes = nodes.rename(columns={'index': 'node_idx'})
    uid2idx = nodes[['node_idx', 'userId']]
    uid2idx = uid2idx.set_index('userId')
    edges_renumbered = (edges.join(uid2idx, on='src')
                        .join(uid2idx, on='dst', rsuffix='_dst')
                        .drop(columns=['src', 'dst']))
    edgelist = edges_renumbered[['node_idx', 'node_idx_dst', 'weight']]
    return nodes, edgelist
def in_progress(seq, msg="Progress: [%(processed)d / %(total)d]",
                length=None, close=True):
    """ Iterate over sequence, yielding item with progress widget displayed.
        This is useful if you need to process a sequence of items with some
        time consuming operations

        .. note:: This works only in Jupyter Notebook

        .. note:: This function requires *ipywidgets* package to be installed

        :param seq: sequence to iterate on.
        :param str msg: (optional) message template to display.
                        Following variables could be used in this template:
                            - processed
                            - total
                            - time_total
                            - time_per_item
                            - time_remaining
        :param int length: (optional) if seq is generator, or it is not
                           possible to apply 'len(seq)' function to 'seq',
                           then this argument is required and its value will
                           be used as total number of items in seq.

        Example::

            import time
            for i in in_progress(range(10)):
                time.sleep(1)
    """
    from IPython.display import display
    from ipywidgets import IntProgress
    import time

    if length is None:
        length = len(seq)

    start_time = time.time()

    progress = IntProgress(
        value=0, min=0, max=length,
        description=msg % {
            'processed': 0,
            'total': length,
            'time_total': 0.0,
            'time_per_item': 0.0,
            'time_remaining': 0.0,
        })
    display(progress)

    for i, item in enumerate(seq, 1):
        progress.value = i

        # i_start_time = time.time()
        yield item  # Do the job
        i_end_time = time.time()

        progress.description = msg % {
            'processed': i,
            'total': length,
            'time_total': i_end_time - start_time,
            'time_per_item': (i_end_time - start_time) / i,
            'time_remaining': ((i_end_time - start_time) / i) * (length - i),
        }

    if close:
        progress.close()
def periodo_vulnerabilidad_con_dataframe(covid_municipal,
                                         inicio,
                                         fin,
                                         columna='tasa_covid_letal',
                                         min_casos=20,
                                         min_defunciones=-1,
                                         rf=True):
    """Computes the vulnerability (PLS) for the whole period, using the
    given column as the target.

    :param covid_municipal: the dataframe with the data used to fit the
        model. It must already contain the municipal rates
    :type covid_municipal: pd.DataFrame
    :param inicio: start date (Y-m-d)
    :type inicio: str
    :param fin: end date (Y-m-d)
    :type fin: str
    :param columna: the column to use as the target, defaults to
        'tasa_covid_letal'
    :type columna: str
    :param min_casos: minimum number of cases for a municipality to be
        considered
    :type min_casos: int
    :param min_defunciones: minimum number of deaths for a municipality to
        be considered
    :type min_defunciones: int
    :param rf: whether to also fit a Random Forest model to the data
    :type rf: bool
    :returns: A DataFrame like the input one, but with an extra column
        holding the model output. The column is named 'valor_{columna}'
    :rtype: gpd.GeoDataFrame
    """
    inicio = pd.to_datetime(inicio, yearfirst=True)
    fin = pd.to_datetime(fin, yearfirst=True)
    fin = min(covid_municipal.FECHA_INGRESO.max(), fin)
    fechas = pd.date_range(inicio, fin)
    resultados = []
    modelos = []
    f = IntProgress(min=0, max=len(fechas) - 1)  # instantiate the bar
    display(f)  # display the bar
    # covid_municipal = agregar_tasas_municipales(df)
    caracteristicas = caracteristicas_modelos_municipios(covid_municipal)
    for count, fecha in enumerate(fechas):
        covid_municipal_fecha = covid_municipal.query(
            f'FECHA_INGRESO == "{fecha.strftime("%Y-%m-%d")}"')
        pls = ajustar_pls_columna(covid_municipal_fecha,
                                  caracteristicas,
                                  columna=columna,
                                  min_casos=min_casos,
                                  min_defunciones=min_defunciones)
        df = calificar_municipios_letalidad_formato_largo(
            covid_municipal_fecha, pls, caracteristicas,
            modelo='PLS', dia_ajuste=fecha)
        resultados.append(df)
        modelo = pd.DataFrame({
            'caracteristica': caracteristicas,
            'coef': pls.coef_
        })
        modelo['dia_ajuste'] = fecha
        modelo['modelo'] = 'PLS'
        modelos.append(modelo)
        if rf:
            modelo_rf = ajustar_rf_letalidad(covid_municipal_fecha,
                                             caracteristicas,
                                             min_casos=min_casos,
                                             min_defunciones=min_defunciones)
            df = calificar_municipios_letalidad_formato_largo(
                covid_municipal_fecha, modelo_rf, caracteristicas,
                modelo='RF', dia_ajuste=fecha)
            resultados.append(df)
            modelo = pd.DataFrame({
                'caracteristica': caracteristicas,
                'coef': modelo_rf.feature_importances_
            })
            modelo['dia_ajuste'] = fecha
            modelo['modelo'] = 'RF'
            modelos.append(modelo)
        f.value = count
    resultados_df = pd.concat(resultados, ignore_index=True)
    modelos_df = pd.concat(modelos, ignore_index=True)
    resultados_df = gpd.GeoDataFrame(resultados_df, geometry='geometry')
    resultados_df.rename({'valor': f'valor_{columna}'}, axis=1, inplace=True)
    return modelos_df, resultados_df
def _randomization(metric_scores_a, metric_scores_b, n_perm=100000):
    """
    This method computes the randomization test as described in [1].

    Parameters
    ----------
    metric_scores_a : numpy array
        Vector of per-query metric scores for the IR system A.
    metric_scores_b : numpy array
        Vector of per-query metric scores for the IR system B.
    n_perm : int
        Number of permutations evaluated in the randomization test.

    Returns
    -------
    metric_scores : (float, float)
        A tuple (p-value_1, p-value_2) being respectively the one-sided and
        two-sided p-values.

    References
    ----------
    .. [1] Smucker, Mark D., James Allan, and Ben Carterette.
        "A comparison of statistical significance tests for information
        retrieval evaluation." In Proceedings of the sixteenth ACM
        conference on Conference on information and knowledge management,
        pp. 623-632. ACM, 2007.
    """
    progress_bar = IntProgress(min=0, max=10,
                               description="Randomization Test")
    display(progress_bar)

    # find the best system
    metric_scores_a_mean = np.mean(metric_scores_a)
    metric_scores_b_mean = np.mean(metric_scores_b)

    best_metrics = metric_scores_a
    worst_metrics = metric_scores_b
    if metric_scores_a_mean < metric_scores_b_mean:
        best_metrics = metric_scores_b
        worst_metrics = metric_scores_a

    difference = np.mean(best_metrics) - np.mean(worst_metrics)
    abs_difference = np.abs(difference)

    p1 = 0.0  # one-sided
    p2 = 0.0  # two-sided
    N = float(len(metric_scores_a))

    a_sum = np.sum(best_metrics)
    b_sum = np.sum(worst_metrics)

    # repeat n_perm times
    for i in range(n_perm):
        if i % (n_perm // 10) == 0:
            progress_bar.value += 1

        # select a random subset
        sel = np.random.choice([False, True], len(metric_scores_a))

        a_sel_sum = np.sum(best_metrics[sel])
        b_sel_sum = np.sum(worst_metrics[sel])

        # compute avg performance of randomized models by swapping the
        # selected per-query scores between the two systems
        a_mean = (a_sum - a_sel_sum + b_sel_sum) / N
        b_mean = (b_sum - b_sel_sum + a_sel_sum) / N

        # performance difference
        delta = a_mean - b_mean

        if delta >= difference:
            p1 += 1.
        if np.abs(delta) >= abs_difference:
            p2 += 1.

    progress_bar.bar_style = "success"
    progress_bar.close()

    p1 /= n_perm
    p2 /= n_perm

    return p1, p2
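# A quick usage sketch of the randomization test on synthetic per-query
# scores; the data below is made up purely for illustration.
import numpy as np

rng = np.random.default_rng(42)
# fake per-query metric scores for two systems (illustrative only)
scores_a = rng.uniform(0.2, 0.8, size=50)
scores_b = scores_a + rng.normal(0.05, 0.05, size=50)

p_one_sided, p_two_sided = _randomization(scores_a, scores_b, n_perm=10000)
print(f"one-sided p={p_one_sided:.4f}, two-sided p={p_two_sided:.4f}")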
def downloadFromURL(uris=None, fileNames=None, nodeNames=None,
                    checksums=None, loadFiles=None, customDownloader=None,
                    loadFileTypes=None, loadFileProperties={}):
    """Download data from custom URL with progress bar.

    See API description in SampleData.downloadFromURL.
    """
    import SampleData
    sampleDataLogic = SampleData.SampleDataLogic()

    try:
        from ipywidgets import IntProgress
        from IPython.display import display
        progress = IntProgress()
    except ImportError:
        progress = None

    def reporthook(msg, level=None):
        # Download will only account for 90 percent of the time
        # (10% is left for loading time).
        progress.value = sampleDataLogic.downloadPercent * 0.9

    if progress:
        sampleDataLogic.logMessage = reporthook
        display(progress)  # show progress bar

    computeFileNames = not fileNames
    computeNodeNames = not nodeNames
    if computeFileNames or computeNodeNames:
        urisList = uris if type(uris) == list else [uris]
        if computeFileNames:
            fileNames = []
        else:
            filenamesList = fileNames if type(fileNames) == list else [
                fileNames
            ]
        if computeNodeNames:
            nodeNames = []
        else:
            nodeNamesList = nodeNames if type(nodeNames) == list else [
                nodeNames
            ]
        import os
        for index, uri in enumerate(urisList):
            if computeFileNames:
                fileName = getFileNameFromURL(uri)
                fileNames.append(fileName)
            else:
                fileName = fileNames[index]
            if computeNodeNames:
                fileNameWithoutExtension, _ = os.path.splitext(fileName)
                nodeNames.append(fileNameWithoutExtension)

    if type(uris) != list:
        if type(fileNames) == list:
            fileNames = fileNames[0]
        if type(nodeNames) == list:
            nodeNames = nodeNames[0]

    downloaded = sampleDataLogic.downloadFromURL(uris, fileNames, nodeNames,
                                                 checksums, loadFiles,
                                                 customDownloader,
                                                 loadFileTypes,
                                                 loadFileProperties)

    if progress:
        progress.layout.display = 'none'  # hide progress bar

    return downloaded[0] if len(downloaded) == 1 else downloaded
from pynq import Overlay, allocate
import xrfclk
import xrfdc
import os
from .hierarchies import *
from .quick_widgets import Image
from ipywidgets import IntProgress
from IPython.display import display
from IPython.display import clear_output
import time
import threading

load_progress = 0
max_count = 100
load_bar = IntProgress(min=load_progress, max=max_count)  # instantiate the bar


def generate_about():
    global about
    about = ''.join([
        '<br><b>', __info__, '</b><br>', __channels__, ' ', __board__, ' ',
        __release__, '<br>', 'Version ', __version_number__, ': ',
        __version_name__, '<br>Date: ', __date__, '<br><br>',
        '<b>Organisation:</b> <br>', __organisation__, '<br><br>',
        '<b>Support</b>:<br>', __support__
    ])


class Overlay(Overlay):

    def __init__(self, overlay_system='sam', init_rf_clks=True, **kwargs):
def tree_wise_performance(datasets, models, metrics, step=10):
    """
    This method implements the analysis of the model on a tree-wise basis
    (part of the effectiveness analysis category).

    Parameters
    ----------
    datasets : list of Dataset
        The datasets to use for analyzing the behaviour of the model using
        the given metrics and models
    models : list of RTEnsemble
        The models to analyze
    metrics : list of Metric
        The metrics to use for the analysis
    step : int
        Step-size identifying evenly spaced number of trees for evaluating
        the top-k model performance. (e.g., step=100 means the method will
        evaluate the model performance at 100, 200, 300, etc. trees).

    Returns
    -------
    metric_scores : xarray.DataArray
        A DataArray containing the metric scores of each model using the
        given metrics on the given datasets. The metric scores are
        cumulatively reported tree by tree, i.e., top 10 trees, top 20,
        etc., with a step-size between the number of trees as highlighted
        by the step parameter.
    """
    def get_tree_steps(model_trees):
        trees = list(range(step - 1, model_trees, step))
        # Add the last tree to the steps
        if trees[-1] != model_trees - 1:
            trees.append(model_trees - 1)
        return np.array(trees)

    max_num_trees = 0
    for model in models:
        if model.n_trees > max_num_trees:
            max_num_trees = model.n_trees
    tree_steps = get_tree_steps(max_num_trees)

    data = np.full(shape=(len(datasets), len(models), len(tree_steps),
                          len(metrics)),
                   fill_value=np.nan,
                   dtype=np.float32)

    progress_bar = IntProgress(
        min=0,
        max=len(datasets) * len(metrics) * sum(
            [len(get_tree_steps(model.n_trees)) for model in models]),
        description="Computing metrics")
    display(progress_bar)

    for idx_dataset, dataset in enumerate(datasets):
        for idx_model, model in enumerate(models):
            y_pred, partial_y_pred, y_leaves = \
                model.score(dataset, detailed=True)

            # the document scores are accumulated along the various top-k
            # (in order to avoid useless re-scoring)
            y_pred = np.zeros(dataset.n_instances)
            for idx_top_k, top_k in enumerate(get_tree_steps(model.n_trees)):

                # compute the document scores using only the top-k trees of
                # the model on the given dataset
                idx_tree_start = idx_top_k * step
                idx_tree_stop = top_k + 1
                y_pred += partial_y_pred[:,
                                         idx_tree_start:idx_tree_stop].sum(
                                             axis=1)

                # compute the metric score using the predicted document
                # scores
                for idx_metric, metric in enumerate(metrics):
                    progress_bar.value += 1

                    metric_score, _ = metric.eval(dataset, y_pred)
                    data[idx_dataset][idx_model][idx_top_k][idx_metric] = \
                        metric_score

    progress_bar.bar_style = "success"
    progress_bar.close()

    performance = xr.DataArray(data,
                               name='Tree-Wise Performance',
                               coords=[datasets, models, tree_steps + 1,
                                       metrics],
                               dims=['dataset', 'model', 'k', 'metric'])
    return performance
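# The accumulation trick above avoids re-scoring from scratch at every
# cut-off: each step only adds the contribution of the newly included
# trees. A tiny self-contained sketch of the same idea on a random
# partial-prediction matrix; shapes are illustrative.
import numpy as np

step = 10
n_docs, n_trees = 5, 35
rng = np.random.default_rng(0)
partial = rng.normal(size=(n_docs, n_trees))  # per-tree contributions

steps = list(range(step - 1, n_trees, step))
if steps[-1] != n_trees - 1:
    steps.append(n_trees - 1)

y_pred = np.zeros(n_docs)
for idx, top_k in enumerate(steps):
    start, stop = idx * step, top_k + 1
    y_pred += partial[:, start:stop].sum(axis=1)  # add only the new trees
    # running total equals a full re-score over the first `stop` trees
    assert np.allclose(y_pred, partial[:, :stop].sum(axis=1))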
def screenshot(url: str, api_key: str = None) -> requests.models.Response:
    """
    Get a screenshot of a url with Browshot.

    Parameters
    ----------
    url : str
        The url a screenshot is wanted for.
    api_key : str (optional)
        Browshot API key. If not set, msticpyconfig is checked for this.

    Returns
    -------
    image_data: requests.models.Response
        The final screenshot request response data.
    """
    # Get Browshot API key from kwargs or config
    if api_key is not None:
        bs_api_key: Optional[str] = api_key
    else:
        bs_conf = config.settings.get(
            "DataProviders", {}).get("Browshot") or config.settings.get(
                "Browshot")
        bs_api_key = None
        if bs_conf is not None:
            bs_api_key = bs_conf.get("Args", {}).get("AuthKey")  # type: ignore

    if bs_api_key is None:
        raise MsticpyUserConfigError(
            "No configuration found for Browshot",
            "Please add a section to msticpyconfig.yaml:",
            "DataProviders:",
            "  Browshot:",
            "    Args:",
            "      AuthKey: {your_auth_key}",
            title="Browshot configuration not found",
            browshot_uri=("Get an API key for Browshot",
                          "https://api.browshot.com/"),
        )

    # Request screenshot from Browshot and get request ID
    id_string = f"https://api.browshot.com/api/v1/screenshot/create?url={url}/&instance_id=26&size=screen&cache=0&key={bs_api_key}"  # pylint: disable=line-too-long
    id_data = requests.get(id_string)
    bs_id = json.loads(id_data.content)["id"]
    status_string = (
        f"https://api.browshot.com/api/v1/screenshot/info?id={bs_id}&key={bs_api_key}"
    )
    image_string = f"https://api.browshot.com/api/v1/screenshot/thumbnail?id={bs_id}&zoom=50&key={bs_api_key}"  # pylint: disable=line-too-long

    # Wait until the screenshot is ready and keep user updated with progress
    print("Getting screenshot")
    progress = IntProgress(min=0, max=40)
    display.display(progress)
    ready = False
    while not ready:
        progress.value += 1
        status_data = requests.get(status_string)
        status = json.loads(status_data.content)["status"]
        if status == "finished":
            ready = True
        else:
            time.sleep(0.05)
    progress.value = 40

    # Once ready, get the screenshot
    image_data = requests.get(image_string)

    if image_data.status_code != 200:
        print(
            "There was a problem with the request, "
            "please check the status code for details"
        )

    return image_data
def vgg16_train(model, train, test, init_from, save_dir, batch_size=64,
                epoch=300, early_stop_patience=25):
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    checkpoint_path = os.path.join(save_dir, 'model.ckpt')

    with tf.Session() as sess:
        print(tf.trainable_variables())

        # hyper parameters
        learning_rate = 5e-4  # adam
        min_delta = 0.0001

        # recorder
        epoch_counter = 0
        loss_history = []
        val_loss_history = []

        # optimizer
        opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = opt.minimize(model.loss)

        # saver
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)
        sess.run(tf.global_variables_initializer())

        # progress bar
        ptrain = IntProgress()
        pval = IntProgress()
        display(ptrain)
        display(pval)
        ptrain.max = int(train.images.shape[0] / batch_size)
        pval.max = int(test.images.shape[0] / batch_size)

        # reset due to adding a new task
        patience_counter = 0
        current_best_val_loss = 100000  # a large number

        # train start
        while patience_counter < early_stop_patience:
            stime = time.time()
            bar_train = Bar(
                'Training',
                max=int(train.images.shape[0] / batch_size),
                suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')
            bar_val = Bar(
                'Validation',
                max=int(test.images.shape[0] / batch_size),
                suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')

            # training an epoch
            train_loss = 0
            for i in range(int(train.images.shape[0] / batch_size)):
                st = i * batch_size
                ed = (i + 1) * batch_size
                _, loss = sess.run(
                    [train_op, model.loss],
                    feed_dict={
                        model.x: train.images[st:ed, :],
                        model.y: train.labels[st:ed, :],
                        model.w: train.weights[st:ed, :]
                    })
                train_loss += loss
                ptrain.value += 1
                ptrain.description = "Training %s/%s" % (i, ptrain.max)
                bar_train.next()
            train_loss /= ptrain.max

            # validation
            val_loss = 0
            for i in range(int(test.images.shape[0] / batch_size)):
                st = i * batch_size
                ed = (i + 1) * batch_size
                loss = sess.run(
                    model.loss,
                    feed_dict={
                        model.x: test.images[st:ed, :],
                        model.y: test.labels[st:ed, :],
                        model.w: np.expand_dims(np.repeat(1.0, batch_size),
                                                axis=1)
                    })
                val_loss += loss
                pval.value += 1
                pval.description = "Validation %s/%s" % (i, pval.max)
                bar_val.next()
            val_loss /= pval.max

            # early stopping check
            if (current_best_val_loss - val_loss) > min_delta:
                current_best_val_loss = val_loss
                patience_counter = 0
                saver.save(sess, checkpoint_path, global_step=epoch_counter)
                print("reset early stopping and save model into %s at epoch %s"
                      % (checkpoint_path, epoch_counter))
            else:
                patience_counter += 1

            # shuffle Xtrain and Ytrain in the next epoch
            train.shuffle()

            loss_history.append(train_loss)
            val_loss_history.append(val_loss)

            ptrain.value = 0
            pval.value = 0
            bar_train.finish()
            bar_val.finish()

            print("Epoch %s (%s), %s sec >> train-loss: %.4f, val-loss: %.4f"
                  % (epoch_counter, patience_counter,
                     round(time.time() - stime, 2), train_loss, val_loss))

            # epoch end
            epoch_counter += 1
            if epoch_counter >= epoch:
                break

    res = pd.DataFrame({
        "epoch": range(0, len(loss_history)),
        "loss": loss_history,
        "val_loss": val_loss_history
    })
    res.to_csv(os.path.join(save_dir, "history.csv"), index=False)
    print("end training")
def periodo_vulnerabilidad_con_dataframe(df, inicio, fin,
                                         min_casos=20, min_defunciones=-1):
    '''
    Fit one PLS and one random-forest lethality model per day over the date
    range, using an already-loaded case-level DataFrame.
    '''
    inicio = pd.to_datetime(inicio, yearfirst=True)
    fin = pd.to_datetime(fin, yearfirst=True)
    fin = min(df.FECHA_INGRESO.max(), fin)
    fechas = pd.date_range(inicio, fin)
    resultados = []
    modelos = []
    f = IntProgress(min=0, max=len(fechas) - 1)  # instantiate the bar
    display(f)  # display the bar
    covid_municipal = agregar_tasas_municipales(df)
    caracteristicas = caracteristicas_modelos_municipios(covid_municipal)
    for count, fecha in enumerate(fechas):
        covid_municipal_fecha = covid_municipal.query(
            f'FECHA_INGRESO == "{fecha.strftime("%Y-%m-%d")}"')

        # PLS model for this day
        pls = ajustar_pls_letalidad(covid_municipal_fecha,
                                    caracteristicas,
                                    min_casos=min_casos,
                                    min_defunciones=min_defunciones)
        df_pls = calificar_municipios_letalidad_formato_largo(
            covid_municipal_fecha,
            pls,
            caracteristicas,
            modelo='PLS',
            dia_ajuste=fecha)
        resultados.append(df_pls)
        modelo = pd.DataFrame({
            'caracteristica': caracteristicas,
            'coef': pls.coef_
        })
        modelo['dia_ajuste'] = fecha
        modelo['modelo'] = 'PLS'
        modelos.append(modelo)

        # Random-forest model for this day
        rf = ajustar_rf_letalidad(covid_municipal_fecha,
                                  caracteristicas,
                                  min_casos=min_casos,
                                  min_defunciones=min_defunciones)
        df_rf = calificar_municipios_letalidad_formato_largo(
            covid_municipal_fecha,
            rf,
            caracteristicas,
            modelo='RF',
            dia_ajuste=fecha)
        resultados.append(df_rf)
        modelo = pd.DataFrame({
            'caracteristica': caracteristicas,
            'coef': rf.feature_importances_
        })
        modelo['dia_ajuste'] = fecha
        modelo['modelo'] = 'RF'
        modelos.append(modelo)
        f.value = count
    resultados_df = pd.concat(resultados, ignore_index=True)
    modelos_df = pd.concat(modelos, ignore_index=True)
    resultados_df = gpd.GeoDataFrame(resultados_df, geometry='geometry')
    return modelos_df, resultados_df
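# Usage sketch for periodo_vulnerabilidad_con_dataframe(); `casos_df` is a
# hypothetical name for a case-level DataFrame with a FECHA_INGRESO column,
# as the function expects. The date range is illustrative only.
modelos_df, resultados_df = periodo_vulnerabilidad_con_dataframe(
    casos_df, inicio='2020-04-01', fin='2020-06-30', min_casos=20)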
def downsample(img_folder, out_folder, sample=True, split=.8,
               down_rate=1, crop=None, dim="3D"):
    # Check that dim is properly defined
    if dim not in ["2D", "3D"]:
        print("dim is not either 2D or 3D")
        return

    # Load all of the base filenames, ignoring all other files in the directory
    base_files = [file for file in os.listdir(img_folder)
                  if file.endswith("MR.npz")]

    # Check if the output directories exist. If not, create them.
    create_dir(out_folder)
    create_dir(out_folder + "/train")
    create_dir(out_folder + "/test")
    create_dir(out_folder + "/train/imgs")
    create_dir(out_folder + "/train/segs")
    create_dir(out_folder + "/test/imgs")
    create_dir(out_folder + "/test/segs")

    # Set up progress bar.
    f = IntProgress(min=0, max=len(base_files))
    l = Label("Loading File")
    H = HBox([f, l])
    display(H)  # display the bar and label

    # Set up the output folders
    out_fol_img = out_folder + "/train/imgs/"
    out_fol_seg = out_folder + "/train/segs/"
    tt = "Train: "  # The label prefix for the progress bar

    # If crop is None, use the full volume; otherwise unpack the crop range
    if not crop:
        a1 = b1 = c1 = 0
        (a2, b2, c2) = np.load(img_folder + "/" + base_files[0])['arr_0'].shape
    else:
        (a1, a2, b1, b2, c1, c2) = crop
        print("Cropping to ", a1, a2, b1, b2, c1, c2)

    # For each file, load both the image and segmentation in.
    # Downsample both and output.
    ds = down_rate
    for n, file in enumerate(base_files):
        img = np.load(img_folder + "/" + file)['arr_0'][a1:a2, b1:b2, c1:c2]
        seg = np.load(img_folder + "/" + file[:-4] + "seg.npz")['arr_0'][a1:a2, b1:b2, c1:c2]
        if (n + 1) > len(base_files) * split:
            out_fol_img = out_folder + "/test/imgs/"
            out_fol_seg = out_folder + "/test/segs/"
            tt = "Test: "
        for i in range(ds):
            for j in range(ds):
                for k in range(ds):
                    N = str(i + ds * j + (ds ** 2) * k)
                    ds_img = img[i::ds, j::ds, k::ds]
                    ds_seg = seg[i::ds, j::ds, k::ds]
                    if dim == "3D":
                        np.savez_compressed(out_fol_img + file[:-4] + N + ".npz", ds_img)
                        np.savez_compressed(out_fol_seg + file[:-4] + N + ".npz", ds_seg)
                    elif dim == "2D":
                        for r in range(a2 - a1):
                            np.savez_compressed(out_fol_img + file[:-4] + N + "_" + str(r) + ".npz", ds_img[r, :, :])
                            np.savez_compressed(out_fol_seg + file[:-4] + N + "_" + str(r) + ".npz", ds_seg[r, :, :])
        f.value += 1  # signal to increment the progress bar
        l.value = tt + file

    # Display a sample output if requested
    if sample:
        display_train_test(out_folder, dim=dim)

    # Summarize preprocessing info
    factor = ds ** 3  # each volume yields ds**3 interleaved downsamples
    print("Train Images:", int(factor * np.floor(len(base_files) * split)))
    print("Test Images:", int(factor * (len(base_files) - np.floor(len(base_files) * split))))
    print("Dimensions:", ds_img.shape)
    return ds_img.shape
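# Usage sketch for downsample(); the paths are placeholders. With down_rate=2
# each cropped volume yields 8 interleaved 3D downsamples, split 80/20 into
# the train/test folders created by the function.
out_shape = downsample("./raw_npz", "./prepared", sample=False,
                       split=0.8, down_rate=2, crop=None, dim="3D")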
class PypelidWidget(object):
    """Interactive calculator UI built from ipywidgets and plotly widgets."""

    widgets = {
        'nreal': BoundedIntText(value=1000, min=0, max=100000, step=100,
                                description='Number of realizations:',
                                layout={'width': '250px'},
                                style={'description_width': '150px'}),
        'button': Button(description="Run", icon='play',
                         layout={'border': 'solid 1px black', 'width': '100px'}),
        'progress': IntProgress(bar_style='success'),
        'timer': Label(),
        'snrbox': Label(layout={'border': 'solid 1px green', 'width': '100px'}),
        'zmeas': Label(layout={'border': 'solid 1px green', 'width': '100px'}),
        'zerr': Label(layout={'border': 'solid 1px green', 'width': '100px'}),
        'zerr_68': Label(layout={'border': 'solid 1px green', 'width': '100px'}),
        'zerr_sys': Label(layout={'border': 'solid 1px green', 'width': '100px'}),
        'zerr_cat': Label(layout={'border': 'solid 1px green', 'width': '100px'}),
        'signal_on': Checkbox(value=True, description='Signal',
                              layout={'width': '80px'},
                              style={'description_width': '0px'}),
        'noise_on': Checkbox(value=True, description='Noise',
                             layout={'width': '80px'},
                             style={'description_width': '0px'}),
        'real_on': Checkbox(value=True, description='Realization',
                            layout={'width': '80px'},
                            style={'description_width': '0px'}),
        'seed': IntText(description='Seed', disabled=True,
                        layout={'width': '150px'},
                        style={'description_width': '50px'}),
        'seed_checkbox': Checkbox(value=False, description='Freeze random seed',
                                  layout={'width': '150px'},
                                  style={'description_width': '0px'}),
    }

    def __init__(self):
        self.instrument = instrument_widget.Instrument()
        self.foreground = foreground_widget.Foreground()
        self.galaxy = galaxy_widget.Galaxy()
        self.analysis = analysis_widget.Analysis()
        self.survey = survey_widget.Survey()
        self.config = config_widget.Config((self.galaxy, self.foreground,
                                            self.instrument, self.survey,
                                            self.analysis))
        self.running = False
        self.render_lock = threading.Lock()
        self.param_lock = threading.Lock()

    def render(self, change=None):
        """Start a render thread unless a render is already in progress."""
        if not self.render_lock.acquire(False):
            return
        if not self.param_lock.acquire(False):
            return
        render_thread = threading.Thread(
            target=self._render,
            args=((self.render_lock, self.param_lock),))
        render_thread.start()

    def _render(self, locks):
        """Compute one noiseless and one noisy realization and refresh plots."""
        self.widgets['render_button'].style.button_color = 'orange'
        if not self.widgets['seed_checkbox'].value:
            self.widgets['seed'].value = np.random.randint(0, 1e6)
        seed = self.widgets['seed'].value
        rng.seed(seed)

        wavelength_scale, flux, var, obs_list = self.spec(noise=False)
        wavelength_scale_, flux_n, var_, obs_list_ = self.spec(noise=True)

        self.wavelength_scale = wavelength_scale / 1e4
        step = wavelength_scale[1] - wavelength_scale[0]
        self.signal = flux / step
        self.real = flux_n / step
        self.noise = var ** 0.5 / step
        self.hideshow_line()

        # Build the image-plane histogram of sampled photon positions
        L, gal = obs_list[0]
        x, y = np.transpose(gal.sample(int(1e6), L.plate_scale,
                                       self.galaxy.widgets['iso'].value))
        dx, dy = np.transpose(L.PSF.sample(len(x)))
        x += dx
        y += dy
        r = np.sqrt(x * x + y * y)
        w = int(np.ceil(np.percentile(r, 80))) + 0.5
        w = min(20.5, w)
        b = np.arange(-w, w + 1, 1)
        h, ey, ex = np.histogram2d(y, x, bins=(b, b))
        bc = (ey[1:] + ey[:-1]) / 2.
        self.figs['image'].data[0]['z'] = h
        self.figs['image'].data[0]['x'] = bc
        self.figs['image'].data[0]['y'] = bc

        ii = var > 0
        snr = np.sqrt(np.sum(flux[ii] ** 2 / var[ii]))
        self.widgets['snrbox'].value = "%3.2f" % snr
        self.widgets['render_button'].style.button_color = 'lightgreen'
        for lock in locks:
            lock.release()

    def spec(self, noise=True):
        """Simulate the observed spectrum; return the stacked flux and variance."""
        emission_lines = [
            ('Ha', self.galaxy.widgets['flux_ha'].value * 1e-16),
            ('N2a', self.galaxy.widgets['flux_n2a'].value * 1e-16),
            ('N2b', self.galaxy.widgets['flux_n2b'].value * 1e-16),
            ('S2a', self.galaxy.widgets['flux_s2a'].value * 1e-16),
            ('S2b', self.galaxy.widgets['flux_s2b'].value * 1e-16),
            ('S3a', self.galaxy.widgets['flux_s3a'].value * 1e-16),
            ('S3b', self.galaxy.widgets['flux_s3b'].value * 1e-16),
            ('O3a', self.galaxy.widgets['flux_o3a'].value * 1e-16),
            ('O3b', self.galaxy.widgets['flux_o3b'].value * 1e-16),
            ('Hb', self.galaxy.widgets['flux_hb'].value * 1e-16),
            ('O2', self.galaxy.widgets['flux_o2'].value * 1e-16),
        ]
        nexp_list = (self.survey.widgets['nexp_red'].value,
                     self.survey.widgets['nexp_blue'].value)
        exp_time = self.survey.widgets['exp_time'].value
        ztol = self.analysis.widgets['ztol'].value

        config_list = self.instrument.get_config_list()
        obs_list = []
        for i, config in enumerate(config_list):
            nexp = nexp_list[i]
            if nexp == 0:
                continue
            O = optics.Optics(config)
            L = linesim.LineSimulator(
                O,
                extraction_sigma=self.analysis.widgets['extraction_sigma'].value,
                isotropize=self.galaxy.widgets['iso'].value)

            # detector background: dark current, read noise and foreground
            det_bg = nexp * exp_time * config['darkcurrent'] + nexp * config['readnoise'] ** 2
            det_bg += nexp * exp_time * self.foreground.widgets['foreground'].value

            gal = galaxy.Galaxy(
                z=self.galaxy.widgets['redshift'].value,
                bulge_scale=self.galaxy.widgets['bulge_scale'].value,
                disk_scale=self.galaxy.widgets['disk_scale'].value,
                bulge_fraction=self.galaxy.widgets['bulge_fraction'].value,
                axis_ratio=self.galaxy.widgets['axis_ratio'].value,
                pa=self.galaxy.widgets['pa'].value,
                velocity_disp=self.galaxy.widgets['velocity_dispersion'].value,
            )

            for line, flux in emission_lines:
                wavelength = (1 + gal.z) * consts.line_list[line]
                if wavelength < O.lambda_start or wavelength > O.lambda_end:
                    continue
                signal = phot.flux_to_photon(flux, O.collecting_area, wavelength)
                signal *= exp_time * nexp
                signal *= O.transmission(np.array([wavelength]), 1)[0]
                if signal <= 0:
                    continue
                line_variance = signal
                scale = flux / signal
                if not noise:
                    v = (signal * scale) ** 2 / 1e7
                else:
                    v = signal * scale ** 2
                gal.append_line(
                    wavelength=consts.line_list[line],
                    flux=signal * scale,
                    variance=v,
                    background=det_bg * scale ** 2,
                    rest_frame=1
                )

            # add a line at the center of the bandpass (observed frame)
            scale = phot.flux_to_photon(1, O.collecting_area, O.lambda_ref)
            scale *= exp_time * nexp
            scale *= O.transmission(np.array([O.lambda_ref]), 1)[0]
            scale = 1. / scale
            gal.append_line(
                wavelength=O.lambda_ref,
                flux=0,
                variance=0,
                background=det_bg * scale ** 2,
                rest_frame=0
            )
            gal.compute_obs_wavelengths(gal.z)

            if gal.line_count == 0:
                continue
            obs_list.append((L, gal))

        # build a common wavelength scale covering all observations
        wavelength_scales = []
        dispersion = []
        for L, gal in obs_list:
            x = np.arange(L.npix) * L.dispersion + L.lambda_min
            wavelength_scales.append(x)
            dispersion.append(L.dispersion)
        dispersion = np.min(dispersion)
        wavelength_min = np.min(np.concatenate(wavelength_scales))
        wavelength_max = np.max(np.concatenate(wavelength_scales))
        wavelength_scale = np.arange(wavelength_min, wavelength_max, dispersion)

        specset = []
        for i, obs in enumerate(obs_list):
            L, gal = obs
            spectra = L.sample_spectrum(gal)
            if noise:
                s = spectra[0]
            else:
                s = spectra[1]
            specset.append((wavelength_scales[i], np.array(s),
                            np.array(spectra[2])))

        flux_stack, var_stack = combine_spectra(wavelength_scale, specset)
        return wavelength_scale, flux_stack, var_stack, obs_list

    def update(self, zgrid, zmeas, wavelength_scale, mean_total, var_total, count):
        """Refresh the SNR and redshift statistics from accumulated realizations."""
        zmeas = np.array(zmeas)
        m = mean_total * 1. / count
        var = var_total * 1. / count - m ** 2
        ii = var > 0
        snr = np.sqrt(np.sum(m[ii] ** 2 / var[ii]))
        self.widgets['snrbox'].value = "%3.2f" % snr

        ztrue = self.galaxy.widgets['redshift'].value
        ztol = self.analysis.widgets['ztol'].value
        dz = np.abs(zmeas - ztrue)
        sel = dz < ztol
        if np.sum(sel) > 0:
            z = np.mean(zmeas[sel])
            dzobs = np.abs(zmeas - z)
            dz68 = np.percentile(dzobs[sel], 68)
            self.widgets['zerr_68'].value = "%3.2e" % dz68
            if dz68 > 0:
                self.widgets['zerr_sys'].value = "%g" % (
                    (ztrue - z) * np.sqrt(np.sum(sel)) / dz68)
            self.widgets['zerr_cat'].value = "%f" % (
                1 - np.sum(sel) * 1. / len(zmeas))
            self.widgets['zmeas'].value = "%g" % z
            self.widgets['zerr'].value = "%3.2e" % (ztrue - z)

        h, e = np.histogram(zmeas, bins=zgrid)
        h = h * 1. / np.sum(h)
        x = (e[1:] + e[:-1]) / 2.
        a = np.where(h > 0)[0][0] - 1
        b = np.where(h > 0)[0][-1] + 1
        x = x[a:b + 1]
        h = h[a:b + 1]
        self.figs['pdf'].data[0]['x'] = x
        self.figs['pdf'].data[0]['y'] = h

    def run(self, stop_event):
        """Run the Monte Carlo redshift-measurement loop."""
        self.param_lock.acquire()
        self._start_time = time.time()
        emission_lines = [
            ('Ha', self.galaxy.widgets['flux_ha'].value * 1e-16),
            ('N2a', self.galaxy.widgets['flux_n2a'].value * 1e-16),
            ('N2b', self.galaxy.widgets['flux_n2b'].value * 1e-16),
            ('S2a', self.galaxy.widgets['flux_s2a'].value * 1e-16),
            ('S2b', self.galaxy.widgets['flux_s2b'].value * 1e-16),
            ('S3a', self.galaxy.widgets['flux_s3a'].value * 1e-16),
            ('S3b', self.galaxy.widgets['flux_s3b'].value * 1e-16),
            ('O3a', self.galaxy.widgets['flux_o3a'].value * 1e-16),
            ('O3b', self.galaxy.widgets['flux_o3b'].value * 1e-16),
            ('Hb', self.galaxy.widgets['flux_hb'].value * 1e-16),
            ('O2', self.galaxy.widgets['flux_o2'].value * 1e-16),
        ]
        nexp_list = (self.survey.widgets['nexp_red'].value,
                     self.survey.widgets['nexp_blue'].value)
        exp_time = self.survey.widgets['exp_time'].value
        ztol = self.analysis.widgets['ztol'].value

        self.figs['pdf'].update_layout(
            shapes=[go.layout.Shape(
                type="rect",
                xref="x",
                yref="paper",
                x0=self.galaxy.widgets['redshift'].value - ztol,
                y0=0,
                x1=self.galaxy.widgets['redshift'].value + ztol,
                y1=1,
                fillcolor="LightSalmon",
                opacity=0.5,
                layer="below",
                line_width=0,
            ), ])

        config_list = self.instrument.get_config_list()
        obs_list = []
        for i, config in enumerate(config_list):
            nexp = nexp_list[i]
            if nexp == 0:
                continue
            O = optics.Optics(config)
            L = linesim.LineSimulator(
                O,
                extraction_sigma=self.analysis.widgets['extraction_sigma'].value,
                isotropize=self.galaxy.widgets['iso'].value)

            # detector background (read noise scaled by nexp, as in spec())
            det_bg = nexp * exp_time * config['darkcurrent'] + nexp * config['readnoise'] ** 2
            det_bg += nexp * exp_time * self.foreground.widgets['foreground'].value

            gal = galaxy.Galaxy(
                z=self.galaxy.widgets['redshift'].value,
                bulge_scale=self.galaxy.widgets['bulge_scale'].value,
                disk_scale=self.galaxy.widgets['disk_scale'].value,
                bulge_fraction=self.galaxy.widgets['bulge_fraction'].value,
                axis_ratio=self.galaxy.widgets['axis_ratio'].value,
                velocity_disp=self.galaxy.widgets['velocity_dispersion'].value,
            )

            for line, flux in emission_lines:
                wavelength = (1 + gal.z) * consts.line_list[line]
                if wavelength < O.lambda_start or wavelength > O.lambda_end:
                    continue
                signal = phot.flux_to_photon(flux, O.collecting_area, wavelength)
                signal *= exp_time * nexp
                signal *= O.transmission(np.array([wavelength]), 1)[0]
                if signal <= 0:
                    continue
                line_variance = signal
                scale = flux / signal
                gal.append_line(
                    wavelength=consts.line_list[line],
                    flux=signal * scale,
                    variance=signal * scale ** 2,
                    background=det_bg * scale ** 2,
                    rest_frame=1
                )

            # add a line at the center of the bandpass (observed frame)
            scale = phot.flux_to_photon(1, O.collecting_area, O.lambda_ref)
            scale *= exp_time * nexp
            scale *= O.transmission(np.array([O.lambda_ref]), 1)[0]
            scale = 1. / scale
            gal.append_line(
                wavelength=O.lambda_ref,
                flux=0,
                variance=0,
                background=det_bg * scale ** 2,
                rest_frame=0
            )
            gal.compute_obs_wavelengths(gal.z)
            if gal.line_count == 0:
                continue
            obs_list.append((L, gal))

        wavelength_scales = []
        dispersion = []
        for L, gal in obs_list:
            x = np.arange(L.npix) * L.dispersion + L.lambda_min
            wavelength_scales.append(x)
            dispersion.append(L.dispersion)
        dispersion = np.min(dispersion)
        wavelength_min = np.min(np.concatenate(wavelength_scales))
        wavelength_max = np.max(np.concatenate(wavelength_scales))
        wavelength_scale = np.arange(wavelength_min, wavelength_max, dispersion)

        zgrid = np.arange(self.analysis.widgets['zmin'].value,
                          self.analysis.widgets['zmax'].value,
                          self.analysis.widgets['zstep'].value)
        zfitter = template_fit.TemplateFit(
            wavelength_scale, zgrid, consts.line_list,
            res=self.analysis.widgets['templ_res'].value,
            template_file=self.analysis.template_path)

        nloops = self.widgets['nreal'].value
        self.widgets['progress'].min = 0
        self.widgets['progress'].max = nloops

        prob_z = []
        zmeas = []
        t0 = time.time()
        t1 = time.time()
        mean_total = 0
        var_total = 0
        count = 0
        for loop in range(nloops):
            if stop_event.is_set():
                break
            specset = []
            for i, obs in enumerate(obs_list):
                L, gal = obs
                spectra = L.sample_spectrum(gal)
                specset.append((wavelength_scales[i], np.array(spectra[0]),
                                np.array(spectra[2])))

            flux_stack, var_stack = combine_spectra(wavelength_scale, specset)
            mean_total += flux_stack
            var_total += flux_stack ** 2
            count += 1

            ii = var_stack > 0
            invvar = np.zeros(len(var_stack), dtype='d')
            invvar[ii] = 1. / var_stack[ii]

            amp = zfitter.template_fit(flux_stack, invvar, 2)
            pz = np.array(zfitter.pz())
            zmeas.append(centroidz(zgrid, pz))

            # refresh the statistics display every 10 seconds
            if time.time() - t0 > 10:
                self.update(zgrid, zmeas, wavelength_scale,
                            mean_total, var_total, count)
                t0 = time.time()
            # refresh the progress bar and timer every second
            if time.time() - t1 > 1:
                self.widgets['progress'].value = loop
                self.widgets['progress'].description = "%i/%i" % (loop + 1, nloops)
                self.widgets['timer'].value = "elapsed time: %i s" % (
                    time.time() - self._start_time)
                t1 = time.time()

        self.widgets['progress'].description = "%i/%i" % (loop + 1, nloops)
        self.widgets['timer'].value = "elapsed time: %i s" % (
            time.time() - self._start_time)
        self.widgets['progress'].value = 0
        self.update(zgrid, zmeas, wavelength_scale, mean_total, var_total, count)
        self.reset_button(self.widgets['button'])
        self.param_lock.release()

    def click_start(self, button):
        """Start the Monte Carlo run, or signal it to stop on second click."""
        if not self.running:
            self.running = True
            button.description = "Stop"
            button.icon = "stop"
            button.style.button_color = 'orange'
            self.stop_event = threading.Event()
            thread = threading.Thread(target=self.run, args=(self.stop_event,))
            thread.start()
        else:
            self.stop_event.set()
            self.reset_button(button)

    def reset_button(self, button):
        """Restore the run button to its idle state."""
        self.running = False
        button.description = "Run"
        button.icon = "play"
        button.style.button_color = 'lightgreen'

    def tab_event(self, change):
        if change['type'] == 'change' and change['name'] == 'selected_index':
            if change['new'] == 5:
                self.config.update()

    def hideshow_line(self, change=None):
        for key, i, arr in [('signal_on', 2, self.signal),
                            ('real_on', 1, self.real),
                            ('noise_on', 0, self.noise)]:
            if self.widgets[key].value:
                if len(self.figs['spec'].data[i]['x']) != len(self.wavelength_scale):
                    self.figs['spec'].data[i]['x'] = self.wavelength_scale
                self.figs['spec'].data[i]['y'] = arr
            else:
                self.figs['spec'].data[i]['y'] = []

    def seed_checkbox(self, change=None):
        """Enable the seed box only when the seed is frozen."""
        self.widgets['seed'].disabled = not self.widgets['seed_checkbox'].value

    def show(self):
        """Build and display the full widget layout."""
        about = VBox([HTML(
            '<a href="https://github.com/bengranett/pypelidcalc" target="_blank">'
            'Pypelid-calc</a> version: %s' % pypelidcalc.__version__)])
        tab = Tab([self.galaxy.widget, self.foreground.widget,
                   self.instrument.widget, self.survey.widget,
                   self.analysis.widget, self.config.widget, about])
        tab.set_title(0, "Source")
        tab.set_title(1, "Foreground")
        tab.set_title(2, "Instrument")
        tab.set_title(3, "Survey")
        tab.set_title(4, "Analysis")
        tab.set_title(5, "Config")
        tab.set_title(6, "About")
        tab.layout = {'height': '300px'}
        tab.observe(self.tab_event)
        display(tab)

        # re-render whenever any input parameter changes
        for group in [self.galaxy, self.foreground, self.instrument,
                      self.survey, self.analysis]:
            for key, w in group.widgets.items():
                w.observe(self.render, names='value')

        self.figs = {}
        self.figs['spec'] = go.FigureWidget()
        self.figs['spec'].update_layout(xaxis_title=u'Wavelength (\u03BCm)',
                                        height=200, yaxis_title='Flux density',
                                        margin=dict(l=0, r=0, t=0, b=0, pad=0))
        self.figs['spec'].add_scatter(x=[], y=[], name='Noise', line_color='grey')
        self.figs['spec'].add_scatter(x=[], y=[], name='Realization',
                                      line_color='dodgerblue')
        self.figs['spec'].add_scatter(x=[], y=[], name='Signal', line_color='black')

        self.figs['image'] = go.FigureWidget()
        self.figs['image'].update_layout(height=200, width=200,
                                         margin=dict(l=0, r=0, t=0, b=0, pad=0))
        self.figs['image'].add_trace(go.Heatmap(z=[[]], showscale=False))

        self.widgets['render_button'] = Button(
            description="Update realization",
            layout={'border': 'solid 1px black', 'width': '200px'})
        self.widgets['render_button'].on_click(self.render)
        self.widgets['seed_checkbox'].observe(self.seed_checkbox, names='value')

        self.widgets['signal_on'].observe(self.hideshow_line, names='value')
        self.widgets['real_on'].observe(self.hideshow_line, names='value')
        self.widgets['noise_on'].observe(self.hideshow_line, names='value')
        checkboxes = HBox([self.widgets['signal_on'], self.widgets['noise_on'],
                           self.widgets['real_on']])

        display(HTML('<h3>Spectrum</h3>'))
        display(HBox([self.widgets['seed_checkbox'], self.widgets['seed']]))
        display(HBox([HTML('SNR:'), self.widgets['snrbox'],
                      self.widgets['render_button'], checkboxes]))
        display(HBox([self.figs['spec'], self.figs['image']]))

        self.reset_button(self.widgets['button'])
        self.widgets['button'].on_click(self.click_start)

        elements = [HTML("<h3>Redshift measurement</h3>")]
        elements += [HBox([self.widgets['nreal'], self.widgets['button'],
                           self.widgets['progress'], self.widgets['timer']])]
        horiz = [HTML('<b>Statistics:</b>')]
        horiz += [HTML('Mean z:'), self.widgets['zmeas']]
        horiz += [HTML('Error:'), self.widgets['zerr']]
        horiz += [HTML('68% limit:'), self.widgets['zerr_68']]
        horiz += [HTML('Fractional systematic:'), self.widgets['zerr_sys']]
        horiz += [HTML('Outlier rate:'), self.widgets['zerr_cat']]
        elements += [HBox(horiz)]
        display(VBox(elements))

        self.figs['pdf'] = go.FigureWidget()
        self.figs['pdf'].update_layout(xaxis_title='Redshift', height=200,
                                       yaxis_title='Distribution',
                                       margin=dict(l=0, r=0, t=0, b=0, pad=0))
        self.figs['pdf'].add_scatter(x=[], y=[], name='Measured redshift')
        display(self.figs['pdf'])

        self.render_lock.acquire()
        self.param_lock.acquire()
        self._render((self.render_lock, self.param_lock))
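# Usage sketch: in a Jupyter notebook, construct the widget and display the
# full UI; this assumes the pypelidcalc widget modules referenced above are
# importable in the environment.
widget = PypelidWidget()
widget.show()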
def train(FLAG):
    print("Reading dataset...")
    if FLAG.dataset == 'CIFAR-10':
        train_data = CIFAR10(train=True)
        test_data = CIFAR10(train=False)
        vgg16 = VGG16(classes=10)
    elif FLAG.dataset == 'CIFAR-100':
        train_data = CIFAR100(train=True)
        test_data = CIFAR100(train=False)
        vgg16 = VGG16(classes=100)
    else:
        raise ValueError("dataset should be either CIFAR-10 or CIFAR-100.")

    print("Build VGG16 models for %s..." % FLAG.dataset)
    Xtrain, Ytrain = train_data.train_data, train_data.train_labels
    Xtest, Ytest = test_data.test_data, test_data.test_labels

    vgg16.build(vgg16_npy_path=FLAG.init_from, prof_type=FLAG.prof_type,
                conv_pre_training=True, fc_pre_training=False)
    vgg16.sparsity_train(l1_gamma=FLAG.lambda_s, l1_gamma_diff=FLAG.lambda_m,
                         decay=FLAG.decay, keep_prob=FLAG.keep_prob)

    # define tasks
    tasks = ['var_dp']
    print(tasks)

    # initial task
    cur_task = tasks[0]
    obj = vgg16.loss_dict[tasks[0]]

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=len(tasks))
    checkpoint_path = os.path.join(FLAG.save_dir, 'model.ckpt')

    tvars_trainable = tf.trainable_variables()
    # for rm in vgg16.gamma_var:
    #     tvars_trainable.remove(rm)
    #     print('%s is not trainable.' % rm)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # hyper parameters
        batch_size = 64
        epoch = 500
        early_stop_patience = 50
        min_delta = 0.0001
        opt_type = 'adam'

        # recorder
        epoch_counter = 0

        # optimizer
        global_step = tf.Variable(0, trainable=False)

        # Passing global_step to minimize() will increment it at each step.
        if opt_type == 'sgd':
            start_learning_rate = 1e-4  # 1e-4 for adam; 4e-3 for sgd
            half_cycle = 20000
            learning_rate = tf.train.exponential_decay(
                start_learning_rate, global_step, half_cycle, 0.5,
                staircase=True)
            opt = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                             momentum=0.9, use_nesterov=True)
        else:
            start_learning_rate = 1e-4  # adam
            half_cycle = 10000
            learning_rate = tf.train.exponential_decay(
                start_learning_rate, global_step, half_cycle, 0.5,
                staircase=True)
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = opt.minimize(obj, global_step=global_step,
                                var_list=tvars_trainable)

        # progress bars
        ptrain = IntProgress()
        pval = IntProgress()
        display(ptrain)
        display(pval)
        ptrain.max = int(Xtrain.shape[0] / batch_size)
        pval.max = int(Xtest.shape[0] / batch_size)

        spareness = vgg16.spareness(thresh=0.05)
        print("initial spareness: %s" % sess.run(spareness))

        # re-initialize
        initialize_uninitialized(sess)

        # reset due to adding a new task
        patience_counter = 0
        current_best_val_accu = 0

        # optimize while the aggregated objective improves
        while (patience_counter < early_stop_patience
               and epoch_counter < epoch):

            def load_batches():
                for i in range(int(Xtrain.shape[0] / batch_size)):
                    st = i * batch_size
                    ed = (i + 1) * batch_size
                    batch = ia.Batch(images=Xtrain[st:ed, :, :, :],
                                     data=Ytrain[st:ed, :])
                    yield batch

            batch_loader = ia.BatchLoader(load_batches)
            bg_augmenter = ia.BackgroundAugmenter(batch_loader=batch_loader,
                                                  augseq=transform,
                                                  nb_workers=4)

            # start training
            stime = time.time()
            bar_train = Bar(
                'Training',
                max=int(Xtrain.shape[0] / batch_size),
                suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')
            bar_val = Bar(
                'Validation',
                max=int(Xtest.shape[0] / batch_size),
                suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')
            train_loss, train_accu = 0.0, 0.0
            while True:
                batch = bg_augmenter.get_batch()
                if batch is None:
                    print("Finished epoch.")
                    break
                x_images_aug = batch.images_aug
                y_images = batch.data
                loss, accu, _ = sess.run(
                    [obj, vgg16.accu_dict[cur_task], train_op],
                    feed_dict={
                        vgg16.x: x_images_aug,
                        vgg16.y: y_images,
                        vgg16.is_train: True
                    })
                bar_train.next()
                train_loss += loss
                train_accu += accu
                ptrain.value += 1
                ptrain.description = "Training %s/%s" % (ptrain.value, ptrain.max)
            train_loss = train_loss / ptrain.value
            train_accu = train_accu / ptrain.value

            batch_loader.terminate()
            bg_augmenter.terminate()

            # # training an epoch
            # for i in range(int(Xtrain.shape[0]/batch_size)):
            #     st = i*batch_size
            #     ed = (i+1)*batch_size
            #     augX = transform.augment_images(Xtrain[st:ed,:,:,:])
            #     sess.run([train_op], feed_dict={vgg16.x: augX,
            #                                     vgg16.y: Ytrain[st:ed,:],
            #                                     vgg16.is_train: False})
            #     ptrain.value +=1
            #     ptrain.description = "Training %s/%s" % (i, ptrain.max)
            #     bar_train.next()

            # validation
            val_loss = 0
            val_accu = 0
            for i in range(int(Xtest.shape[0] / 200)):
                st = i * 200
                ed = (i + 1) * 200
                loss, accu = sess.run(
                    [obj, vgg16.accu_dict[cur_task]],
                    feed_dict={
                        vgg16.x: Xtest[st:ed, :],
                        vgg16.y: Ytest[st:ed, :],
                        vgg16.is_train: False
                    })
                val_loss += loss
                val_accu += accu
                pval.value += 1
                pval.description = "Testing %s/%s" % (pval.value, pval.max)
            val_loss = val_loss / pval.value
            val_accu = val_accu / pval.value

            print("\nspareness: %s" % sess.run(spareness))
            # early stopping check
            if (val_accu - current_best_val_accu) > min_delta:
                current_best_val_accu = val_accu
                patience_counter = 0
                para_dict = sess.run(vgg16.para_dict)
                np.save(os.path.join(FLAG.save_dir, "para_dict.npy"), para_dict)
                print("save in %s" % os.path.join(FLAG.save_dir, "para_dict.npy"))
            else:
                patience_counter += 1

            # shuffle Xtrain and Ytrain for the next epoch
            idx = np.random.permutation(Xtrain.shape[0])
            Xtrain, Ytrain = Xtrain[idx, :, :, :], Ytrain[idx, :]

            # epoch end
            # writer.add_summary(epoch_summary, epoch_counter)
            epoch_counter += 1
            ptrain.value = 0
            pval.value = 0
            bar_train.finish()
            bar_val.finish()

            print("Epoch %s (%s), %s sec >> train loss: %.4f, train accu: %.4f, "
                  "val loss: %.4f, val accu at %s: %.4f"
                  % (epoch_counter, patience_counter,
                     round(time.time() - stime, 2), train_loss, train_accu,
                     val_loss, cur_task, val_accu))

        saver.save(sess, checkpoint_path, global_step=epoch_counter)

        sp, rcut = gammaSparsifyVGG16(para_dict, thresh=0.02)
        np.save(os.path.join(FLAG.save_dir, "sparse_dict.npy"), sp)
        print("sparsify %s in %s" % (np.round(1 - rcut, 3),
                                     os.path.join(FLAG.save_dir, "sparse_dict.npy")))
        # writer.close()

        arr_spareness = [1 - rcut]  # record the sparsity achieved in this run
        np.save(os.path.join(FLAG.save_dir, "sprocess.npy"), arr_spareness)

    FLAG.optimizer = opt_type
    FLAG.lr = start_learning_rate
    FLAG.batch_size = batch_size
    FLAG.epoch_end = epoch_counter
    FLAG.val_accu = current_best_val_accu

    # append the run settings and results to a csv log
    header = ''
    row = ''
    for key in sorted(vars(FLAG)):
        if header == '':
            header = key
            row = str(getattr(FLAG, key))
        else:
            header += "," + key
            row += "," + str(getattr(FLAG, key))
    row += "\n"
    header += "\n"
    if os.path.exists("/home/cmchang/new_CP_CNN/model.csv"):
        with open("/home/cmchang/new_CP_CNN/model.csv", "a") as myfile:
            myfile.write(row)
    else:
        with open("/home/cmchang/new_CP_CNN/model.csv", "w") as myfile:
            myfile.write(header)
            myfile.write(row)
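# Usage sketch for train(FLAG); FLAG is assumed to be an argparse-style
# namespace whose attribute names follow the reads in train() above. The
# values here are illustrative placeholders, not the authors' settings.
from argparse import Namespace

FLAG = Namespace(dataset='CIFAR-10', init_from='vgg16.npy',
                 prof_type='linear', lambda_s=1e-5, lambda_m=1e-5,
                 decay=1e-5, keep_prob=0.5, save_dir='./ckpt')
train(FLAG)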
def train(net, data, epochs=10, batch_size=10, seq_length=50, lr=0.001,
          clip=5, val_frac=0.1, print_every=10):
    ''' Training a network

        Arguments
        ---------
        net: CharRNN network
        data: text data to train the network
        epochs: Number of epochs to train
        batch_size: Number of mini-sequences per mini-batch, aka batch size
        seq_length: Number of character steps per mini-batch
        lr: learning rate
        clip: gradient clipping
        val_frac: Fraction of data to hold out for validation
        print_every: Number of steps for printing training and validation loss
    '''
    net.train()

    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # create training and validation data
    val_idx = int(len(data) * (1 - val_frac))
    data, val_data = data[:val_idx], data[val_idx:]

    if net.train_on_gpu:
        net.cuda()

    counter = 0
    n_chars = len(net.chars)
    progress = IntProgress(
        min=0,
        max=epochs * len(list(get_batches(data, batch_size, seq_length))),
        description="Training...")
    display(progress)
    for e in range(epochs):
        # initialize hidden state
        h = net.init_hidden(batch_size)

        for x, y in get_batches(data, batch_size, seq_length):
            counter += 1
            progress.value += 1

            # One-hot encode our data and make them Torch tensors
            x = one_hot_encode(x, n_chars)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)

            if net.train_on_gpu:
                inputs, targets = inputs.cuda(), targets.cuda()

            # Creating new variables for the hidden state, otherwise
            # we'd backprop through the entire training history
            h = tuple([each.data for each in h])

            # zero accumulated gradients
            net.zero_grad()

            # get the output from the model
            output, h = net(inputs, h)

            # calculate the loss and perform backprop
            loss = criterion(output, targets.view(batch_size * seq_length).long())
            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem
            # in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()

            # loss stats
            if counter % print_every == 0:
                # Get validation loss
                val_h = net.init_hidden(batch_size)
                val_losses = []
                net.eval()
                for x, y in get_batches(val_data, batch_size, seq_length):
                    # One-hot encode our data and make them Torch tensors
                    x = one_hot_encode(x, n_chars)
                    x, y = torch.from_numpy(x), torch.from_numpy(y)

                    # Creating new variables for the hidden state, otherwise
                    # we'd backprop through the entire training history
                    val_h = tuple([each.data for each in val_h])

                    inputs, targets = x, y
                    if net.train_on_gpu:
                        inputs, targets = inputs.cuda(), targets.cuda()

                    output, val_h = net(inputs, val_h)
                    val_loss = criterion(
                        output, targets.view(batch_size * seq_length).long())
                    val_losses.append(val_loss.item())

                # reset to train mode after iterating through validation data
                net.train()

                print("Epoch: {}/{}...".format(e + 1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))
    progress.close()
    print("Finished training.")
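# Usage sketch for train(); `net` and `encoded` are hypothetical names. The
# network is assumed to be a CharRNN exposing the attributes used above
# (chars, train_on_gpu, init_hidden), and `encoded` an integer-encoded text
# array, as in the usual char-RNN setup.
train(net, encoded, epochs=10, batch_size=128, seq_length=100,
      lr=0.001, print_every=50)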
def create_snapshot(self, size, path, base_name, samples_per_pixel,
                    export_intermediate_frames=False):
    """
    Create a snapshot of the current frame

    :size: Frame buffer size
    :path: Path where the snapshot file is exported
    :base_name: Base name of the snapshot file
    :samples_per_pixel: Samples per pixel
    :export_intermediate_frames: If True, intermediate samples are stored to
    disk. Otherwise, only the final accumulation is exported
    """
    application_params = self._client.get_application_parameters()
    renderer_params = self._client.get_renderer()

    old_image_stream_fps = application_params['image_stream_fps']
    old_viewport_size = application_params['viewport']
    old_samples_per_pixel = renderer_params['samples_per_pixel']
    old_max_accum_frames = renderer_params['max_accum_frames']
    old_smoothed_key_frames = copy.deepcopy(self._smoothed_key_frames)

    self._client.set_renderer(samples_per_pixel=1,
                              max_accum_frames=samples_per_pixel)
    self._client.set_application_parameters(viewport=size)
    self._client.set_application_parameters(image_stream_fps=0)

    control_points = [self.get_camera()]
    current_animation_frame = int(
        self._client.get_animation_parameters()['current'])
    animation_frames = [current_animation_frame]

    self.build_camera_path(control_points=control_points,
                           nb_steps_between_control_points=1,
                           smoothing_size=1)

    progress_widget = IntProgress(description='In progress...',
                                  min=0, max=100, value=0)
    display(progress_widget)

    self.export_frames(
        path=path,
        base_name=base_name,
        animation_frames=animation_frames,
        size=size,
        samples_per_pixel=samples_per_pixel,
        export_intermediate_frames=export_intermediate_frames)

    done = False
    while not done:
        time.sleep(1)
        progress = self.get_export_frames_progress()['progress']
        progress_widget.value = progress * 100
        done = self.get_export_frames_progress()['done']

    progress_widget.description = 'Done'
    progress_widget.value = 100

    self._client.set_application_parameters(
        image_stream_fps=old_image_stream_fps, viewport=old_viewport_size)
    self._client.set_renderer(samples_per_pixel=old_samples_per_pixel,
                              max_accum_frames=old_max_accum_frames)
    self._smoothed_key_frames = copy.deepcopy(old_smoothed_key_frames)
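# Usage sketch for create_snapshot(); `movie_maker` is a hypothetical name for
# an instance of the class this method belongs to, connected to a running
# renderer through its client. The size, path and sampling values are
# illustrative only.
movie_maker.create_snapshot(
    size=[1920, 1080],
    path='/tmp/snapshots',
    base_name='frame',
    samples_per_pixel=64,
    export_intermediate_frames=False)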