Code Example #1
File: movie_maker.py  Project: BlueBrain/BioExplorer
    def create_movie(self,
                     path,
                     size,
                     animation_frames=list(),
                     quality=100,
                     samples_per_pixel=1,
                     start_frame=0,
                     end_frame=0,
                     interpupillary_distance=0.0,
                     export_intermediate_frames=True):
        """
        Create and export a set of PNG frames for later movie generation

        :path: Full path of the snapshot folder
        :size: Frame buffer size
        :animation_frames: Optional list of animation frames
        :quality: PNG quality
        :samples_per_pixel: Samples per pixel
        :start_frame: Start frame to export in the provided sequence
        :end_frame: Last frame to export in the provided sequence
        :interpupillary_distance: Interpupillary distance for stereo rendering. If set to 0, stereo
        is disabled
        :export_intermediate_frames: If True, intermediate samples are stored to disk. Otherwise,
        only the final accumulation is exported
        """
        application_params = self._client.get_application_parameters()
        renderer_params = self._client.get_renderer()

        old_image_stream_fps = application_params['image_stream_fps']
        old_viewport_size = application_params['viewport']
        old_samples_per_pixel = renderer_params['samples_per_pixel']
        old_max_accum_frames = renderer_params['max_accum_frames']
        self._client.set_renderer(samples_per_pixel=1,
                                  max_accum_frames=samples_per_pixel)
        self._client.set_application_parameters(viewport=size)
        self._client.set_application_parameters(image_stream_fps=0)

        progress_widget = IntProgress(description='In progress...',
                                      min=0,
                                      max=100,
                                      value=0)
        display(progress_widget)

        self.export_frames(
            path=path,
            base_name='',
            animation_frames=animation_frames,
            start_frame=start_frame,
            end_frame=end_frame,
            size=size,
            samples_per_pixel=samples_per_pixel,
            quality=quality,
            interpupillary_distance=interpupillary_distance,
            export_intermediate_frames=export_intermediate_frames)

        done = False
        while not done:
            time.sleep(1)
            progress = self.get_export_frames_progress()['progress']
            progress_widget.value = progress * 100
            done = self.get_export_frames_progress()['done']

        self._client.set_application_parameters(
            image_stream_fps=old_image_stream_fps, viewport=old_viewport_size)
        self._client.set_renderer(samples_per_pixel=old_samples_per_pixel,
                                  max_accum_frames=old_max_accum_frames)

        progress_widget.description = 'Done'
        progress_widget.value = 100
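A minimal usage sketch for the method above (hedged: it assumes a running BioExplorer rendering backend and that MovieMaker is constructed from a BioExplorer client, as in the project's example notebooks; host, paths and frame counts are illustrative):

from bioexplorer import BioExplorer, MovieMaker

be = BioExplorer('localhost:5000')     # URL of the rendering backend (assumption)
mm = MovieMaker(be)
mm.create_movie(
    path='/tmp/frames',                # folder that receives the PNG frames
    size=[1920, 1080],                 # frame buffer size
    samples_per_pixel=64,              # accumulation samples per frame
    start_frame=0,
    end_frame=100)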
Code Example #2
def calcular_vulnerabilidad_urbana(inicio,
                                   fin,
                                   min_casos=20,
                                   min_defunciones=0):
    ''' Fit one model for the urban municipalities and another one for the non-urban municipalities '''
    inicio = pd.to_datetime(inicio, yearfirst=True)
    fin = pd.to_datetime(fin, yearfirst=True)
    fechas = pd.date_range(inicio, fin)
    resultados = []
    modelos = []

    asegura_archivos_covid_disponibles(fechas)
    f = IntProgress(min=0, max=len(fechas) - 1)  # instantiate the bar
    display(f)  # display the bar

    urbanos = municipios_urbanos()

    for count, fecha in enumerate(fechas):
        covid_municipal = tabla_covid_indicadores_municipales(
            fecha.strftime("%y%m%d"))
        covid_municipal = agregar_tasas_municipales(covid_municipal)

        caracteristicas = caracteristicas_modelos_municipios(covid_municipal)

        serie_urbanos = covid_municipal[
            covid_municipal.CLAVE_MUNICIPIO_RES.isin(urbanos.CLAVE_MUNICIPIO)]

        pls_urbanos = ajustar_pls_letalidad(serie_urbanos,
                                            caracteristicas,
                                            min_defunciones=min_defunciones,
                                            min_casos=min_casos)
        df_urbanos = calificar_municipios_letalidad_formato_largo(
            serie_urbanos,
            pls_urbanos,
            caracteristicas,
            modelo='PLS_URBANO',
            dia_ajuste=fecha)
        resultados.append(df_urbanos)
        modelo = pd.DataFrame({
            'caracteristica': caracteristicas,
            'coef': pls_urbanos.coef_
        })
        modelo['dia_ajuste'] = fecha
        modelo['modelo'] = 'PLS_URBANO'

        modelos.append(modelo)

        serie_no_urbanos = covid_municipal[
            ~covid_municipal.CLAVE_MUNICIPIO_RES.isin(urbanos.CLAVE_MUNICIPIO)]
        pls_no_urbanos = ajustar_pls_letalidad(serie_no_urbanos,
                                               caracteristicas,
                                               min_defunciones=min_defunciones,
                                               min_casos=min_casos)
        df_no_urbanos = calificar_municipios_letalidad_formato_largo(
            serie_no_urbanos,
            pls_no_urbanos,
            caracteristicas,
            modelo='PLS_NO_URBANO',
            dia_ajuste=fecha)
        resultados.append(df_no_urbanos)
        modelo = pd.DataFrame({
            'caracteristica': caracteristicas,
            'coef': pls_no_urbanos.coef_
        })
        modelo['dia_ajuste'] = fecha
        modelo['modelo'] = 'PLS_NO_URBANO'

        modelos.append(modelo)

        f.value = count

    resultados_df = pd.concat(resultados, ignore_index=True)
    modelos_df = pd.concat(modelos, ignore_index=True)
    return modelos_df, resultados_df
Code Example #3
from ipywidgets import IntProgress
from IPython.display import display
import time

max_count = 100

f = IntProgress(min=0, max=max_count)  # instantiate the bar
display(f)  # display the bar

count = 0
while count < max_count:
    f.value += 1  # signal to increment the progress bar
    time.sleep(.1)
    count += 1
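A small variation on the snippet above (not part of the original source) that drives the same bar from a for loop and updates the description and bar style as it goes:

from ipywidgets import IntProgress
from IPython.display import display
import time

max_count = 100
f = IntProgress(min=0, max=max_count, description='Working...')
display(f)

for count in range(max_count):
    f.value = count + 1                    # jump the bar to the current count
    f.description = f'{count + 1}/{max_count}'
    time.sleep(.1)

f.bar_style = 'success'                    # green bar once finished
f.description = 'Done'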
Code Example #4
 def __iter__(self):
     self.index = 0
     self.bar = IntProgress(max=self.len)
     display(self.bar)
     return self
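The fragment above only shows `__iter__`; a self-contained sketch of such an iterator wrapper (hypothetical class and attribute names, assuming `self.len` holds the sequence length) might look like this:

from ipywidgets import IntProgress
from IPython.display import display

class ProgressIterator:
    """Wrap a sequence and advance an IntProgress bar for every item yielded."""

    def __init__(self, items):
        self.items = list(items)
        self.len = len(self.items)

    def __iter__(self):
        self.index = 0
        self.bar = IntProgress(max=self.len)
        display(self.bar)
        return self

    def __next__(self):
        if self.index >= self.len:
            self.bar.bar_style = 'success'
            raise StopIteration
        item = self.items[self.index]
        self.index += 1
        self.bar.value = self.index        # move the bar forward
        return item

# for item in ProgressIterator(range(50)): ...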
Code Example #5
    def make_old_impl(self, in_memory=False):
        # TODO : make base class so we can reuse this with sandbox/base.py
        progress = IntProgress(description='Rendering...',
                               max=len(self._time_range) - 1)
        self._event = threading.Event()

        def _make(event):
            image_files = []
            iw = None
            if not self.skip_render:
                for i in self._time_range:
                    progress.value = i
                    if not event.is_set():
                        self.view.frame = i
                        self.sleep()
                        if self.perframe_hook:
                            self.perframe_hook(self.view)
                        self.sleep()
                        if not self.in_memory:
                            self.view.download_image(
                                self.prefix + '.' + str(i) + '.png',
                                **self.render_params)
                        else:
                            iw = self.view.render_image(**self.render_params)
                        self.sleep()
                        if self.in_memory:
                            rgb = self._base64_to_ndarray(
                                self.view._image_data)
                            self._image_array.append(rgb)
                            if iw:
                                iw.close()  # free memory
                if not self.in_memory:
                    template = "{}/{}.{}.png"
                    image_files = [
                        image_dir for image_dir in (template.format(
                            self.download_folder, self.prefix, str(i))
                                                    for i in self._time_range)
                        if os.path.exists(image_dir)
                    ]
                else:
                    image_files = self._image_array
            if not self._event.is_set():
                progress.description = "Writing ..."
                clip = mpy.ImageSequenceClip(image_files, fps=self.fps)
                with Output():
                    if self.output.endswith('.gif'):
                        clip.write_gif(self.output,
                                       fps=self.fps,
                                       verbose=False,
                                       **self.moviepy_params)
                    else:
                        clip.write_videofile(self.output,
                                             fps=self.fps,
                                             **self.moviepy_params)
                self._image_array = []
                progress.description = 'Done'
                time.sleep(1)
                progress.close()

        self.thread = threading.Thread(target=_make, args=(self._event, ))
        self.thread.daemon = True
        self.thread.start()
        return progress
Code Example #6
File: statistical.py  Project: cmacdonald/rankeval
def bias_variance(datasets=[], algos=[], metrics=[], L=10, k=2):
    """
    This method computes the bias vs. variance decomposition of the error.
    The approach used here is based on the works of [Webb05]_ and [Dom05]_.

    Each instance of the dataset is scored `L` times.
    A single scoring is achieved by splitting the dataset at random into
    `k` folds. Each fold is scored by the model `M` trained on the remainder folds.
    [Webb05]_ recommends the use of 2 folds.

    If the metric is MSE then the standard decomposition is used.
    The Bias for an instance `x` is defined as the squared error of the average prediction
    of the `L` trained models w.r.t. the true label `y`, denoted with :math:`({\\sf E}_{L} [M(x)] - y)^2`.
    The Variance for an instance `x` is measured across the `L` trained models:
    :math:`{\\sf E}_{L} [(M(x) - {\\sf E}_{L} [M(x)])^2]`.
    Both are averaged over all instances in the dataset.

    If the metric is any of the IR quality measures, we resort to the bias/variance
    decomposition of the squared error of the given metric w.r.t. its ideal value,
    e.g., for the case of NDCG, :math:`{\\sf E}_{L} [(1 - NDCG)^2]`.
    Note that a formal bias/variance decomposition for these measures has not been proposed yet.


    Parameters
    ----------
    datasets : list of rankeval.dataset.Dataset
        The dataset instances.
    algos : list of callable
        Each callable should be a wrapper of a learning algorithm.
        The function should accept four parameters: `train_X`, `train_Y`, `train_q`, `test_X`.
            - `train_X`: numpy.ndarray storing a 2-D matrix of size num_docs x num_features
            - `train_Y`: numpy.ndarray storing a vector of document's relevance labels
            - `train_q`: numpy.ndarray storing a vector of query lengths
            - `test_X`: numpy.ndarray as for `train_X`

        A model is trained on `train_X`, `train_Y`, `train_q`, and used to score `test_X`.
        A numpy.ndarray with these scores must be returned.
    metrics : list of rankeval.metrics.metric.Metric
        The metrics used to compute the error (e.g., MSE or an IR quality metric).
    L : int
        Number of iterations
    k : int
        Number of folds.

    Returns
    -------
    bias_variance : xarray.DataArray
        A DataArray containing the bias/variance decomposition of the error
        for any given dataset, algorithm and metric.

    References
    ----------
    .. [Webb05] Webb, Geoffrey I., and Paul Conilione. "Estimating bias and variance from data." 
            Pre-publication manuscript (`pdf <http://www.csse.monash.edu/webb/-Files/WebbConilione06.pdf>`_) (2005).
    .. [Dom05] Domingos P. A unified bias-variance decomposition. 
            In Proceedings of 17th International Conference on Machine Learning 2000 (pp. 231-238).
    """
    assert(k>=2)
    assert(L>=2)
    assert(len(datasets)>0)
    assert(len(metrics)>0)
    for metric in metrics:
        assert isinstance(metric, Metric)

    progress_bar = IntProgress(min=0, max=len(datasets)*len(metrics)*len(algos),
                               description="Iterating datasets and metrics")
    display(progress_bar)    

    data = np.zeros(shape=(len(datasets), len(metrics), len(algos), 3), dtype=np.float32)
    for idx_dataset, dataset in enumerate(datasets):
        for idx_algo, algo in enumerate(algos):
            for idx_metric, metric in enumerate(metrics):
                progress_bar.value += 1
                
                scores = _multi_kfold_scoring(dataset, algo=algo, L=L, k=k)
                
                avg_error = 0.
                avg_bias = 0.
                avg_var = 0.
                if not isinstance(metric, MSE):
                    # mse over metric, assume error is 1-metric
                    # not exactly domingos paper
                    q_scores = np.empty((dataset.n_queries, L), dtype=np.float32) 
                    for i in range(L):
                        q_scores[:,i] = metric.eval(dataset=dataset, y_pred=scores[:,i])[1]            
                    avg_error = np.mean( (q_scores-1.)**2. )
                    avg_pred  = np.mean(q_scores, axis=1)
                    avg_bias  = np.mean((avg_pred - 1.)**2.)
                    avg_var   = np.mean( (q_scores-avg_pred.reshape((-1,1)))**2. )
                else:
                    # mse
                    avg_error = np.mean( (scores-dataset.y.reshape((-1,1)))**2. )
                    avg_pred  = np.mean(scores, axis=1)
                    avg_bias  = np.mean((avg_pred - dataset.y)**2.)
                    avg_var   = np.mean( (scores-avg_pred.reshape((-1,1)))**2. )

                data[idx_dataset][idx_metric][idx_algo][0] = avg_error
                data[idx_dataset][idx_metric][idx_algo][1] = avg_bias
                data[idx_dataset][idx_metric][idx_algo][2] = avg_var
                

    progress_bar.bar_style = "success"
    progress_bar.close()

    performance = xr.DataArray(data,
                               name='Bias/Variance Decomposition',
                               coords=[datasets, metrics, [a.__name__ for a in algos], 
                               ['Error', 'Bias', 'Variance']],
                               dims=['dataset', 'metric', 'algo', 'error'])

    return performance
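A hedged sketch of an `algo` wrapper with the signature described in the docstring (the scikit-learn regressor is only illustrative and is not part of rankeval):

from sklearn.ensemble import GradientBoostingRegressor

def gbrt_algo(train_X, train_Y, train_q, test_X):
    """Train a pointwise model on (train_X, train_Y) and return scores for test_X.

    train_q (query lengths) is ignored by this simple pointwise regressor.
    """
    model = GradientBoostingRegressor(n_estimators=100)
    model.fit(train_X, train_Y)
    return model.predict(test_X)

# Hypothetical call, assuming `dataset` and `ndcg` are rankeval objects:
# bias_variance(datasets=[dataset], algos=[gbrt_algo], metrics=[ndcg], L=10, k=2)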
Code Example #7
def sample_mcmc(model,
                h,
                x0=None,
                burnin=1000,
                n_samples=10000,
                sample_rate=10,
                g=None,
                noiseless_sample=False,
                progress_bar=False):
    """
        Sample points (theta) from either a Gaussian process model or simulator using the
          Metropolis-Hastings algorithm.

        Default proposal density, g, is a Gaussian with diagonal covariance; covariances set to 
          a small value based on the range of possible parameter settings for each dimension.

        Args:
            (models.GP) OR (simulators.Simulator) model:
                GP model of the discrepancy, OR Simulator instance with callable f(), noiseless_f()

               (float)                h:   bandwidth for KDE.
          (np.ndarray)               x0:   initial starting point.
                 (int)           burnin:   number of burn-in iterations.
                 (int)        n_samples:   number of samples to return.
                 (int)      sample_rate:   record one sample every `sample_rate` iterations.
            (callable)                g:   proposal density.
                (bool) noiseless_sample:   whether to call noiseless_f or f (when `model' is a Simulator).
                (bool)     progress_bar:   whether to show progress bar in Jupyter notebook.

        Returns: 
          (np.ndarray)          samples:   with shape (n_samples, input_dim).
    """
    input_dim = model.input_dim
    bounds = model.bounds

    # function proportional to predictive distribution
    if isinstance(model, GP):
        f = lambda x: norm.cdf(
            (h - model.mu(x)) / np.sqrt(model.v(x) + model.obs_noise))
    elif isinstance(model, Simulator):
        # std. dev. of obs noise is stored in simulator, so no np.sqrt
        if noiseless_sample:
            f = lambda x: norm.cdf(
                (h - model.noiseless_f(x)) / model.obs_noise)
        else:
            f = lambda x: norm.cdf((h - model.f(x)) / model.obs_noise)
    else:
        raise ValueError('pass simulator or GP model as first argument.')

    if x0 is None:
        x0 = np.array([np.random.uniform(b1, b2)
                       for (b1, b2) in bounds]).reshape(1, input_dim)

    if g is None:
        cov = []
        for (b1, b2) in bounds:
            cov.append(0.025 * (b2 - b1))
        cov = np.diag(np.array(cov)).reshape(input_dim, input_dim)

        g = lambda xt: np.random.multivariate_normal(xt.squeeze(), cov
                                                     ).reshape(1, input_dim)

    progress_bar = progress_bar and 'jupyter' in os.environ.get('_', '')

    # ================================================
    # Burn-in period =================================
    if progress_bar:
        prog = IntProgress(value=0, max=burnin, description='Burn-in')
        display(prog)

    x = np.array(x0)
    for i in range(burnin):
        cand = g(x)  # candidate point
        if not model.within_bounds(cand):
            continue

        a = f(cand) / f(x)  # acceptance ratio
        if np.random.rand() < a:  # accept/reject
            x = np.copy(cand)

        if progress_bar:
            prog.value += 1

    # ================================================
    # Begin sampling =================================
    if progress_bar:
        prog.close()
        prog = IntProgress(value=0, max=n_samples, description='Sampling')
        display(prog)

    samples = []
    i = 0
    while len(samples) < n_samples:
        cand = g(x)  # candidate point
        if not model.within_bounds(cand):
            continue

        a = f(cand) / f(x)  # acceptance ratio
        if a < 0:
            continue

        if np.random.rand() < a:  # accept/reject
            x = np.copy(cand)

        if (i % sample_rate) == 0:
            samples.append(np.copy(x))
            if progress_bar:
                prog.value += 1

        i += 1

    if progress_bar:
        prog.close()

    return np.array(samples).reshape(n_samples, input_dim)
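A usage sketch under the assumptions stated in the docstring (a fitted GP model of the discrepancy and a threshold `h`; the names and values are illustrative):

# `gp` is assumed to be a fitted models.GP instance; `h` is the discrepancy
# threshold (e.g. a small quantile of the observed discrepancies).
samples = sample_mcmc(gp,
                      h=0.1,
                      burnin=2000,
                      n_samples=5000,
                      sample_rate=10,
                      progress_bar=True)
print(samples.shape)   # -> (5000, gp.input_dim)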
Code Example #8
File: _notebook.py  Project: jackz314/mne-python
 def _status_bar_add_progress_bar(self, stretch=0):
     widget = IntProgress()
     self._layout_add_widget(self._status_bar_layout, widget)
     return _IpyWidget(widget)
Code Example #9
File: main.py  Project: joshuawe/CrowdModelling
    def run_update_steps(self,
                         steps,
                         save_to_path,
                         gif=True,
                         keep_frames=False):
        """
        Runs the simulation for multiple timesteps and saves the result as individual frames and a GIF.

        For steps many times, the self.update_step() function is called. If gif==True, each timestep is saved as a frame (*.png), including a caption of time step, to generate the
            .*gif. If keep_frames==False, the frames (*.png) will be deleted after generating the *.gif. For gif==False, no frames or GIF is stored. In any case, simulation will run for steps many times and the result is stored in the grid.
            
        Args:
            steps (int): Number of timesteps of simulation.
            save_to_path (str): The path, where frames and *.gif will be stored. Enumeration and file endings are appended automatically. (e.g. save_to_path="simulation1/scenario4)
            gif (bool, optional): Wether to produce a *.gif file of simulation. Defaults to True.
            keep_frames (bool, optional): Wether the frames to generate the *.gif should be kept. Defaults to False.
        """

        # increment steps by one, as step 0 only display current states
        steps += 1

        # list with all file names needed for animation
        filenames = []

        # display a progressbar
        bar = IntProgress(min=0, max=steps)
        IPython.display.display(bar)  # from IPython

        # loop "steps" many times
        for i in range(steps):
            # update the progress bar
            bar.value += 1

            # calculate an update step
            self.update_step()

            # only do this, if gif is required
            if gif:
                # Caption for GIF
                caption = f"Time step {i}."

                # File name, with time step, add to list
                path = save_to_path + f"_{i}.png"
                filenames.append(path)

                # save the figure
                self.show(caption=caption, save_to_path=path)

        # only display result or create gif?
        if gif:
            # append the last image additional times, to "freeze" GIF at the end, before it restarts
            additional_frames = 4
            for i in range(additional_frames):
                filenames.append(path)

            # create file name
            path = save_to_path + ".gif"

            # use imageio to create the gif
            with imageio.get_writer(path, mode='I', duration=0.5) as writer:
                for filename in filenames[:-additional_frames]:
                    image = imageio.imread(filename)
                    writer.append_data(image)

            # display the gif
            # Problem with Jupyter Notebook: Only displays first image of frame!
            # Workaround is using iPython to display directly in jupyter notebook.
            #plt.imshow(mpl.image.imread(path))

            # check if individual frames are supposed to be kept
            if not keep_frames:
                # Remove all the frames, that were necessary for the GIF
                for filename in filenames:
                    try:
                        # delete the file/frame on disk
                        os.remove(filename)
                    except FileNotFoundError:
                        # The last frame was added a few extra times at the end, to ensure the GIF wouldn't loop
                        # without showing the result. Therefore, the code will try to delete the frame even though
                        # it has already been deleted. Hence, the exception.
                        # print(f"Did not find file: {filename}")
                        pass
                    except Exception:
                        print("An exception occurred!")

        # if no gif, only display the result
        else:
            self.show()

        return
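A possible call (hedged: `sim` stands for an instance of the simulation class that defines this method, and the output path is illustrative):

sim.run_update_steps(steps=50,
                     save_to_path='simulation1/scenario4',
                     gif=True,
                     keep_frames=False)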
Code Example #10
def log_progress(sequence: list, every=None, size=None, name='Items', userProgress=None):
    '''Creates a progress bar in jupyter notebooks.
    
    Automatically detects the size of a list and estimates the best step
    size for progress bar updates. This function also automatically estimates
    the total time to completion of the iterations, updating the estimate using 
    the time that every step takes.
    
    If the sequence argument is an iterator, the total number of elements cannot be
    determined. In this case, the user must define the `every` parameter to indicate
    the update frequency of the progress bar.
    
    If the progress bar is used in a nested loop, passing a list to the `userProgress` 
    argument will force the re-utilization of `ipywidgets` objects, preventing the 
    creation of a new progress bar at every iteration of the inner loop.
    
    This progress bar was based on https://github.com/alexanderkuk/log-progress.
    
    Args:
        sequence : An iterable object.
        every (int): The update frequency.
        size (int): The number of elements in the sequence.
        name (str): The name of the progress bar.
        userProgress (list): List for creation of nested progress bars.
    
    '''
    from ipywidgets import IntProgress, HTML, HBox, Label
    from IPython.display import display
    from numpy import mean as npmean
    from collections import deque
    from math import floor
    from datetime import datetime
    from string import Template
    
    is_iterator = False
    if size is None:
        try:
            size = len(sequence)
        except TypeError:
            is_iterator = True
    if size is not None:
        if every is None:
            if size <= 200:
                every = 1
            else:
                every = floor(float(size)*0.005)     # every 0.5%, minimum is 1
    else:
        assert every is not None, 'sequence is iterator, set every'
    
    # For elapsed time
    initTime = datetime.now()
    totTime = "?"
    labTempl = Template(" (~ $min total time (min) ; $ell minutes elapsed)")
    
    # If provided, we use the objects already created.
    # If not provided, we create from scratch.
    if userProgress is None or userProgress == []:
        
        progress = IntProgress(min=0, max=1, value=1)

        label = HTML()
        labelTime = Label("")

        box = HBox(children=[label, progress, labelTime])
        
        if userProgress == []:
            userProgress.append(box)
        display(box)
    else:
        box = userProgress[0]
    
    if is_iterator:
        #progress = IntProgress(min=0, max=1, value=1)
        box.children[1].min = 0
        box.children[1].max = 1
        box.children[1].value = 1
        box.children[1].bar_style = 'info'
    else:
        #progress = IntProgress(min=0, max=size, value=0)
        box.children[1].min = 0
        box.children[1].max = size
        box.children[1].value = 0

        # For remaining time estimation
        deltas = deque()
        lastTime = None
        meandelta = 0
    
    index = 0
    try:
        for index, record in enumerate(sequence, 1):
            if index == 1 or index % every == 0:
                if is_iterator:
                    box.children[0].value = '{name}: {index} / ?'.format(
                        name=name,
                        index=index
                    )
                else:
                    box.children[1].value = index
                    box.children[0].value = u'{name}: {index} / {size}'.format(
                        name=name,
                        index=index,
                        size=size
                    )
                
                    # Estimates remaining time with average delta per iteration
                    # Uses (at most) the last ~100 iterations
                    if len(deltas) == 101:
                        deltas.popleft()
                    
                    if lastTime:
                        deltas.append( (datetime.now() - lastTime).total_seconds() )
                        meandelta = npmean(deltas)/60.0    # From seconds to minute
                        totTime = round(meandelta*size/float(every), 3)  # Mean iteration for all iterations
                    else:
                        totTime = "?"       # First iteration has no time
                    
                    lastTime = datetime.now()
                
                # Total elapsed time in minutes
                elapsed = round( (datetime.now() - initTime).total_seconds()/60.0, 3)

                box.children[2].value = labTempl.safe_substitute({"min":totTime,
                                                       "ell":elapsed})
                
            yield record
    except:
        box.children[1].bar_style = 'danger'
        raise
    else:
        box.children[1].bar_style = 'success'
        box.children[1].value = index
        box.children[0].value = "{name}: {index}".format(
            name=name,
            index=str(index or '?')
        )
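Typical calls, including the nested-loop case described in the docstring (the data file and the `process` function are placeholders):

# Plain list or range: size is detected automatically
for item in log_progress(range(1000), name='Rows'):
    process(item)                          # placeholder for the real work

# Iterator without a known length: `every` must be given
for line in log_progress(open('data.txt'), every=100, name='Lines'):
    process(line)

# Nested loops: reuse the same widgets for the inner bar
inner_widgets = []
for outer in log_progress(range(10), name='Outer'):
    for inner in log_progress(range(500), name='Inner',
                              userProgress=inner_widgets):
        process((outer, inner))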
Code Example #11
    def explain_instance(self,
                         instance,
                         num_reps=50,
                         num_features=4,
                         neighborhood_samples=10000,
                         use_cov_matrix=False,
                         verbose=False,
                         figure_dir=None):
        npEX = np.array(self.EX)
        cls_proba = self.bb_classifier.predict_proba

        x0 = copy.deepcopy(instance)  # instance to be explained
        mockobj = mock.Mock()

        # Neighborhood random samples
        cov_matrix = np.cov(
            ((X - npEX) / self.StdX).T) if use_cov_matrix else 1.0
        NormV = scipy.stats.multivariate_normal.rvs(mean=np.zeros(self.F),
                                                    cov=cov_matrix,
                                                    size=neighborhood_samples,
                                                    random_state=10)

        # Get the output of the black-box classifier on x0
        output = cls_proba([x0])[0]
        label_x0 = 1 if output[1] >= output[0] else 0
        prob_x0 = output[label_x0]
        prob_x0_F, prob_x0_T = output[0], output[1]
        if verbose:
            print('prob_x0', prob_x0, '   label_x0',
                  self.class_names[label_x0])

        # Prepare instance for LIME
        lime_x0 = np.divide((x0 - npEX),
                            self.StdX,
                            where=np.logical_not(np.isclose(self.StdX, 0)))
        shap_x0 = (x0 - npEX)

        rows = None
        progbar = IntProgress(min=0, max=num_reps)
        label = Label(value="")
        display(HBox([Label("K=%d " % (num_features)), progbar, label]))

        # Explain the same instance x0 multiple times
        for rnum in range(num_reps):
            label.value = "%d/%d" % (rnum + 1, num_reps)
            R = mock.Mock()  # store all the computed metrics
            R.rnum, R.prob_x0 = rnum, prob_x0

            # Explain the instance x0 with LIME
            lime_expl = self.LIMEEXPL.explain_instance(
                np.array(x0),
                cls_proba,
                num_features=num_features,
                top_labels=1,
                num_samples=self.explanation_samples)

            # Explain x0 using SHAP
            shap_phi = self.SHAPEXPL.shap_values(x0, l1_reg="num_features(10)")
            shap_phi0 = self.SHAPEXPL.expected_value

            # Take only the top @num_features from shap_phi
            argtop = np.argsort(np.abs(shap_phi[0]))
            for k in range(len(shap_phi)):
                shap_phi[k][argtop[:(self.F - num_features)]] = 0

            # Recover both the LIME and the SHAP classifiers
            R.lime_g = get_LIME_classifier(lime_expl, label_x0, x0)
            R.shap_g = get_SHAP_classifier(label_x0, shap_phi, shap_phi0, x0,
                                           self.EX)

            #----------------------------------------------------------
            # Evaluate the white box classifiers
            EL = eval_whitebox_classifier(R,
                                          R.lime_g,
                                          npEX,
                                          self.StdX,
                                          NormV,
                                          x0,
                                          label_x0,
                                          cls_proba,
                                          "lime",
                                          precision_recalls=True)
            ES = eval_whitebox_classifier(R,
                                          R.shap_g,
                                          npEX,
                                          np.ones(len(x0)),
                                          NormV * self.StdX,
                                          x0,
                                          label_x0,
                                          cls_proba,
                                          "shap",
                                          precision_recalls=True)

            R.lime_local_discr = np.abs(
                R.lime_g.predict([lime_x0])[0] - prob_x0)
            R.shap_local_discr = np.abs(
                R.shap_g.predict([shap_x0])[0] - prob_x0)

            # Indices of the most important features, ordered by their absolute value
            R.lime_argtop = np.argsort(np.abs(R.lime_g.coef_))
            R.shap_argtop = np.argsort(np.abs(R.shap_g.coef_))

            # get the K most common features in the explanation of x0
            R.mcf_lime = tuple(
                [R.lime_argtop[-k] for k in range(num_features)])
            R.mcf_shap = tuple(
                [R.shap_argtop[-k] for k in range(num_features)])

            # Binary masks of the argtops
            R.lime_bin_expl, R.shap_bin_expl = np.zeros(self.F), np.zeros(
                self.F)
            R.lime_bin_expl[np.array(R.mcf_lime)] = 1
            R.shap_bin_expl[np.array(R.mcf_shap)] = 1

            # Save the Ridge regressors built by LIME and SHAP
            # lime_g_W, shap_g_W = tuple(lime_g.coef_), tuple(shap_g.coef_)
            # lime_g_w0, shap_g_w0 = lime_g.intercept_, shap_g.intercept_

            # get the appropriate R keys
            R_keys = copy.copy(R.__dict__)
            for key in copy.copy(list(R_keys.keys())):
                if key.startswith("wb_"):
                    R_keys[wb_name + key[2:]] = R_keys.pop(key)
                elif key in mockobj.__dict__:
                    del R_keys[key]

            rows = pd.DataFrame(columns=R_keys) if rows is None else rows
            rows = rows.append({k: R.__dict__[k]
                                for k in R_keys},
                               ignore_index=True)
            progbar.value += 1

        label.value += " Done."

        # use the multiple explanations to compute the LEAF metrics
        # display(rows)

        # Jaccard distances between the various explanations (stability)
        lime_jaccard_mat = 1 - pdist(np.stack(rows.lime_bin_expl, axis=0),
                                     'jaccard')
        shap_jaccard_mat = 1 - pdist(np.stack(rows.shap_bin_expl, axis=0),
                                     'jaccard')
        self.lime_avg_jaccard_bin, self.lime_std_jaccard_bin = np.mean(
            lime_jaccard_mat), np.std(lime_jaccard_mat)
        self.shap_avg_jaccard_bin, self.shap_std_jaccard_bin = np.mean(
            shap_jaccard_mat), np.std(shap_jaccard_mat)

        # LIME/SHAP explanation comparisons
        lime_shap_jaccard_mat = 1 - cdist(np.stack(rows.lime_bin_expl, axis=0),
                                          np.stack(rows.shap_bin_expl, axis=0),
                                          'jaccard')
        lime_shap_avg_jaccard_bin, lime_shap_std_jaccard_bin = np.mean(
            lime_shap_jaccard_mat), np.std(lime_shap_jaccard_mat)

        # store the metrics for later use
        self.metrics = rows

        def leaf_plot(stability, method):
            fig, ax1 = plt.subplots(figsize=(6, 2.2))
            data = [
                stability.flatten(),
                1 - rows[method + '_local_discr'],
                rows[method + '_fidelity_f1'],
                # rows[method + '_prescriptivity_f1'],
                # rows[method + '_bal_prescriptivity' ],
                1 - 2 * np.abs(rows[method + '_boundary_discr'])
            ]

            # color = 'tab:red'
            ax1.tick_params(axis='both', which='major', labelsize=12)
            ax1.set_xlabel('distribution')
            ax1.set_ylabel('LEAF metrics', color='black', fontsize=15)
            ax1.boxplot(data, vert=False, widths=0.7)
            ax1.tick_params(axis='y', labelcolor='#500000')
            ax1.set_yticks(np.arange(1, len(data) + 1))
            ax1.set_yticklabels([
                'Stability', 'Local Concordance', 'Fidelity', 'Prescriptivity'
            ])
            ax1.set_xlim([-0.05, 1.05])
            ax1.invert_yaxis()

            ax2 = ax1.twinx(
            )  # instantiate a second axes that shares the same x-axis
            ax2.tick_params(axis='both', which='major', labelsize=12)
            ax2.set_ylabel(
                'Values',
                color='#000080')  # we already handled the x-label with ax1
            ax2.boxplot(data, vert=False, widths=0.7)
            # ax2.boxplot([np.mean(d) for d in data], color=color)
            ax2.tick_params(axis='y', labelcolor='#000080')
            ax2.set_yticks(np.arange(1, len(data) + 1))
            ax2.set_yticklabels(
                ["  %.3f ± %.3f  " % (np.mean(d), np.std(d)) for d in data])
            ax2.invert_yaxis()

            fig.tight_layout(
            )  # otherwise the right y-label is slightly clipped
            if figure_dir is not None:
                imgname = figure_dir + method + "_leaf.pdf"
                print('Saving', imgname)
                plt.savefig(imgname, dpi=150, bbox_inches='tight')
            plt.show()

        # Show LIME explanation
        display(HTML("<h2>LIME</h2>"))
        lime_expl.show_in_notebook(show_table=True, show_all=False)
        leaf_plot(lime_jaccard_mat, 'lime')

        # Show SHAP explanation
        display(HTML("<h2>SHAP</h2>"))
        display(shap.force_plot(shap_phi0[label_x0], shap_phi[label_x0], x0))
        leaf_plot(shap_jaccard_mat, 'shap')

        prescription = False
        if prescription:
            print("====================================================")
            lime_x1, lime_sx1 = EL
            shap_x1, shap_sx1 = ES

            print(
                'SHAP accuracy %f balanced_accuracy %f precision %f recall %f'
                % (rows.shap_prescriptivity.mean(),
                   rows.shap_bal_prescriptivity.mean(),
                   rows.shap_precision_x1.mean(), rows.shap_recall_x1.mean()))

            lime_diff = (rows.iloc[-1].lime_g.coef_ != 0) * (lime_x1 - x0)
            shap_diff = (rows.iloc[-1].shap_g.coef_ != 0) * (shap_x1 - x0)

            print(np.array(rows.iloc[-1].lime_g.coef_ != 0))
            print('lime_diff\n', lime_diff)
            print('shap_diff\n', shap_diff)

            lime_output_x1 = cls_proba([lime_x1])[0]
            shap_output_x1 = cls_proba([shap_x1])[0]
            lime_label_x1 = 1 if lime_output_x1[1] >= lime_output_x1[0] else 0
            shap_label_x1 = 1 if shap_output_x1[1] >= shap_output_x1[0] else 0

            print("LIME(x1) prob =", lime_output_x1)
            print("SHAP(x1) prob =", shap_output_x1)

            # df = pd.DataFrame([x0, x0 + shap_diff], index=['x', 'x\'']).round(2)
            # display(df.T.iloc[:math.ceil(F/2),:])
            # display(df.T.iloc[math.ceil(F/2):,:])

            # Show LIME explanation
            lime_expl = self.LIMEEXPL.explain_instance(
                np.array(shap_x1),
                cls_proba,
                num_features=num_features,
                top_labels=1,
                num_samples=self.explanation_samples)
            lime_expl.show_in_notebook(show_table=True, show_all=False)
            # leaf_plot(lime_jaccard_mat, 'lime')

            # Show SHAP explanation
            shap_phi = self.SHAPEXPL.shap_values(shap_x1, l1_reg="num_features(10)")
            shap_phi0 = self.SHAPEXPL.expected_value
            argtop = np.argsort(np.abs(shap_phi[0]))
            for k in range(len(shap_phi)):
                shap_phi[k][argtop[:(self.F - num_features)]] = 0
            display(
                shap.force_plot(shap_phi0[shap_label_x1],
                                shap_phi[shap_label_x1], shap_x1))
Code Example #12
    def status_printer(file, total=None, desc=None):
        """
        Manage the printing of an IPython/Jupyter Notebook progress bar widget.
        """
        # Fallback to text bar if there's no total
        # DEPRECATED: replaced with an 'info' style bar
        # if not total:
        #    return super(tqdm_notebook, tqdm_notebook).status_printer(file)

        fp = file

        # Prepare IPython progress bar
        if total:
            pbar = IntProgress(min=0, max=total)
        else:  # No total? Show info style bar with no progress tqdm status
            pbar = IntProgress(min=0, max=1)
            pbar.value = 1
            pbar.bar_style = 'info'
        if desc:
            pbar.description = desc
        # Prepare status text
        ptext = HTML()
        # Only way to place text to the right of the bar is to use a container
        container = HBox(children=[pbar, ptext])
        display(container)

        def print_status(s='', close=False, bar_style=None):
            # Note: contrary to native tqdm, s='' does NOT clear the bar
            # the goal is to keep all info if an error happens, so the user knows
            # at which iteration the loop failed.

            # Clear previous output (really necessary?)
            # clear_output(wait=1)

            # Get current iteration value from format_meter string
            if total:
                n = None
                if s:
                    npos = s.find(r'/|/')  # cause we use bar_format=r'{n}|...'
                    # Check that n can be found in s (else n > total)
                    if npos >= 0:
                        n = int(s[:npos])  # get n from string
                        s = s[npos + 3:]  # remove from string

                        # Update bar with current n value
                        if n is not None:
                            pbar.value = n

            # Print stats
            if s:  # never clear the bar (signal: s='')
                s = s.replace('||', '')  # remove unaesthetic pipes
                s = escape(s)  # html escape special characters (like '?')
                ptext.value = s

            # Change bar style
            if bar_style:
                # Hack-ish way to avoid the danger bar_style being overridden by
                # success because the bar gets closed after the error...
                if not (pbar.bar_style == 'danger' and bar_style == 'success'):
                    pbar.bar_style = bar_style

            # Special signal to close the bar
            if close and pbar.bar_style != 'danger':  # hide only if no error
                container.visible = False

        return print_status
Code Example #13
File: train.py  Project: zhaoxin111/DLCV2018SPRING
def train(FLAG):
    print("Reading dataset...")
    # load data
    Xtrain, df_train = read_dataset(TRAIN_CSV, TRAIN_DIR)
    Xtest, df_test = read_dataset(TEST_CSV, TEST_DIR)

    vae = VAE()
    vae.build(lambda_KL=FLAG.lambda_KL,
              n_dim=FLAG.n_dim,
              batch_size=FLAG.batch_size,
              shape=Xtrain.shape[1:])

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    checkpoint_path = os.path.join(FLAG.save_dir, 'model.ckpt')

    def initialize_uninitialized(sess):
        global_vars = tf.global_variables()
        is_not_initialized = sess.run(
            [tf.is_variable_initialized(var) for var in global_vars])
        not_initialized_vars = [
            v for (v, f) in zip(global_vars, is_not_initialized) if not f
        ]
        if len(not_initialized_vars):
            sess.run(tf.variables_initializer(not_initialized_vars))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # hyper parameters
        batch_size = FLAG.batch_size
        epoch = 500
        early_stop_patience = 50
        min_delta = 0.0001
        opt_type = 'adam'

        # recorder
        epoch_counter = 0

        # optimizer
        global_step = tf.Variable(0, trainable=False)

        # Passing global_step to minimize() will increment it at each step.
        if opt_type == 'sgd':
            start_learning_rate = FLAG.lr
            half_cycle = 2000
            learning_rate = tf.train.exponential_decay(start_learning_rate,
                                                       global_step,
                                                       half_cycle,
                                                       0.5,
                                                       staircase=True)
            opt = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                             momentum=0.9,
                                             use_nesterov=True)
        else:
            start_learning_rate = FLAG.lr
            half_cycle = 2000
            learning_rate = tf.train.exponential_decay(start_learning_rate,
                                                       global_step,
                                                       half_cycle,
                                                       0.5,
                                                       staircase=True)
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate)

        obj = vae.train_op
        train_op = opt.minimize(obj, global_step=global_step)

        # progress bar
        ptrain = IntProgress()
        pval = IntProgress()
        display(ptrain)
        display(pval)
        ptrain.max = int(Xtrain.shape[0] / batch_size)
        pval.max = int(Xtest.shape[0] / batch_size)

        # re-initialize
        initialize_uninitialized(sess)

        # reset due to adding a new task
        patience_counter = 0
        current_best_val_loss = float('inf')

        # optimize when the aggregated obj
        while (patience_counter < early_stop_patience
               and epoch_counter < epoch):

            # start training
            stime = time.time()
            bar_train = Bar(
                'Training',
                max=int(Xtrain.shape[0] / batch_size),
                suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')
            bar_val = Bar(
                'Validation',
                max=int(Xtest.shape[0] / batch_size),
                suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')

            train_loss = 0.0
            train_reconstruction_loss = 0.0
            train_kl_loss = 0.0
            for i in range(int(Xtrain.shape[0] / batch_size)):
                st = i * batch_size
                ed = (i + 1) * batch_size
                loss, reconstruction_loss, kl_loss, _ = sess.run(
                    [
                        obj, vae.loss['reconstruction'], vae.loss['KL_loss'],
                        train_op
                    ],
                    feed_dict={
                        vae.x: Xtrain[st:ed, :],
                        vae.y: Xtrain[st:ed, :],
                        vae.is_train: True
                    })
                print(loss)
                print(reconstruction_loss)
                print(kl_loss)
                train_loss += loss
                train_reconstruction_loss += reconstruction_loss
                train_kl_loss += kl_loss
                ptrain.value += 1
                ptrain.description = "Training %s/%s" % (ptrain.value,
                                                         ptrain.max)

                output = sess.run(
                    [vae.output],
                    feed_dict={
                        vae.x: Xtrain[0:64, :],
                        vae.y: Xtrain[0:64, :],
                        vae.is_train: False
                    })

                print("=== train data ====")
                print(output)
                #print((Xtrain[0,:]-128.0)/128.0)
            train_loss = train_loss / ptrain.value
            train_reconstruction_loss = train_reconstruction_loss / ptrain.value
            train_kl_loss = train_kl_loss / ptrain.value

            # validation
            val_loss = 0
            val_reconstruction_loss = 0.0
            val_kl_loss = 0.0
            for i in range(int(Xtest.shape[0] / batch_size)):
                st = i * batch_size
                ed = (i + 1) * batch_size
                loss, reconstruction_loss, kl_loss = sess.run(
                    [obj, vae.loss['reconstruction'], vae.loss['KL_loss']],
                    feed_dict={
                        vae.x: Xtest[st:ed, :],
                        vae.y: Xtest[st:ed, :],
                        vae.is_train: False
                    })
                val_loss += loss
                val_reconstruction_loss += reconstruction_loss
                val_kl_loss += kl_loss
                pval.value += 1
                pval.description = "Testing %s/%s" % (pval.value, pval.max)
            val_loss = val_loss / pval.value
            val_reconstruction_loss = val_reconstruction_loss / pval.value
            val_kl_loss = val_kl_loss / pval.value

            # plot
            # if epoch_counter%10 == 0:
            #     Xplot = sess.run(vae.output,
            #             feed_dict={vae.x: Xtest[:,:],
            #                         vae.y: Xtest[:,:],
            #                         vae.is_train: False})
            #     for i, fname in enumerate(track):
            #         imageio.imwrite(os.path.join(FLAG.save_dir,os.path.basename(fname)+"_pred_"+str(epoch_counter)+".png"), saveimg)
            #         print(os.path.join(FLAG.save_dir,os.path.basename(fname)+"_pred_"+str(epoch_counter)+".png"))

            # early stopping check
            if (current_best_val_loss - val_loss) > min_delta:
                current_best_val_loss = val_loss
                patience_counter = 0
                saver.save(sess, checkpoint_path, global_step=epoch_counter)
                print("save in %s" % checkpoint_path)
            else:
                patience_counter += 1

            # shuffle Xtrain and Ytrain in the next epoch
            idx = np.random.permutation(Xtrain.shape[0])
            Xtrain = Xtrain[idx, :, :, :]

            # epoch end
            epoch_counter += 1

            ptrain.value = 0
            pval.value = 0
            bar_train.finish()
            bar_val.finish()

            print(
                "Epoch %s (%s), %s sec >> train loss: %.4f, train recon loss: %.4f, train kl loss: %.4f, val loss: %.4f, val recon loss: %.4f, val kl loss: %.4f"
                % (epoch_counter, patience_counter,
                   round(time.time() - stime, 2), train_loss,
                   train_reconstruction_loss, train_kl_loss, val_loss,
                   val_reconstruction_loss, val_kl_loss))

        # para_dict = sess.run(vgg16.para_dict)
        # np.save(os.path.join(FLAG.save_dir, "para_dict.npy"), para_dict)
        # print("save in %s" % os.path.join(FLAG.save_dir, "para_dict.npy"))

        FLAG.optimizer = opt_type
        FLAG.lr = start_learning_rate
        FLAG.batch_size = batch_size
        FLAG.epoch_end = epoch_counter
        FLAG.val_loss = current_best_val_loss

        header = ''
        row = ''
        for key in sorted(vars(FLAG)):
            if header == '':
                header = key
                row = str(getattr(FLAG, key))
            else:
                header += "," + key
                row += "," + str(getattr(FLAG, key))
        row += "\n"
        if os.path.exists("/home/cmchang/DLCV2018SPRING/hw4/model.csv"):
            with open("/home/cmchang/DLCV2018SPRING/hw4/model.csv",
                      "a") as myfile:
                myfile.write(row)
        else:
            with open("/home/cmchang/DLCV2018SPRING/hw4/model.csv",
                      "w") as myfile:
                myfile.write(header)
                myfile.write(row)
Code Example #14
File: qa_analytics.py  Project: KeilaVCortes/TATSSI
    def _analytics(self, b):
        """
        Uses the self.user_qa_selection OrderedDictionary to extract
        the corresponding QA values and create a mask of dimensions:
            (number of qa layers, time steps, cols(lat), rows(lon))
        Additionally computes the temporal mask and the max gap length
        """
        if not type(b) == QProgressBar:
            progress_bar = IntProgress(
                value=0,
                min=0,
                max=len(self.user_qa_selection),
                step=1,
                description='',
                bar_style='', # 'success', 'info', 'warning', 'danger' or ''
                orientation='horizontal',
                style = {'description_width': 'initial'},
                layout={'width': '50%'}
            )
            display(progress_bar)

        n_qa_layers = len(self.user_qa_selection)

        # Get the name of the first data var to extract its shape
        for k, v in self.ts.data.data_vars.items():
            break

        # Create mask xarray
        _time, _latitude, _longitude = self.ts.data.data_vars[k].shape
        mask = np.zeros((n_qa_layers, _time, _latitude, _longitude),
                        np.int8)

        qa_layer = self.qa_def.QualityLayer.unique()

        # QA layer used to create the mask
        _qa_layer = getattr(self.ts.qa, f"qa{qa_layer[0]}")

        for i, user_qa in enumerate(self.user_qa_selection):

            if type(b) == QProgressBar:
                b.setValue(i)
                b.setFormat(f"Masking by QA {user_qa}")
            else:
                progress_bar.value = i
                progress_bar.description = f"Masking by QA {user_qa}"

            user_qa_fieldname = user_qa.replace(" ", "_").replace("/", "_")

            for j, qa_value in enumerate(self.user_qa_selection[user_qa]):
                qa_value_field_name = qa_value.replace(" ", "_")

                qa_flag_val = self.qa_def[(self.qa_def.Name == user_qa) & 
                        (self.qa_def.Description == qa_value)].Value.iloc[0]

                if j == 0:
                    mask[i] = (_qa_layer[user_qa_fieldname] == qa_flag_val)
                else:
                    mask[i] = np.logical_or(
                            mask[i], _qa_layer[user_qa_fieldname] == qa_flag_val)

        if type(b) == QProgressBar:
            b.setValue(0)
            b.setEnabled(False)
        else:
            # Remove progress bar
            progress_bar.close()
            del progress_bar

        #self.__temp_mask = mask
        #mask = xr.DataArray(np.all(self.__temp_mask, axis=0),
        mask = xr.DataArray(np.all(mask, axis=0),
                            coords=[v.time.data,
                                    v.latitude.data,
                                    v.longitude.data],
                            dims=['time', 'latitude', 'longitude'])

        mask.attrs = v.attrs

        self.mask = mask
        # Remove local multi-layer mask variable
        mask = None
        del(mask)

        # Create the percentage of data available mask
        # Get the per-pixel per-time step binary mask
        pct_data_available = (self.mask.sum(axis=0) * 100.0) / _time
        pct_data_available.latitude.data = v.latitude.data
        pct_data_available.longitude.data = v.longitude.data
        # Set the pct_data_available object
        self.pct_data_available = pct_data_available

        # Using the computed mask get the max gap length
        self.__get_max_gap_length(b)
Code Example #15
File: log.py  Project: JoaoCarabetta/PyMove
def _log_progress(sequence: Iterable,
                  desc: Optional[Text] = None,
                  total: Optional[int] = None,
                  miniters: Optional[int] = None):
    """
    Make and display a progress bar.

    Parameters
    ----------
    sequence : iterable
        Represents a sequence of elements.
    desc : str, optional
        Represents the description of the operation, by default None.
    total : int, optional
        Represents the total/number elements in sequence, by default None.
    miniters : int, optional
        Represents the steps in which the bar will be updated, by default None.

    """
    if desc is None:
        desc = ''
    is_iterator = False
    if total is None:
        try:
            total = len(sequence)
        except TypeError:
            is_iterator = True
    if total is not None:
        if miniters is None:
            if total <= 200:
                miniters = 1
            else:
                miniters = int(total / 200)
    else:
        if miniters is None:
            miniters = 1

    if is_iterator:
        progress = IntProgress(min=0, max=1, value=1)
        progress.bar_style = 'info'
    else:
        progress = IntProgress(min=0, max=total, value=0)
    label = HTML()
    box = VBox(children=[label, progress])
    display(box)

    index = 0
    try:
        for index, record in enumerate(sequence, 1):
            if index == 1 or index % miniters == 0:
                if is_iterator:
                    label.value = '%s: %s / ?' % (desc, index)
                else:
                    progress.value = index
                    label.value = u'%s: %s / %s' % (desc, index, total)
            yield record
    except Exception:
        progress.bar_style = 'danger'
        raise
    else:
        progress.bar_style = 'success'
        progress.value = index
        label.value = '%s: %s' % (desc, str(index or '?'))
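A usage sketch (the function is a generator, so it must be consumed in a loop; the per-item work is a placeholder):

for item in _log_progress(range(10000), desc='Processing rows'):
    pass                                   # replace with per-item work

# For an iterator with no len(), pass `total` (or accept the 'info' style bar)
for chunk in _log_progress(iter(range(100)), desc='Chunks', total=100):
    pass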
コード例 #16
0
def get_samples_from_relation_with_chunks(file, num_of_nodes=20000, chunksize = 10000):
    '''
    Get samples from the graph by picking the first node as a source and
    looking for its neighbors, then the neighbors of its neighbors.
    By iteration we obtain a subsample of the relations. This method uses
    chunks to process huge .csv files.

    Parameters:
    file (string): the .csv file path of the relation dataframe
    num_of_nodes (int): threshold representing the minimum number of nodes expected in the sampled dataframe
    chunksize (int): number of rows read per chunk
    Returns:
    pd.DataFrame, pd.DataFrame: nodes dataframe, edgelist dataframe
    '''
    usersdata = pd.read_csv('data/usersdata.csv', delimiter = '\t', names = ['userId', 'sex', 'timePassedValidation', 'ageGroup', 'label'])
    
    print('preparing the progress bar')
    n_rows = sum(1 for row in open(file, 'r')) - 1
    f = IntProgress(min=0, max=int(np.ceil(n_rows/chunksize)), description = 'Process') # instantiate the bar
    display(f) # display the bar

    node_list = []
    neighbors = [pd.read_csv(file,delimiter=',', nrows = 1)['src'].values[0]]

    node_list.extend(neighbors)
    print('Start sampling')
    while len(node_list) < num_of_nodes:
        count = 0
        previous = neighbors
        previous_size = len(node_list)
        neighbors = []
        for chunk in pd.read_csv(file,iterator=True,delimiter=',', chunksize=chunksize):
            f.value = count # signal to increment the progress bar
            count += 1
            neighbors.extend(chunk[chunk['src'].isin(previous)]['dst'].tolist() + chunk[chunk['dst'].isin(previous)]['src'].tolist())
            node_list.extend(neighbors)
            node_list = list(set(node_list))
            if len(node_list) >= num_of_nodes:
                break
        print('number of users acquired: {}'.format(len(node_list)))
        if previous_size == len(node_list):
            break
    print('number of users finally acquired: {}'.format(len(node_list)))

    print('sub sampling relations:')
    relation_df= pd.read_csv(file,iterator=True,delimiter=',', chunksize=chunksize)
    subrels = pd.concat([chunk[(chunk['src'].isin(node_list)) & (chunk['dst'].isin(node_list))] for chunk in relation_df])

    subrels = subrels.rename(columns={'Unnamed: 0':'index'}).set_index('index').groupby(['src','dst']).agg({'time_ms':'sum'})
    subrels.reset_index(inplace = True)
    subrels.loc[:,'time_s'] = subrels['time_ms']/1000.
    fusers = usersdata[usersdata.userId.isin(node_list)]

    nodes = fusers
    edges = subrels[['src', 'dst', 'time_s']].rename(columns = {'time_s':'weight'})
    nodes.reset_index(level=0, inplace=True)
    nodes = nodes.drop(columns={'index'})
    nodes.reset_index(level=0, inplace=True)
    nodes = nodes.rename(columns = {'index':'node_idx'})
    uid2idx = nodes[['node_idx', 'userId']]
    uid2idx = uid2idx.set_index('userId')
    edges_renumbered = edges.join(uid2idx, on = 'src').join(uid2idx, on = 'dst', rsuffix = '_dst').drop(columns = ['src', 'dst'])
    edgelist = edges_renumbered[['node_idx','node_idx_dst','weight']]
    return nodes, edgelist
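A hedged usage sketch; the file path is a placeholder, and the expected columns ('src', 'dst', 'time_ms') are assumptions taken from the code above:

nodes, edgelist = get_samples_from_relation_with_chunks(
    'data/relations.csv', num_of_nodes=5000, chunksize=50000)
print(len(nodes), 'nodes,', len(edgelist), 'edges')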
コード例 #17
0
def in_progress(seq, msg="Progress: [%(processed)d / %(total)d]",
                length=None, close=True):
    """ Iterate over sequence, yielding item with progress widget displayed.
        This is useful if you need to process a sequence of items with some
        time-consuming operations.
        .. note::
            This works only in Jupyter Notebook
        .. note::
            This function requires *ipywidgets* package to be installed
        :param seq: sequence to iterate on.
        :param str msg: (optional) message template to display.
                        Following variables could be used in this template:
                            - processed
                            - total
                            - time_total
                            - time_per_item
                            - time_remaining
        :param int length: (optional) if seq is generator, or it is not
                           possible to apply 'len(seq)' function to 'seq',
                           then this argument is required and its value will
                           be used as total number of items in seq.
        Example::
            import time
            for i in in_progress(range(10)):
                time.sleep(1)
    """
    from IPython.display import display
    from ipywidgets import IntProgress
    import time

    if length is None:
        length = len(seq)

    start_time = time.time()

    progress = IntProgress(
        value=0, min=0, max=length, description=msg % {
            'processed': 0,
            'total': length,
            'time_total': 0.0,
            'time_per_item': 0.0,
            'time_remaining': 0.0,
        }
    )
    display(progress)

    for i, item in enumerate(seq, 1):
        progress.value = i

        # i_start_time = time.time()

        yield item  # Do the job

        i_end_time = time.time()

        progress.description = msg % {
            'processed': i,
            'total': length,
            'time_total': i_end_time - start_time,
            'time_per_item': (i_end_time - start_time) / i,
            'time_remaining': ((i_end_time - start_time) / i) * (length - i),
        }

    if close:
        progress.close()
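A small usage sketch showing a custom message template; any of the placeholders listed in the docstring (including time_remaining) may appear in it:

import time

msg = "Processed %(processed)d / %(total)d, %(time_remaining).1f s remaining"
for item in in_progress(range(20), msg=msg):
    time.sleep(0.1)  # stand-in for the time-consuming operation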
コード例 #18
0
def periodo_vulnerabilidad_con_dataframe(covid_municipal,
                                         inicio,
                                         fin,
                                         columna='tasa_covid_letal',
                                         min_casos=20,
                                         min_defunciones=-1,
                                         rf=True):
    """Calcula la vulnerabilidad (PLS) para todo el periodo usando como objetivo
       la columna que se le pase.

    :param df: el dataframe con los datos para ajustar el modelo. Debe traer ya las tasas municipales
    :type df: pd.DataFrame
    :param inicio: fecha inicial (Y-m-d)
    :type inicio: str
    :param fin: fecha final (Y-m-d)
    :type fin: str
    :param columna: la columna para usar como objetivo, el default es 'tasa_covid_letal'
    :type columna: str
    :param min_casos: Número mínimo de casos para considerar a un municipio
    :type min_casos: int
    :param min_defunciones: Número mínimo de defunciones para considerar a un municipio
    :type min_defunciones: int
    :param rf: True/False ajustar también un nmodelo de Random Forest a los dato
    :type rf: bool

    :returns: Un DataFrame igual que el de entrada pero cun una columna extra con el resultado
              del modelo. La columna se llama 'valor_{columna}'
    :rtype: gpd.GeoDataFrame

    """
    inicio = pd.to_datetime(inicio, yearfirst=True)
    fin = pd.to_datetime(fin, yearfirst=True)
    fin = min(covid_municipal.FECHA_INGRESO.max(), fin)
    fechas = pd.date_range(inicio, fin)
    resultados = []
    modelos = []
    f = IntProgress(min=0, max=len(fechas) - 1)  # instantiate the bar
    display(f)  # display the bar
    # covid_municipal = agregar_tasas_municipales(df)
    caracteristicas = caracteristicas_modelos_municipios(covid_municipal)
    for count, fecha in enumerate(fechas):
        covid_municipal_fecha = covid_municipal.query(
            f'FECHA_INGRESO == "{fecha.strftime("%Y-%m-%d")}"')
        pls = ajustar_pls_columna(covid_municipal_fecha,
                                  caracteristicas,
                                  columna=columna,
                                  min_casos=min_casos,
                                  min_defunciones=min_defunciones)
        df = calificar_municipios_letalidad_formato_largo(
            covid_municipal_fecha,
            pls,
            caracteristicas,
            modelo='PLS',
            dia_ajuste=fecha)
        resultados.append(df)
        modelo = pd.DataFrame({
            'caracteristica': caracteristicas,
            'coef': pls.coef_
        })
        modelo['dia_ajuste'] = fecha
        modelo['modelo'] = 'PLS'
        modelos.append(modelo)
        if rf:
            # use a distinct name so the boolean `rf` flag is not shadowed
            modelo_rf = ajustar_rf_letalidad(covid_municipal_fecha,
                                             caracteristicas,
                                             min_casos=min_casos,
                                             min_defunciones=min_defunciones)
            df = calificar_municipios_letalidad_formato_largo(
                covid_municipal_fecha,
                modelo_rf,
                caracteristicas,
                modelo='RF',
                dia_ajuste=fecha)
            resultados.append(df)
            modelo = pd.DataFrame({
                'caracteristica': caracteristicas,
                'coef': modelo_rf.feature_importances_
            })
            modelo['dia_ajuste'] = fecha
            modelo['modelo'] = 'RF'
            modelos.append(modelo)
        f.value = count

    resultados_df = pd.concat(resultados, ignore_index=True)
    modelos_df = pd.concat(modelos, ignore_index=True)
    resultados_df = gpd.GeoDataFrame(resultados_df, geometry='geometry')
    resultados_df.rename({'valor': f'valor_{columna}'}, axis=1, inplace=True)
    return modelos_df, resultados_df
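A hedged usage sketch; `covid_municipal` is a placeholder for a DataFrame that already carries the municipal rates and a FECHA_INGRESO column, as required above:

modelos_df, resultados_df = periodo_vulnerabilidad_con_dataframe(
    covid_municipal,
    inicio='2020-04-01',
    fin='2020-06-30',
    columna='tasa_covid_letal',
    min_casos=20,
    rf=True)
print(resultados_df.columns)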
コード例 #19
0
ファイル: statistical.py プロジェクト: cmacdonald/rankeval
def _randomization(metric_scores_a, metric_scores_b, n_perm=100000):
    """
    This method computes the randomization test as described in [1].

    Parameters
    ----------
    metric_scores_a : numpy array
        Vector of per-query metric scores for the IR system A.
    metric_scores_b : numpy array
        Vector of per-query metric scores for the IR system B.
    n_perm : int
        Number of permutations evaluated in the randomization test.

    Returns
    -------
    metric_scores : (float, float)
        A tuple (p-value_1, p-value_2) being respectively the one-sided and two-sided p-values.

    References
    ----------
    .. [1] Smucker, Mark D., James Allan, and Ben Carterette.
        "A comparison of statistical significance tests for information retrieval evaluation."
        In Proceedings of the sixteenth ACM conference on Conference on information and knowledge management, pp. 623-632. ACM, 2007.
    """
    progress_bar = IntProgress(min=0, max=10, description="Randomization Test")
    display(progress_bar)    

    # find the best system
    metric_scores_a_mean = np.mean(metric_scores_a)
    metric_scores_b_mean = np.mean(metric_scores_b)

    best_metrics = metric_scores_a
    worst_metrics = metric_scores_b
    if metric_scores_a_mean < metric_scores_b_mean:
        best_metrics = metric_scores_b
        worst_metrics = metric_scores_a

    difference = np.mean(best_metrics) - np.mean(worst_metrics)
    abs_difference = np.abs(difference)

    p1 = 0.0  # one-sided
    p2 = 0.0  # two-sided
    N = float(len(metric_scores_a))

    a_sum = np.sum(best_metrics)
    b_sum = np.sum(worst_metrics)

    # repeat n_perm times
    for i in range(n_perm):
        if i % (n_perm // 10) == 0:
            progress_bar.value += 1
        
        # select a random subset
        sel = np.random.choice([False, True], len(metric_scores_a))

        a_sel_sum = np.sum(best_metrics[sel])
        b_sel_sum = np.sum(worst_metrics[sel])

        # compute avg performance of randomized models
        a_mean = (a_sum - a_sel_sum + b_sel_sum) / N
        b_mean = (b_sum - b_sel_sum + a_sel_sum) / N

        # performance difference
        delta = a_mean - b_mean

        if delta >= difference:
            p1 += 1.
        if np.abs(delta) >= abs_difference:
            p2 += 1.

    progress_bar.bar_style = "success"
    progress_bar.close()

    p1 /= n_perm
    p2 /= n_perm

    return p1, p2
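A minimal sketch with made-up per-query scores, only to show the call signature and the returned p-values (IntProgress and display are assumed to be imported as elsewhere in this module):

import numpy as np

scores_a = np.random.rand(50)  # made-up per-query scores for system A
scores_b = np.random.rand(50)  # made-up per-query scores for system B
p_one_sided, p_two_sided = _randomization(scores_a, scores_b, n_perm=10000)
print(p_one_sided, p_two_sided)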
コード例 #20
0
def downloadFromURL(uris=None,
                    fileNames=None,
                    nodeNames=None,
                    checksums=None,
                    loadFiles=None,
                    customDownloader=None,
                    loadFileTypes=None,
                    loadFileProperties={}):
    """Download data from custom URL with progress bar.
  See API description in SampleData.downloadFromURL.
  """
    import SampleData
    sampleDataLogic = SampleData.SampleDataLogic()

    try:
        from ipywidgets import IntProgress
        from IPython.display import display
        progress = IntProgress()
    except ImportError:
        progress = None

    def reporthook(msg, level=None):
        # Download will only account for 90 percent of the time
        # (10% is left for loading time).
        progress.value = sampleDataLogic.downloadPercent * 0.9

    if progress:
        sampleDataLogic.logMessage = reporthook
        display(progress)  # show progress bar

    computeFileNames = not fileNames
    computeNodeNames = not nodeNames
    if computeFileNames or computeNodeNames:
        urisList = uris if type(uris) == list else [uris]
        if computeFileNames:
            fileNames = []
        else:
            filenamesList = fileNames if type(fileNames) == list else [
                fileNames
            ]
        if computeNodeNames:
            nodeNames = []
        else:
            nodeNamesList = nodeNames if type(nodeNames) == list else [
                nodeNames
            ]
        import os
        for index, uri in enumerate(urisList):
            if computeFileNames:
                fileName = getFileNameFromURL(uri)
                fileNames.append(fileName)
            else:
                fileName = filenamesList[index]
            if computeNodeNames:
                fileNameWithoutExtension, _ = os.path.splitext(fileName)
                nodeNames.append(fileNameWithoutExtension)

    if type(uris) != list:
        if type(fileNames) == list:
            fileNames = fileNames[0]
        if type(nodeNames) == list:
            nodeNames = nodeNames[0]

    downloaded = sampleDataLogic.downloadFromURL(uris, fileNames, nodeNames,
                                                 checksums, loadFiles,
                                                 customDownloader,
                                                 loadFileTypes,
                                                 loadFileProperties)

    if progress:
        progress.layout.display = 'none'  # hide progress bar

    return downloaded[0] if len(downloaded) == 1 else downloaded
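A hedged usage sketch; this only runs inside 3D Slicer's Jupyter kernel, and the URL and names below are placeholders, not a real dataset:

node = downloadFromURL(
    uris='https://example.com/sample_volume.nrrd',   # placeholder URL
    fileNames='sample_volume.nrrd',
    nodeNames='SampleVolume')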
コード例 #21
0
ファイル: overlay.py プロジェクト: strath-sdr/rfsoc_sam
from pynq import Overlay, allocate
import xrfclk
import xrfdc
import os
from .hierarchies import *
from .quick_widgets import Image
from ipywidgets import IntProgress
from IPython.display import display
from IPython.display import clear_output
import time
import threading

load_progress = 0
max_count = 100
load_bar = IntProgress(min=load_progress, max=max_count) # instantiate the bar


def generate_about():
    global about
    about = ''.join(['<br><b>', __info__, '</b><br>', __channels__, ' ', __board__,
                    ' ', __release__, '<br>', 'Version ', __version_number__,
                    ': ', __version_name__, '<br>Date: ', __date__, '<br><br>',
                    '<b>Organisation:</b> <br>', __organisation__,
                    '<br><br>', '<b>Support</b>:<br>', __support__])


class Overlay(Overlay):
    
    def __init__(self, overlay_system='sam', init_rf_clks=True, **kwargs):
コード例 #22
0
ファイル: effectiveness.py プロジェクト: qfnuhp/rankeval
def tree_wise_performance(datasets, models, metrics, step=10):
    """
    This method implements the analysis of the model on a tree-wise basis
    (part of the effectiveness analysis category).

    Parameters
    ----------
    datasets : list of Dataset
        The datasets to use for analyzing the behaviour of the model using
        the given metrics and models
    models : list of RTEnsemble
        The models to analyze
    metrics : list of Metric
        The metrics to use for the analysis
    step : int
        Step-size identifying evenly spaced number of trees for evaluating
        the top-k model performance.
        (e.g., step=100 means the method will evaluate the model performance
        at 100, 200, 300, etc. trees).


    Returns
    -------
    metric_scores : xarray.DataArray
        A DataArray containing the metric scores of each model using the given
        metrics on the given datasets.
        The metric scores are cumulatively reported tree by tree, i.e., top 10
        trees, top 20, etc., with a step-size between the number of trees
        as highlighted by the step parameter.

    """
    def get_tree_steps(model_trees):
        trees = list(range(step-1, model_trees, step))
        # Add last tree to the steps
        if trees[-1] != model_trees-1:
            trees.append(model_trees-1)
        return np.array(trees)

    max_num_trees = 0
    for model in models:
        if model.n_trees > max_num_trees:
            max_num_trees = model.n_trees

    tree_steps = get_tree_steps(max_num_trees)

    data = np.full(shape=(len(datasets), len(models), len(tree_steps),
                          len(metrics)), fill_value=np.nan, dtype=np.float32)


    progress_bar = IntProgress(min=0, max=len(datasets)*len(metrics)*
                               sum([len(get_tree_steps(model.n_trees)) for model in models ]), 
                               description="Computing metrics")
    display(progress_bar)    


    for idx_dataset, dataset in enumerate(datasets):
        for idx_model, model in enumerate(models):
            y_pred, partial_y_pred, y_leaves = \
                model.score(dataset, detailed=True)

            # the document scores are accumulated along for the various top-k
            # (in order to avoid useless re-scoring)
            y_pred = np.zeros(dataset.n_instances)

            for idx_top_k, top_k in enumerate(get_tree_steps(model.n_trees)):

                # compute the document scores using only top-k trees of
                # the model on the given dataset
                idx_tree_start = idx_top_k * step
                idx_tree_stop = top_k + 1

                y_pred += partial_y_pred[:, idx_tree_start:idx_tree_stop].sum(axis=1)

                # compute the metric score using the predicted document scores
                for idx_metric, metric in enumerate(metrics):
                    progress_bar.value += 1

                    metric_score, _ = metric.eval(dataset, y_pred)
                    data[idx_dataset][idx_model][idx_top_k][idx_metric] = metric_score

    progress_bar.bar_style = "success"
    progress_bar.close()

    performance = xr.DataArray(data,
                               name='Tree-Wise Performance',
                               coords=[datasets, models, tree_steps+1, metrics],
                               dims=['dataset', 'model', 'k', 'metric'])
    return performance
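A hedged usage sketch; `test_dataset`, `model_a`, `model_b` and `ndcg_10` are placeholders for rankeval Dataset, RTEnsemble and Metric objects assumed to be already loaded:

perf = tree_wise_performance([test_dataset], [model_a, model_b], [ndcg_10], step=50)
print(perf[0, 0])  # scores of the first model on the first dataset, per tree count and metric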
コード例 #23
0
def screenshot(url: str, api_key: str = None) -> requests.models.Response:
    """
    Get a screenshot of a url with Browshot.

    Parameters
    ----------
    url : str
        The url a screenshot is wanted for.
    api_key : str (optional)
        Browshot API key. If not set, msticpyconfig is checked for this.

    Returns
    -------
    image_data: requests.models.Response
        The final screenshot request response data.

    """
    # Get Browshot API key from kwargs or config
    if api_key is not None:
        bs_api_key: Optional[str] = api_key
    else:
        bs_conf = config.settings.get(
            "DataProviders",
            {}).get("Browshot") or config.settings.get("Browshot")
        bs_api_key = None
        if bs_conf is not None:
            bs_api_key = bs_conf.get("Args", {}).get("AuthKey")  # type: ignore

    if bs_api_key is None:
        raise MsticpyUserConfigError(
            "No configuration found for Browshot",
            "Please add a section to msticpyconfig.yaml:",
            "DataProviders:",
            "  Browshot:",
            "    Args:",
            "      AuthKey: {your_auth_key}",
            title="Browshot configuration not found",
            browshot_uri=("Get an API key for Browshot",
                          "https://api.browshot.com/"),
        )

    # Request screenshot from Browshot and get request ID
    id_string = f"https://api.browshot.com/api/v1/screenshot/create?url={url}/&instance_id=26&size=screen&cache=0&key={bs_api_key}"  # pylint: disable=line-too-long
    id_data = requests.get(id_string)
    bs_id = json.loads(id_data.content)["id"]
    status_string = (
        f"https://api.browshot.com/api/v1/screenshot/info?id={bs_id}&key={bs_api_key}"
    )
    image_string = f"https://api.browshot.com/api/v1/screenshot/thumbnail?id={bs_id}&zoom=50&key={bs_api_key}"  # pylint: disable=line-too-long
    # Wait until the screenshot is ready and keep user updated with progress
    print("Getting screenshot")
    progress = IntProgress(min=0, max=40)
    display.display(progress)
    ready = False
    while not ready:
        progress.value += 1
        status_data = requests.get(status_string)
        status = json.loads(status_data.content)["status"]
        if status == "finished":
            ready = True
        else:
            time.sleep(0.05)
    progress.value = 40

    # Once ready get the screenshot
    image_data = requests.get(image_string)

    if image_data.status_code != 200:
        print(
            "There was a problem with the request, please check the status code for details"
        )

    return image_data
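A hedged usage sketch; the API key is a placeholder and can instead be configured in msticpyconfig.yaml as described in the error message above:

resp = screenshot("https://www.python.org", api_key="YOUR_BROWSHOT_KEY")
if resp.status_code == 200:
    with open("screenshot.png", "wb") as out:
        out.write(resp.content)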
コード例 #24
0
def vgg16_train(model, train, test, init_from, save_dir, batch_size=64, epoch=300, early_stop_patience=25):
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    
    checkpoint_path = os.path.join(save_dir, 'model.ckpt')

    with tf.Session() as sess:
        print(tf.trainable_variables())
        
        # hyper parameters
        learning_rate =  5e-4 #adam
        min_delta = 0.0001

        # recorder
        epoch_counter = 0
        loss_history = []
        val_loss_history = []

        # optimizer
        opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = opt.minimize(model.loss)
        
        # saver 
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)
        
        sess.run(tf.global_variables_initializer())

        # progress bar
        ptrain = IntProgress()
        pval = IntProgress()
        display(ptrain)
        display(pval)
        ptrain.max = int(train.images.shape[0]/batch_size)
        pval.max = int(test.images.shape[0]/batch_size)

        # reset due to adding a new task
        patience_counter = 0
        current_best_val_loss = 100000 # a large number
        

        # train start
        while(patience_counter < early_stop_patience):
            stime = time.time()
            bar_train = Bar('Training', max=int(train.images.shape[0]/batch_size), suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')
            bar_val =  Bar('Validation', max=int(test.images.shape[0]/batch_size), suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')
            
            # training an epoch
            train_loss = 0
            for i in range(int(train.images.shape[0]/batch_size)):
                st = i*batch_size
                ed = (i+1)*batch_size
                
                _, loss = sess.run([train_op, model.loss],
                                   feed_dict={model.x: train.images[st:ed,:],
                                              model.y: train.labels[st:ed,:],
                                              model.w: train.weights[st:ed,:]
                                             })
                train_loss += loss
                ptrain.value +=1
                ptrain.description = "Training %s/%s" % (i, ptrain.max)
                bar_train.next()
            
            train_loss /= ptrain.max
            
            val_loss = 0

            for i in range(int(test.images.shape[0]/batch_size)):
                st = i*batch_size
                ed = (i+1)*batch_size
                
                loss = sess.run(model.loss,
                                   feed_dict={model.x: test.images[st:ed,:],
                                              model.y: test.labels[st:ed,:],
                                              model.w: np.expand_dims(np.repeat(1.0,batch_size),axis=1)
                                             })
                val_loss += loss
                pval.value +=1
                pval.description = "Training %s/%s" % (i, pval.max)
                bar_val.next()
                
            val_loss /= pval.max
            
            if (current_best_val_loss - val_loss) > min_delta:
                current_best_val_loss = val_loss
                patience_counter = 0
                saver.save(sess, checkpoint_path, global_step=epoch_counter)
                print("reset early stopping and save model into %s at epoch %s" % (checkpoint_path,epoch_counter))
            else:
                patience_counter += 1

            # shuffle Xtrain and Ytrain in the next epoch
            train.shuffle()
            
            loss_history.append(train_loss)
            val_loss_history.append(val_loss)

            ptrain.value = 0
            pval.value = 0
            bar_train.finish()
            bar_val.finish()
            print("Epoch %s (%s), %s sec >> train-loss: %.4f, val-loss: %.4f" % (epoch_counter, patience_counter, round(time.time()-stime,2), train_loss, val_loss))
            
            # epoch end
            epoch_counter += 1
            if epoch_counter >= epoch:
                break
        res = pd.DataFrame({"epoch":range(0,len(loss_history)), "loss":loss_history, "val_loss":val_loss_history})
        res.to_csv(os.path.join(save_dir,"history.csv"), index=False)
        print("end training")
コード例 #25
0
def periodo_vulnerabilidad_con_dataframe(df,
                                         inicio,
                                         fin,
                                         min_casos=20,
                                         min_defunciones=-1):
    inicio = pd.to_datetime(inicio, yearfirst=True)
    fin = pd.to_datetime(fin, yearfirst=True)
    fin = min(df.FECHA_INGRESO.max(), fin)

    fechas = pd.date_range(inicio, fin)
    resultados = []
    modelos = []

    f = IntProgress(min=0, max=len(fechas) - 1)  # instantiate the bar
    display(f)  # display the bar

    covid_municipal = agregar_tasas_municipales(df)
    caracteristicas = caracteristicas_modelos_municipios(covid_municipal)

    for count, fecha in enumerate(fechas):
        covid_municipal_fecha = covid_municipal.query(
            f'FECHA_INGRESO == "{fecha.strftime("%Y-%m-%d")}"')

        pls = ajustar_pls_letalidad(covid_municipal_fecha,
                                    caracteristicas,
                                    min_casos=min_casos,
                                    min_defunciones=min_defunciones)
        df = calificar_municipios_letalidad_formato_largo(
            covid_municipal_fecha,
            pls,
            caracteristicas,
            modelo='PLS',
            dia_ajuste=fecha)
        resultados.append(df)
        modelo = pd.DataFrame({
            'caracteristica': caracteristicas,
            'coef': pls.coef_
        })
        modelo['dia_ajuste'] = fecha
        modelo['modelo'] = 'PLS'
        modelos.append(modelo)

        rf = ajustar_rf_letalidad(covid_municipal_fecha,
                                  caracteristicas,
                                  min_casos=min_casos,
                                  min_defunciones=min_defunciones)
        df = calificar_municipios_letalidad_formato_largo(
            covid_municipal_fecha,
            rf,
            caracteristicas,
            modelo='RF',
            dia_ajuste=fecha)
        resultados.append(df)
        modelo = pd.DataFrame({
            'caracteristica': caracteristicas,
            'coef': rf.feature_importances_
        })
        modelo['dia_ajuste'] = fecha
        modelo['modelo'] = 'RF'
        modelos.append(modelo)

        f.value = count

    resultados_df = pd.concat(resultados, ignore_index=True)
    modelos_df = pd.concat(modelos, ignore_index=True)

    resultados_df = gpd.GeoDataFrame(resultados_df, geometry='geometry')

    return modelos_df, resultados_df
コード例 #26
0
def downsample(img_folder, out_folder, sample = True, split = .8, down_rate = 1, crop = None, dim="3D"):
    # Check if dim is properly defined
    if dim not in ["2D","3D"]:
        print("dim is not either 2D or 3D")
        return
    
    # Load all of the base filenames, ignoring all other files in directory
    base_files = [file for file in os.listdir(img_folder) if file.endswith("MR.npz")]
    
    # Check if the output directories exists. If not, create it. 
    
    create_dir(out_folder)
    create_dir(out_folder + "/train")
    create_dir(out_folder + "/test")
    create_dir(out_folder + "/train/imgs")
    create_dir(out_folder + "/train/segs")
    create_dir(out_folder + "/test/imgs")
    create_dir(out_folder + "/test/segs")
            
    # Set up progress bar.
    
    f = IntProgress(min=0, max=len(base_files))
    l = Label("Loading File")
    H = HBox([f, l])
    display(H) # display the bar and label
    
    # Set up the output folders
    
    out_fol_img = out_folder + "/train/imgs/"
    out_fol_seg = out_folder + "/train/segs/"
    tt = "Train: " # The label for the progress bar
    
    # If crop is not None, get the crop range:
    if not crop:
        a1 = b1 = c1 = 0
        (a2,b2,c2) = np.load(img_folder + "/" + base_files[0])['arr_0'].shape
    else:
        (a1,a2,b1,b2,c1,c2) = crop
        
    print("Cropping to ", a1,a2,b1,b2,c1,c2)
    
    # For each file, load both the file and segmentation in. Downsample both and output.
    
    ds = down_rate
    for n, file in enumerate(base_files):
        img = np.load(img_folder + "/" + file)['arr_0'][a1:a2,b1:b2,c1:c2]
        seg = np.load(img_folder + "/" + file[:-4] + "seg.npz")['arr_0'][a1:a2,b1:b2,c1:c2]
        

        if (n+1) > len(base_files)*split:
            out_fol_img = out_folder + "/test/imgs/"
            out_fol_seg = out_folder + "/test/segs/"
            tt = "Test: "
            
        for i in range(ds):
            for j in range(ds):
                for k in range(ds):
                    N = str(i + ds*j + (ds**2)*k)
                    ds_img = img[i::ds,j::ds,k::ds]
                    ds_seg = seg[i::ds,j::ds,k::ds]
                    
                    if dim is "3D":
                        np.savez_compressed(out_fol_img + file[:-4] + N + ".npz", ds_img)
                        np.savez_compressed(out_fol_seg + file[:-4] + N + ".npz", ds_seg)
                    elif dim is "2D":
                        for r in range(a2-a1):
                            np.savez_compressed(out_fol_img + file[:-4] + N + "_" + str(r) + ".npz", ds_img[r,:,:])
                            np.savez_compressed(out_fol_seg + file[:-4] + N + "_" + str(r) + ".npz", ds_seg[r,:,:])
                        
                    
        f.value += 1 # signal to increment the progress bar
        l.value = tt + file
        
      
    # Display a sample output if requested
    if sample:            
        display_train_test(out_folder,dim=dim)
                    
    ## Summarize preprocessing info
    
    f = ds**3
    
    print("Train Images:", int(f*np.floor(len(base_files)*split)))
    print("Test Images:", int(f*(len(base_files) - np.floor(len(base_files)*split))))
    print("Dimensions:", ds_img.shape)
    
    return ds_img.shape
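A hedged usage sketch; the folder names are placeholders, and the input folder is expected to hold paired '*MR.npz' / '*seg.npz' volumes as assumed by the code above:

patch_shape = downsample('raw_volumes', 'prepared', sample=False,
                         split=0.8, down_rate=2, crop=None, dim='3D')
print('output patch shape:', patch_shape)  # down_rate=2 yields 8 sub-sampled volumes per input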
コード例 #27
0
class PypelidWidget(object):
    """ """

    widgets = {
        'nreal': BoundedIntText(value=1000, min=0, max=100000, step=100, description='Number of realizations:',
            layout={'width': '250px'},
            style={'description_width': '150px',}),
        'button': Button(description="Run", icon='play', layout={'border':'solid 1px black', 'width': '100px'}),
        'progress': IntProgress(bar_style='success'),
        'timer': Label(),
        'snrbox': Label(layout={'border':'solid 1px green', 'width': '100px'}),
        'zmeas': Label(layout={'border':'solid 1px green', 'width': '100px'}),
        'zerr': Label(layout={'border':'solid 1px green', 'width': '100px'}),
        'zerr_68': Label(layout={'border':'solid 1px green', 'width': '100px'}),
        'zerr_sys': Label(layout={'border':'solid 1px green', 'width': '100px'}),
        'zerr_cat': Label(layout={'border':'solid 1px green', 'width': '100px'}),
        'signal_on': Checkbox(value=True, description='Signal', layout={'width':'80px'}, style={'description_width': '0px'}),
        'noise_on': Checkbox(value=True, description='Noise', layout={'width':'80px'}, style={'description_width': '0px'}),
        'real_on': Checkbox(value=True, description='Realization', layout={'width':'80px'}, style={'description_width': '0px'}),
        'seed': IntText(description='Seed', disabled=True, layout={'width':'150px'}, style={'description_width': '50px'}),
        'seed_checkbox': Checkbox(value=False, description='Freeze random seed',layout={'width':'150px'}, style={'description_width': '0px'}),
    }

    def __init__(self):
        self.instrument = instrument_widget.Instrument()
        self.foreground = foreground_widget.Foreground()
        self.galaxy = galaxy_widget.Galaxy()
        self.analysis = analysis_widget.Analysis()
        self.survey = survey_widget.Survey()
        self.config = config_widget.Config((self.galaxy, self.foreground, self.instrument, self.survey, self.analysis))
        self.running = False

        self.render_lock = threading.Lock()
        self.param_lock = threading.Lock()

    def render(self, change=None):
        """ """
        if not self.render_lock.acquire(False):
            return
        if not self.param_lock.acquire(False):
            return
        render_thread = threading.Thread(target=self._render, args=((self.render_lock, self.param_lock),))
        render_thread.start()

    def _render(self, locks):
        """ """
        self.widgets['render_button'].style.button_color = 'orange'

        if not self.widgets['seed_checkbox'].value:
            self.widgets['seed'].value = np.random.randint(0,1e6)
        seed = self.widgets['seed'].value

        rng.seed(seed)

        wavelength_scale, flux, var, obs_list = self.spec(noise=False)
        wavelength_scale_, flux_n, var_, obs_list_ = self.spec(noise=True)

        self.wavelength_scale = wavelength_scale / 1e4
        step = wavelength_scale[1] - wavelength_scale[0]
        self.signal = flux / step
        self.real = flux_n / step
        self.noise = var**0.5 / step

        self.hideshow_line()

        L, gal = obs_list[0]
        x, y = np.transpose(gal.sample(int(1e6), L.plate_scale, self.galaxy.widgets['iso'].value))

        dx, dy = np.transpose(L.PSF.sample(len(x)))

        x += dx
        y += dy

        r = np.sqrt(x*x + y*y)
        w = int(np.ceil(np.percentile(r, 80))) + 0.5
        w = min(20.5, w)
        b = np.arange(-w, w+1, 1)
        h, ey, ex = np.histogram2d(y, x, bins=(b, b))

        bc = (ey[1:]+ey[:-1])/2.

        self.figs['image'].data[0]['z'] = h
        self.figs['image'].data[0]['x'] = bc
        self.figs['image'].data[0]['y'] = bc

        ii = var > 0
        snr = np.sqrt(np.sum(flux[ii]**2/var[ii]))
        self.widgets['snrbox'].value = "%3.2f"%snr

        self.widgets['render_button'].style.button_color = 'lightgreen'
        for lock in locks:
            lock.release()



    def spec(self, noise=True):
        emission_lines = [
            ('Ha', self.galaxy.widgets['flux_ha'].value * 1e-16),
            ('N2a', self.galaxy.widgets['flux_n2a'].value * 1e-16),
            ('N2b', self.galaxy.widgets['flux_n2b'].value * 1e-16),
            ('S2a', self.galaxy.widgets['flux_s2a'].value * 1e-16),
            ('S2b', self.galaxy.widgets['flux_s2b'].value * 1e-16),
            ('S3a', self.galaxy.widgets['flux_s3a'].value * 1e-16),
            ('S3b', self.galaxy.widgets['flux_s3b'].value * 1e-16),
            ('O3a', self.galaxy.widgets['flux_o3a'].value * 1e-16),
            ('O3b', self.galaxy.widgets['flux_o3b'].value * 1e-16),
            ('Hb', self.galaxy.widgets['flux_hb'].value * 1e-16),
            ('O2', self.galaxy.widgets['flux_o2'].value * 1e-16),

        ]


        nexp_list = self.survey.widgets['nexp_red'].value, self.survey.widgets['nexp_blue'].value
        exp_time = self.survey.widgets['exp_time'].value

        ztol = self.analysis.widgets['ztol'].value



        config_list = self.instrument.get_config_list()
        obs_list = []
        for i, config in enumerate(config_list):

            nexp = nexp_list[i]
            if nexp == 0:
                continue

            O = optics.Optics(config)

            L = linesim.LineSimulator(O, extraction_sigma=self.analysis.widgets['extraction_sigma'].value, isotropize=self.galaxy.widgets['iso'].value)

            det_bg = nexp * exp_time * config['darkcurrent'] + nexp * config['readnoise']**2

            det_bg += nexp * exp_time * self.foreground.widgets['foreground'].value

            gal = galaxy.Galaxy(
                z=self.galaxy.widgets['redshift'].value,
                bulge_scale=self.galaxy.widgets['bulge_scale'].value,
                disk_scale=self.galaxy.widgets['disk_scale'].value,
                bulge_fraction=self.galaxy.widgets['bulge_fraction'].value,
                axis_ratio=self.galaxy.widgets['axis_ratio'].value,
                pa=self.galaxy.widgets['pa'].value,
                velocity_disp=self.galaxy.widgets['velocity_dispersion'].value,
            )

            for line, flux in emission_lines:
                wavelength = (1 + gal.z) * consts.line_list[line]

                if wavelength < O.lambda_start or wavelength > O.lambda_end:
                    continue

                signal = phot.flux_to_photon(flux, O.collecting_area, wavelength)
                signal *= exp_time * nexp
                signal *= O.transmission(np.array([wavelength]), 1)[0]

                if signal <= 0:
                    continue

                line_variance = signal

                scale = flux / signal

                if noise is False:
                    v = (signal * scale)**2/1e7
                else:
                    v = signal * scale**2

                gal.append_line(
                    wavelength=consts.line_list[line],
                    flux=signal * scale,
                    variance=v,
                    background=det_bg * scale**2,
                    rest_frame=1
                )

            # add a line at the center of the bandpass (observed frame)
            scale = phot.flux_to_photon(1, O.collecting_area, O.lambda_ref)
            scale *= exp_time * nexp
            scale *= O.transmission(np.array([O.lambda_ref]), 1)[0]
            scale = 1./scale
            gal.append_line(
                wavelength=O.lambda_ref,
                flux=0,
                variance=0,
                background=det_bg * scale**2,
                rest_frame=0
            )

            gal.compute_obs_wavelengths(gal.z)
            if gal.line_count == 0:
                continue

            obs_list.append((L, gal))

        wavelength_scales = []
        dispersion = []
        for L, gal in obs_list:
            x = np.arange(L.npix) * L.dispersion + L.lambda_min
            wavelength_scales.append(x)
            dispersion.append(L.dispersion)
        dispersion = np.min(dispersion)
        wavelength_min = np.min(np.concatenate(wavelength_scales))
        wavelength_max = np.max(np.concatenate(wavelength_scales))
        wavelength_scale = np.arange(wavelength_min, wavelength_max, dispersion)

        specset = []
        for i, obs in enumerate(obs_list):
            L, gal = obs
            spectra = L.sample_spectrum(gal)
            if noise:
                s = spectra[0]
            else:
                s = spectra[1]
            specset.append((wavelength_scales[i], np.array(s), np.array(spectra[2])))
        flux_stack, var_stack = combine_spectra(wavelength_scale, specset)

        return wavelength_scale, flux_stack, var_stack, obs_list


    def update(self, zgrid, zmeas, wavelength_scale, mean_total, var_total, count):
        """ """
        zmeas = np.array(zmeas)

        m = mean_total * 1./ count
        var = var_total * 1./ count - m**2

        ii = var > 0
        snr = np.sqrt(np.sum(m[ii]**2/var[ii]))
        self.widgets['snrbox'].value = "%3.2f"%snr

        ztrue = self.galaxy.widgets['redshift'].value

        ztol = self.analysis.widgets['ztol'].value

        dz = np.abs(zmeas - ztrue)
        sel = dz < ztol
        if np.sum(sel) > 0:
            z = np.mean(zmeas[sel])
            dzobs = np.abs(zmeas - z)
            dz68 = np.percentile(dzobs[sel], 68)
            self.widgets['zerr_68'].value = "%3.2e"%dz68
            if dz68 > 0:
                self.widgets['zerr_sys'].value = "%g"%((ztrue-z)*np.sqrt(np.sum(sel))/dz68)
            self.widgets['zerr_cat'].value = "%f"%(1 - np.sum(sel) * 1. / len(zmeas))

            self.widgets['zmeas'].value = "%g"%z
            self.widgets['zerr'].value = "%3.2e"%(ztrue - z)

        h, e = np.histogram(zmeas, bins=zgrid)
        h = h * 1./ np.sum(h)
        x = (e[1:]+e[:-1])/2.
        a = np.where(h>0)[0][0]-1
        b = np.where(h>0)[0][-1]+1
        x = x[a:b+1]
        h = h[a:b+1]

        self.figs['pdf'].data[0]['x'] = x
        self.figs['pdf'].data[0]['y'] = h


    def run(self, stop_event):
        """ """
        self.param_lock.acquire()
        self._start_time = time.time()

        emission_lines = [
            ('Ha', self.galaxy.widgets['flux_ha'].value * 1e-16),
            ('N2a', self.galaxy.widgets['flux_n2a'].value * 1e-16),
            ('N2b', self.galaxy.widgets['flux_n2b'].value * 1e-16),
            ('S2a', self.galaxy.widgets['flux_s2a'].value * 1e-16),
            ('S2b', self.galaxy.widgets['flux_s2b'].value * 1e-16),
            ('S3a', self.galaxy.widgets['flux_s3a'].value * 1e-16),
            ('S3b', self.galaxy.widgets['flux_s3b'].value * 1e-16),
            ('O3a', self.galaxy.widgets['flux_o3a'].value * 1e-16),
            ('O3b', self.galaxy.widgets['flux_o3b'].value * 1e-16),
            ('Hb', self.galaxy.widgets['flux_hb'].value * 1e-16),
            ('O2', self.galaxy.widgets['flux_o2'].value * 1e-16),

        ]


        nexp_list = self.survey.widgets['nexp_red'].value, self.survey.widgets['nexp_blue'].value
        exp_time = self.survey.widgets['exp_time'].value

        ztol = self.analysis.widgets['ztol'].value

        self.figs['pdf'].update_layout(
            shapes=[go.layout.Shape(
                   type="rect",
                   xref="x",
                    yref="paper",
                    x0=self.galaxy.widgets['redshift'].value-ztol,
                    y0=0,
                    x1=self.galaxy.widgets['redshift'].value+ztol,
                    y1=1,
                    fillcolor="LightSalmon",
                    opacity=0.5,
                    layer="below",
                    line_width=0,
        ),])

        config_list = self.instrument.get_config_list()
        obs_list = []
        for i, config in enumerate(config_list):

            nexp = nexp_list[i]
            if nexp == 0:
                continue

            O = optics.Optics(config)

            L = linesim.LineSimulator(O, extraction_sigma=self.analysis.widgets['extraction_sigma'].value, isotropize=self.galaxy.widgets['iso'].value)

            det_bg = nexp * exp_time * config['darkcurrent'] + nexp * config['readnoise']**2

            det_bg += nexp * exp_time * self.foreground.widgets['foreground'].value

            gal = galaxy.Galaxy(
                z=self.galaxy.widgets['redshift'].value,
                bulge_scale=self.galaxy.widgets['bulge_scale'].value,
                disk_scale=self.galaxy.widgets['disk_scale'].value,
                bulge_fraction=self.galaxy.widgets['bulge_fraction'].value,
                axis_ratio=self.galaxy.widgets['axis_ratio'].value,
                velocity_disp=self.galaxy.widgets['velocity_dispersion'].value,
            )

            for line, flux in emission_lines:
                wavelength = (1 + gal.z) * consts.line_list[line]

                if wavelength < O.lambda_start or wavelength > O.lambda_end:
                    continue

                signal = phot.flux_to_photon(flux, O.collecting_area, wavelength)
                signal *= exp_time * nexp
                signal *= O.transmission(np.array([wavelength]), 1)[0]

                if signal <= 0:
                    continue

                line_variance = signal

                scale = flux / signal

                gal.append_line(
                    wavelength=consts.line_list[line],
                    flux=signal * scale,
                    variance=signal * scale**2,
                    background=det_bg * scale**2,
                    rest_frame=1
                )

            # add a line at the center of the bandpass (observed frame)
            scale = phot.flux_to_photon(1, O.collecting_area, O.lambda_ref)
            scale *= exp_time * nexp
            scale *= O.transmission(np.array([O.lambda_ref]), 1)[0]
            scale = 1./scale
            gal.append_line(
                wavelength=O.lambda_ref,
                flux=0,
                variance=0,
                background=det_bg * scale**2,
                rest_frame=0
            )

            gal.compute_obs_wavelengths(gal.z)
            if gal.line_count == 0:
                continue

            obs_list.append((L, gal))

        wavelength_scales = []
        dispersion = []
        for L, gal in obs_list:
            x = np.arange(L.npix) * L.dispersion + L.lambda_min
            wavelength_scales.append(x)
            dispersion.append(L.dispersion)
        dispersion = np.min(dispersion)
        wavelength_min = np.min(np.concatenate(wavelength_scales))
        wavelength_max = np.max(np.concatenate(wavelength_scales))
        wavelength_scale = np.arange(wavelength_min, wavelength_max, dispersion)


        zgrid = np.arange(self.analysis.widgets['zmin'].value, self.analysis.widgets['zmax'].value,self.analysis.widgets['zstep'].value)
        zfitter = template_fit.TemplateFit(wavelength_scale, zgrid, consts.line_list, res=self.analysis.widgets['templ_res'].value,
                    template_file=self.analysis.template_path)


        nloops = self.widgets['nreal'].value
        self.widgets['progress'].min=0
        self.widgets['progress'].max=nloops


        prob_z = []
        zmeas = []

        t0 = time.time()
        t1 = time.time()

        mean_total = 0
        var_total = 0
        count = 0

        for loop in range(nloops):
            if stop_event.is_set():
                break
            specset = []
            for i, obs in enumerate(obs_list):
                L, gal = obs
                spectra = L.sample_spectrum(gal)
                specset.append((wavelength_scales[i], np.array(spectra[0]), np.array(spectra[2])))
            flux_stack, var_stack = combine_spectra(wavelength_scale, specset)

            mean_total += flux_stack
            var_total += flux_stack**2
            count += 1

            ii = var_stack>0
            invvar = np.zeros(len(var_stack), dtype='d')
            invvar[ii] = 1./var_stack[ii]

            amp = zfitter.template_fit(flux_stack, invvar, 2)
            pz = np.array(zfitter.pz())
            zmeas.append(centroidz(zgrid, pz))

            if time.time()-t0 > 10:
                self.update(zgrid, zmeas, wavelength_scale, mean_total, var_total, count)
                t0 = time.time()

            if time.time()-t1 > 1:
                self.widgets['progress'].value = loop
                self.widgets['progress'].description = "%i/%i"%(loop+1, nloops)
                self.widgets['timer'].value = "elapsed time: %i s"%(time.time()-self._start_time)
                t1 = time.time()

        self.widgets['progress'].description = "%i/%i"%(loop+1, nloops)
        self.widgets['timer'].value = "elapsed time: %i s"%(time.time()-self._start_time)

        self.widgets['progress'].value = 0
        self.update(zgrid, zmeas, wavelength_scale, mean_total, var_total, count)
        self.reset_button(self.widgets['button'])
        self.param_lock.release()


    def click_start(self, button):
        if not self.running:
            self.running = True
            button.description = "Stop"
            button.icon = "stop"
            button.style.button_color = 'orange'
            self.stop_event = threading.Event()
            thread = threading.Thread(target=self.run, args=(self.stop_event,))
            thread.start()
        else:
            self.stop_event.set()
            self.reset_button(button)

    def reset_button(self, button):
        """ """
        self.running = False
        button.description = "Run"
        button.icon = "play"
        button.style.button_color = 'lightgreen'

    def tab_event(self, change):
        if change['type'] == 'change' and change['name'] == 'selected_index':
            if change['new'] == 5:
                self.config.update()

    def hideshow_line(self, change=None):
        for key,i,arr in [('signal_on',2,self.signal),('real_on',1,self.real),('noise_on',0,self.noise)]:
            if self.widgets[key].value:
                if len(self.figs['spec'].data[i]['x']) != len(self.wavelength_scale):
                    self.figs['spec'].data[i]['x'] = self.wavelength_scale
                self.figs['spec'].data[i]['y'] = arr
            else:
                self.figs['spec'].data[i]['y'] = []

    def seed_checkbox(self, change=None):
        if self.widgets['seed_checkbox'].value:
            self.widgets['seed'].disabled = False
        else:
            self.widgets['seed'].disabled = True

    def show(self):
        """ """
        # display()
        about = VBox([HTML("<a href=\"https://github.com/bengranett/pypelidcalc\" target=\"_blank\">Pypelid-calc</a> version: %s"%pypelidcalc.__version__)])

        tab = Tab([self.galaxy.widget, self.foreground.widget, self.instrument.widget, self.survey.widget, self.analysis.widget, self.config.widget, about])
        tab.set_title(0, "Source")
        tab.set_title(1, "Foreground")
        tab.set_title(2, "Instrument")
        tab.set_title(3, "Survey")
        tab.set_title(4, "Analysis")
        tab.set_title(5, "Config")
        tab.set_title(6, "About")
        tab.layout={'height': '300px'}

        tab.observe(self.tab_event)

        display(tab)

        for group in [self.galaxy, self.foreground, self.instrument, self.survey, self.analysis]:
            for key, w in group.widgets.items():
                w.observe(self.render, names='value')

        self.figs = {}
        self.figs['spec'] = go.FigureWidget()
        self.figs['spec'].update_layout(xaxis_title=u'Wavelength (\u03BCm)',
                                  height=200,
                                  yaxis_title='Flux density',
                                  margin=dict(l=0, r=0, t=0, b=0, pad=0))
        self.figs['spec'].add_scatter(x=[], y=[], name='Noise', line_color='grey')
        self.figs['spec'].add_scatter(x=[], y=[], name='Realization', line_color='dodgerblue')
        self.figs['spec'].add_scatter(x=[], y=[], name='Signal', line_color='black')


        self.figs['image'] = go.FigureWidget()
        self.figs['image'].update_layout(height=200, width=200, margin=dict(l=0, r=0, t=0, b=0, pad=0))

        self.figs['image'].add_trace(go.Heatmap(z=[[]], showscale=False))

        self.widgets['render_button'] = Button(description="Update realization", layout={'border':'solid 1px black', 'width': '200px'})
        self.widgets['render_button'].on_click(self.render)

        self.widgets['seed_checkbox'].observe(self.seed_checkbox, names='value')

        self.widgets['signal_on'].observe(self.hideshow_line, names='value')
        self.widgets['real_on'].observe(self.hideshow_line, names='value')
        self.widgets['noise_on'].observe(self.hideshow_line, names='value')

        checkboxes = HBox([self.widgets['signal_on'], self.widgets['noise_on'], self.widgets['real_on']])
        display(HTML('<h3>Spectrum</h3>'))
        display(HBox([self.widgets['seed_checkbox'], self.widgets['seed']]))
        display(HBox([HTML('SNR:'), self.widgets['snrbox'], self.widgets['render_button'], checkboxes]))
        display(HBox([self.figs['spec'], self.figs['image']]))

        self.reset_button(self.widgets['button'])
        self.widgets['button'].on_click(self.click_start)

        elements = [HTML("<h3>Redshift measurement</h3>")]
        elements +=  [HBox([self.widgets['nreal'], self.widgets['button'], self.widgets['progress'], self.widgets['timer']])]

        horiz = [HTML('<b>Statistics:</b>')]
        horiz += [HTML('Mean z:'), self.widgets['zmeas']]
        horiz += [HTML('Error:'), self.widgets['zerr']]
        horiz += [HTML('68% limit:'), self.widgets['zerr_68']]
        horiz += [HTML('Fractional systematic:'), self.widgets['zerr_sys']]
        horiz += [HTML('Outlier rate:'), self.widgets['zerr_cat']]

        elements += [HBox(horiz)]

        display(VBox(elements))

        self.figs['pdf'] = go.FigureWidget()
        self.figs['pdf'].update_layout(xaxis_title='Redshift', height=200,
                              yaxis_title='Distribution',margin=dict(l=0, r=0, t=0, b=0, pad=0))

        self.figs['pdf'].add_scatter(x=[], y=[], name='Measured redshift')

        display(self.figs['pdf'])

        self.render_lock.acquire()
        self.param_lock.acquire()
        self._render((self.render_lock, self.param_lock))
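A hedged usage sketch; in a Jupyter notebook the widget is typically constructed and displayed like this, assuming pypelidcalc and its sub-widget modules are available:

widget = PypelidWidget()
widget.show()  # builds the tabs, figures and controls and triggers the first render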
コード例 #28
0
ファイル: train.py プロジェクト: twcmchang/CP-CNN-v2
def train(FLAG):
    print("Reading dataset...")
    if FLAG.dataset == 'CIFAR-10':
        train_data = CIFAR10(train=True)
        test_data = CIFAR10(train=False)
        vgg16 = VGG16(classes=10)
    elif FLAG.dataset == 'CIFAR-100':
        train_data = CIFAR100(train=True)
        test_data = CIFAR100(train=False)
        vgg16 = VGG16(classes=100)
    else:
        raise ValueError("dataset should be either CIFAR-10 or CIFAR-100.")
    print("Build VGG16 models for %s..." % FLAG.dataset)

    Xtrain, Ytrain = train_data.train_data, train_data.train_labels
    Xtest, Ytest = test_data.test_data, test_data.test_labels

    vgg16.build(vgg16_npy_path=FLAG.init_from,
                prof_type=FLAG.prof_type,
                conv_pre_training=True,
                fc_pre_training=False)
    vgg16.sparsity_train(l1_gamma=FLAG.lambda_s,
                         l1_gamma_diff=FLAG.lambda_m,
                         decay=FLAG.decay,
                         keep_prob=FLAG.keep_prob)

    # define tasks
    tasks = ['var_dp']
    print(tasks)

    # initial task
    cur_task = tasks[0]
    obj = vgg16.loss_dict[tasks[0]]

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=len(tasks))

    checkpoint_path = os.path.join(FLAG.save_dir, 'model.ckpt')
    tvars_trainable = tf.trainable_variables()

    #for rm in vgg16.gamma_var:
    #    tvars_trainable.remove(rm)
    #    print('%s is not trainable.'% rm)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # hyper parameters
        batch_size = 64
        epoch = 500
        early_stop_patience = 50
        min_delta = 0.0001
        opt_type = 'adam'

        # recorder
        epoch_counter = 0

        # optimizer
        global_step = tf.Variable(0, trainable=False)

        # Passing global_step to minimize() will increment it at each step.
        if opt_type == 'sgd':
            start_learning_rate = 1e-4  # adam # 4e-3 #sgd
            half_cycle = 20000
            learning_rate = tf.train.exponential_decay(start_learning_rate,
                                                       global_step,
                                                       half_cycle,
                                                       0.5,
                                                       staircase=True)
            opt = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                             momentum=0.9,
                                             use_nesterov=True)
        else:
            start_learning_rate = 1e-4  # adam # 4e-3 #sgd
            half_cycle = 10000
            learning_rate = tf.train.exponential_decay(start_learning_rate,
                                                       global_step,
                                                       half_cycle,
                                                       0.5,
                                                       staircase=True)
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate)

        train_op = opt.minimize(obj,
                                global_step=global_step,
                                var_list=tvars_trainable)

        # progress bar
        ptrain = IntProgress()
        pval = IntProgress()
        display(ptrain)
        display(pval)
        ptrain.max = int(Xtrain.shape[0] / batch_size)
        pval.max = int(Xtest.shape[0] / batch_size)

        spareness = vgg16.spareness(thresh=0.05)
        print("initial spareness: %s" % sess.run(spareness))

        # re-initialize
        initialize_uninitialized(sess)

        # reset due to adding a new task
        patience_counter = 0
        current_best_val_accu = 0

        # optimize when the aggregated obj
        while (patience_counter < early_stop_patience
               and epoch_counter < epoch):

            def load_batches():
                for i in range(int(Xtrain.shape[0] / batch_size)):
                    st = i * batch_size
                    ed = (i + 1) * batch_size
                    batch = ia.Batch(images=Xtrain[st:ed, :, :, :],
                                     data=Ytrain[st:ed, :])
                    yield batch

            batch_loader = ia.BatchLoader(load_batches)
            bg_augmenter = ia.BackgroundAugmenter(batch_loader=batch_loader,
                                                  augseq=transform,
                                                  nb_workers=4)

            # start training
            stime = time.time()
            bar_train = Bar(
                'Training',
                max=int(Xtrain.shape[0] / batch_size),
                suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')
            bar_val = Bar(
                'Validation',
                max=int(Xtest.shape[0] / batch_size),
                suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')
            train_loss, train_accu = 0.0, 0.0
            while True:
                batch = bg_augmenter.get_batch()
                if batch is None:
                    print("Finished epoch.")
                    break
                x_images_aug = batch.images_aug
                y_images = batch.data
                loss, accu, _ = sess.run(
                    [obj, vgg16.accu_dict[cur_task], train_op],
                    feed_dict={
                        vgg16.x: x_images_aug,
                        vgg16.y: y_images,
                        vgg16.is_train: True
                    })
                bar_train.next()
                train_loss += loss
                train_accu += accu
                ptrain.value += 1
                ptrain.description = "Training %s/%s" % (ptrain.value,
                                                         ptrain.max)
            train_loss = train_loss / ptrain.value
            train_accu = train_accu / ptrain.value
            batch_loader.terminate()
            bg_augmenter.terminate()

            # # training an epoch
            # for i in range(int(Xtrain.shape[0]/batch_size)):
            #     st = i*batch_size
            #     ed = (i+1)*batch_size

            #     augX = transform.augment_images(Xtrain[st:ed,:,:,:])

            #     sess.run([train_op], feed_dict={vgg16.x: augX,
            #                                     vgg16.y: Ytrain[st:ed,:],
            #                                     vgg16.is_train: False})
            #     ptrain.value +=1
            #     ptrain.description = "Training %s/%s" % (i, ptrain.max)
            #     bar_train.next()

            # validation
            val_loss = 0
            val_accu = 0
            for i in range(int(Xtest.shape[0] / 200)):
                st = i * 200
                ed = (i + 1) * 200
                loss, accu = sess.run(
                    [obj, vgg16.accu_dict[cur_task]],
                    feed_dict={
                        vgg16.x: Xtest[st:ed, :],
                        vgg16.y: Ytest[st:ed, :],
                        vgg16.is_train: False
                    })
                val_loss += loss
                val_accu += accu
                pval.value += 1
                pval.description = "Testing %s/%s" % (pval.value, pval.value)
            val_loss = val_loss / pval.value
            val_accu = val_accu / pval.value

            print("\nspareness: %s" % sess.run(spareness))
            # early stopping check
            if (val_accu - current_best_val_accu) > min_delta:
                current_best_val_accu = val_accu
                patience_counter = 0

                para_dict = sess.run(vgg16.para_dict)
                np.save(os.path.join(FLAG.save_dir, "para_dict.npy"),
                        para_dict)
                print("save in %s" %
                      os.path.join(FLAG.save_dir, "para_dict.npy"))
            else:
                patience_counter += 1

            # shuffle Xtrain and Ytrain in the next epoch
            idx = np.random.permutation(Xtrain.shape[0])
            Xtrain, Ytrain = Xtrain[idx, :, :, :], Ytrain[idx, :]

            # epoch end
            # writer.add_summary(epoch_summary, epoch_counter)
            epoch_counter += 1

            ptrain.value = 0
            pval.value = 0
            bar_train.finish()
            bar_val.finish()

            print(
                "Epoch %s (%s), %s sec >> train loss: %.4f, train accu: %.4f, val loss: %.4f, val accu at %s: %.4f"
                % (epoch_counter, patience_counter,
                   round(time.time() - stime, 2), train_loss, train_accu,
                   val_loss, cur_task, val_accu))
        saver.save(sess, checkpoint_path, global_step=epoch_counter)

        sp, rcut = gammaSparsifyVGG16(para_dict, thresh=0.02)
        np.save(os.path.join(FLAG.save_dir, "sparse_dict.npy"), sp)
        print("sparsify %s in %s" % (np.round(
            1 - rcut, 3), os.path.join(FLAG.save_dir, "sparse_dict.npy")))

        #writer.close()
        arr_spareness.append(1 - rcut)
        np.save(os.path.join(FLAG.save_dir, "sprocess.npy"), arr_spareness)
    FLAG.optimizer = opt_type
    FLAG.lr = start_learning_rate
    FLAG.batch_size = batch_size
    FLAG.epoch_end = epoch_counter
    FLAG.val_accu = current_best_val_accu

    header = ''
    row = ''
    for key in sorted(vars(FLAG)):
        if header == '':
            header = key
            row = str(getattr(FLAG, key))
        else:
            header += "," + key
            row += "," + str(getattr(FLAG, key))
    row += "\n"
    header += "\n"
    if os.path.exists("/home/cmchang/new_CP_CNN/model.csv"):
        with open("/home/cmchang/new_CP_CNN/model.csv", "a") as myfile:
            myfile.write(row)
    else:
        with open("/home/cmchang/new_CP_CNN/model.csv", "w") as myfile:
            myfile.write(header)
            myfile.write(row)
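train reads a handful of fields from the FLAG namespace (dataset, init_from, prof_type, lambda_s, lambda_m, decay, keep_prob, save_dir) and later writes results back onto it. A hypothetical argparse entry point is sketched below; the defaults are illustrative and not taken from the original repository:

# Hypothetical driver for train(FLAG); argument names mirror the fields read above,
# default values are placeholders only.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', default='CIFAR-10', choices=['CIFAR-10', 'CIFAR-100'])
    parser.add_argument('--init_from', default='vgg16.npy', help='pre-trained VGG16 weights (.npy)')
    parser.add_argument('--prof_type', default='linear')
    parser.add_argument('--lambda_s', type=float, default=0.0, help='l1_gamma sparsity weight')
    parser.add_argument('--lambda_m', type=float, default=0.0, help='l1_gamma_diff weight')
    parser.add_argument('--decay', type=float, default=0.0)
    parser.add_argument('--keep_prob', type=float, default=1.0)
    parser.add_argument('--save_dir', default='./checkpoints')
    FLAG = parser.parse_args()
    train(FLAG)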
Code example #29
0
def train(net,
          data,
          epochs=10,
          batch_size=10,
          seq_length=50,
          lr=0.001,
          clip=5,
          val_frac=0.1,
          print_every=10):
    ''' Training a network 
    
        Arguments
        ---------
        
        net: CharRNN network
        data: text data to train the network
        epochs: Number of epochs to train
        batch_size: Number of mini-sequences per mini-batch, aka batch size
        seq_length: Number of character steps per mini-batch
        lr: learning rate
        clip: gradient clipping
        val_frac: Fraction of data to hold out for validation
        print_every: Number of steps for printing training and validation loss
    
    '''
    net.train()

    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # create training and validation data
    val_idx = int(len(data) * (1 - val_frac))
    data, val_data = data[:val_idx], data[val_idx:]

    if (net.train_on_gpu):
        net.cuda()

    counter = 0
    n_chars = len(net.chars)

    progress = IntProgress(
        min=0,
        max=epochs * len(list(get_batches(data, batch_size, seq_length))),
        description="Training...")
    display(progress)

    for e in range(epochs):

        # initialize hidden state
        h = net.init_hidden(batch_size)

        for x, y in get_batches(data, batch_size, seq_length):
            counter += 1
            progress.value += 1

            # One-hot encode our data and make them Torch tensors
            x = one_hot_encode(x, n_chars)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)

            if (net.train_on_gpu):
                inputs, targets = inputs.cuda(), targets.cuda()

            # Creating new variables for the hidden state, otherwise
            # we'd backprop through the entire training history
            h = tuple([each.data for each in h])

            # zero accumulated gradients
            net.zero_grad()

            # get the output from the model
            output, h = net(inputs, h)

            # calculate the loss and perform backprop
            loss = criterion(output,
                             targets.view(batch_size * seq_length).long())
            loss.backward()
            # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()

            # loss stats
            if counter % print_every == 0:
                # Get validation loss
                val_h = net.init_hidden(batch_size)
                val_losses = []
                net.eval()
                for x, y in get_batches(val_data, batch_size, seq_length):
                    # One-hot encode our data and make them Torch tensors
                    x = one_hot_encode(x, n_chars)
                    x, y = torch.from_numpy(x), torch.from_numpy(y)

                    # Creating new variables for the hidden state, otherwise
                    # we'd backprop through the entire training history
                    val_h = tuple([each.data for each in val_h])

                    inputs, targets = x, y
                    if (net.train_on_gpu):
                        inputs, targets = inputs.cuda(), targets.cuda()

                    output, val_h = net(inputs, val_h)
                    val_loss = criterion(
                        output,
                        targets.view(batch_size * seq_length).long())

                    val_losses.append(val_loss.item())

                net.train()  # reset to train mode after iterating through validation data

                print("Epoch: {}/{}...".format(e + 1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))

    progress.close()
    print("Finished training.")
Code example #30
0
File: movie_maker.py Project: BlueBrain/BioExplorer
    def create_snapshot(self,
                        size,
                        path,
                        base_name,
                        samples_per_pixel,
                        export_intermediate_frames=False):
        """
        Create a snapshot of the current frame

        :size: Frame buffer size
        :path: Path where the snapshot file is exported
        :base_name: Base name of the snapshot file
        :samples_per_pixel: Samples per pixel
        :export_intermediate_frames: If True, intermediate samples are stored to disk. Otherwise,
        only the final accumulation is exported
        """
        application_params = self._client.get_application_parameters()
        renderer_params = self._client.get_renderer()
        old_image_stream_fps = application_params['image_stream_fps']
        old_viewport_size = application_params['viewport']
        old_samples_per_pixel = renderer_params['samples_per_pixel']
        old_max_accum_frames = renderer_params['max_accum_frames']
        old_smoothed_key_frames = copy.deepcopy(self._smoothed_key_frames)

        self._client.set_renderer(samples_per_pixel=1,
                                  max_accum_frames=samples_per_pixel)
        self._client.set_application_parameters(viewport=size)
        self._client.set_application_parameters(image_stream_fps=0)

        control_points = [self.get_camera()]
        current_animation_frame = int(
            self._client.get_animation_parameters()['current'])
        animation_frames = [current_animation_frame]

        self.build_camera_path(control_points=control_points,
                               nb_steps_between_control_points=1,
                               smoothing_size=1)

        progress_widget = IntProgress(description='In progress...',
                                      min=0,
                                      max=100,
                                      value=0)
        display(progress_widget)

        self.export_frames(
            path=path,
            base_name=base_name,
            animation_frames=animation_frames,
            size=size,
            samples_per_pixel=samples_per_pixel,
            export_intermediate_frames=export_intermediate_frames)

        done = False
        while not done:
            time.sleep(1)
            progress = self.get_export_frames_progress()['progress']
            progress_widget.value = progress * 100
            done = self.get_export_frames_progress()['done']

        progress_widget.description = 'Done'
        progress_widget.value = 100

        self._client.set_application_parameters(
            image_stream_fps=old_image_stream_fps, viewport=old_viewport_size)
        self._client.set_renderer(samples_per_pixel=old_samples_per_pixel,
                                  max_accum_frames=old_max_accum_frames)
        self._smoothed_key_frames = copy.deepcopy(old_smoothed_key_frames)
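A usage sketch, assuming the class exposing create_snapshot is the BioExplorer movie-maker helper and that it wraps a client connection; the import path, host address and output directory below are assumptions:

# Hypothetical usage; the MovieMaker/BioExplorer names and the host URL are assumptions.
from bioexplorer import BioExplorer, MovieMaker

bio_explorer = BioExplorer('localhost:5000')
movie_maker = MovieMaker(bio_explorer)
movie_maker.create_snapshot(
    size=[1920, 1080],
    path='/tmp/snapshots',
    base_name='frame',
    samples_per_pixel=64,
    export_intermediate_frames=False)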