Example No. 1
def extract_from_dataset(dataset, index=None, topK=None):
    '''
    Args:
        dataset : the dataset to extract data from
        index   : which element of each item returned by the dataset to keep;
                  leave as None if the dataset yields a single tensor
        topK    : stop after extracting this many items (all items if None)
    '''
    s = len(dataset)
    if topK is not None:
        s = topK
    with _tqdm(total=s) as pbar:
        pbar.set_description_str("Loading images")
        for idx, data in enumerate(dataset):
            if idx == 0:
                if index is None:
                    result = data
                else:
                    result = data[index]
            else:
                if index is None:
                    result = _torch.cat((result, data), dim=0)
                else:
                    result = _torch.cat((result, data[index]), dim=0)
            pbar.update(1)
            if topK is not None and topK == idx + 1:
                break
        pbar.set_description_str("Finished")
        return result
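A minimal usage sketch (assuming the module-level `_torch`/`_tqdm` aliases are in scope); the hypothetical loader below yields `(images, labels)` batches, so `index=0` keeps only the image tensors:

import torch
from torch.utils.data import DataLoader, TensorDataset

# Hypothetical data: 1000 CIFAR-sized images with integer labels.
dataset = TensorDataset(torch.randn(1000, 3, 32, 32),
                        torch.randint(0, 10, (1000,)))
loader = DataLoader(dataset, batch_size=32)

# Concatenate the image tensors of the first 10 batches along dim 0.
images = extract_from_dataset(loader, index=0, topK=10)
print(images.shape)  # torch.Size([320, 3, 32, 32])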
Example No. 2
    def test(self, test_loader=None):

        if test_loader is not None:
            self._testset = test_loader

        with _torch.no_grad():
            self._before_test_epoch_start()
            self.test_epoch_start()
            epoch_outputs = []
            with _tqdm(dynamic_ncols=True, total=len(self._testset)) as bar:
                bar.set_description(f"Epoch={self.trained_epochs} Testing")
                for idx, batch in enumerate(self._testset):
                    batch = self._type_transfer(batch)
                    for optimizer_idx in range(len(self._optimizers)):
                        step_out = self.test_step(batch, idx, optimizer_idx)
                        epoch_outputs.append(step_out)
                        loss = None
                        if isinstance(step_out, dict):
                            loss = step_out['loss']
                        elif isinstance(step_out, _torch.Tensor):
                            loss = step_out
                        else:
                            raise TypeError(
                                "test_step should return a Tensor or a dict "
                                "with a 'loss' key whose value is a Tensor"
                            )
                    bar.set_postfix_str("loss={:.3}".format(loss.item()))
                    bar.update(1)
                bar.set_description(f"Epoch={self.trained_epochs} Tested")

            self.test_epoch_end(epoch_outputs)
            return self._after_test_epoch_end(epoch_outputs)
Example No. 3
def batch_image_generator(generator,
                          noise_z,
                          num_batch=50,
                          batch_size=256,
                          conditional=False,
                          classes=None, best_size=30):
    with _tqdm(total=num_batch) as pbar:
        pbar.set_description_str("Saving images")
        if best_size > num_batch:
            best_size = num_batch
        results = _batch_image_generator(generator, noise_z, best_size, batch_size,
                                         conditional, classes)
        pbar.update(best_size)
        num_batch -= best_size
        while num_batch >= best_size:
            out = _batch_image_generator(generator, noise_z, best_size, batch_size,
                                         conditional, classes)
            results = _torch.cat((results, out), dim=0)
            pbar.update(best_size)
            num_batch -= best_size
        if num_batch > 0:
            out = _batch_image_generator(generator, noise_z, num_batch, batch_size,
                                         conditional, classes)
            results = _torch.cat((results, out), dim=0)
            pbar.update(num_batch)
        pbar.set_description_str("Finished")
    return results
Example No. 4
    def get_archive_mp3s(self, archive_entries, filepath):
        start = _timer()
        earliest_download = min([
            entry['start_time'] for entry in archive_entries
        ]).strftime('%m-%d-%y %H:%M')
        latest_download = max([
            entry['start_time'] for entry in archive_entries
        ]).strftime('%m-%d-%y %H:%M')

        t = _tqdm(archive_entries,
                  desc='Overall progress',
                  leave=True,
                  dynamic_ncols=True)

        t.write(f'Downloading {earliest_download} to {latest_download}')
        t.write(f'Storing at {filepath}.')

        for file in t:
            feed_id = self._parent.feed_id
            archive_uri = file['uri']
            file_date = self._format_entry_date(file['end_time'])

            # Build the path for saving the downloaded .mp3
            out_file_name = filepath + '-'.join([feed_id, file_date]) + '.mp3'

            # Get the URL of the mp3 file
            mp3_soup = self.get_download_soup(archive_uri)
            file_url = self._parse_mp3_path(mp3_soup)

            self._fetch_mp3([out_file_name, file_url], t)
Example No. 5
    def _fetch_mp3(self, entry, main_progress_bar):
        path, url = entry
        file_name = url.split('/')[-1]

        if not _os.path.exists(path):
            self._parent.throttle.throttle('file')

            r = _requests.get(url, stream=True)
            file_size = int(r.headers['Content-Length'])

            t = _tqdm(total=file_size,
                      desc=f'Downloading {file_name}',
                      dynamic_ncols=True)

            if r.status_code == 200:
                self._parent.throttle.got_last_file = True
                with open(path, 'wb') as f:
                    for chunk in r:
                        f.write(chunk)
                        t.update(len(chunk))
            elif r.status_code == 403:
                t.write(
                    f'\tReceived 403 on {file_name}. Archive file does not '
                    f'exist. Skipping.')
            else:
                t.write(f'\tCould not retrieve {url} (code {r.status_code}'
                        f'). Skipping.')
        else:
            main_progress_bar.write(f'\t{file_name} already exists. Skipping.')
Example No. 6
def multiprocess_interpolate(input,
                             caches,
                             max_process=32,
                             size=None,
                             scale_factor=None,
                             mode='bilinear',
                             align_corners=False):

    delta = len(input) // max_process + 1
    processes = []
    for idx in range(max_process):
        data = input[idx * delta:(idx + 1) * delta]
        cache = caches[idx * delta:(idx + 1) * delta]
        p = _processes.Process(target=_interpolate,
                               args=(data, cache, size, scale_factor,
                                     mode, align_corners))
        processes.append(p)
    for p in processes:
        p.start()
    print(f"{max_process} processes have been started", file=_sys.stderr)
    with _tqdm(total=len(processes)) as pbar:
        pbar.set_description_str("Executing")
        for p in processes:
            p.join()
            pbar.update(1)
        pbar.set_description_str("Executed")
    print("All the images have benn interpolated in caches", file=_sys.stderr)
Example No. 7
def tqdm(itr=None, **kwargs):

    color = _get_color.color()

    # if itr is not None:
    #     if len(list(itr)) == 0:
    #         return itr

    return (_tqdm(itr, colour=color, **kwargs) if threeML_config.interface.progress_bars else itr)
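A short usage sketch; the wrapper simply forwards `**kwargs` to `tqdm`, and whether a bar is actually drawn depends on the `threeML_config.interface.progress_bars` flag:

# The loop behaves the same either way; a colored progress bar is shown
# only when progress bars are enabled in the threeML configuration.
total = 0
for value in tqdm(range(10_000), desc="Accumulating"):
    total += value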
Example No. 8
def thread_save_image(tensor,
                      base_dir,
                      file_type: str = "jpg",
                      normalize=True,
                      max_threads=32,
                      prefix="",
                      suffix="",
                      base_num=1,
                      placeholder="0",
                      just_length=10,
                      batch_size=2048):
    '''
    Parameters:
    tensor: images with shape B x C x H x W
    base_dir: directory where the images will be saved
    file_type: image file extension, e.g. "jpg"
    normalize: whether to normalize the images before saving
    max_threads: maximum number of concurrent workers (capped at 64)
    prefix: file name prefix
    suffix: file name suffix
    base_num: file names are numbers counting up from base_num
    placeholder: character used to pad the numeric file name
    just_length: total length of the numeric file name
    batch_size: how many images each worker saves
    '''
    s = len(tensor)
    max_threads = min(64, max_threads)
    if s < batch_size * max_threads:
        batch_size = s // max_threads
    semaphor = _threading.Semaphore(value=max_threads)
    length = s // batch_size + 1
    if s % batch_size == 0:
        length -= 1
    with _tqdm(total=length) as pbar:
        pbar.set_description_str("Saving")
        for idx in range(length):
            if len(str(base_num + idx)) > just_length:
                raise RuntimeError(
                    f"the length of {base_num + idx} > {just_length}")
            semaphor.acquire()
            thread = _threading.Thread(
                target=_thread_save_image,
                args=(tensor[idx * batch_size:(idx + 1) * batch_size],
                      base_dir, file_type, normalize, prefix, suffix,
                      base_num + batch_size * idx, placeholder, just_length))
            thread.start()
            pbar.update(1)
            semaphor.release()
        thread.join()
        pbar.set_description_str("Finished")
        print("All images will be saved after a few seconds ...",
              file=_sys.stderr)
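A hedged usage sketch (assuming the module's `_threading`, `_tqdm`, `_sys`, and `_thread_save_image` helpers are available); with these numbers the 100 images are split into 4 worker batches of 25, named from `base_num` upward and presumably padded to `just_length` digits with the placeholder character:

import torch

images = torch.rand(100, 3, 64, 64)  # B x C x H x W, values in [0, 1]

thread_save_image(images,
                  base_dir="./generated/",  # directory is assumed to exist
                  file_type="jpg",
                  max_threads=4,
                  base_num=1,
                  just_length=10,
                  batch_size=25)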
Example No. 9
def images_normalize(tensor, max_process=32):
    delta = len(tensor) // max_process + 1
    processes = []
    for idx in range(max_process):
        data = tensor[idx * delta:(idx + 1) * delta]
        p = _processes.Process(target=_normalize_data,
                               args=(data, ))
        processes.append(p)
    for p in processes:
        p.start()
    print(f"{max_process} processes have been started", file=_sys.stderr)
    with _tqdm(total=len(processes)) as pbar:
        pbar.set_description_str("Executing")
        for p in processes:
            p.join()
            pbar.update(1)
        pbar.set_description_str("Executed")
    return tensor.mul(255).add_(0.5).clamp_(0, 255).permute(0, 2, 3, 1).to('cpu', _torch.uint8).numpy()
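A small usage sketch (assuming `_processes`, `_normalize_data`, `_tqdm`, `_torch`, and `_sys` are the module's aliases); per the return statement above, the result is a uint8 NumPy array in B x H x W x C layout:

import torch

images = torch.rand(64, 3, 32, 32)           # B x C x H x W in [0, 1]
arr = images_normalize(images, max_process=8)
print(arr.shape, arr.dtype)                  # (64, 32, 32, 3) uint8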
Example No. 10
def genFibreH5(cellSize, hkl_str, uni_hkls_idx, symHKL_loop, xyz_pf, omega,
               qgrid, od):
    """
    wrapper
    """

    if not _os.path.exists('fibres.h5'): f = _h5.File('fibres.h5', 'w')
    else: f = _h5.File('fibres.h5', 'r+')
    f.close()

    hkl_loop_str = _np.array(hkl_str)[uni_hkls_idx]

    for hi, hfam in _tqdm(enumerate(symHKL_loop)):
        _calcFibreHDF5(
            hfam, xyz_pf, omega, qgrid, od, 'fibres.h5',
            hkl_loop_str[hi] + '_' + str(int(round(_np.rad2deg(cellSize)))))

    return
Example No. 11
    def eval(self, train=False):
        self.model.eval()
        loader = self.train_loader
        if not train:
            assert self.test_loader is not None, "test_loader is None, " \
                "please pass test_loader first: clf_eval.test_loader = test_loader"
            loader = self.test_loader
        s = 0
        a = 0
        with _tqdm(total=len(loader)) as pbar:
            pbar.set_description("Evaluating")
            for data in loader:
                pbar.update(1)
                out, labels = self.step(batch_data=data)
                t_a, t_s = self.computer_acc(out, labels)
                a += t_a
                s += t_s
            pbar.set_description("Evaluated")
        self.model.train()
        return a / s
Example No. 12
    def optimize(self,
                 maximize: Union[str, Callable[[pd.Series], float]] = 'SQN',
                 constraint: Callable[[dict], bool] = None,
                 return_heatmap: bool = False,
                 **kwargs) -> Union[pd.Series, Tuple[pd.Series, pd.Series]]:
        """
        Optimize strategy parameters to an optimal combination using
        parallel exhaustive search. Returns result `pd.Series` of
        the best run.

        `maximize` is a string key from the
        `backtesting.backtesting.Backtest.run`-returned results series,
        or a function that accepts this series object and returns a number;
        the higher the better. By default, the method maximizes
        Van Tharp's [System Quality Number](https://google.com/search?q=System+Quality+Number).

        `constraint` is a function that accepts a dict-like object of
        parameters (with values) and returns `True` when the combination
        is admissible to test with. By default, any parameters combination
        is considered admissible.

        If `return_heatmap` is `True`, besides returning the result
        series, an additional `pd.Series` is returned with a multiindex
        of all admissible parameter combinations, which can be further
        inspected or projected onto 2D to plot a heatmap
        (see `backtesting.lib.plot_heatmaps()`).

        Additional keyword arguments represent strategy arguments with
        list-like collections of possible values. For example, the following
        code finds and returns the "best" of the 7 admissible (of the
        9 possible) parameter combinations:

            backtest.optimize(sma1=[5, 10, 15], sma2=[10, 20, 40],
                              constraint=lambda p: p.sma1 < p.sma2)

        .. TODO::
            Add parameter `max_tries: Union[int, float] = None` which switches
            from exhaustive grid search to random search. See notes in the source.

        .. TODO::
            Improve multiprocessing/parallel execution on Windows with start method 'spawn'.
        """
        if not kwargs:
            raise ValueError('Need some strategy parameters to optimize')

        if isinstance(maximize, str):

            stats = self._results if self._results is not None else self.run()
            if maximize not in stats:
                raise ValueError(
                    '`maximize`, if str, must match a key in pd.Series '
                    'result of backtest.run()')

            def maximize(stats: pd.Series, _key=maximize):
                return stats[_key]

        elif not callable(maximize):
            raise TypeError(
                '`maximize` must be str (a field of backtest.run() result '
                'Series) or a function that accepts result Series '
                'and returns a number; the higher the better')

        if constraint is None:

            def constraint(_):
                return True

        elif not callable(constraint):
            raise TypeError(
                "`constraint` must be a function that accepts a dict "
                "of strategy parameters and returns a bool whether "
                "the combination of parameters is admissible or not")

        def _tuple(x):
            return x if isinstance(
                x, Sequence) and not isinstance(x, str) else (x, )

        class AttrDict(dict):
            def __getattr__(self, item):
                return self[item]

        param_combos = tuple(
            map(
                dict,  # back to dict so it pickles
                filter(
                    constraint,  # constraints applied on our fancy dict
                    map(
                        AttrDict,
                        product(*(zip(repeat(k), _tuple(v))
                                  for k, v in kwargs.items()))))))
        if not param_combos:
            raise ValueError('No admissible parameter combinations to test')

        if len(param_combos) > 300:
            warnings.warn('Searching best of {} configurations.'.format(
                len(param_combos)),
                          stacklevel=2)

        heatmap = pd.Series(np.nan,
                            index=pd.MultiIndex.from_tuples(
                                [p.values() for p in param_combos],
                                names=next(iter(param_combos)).keys()))

        # TODO: add parameter `max_tries:Union[int, float]=None` which switches
        # exhaustive grid search to random search. This might need to avoid
        # returning NaNs in stats on runs with no trades to differentiate those
        # from non-tested parameter combos in heatmap.

        def _batch(seq):
            n = np.clip(len(seq) // (os.cpu_count() or 1), 5, 300)
            for i in range(0, len(seq), n):
                yield seq[i:i + n]

        # Save necessary objects into "global" state; pass into concurrent executor
        # (and thus pickle) nothing but two numbers; receive nothing but numbers.
        # With start method "fork", children processes will inherit parent address space
        # in a copy-on-write manner, achieving better performance/RAM benefit.
        backtest_uuid = np.random.random()
        param_batches = list(_batch(param_combos))
        Backtest._mp_backtests[backtest_uuid] = (self, param_batches, maximize)
        try:
            # If multiprocessing start method is 'fork' (i.e. on POSIX), use
            # a pool of processes to compute results in parallel.
            # Otherwise (i.e. on Windows), sequential computation will be "faster".
            if mp.get_start_method(allow_none=False) == 'fork':
                with ProcessPoolExecutor() as executor:
                    futures = [
                        executor.submit(Backtest._mp_task, backtest_uuid, i)
                        for i in range(len(param_batches))
                    ]
                    for future in _tqdm(as_completed(futures),
                                        total=len(futures)):
                        batch_index, values = future.result()
                        for value, params in zip(values,
                                                 param_batches[batch_index]):
                            heatmap[tuple(params.values())] = value
            else:
                if os.name == 'posix':
                    warnings.warn(
                        "For multiprocessing support in `Backtest.optimize()` "
                        "set multiprocessing start method to 'fork'.")
                for batch_index in _tqdm(range(len(param_batches))):
                    _, values = Backtest._mp_task(backtest_uuid, batch_index)
                    for value, params in zip(values,
                                             param_batches[batch_index]):
                        heatmap[tuple(params.values())] = value
        finally:
            del Backtest._mp_backtests[backtest_uuid]

        best_params = heatmap.idxmax()

        if pd.isnull(best_params):
            # No trade was made in any of the runs. Just make a random
            # run so we get some, if empty, results
            self.run(**param_combos[0])
        else:
            # Re-run best strategy so that the next .plot() call will render it
            self.run(**dict(zip(heatmap.index.names, best_params)))

        if return_heatmap:
            return self._results, heatmap
        return self._results
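A typical call, sketched against the public backtesting.py API; the `SmaCross` strategy and the sample `GOOG`/`SMA` objects come from the library's documentation and `backtesting.test` module, not from the snippet above:

from backtesting import Backtest, Strategy
from backtesting.lib import crossover
from backtesting.test import GOOG, SMA


class SmaCross(Strategy):
    n1 = 10  # fast moving-average window (optimized below)
    n2 = 20  # slow moving-average window

    def init(self):
        self.sma1 = self.I(SMA, self.data.Close, self.n1)
        self.sma2 = self.I(SMA, self.data.Close, self.n2)

    def next(self):
        if crossover(self.sma1, self.sma2):
            self.buy()
        elif crossover(self.sma2, self.sma1):
            self.sell()


bt = Backtest(GOOG, SmaCross, cash=10_000, commission=.002)
stats, heatmap = bt.optimize(n1=range(5, 30, 5),
                             n2=range(10, 70, 10),
                             maximize='Equity Final [$]',
                             constraint=lambda p: p.n1 < p.n2,
                             return_heatmap=True)
print(stats['Equity Final [$]'], heatmap.idxmax())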
Example No. 13
def wimv(pfs, orient_dist, iterations=12):
    """
    perform WIMV inversion
    fixed grid in PF space required
    
    # TODO: remove requirement to pre-generate odf

    input:
        exp_pfs    : poleFigure object
        orient_dist: orientDist object
        iterations : number of iterations
    """
    """ calculate pointer """

    orient_dist._calcPointer('wimv', pfs)
    """ done with pointer generation """

    od_data = _np.ones(orient_dist.bungeList.shape[0] *
                       orient_dist.bungeList.shape[1] *
                       orient_dist.bungeList.shape[2])
    calc_od = {}
    recalc_pf = {}

    numPoles = pfs._numHKL
    numHKLs = [len(fam) for fam in pfs._symHKL]

    fullPFgrid = pfs.genGrid(pfs.res, radians=True, centered=False)

    for i in _tqdm(range(iterations),
                   desc='Performing WIMV iterations',
                   position=0,
                   leave=True):
        """ first iteration, skip recalc of PF """

        if i == 0:  #first iteration is direct from PFs

            od_data = _np.ones(orient_dist.bungeList.shape[0] *
                               orient_dist.bungeList.shape[1] *
                               orient_dist.bungeList.shape[2])
            calc_od[0] = _np.zeros((od_data.shape[0], numPoles))

            for fi in range(numPoles):

                for pf_cell in _np.ravel(fullPFgrid):

                    if pf_cell in orient_dist.pointer['full']['pf to od'][fi]:

                        od_cells = _np.array(orient_dist.pointer['full']
                                             ['pf to od'][fi][pf_cell])
                        ai, bi = _np.divmod(pf_cell, fullPFgrid.shape[1])

                        if pf_cell < pfs.data[fi].shape[0] * pfs.data[
                                fi].shape[1]:  #inside of measured PF range

                            od_data[od_cells.astype(int)] *= pfs.data[fi][
                                int(ai), int(bi)]
                """ loop over od_cells (alternative) """
                #    for od_cell in _np.ravel(orient_dist.bungeList):

                #        pf_cells = orient_dist.pointer['full']['od to pf'][fi][od_cell]

                #        pf_cellMax = pfs.data[fi].shape[0]*pfs.data[fi].shape[1]
                #        pf_cells = pf_cells[pf_cells < pf_cellMax]

                #        ai, bi = _np.divmod(pf_cells, fullPFgrid.shape[1])
                #        od_data[int(od_cell)] = _np.product( pfs.data[fi][ai.astype(int),bi.astype(int)] )

                calc_od[0][:, fi] = _np.power(od_data, (1 / numHKLs[fi]))
                # calc_od[0][:,fi] = _np.power(od_data,1)

            calc_od[0] = _np.product(calc_od[0], axis=1)**(1 / numPoles)
            #place into OD object
            calc_od[0] = _bunge(orient_dist.res,
                                orient_dist.cs,
                                orient_dist.ss,
                                weights=calc_od[0])
            calc_od[0].normalize()
        """ recalculate pole figures """
        recalc_pf[i] = _np.zeros(
            (fullPFgrid.shape[0], fullPFgrid.shape[1], numPoles))

        for fi in range(numPoles):

            for pf_cell in _np.ravel(fullPFgrid):

                if pf_cell in orient_dist.pointer['full']['pf to od'][
                        fi]:  #pf_cell is defined

                    od_cells = _np.array(
                        orient_dist.pointer['full']['pf to od'][fi][pf_cell])
                    ai, bi = _np.divmod(pf_cell, fullPFgrid.shape[1])
                    recalc_pf[i][int(ai), int(bi),
                                 fi] = (1 / len(od_cells)) * _np.sum(
                                     calc_od[i].weights[od_cells.astype(int)])

        recalc_pf[i] = _poleFigure(recalc_pf[i],
                                   pfs.hkls,
                                   orient_dist.cs,
                                   'recalc',
                                   resolution=5)
        recalc_pf[i].normalize()
        """ compare recalculated to experimental """

        RP_err = {}
        prnt_str = None

        _np.seterr(divide='ignore')

        for fi in range(numPoles):

            expLim = pfs.data[fi].shape
            RP_err[fi] = _np.abs(
                recalc_pf[i].data[fi][:expLim[0], :expLim[1]] -
                pfs.data[fi]) / recalc_pf[i].data[fi][:expLim[0], :expLim[1]]
            RP_err[fi][_np.isinf(RP_err[fi])] = 0
            RP_err[fi] = _np.sqrt(_np.mean(RP_err[fi]**2))

            if prnt_str is None:
                prnt_str = 'RP Error: {:.4f}'.format(
                    _np.round(RP_err[fi], decimals=4))
            else:
                prnt_str += ' | {:.4f}'.format(
                    _np.round(RP_err[fi], decimals=4))

        _tqdm.write(prnt_str)
        """ (i+1)th inversion """

        od_data = _np.ones(orient_dist.bungeList.shape[0] *
                           orient_dist.bungeList.shape[1] *
                           orient_dist.bungeList.shape[2])
        calc_od[i + 1] = _np.zeros((od_data.shape[0], numPoles))

        for fi in range(numPoles):

            for pf_cell in _np.ravel(fullPFgrid):

                if pf_cell in orient_dist.pointer['full']['pf to od'][fi]:

                    od_cells = _np.array(
                        orient_dist.pointer['full']['pf to od'][fi][pf_cell])
                    ai, bi = _np.divmod(pf_cell, fullPFgrid.shape[1])

                    if pf_cell < pfs.data[fi].shape[0] * pfs.data[fi].shape[
                            1]:  #inside of measured PF range

                        if recalc_pf[i].data[fi][int(ai), int(bi)] == 0:
                            continue
                        else:
                            od_data[od_cells.astype(int)] *= (
                                pfs.data[fi][int(ai), int(bi)] /
                                recalc_pf[i].data[fi][int(ai),
                                                      int(bi)])
            """ loop over od_cells (alternative) """
            #    for od_cell in _tqdm(_np.ravel(orient_dist.bungeList)):

            #        pf_cells = orient_dist.pointer['full']['od to pf'][fi][od_cell]

            #        pf_cellMax = pfs.data[fi].shape[0]*pfs.data[fi].shape[1]
            #        pf_cells = pf_cells[pf_cells < pf_cellMax]

            #        ai, bi = _np.divmod(pf_cells, fullPFgrid.shape[1])
            #        od_data[int(od_cell)] = _np.product( pfs.data[fi][ai.astype(int),bi.astype(int)] / recalc_pf[i].data[fi][ai.astype(int), bi.astype(int)] )

            calc_od[i + 1][:, fi] = _np.power(od_data, (1 / numHKLs[fi]))

        calc_od[i + 1] = calc_od[i].weights * _np.power(
            _np.product(calc_od[i + 1], axis=1), (0.8 / numPoles))

        #place into OD object
        calc_od[i + 1] = _bunge(orient_dist.res,
                                orient_dist.cs,
                                orient_dist.ss,
                                weights=calc_od[i + 1])
        calc_od[i + 1].normalize()

    return recalc_pf, calc_od
Example No. 14
def e_wimv(pfs,
           orient_dist,
           tube_rad,
           tube_exp,
           rad_type,
           crystal_dict,
           iterations=12,
           ret_origOD=False):
    """
    perform e-WIMV inversion
    arbitrary PF directions allowed
    minimum entropy solution
    
    input:
        exp_pfs      : poleFigure object
        orient_dist  : orientDist object
        rad_type     : xrd or nd
        crystal_dict : dictionary defining variables for reflection weight calculators in pyTex.diffrac
        
    """

    # rotations around y (integration variable along path)
    phi = _np.linspace(0, 2 * _np.pi, 73)

    _np.seterr(divide='ignore')

    # handle reflection weights
    if rad_type == 'xrd':
        pass  #TODO: implement this
        # elif rad_type == 'nd': refl_wgt = _calc_NDreflWeights(crystal_dict, pfs.refls) #based on ND
    elif rad_type == 'nd':
        refl_wgt = _np.ones((len(pfs.hkls)))
    elif rad_type == 'none':
        refl_wgt = _np.ones((len(pfs.hkls)))  #all ones
    else:
        raise ValueError('Please specify either xrd or nd or none (all = 1)')
    """ calculate 5x5 pf grid XYZ for paths """

    fullPFgrid, alp, bet, xyz_pf = pfs.genGrid(res=_np.deg2rad(5),
                                               radians=True,
                                               centered=False,
                                               ret_ab=True,
                                               ret_xyz=True,
                                               offset=True)
    """ use sklearn KDTree for reduction of points for query (euclidean) """

    #throw q_grid into positive hemisphere (SO3) for euclidean distance
    qgrid_pos = _np.copy(orient_dist.q_grid)
    qgrid_pos[qgrid_pos[:, 0] < 0] *= -1
    tree = _KDTree(qgrid_pos)

    #gnomic rotation angle
    rad = _np.sqrt(2 * (1 - _np.cos(0.5 * tube_rad)))
    #euclidean rotation angle
    euc_rad = _np.sqrt(4 * _np.sin(0.25 * tube_rad)**2)

    #calculate arbitrary paths
    orient_dist._calcPath('arb', pfs._normHKLs, pfs.y, phi, rad, euc_rad, tree)
    """ search for unique hkls to save time during path calculation """

    hkls_loop, uni_hkls_idx, hkls_loop_idx = _np.unique(_normalize(
        _np.array(pfs.hkls)),
                                                        axis=0,
                                                        return_inverse=True,
                                                        return_index=True)

    if len(uni_hkls_idx) < len(pfs.hkls):
        #time can be saved by only calculating paths for unique reflections
        # symHKL_loop = _symmetrise(orient_dist.cs, hkls_loop)
        # symHKL_loop = _normalize(symHKL_loop)

        #calculate paths
        orient_dist._calcPath('full_trun',
                              hkls_loop,
                              xyz_pf,
                              phi,
                              rad,
                              euc_rad,
                              tree,
                              hkls_loop_idx=hkls_loop_idx)

    #time can't be saved.. calculate all paths
    else:
        orient_dist._calcPath('full', pfs._normHKLs, xyz_pf, phi, rad, euc_rad,
                              tree)
    """ calculate pointer """

    orient_dist._calcPointer('e-wimv', pfs, tube_exp=tube_exp)
    """ e-wimv iterations """

    od_data = _np.ones(orient_dist.bungeList.shape[0] *
                       orient_dist.bungeList.shape[1] *
                       orient_dist.bungeList.shape[2])
    calc_od = {}
    recalc_pf = {}
    rel_err = {}

    recalc_pf_full = {}

    numPoles = pfs._numHKL
    numHKLs = [len(fam) for fam in pfs._symHKL]

    for i in _tqdm(range(iterations),
                   position=0,
                   desc='Performing E-WIMV iterations'):
        """ first iteration, skip recalc of PF """

        if i == 0:  #first iteration is direct from PFs

            od_data = _np.ones(orient_dist.bungeList.shape[0] *
                               orient_dist.bungeList.shape[1] *
                               orient_dist.bungeList.shape[2])
            calc_od[0] = _np.ones((od_data.shape[0], numPoles))

            for fi in range(numPoles):

                temp = _np.ones(
                    (orient_dist.bungeList.shape[0] *
                     orient_dist.bungeList.shape[1] *
                     orient_dist.bungeList.shape[2], len(pfs.y[fi])))

                for yi in range(len(pfs.y[fi])):

                    #check for zero OD cells that correspond to the specified pole figure direction
                    if yi in orient_dist.pointer['arb']['pf to od'][fi]:

                        od_cells = orient_dist.pointer['arb']['pf to od'][fi][
                            yi]['cell']
                        wgts = orient_dist.pointer['arb']['pf to od'][fi][yi][
                            'weight']

                        temp[od_cells.astype(int), yi] *= abs(pfs.data[fi][yi])
                """ zero to 1E-5 """
                temp = _np.where(temp == 0, 1E-5, temp)
                """ log before sum instead of product """
                temp = _np.log(temp)
                n = _np.count_nonzero(temp, axis=1)
                n = _np.where(n == 0, 1, n)

                try:
                    calc_od[0][:, fi] = _np.exp(
                        (_np.sum(temp, axis=1) * refl_wgt[fi]) / numHKLs[fi])
                except:
                    print(temp)
                    print(refl_wgt[fi])
                    print(fi)
                    print(yi)

            calc_od[0] = _np.product(calc_od[0], axis=1)**(1 / numPoles)
            #place into OD object
            calc_od[0] = _bunge(orient_dist.res,
                                orient_dist.cs,
                                orient_dist.ss,
                                weights=calc_od[0])
            calc_od[0].normalize()
        """ recalculate poles """
        recalc_pf[i] = {}

        for fi in range(numPoles):

            recalc_pf[i][fi] = _np.zeros(len(pfs.y[fi]))

            for yi in range(len(pfs.y[fi])):

                if yi in orient_dist.pointer['arb']['pf to od'][
                        fi]:  #pf_cell is defined

                    od_cells = _np.array(
                        orient_dist.pointer['arb']['pf to od'][fi][yi]['cell'])

                    #( 1 / (2*_np.pi) ) *
                    recalc_pf[i][fi][yi] = (1 / (2 * _np.pi)) * (
                        1 / sum(orient_dist.pointer['arb']['pf to od'][fi][yi]
                                ['weight'])) * _np.sum(
                                    orient_dist.pointer['arb']['pf to od'][fi]
                                    [yi]['weight'] *
                                    calc_od[i].weights[od_cells.astype(int)])
        """ compare recalculated to experimental """

        prnt_str = None

        rel_err[i] = {}
        _np.seterr(divide='ignore')

        if numPoles < 5: iter_num = numPoles
        else: iter_num = 5

        for fi in range(iter_num):  # display error for at most the first five poles

            rel_err[i][fi] = _np.abs(recalc_pf[i][fi] -
                                     pfs.data[fi]) / recalc_pf[i][fi]
            rel_err[i][fi][_np.isinf(rel_err[i][fi])] = 0
            rel_err[i][fi] = _np.sqrt(_np.mean(rel_err[i][fi]**2))

            if prnt_str is None:
                prnt_str = 'RP Error: {:.4f}'.format(
                    _np.round(rel_err[i][fi], decimals=4))
            else:
                prnt_str += ' | {:.4f}'.format(
                    _np.round(rel_err[i][fi], decimals=4))

        _tqdm.write(prnt_str)
        """ recalculate full pole figures """
        ##for reduced grid
        # recalc_pf_full[i] = {}

        #for 5x5 grid
        recalc_pf_full[i] = _np.zeros(
            (fullPFgrid.shape[0], fullPFgrid.shape[1], numPoles))

        for fi in range(numPoles):

            ##for reduced grid
            # recalc_pf_full[i][fi] = _np.zeros(len(xyz_pf))

            # for yi in range(len(xyz_pf)):
            for yi in _np.ravel(fullPFgrid):

                if yi in orient_dist.pointer['full']['pf to od'][
                        fi]:  #pf_cell is defined

                    od_cells = _np.array(orient_dist.pointer['full']
                                         ['pf to od'][fi][yi]['cell'])

                    ##for reduced grid
                    # recalc_pf_full[i][fi][yi] = ( 1 / _np.sum(orient_dist.pointer['full']['pf to od'][fi][yi]['weight']) ) * _np.sum( orient_dist.pointer['full']['pf to od'][fi][yi]['weight'] * calc_od[i].weights[od_cells.astype(int)] )

                    #for 5x5 grid
                    ai, bi = _np.divmod(yi, fullPFgrid.shape[1])
                    recalc_pf_full[i][int(ai), int(bi), fi] = (1 / _np.sum(
                        orient_dist.pointer['full']['pf to od'][fi][yi]
                        ['weight'])) * _np.sum(
                            orient_dist.pointer['full']['pf to od'][fi][yi]
                            ['weight'] *
                            calc_od[i].weights[od_cells.astype(int)])

        #for reduced grid
        # recalc_pf_full[i] = _poleFigure(recalc_pf_full[i], pfs.hkls, orient_dist.cs, 'recalc', resolution=5, arb_y=xyz_pf)
        #for 5x5 grid
        recalc_pf_full[i] = _poleFigure(recalc_pf_full[i],
                                        pfs.hkls,
                                        orient_dist.cs,
                                        'recalc',
                                        resolution=5)
        recalc_pf_full[i].normalize()

        if i == 0:
            pass
            #terminate early, error increased
        elif rel_err[i][0] >= rel_err[i - 1][0]:
            break
        """ (i+1)th inversion """

        od_data = _np.ones(orient_dist.bungeList.shape[0] *
                           orient_dist.bungeList.shape[1] *
                           orient_dist.bungeList.shape[2])
        calc_od[i + 1] = _np.zeros((od_data.shape[0], numPoles))

        for fi in range(numPoles):

            temp = _np.ones((orient_dist.bungeList.shape[0] *
                             orient_dist.bungeList.shape[1] *
                             orient_dist.bungeList.shape[2], len(pfs.y[fi])))

            for yi in range(len(pfs.y[fi])):

                #check for zero OD cells that correspond to the specified pole figure direction
                if yi in orient_dist.pointer['arb']['pf to od'][fi]:

                    od_cells = orient_dist.pointer['arb']['pf to od'][fi][yi][
                        'cell']
                    wgts = orient_dist.pointer['arb']['pf to od'][fi][yi][
                        'weight']

                    if recalc_pf[i][fi][yi] == 0: continue
                    else:
                        temp[od_cells.astype(int),
                             yi] = (abs(pfs.data[fi][yi]) /
                                    recalc_pf[i][fi][yi])
            """ zero to 1E-5 """
            temp = _np.where(temp == 0, 1E-5, temp)
            """ log sum """
            temp = _np.log(temp)
            n = _np.count_nonzero(temp, axis=1)
            n = _np.where(n == 0, 1, n)
            calc_od[i + 1][:, fi] = _np.exp(
                (_np.sum(temp, axis=1) * refl_wgt[fi]) / numHKLs[fi])

        calc_od[i + 1] = calc_od[i].weights * _np.power(
            _np.product(calc_od[i + 1], axis=1), (1 / numPoles))

        #place into OD object
        calc_od[i + 1] = _bunge(orient_dist.res,
                                orient_dist.cs,
                                orient_dist.ss,
                                weights=calc_od[i + 1])
        calc_od[i + 1].normalize()

    if ret_origOD: return recalc_pf_full, calc_od, orient_dist
    else: return recalc_pf_full, calc_od
Example No. 15
    def fit(self, epochs, prog_bar_refresh_rate=1, val_every_n_epoch=1):

        self._data_init(prog_bar_refresh_rate, val_every_n_epoch)
        length = len(self._trainset)
        with _tqdm(dynamic_ncols=True, total=len(self._trainset)) as pbar:
            for i in range(epochs):
                self.training_epoch_start()
                self.on_epoch_start()
                epoch_outputs = []
                pbar.set_description_str(
                    f"Epoch={1 + self.trained_epochs} step[{i + 1}/{epochs}]")
                for idx, batch in enumerate(self._trainset):
                    batch = self._type_transfer(batch)
                    for optimizer_idx in range(len(self._optimizers)):
                        step_out = self.training_step(batch, idx,
                                                      optimizer_idx)
                        if step_out is None:
                            break
                        epoch_outputs.append(step_out)
                        loss = None
                        if isinstance(step_out, dict):
                            loss = step_out['loss']
                        elif isinstance(step_out, _torch.Tensor):
                            loss = step_out
                        else:
                            raise TypeError(
                                "training_step should return a Tensor or a dict "
                                "with a 'loss' key whose value is a Tensor"
                            )
                        self.optimizer_step(loss,
                                            self._optimizers[optimizer_idx],
                                            optimizer_idx=optimizer_idx)
                    self.trained_steps += 1

                    self._bar_show(pbar)
                    if (
                            idx + 1
                    ) % self.prog_bar_refresh_rate == 0 or idx + 1 == length:
                        if idx + 1 == length:
                            pbar.update(idx % self.prog_bar_refresh_rate + 1)
                        else:
                            pbar.update(self.prog_bar_refresh_rate)

                for optimizer_idx in range(len(self._optimizers)):
                    if self._lr_scheduler_enabled and len(
                            self._lr_schedulers) >= optimizer_idx:
                        self._lr_schedulers[optimizer_idx].step()

                if i < epochs - 1:
                    pbar.update(-length)
                self.training_epoch_end(epoch_outputs)

                if (
                        i + 1
                ) % self.val_every_n_epoch == 0 and self._validation_enbale:
                    epoch_outputs = []
                    self._before_validation_epoch_start()
                    self.validation_epoch_start()
                    with _torch.no_grad():
                        with _tqdm(dynamic_ncols=True,
                                   total=len(self._valset)) as bar:
                            bar.set_description(
                                f"Epoch={self.trained_epochs} Validating")
                            for idx, batch in enumerate(self._valset):
                                batch = self._type_transfer(batch)
                                for optimizer_idx in range(
                                        len(self._optimizers)):
                                    step_out = self.validation_step(
                                        batch, idx, optimizer_idx)
                                    epoch_outputs.append(step_out)
                                    loss = None
                                    if isinstance(step_out, dict):
                                        loss = step_out['loss']
                                    elif isinstance(step_out, _torch.Tensor):
                                        loss = step_out
                                    else:
                                        raise TypeError(
                                            "validation_step should return a Tensor or a "
                                            "dict with a 'loss' key whose value is a Tensor"
                                        )
                                bar.set_postfix_str("loss={:.3}".format(
                                    loss.item()))
                                bar.update(1)
                            bar.set_description(
                                f"Epoch={self.trained_epochs + 1} Validated")
                    self.validation_epoch_end(epoch_outputs)
                    self._after_validation_epoch_end()

                self.on_epoch_end()
                self.trained_epochs += 1
Example No. 16
    def optimize(self,
                 maximize: Union[str, Callable[[pd.Series], float]] = 'SQN',
                 constraint: Callable[[dict], bool] = None,
                 return_heatmap: bool = False,
                 **kwargs) -> Union[pd.Series, Tuple[pd.Series, pd.Series]]:
        """
        Optimize strategy parameters to an optimal combination using
        parallel exhaustive search. Returns result `pd.Series` of
        the best run.

        `maximize` is a string key from the
        `backtesting.backtesting.Backtest.run`-returned results series,
        or a function that accepts this series object and returns a number;
        the higher the better. By default, the method maximizes
        Van Tharp's [System Quality Number](https://google.com/search?q=System+Quality+Number).

        `constraint` is a function that accepts a dict-like object of
        parameters (with values) and returns `True` when the combination
        is admissible to test with. By default, any parameters combination
        is considered admissible.

        If `return_heatmap` is `True`, besides returning the result
        series, an additional `pd.Series` is returned with a multiindex
        of all admissible parameter combinations, which can be further
        inspected or projected onto 2D to plot a heatmap.

        Additional keyword arguments represent strategy arguments with
        list-like collections of possible values. For example, the following
        code finds and returns the "best" of the 7 admissible (of the
        9 possible) parameter combinations:

            backtest.optimize(sma1=[5, 10, 15], sma2=[10, 20, 40],
                              constraint=lambda p: p.sma1 < p.sma2)

        .. TODO::
            Add parameter `max_tries: Union[int, float] = None` which switches
            from exhaustive grid search to random search. See notes in the source.
        """
        if not kwargs:
            raise ValueError('Need some strategy parameters to optimize')

        if isinstance(maximize, str):

            stats = self._results if self._results is not None else self.run()
            if maximize not in stats:
                raise ValueError(
                    '`maximize`, if str, must match a key in pd.Series '
                    'result of backtest.run()')

            def maximize(stats: pd.Series, _key=maximize):
                return stats[_key]

        elif not callable(maximize):
            raise TypeError(
                '`maximize` must be str (a field of backtest.run() result '
                'Series) or a function that accepts result Series '
                'and returns a number; the higher the better')

        if constraint is None:

            def constraint(_):
                return True

        elif not callable(constraint):
            raise TypeError(
                "`constraint` must be a function that accepts a dict "
                "of strategy parameters and returns a bool whether "
                "the combination of parameters is admissible or not")

        def _tuple(x):
            return x if isinstance(
                x, Sequence) and not isinstance(x, str) else (x, )

        class AttrDict(dict):
            def __getattr__(self, item):
                return self[item]

        param_combos = tuple(
            map(
                dict,  # back to dict so it pickles
                filter(
                    constraint,  # constraints applied on our fancy dict
                    map(
                        AttrDict,
                        product(*(zip(repeat(k), _tuple(v))
                                  for k, v in kwargs.items()))))))
        if not param_combos:
            raise ValueError('No admissible parameter combinations to test')

        if len(param_combos) > 300:
            warnings.warn('Searching best of {} configurations.'.format(
                len(param_combos)),
                          stacklevel=2)

        heatmap = pd.Series(np.nan,
                            index=pd.MultiIndex.from_tuples(
                                [p.values() for p in param_combos],
                                names=next(iter(param_combos)).keys()))

        # TODO: add parameter `max_tries:Union[int, float]=None` which switches
        # exhaustive grid search to random search. This might need to avoid
        # returning NaNs in stats on runs with no trades to differentiate those
        # from non-tested parameter combos in heatmap.

        def _batch(seq):
            n = np.clip(len(seq) // (os.cpu_count() or 1), 5, 300)
            for i in range(0, len(seq), n):
                yield seq[i:i + n]

        with ProcessPoolExecutor() as executor:
            futures = [
                executor.submit(self._mp_task, params)
                for params in _batch(param_combos)
            ]
            for future in _tqdm(as_completed(futures), total=len(futures)):
                for params, stats in future.result():
                    heatmap[tuple(params.values())] = maximize(stats)

        best_params = heatmap.idxmax()

        if pd.isnull(best_params):
            # No trade was made in any of the runs. Just make a random
            # run so we get some, if empty, results
            self.run(**param_combos[0])
        else:
            # Re-run best strategy so that the next .plot() call will render it
            self.run(**dict(zip(heatmap.index.names, best_params)))

        if return_heatmap:
            return self._results, heatmap
        return self._results
Example No. 17
    def build(self,
              start=None,
              end=None,
              days_back=None,
              chronological=False,
              rebuild=False):
        """
        Build archive entry data for the BroadcastifyArchive's feed_id and
        populate as a dictionary to the .entries attribute.

        Parameters
        ----------
            start : datetime.date
                The earliest date for which to populate the archive. If None,
                go from the earliest date on the calendar (inclusive).
            end : datetime.date
                The latest date for which to populate the archive. If None,
                go to the latest date on the calendar (inclusive).
            days_back : int
                The number of days before the current day to retrieve
                information for. A value of `0` retrieves only archive entries
                corresponding to the current day. Pass either days_back OR a
                valid combination of start/end dates.
            chronological : bool
                By default, start with the latest date and work backward in
                time. If True, reverse that.
            rebuild : bool
                Specifies that existing data in the `entries` list should be
                overwritten with data newly fetched from Broadcastify.
        """
        # Prevent the user from unintentionally erasing existing archive info
        if self.entries and not rebuild:
            raise ValueError(
                f'Archive already built: Entries already exist for'
                f' this BroadcastifyArchive. To erase and rebuild,'
                f' specify `rebuild=True` when calling .build()')

        # Make sure valid arguments were passed
        ## Either start/end or days_back; not both
        if (start or end) and days_back:
            raise ValueError(f'Expected either `days_back` OR a `start`/`end` '
                             f'combination. Both were passed.')

        ## `days_back` must be a non-negative integer
        if days_back is not None:
            bad_days_back = False
            try:
                if days_back < 0:
                    bad_days_back = True
            except:
                bad_days_back = True

            if bad_days_back:
                raise TypeError(f'`days_back` must be a non-negative integer.')

            # Capture the archive end date to count back from
            end = self.end_date

            # Make sure days_back is no larger than the archive date range size
            start = self.start_date
            archive_size = (end - start).days
            if days_back > archive_size:
                _warnings.warn(
                    f"The number of days_back passed ({days_back}) "
                    f"exceeds the size of the archive's date range ("
                    f"{archive_size}). Only valid dates will be "
                    f"built.")
                days_back = archive_size

        else:
            ## Check that `start` and `end` within archive's start/end dates
            ## If they weren't passed, set them to the archive's start/end dates
            out_of_range = ''

            if start:
                if start < self.start_date:
                    out_of_range = (f'start date out of archive range: '
                                    f'{start} < {self.start_date}\n')
                elif start > self.end_date:
                    out_of_range = (f'start date out of archive range: '
                                    f'{start} > {self.end_date}\n')
            else:
                start = self.start_date

            if end:
                if end > self.end_date:
                    out_of_range += (f'end date out of archive range: '
                                     f'{end} > {self.end_date}')
                elif end < self.start_date:
                    out_of_range += (f'end date out of archive range: '
                                     f'{end} < {self.start_date}')
            else:
                end = self.end_date

            if out_of_range:
                raise AttributeError(out_of_range)

            ## `start` cannot be > `end`
            if start > end:
                raise AttributeError(f'`start` date ({start}) cannot be after '
                                     f'`end` date ({end}).')

            # Get size of the date range
            days_back = (end - start).days

        # Adjust for exclusive end of range()
        days_back += 1

        # Build the list of dates to scrape
        date_list = sorted(
            [end - _dt.timedelta(days=x) for x in range(days_back)],
            reverse=not (chronological))

        archive_entries = []

        # Spin up a browser and an ArchiveCalendar
        # Set whether to show browser UI while fetching
        print('Launching webdriver...')
        options = _Options()
        if not self.show_browser_ui:
            options.add_argument('--headless')
            options.add_argument('--disable-gpu')

        with _webdriver.Chrome(executable_path=self.webdriver_path,
                               chrome_options=options) as browser:
            browser.get(self.archive_url)
            self.arch_cal = ArchiveCalendar(self, browser)

            # Get archive entries for each date in list
            t = _tqdm(date_list,
                      desc=f'Building dates',
                      leave=True,
                      dynamic_ncols=True)
            for date in t:
                t.set_description(f'Building {date}', refresh=True)
                self.arch_cal.go_to_date(date)

                if self.arch_cal.entries_for_date:
                    archive_entries.extend(self.arch_cal.entries_for_date)

        # Empty & replace the current archive entries
        self.entries = []

        # Store URIs and end times in the entries attribute
        for entry in archive_entries:
            entry_dict = {
                'uri': entry[0],
                'start_time': entry[1],
                'end_time': entry[2]
            }

            self.entries.append(entry_dict)

        self.earliest_entry = min(
            [entry['end_time'] for entry in self.entries]).date()
        self.latest_entry = max([entry['end_time']
                                 for entry in self.entries]).date()

        print(self)
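A hedged end-to-end sketch that drives only the attributes and methods shown above; the `BroadcastifyArchive` constructor arguments are assumptions, not taken from these snippets:

# Build one week of archive entries and inspect what was found.
archive = BroadcastifyArchive(feed_id='14439',                    # assumed signature
                              webdriver_path='/usr/bin/chromedriver')

archive.build(days_back=7, chronological=True)
print(f'{len(archive.entries)} entries between '
      f'{archive.earliest_entry} and {archive.latest_entry}')

# Each entry is a dict with 'uri', 'start_time' and 'end_time' keys,
# ready to be handed to a downloader such as get_archive_mp3s().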