Example #1
    def __call__(
        self, in_paths: List[str], out_paths: List[str], mfcc=False, n_workers=2
    ) -> Tuple[List[str], List[str]]:
        def write_out(inp, out):
            try:
                if os.path.exists(out):
                    print("Skipping:", out)
                    return (out, True)
                tensors = self.model.from_path(
                    inp, return_two=self.return_two, return_mfcc=mfcc
                )
                out_dir = os.path.dirname(out)
                if not os.path.exists(out_dir):
                    os.mkdir(out_dir)
                torch.save(tensors, out)
                print("Success:", out)
                return (out, True)
            except Exception as e:
                print("Failure:", e, out)
                return (out, False)

        with ThreadPool(nodes=n_workers) as P:
            # materialize the unordered results once: uimap returns a one-shot
            # iterator, so iterating it twice would leave the second list empty
            results = list(P.uimap(write_out, in_paths, out_paths))
        successes = [path for path, res in results if res]
        failures = [path for path, res in results if not res]
        return successes, failures
Example #2
def GMM_Ineq_parall(Theta0, DATA_STRUCT, d_struct):
    Theta = {
        "comm_mu": Theta0[0],
        "priv_mu": Theta0[1],
        "epsilon_mu": Theta0[2],
        "comm_var": Theta0[3],
        "priv_var": Theta0[4],
        "epsilon_var": Theta0[5],
    }

    rng = np.random.RandomState(d_struct['rng_seed'])

    start = time.time()

    print('--------------------------------------------------------')
    print('current parameter set is:')
    print(Theta)
    '''
    two-level parallelism:
        split the data
        run the estimation on each part
    '''
    data_n = len(DATA_STRUCT)

    work_pool = ThreadPool(nodes=data_n)

    cpu_num = multiprocessing.cpu_count()

    cpu_num_node = int((cpu_num - 1) / data_n)
    # use amap so that all the data parts run together asynchronously
    results = work_pool.amap(
        partial(para_data_allo_1, Theta, cpu_num_node, rng, d_struct),
        iter(DATA_STRUCT))
    work_pool.close()
    while not results.ready():
        time.sleep(5)
        print(".")


#    work_pool.join()

    auction_result = np.nanmean(list(results.get()))

    end = time.time()

    print("object value : " + str(auction_result))
    print("time spend in this loop: ")
    print(end - start)
    print('--------------------------------------------------------\n')

    ## save the parameters and objective value

    with open('para.txt', 'a+') as f:
        for item in Theta0:
            f.write("%f\t" % item)

        f.write("%f\t" % auction_result)
        f.write("%f\n" % (end - start) / 60)

    return auction_result
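Example #2 pins the shared inputs (Theta, cpu_num_node, rng, d_struct) with functools.partial so that amap only has to iterate over the data chunks. A minimal, self-contained sketch of that pattern, with a made-up worker standing in for para_data_allo_1:

import time
from functools import partial
from pathos.threading import ThreadPool

def score_chunk(theta, seed, chunk):
    # placeholder worker: pretend to evaluate one data chunk
    return sum(chunk) * theta

pool = ThreadPool(nodes=3)
results = pool.amap(partial(score_chunk, 0.5, 42), [[1, 2], [3, 4], [5, 6]])
pool.close()
while not results.ready():
    time.sleep(0.1)
print(results.get())  # one value per data chunk
pool.join()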
Example #3
    def run(self, *args):
        """ kickoff the program """
        self.add_args()

        if len(args) > 0:
            program_args = self.parser.parse_args(args)
        else:
            program_args = self.parser.parse_args()

        # setup the mongoDB connection
        mongo_connection = GritsMongoConnection(program_args)

        # Confirm the user wants to apply the indexes
        confirm = True
        if not program_args.force:
            confirm = self.query_yes_no(
                "This will lock the database.  Are your sure?", "no")
        if confirm:
            # ensure that the indexes are applied to the collections
            pool = ThreadPool(nodes=1)
            results = pool.amap(mongo_connection.ensure_indexes, [None])

            while not results.ready():
                # command-line spinner
                for cursor in '|/-\\':
                    sys.stdout.write('\b%s' % cursor)
                    sys.stdout.flush()
                    time.sleep(.25)

            sys.stdout.write('\b')
            sys.stdout.flush()
            # async-poll is done, get the results
            result = results.get()
            logging.info(result)
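The busy-wait spinner around amap in Example #3 is easy to reproduce in isolation. A minimal sketch, with time.sleep standing in for the real ensure_indexes job:

import sys
import time
from pathos.threading import ThreadPool

pool = ThreadPool(nodes=1)
results = pool.amap(time.sleep, [1.0])  # stand-in for the real indexing job
while not results.ready():
    # command-line spinner, as in the example above
    for cursor in '|/-\\':
        sys.stdout.write('\b%s' % cursor)
        sys.stdout.flush()
        time.sleep(.25)
sys.stdout.write('\b')
print(results.get())  # [None] once time.sleep has finished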
Example #4
    def begin_processing(self):
        pool = ThreadPool(nodes=Helper.config('threads'))

        for course in self.course_data:
            pool.map(self.download_lesson, course['lessons'])
            print(
                '--- Course "{course_title}" has been downloaded, with total of "{lessons_amount}" lessons.'
                .format(course_title=course['title'],
                        lessons_amount=len(course['lessons'])))
            time.sleep(Helper.config('sleep'))
Example #5
def build(
    charm_list,
    layer_list,
    layer_index,
    charm_branch,
    layer_branch,
    resource_spec,
    filter_by_tag,
    to_channel,
    rebuild_cache,
):
    build_env = BuildEnv(build_type=BuildType.CHARM)
    build_env.db["build_args"] = {
        "artifact_list": charm_list,
        "layer_list": layer_list,
        "layer_index": layer_index,
        "charm_branch": charm_branch,
        "layer_branch": layer_branch,
        "resource_spec": resource_spec,
        "filter_by_tag": list(filter_by_tag),
        "to_channel": to_channel,
        "rebuild_cache": rebuild_cache,
    }

    build_env.pull_layers()

    entities = []
    for charm_map in build_env.artifacts:
        for charm_name, charm_opts in charm_map.items():
            if not any(match in filter_by_tag for match in charm_opts["tags"]):
                continue

            charm_entity = f"cs:~{charm_opts['namespace']}/{charm_name}"
            entities.append(
                BuildEntity(build_env, charm_name, charm_opts, charm_entity))
            click.echo(f"Queued {charm_entity} for building")

    def _run_build(build_entity):
        build_entity.setup()

        if not build_entity.has_changed:
            return

        build_entity.proof_build()

        build_entity.push()
        build_entity.attach_resource("unpublished")
        build_entity.promote(to_channel=to_channel)

    pool = ThreadPool()
    pool.map(_run_build, entities)
    build_env.save()
Example #6
def candle_df(candles, candleamount):
    print("candle_df")
    # iterate over rows with iterrows()
    cpool = ThreadPool()
    #for index, data in candles.tail(candleamount).iterrows():
    #candle_df_thread(index, data)
    indices = candles.tail(candleamount).index.values.tolist()
    data = candles.tail(candleamount).values.tolist()
    results = cpool.uimap(candle_df_thread, indices, data)
    print(
        "Computing candlestick dataframe for given params with candles multithreaded..."
    )
    result = list(results)
    print(result)
    return (result)
Example #7
    def pull_layers(self):
        """clone all downstream layers to be processed locally when doing charm builds"""
        layers_to_pull = []
        for layer_map in self.layers:
            layer_name = list(layer_map.keys())[0]

            if layer_name == "layer:index":
                continue

            layers_to_pull.append(layer_name)

        pool = ThreadPool()
        results = pool.map(self.download, layers_to_pull)

        self.db["pull_layer_manifest"] = [result for result in results]
Example #8
    def make_science_image(self, rstate=None):

        if rstate is not None:
            np.random.seed(rstate)

        science_image = np.zeros(self.image_shape_pix)
        if self.parallel:
            pool = ThreadPool(8)
            coadds = list(
                tqdm(pool.imap(self._science_image_loop, range(self.ncoadds)),
                     total=self.ncoadds))
            return np.array(coadds).sum(axis=0)
        else:
            coadds = np.array(
                list(map(self._science_image_loop, trange(self.ncoadds))))
            return science_image + coadds.sum(axis=0)
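The ordered imap above (rather than uimap) yields coadds in submission order, which is what lets tqdm report progress as results stream in. A toy sketch of the same pattern, with a fake per-coadd function standing in for _science_image_loop:

import numpy as np
from tqdm import tqdm
from pathos.threading import ThreadPool

def _simulate(i):
    # hypothetical stand-in for a per-coadd simulation
    return np.full((2, 2), i, dtype=float)

pool = ThreadPool(4)
coadds = list(tqdm(pool.imap(_simulate, range(10)), total=10))
print(np.array(coadds).sum(axis=0))  # the stacked "science image"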
Example #9
    def pull_layers(self):
        """ clone all downstream layers to be processed locally when doing charm builds
        """
        if self.rebuild_cache:
            click.echo("-  rebuild cache triggered, cleaning out cache.")
            shutil.rmtree(str(self.layers_dir))
            shutil.rmtree(str(self.interfaces_dir))
            os.mkdir(str(self.layers_dir))
            os.mkdir(str(self.interfaces_dir))

        layers_to_pull = []
        for layer_map in self.layers:
            layer_name = list(layer_map.keys())[0]

            if layer_name == "layer:index":
                continue

            layers_to_pull.append(layer_name)

        pool = ThreadPool()
        pool.map(self.download, layers_to_pull)

        self.db["pull_layer_manifest"] = []
        _paths_to_process = {
            "layer": glob("{}/*".format(str(self.layers_dir))),
            "interface": glob("{}/*".format(str(self.interfaces_dir))),
        }
        for prefix, paths in _paths_to_process.items():
            for _path in paths:
                build_path = _path
                if not build_path:
                    raise BuildException(
                        f"Could not determine build path for {_path}")

                git.checkout(self.layer_branch, _cwd=build_path)

                layer_manifest = {
                    "rev":
                    git("rev-parse", "HEAD",
                        _cwd=build_path).stdout.decode().strip(),
                    "url":
                    f"{prefix}:{Path(build_path).stem}",
                }
                self.db["pull_layer_manifest"].append(layer_manifest)
                click.echo(
                    f"- {layer_manifest['url']} at commit: {layer_manifest['rev']}"
                )
Example #10
    def process(self, mongo_connection):
        """ process a chunk of rows in the file """
        reader = UnicodeReader(self.program_arguments.infile,
                               dialect=self.provider_type.dialect)
        self.find_header(reader)

        for chunk in GritsFileReader.gen_chunks(reader, mongo_connection):
            # collections of valid and invalid records for the batch upsert / insert many
            valid_records = []
            invalid_records = []
            # is threading enabled?  this may increase performance when mongoDB
            # is not running on localhost due to busy wait on finding an airport
            # in the case of FlightGlobalType.
            if settings._THREADING_ENABLED:
                pool = ThreadPool(nodes=settings._NODES)
                results = pool.amap(self.process_row, chunk)

                while not results.ready():
                    # command-line spinner
                    for cursor in '|/-\\':
                        sys.stdout.write('\b%s' % cursor)
                        sys.stdout.flush()
                        time.sleep(.25)

                sys.stdout.write('\b')
                sys.stdout.flush()
                # async-poll is done, get the results
                result = results.get()
                valid_records = [x[0] for x in result if x[0] is not None]
                invalid_records = [x[1] for x in result if x[1] is not None]

            else:
                # single-threaded synchronous processing
                for data in chunk:
                    valid, invalid = self.process_row(data)
                    if valid is not None:
                        valid_records.append(valid)
                    if invalid is not None:
                        invalid_records.append(invalid)

            # bulk upsert / insert many of the records
            valid_result = mongo_connection.bulk_upsert(
                self.provider_type.collection_name, valid_records)
            invalid_result = mongo_connection.insert_many(
                settings._INVALID_RECORD_COLLECTION_NAME, invalid_records)
            logging.debug('valid_result: %r', valid_result)
            logging.debug('invalid_result: %r', invalid_result)
Example #11
            def data_func(measurement):
                if not use_threads:
                    data = numpy.full(sources.shape + geobox.shape,
                                      measurement['nodata'],
                                      dtype=measurement['dtype'])
                    for index, datasets in numpy.ndenumerate(sources.values):
                        _fuse_measurement(
                            data[index],
                            datasets,
                            geobox,
                            measurement,
                            fuse_func=fuse_func,
                            skip_broken_datasets=skip_broken_datasets,
                            driver_manager=driver_manager)
                else:

                    def work_load_data(array_name, index, datasets):
                        data = sa.attach(array_name)
                        _fuse_measurement(
                            data[index],
                            datasets,
                            geobox,
                            measurement,
                            fuse_func=fuse_func,
                            skip_broken_datasets=skip_broken_datasets,
                            driver_manager=driver_manager)

                    array_name = '_'.join(
                        ['DCCORE',
                         str(uuid.uuid4()),
                         str(os.getpid())])
                    sa.create(array_name,
                              shape=sources.shape + geobox.shape,
                              dtype=measurement['dtype'])
                    data = sa.attach(array_name)
                    data[:] = measurement['nodata']

                    pool = ThreadPool(32)
                    pool.map(work_load_data, repeat(array_name),
                             *zip(*numpy.ndenumerate(sources.values)))
                    sa.delete(array_name)
                return data
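The *zip(*numpy.ndenumerate(...)) unpacking above splits an array enumeration into parallel index and value iterables, so pool.map can call the worker with several positional arguments. A toy sketch of the same unpacking, with a trivial worker in place of work_load_data:

from itertools import repeat
import numpy
from pathos.threading import ThreadPool

def worker(name, index, value):
    # trivial stand-in for work_load_data
    return (name, index, int(value))

values = numpy.arange(4).reshape(2, 2)
pool = ThreadPool(2)
print(pool.map(worker, repeat('shared_array_name'),
               *zip(*numpy.ndenumerate(values))))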
Example #12
def saveEngulfingSignals(candles, candleamount, params=[], symbol='XBTUSD'):
    global t_e_candles
    global t_symbol
    global t_candleamount
    t_e_candles = ind.candle_df(candles, candleamount)
    t_symbol = symbol
    t_candleamount = candleamount
    epool = ThreadPool()
    results = epool.uimap(saveEngulf_thread, params)
    print("Computing engulfing signals for all params multithreaded...")
    #DO NOT REMOVE THIS PRINT, IT IS NEEDED TO FINISH THE MULTITHREAD
    result = list(results)
    print(result)

    return (result)


#Examples
#saveKeltnerBands(100, [10,1], [True, False])
#saveATR(100, [1,20,30])
Example #13
    def GlobalThreadPool(self, *args, **kwargs):
        if self.thread_pool is None:
            self.thread_pool = ThreadPool(*args, **kwargs)
        return self.thread_pool
Example #14
    def ThreadPool(self, *args, **kwargs):
        thread_pool = ThreadPool(*args, **kwargs)
        self.register_atexit(thread_pool)
        return thread_pool
Example #15
    def create_storage(coords,
                       geobox,
                       measurements,
                       data_func=None,
                       use_threads=False):
        """
        Create a :class:`xarray.Dataset` and (optionally) fill it with data.

        This function builds the in-memory storage structure to hold datacube data, loading data from datasets that
        have been grouped appropriately by :meth:`group_datasets`.

        :param dict coords:
            OrderedDict holding `DataArray` objects defining the dimensions not specified by `geobox`

        :param GeoBox geobox:
            A GeoBox defining the output spatial projection and resolution

        :param measurements:
            list of :class:`datacube.model.Measurement`

        :param data_func:
            function to fill the storage with data. It is called once for each measurement, with the measurement
            as an argument. It should return an appropriately shaped numpy array. If not provided, an empty
            :class:`xarray.Dataset` is returned.

        :param bool use_threads:
            Optional. If set to True, IO will be multi-threaded.
            May not work for all drivers due to locking/GIL.

            Default is False.

        :rtype: :class:`xarray.Dataset`

        .. seealso:: :meth:`find_datasets` :meth:`group_datasets`
        """
        def empty_func(measurement_):
            coord_shape = tuple(coord_.size for coord_ in coords.values())
            return numpy.full(coord_shape + geobox.shape,
                              measurement_.nodata,
                              dtype=measurement_.dtype)

        data_func = data_func or empty_func

        result = xarray.Dataset(attrs={'crs': geobox.crs})
        for name, coord in coords.items():
            result[name] = coord
        for name, coord in geobox.coordinates.items():
            result[name] = (name, coord.values, {'units': coord.units})

        def work_measurements(measurement, data_func):
            return data_func(measurement)

        use_threads = use_threads and THREADING_REQS_AVAILABLE

        if use_threads:
            pool = ThreadPool(32)
            results = pool.map(work_measurements, measurements,
                               repeat(data_func))
        else:
            results = [data_func(a) for a in measurements]

        for measurement in measurements:
            data = results.pop(0)
            attrs = measurement.dataarray_attrs()
            attrs['crs'] = geobox.crs
            dims = tuple(coords.keys()) + tuple(geobox.dimensions)
            result[measurement.name] = (dims, data, attrs)

        return result
Example #16
import os
from PIL import Image
import numpy as np
import hub
from pathlib import Path
from pathos.threading import ThreadPool
import time

pool = ThreadPool(nodes=20)
#val_path = list(Path('./ILSVRC/Data/CLS-LOC/val').glob('*.JPEG'))
val_path = list(Path('./ILSVRC/Data/CLS-LOC/train').glob('**/*.JPEG'))
shape = (len(val_path), 500, 375, 3)
x = hub.array(shape, name='imagenet/test:latest', dtype='uint8')
print(x.shape)

index = 1


def upload_val(index):
    t1 = time.time()
    # Preprocess the image
    img = Image.open(val_path[index])
    img = img.resize((500, 375), Image.ANTIALIAS)
    img = np.asarray(img)
    if len(img.shape) == 2:
        img = np.expand_dims(img, -1)
    if img.shape[-1] == 4:
        img = img[..., :3]
    img = np.transpose(img, axes=(1, 0, 2))

    # Upload the image
Example #17
def tuneHyperParameters(simsettingsFileName,
                        hyperSettings=None,
                        saved_fd_model_path=None):
    """
        For some set of parameters the function will sample a number of them
        In order to find a more optimal configuration.
    """
    import os

    result_data = {}

    file = open(simsettingsFileName)
    settings = json.load(file)
    print("Settings: " + str(json.dumps(settings, indent=4)))
    file.close()
    file = open(hyperSettings)
    hyper_settings = json.load(file)
    print("Settings: " + str(json.dumps(settings, indent=4)))
    file.close()
    num_sim_samples = hyper_settings['meta_sim_samples']

    ## Check to see if there exists a saved fd model, if so save the path in the hyper settings
    if (not (saved_fd_model_path is None)):
        directory = getDataDirectory(settings)
        # file_name_dynamics=directory+"forward_dynamics_"+"_Best_pretrain.pkl"
        if not os.path.exists(directory):
            hyper_settings['saved_fd_model_path'] = saved_fd_model_path

    param_settings = get_param_values(hyper_settings)
    result_data['hyper_param_settings_files'] = []
    sim_data = []
    data_name = settings['data_folder']
    for params in param_settings:  ## Loop over each setting of parameters
        data_name_tmp = ""
        for par in range(
                len(params)
        ):  ## Assemble the vector of parameters and data folder name
            param_of_interest = hyper_settings['param_to_tune'][par]
            data_name_tmp = data_name_tmp + "/_" + param_of_interest + "_" + str(
                params[par]) + "/"
            settings[param_of_interest] = params[par]

        settings['data_folder'] = data_name + data_name_tmp
        directory = getBaseDataDirectory(settings)
        if not os.path.exists(directory):
            os.makedirs(directory)
        # file = open(settingsFileName, 'r')

        out_file_name = directory + os.path.basename(simsettingsFileName)
        result_data['hyper_param_settings_files'].append(out_file_name)
        print("Saving settings file with data to: ", out_file_name)
        print("settings['data_folder']: ", settings['data_folder'])
        out_file = open(out_file_name, 'w')
        out_file.write(json.dumps(settings, indent=4))
        # file.close()

        out_file.close()
        sim_data.append(
            (simsettingsFileName, num_sim_samples,
             copy.deepcopy(settings), hyper_settings['meta_sim_threads'],
             copy.deepcopy(hyper_settings)))

    # p = ProcessingPool(2)
    p = ThreadPool(hyper_settings['tuning_threads'])
    t0 = time.time()
    result = p.map(_trainMetaModel, sim_data)
    t1 = time.time()
    print("Hyper parameter tuning complete in " +
          str(datetime.timedelta(seconds=(t1 - t0))) + " seconds")
    result_data['sim_time'] = "Meta model training complete in " + str(
        datetime.timedelta(seconds=(t1 - t0))) + " seconds"
    result_data['meta_sim_result'] = result
    result_data['raw_sim_time_in_seconds'] = t1 - t0
    result_data['Number_of_simulations_sampled'] = len(param_settings)
    result_data['Number_of_threads_used'] = hyper_settings['tuning_threads']
    print(result)
    return result_data
Example #18
    def create_storage(coords,
                       geobox,
                       measurements,
                       data_func=None,
                       use_threads=False):
        """
        Create a :class:`xarray.Dataset` and (optionally) fill it with data.

        This function builds the in-memory storage structure to hold datacube data, loading data from datasets that
        have been grouped appropriately by :meth:`group_datasets`.

        :param dict coords:
            OrderedDict holding `DataArray` objects defining the dimensions not specified by `geobox`

        :param GeoBox geobox:
            A GeoBox defining the output spatial projection and resolution

        :param measurements:
            list of measurement dicts with keys: {'name', 'dtype', 'nodata', 'units'}

        :param data_func:
            function to fill the storage with data. It is called once for each measurement, with the measurement
            as an argument. It should return an appropriately shaped numpy array.

        :param bool use_threads:
            Optional. If set to True, IO will be multi-threaded.
            May not work for all drivers due to locking/GIL.

            Default is False.

        :rtype: :class:`xarray.Dataset`

        .. seealso:: :meth:`find_datasets` :meth:`group_datasets`
        """
        def empty_func(measurement_):
            coord_shape = tuple(coord_.size for coord_ in coords.values())
            return numpy.full(coord_shape + geobox.shape,
                              measurement_['nodata'],
                              dtype=measurement_['dtype'])

        data_func = data_func or empty_func

        result = xarray.Dataset(attrs={'crs': geobox.crs})
        for name, coord in coords.items():
            result[name] = coord
        for name, coord in geobox.coordinates.items():
            result[name] = (name, coord.values, {'units': coord.units})

        def work_measurements(measurement, data_func):
            return data_func(measurement)

        if use_threads and ('SharedArray' not in sys.modules
                            or 'pathos.threading' not in sys.modules):
            use_threads = False

        if use_threads:
            pool = ThreadPool(32)
            results = pool.map(work_measurements, measurements,
                               repeat(data_func))
        else:
            results = [data_func(a) for a in measurements]

        for measurement in measurements:
            data = results.pop(0)

            attrs = {
                'nodata': measurement.get('nodata'),
                'units': measurement.get('units', '1'),
                'crs': geobox.crs
            }
            if 'flags_definition' in measurement:
                attrs['flags_definition'] = measurement['flags_definition']
            if 'spectral_definition' in measurement:
                attrs['spectral_definition'] = measurement[
                    'spectral_definition']

            dims = tuple(coords.keys()) + tuple(geobox.dimensions)
            result[measurement['name']] = (dims, data, attrs)

        return result
Example #19
    def generate(self,
                 instruments: List[int],
                 batch_size: int,
                 n_threads=4,
                 max_chunks_per_music=-1,
                 chunk_reuse=1,
                 chunk_pool_size=1000):
        """
        Creates a generator that iterates over the dataset to generate chunks. The generator
        first starts by filling a pool of chunks.
        
        :param instruments: the id of the instruments to keep when generating chunks
        :param batch_size: the size of the batches yielded
        :param n_threads: the number of threads to synthesize waveforms in parallel
        :param chunk_reuse: the number of times a single chunk will be used per epoch
        :param chunk_pool_size: the minimum number of chunks the pool must contain before 
        starting to yield batches
        :return: 
        """
        assert chunk_pool_size >= batch_size, \
            "The chunk pool size should be greater or equal to the batch size."

        # Reset all generation statistics
        self.epochs = 0
        self.epoch_progress = 0.
        self.musics_sampled = 0
        self.chunks_generated = 0

        # Create a generator that loops infinitely over the songs in a random order
        def midi_fpath_generator():
            midi_fpaths = list(
                self._get_files_by_instruments(instruments, at_least=2))
            midi_fpaths = shuffle(midi_fpaths)
            while True:
                for i, midi_fpath in enumerate(midi_fpaths, 1):
                    yield midi_fpath
                    self.debug_midi_fpaths.append(midi_fpath)
                    if len(self.debug_midi_fpaths) > n_threads * 2:
                        del self.debug_midi_fpaths[0]
                    self.epoch_progress = i / len(midi_fpaths)
                self.epochs += 1

        midi_fpath_generator = midi_fpath_generator()

        # Define a function to fill a buffer
        def begin_next_buffer():
            # Estimate how many musics to sample from to generate a full batch
            avg_n_chunks = self.chunks_generated / self.musics_sampled if self.musics_sampled else 0
            n_musics = int(
                np.ceil(batch_size /
                        avg_n_chunks) if avg_n_chunks else 0) + n_threads
            self.musics_sampled += n_musics

            # Begin filling the buffer with threads from the threadpool
            func = lambda fpath: self.extract_chunks(fpath, instruments,
                                                     max_chunks_per_music)
            midi_fpaths = [next(midi_fpath_generator) for _ in range(n_musics)]
            return thread_pool.uimap(func, midi_fpaths)

        # Define a function to fill the chunk pool
        def refill_chunk_pool(chunk_pool, chunk_pool_uses, buffer):
            # Do nothing if the pool is already full
            if len(chunk_pool) >= chunk_pool_size:
                return chunk_pool, chunk_pool_uses, buffer

            while len(chunk_pool) < chunk_pool_size:
                # Retrieve the elements from the next buffer that were generated in the
                # background. If it is not done generating, block until so with a call to list().
                start = timer()
                buffer = list(buffer)

                # Flatten the buffer to retrieve a list of chunks, and append all the contents of
                # the buffer to the chunk pool
                n_musics = len(buffer)
                buffer = [chunk for chunks in buffer for chunk in chunks]
                chunk_pool.extend(buffer)
                chunk_pool_uses.extend([chunk_reuse] * len(buffer))
                delta = timer() - start
                print("Blocked %dms to generate %d chunks from %d musics." %
                      (int(delta * 1000), len(buffer), n_musics))

                # Register statistics about the number of generated chunks to better estimate how
                # many jobs will be needed to fill the pool the next time
                self.chunks_generated += len(buffer)

                # Begin a new buffer in the background
                buffer = begin_next_buffer()

            # Shuffle the chunk pool so as to mix different musics in a same batch
            chunk_pool, chunk_pool_uses = shuffle(chunk_pool, chunk_pool_uses)
            return chunk_pool, chunk_pool_uses, buffer

        # Create the threadpool, the chunk pool and initialize the buffers
        thread_pool = ThreadPool(n_threads)
        chunk_pool = []
        chunk_pool_uses = []
        buffer = begin_next_buffer()

        # We wrap the generator inside an explicit generator function. We could simply make this
        # function (MidiDataset.generate()) the generator itself, but splitting the initialization
        # code and the actual generator allows us to execute the initialization when
        # MidiDataset.generate() is called for the first time, rather than when we start iterating
        # from the dataset.
        def generator(chunk_pool, chunk_pool_uses, buffer):
            while True:
                # Make sure the chunk pool is full
                chunk_pool, chunk_pool_uses, buffer = \
                    refill_chunk_pool(chunk_pool, chunk_pool_uses, buffer)

                # Consume elements from the chunk pool to generate a batch
                chunks = chunk_pool[:batch_size]
                chunks_uses = chunk_pool_uses[:batch_size]
                del chunk_pool[:batch_size]
                del chunk_pool_uses[:batch_size]
                for chunk, chunk_uses in zip(chunks, chunks_uses):
                    if chunk_uses == 1:
                        continue
                    chunk_pool.append(chunk)
                    chunk_pool_uses.append(chunk_uses - 1)

                # Yield the chunks as a batch
                yield self.collate(chunks, instruments)

        return generator(chunk_pool, chunk_pool_uses, buffer)
Example #20
    def process(self):
        """ process rules """
        pool = ThreadPool()
        pool.map(self.__process, self.files_to_process)
Example #21
def backtest_mt(params):
    global capital
    su = None
    saveIndicators(candleamount=candleamount)
    #fix later
    candleSplice = candleData.tail(candleamount)

    atrseries = pd.Series(dtype=np.uint16)
    keltner_signals = pd.Series(dtype=object)
    engulf_signals = pd.Series(dtype=object)
    signals = pd.DataFrame(columns=['S'])
    atrperiod = params['atrperiod']
    #candleSplice = candleSplice.reset_index(drop=True)

    if (params['keltner'] == True) and (params['engulf'] == True):
        engulf_signals = pd.read_csv(
            'IndicatorData//' + params['symbol'] + '//Engulfing//' +
            "SIGNALS_t" + str(params['engulfthreshold']) + '_ignoredoji' +
            str(params['ignoredoji']) + '.csv',
            sep=',')
        keltner_signals = pd.read_csv('IndicatorData//' + params['symbol'] +
                                      '//Keltner//' + "SIGNALS_kp" +
                                      str(params['kperiod']) + '_sma' +
                                      str(params['ksma']) + '.csv',
                                      sep=',')
        signals = pd.concat([engulf_signals, keltner_signals], axis=1)
        signals.columns = ["E", "K"]
        signals['S'] = np.where((signals['E'] == signals['K']), Signal(0),
                                signals['E'])
    elif (params['keltner'] == True):
        keltner_signals = pd.read_csv('IndicatorData//' + params['symbol'] +
                                      '//Keltner//' + "SIGNALS_kp" +
                                      str(params['kperiod']) + '_sma' +
                                      str(params['ksma']) + '.csv',
                                      sep=',')
        signals['S'] = np.array(keltner_signals).reshape(
            1, len(keltner_signals))[0]
    elif (params['engulf'] == True):
        engulf_signals = pd.read_csv(
            'IndicatorData//' + params['symbol'] + '//Engulfing//' +
            "SIGNALS_t" + str(params['engulfthreshold']) + '_ignoredoji' +
            str(params['ignoredoji']) + '.csv',
            sep=',')
        signals['S'] = np.array(engulf_signals).reshape(
            1, len(engulf_signals))[0]
    print(signals['S'])
    #signals.to_csv('BacktestData//Signals//' + currentTime + '.csv')
    atrseries = pd.read_csv('IndicatorData//' + params['symbol'] + "//ATR//" +
                            "p" + str(atrperiod) + '.csv',
                            sep=',')
    copyIndex = candleSplice.index
    candleSplice = candleSplice.reset_index(drop=True)
    #candleSplice.merge(atrseries, left_index=True)
    #candleSplice.merge(signals['S'], right_on='S', left_index=True)
    candleSplice = pd.DataFrame.join(candleSplice, atrseries)
    candleSplice = pd.DataFrame.join(
        candleSplice, signals['S'])  #COMBINE SIGNALS AND CANDLE DATA
    candleSplice.index = copyIndex
    candleSplice['timestamp'] = pd.to_datetime(candleSplice.timestamp)
    finalCapitalData = None
    currentTime = datetime.now().strftime("%Y%m%d-%H%M")
    backtestDir = params['symbol'] + '//' + "len" + str(
        candleamount) + "_k" + str(params['keltner']) + "_e" + str(
            params['engulf']
        ) + "_id" + str(params['ignoredoji']) + "_eThrs" + str(
            params['engulfthreshold']
        ) + "_ATR" + str(params['atrperiod']) + "_kP" + str(
            params['kperiod']) + "_kSMA" + str(params['ksma']) + "_pm" + str(
                params['posmult']) + "_ST" + params['stoptype'] + "_sm" + str(
                    params['stopmult']) + "_tm" + str(
                        params['tmult']) + "_TR" + params['trade']

    bt_profit = 0

    if (percision != 1):
        isafe = []
        candleSplit = []
        initialLength = len(candleSplice)
        firstStart = candleSplice.index[0]
        lastDistanceSafe = None
        if params['symbol'] == 'XBTUSD':
            su = xbtusd_su
        elif params['symbol'] == 'ETHUSD':
            su = ethusd_su
        for i in range(percision - 1):
            #abs() is a temporary fix to running the backtest on short intervals
            isafe.append((i + 1) *
                         ((abs(initialLength - percision * su)) / percision) +
                         i * su)
        #candleSplit = list(np.array_split(candleSplice, percision))
        #candleSplit = list(candleSplit)
        for i in isafe:
            ia = int(i)
            if isafe.index(i) != 0:
                candleSplit.append(candleSplice.iloc[int(isafe[isafe.index(i) -
                                                               1]):ia + 1])
            lastDistanceSafe = ia
            #print("lds", lastDistanceSafe)
        # else:
        #candleSplit.append(candleSplice.iloc[:ia+1])
        #print("lds", lastDistanceSafe)
        #if(len(isafe) > 1):
        candleSplit.append(candleSplice.iloc[lastDistanceSafe:])

        #print(candleSplit)
        #time.sleep(100)
        #generate parameters for multithreading
        safe_length = len(candleSplit)
        safe_candleamount = np.repeat(candleamount, safe_length).tolist()
        safe_capital = np.repeat(capital, safe_length).tolist()
        safe_params = np.repeat(params, safe_length).tolist()

        withSafe = np.repeat(True, safe_length).tolist()

        print("safe thread amount:", safe_length)
        #create multithread pool
        start = time.time()
        #print(candleSplit)
        #time.sleep(1000)
        pool = ThreadPool(safe_length)

        #run initial chunks multithreaded to find safepoints
        safe_results = pool.uimap(backtest_strategy, safe_candleamount,
                                  safe_capital, safe_params, candleSplit,
                                  withSafe)

        pool.close()  #Compute anything we need to while threads are running
        candleSafe = []
        final_length = safe_length + 2
        withoutSafe = np.repeat(False, final_length).tolist()
        final_candleamount = np.repeat(candleamount, final_length).tolist()
        final_capital = np.repeat(capital, final_length).tolist()
        final_params = np.repeat(params, final_length).tolist()
        static_capital = capital

        safePoints = list(safe_results)  ######################################
        #time.sleep(1000)
        pool.join()

        for i in safePoints:
            if i == -1:
                backtest_mt.q.put(
                    'Not all safe points found for the given precision. Reduce precision, or increase the timeframe'
                )
                return
        safePoints = sorted(safePoints)

        if find_su:
            su = []
            for i, point in enumerate(safePoints):
                su.append(point - candleSplit[i].index[0])
            suAvg = mean(su)
            #only works on evenly spliced chunks
            chunkLength = len(candleSplit[0])
            backtest_mt.q.put(["su average:", suAvg, ' / ', chunkLength])
            return (su)

        print("safe points:", safePoints)
        idx = 0
        for i in safePoints:
            ia = i - firstStart
            idx = safePoints.index(i)
            if safePoints.index(i) != 0:
                candleSafe.append(candleSplice.iloc[lastDistanceSafe - idx:ia +
                                                    1])
                lastDistanceSafe = ia + 1
            else:
                candleSafe.append(candleSplice.iloc[:ia + 1])
                lastDistanceSafe = ia + 1
        candleSafe.append(candleSplice.iloc[lastDistanceSafe - idx:])

        print("final thread amount:", final_length)
        #print(candleSafe)
        #time.sleep(10000)
        fpool = ThreadPool(final_length)
        final_results = fpool.uimap(backtest_strategy, final_candleamount,
                                    final_capital, final_params, candleSafe,
                                    withoutSafe)
        fpool.close()
        final_result = list(final_results)
        fpool.join()

        ordered_result = sorted(final_result, key=lambda x: x[0])
        for i in range(len(ordered_result)):
            #print(final_result.index)
            if i != 0:
                #for non-static position size:
                ##capital += capital*((i[1]-static_capital)/static_capital)
                ordered_result[i][1]['capital'] += bt_profit
                bt_profit = ordered_result[i][1].iloc[-1][
                    'capital'] - static_capital
                finalCapitalData = pd.concat(
                    [finalCapitalData, ordered_result[i][1]],
                    ignore_index=True)
            else:
                bt_profit = ordered_result[i][1].iloc[-1][
                    'capital'] - static_capital
                finalCapitalData = pd.DataFrame(ordered_result[i][1])
        capital = finalCapitalData['capital'].iloc[-1]
    else:
        #run chunks spliced by safepoints multithreaded to retrieve fully accurate results
        final_results = backtest_strategy(candleamount, capital, params,
                                          candleSplice, False)
        final_result = list(final_results)
        capital = str(final_result[1]['capital'].iloc[-1])
        finalCapitalData = final_result[1]

    print(finalCapitalData)
    #time.sleep(1000)
    visualize_trades(finalCapitalData, backtestDir)
    saveBacktest(capital, params, backtestDir)
    backtest_mt.q.put(capital)
    end = time.time()
    print("Thread time: ", end - start)
    return ('done')
Example #22
def mt_decompile_apks(apk_fpaths, out_dir, nproc):
    with ThreadPool(nproc) as p:
        apk_dirs = p.map(decompile_one_apk, apk_fpaths,
                         [out_dir] * len(apk_fpaths))
    # apk_dirs = [i for i in apk_dirs if i is not None]
    return apk_dirs
Example #23
    doc_embed_dict[docid] = mean_vec


parser = argparse.ArgumentParser(
    description="Generate ELMo embeddings for docs")
parser.add_argument("-d",
                    "--data_dict",
                    required=True,
                    help="Path to bbc data dict file")
parser.add_argument("-tn",
                    "--thread_count",
                    type=int,
                    required=True,
                    help="No of threads in Thread pool")
parser.add_argument("-o", "--out", required=True, help="Path to output file")
args = vars(parser.parse_args())
bbc_data_dict_file = args["data_dict"]
thread_count = args["thread_count"]
outfile = args["out"]
with open(bbc_data_dict_file, 'r') as dd:
    bbc_data_dict = json.load(dd)
preproc_doctext_dict = preprocessed_paratext(bbc_data_dict)
doc_embed_dict = dict()
print("Data loaded")
doclist = list(preproc_doctext_dict.keys())

with ThreadPool(nodes=thread_count) as pool:
    pool.map(get_mean_elmo_embeddings, doclist)

np.save(outfile, doc_embed_dict)
Example #24
def make_patches(data_root, patches_root, patch_size, outline_filled=None, remove_filled=False, min_widths=('def',),
                 mirror=True, rotations=(0,), translations=((0, 0),), distinguishability_threshold=.5, num_workers=0,
                 random_samples=None, leave_width_percentile=None):
    if num_workers != 0:
        from pathos.multiprocessing import cpu_count, ProcessingPool
        from pathos.threading import ThreadPool
        if num_workers == -1:
            optimal_workers = cpu_count() - 1
            workers_pool = ProcessingPool(optimal_workers)
        else:
            workers_pool = ProcessingPool(num_workers)
        print(f'Workers pool: {workers_pool}')

        savers_pool = ThreadPool(1)
        saving_patches_in_bg = savers_pool.amap(lambda a: None, [])
    else:
        workers_pool = 0

    path = lambda basename, origin, width='def', ori='def', rot=0, t=(0, 0): os.path.join(patches_root, basename,
                                                                                          '{}x{}'.format(*patch_size),
                                                                                          'width_{}'.format(width),
                                                                                          'orientation_{}'.format(ori),
                                                                                          'rotated_deg_{}'.format(rot),
                                                                                          'translated_{}_{}'.format(*t),
                                                                                          '{}_{}.svg'.format(*origin))

    orientations = ['def']
    if mirror:
        orientations.append('mir')

    if random_samples is not None:
        min_widths_all = deepcopy(min_widths)
        orientations_all = deepcopy(orientations)
        rotations_all = deepcopy(rotations)
        translations_all = deepcopy(translations)

    source_images = glob(os.path.join(data_root, '**', '*.svg'), recursive=True)
    for file in source_images:
        print('Processing file {}'.format(file))
        basename = file[len(data_root) + 1:-4]  # split data_root and extension

        vector_image = VectorImage.from_svg(file)
        if remove_filled:
            vector_image.remove_filled()
        if outline_filled is not None:
            vector_image.leave_only_contours(outline_filled)
        if leave_width_percentile is not None:
            vector_image.leave_width_percentile(leave_width_percentile)

        if random_samples is not None:
            min_widths = np.random.choice(min_widths_all, size=min(random_samples, len(min_widths_all)), replace=False)
            orientations = np.random.choice(orientations_all, size=min(random_samples, len(orientations_all)),
                                            replace=False)
            rotations = np.random.choice(rotations_all, size=min(random_samples, len(rotations_all)), replace=False)
            translations = translations_all[
                np.random.choice(len(translations_all), size=min(random_samples, len(translations_all)), replace=False)]

        for width in min_widths:
            print('\twidth {}'.format(width))
            if width == 'def':
                vector_image_scaled = vector_image
            else:
                vector_image_scaled = vector_image.copy()
                vector_image_scaled.scale_to_width('min', width)
            for orientation in orientations:
                print('\t\torientation {}'.format(orientation))
                if orientation == 'def':
                    vector_image_reoriented = vector_image_scaled
                else:
                    vector_image_reoriented = vector_image_scaled.mirrored()
                for rotation in rotations:
                    print('\t\t\trotation {}'.format(rotation))
                    vector_image_rotated = vector_image_reoriented.rotated(rotation, adjust_view=True)
                    for translation in translations:
                        print('\t\t\t\ttranslation {}'.format(translation))
                        vector_image_translated = vector_image_rotated.translated(translation, adjust_view=True)

                        vector_patches = vector_image_translated.split_to_patches(patch_size, workers=workers_pool)
                        if num_workers != 0:
                            print('\t\t\t\t\twaiting for previous batch to be saved')
                            saving_patches_in_bg.get()

                        def simplify_and_save(vector_patch, basename=basename, width=width, orientation=orientation,
                                              rotation=rotation, translation=translation):
                            vector_patch.simplify_segments(distinguishability_threshold=distinguishability_threshold)
                            if len(vector_patch.paths) == 0:
                                return
                            save_path = path(basename,
                                             (int(vector_patch.x.as_pixels()), int(vector_patch.y.as_pixels())), width,
                                             orientation, rotation, translation)
                            os.makedirs(os.path.dirname(save_path), exist_ok=True)
                            vector_patch.save(save_path)

                        if num_workers == 0:
                            print('\t\t\t\t\tsaving patches')
                            for vector_path in vector_patches.reshape(-1):
                                simplify_and_save(vector_path)
                        else:
                            print('\t\t\t\t\tsaving patches')
                            saving_patches_in_bg = savers_pool.amap(simplify_and_save, vector_patches.reshape(-1))

    if num_workers != 0:
        workers_pool.close()
        workers_pool.join()
        workers_pool.clear()

        savers_pool.close()
        savers_pool.join()
        savers_pool.clear()
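The one-node ThreadPool in Example #24 acts as a background saver: each amap call queues the previous batch of patches for saving while the worker pool prepares the next one, and saving_patches_in_bg.get() blocks until that batch is flushed. A stripped-down sketch of the idiom, with a dummy save function:

import time
from pathos.threading import ThreadPool

def save_item(item):
    # hypothetical stand-in for an expensive write to disk
    time.sleep(0.1)
    return item

savers_pool = ThreadPool(1)
in_flight = savers_pool.amap(lambda a: None, [])  # nothing queued yet
for batch in ([1, 2, 3], [4, 5, 6]):
    in_flight.get()                                # wait for the previous batch
    in_flight = savers_pool.amap(save_item, batch)
in_flight.get()
savers_pool.close()
savers_pool.join()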
Example #25
def mt_download_apk(urls, out_dir, nproc):
    with ThreadPool(nproc) as p:
        apk_fns = p.map(download_apk, urls, [out_dir] * len(urls))
    return apk_fns
Example #26
from pathos.threading import ThreadPool
import time
pool = ThreadPool(nodes=4)

# do a blocking map on the chosen function
print(pool.map(pow, [1,2,3,4], [5,6,7,8]))

# do a non-blocking map, then extract the results from the iterator
results = pool.imap(pow, [1,2,3,4], [5,6,7,8])
print("...")
print(list(results))


# do an asynchronous map, then get the results
results = pool.amap(pow, [1,2,3,4], [5,6,7,8])
while not results.ready():
    time.sleep(5)
    print(".")

print(results.get())


# do one item at a time, using a pipe

print(pool.pipe(pow, 1, 5))
print(pool.pipe(pow, 2, 6))

# do one item at a time, using an asynchronous pipe

result1 = pool.apipe(pow, 1, 5)
result2 = pool.apipe(pow, 2, 6)
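
# retrieve the asynchronous pipe results, as with amap above
print(result1.get())
print(result2.get())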
Example #27
#!/usr/bin/env python
#
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Copyright (c) 1997-2015 California Institute of Technology.
# License: 3-clause BSD.  The full license text is available at:
#  - http://trac.mystic.cacr.caltech.edu/project/pathos/browser/pathos/LICENSE
"""
minimal interface to python's multiprocessing module
"""

from pathos.multiprocessing import ProcessPool, __STATE
from pathos.threading import ThreadPool  #XXX: thread __STATE not imported
from pathos.helpers import cpu_count
mp = ProcessPool()
tp = ThreadPool()


# backward compatibility
#FIXME: deprecated... and buggy!  (fails to dill on imap/uimap)
def mp_map(function, sequence, *args, **kwds):
    '''extend python's parallel map function to multiprocessing

Inputs:
    function  -- target function
    sequence  -- sequence to process in parallel

Additional Inputs:
    nproc     -- number of 'local' cpus to use  [default = 'autodetect']
    type      -- processing type ['blocking', 'non-blocking', 'unordered']
    threads   -- if True, use threading instead of multiprocessing
    '''
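The body of mp_map is cut off above. A rough sketch of the dispatch its docstring describes (process vs. thread pool; blocking, non-blocking, or unordered map) might look like the following; the keyword handling and the name mp_map_sketch are assumptions, not the original pathos implementation:

from pathos.multiprocessing import ProcessPool
from pathos.threading import ThreadPool
from pathos.helpers import cpu_count

def mp_map_sketch(function, sequence, *args, **kwds):
    # hypothetical reconstruction of the dispatch described in the docstring
    nproc = kwds.get('nproc', cpu_count())
    pool = ThreadPool(nproc) if kwds.get('threads', False) else ProcessPool(nproc)
    map_type = kwds.get('type', 'blocking')
    if map_type == 'unordered':
        return pool.uimap(function, sequence, *args)   # unordered iterator
    if map_type == 'non-blocking':
        return pool.imap(function, sequence, *args)    # ordered iterator
    return pool.map(function, sequence, *args)         # blocking list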
Example #28
def main():

    main_dir = Path(r'E:\dwd_meteo')
    os.chdir(main_dir)

    out_dir = Path(r'zipped_DWD_data')

    test_exist_dir = Path('extracted')

    main_site = r'https://opendata.dwd.de'

    out_dir_names = [
        'hist_daily_met',
        'pres_daily_met',
        'hist_daily_more_precip',
        'pres_daily_more_precip',
        'hist_daily_soil_temp',
        'pres_daily_soil_temp',
        'daily_solar',
        'hist_hourly_precip',
        'pres_hourly_precip',
        'hist_hourly_temp',
        'pres_hourly_temp',
        'hist_hourly_cloud_type',
        'pres_hourly_cloud_type',
        'hist_hourly_cloudiness',
        'pres_hourly_cloudiness',
        'hist_hourly_pressure',
        'pres_hourly_pressure',
        'hist_hourly_soil_temp',
        'pres_hourly_soil_temp',
        'hourly_solar',
        'hist_hourly_sun',
        'pres_hourly_sun',
        'hist_hourly_visib',
        'pres_hourly_visib',
    ]

    sub_links = [
        r'/climate_environment/CDC/observations_germany/climate/daily/kl/historical/',
        r'/climate_environment/CDC/observations_germany/climate/daily/kl/recent/',
        r'/climate_environment/CDC/observations_germany/climate/daily/more_precip/historical/',
        r'/climate_environment/CDC/observations_germany/climate/daily/more_precip/recent/',
        r'/climate_environment/CDC/observations_germany/climate/daily/soil_temperature/historical/',
        r'/climate_environment/CDC/observations_germany/climate/daily/soil_temperature/recent/',
        r'/climate_environment/CDC/observations_germany/climate/daily/solar/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/precipitation/historical/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/precipitation/recent/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/air_temperature/historical/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/air_temperature/recent/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/cloud_type/historical/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/cloud_type/recent/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/cloudiness/historical/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/cloudiness/recent/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/pressure/historical/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/pressure/recent/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/soil_temperature/historical/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/soil_temperature/recent/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/solar/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/sun/historical/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/sun/recent/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/visibility/historical/',
        r'/climate_environment/CDC/observations_germany/climate/hourly/visibility/recent/',
    ]

    assert len(out_dir_names) == len(sub_links)

    out_dir.mkdir(exist_ok=True)

    n_threads = len(out_dir_names)

    if n_threads == 1:
        for i in range(len(out_dir_names)):
            download_data(main_site + sub_links[i], out_dir / out_dir_names[i],
                          test_exist_dir)

    else:
        thread_pool = ThreadPool(nodes=n_threads)

        thread_pool.map(
            download_data, [main_site + sub_link for sub_link in sub_links],
            [out_dir / out_dir_name
             for out_dir_name in out_dir_names], [test_exist_dir] * n_threads)

    return