Example #1
    def _get_results_by_threading(self, func, params):
        """
        Query the GitHub API with multithreading.
        Return a list containing all results.
        """
        num_workers = self.num_workers
        if func.__name__ not in [
                "multi_pulls", "multi_commits", "multi_watchers"
        ]:
            num_workers = 1
        if self.debug_counts:
            p = ThPool(num_workers)
            pool_args = params[:self.debug_counts]
            return p.map(func, pool_args)
        else:
            stats = []
            start = time.time()
            for i in range(int(params.totalCount / self.batch_size) + 1):
                if self.num_workers != 1 and i != 0 and (
                        i + 1) * self.batch_size % 800 == 0:
                    print("Sleep 30 sec")
                    sleep(30)
                p = ThPool(num_workers)
                temp = p.map(
                    func,
                    params[i * self.batch_size:(i + 1) * self.batch_size])
                stats += temp
            print(
                f"{self.repo_name}, {func.__name__} takes: {round(time.time() - start, 3)} secs"
            )
        return stats
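The batching idiom above can be reduced to a self-contained sketch. This is an illustration only: fetch_page is a hypothetical stand-in for the GitHub API call, and ThPool is assumed to be pathos.pools.ThreadPool as in the example.

from pathos.pools import ThreadPool as ThPool


def fetch_page(page_id):
    # hypothetical stand-in for one GitHub API request
    return {"page": page_id}


def batched_map(func, items, num_workers=4, batch_size=100):
    """Run func over items in fixed-size batches, one pool.map call per batch."""
    results = []
    for start in range(0, len(items), batch_size):
        pool = ThPool(num_workers)
        results += pool.map(func, items[start:start + batch_size])
    return results


# e.g. batched_map(fetch_page, list(range(1000))) returns 1000 dicts, in input order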
Example #2
    def run(self, *args):
        """ kickoff the program """
        self.add_args()
        
        if len(args) > 0:
            program_args = self.parser.parse_args(args)
        else:
            program_args = self.parser.parse_args()
        
        # setup the mongoDB connection
        mongo_connection = GritsMongoConnection(program_args)
        
        # Confirm the user wants to apply the indexes
        confirm = True
        if not program_args.force:
            confirm = self.query_yes_no("This will lock the database.  Are you sure?", "no")
        if confirm:
            # ensure that the indexes are applied to the collections
            pool = ThreadPool(nodes=1)
            results = pool.amap(mongo_connection.ensure_indexes, [None])
            
            while not results.ready():
                # command-line spinner
                for cursor in '|/-\\':
                    sys.stdout.write('\b%s' % cursor)
                    sys.stdout.flush()
                    time.sleep(.25)

            sys.stdout.write('\b')
            sys.stdout.flush()
            # async-poll is done, get the results
            result = results.get()
            logging.info(result)
Example #3
def GMM_Ineq_parall(Theta0, DATA_STRUCT, d_struct):
    Theta = {
        "comm_mu": Theta0[0],
        "priv_mu": Theta0[1],
        "epsilon_mu": Theta0[2],
        "comm_var": Theta0[3],
        "priv_var": Theta0[4],
        "epsilon_var": Theta0[5],
    }

    rng = np.random.RandomState(d_struct['rng_seed'])

    start = time.time()

    print('--------------------------------------------------------')
    print('current parameter set is:')
    print(Theta)
    '''
    parallel programming with two levels:
        splitting the data
        running the estimation
    '''
    data_n = len(DATA_STRUCT)

    work_pool = ThreadPool(nodes=data_n)

    cpu_num = multiprocessing.cpu_count()

    cpu_num_node = int((cpu_num - 1) / data_n)
    # use amap (rather than submit) so that all the parts can run together
    results = work_pool.amap(
        partial(para_data_allo_1, Theta, cpu_num_node, rng, d_struct),
        iter(DATA_STRUCT))
    work_pool.close()
    while not results.ready():
        time.sleep(5)
        print(".")


#    work_pool.join()

    auction_result = np.nanmean(list(results.get()))

    end = time.time()

    print("object value : " + str(auction_result))
    print("time spend in this loop: ")
    print(end - start)
    print('--------------------------------------------------------\n')

    ## save the parameters and objective value

    with open('para.txt', 'a+') as f:
        for item in Theta0:
            f.write("%f\t" % item)

        f.write("%f\t" % auction_result)
        f.write("%f\n" % ((end - start) / 60))

    return auction_result
Example #4
    def run(self, *args):
        """ kickoff the program """
        self.add_args()

        if len(args) > 0:
            program_args = self.parser.parse_args(args)
        else:
            program_args = self.parser.parse_args()

        # setup the mongoDB connection
        mongo_connection = GritsMongoConnection(program_args)

        # Confirm the user wants to apply the indexes
        confirm = True
        if not program_args.force:
            confirm = self.query_yes_no(
                "This will lock the database.  Are you sure?", "no")
        if confirm:
            # ensure that the indexes are applied to the collections
            pool = ThreadPool(nodes=1)
            results = pool.amap(mongo_connection.ensure_indexes, [None])

            while not results.ready():
                # command-line spinner
                for cursor in '|/-\\':
                    sys.stdout.write('\b%s' % cursor)
                    sys.stdout.flush()
                    time.sleep(.25)

            sys.stdout.write('\b')
            sys.stdout.flush()
            # async-poll is done, get the results
            result = results.get()
            logging.info(result)
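The amap/ready() polling idiom used in run() can be shown in isolation. A minimal sketch, assuming pathos.threading.ThreadPool; slow_task is a hypothetical placeholder for a long-running call such as ensure_indexes.

import sys
import time

from pathos.threading import ThreadPool


def slow_task(_):
    # hypothetical placeholder for a long-running call
    time.sleep(2)
    return "done"


pool = ThreadPool(nodes=1)
async_result = pool.amap(slow_task, [None])   # returns immediately

while not async_result.ready():
    # command-line spinner, as in the example above
    for cursor in '|/-\\':
        sys.stdout.write('\b%s' % cursor)
        sys.stdout.flush()
        time.sleep(.25)

sys.stdout.write('\b')
sys.stdout.flush()
print(async_result.get())   # -> ['done']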
Example #5
    def _get_results_by_threading(self, func, params):
        """
        Query the GitHub API with multithreading.
        Return a list containing all results.
        """
        num_workers = self.num_workers
        if func.__name__ not in [
                "multi_pulls", "multi_commits", "multi_watchers"
        ]:
            num_workers = 1
        stats = []
        start = time.time()
        for i in range(len(params) // NUM_PER_PAGE):
            # pdb.set_trace()
            if self.num_workers != 1 and (i == 0 or
                                          (i + 1) * NUM_PER_PAGE % 400 == 0):
                sec = random.choice(range(10, 60))
                print("Sleep {} sec".format(sec))
                sleep(sec)
            p = ThPool(num_workers)
            temp = p.map(func, params[i * NUM_PER_PAGE:(i + 1) * NUM_PER_PAGE])
            stats += temp

        print(
            f"{self.repo_name}, {func.__name__} takes: {round(time.time()-start,3)} secs"
        )
        return stats
Example #6
    def begin_processing(self):
        pool = ThreadPool(nodes=Helper.config('threads'))

        for course in self.course_data:
            pool.map(self.download_lesson, course['lessons'])
            print(
                '--- Course "{course_title}" has been downloaded, with a total of "{lessons_amount}" lessons.'
                .format(course_title=course['title'],
                        lessons_amount=len(course['lessons'])))
            time.sleep(Helper.config('sleep'))
Example #7
    def _split_variable(self):
        """Split by variable."""
        outputfiles = [
            self._define_outputfilename(var, self.years)
            for var in self.variables
        ]
        years = len(outputfiles) * [self.years]
        if not self.threads:
            pool = Pool()
        else:
            pool = Pool(nodes=self.threads)
        pool.map(self._getdata, self.variables, years, outputfiles)
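Note that pathos pools accept several argument sequences in map and pair them element-wise, which is what the call above relies on. A small sketch, assuming a pathos ThreadPool (Pool in the example is presumably a pathos pool as well) and a hypothetical getdata worker:

from pathos.pools import ThreadPool


def getdata(variable, years, outputfile):
    # hypothetical worker standing in for self._getdata
    return "%s %s -> %s" % (variable, years, outputfile)


variables = ["t2m", "tp"]
years = [[2000, 2001]] * len(variables)
outputfiles = ["t2m.nc", "tp.nc"]

pool = ThreadPool(nodes=2)
# map(func, seq1, seq2, seq3) calls func(seq1[i], seq2[i], seq3[i]) for each i
print(pool.map(getdata, variables, years, outputfiles))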
Example #8
def build(
    charm_list,
    layer_list,
    layer_index,
    charm_branch,
    layer_branch,
    resource_spec,
    filter_by_tag,
    to_channel,
    rebuild_cache,
):
    build_env = BuildEnv(build_type=BuildType.CHARM)
    build_env.db["build_args"] = {
        "artifact_list": charm_list,
        "layer_list": layer_list,
        "layer_index": layer_index,
        "charm_branch": charm_branch,
        "layer_branch": layer_branch,
        "resource_spec": resource_spec,
        "filter_by_tag": list(filter_by_tag),
        "to_channel": to_channel,
        "rebuild_cache": rebuild_cache,
    }

    build_env.pull_layers()

    entities = []
    for charm_map in build_env.artifacts:
        for charm_name, charm_opts in charm_map.items():
            if not any(match in filter_by_tag for match in charm_opts["tags"]):
                continue

            charm_entity = f"cs:~{charm_opts['namespace']}/{charm_name}"
            entities.append(
                BuildEntity(build_env, charm_name, charm_opts, charm_entity))
            click.echo(f"Queued {charm_entity} for building")

    def _run_build(build_entity):
        build_entity.setup()

        if not build_entity.has_changed:
            return

        build_entity.proof_build()

        build_entity.push()
        build_entity.attach_resource("unpublished")
        build_entity.promote(to_channel=to_channel)

    pool = ThreadPool()
    pool.map(_run_build, entities)
    build_env.save()
Example #9
    def _split_variable_yr(self):
        """Fetch variable split by variable and year."""
        outputfiles = []
        variables = []
        for var in self.variables:
            outputfiles = [
                self._define_outputfilename(var, [yr]) for yr in self.years
            ]
            variables += len(outputfiles) * [var]
        if not self.threads:
            pool = Pool()
        else:
            pool = Pool(nodes=self.threads)
        pool.map(self._getdata, variables, self.years, outputfiles)
Example #10
def candle_df(candles, candleamount):
    print("candle_df")
    # iterate over rows with iterrows()
    cpool = ThreadPool()
    #for index, data in candles.tail(candleamount).iterrows():
    #candle_df_thread(index, data)
    indices = candles.tail(candleamount).index.values.tolist()
    data = candles.tail(candleamount).values.tolist()
    results = cpool.uimap(candle_df_thread, indices, data)
    print(
        "Computing candlestick dataframe for given params with candles multithreaded..."
    )
    result = list(results)
    print(result)
    return (result)
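uimap returns a lazy iterator whose results arrive in completion order, which is why the example above (and Example #18 below) must call list(results) before using the values. A minimal sketch, assuming pathos.pools.ThreadPool:

import time

from pathos.pools import ThreadPool


def work(x):
    time.sleep(0.1 * (5 - x))   # later inputs finish sooner
    return x


pool = ThreadPool(nodes=5)
lazy = pool.uimap(work, range(5))   # no results are available yet
print(list(lazy))                   # forces evaluation; order reflects completion, not input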
Example #11
    def pull_layers(self):
        """clone all downstream layers to be processed locally when doing charm builds"""
        layers_to_pull = []
        for layer_map in self.layers:
            layer_name = list(layer_map.keys())[0]

            if layer_name == "layer:index":
                continue

            layers_to_pull.append(layer_name)

        pool = ThreadPool()
        results = pool.map(self.download, layers_to_pull)

        self.db["pull_layer_manifest"] = [result for result in results]
Example #12
    def __call__(
        self, in_paths: List[str], out_paths: List[str], mfcc=False, n_workers=2
    ) -> Tuple[List[str], List[str]]:
        def write_out(inp, out):
            try:
                if os.path.exists(out):
                    print("Skipping:", out)
                    return (out, True)
                tensors = self.model.from_path(
                    inp, return_two=self.return_two, return_mfcc=mfcc
                )
                dir = os.path.dirname(out)
                if not os.path.exists(dir):
                    os.mkdir(dir)
                torch.save(tensors, out)
                print("Success:", out)
                return (out, True)
            except Exception as e:
                print("Failure:", e, out)
                return (out, False)

        with ThreadPool(nodes=n_workers) as P:
            # materialise while the pool is still open; uimap returns a one-shot
            # iterator, so it cannot feed both comprehensions below
            results = list(P.uimap(write_out, in_paths, out_paths))
        successes = [path for path, res in results if res]
        failures = [path for path, res in results if not res]
        return successes, failures
Example #13
    def make_science_image(self, rstate=None):

        if rstate is not None:
            np.random.seed(rstate)

        science_image = np.zeros(self.image_shape_pix)
        if self.parallel:
            pool = ThreadPool(8)
            coadds = list(
                tqdm(pool.imap(self._science_image_loop, range(self.ncoadds)),
                     total=self.ncoadds))
            return np.array(coadds).sum(axis=0)
        else:
            coadds = np.array(
                list(map(self._science_image_loop, trange(self.ncoadds))))
            return science_image + coadds.sum(axis=0)
Example #14
    def pull_layers(self):
        """ clone all downstream layers to be processed locally when doing charm builds
        """
        if self.rebuild_cache:
            click.echo("-  rebuild cache triggered, cleaning out cache.")
            shutil.rmtree(str(self.layers_dir))
            shutil.rmtree(str(self.interfaces_dir))
            os.mkdir(str(self.layers_dir))
            os.mkdir(str(self.interfaces_dir))

        layers_to_pull = []
        for layer_map in self.layers:
            layer_name = list(layer_map.keys())[0]

            if layer_name == "layer:index":
                continue

            layers_to_pull.append(layer_name)

        pool = ThreadPool()
        pool.map(self.download, layers_to_pull)

        self.db["pull_layer_manifest"] = []
        _paths_to_process = {
            "layer": glob("{}/*".format(str(self.layers_dir))),
            "interface": glob("{}/*".format(str(self.interfaces_dir))),
        }
        for prefix, paths in _paths_to_process.items():
            for _path in paths:
                build_path = _path
                if not build_path:
                    raise BuildException(
                        f"Could not determine build path for {_path}")

                git.checkout(self.layer_branch, _cwd=build_path)

                layer_manifest = {
                    "rev":
                    git("rev-parse", "HEAD",
                        _cwd=build_path).stdout.decode().strip(),
                    "url":
                    f"{prefix}:{Path(build_path).stem}",
                }
                self.db["pull_layer_manifest"].append(layer_manifest)
                click.echo(
                    f"- {layer_manifest['url']} at commit: {layer_manifest['rev']}"
                )
Example #15
    def process(self, mongo_connection):
        """ process a chunk of rows in the file """
        reader = UnicodeReader(self.program_arguments.infile,
                               dialect=self.provider_type.dialect)
        self.find_header(reader)

        for chunk in GritsFileReader.gen_chunks(reader, mongo_connection):
            # collections of valid and invalid records to be batch upsert / insert many
            valid_records = []
            invalid_records = []
            # is threading enabled?  this may increase performance when mongoDB
            # is not running on localhost due to busy wait on finding an airport
            # in the case of FlightGlobalType.
            if settings._THREADING_ENABLED:
                pool = ThreadPool(nodes=settings._NODES)
                results = pool.amap(self.process_row, chunk)

                while not results.ready():
                    # command-line spinner
                    for cursor in '|/-\\':
                        sys.stdout.write('\b%s' % cursor)
                        sys.stdout.flush()
                        time.sleep(.25)

                sys.stdout.write('\b')
                sys.stdout.flush()
                # async-poll is done, get the results
                result = results.get()
                valid_records = [x[0] for x in result if x[0] is not None]
                invalid_records = [x[1] for x in result if x[1] is not None]

            else:
                # single-threaded synchronous processing
                for data in chunk:
                    valid, invalid = self.process_row(data)
                    if valid is not None: valid_records.append(valid)
                    if invalid is not None: invalid_records.append(invalid)

            # bulk upsert / insert many of the records
            valid_result = mongo_connection.bulk_upsert(
                self.provider_type.collection_name, valid_records)
            invalid_result = mongo_connection.insert_many(
                settings._INVALID_RECORD_COLLECTION_NAME, invalid_records)
            logging.debug('valid_result: %r', valid_result)
            logging.debug('invalid_result: %r', invalid_result)
Example #16
            def data_func(measurement):
                if not use_threads:
                    data = numpy.full(sources.shape + geobox.shape,
                                      measurement['nodata'],
                                      dtype=measurement['dtype'])
                    for index, datasets in numpy.ndenumerate(sources.values):
                        _fuse_measurement(
                            data[index],
                            datasets,
                            geobox,
                            measurement,
                            fuse_func=fuse_func,
                            skip_broken_datasets=skip_broken_datasets,
                            driver_manager=driver_manager)
                else:

                    def work_load_data(array_name, index, datasets):
                        data = sa.attach(array_name)
                        _fuse_measurement(
                            data[index],
                            datasets,
                            geobox,
                            measurement,
                            fuse_func=fuse_func,
                            skip_broken_datasets=skip_broken_datasets,
                            driver_manager=driver_manager)

                    array_name = '_'.join(
                        ['DCCORE',
                         str(uuid.uuid4()),
                         str(os.getpid())])
                    sa.create(array_name,
                              shape=sources.shape + geobox.shape,
                              dtype=measurement['dtype'])
                    data = sa.attach(array_name)
                    data[:] = measurement['nodata']

                    pool = ThreadPool(32)
                    pool.map(work_load_data, repeat(array_name),
                             *zip(*numpy.ndenumerate(sources.values)))
                    sa.delete(array_name)
                return data
Example #17
    def process(self, mongo_connection):
        """ process a chunk of rows in the file """
        reader = UnicodeReader(self.program_arguments.infile, dialect=self.provider_type.dialect)
        self.find_header(reader)

        for chunk in GritsFileReader.gen_chunks(reader, mongo_connection):
            # collections of valid and invalid records to be batch upsert / insert many
            valid_records = []
            invalid_records = []
            # is threading enabled?  this may increase performance when mongoDB
            # is not running on localhost due to busy wait on finding an airport
            # in the case of FlightGlobalType.
            if settings._THREADING_ENABLED:
                pool = ThreadPool(nodes=settings._NODES)
                results = pool.amap(self.process_row, chunk)

                while not results.ready():
                    # command-line spinner
                    for cursor in '|/-\\':
                        sys.stdout.write('\b%s' % cursor)
                        sys.stdout.flush()
                        time.sleep(.25)

                sys.stdout.write('\b')
                sys.stdout.flush()
                # async-poll is done, get the results
                result = results.get()
                valid_records = [ x[0] for x in result if x[0] is not None ]
                invalid_records = [ x[1] for x in result if x[1] is not None ]

            else:
                # single-threaded synchronous processing
                for data in chunk:
                    valid, invalid = self.process_row(data)
                    if valid is not None: valid_records.append(valid)
                    if invalid is not None: invalid_records.append(invalid)

            # bulk upsert / insert many of the records
            valid_result = mongo_connection.bulk_upsert(self.provider_type.collection_name, valid_records)
            invalid_result = mongo_connection.insert_many(settings._INVALID_RECORD_COLLECTION_NAME, invalid_records)
            logging.debug('valid_result: %r', valid_result)
            logging.debug('invalid_result: %r', invalid_result)
Example #18
def saveEngulfingSignals(candles, candleamount, params=[], symbol='XBTUSD'):
    global t_e_candles
    global t_symbol
    global t_candleamount
    t_e_candles = ind.candle_df(candles, candleamount)
    t_symbol = symbol
    t_candleamount = candleamount
    epool = ThreadPool()
    results = epool.uimap(saveEngulf_thread, params)
    print("Computing engulfing signals for all params multithreaded...")
    #DO NOT REMOVE THIS PRINT, IT IS NEEDED TO FINISH THE MULTITHREAD
    result = list(results)
    print(result)

    return (result)


#Examples
#saveKeltnerBands(100, [10,1], [True, False])
#saveATR(100, [1,20,30])
Example #19
def tuneHyperParameters(simsettingsFileName,
                        hyperSettings=None,
                        saved_fd_model_path=None):
    """
        For some set of parameters, the function will sample a number of them
        in order to find a better configuration.
    """
    import os

    result_data = {}

    file = open(simsettingsFileName)
    settings = json.load(file)
    print("Settings: " + str(json.dumps(settings, indent=4)))
    file.close()
    file = open(hyperSettings)
    hyper_settings = json.load(file)
    print("Hyper settings: " + str(json.dumps(hyper_settings, indent=4)))
    file.close()
    num_sim_samples = hyper_settings['meta_sim_samples']

    ## Check to see if there exists a saved fd model, if so save the path in the hyper settings
    if saved_fd_model_path is not None:
        directory = getDataDirectory(settings)
        # file_name_dynamics=directory+"forward_dynamics_"+"_Best_pretrain.pkl"
        if not os.path.exists(directory):
            hyper_settings['saved_fd_model_path'] = saved_fd_model_path

    param_settings = get_param_values(hyper_settings)
    result_data['hyper_param_settings_files'] = []
    sim_data = []
    data_name = settings['data_folder']
    for params in param_settings:  ## Loop over each setting of parameters
        data_name_tmp = ""
        for par in range(
                len(params)
        ):  ## Assemble the vector of parameters and data folder name
            param_of_interest = hyper_settings['param_to_tune'][par]
            data_name_tmp = data_name_tmp + "/_" + param_of_interest + "_" + str(
                params[par]) + "/"
            settings[param_of_interest] = params[par]

        settings['data_folder'] = data_name + data_name_tmp
        directory = getBaseDataDirectory(settings)
        if not os.path.exists(directory):
            os.makedirs(directory)
        # file = open(settingsFileName, 'r')

        out_file_name = directory + os.path.basename(simsettingsFileName)
        result_data['hyper_param_settings_files'].append(out_file_name)
        print("Saving settings file with data to: ", out_file_name)
        print("settings['data_folder']: ", settings['data_folder'])
        out_file = open(out_file_name, 'w')
        out_file.write(json.dumps(settings, indent=4))
        # file.close()

        out_file.close()
        sim_data.append(
            (simsettingsFileName, num_sim_samples,
             copy.deepcopy(settings), hyper_settings['meta_sim_threads'],
             copy.deepcopy(hyper_settings)))

    # p = ProcessingPool(2)
    p = ThreadPool(hyper_settings['tuning_threads'])
    t0 = time.time()
    result = p.map(_trainMetaModel, sim_data)
    t1 = time.time()
    print("Hyper parameter tuning complete in " +
          str(datetime.timedelta(seconds=(t1 - t0))) + " seconds")
    result_data['sim_time'] = "Meta model training complete in " + str(
        datetime.timedelta(seconds=(t1 - t0))) + " seconds"
    result_data['meta_sim_result'] = result
    result_data['raw_sim_time_in_seconds'] = t1 - t0
    result_data['Number_of_simulations_sampled'] = len(param_settings)
    result_data['Number_of_threads_used'] = hyper_settings['tuning_threads']
    print(result)
    return result_data
Example #20
    def filter_results(self,
                       im_array,
                       results,
                       image_times,
                       model,
                       psf_sigma=1.0,
                       batch_size=32,
                       chunk_size=10000):
        """
        Use a keras neural network model to detect real objects based upon
        the coadded postage stamps of those objects. Filter and keep only
        actual objects going forward.

        Parameters
        ----------

        im_array: numpy array, required
        The masked original images. See loadMaskedImages
        in searchImage.py.

        results: numpy recarray, required
        The results output from findObjects in searchImage.

        image_times: numpy array, required
        An array containing the image times in DAYS with the first image at
        time 0.
        Note: This is different from other methods, so the units of
        this may change. Watch this documentation.

        model: keras model, required
        A previously trained model loaded from an hdf5 file.

        batch_size: int
        Batch size for keras predict.

        Returns
        -------

        filtered_results: numpy array
        An edited version of results with only the rows where
        true objects were classified.
        
        """

        keep_objects = np.array([])
        total_chunks = np.ceil(len(results) / float(chunk_size))
        chunk_num = 1
        circle_vals = []

        enumerated_results = list(enumerate(results))
        self.im_array = im_array
        self.image_times = image_times
        self.psf_sigma = psf_sigma

        #        for chunk_start in range(0, len(results), chunk_size):
        #            test_class = []
        #            p_stamp_arr = []
        #            #circle_chunk = []
        #            for imNum in range(chunk_start, chunk_start+chunk_size):
        #                try:
        #                    p_stamp = self.createPostageStamp(im_array,
        #                                                      list(results[['t0_x', 't0_y']][imNum]),
        #                                                      np.array(list(results[['v_x', 'v_y']][imNum])),
        #                                                      image_times, [25., 25.])[0]
        #                    p_stamp = np.array(p_stamp)
        #                    p_stamp[np.isnan(p_stamp)] = 0.
        #                    p_stamp[np.isinf(p_stamp)] = 0.
        #                    #p_stamp -= np.min(p_stamp)
        #                    #p_stamp /= np.max(p_stamp)
        #                    #p_stamp
        #                    image_thresh = np.max(p_stamp)*0.5
        #                    image = (p_stamp > image_thresh)*1.
        #                    #pre_image = p_stamp > image_thresh
        #                    #image = np.array(pre_image*1.)
        #                    mom = measure.moments(image)
        #                    cr = mom[0,1]/mom[0,0]
        #                    cc = mom[1,0]/mom[0,0]
        #                    #moments = measure.moments(image, order=3)
        #                    #cr = moments[0,1]/moments[0,0]
        #                    #cc = moments[1,0]/moments[0,0]
        #                    cent_mom = measure.moments_central(image, cr, cc, order=4)
        #                    norm_mom = measure.moments_normalized(cent_mom)
        #                    hu_mom = measure.moments_hu(norm_mom)
        #                    #p_stamp_arr.append(hu_mom)
        #                    #print moments[0,0], measure.perimeter(image)
        #                    #circularity = (4*np.pi*moments[0,0])/(measure.perimeter(image)**2.)
        #                    #circularity = (cent_mom[0,0]**2.)/(2.*np.pi*(cent_mom[2,0] + cent_mom[0,2]))
        #                    circularity = (1/(2.*np.pi))*(1/hu_mom[0])
        #                    #circularity = (cent_mom[0,0]**2.)/(2*np.pi*(cent_mom[2,0] + cent_mom[0,2]))
        #                    psf_sigma = psf_sigma
        #                    gaussian_fwhm = psf_sigma*2.35
        #                    fwhm_area = np.pi*(gaussian_fwhm/2.)**2.
        #                    #print circularity, cr, cc
        #                    if ((circularity > 0.6) & (cr > 10.) & (cr < 14.) & (cc > 10.) & (cc < 14.) &
        #                        (cent_mom[0,0] < (9.0*fwhm_area)) & (cent_mom[0,0] > 3.0)): #Use 200% error margin on psf_sigma for now
        #                    #    test_class.append(1.)
        #                    #    print circularity, cr, cc, moments[0,0]
        #                    #else:
        #                    #    test_class.append(0.)
        #                        test_class.append(1.)
        #                    else:
        #                        test_class.append(0.)
        #                    circle_vals.append([circularity, cr, cc, cent_mom[0,0], image_thresh])
        #                    #print circularity, cr, cc, cent_mom[0,0], image_thresh
        #                except:
        #                    #p_stamp_arr.append(np.ones((25, 25)))
        #                    p_stamp_arr.append(np.zeros(7))
        #                    test_class.append(0.)
        #                    circle_vals.append([0., 0., 0., 0., 0.])
        #                    continue
        #            p_stamp_arr = np.array(p_stamp_arr)#.reshape(chunk_size, 625)
        #test_class = model.predict_classes(p_stamp_arr, batch_size=batch_size,
        #                                   verbose=1)
        pool = Pool(nodes=8)
        test_classes = pool.map(self.circularity_test, enumerated_results)
        test_classes = np.array(test_classes).T
        keep_idx = test_classes[0][np.where(
            np.array(test_classes[1]) > .5)]  # + chunk_start
        print(keep_idx)
        #print(np.where(np.array(test_class) > .5))
        print(test_classes[0][np.where(np.array(test_classes[1]) > .5)])
        keep_objects = keep_idx  #np.append(keep_objects, keep_idx)
        #circle_vals[keep_idx] = np.array(circle_chunk)
        print("Finished chunk %i of %i" % (chunk_num, total_chunks))
        chunk_num += 1

        #        keep_objects = np.arange(len(results))
        filtered_results = results[np.array(keep_objects, dtype=int)]
        #circle_vals = np.array(circle_vals)
        #circle_vals_keep = circle_vals[np.array(keep_objects, dtype=np.int)]

        return filtered_results  #, circle_vals_keep
Example #21
#!/usr/bin/env python
#
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Copyright (c) 1997-2015 California Institute of Technology.
# License: 3-clause BSD.  The full license text is available at:
#  - http://trac.mystic.cacr.caltech.edu/project/pathos/browser/pathos/LICENSE
"""
minimal interface to python's multiprocessing module
"""

from pathos.multiprocessing import ProcessPool, __STATE
from pathos.threading import ThreadPool  #XXX: thread __STATE not imported
from pathos.helpers import cpu_count
mp = ProcessPool()
tp = ThreadPool()


# backward compatibility
#FIXME: deprecated... and buggy!  (fails to dill on imap/uimap)
def mp_map(function, sequence, *args, **kwds):
    '''extend python's parallel map function to multiprocessing

Inputs:
    function  -- target function
    sequence  -- sequence to process in parallel

Additional Inputs:
    nproc     -- number of 'local' cpus to use  [default = 'autodetect']
    type      -- processing type ['blocking', 'non-blocking', 'unordered']
    threads   -- if True, use threading instead of multiprocessing
    '''
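The body of mp_map is cut off above. A sketch of what such a dispatcher could look like, assuming it simply routes to the module-level tp or mp pools defined earlier, according to the threads and type arguments described in the docstring; this is an illustration, not the actual pathos implementation:

def mp_map_sketch(function, sequence, *args, **kwds):
    '''illustrative dispatcher following the docstring above (not pathos source)'''
    ptype = kwds.pop('type', 'blocking')
    threads = kwds.pop('threads', False)
    kwds.pop('nproc', None)  # pool sizing omitted in this sketch

    pool = tp if threads else mp       # reuse the module-level pools defined above
    if ptype == 'blocking':
        return pool.map(function, sequence, *args, **kwds)
    if ptype == 'non-blocking':
        return pool.imap(function, sequence, *args, **kwds)
    return pool.uimap(function, sequence, *args, **kwds)   # 'unordered'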
Example #22
def mt_decompile_apks(apk_fpaths, out_dir, nproc):
    with ThreadPool(nproc) as p:
        apk_dirs = p.map(decompile_one_apk, apk_fpaths,
                         [out_dir] * len(apk_fpaths))
    # apk_dirs = [i for i in apk_dirs if i is not None]
    return apk_dirs
Example #23
def mt_download_apk(urls, out_dir, nproc):
    with ThreadPool(nproc) as p:
        apk_fns = p.map(download_apk, urls, [out_dir] * len(urls))
    return apk_fns
Example #24
    def create_storage(coords,
                       geobox,
                       measurements,
                       data_func=None,
                       use_threads=False):
        """
        Create a :class:`xarray.Dataset` and (optionally) fill it with data.

        This function makes the in memory storage structure to hold datacube data, loading data from datasets that have
         been grouped appropriately by :meth:`group_datasets`.

        :param dict coords:
            OrderedDict holding `DataArray` objects defining the dimensions not specified by `geobox`

        :param GeoBox geobox:
            A GeoBox defining the output spatial projection and resolution

        :param measurements:
            list of :class:`datacube.model.Measurement`

        :param data_func:
            function to fill the storage with data. It is called once for each measurement, with the measurement
            as an argument. It should return an appropriately shaped numpy array. If not provided, an empty
            :class:`xarray.Dataset` is returned.

        :param bool use_threads:
            Optional. If this is set to True, IO will be multi-thread.
            May not work for all drivers due to locking/GIL.

            Default is False.

        :rtype: :class:`xarray.Dataset`

        .. seealso:: :meth:`find_datasets` :meth:`group_datasets`
        """
        def empty_func(measurement_):
            coord_shape = tuple(coord_.size for coord_ in coords.values())
            return numpy.full(coord_shape + geobox.shape,
                              measurement_.nodata,
                              dtype=measurement_.dtype)

        data_func = data_func or empty_func

        result = xarray.Dataset(attrs={'crs': geobox.crs})
        for name, coord in coords.items():
            result[name] = coord
        for name, coord in geobox.coordinates.items():
            result[name] = (name, coord.values, {'units': coord.units})

        def work_measurements(measurement, data_func):
            return data_func(measurement)

        use_threads = use_threads and THREADING_REQS_AVAILABLE

        if use_threads:
            pool = ThreadPool(32)
            results = pool.map(work_measurements, measurements,
                               repeat(data_func))
        else:
            results = [data_func(a) for a in measurements]

        for measurement in measurements:
            data = results.pop(0)
            attrs = measurement.dataarray_attrs()
            attrs['crs'] = geobox.crs
            dims = tuple(coords.keys()) + tuple(geobox.dimensions)
            result[measurement.name] = (dims, data, attrs)

        return result
Example #25
    doc_embed_dict[docid] = mean_vec


parser = argparse.ArgumentParser(
    description="Generate ELMo embeddings for docs")
parser.add_argument("-d",
                    "--data_dict",
                    required=True,
                    help="Path to bbc data dict file")
parser.add_argument("-tn",
                    "--thread_count",
                    type=int,
                    required=True,
                    help="No of threads in Thread pool")
parser.add_argument("-o", "--out", required=True, help="Path to output file")
args = vars(parser.parse_args())
bbc_data_dict_file = args["data_dict"]
thread_count = args["thread_count"]
outfile = args["out"]
with open(bbc_data_dict_file, 'r') as dd:
    bbc_data_dict = json.load(dd)
preproc_doctext_dict = preprocessed_paratext(bbc_data_dict)
doc_embed_dict = dict()
print("Data loaded")
doclist = list(preproc_doctext_dict.keys())

with ThreadPool(nodes=thread_count) as pool:
    pool.map(get_mean_elmo_embeddings, doclist)

np.save(outfile, doc_embed_dict)
    def ThreadPool( self, *args, **kwargs ):
        thread_pool = ThreadPool(*args, **kwargs)
        self.register_atexit( thread_pool )
        return thread_pool

    def GlobalThreadPool( self, *args, **kwargs ):
        if self.thread_pool is None:
            self.thread_pool = ThreadPool(*args, **kwargs)
        return self.thread_pool
saver = tf.train.Saver(max_to_keep = 4)
# =============================================================================
#  Initialize the variables (i.e. assign their default value)
# =============================================================================
init = tf.global_variables_initializer()

# =============================================================================
#  Start Training
# =============================================================================
# Start a new TF session
conf = tf.ConfigProto()
conf.gpu_options.allow_growth=True
conf.log_device_placement=False #@myself: use this for debugging
conf.allow_soft_placement=True
P = Pool()
with tf.Session(config = conf) as sess:

    # Run the initializer
    sess.run(init)
    sess.run(normalize_rel_op)
    # Training
    NOW_DISPLAY = False
    epoch=1
    step=1    
    temp_Type2Data = deepcopy(Type2Data)
    mean_losses = np.zeros([5])
    mean_delta = 0
    while (epoch < NUM_EPOCHS):
        if sum(map(len,temp_Type2Data.values())) < 0.1 * TOT_RELATIONS:
            epoch += 1
Example #29
    def generate(self,
                 instruments: List[int],
                 batch_size: int,
                 n_threads=4,
                 max_chunks_per_music=-1,
                 chunk_reuse=1,
                 chunk_pool_size=1000):
        """
        Creates a generator that iterates over the dataset to generate chunks. The generator
        first starts by filling a pool of chunks.
        
        :param instruments: the id of the instruments to keep when generating chunks
        :param batch_size: the size of the batches yielded
        :param n_threads: the number of threads to synthesize waveforms in parallel
        :param chunk_reuse: the number of times a single chunk will be used per epoch
        :param chunk_pool_size: the minimum number of chunks the pool must contain before 
        starting to yield batches
        :return: 
        """
        assert chunk_pool_size >= batch_size, \
            "The chunk pool size should be greater or equal to the batch size."

        # Reset all generation statistics
        self.epochs = 0
        self.epoch_progress = 0.
        self.musics_sampled = 0
        self.chunks_generated = 0

        # Create a generator that loops infinitely over the songs in a random order
        def midi_fpath_generator():
            midi_fpaths = list(
                self._get_files_by_instruments(instruments, at_least=2))
            midi_fpaths = shuffle(midi_fpaths)
            while True:
                for i, midi_fpath in enumerate(midi_fpaths, 1):
                    yield midi_fpath
                    self.debug_midi_fpaths.append(midi_fpath)
                    if len(self.debug_midi_fpaths) > n_threads * 2:
                        del self.debug_midi_fpaths[0]
                    self.epoch_progress = i / len(midi_fpaths)
                self.epochs += 1

        midi_fpath_generator = midi_fpath_generator()

        # Define a function to fill a buffer
        def begin_next_buffer():
            # Estimate how many musics to sample from to generate a full batch
            avg_n_chunks = self.chunks_generated / self.musics_sampled if self.musics_sampled else 0
            n_musics = int(
                np.ceil(batch_size /
                        avg_n_chunks) if avg_n_chunks else 0) + n_threads
            self.musics_sampled += n_musics

            # Begin filling the buffer with threads from the threadpool
            func = lambda fpath: self.extract_chunks(fpath, instruments,
                                                     max_chunks_per_music)
            midi_fpaths = [next(midi_fpath_generator) for _ in range(n_musics)]
            return thread_pool.uimap(func, midi_fpaths)

        # Define a function to fill the chunk pool
        def refill_chunk_pool(chunk_pool, chunk_pool_uses, buffer):
            # Do nothing if the pool is already full
            if len(chunk_pool) >= chunk_pool_size:
                return chunk_pool, chunk_pool_uses, buffer

            while len(chunk_pool) < chunk_pool_size:
                # Retrieve the elements from the next buffer that were generated in the
                # background. If it is not done generating, block until so with a call to list().
                start = timer()
                buffer = list(buffer)

                # Flatten the buffer to retrieve a list of chunks, and append all the contents of
                # the buffer to the chunk pool
                n_musics = len(buffer)
                buffer = [chunk for chunks in buffer for chunk in chunks]
                chunk_pool.extend(buffer)
                chunk_pool_uses.extend([chunk_reuse] * len(buffer))
                delta = timer() - start
                print("Blocked %dms to generate %d chunks from %d musics." %
                      (int(delta * 1000), len(buffer), n_musics))

                # Register statistics about the number of generated chunks to better estimate how
                # many jobs will be needed to fill the pool the next time
                self.chunks_generated += len(buffer)

                # Begin a new buffer in the background
                buffer = begin_next_buffer()

            # Shuffle the chunk pool so as to mix different musics in a same batch
            chunk_pool, chunk_pool_uses = shuffle(chunk_pool, chunk_pool_uses)
            return chunk_pool, chunk_pool_uses, buffer

        # Create the threadpool, the chunk pool and initialize the buffers
        thread_pool = ThreadPool(n_threads)
        chunk_pool = []
        chunk_pool_uses = []
        buffer = begin_next_buffer()

        # We wrap the generator inside an explicit generator function. We could simply make this
        # function (MidiDataset.generate()) the generator itself, but splitting the initialization
        # code and the actual generator allows us to execute the initialization when
        # MidiDataset.generate() is called for the first time, rather than when we start iterating
        # from the dataset.
        def generator(chunk_pool, chunk_pool_uses, buffer):
            while True:
                # Make sure the chunk pool is full
                chunk_pool, chunk_pool_uses, buffer = \
                    refill_chunk_pool(chunk_pool, chunk_pool_uses, buffer)

                # Consume elements from the chunk pool to generate a batch
                chunks = chunk_pool[:batch_size]
                chunks_uses = chunk_pool_uses[:batch_size]
                del chunk_pool[:batch_size]
                del chunk_pool_uses[:batch_size]
                for chunk, chunk_uses in zip(chunks, chunks_uses):
                    if chunk_uses == 1:
                        continue
                    chunk_pool.append(chunk)
                    chunk_pool_uses.append(chunk_uses - 1)

                # Yield the chunks as a batch
                yield self.collate(chunks, instruments)

        return generator(chunk_pool, chunk_pool_uses, buffer)
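The comment before generator() describes a pattern worth noting on its own: the initialization runs eagerly when the method is called, and only the loop is deferred to an inner generator. A stripped-down sketch of that split:

def make_counter(start):
    # eager part: runs as soon as make_counter() is called
    print("initialised at", start)
    state = {"value": start}

    # lazy part: the body only runs once the caller starts iterating
    def _gen():
        while True:
            yield state["value"]
            state["value"] += 1

    return _gen()


counter = make_counter(10)   # prints immediately
print(next(counter))         # 10
print(next(counter))         # 11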
Example #30
    def process(self):
        """ process rules
        """
        pool = ThreadPool()
        pool.map(self.__process, self.files_to_process)
Example #31
import os
from PIL import Image
import numpy as np
import hub
from pathlib import Path
from pathos.threading import ThreadPool
import time

pool = ThreadPool(nodes=20)
#val_path = list(Path('./ILSVRC/Data/CLS-LOC/val').glob('*.JPEG'))
val_path = list(Path('./ILSVRC/Data/CLS-LOC/train').glob('**/*.JPEG'))
shape = (len(val_path), 500, 375, 3)
x = hub.array(shape, name='imagenet/test:latest', dtype='uint8')
print(x.shape)

index = 1


def upload_val(index):
    t1 = time.time()
    # Preprocess the image
    img = Image.open(val_path[index])
    img = img.resize((500, 375), Image.ANTIALIAS)
    img = np.asarray(img)
    if len(img.shape) == 2:
        img = np.expand_dims(img, -1)
    if img.shape[-1] == 4:
        img = img[..., :3]
    img = np.transpose(img, axes=(1, 0, 2))

    # Upload the image
Example #32
def backtest_mt(params):
    global capital
    su = None
    saveIndicators(candleamount=candleamount)
    #fix later
    candleSplice = candleData.tail(candleamount)

    atrseries = pd.Series(dtype=np.uint16)
    keltner_signals = pd.Series(dtype=object)
    engulf_signals = pd.Series(dtype=object)
    signals = pd.DataFrame(columns=['S'])
    atrperiod = params['atrperiod']
    #candleSplice = candleSplice.reset_index(drop=True)

    if (params['keltner'] == True) and (params['engulf'] == True):
        engulf_signals = pd.read_csv(
            'IndicatorData//' + params['symbol'] + '//Engulfing//' +
            "SIGNALS_t" + str(params['engulfthreshold']) + '_ignoredoji' +
            str(params['ignoredoji']) + '.csv',
            sep=',')
        keltner_signals = pd.read_csv('IndicatorData//' + params['symbol'] +
                                      '//Keltner//' + "SIGNALS_kp" +
                                      str(params['kperiod']) + '_sma' +
                                      str(params['ksma']) + '.csv',
                                      sep=',')
        signals = pd.concat([engulf_signals, keltner_signals], axis=1)
        signals.columns = ["E", "K"]
        signals['S'] = np.where((signals['E'] == signals['K']), Signal(0),
                                signals['E'])
    elif (params['keltner'] == True):
        keltner_signals = pd.read_csv('IndicatorData//' + params['symbol'] +
                                      '//Keltner//' + "SIGNALS_kp" +
                                      str(params['kperiod']) + '_sma' +
                                      str(params['ksma']) + '.csv',
                                      sep=',')
        signals['S'] = np.array(keltner_signals).reshape(
            1, len(keltner_signals))[0]
    elif (params['engulf'] == True):
        engulf_signals = pd.read_csv(
            'IndicatorData//' + params['symbol'] + '//Engulfing//' +
            "SIGNALS_t" + str(params['engulfthreshold']) + '_ignoredoji' +
            str(params['ignoredoji']) + '.csv',
            sep=',')
        signals['S'] = np.array(engulf_signals).reshape(
            1, len(engulf_signals))[0]
    print(signals['S'])
    #signals.to_csv('BacktestData//Signals//' + currentTime + '.csv')
    atrseries = pd.read_csv('IndicatorData//' + params['symbol'] + "//ATR//" +
                            "p" + str(atrperiod) + '.csv',
                            sep=',')
    copyIndex = candleSplice.index
    candleSplice = candleSplice.reset_index(drop=True)
    #candleSplice.merge(atrseries, left_index=True)
    #candleSplice.merge(signals['S'], right_on='S', left_index=True)
    candleSplice = pd.DataFrame.join(candleSplice, atrseries)
    candleSplice = pd.DataFrame.join(
        candleSplice, signals['S'])  #COMBINE SIGNALS AND CANDLE DATA
    candleSplice.index = copyIndex
    candleSplice['timestamp'] = pd.to_datetime(candleSplice.timestamp)
    finalCapitalData = None
    currentTime = datetime.now().strftime("%Y%m%d-%H%M")
    backtestDir = params['symbol'] + '//' + "len" + str(
        candleamount) + "_k" + str(params['keltner']) + "_e" + str(
            params['engulf']
        ) + "_id" + str(params['ignoredoji']) + "_eThrs" + str(
            params['engulfthreshold']
        ) + "_ATR" + str(params['atrperiod']) + "_kP" + str(
            params['kperiod']) + "_kSMA" + str(params['ksma']) + "_pm" + str(
                params['posmult']) + "_ST" + params['stoptype'] + "_sm" + str(
                    params['stopmult']) + "_tm" + str(
                        params['tmult']) + "_TR" + params['trade']

    bt_profit = 0
    # start the timer here so the "Thread time" print at the end also works
    # when percision == 1 (the else branch never sets `start` otherwise)
    start = time.time()

    if (percision != 1):
        isafe = []
        candleSplit = []
        initialLength = len(candleSplice)
        firstStart = candleSplice.index[0]
        lastDistanceSafe = None
        if params['symbol'] == 'XBTUSD':
            su = xbtusd_su
        elif params['symbol'] == 'ETHUSD':
            su = ethusd_su
        for i in range(percision - 1):
            #abs() is a temporary fix to running the backtest on short intervals
            isafe.append((i + 1) *
                         ((abs(initialLength - percision * su)) / percision) +
                         i * su)
        #candleSplit = list(np.array_split(candleSplice, percision))
        #candleSplit = list(candleSplit)
        for i in isafe:
            ia = int(i)
            if isafe.index(i) != 0:
                candleSplit.append(candleSplice.iloc[int(isafe[isafe.index(i) -
                                                               1]):ia + 1])
            lastDistanceSafe = ia
            #print("lds", lastDistanceSafe)
        # else:
        #candleSplit.append(candleSplice.iloc[:ia+1])
        #print("lds", lastDistanceSafe)
        #if(len(isafe) > 1):
        candleSplit.append(candleSplice.iloc[lastDistanceSafe:])

        #print(candleSplit)
        #time.sleep(100)
        #generate parameters for multithreading
        safe_length = len(candleSplit)
        safe_candleamount = np.repeat(candleamount, safe_length).tolist()
        safe_capital = np.repeat(capital, safe_length).tolist()
        safe_params = np.repeat(params, safe_length).tolist()

        withSafe = np.repeat(True, safe_length).tolist()

        print("safe thread amount:", safe_length)
        #create multithread pool
        start = time.time()
        #print(candleSplit)
        #time.sleep(1000)
        pool = ThreadPool(safe_length)

        #run initial chunks multithreaded to find safepoints
        safe_results = pool.uimap(backtest_strategy, safe_candleamount,
                                  safe_capital, safe_params, candleSplit,
                                  withSafe)

        pool.close()  #Compute anything we need to while threads are running
        candleSafe = []
        final_length = safe_length + 2
        withoutSafe = np.repeat(False, final_length).tolist()
        final_candleamount = np.repeat(candleamount, final_length).tolist()
        final_capital = np.repeat(capital, final_length).tolist()
        final_params = np.repeat(params, final_length).tolist()
        static_capital = capital

        safePoints = list(safe_results)  ######################################
        #time.sleep(1000)
        pool.join()

        for i in safePoints:
            if i == -1:
                backtest_mt.q.put(
                    'Not all safe points found for given precision. Reduce precision, or increase timeframe'
                )
                return
        safePoints = sorted(safePoints)

        if find_su:
            su = []
            for i, point in enumerate(safePoints):
                su.append(point - candleSplit[i].index[0])
            suAvg = mean(su)
            #only works on evenly spliced chunks
            chunkLength = len(candleSplit[0])
            backtest_mt.q.put(["su average:", suAvg, ' / ', chunkLength])
            return (su)

        print("safe points:", safePoints)
        idx = 0
        for i in safePoints:
            ia = i - firstStart
            idx = safePoints.index(i)
            if safePoints.index(i) != 0:
                candleSafe.append(candleSplice.iloc[lastDistanceSafe - idx:ia +
                                                    1])
                lastDistanceSafe = ia + 1
            else:
                candleSafe.append(candleSplice.iloc[:ia + 1])
                lastDistanceSafe = ia + 1
        candleSafe.append(candleSplice.iloc[lastDistanceSafe - idx:])

        print("final thread amount:", final_length)
        #print(candleSafe)
        #time.sleep(10000)
        fpool = ThreadPool(final_length)
        final_results = fpool.uimap(backtest_strategy, final_candleamount,
                                    final_capital, final_params, candleSafe,
                                    withoutSafe)
        fpool.close()
        final_result = list(final_results)
        fpool.join()

        ordered_result = sorted(final_result, key=lambda x: x[0])
        for i in range(len(ordered_result)):
            #print(final_result.index)
            if i != 0:
                #for non-static position size:
                ##capital += capital*((i[1]-static_capital)/static_capital)
                ordered_result[i][1]['capital'] += bt_profit
                bt_profit = ordered_result[i][1].iloc[-1][
                    'capital'] - static_capital
                finalCapitalData = pd.concat(
                    [finalCapitalData, ordered_result[i][1]],
                    ignore_index=True)
            else:
                bt_profit = ordered_result[i][1].iloc[-1][
                    'capital'] - static_capital
                finalCapitalData = pd.DataFrame(ordered_result[i][1])
        capital = finalCapitalData['capital'].iloc[-1]
    else:
        #run chunks spliced by safepoints multithreaded to retrieve fully accurate results
        final_results = backtest_strategy(candleamount, capital, params,
                                          candleSplice, False)
        final_result = list(final_results)
        capital = str(final_result[1]['capital'].iloc[-1])
        finalCapitalData = final_result[1]

    print(finalCapitalData)
    #time.sleep(1000)
    visualize_trades(finalCapitalData, backtestDir)
    saveBacktest(capital, params, backtestDir)
    backtest_mt.q.put(capital)
    end = time.time()
    print("Thread time: ", end - start)
    return ('done')