Example #1
    def generate(self, number: int, method: str = 'poisson',
                 regenerate: bool = False) -> None:
        """Generates an ensemble of matrices and estimates standard deviation

        Perturbs the initial raw matrix using either a Gaussian or Poisson
        process, unfolds them and applies the first generation method to them.
        Uses the variation to estimate standard deviation of each step.

        Args:
            number: The number of perturbed matrices to generate.
            method: The stochastic method used to generate the perturbations.
                Can be 'gaussian' or 'poisson'.
            regenerate: Whether to use already generated files (False) or
                generate them all anew (True).
        """
        assert self.raw is not None, "Set the raw matrix"
        assert self.unfolder is not None, "Set unfolder"
        assert self.first_generation_method is not None, \
            "Set first generation method"

        self.size = number
        self.regenerate = regenerate

        LOG.info(f"Start normalization with {self.nprocesses} cpus")
        pool = ProcessPool(nodes=self.nprocesses)
        ss = np.random.SeedSequence(self.seed)
        iterator = pool.imap(self.step, range(number), ss.spawn(number),
                             repeat(method))
        ensembles = np.array(list(tqdm(iterator, total=number)))
        pool.close()
        pool.join()
        pool.clear()

        raw_ensemble = ensembles[:, 0, :, :]
        unfolded_ensemble = ensembles[:, 1, :, :]
        firstgen_ensemble = ensembles[:, 2, :, :]

        # TODO Move this to a save step
        self.raw.save(self.path / 'raw.npy')
        # saving for firstgen is in step due to pickling
        self.firstgen = Matrix(path=self.path / 'firstgen.npy')

        # Calculate standard deviation
        raw_ensemble_std = np.std(raw_ensemble, axis=0)
        raw_std = Matrix(raw_ensemble_std, self.raw.Eg, self.raw.Ex,
                         state='std')
        raw_std.save(self.path / "raw_std.npy")

        unfolded_ensemble_std = np.std(unfolded_ensemble, axis=0)
        unfolded_std = Matrix(unfolded_ensemble_std, self.raw.Eg,
                              self.raw.Ex, state='std')
        unfolded_std.save(self.path / "unfolded_std.npy")

        firstgen_ensemble_std = np.std(firstgen_ensemble, axis=0)
        firstgen_std = Matrix(firstgen_ensemble_std, self.firstgen.Eg,
                              self.firstgen.Ex, state='std')
        firstgen_std.save(self.path / "firstgen_std.npy")

        self.std_raw = raw_std
        self.std_unfolded = unfolded_std
        self.std_firstgen = firstgen_std

        self.raw_ensemble = raw_ensemble
        self.unfolded_ensemble = unfolded_ensemble
        self.firstgen_ensemble = firstgen_ensemble
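
A minimal sketch of the reproducible-ensemble pattern above, assuming only numpy and pathos are available; `perturb` is a hypothetical stand-in for the raw/unfold/first-generation steps, and each worker receives its own spawned SeedSequence so runs stay reproducible:

import numpy as np
from pathos.multiprocessing import ProcessPool

def perturb(index, child_seed, method='poisson'):
    # independent random stream per ensemble member
    rng = np.random.default_rng(child_seed)
    base = np.ones((4, 4))
    if method == 'poisson':
        return rng.poisson(base)
    return base + rng.normal(size=base.shape)

if __name__ == '__main__':
    number = 8
    ss = np.random.SeedSequence(42)
    pool = ProcessPool(nodes=2)
    members = list(pool.imap(perturb, range(number), ss.spawn(number)))
    pool.close(); pool.join(); pool.clear()
    # per-bin standard deviation across the ensemble, as in the example
    print(np.std(np.array(members), axis=0).shape)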
Example #2
    def calculate_ldos(self, npts, emax, emin, **args):
        self.emax = emax
        self.emin = emin
        self.npts = npts
        self.phi = args['phi']
        self.x = array([[0.0 for i in range(self.npts[0])]
                        for j in range(self.npts[1])])
        self.y = array([[0.0 for i in range(self.npts[0])]
                        for j in range(self.npts[1])])
        self.z = array([[0.0 for i in range(self.npts[0])]
                        for j in range(self.npts[1])])
        self.ldos = array([[[0.0 for j in range(self.npts[0])]
                            for i in range(self.npts[1])]
                           for k in range(len(self.orbitals))])
        if 'nprocs' in args:
            self.nprocs = int(args['nprocs'])
        if 'tip_disp' in args:
            self.tip_disp = float(args['tip_disp'])

        #the `exclude` list holds the indices of atoms to exclude from LDOS integration
        self.exclude = []
        if 'exclude' in args:
            self.exclude_args = args['exclude']
            counter = 0
            for i in self.atomtypes:
                if i in args['exclude']:
                    for j in range(self.atomnums[self.atomtypes.index(i)]):
                        self.exclude.append(counter)
                        counter += 1
                else:
                    counter += self.atomnums[self.atomtypes.index(i)]
        print(str(len(self.exclude)) + ' atoms excluded from LDOS averaging')

        if 'unit_cell_num' in args:
            self.unit_cell_num = args['unit_cell_num']

        for i in range(-1 * self.unit_cell_num, self.unit_cell_num + 1):
            for j in range(-1 * self.unit_cell_num, self.unit_cell_num + 1):
                for k in self.coord:
                    self.periodic_coord.append(k + self.lv[0] * i +
                                               self.lv[1] * j)
        self.periodic_coord = array(self.periodic_coord)

        for i in range(len(self.energies)):
            if self.energies[i] < self.emin:
                self.estart = i
            if self.energies[i] > self.emax:
                self.eend = i
                break

        if self.energies[0] > self.emin:
            self.estart = 0
            self.emin = self.energies[0]
            print(
                'specified emin is less than minimum energy in DOSCAR. setting emin to {}'
                .format(self.emin))
        if self.energies[-1] < self.emax:
            self.eend = len(self.energies) - 1
            self.emax = self.energies[-1]
            print(
                'specified emax exceeds maximum energy in DOSCAR. setting emax to {}'
                .format(self.emax))

        if self.phi != 0:
            self.K = array([
                tunneling_factor(self.emax, i, self.phi)
                for i in self.energies[self.estart:self.eend]
            ])
        else:
            self.K = array([1.0 for i in range(self.eend - self.estart)])

        for i in range(self.npts[1]):
            for j in range(self.npts[0]):
                pos = array([0.0, 0.0, max(self.coord[:, 2]) + self.tip_disp])
                pos += self.lv[0] * (j + 0.5) / (self.npts[0]) + self.lv[1] * (
                    i + 0.5) / (self.npts[1])
                self.x[i][j], self.y[i][j], self.z[i][j] = pos[0], pos[1], pos[
                    2]
        start = time()
        #executes ldos integration in parallel on a ProcessPool of self.nprocs processors
        if self.nprocs > 1:
            pool = ProcessPool(self.nprocs)
            output = pool.map(
                self.integrator,
                [i for i in range(self.npts[1]) for j in range(self.npts[0])],
                [j for i in range(self.npts[1]) for j in range(self.npts[0])])
            self.ldos = sum(output)
            pool.close()
        #executes ldos integration on a single processor
        else:
            for i in range(self.npts[1]):
                for j in range(self.npts[0]):
                    pos = array([self.x[i][j], self.y[i][j], self.z[i][j]])
                    counter = 1
                    for k in self.periodic_coord:
                        if counter == sum(self.atomnums) + 1:
                            counter = 1
                        if counter - 1 not in self.exclude:
                            posdiff = norm(pos - k)
                            sf = exp(-1.0 * posdiff * self.K * 1.0e-10)
                            for l in range(len(self.dos[counter])):
                                self.ldos[l][i][j] += sum(
                                    self.dos[counter][l][self.estart:self.eend]
                                    * sf)
                        counter += 1
        print('total time to integrate {} points: {} seconds on {} processors'.
              format(self.npts[0] * self.npts[1],
                     time() - start, self.nprocs))
Example #3
    def new(self):
        return tf.data.Dataset.from_generator(
            self._generator,
            output_types=('int8', 'int8', 'bool', 'float32', 'int32'),
            output_shapes=((None, *self.shapes[0]), (None, *self.shapes[0]),
                           (None, *self.shapes[1]), (None, *self.shapes[2]),
                           (None, *self.shapes[3])))


if __name__ == '__main__':
    GEN_ENDED_AT = int(input())
    GEN_ENDS_AT = int(input())

    mp.set_start_method('spawn')

    pool = ProcessPool(mp.cpu_count())

    critic = Critic([64, 64, 64, 64, 32, 32, 32, 32, 16, 16], NUM_ACT, STOCK_X)
    critic(critic.stock)

    if GEN_ENDED_AT >= 0:
        with open(f'ddrive/{GEN_ENDED_AT}.txt', 'rb') as f:
            weights = pickle.loads(lzma.decompress(base64.b85decode(f.read())))

        critic.set_weights(weights)

    critic.compile(optimizer=tf.keras.optimizers.SGD(0.0001), loss='mse')

    cg = CellGroup()

    for gen in range(GEN_ENDED_AT + 1, GEN_ENDS_AT + 1):
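
The training loop above is cut off in the source. Below is a self-contained sketch of just the `Dataset.from_generator` pattern that `new()` relies on, with invented shapes; the string dtypes mirror the snippet's legacy `output_types`/`output_shapes` signature:

import numpy as np
import tensorflow as tf

def _generator():
    # yields variable-length batches of (state, mask) pairs
    for _ in range(3):
        n = np.random.randint(1, 4)
        yield (np.zeros((n, 2, 2), dtype=np.int8),
               np.ones((n,), dtype=np.bool_))

ds = tf.data.Dataset.from_generator(
    _generator,
    output_types=('int8', 'bool'),
    output_shapes=((None, 2, 2), (None,)))

for state, mask in ds:
    print(state.shape, mask.shape)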
Example #4
    def __init__(self, evol_params):
        '''
        Initialize evolutionary search
        ARGS:
        evol_params: dict
            required keys -
                pop_size: int - population size,
                genotype_size: int - genotype_size,
                fitness_function: function - a user-defined function that takes a genotype as arg and returns updated genotype and float fitness value
                elitist_fraction: float - fraction of top performing individuals to retain for next generation
                mutation_variance: float - variance of the gaussian distribution used for mutation noise
            optional keys -
                fitness_args: list-like - optional additional arguments to pass while calling fitness function
                                           list such that len(list) == 1 or len(list) == pop_size
                num_processes: int -  pool size for multiprocessing.pool.Pool - defaults to os.cpu_count()
        '''
        # check for required keys
        required_keys = [
            'pop_size', 'genotype_size', 'fitness_function',
            'elitist_fraction', 'mutation_variance'
        ]
        for key in required_keys:
            if key not in evol_params.keys():
                raise Exception(
                    'Argument evol_params does not contain the following required key: {}'
                    .format(key))

        # checked for all required keys
        self.pop_size = evol_params['pop_size']
        self.genotype_size = evol_params['genotype_size']
        self.fitness_function = evol_params['fitness_function']
        self.elitist_fraction = int(
            np.ceil(evol_params['elitist_fraction'] * self.pop_size))
        self.mutation_variance = evol_params['mutation_variance']

        # validating fitness function
        assert self.fitness_function, "Invalid fitness_function"
        rand_genotype = np.random.rand(self.genotype_size)
        fitness_return = self.fitness_function(rand_genotype)
        assert len(
            fitness_return
        ) == 2, "Fitness function must return 2 items - updated_genotype and fitness"
        updated_genotype = fitness_return[0]
        rand_genotype_fitness = fitness_return[1]
        assert type(rand_genotype_fitness) == type(0.) or type(rand_genotype_fitness) in np.sctypes['float'],\
               "Invalid return type for second return of fitness_function. Should be float or np.dtype('np.float*')"
        assert len(updated_genotype) == self.genotype_size, \
                "Invalid length for first return type of fitness function: length should be equal to genotype_size={}".format(self.genotype_size)

        # create other required data
        # default to os.cpu_count() when unset, as documented above
        # (assumes `os` is imported at module level)
        self.num_processes = evol_params.get('num_processes') or os.cpu_count()
        self.pop = np.random.rand(self.pop_size, self.genotype_size)
        self.fitness = np.zeros(self.pop_size)
        self.num_batches = self.pop_size // self.num_processes
        self.num_remainder = self.pop_size % self.num_processes

        # check for fitness function kwargs
        if 'fitness_args' in evol_params.keys():
            optional_args = evol_params['fitness_args']
            assert len(optional_args) == 1 or len(optional_args) == self.pop_size,\
                    "fitness args should be length 1 or pop_size."
            self.optional_args = optional_args
        else:
            self.optional_args = None

        # creating the global process pool to be used across all generations
        global __search_process_pool
        __search_process_pool = ProcessPool(self.num_processes)
        time.sleep(0.5)
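
A minimal sketch of an `evol_params` dict that satisfies the validation above; the quadratic fitness function is a toy stand-in:

import numpy as np

def fitness_function(genotype):
    # must return (updated_genotype, float fitness) to pass the checks above
    return genotype, float(np.sum(genotype ** 2))

evol_params = {
    'pop_size': 20,
    'genotype_size': 5,
    'fitness_function': fitness_function,
    'elitist_fraction': 0.1,    # fraction of top performers retained
    'mutation_variance': 0.05,
    'num_processes': 2,         # optional; falls back to os.cpu_count()
}

# the same shape check the constructor performs:
updated, fitness = fitness_function(np.random.rand(evol_params['genotype_size']))
assert len(updated) == evol_params['genotype_size'] and isinstance(fitness, float)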
Example #5
                return (check_monotone(meaning, B, AandB + 1, AminusB)
                        and check_monotone(meaning, B, AandB, AminusB + 1))

        def check_monotone(meaning, B, AandB=0, AminusB=0, truth_found=False):
            if is_monotone[B][AandB][AminusB] is None:
                is_monotone[B][AandB][AminusB] = check_monotone_inner(
                    meaning, B, AandB, AminusB, truth_found)

            return is_monotone[B][AandB][AminusB]

        monotone = True
        for B in range(args.model_size + 1):
            if not check_monotone(meaning, B):
                monotone = False
                break

    is_monotone = process_pool.map(check_monotone, meanings)
    return set(i for (i, val) in enumerate(is_monotone) if val)


with ProcessPool(nodes=args.processes) as process_pool:
    a_up = get_monotone_quantifiers('A', 'up', process_pool)
    b_up = get_monotone_quantifiers('B', 'up', process_pool)
    a_down = get_monotone_quantifiers('A', 'down', process_pool)
    b_down = get_monotone_quantifiers('B', 'down', process_pool)

indices = a_up.union(b_up).union(a_down).union(b_down)

file_util.dump_dill(indices, 'monotone_expression_indices.dill')
Example #6
    def run_pso(self,
                function,
                searchspace,
                target,
                nparticles,
                maxiter,
                precision,
                domain,
                verbose=True,
                pool_size=None):
        """ Performs a PSO for the given function in the searchspace, looking for the target, which is in the output space.

        The asynchronous evaluation means the exact definition of iterations may be lost. To preserve some sense of this
        an iteration is defined to be `nparticles` evaluations performed. This means that not every particle is updated
        in the history for every iteration. However, the total number of function evaluations (iterations * nparticles)
        will still be preserved for this definition.

        function - the function to be optimized. Its domain must include the searchspace and its output must be in the space of target.
        searchspace - np.array((ssdim, 2))
        target - Not used by `ImplicitTargetPSO`. `function` should include any necessary target data.
        nparticles - number of particles to use in the optimization
        maxiter - maximum number of iterations to the optimization routine
        precision - how close to the target to attempt to get
        domain - absolute boundaries on the trial solutions/particles
        pool_size - (int) set the ProcessingPool size explicitly. Defaults to 4 if not set.
        """
        if not pool_size:
            pool_size = 4

        # update attributes
        self.maxiter = maxiter
        self.precision = precision

        # search space dimensionality
        if searchspace.shape[1] != 2:
            print('WARNING! searchspace does not have dimensions (N,2).')
        ssdim = searchspace.shape[0]

        # init particle positions and velocities
        xpart = np.random.random((nparticles, ssdim))

        for ii in range(ssdim):
            # scale the uniform random dist onto the search range
            xpart[:, ii] = ((searchspace[ii, 1] - searchspace[ii, 0]) * xpart[:, ii]
                            + searchspace[ii, 0])

        vpart = np.zeros(xpart.shape)

        # init particle best solution
        pbest = 1.0 * xpart
        # NOTE: Best not to assume the form of obj function input
        cpbest = np.array([self.cost(function(*xp), target) for xp in pbest])
        # init global best solutions
        im = np.argmin(cpbest)
        gbest = pbest[im]
        cgbest = cpbest[im]

        # intermediate arrays
        # multiply by 1.0 to make copies not bind references
        xarr = 1.0 * xpart[:, :, None]
        varr = 1.0 * vpart[:, :, None]
        parr = 1.0 * pbest[:, :, None]
        cparr = 1.0 * cpbest[:, None]
        garr = 1.0 * gbest[:, None]
        cgarr = 1.0 * np.array([cgbest])

        iternum = 0
        evalnum = 0
        # Asynchronous process management
        pool = ProcessPool(pool_size)
        results = []

        # initial submission

        for fi in range(nparticles):
            # update velocity
            vpart[fi] = self.velocity(vpart[fi], xpart[fi], pbest[fi], gbest)
            # update position
            xpart[fi] = xpart[fi] + vpart[fi]

            # keeps particles inside the absolute boundaries given by `domain`
            xpart[fi] = np.maximum(xpart[fi], domain[:, 0])
            xpart[fi] = np.minimum(xpart[fi], domain[:, 1])

            # compute cost of new position
            results.append(pool.apipe(function, xpart[fi]))

        t1 = time.time()
        while (iternum <= maxiter) and (cgbest > precision):

            for i, res in enumerate(results):
                if res.ready():
                    # Get result and update
                    cpp = res.get()

                    # update best position
                    if cpp < cpbest[i]:
                        pbest[i] = xpart[i]
                        cpbest[i] = cpp
                    if cpp < cgbest:
                        gbest = xpart[i]
                        cgbest = cpp

                    # update velocity
                    vpart[i] = self.velocity(vpart[i], xpart[i], pbest[i],
                                             gbest)
                    # update position
                    xpart[i] = xpart[i] + vpart[i]

                    # keeps particles inside the absolute boundaries given by `domain`
                    xpart[i] = np.maximum(xpart[i], domain[:, 0])
                    xpart[i] = np.minimum(xpart[i], domain[:, 1])

                    # Resubmit
                    results[i] = pool.apipe(function, xpart[i])

                    evalnum += 1

            current_iternum = evalnum // nparticles

            if (current_iternum > iternum) or (cgbest < precision):

                xarr = np.concatenate((xarr, xpart[:, :, None]), axis=2)
                varr = np.concatenate((varr, vpart[:, :, None]), axis=2)
                parr = np.concatenate((parr, pbest[:, :, None]), axis=2)
                cparr = np.concatenate((cparr, cpbest[:, None]), axis=1)
                garr = np.concatenate((garr, gbest[:, None]), axis=1)
                cgarr = np.append(cgarr, cgbest)

                iternum = current_iternum

        t2 = time.time()
        if verbose:
            print('optimization took {:5.2f} seconds'.format(t2 - t1))

        return xarr, varr, parr, cparr, garr, cgarr
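
The asynchronous core of the loop above is `pool.apipe`, which submits one call and returns a handle with `ready()` and `get()`. A minimal sketch of the submit/poll/resubmit cycle, detached from the PSO bookkeeping:

import time
from pathos.multiprocessing import ProcessPool

def cost(x):
    time.sleep(0.1)  # stand-in for an expensive objective evaluation
    return x * x

if __name__ == '__main__':
    pool = ProcessPool(2)
    results = [pool.apipe(cost, x) for x in range(4)]  # initial submission
    evals = 0
    while evals < 8:  # stop after 8 completed evaluations
        for i, res in enumerate(results):
            if res.ready():
                value = res.get()                     # harvest finished work
                results[i] = pool.apipe(cost, value)  # resubmit immediately
                evals += 1
        time.sleep(0.01)  # avoid a hot busy-wait
    pool.close()
    pool.join()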
Example #7
    def train(self, outer_n_epoch, outer_l2, outer_std, outer_learning_rate, outer_n_samples_per_ep,
              n_cpu=None, fix_ppo=None, **_):
        # Requires more than 1 MPI process.
        assert MPI.COMM_WORLD.Get_size() > 1
        assert n_cpu is not None
        if fix_ppo:
            ppo_factor_schedule = PiecewiseSchedule([(0, 1.), (int(outer_n_epoch / 16), 0.5)],
                                                    outside_value=0.5)
        else:
            ppo_factor_schedule = PiecewiseSchedule([(0, 1.), (int(outer_n_epoch / 8), 0.)],
                                                    outside_value=0.)

        outer_lr_scheduler = PiecewiseSchedule([(0, outer_learning_rate),
                                                (int(outer_n_epoch / 2), outer_learning_rate * 0.1)],
                                               outside_value=outer_learning_rate * 0.1)

        def objective(env, theta, pool_rank):
            agent = self.create_agent(env, pool_rank)
            loss_n_params = len(agent.get_loss().get_params_1d())
            agent.get_loss().set_params_1d(theta[:loss_n_params])
            if self._outer_evolve_policy_init:
                agent.pi.set_params_1d(theta[loss_n_params:])
            # Agent lifetime is inner_opt_freq * inner_max_n_epoch
            return run_batch_rl(env, agent,
                                inner_opt_freq=self._inner_opt_freq,
                                inner_buffer_size=self._inner_buffer_size,
                                inner_max_n_epoch=self._inner_max_n_epoch,
                                pool_rank=pool_rank,
                                ppo_factor=ppo_factor_schedule.value(epoch),
                                epoch=None)

        # Initialize theta.
        theta = self.init_theta(self._env)
        num_params = len(theta)
        logger.log('Theta dim: {}'.format(num_params))

        # Set up outer loop parameter update schedule.
        adam = Adam(shape=(num_params,), beta1=0., stepsize=outer_learning_rate, dtype=np.float32)

        # Set up intra-machine parallelization.
        logger.log('Using {} processes per MPI process.'.format(n_cpu))
        from pathos.multiprocessing import ProcessPool
        pool = ProcessPool(nodes=n_cpu)

        begin_time, best_test_return = time.time(), -np.inf
        for epoch in range(outer_n_epoch):

            # Anneal outer learning rate
            adam.stepsize = outer_lr_scheduler.value(epoch)

            noise = np.random.randn(outer_n_samples_per_ep // NUM_EQUAL_NOISE_VECTORS, num_params)
            noise = np.repeat(noise, NUM_EQUAL_NOISE_VECTORS, axis=0)
            theta_noise = theta[np.newaxis, :] + noise * outer_std
            theta_noise = theta_noise.reshape(MPI.COMM_WORLD.Get_size(), -1)

            # Distributes theta_noise vectors to all nodes.
            logger.log('Scattering all perturbed theta vectors and running inner loops ...')

            recvbuf = np.empty(theta_noise.shape[1], dtype='float')
            MPI.COMM_WORLD.Scatter(theta_noise, recvbuf, root=0)
            theta_noise = recvbuf.reshape(-1, num_params)

            # Noise vectors are scattered, run inner loop, parallelized over `pool_size` processes.
            start_time = time.time()
            pool_size = int(outer_n_samples_per_ep / MPI.COMM_WORLD.Get_size())
            results = pool.amap(objective, [self._env] * pool_size, theta_noise, range(pool_size)).get()

            # Extract relevant results
            returns = [utils.ret_to_obj(r['ep_final_rew']) for r in results]
            update_time = [np.mean(r['update_time']) for r in results]
            env_time = [np.mean(r['env_time']) for r in results]
            ep_length = [np.mean(r['ep_length']) for r in results]
            n_ep = [len(r['ep_length']) for r in results]
            mean_ep_kl = [np.mean(r['ep_kl']) for r in results]
            final_rets = [np.mean(r['ep_return'][-3:]) for r in results]

            # We gather the results at node 0
            recvbuf = np.empty([MPI.COMM_WORLD.Get_size(), 7 * pool_size],
                               # 7 = number of scalars in results vector
                               dtype='float') if MPI.COMM_WORLD.Get_rank() == 0 else None
            results_processed_arr = np.asarray(
                [returns, update_time, env_time, ep_length, n_ep, mean_ep_kl, final_rets],
                dtype='float').ravel()
            MPI.COMM_WORLD.Gather(results_processed_arr, recvbuf, root=0)

            # Do outer loop update calculations at node 0
            if MPI.COMM_WORLD.Get_rank() == 0:
                end_time = time.time()
                logger.log(
                    'All inner loops completed, returns gathered ({:.2f} sec).'.format(
                        time.time() - start_time))

                results_processed_arr = recvbuf.reshape(MPI.COMM_WORLD.Get_size(), 7, pool_size)
                results_processed_arr = np.transpose(results_processed_arr, (0, 2, 1)).reshape(-1, 7)
                results_processed = [dict(returns=r[0],
                                          update_time=r[1],
                                          env_time=r[2],
                                          ep_length=r[3],
                                          n_ep=r[4],
                                          mean_ep_kl=r[5],
                                          final_rets=r[6]) for r in results_processed_arr]
                returns = np.asarray([r['returns'] for r in results_processed])

                # ES update
                noise = noise[::NUM_EQUAL_NOISE_VECTORS]
                returns = np.mean(returns.reshape(-1, NUM_EQUAL_NOISE_VECTORS), axis=1)
                theta_grad = relative_ranks(returns).dot(noise) / outer_n_samples_per_ep \
                             - outer_l2 * theta
                theta -= adam.step(theta_grad)

                # Perform `NUM_TEST_SAMPLES` evaluation runs on root 0.
                if epoch % self._outer_plot_freq == 0 or epoch == outer_n_epoch - 1:
                    start_test_time = time.time()
                    logger.log('Performing {} test runs in parallel on node 0 ...'.format(NUM_TEST_SAMPLES))
                    # Evaluation run with current theta
                    test_results = pool.amap(
                        objective,
                        [self._env] * NUM_TEST_SAMPLES,
                        theta[np.newaxis, :] + np.zeros((NUM_TEST_SAMPLES, num_params)),
                        range(NUM_TEST_SAMPLES)
                    ).get()
                    plotting.plot_results(epoch, test_results)
                    test_return = np.mean([utils.ret_to_obj(r['ep_return']) for r in test_results])
                    if test_return > best_test_return:
                        best_test_return = test_return
                        # Save theta as numpy array.
                        self.save_theta(theta)
                    self.save_theta(theta, str(epoch))
                    logger.log('Test runs performed ({:.2f} sec).'.format(time.time() - start_test_time))

                logger.logkv('Epoch', epoch)
                utils.log_misc_stats('Obj', logger, returns)
                logger.logkv('PPOFactor', ppo_factor_schedule.value(epoch))
                logger.logkv('EpochTimeSpent(s)', end_time - start_time)
                logger.logkv('TotalTimeSpent(s)', end_time - begin_time)
                logger.logkv('BestTestObjMean', best_test_return)
                logger.dumpkvs()
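
The per-epoch fan-out above hinges on `pool.amap(...).get()`, pathos' asynchronous map over several argument sequences of equal length. A stripped-down sketch:

from pathos.multiprocessing import ProcessPool

def objective(env, theta, pool_rank):
    # stand-in for the inner-loop rollout above
    return {'rank': pool_rank, 'score': theta * 2}

if __name__ == '__main__':
    pool = ProcessPool(nodes=2)
    pool_size = 4
    results = pool.amap(objective,
                        ['env'] * pool_size,     # same env for every worker
                        range(pool_size),        # one theta per worker
                        range(pool_size)).get()  # block until all finish
    print([r['score'] for r in results])
    pool.close()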
Example #8
 def ProcessPool(self, key=None, new=False, *args, **kwargs):
     process_pool = ProcessPool(*args, **kwargs)
     self.register_atexit(process_pool)
     return process_pool
Example #9
 def GlobalProcessPool(self, *args, **kwargs):
     if self.process_pool is None:
         self.process_pool = ProcessPool(*args, **kwargs)
     return self.process_pool
Example #10
 def wrapper(*args):
     with ProcessPool() as p:
         p.map(insert_func, *args)
Example #11
### check if point is within polygon
bool_list = []
for point in range(0, len(grid_jamaica)):
    bool_val = grid_jamaica.geometry.iloc[point].within(
        jamaica.iloc[0].geometry)
    bool_list.append(bool_val)

### extract points inside
grid_jamaica = grid_jamaica[bool_list]
grid_jamaica['country'] = 'Jamaica'
grid_jamaica.to_file('Input/grid_jamaica.gpkg', driver='GPKG')
### add some additional information
grid_jamaica['region'] = str(region)

list_years = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
for sample in list_years:
    grid_jamaica['sample_num'] = str(sample)

    print(len(grid_jamaica))

    pool = ProcessPool(nodes=cpu_count() - 2)
    output = pool.map(TC_analysis, grid_jamaica.latitude,
                      grid_jamaica.longitude, grid_jamaica.ID_point,
                      grid_jamaica.country, grid_jamaica.region,
                      grid_jamaica.sample_num)
    output_files = pd.concat(output)
    output_files.to_csv('Output/TC_Jamaica_' + str(region) + '_' +
                        str(sample) + '.csv',
                        index=False)
Example #12
def main(argv):
    try:
        opts, args = getopt.getopt(argv, "hi:t:m:c:o:p:", [
            "infile=", "trainfile=", "model=", "cutoff=", "output=",
            "pool_num="
        ])
    except getopt.GetoptError:
        print(
            'TrainingTool.py -i <infile> -t <trainfile> -m <model> -c <cutoff> -o <output_name> -p <pool_num>'
        )
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(
                'TrainingTool.py -i <infile> -t <trainfile> -m <model> -c <cutoff> -o <output_name> -p <pool_num>'
            )
            sys.exit()
        elif opt in ("-i", "--infile"):
            infile = arg
        elif opt in ("-t", "--trainfile"):
            trainfile = arg
        elif opt in ("-m", "--model"):
            model = arg
        elif opt in ("-c", "--cutoff"):
            cutoff = arg
        elif opt in ("-p", "--pool_num"):
            pool_num = arg
        elif opt in ("-o", "--output_name"):
            output_name = arg

    pool = ProcessPool(int(pool_num))

    ##Read predict sample...
    ##modify the name/order of features
    df_predict = pd.read_csv(infile, delimiter='\t', low_memory=False)
    colnames = df_predict.columns.values.tolist()
    #df_predict=changeName(colnames,df_predict)

    column_num = df_predict.shape[1]

    df_predict.rename(columns={
        colnames[0]: 'V1',
        colnames[1]: 'V2',
        colnames[2]: 'V3'
    },
                      inplace=True)
    df_predict = df_predict.set_index(["V1"], append=False, drop=False)

    chromName_list = df_predict.index.values.tolist()
    chromName_list_new = []
    extract_out = []
    for i in chromName_list:
        if i not in chromName_list_new:
            chromName_list_new.append(i)

    ##extract features from the output of Extractfeatures.sh
    setDir(output_name + "/tmp2")
    list_df_predict = [df_predict for row in range(len(chromName_list_new))]
    list_output_name = [output_name for row in range(len(chromName_list_new))]
    extract_out = pool.map(extractFunc, list_df_predict, chromName_list_new,
                           list_output_name)

    ##merge features
    feature_num = 2 * column_num
    df_features = pd.DataFrame(columns=[feature_num])
    rootdir = output_name + '/tmp2'
    list_path = os.listdir(rootdir)
    list_rootdir = [rootdir for row in range(len(list_path))]
    list_column_num = [column_num for row in range(len(list_path))]
    list_output_name = [output_name for row in range(len(list_path))]

    ##load the scale and model of training set
    df_train = pd.read_csv(trainfile, delimiter='\t', header=None)
    data_train = df_train.values.astype('float')
    X = data_train[:, 1:40]
    Y = data_train[:, 0]
    for i in range(len(Y)):
        if Y[i] == 0:
            Y[i] = math.log(Y[i] + 1)
        else:
            Y[i] = math.log(Y[i])

    scaler_x = StandardScaler().fit(X)
    scaler_y = StandardScaler().fit(Y.reshape(-1, 1))
    clf = joblib.load(model)
    print("Loading model success!")

    list_clf = [clf for row in range(len(list_path))]
    list_scaler_x = [scaler_x for row in range(len(list_path))]
    list_scaler_y = [scaler_y for row in range(len(list_path))]
    list_cutoff = [cutoff for row in range(len(list_path))]

    ##scale the features and predict
    setDir(output_name + "/predict_results/")
    pool.map(scaleAndPredict, list_rootdir, list_path, list_scaler_x,
             list_scaler_y, list_clf, list_column_num, list_output_name,
             list_cutoff)
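
Note the `[x for row in range(n)]` replication above: pathos' `map` takes one sequence per function argument, all of equal length, so constant arguments must be broadcast by hand. A small sketch of that convention, with hypothetical names:

from pathos.multiprocessing import ProcessPool

def extract(df, chrom, out_dir):
    # hypothetical stand-in for extractFunc above
    return '%s/%s:%s' % (out_dir, chrom, df)

if __name__ == '__main__':
    chroms = ['chr1', 'chr2', 'chr3']
    pool = ProcessPool(2)
    # constants are replicated to match the length of `chroms`
    out = pool.map(extract, ['DF'] * len(chroms), chroms,
                   ['out'] * len(chroms))
    pool.close()
    print(out)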
Example #13
def data_generator(annotation_lines, input_shape, anchors, nb_classes,
                   batch_size=1, augment=True, max_boxes=20,
                   jitter=0.3, img_scaling=1.2, resize_img=True, allow_rnd_shift=True,
                   color_hue=0.1, color_sat=1.5, color_val=1.5,
                   flip_horizontal=True, flip_vertical=False,
                   bbox_overlap=0.95, nb_threads=1):
    """data generator for fit_generator

    :param list(str) annotation_lines:
    :param int batch_size:
    :param ndarray anchors:
    :param int nb_classes:
    :param tuple(int,int) input_shape: CNN input size
    :param bool augment: perform augmentation
    :param int max_boxes: maximal number of training bounding boxes
    :param float jitter:
    :param float color_hue: range of change of HSV color HUE
    :param float color_sat: range of change of HSV color SAT
    :param float color_val: range of change of HSV color value
    :param float img_scaling: upper image scaling
    :param bool flip_horizontal: allow random horizontal flip of image/boxes
    :param bool flip_vertical: allow random vertical flip of image/boxes
    :param bool resize_img: resize image to fit fully to CNN
    :param bool allow_rnd_shift: allow shifting image not only centered crop
    :param float bbox_overlap: threshold in case cut image, drop all boxes with lower overlap
    :param float|int nb_threads: nb threads running in parallel
    :return:

    >>> np.random.seed(0)
    >>> path_img = os.path.join(update_path('model_data'), 'bike-car-dog.jpg')
    >>> line = path_img + ' 100,150,200,250,0 300,50,400,200,1'
    >>> anchors = get_anchors(os.path.join(update_path('model_data'), 'yolo_anchors.csv'))
    >>> gen = data_generator([line], (416, 416), anchors, 3, nb_threads=2)
    >>> batch = next(gen)
    >>> len(batch)
    2
    >>> [b.shape for b in batch[0]]
    [(1, 416, 416, 3), (1, 13, 13, 3, 8), (1, 26, 26, 3, 8), (1, 52, 52, 3, 8)]
    >>> gen = data_generator([line], (416, 416), anchors, 3, augment=False)
    >>> batch = next(gen)
    >>> len(batch)
    2
    >>> [b.shape for b in batch[0]]
    [(1, 416, 416, 3), (1, 13, 13, 3, 8), (1, 26, 26, 3, 8), (1, 52, 52, 3, 8)]
    """
    nb_lines = len(annotation_lines)
    circ_i = 0
    if nb_lines == 0 or batch_size <= 0:
        return None

    color_hue = abs(color_hue)
    color_sat = color_sat if color_sat > 1 else 1. / color_sat
    color_val = color_val if color_val > 1 else 1. / color_val

    nb_threads = nb_workers(nb_threads)
    pool = ProcessPool(nb_threads) if nb_threads > 1 else None
    _wrap_rand_data = partial(
        get_augmented_data,
        input_shape=input_shape,
        augment=augment,
        max_boxes=max_boxes,
        jitter=jitter,
        resize_img=resize_img,
        img_scaling=img_scaling,
        allow_rnd_shift=allow_rnd_shift,
        hue=color_hue,
        sat=color_sat,
        val=color_val,
        flip_horizontal=flip_horizontal,
        flip_vertical=flip_vertical,
        bbox_overlap=bbox_overlap,
    )

    try:
        while True:
            if circ_i < batch_size:
                # shuffle while you are starting new cycle
                np.random.shuffle(annotation_lines)
            batch_image_data = []
            batch_box_data = []

            # create the list of lines to be loaded in batch
            annot_lines = annotation_lines[circ_i:circ_i + batch_size]
            batch_offset = (circ_i + batch_size) - nb_lines
            # check if the loaded batch has sufficient size
            if batch_offset > 0:
                annot_lines += annotation_lines[:batch_offset]
            # multiprocessing loading of batch data
            map_process = pool.imap if pool else map
            for image, box in map_process(_wrap_rand_data, annot_lines):
                batch_image_data.append(image)
                batch_box_data.append(box)

            circ_i = (circ_i + batch_size) % nb_lines

            batch_image_data = np.array(batch_image_data)
            batch_box_data = np.array(batch_box_data)
            y_true = preprocess_true_boxes(batch_box_data, input_shape, anchors, nb_classes)
            batch = [batch_image_data, *y_true], np.zeros(batch_size)
            yield batch
            gc.collect()
    finally:
        # the pool cleanup runs when the generator is closed; after a bare
        # `while True` it would otherwise be unreachable
        if pool:
            pool.close()
            pool.join()
            pool.clear()
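
A minimal sketch of the generator-plus-pool pattern above, showing why the cleanup belongs in a `try/finally`: it then runs when the consumer closes the generator instead of sitting unreachably after `while True`:

from pathos.multiprocessing import ProcessPool

def square(x):
    return x * x

def batch_generator(values, batch_size=2, nb_threads=2):
    pool = ProcessPool(nb_threads) if nb_threads > 1 else None
    map_process = pool.imap if pool else map
    try:
        while True:
            for i in range(0, len(values), batch_size):
                yield list(map_process(square, values[i:i + batch_size]))
    finally:
        if pool:  # runs when the generator is closed or garbage-collected
            pool.close()
            pool.join()
            pool.clear()

if __name__ == '__main__':
    gen = batch_generator([1, 2, 3, 4])
    print(next(gen))  # [1, 4]
    gen.close()       # triggers the finally-block cleanup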
Example #14
import random

from pathos.multiprocessing import ProcessPool

import Generator
from Monotonicity import MonotonicityMeasurer
import matplotlib.pyplot as plt

universe = Generator.generate_simplified_models(10)

meanings = [tuple(random.choice([True, False]) for i in range(len(universe))) for j in range(5000)]

measurer_up = MonotonicityMeasurer(universe, 10, 'B')
measurer_down = MonotonicityMeasurer(universe, 10, 'B', down=True)

with ProcessPool(4) as process_pool:
    monotonicities_up = process_pool.map(measurer_up, meanings)
    monotonicities_down = process_pool.map(measurer_down, meanings)
    monotonicities = process_pool.map(max, monotonicities_up, monotonicities_down)

fig = plt.figure()

plt.hist(monotonicities, bins=30, range=[0, 1])

plt.show()

fig.savefig('results/random_monotone.png', bbox_inches='tight')
Example #15
def iterate_mproc_map(wrap_func, iterate_vals, nb_workers=CPU_COUNT, desc='', ordered=True):
    """ create a multi-porocessing pool and execute a wrapped function in separate process

    :param func wrap_func: function which will be excited in the iterations
    :param list iterate_vals: list or iterator which will ide in iterations,
        if -1 then use all available threads
    :param int nb_workers: number og jobs running in parallel
    :param str|None desc: description for the bar,
        if it is set None, bar is suppressed
    :param bool ordered: whether enforce ordering in the parallelism

    Waiting reply on:

    * https://github.com/celery/billiard/issues/280
    * https://github.com/uqfoundation/pathos/issues/169

    See:

    * https://sebastianraschka.com/Articles/2014_multiprocessing.html
    * https://github.com/nipy/nipype/pull/2754
    * https://medium.com/contentsquare-engineering-blog/multithreading-vs-multiprocessing-in-python-ece023ad55a
    * http://mindcache.me/2015/08/09/
        python-multiprocessing-module-daemonic-processes-are-not-allowed-to-have-children.html
    * https://medium.com/@bfortuner/python-multithreading-vs-multiprocessing-73072ce5600b

    >>> list(iterate_mproc_map(np.sqrt, range(5), nb_workers=1, desc=None))  # doctest: +ELLIPSIS
    [0.0, 1.0, 1.41..., 1.73..., 2.0]
    >>> list(iterate_mproc_map(sum, [[0, 1]] * 5, nb_workers=2, ordered=False))
    [1, 1, 1, 1, 1]
    >>> list(iterate_mproc_map(max, [(2, 1)] * 5, nb_workers=2, desc=''))
    [2, 2, 2, 2, 2]
    """
    iterate_vals = list(iterate_vals)
    nb_workers = 1 if not nb_workers else int(nb_workers)
    nb_workers = CPU_COUNT if nb_workers < 0 else nb_workers

    if desc is not None:
        pbar = tqdm.tqdm(total=len(iterate_vals), desc=str('%r @%i-threads' % (desc, nb_workers)))
    else:
        pbar = None

    if nb_workers > 1:
        logging.debug('perform parallel in %i threads', nb_workers)
        # Standard mproc.Pool created a demon processes which can be called
        # inside its children, cascade or multiprocessing
        # https://stackoverflow.com/questions/6974695/python-process-pool-non-daemonic

        # pool = mproc.Pool(nb_workers)
        # pool = NonDaemonPool(nb_workers)
        pool = ProcessPool(nb_workers)
        # pool = Pool(nb_workers)
        mapping = pool.imap if ordered else pool.uimap
    else:
        logging.debug('perform sequential')
        pool = None
        mapping = map

    for out in mapping(wrap_func, iterate_vals):
        pbar.update() if pbar else None
        yield out

    if pool:
        pool.close()
        pool.join()
        pool.clear()

    pbar.close() if pbar else None
Example #16
    def test_repeatability(self):
        import matplotlib.pyplot as mpl
        import numpy as np

        start, end = (10, 10), (350, 250)
        repeats = 2
        equal_paths = []
        rdrs = np.linspace(-1, 1, 10)
        risk_sums = []

        def make_path(start, end, rdr):
            algo = RiskGridAStar(
                ManhattanRiskHeuristic(self.large_diag_environment,
                                       risk_to_dist_ratio=rdr))
            return algo.find_path(self.large_diag_environment, start, end)

        def run_params(rdr):
            paths = [make_path(start, end, rdr) for _ in range(repeats)]
            equal_paths.append(all([p == paths[0] for p in paths]))
            if not paths[0]:
                return [rdr, np.inf]
            risk_sum = sum([
                self.large_diag_environment.grid[n[0], n[1]] for n in paths[0]
            ])
            return [rdr, risk_sum]

        pool = ProcessPool(nodes=8)
        params = np.array(rdrs)
        risk_sums = pool.map(run_params, params)
        pool.close()

        # for rdr in rdrs:
        #     paths = [make_path(start, end, rdr) for _ in range(repeats)]
        #     equal_paths.append(all([p == paths[0] for p in paths]))
        #     if not paths[0]:
        #         risk_sums.append([rdr, np.inf])
        #         continue
        #     risk_sum = sum([n.n for n in paths[0]])
        #     risk_sums.append([rdr, risk_sum])
        #
        #     fig = mpl.figure()
        #     ax = fig.add_subplot(111)
        #     for path in paths:
        #         ax.plot([n.x for n in path], [n.y for n in path], color='red')
        #     im = ax.imshow(self.large_no_diag_environment.grid)
        #     fig.colorbar(im, ax=ax, label='Population')
        #     ax.set_title(f'RiskA* with RDR={rdr:.4g} \n Risk sum={risk_sum:.4g}')
        #     fig.show()

        risk_sums = np.array(risk_sums)

        rdr_fig = mpl.figure()
        ax = rdr_fig.add_subplot(111)
        ax.scatter(risk_sums[:, 0], risk_sums[:, 1])
        # ax.set_xscale('log')
        ax.set_yscale('symlog')
        ax.set_xlabel('Risk-Distance Ratio')
        ax.set_ylabel('Path Risk sum')
        ax.set_title('Risk Grid A*')
        rdr_fig.show()
        self.assertTrue(all(equal_paths), 'Paths are not generated repeatably')
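
Mapping the locally defined `run_params` works here because pathos serializes with dill, which can pickle closures and nested functions where the stdlib `multiprocessing` pickler cannot. A tiny demonstration of that difference:

from pathos.multiprocessing import ProcessPool

def demo():
    offset = 10

    def run_params(x):  # a closure: stdlib pickle would reject this
        return x + offset

    pool = ProcessPool(nodes=2)
    out = pool.map(run_params, range(4))  # dill serializes the closure
    pool.close()
    return out

if __name__ == '__main__':
    print(demo())  # [10, 11, 12, 13]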
Example #17
File: update_terms.py Project: leosj1/API
num_processes = 6
if __name__ == "__main__":

    def process_updates(x):
        from API_TTERMS import getconf2, query, testingKWT
        # tid = x[0]
        tid = 428
        testingKWT(tid, '144.167.35.89')

    conf = getconf2()
    q_trackers = f"select tid from trackers where userid = '*****@*****.**' or YEAR(date_created) in (2019,2020)"
    # q_trackers = f"select t.tid from trackers t left join tracker_keyword tk on  t.tid = tk.tid where t.tid is null or tk.tid is null or tk.status_percentage < 100 or tk.status != 1 or tk.status_percentage is null or tk.status is null"

    tracker_result = query(conf, q_trackers)
    if parallel:
        process_pool = ProcessPool(num_processes)
        for record in tqdm(process_pool.imap(process_updates, tracker_result),
                           desc="Terms",
                           ascii=True,
                           file=sys.stdout,
                           total=len(tracker_result)):
            pass
        process_pool.close()
        print("Joining pool")
        process_pool.join()
        print("Clearing pool")
        process_pool.clear()
        print("Finished!")
    else:
        for x in tqdm(tracker_result,
                      desc="Terms",
Example #18
    def test_repeatability(self):
        import matplotlib.pyplot as mpl
        import numpy as np
        from pathos.multiprocessing import ProcessPool
        from itertools import product

        start, end = (10, 10), (350, 250)
        repeats = 2
        equal_paths = []
        rdrs = np.linspace(-100, 100, 10)
        jgs = [0]  # np.linspace(0, 5000, 2)
        jls = np.linspace(0, 50, 2)

        def make_path(start, end, rdr, jg, jl):
            algo = RiskJumpPointSearchAStar(ManhattanRiskHeuristic(
                self.large_diag_environment, risk_to_dist_ratio=rdr),
                                            jump_gap=jg,
                                            jump_limit=jl)
            return algo.find_path(self.large_diag_environment, start, end)

        def run_params(rdr, jg, jl):
            paths = [
                make_path(start, end, rdr, jg, jl) for _ in range(repeats)
            ]
            equal_paths.append(all([p == paths[0] for p in paths]))
            if not paths[0]:
                return [rdr, np.inf, jl, jg]
            risk_sum = sum([
                self.large_diag_environment.grid[n[0], n[1]] for n in paths[0]
            ])
            return [rdr, risk_sum, jl, jg]

        pool = ProcessPool(nodes=8)
        pool.restart(force=True)
        params = np.array(list(product(rdrs, jgs, jls)))
        risk_sums = pool.map(run_params, params[:, 0], params[:, 1],
                             params[:, 2])
        pool.close()

        # risk_sums = []
        # for rdr, jg, jl in product(rdrs, jgs, jls):
        #     paths = [make_path(start, end, rdr, jg, jl) for _ in range(repeats)]
        #     equal_paths.append(all([p == paths[0] for p in paths]))
        #     if not paths[0]:
        #         risk_sums.append([rdr, np.inf, jl, jg])
        #         continue
        #     risk_sum = sum([n.n for n in paths[0]])
        #     risk_sums.append([rdr, risk_sum, jl, jg])
        #
        #     fig = mpl.figure()
        #     ax = fig.add_subplot(111)
        #     for path in paths:
        #         ax.plot([n.x for n in path], [n.y for n in path], color='red')
        #     im = ax.imshow(self.large_diag_environment.grid)
        #     fig.colorbar(im, ax=ax, label='Population')
        #     ax.set_title(f'Risk JPS A* with RDR={rdr:.4g}, JL={jl} \n Risk sum={risk_sum:.4g}')
        #     fig.show()

        risk_sums = np.array(risk_sums)

        jl_fig = mpl.figure()
        ax = jl_fig.add_subplot(111)
        sc = ax.scatter(risk_sums[:, 0], risk_sums[:, 1], c=risk_sums[:, 2])
        ax.set_yscale('symlog')
        ax.set_xlabel('Risk-Distance Ratio')
        ax.set_ylabel('Path Risk sum')
        ax.set_title('R JPS+ A* Jump Limits')
        jl_fig.colorbar(sc, ax=ax, label='Jump Limit')
        jl_fig.show()

        jg_fig = mpl.figure()
        ax = jg_fig.add_subplot(111)
        sc = ax.scatter(risk_sums[:, 0], risk_sums[:, 1], c=risk_sums[:, 3])
        ax.set_yscale('symlog')
        ax.set_xlabel('Risk-Distance Ratio')
        ax.set_ylabel('Path Risk sum')
        ax.set_title('R JPS+ A* Jump Gaps')
        jg_fig.colorbar(sc, ax=ax, label='Jump Gap')
        jg_fig.show()

        self.assertTrue(all(equal_paths), 'Paths are not generated repeatably')
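
The `pool.restart(force=True)` call above matters because pathos caches pool instances: requesting a pool with the same configuration can hand back an earlier, already-closed object, and `restart` revives it. A short sketch of that behavior, assuming the cached instance is reused:

from pathos.multiprocessing import ProcessPool

if __name__ == '__main__':
    pool = ProcessPool(nodes=2)
    print(pool.map(abs, [-1, -2]))
    pool.close()
    pool.join()

    pool = ProcessPool(nodes=2)  # may return the same, closed pool
    pool.restart(force=True)     # revive it before reuse
    print(pool.map(abs, [-3, -4]))
    pool.clear()                 # drop it from pathos' cache when done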
Example #19
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Copyright (c) 1997-2016 California Institute of Technology.
# Copyright (c) 2016-2022 The Uncertainty Quantification Foundation.
# License: 3-clause BSD.  The full license text is available at:
#  - https://github.com/uqfoundation/pathos/blob/master/LICENSE
"""
minimal interface to python's multiprocessing module

Notes:
    This module has been deprecated in favor of ``pathos.pools``.
"""

from pathos.multiprocessing import ProcessPool, __STATE
from pathos.threading import ThreadPool #XXX: thread __STATE not imported
from pathos.helpers import cpu_count
mp = ProcessPool()
tp = ThreadPool()

__all__ = ['mp_map']

# backward compatibility
#FIXME: deprecated... and buggy!  (fails to dill on imap/uimap)
def mp_map(function, sequence, *args, **kwds):
    '''extend python's parallel map function to multiprocessing

Inputs:
    function  -- target function
    sequence  -- sequence to process in parallel

Additional Inputs:
    nproc     -- number of 'local' cpus to use  [default = 'autodetect']
Example #20
def main():

    main_dir = Path(
        r'P:\Synchronize\IWS\Testings\fourtrans_practice\multisite_phs_spec_corr'
    )

    os.chdir(main_dir)

    interp_var = 'temp'

    ft_type = 'mag'

    #==========================================================================
    if interp_var == 'temp':
        # MEAN TEMPERATURE
        in_data_file = os.path.join(f'temperature_{ft_type}_spec_df.csv')

        in_vgs_file = os.path.join(r'temperature_cftns.csv')

        in_stns_coords_file = os.path.join(os.path.dirname(in_data_file),
                                           r'temperature_avg_coords.csv')

        out_dir = r'temperature_kriging'
        var_units = u'\u2103'  # 'centigrade'
        var_name = 'temperature'
        out_krig_net_cdf_file = f'kriging_1km_{ft_type}.nc'

        # interpolated values
        # can be int, float, 'min_in'/'max_in' or None
        # min_var_val = 'min_in'
        # max_var_val = 'max_in'
#         min_var_val = None
#         max_var_val = None

#==========================================================================

#==========================================================================
    elif interp_var == 'ppt':
        # PRECIPITATION
        in_data_file = os.path.join(f'precipitation_{ft_type}_spec_df.csv')

        in_vgs_file = os.path.join(r'precipitation_cftns.csv')

        in_stns_coords_file = os.path.join(os.path.dirname(in_data_file),
                                           r'precipitation_coords.csv')

        out_dir = r'precipitation_kriging'
        var_units = 'mm'
        var_name = 'precipitation'
        out_krig_net_cdf_file = f'kriging_1km_{ft_type}.nc'

        # interpolated values
        # can be int, float, 'min_in'/'max_in' or None
        # min_var_val = 'min_in'
        # max_var_val = 'max_in'
#         min_var_val = None
#         max_var_val = None

#==========================================================================
    else:
        raise ValueError(f'Invalid value for interp_var: {interp_var}!')

    # assuming in_drift_raster and in_stns_coords_file and in_bounds_shp_file
    # have the same coordinates system
    # assuming in_drift_rasters_list have the same cell sizes, bounds and NDVs
    # basically they are copies of each other except for the drift values
    in_drift_rasters_list = ([
        r'P:\Synchronize\IWS\QGIS_Neckar\raster\lower_de_gauss_z3_1km.tif'
    ])

    #     in_bounds_shp_file = (
    #         os.path.join(r'P:\Synchronize\IWS\QGIS_Neckar\raster',
    #                      r'taudem_out_spate_rockenau\watersheds.shp'))

    in_bounds_shp_file = (os.path.join(
        r'P:\Synchronize\IWS\QGIS_Neckar\raster\taudem_out_spate_rockenau\watersheds.shp'
    ))

    align_ras_file = in_drift_rasters_list[0]

    out_figs_dir = os.path.join(out_dir, 'krige_figs')

    x_coords_lab = 'X'
    y_coords_lab = 'Y'
    time_dim_lab = 'freq'
    nc_mode = 'w'

    #     min_ppt_thresh = 1.0

    idw_exp = 5
    n_cpus = 1
    buffer_dist = 20e3
    sec_buffer_dist = 2e3

    in_sep = ';'

    ord_krige_flag = True
    sim_krige_flag = True
    edk_krige_flag = True
    idw_flag = True
    plot_figs_flag = True

    #     ord_krige_flag = False
    sim_krige_flag = False
    edk_krige_flag = False
    idw_flag = False
    plot_figs_flag = False

    os.chdir(main_dir)

    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    if (not os.path.exists(out_figs_dir)) and plot_figs_flag:
        os.mkdir(out_figs_dir)

#     print('min_var_val:', min_var_val)
#     print('max_var_val:', max_var_val)
    print('idw_exp:', idw_exp)
    print('n_cpus:', n_cpus)
    print('nc_mode:', nc_mode)
    print('var_name:', var_name)
    print('out_dir:', out_dir)
    print('in_bounds_shp_file:', in_bounds_shp_file)
    print('out_krig_net_cdf_file:', out_krig_net_cdf_file)

    assert any([ord_krige_flag, sim_krige_flag, edk_krige_flag, idw_flag])

    #==========================================================================
    # read the data frames
    #==========================================================================
    in_data_df = pd.read_csv(in_data_file,
                             sep=in_sep,
                             index_col=0,
                             encoding='utf-8')

    in_vgs_df = pd.read_csv(in_vgs_file,
                            sep=in_sep,
                            index_col=0,
                            encoding='utf-8')

    in_stns_coords_df = pd.read_csv(in_stns_coords_file,
                                    sep=in_sep,
                                    index_col=0,
                                    encoding='utf-8')

    all_stns = in_data_df.columns.intersection(in_stns_coords_df.index)
    assert all_stns.shape[0]

    in_data_df = in_data_df.loc[:, all_stns]
    in_stns_coords_df = in_stns_coords_df.loc[all_stns, :]

    #==========================================================================
    # Get stations that are around/in the bounds_shp only
    #==========================================================================

    bds_vec = ogr.Open(in_bounds_shp_file)
    assert bds_vec

    bds_lyr = bds_vec.GetLayer(0)

    feat_buffs_list = []
    feat_sec_buffs_list = []
    for feat in bds_lyr:  # buffer each catchment geometry
        geom = feat.GetGeometryRef().Clone()
        assert geom

        feat_buffs_list.append(geom.Buffer(buffer_dist))
        feat_sec_buffs_list.append(geom.Buffer(sec_buffer_dist))

    bds_vec.Destroy()

    assert feat_buffs_list and feat_sec_buffs_list

    print(len(feat_buffs_list), 'polygons in the in_bounds_shp_file...')

    fin_stns = []
    for poly in feat_buffs_list:
        for stn in all_stns:
            if stn in fin_stns:
                continue

            curr_pt = cnvt_to_pt(*in_stns_coords_df.loc[stn,
                                                        ['X', 'Y']].values)

            if chk_cntmt(curr_pt, poly):
                fin_stns.append(stn)

    assert fin_stns

    print('%d stations out of %d within buffer zone of in_bounds_shp_file' %
          (len(fin_stns), in_stns_coords_df.shape[0]))

    fin_stns = np.unique(fin_stns)
    in_data_df = in_data_df.loc[:, fin_stns]
    in_stns_coords_df = in_stns_coords_df.loc[fin_stns, :]

    #==========================================================================
    # Read the DEM
    #==========================================================================

    #     if edk_krige_flag:
    #         in_drift_arr_list = []
    #         _rows_list = []
    #         _cols_list = []
    #
    #         for in_drift_raster in in_drift_rasters_list:
    #             in_drift_ds = gdal.Open(in_drift_raster)
    #
    #             assert in_drift_ds, 'GDAL cannot open %s' % in_drift_raster
    #
    #             drift_rows = in_drift_ds.RasterYSize
    #             drift_cols = in_drift_ds.RasterXSize
    #
    #             drift_geotransform = in_drift_ds.GetGeoTransform()
    #
    #             _drift_x_min = drift_geotransform[0]
    #             _drift_y_max = drift_geotransform[3]
    #
    #             drift_band = in_drift_ds.GetRasterBand(1)
    #             drift_ndv = drift_band.GetNoDataValue()
    #
    #             cell_width = drift_geotransform[1]
    #             cell_height = abs(drift_geotransform[5])
    #
    #             _drift_x_max = _drift_x_min + (drift_cols * cell_width)
    #             _drift_y_min = _drift_y_max - (drift_rows * cell_height)
    #
    #             _arr = in_drift_ds.ReadAsArray()
    #
    #             in_drift_arr_list.append(_arr)
    #             _rows_list.append(_arr.shape[0])
    #             _cols_list.append(_arr.shape[1])
    #
    #         assert all(_ == _rows_list[0] for _ in _rows_list), (
    #             'Drift raster have unequal number of rows!')
    #
    #         assert all(_ == _cols_list[0] for _ in _cols_list), (
    #             'Drift raster have unequal number of columns!')

    #==========================================================================
    # Read the bounding shapefile
    #==========================================================================
    #     sf = shp.Reader(in_bounds_shp_file)
    #     polys_list = [i.__geo_interface__ for i in sf.iterShapes()]

    ((fin_x_min, fin_x_max, fin_y_min, fin_y_max),
     cell_width) = get_aligned_shp_bds_and_cell_size(in_bounds_shp_file,
                                                     align_ras_file)

    cell_height = cell_width

    fin_x_min -= 2 * cell_width
    fin_x_max += 2 * cell_width
    fin_y_min -= 2 * cell_height
    fin_y_max += 2 * cell_height

    #     if edk_krige_flag:
    #         assert fin_x_min > _drift_x_min
    #         assert fin_x_max < _drift_x_max
    #         assert fin_y_min > _drift_y_min
    #         assert fin_y_max < _drift_y_max
    #
    #         min_col = int(max(0, (fin_x_min - _drift_x_min) / cell_width))
    #         max_col = int(ceil((fin_x_max - _drift_x_min) / cell_width))
    #
    #         min_row = int(max(0, (_drift_y_max - fin_y_max) / cell_height))
    #         max_row = int(ceil((_drift_y_max - fin_y_min) / cell_height))
    #
    #     else:
    min_col = 0
    max_col = int(ceil((fin_x_max - fin_x_min) / cell_width))

    min_row = 0
    max_row = int(ceil((fin_y_max - fin_y_min) / cell_height))

    #==========================================================================
    # Calculate coordinates at which to krige
    #==========================================================================

    assert 0 <= min_col <= max_col, (min_col, max_col)
    assert 0 <= min_row <= max_row, (min_row, max_row)

    strt_x_coord = fin_x_min + (0.5 * cell_width)
    end_x_coord = strt_x_coord + ((max_col - min_col) * cell_width)

    strt_y_coord = fin_y_max - (0.5 * cell_height)
    end_y_coord = strt_y_coord - ((max_row - min_row) * cell_height)

    krige_x_coords = np.linspace(strt_x_coord, end_x_coord,
                                 (max_col - min_col + 1))

    krige_y_coords = np.linspace(strt_y_coord, end_y_coord,
                                 (max_row - min_row + 1))

    krige_x_coords_mesh, krige_y_coords_mesh = np.meshgrid(
        krige_x_coords, krige_y_coords)

    krige_coords_orig_shape = krige_x_coords_mesh.shape

    #     if plot_figs_flag:
    #         # xy coords for pcolormesh
    #         pcolmesh_x_coords = np.linspace(
    #             fin_x_min, fin_x_max, (max_col - min_col + 1))
    #
    #         pcolmesh_y_coords = np.linspace(
    #             fin_y_max, fin_y_min, (max_row - min_row + 1))
    #
    #         krige_x_coords_plot_mesh, krige_y_coords_plot_mesh = (
    #             np.meshgrid(pcolmesh_x_coords, pcolmesh_y_coords))
    #
    #     else:
    #         krige_x_coords_plot_mesh, krige_y_coords_plot_mesh = None, None

    krige_x_coords_mesh = krige_x_coords_mesh.ravel()
    krige_y_coords_mesh = krige_y_coords_mesh.ravel()

    #     print('\n\n')
    #     print('#' * 10)
    #
    #     _beg_t = timeit.default_timer()
    #
    #     print(krige_x_coords_mesh.shape[0],
    #           'cells to interpolate per step before intersection!')
    #
    fin_cntn_idxs = np.ones(krige_x_coords_mesh.shape[0], dtype=bool)
    #     fin_cntn_idxs = np.zeros(krige_x_coords_mesh.shape[0], dtype=bool)
    #     ogr_pts = np.vectorize(cnvt_to_pt)(krige_x_coords_mesh, krige_y_coords_mesh)
    #
    #     for poly in feat_sec_buffs_list:
    #         curr_cntn_idxs = np.vectorize(chk_cntmt)(ogr_pts, poly)
    #         fin_cntn_idxs = fin_cntn_idxs | curr_cntn_idxs
    #
    #     print(fin_cntn_idxs.sum(),
    #           'cells to interpolate per step after intersection!')
    #
    #     _end_t = timeit.default_timer()
    #     _tot_t = _end_t - _beg_t
    #
    #     print(f'Took {_tot_t:0.4f} seconds!')
    #     print('#' * 10)
    #
    #     krige_x_coords_mesh = krige_x_coords_mesh[fin_cntn_idxs]
    #     krige_y_coords_mesh = krige_y_coords_mesh[fin_cntn_idxs]

    #     if edk_krige_flag:
    #         drift_vals_list = []
    #
    #         krige_cols = np.arange(min_col, max_col + 1, dtype=int)
    #         krige_rows = np.arange(min_row, max_row + 1, dtype=int)
    #
    #         assert krige_x_coords.shape[0] == krige_cols.shape[0]
    #         assert krige_y_coords.shape[0] == krige_rows.shape[0]
    #
    #         (krige_drift_cols_mesh,
    #          krige_drift_rows_mesh) = np.meshgrid(krige_cols, krige_rows)
    #
    #         krige_drift_cols_mesh = krige_drift_cols_mesh.ravel()
    #         krige_drift_rows_mesh = krige_drift_rows_mesh.ravel()
    #
    #         krige_drift_cols_mesh = krige_drift_cols_mesh[fin_cntn_idxs]
    #         krige_drift_rows_mesh = krige_drift_rows_mesh[fin_cntn_idxs]
    #
    #         for _drift_arr in in_drift_arr_list:
    #             _drift_vals = _drift_arr[
    #                 krige_drift_rows_mesh, krige_drift_cols_mesh]
    #
    #             drift_vals_list.append(_drift_vals)
    #
    # #         drift_vals_arr = np.array(drift_vals_list, dtype=float)
    #
    #         drift_df_cols = list(range(len(in_drift_rasters_list)))
    #         in_stns_drift_df = pd.DataFrame(
    #             index=in_stns_coords_df.index,
    #             columns=drift_df_cols,
    #             dtype=float)
    #
    #         for stn in in_stns_drift_df.index:
    #             stn_x = in_stns_coords_df.loc[stn, x_coords_lab]
    #             stn_y = in_stns_coords_df.loc[stn, y_coords_lab]
    #
    #             stn_col = int((stn_x - _drift_x_min) / cell_width)
    #             stn_row = int((_drift_y_max - stn_y) / cell_height)
    #
    #             for col, _arr in zip(drift_df_cols, in_drift_arr_list):
    #                 try:
    #                     _ = _arr[stn_row, stn_col]
    #                     if not np.isclose(drift_ndv, _):
    #                         in_stns_drift_df.loc[stn, col] = _
    #
    #                 except IndexError:
    #                     pass
    #
    #         in_stns_drift_df.dropna(inplace=True)

    #==========================================================================
    # Open NC
    #==========================================================================
    out_nc = nc.Dataset(os.path.join(out_dir, out_krig_net_cdf_file),
                        mode=str(nc_mode))

    if nc_mode == 'w':
        out_nc.set_auto_mask(False)
        out_nc.createDimension(x_coords_lab, krige_x_coords.shape[0])
        out_nc.createDimension(y_coords_lab, krige_y_coords.shape[0])
        out_nc.createDimension(time_dim_lab, in_data_df.shape[0])

        x_coords_nc = out_nc.createVariable(x_coords_lab,
                                            'd',
                                            dimensions=x_coords_lab)

        x_coords_nc[:] = krige_x_coords

        y_coords_nc = out_nc.createVariable(y_coords_lab,
                                            'd',
                                            dimensions=y_coords_lab)

        y_coords_nc[:] = krige_y_coords

        time_nc = out_nc.createVariable(time_dim_lab,
                                        'i8',
                                        dimensions=time_dim_lab)

        time_nc[:] = np.arange(in_data_df.shape[0])

    else:
        # Appending to an existing file is not supported yet; once it is,
        # the coordinate variables would be read back as below.
        raise RuntimeError('Not configured for this option!')

        # time_nc = out_nc.variables[time_dim_lab]
        # krige_y_coords = out_nc.variables[y_coords_lab][:]
        # krige_x_coords = out_nc.variables[x_coords_lab][:]

    #==========================================================================
    # MP stuff
    #==========================================================================
    mp_cond = False

    if (n_cpus > 1) and (in_data_df.shape[0] > (n_cpus + 1)):
        idxs = np.linspace(0,
                           in_data_df.shape[0],
                           n_cpus + 1,
                           endpoint=True,
                           dtype=int)

        idxs = np.unique(idxs)
        print('MP idxs:', idxs)

        if idxs.shape[0] == 1:
            idxs = np.concatenate((np.array([0]), idxs))

        mp_cond = True

    else:
        idxs = [0, in_data_df.shape[0]]

    #==========================================================================
    # Krige
    #==========================================================================
    if ord_krige_flag:
        print('\n\n')
        print('#' * 10)

        _beg_t = timeit.default_timer()

        print('Ordinary Kriging...')

        if 'OK' not in out_nc.variables:
            ok_nc = out_nc.createVariable('OK',
                                          'd',
                                          dimensions=(time_dim_lab,
                                                      y_coords_lab,
                                                      x_coords_lab),
                                          fill_value=False)

        else:
            ok_nc = out_nc.variables['OK']

        ok_vars_gen = ((in_data_df.iloc[idxs[i]:idxs[i + 1]],
                        in_stns_coords_df, in_vgs_df.loc[ft_type][0],
                        krige_x_coords_mesh, krige_y_coords_mesh,
                        krige_coords_orig_shape, (idxs[i],
                                                  idxs[i + 1]), fin_cntn_idxs)
                       for i in range(n_cpus))

        if mp_cond:
            ok_krige_flds = np.full(
                (in_data_df.shape[0], krige_coords_orig_shape[0],
                 krige_coords_orig_shape[1]),
                np.nan,
                dtype=np.float32)

            mp_ress = []

            # create the pool outside the try so the except block can
            # always close/join it
            mp_pool = ProcessPool(n_cpus)
            try:
                mp_pool.restart(True)

                mp_ress = list(mp_pool.uimap(ordinary_kriging, ok_vars_gen))

                mp_pool.clear()

            except Exception as msg:
                mp_pool.close()
                mp_pool.join()
                print('Error in ordinary_kriging:', msg)

            for mp_res in mp_ress:
                if not isinstance(mp_res, (list, tuple)) or len(mp_res) != 3:
                    print('\n', mp_res, '\n')
                    continue

                [strt_index, end_index, sub_ok_krige_flds] = mp_res
                ok_krige_flds[strt_index:end_index] = sub_ok_krige_flds

                # free memory
                mp_res[2], sub_ok_krige_flds = None, None

            ok_nc[:] = ok_krige_flds

        else:
            [strt_index, end_index,
             ok_krige_flds] = ordinary_kriging(next(ok_vars_gen))

            ok_nc[:] = ok_krige_flds

        ok_nc.units = var_units
        ok_nc.standard_name = var_name + ' (ordinary kriging)'

        ok_krige_flds = None

        _end_t = timeit.default_timer()
        _tot_t = _end_t - _beg_t

        print(f'Took {_tot_t:0.4f} seconds!')
        print('#' * 10)


#     if sim_krige_flag:
#         print('\n\n')
#         print('#' * 10)
#
#         _beg_t = timeit.default_timer()
#
#         print('Simple Kriging...')
#         if 'SK' not in out_nc.variables:
#             sk_nc = out_nc.createVariable(
#                 'SK',
#                 'd',
#                 dimensions=(time_dim_lab, y_coords_lab, x_coords_lab),
#                 fill_value=False)
#
#         else:
#             sk_nc = out_nc.variables['SK']
#
#         sk_vars_gen = ((in_data_df.iloc[idxs[i]:idxs[i + 1]],
#                         in_stns_coords_df,
#                         in_vgs_df.iloc[idxs[i]:idxs[i + 1]],
#                         min_ppt_thresh,
#                         var_name,
#                         krige_x_coords_mesh,
#                         krige_y_coords_mesh,
#                         krige_coords_orig_shape,
#                         min_var_val,
#                         max_var_val,
#                         (idxs[i], idxs[i + 1]),
#                         plot_figs_flag,
#                         krige_x_coords_plot_mesh,
#                         krige_y_coords_plot_mesh,
#                         var_units,
#                         polys_list,
#                         out_figs_dir,
#                         fin_cntn_idxs) for i in range(n_cpus))
#
#         if mp_cond:
#             sk_krige_flds = np.full(
#                 (in_data_df.shape[0],
#                  krige_coords_orig_shape[0],
#                  krige_coords_orig_shape[1]),
#                 np.nan,
#                 dtype=np.float32)
#
#             mp_ress = []
#
#             try:
#                 mp_pool = ProcessPool(n_cpus)
#                 mp_pool.restart(True)
#
#                 mp_ress = list(mp_pool.uimap(simple_kriging, sk_vars_gen))
#
#                 mp_pool.clear()
#
#             except Exception as msg:
#                 mp_pool.close()
#                 mp_pool.join()
#                 print('Error in simple_kriging:', msg)
#
#             for mp_res in mp_ress:
#                 if not isinstance(mp_res, (list, tuple)) or len(mp_res) != 3:
#                     print('\n', mp_res, '\n')
#                     continue
#
#                 [strt_index, end_index, sub_sk_krige_flds] = mp_res
#                 sk_krige_flds[strt_index:end_index] = sub_sk_krige_flds
#
#                 # free memory
#                 mp_res[2], sub_sk_krige_flds = None, None
#
#             sk_nc[:] = sk_krige_flds
#
#         else:
#             [strt_index,
#              end_index,
#              sk_krige_flds] = simple_kriging(next(sk_vars_gen))
#
#             sk_nc[:] = sk_krige_flds
#
#         sk_nc.units = var_units
#         sk_nc.standard_name = var_name + ' (simple kriging)'
#
#         sk_krige_flds = None
#
#         _end_t = timeit.default_timer()
#         _tot_t = _end_t - _beg_t
#
#         print(f'Took {_tot_t:0.4f} seconds!')
#         print('#' * 10)
#
#     if edk_krige_flag:
#         print('\n\n')
#         print('#' * 10)
#
#         _beg_t = timeit.default_timer()
#
#         print('External Drift Kriging...')
#         if 'EDK' not in out_nc.variables:
#             edk_nc = out_nc.createVariable(
#                 'EDK',
#                 'd',
#                 dimensions=(time_dim_lab, y_coords_lab, x_coords_lab),
#                 fill_value=False)
#
#         else:
#             edk_nc = out_nc.variables['EDK']
#
#         edk_vars_gen = ((in_data_df.iloc[idxs[i]:idxs[i + 1]],
#                          in_stns_drift_df,
#                          in_stns_coords_df,
#                          in_vgs_df.iloc[idxs[i]:idxs[i + 1]],
#                          min_ppt_thresh,
#                          var_name,
#                          krige_x_coords_mesh,
#                          krige_y_coords_mesh,
#                          drift_vals_arr,
#                          krige_coords_orig_shape,
#                          drift_ndv,
#                          min_var_val,
#                          max_var_val,
#                          (idxs[i], idxs[i + 1]),
#                          plot_figs_flag,
#                          krige_x_coords_plot_mesh,
#                          krige_y_coords_plot_mesh,
#                          var_units,
#                          polys_list,
#                          out_figs_dir,
#                          fin_cntn_idxs) for i in range(n_cpus))
#
#         if mp_cond:
#             edk_krige_flds = np.full(
#                 (in_data_df.shape[0],
#                  krige_coords_orig_shape[0],
#                  krige_coords_orig_shape[1]),
#                 np.nan,
#                 dtype=np.float32)
#
#             mp_ress = []
#
#             try:
#                 mp_pool = ProcessPool(n_cpus)
#                 mp_pool.restart(True)
#
#                 mp_ress = list(mp_pool.uimap(
#                     external_drift_kriging, edk_vars_gen))
#
#                 mp_pool.clear()
#
#             except Exception as msg:
#                 mp_pool.close()
#                 mp_pool.join()
#                 print('Error in external_drift_kriging:', msg)
#
#             for mp_res in mp_ress:
#                 if not isinstance(mp_res, (list, tuple)) or len(mp_res) != 3:
#                     print('\n', mp_res, '\n')
#                     continue
#
#                 [strt_index, end_index, sub_edk_krige_flds] = mp_res
#                 edk_krige_flds[strt_index:end_index] = sub_edk_krige_flds
#
#                 print('sub_min:', np.nanmin(sub_edk_krige_flds))
#                 print('sub_max:', np.nanmax(sub_edk_krige_flds))
#
#                 # free memory
#                 mp_res[2], sub_edk_krige_flds = None, None
#
#         else:
#             [strt_index,
#              end_index,
#              edk_krige_flds] = external_drift_kriging(next(edk_vars_gen))
#
#         edk_nc[:] = edk_krige_flds
#
#         edk_nc.units = var_units
#         edk_nc.standard_name = var_name + ' (external drift kriging)'
#
#         edk_krige_flds = None
#
#         _end_t = timeit.default_timer()
#         _tot_t = _end_t - _beg_t
#
#         print(f'Took {_tot_t:0.4f} seconds!')
#         print('#' * 10)
#
#     #==========================================================================
#     # IDW
#     #==========================================================================
#     if idw_flag:
#         print('\n\n')
#         print('#' * 10)
#
#         _beg_t = timeit.default_timer()
#
#         print('Inverse Distance Weighting...')
#         if 'IDW' not in out_nc.variables:
#             idw_nc = out_nc.createVariable(
#                 'IDW',
#                 'd',
#                  dimensions=(time_dim_lab, y_coords_lab, x_coords_lab),
#                  fill_value=False)
#
#         else:
#             idw_nc = out_nc.variables['IDW']
#
#         idw_vars_gen = ((in_data_df.iloc[idxs[i]:idxs[i + 1]],
#                         in_stns_coords_df,
#                         min_ppt_thresh,
#                         idw_exp,
#                         var_name,
#                         krige_x_coords_mesh,
#                         krige_y_coords_mesh,
#                         krige_coords_orig_shape,
#                         min_var_val,
#                         max_var_val,
#                         (idxs[i], idxs[i + 1]),
#                         plot_figs_flag,
#                         krige_x_coords_plot_mesh,
#                         krige_y_coords_plot_mesh,
#                         var_units,
#                         polys_list,
#                         out_figs_dir,
#                         fin_cntn_idxs) for i in range(n_cpus))
#
#         if mp_cond:
#             idw_flds = np.full(
#                 (in_data_df.shape[0],
#                  krige_coords_orig_shape[0],
#                  krige_coords_orig_shape[1]),
#                 np.nan,
#                 dtype=np.float32)
#
#             mp_ress = []
#             try:
#                 mp_pool = ProcessPool(n_cpus)
#                 mp_pool.restart(True)
#
#                 mp_ress = list(mp_pool.uimap(
#                     inverse_distance_wtng, idw_vars_gen))
#
#                 mp_pool.clear()
#
#             except Exception as msg:
#                 mp_pool.close()
#                 mp_pool.join()
#                 print('Error in inverse_distance_wtng:', msg)
#
#             for mp_res in mp_ress:
#                 if not isinstance(mp_res, (list, tuple)) or len(mp_res) != 3:
#                     print('\n', mp_res, '\n')
#                     continue
#
#                 [strt_index, end_index, sub_idw_flds] = mp_res
#                 idw_flds[strt_index:end_index] = sub_idw_flds
#
#                 # free memory
#                 mp_res[2], sub_idw_flds = None, None
#
#         else:
#             [strt_index,
#              end_index,
#              idw_flds] = inverse_distance_wtng(next(idw_vars_gen))
#
#         idw_nc[:] = idw_flds
#
#         idw_nc.units = var_units
#         idw_nc.standard_name = (
#             var_name + ' (IDW (exp=%0.3f))' % float(idw_exp))
#
#         idw_flds = None
#
#         _end_t = timeit.default_timer()
#         _tot_t = _end_t - _beg_t
#
#         print(f'Took {_tot_t:0.4f} seconds!')
#         print('#' * 10)

    out_nc.Author = 'Faizan IWS Uni-Stuttgart'
    out_nc.Source = out_nc.filepath()
    out_nc.close()
    return
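# The dispatch above follows one reusable pattern: slice the time axis into
# n_cpus contiguous chunks, interpolate each chunk in a worker, and write each
# returned (start, end, fields) slab back into a preallocated array. A minimal
# sketch of that pattern, assuming a hypothetical worker `interp_chunk`:

import numpy as np
from pathos.multiprocessing import ProcessPool


def interp_chunk(args):
    # Hypothetical worker: interpolate every step of one chunk onto the grid
    data_chunk, (start, end), grid_shape = args
    flds = np.zeros((end - start,) + grid_shape, dtype=np.float32)
    return start, end, flds


def run_chunked(data, grid_shape, n_cpus=4):
    # Roughly equal, contiguous index ranges along the time axis
    idxs = np.unique(np.linspace(0, len(data), n_cpus + 1, dtype=int))
    args_gen = ((data[idxs[i]:idxs[i + 1]], (idxs[i], idxs[i + 1]), grid_shape)
                for i in range(len(idxs) - 1))

    out = np.full((len(data),) + grid_shape, np.nan, dtype=np.float32)
    pool = ProcessPool(n_cpus)
    for start, end, flds in pool.uimap(interp_chunk, args_gen):
        out[start:end] = flds  # results may arrive in any order

    pool.clear()
    return out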
Example #21
0
    def run(self, data):
        pool = ProcessPool(nodes=self.processes)
        result = pool.map(self.operations, data)
        # with mp.Pool(self.processes) as pool:
        #     result = pool.map(self.operations, data)
        return result
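# The commented-out multiprocessing.Pool variant works only when everything it
# maps is picklable; pathos serializes with dill instead, which also handles
# lambdas, locally defined functions, and bound methods such as
# self.operations. A self-contained sketch (the Pipeline class is
# illustrative):

from pathos.multiprocessing import ProcessPool


class Pipeline:

    def __init__(self, processes=2):
        self.processes = processes

    def operations(self, x):
        return x * x  # stand-in for the real per-item work

    def run(self, data):
        pool = ProcessPool(nodes=self.processes)
        return pool.map(self.operations, data)


print(Pipeline().run([1, 2, 3]))  # [1, 4, 9]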
Example #22
0
# Say x-dimension corresponds to alpha, y-dimension corresponds to scatter
XX, YY = np.meshgrid(alphas, scatters)
ndim1, ndim2 = XX.shape

# Calculate the stochastic covariance matrix at these values
Niter = 40
Ntot = XX.size

means = np.zeros(shape=(ndim1, ndim2, p.nbins))
covmats = np.zeros(shape=(ndim1, ndim2, p.nbins, p.nbins))

k = 1
extime = list()
filename = "./Pickles/theta_{}_.p".format(logSMlim)

pool = ProcessPool(ncore)
for i in range(ndim1):
    for j in range(ndim2):
        start = time()

        alpha, scatter = XX[i, j], YY[i, j]
        p.dump_pickle([alpha, scatter], filename)

        def catfunc(i):
            alpha, scatter = p.load_pickle(filename)
            return model.abundance_match(alpha, scatter, 1)[0]

        catalogs = pool.map(catfunc, np.arange(Niter))
        os.remove(filename)
        print("Generated catalogs")
        #catalogs = model.abundance_match(alpha, scatter, Niter)
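# The temporary pickle file above exists only to hand (alpha, scatter) to
# catfunc inside the workers. Since pathos serializes closures with dill, the
# parameters can be captured directly; a hedged sketch reusing the example's
# `model`, `pool` and `Niter` names:

import numpy as np


def make_catfunc(model, alpha, scatter):
    # dill can serialize this closure, so no temp-file round trip is needed
    def catfunc(i):
        return model.abundance_match(alpha, scatter, 1)[0]

    return catfunc


# inside the (i, j) grid loop above, this would replace the dump/load pair:
# catalogs = pool.map(make_catfunc(model, alpha, scatter), np.arange(Niter))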
Example #23
0
    def samples(self, param1, emaxins, model):
        """
		Returns a sample M of utility values
		"""

        #number of choices
        J = 2 * 2 * 3

        #updating sample with new betas and emax
        simdata_ins = simdata.SimData(
            self.N, param1, emaxins, self.x_w, self.x_m, self.x_k, self.x_wmk,
            self.passign, self.nkids0, self.married0, self.agech0_a,
            self.agech0_b, self.d_childa, self.d_childb, self.hours_p,
            self.hours_f, self.wr, self.cs, self.ws, model)

        #save here
        util_list = []
        income_matrix = np.zeros((self.N, self.nperiods, self.M))
        consumption_matrix = np.zeros((self.N, self.nperiods, self.M))
        iscost_matrix = np.zeros((self.N, self.nperiods, self.M))
        cscost_matrix = np.zeros((self.N, self.nperiods, self.M))
        childcare_a_matrix = np.zeros((self.N, self.nperiods, self.M))
        childcare_b_matrix = np.zeros((self.N, self.nperiods, self.M))
        utils_periodt = np.zeros((self.N, J, self.nperiods, self.M))
        utils_c_periodt = np.zeros((self.N, J, self.nperiods, self.M))
        theta_matrix_a = np.zeros((self.N, self.nperiods, self.M))
        theta_matrix_b = np.zeros((self.N, self.nperiods, self.M))
        wage_matrix = np.zeros((self.N, self.nperiods, self.M))
        spouse_income_matrix = np.zeros((self.N, self.nperiods, self.M))
        spouse_employment_matrix = np.zeros((self.N, self.nperiods, self.M))
        hours_matrix = np.zeros((self.N, self.nperiods, self.M))
        ssrs_t2_matrix_a = np.zeros((self.N, self.M))
        ssrs_t2_matrix_b = np.zeros((self.N, self.M))
        ssrs_t5_matrix_a = np.zeros((self.N, self.M))
        ssrs_t5_matrix_b = np.zeros((self.N, self.M))

        #Computing samples (in parallel)
        def sample_gen(j):
            np.random.seed(j + 100)
            return simdata_ins.fake_data(self.nperiods)

        pool = ProcessPool(nodes=10)
        dics = pool.map(sample_gen, range(self.M))
        pool.close()
        pool.join()
        pool.clear()
        #	dics = []
        #	for j in range(self.M):
        #		np.random.seed(j+100)
        #		dics.append(simdata_ins.fake_data(self.nperiods))

        #Saving results
        for j in range(0, self.M):
            income_matrix[:, :, j] = dics[j]['Income']
            consumption_matrix[:, :, j] = dics[j]['Consumption']
            iscost_matrix[:, :, j] = dics[j]['nh_matrix']
            cscost_matrix[:, :, j] = dics[j]['cs_cost_matrix']
            childcare_a_matrix[:, :, j] = dics[j]['Childcare_a']
            childcare_b_matrix[:, :, j] = dics[j]['Childcare_b']
            theta_matrix_a[:, :, j] = dics[j]['Theta'][0]
            theta_matrix_b[:, :, j] = dics[j]['Theta'][1]
            ssrs_t2_matrix_a[:, j] = dics[j]['SSRS_t2'][0]
            ssrs_t2_matrix_b[:, j] = dics[j]['SSRS_t2'][1]
            ssrs_t5_matrix_a[:, j] = dics[j]['SSRS_t5'][0]
            ssrs_t5_matrix_b[:, j] = dics[j]['SSRS_t5'][1]
            wage_matrix[:, :, j] = dics[j]['Wage']
            spouse_income_matrix[:, :, j] = dics[j]['Spouse_income']
            spouse_employment_matrix[:, :,
                                     j] = dics[j]['Spouse_employment_matrix']
            hours_matrix[:, :, j] = dics[j]['Hours']

            for periodt in range(0, self.nperiods):
                utils_periodt[:, :, periodt,
                              j] = dics[j]['Uti_values_dic'][periodt]
                utils_c_periodt[:, :, periodt,
                                j] = dics[j]['Uti_values_c_dic'][periodt]

        return {
            'utils_periodt': utils_periodt,
            'utils_c_periodt': utils_c_periodt,
            'income_matrix': income_matrix,
            'theta_matrix_a': theta_matrix_a,
            'theta_matrix_b': theta_matrix_b,
            'ssrs_t2_matrix_a': ssrs_t2_matrix_a,
            'ssrs_t2_matrix_b': ssrs_t2_matrix_b,
            'ssrs_t5_matrix_a': ssrs_t5_matrix_a,
            'ssrs_t5_matrix_b': ssrs_t5_matrix_b,
            'childcare_a_matrix': childcare_a_matrix,
            'childcare_b_matrix': childcare_b_matrix,
            'wage_matrix': wage_matrix,
            'consumption_matrix': consumption_matrix,
            'spouse_income_matrix': spouse_income_matrix,
            'spouse_employment_matrix': spouse_employment_matrix,
            'hours_matrix': hours_matrix,
            'cscost_matrix': cscost_matrix,
            'iscost_matrix': iscost_matrix
        }
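# np.random.seed(j + 100) in sample_gen makes each draw reproducible, but
# consecutive integer seeds are not guaranteed to give statistically
# independent streams; NumPy's SeedSequence.spawn is the documented way to
# derive independent per-worker generators. A small sketch, independent of
# simdata:

import numpy as np

ss = np.random.SeedSequence(12345)
rngs = [np.random.default_rng(s) for s in ss.spawn(4)]  # independent streams
draws = [rng.normal(size=3) for rng in rngs]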
Example #24
0
    def recursive(self):
        """
		Recursively computes a series of interpolating instances
		Generates a dictionary with the emax instances

		There is a sequence of Emax for each child age (1-10)
		

		"""
        def emax_gen(j):

            for t in range(j, 0, -1):

                if t == j:  #last period
                    emax_bigt_ins = self.emax_bigt(j)

                    emax_dic = {'emax' + str(t): emax_bigt_ins[0]}

                elif t == j - 1:  #at T-1
                    emax_t1_ins = self.emax_t(t, j, emax_bigt_ins[0])

                    emax_dic['emax' + str(t)] = emax_t1_ins[0]

                else:
                    emax_t1_ins = self.emax_t(t, j, emax_t1_ins[0])

                    emax_dic['emax' + str(t)] = emax_t1_ins[0]

            return [emax_dic]

        pool = ProcessPool(nodes=10)

        #7: old child (11 years old) solves for 7 emax
        #19: young child (0 years old) solves for 18 emax
        list_emax = pool.map(emax_gen, range(8, 18))
        pool.close()
        pool.join()
        pool.clear()
        """
			
		list_emax = []
		for j in range(7,19):
			print ('Im in emax j ', j)
			
			for t in range(j,0,-1):
				print ('In period t ', t)
				
				if t==j:#last period
					emax_bigt_ins=self.emax_bigt(j)
					emax_dic={'emax'+str(t): emax_bigt_ins[0]}
					#emax_values={'emax'+str(t): emax_bigt_ins[1]}
				elif t==j-1: #at T-1
					emax_t1_ins=self.emax_t(t,j,emax_bigt_ins[0])
					emax_dic['emax'+str(t)]=emax_t1_ins[0]
					#emax_values['emax'+str(t)]=emax_t1_ins[1]
					
				else:
					emax_t1_ins=self.emax_t(t,j,emax_t1_ins[0])
					emax_dic['emax'+str(t)]=emax_t1_ins[0]
					#emax_values['emax'+str(t)]=emax_t1_ins[1]

			list_emax.append([emax_dic])

	
		"""

        return list_emax
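# Inside emax_gen, each worker runs a textbook backward induction: solve the
# terminal period first, then step back one period at a time, feeding each
# period's solution into the one before it. Stripped of the model specifics,
# the recursion reduces to this sketch (the solve_* callables are
# placeholders):


def backward_induction(T, solve_last, solve_period):
    # solve_last(T) -> terminal value; solve_period(t, next_value) -> value at t
    values = {T: solve_last(T)}
    for t in range(T - 1, 0, -1):
        values[t] = solve_period(t, values[t + 1])
    return values


# e.g. values = backward_induction(10, lambda T: 0.0,
#                                  lambda t, nxt: t + 0.9 * nxt)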
Example #25
0
    def fit(self,
            X,
            y,
            trials=5,
            indicators=indicators,
            ranges=ranges,
            tune_series=tune_series,
            tune_params=tune_params,
            spearman=True,
            weights=None,
            early_stop=50):
        self.fitted = []
        X.columns = X.columns.str.lower()  # columns must be lower case

        pool = ProcessPool(nodes=self.n_jobs)

        for low, high in ranges:
            if low <= 1:
                raise ValueError("Range low must be > 1")
            if high >= len(X):
                raise ValueError(
                    f"Range high:{high} must be < length of X:{len(X)}")
            for ind in indicators:
                idx = 0
                if ":" in ind:
                    idx = int(ind.split(":")[1])
                    ind = ind.split(":")[0]
                fn = f"{ind}("
                if ind[0:3] == "tta":
                    usage = eval(f"{ind}.__doc__").split(")")[0].split("(")[1]
                    params = re.sub(r'[^0-9a-zA-Z_\s]', '', usage).split()
                else:
                    sig = inspect.signature(eval(ind))
                    params = sig.parameters.values()
                for param in params:
                    param = re.split(':|=', str(param))[0].strip()
                    if param == "open_":
                        param = "open"
                    if param == "real":
                        fn += f"X.close, "
                    elif param == "ohlc":
                        fn += f"X, "
                    elif param == "ohlcv":
                        fn += f"X, "
                    elif param in tune_series:
                        fn += f"X.{param}, "
                    elif param in tune_params:
                        fn += f"{param}=trial.suggest_int('{param}', {low}, {high}), "
                fn += ")"
                self.fitted.append(
                    pool.apipe(
                        Optimize(function=fn,
                                 n_trials=trials,
                                 spearman=spearman).fit,
                        X,
                        y,
                        idx=idx,
                        verbose=self.verbose,
                        weights=weights,
                        early_stop=early_stop,
                    ))
        self.fitted = [fit.get() for fit in self.fitted]  # Get results of jobs
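# pool.apipe submits each Optimize(...).fit call asynchronously and returns a
# handle immediately; the final list comprehension blocks on get() to collect
# the results in submission order. The pattern in isolation:

from pathos.multiprocessing import ProcessPool


def square(x):
    return x * x


pool = ProcessPool(nodes=2)
handles = [pool.apipe(square, x) for x in range(4)]  # submit without blocking
results = [h.get() for h in handles]                 # block per job, in order
print(results)  # [0, 1, 4, 9]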
Example #26
0
def improve_circuit(circuit, subcircuit_size=5, connected=True):
    print('Trying to improve a circuit of size',
          len(circuit.gates),
          flush=True)
    circuit_graph = circuit.construct_graph()

    # total, current, time = correct_subcircuit_count(circuit, subcircuit_size, connected=connected), 0, 0
    # print(f'\nEnumerating subcircuits of size {subcircuit_size} (total={total})...')

    def worker(graph):
        if connected and not nx.is_weakly_connected(graph):
            return None
        subcircuit = tuple(graph.nodes)
        # start = timer()
        subcircuit_inputs, subcircuit_outputs = get_inputs_and_outputs(
            circuit, circuit_graph, subcircuit)
        if len(subcircuit_outputs) == subcircuit_size:
            return None
        # current += 1
        # print(f'\n{subcircuit_size}: {current}/{total} ({100 * current // total}%) ', end='', flush=True)

        random.shuffle(subcircuit_inputs)
        sub_in_tt, sub_out_tt = make_truth_tables(circuit, subcircuit_inputs,
                                                  subcircuit_outputs)
        improved_circuit = find_circuit(subcircuit_inputs, subcircuit_size - 1,
                                        sub_in_tt, sub_out_tt)

        if isinstance(improved_circuit, Circuit):
            replaced_graph = circuit.replace_subgraph(improved_circuit,
                                                      subcircuit,
                                                      subcircuit_outputs)
            if nx.is_directed_acyclic_graph(replaced_graph):
                print('\nCircuit Improved!\n', end='', flush=True)
                improved_full_circuit = Circuit.make_circuit(
                    replaced_graph, circuit.input_labels,
                    make_improved_circuit_outputs(circuit.outputs,
                                                  subcircuit_outputs,
                                                  improved_circuit.outputs))
                return fix_labels(improved_full_circuit), 1

        # stop = timer()
        # time += stop - start
        # remaining = time / current * (total - current)
        # print(f' | curr: {int(stop - start)} sec | rem: {int(remaining)} sec ({round(remaining / 60, 1)} min)', end='',
        #       flush=True)

        return None

    all_subgraphs = (
        circuit_graph.subgraph(selected_nodes)
        for selected_nodes in combinations(circuit.gates, subcircuit_size))
    all_correct_subgraphs = filter(
        lambda gr:
        (not connected) or (connected and nx.is_weakly_connected(gr)),
        all_subgraphs)
    total = correct_subcircuit_count(circuit, subcircuit_size,
                                     connected=connected)
    print("start multiprocessing")
    with ProcessPool() as pool:
        res_list = list(
            tqdm.tqdm(pool.imap(worker, all_correct_subgraphs), total=total))
    # res_list = [worker(gr) for gr in all_correct_subgraphs]
    res = next((item for item in res_list if item is not None), None)
    if res is not None:
        return res
    return circuit, 0
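# The front end feeding the pool is just a lazy enumeration of induced
# subgraphs of a fixed size, filtered for weak connectivity. A self-contained
# version of that generator:

from itertools import combinations

import networkx as nx


def connected_subgraphs(graph, size):
    # Lazily yield weakly connected induced subgraphs with `size` nodes
    for nodes in combinations(graph.nodes, size):
        sub = graph.subgraph(nodes)
        if nx.is_weakly_connected(sub):
            yield sub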
Example #27
0
import glob
import imageio
import numpy as np
import scipy.misc
import pickle
import math
import tensorflow as tf
from pathos.multiprocessing import ProcessPool
import h5py

flags = tf.app.flags
flags.DEFINE_integer("epoch", 200, "Epoch to train [200]")
flags.DEFINE_float("learning_rate", 0.0005, "Learning rate [0.0005]")
flags.DEFINE_float("beta1", 0.9, "Momentum term of adam [0.5]")
flags.DEFINE_float("beta2", 0.999, "Momentum term of adam [0.5]")
flags.DEFINE_integer("batch_size", 64, "batch size used in training [64]")
flags.DEFINE_integer("param_size", 15, "batch size used in training [64]")
pool = ProcessPool()

FLAGS = flags.FLAGS


def load_real_images():
    f = h5py.File("../RenderGAN-tensorflow/data/beestags/real_tags.hdf5", "r")

    raw = f["rois"]
    return raw


def load():
    """load data and labels in parallel"""
    load_helper_im = lambda i: np.reshape(
        np.array(
Example #28
0
    def get_square_blender_dir(self,
                               out_dir,
                               n_total_frames,
                               use_key_frames=True):
        if not isdir(out_dir):
            os.makedirs(out_dir)
        n_key_frames = 4
        with open(join(out_dir, 'last_frame.txt'), 'w') as f:
            f.write('{} {}\n'.format(n_key_frames, n_total_frames))
        if use_key_frames:
            key_frame_vt = self.get_key_frame_vt(range(n_key_frames))
        else:
            camera_pos_list = []
        total_length = (2 + 2 * np.sqrt(2))
        cam_path = join(self.sample_dir, 'camera.txt')
        cameras = np.loadtxt(cam_path)

        for i in range(n_total_frames):
            t = i / n_total_frames * total_length
            if t < 1:
                seg_t = t
                camera_pos = (1 - seg_t) * cameras[0] + seg_t * cameras[1]
                w = np.array([1 - seg_t, seg_t, 0, 0])
            elif t < 1 + np.sqrt(2):
                seg_t = (t - 1) / np.sqrt(2)
                camera_pos = (1 - seg_t) * cameras[1] + seg_t * cameras[2]
                w = np.array([
                    seg_t * (1 - seg_t), (1 - seg_t) * (1 - seg_t),
                    seg_t * seg_t, (1 - seg_t) * seg_t
                ])
            elif t < 2 + np.sqrt(2):
                seg_t = t - (1 + np.sqrt(2))
                camera_pos = (1 - seg_t) * cameras[2] + seg_t * cameras[3]
                w = np.array([0, 0, 1 - seg_t, seg_t])
            else:
                seg_t = (t - (2 + np.sqrt(2))) / np.sqrt(2)
                camera_pos = (1 - seg_t) * cameras[3] + seg_t * cameras[0]
                w = np.array([
                    seg_t * seg_t, (1 - seg_t) * seg_t, seg_t * (1 - seg_t),
                    (1 - seg_t) * (1 - seg_t)
                ])

            if use_key_frames:
                vt = sum([key_frame_vt[i] * w[i] for i in range(4)])
                out_path = join(out_dir, 'frame_{}.obj'.format(i))
                print('write to', out_path)
                write_obj(Obj(v=self.pd_v, f=self.fcs, vt=vt), out_path)
            else:
                camera_pos_list.append(camera_pos)

            camera_transform = self.blender_pos(camera_pos), np.array(
                [1, 0, 0, 0])
            out_path = join(out_dir, 'cam_{}.txt'.format(i))
            # print('write to',out_path)
            self.write_camera_transform(out_path, camera_transform)

        if not use_key_frames:
            n_threads = 30

            def f(frame_i):
                self.get_fill_obj(camera_pos_list[frame_i],
                                  postfix=str(frame_i),
                                  frame=frame_i,
                                  check=False)

            pool = ProcessPool(nodes=n_threads)
            pool.map(f, range(n_total_frames))
Example #29
0
alpha_12 = np.zeros(boot_n)
alpha_13 = np.zeros(boot_n)
alpha_14 = np.zeros(boot_n)
alpha_15 = np.zeros(boot_n)
beta_0 = np.zeros(boot_n)
beta_1 = np.zeros(boot_n)
beta_2 = np.zeros(boot_n)
beta_3 = np.zeros(boot_n)
beta_4 = np.zeros(boot_n)
gamma_0 = np.zeros(boot_n)
gamma_1 = np.zeros(boot_n)
gamma_2 = np.zeros(boot_n)

start_time = time.time()

pool = ProcessPool(nodes=18)
dics = pool.map(simulation, range(boot_n))
pool.close()
pool.join()
pool.clear()

time_opt = time.time() - start_time
print('Done in')
print("--- %s seconds ---" % (time_opt))

#saving results

for j in range(boot_n):

    alpha_00[j] = dics[j][0]
    alpha_01[j] = dics[j][1]
Example #30
0
def graph_sampling(graph: FSN, strategy: Optional[str] = "MetaDiff",
                   n_jobs: Optional[int] = 4,
                   use_cache: Optional[bool] = True, **kwargs) \
        -> List[List[Union[str, int]]]:
    """
    Sampling the sequences of nodes from FSN w.r.t. chosen strategy
    Parameters
    ----------
    graph : FSN object
        Graph to be processed
    strategy : str, default is 'MetaDiff'
        Walking strategy to be used
    n_jobs : int, default is 4
        Number of workers to be created in parallel pool
    use_cache : bool, default is True
        To use the previously cached files

    Returns
    -------
    Sampled sequences of BP nodes
    """
    set_new_config(**kwargs)
    local_logger = logging.getLogger(f"{__name__}")
    if use_cache and os.path.isfile(CONFIG.WORK_FOLDER[0] + "sampled_sequences_cached.pkl"):
        local_logger.info("Loading sequences from cache... wait...")
        try:
            with open(CONFIG.WORK_FOLDER[0] + "sampled_sequences_cached.pkl", "rb") as file:
                res = pickle.load(file)
            local_logger.info(f"Total number of raw sampled sequences is {len(res)}")
            local_logger.info(f"Average length of sequences is {sum(map(len, res)) / float(len(res))}")
            return res
        except FileNotFoundError:
            local_logger.info("File not found... Recalculate \n")
            pass
        except Exception as e:
            local_logger.error(f"Unexpected error: {e}")
    local_logger.info("Sampling sequences... wait...")
    max_processes = min(n_jobs, os.cpu_count())  # cap at available CPUs
    global walk
    if strategy in strategy_to_class.keys():
        walk = strategy_to_class[strategy](G=graph, walk_length=CONFIG.WALKS_LENGTH,
                                           direction=CONFIG.DIRECTION,
                                           pressure=CONFIG.PRESSURE, allow_back=CONFIG.ALLOW_BACK)
    else:
        raise KeyError(
            f"The given strategy {strategy} is unknown. The following ones are implemented: {strategy_to_class.keys()}")
    sampling_pool = ProcessPool(nodes=max_processes)
    local_logger.info("Created a Pool with " + str(max_processes) + " processes ")
    # required to restart pool to update CONFIG inside the parallel part
    sampling_pool.terminate()
    sampling_pool.restart()
    BPs = graph.get_BPs()
    n_BPs = len(BPs)
    sampled = list()
    try:
        with tqdm(total=n_BPs) as pbar:
            for i, res in enumerate(sampling_pool.uimap(wrappedWalk, BPs)):
                sampled.append(res)
                pbar.update()
    except KeyboardInterrupt:
        print('Got ^C while pool mapping, terminating the pool')
        sampling_pool.terminate()
    res = list(itertools.chain(*sampled))
    sampling_pool.terminate()
    sampling_pool.restart()
    local_logger.info("Cashing sampled sequences!")
    if use_cache:
        with open(CONFIG.WORK_FOLDER[0] + "sampled_sequences_cached.pkl", "wb") as file:
            pickle.dump(res, file)
    local_logger.info(f"Total number of raw sampled sequences is {len(res)}")
    local_logger.info(f"Average length of sequences is {sum(map(len, res)) / float(len(res))}")
    return res
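# pathos caches pool instances between calls, so a pool created with the same
# configuration can come back with workers holding stale module state; the
# terminate()/restart() pair above forces fresh worker processes so the
# updated CONFIG is re-serialized into them. The idiom in isolation (caching
# details may vary across pathos versions):

from pathos.multiprocessing import ProcessPool

pool = ProcessPool(nodes=2)
print(pool.map(lambda x: x + 1, range(4)))  # [1, 2, 3, 4]

pool.terminate()  # drop the cached worker processes
pool.restart()    # spawn fresh ones that pick up current global state
print(pool.map(lambda x: x * 2, range(4)))  # [0, 2, 4, 6]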