Example #1
0
def main():
    if os.path.exists('log\\' + GeneralConfig.ENV + '_' +
                      GeneralConfig.TOPIC_PREFIX + '_' +
                      GeneralConfig.ROWCOUNT_LOG_FILE):
        os.remove('log\\' + GeneralConfig.ENV + '_' +
                  GeneralConfig.TOPIC_PREFIX + '_' +
                  GeneralConfig.ROWCOUNT_LOG_FILE)

    pool = ProcessPool(nodes=cpu_count() - 1 or 1)

    # pool.amap(send_aftermarket_part, [AftermarketPartConfig.TOPIC], [AftermarketPartConfig.KAFKA_KEY])
    # pool.amap(send_description, [DescriptionConfig.TOPIC], [DescriptionConfig.KAFKA_KEY])
    # pool.amap(send_engineering_part, [EngineeringPartConfig.TOPIC], [EngineeringPartConfig.KAFKA_KEY])
    # pool.amap(send_engineering_part_function, [EngineeringPartFunctionConfig.TOPIC], [EngineeringPartFunctionConfig.KAFKA_KEY])
    # pool.amap(send_engineering_part_usage, [EngineeringPartUsageConfig.TOPIC], [EngineeringPartUsageConfig.KAFKA_KEY])
    # pool.amap(send_feature, [FeatureConfig.TOPIC], [FeatureConfig.KAFKA_KEY])
    # pool.amap(send_feature_family, [FeatureFamilyConfig.TOPIC], [FeatureFamilyConfig.KAFKA_KEY])
    pool.amap(send_hierarchy, [HierarchyConfig.TOPIC],
              [HierarchyConfig.KAFKA_KEY])
    # pool.amap(send_hierarchy_illustration, [HierarchyIllustrationConfig.TOPIC], [HierarchyIllustrationConfig.KAFKA_KEY])
    # pool.amap(send_hierarchy_usage, [HierarchyUsageConfig.TOPIC], [HierarchyUsageConfig.KAFKA_KEY])
    # pool.amap(send_section_callout, [SectionCalloutConfig.TOPIC], [SectionCalloutConfig.KAFKA_KEY])
    # pool.amap(send_section_part_usage, [SectionPartUsageConfig.TOPIC], [SectionPartUsageConfig.KAFKA_KEY])
    # pool.amap(send_supersession, [SupersessionConfig.TOPIC], [SupersessionConfig.KAFKA_KEY])
    # pool.amap(send_intray, [IntrayConfig.TOPIC], [IntrayConfig.KAFKA_KEY])
    # pool.amap(send_vin, [VinConfig.TOPIC], [VinConfig.KAFKA_KEY])

    pool.close()
    pool.join()
Example #2
0
def Scrap_landing(allowed_domains, start_urls):
    pool = ProcessPool(nodes=4)

    def f_runner(spider):
        ScrapperSpider.allowed_domains = [allowed_domains]
        ScrapperSpider.start_urls = [start_urls]
        from twisted.internet import reactor
        from scrapy.settings import Settings
        import Scrapper.settings as my_settings
        from scrapy.crawler import CrawlerProcess, CrawlerRunner
        crawler_settings = Settings()
        crawler_settings.setmodule(my_settings)
        runner = CrawlerRunner(settings=crawler_settings)
        deferred = runner.crawl(spider)
        deferred.addBoth(lambda _: reactor.stop())
        reactor.run()

    #ScrapperSpider.allowed_domains = [allowed_domains]
    #ScrapperSpider.start_urls = [start_urls]
    #print("\nstart URLS:{}".format(ScrapperSpider.start_urls))
    results = pool.amap(f_runner, [ScrapperSpider])
    t = 0
    while not results.ready():
        time.sleep(5);
        print(".", end=' ');
        t = t + 5
        if t == 30:
            print("\nProcess limited to 30 seconds...EXITING\n");
            return None

    pool.clear()
Example #3
0
    def train(self, outer_n_epoch, outer_l2, outer_std, outer_learning_rate, outer_n_samples_per_ep,
              n_cpu=None, fix_ppo=None, **_):
        # Requires more than 1 MPI process.
        assert MPI.COMM_WORLD.Get_size() > 1
        assert n_cpu is not None
        if fix_ppo:
            ppo_factor_schedule = PiecewiseSchedule([(0, 1.), (int(outer_n_epoch / 16), 0.5)],
                                                    outside_value=0.5)
        else:
            ppo_factor_schedule = PiecewiseSchedule([(0, 1.), (int(outer_n_epoch / 8), 0.)],
                                                    outside_value=0.)

        outer_lr_scheduler = PiecewiseSchedule([(0, outer_learning_rate),
                                                (int(outer_n_epoch / 2), outer_learning_rate * 0.1)],
                                               outside_value=outer_learning_rate * 0.1)

        def objective(env, theta, pool_rank):
            agent = self.create_agent(env, pool_rank)
            loss_n_params = len(agent.get_loss().get_params_1d())
            agent.get_loss().set_params_1d(theta[:loss_n_params])
            if self._outer_evolve_policy_init:
                agent.pi.set_params_1d(theta[loss_n_params:])
            # Agent lifetime is inner_opt_freq * inner_max_n_epoch
            return run_batch_rl(env, agent,
                                inner_opt_freq=self._inner_opt_freq,
                                inner_buffer_size=self._inner_buffer_size,
                                inner_max_n_epoch=self._inner_max_n_epoch,
                                pool_rank=pool_rank,
                                ppo_factor=ppo_factor_schedule.value(epoch),
                                epoch=None)

        # Initialize theta.
        theta = self.init_theta(self._env)
        num_params = len(theta)
        logger.log('Theta dim: {}'.format(num_params))

        # Set up outer loop parameter update schedule.
        adam = Adam(shape=(num_params,), beta1=0., stepsize=outer_learning_rate, dtype=np.float32)

        # Set up intra-machine parallelization.
        logger.log('Using {} proceses per MPI process.'.format(n_cpu))
        from pathos.multiprocessing import ProcessPool
        pool = ProcessPool(nodes=n_cpu)

        begin_time, best_test_return = time.time(), -np.inf
        for epoch in range(outer_n_epoch):

            # Anneal outer learning rate
            adam.stepsize = outer_lr_scheduler.value(epoch)

            noise = np.random.randn(outer_n_samples_per_ep // NUM_EQUAL_NOISE_VECTORS, num_params)
            noise = np.repeat(noise, NUM_EQUAL_NOISE_VECTORS, axis=0)
            theta_noise = theta[np.newaxis, :] + noise * outer_std
            theta_noise = theta_noise.reshape(MPI.COMM_WORLD.Get_size(), -1)

            # Distributes theta_noise vectors to all nodes.
            logger.log('Scattering all perturbed theta vectors and running inner loops ...')

            recvbuf = np.empty(theta_noise.shape[1], dtype='float')
            MPI.COMM_WORLD.Scatter(theta_noise, recvbuf, root=0)
            theta_noise = recvbuf.reshape(-1, num_params)

            # Noise vectors are scattered, run inner loop, parallelized over `pool_size` processes.
            start_time = time.time()
            pool_size = int(outer_n_samples_per_ep / MPI.COMM_WORLD.Get_size())
            results = pool.amap(objective, [self._env] * pool_size, theta_noise, range(pool_size)).get()

            # Extract relevant results
            returns = [utils.ret_to_obj(r['ep_final_rew']) for r in results]
            update_time = [np.mean(r['update_time']) for r in results]
            env_time = [np.mean(r['env_time']) for r in results]
            ep_length = [np.mean(r['ep_length']) for r in results]
            n_ep = [len(r['ep_length']) for r in results]
            mean_ep_kl = [np.mean(r['ep_kl']) for r in results]
            final_rets = [np.mean(r['ep_return'][-3:]) for r in results]

            # We gather the results at node 0
            recvbuf = np.empty([MPI.COMM_WORLD.Get_size(), 7 * pool_size],
                               # 7 = number of scalars in results vector
                               dtype='float') if MPI.COMM_WORLD.Get_rank() == 0 else None
            results_processed_arr = np.asarray(
                [returns, update_time, env_time, ep_length, n_ep, mean_ep_kl, final_rets],
                dtype='float').ravel()
            MPI.COMM_WORLD.Gather(results_processed_arr, recvbuf, root=0)

            # Do outer loop update calculations at node 0
            if MPI.COMM_WORLD.Get_rank() == 0:
                end_time = time.time()
                logger.log(
                    'All inner loops completed, returns gathered ({:.2f} sec).'.format(
                        time.time() - start_time))

                results_processed_arr = recvbuf.reshape(MPI.COMM_WORLD.Get_size(), 7, pool_size)
                results_processed_arr = np.transpose(results_processed_arr, (0, 2, 1)).reshape(-1, 7)
                results_processed = [dict(returns=r[0],
                                          update_time=r[1],
                                          env_time=r[2],
                                          ep_length=r[3],
                                          n_ep=r[4],
                                          mean_ep_kl=r[5],
                                          final_rets=r[6]) for r in results_processed_arr]
                returns = np.asarray([r['returns'] for r in results_processed])

                # ES update
                noise = noise[::NUM_EQUAL_NOISE_VECTORS]
                returns = np.mean(returns.reshape(-1, NUM_EQUAL_NOISE_VECTORS), axis=1)
                theta_grad = relative_ranks(returns).dot(noise) / outer_n_samples_per_ep \
                             - outer_l2 * theta
                theta -= adam.step(theta_grad)

                # Perform `NUM_TEST_SAMPLES` evaluation runs on root 0.
                if epoch % self._outer_plot_freq == 0 or epoch == outer_n_epoch - 1:
                    start_test_time = time.time()
                    logger.log('Performing {} test runs in parallel on node 0 ...'.format(NUM_TEST_SAMPLES))
                    # Evaluation run with current theta
                    test_results = pool.amap(
                        objective,
                        [self._env] * NUM_TEST_SAMPLES,
                        theta[np.newaxis, :] + np.zeros((NUM_TEST_SAMPLES, num_params)),
                        range(NUM_TEST_SAMPLES)
                    ).get()
                    plotting.plot_results(epoch, test_results)
                    test_return = np.mean([utils.ret_to_obj(r['ep_return']) for r in test_results])
                    if test_return > best_test_return:
                        best_test_return = test_return
                        # Save theta as numpy array.
                        self.save_theta(theta)
                    self.save_theta(theta, str(epoch))
                    logger.log('Test runs performed ({:.2f} sec).'.format(time.time() - start_test_time))

                logger.logkv('Epoch', epoch)
                utils.log_misc_stats('Obj', logger, returns)
                logger.logkv('PPOFactor', ppo_factor_schedule.value(epoch))
                logger.logkv('EpochTimeSpent(s)', end_time - start_time)
                logger.logkv('TotalTimeSpent(s)', end_time - begin_time)
                logger.logkv('BestTestObjMean', best_test_return)
                logger.dumpkvs()
Example #4
0
def main_jagcat():
    if os.path.exists('log\\' + GeneralConfig.ENV + '_' +
                      GeneralConfig.TOPIC_PREFIX + '_' +
                      GeneralConfig.ROWCOUNT_LOG_FILE):
        os.remove('log\\' + GeneralConfig.ENV + '_' +
                  GeneralConfig.TOPIC_PREFIX + '_' +
                  GeneralConfig.ROWCOUNT_LOG_FILE)

    pool = ProcessPool(nodes=cpu_count() - 1 or 1)

    pool.amap(send_part_meta, [PartMetaConfig.TOPIC],
              [PartMetaConfig.KAFKA_KEY])
    pool.amap(send_intray, [IntrayConfig.TOPIC], [IntrayConfig.KAFKA_KEY])
    pool.amap(send_description, [DescriptionConfig.TOPIC],
              [DescriptionConfig.KAFKA_KEY])
    pool.amap(send_feature, [FeatureConfig.TOPIC], [FeatureConfig.KAFKA_KEY])
    pool.amap(send_feature_family, [FeatureFamilyConfig.TOPIC],
              [FeatureFamilyConfig.KAFKA_KEY])
    pool.amap(send_hierarchy, [HierarchyConfig.TOPIC],
              [HierarchyConfig.KAFKA_KEY])
    pool.amap(send_hierarchy_illustration, [HierarchyIllustrationConfig.TOPIC],
              [HierarchyIllustrationConfig.KAFKA_KEY])
    pool.amap(send_hierarchy_usage, [HierarchyUsageConfig.TOPIC],
              [HierarchyUsageConfig.KAFKA_KEY])
    pool.amap(send_section_callout, [SectionCalloutConfig.TOPIC],
              [SectionCalloutConfig.KAFKA_KEY])
    pool.amap(send_section_part_usage, [SectionPartUsageConfig.TOPIC],
              [SectionPartUsageConfig.KAFKA_KEY])
    pool.amap(send_vin, [VinConfig.TOPIC], [VinConfig.KAFKA_KEY])

    pool.close()
    pool.join()
Example #5
0
def do_parallel(closure, args, num_processes=6):

    print('Inside do_parallel')

    pool = ProcessPool(nodes=num_processes)
    result = pool.amap(closure, args)
Example #6
0
import time
from pathos.multiprocessing import ProcessPool

# instantiate and configure the worker pool
pool = ProcessPool(nodes=3)

print "- Do a blocking (=synchronous) map on the chosen function"
print(pool.map(pow, [1, 2, 3, 4], [5, 6, 7, 8]))

print "- Do a non-blocking (=asynchronous) map, then get the results"
results = pool.amap(pow, [1, 2, 3, 4], [5, 6, 7, 8])
while not results.ready():
    time.sleep(1)
    print(".")
print(results.get())

print "- Do a non-blocking (=asynchronous) map, then extract the results from the iterator"
results = pool.imap(pow, [1, 2, 3, 4], [5, 6, 7, 8])
print("...")
print(list(results))

print "- Do one item at a time, using a pipe"
print(pool.pipe(pow, 1, 5))
print(pool.pipe(pow, 2, 6))

print "- Do one item at a time, using a non-blocking (=asynchronous) pipe"
result1 = pool.apipe(pow, 1, 5)
result2 = pool.apipe(pow, 2, 6)
print(result1.get())
print(result2.get())