def test_no_failure_with_torch_mp(out_dir):
    shutil.rmtree(out_dir, ignore_errors=True)
    path = build_json(out_dir, save_all=True, save_interval="1")
    path = str(path)
    os.environ["SMDEBUG_CONFIG_FILE_PATH"] = path
    device = "cpu"
    dataloader_kwargs = {}
    cpu_count = 2 if mp.cpu_count() > 2 else mp.cpu_count()

    torch.manual_seed(1)

    model = Net().to(device)
    model.share_memory()  # gradients are allocated lazily, so they are not shared here

    processes = []
    for rank in range(cpu_count):
        p = mp.Process(target=train,
                       args=(rank, model, device, dataloader_kwargs))
        # We first train the model across `num_processes` processes
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

    trial = create_trial(out_dir)

    assert trial.num_workers == 1  # Ensure only one worker saved data
    assert len(trial.tensor_names()) > 20  # Ensure that data was saved
    assert trial.steps() == [0, 1, 2, 3]  # Ensure that steps were saved
    shutil.rmtree(out_dir, ignore_errors=True)
    shutil.rmtree(data_dir, ignore_errors=True)
Example #2
    def __init__(self):

        mp.set_start_method('spawn')
        self._top_N = cf().path["inference"]["top_N"]
        self._using_gpu = cf().path["inference"]["using_gpu"]
        self._device = torch.device(cf().path["system"]["device"])

        self._client_len = 0
        self._sku = 0
        self._user_data = OrderedDict()
        self._product_data = OrderedDict()

        self.user_col_name = OrderedDict()
        self.whole_user_col_name = OrderedDict()

        self.product_col_name = OrderedDict()
        self.whole_product_col_name = OrderedDict()

        # set process count
        self._num_processes = max(1, int(mp.cpu_count() * 0.6))
        self._num_sampler_processes = max(1, int(mp.cpu_count() * 0.2))

        self._using_gpu = False
        self._sampler_flag = mp.Manager().list()

        self.load_user_raw_data()
        self.load_product_raw_data()
Example #3
def main(args):
    if args.load is False and os.path.isfile('./model/walker.pt'):
        while True:
            load = input('Are you sure you want to erase the previous training? (y/n) ')
            if load.lower() in ('y', 'yes', '1'):
                break
            elif load.lower() in ('n', 'no', '0'):
                import sys
                sys.exit()

    # create shared variables between all the processes
    manager = mp.Manager()
    # used to send the results of the net
    common_dict = manager.dict()
    # a queue of batches to be fed to the training net
    mem_queue = manager.Queue(1500 * mp.cpu_count())
    # a queue of operations pending
    process_queue = manager.Queue(mp.cpu_count()-1)
    workers = mp.cpu_count() if args.train else 2
    with mp.Pool(workers) as pool:
        try:
            print(f"Running pool with {workers} workers")
            pool.apply_async(gpu_thread, (args.load, mem_queue, process_queue, common_dict, 0))
            if args.render:
                pool.apply_async(cpu_thread, (2 if not args.train else 1, mem_queue, process_queue, common_dict, 1))
            for i in range(1+args.render, workers):
                pool.apply_async(cpu_thread, (0, mem_queue, process_queue, common_dict, i))

            # Wait for children to finish
            pool.close()
            pool.join()
        except KeyboardInterrupt:
            pool.terminate()
            pool.join()
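The Manager objects above are what make the shared state work with a Pool: a plain mp.Queue cannot be pickled into pool workers, and an ordinary dict would not propagate changes back to the parent, so manager proxies are used instead. A minimal, self-contained sketch of the same pattern (hypothetical producer/consumer functions, not the project's gpu_thread/cpu_thread):

import multiprocessing as mp

def producer(queue, shared):
    for i in range(5):
        queue.put(i)               # enqueue work items
    shared['produced'] = 5

def consumer(queue, shared):
    # blocks on get() until the producer has put the items
    shared['sum'] = sum(queue.get() for _ in range(5))

if __name__ == '__main__':
    manager = mp.Manager()
    shared = manager.dict()        # results visible to every process
    queue = manager.Queue()        # operations pending
    with mp.Pool(2) as pool:
        r1 = pool.apply_async(producer, (queue, shared))
        r2 = pool.apply_async(consumer, (queue, shared))
        r1.get()
        r2.get()
    print(dict(shared))            # {'produced': 5, 'sum': 10}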
Example #4
    def collect_policy_losses(self):
        policy_losses = []
        if self.multi_process:
            game_queue = mp.Queue()
            done_queue = mp.Queue()

            # submit games
            for n in range(self.num_games):
                game_queue.put(self.setup_games(n))

            # start processes
            for i in range(mp.cpu_count()):
                mp.Process(target=self_play_multi,
                           args=(game_queue, done_queue)).start()

            for _ in range(self.num_games):
                color, reward, policy_loss = done_queue.get()
                self.self_play_log(color, reward, policy_loss)
                policy_losses.append(policy_loss)

            # stop the processes
            for _ in range(mp.cpu_count()):
                game_queue.put('STOP')

            return policy_losses
        else:
            for n in range(self.num_games):
                args = self.setup_games(n)
                color, reward, policy_loss = self_play(*args)
                self.self_play_log(color, reward, policy_loss)
                policy_losses.append(policy_loss)
            return policy_losses
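The submit / worker / 'STOP' pattern above is the standard way to drain a task queue with a fixed number of processes: one sentinel is enqueued per worker so every process eventually exits. A minimal, self-contained sketch of that idea (hypothetical worker, not the project's self_play_multi):

import multiprocessing as mp

def worker(task_queue, done_queue):
    # consume tasks until the 'STOP' sentinel is seen
    for task in iter(task_queue.get, 'STOP'):
        done_queue.put(task * task)

if __name__ == '__main__':
    task_queue, done_queue = mp.Queue(), mp.Queue()
    for n in range(8):
        task_queue.put(n)                              # submit work
    n_workers = mp.cpu_count()
    for _ in range(n_workers):
        mp.Process(target=worker, args=(task_queue, done_queue)).start()
    results = [done_queue.get() for _ in range(8)]     # collect exactly one result per task
    for _ in range(n_workers):
        task_queue.put('STOP')                         # stop the processes
    print(sorted(results))                             # [0, 1, 4, 9, 16, 25, 36, 49]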
Example #5
def calc_chunksize(num_dicts,
                   min_chunksize=4,
                   max_chunksize=2000,
                   max_processes=128):
    if mp.cpu_count() > 3:
        num_cpus = min(mp.cpu_count() - 1 or 1,
                       max_processes)  # -1 to keep a CPU core free for xxx
    else:
        num_cpus = min(
            mp.cpu_count(),
            max_processes)  # when there are few cores, we use all of them

    dicts_per_cpu = np.ceil(num_dicts / num_cpus)
    # automatic adjustment of multiprocessing chunksize
    # for small files (containing few dicts) we want a small chunksize to utilize all available cores, but never
    # less than 2, because we need it to sample another random sentence in LM finetuning
    # for large files we want to minimize process spawning without giving too much data to one process, so we
    # clip it at max_chunksize (a worked example follows the function)
    multiprocessing_chunk_size = int(
        np.clip((np.ceil(dicts_per_cpu / 5)),
                a_min=min_chunksize,
                a_max=max_chunksize))
    # This lets us avoid cases in lm_finetuning where a chunk only has a single doc and hence cannot pick
    # a valid next sentence substitute from another document
    if num_dicts != 1:
        while num_dicts % multiprocessing_chunk_size == 1:
            multiprocessing_chunk_size += 1
    dict_batches_to_process = int(num_dicts / multiprocessing_chunk_size)
    num_processes = min(num_cpus, dict_batches_to_process) or 1

    return multiprocessing_chunk_size, num_processes
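A worked example of the heuristic above, assuming a hypothetical 16-core machine (so num_cpus = 15):

# num_dicts = 100_000 -> dicts_per_cpu = ceil(100_000 / 15) = 6_667
#                        chunksize     = clip(ceil(6_667 / 5), 4, 2000) = 1_334
# num_dicts = 50      -> dicts_per_cpu = ceil(50 / 15) = 4
#                        chunksize     = clip(ceil(4 / 5), 4, 2000) = 4 (the minimum)
chunk_size, n_procs = calc_chunksize(100_000)   # -> (1334, 15) on that machine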
Example #6
def run_MCTS(args, start_idx=0, iteration=0):
    net_to_play = "%s_iter%d.pth.tar" % (args.neural_net_name, iteration)
    net = ConnectNet()
    cuda = torch.cuda.is_available()
    if cuda:
        net.cuda()
    
    if args.MCTS_num_processes > 1:
        logger.info("Preparing model for multi-process MCTS...")
        mp.set_start_method("spawn",force=True)
        net.share_memory()
        net.eval()
    
        current_net_filename = os.path.join("./model_data/",\
                                        net_to_play)
        if os.path.isfile(current_net_filename):
            checkpoint = torch.load(current_net_filename)
            net.load_state_dict(checkpoint['state_dict'])
            logger.info("Loaded %s model." % current_net_filename)
        else:
            torch.save({'state_dict': net.state_dict()}, os.path.join("./model_data/",\
                        net_to_play))
            logger.info("Initialized model.")
        
        processes = []
        if args.MCTS_num_processes > mp.cpu_count():
            num_processes = mp.cpu_count()
            logger.info("Required number of processes exceed number of CPUs! Setting MCTS_num_processes to %d" % num_processes)
        else:
            num_processes = args.MCTS_num_processes
        
        logger.info("Spawning %d processes..." % num_processes)
        with torch.no_grad():
            for i in range(num_processes):
                p = mp.Process(target=MCTS_self_play, args=(net, args.num_games_per_MCTS_process,
                                                            start_idx, i, args, iteration))
                p.start()
                processes.append(p)
            for p in processes:
                p.join()
        logger.info("Finished multi-process MCTS!")
    
    elif args.MCTS_num_processes == 1:
        logger.info("Preparing model for MCTS...")
        net.eval()
        
        current_net_filename = os.path.join("./model_data/",\
                                        net_to_play)
        if os.path.isfile(current_net_filename):
            checkpoint = torch.load(current_net_filename)
            net.load_state_dict(checkpoint['state_dict'])
            logger.info("Loaded %s model." % current_net_filename)
        else:
            torch.save({'state_dict': net.state_dict()}, os.path.join("./model_data/",\
                        net_to_play))
            logger.info("Initialized model.")
        
        with torch.no_grad():
            MCTS_self_play(net, args.num_games_per_MCTS_process, start_idx, 0, args, iteration)
        logger.info("Finished MCTS!")
Example #7
def calc_chunksize(num_dicts):
    MIN_CHUNKSIZE = 4
    MAX_CHUNKSIZE = 2000
    num_cpus = mp.cpu_count() or 1
    dicts_per_cpu = np.ceil(num_dicts / num_cpus)
    # automatic adjustment of multiprocessing chunksize
    # for small files (containing few dicts) we want a small chunksize to utilize all available cores, but never
    # less than 2, because we need it to sample another random sentence in LM finetuning
    # for large files we want to minimize process spawning without giving too much data to one process, so we
    # clip it at MAX_CHUNKSIZE
    multiprocessing_chunk_size = int(np.clip((np.ceil(dicts_per_cpu / 5)), a_min=MIN_CHUNKSIZE, a_max=MAX_CHUNKSIZE))
    dict_batches_to_process = int(num_dicts / multiprocessing_chunk_size)
    num_cpus_used = min(mp.cpu_count(), dict_batches_to_process) or 1
    return multiprocessing_chunk_size, num_cpus_used
Example #8
    def calculate_best_energy(self):
        if self.n_spins <= 10:
            # Generally, for small systems the time taken to start multiple processes is not worth it.
            res = self.calculate_best_brute()

        else:
            # Start up processing pool
            n_cpu = max(mp.cpu_count() // 2, 1)

            pool = mp.Pool(mp.cpu_count())

            # Split up state trials across the number of cpus
            iMax = 2**(self.n_spins)
            args = np.round(
                np.linspace(0,
                            np.ceil(iMax / n_cpu) * n_cpu, n_cpu + 1))
            arg_pairs = [list(args) for args in zip(args, args[1:])]

            # Try all the states.
            #             res = pool.starmap(self._calc_over_range, arg_pairs)
            try:
                res = pool.starmap(self._calc_over_range, arg_pairs)
                # Return the best solution,
                idx_best = np.argmin([e for e, s in res])
                res = res[idx_best]
            except Exception as e:
                # Falling back to single-thread implementation.
                # res = self.calculate_best_brute()
                res = self._calc_over_range(0, 2**(self.n_spins))
            finally:
                # No matter what happens, make sure we tidy up after ourselves.
                pool.close()

            if self.spin_basis == SpinBasis.BINARY:
                # convert {1,-1} --> {0,1}
                best_score, best_spins = res
                best_spins = (1 - best_spins) / 2
                res = best_score, best_spins

            if self.optimisation_target == OptimisationTarget.CUT:
                best_energy, best_spins = res
                best_cut = self.calculate_cut(best_spins)
                res = best_cut, best_spins
            elif self.optimisation_target == OptimisationTarget.ENERGY:
                pass
            else:
                raise NotImplementedError()

        return res
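For reference, the linspace splitting above divides the 2**n_spins candidate states into n_cpu contiguous ranges. A hedged illustration with n_spins = 12 and 4 workers (standalone, not output from the class):

import numpy as np

n_cpu = 4
iMax = 2 ** 12                                  # 4096 candidate states
args = np.round(np.linspace(0, np.ceil(iMax / n_cpu) * n_cpu, n_cpu + 1))
arg_pairs = [list(a) for a in zip(args, args[1:])]
print(arg_pairs)   # [[0.0, 1024.0], [1024.0, 2048.0], [2048.0, 3072.0], [3072.0, 4096.0]]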
Example #9
def transform_batch_model(model,
                          data,
                          batchsize,
                          logj=None,
                          start=0,
                          end=None,
                          param=None,
                          pool=None,
                          nocuda=False):

    if logj is None:
        logj = torch.zeros(len(data), device=data.device)

    if pool is None:
        _transform_batch_model(model,
                               data,
                               logj,
                               0,
                               batchsize,
                               start=start,
                               end=end,
                               param=param,
                               nocuda=nocuda)
    else:
        if torch.cuda.is_available() and not nocuda:
            nprocess = torch.cuda.device_count()
        else:
            nprocess = mp.cpu_count()
        param0 = [(model, data, logj, i, batchsize, len(data) * i // nprocess,
                   len(data) * (i + 1) // nprocess, start, end, param, nocuda)
                  for i in range(nprocess)]
        pool.starmap(_transform_batch_model, param0)

    return data, logj
Example #10
def transform_batch_layer(layer,
                          data,
                          batchsize,
                          logj=None,
                          direction='forward',
                          param=None,
                          pool=None,
                          nocuda=False):

    assert direction in ['forward', 'inverse']

    if logj is None:
        logj = torch.zeros(len(data), device=data.device)

    if pool is None:
        _transform_batch_layer(layer,
                               data,
                               logj,
                               0,
                               batchsize,
                               direction=direction,
                               param=param,
                               nocuda=nocuda)
    else:
        if torch.cuda.is_available() and not nocuda:
            nprocess = torch.cuda.device_count()
        else:
            nprocess = mp.cpu_count()
        param0 = [(layer, data, logj, i, batchsize, len(data) * i // nprocess,
                   len(data) * (i + 1) // nprocess, direction, param, nocuda)
                  for i in range(nprocess)]
        pool.starmap(_transform_batch_layer, param0)

    return data, logj
Example #11
def calc_write_learning_curve(exp: Experiment, max_num_workers=40):
    num_workers = min(max_num_workers, multiprocessing.cpu_count() - 1, exp.num_folds)

    name = exp.name
    print("got %d evaluations to calculate" % len(exp.jobs))
    results_path = results_folder + "/" + name
    os.makedirs(results_path, exist_ok=True)
    start = time()
    scores = calc_scores(exp.score_task,
                         [split for train_size, split in exp.jobs],
                         n_jobs=num_workers)
    duration = time() - start
    meta_data = {
        "duration": duration,
        "num-workers": num_workers,
        "experiment": str(exp),
    }
    data_io.write_json(results_path + "/meta_datas.json", meta_data)
    print("calculating learning-curve for %s took %0.2f seconds" %
          (name, duration))
    pprint(scores)
    results = groupandsort_by_first(
        zip([train_size for train_size, _ in exp.jobs], scores))
    data_io.write_json(results_path + "/learning_curve.json", results)

    trainsize_to_mean_std_scores = {
        train_size: tuple_2_dict(calc_mean_and_std(m))
        for train_size, m in results.items()
    }
    data_io.write_json(
        results_path + "/learning_curve_meanstd.json",
        trainsize_to_mean_std_scores,
    )
Example #12
def recognize_smoke_worker(rank, world_size, learner, transform, rgb_p,
                           file_name_list, ct_sub_list, parallel, smoke_thr,
                           activation_thr, queue):
    # Set the dataloader
    num_workers = max(mp.cpu_count() - 2, 0)
    dataloader = set_dataloader(rank, world_size, file_name_list, ct_sub_list,
                                rgb_p, transform, num_workers, parallel)

    # Set model
    p_model = "../data/saved_i3d/paper_result/full-augm-rgb/55563e4-i3d-rgb-s3/model/573.pt"
    model = learner.set_model(rank,
                              world_size,
                              learner.mode,
                              p_model,
                              parallel,
                              phase="test")
    model.train(False)  # set model to evaluate mode (IMPORTANT)
    grad_cam = GradCam(model, use_cuda=learner.use_cuda, normalize=False)

    # Iterate over batch data
    smoke_pb_list = []
    activation_ratio_list = []
    epochtime_list = []
    for d in tqdm.tqdm(dataloader):
        epochtime_list.append(int(d["epochtime"][0]))
        # Compute probability of having smoke
        v = d["frames"][0]
        if learner.use_cuda and torch.cuda.is_available():
            v = v.cuda()
        pred, pred_upsample = learner.make_pred(model, v, upsample=None)
        pred = F.softmax(pred.squeeze().transpose(0, 1)).cpu().detach().numpy()[:, 1]
        pred_upsample = F.softmax(pred_upsample.squeeze().transpose(0, 1)).cpu().detach().numpy()[:, 1]
        smoke_pb = np.median(pred)  # use the median as the probability
        smoke_pb_list.append(round(float(smoke_pb), 3))
        # GradCAM (class activation mapping)
        # Compute the ratio of the activated region that will affect the probability
        # This can potentially be used to estimate the number of smoke pixels
        # Need to check more papers about weakly supervised learning
        C = grad_cam.generate_cam(
            v, 1)  # 1 is the target class, which means having smoke emissions
        C = C.reshape((C.shape[0], -1))
        #print(pd.DataFrame(data={"GradCAM": C.flatten()}).describe().applymap(lambda x: "%.3f" % x))
        if smoke_pb > smoke_thr:  # only compute the activation ratio when smoke is predicted
            C = np.multiply(C > activation_thr, 1)  # make the binary mask
            activation_ratio = np.sum(
                C, axis=1, dtype=np.uint32) / (learner.image_size**2)
            activation_ratio[pred_upsample < smoke_thr] = 0
            activation_ratio = np.mean(
                activation_ratio)  # use the mean as the activation ratio
            activation_ratio_list.append(round(float(activation_ratio), 3))
        else:
            activation_ratio_list.append(0.0)

    if queue is None:
        return (smoke_pb_list, activation_ratio_list, epochtime_list)
    else:
        queue.put((smoke_pb_list, activation_ratio_list, epochtime_list))
Example #13
def run_multiple_times(args, run_fct):
    cpu_count = mp.cpu_count()
    gpu_count = torch.cuda.device_count()

    # Clone arguments into list & Distribute workload across GPUs
    args_across_workers = [copy.deepcopy(args) for r in range(args.RUN_TIMES)]
    if gpu_count > 0:
        gpu_counter = 0
        for r in range(args.RUN_TIMES):
            args_across_workers[r].seed = r
            args_across_workers[r].device_id = gpu_counter
            gpu_counter += 1
            if gpu_counter > gpu_count-1:
                gpu_counter = 0

    # Execute different runs/random seeds in parallel
    pool = mp.Pool(cpu_count-1)
    df_across_runs = pool.map(run_fct, args_across_workers)
    pool.close()

    # Post process results
    df_concat = pd.concat(df_across_runs)
    by_row_index = df_concat.groupby(df_concat.index)
    df_means, df_stds = by_row_index.mean(), by_row_index.std()

    if args.ENV_ID == "dense-v0":
        sfname = "results/GRIDWORLD/" + str(args.RUN_TIMES) + "_RUNS_" + str(args.AGENT) + "_" + args.SAVE_FNAME
    else:
        sfname = "results/ATARI/" + str(args.RUN_TIMES) + "_RUNS_" + str(args.AGENT) + "_" + args.SAVE_FNAME
    print("Saved agents to {}".format(sfname))
    df_means.to_csv(sfname)
    return df_means, df_stds
Example #14
    def __init__(self, video_vis, n_workers=None):
        """
        Args:
            video_vis: the video visualizer instance handed to every
                _VisWorker process.
            n_workers (Optional[int]): number of CPUs for running video visualizer.
                If not given, use all CPUs.
        """

        num_workers = mp.cpu_count() if n_workers is None else n_workers

        self.task_queue = mp.Queue()
        self.result_queue = mp.Queue()
        self.get_indices_ls = []
        self.procs = []
        self.result_data = {}
        self.put_id = -1
        for _ in range(max(num_workers, 1)):
            self.procs.append(
                AsyncVis._VisWorker(video_vis, self.task_queue,
                                    self.result_queue))

        for p in self.procs:
            p.start()

        atexit.register(self.shutdown)
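A hedged construction sketch (assuming `video_vis` is an already-built video visualizer object):

async_vis = AsyncVis(video_vis, n_workers=4)   # spawns four _VisWorker processes
# shutdown is registered with atexit above, so no explicit cleanup call is needed here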
Example #15
        def get_pred_large(pan_2ch_all, vid_num, nframes_per_video=6):
            vid_num = len(pan_2ch_all) // nframes_per_video  # 10
            cpu_num = multiprocessing.cpu_count() // 2  # 32 --> 16
            nprocs = min(vid_num, cpu_num)  # 10
            max_nframes = cpu_num * nframes_per_video
            nsplits = (len(pan_2ch_all) - 1) // max_nframes + 1
            annotations, pan_all = [], []
            for i in range(0, len(pan_2ch_all), max_nframes):
                print('==> Read and convert VPS output - split %d/%d' %
                      ((i // max_nframes) + 1, nsplits))
                pan_2ch_part = pan_2ch_all[i:min(i + max_nframes, len(pan_2ch_all))]
                pan_2ch_split = np.array_split(pan_2ch_part, nprocs)
                workers = multiprocessing.Pool(processes=nprocs)
                processes = []
                for proc_id, pan_2ch_set in enumerate(pan_2ch_split):
                    p = workers.apply_async(
                        self.converter_2ch_track_core,
                        (proc_id, pan_2ch_set, color_generator))
                    processes.append(p)
                workers.close()
                workers.join()

                for p in processes:
                    p = p.get()
                    annotations.extend(p[0])
                    pan_all.extend(p[1])

            pan_json = {'annotations': annotations}
            return pan_all, pan_json
Example #16
    def begin_background(self):
        self.queue = mp.Queue()

        def t(queue):
            while True:
                if queue.empty():
                    continue
                img, name = queue.get()
                if name:
                    try:
                        basename, ext = os.path.splitext(name)
                        if ext != '.png':
                            name = '{}.png'.format(basename)
                        imageio.imwrite(name, img)
                    except Exception as e:
                        print(e)
                else:
                    return

        worker = lambda: mp.Process(
            target=t, args=(self.queue, ), daemon=False)
        cpu_count = min(8, mp.cpu_count() - 1)
        self.process = [worker() for _ in range(cpu_count)]
        for p in self.process:
            p.start()
Example #17
def whole_images(training_set, validation_set):
    training_batch_size = 16
    validation_batch_size = 2 * training_batch_size

    training_loader = torch.utils.data.DataLoader(
        training_set,
        batch_size=training_batch_size,
        shuffle=True,
        num_workers=multiprocessing.cpu_count(),
    )

    validation_loader = torch.utils.data.DataLoader(
        validation_set,
        batch_size=validation_batch_size,
        num_workers=multiprocessing.cpu_count(),
    )
Example #18
    def update_holdout_chromosomes(self, holdout_set):
        images = []
        time_zero = time.time()
        block_size = HDF_MULTI_BLOCK_SIZE
        loc_chunks = list(range(0, self.__len__(), block_size))
        num_processes = multiprocessing.cpu_count()
        print('Process with %d processors' % num_processes)
        with multiprocessing.Pool(num_processes) as pool:
            f = functools.partial(process_location,
                                  block_size=block_size,
                                  path=self.h5_path,
                                  holdout_set=holdout_set)
            for map_image in tqdm.tqdm(pool.imap_unordered(f, loc_chunks),
                                       total=len(loc_chunks)):
                images.extend(map_image)

        print('Saving num output locations:')
        print(len(images))
        # Not necessary -- just for debug consistency -- remove to save time if big
        images.sort()
        print(images[:10])
        print('Took %.2fs to process %d loc with %d processes' %
              (time.time() - time_zero, self.__len__(), num_processes))
        for idx in tqdm.tqdm(images, total=len(images)):
            self.chromosome_holdout[idx] = True
Example #19
    def add_batched_coordinates(self, coords, lr=1, avg=1):
        start_time = time()
        num_procs = cpu_count()
        self.share_memory()
        processes = []

        # sort and bin into layers
        params_coords = {}
        sorted_coords = sorted(coords, key=lambda x: x[0][0])

        for coord_val_pair in sorted_coords:
            layer = coord_val_pair[0][0]

            if layer in params_coords:
                params_coords[layer].append(coord_val_pair)
            else:
                params_coords[layer] = [coord_val_pair]

        # update parameters in parallel
        for layer_index in params_coords.keys():
            p = Process(target=self.add_coordinates,
                        args=(
                            layer_index,
                            params_coords[layer_index],
                            lr,
                            avg,
                        ))
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

        self.log.info('time: {} s'.format(time() - start_time))
Example #20
    def __init__(self, params, model_path):
        self.params = params
        self.model_path = model_path
        self.num_of_processes = mp.cpu_count()
        self.global_model = ActorCritic(self.params.stack_size,
                                        get_action_space())
        self.global_model.share_memory()
Example #21
    def evaluate(self, batch_size=100, cpu=-1, filtering=True) -> Dict[str, float]:
        """Evaluates a model by retrieving scores from the (implemented) score_batch function.

            :param batch_size:
                Size of a test batch
            :param cpu:
                Number of processors to use; -1 means all processors are used.
            :param filtering:
                Whether to evaluate in the filtered setting.

            :return:
                Dictionary containing the evaluation results (keys: 'hits@1', 'hits@3', 'hits@10', 'mrr')
        """
        self.filtering = filtering

        start = time.time()
        n_batches, batches = self.dl.get_test_batches(batch_size)

        if cpu == 1 or cpu == 0:
            result = []
            for batch in tqdm(batches, total=n_batches):
                result.append(self.evaluate_batch(batch))
        elif cpu == -1:
            pool = mp.Pool(mp.cpu_count())
            result = pool.map(self.evaluate_batch, batches)
        else:
            pool = mp.Pool(cpu)
            result = pool.map(self.evaluate_batch, batches)
        print('Evaluation took {:.3f} seconds'.format(time.time() - start))
        return self.get_result(result)
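A hedged usage sketch (assuming `evaluator` is an instance of this class):

metrics = evaluator.evaluate(batch_size=256, cpu=-1)   # fan out over every core
print(metrics['mrr'], metrics['hits@10'])              # keys documented in the docstring
metrics = evaluator.evaluate(batch_size=256, cpu=1)    # run sequentially instead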
Example #22
def run_multiple_times(args, run_fct):
    cpu_count = mp.cpu_count()
    gpu_count = torch.cuda.device_count()

    # Clone arguments into list & Distribute workload across GPUs
    args_across_workers = [copy.deepcopy(args) for r in range(args.RUN_TIMES)]
    if gpu_count > 0:
        gpu_counter = 0
        for r in range(args.RUN_TIMES):
            args_across_workers[r].device_id = gpu_counter
            gpu_counter += 1
            if gpu_counter > gpu_count - 1:
                gpu_counter = 0

    # Execute different runs/random seeds in parallel
    pool = mp.Pool(cpu_count - 1)
    df_across_runs = pool.map(run_fct, args_across_workers)
    pool.close()

    # Post process results
    df_concat = pd.concat(df_across_runs)
    by_row_index = df_concat.groupby(df_concat.index)
    df_means, df_stds = by_row_index.mean(), by_row_index.std()
    if args.SAVE:
        df_means.to_csv("logs/" + args.SAVE_FNAME + ".csv")
    return df_means, df_stds
Example #23
        def get_gt(pan_gt_json_file=None, pan_gt_folder=None):
            if pan_gt_json_file is None:
                pan_gt_json_file = self.panoptic_json_file
            if pan_gt_folder is None:
                pan_gt_folder = self.panoptic_gt_folder
            with open(pan_gt_json_file, 'r') as f:
                pan_gt_json = json.load(f)
            files = [item['file_name'] for item in pan_gt_json['images']]
            if 'viper' in pan_gt_folder:
                files = [
                    _.split('/')[-1].replace('.jpg', '.png') for _ in files
                ]
            cpu_num = multiprocessing.cpu_count()
            files_split = np.array_split(files, cpu_num)
            workers = multiprocessing.Pool(processes=cpu_num)
            processes = []
            for proc_id, files_set in enumerate(files_split):
                p = workers.apply_async(BaseDataset._load_image_single_core,
                                        (proc_id, files_set, pan_gt_folder))
                processes.append(p)
            workers.close()
            workers.join()
            pan_gt_all = []
            for p in processes:
                pan_gt_all.extend(p.get())

            categories = pan_gt_json['categories']
            categories = {el['id']: el for el in categories}
            color_generator = IdGenerator(categories)
            return pan_gt_all, pan_gt_json, categories, color_generator
Example #24
    def _get_dataset(self, filename):
        dicts = self.processor._file_to_dicts(filename)
        # shuffle the list of dicts here if we later want a random dev set split from the train set
        if filename == self.processor.train_filename:
            if not self.processor.dev_filename:
                if self.processor.dev_split > 0.0:
                    random.shuffle(dicts)  # shuffle in place; random.shuffle returns None

        dict_batches_to_process = int(
            len(dicts) / self.multiprocessing_chunk_size)
        num_cpus = min(mp.cpu_count(), self.max_processes,
                       dict_batches_to_process) or 1

        with ExitStack() as stack:
            p = stack.enter_context(mp.Pool(processes=num_cpus))

            logger.info(
                f"Got ya {num_cpus} parallel workers to convert dict chunks to datasets (chunksize = {self.multiprocessing_chunk_size})..."
            )
            log_ascii_workers(num_cpus, logger)

            results = p.imap(
                partial(self._multiproc, processor=self.processor),
                grouper(dicts, self.multiprocessing_chunk_size),
                chunksize=1,
            )

            datasets = []
            for dataset, tensor_names in tqdm(results,
                                              total=len(dicts) /
                                              self.multiprocessing_chunk_size):
                datasets.append(dataset)

            concat_datasets = ConcatDataset(datasets)
            return concat_datasets, tensor_names
Example #25
    def __init__(self, loader):
        self.loader = loader
        self.data_source = loader.data_source
        self.batch_size = loader.batch_size
        self.token_field = loader.token_field
        self.keyphrases_field = loader.keyphrases_field
        self.lazy_loading = loader.lazy_loading
        self.num_workers = multiprocessing.cpu_count() // 2 or 1

        if self.loader.mode == TRAIN_MODE:
            self.chunk_size = self.batch_size * 5
        else:
            self.chunk_size = self.batch_size
        self._data = self.load_data(self.chunk_size)
        self._batch_count_in_output_queue = 0
        self._redundant_batch = []
        self.workers = []
        self.worker_shutdown = False

        if self.loader.mode in {TRAIN_MODE, EVAL_MODE}:
            self.input_queue = multiprocessing.Queue(-1)
            self.output_queue = multiprocessing.Queue(-1)
            self.__prefetch()
            for _ in range(self.num_workers):
                worker = multiprocessing.Process(target=self._data_worker_loop)
                self.workers.append(worker)
            for worker in self.workers:
                worker.daemon = True
                worker.start()
Example #26
def test(n=10000):
    n_cpus = mp.cpu_count()
    print("num cpus: ", n_cpus)
    p = Pool(n_cpus)

    import time
    # s1 = time.time()
    data = []
    for i in range(n):
        inp_val = np.random.random(size=10)
        vec_val = np.random.random(size=10)
        data.append((inp_val, vec_val))
    #
    # res = p.map(compute_hvp, data)
    # e1 = time.time()
    # print ("Time 1: ", (e1-s1))

    s2 = time.time()
    for i in range(n):
        inp_val, vec_val = data[i]
        inp = Variable(torch.FloatTensor([inp_val]), requires_grad=True)
        v = Variable(torch.FloatTensor([vec_val]), requires_grad=False)
        z = three_sin(inp)
        l = F.mse_loss(z, torch.zeros_like(z))
        # hvp_rop_lop = Hvp_RopLop(f, inp, v)
        # print ("hvp: ", hvp_rop_lop.data)
        # hvp_dbl_bp = Hvp_dbl_bp(l, inp, v)
        # print ("hvp: ", hvp_dbl_bp.data)
        # print ("hvp: ", hvp_rop_lop.data, hvp_dbl_bp.data)
        gnvp_roplop = GNvp_RopLop(l, z, inp, v)
    e2 = time.time()
    print("Time 2: ", (e2 - s2))
Example #27
def train(rank, args, model, barrier, rankstart, rankstop):
    if args.tp:
        os.system("taskset -apc %d %d" % (rank % mp.cpu_count(), os.getpid()))
    torch.manual_seed(args.seed + rank)

    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../../data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=1)

    optimizer = BATCH_PARTITIONED_SGD(model.parameters(),
                                      lr=args.lr,
                                      momentum=args.momentum)
    gamma = 0.9 + torch.rand(1).item() / 10
    scheduler = MyLR(optimizer, gamma)  # lrs.ReduceLROnPlateau(optimizer, 'min', gamma)
    for epoch in range(1, args.epochs + 1):
        # scheduler.step()
        print("Training: Epoch = " + str(epoch))
        loss = train_epoch(epoch, args, model, train_loader, optimizer,
                           rankstart, rankstop)
        barrier[rank] += 1
        print("TrainError = " + str('%.6f' % loss.item()) + "\n")
Example #28
def train(env_params, model_path, episodes=200, episode_length=50):
    print('Actor-Critic training')

    # Global network
    env = PoolEnv(**env_params)
    gnet = Net(env.state_space.n,
               env.action_space.n,
               HIDDEN_DIM,
               action_ranges=env.action_space.ranges)
    gnet.share_memory()  # share the global parameters in multiprocessing
    opt = SharedAdam(gnet.parameters(), lr=LR)  # global optimizer
    global_ep, global_ep_r = mp.Value('i', 0), mp.Value('d', 0.)  # 'i': int, 'd': double

    # Parallel training
    workers = [
        Worker(gnet, opt, global_ep, global_ep_r, i, env_params, HIDDEN_DIM,
               episodes, episode_length, model_path)
        for i in range(mp.cpu_count() // 2)
    ]
    for w in workers:
        w.start()
    for w in workers:
        w.join()

    save_model(model_path, gnet)
Example #29
def init_worker_pool(args):
    """
    Creates the worker pool for drmsd batch computation. Does nothing if sequential.
    """
    torch.multiprocessing.set_start_method("spawn")
    return torch.multiprocessing.Pool(
        mp.cpu_count()) if not args.sequential_drmsd_loss else None
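Note that set_start_method raises a RuntimeError if the start method has already been fixed for the current process. A hedged, defensive variant (mirroring the force=True usage in Example #6):

import torch.multiprocessing as mp

def init_worker_pool(args):
    # force=True overrides an already-set start method instead of raising
    mp.set_start_method("spawn", force=True)
    return mp.Pool(mp.cpu_count()) if not args.sequential_drmsd_loss else None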
Example #30
    def test_dataloader(self):
        batch_size = self.hparams.batch_size_test
        num_workers = cpu_count()
        return DataLoader(self.test_data,
                          batch_size=batch_size,
                          shuffle=False,
                          num_workers=num_workers)