def runMultiProcessTrajectories(self, repeat):
    pool = Pool(processes=len(self.posIni))
    result = pool.map(partial(self.runNtrajectory, repeat=repeat),
                      [(x, y) for x, y in self.posIni])
    pool.close()
    pool.join()
    meanCost, meanTraj = 0., 0.
    for Cost, traj in result:
        meanCost += Cost
        meanTraj += traj
    size = len(result)
    return meanCost / size, meanTraj / size
def __init__(self, processes=None, initializer=None, initargs=()):
    ProcessPool.__init__(self, processes, initializer, initargs)
    return
def runNtrajectory(self, (x, y), repeat):
    costAll, trajTimeAll = np.zeros(repeat), np.zeros(repeat)
    for i in range(repeat):
        costAll[i], trajTimeAll[i] = self.runOneTrajectoryOpti(x, y)
    meanCost = np.mean(costAll)
    meanTrajTime = np.mean(trajTimeAll)
    self.costStore.append([x, y, meanCost])
    self.trajTimeStore.append([x, y, meanTrajTime])
    return meanCost, meanTrajTime

def mapableTrajecrtoryFunction(self, x, y, useless):
    return self.runOneTrajectory(x, y)

def runNtrajectoryMulti(self, (x, y), repeat):
    pool = Pool(processes=4)
    result = pool.map(partial(self.mapableTrajecrtoryFunction, x, y), range(repeat))
    pool.close()
    pool.join()
    meanCost, meanTraj = 0., 0.
    for Cost, traj in result:
        meanCost += Cost
        meanTraj += traj
    size = len(result)
    return meanCost / size, meanTraj / size

def runOneTrajectoryOpti(self, x, y):
    #self.tm.saveTraj = True
    cost, trajTime, lastX = self.tm.runTrajectoryOpti(x, y)
    #cost, trajTime, lastX = self.tm.runTrajectoryOpti(x, y)
def Pool(processes=None, initializer=None, initargs=()):
    '''
    Returns a process pool object
    '''
    from multiprocess.pool import Pool
    return Pool(processes, initializer, initargs)
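# Usage sketch, not part of the original source: the Pool() factory above just
# forwards to multiprocess.pool.Pool, so it behaves like a regular process pool.
def _square(x):
    return x * x

if __name__ == '__main__':
    pool = Pool(processes=2)
    try:
        print(pool.map(_square, range(10)))  # [0, 1, 4, ..., 81]
    finally:
        pool.close()
        pool.join()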
def create_test_raw_data(self, ticker_list=None, start_date=None,
                         finish_date=None, folder_prefix=None):
    """Downloads FX tick data from DukasCopy and then dumps each ticker in a
    separate HDF5 file if a folder is specified. If no folder is specified,
    returns a list of DataFrames (note: this can be a very large list in memory).

    Parameters
    ----------
    ticker_list : str (list)
        List of FX tickers to download

    start_date : datetime/str
        Start date of FX tick data download

    finish_date : datetime/str
        Finish date of FX tick data download

    folder_prefix : str
        Folder to dump everything

    Returns
    -------
    DataFrame (list)
    """
    from findatapy.market import MarketDataRequest, MarketDataGenerator, Market

    if start_date is None and finish_date is None:
        finish_date = datetime.datetime.utcnow().date() - timedelta(days=30)
        start_date = finish_date - timedelta(days=30 * 15)

        start_date = self._compute_random_date(start_date, finish_date)
        finish_date = start_date + timedelta(days=90)

    df_list = []
    result = []

    # from multiprocessing.dummy import Pool  # threading
    from multiprocess.pool import Pool  # actual new processes
    import time

    # If we don't specify a folder
    if folder_prefix is None:
        mini_ticker_list = self._split_list(ticker_list, 2)

        # Use multiprocess to speed up the download
        for mini in mini_ticker_list:
            pool = Pool(processes=2)

            for ticker in mini:
                time.sleep(1)
                self.logger.info("Loading " + ticker)

                md_request = MarketDataRequest(
                    start_date=start_date, finish_date=finish_date,
                    category='fx', tickers=ticker,
                    fields=['bid', 'ask', 'bidv', 'askv'],
                    data_source='dukascopy', freq='tick')

                # self._download(md_request)
                result.append(
                    pool.apply_async(self._download,
                                     args=(md_request, folder_prefix,)))

            pool.close()
            pool.join()
    else:
        market = Market(market_data_generator=MarketDataGenerator())

        for ticker in ticker_list:
            md_request = MarketDataRequest(
                start_date=start_date, finish_date=finish_date,
                category='fx', tickers=ticker,
                fields=['bid', 'ask', 'bidv', 'askv'],
                data_source='dukascopy', freq='tick')

            df = market.fetch_market(md_request=md_request)
            df.columns = ['bid', 'ask', 'bidv', 'askv']
            df['venue'] = 'dukascopy'
            df['ticker'] = ticker

            # print(df)
            if folder_prefix is not None:
                self.dump_hdf5_file(df, folder_prefix + "_" + ticker + ".h5")
                # df.to_csv(folder_prefix + "_" + ticker + ".csv")
                # CSV files can be very large, so try to avoid
            else:
                df_list.append(df)

    return df_list
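# Usage sketch, not part of the original source. `data_gen` is a hypothetical
# instance of the class that defines create_test_raw_data above; the tickers and
# folder prefix are illustrative only.
if __name__ == '__main__':
    data_gen.create_test_raw_data(
        ticker_list=['EURUSD', 'USDJPY'],
        folder_prefix='/tmp/fx_tick')  # dumps /tmp/fx_tick_<ticker>.h5 per ticker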
def __call__(
    self,
    batch_or_dataset: Union[Batch, Dataset],
    columns: List[str],
    mask: List[int] = None,
    store_compressed: bool = None,
    store: bool = None,
    num_proc: int = None,
    *args,
    **kwargs,
):
    if mask:
        raise NotImplementedError(
            "Mask not supported for SubpopulationCollection yet.")

    if not num_proc or num_proc == 1:
        slices = []
        slice_membership = []

        # Apply each slicebuilder in sequence
        for i, slicebuilder in tqdm(enumerate(self.subpopulations)):
            # Apply the slicebuilder
            batch_or_dataset, slices_i, slice_membership_i = slicebuilder(
                batch_or_dataset=batch_or_dataset,
                columns=columns,
                mask=mask,
                store_compressed=store_compressed,
                store=store,
                *args,
                **kwargs,
            )

            # Add in the slices and slice membership
            slices.extend(slices_i)
            slice_membership.append(slice_membership_i)
    else:
        # TODO(karan): cleanup, make mp.Pool support simpler across the library
        with Pool(num_proc) as pool:
            batches_or_datasets, slices, slice_membership = zip(*pool.map(
                lambda sb: sb(
                    batch_or_dataset=batch_or_dataset,
                    columns=columns,
                    mask=mask,
                    store_compressed=store_compressed,
                    store=store,
                    *args,
                    **kwargs,
                ),
                [slicebuilder for slicebuilder in self.subpopulations],
            ))

        # Combine all the slices
        slices = list(tz.concat(slices))

    def _store_updates(batch, indices):
        # Each Subpopulation will generate slices
        for i, subpopulation in enumerate(self.subpopulations):
            updates = subpopulation.construct_updates(
                slice_membership=slice_membership[i][indices],
                columns=columns,
                mask=mask,
                # TODO(karan): this option should be set correctly
                compress=True,
            )

            batch = subpopulation.store(
                batch=batch,
                updates=updates,
            )

        return batch

    if isinstance(batch_or_dataset, Dataset):
        batch_or_dataset = batch_or_dataset.map(
            _store_updates,
            with_indices=True,
            batched=True,
        )

        for subpopulation in self.subpopulations:
            # Update the Dataset's history
            batch_or_dataset.update_tape(
                path=[SLICEBUILDERS, subpopulation.category],
                identifiers=subpopulation.identifiers,
                columns=columns,
            )
    else:
        batch_or_dataset = recmerge(*batches_or_datasets, merge_sequences=True)

    # Combine all the slice membership matrices
    slice_membership = np.concatenate(slice_membership, axis=1)

    return batch_or_dataset, slices, slice_membership
def Pool(processes=None, initializer=None, initargs=(), maxtasksperchild=None):
    '''
    Returns a process pool object
    '''
    from multiprocess.pool import Pool
    return Pool(processes, initializer, initargs, maxtasksperchild)
def __enter__(self):
    if self.backend is None or isinstance(self.backend, int):
        self._backend_activated = Pool(self.backend)
    elif HAS_SHAREDMEM and isinstance(self.backend, MapReduce):
        self.backend.__enter__()
    return self
def save(
    target: Optional[str],
    name: Optional[str],
    config: Optional[str],
    seed: Optional[str],
    destination: Optional[str],
    multiprocessing: bool,
) -> None:
    """Save the sketch to an SVG file.

    TARGET may either point at a Python file or at a directory and is interpreted in
    the same way as the `vsk run` command (see `vsk run --help`).

    By default, the output is named after the sketch and the provided options. An
    alternative name may be provided with the --name option.

    If the sketch has parameters, their default values are used. Alternatively, a
    pre-existing configuration can be used instead with the --config option.

    By default, a random seed is used for vsketch's random number generator. If
    --config is used, the seed saved in the configuration is used instead. A seed may
    also be provided with the --seed option, in which case it will override the
    configuration's seed.

    The --seed option also accepts a seed range in the form FIRST..LAST, e.g. 0..100.
    In this case, one output file per seed is generated. If the number of files to
    generate is greater than 4, all available cores are used for the process. This
    behaviour can be disabled with --no-multiprocessing or the VSK_MULTIPROCESSING
    variable.

    By default, all SVGs are saved in the sketch's "output" sub-directory. This can be
    overridden using the --destination option.
    """
    try:
        path = _find_sketch_script(target)
    except ValueError as err:
        print_error("Sketch could not be found: ", str(err))
        raise click.Abort()

    # load configuration
    param_set: Dict[str, vsketch.ParamType] = {}
    config_postfix = ""
    if config is not None:
        config_path = pathlib.Path(config)
        if not config_path.exists():
            config_path = get_config_path(path) / (config + ".json")
        if config_path.exists():
            param_set = load_config(config_path)
            config_postfix = "_" + config_path.stem
        else:
            print_error("Config file not found: ", str(config_path))

    # compute name
    if name is None:
        name = canonical_name(path) + config_postfix

    seed_in_name = seed is not None
    if seed is None:
        if param_set is not None and "__seed__" in param_set:
            seed_start = seed_end = int(param_set["__seed__"])
        else:
            seed_start = seed_end = random.randint(0, 2**31 - 1)
    else:
        try:
            seed_start, seed_end = _parse_seed(seed)
        except ValueError as err:
            print_error(f"Could not parse seed {seed}: ", str(err))
            raise click.Abort()

    # prepare output path
    if destination is not None:
        output_path = pathlib.Path(destination)
        if not output_path.exists():
            print_error("Provided output path does not exist: ",
                        str(output_path.absolute()))
            raise click.Abort()
        if not output_path.is_dir():
            print_error("Provided output path is not a directory: ",
                        str(output_path.absolute()))
            raise click.Abort()
    else:
        output_path = path.parent / "output"
        if not output_path.exists():
            output_path.mkdir()
        elif not output_path.is_dir():
            print_error("Could not create output directory: ", str(output_path))
            raise click.Abort()

    # noinspection PyShadowingNames
    def _write_output(seed: int) -> None:
        # this needs to be there because the sketch class cannot be pickled apparently
        sketch_class = load_sketch_class(path)
        if sketch_class is None:
            print_error("Could not load script: ", str(path))
            raise click.Abort()

        sketch_class.set_param_set(param_set)

        output_name = name
        if seed_in_name:
            output_name += "_s" + str(seed)  # type: ignore
        output_name += ".svg"  # type: ignore
        output_file = output_path / output_name

        sketch = sketch_class.execute(finalize=True, seed=seed)
        if sketch is None:
            print_error("Could not execute script: ", str(path))
            raise click.Abort()

        doc = sketch.vsk.document
        with open(output_file, "w") as fp:
            print_info("Exporting SVG: ", str(output_file))
            vp.write_svg(
                fp, doc, source_string=f"vsketch save -s {seed} {path}",
                color_mode="layer")

    seed_range = range(seed_start, seed_end + 1)
    if len(seed_range) < 4 or not multiprocessing:
        for s in seed_range:
            _write_output(s)
    else:
        with Pool() as p:
            list(p.imap(_write_output, seed_range))
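# Usage sketch (not part of the original source): through the vsketch CLI, the
# seed-range form of --seed exercises the multiprocessing branch above whenever
# four or more outputs are requested, e.g.:
#
#   vsk save --seed 0..99 path/to/sketch.py                        # 100 SVGs, rendered in parallel
#   vsk save --seed 0..99 --no-multiprocessing path/to/sketch.py   # same, but sequential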
class ParallelBackend:
    """
    The unified backend for parallelization.

    Currently, we support `multiprocess`, `dask`, `sharedmem` and `loky`.
    `multiprocess` usually has better performance on single-node machines, while
    `dask` can be used for multi-node parallelization. Note the following known
    issues: when used for sampling, (1) `dask` and `loky` do not respect the
    global bayesfast random seed; (2) `sharedmem` may not display the progress
    messages correctly (multiple messages in the same line); (3) `loky` does not
    print any messages at all in Jupyter. So we recommend using the default
    `multiprocess` backend when possible.

    Parameters
    ----------
    backend : None, int, Pool, Client or MapReduce, optional
        The backend for parallelization. If `None` or `int`, will be passed as
        the `processes` argument to initialize a Pool in a with context. Set to
        `None` by default.
    """
    def __new__(cls, backend=None):
        if isinstance(backend, ParallelBackend):
            return backend
        else:
            return super(ParallelBackend, cls).__new__(cls)

    def __init__(self, backend=None):
        if isinstance(backend, ParallelBackend):
            return
        self.backend = backend

    def __enter__(self):
        if self.backend is None or isinstance(self.backend, int):
            self._backend_activated = Pool(self.backend)
        elif HAS_SHAREDMEM and isinstance(self.backend, MapReduce):
            self.backend.__enter__()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.backend is None or isinstance(self.backend, int):
            self._backend_activated.close()
            self._backend_activated.join()
            self._backend_activated = None
        elif HAS_SHAREDMEM and isinstance(self.backend, MapReduce):
            self.backend.__exit__(exc_type, exc_val, exc_tb)

    @property
    def backend(self):
        return self._backend

    @backend.setter
    def backend(self, be):
        if be is None or (isinstance(be, int) and be > 0):
            pass
        elif isinstance(be, Pool):
            pass
        elif HAS_RAY and isinstance(be, RayPool):
            pass
        elif HAS_DASK and isinstance(be, Client):
            pass
        elif HAS_SHAREDMEM and isinstance(be, MapReduce):
            pass
        elif HAS_LOKY and isinstance(be,
                                     reusable_executor._ReusablePoolExecutor):
            pass
        # elif be == 'serial':
        #     pass
        else:
            raise ValueError('invalid value for backend.')
        self._backend_activated = be
        self._backend = be

    @property
    def backend_activated(self):
        return self._backend_activated

    @property
    def kind(self):
        if self.backend is None or isinstance(self.backend, int):
            return 'multiprocess'
        elif isinstance(self.backend, Pool):
            return 'multiprocess'
        elif HAS_RAY and isinstance(self.backend, RayPool):
            return 'ray'
        elif HAS_DASK and isinstance(self.backend, Client):
            return 'dask'
        elif HAS_SHAREDMEM and isinstance(self.backend, MapReduce):
            return 'sharedmem'
        elif HAS_LOKY and isinstance(self.backend,
                                     reusable_executor._ReusablePoolExecutor):
            return 'loky'
        # elif self.backend == 'serial':
        #     return 'serial'
        else:
            raise RuntimeError('unexpected value for self.backend.')

    def map(self, fun, *iters):
        if self.backend_activated is None:
            raise RuntimeError('the backend is not activated. Please use it in '
                               'a with context.')
        elif isinstance(self.backend_activated, Pool):
            return self.backend_activated.starmap(fun, zip(*iters))
        elif HAS_RAY and isinstance(self.backend_activated, RayPool):
            return self.backend_activated.starmap(fun, list(zip(*iters)))
            # https://github.com/ray-project/ray/issues/11451
            # that's why I need to explicitly convert it to a list for now
        elif HAS_DASK and isinstance(self.backend_activated, Client):
            return self.gather(self.backend_activated.map(fun, *iters))
        elif HAS_SHAREDMEM and isinstance(self.backend_activated, MapReduce):
            return self.backend_activated.map(fun, list(zip(*iters)),
                                              star=True)
        elif HAS_LOKY and isinstance(self.backend_activated,
                                     reusable_executor._ReusablePoolExecutor):
            return self.gather(self.backend_activated.map(fun, *iters))
        # elif self.backend_activated == 'serial':
        #     return [deepcopy(fun)(*[i[j] for i in iters]) for j in range(l)]
        else:
            raise RuntimeError('unexpected value for self.backend_activated.')

    def map_async(self, fun, *iters):
        if self.backend_activated is None:
            raise RuntimeError('the backend is not activated. Please use it in '
                               'a with context.')
        elif isinstance(self.backend_activated, Pool):
            return self.backend_activated.starmap_async(fun, zip(*iters))
        elif HAS_RAY and isinstance(self.backend_activated, RayPool):
            return self.backend_activated.starmap_async(fun, list(zip(*iters)))
        elif HAS_DASK and isinstance(self.backend_activated, Client):
            return self.backend_activated.map(fun, *iters)
        elif HAS_SHAREDMEM and isinstance(self.backend_activated, MapReduce):
            warnings.warn('sharedmem does not support map_async. Using map '
                          'instead.', RuntimeWarning)
            return self.backend_activated.map(fun, list(zip(*iters)),
                                              star=True)
        elif HAS_LOKY and isinstance(self.backend_activated,
                                     reusable_executor._ReusablePoolExecutor):
            return self.backend_activated.map(fun, *iters)
        # elif self.backend_activated == 'serial':
        #     return self.map(fun, *iters)
        else:
            raise RuntimeError('unexpected value for self.backend_activated.')

    def gather(self, async_result):
        if self.backend_activated is None:
            raise RuntimeError('the backend is not activated. Please use it in '
                               'a with context.')
        elif isinstance(self.backend_activated, Pool):
            return async_result.get()
        elif isinstance(self.backend_activated, RayPool):
            return async_result.get()
        elif HAS_DASK and isinstance(self.backend_activated, Client):
            return self.backend_activated.gather(async_result)
        elif HAS_SHAREDMEM and isinstance(self.backend_activated, MapReduce):
            return async_result
        elif HAS_LOKY and isinstance(self.backend_activated,
                                     reusable_executor._ReusablePoolExecutor):
            return list(async_result)
        # elif self.backend_activated == 'serial':
        #     return async_result
        else:
            raise RuntimeError('unexpected value for self.backend_activated.')
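# Usage sketch (not part of the original source): with the default backend,
# ParallelBackend wraps a multiprocess Pool, and map() star-maps the function
# over the zipped iterables.
def _add(a, b):
    return a + b

if __name__ == '__main__':
    with ParallelBackend(4) as pb:  # 4 worker processes
        print(pb.kind)                                 # 'multiprocess'
        print(pb.map(_add, [1, 2, 3], [10, 20, 30]))   # [11, 22, 33]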
    return wav


def local_mel2samp(filepath):
    filepath = filepath.split("|")[0]
    new_filepath = filepath + ".pt"
    if os.path.isfile(new_filepath):
        print("skip", new_filepath)
        return
    audio = preprocess_wav(filepath, sampling_rate=args.sampling_rate)
    audio = torch.FloatTensor(audio.astype(np.float32))
    melspectrogram = mel2samp.get_mel(audio)
    print(new_filepath)
    torch.save(melspectrogram, new_filepath)


filepaths = files_to_list(args.filelist_path)

with Pool(args.num_processes) as pool:  # ThreadPool(8) as pool:
    # list(tqdm(pool.imap(preprocess_speaker, speaker_dirs), dataset_name, len(speaker_dirs),
    list(pool.map(local_mel2samp, filepaths))

# for filepath in filepaths:
#     filepath = filepath.split("|")[0]
#     audio = preprocess_wav(filepath, sampling_rate=args.sampling_rate)
#     filename = os.path.basename(filepath)
#     new_filepath = args.output_dir + '/' + filename + '.npy'
#     print(new_filepath)
#     np.save(new_filepath, audio)
    ('SN20_Bleb_fromStart', 14, 0, 1, -235, 30),
    ('SN20_Bleb_fromStart', 14, 0, 2, 120, 230),
    ('SN20_Bleb_fromStart', 14, 0, 3, -230, 105),
    ('SN20_Bleb_fromStart', 14, 0, 4, 205, 35),
    ('SN20_Bleb_fromStart', 14, 1, 2, 110, -180),
    ('SN20_Bleb_fromStart', 14, 1, 3, -220, 25),
    ('SN20_Bleb_fromStart', 14, 1, 4, -150, 0),
    ('SN20_Bleb_fromStart', 14, 2, 3, 160, -130),
    ('SN20_Bleb_fromStart', 14, 2, 4, -75, 210),
    ('SN20_Bleb_fromStart', 14, 3, 4, 220, 105),
    ('SN20_Bleb_fromStart', 15, 0, 1, 0, 235),
    ('SN20_Bleb_fromStart', 16, 0, 1, 0, -225),
    ('SN20_Bleb_fromStart', 16, 0, 2, -80, 130),
    ('SN20_Bleb_fromStart', 16, 1, 2, -60, -120),
    ('SN20_Bleb_fromStart', 17, 0, 2, -180, 0),
    ('SN20_Bleb_fromStart', 17, 0, 3, 155, 0),
    ('SN20_Bleb_fromStart', 17, 1, 2, -225, -115),
    ('SN20_Bleb_fromStart', 17, 1, 3, -135, 20),
    ('SN20_Bleb_fromStart', 18, 0, 1, -110, -175),
    ('SN20_Bleb_fromStart', 19, 0, 1, 70, -150),
    ('SN20_Bleb_fromStart', 19, 1, 2, -100, 115),
    ('SN20_Bleb_fromStart', 19, 1, 3, 60, -170),
    ('SN20_Bleb_fromStart', 19, 2, 3, 135, 185),
    ('SN20_Bleb_fromStart', 20, 0, 1, 175, 20),
    ('SN20_Bleb_fromStart', 20, 0, 2, 205, -60),
    ('SN20_Bleb_fromStart', 20, 1, 2, -135, 80),
]

_p = Pool(CPUS_TO_USE)
_answers = _p.starmap(process_fake_following, _arguments)
_p.close()
class DataPipelineWithReward:
    """
    Creates a data pipeline that also outputs discounted reward.
    """

    def __init__(self,
                 observables: List[AgentHandler],
                 actionables: List[AgentHandler],
                 mission_handlers: List[AgentHandler],
                 nsteps,
                 gamma,
                 data_directory,
                 num_workers,
                 worker_batch_size,
                 min_size_to_dequeue):
        """
        Sets up a tensorflow dataset to load videos from a given data directory.
        :param data_directory: the directory of the data to be loaded, e.g.:
            'minerl.herobraine_parse/output/rendered/'
        """
        self.data_dir = data_directory
        self.observables = observables
        self.actionables = actionables
        self.mission_handlers = mission_handlers
        # self.vectorizer = vectorizer
        self.number_of_workers = num_workers
        self.worker_batch_size = worker_batch_size
        self.size_to_dequeue = min_size_to_dequeue
        self.nsteps = nsteps
        self.gamma = gamma

        self.processing_pool = Pool(self.number_of_workers)
        self.m = multiprocessing.Manager()
        self.data_queue = self.m.Queue(
            maxsize=self.size_to_dequeue // self.worker_batch_size * 4)

        pool_size = self.size_to_dequeue * 4
        self.random_queue = PriorityQueue(maxsize=pool_size)

    def batch_iter(self, batch_size):
        """
        Returns a generator for iterating through batches of the dataset.
        :param batch_size:
        :param number_of_workers:
        :param worker_batch_size:
        :param size_to_dequeue:
        :return:
        """
        logger.info("Starting batch iterator on {}".format(self.data_dir))
        data_list = self._get_all_valid_recordings(self.data_dir)

        load_data_func = self._get_load_data_func(
            self.data_queue, self.nsteps, self.worker_batch_size,
            self.mission_handlers, self.observables, self.actionables,
            self.gamma)
        map_promise = self.processing_pool.map_async(load_data_func, data_list)

        # We map the files -> load_data -> batch_pool -> random shuffle -> yield.
        # batch_pool = []
        start = 0
        incr = 0
        while (not map_promise.ready() or not self.data_queue.empty()
               or not self.random_queue.empty()):
            # print("d: {} r: {}".format(data_queue.qsize(), random_queue.qsize()))

            while not self.data_queue.empty() and not self.random_queue.full():
                for ex in self.data_queue.get():
                    if not self.random_queue.full():
                        r_num = np.random.rand(1)[0] * (1 - start) + start
                        self.random_queue.put((r_num, ex))
                        incr += 1
                        # print("d: {} r: {} rqput".format(data_queue.qsize(), random_queue.qsize()))
                    else:
                        break

            if incr > self.size_to_dequeue:
                if self.random_queue.qsize() < (batch_size):
                    if map_promise.ready():
                        break
                    else:
                        continue
                batch_with_incr = [
                    self.random_queue.get() for _ in range(batch_size)
                ]

                r1, batch = zip(*batch_with_incr)
                start = 0
                traj_obs, traj_acts, traj_handlers, traj_n_obs, discounted_rewards, elapsed = zip(
                    *batch)

                observation_batch = [
                    HandlerCollection({
                        o: np.asarray(traj_ob[i])
                        for i, o in enumerate(self.observables)
                    }) for traj_ob in traj_obs
                ]
                action_batch = [
                    HandlerCollection({
                        a: np.asarray(traj_act[i])
                        for i, a in enumerate(self.actionables)
                    }) for traj_act in traj_acts
                ]
                mission_handler_batch = [
                    HandlerCollection({
                        m: np.asarray(traj_handler[i])
                        for i, m in enumerate(self.mission_handlers)
                    }) for traj_handler in traj_handlers
                ]
                next_observation_batch = [
                    HandlerCollection({
                        o: np.asarray(traj_n_ob[i])
                        for i, o in enumerate(self.observables)
                    }) for traj_n_ob in traj_n_obs
                ]
                yield (observation_batch, action_batch, mission_handler_batch,
                       next_observation_batch, discounted_rewards, elapsed)

            # Move on to the next batch bool.
            # Todo: Move to a running pool, sampling as we enqueue. This is basically the random queue impl.
            # Todo: This will prevent the data from getting arbitrarily segmented.
            # batch_pool = []

        try:
            map_promise.get()
        except RuntimeError as e:
            logger.error("Failure in data pipeline: {}".format(e))

        logger.info("Epoch complete.")

    def close(self):
        self.processing_pool.close()
        self.processing_pool.join()

    ############################
    ## PRIVATE METHODS
    #############################

    @staticmethod
    def _get_load_data_func(data_queue, nsteps, worker_batch_size,
                            mission_handlers, observables, actionables, gamma):
        def _load_data(inst_dir):
            recording_path = str(os.path.join(inst_dir, 'recording.mp4'))
            univ_path = str(os.path.join(inst_dir, 'univ.json'))

            try:
                cap = cv2.VideoCapture(recording_path)
                # Litty uni
                with open(univ_path, 'r') as f:
                    univ = {int(k): v for (k, v) in (json.load(f)).items()}
                    univ = OrderedDict(univ)
                    univ = np.array(list(univ.values()))

                # Litty viddy
                batches = []
                rewards = []
                frames_queue = Queue(maxsize=nsteps)

                # Loop through the video and construct frames
                # of observations to be sent via the multiprocessing queue
                # in chunks of worker_batch_size to the batch_iter loop.
                frame_num = 0
                while True:
                    ret, frame = cap.read()
                    if not ret or frame_num >= len(univ):
                        break
                    else:
                        # print("Batches {} and worker batch size {}".format(len(batches), self.worker_batch_size))
                        if len(batches) >= worker_batch_size:
                            data_queue.put(batches)
                            batches = []

                        try:
                            # Construct a single observation object.
                            vf = (np.clip(frame[:, :, ::-1], 0, 255))
                            uf = univ[frame_num]
                            frame = {'pov': vf}
                            frame.update(uf)

                            cur_reward = 0
                            for m in mission_handlers:
                                try:
                                    if isinstance(m, RewardHandler):
                                        cur_reward += m.from_universal(frame)
                                except NotImplementedError:
                                    pass
                            rewards.append(cur_reward)

                            # print("Frames queue size {}".format(frames_queue.qsize()))
                            frames_queue.put(frame)
                            if frames_queue.full():
                                next_obs = [
                                    o.from_universal(frame) for o in observables
                                ]
                                frame = frames_queue.get()
                                obs = [
                                    o.from_universal(frame) for o in observables
                                ]
                                act = [
                                    a.from_universal(frame) for a in actionables
                                ]
                                mission = []
                                for m in mission_handlers:
                                    try:
                                        mission.append(m.from_universal(frame))
                                    except NotImplementedError:
                                        mission.append(None)
                                        pass

                                batches.append(
                                    (obs, act, mission, next_obs,
                                     DataPipelineWithReward._calculate_discount_rew(
                                         rewards[-nsteps:], gamma),
                                     frame_num + 1 - nsteps))
                        except Exception as e:
                            # If there is some error constructing the batch we just start a new sequence
                            # at the point that the exception was observed
                            logger.warn(
                                "Exception {} caught in the middle of parsing {} in "
                                "a worker of the data pipeline.".format(e, inst_dir))

                    frame_num += 1

                return batches
            except Exception as e:
                logger.error("Caught Exception")
                raise e

            return None

        return _load_data

    @staticmethod
    def _calculate_discount_rew(rewards, gamma):
        total_reward = 0
        for i, rew in enumerate(rewards):
            total_reward += (gamma**i) * rew
        return total_reward

    @staticmethod
    def _get_all_valid_recordings(path):
        directoryList = []

        # return nothing if path is a file
        if os.path.isfile(path):
            return []

        # add dir to directoryList if it contains both .mp4 and .json files
        if len([f for f in os.listdir(path) if f.endswith('.mp4')]) > 0:
            if len([f for f in os.listdir(path) if f.endswith('.json')]) > 0:
                directoryList.append(path)

        for d in os.listdir(path):
            new_path = os.path.join(path, d)
            if os.path.isdir(new_path):
                directoryList += DataPipelineWithReward._get_all_valid_recordings(
                    new_path)

        directoryList = np.array(directoryList)
        np.random.shuffle(directoryList)

        return directoryList.tolist()
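# Usage sketch (not part of the original source): the discounted-reward helper
# above sums gamma**i * reward_i over an n-step window of rewards.
if __name__ == '__main__':
    rewards = [1.0, 0.0, 2.0]
    print(DataPipelineWithReward._calculate_discount_rew(rewards, gamma=0.9))
    # = 1.0 + 0.9 * 0.0 + 0.9**2 * 2.0 ~= 2.62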