def updateTillEnd(self, estimOver=1000, t0=None, Tsave=None, Fname=None):
    if t0 is None:
        t0 = tic(ret=True)
    if isinstance(self.dur, str):
        self.dur = timeToSteps(self.dur, self.evaMod.dt)
    bar = None
    # run the first `estimOver` steps while estimating the total runtime
    for i in range(estimOver):
        self.update()
        estimAndPercent(self.rtime, self.dur, avg=estimOver, t0=t0)
        if self.rtime == estimOver:
            try:
                bar = ProgBar(self.dur - estimOver, stream=1)
            except Exception:
                bar = None
    # run the remaining steps with a progress bar
    for i in range(estimOver, self.dur):
        self.update()
        if bar is not None:
            bar.update()
        if (Tsave is not None) and (Fname is not None):
            if not self.rtime % Tsave:
                # periodically dump the accumulated output and clear the buffers
                array2data(self.out, Fname + '%i.npy' % self.iFile)
                self.iFile += 1
                for k in self.out.keys():
                    del self.out[k][:]
def check_classifier(vect: HashingVectorizer) -> None:
    if not clf_path.is_file():
        print('Classifier was not found, creating...')
        clf = SGDClassifier(loss='log', random_state=1)
        ds = DocStream('./movie_data.csv')
        pbar = ProgBar(45)
        classes = np.array([0, 1])
        for _ in range(45):
            x_train, y_train = ds.get_minibatch(1000)
            if not x_train:
                break
            x_train = vect.transform(x_train)
            clf.partial_fit(x_train, y_train, classes)
            pbar.update()
        print('Training completed...')
        x_test, y_test = ds.get_minibatch(5000)
        x_test = vect.transform(x_test)
        print(f'Score: {clf.score(x_test, y_test)}')
        clf = clf.partial_fit(x_test, y_test)
        dump(clf, clf_path, protocol=4)
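# A minimal sketch of how the persisted classifier might be loaded back and applied,
# assuming clf_path is the same path written by dump() above and that dump/load are
# the joblib helpers (an assumption; plain pickle would work the same way), and that
# vect is the HashingVectorizer used during training.
from joblib import load

def classify_review(text: str, vect: HashingVectorizer) -> int:
    clf = load(clf_path)                 # classifier persisted by check_classifier
    x = vect.transform([text])
    return int(clf.predict(x)[0])        # 0 = negative, 1 = positive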
def extract_weather_data(url, api_key, target_date, days):
    """Call the Wunderground API to extract daily weather summaries."""
    records = []
    bar = ProgBar(days)
    for _ in range(days):
        # use the url template and api key passed in rather than module globals
        request = url.format(api_key, target_date.strftime('%Y%m%d'))
        response = requests.get(request)
        if response.status_code == 200:
            data = response.json()['history']['dailysummary'][0]
            records.append(
                DailySummary(date=target_date,
                             meantempm=data['meantempm'],
                             meandewptm=data['meandewptm'],
                             meanpressurem=data['meanpressurem'],
                             maxhumidity=data['maxhumidity'],
                             minhumidity=data['minhumidity'],
                             maxtempm=data['maxtempm'],
                             mintempm=data['mintempm'],
                             maxdewptm=data['maxdewptm'],
                             mindewptm=data['mindewptm'],
                             maxpressurem=data['maxpressurem'],
                             minpressurem=data['minpressurem'],
                             precipm=data['precipm']))
        time.sleep(6)  # stay under the API rate limit
        bar.update()
        target_date += timedelta(days=1)
    return records
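# The function above assumes a DailySummary record type and a URL template that are
# not shown. A minimal sketch, with the field list taken directly from the keyword
# arguments above; the URL template, API key, and target station are hypothetical.
from collections import namedtuple
from datetime import datetime

features = ['date', 'meantempm', 'meandewptm', 'meanpressurem', 'maxhumidity',
            'minhumidity', 'maxtempm', 'mintempm', 'maxdewptm', 'mindewptm',
            'maxpressurem', 'minpressurem', 'precipm']
DailySummary = namedtuple('DailySummary', features)

# first slot: API key, second slot: date as YYYYMMDD (location path is illustrative)
BASE_URL = 'http://api.wunderground.com/api/{}/history_{}/q/CA/San_Francisco.json'

records = extract_weather_data(BASE_URL, 'your_api_key', datetime(2015, 1, 1), 500)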
def download_data(self): if not self.is_connected(): return if self.total_logs < self.min_logs: self.log_print("Not enough logs to start download, {} logs found but minimum logs is set to {}". format(self.total_logs, self.min_logs)) return self.requester.max_logs = self.total_logs self.requester.print_step = self.requester.max_logs // 100 self.start_time = datetime.now() self.full_fname = self.fname + '_%s.dat' % self.mac_address.replace(':', '') + self.start_time.strftime("_%m-%d-%y_%H-%M-%S") self.log_print("Writing data to file: {}".format(self.full_fname)) self.requester.file = open(self.full_fname, 'w+') self.requester.file.write("raw\n" if self.raw else "compressed\n") self.requester.file.write("start_time: " + str(self.start_time) + '\n') self.requester.file.write("sample_period: " + str(self.sample_period) + '\n') self.start_broadcast() bar = ProgBar(100, width=70, stream=self.log_stream) last_check = self.start_time timed_out = False try: while not timed_out and not self.requester.done and not self.stopped: if (datetime.now() - last_check).seconds > 30: self.stop_broadcast() self.read_status(update=True) self.print_status() last_check = datetime.now() self.start_broadcast() self.received.clear() timed_out = not self.received.wait(30) bar.update() if self.requester.done: while bar.cnt < bar.max_iter: bar.update() self.log_print("Download Complete") else: self.log_print("") self.log_print("Download Interrupted") except (KeyboardInterrupt, SystemExit): self.log_print("") self.log_print("Download Interrupted") finally: self.received.clear() if not timed_out: self.log_print("Stopping device from broadcasting ....") self.stop_broadcast() self.log_print("Waiting for all notifications to get handled ....") time.sleep(2) self.log_print("Closing File ....") self.requester.file.close()
def store_raw_images(paths, links):
    global pic_num
    for link, path in zip(links, paths):
        print("Processing path {}".format(path))
        if not os.path.exists(path):
            os.makedirs(path)
        result = requests.get(link)
        image_urls = result.text.split('\n')
        pool = Pool(processes=128)
        # materialize the argument tuples: zip() has no len() in Python 3,
        # and the progress bar needs the total count up front
        inputs = list(zip(itertools.repeat(path), image_urls, itertools.count(pic_num)))
        bar = ProgBar(len(inputs), stream=sys.stdout)
        for _ in pool.imap(load_image, inputs):
            bar.update()
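# The worker handed to pool.imap is not shown. A minimal sketch of the signature it
# would need, given that each argument tuple packs (target directory, image URL,
# running index); the body is an assumption.
import os
import requests

def load_image(args):
    path, url, index = args
    try:
        data = requests.get(url, timeout=2).content
        with open(os.path.join(path, '{}.jpg'.format(index)), 'wb') as f:
            f.write(data)
    except Exception as exc:
        # swallow per-image failures so one dead link does not stall the pool
        print('skipped {}: {}'.format(url, exc))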
def main():
    labels: Dict[str, int] = {'neg': 0, 'pos': 1}
    dataset_path: Path = Path(__file__).parent.parent / 'data' / 'movie_data.csv'
    np.random.seed(0)
    pbar: ProgBar = ProgBar(DOCS_NUM)
    df: pd.DataFrame = pd.DataFrame()
    for subset in ('test', 'train'):
        for label in ('pos', 'neg'):
            path: Path = Path(__file__).parent / 'aclImdb' / subset / label
            for file in path.iterdir():
                # path.iterdir() already yields full paths, so open the entry directly
                with open(file, 'r', encoding='utf-8') as inp:
                    txt: str = inp.read()
                df = df.append([[txt, labels[label]]], ignore_index=True)
                pbar.update()
    df.columns = ['review', 'sentiment']
    df = df.reindex(np.random.permutation(df.index))
    df.to_csv(dataset_path, index=False)
def __new__(cls, iterable=None, desc=None, total=None, leave=True,
            backend=None, verbose=True):
    if backend is None:
        backend = Progressbar.backend
    if not verbose:
        backend = "hide"
    if backend == "tqdm":
        from tqdm import tqdm
        return tqdm(iterable=iterable, desc=desc, total=total, leave=leave,
                    ascii=True, ncols=80, file=sys.stdout,
                    bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} "
                               "[{elapsed}<{remaining}{postfix}]")  # remove rate_fmt
    elif backend == "tqdm_notebook":
        from tqdm import tqdm_notebook
        return tqdm_notebook(iterable=iterable, desc=desc, total=total, leave=leave)
    elif backend == "pyprind":
        from pyprind import ProgBar, prog_bar
        ProgBar._adjust_width = lambda self: None  # keep constant width
        if iterable is None:
            return ProgBar(total, title=desc, stream=1)
        else:
            return prog_bar(iterable, title=desc, stream=1, iterations=total)
    elif backend == "hide":
        return NoProgressbar(iterable=iterable)
    else:
        raise NotImplementedError("unknown backend")
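# A brief usage sketch for this factory, assuming it is the __new__ of a Progressbar
# class that carries a class-level `backend` attribute (the surrounding class body is
# not shown, so these call sites are illustrative only).
Progressbar.backend = "pyprind"                # choose the backend once, globally

for item in Progressbar(range(1000), desc="processing"):
    pass                                       # iterable form -> pyprind.prog_bar

bar = Progressbar(total=500, desc="download")  # counter form -> pyprind.ProgBar
for _ in range(500):
    bar.update()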
def etlFeature(post, img_list, h5filename):
    # Iterate over the images, extract their feature vectors, and write them to the h5 file
    bar = ProgBar(len(img_list), monitor=True,
                  title="Extracting image features, Image Total: %d" % len(img_list))
    for i, img_path in enumerate(img_list):
        norm_feat = extract_feat(img_path)
        img_name = os.path.split(img_path)[1]
        names = [img_name]
        feats2 = np.array(norm_feat)
        try:
            wH5FileData(i + post, feats2, names, h5filename)
        except Exception:
            print("Feats Write Error")
            return 1
        bar.update()
        # print("Extracting image features! Progress: %d/%d" % ((i + 1), len(img_list)))
    print(bar)
    return 0
async def cmd_update(self, message):
    channel = message.channel
    await self.send_typing(channel)
    msgs = [
        elem async for elem in channel.history(
            limit=None).filter(self.play_search_predicate)
    ]
    bar = ProgBar(len(msgs))
    for msg in msgs:
        if "```" not in msg.content and msg.author.name != 'Neurofonix':
            self.msg_list.append(
                msg.content.replace('-play ', '').replace('!play', ''))
            # if msg.author.name not in messages.keys():
            #     messages[msg.author.name] = []
            # messages[msg.author.name].append(msg.content)
        bar.update()
    json.dump(self.msg_list, open('neurofonix/msgs.json', 'w+'),
              sort_keys=True, indent=4)
    self.train(self.msg_list)
    await message.channel.send("Updated.")
def extract_weather_data(darksky_api_key, gps_coords, target_date, days): records = [] bar = ProgBar(days) for _ in range(days): url_time = str(int(target_date.timestamp())) request = "https://api.darksky.net/forecast/" + darksky_api_key + "/" + gps_coords + "," + url_time + "?units=si&exclude=currently,minutely,hourly,alerts,flags" response = requests.get(request, []) if response.status_code == 200: try: data = response.json()['daily']['data'][0] records.append( DailySummary( date=target_date, moonPhase=data['moonPhase'], precipIntensity=data['precipIntensity'], precipIntensityMax=data['precipIntensityMax'], precipProbability=data['precipProbability'], temperatureHigh=data['temperatureHigh'], temperatureLow=data['temperatureLow'], apparentTemperatureHigh=data[ 'apparentTemperatureHigh'], apparentTemperatureLow=data['apparentTemperatureLow'], dewPoint=data['dewPoint'], humidity=data['humidity'], windSpeed=data['windSpeed'], windGust=data['windGust'], windBearing=data['windBearing'], cloudCover=data['cloudCover'], uvIndex=data['uvIndex'], visibility=data['visibility'], temperatureMax=data['temperatureMax'], temperatureMin=data['temperatureMin'], apparentTemperatureMax=data['apparentTemperatureMax'], apparentTemperatureMin=data['apparentTemperatureMin'])) except KeyError: bar.update() target_date += timedelta(days=1) continue #time.sleep(6) bar.update() target_date += timedelta(days=1) return records
def obtain_samples(self, log=False, log_prefix='', test=False): print("total_samples:",self.total_samples) print("meta_batch_size:", self.meta_batch_size) print("max_path_length:" ,self.max_path_length) print("--------------obtaining", self.total_samples//self.meta_batch_size//self.max_path_length, "rollouts_per_task, for", self.meta_batch_size, "tasks..--------------") """ Collect batch_size trajectories from each task Args: log (boolean): whether to log sampling times log_prefix (str) : prefix for logger Returns: (dict) : A dict of paths of size [meta_batch_size] x (batch_size) x [5] x (max_path_length) """ # initial setup / preparation paths = OrderedDict() for i in range(self.meta_batch_size): paths[i] = [] n_samples = 0 running_paths = [_get_empty_running_paths_dict() for _ in range(self.vec_env.num_envs)] print(" runnng_paths length:", len(running_paths)) pbar = ProgBar(self.total_samples) policy_time, env_time = 0, 0 policy = self.policy # initial reset of envs obses = self.vec_env.reset() while n_samples < self.total_samples: # execute policy t = time.time() obs_per_task = np.split(np.asarray(obses), self.meta_batch_size) actions, agent_infos = policy.get_actions(obs_per_task) policy_time += time.time() - t # step environments t = time.time() actions = np.concatenate(actions) # stack meta batch next_obses, rewards, dones, env_infos = self.vec_env.step(actions) env_time += time.time() - t # stack agent_infos and if no infos were provided (--> None) create empty dicts agent_infos, env_infos = self._handle_info_dicts(agent_infos, env_infos) new_samples = 0 for idx, observation, action, reward, env_info, agent_info, done in zip(itertools.count(), obses, actions, rewards, env_infos, agent_infos, dones): # append new samples to running paths running_paths[idx]["observations"].append(observation) running_paths[idx]["actions"].append(action) running_paths[idx]["rewards"].append(reward) running_paths[idx]["env_infos"].append(env_info) running_paths[idx]["agent_infos"].append(agent_info) # if running path is done, add it to paths and empty the running path if done: paths[idx // self.envs_per_task].append(dict( observations=np.asarray(running_paths[idx]["observations"]), actions=np.asarray(running_paths[idx]["actions"]), rewards=np.asarray(running_paths[idx]["rewards"]), env_infos=utils.stack_tensor_dict_list(running_paths[idx]["env_infos"]), agent_infos=utils.stack_tensor_dict_list(running_paths[idx]["agent_infos"]), )) new_samples += len(running_paths[idx]["rewards"]) running_paths[idx] = _get_empty_running_paths_dict() pbar.update(new_samples) n_samples += new_samples obses = next_obses pbar.stop() if not test: self.total_timesteps_sampled += self.total_samples print("------------self.total_timesteps_sampled:", self.total_timesteps_sampled, "-----------------") else: print("------------tested on:", self.total_samples // self.max_path_length, " rollouts-----------------") if log: logger.logkv(log_prefix + "PolicyExecTime", policy_time) logger.logkv(log_prefix + "EnvExecTime", env_time) return paths
def obtain_samples(self, log=False, log_prefix='', random=False): """ Collect batch_size trajectories from each task Args: log (boolean): whether to log sampling times log_prefix (str) : prefix for logger random (boolean): whether the actions are random Returns: (dict) : A dict of paths of size [meta_batch_size] x (batch_size) x [5] x (max_path_length) """ # initial setup / preparation paths = [] n_samples = 0 running_paths = _get_empty_running_paths_dict() pbar = ProgBar(self.total_samples) policy_time, env_time = 0, 0 policy = self.policy policy.reset(dones=[True]) # initial reset of meta_envs obs = np.asarray(self.env.reset()) ts = 0 while n_samples < self.total_samples: # execute policy t = time.time() if random: action = self.env.action_space.sample() agent_info = {} else: action, agent_info = policy.get_action(obs) if action.ndim == 2: action = action[0] policy_time += time.time() - t # step environments t = time.time() next_obs, reward, done, env_info = self.env.step(action) ts += 1 done = done or ts >= self.max_path_length if done: next_obs = self.env.reset() ts = 0 env_time += time.time() - t new_samples = 0 # append new samples to running paths if isinstance(reward, np.ndarray): reward = reward[0] running_paths["observations"].append(obs) running_paths["actions"].append(action) running_paths["rewards"].append(reward) running_paths["dones"].append(done) running_paths["env_infos"].append(env_info) running_paths["agent_infos"].append(agent_info) # if running path is done, add it to paths and empty the running path if done: paths.append( dict( observations=np.asarray(running_paths["observations"]), actions=np.asarray(running_paths["actions"]), rewards=np.asarray(running_paths["rewards"]), dones=np.asarray(running_paths["dones"]), env_infos=utils.stack_tensor_dict_list( running_paths["env_infos"]), agent_infos=utils.stack_tensor_dict_list( running_paths["agent_infos"]), )) new_samples += len(running_paths["rewards"]) running_paths = _get_empty_running_paths_dict() pbar.update(new_samples) n_samples += new_samples obs = next_obs pbar.stop() self.total_timesteps_sampled += self.total_samples if log: logger.logkv(log_prefix + "PolicyExecTime", policy_time) logger.logkv(log_prefix + "EnvExecTime", env_time) return paths
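# Several of these samplers rely on a _get_empty_running_paths_dict() helper that is
# not shown. A minimal sketch of what it plausibly returns, inferred from the keys
# appended to running_paths above (some variants in this collection track extra keys
# such as cp_obs/cp_act, so the real helper may differ):
def _get_empty_running_paths_dict():
    # one growing list per field collected during a rollout
    return dict(observations=[], actions=[], rewards=[], dones=[],
                env_infos=[], agent_infos=[])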
# what we keep
img_remaining = remaining[list(valid.isin(['.jpg', '.gif', '.png', '.jpeg', '.JPG']))]
print("Remaining after invalid extensions removed: " + str(len(remaining)))


# In[141]:

img_remaining


# ## Downloading images


# In[142]:

# TODO: Download images with a minimum width or height
bar = ProgBar(len(remaining), monitor=True)
unfetchables = []
timeouts = []
urlerrors = []
large_names = []

for i, image_name in enumerate(remaining):
    bar.update(item_id=image_name, force_flush=True)
    img = None
    try:
        img = urlopen(image_name, None, 0.5).read()
    except (URLError, requests.exceptions.SSLError, ssl.SSLError) as e:
        urlerrors.append((image_name, e))
        print("URLError: ", e, image_name)
        continue
    except socket.timeout as e:
from get_minibatch import get_minibatch

import sys
sys.path.append("..")

from tokenizer import tokenizer
from vectorizer import vect
from sklearn.linear_model import SGDClassifier
from pyprind import ProgBar
import os
import pickle
import numpy as np
# stream_docs is used below but was not imported in the original snippet;
# it is assumed to be a local helper alongside get_minibatch
from stream_docs import stream_docs

clf = SGDClassifier(loss='log', random_state=1, max_iter=1)
classes = np.array([0, 1])
doc_stream = stream_docs('movie_data.csv')

pbar = ProgBar(45)
for _ in range(45):
    X_train, y_train = get_minibatch(doc_stream, 1000)
    if not X_train:
        break
    X_train = vect.transform(X_train)
    clf.partial_fit(X_train, y_train, classes=classes)
    pbar.update()

X_test, y_test = get_minibatch(doc_stream, 5000)
X_test = vect.transform(X_test)
print("Accuracy: %.3f" % clf.score(X_test, y_test))
clf.partial_fit(X_test, y_test, classes=classes)
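# For reference, a minimal sketch of the two streaming helpers this script relies on,
# assuming movie_data.csv has a header row and stores the review text and label in its
# last two columns (the column layout and the helpers' exact behaviour are assumptions).
import csv

def stream_docs(path):
    # yield one (text, label) pair at a time so the corpus never sits in memory
    with open(path, 'r', encoding='utf-8') as csv_file:
        reader = csv.reader(csv_file)
        next(reader)  # skip the header row
        for row in reader:
            yield row[-2], int(row[-1])

def get_minibatch(doc_stream, size):
    docs, y = [], []
    try:
        for _ in range(size):
            text, label = next(doc_stream)
            docs.append(text)
            y.append(label)
    except StopIteration:
        return None, None
    return docs, y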
def obtain_samples(self, log=False, log_prefix='', random=False, advance_curriculum=False, policy=None, teacher_dict={}, max_action=False): """ Collect batch_size trajectories from each task Args: log (boolean): whether to log sampling times log_prefix (str) : prefix for logger random (boolean): whether the actions are random Returns: (dict) : A dict of paths of size [meta_batch_size] x (batch_size) x [5] x (max_path_length) """ # initial setup / preparation paths = OrderedDict() for i in range(self.meta_batch_size): paths[i] = [] n_samples = 0 running_paths = [ _get_empty_running_paths_dict() for _ in range(self.vec_env.num_envs) ] total_paths = self.rollouts_per_meta_task * self.meta_batch_size * self.envs_per_task pbar = ProgBar(total_paths) policy_time, env_time = 0, 0 if policy is None: policy = self.policy policy.reset(dones=[True] * self.meta_batch_size) if self.reward_predictor is not None: self.reward_predictor.reset(dones=[True] * self.meta_batch_size) if self.supervised_model is not None: self.supervised_model.reset(dones=[True] * self.meta_batch_size) # initial reset of meta_envs if advance_curriculum: self.vec_env.advance_curriculum() self.update_tasks() obses = self.vec_env.reset() num_paths = 0 itrs = 0 while num_paths < total_paths: print("Loop", num_paths, total_paths, itrs) itrs += 1 t = time.time() obses = self.obs_preprocessor(obses, teacher_dict) if random: actions = np.stack([[self.env.action_space.sample()] for _ in range(len(obses))], axis=0) agent_infos = [[{ 'mean': np.zeros_like(self.env.action_space.sample()), 'log_std': np.zeros_like(self.env.action_space.sample()) }] * self.envs_per_task] * self.meta_batch_size else: actions, agent_infos = policy.get_actions_t(obses) if max_action: # TODO: double check this still works assert False, "We haven't checked this still works with the new model; if it does, feel free to delete." 
original_action_shape = actions.shape actions = [[[np.argmax(d['probs'])] for d in agent_info] for agent_info in agent_infos] actions = np.array(actions, dtype=np.int32) if not actions.shape == original_action_shape: assert False, (actions.shape, original_action_shape) policy_time += time.time() - t # step environments t = time.time() next_obses, rewards, dones, env_infos = self.vec_env.step(actions) env_time += time.time() - t new_samples = 0 new_paths = 0 for idx, observation, action, reward, env_info, agent_info, done in zip( itertools.count(), obses, actions, rewards, env_infos, agent_infos, dones): # append new samples to running paths if isinstance(reward, np.ndarray): reward = reward[0] running_paths[idx]["observations"].append(observation) running_paths[idx]["actions"].append(action) running_paths[idx]["rewards"].append(reward) running_paths[idx]["dones"].append(done) running_paths[idx]["env_infos"].append(env_info) running_paths[idx]["agent_infos"].append(agent_info) # if running path is done, add it to paths and empty the running path if done: curr_path = paths[idx // self.envs_per_task] if len(curr_path) >= self.rollouts_per_meta_task: continue paths[idx // self.envs_per_task].append( dict( observations=np.asarray( running_paths[idx]["observations"]), actions=np.asarray(running_paths[idx]["actions"]), rewards=np.asarray(running_paths[idx]["rewards"]), dones=np.asarray(running_paths[idx]["dones"]), env_infos=utils.stack_tensor_dict_list( running_paths[idx]["env_infos"]), agent_infos=utils.stack_tensor_dict_list( running_paths[idx]["agent_infos"]), )) num_paths += 1 new_paths += 1 new_samples += len(running_paths[idx]["rewards"]) running_paths[idx] = _get_empty_running_paths_dict() pbar.update(new_paths) n_samples += new_samples obses = next_obses pbar.stop() self.total_timesteps_sampled += n_samples if log: logger.logkv(log_prefix + "PolicyExecTime", policy_time) logger.logkv(log_prefix + "EnvExecTime", env_time) return paths
data['min_host_leafs'] = min_host_leafs
data['max_host_leafs'] = max_host_leafs
data['min_guest_leafs'] = min_guest_leafs
data['max_guest_leafs'] = max_guest_leafs
data['duplication_rate'] = duplication_rate
data['loss_rate'] = loss_rate
data['switch_rate'] = switch_rate
data['k'] = k
data['theta'] = theta

with open(prefix + '/' + 'data.csv', 'w') as f:
    f.write(','.join(data.keys()) + '\n')
    f.write(','.join(map(str, data.values())))

p = ProgBar(args.N, title='simulating trees...', monitor=True, width=30)
p.update()

if args.config:
    config = json.load(open(args.config))
    # host tree parameters
    run_dir = config['run_dir']
    N = config['N']
    birth_rate = config['birth_rate']
    death_rate = config['death_rate']
    min_host_leafs = config['min_host_leafs']
    max_host_leafs = config['max_host_leafs']
    # guest tree parameters
    duplication_rate = config['duplication_rate']
def obtain_samples(self, log=False, log_prefix=''): """ Collect batch_size trajectories from each task Args: log (boolean): whether to log sampling times log_prefix (str) : prefix for logger Returns: (dict) : A dict of paths of size [meta_batch_size] x (batch_size) x [5] x (max_path_length) """ # initial setup / preparation paths = [] n_samples = 0 running_paths = dict() pbar = ProgBar(self.total_samples) policy_time, env_time = 0, 0 policy = self.policy # initial reset of envs obses = self.env.reset() while n_samples < self.total_samples: # execute policy t = time.time() obs_per_task = np.array(obses) actions, logits, values = policy.get_actions(obs_per_task) policy_time += time.time() - t # step environments t = time.time() next_obses, rewards, dones, env_infos = self.env.step(actions) env_time += time.time() - t # stack agent_infos and if no infos were provided (--> None) create empty dicts new_samples = 0 for observation, action, logit, reward, value, finish_time in zip( obses, actions, logits, rewards, values, env_infos): running_paths["observations"] = observation running_paths["actions"] = action running_paths["logits"] = logit running_paths["rewards"] = reward running_paths["values"] = value running_paths["finish_time"] = finish_time # handling paths.append( dict( observations=np.squeeze( np.asarray(running_paths["observations"])), actions=np.squeeze(np.asarray( running_paths["actions"])), logits=np.squeeze(np.asarray(running_paths["logits"])), rewards=np.squeeze(np.asarray( running_paths["rewards"])), values=np.squeeze(np.asarray(running_paths["values"])), finish_time=np.squeeze( np.asarray(running_paths["finish_time"])))) # if running path is done, add it to paths and empty the running path new_samples += len(running_paths["rewards"]) running_paths = _get_empty_running_paths_dict() pbar.update(new_samples) n_samples += new_samples obses = next_obses pbar.stop() self.total_timesteps_sampled += self.total_samples if log: logger.logkv(log_prefix + "PolicyExecTime", policy_time) logger.logkv(log_prefix + "EnvExecTime", env_time) return paths
def obtain_samples(self, itr, log=True, log_prefix='', show_pbar=True): """ Collect batch_size trajectories from each task Args: log (boolean): whether to log sampling times log_prefix (str) : prefix for logger show_pbar (boolean): whether to show progress bar Returns: (dict) : A dict of paths of size [meta_batch_size] x (batch_size) x [5] x (max_path_length) """ # initial setup / preparation paths = OrderedDict() for i in range(self.meta_batch_size): paths[i] = [] n_samples = 0 running_paths = [_get_empty_running_paths_dict() for _ in range(self.vec_env.num_envs)] if show_pbar: pbar = ProgBar(self.total_samples) policy_time, env_time = 0, 0 policy = self.policy # initial reset of envs obses = self.vec_env.reset() while n_samples < self.total_samples: # execute policy t = time.time() obs_per_task = np.split(np.asarray(obses), self.meta_batch_size) actions, agent_infos = policy.get_actions(obs_per_task) policy_time += time.time() - t # step environments t = time.time() actions = np.concatenate(actions) # stack meta batch next_obses, rewards, dones, env_infos = self.vec_env.step(actions) env_time += time.time() - t # stack agent_infos and if no infos were provided (--> None) create empty dicts agent_infos, env_infos = self._handle_info_dicts(agent_infos, env_infos) new_samples = 0 for idx, observation, action, reward, env_info, agent_info, done in zip(itertools.count(), obses, actions, rewards, env_infos, agent_infos, dones): # append new samples to running paths running_paths[idx]["observations"].append(observation) running_paths[idx]["actions"].append(action) running_paths[idx]["rewards"].append(reward) running_paths[idx]["dones"].append(int(done)) running_paths[idx]["env_infos"].append(env_info) running_paths[idx]["agent_infos"].append(agent_info) # if running path is done, add it to paths and empty the running path if done: paths[idx // self.envs_per_task].append(dict( observations=np.asarray(running_paths[idx]["observations"]), actions=np.asarray(running_paths[idx]["actions"]), rewards=np.asarray(running_paths[idx]["rewards"]), dones=np.asarray(running_paths[idx]["dones"], dtype=np.float), env_infos=utils.stack_tensor_dict_list(running_paths[idx]["env_infos"]), agent_infos=utils.stack_tensor_dict_list(running_paths[idx]["agent_infos"]), )) new_samples += len(running_paths[idx]["rewards"]) running_paths[idx] = _get_empty_running_paths_dict() if show_pbar: pbar.update(new_samples) n_samples += new_samples obses = next_obses if show_pbar: pbar.stop() self.total_timesteps_sampled += self.total_samples if log: tabular.record(log_prefix + "PolicyExecTime", policy_time) tabular.record(log_prefix + "EnvExecTime", env_time) return paths
def obtain_samples(self, log=False, log_prefix='', random=False, deterministic=False, eval=False, multiple_trajectory=1, dynamics_model=None): """ Collect batch_size trajectories from each task Args: log (boolean): whether to log sampling times log_prefix (str) : prefix for logger random (boolean): whether the actions are random Returns: (dict) : A dict of paths of size [meta_batch_size] x (batch_size) x [5] x (max_path_length) """ # initial setup / preparation multiple_trajectories = [] for _ in range(multiple_trajectory): paths = [] n_samples = 0 running_paths = _get_empty_running_paths_dict() if log: pbar = ProgBar(self.total_samples) policy_time, env_time = 0, 0 policy = self.policy policy.reset(dones=[True]) # initial reset of meta_envs obs = np.asarray(self.env.reset()) ts = 0 while n_samples < self.total_samples: # execute policy t = time.time() if eval: H = self.mpc.horizon mean_list = [] std_list = [] observation = obs for t in range(H + 1): action, agent_info = policy.get_action(observation) action = agent_info['mean'] mean_list.append(action) std_list.append(agent_info['log_std']) if self.policy.squashed: action = np.tanh(action) if observation.ndim == 1: observation = observation[None] if action.ndim == 1: action = action[None] observation = dynamics_model.predict( observation, action) observation = observation.reshape((-1)) action, _ = self.mpc.get_actions(obs[None], mean_list, std_list) if action.ndim == 2: action = action[0] else: obs = obs.reshape((-1)) if random: action = self.env.action_space.sample() agent_info = {} elif deterministic: action, agent_info = policy.get_action(obs) action = agent_info['mean'] if self.policy.squashed: action = np.tanh(action) else: action, agent_info = policy.get_action(obs) if action.ndim == 2: action = action[0] policy_time += time.time() - t # step environments t = time.time() next_obs, reward, done, env_info = self.env.step(action) ts += 1 env_time += time.time() - t new_samples = 0 # append new samples to running paths if isinstance(reward, np.ndarray): reward = reward[0] running_paths["observations"].append(obs) running_paths["actions"].append(action) running_paths["rewards"].append(reward) running_paths["dones"].append(done) running_paths["env_infos"].append(env_info) running_paths["agent_infos"].append(agent_info) # if running path is done, add it to paths and empty the running path if done or ts >= self.max_path_length: paths.append( dict( observations=np.asarray( running_paths["observations"]), actions=np.asarray(running_paths["actions"]), rewards=np.asarray(running_paths["rewards"]), dones=np.asarray(running_paths["dones"]), env_infos=[], agent_infos=[], # env_infos=utils.stack_tensor_dict_list(running_paths["env_infos"]), # agent_infos=utils.stack_tensor_dict_list(running_paths["agent_infos"]), )) new_samples += len(running_paths["rewards"]) running_paths = _get_empty_running_paths_dict() if done or ts >= self.max_path_length: next_obs = self.env.reset() ts = 0 if log: pbar.update(new_samples) n_samples += new_samples obs = next_obs multiple_trajectories.append(paths) if log: pbar.stop() self.total_timesteps_sampled += self.total_samples if log: logger.logkv(log_prefix + "PolicyExecTime", policy_time) logger.logkv(log_prefix + "EnvExecTime", env_time) return multiple_trajectories
def main(): parser = argparse.ArgumentParser( description="Create epubs from Reddit") parser.add_argument('config_file', help="The file containing the links, formatted in YAML") parser.add_argument('output_directory') args = parser.parse_args() config_file = args.config_file directory = args.output_directory # Disable warnings due to a bug in praw warnings.filterwarnings("ignore") reader = praw.Reddit(user_agent="reddit2ebook") config = read_config_file(config_file) # We have to use xhtml here for Epubs renderer = mistune.Renderer(use_xhtml=True, escape=True) markdown = mistune.Markdown(renderer=renderer) for bookname in config.keys(): book = epub.EpubBook() # The epub standard requires an unique identifier, this is normally # the ISBN, but since we dont have one we generate an UUID book.set_identifier(uuid.uuid4().hex) chapter_number = 1 chapters = [] if "links" in config[bookname]: bar = ProgBar( len(config[bookname]["links"]), title="Creating ebook " + bookname + ".epub", bar_char='█') links = config[bookname]["links"] if "cover" in config[bookname]: convert_to_jpeg(config[bookname]["cover"]) with open("cover.jpg", 'rb') as f: book.set_cover("cover.jpg", f.read()) os.remove("cover.jpg") if "author" in config[bookname]: book.add_author(config[bookname]["author"]) if "lang" in config[bookname]: book.set_language(config[bookname]["lang"]) else: book.set_language('en') if "title" in config[bookname]: book.set_title(config[bookname]["title"]) else: book.set_title(bookname) else: links = config[bookname] book.set_language('en') book.set_title(bookname) bar = ProgBar(len(config[bookname]), title="Creating ebook " + bookname + ".epub", bar_char='█') for url in links: bar.update() # Check if the link is a comment or a submission # Submissions have a trailing slash if url.split('/')[-1] == '': submission = get_submission_text(reader, url) chapter = create_chapter( body=markdown(submission[1]), title=submission[0], filename="chapter" + str(chapter_number) + ".xhtml" ) else: comment = get_comment_text(reader, url) chapter = create_chapter( body=markdown(comment[1]), title="Comment by " + comment[0], filename="chapter" + str(chapter_number) + ".xhtml" ) chapters.append(chapter) book.add_item(chapter) chapter_number += 1 book.toc = chapters book.add_item(epub.EpubNcx()) book.add_item(epub.EpubNav()) style = load_css() default_css = epub.EpubItem( uid="style_default", file_name="style/default.css", media_type="text/css", content=style) book.add_item(default_css) nav_css = epub.EpubItem( uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style) book.add_item(nav_css) spine = ['cover', 'nav'] + chapters book.spine = spine epub.write_epub(os.path.join(directory, bookname + ".epub"), book, {}) print("Finished writing " + bookname + ".epub\n")
def get_switches(road_length=300, n_iterations=250, p_slow=0.1, vmax=5,
                 prog_bar=False):
    """Convenience function to obtain the average number of lane switches
    at a given density.

    Parameters
    ----------
    road_length: int
        length of the road object to be instantiated
    n_iterations: int
        number of times the system is evolved
    p_slow: float, 0 <= p_slow < 1
        probability of random deceleration
    vmax: int
        maximum speed of the road
    prog_bar: boolean
        if set to True, a progress bar is included. Note that this requires
        the pyprind module to be installed and hence defaults to False
    """
    car_counts = [i for i in range(1, road_length)]
    densities = [count / road_length for count in car_counts]
    if prog_bar:
        from pyprind import ProgBar
        prog = ProgBar(len(densities))

    # the number of switches is stored in a dataframe indexed by density
    switches = pd.DataFrame(np.zeros(len(densities)), dtype=float,
                            columns=['Avg Number of Switches'],
                            index=densities)
    switches.index.name = 'Density'

    for density, count in zip(densities, car_counts):
        # a road object is instantiated for each density value
        M1 = v4.Road(L=road_length, car_count=count * 2, vmax=vmax,
                     p_slow=p_slow, random_state=3)
        # the data is then gathered via the get_data() function
        data, top_speeds, avg_speeds, switch_count = get_data(
            M1, n_iterations=n_iterations)
        switch_count = np.array(switch_count)
        # the average number of switches is stored at the corresponding density value
        switches.loc[density] = np.mean(switch_count)
        if prog_bar:
            prog.update()

    return switches
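# A short usage sketch for get_switches; the plotting step is illustrative only and
# assumes matplotlib is available.
import matplotlib.pyplot as plt

switches = get_switches(road_length=100, n_iterations=100, prog_bar=True)

switches.plot(legend=False)
plt.xlabel('Density')
plt.ylabel('Avg Number of Switches')
plt.show()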
def obtain_samples(self, log=False, log_prefix='', random=False): """ Collect batch_size trajectories from each task Args: log (boolean): whether to log sampling times log_prefix (str) : prefix for logger random (boolean): whether the actions are random Returns: (dict) : A dict of paths of size [meta_batch_size] x (batch_size) x [5] x (max_path_length) """ # initial setup / preparation paths = OrderedDict() for i in range(self.meta_batch_size): paths[i] = [] running_paths = _get_empty_running_paths_dict() pbar = ProgBar(self.total_samples) policy_time, env_time = 0, 0 policy = self.policy for idx in range(self.meta_batch_size): ts = 0 n_samples = 0 init_obs = np.expand_dims(self.env.reset(), 0).copy() obses = [init_obs for _ in range(self.meta_batch_size)] policy.reset(dones=[True] * self.meta_batch_size) while n_samples < self.samples_per_task: # execute policy t = time.time() if random: actions = np.stack([[self.env.action_space.sample()] for _ in range(len(obses))], axis=0) agent_infos = [[{ 'mean': np.zeros_like(self.env.action_space.sample()), 'log_std': np.zeros_like(self.env.action_space.sample()) }] * self.envs_per_task] * self.meta_batch_size else: actions, agent_infos = policy.get_actions(obses) policy_time += time.time() - t # step environments t = time.time() action, agent_info = actions[idx][0], agent_infos[idx][0] observation = obses[idx][0].copy() next_obs, reward, done, env_info = self.env.step(action) ts += 1 done = done or ts >= self.max_path_length if done: next_obs = self.env.reset() # time.sleep(1) ts = 0 env_time += time.time() - t new_samples = 0 # append new samples to running paths if isinstance(reward, np.ndarray): reward = reward[0] running_paths["observations"].append(observation) running_paths["actions"].append(action) running_paths["rewards"].append(reward) running_paths["dones"].append(done) running_paths["env_infos"].append(env_info) running_paths["agent_infos"].append(agent_info) # if running path is done, add it to paths and empty the running path if done: paths[idx].append( dict( observations=np.asarray( running_paths["observations"]), actions=np.asarray(running_paths["actions"]), rewards=np.asarray(running_paths["rewards"]), dones=np.asarray(running_paths["dones"]), env_infos=utils.stack_tensor_dict_list( running_paths["env_infos"]), agent_infos=utils.stack_tensor_dict_list( running_paths["agent_infos"]), )) new_samples += len(running_paths["rewards"]) running_paths = _get_empty_running_paths_dict() pbar.update(new_samples) n_samples += new_samples obses[idx][0] = next_obs self.total_timesteps_sampled += n_samples pbar.stop() if log: logger.logkv(log_prefix + "PolicyExecTime", policy_time) logger.logkv(log_prefix + "EnvExecTime", env_time) return paths
def mix_data(self, nevents, isUnbalanced=False): sr = self.sr audio_path = '../aed_data/freesound/audio/' audio_names = os.listdir(audio_path) # random.seed(33) random.shuffle(audio_names) if nevents == 5: nsamples = 2000 elif nevents == 10: nsamples = 3000 elif nevents == 15: nsamples = 4000 elif nevents == 20: nsamples = 6000 else: nsamples = 0 audio_names = audio_names[:nevents] if isUnbalanced: print('The first five events are:', audio_names) print('Now the inefficient category is:', audio_names[0]) datas = [] labels = [] count = 0 pb = ProgBar(nsamples) for i in range(nsamples): min_len = 1000000 mixed_data = np.zeros((min_len, )) class_label = [] for j in range(nevents): if isUnbalanced: if j == 0: # let first event unbalanced if count >= 25000: tag = 0 else: tag = random.randint(0, 1) else: tag = random.randint(0, 1) else: tag = random.randint(0, 1) audio_files = os.listdir(audio_path + audio_names[j]) audio_file = random.sample(audio_files, 1)[0] wav_data, _ = lrs.load(audio_path + audio_names[j] + '/' + audio_file, sr=sr) class_label.append(tag) if min_len > len(wav_data): min_len = len(wav_data) mixed_data = mixed_data[:min_len] + wav_data[:min_len] * tag mixed_data = self.signal_norm(mixed_data) noise_files = os.listdir('../aed_data/freesound/noise/') noise_file = random.sample(noise_files, 1)[0] n_data, _ = lrs.load('../aed_data/freesound/noise/' + noise_file, sr=sr) dB = random.sample(list(range(6, 12)), 1)[0] if len(n_data) < len(mixed_data): mixed_data = mixed_data[:len(n_data)] else: n_data = n_data[:len(mixed_data)] k = self.SNR2K(mixed_data, n_data, dB) mixed_data += k * n_data[:len(mixed_data)] # lrs.output.write_wav('test.wav', mixed_data, sr=sr) s = self.__emphasize__(mixed_data) frames, nframe = self.__enframe__(s) if class_label[0] == 1: count += nframe frames = self.__windowing__(frames) mfccs_list = [] labels_list = [] for frame in frames: if np.max(np.abs(frame)) < 0.03: class_label = [0] * nevents mfcc = lrs.feature.mfcc(frame, sr=sr, n_fft=256, n_mfcc=24, n_mels=24, center=False, norm=None) mfcc_delt = lrs.feature.delta(mfcc, width=3) mfcc_delt2 = lrs.feature.delta(mfcc, order=2, width=3) mfccs = np.concatenate([mfcc, mfcc_delt, mfcc_delt2], axis=1) mfccs = np.reshape(mfccs, (np.size(mfccs), )) mfccs_list.append(mfccs) labels_list.append(class_label) datas.append(self.normalize(np.array(mfccs_list))) labels.append(np.array(labels_list)) pb.update() ziped = list(zip(datas, labels)) random.shuffle(ziped) datas[:], labels[:] = zip(*ziped) if isUnbalanced: with open( '../aed_data/freesound/mfccs/datas/DA_mfccs_' + str(nevents) + '.pkl', 'wb') as f: pkl.dump(datas, f) with open( '../aed_data/freesound/mfccs/labels/DA_labels_' + str(nevents) + '.pkl', 'wb') as f: pkl.dump(labels, f) else: with open( '../aed_data/freesound/mfccs/datas/mfccs_' + str(nevents) + '.pkl', 'wb') as f: pkl.dump(datas, f) with open( '../aed_data/freesound/mfccs/labels/labels_' + str(nevents) + '.pkl', 'wb') as f: pkl.dump(labels, f) print() print('Mixing audios with {} nevents and {} nsamples DONE.'.format( nevents, nsamples))
def __init__(self, iterations, track_time=True, width=30, bar_char='#',
             stream=stream_writer(), title='', monitor=False,
             update_interval=None):
    # super(ProgBar, self).__init__(iterations=iterations, track_time=track_time,
    #                               width=width, bar_char=bar_char, stream=stream,
    #                               title=title, monitor=monitor,
    #                               update_interval=update_interval)
    Pb.__init__(self, iterations=iterations, track_time=track_time, width=width,
                bar_char=bar_char, stream=stream, title=title, monitor=monitor,
                update_interval=update_interval)
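# The enclosing class is not shown. A minimal sketch of how such a wrapper is plausibly
# declared and used, assuming Pb is pyprind.ProgBar imported under an alias and
# stream_writer() returns a file-like object for the bar's output (both assumptions).
from pyprind import ProgBar as Pb

class ProgBar(Pb):
    """Thin wrapper that defaults the output stream to a custom writer."""
    # __init__ as defined above

# once constructed, it behaves like pyprind.ProgBar
bar = ProgBar(100, title='copying files')
for _ in range(100):
    bar.update()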
def flash(serial_port, force, low_speed): # init stat_image_file = stat(IMG_FILE) p_bar = ProgBar(stat_image_file.st_size) def clear_buffer(): serial_port.reset_output_buffer() serial_port.reset_input_buffer() def read_with_bar(data, timeout=0): return serial_port.read(data) def write_with_bar(data, timeout=0): write_with_bar.current += len(data) p_bar.title = "%d / %d" % (write_with_bar.current, stat_image_file.st_size) p_bar.update(len(data)) ret = serial_port.write(data) sleep(0.001) return ret write_with_bar.current = 0 # go to flash mode = goto_flash_mode(serial_port) if mode == False: return False if mode == 'C': image_file = IMG_FILE mac = '' get_mac_cmd = bytes.fromhex('210600ea2d38000000') serial_port.timeout = 3 sleep(0.1) serial_port.write(get_mac_cmd) mac = (serial_port.read_until()) pos = mac.find(b'MAC:') if pos >= 0: mac = mac[pos + 4:len(mac) - 1].decode("ascii") print('MAC Address: %s' % mac) if isfile(FLASH_SIGNAL) and not force: with open(FLASH_SIGNAL, 'rt') as f: if f.read().strip() == mac: print( "this device already flash this program! skip flash. (--force to overwrite)" ) clear_buffer() control_reset(serial_port) return True else: print( 'bootloader not response MAC address, seems level 1 running.') mode = 'P' if mode == 'P': image_file = FLS_FILE # up speed def switch_baudrate(br): clear_buffer() serial_port.baudrate = br if not low_speed: print('switching to 2M baudrate...') sleep(0.2) speed_magic = bytes.fromhex('210a00ef2a3100000080841e00') serial_port.write(speed_magic) sleep(0.01) switch_baudrate(2000000) serial_port.timeout = 0.3 sleep(0.01) wront_cnt = 0 while True: c_in = serial_port.read(1) # print('got: "%s"' % c_in) if c_in == b'C' or c_in == b'P': break if c_in == b'\x00': continue elif wront_cnt >= 10: print('retry...') wront_cnt = 0 switch_baudrate(115200) serial_port.write(speed_magic) switch_baudrate(2000000) sleep(0.01) else: wront_cnt += 1 print('high speed mode!') serial_port.timeout = None print('sending file:', image_file) stream = open(image_file, 'rb') clear_buffer() modem = XMODEM(getc=read_with_bar, putc=write_with_bar) print("please wait for download....") result = modem.send(stream) print('') if result: print("download image success!") if mode == 'C': with open(FLASH_SIGNAL, 'wt') as f: f.write(mac) else: control_reset(serial_port) else: print("download image fail!") return False stream.close() clear_buffer() return True
def obtain_samples(self, log=False, log_prefix=''): """ Collect batch_size trajectories from each task Args: log (boolean): whether to log sampling times log_prefix (str) : prefix for logger Returns: (dict) : A dict of paths of size [meta_batch_size] x (batch_size) x [5] x (max_path_length) """ # initial setup / preparation paths = OrderedDict() for i in range(self.meta_batch_size): paths[i] = [] n_samples = 0 running_paths = [ _get_empty_running_paths_dict() for _ in range(self.vec_env.num_envs) ] pbar = ProgBar(self.total_samples) policy_time, env_time = 0, 0 policy = self.policy # initial reset of envs obses = self.vec_env.reset() while n_samples < self.total_samples: # execute policy t = time.time() # obs_per_task = np.split(np.asarray(obses), self.meta_batch_size) obs_per_task = np.array(obses) actions, logits, values = policy.get_actions(obs_per_task) policy_time += time.time() - t # step environments t = time.time() # actions = np.concatenate(actions) next_obses, rewards, dones, env_infos = self.vec_env.step(actions) # print("rewards shape is: ", np.array(rewards).shape) # print("finish time shape is: ", np.array(env_infos).shape) env_time += time.time() - t # stack agent_infos and if no infos were provided (--> None) create empty dicts new_samples = 0 for idx, observation, action, logit, reward, value, done, task_finish_times in zip( itertools.count(), obses, actions, logits, rewards, values, dones, env_infos): # append new samples to running paths # handling for single_ob, single_ac, single_logit, single_reward, single_value, single_task_finish_time \ in zip(observation, action, logit, reward, value, task_finish_times): running_paths[idx]["observations"] = single_ob running_paths[idx]["actions"] = single_ac running_paths[idx]["logits"] = single_logit running_paths[idx]["rewards"] = single_reward running_paths[idx]["finish_time"] = single_task_finish_time running_paths[idx]["values"] = single_value paths[idx // self.envs_per_task].append( dict(observations=np.squeeze( np.asarray(running_paths[idx]["observations"])), actions=np.squeeze( np.asarray(running_paths[idx]["actions"])), logits=np.squeeze( np.asarray(running_paths[idx]["logits"])), rewards=np.squeeze( np.asarray(running_paths[idx]["rewards"])), finish_time=np.squeeze( np.asarray( running_paths[idx]["finish_time"])), values=np.squeeze( np.asarray(running_paths[idx]["values"])))) # if running path is done, add it to paths and empty the running path new_samples += len(running_paths[idx]["rewards"]) running_paths[idx] = _get_empty_running_paths_dict() pbar.update(new_samples) n_samples += new_samples obses = next_obses pbar.stop() self.total_timesteps_sampled += self.total_samples if log: logger.logkv(log_prefix + "PolicyExecTime", policy_time) logger.logkv(log_prefix + "EnvExecTime", env_time) return paths
msg += "Trying with passwords in list...\n"
print(msg)

dict_file = codecs.open(
    args['<DICT>'], 'rb', encoding='utf-8', errors='ignore'
)
psswd_count = dict_file.read().count('\n')
dict_file.seek(0)
items = 0

progress_bar = ProgBar(
    psswd_count, stream=1, title='MKBRUTUS Bruteforce Attack'
)

for password in dict_file.readlines():
    password = password.strip('\n\r ')
    items += 1
    if args['--verbose']:
        alert = "[-] Trying {} of {} passwords".format(
            str(items), str(psswd_count))
        print(alert + " - current: " + password)
    try:
        connect(args['<TARGET>'], args['--user'], password)
        alert = "\n[+] Login successful!!! "
        alert += "User: "******", Password: " + password
def obtain_samples(self, log=False, log_prefix='', random=False): """ Collect batch_size trajectories from each task Args: log (boolean): whether to log sampling times log_prefix (str) : prefix for logger random (boolean): whether the actions are random Returns: (list): A list of dicts with the samples """ # initial setup / preparation paths = [] n_samples = 0 num_envs = self.vec_env.num_envs running_paths = [ _get_empty_running_paths_dict() for _ in range(num_envs) ] pbar = ProgBar(self.total_samples) policy_time, env_time = 0, 0 policy = self.policy policy.reset(dones=[True] * self.vec_env.num_envs) # initial reset of meta_envs obses = np.asarray(self.vec_env.reset()) while n_samples < self.total_samples: # execute policy t = time.time() if random: actions = np.stack( [self.env.action_space.sample() for _ in range(num_envs)], axis=0) agent_infos = {} else: a_bs = self.adapt_batch_size if a_bs is not None and len( running_paths[0]['observations']) > a_bs + 1: adapt_obs = [ np.stack(running_paths[idx]['observations'][-a_bs - 1:-1]) for idx in range(num_envs) ] adapt_act = [ np.stack(running_paths[idx]['actions'][-a_bs - 1:-1]) for idx in range(num_envs) ] adapt_next_obs = [ np.stack(running_paths[idx]['observations'][-a_bs:]) for idx in range(num_envs) ] policy.dynamics_model.switch_to_pre_adapt() policy.dynamics_model.adapt(adapt_obs, adapt_act, adapt_next_obs) actions, agent_infos = policy.get_actions(obses) policy_time += time.time() - t # step environments t = time.time() next_obses, rewards, dones, env_infos = self.vec_env.step(actions) env_time += time.time() - t # stack agent_infos and if no infos were provided (--> None) create empty dicts agent_infos, env_infos = self._handle_info_dicts( agent_infos, env_infos) new_samples = 0 for idx, observation, action, reward, env_info, agent_info, done in zip( itertools.count(), obses, actions, rewards, env_infos, agent_infos, dones): # append new samples to running paths if isinstance(reward, np.ndarray): reward = reward[0] running_paths[idx]["observations"].append(observation) running_paths[idx]["actions"].append(action) running_paths[idx]["rewards"].append(reward) running_paths[idx]["dones"].append(done) running_paths[idx]["env_infos"].append(env_info) running_paths[idx]["agent_infos"].append(agent_info) # if running path is done, add it to paths and empty the running path if done: paths.append( dict( observations=np.asarray( running_paths[idx]["observations"]), actions=np.asarray(running_paths[idx]["actions"]), rewards=np.asarray(running_paths[idx]["rewards"]), dones=np.asarray(running_paths[idx]["dones"]), env_infos=utils.stack_tensor_dict_list( running_paths[idx]["env_infos"]), agent_infos=utils.stack_tensor_dict_list( running_paths[idx]["agent_infos"]), )) new_samples += len(running_paths[idx]["rewards"]) running_paths[idx] = _get_empty_running_paths_dict() pbar.update(self.vec_env.num_envs) n_samples += new_samples obses = next_obses pbar.stop() self.total_timesteps_sampled += self.total_samples if log: logger.logkv(log_prefix + "PolicyExecTime", policy_time) logger.logkv(log_prefix + "EnvExecTime", env_time) return paths
def obtain_samples(self, log=False, log_prefix='', random=False): """ Collect batch_size trajectories from each task Args: log (boolean): whether to log sampling times log_prefix (str) : prefix for logger random (boolean): whether the actions are random Returns: (list): A list of dicts with the samples """ # initial setup / preparation paths = [] n_samples = 0 num_envs = self.vec_env.num_envs running_paths = [ _get_empty_running_paths_dict() for _ in range(num_envs) ] pbar = ProgBar(self.total_samples) policy_time, env_time = 0, 0 policy = self.policy if self.use_cem: for i in range(num_envs): self.reset_cem(i) # initial reset of meta_envs obses = np.asarray(self.vec_env.reset()) state_counts = [0] * self.vec_env.num_envs # history self.obs_dim = obses.shape[1] history_state = np.zeros( (obses.shape[0], self.obs_dim * self.history_length)) history_act = np.zeros( (obses.shape[0], self.act_dim * self.history_length)) while n_samples < self.total_samples: # execute policy t = time.time() if random: actions = np.stack( [self.env.action_space.sample() for _ in range(num_envs)], axis=0) agent_infos = {} else: if self.use_cem: if self.context: cem_solutions, agent_infos = policy.get_actions( obses, init_mean=self.prev_sol, init_var=self.init_var, cp_obs=history_state, cp_act=history_act) else: cem_solutions, agent_infos = policy.get_actions( obses, init_mean=self.prev_sol, init_var=self.init_var) self.prev_sol[:, :-1] = cem_solutions[:, 1:].copy() self.prev_sol[:, -1:] = 0. actions = cem_solutions[:, 0].copy() else: if self.context: actions, agent_infos = policy.get_actions( obses, cp_obs=history_state, cp_act=history_act) else: actions, agent_infos = policy.get_actions(obses) if len(self.env.action_space.shape) == 0: actions = actions.reshape(-1) policy_time += time.time() - t # step environments t = time.time() next_obses, rewards, dones, env_infos = self.vec_env.step(actions) env_time += time.time() - t # stack agent_infos and if no infos were provided (--> None) create empty dicts agent_infos, env_infos = self._handle_info_dicts( agent_infos, env_infos) new_samples = 0 for idx, observation, action, reward, env_info, agent_info, done in zip( itertools.count(), obses, actions, rewards, env_infos, agent_infos, dones): if len(self.env.action_space.shape) == 0: action = np.eye(self.act_dim)[action] else: if action.ndim == 0: action = np.expand_dims(action, 0) assert action.ndim == 1, (action, action.shape) # append new samples to running paths if isinstance(reward, np.ndarray): reward = reward[0] running_paths[idx]["observations"].append(observation) running_paths[idx]["actions"].append(action) running_paths[idx]["rewards"].append(reward) running_paths[idx]["dones"].append(done) running_paths[idx]["env_infos"].append(env_info) running_paths[idx]["agent_infos"].append(agent_info) running_paths[idx]["cp_obs"].append(history_state[idx].copy()) running_paths[idx]["cp_act"].append(history_act[idx].copy()) # making a history buffer if state_counts[idx] < self.history_length: if self.state_diff: history_state[idx][state_counts[idx] * self.obs_dim:( state_counts[idx] + 1) * self.obs_dim] = next_obses[idx] - observation else: history_state[idx][state_counts[idx] * self.obs_dim:(state_counts[idx] + 1) * self.obs_dim] = observation history_act[idx][state_counts[idx] * self.act_dim:(state_counts[idx] + 1) * self.act_dim] = action else: history_state[idx][:-self.obs_dim] = history_state[idx][ self.obs_dim:] if self.state_diff: history_state[idx][ -self.obs_dim:] = next_obses[idx] - observation else: 
history_state[idx][-self.obs_dim:] = observation history_act[idx][:-self. act_dim] = history_act[idx][self.act_dim:] history_act[idx][-self.act_dim:] = action # if running path is done, add it to paths and empty the running path if done: paths.append( dict( observations=np.asarray( running_paths[idx]["observations"]), actions=np.asarray(running_paths[idx]["actions"]), rewards=np.asarray(running_paths[idx]["rewards"]), dones=np.asarray(running_paths[idx]["dones"]), env_infos=utils.stack_tensor_dict_list( running_paths[idx]["env_infos"]), agent_infos=utils.stack_tensor_dict_list( running_paths[idx]["agent_infos"]), cp_obs=np.asarray(running_paths[idx]["cp_obs"]), cp_act=np.asarray(running_paths[idx]["cp_act"]), )) new_samples += len(running_paths[idx]["rewards"]) running_paths[idx] = _get_empty_running_paths_dict() if not random and self.use_cem: self.reset_cem(idx) state_counts[idx] = 0 history_state[idx] = np.zeros( (self.obs_dim * self.history_length)) history_act[idx] = np.zeros( (self.act_dim * self.history_length)) else: state_counts[idx] += 1 pbar.update(self.vec_env.num_envs) n_samples += new_samples obses = next_obses pbar.stop() self.total_timesteps_sampled += self.total_samples if log: logger.logkv(log_prefix + "PolicyExecTime", policy_time) logger.logkv(log_prefix + "EnvExecTime", env_time) return paths
import gym
import ecl_gym
from random import randint
from pyprind import ProgBar

env = gym.make('ecl-v0')
n_steps = 100
bar = ProgBar(n_steps, bar_char='█')

for i_episode in range(1):
    ## reinitialize the environment
    observation = env.reset()
    ## run the simulation for n_steps timesteps
    for t in range(n_steps):
        ## value, is_rate, is_producer, is_open
        actions_inje = [[randint(410, 430), False, False, True] for _ in range(8)]
        actions_prod = [[randint(220, 250), False, True, True] for _ in range(4)]
        ## Advance the simulation forward
        observation, reward, done, observation_full = \
            env.step(actions_inje + actions_prod)
        # print(reward)
        bar.update()
        if done.any():
            print("Episode finished after {} timesteps".format(t + 1))
            break

env.close()
def obtain_samples(self, log=False, log_prefix='', random=False, deterministic=False, sinusoid=False, verbose=False): """ Collect batch_size trajectories from each task Args: log (boolean): whether to log sampling times log_prefix (str) : prefix for logger random (boolean): whether the actions are random Returns: (dict) : A dict of paths of size [meta_batch_size] x (batch_size) x [5] x (max_path_length) """ # initial setup / preparation paths = [] n_samples = 0 running_paths = [ _get_empty_running_paths_dict() for _ in range(self.vec_env.num_envs) ] if verbose: pbar = ProgBar(self.total_samples) policy_time, env_time = 0, 0 policy = self.policy policy.reset(dones=[True] * self.vec_env.num_envs) # initial reset of meta_envs obses = np.asarray(self.vec_env.reset()) while n_samples < self.total_samples: # execute policy t = time.time() if self.vae is not None: obses = np.array(obses) obses = self.vae.encode(obses) if random: actions = np.stack([ self.env.action_space.sample() for _ in range(self.vec_env.num_envs) ], axis=0) agent_infos = {} elif deterministic: actions, agent_infos = policy.get_actions(obses) actions = [a_i['mean'] for a_i in agent_infos] elif sinusoid: action_space = self.env.action_space.shape[0] num_envs = self.vec_env.num_envs actions = np.stack([ policy.get_sinusoid_actions(action_space, t / policy.horizon * 2 * np.pi) for _ in range(num_envs) ], axis=0) agent_infos = dict() else: obses = np.array(obses) actions, agent_infos = policy.get_actions(obses) policy_time += time.time() - t # step environments t = time.time() next_obses, rewards, dones, env_infos = self.vec_env.step(actions) env_time += time.time() - t # stack agent_infos and if no infos were provided (--> None) create empty dicts agent_infos, env_infos = self._handle_info_dicts( agent_infos, env_infos) new_samples = 0 for idx, observation, action, reward, env_info, agent_info, done in zip( itertools.count(), obses, actions, rewards, env_infos, agent_infos, dones): # append new samples to running paths if isinstance(reward, np.ndarray): reward = reward[0] running_paths[idx]["observations"].append(observation) running_paths[idx]["actions"].append(action) running_paths[idx]["rewards"].append(reward) running_paths[idx]["dones"].append(done) running_paths[idx]["env_infos"].append(env_info) running_paths[idx]["agent_infos"].append(agent_info) # if running path is done, add it to paths and empty the running path if done: paths.append( dict( observations=np.asarray( running_paths[idx]["observations"]), actions=np.asarray(running_paths[idx]["actions"]), rewards=np.asarray(running_paths[idx]["rewards"]), dones=np.asarray(running_paths[idx]["dones"]), env_infos=utils.stack_tensor_dict_list( running_paths[idx]["env_infos"]), agent_infos=utils.stack_tensor_dict_list( running_paths[idx]["agent_infos"]), )) new_samples += len(running_paths[idx]["rewards"]) running_paths[idx] = _get_empty_running_paths_dict() if verbose: pbar.update(self.vec_env.num_envs) n_samples += new_samples obses = next_obses if verbose: pbar.stop() self.total_timesteps_sampled += self.total_samples if log: logger.logkv(log_prefix + "TimeStepsCtr", self.total_timesteps_sampled) logger.logkv(log_prefix + "PolicyExecTime", policy_time) logger.logkv(log_prefix + "EnvExecTime", env_time) return paths
def obtain_samples(self, log=False, log_prefix='', random=False): """ Collect batch_size trajectories from each task Args: log (boolean): whether to log sampling times log_prefix (str) : prefix for logger random (boolean): whether the actions are random Returns: (dict) : A dict of paths of size [meta_batch_size] x (batch_size) x [5] x (max_path_length) """ # initial setup / preparation paths = OrderedDict() for i in range(self.meta_batch_size): paths[i] = [] n_samples = 0 running_paths = [ _get_empty_running_paths_dict() for _ in range(self.vec_env.num_envs) ] pbar = ProgBar(self.total_samples) policy_time, env_time = 0, 0 policy = self.policy policy.reset(dones=[True] * self.meta_batch_size) # initial reset of meta_envs obses = self.vec_env.reset() while n_samples < self.total_samples: # execute policy t = time.time() obs_per_task = np.split(np.asarray(obses), self.meta_batch_size) if random: actions = np.stack([[self.env.action_space.sample()] for _ in range(len(obses))], axis=0) agent_infos = [[{ 'mean': np.zeros_like(self.env.action_space.sample()), 'log_std': np.zeros_like(self.env.action_space.sample()) }] * self.envs_per_task] * self.meta_batch_size else: actions, agent_infos = policy.get_actions(obs_per_task) policy_time += time.time() - t # step environments t = time.time() actions = np.concatenate(actions) # stack meta batch next_obses, rewards, dones, env_infos = self.vec_env.step(actions) env_time += time.time() - t # stack agent_infos and if no infos were provided (--> None) create empty dicts agent_infos, env_infos = self._handle_info_dicts( agent_infos, env_infos) new_samples = 0 for idx, observation, action, reward, env_info, agent_info, done in zip( itertools.count(), obses, actions, rewards, env_infos, agent_infos, dones): # append new samples to running paths if isinstance(reward, np.ndarray): reward = reward[0] running_paths[idx]["observations"].append(observation) running_paths[idx]["actions"].append(action) running_paths[idx]["rewards"].append(reward) running_paths[idx]["dones"].append(done) running_paths[idx]["env_infos"].append(env_info) running_paths[idx]["agent_infos"].append(agent_info) # if running path is done, add it to paths and empty the running path if done: paths[idx // self.envs_per_task].append( dict( observations=np.asarray( running_paths[idx]["observations"]), actions=np.asarray(running_paths[idx]["actions"]), rewards=np.asarray(running_paths[idx]["rewards"]), dones=np.asarray(running_paths[idx]["dones"]), env_infos=utils.stack_tensor_dict_list( running_paths[idx]["env_infos"]), agent_infos=utils.stack_tensor_dict_list( running_paths[idx]["agent_infos"]), )) new_samples += len(running_paths[idx]["rewards"]) running_paths[idx] = _get_empty_running_paths_dict() pbar.update(new_samples) n_samples += new_samples obses = next_obses pbar.stop() self.total_timesteps_sampled += self.total_samples if log: logger.logkv(log_prefix + "PolicyExecTime", policy_time) logger.logkv(log_prefix + "EnvExecTime", env_time) return paths
def obtain_samples(self, log=False, log_prefix='', buffer=None): """ Collect batch_size trajectories from each task Args: log (boolean): whether to log sampling times log_prefix (str) : prefix for logger Returns: (dict) : A dict of paths of size [meta_batch_size] x (batch_size) x [5] x (max_path_length) """ # initial setup / preparation pbar = ProgBar(self.max_path_length) policy_time, env_time = 0, 0 policy = self.policy policy.reset(dones=[True] * self.vec_env.num_envs) # initial reset of meta_envs obses = self.vec_env.reset(buffer) time_step = 0 list_observations = [] list_actions = [] list_rewards = [] list_dones = [] mask = np.ones((self.vec_env.num_envs, )) while time_step < self.max_path_length: # Execute policy t = time.time() if self.vae is not None: obses = np.array(obses) obses = self.vae.encode(obses) obses = np.split(obses, self.vec_env.num_envs, axis=0) if self.dynamics_model is not None: actions, agent_infos = policy.get_actions_batch( obses, update_filter=False) else: obses = np.array(obses) actions, agent_infos = policy.get_actions_batch( obses, update_filter=True) policy_time += time.time() - t # Step environments t = time.time() next_obses, rewards, dones, _ = self.vec_env.step(actions) next_obses, rewards, dones = np.array(next_obses), np.array( rewards), np.array(dones) rewards *= mask dones = dones + (1 - mask) mask *= (1 - dones) env_time += time.time() - t list_observations.append(obses) list_actions.append(actions) list_rewards.append(rewards) list_dones.append(dones) time_step += 1 obses = next_obses pbar.update(1) pbar.stop() self.total_timesteps_sampled += np.sum(1 - np.array(list_dones)) if log: logger.logkv(log_prefix + "PolicyExecTime", policy_time) logger.logkv(log_prefix + "EnvExecTime", env_time) samples_data = dict(observations=np.array(list_observations), actions=np.array(list_actions), rewards=np.array(list_rewards), returns=np.sum(list_rewards, axis=0), dones=np.array(list_dones)) return samples_data