def convert_samples_to_signals(samples: List[Sample]) -> List[Signal]:
    # Turn timestamped samples into run-length signals. Each signal lasts until
    # the next sample arrives; the final sample has no successor, so its signal
    # reuses the previous signal's length. Assumes at least two samples,
    # otherwise signals[-1] raises an IndexError.
    signals = []
    for index in range(len(samples) - 1):
        length = samples[index + 1].timestamp - samples[index].timestamp
        value = samples[index].value
        signals.append(Signal(length=length, value=value))
    signals.append(Signal(length=signals[-1].length, value=samples[-1].value))
    return signals
def _extract_single_signal(samples: List[Sample]) -> Tuple[Signal, List[Sample]]:
    # Consume samples until the gap to the previous sample exceeds
    # MIN_DIFF_BETWEEN_SIGNALS; return the signal covered so far together with
    # the unconsumed tail (starting at the last sample before the gap).
    value = samples[0].value
    start_time = samples[0].timestamp
    for index in range(1, len(samples)):
        diff = samples[index].timestamp - samples[index - 1].timestamp
        if diff > MIN_DIFF_BETWEEN_SIGNALS:
            signal = Signal(value=value,
                            length=samples[index - 1].timestamp - start_time)
            return signal, samples[index - 1:]
    return Signal(value=value, length=samples[-1].timestamp - start_time), []
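# Usage sketch (not from the original source): the two helpers above assume a
# timestamped Sample and a run-length Signal. The stand-in dataclasses and the
# threshold value below are assumptions reconstructed from the attribute
# accesses above, shown only to make the behavior concrete.
from dataclasses import dataclass
from typing import List, Tuple

MIN_DIFF_BETWEEN_SIGNALS = 100  # hypothetical gap threshold, same unit as timestamps

@dataclass
class Sample:
    timestamp: int
    value: int

@dataclass
class Signal:
    length: int
    value: int

samples = [Sample(0, 1), Sample(10, 1), Sample(500, 0)]
print(convert_samples_to_signals(samples))
# -> [Signal(length=10, value=1), Signal(length=490, value=1), Signal(length=490, value=0)]
print(_extract_single_signal(samples))
# the 490-unit gap exceeds the threshold, so the stream splits there:
# -> (Signal(length=10, value=1), [Sample(timestamp=10, value=1), Sample(timestamp=500, value=0)])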
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
    ValueOptimizationAgent.__init__(self, env, tuning_parameters,
                                    replicated_device, thread_id)
    self.l_values = Signal("L")
    self.a_values = Signal("Advantage")
    self.mu_values = Signal("Action")
    self.v_values = Signal("V")
    self.signals += [self.l_values, self.a_values, self.mu_values, self.v_values]
def join_signals(signals: List[Signal]) -> List[Signal]:
    # Collapse each run of consecutive signals shorter than
    # MIN_DIFF_BETWEEN_SIGNALS into a single signal carrying the first short
    # signal's value; longer signals pass through unchanged.
    stack = []
    grouped_signals = []
    for signal in signals:
        if signal.length < MIN_DIFF_BETWEEN_SIGNALS:
            stack.append(signal)
        else:
            if stack:
                new_signal = Signal(value=stack[0].value,
                                    length=sum(s.length for s in stack))
                grouped_signals.append(new_signal)
                stack = []
            grouped_signals.append(signal)
    # Flush any trailing short signals; the original unconditionally read
    # stack[0] here, raising an IndexError whenever the stack ended up empty.
    if stack:
        grouped_signals.append(Signal(value=stack[0].value,
                                      length=sum(s.length for s in stack)))
    return grouped_signals
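# Usage sketch, reusing the stand-in Signal and MIN_DIFF_BETWEEN_SIGNALS from
# the sketch above: the two consecutive 5-unit signals fall below the
# threshold and are grouped into one, while the long signals pass through.
bursts = [Signal(300, 1), Signal(5, 0), Signal(5, 0), Signal(200, 1)]
print(join_signals(bursts))
# -> [Signal(length=300, value=1), Signal(length=10, value=0), Signal(length=200, value=1)]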
def merge_same_signals_into_one(signals: List[int], frequency: int) -> List[Signal]:
    # Run-length encode a stream of equally spaced sample values: each group of
    # consecutive identical values becomes one Signal whose length is the
    # sample period times the group size.
    grouped_signals = []
    for signal_value, signal_group in groupby(signals):
        signal_length = frequency_to_milliseconds(frequency) * len(list(signal_group))
        grouped_signals.append(Signal(length=signal_length, value=signal_value))
    return grouped_signals
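# Usage sketch for the run-length encoder above. frequency_to_milliseconds is
# not shown in this section; the stand-in below (sample period in ms) is an
# assumption. groupby comes from itertools, which the function above relies on.
from itertools import groupby

def frequency_to_milliseconds(frequency: int) -> float:
    # hypothetical helper: duration of one sample, in milliseconds
    return 1000.0 / frequency

raw = [1, 1, 1, 0, 1, 1]  # one single-sample 0-glitch inside a run of 1s
print(merge_same_signals_into_one(raw, frequency=1000))
# -> [Signal(length=3.0, value=1), Signal(length=1.0, value=0), Signal(length=2.0, value=1)]
# the resulting list can then be fed to join_signals above to group short glitches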
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
    ValueOptimizationAgent.__init__(self, env, tuning_parameters,
                                    replicated_device, thread_id,
                                    create_target_network=True)
    self.last_gradient_update_step_idx = 0
    self.q_values = Signal('Q Values')
    self.unclipped_grads = Signal('Grads (unclipped)')
    self.value_loss = Signal('Value Loss')
    self.signals.append(self.q_values)
    self.signals.append(self.unclipped_grads)
    self.signals.append(self.value_loss)
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0,
             create_target_network=True):
    Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id)
    self.main_network = NetworkWrapper(tuning_parameters, create_target_network,
                                       self.has_global, 'main',
                                       self.replicated_device, self.worker_device)
    self.networks.append(self.main_network)
    self.q_values = Signal("Q")
    self.signals.append(self.q_values)
    self.reset_game(do_not_reset_env=True)
def __init__(self, env, tuning_parameters, replicated_device=None, task_id=0):
    """
    :param env: An environment instance
    :type env: EnvironmentWrapper
    :param tuning_parameters: A Preset class instance with all the running parameters
    :type tuning_parameters: Preset
    :param replicated_device: A tensorflow device for distributed training (optional)
    :type replicated_device: instancemethod
    :param task_id: The current task id
    :type task_id: int
    """
    screen.log_title("Creating agent {}".format(task_id))
    self.task_id = task_id
    self.sess = tuning_parameters.sess
    self.env = tuning_parameters.env_instance = env
    self.imitation = False

    # i/o dimensions
    if not tuning_parameters.env.desired_observation_width or \
            not tuning_parameters.env.desired_observation_height:
        tuning_parameters.env.desired_observation_width = self.env.width
        tuning_parameters.env.desired_observation_height = self.env.height
    self.action_space_size = tuning_parameters.env.action_space_size = self.env.action_space_size
    self.measurements_size = tuning_parameters.env.measurements_size = self.env.measurements_size
    if tuning_parameters.agent.use_accumulated_reward_as_measurement:
        self.measurements_size = tuning_parameters.env.measurements_size = \
            (self.measurements_size[0] + 1,)

    # modules
    if tuning_parameters.agent.load_memory_from_file_path:
        screen.log_title("Loading replay buffer from pickle. Pickle path: {}".format(
            tuning_parameters.agent.load_memory_from_file_path))
        self.memory = read_pickle(tuning_parameters.agent.load_memory_from_file_path)
    else:
        self.memory = eval(tuning_parameters.memory + '(tuning_parameters)')
    # self.architecture = eval(tuning_parameters.architecture)

    self.has_global = replicated_device is not None
    self.replicated_device = replicated_device
    self.worker_device = ("/job:worker/task:{}/cpu:0".format(task_id)
                          if replicated_device is not None else "/gpu:0")

    self.exploration_policy = eval(tuning_parameters.exploration.policy + '(tuning_parameters)')
    self.evaluation_exploration_policy = eval(
        tuning_parameters.exploration.evaluation_policy + '(tuning_parameters)')
    self.evaluation_exploration_policy.change_phase(RunPhase.TEST)

    # initialize all internal variables
    self.tp = tuning_parameters
    self.in_heatup = False
    self.total_reward_in_current_episode = 0
    self.total_steps_counter = 0
    self.running_reward = None
    self.training_iteration = 0
    self.current_episode = self.tp.current_episode = 0
    self.curr_state = {}
    self.current_episode_steps_counter = 0
    self.episode_running_info = {}
    self.last_episode_evaluation_ran = 0
    self.running_observations = []
    logger.set_current_time(self.current_episode)
    self.main_network = None
    self.networks = []
    self.last_episode_images = []
    self.renderer = Renderer()

    # signals
    self.signals = []
    self.loss = Signal('Loss')
    self.signals.append(self.loss)
    self.curr_learning_rate = Signal('Learning Rate')
    self.signals.append(self.curr_learning_rate)

    if self.tp.env.normalize_observation and not self.env.is_state_type_image:
        if not self.tp.distributed or not self.tp.agent.share_statistics_between_workers:
            self.running_observation_stats = RunningStat(
                (self.tp.env.desired_observation_width,))
            self.running_reward_stats = RunningStat(())
        else:
            self.running_observation_stats = SharedRunningStats(
                self.tp, replicated_device,
                shape=(self.tp.env.desired_observation_width,),
                name='observation_stats')
            self.running_reward_stats = SharedRunningStats(
                self.tp, replicated_device, shape=(), name='reward_stats')

    # env is already reset at this point. Otherwise we're getting an error where
    # you cannot reset an env which is not done
    self.reset_game(do_not_reset_env=True)

    # use seed
    if self.tp.seed is not None:
        random.seed(self.tp.seed)
        np.random.seed(self.tp.seed)
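# Side note, a minimal sketch and not part of the original agent: the
# eval-based construction above (e.g. eval(tuning_parameters.memory +
# '(tuning_parameters)')) instantiates a class from its name in the preset.
# A registry lookup achieves the same dispatch without eval; every name below
# is an illustrative assumption.
MEMORY_REGISTRY = {}  # hypothetical: maps class names to memory classes

def build_memory(name, tuning_parameters):
    try:
        memory_cls = MEMORY_REGISTRY[name]
    except KeyError:
        raise ValueError("unknown memory type: {}".format(name))
    return memory_cls(tuning_parameters)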
def __init__(self):
    self.parameters = {}
    self.missing_ids = set()
    self._event = threading.Event()
    self.sig_changed = Signal()
    CommManager().register_model(self)
input_all = data[:, 1] * 5.86 * 0.01  # v_in: volts
freq_all = data[:, 2]
t_all = np.array([i * 0.002 for i, _ in enumerate(output_all)])

# Split the recording by excitation frequency and build one Signal per frequency.
# Note: `input` shadows the builtin of the same name; kept here to match the
# surrounding code.
output = []
input = []
t = []
signal = []
for f in np.unique(freq_all):
    idxs = np.argwhere(freq_all == f)
    output.append(output_all[idxs].flatten())
    input.append(input_all[idxs].flatten())
    t.append(np.array([i * 0.01 for i, _ in enumerate(output[-1])]))
    try:
        signal.append(Signal(t[-1], input[-1], output[-1]))
    except Exception:  # a bare `except:` would also swallow KeyboardInterrupt
        print(f)
        break

fig = plt.figure(figsize=(9, 5))
ax1 = fig.add_subplot(111)
ax1.plot(t_all, output_all, 'b')
ax1.plot(t_all, input_all, 'r')
ax1.grid()

fig2 = plt.figure(figsize=(9, 5))
ax2 = fig2.add_subplot(211)
ax3 = fig2.add_subplot(212)
w = []
def __init__(self):
    self.messages = []
    self.sig_changed = Signal()
    CommManager().register_model(self)
def __init__(self):
    self.last_message = None
    self.sig_changed = Signal()
def __init__(self, path: str):
    self._path = path
    self._signal = Signal()
    self._db = self._load_db()
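# Sketch of the observer-style Signal that the model classes above appear to
# rely on (a connect/emit pattern, as in Qt or blinker). This is an
# assumption-based stand-in, not the project's actual implementation; the real
# class may add disconnection, weak references, or argument filtering.
from typing import Any, Callable, List

class ObserverSignal:
    """Minimal connect/emit signal: callbacks run synchronously, in order."""

    def __init__(self) -> None:
        self._slots: List[Callable[..., Any]] = []

    def connect(self, slot: Callable[..., Any]) -> None:
        self._slots.append(slot)

    def emit(self, *args: Any, **kwargs: Any) -> None:
        for slot in self._slots:
            slot(*args, **kwargs)

# e.g. a model could set self.sig_changed = ObserverSignal(); a view then calls
# model.sig_changed.connect(refresh), and the model calls
# self.sig_changed.emit() after mutating its state.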