def change_capacity(self, value):
    """Replace ``self.buffer`` with a ring buffer of capacity ``value``.

    When growing, every stored sample is carried over. When shrinking, only
    the newest ``value`` samples are kept. Nothing happens when ``value``
    equals the number of samples currently stored.

    Args:
        value: Requested capacity of the new buffer.
    """
    stored = len(self.buffer)
    if value == stored:
        return

    resized = RingBuffer(capacity=value, dtype=float)
    if value > stored:
        resized.extend(self.buffer)
    else:
        # Keep the tail (most recent samples). The former ``[:-value]`` slice
        # selected everything *except* the newest ``value`` samples.
        resized.extend(self.buffer[stored - value:])
    self.buffer = resized
def test_getitem(self):
    """Scalar and integer-list indexing return the logical (unwrapped) order."""
    ring = RingBuffer(5)
    ring.extend([1, 2, 3])
    ring.extendleft([4, 5])

    expected = np.array([4, 5, 1, 2, 3])
    np.testing.assert_equal(ring, expected)

    # Scalar indexing, one position at a time.
    for position in range(ring.maxlen):
        self.assertEqual(expected[position], ring[position])

    # Fancy indexing with a shuffled list of positions.
    shuffled = [0, 4, 3, 1, 2]
    np.testing.assert_equal(ring[shuffled], expected[shuffled])
class AudioStream(object):
    """Record mono audio into a ring buffer and expose its magnitude spectrum.

    An input stream is opened and started in the constructor; its callback
    (``fill_buffer``) appends every incoming block to ``self.buff``, which
    holds the last ``T`` seconds of samples at ``rate`` Hz.

    Args:
        rate: Sampling rate passed to the input stream, in Hz.
        T: Length of the analysis window, in seconds.
    """

    def __init__(self, rate=8192, T=4):
        # stream constants
        self.RATE = rate
        self.CHUNK = int(self.RATE / 100)  # block size: 1/100 s of audio
        self.CHANNELS = 1
        self.pause = False
        self.T = T

        # The buffer has to exist before the stream is started, because the
        # callback writes into it as soon as the first block arrives.
        self.init_buff()

        # The stream runs asynchronously and fills the buffer through the
        # callback, so the input device's buffer does not overflow while the
        # calling thread is busy with other work.
        self.stream = sd.InputStream(
            channels=1,
            samplerate=rate,
            callback=self.fill_buffer,
            blocksize=self.CHUNK,
        )
        self.stream.start()

    def init_buff(self):
        """(Re)create an empty sample buffer and the matching FFT window.

        Previously recorded data is not carried over: it may have been
        recorded at a different sampling rate, so the buffer starts anew.
        """
        self.buffsize = int(self.RATE * self.T)
        self.buff = RingBuffer(capacity=self.buffsize)
        self.fftwindow = hann(self.buffsize)

    def fill_buffer(self, in_data, frame_count, time_info, status_flags):
        """Continuously collect data from the audio stream, into the buffer."""
        # Only the first channel (column 0) of the incoming block is stored.
        self.buff.extend(in_data[:, 0])

    def calc_fft(self):
        """Return the magnitude spectrum of the buffered samples.

        Returns:
            ``None`` (after printing the fill percentage) while the buffer
            holds fewer than ``self.buffsize`` samples. Otherwise the
            absolute values of the windowed FFT for bins ``1`` up to (but
            excluding) ``buffsize / 2``, i.e. without the DC bin.
        """
        data = np.array(self.buff)
        if data.shape[0] != self.buffsize:
            print('loading samples %i%%' % (data.shape[0] / self.buffsize * 100))
            return None

        data = np.multiply(data, self.fftwindow)
        yf = fft(data)
        yf = yf[1:int(yf.shape[0] / 2)]
        return np.abs(yf)
class KeywordDetector:
    """Streaming detector that smooths model scores over recent predictions.

    Audio is pushed into a fixed-size ring buffer; ``predict`` runs the model
    on the newest ``input_samples`` samples and averages the last
    ``smoothing`` scores.

    Args:
        checkpoint: Path handed to ``model_from_checkpoint``.
        threshold: The smoothed score must exceed this value to trigger.
        smoothing: Number of recent scores that are averaged.
        timeout: Seconds after a trigger during which no new trigger fires.
    """

    def __init__(
        self,
        checkpoint: Path,
        threshold: float = 0.9,
        smoothing: int = 5,
        timeout: float = 1.0,
    ):
        device_name = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device_name)

        self.model, self.config = model_from_checkpoint(checkpoint, device=self.device)
        self.model = self.model.eval().to(self.device)
        self.extractor = init_feature_extractor(self.config)

        self.threshold = threshold
        self.smoothing = smoothing
        self.timeout = timeout

        # Number of raw samples consumed by one forward pass.
        frames = self.model.input_size - 1
        self.input_samples = self.config.fft_window_step * frames

        self.preds_buffer = RingBuffer(self.smoothing, "float32")

        # Start with silence so that ``predict`` always sees a full window.
        self.raw_input_buffer = RingBuffer(capacity=self.input_samples, dtype="float32")
        silence = np.zeros(self.input_samples, dtype="float32")
        self.raw_input_buffer.extend(silence)

        self.cooldown_time = 0

    def push_audio(self, signal: np.ndarray):
        """Append ``signal`` to the raw input buffer."""
        self.raw_input_buffer.extend(signal)

    @torch.no_grad()
    def predict(self):
        """Score the newest audio window.

        Returns:
            ``(mean, trigger)``: the mean of the buffered scores, and whether
            that mean exceeds the threshold while the cooldown has expired.
        """
        window = self.raw_input_buffer[-self.input_samples:].copy()
        window = librosa.util.normalize(window)

        features = self.extractor(window).astype("float32")
        batch_shape = (1, self.model.input_channels, self.model.input_size)
        features = features.reshape(batch_shape)
        batch = torch.as_tensor(features).to(self.device)

        logits = self.model.forward(batch)
        # Index 1 of the softmax output is used as the score
        # (presumably the keyword class -- confirm against the model).
        score = torch.softmax(logits[0], 0).cpu().numpy()[1]
        self.preds_buffer.append(score)

        smoothed = np.mean(self.preds_buffer)
        trigger = smoothed > self.threshold and self.cooldown_time < time()
        if trigger:
            self.cooldown_time = time() + self.timeout
        return smoothed, trigger
def test_no_overwrite(self):
    """A full buffer with ``allow_overwrite=False`` rejects further inserts."""
    ring = RingBuffer(3, allow_overwrite=False)
    ring.append(1)
    ring.append(2)
    ring.appendleft(3)

    def expect_overwrite_error():
        # Fresh context manager for every rejected insertion.
        return self.assertRaisesRegex(IndexError, 'overwrite')

    # Left side: single and bulk inserts fail, an empty bulk insert is fine.
    with expect_overwrite_error():
        ring.appendleft(4)
    with expect_overwrite_error():
        ring.extendleft([4])
    ring.extendleft([])

    np.testing.assert_equal(ring, np.array([3, 1, 2]))

    # Right side: same expectations.
    with expect_overwrite_error():
        ring.append(4)
    with expect_overwrite_error():
        ring.extend([4])
    ring.extend([])

    # works fine if we pop the surplus
    ring.pop()
    ring.append(4)
    np.testing.assert_equal(ring, np.array([3, 1, 4]))
def test_extend(self):
    """``extend``/``extendleft`` keep at most ``capacity`` items per side."""
    ring = RingBuffer(5)

    def check(*items):
        # Compare the current buffer content with the expected sequence.
        np.testing.assert_equal(ring, np.array(list(items)))

    ring.extend([1, 2, 3])
    check(1, 2, 3)

    ring.popleft()
    ring.extend([4, 5, 6])
    check(2, 3, 4, 5, 6)

    ring.extendleft([0, 1])
    check(0, 1, 2, 3, 4)

    # More items than capacity on the left: the first five are kept.
    ring.extendleft([1, 2, 3, 4, 5, 6, 7])
    check(1, 2, 3, 4, 5)

    # More items than capacity on the right: the last five are kept.
    ring.extend([1, 2, 3, 4, 5, 6, 7])
    check(3, 4, 5, 6, 7)
class Kalman(Strategy):
    """
    Models fairs based on correlated movements between pairs. Weights predictions by volume and
    likelihood of cointegration.

    The constructor warms the internal estimators up from historical data;
    ``tick`` then produces one fair-value estimate per incoming frame.
    """

    def __init__(
        self, window_size, movement_hl, trend_hl, cointegration_period, warmup_signals, warmup_data
    ):
        """Set up the moving estimators and feed them the warmup history.

        Args:
            window_size: Capacity of the price-history ring buffer.
            movement_hl: First argument to ``HoltEma`` and to the volume ``Ema``
                (presumably a half-life -- confirm).
            trend_hl: Passed twice to ``HoltEma`` and to ``Emse``; also sets how
                many trailing warmup rows (``trend_hl * 4``) are stepped through
                the price model.
            cointegration_period: Number of ticks between recomputations of the
                cointegration factors and correlations.
            warmup_signals: Frame with a ``price``/``volume`` level 1 on the
                columns, for the signal series.
            warmup_data: Same layout as ``warmup_signals``, for the output series.
        """
        self.window_size = window_size
        self.moving_prices = HoltEma(movement_hl, trend_hl, trend_hl)
        self.moving_err_from_prev_fair = Emse(trend_hl)
        self.cointegration_period = cointegration_period
        # Counts down modulo ``cointegration_period``; 0 means "recompute now".
        self.sample_counter = 0
        self.r = None
        self.r2 = None
        # TODO: do some checks for length/pairs of warmup signals/outputs
        prices = pd.concat(
            [warmup_signals.xs("price", axis=1, level=1), warmup_data.xs("price", axis=1, level=1)],
            axis=1,
            sort=False,
        )
        volumes = pd.concat(
            [
                warmup_signals.xs("volume", axis=1, level=1),
                warmup_data.xs("volume", axis=1, level=1),
            ],
            axis=1,
            sort=False,
        )
        # One row per sample, one column per price series.
        self.price_history = RingBuffer(self.window_size, dtype=(np.float64, len(prices.columns)))
        self.price_history.extend(prices.values)

        # Only the trailing ``trend_hl * 4`` rows are used to warm the price model.
        for _, p in prices.iloc[-trend_hl * 4 :].iterrows():
            self.moving_prices.step(p)

        self.moving_volumes = Ema(movement_hl, volumes.mean())
        self.moving_variances = TrendEstimator(
            Emse(window_size / 2, (prices.diff()[1:] ** 2).mean()), prices.iloc[-1]
        )

        # Very large variance: the initial "previous fair" carries almost no weight.
        self.prev_fair = Gaussian(self.moving_prices.value, [1e100 for _ in prices.columns])

        # Cointegration factors, initialised to 1 for every (signal, input) pair.
        self.coint_f = pd.DataFrame(
            1, index=warmup_signals.columns.unique(0), columns=prices.columns
        )

        if len(self.price_history) < self.window_size or not self.moving_prices.ready:
            Log.warn("Insufficient warmup data. Price model will warm up (slowly) in real time.")
        else:
            Log.info("Price model initialized and warm.")

    # the fair combination step assumes that all signals are i.i.d. They are not (and obviously not in the case
    # of funds). Is this a problem?
    def tick(self, frame, signals):
        """Consume one frame and return the fair estimate for the frame's series.

        Args:
            frame: Series with a ``price``/``volume`` level 1 in its index
                (the output series).
            signals: Series with the same layout, for the signal series.

        Returns:
            An empty ``Gaussian`` while the model is still warming up; otherwise
            the combined fair, indexed by ``frame.index.unique(0)``.
        """
        prices = pd.concat([signals.xs("price", level=1), frame.xs("price", level=1)], sort=False)
        volumes = pd.concat(
            [signals.xs("volume", level=1), frame.xs("volume", level=1)], sort=False
        )
        input_names = prices.index
        signal_names = signals.index.unique(0)

        self.price_history.append(prices)

        price_history = pd.DataFrame(self.price_history, columns=input_names)

        # The estimators are stepped on every tick, including during warmup.
        moving_prices = self.moving_prices.step(prices)
        moving_volumes = self.moving_volumes.step(volumes)
        stddev = np.sqrt(self.moving_variances.step(prices))

        if len(self.price_history) < self.window_size or not self.moving_prices.ready:
            return Gaussian(pd.Series([]), [])

        # calculate p values for pair cointegration
        if self.sample_counter == 0:
            for i in signal_names:
                for j in input_names:
                    # ignore collinearity warning
                    with warnings.catch_warnings():
                        warnings.filterwarnings("ignore")
                        p = coint(
                            price_history[i], price_history[j], trend="nc", maxlag=0, autolag=None
                        )[1]
                        # Maps the p-value to a factor in [1, 21].
                        self.coint_f.loc[i, j] = 1 + p * 20
            self.r = price_history.corr().loc[signal_names]
            self.r2 = self.r ** 2
        # Wraps from 0 to ``cointegration_period - 1`` and counts down again.
        self.sample_counter = (self.sample_counter - 1) % self.cointegration_period

        # Correlation scaled by the ratio of standard deviations (input / signal).
        correlated_slopes = self.r.mul(stddev, axis=1).div(stddev[signal_names], axis=0)

        # ideally use mkt cap instead of volume?
        log_volume = np.log1p(moving_volumes)
        volume_f = pd.DataFrame(
            log_volume[np.newaxis, :] - log_volume[signal_names][:, np.newaxis],
            index=signal_names,
            columns=input_names,
        )
        # Negative differences are clamped to 0, then shifted so the factor is >= 1.
        volume_f = volume_f * (volume_f > 0) + 1

        # --- estimate relative to the moving price ---
        delta = signals.xs("price", level=1) - moving_prices[signal_names]
        fair_delta_means = correlated_slopes.mul(delta, axis=0)
        delta_vars = self.moving_prices.mse
        correlated_delta_vars = np.square(correlated_slopes).mul(delta_vars[signal_names], axis=0)
        fair_delta_vars = (correlated_delta_vars + (1 - self.r2) * self.coint_f * delta_vars).mul(
            volume_f, axis=0
        )
        fair_deltas = [
            Gaussian(fair_delta_means.loc[i], fair_delta_vars.loc[i]) for i in signal_names
        ]
        fair_delta = intersect_with_disagreement(fair_deltas)
        absolute_fair = fair_delta + moving_prices

        # --- estimate relative to the previous fair plus trend ---
        step = prices - (self.prev_fair.mean + self.moving_prices.trend)
        step_vars = self.moving_err_from_prev_fair.step(step)
        fair_step_means = correlated_slopes.mul(step[signal_names], axis=0)
        correlated_step_vars = np.square(correlated_slopes).mul(step_vars[signal_names], axis=0)
        fair_step_vars = (correlated_step_vars + (1 - self.r2) * self.coint_f * step_vars).mul(
            volume_f, axis=0
        )
        fair_steps = [Gaussian(fair_step_means.loc[i], fair_step_vars.loc[i]) for i in signal_names]
        fair_step = intersect_with_disagreement(fair_steps)
        relative_fair = fair_step + self.prev_fair + self.moving_prices.trend

        # Combine both estimates and remember the result for the next tick.
        fair = intersect_with_disagreement([absolute_fair, relative_fair])
        self.prev_fair = fair
        return fair[frame.index.unique(0)]
class AudioPlotFilter(GstBase.BaseTransform):
    """GStreamer transform that renders incoming audio as waveform video frames.

    Incoming samples are converted to S32, averaged in groups of
    ``thinning_factor`` and stored in a ring buffer; one video frame is drawn
    from that buffer every ``samplesperbuffer`` input samples.
    """

    __gstmetadata__ = ('AudioPlotFilter','Filter', \
                      'Plot audio waveforms', 'Mathieu Duponchelle')

    __gsttemplates__ = (Gst.PadTemplate.new("src",
                                           Gst.PadDirection.SRC,
                                           Gst.PadPresence.ALWAYS,
                                           OCAPS),
                       Gst.PadTemplate.new("sink",
                                           Gst.PadDirection.SINK,
                                           Gst.PadPresence.ALWAYS,
                                           ICAPS))

    __gproperties__ = {
        "window-duration": (float,
                   "Window Duration",
                   "Duration of the sliding window, in seconds",
                   0.01,
                   100.0,
                   DEFAULT_WINDOW_DURATION,
                   GObject.ParamFlags.READWRITE
                  )
    }

    def __init__(self):
        GstBase.BaseTransform.__init__(self)
        self.window_duration = DEFAULT_WINDOW_DURATION

    def do_get_property(self, prop):
        """Return the value of ``prop``; only ``window-duration`` is known."""
        if prop.name == 'window-duration':
            return self.window_duration
        else:
            raise AttributeError('unknown property %s' % prop.name)

    def do_set_property(self, prop, value):
        """Store ``value`` for ``prop``; only ``window-duration`` is known."""
        if prop.name == 'window-duration':
            self.window_duration = value
        else:
            raise AttributeError('unknown property %s' % prop.name)

    def do_transform(self, inbuf, outbuf):
        """Draw the current ring-buffer content into ``outbuf`` and timestamp it."""
        # The line artist is created on the first frame and updated afterwards.
        if not self.h:
            self.h, = self.ax.plot(np.array(self.ringbuffer),
                                   lw=0.5,
                                   color='k',
                                   path_effects=[pe.Stroke(linewidth=1.0,
                                                           foreground='g'),
                                                 pe.Normal()])
        else:
            self.h.set_ydata(np.array(self.ringbuffer))

        # Blit: restore the saved background and redraw only the line.
        self.fig.canvas.restore_region(self.background)
        self.ax.draw_artist(self.h)
        self.fig.canvas.blit(self.ax.bbox)

        s = self.agg.tostring_argb()

        outbuf.fill(0, s)
        outbuf.pts = self.next_time
        outbuf.duration = self.frame_duration

        self.next_time += self.frame_duration

        return Gst.FlowReturn.OK

    def __append(self, data):
        """Average ``data`` in groups of ``thinning_factor`` and store the means."""
        arr = np.array(data)
        # Trailing samples that do not fill a whole group are dropped.
        end = self.thinning_factor * int(len(arr) / self.thinning_factor)
        arr = np.mean(arr[:end].reshape(-1, self.thinning_factor), 1)
        self.ringbuffer.extend(arr)

    def do_generate_output(self):
        """Consume queued audio and emit a frame once enough samples arrived.

        Returns:
            ``(Gst.FlowReturn.OK, None)`` while no frame is due, otherwise the
            result of ``do_transform`` together with the filled output buffer.
        """
        inbuf = self.queued_buf
        _, info = inbuf.map(Gst.MapFlags.READ)
        res, data = self.converter.convert(GstAudio.AudioConverterFlags.NONE,
                                           info.data)
        data = memoryview(data).cast('i')

        # ``buf_offset`` counts samples of this input buffer already consumed.
        nsamples = len(data) - self.buf_offset

        if nsamples == 0:
            self.buf_offset = 0
            inbuf.unmap(info)
            return Gst.FlowReturn.OK, None

        # Not enough samples for the next frame yet: store them all and wait.
        if self.cur_offset + nsamples < self.next_offset:
            self.__append(data[self.buf_offset:])
            self.buf_offset = 0
            self.cur_offset += nsamples
            inbuf.unmap(info)
            return Gst.FlowReturn.OK, None

        # Take exactly the samples needed to reach the next frame boundary.
        consumed = self.next_offset - self.cur_offset

        self.__append(data[self.buf_offset:self.buf_offset + consumed])
        inbuf.unmap(info)

        _, outbuf = GstBase.BaseTransform.do_prepare_output_buffer(self, inbuf)

        ret = self.do_transform(inbuf, outbuf)

        self.next_offset += self.samplesperbuffer

        self.cur_offset += consumed
        self.buf_offset += consumed

        return ret, outbuf

    def do_transform_caps(self, direction, caps, filter_):
        """Return ``ICAPS`` for the src direction, ``OCAPS`` otherwise, filtered."""
        if direction == Gst.PadDirection.SRC:
            res = ICAPS
        else:
            res = OCAPS

        if filter_:
            res = res.intersect(filter_)

        return res

    def do_fixate_caps(self, direction, caps, othercaps):
        """Fixate ``othercaps``, preferring the default framerate and frame size."""
        if direction == Gst.PadDirection.SRC:
            return othercaps.fixate()
        else:
            so = othercaps.get_structure(0).copy()
            so.fixate_field_nearest_fraction("framerate",
                                             DEFAULT_FRAMERATE_NUM,
                                             DEFAULT_FRAMERATE_DENOM)
            so.fixate_field_nearest_int("width", DEFAULT_WIDTH)
            so.fixate_field_nearest_int("height", DEFAULT_HEIGHT)
            ret = Gst.Caps.new_empty()
            ret.append_structure(so)
            return ret.fixate()

    def do_set_caps(self, icaps, ocaps):
        """Configure converter, figure, ring buffer and timing from the caps."""
        in_info = GstAudio.AudioInfo()
        in_info.from_caps(icaps)
        out_info = GstVideo.VideoInfo()
        out_info.from_caps(ocaps)

        # Input is converted to S32, keeping rate, channels and positions.
        self.convert_info = GstAudio.AudioInfo()
        self.convert_info.set_format(GstAudio.AudioFormat.S32,
                                     in_info.rate,
                                     in_info.channels,
                                     in_info.position)
        self.converter = GstAudio.AudioConverter.new(GstAudio.AudioConverterFlags.NONE,
                                                     in_info,
                                                     self.convert_info,
                                                     None)

        # Figure sized in inches so that it renders at the output resolution.
        self.fig = plt.figure()
        dpi = self.fig.get_dpi()
        self.fig.patch.set_alpha(0.3)
        self.fig.set_size_inches(out_info.width / float(dpi),
                                 out_info.height / float(dpi))
        self.ax = plt.Axes(self.fig, [0., 0., 1., 1.])
        self.fig.add_axes(self.ax)
        self.ax.set_axis_off()
        self.ax.set_ylim((GLib.MININT, GLib.MAXINT))
        self.agg = self.fig.canvas.switch_backends(FigureCanvasAgg)
        self.h = None

        samplesperwindow = int(in_info.rate * in_info.channels * self.window_duration)
        # At least 1, so ``__append`` never divides by zero.
        self.thinning_factor = max(int(samplesperwindow / out_info.width - 1), 1)

        cap = int(samplesperwindow / self.thinning_factor)
        self.ax.set_xlim([0, cap])
        # Pre-filled with zeros so the first frames show a flat line.
        self.ringbuffer = RingBuffer(capacity=cap)
        self.ringbuffer.extend([0.0] * cap)
        self.frame_duration = Gst.util_uint64_scale_int(Gst.SECOND,
                                                        out_info.fps_d,
                                                        out_info.fps_n)
        self.next_time = self.frame_duration

        # Render once and keep the empty axes as background for blitting.
        self.agg.draw()
        self.background = self.fig.canvas.copy_from_bbox(self.ax.bbox)

        # Number of input samples that correspond to one output frame.
        self.samplesperbuffer = Gst.util_uint64_scale_int(in_info.rate * in_info.channels,
                                                          out_info.fps_d,
                                                          out_info.fps_n)
        self.next_offset = self.samplesperbuffer
        self.cur_offset = 0
        self.buf_offset = 0

        return True
class WorldState:
    """Keeps track of all simulated worldy elements."""

    def __init__(self):
        """Create coordinate generators, motors, controller and error logs."""
        self.coord_gen = CoordinateGenerator()
        self.latent_coord_gen = LatentCoordinateGenerator(self.coord_gen)

        # Stepper.STEPS_PER_REV = 51200
        phi_motor = Motor(
            VirtualMotor(accel_max=1.96, velocity_max=0.49),
            bound_min=-np.inf,
            bound_max=np.inf,
        )
        theta_motor = Motor(
            VirtualMotor(accel_max=1.96, velocity_max=0.49),
            bound_min=-0.5 * np.pi,
            bound_max=0.0,
        )
        self.motion_controller = MotionController(
            self.latent_coord_gen, phi_motor, theta_motor, latency_compensation=0.200
        )

        # Both error logs start out completely filled with zeros.
        history_len = 512
        self.error_history = RingBuffer(capacity=history_len, dtype=np.float32)
        self.error_history_latent = RingBuffer(capacity=history_len, dtype=np.float32)
        for history in (self.error_history, self.error_history_latent):
            history.extend(np.zeros(history_len))

    def draw_3d(self, ax):
        """Redraw the 3D scene (origin marker, sphere, controller) on ``ax``."""
        origin = np.zeros((1, 3))

        ax.clear()
        ax.scatter3D(*tuple(origin.T), color="red")
        draw_sphere(ax, 8, 16, color="#222222")
        self.motion_controller.draw(ax)

        ax.margins(x=0, y=0)
        ax.axis('off')
        ax.grid(False)
        # ax.set_title('Simulation')
        for setter, label in ((ax.set_xlabel, 'x'), (ax.set_ylabel, 'y'), (ax.set_zlabel, 'z')):
            setter(label)
        set_axes_radius(ax, origin[0], 0.7)

    def draw_error(self, ax):
        """Plot both error histories plus a reference line on ``ax``."""
        direct_x = np.arange(len(self.error_history))
        direct_y = np.array(self.error_history)
        latent_x = np.arange(len(self.error_history_latent))
        latent_y = np.array(self.error_history_latent)

        ax.clear()
        ax.axhline(y=np.pi * 5 / 90, linewidth=2, color='#77bb33')
        ax.plot(direct_x, direct_y, color="#ff55bb")
        ax.plot(latent_x, latent_y, color="#772255")

        ax.axis('off')
        ax.grid(False)
        ax.set_title('Error', position=(0.5, 0.9))
        ax.set_xlabel('x')
        ax.set_ylabel('y')
        # error=3.141 corresponds to 90 degrees
        # so set limits to 1.047 <=> 30 degrees
        ax.set_ylim([0.00, 1.047])

    def update(self, dt):
        """Advance the simulation by ``dt`` and log the resulting errors."""
        # TODO this updates using a curr_rot that doesn't account for new
        # position of motors... oh well
        current_rotation = self.motion_controller.curr_rot
        self.coord_gen.update(dt, current_rotation)
        self.motion_controller.update(dt)

        direct_error = self._calc_error(self.coord_gen.dest_quat)
        latent_error = self._calc_error(self.latent_coord_gen.dest_quat)
        self.error_history.append(direct_error)
        self.error_history_latent.append(latent_error)

    def _calc_error(self, q):
        """Delegate the error computation for ``q`` to the motion controller."""
        return self.motion_controller.calc_error(q)
def change_buffer_size(self, value):
    """Recreate the three sample buffers with capacity ``value``.

    ``samplesPV``, ``samplesTotalizer`` and ``sampleTimestamps`` are replaced
    by new ring buffers. When growing, all stored samples are carried over;
    when shrinking, the newest ``value`` samples of each buffer are kept.
    Nothing happens when ``value`` equals ``self.sampleBufferSize``.

    Args:
        value: Requested capacity of the new buffers.
    """
    # NOTE(review): ``self.sampleBufferSize`` is only read here, never
    # updated -- presumably the caller stores the new size; confirm.
    if value == self.sampleBufferSize:
        return

    def resized(old, dtype):
        # New buffer holding the most recent samples of ``old`` that fit.
        fresh = RingBuffer(capacity=value, dtype=dtype)
        keep = min(len(old), value)
        # Tail slice: the former ``[:-value]`` dropped the newest samples.
        fresh.extend(old[len(old) - keep:])
        return fresh

    # Build all replacements first, so a failure leaves the old buffers intact.
    new_pv = resized(self.samplesPV, np.float16)
    new_total = resized(self.samplesTotalizer, np.float32)
    new_timestamps = resized(self.sampleTimestamps, datetime)

    self.samplesPV = new_pv
    self.samplesTotalizer = new_total
    self.sampleTimestamps = new_timestamps
def main():
    """Train a PPO policy against the remote tank world.

    Every episode opens a new ``RlWorldClient`` connection and runs up to
    ``max_timesteps`` ticks of ``tick_time`` seconds. On every
    ``log_interval``-th episode with stored experience the policy is updated,
    metrics are written to TensorBoard and the weights are saved. Any
    exception raised during an episode is printed and the loop continues with
    the next episode.
    """
    ############## Hyperparameters ##############
    # NOTE(review): ``id`` is not defined in this function; unless the module
    # binds its own ``id``, the builtin function is formatted here -- confirm.
    env_name = f"{math.floor(time.time())}__id_{id}"
    writer = SummaryWriter("./logs/"+env_name)
    tick_time = 0.20
    log_interval = 2            # print avg reward in the interval
    max_episodes = 10000        # max training episodes
    max_timesteps = int(15 * (1/tick_time))    # max actions in one episode
    update_timestep = max_timesteps * 2        # update policy every n timesteps

    action_std = 0.5            # constant std for action distribution (Multivariate Normal)
    K_epochs = 10               # update policy for K epochs
    eps_clip = 0.2              # clip parameter for PPO
    gamma = 0.99                # discount factor

    lr = 0.0003                 # parameters for Adam optimizer
    betas = (0.9, 0.999)

    random_seed = None
    #############################################

    # creating environment
    num_captures = 3
    num_tanks = 15
    action_dim = 3
    # 15 tanks with (x,y) * 3 captures, plus one action per capture
    state_dim = (num_tanks*2)*(num_captures)+(action_dim*num_captures)

    print(
        "state_dim:", state_dim, "\n",
        "action_dim:", action_dim, "\n",
        "max_timesteps:", max_timesteps, "\n",
        "update_timestep:", update_timestep, "\n",
    )

    if random_seed:
        print("Random Seed: {}".format(random_seed))
        torch.manual_seed(random_seed)
        # No environment exists at this point (it is created per episode), so
        # the former ``env.seed(random_seed)`` could only raise.
        np.random.seed(random_seed)

    memory = Memory()
    ppo = PPO(state_dim, action_dim, action_std, lr, betas, gamma, K_epochs, eps_clip)
    print(lr,betas)

    # logging variables
    running_reward = 0
    avg_length = 0
    time_step = 0
    # Must be bound before ``death_count += 1`` below; without it the first
    # finished episode raised UnboundLocalError, swallowed by the ``except``.
    death_count = 0

    def get_dist(x: float, y: float) -> float:
        return math.sqrt(math.pow(x, 2) + math.pow(y, 2))

    def parse_distances(arr: tuple):
        return get_dist(arr[0], arr[1])

    def mm_clip(x, lower, upper):
        # Clamp ``x`` into [lower, upper].
        return max(lower, min(upper, x))

    def parse_observations(observations: dict, old_obs: dict) -> (list, int, bool):
        """
        parse observation dict into desirable data structures

        Returns the (num_tanks, 2) array of the closest radar contacts
        (zero-padded), the kill/hit/death counters and the ``done`` flag
        (three or more deaths).
        """
        if not observations:
            # Nothing received: empty radar scan, counters from the last tick.
            observations = {
                'radarScan': [],
            }
            observations.update(old_obs)

        # grab the 15 closest tanks
        tank_locations = [
            (float(tank_dic['x']), float(tank_dic['y']))
            for tank_dic in observations['radarScan']
        ]
        tank_locations = sorted(tank_locations, key=parse_distances)

        ret_obs = {
            'killCount': observations['killCount'],
            'hitCount': observations['hitCount'],
            'deathCount': observations['deathCount']
        }

        # List of the closest tank coordinates. Limited to ``num_tanks``
        # entries: more radar contacts used to overflow the array.
        ret_state = np.zeros((num_tanks,2))
        for i, loc in enumerate(tank_locations[:num_tanks]):
            ret_state[i] = loc

        return ret_state, ret_obs, True if observations['deathCount'] >= 3 else False

    # training loop
    for i_episode in tqdm(range(1, max_episodes+1)):
        batch_time_begin = time.time()
        try:
            env = RlWorldClient("10.90.159.11", 1337)
            state = RingBuffer(capacity=state_dim, dtype=np.float32)
            # instantiate buffer with all zeros
            state.extend(np.zeros((state_dim)))
            global_obs = {
                'killCount': 0,
                'hitCount': 0,
                'deathCount': 0,
            }
            obs = global_obs

            for t in range(max_timesteps):
                time_step += 1
                start_time = time.time()

                state_current, obs, done = parse_observations(env.read_observation_dict(), obs)
                state.extend(state_current.reshape(-1))
                action = ppo.select_action(np.array(state), memory)
                state.extend(action.reshape(-1))

                # Rewards: counter increases since the previous tick,
                # discounted linearly over the episode.
                delta_kills = obs['killCount'] - global_obs['killCount']
                delta_hits = obs['hitCount'] - global_obs['hitCount']
                delta_death = obs['deathCount'] - global_obs['deathCount']

                time_discount = mm_clip((-1/(max_timesteps*1.1)) * t + 1, 0, 1)
                reward = [
                    5 * delta_kills * time_discount,
                    1 * delta_hits * time_discount,
                ]

                global_obs['killCount'] += delta_kills
                global_obs['hitCount'] += delta_hits
                global_obs['deathCount'] += delta_death

                action = [float(act) for act in action]

                # Apply action to dict
                action_dict = {
                    "name": f"swarm_{(i_episode/(max_episodes+1))*100:.1f}%_id:{id}",
                    "colour": "#7017a1",
                    "moveForwardBack": action[0],
                    "moveRightLeft": action[1],
                    "turnRightLeft": action[2],
                    "fire": True,
                }
                env.send_action_dict(action_dict)

                # Saving reward and is_terminals:
                memory.rewards.append(sum(reward))
                memory.is_terminals.append(done)
                running_reward += sum(reward)

                if done:
                    death_count += 1
                    break

                # NOTE(review): nesting reconstructed from statement order;
                # ``t`` is accumulated on every tick here -- confirm intent.
                avg_length += t

                # sleep for the remainder of the tick
                end_time = time.time()
                time_diff = end_time-start_time
                time.sleep(0 if time_diff > tick_time else tick_time-time_diff)

            writer.add_scalar('rewards/ep_killCount', global_obs['killCount'], i_episode)
            writer.add_scalar('rewards/ep_hitCount', global_obs['hitCount'], i_episode)
            writer.add_scalar('rewards/ep_deathCount', global_obs['deathCount'], i_episode)

            if i_episode % log_interval == 0 and len(memory.states) > 0:
                # load memory from other saved batches
                batch_time_end = time.time()
                batch_time_diff = batch_time_end - batch_time_begin
                memory.save_memory()
                found_count = memory.find_experiences(seconds_ago=batch_time_diff*1.95)

                loss = ppo.update(memory)
                memory.clear_memory()
                time_step = 0

                # logging (uses ``found_count`` and ``loss`` from the update above)
                avg_length = avg_length//log_interval
                running_reward = running_reward/log_interval

                print(f'Episode {i_episode} \t Avg length: {avg_length} \t Avg reward: {running_reward:.3f}')
                writer.add_scalar('rewards/reward', running_reward, i_episode)
                writer.add_scalar('rewards/avg_length', avg_length, i_episode)
                writer.add_scalar('debug/found_exp', found_count, i_episode)
                writer.add_scalar('debug/loss', -float(loss.sum().detach()), i_episode)
                running_reward = 0
                avg_length = 0

                torch.save(ppo.policy.state_dict(), "./weights/" + env_name + ".pt")

        except Exception as e:
            # Best effort: report the failure, back off and start a new episode.
            print("caught exception:", e)
            time.sleep(1)
def change_capacity(self):
    """Resize ``buffer1`` .. ``buffer4`` to the capacity entered in the UI.

    The new capacity is read from ``self.bufferSizeEdit``. When growing, all
    stored samples are carried over; when shrinking, the newest samples of
    each buffer are kept. Nothing happens when the requested capacity equals
    the number of samples currently stored in ``buffer1``.

    Raises:
        ValueError: If the text of ``bufferSizeEdit`` is not an integer.
    """
    value = int(self.bufferSizeEdit.text())
    if value == len(self.buffer1):
        return

    # Build all replacements first, so a failure leaves the old buffers intact.
    replacements = {}
    for name in ("buffer1", "buffer2", "buffer3", "buffer4"):
        old = getattr(self, name)
        fresh = RingBuffer(capacity=value, dtype=np.float16)
        keep = min(len(old), value)
        # Tail slice: the former ``[:-value]`` dropped the newest samples.
        fresh.extend(old[len(old) - keep:])
        replacements[name] = fresh

    for name, fresh in replacements.items():
        setattr(self, name, fresh)
class Producer:
    """Worker that plays the tank world and ships experience to the trainer.

    Each worker owns a process (``self.process``, not started here) whose
    target is ``run``. States are sent through ``processing_queue``, actions
    are received through ``inbox_queue`` and finished transitions are
    published on ``experience_queue``.

    Args:
        worker_id: Identifier attached to every message of this worker.
        hparams: Dict providing at least ``state_dim``, ``num_tanks``,
            ``max_timesteps`` and ``tick_time``.
        processing_queue: Queue shared with the process that selects actions.
    """

    def __init__(self, worker_id, hparams, processing_queue):
        self.id = worker_id
        self.hparams = hparams
        self.processing_queue = processing_queue
        self.inbox_queue = mp.Queue()
        self.experience_queue = mp.Queue()
        self.global_obs = dict()
        self.state = RingBuffer(capacity=self.hparams['state_dim'], dtype=np.float32)
        self.process = mp.Process(
            target=self.run
        )

    def reset(self):
        """Zero the state buffer and the kill/hit/death counters."""
        # flush buffer with all zeros
        self.state.extend(np.zeros((self.hparams['state_dim'])))
        self.global_obs = {
            'killCount': 0,
            'hitCount': 0,
            'deathCount': 0,
        }

    def parse_observations(self, observations: dict, old_obs: dict) -> (list, int, bool):
        """
        parse observation dict into desirable data structures

        Args:
            observations: Dict with ``radarScan`` (list of dicts with ``x``
                and ``y``) and the three counters; may be empty.
            old_obs: Counters of the previous tick, used when
                ``observations`` is empty.

        Returns:
            ``(state, counters, done)``: a ``(num_tanks, 2)`` array with the
            closest radar contacts (zero-padded), the counters dict, and
            ``False`` (episodes are never ended from here).
        """
        def get_dist(x: float, y: float) -> float:
            return math.sqrt(math.pow(x, 2) + math.pow(y, 2))

        def parse_distances(arr: tuple):
            return get_dist(arr[0], arr[1])

        if not observations:
            # Nothing received: empty radar scan, counters from the last tick.
            observations = {
                'radarScan': [],
            }
            observations.update(old_obs)

        # grab the closest tanks
        tank_locations = [
            (float(tank_dic['x']), float(tank_dic['y']))
            for tank_dic in observations['radarScan']
        ]
        tank_locations = sorted(tank_locations, key=parse_distances)

        ret_obs = {
            'killCount': observations['killCount'],
            'hitCount': observations['hitCount'],
            'deathCount': observations['deathCount']
        }

        # List of the closest tank coordinates. Limited to ``num_tanks``
        # entries: more radar contacts used to overflow the array.
        num_tanks = self.hparams['num_tanks']
        ret_state = np.zeros((num_tanks, 2))
        for i, loc in enumerate(tank_locations[:num_tanks]):
            ret_state[i] = loc

        return ret_state, ret_obs, False  # True if observations['deathCount'] >= 3 else False

    def run(self):
        """Play episodes until a ``None`` response arrives or an error occurs.

        An exception is printed and, after a short pause, ends the worker.
        """
        # Bound up front so ``death_count += 1`` below cannot raise
        # UnboundLocalError.
        death_count = 0

        def mm_clip(x, lower, upper):
            # Clamp ``x`` into [lower, upper].
            return max(lower, min(upper, x))

        try:
            while True:
                # clean up
                self.reset()
                obs = self.global_obs

                env = RlWorldClient("10.90.159.11", 1337)
                for t in range(self.hparams['max_timesteps']):
                    start_time = time.time()

                    state_current, obs, done = self.parse_observations(env.read_observation_dict(), obs)
                    self.state.extend(state_current.reshape(-1))

                    # send state to processing queue
                    self.processing_queue.put({
                        'id': self.id,
                        'state': np.array(self.state),
                    })

                    # get action response; ``None`` is the shutdown signal
                    response = self.inbox_queue.get()
                    if response is None:
                        print(f"Process {self.id} is shutting down..")
                        return

                    assert response['id'] == self.id
                    action = response['action']
                    action_logprob = response['action_logprob']
                    self.state.extend(action.reshape(-1))

                    # Rewards: counter increases since the previous tick,
                    # discounted linearly over the episode.
                    delta_kills = obs['killCount'] - self.global_obs['killCount']
                    delta_hits = obs['hitCount'] - self.global_obs['hitCount']
                    delta_death = obs['deathCount'] - self.global_obs['deathCount']

                    time_discount = mm_clip((-1/(self.hparams['max_timesteps']*1.1)) * t + 1, 0, 1)
                    reward = [
                        5 * delta_kills * time_discount,
                        1 * delta_hits * time_discount,
                    ]

                    # Send back experience asap
                    action = action.tolist()
                    xp = {
                        'id': self.id,
                        'state': np.array(self.state),
                        'action': action,
                        'action_logprob': action_logprob,
                        'reward': sum(reward),
                        'done': done
                    }
                    self.experience_queue.put(xp)

                    self.global_obs['killCount'] += delta_kills
                    self.global_obs['hitCount'] += delta_hits
                    self.global_obs['deathCount'] += delta_death

                    # Send off action to environment
                    env.send_action_dict({
                        "name": f"swarm_id:{self.id}",
                        "colour": "#7017a1",
                        "moveForwardBack": action[0],
                        "moveRightLeft": action[1],
                        "turnRightLeft": action[2],
                        "fire": True,
                    })

                    if done:
                        death_count += 1
                        break

                    # load balancing for the server
                    time_diff = time.time()-start_time
                    time.sleep(0 if time_diff > self.hparams['tick_time'] else self.hparams['tick_time']-time_diff)

        except Exception as e:
            print(f"worker {self.id} caught exception: {e}")
            time.sleep(0.5)