def train(config, network_spec=None):
    data_provider = DataProvider(config.db)
    env = StockEnvironment(data_provider, config, 0)
    # Either rebuild the agent from scratch or restore it from disk.
    agent = overwrite_agent(env, network_spec, config) if config.overwrite_agent \
        else load_agent(config, env, network_spec)

    mlflow.log_param("agent", "tensorforce.agents.DQNAgent")
    for key in config.agent_specs:
        mlflow.log_param(key, config.agent_specs[key])

    runner = Runner(agent=agent, environment=env)
    offset = 20000
    num_episodes = 20
    step = 0
    # Slide the training window forward until the provider runs out of data.
    while data_provider.has_data_key(offset + config.max_step_per_episode):
        runner.run(num_episodes=num_episodes)
        offset += config.max_step_per_episode
        env.offset = offset
        agent.save(config.agent_dir, config.agent_name)
        # Every 10th window, evaluate on the window that was just trained.
        if step % 10 == 0:
            evaluate(config, data_provider, offset - config.max_step_per_episode, agent)
        step += 1
    return agent, env
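# The helpers above (load_agent, overwrite_agent) are not shown in this
# snippet. A minimal sketch of what they might look like, assuming the
# Tensorforce Agent.create/Agent.load API and the config fields train()
# uses (agent_dir, agent_name, agent_specs); an illustration, not the
# original implementation.
from tensorforce.agents import Agent

def overwrite_agent(env, network_spec, config):
    # Ignore any saved checkpoint and build a fresh DQN agent from the spec.
    return Agent.create(agent='dqn', environment=env,
                        network=network_spec, **config.agent_specs)

def load_agent(config, env, network_spec):
    # Restore the last checkpoint; fall back to a new agent if none exists.
    try:
        return Agent.load(directory=config.agent_dir,
                          filename=config.agent_name, environment=env)
    except Exception:
        return overwrite_agent(env, network_spec, config)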
def post_detect(target, ck, flag, param, waf):
    payload = load()
    agent = load_agent()
    host = target
    for i in range(0, len(payload)):
        # When a WAF is suspected, slow down and send a harmless request
        # between attempts to stay under rate limits.
        if waf:
            time.sleep(0.5)
            try:
                urllib2.urlopen(host)
            except:
                pass
            time.sleep(0.5)
        for j in param.keys():
            if flag in param[j]:
                # Append the current payload to the injection marker.
                exp = flag + payload[i]
                param[j] = param[j].replace(flag, exp)
                try:
                    post_data = load_post_data(agent, ck, param)
                    req = urllib2.Request(target, data=post_data)
                    res = urllib2.urlopen(req)
                    content_html = res.read()
                    # A reflected payload means the parameter is likely injectable.
                    if exp in content_html or payload[i] in content_html:
                        print '[!]success with payload', param[j]
                        #post_detect(target,ck,flag,param_dict,True)
                except:
                    pass
                # Restore the marker before trying the next payload.
                param[j] = param[j].replace(exp, flag)
def __init__(self, speed=0.02, size=720):
    self.draw_next_offset = size / 4
    width = size / 2
    height = size  # + self.draw_next_offset
    self.root = Tk()
    self.root.configure(background=COLORS[0])
    self.game = Canvas(self.root, width=width, height=height, bg=COLORS[0])
    self.game.pack()
    self.env = Environment()
    self.env.reset()
    self.history = pickle.load(open(sys.argv[1], "rb"))
    self.processed = []
    """
    for state in self.history:
        self.processed.append(self.process_channels(state))
    """
    # Load a saved agent when a checkpoint path is given on the command line.
    self.agent = Agent(6) if len(sys.argv) == 1 else load_agent(sys.argv[1])
    # Inspect the replay memory: find the transition with the highest reward.
    rewards = []
    for m in self.agent.replay_memory.memory:
        rewards.append(m[3])
    idx = np.argmax(rewards)
    print(self.agent.replay_memory.memory[idx][0][2])
    print(self.agent.replay_memory.memory[idx][4][2])
    print("duration", max(self.agent.durations))
    print("score", max(self.agent.scores))
    print(min(rewards))
    print(max(rewards))
    self.speed = speed
    self.size = size
    self.rectangle_size = size / self.env.row
    self.pause = False
    self.quit = False
    self.image_counter = 0
    # X11 keycodes mapped to environment actions.
    self.commands = {
        113: 1,  # Left
        114: 2,  # Right
        53: 3,   # Z
        52: 4,   # X
        65: 5,   # Drop
        37: 0    # Do nothing
    }
    self.init()
    self.root.title('Tetris')
    self.root.bind("<Key>", self.key_down)
    #threading.Thread(target=self.debug_channels).start()
    #threading.Thread(target=self.watch_history).start()
    #threading.Thread(target=self.play).start()
    threading.Thread(target=self.watch_play).start()
    self.root.mainloop()
def get_detect(target, ck, flag, waf):
    payload = load()
    agent = load_agent()
    host = target.split('?')[0]
    for i in range(0, len(payload)):
        # When a WAF is suspected, pause and send a harmless request
        # between attempts to stay under rate limits.
        if waf:
            time.sleep(0.5)
            try:
                urllib2.urlopen(host)
            except:
                pass
            time.sleep(0.5)
        # Replace the injection marker in the URL with marker + payload.
        exp = flag + str(payload[i])
        now_target = target.replace(flag, exp)
        #print now_target
        try:
            data = load_data(agent, ck)
            req = urllib2.Request(now_target, data=data)
            res = urllib2.urlopen(req)
            content_html = res.read()
            if waf:
                time.sleep(0.5)
                try:
                    urllib2.urlopen(host)
                except:
                    pass
                time.sleep(0.5)
            #print content_html
            # A reflected payload means the parameter is likely injectable.
            if exp in content_html or str(payload[i]) in content_html:
                webbrowser.open(now_target)
                print '[*]find payload success!', now_target
                #exit()
            else:
                pass
                #print '[*]find payload failed!',now_target
        except:
            pass
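# Both detect routines rely on two loaders that are not shown here: load()
# for the XSS payload list and load_agent() for User-Agent strings. A
# minimal sketch under those assumptions (the file names are invented):
def load(path='payload.txt'):
    # One payload per line, blank lines skipped.
    with open(path) as f:
        return [line.strip() for line in f if line.strip()]

def load_agent(path='user_agent.txt'):
    # One User-Agent string per line.
    with open(path) as f:
        return [line.strip() for line in f if line.strip()]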
import os
import urllib
import urllib2
import urlparse
import requests
from get import get_detect
from post import post_detect
from common import load_data, load_post_data
from agent import load_agent
import re
from bs4 import BeautifulSoup

method = ''
flag = 'q0w1e2'  # injection marker appended to each parameter value
agent_list = load_agent()

def get_fuzzing(target, ck, data):
    print '[*]now demo test get xss......'
    # Decode the URL and pull out its query-string parameters.
    parsed_tuple = urlparse.urlparse(urllib.unquote(target))
    url_query = urlparse.parse_qs(parsed_tuple.query, True)
    print url_query
    for i in url_query.keys():
        # Rebuild each key=value pair with the marker appended to the value.
        query = str(i) + '=' + str(url_query[i][0])
        tmp = query + flag
        location = str(url_query[i][0]) + flag
        now_target = target.replace(query, tmp)
        #data = load_data(agent_list,ck)
        try:
            pass  # the original snippet is truncated at this point
        except:
            pass
load = args.load
env_name = 'Firefly-v1'  # 'Pendulum-v0'
env = gym.make(env_name)
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
memory_size = 1000000
num_episodes = 2000
num_steps = env.episode_len
batch_size = 64
std = 1.

agent = Agent(state_dim, action_dim, hidden_dim=64, tau=0.001)
noise = Noise(action_dim, mean=0., std=std)
#replay = ReplayMemory(memory_size)

# Optionally replace the fresh agent with a pretrained checkpoint.
if play:
    agent = load_agent(file='pretrained/model_7.pth.tar')
if load:
    agent = load_agent(file='pretrained/model_6.0.pth.tar')  #6.0

rewards = []
for episode in range(num_episodes):
    state = torch.Tensor([env.reset()])
    episode_reward = 0.
    # Anneal the exploration noise as training progresses.
    std *= 0.9985
    noise.reset(0., std)
    for t in range(num_steps):
        # Exploration noise is only added during training, not playback.
        if not play:
            action = agent.select_action(state, noise)
        else:
            action = agent.select_action(state)
        #print(action)
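# The Noise class used above is not shown. A minimal Gaussian action-noise
# sketch that matches the calls in the loop (Noise(action_dim, mean, std)
# and noise.reset(mean, std)); the original may well use Ornstein-Uhlenbeck
# noise instead, so treat this as an assumption.
import numpy as np

class Noise:
    def __init__(self, action_dim, mean=0., std=1.):
        self.action_dim = action_dim
        self.reset(mean, std)

    def reset(self, mean, std):
        # Called once per episode to re-centre and re-scale the noise.
        self.mean = mean
        self.std = std

    def sample(self):
        return np.random.normal(self.mean, self.std, self.action_dim)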
from environment import Environment
from agent import Agent, load_agent
import torch
import numpy as np
import math
import pickle
import sys

num_actions = 6
num_iter = 50000000
print_interval = 10
save_interval = 200

env = Environment()
# Start fresh, or resume from the checkpoint passed on the command line.
agent = Agent(num_actions) if len(sys.argv) == 1 else load_agent(sys.argv[1])
#agent.optimizer = torch.optim.Adam(agent.local_Q.parameters(), 5e-4)
print(agent.optimizer)

for episode in range(agent.start, num_iter):
    done = False
    score = 0
    ep_duration = 0
    state, next_piece = env.reset()
    while not done:
        action = agent.select_action(state, next_piece)
        next_state, reward, done, next_next_piece = env.step(action)
        # Store the transition; 1-done masks the bootstrap on terminal states.
        agent.store_experience(state, next_piece, action, reward,
                               next_state, next_next_piece, 1 - done)
        state = next_state
        next_piece = next_next_piece
        score += reward
env.goal_radius = 0.4
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
memory_size = 1000000
num_episodes = 2000
num_steps = env.episode_len
batch_size = 64
std = 0.1

#agent = Agent(state_dim, action_dim, hidden_dim=64, tau=0.001)
noise = Noise(action_dim, mean=0., std=std)
#replay = ReplayMemory(memory_size)

# The discount factor is a trainable parameter here, handed to the agent.
gamma = Variable(torch.Tensor([0.99]), requires_grad=True)

rewards = []
times = []
agent = load_agent(file='pretrained/model_3.0.pth.tar', gamma=gamma)

for episode in range(20):
    state = torch.Tensor([env.reset()])
    episode_reward = 0.
    #std *= 0.9985
    noise.reset(0., std)
    for t in range(num_steps):
        action = agent.select_action(state, noise)
        next_state, reward, done, _ = env.step(action.cpu().numpy()[0])
        episode_reward += reward
        #action = torch.Tensor(action)
        mask = torch.Tensor([not done])
        next_state = torch.Tensor([next_state])
        reward = torch.Tensor([reward])
        agent.memory.push(state, action, mask, next_state, reward)
        state = next_state
def eval_():
    config = MlConfig(agent_name="abcd-agent")
    data_provider = DataProvider(config.db)
    env = StockEnvironment(data_provider, config.max_step_per_episode, 0)
    agent = load_agent(config, env, None)
    evaluate(config, data_provider, 0, agent)