Example #1
def train(config, network_spec=None):
    data_provider = DataProvider(config.db)
    env = StockEnvironment(data_provider, config, 0)
    agent = (overwrite_agent(env, network_spec, config)
             if config.overwrite_agent
             else load_agent(config, env, network_spec))

    mlflow.log_param("agent", "tensorforce.agents.DQNAgent")
    for key in config.agent_specs:
        mlflow.log_param(key, config.agent_specs[key])

    runner = Runner(agent=agent, environment=env)
    offset = 20000
    num_episodes = 20
    step = 0
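    # Slide a window through the historical data: train for a block of
    # episodes, advance the offset, checkpoint the agent, and evaluate
    # every 10th window on the data it just trained on.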
    while data_provider.has_data_key(offset + config.max_step_per_episode):
        runner.run(num_episodes=num_episodes)
        offset = offset + config.max_step_per_episode
        env.offset = offset
        agent.save(config.agent_dir, config.agent_name)
        if step % 10 == 0:
            evaluate(config, data_provider,
                     offset - config.max_step_per_episode, agent)
        step += 1
    return agent, env
Example #2
def post_detect(target, ck, flag, param, waf):
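    # Inject each payload into every parameter that carries the marker
    # flag, POST the mutated parameters, and report payloads reflected
    # in the response; with a WAF, pad probes with benign requests and
    # short sleeps.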
    payload = load()
    agent = load_agent()

    host = target

    for i in range(0, len(payload)):
        if waf:
            time.sleep(0.5)
            try:
                urllib2.urlopen(host)
            except:
                pass
            time.sleep(0.5)
        for j in param.keys():
            if flag in param[j]:
                exp = flag + payload[i]
                param[j] = param[j].replace(flag, exp)
                try:
                    post_data = load_post_data(agent, ck, param)

                    req = urllib2.Request(target, data=post_data)
                    res = urllib2.urlopen(req)
                    content_html = res.read()
                    if exp in content_html or payload[i] in content_html:
                        print '[!]success with payload', param[j]
                        #post_detect(target,ck,flag,param_dict,True)

                except:
                    pass
                param[j] = param[j].replace(exp, flag)
Example #3
    def __init__(self, speed=0.02, size=720):
        self.draw_next_offset = size / 4
        width = size / 2
        height = size  # + self.draw_next_offset
        self.root = Tk()
        self.root.configure(background=COLORS[0])
        self.game = Canvas(self.root, width=width, height=height, bg=COLORS[0])
        self.game.pack()
        self.env = Environment()
        self.env.reset()
        with open(sys.argv[1], "rb") as f:
            self.history = pickle.load(f)
        self.processed = []
        """
        for state in self.history:
            self.processed.append(self.process_channels(state))
        """
        self.agent = (Agent(6) if len(sys.argv) == 1
                      else load_agent(sys.argv[1]))
        cnt = 0
        rewards = []
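        # Each replay-memory entry is a transition tuple; index 3 holds
        # the reward (cf. store_experience in Example #7).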
        for m in self.agent.replay_memory.memory:
            rewards.append(m[3])

        idx = np.argmax(rewards)
        print(self.agent.replay_memory.memory[idx][0][2])
        print(self.agent.replay_memory.memory[idx][4][2])
        print("duration", max(self.agent.durations))
        print("score", max(self.agent.scores))
        print(min(rewards))
        print(max(rewards))
        self.speed = speed
        self.size = size
        self.rectangle_size = size / self.env.row
        self.pause = False
        self.quit = False
        self.image_counter = 0
        self.commands = {  # raw keyboard keycodes mapped to env actions
            113: 1,  # Left
            114: 2,  # Right
            53: 3,  # Z
            52: 4,  # X
            65: 5,  # Drop
            37: 0  # Do nothing
        }
        self.init()
        self.root.title('Tetris')
        self.root.bind("<Key>", self.key_down)
        #threading.Thread(target=self.debug_channels).start()
        #threading.Thread(target=self.watch_history).start()
        #threading.Thread(target=self.play).start()
        threading.Thread(target=self.watch_play).start()
        self.root.mainloop()
Example #4
def get_detect(target, ck, flag, waf):
    # For each payload, replace the marker flag in the target URL with
    # flag+payload, request the mutated URL, and open any URL whose
    # response reflects the payload.
    payload = load()
    agent = load_agent()

    host = target.split('?')[0]

    for i in range(0, len(payload)):
        if waf:
            time.sleep(0.5)
            try:
                urllib2.urlopen(host)
            except:
                pass
            time.sleep(0.5)

        exp = flag + str(payload[i])
        now_target = target.replace(flag, exp)
        #print now_target
        try:
            data = load_data(agent, ck)
            req = urllib2.Request(now_target, data=data)
            res = urllib2.urlopen(req)
            content_html = res.read()
            if waf:
                time.sleep(0.5)
                try:
                    urllib2.urlopen(host)
                except:
                    pass
                time.sleep(0.5)
            #print content_html
            if exp in content_html or str(payload[i]) in content_html:
                #print 'aaa'
                webbrowser.open(now_target)
                print '[*]find payload success!', now_target
                #exit()
            else:
                pass
                #print '[*]find payload failed!', now_target
        except:
            pass
Example #5
import os
import urllib
import urllib2
import urlparse
import requests
from get import get_detect
from post import post_detect
from common import load_data, load_post_data
from agent import load_agent
import re
from bs4 import BeautifulSoup

method = ''
flag = 'q0w1e2'

agent_list = load_agent()


def get_fuzzing(target, ck, data):
    print '[*]now testing GET XSS......'
    parsed_tuple = urlparse.urlparse(urllib.unquote(target))
    url_query = urlparse.parse_qs(parsed_tuple.query, True)
    print url_query
    for i in url_query.keys():
        query = str(i) + '=' + str(url_query[i][0])
        tmp = query + flag
        location = str(url_query[i][0]) + flag

        now_target = target.replace(query, tmp)
        #data = load_data(agent_list,ck)
        try:
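            # The listing truncates here. A plausible continuation, modeled
            # on get_detect in Example #4 (an assumption, not the original
            # source): request the mutated URL and check for reflection.
            data = load_data(agent_list, ck)
            req = urllib2.Request(now_target, data=data)
            res = urllib2.urlopen(req)
            if location in res.read():
                print '[!]param may be vulnerable:', query
        except:
            pass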
Example #6
File: main.py Project: svd3/DDPG
load = args.load
play = args.play  # 'play' is read below; presumably parsed like 'load' (assumption)

env_name = 'Firefly-v1'  # 'Pendulum-v0'
env = gym.make(env_name)
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
memory_size = 1000000
num_episodes = 2000
num_steps = env.episode_len
batch_size = 64
std = 1.
agent = Agent(state_dim, action_dim, hidden_dim=64, tau=0.001)
noise = Noise(action_dim, mean=0., std=std)
#replay = ReplayMemory(memory_size)
if play:
    agent = load_agent(file='pretrained/model_7.pth.tar')
if load:
    agent = load_agent(file='pretrained/model_6.0.pth.tar')  #6.0

rewards = []
for episode in range(num_episodes):
    state = torch.Tensor([env.reset()])
    episode_reward = 0.
    std *= 0.9985  # decay the exploration-noise scale each episode
    noise.reset(0., std)
    for t in range(num_steps):
        if not play:
            action = agent.select_action(state, noise)
        else:
            action = agent.select_action(state)
            #print(action)
Example #7
from environment import Environment
from agent import Agent, load_agent
import torch
import numpy as np
import math
import pickle
import sys

num_actions = 6
num_iter = 50000000
print_interval = 10
save_interval = 200

env = Environment()
agent = Agent(num_actions) if len(sys.argv) == 1 else load_agent(sys.argv[1])

#agent.optimizer = torch.optim.Adam(agent.local_Q.parameters(), 5e-4)
print(agent.optimizer)

for episode in range(agent.start, num_iter):
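    # Standard DQN-style interaction loop: pick an action, step the
    # environment, store the transition (including the upcoming piece),
    # and accumulate the episode score.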
    done = False
    score = 0
    ep_duration = 0
    state, next_piece = env.reset()
    while not done:
        action = agent.select_action(state, next_piece)
        next_state, reward, done, next_next_piece = env.step(action)
        agent.store_experience(state, next_piece, action, reward,
                               next_state, next_next_piece, 1 - done)
        state = next_state
        next_piece = next_next_piece
        score += reward
Example #8
env.goal_radius = 0.4
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
memory_size = 1000000
num_episodes = 2000
num_steps = env.episode_len
batch_size = 64
std = 0.1
#agent = Agent(state_dim, action_dim, hidden_dim=64, tau=0.001)
noise = Noise(action_dim, mean=0., std=std)
#replay = ReplayMemory(memory_size)

gamma = Variable(torch.Tensor([0.99]), requires_grad=True)  # learnable discount factor
rewards = []
times = []
agent = load_agent(file='pretrained/model_3.0.pth.tar', gamma=gamma)
for episode in range(20):
    state = torch.Tensor([env.reset()])
    episode_reward = 0.
    #std *= 0.9985
    noise.reset(0., std)
    for t in range(num_steps):
        action = agent.select_action(state, noise)
        next_state, reward, done, _ = env.step(action.cpu().numpy()[0])
        episode_reward += reward
        #action = torch.Tensor(action)
        mask = torch.Tensor([not done])
        next_state = torch.Tensor([next_state])
        reward = torch.Tensor([reward])
        agent.memory.push(state, action, mask, next_state, reward)
        state = next_state
Example #9
def eval_():
    config = MlConfig(agent_name="abcd-agent")
    data_provider = DataProvider(config.db)
    env = StockEnvironment(data_provider, config.max_step_per_episode, 0)
    agent = load_agent(config, env, None)
    evaluate(config, data_provider, 0, agent)