Example #1
    def __init__(self):
        #100 ticks = (100*0.1) = 10 secs
        self.cooldownMotionMin = 0
        self.cooldownMotionMax = 500

        self.cooldownAccelMin = 0
        self.cooldownAccelMax = 500

        self.cooldownSoundMin = 0
        self.cooldownSoundMax = 500

        self.cooldownBarkMin = 0
        self.cooldownBarkMax = 50

        self.tapDetected = False
        self.motionDetected = False
        self.soundHighVal = 0

        self.barking = False
        self.processSound = None

        sound.init()
        accel.init()
        motion.init()

        self.soundLogger = Logger(SensorNames.SOUND)
        self.motionLogger = Logger(SensorNames.MOTION)
        self.accelLogger = Logger(SensorNames.ACCELEROMETER)

        time.sleep(2)
Example #2
    def __init__(self,
                 env_fn,
                 save_dir,
                 tensorboard_logdir=None,
                 optimizer_class=RMSprop,
                 oc_kwargs=dict(),
                 logger_kwargs=dict(),
                 eps_start=1.0,
                 eps_end=0.1,
                 eps_decay=1e4,
                 lr=1e-3,
                 gamma=0.99,
                 rollout_length=2048,
                 beta_reg=0.01,
                 entropy_weight=0.01,
                 gradient_clip=5,
                 target_network_update_freq=200,
                 max_ep_len=2000,
                 save_freq=200,
                 seed=0,
                 **kwargs):

        self.seed = seed
        torch.manual_seed(seed)
        np.random.seed(seed)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.lr = lr
        self.env_fn = env_fn
        self.env = env_fn()
        self.oc_kwargs = oc_kwargs
        self.network_fn = self.get_network_fn(self.oc_kwargs)
        self.network = self.network_fn().to(self.device)
        self.target_network = self.network_fn().to(self.device)
        self.optimizer_class = optimizer_class
        self.optimizer = optimizer_class(self.network.parameters(), self.lr)
        self.target_network.load_state_dict(self.network.state_dict())
        self.eps_start = eps_start
        self.eps_end = eps_end
        self.eps_decay = eps_decay
        self.eps_schedule = LinearSchedule(eps_start, eps_end, eps_decay)
        self.gamma = gamma
        self.rollout_length = rollout_length
        self.num_options = oc_kwargs['num_options']
        self.beta_reg = beta_reg
        self.entropy_weight = entropy_weight
        self.gradient_clip = gradient_clip
        self.target_network_update_freq = target_network_update_freq
        self.max_ep_len = max_ep_len
        self.save_freq = save_freq

        self.save_dir = save_dir
        self.logger = Logger(**logger_kwargs)
        self.tensorboard_logdir = tensorboard_logdir
        # self.tensorboard_logger = SummaryWriter(log_dir=tensorboard_logdir)

        self.is_initial_states = to_tensor(np.ones((1))).byte()
        self.prev_options = self.is_initial_states.clone().long().to(
            self.device)

        self.best_mean_reward = -np.inf
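The epsilon-greedy exploration above is driven by LinearSchedule(eps_start, eps_end, eps_decay), whose implementation is not part of this listing. A minimal sketch of how such a linear annealing schedule is commonly written (an assumption, not this project's actual class):

class LinearSchedule:
    """Linearly anneal a value from `start` to `end` over `steps` calls."""

    def __init__(self, start=1.0, end=0.1, steps=1e4):
        self.current = start
        self.end = end
        self.increment = (end - start) / float(steps)
        # When annealing downwards, clip from below; when upwards, clip from above.
        self.bound = min if end > start else max

    def __call__(self):
        value = self.current
        self.current = self.bound(self.current + self.increment, self.end)
        return value

# Usage sketch: eps_schedule = LinearSchedule(1.0, 0.1, 1e4); eps = eps_schedule() once per step.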
Example #3
def main():
    # Create log dir
    args = parse_arguments()
    save_dir = args.log_dir
    logger = Logger(output_dir=save_dir)
    title = 'Learning Curve'
    x, y = logger.load_results(["EpLen", "EpRet"])

    x = cumulative_sum(x)
    y = moving_average(y, window=50)
    # Truncate x
    x = x[len(x) - len(y):]
    fig = plt.figure(title)
    plt.plot(x, y, label="Own implementation")
    if args.compare:
        log_dir = os.path.join(
            "Stable_Baselines", "logs",
            os.path.sep.join(args.log_dir.split(os.path.sep)[1:]))
        from stable_baselines3.common.results_plotter import load_results, ts2xy
        x2, y2 = ts2xy(load_results(log_dir), 'timesteps')
        y2 = moving_average(y2, window=50)
        # Truncate x
        x2 = x2[len(x2) - len(y2):]
        x2, y2 = standardise_graph(x, y, x2, y2)

        plt.plot(x2, y2, label="Stable_Baselines3 implementation")

    plt.legend()
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")
    if args.save:
        fname = "comparison.png" if args.compare else "learning_curve.png"
        plt.savefig(os.path.join(save_dir, fname))
    plt.show()
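The smoothing above relies on helper functions that are not shown in this listing. A plausible sketch of moving_average and cumulative_sum, assuming they behave like their usual NumPy counterparts:

import numpy as np

def moving_average(values, window=50):
    # Rolling mean; the output is shorter than the input by window - 1,
    # which is why the x-axis is truncated to match after smoothing.
    weights = np.repeat(1.0, window) / window
    return np.convolve(values, weights, mode='valid')

def cumulative_sum(values):
    # Convert per-episode lengths into cumulative timestep counts for the x-axis.
    return np.cumsum(values)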
Example #4
 def __init__(self):
     self.login_get_cookies_url = "https://ui.ptlogin2.qq.com/cgi-bin/login?daid=164&target=self&style=16&mibao_css=m_webqq&appid=501004106&enable_qlogin=0&no_verifyimg=1&s_url=http%3A%2F%2Fw.qq.com%2Fproxy.html&f_url=loginerroralert&strong_login=1&login_state=10&t=20131024001"
     self.get_ptqrshow = "https://ssl.ptlogin2.qq.com/ptqrshow?appid=501004106&e=0&l=M&s=5&d=72&v=4&t=0.9142399367333609"
     self.check = "https://ssl.ptlogin2.qq.com/ptqrlogin?ptqrtoken={ptqrtoken}&webqq_type=10&remember_uin=1&login2qq=1&aid=501004106&u1=http%3A%2F%2Fw.qq.com%2Fproxy.html%3Flogin2qq%3D1%26webqq_type%3D10&ptredirect=0&ptlang=2052&daid=164&from_ui=1&pttype=1&dumy=&fp=loginerroralert&action=0-0-32750&mibao_css=m_webqq&t=undefined&g=1&js_type=0&js_ver=10197&login_sig=&pt_randsalt=0"
     self.code = open('code.png', 'wb')  # binary mode so image bytes can be written
     self.jm = "http://jiema.wwei.cn/fileupload/index/op/jiema.html"
     self.logger = Logger('Login')
     self.getvfwebqq = "http://s.web2.qq.com/api/getvfwebqq?ptwebqq={ptwebqq}&clientid=53999199&psessionid=&t=1488053293431"
     self.login2 = "http://d1.web2.qq.com/channel/login2"
     self.online = "http://d1.web2.qq.com/channel/get_online_buddies2?vfwebqq={vfwebqq}&clientid=53999199&psessionid={psessionid}&t=1488268527333"
Example #5
 def __init__(self, cookies, psessionid):
     self.target = 'https://d1.web2.qq.com/channel/poll2'
     self.header = {
         'origin': 'https://d1.web2.qq.com',
         'referer': 'https://d1.web2.qq.com/cfproxy.html?v=20151105001&callback=1'
     }
     self.psessionid = psessionid
     self.cookies = json.loads(cookies)
     self.data = {
         'r': json.dumps({
             'ptwebqq': self.cookies['ptwebqq'],
             'clientid': 53999199,
             'psessionid': self.psessionid
         })
     }
     self.logger = Logger('Heart')
     while True:
         self.poll()
         time.sleep(1.5)
Example #6
def plot_results(logs_dir,
                 plot_label,
                 show_each_trial=False,
                 window=200,
                 maxlen=-1):
    # save_dir = os.path.join("Model_Weights", env, agent)
    logger = Logger(output_dir=logs_dir, load=True)
    logger = truncate(logger, length=maxlen)
    EpLen_list, EpRet_list = logger.load_all_results(["EpLen", "EpRet"])
    Ep_Returns, Ep_Lengths = [], []
    max_length = len(EpLen_list[0])
    max_idx = 0
    for idx, (EpLen, EpRet) in enumerate(zip(EpLen_list, EpRet_list)):
        EpLen = cumulative_sum(EpLen)
        EpRet = moving_average(EpRet, window=window)
        if show_each_trial:
            plt.plot(EpLen, EpRet, label=f"trial: {idx+1}")
        if len(EpLen) > max_length:
            max_length = len(EpLen)
            max_idx = idx
        Ep_Returns.append(EpRet)
        Ep_Lengths.append(EpLen)

    EpLen = Ep_Lengths[max_idx]
    Ep_Returns = np.array(standardise_lengths(Ep_Returns, max_length)).T

    ret_mean = []
    ret_std = []
    for ep_ret in Ep_Returns:
        ret_mean.append(ep_ret.mean())
        ret_std.append(ep_ret.std())

    ret_mean = np.array(ret_mean)
    ret_std = np.array(moving_average(ret_std, 50))

    if not show_each_trial:
        plt.plot(EpLen, ret_mean, label=f"{plot_label}")
        plt.fill_between(EpLen,
                         ret_mean - ret_std,
                         ret_mean + ret_std,
                         alpha=0.2)
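plot_results averages over trials of different lengths via standardise_lengths, which is not shown here. One way such a helper might pad the shorter runs (an assumed sketch, not the repository's code):

def standardise_lengths(sequences, max_length):
    # Pad each trial's curve to max_length by repeating its last value,
    # so the trials can be stacked into an array and averaged column-wise.
    padded = []
    for seq in sequences:
        seq = list(seq)
        padded.append(seq + [seq[-1]] * (max_length - len(seq)))
    return padded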
Example #7
    def train(self, path, data_base_name, data_base_table_name):
        """
        All the packages related to AI modules are called and a pipeline is created.

        Input : path, database name, database table name
        Output : AI model file, related model files and various evaluation metrics
        """
        try:
            logger = Logger(path)
            logger.generate_metadata_logs()
            file_validation = File_name_validation(logger, path).file_name_validation_package()
            data_validation = Data_validation(logger, path).data_validation_package()
            database = Database(logger,path,data_base_name,data_base_table_name).database_package()
            exploratory_data_analysis = EDA_and_Transformation(logger,path).eda_transformation_package()
            plots = Plots(logger , path).plots_package()
            data_preprocessing = Data_preprocessing(logger,path).data_preprocessing_package()
            machine_learning_model_accuracy = ML_model(logger ,path).ML_training_package()
            return machine_learning_model_accuracy
        except Exception as e:
            self.logger.add_in_logs("ERR" , "Training module")
            self.logger.add_in_logs("LIN" , "Error on line number : {}".format(sys.exc_info()[-1].tb_lineno))
            self.logger.add_in_logs("TYP" , str(e))
Example #8
 def __init__(self, cookies, uin, vfwebqq):
     self.logger = Logger('Friends')
     self.db = sqlite3.connect(DB_CONFIG['SQLITE'])
     self.target = 'http://s.web2.qq.com/api/get_user_friends2'
     self.cookies = json.loads(cookies)
     self.header = {
         'origin':
         'https://d1.web2.qq.com',
         'referer':
         'https://d1.web2.qq.com/cfproxy.html?v=20151105001&callback=1'
     }
     self.hash = self.friendsHash(uin, self.cookies['ptwebqq'])
     self.data = {
         'r': '{"vfwebqq":"' + vfwebqq + '","hash":"' + self.hash + '"}'
     }
     self.logger.info('Clearing expired data')
     query = "delete from categories"
     self.db.execute(query)
     query = "delete from friends"
     self.db.execute(query)
     self.db.commit()
     self.getFriend()
Example #9
 def __init__(self, cookies, uin, vfwebqq):
     self.logger = Logger('Group')
     self.db = sqlite3.connect(DB_CONFIG['SQLITE'])
     self.target = 'http://s.web2.qq.com/api/get_group_name_list_mask2'
     self.cookies = json.loads(cookies)
     self.header = {
         'Host':
         's.web2.qq.com',
         'origin':
         'http://s.web2.qq.com',
         'referer':
         'http://s.web2.qq.com/proxy.html?v=20130916001&callback=1&id=1'
     }
     self.hash = self.friendsHash(uin, self.cookies['ptwebqq'])
     self.data = {
         'r': '{"vfwebqq":"' + vfwebqq + '","hash":"' + self.hash + '"}'
     }
     self.logger.info('Clearing expired group data')
     query = "delete from groups"
     self.db.execute(query)
     self.db.commit()
     self._getGroup()
Example #10
 def __init__(self):
     self.logger = Logger('ControllerCenter')
     self.logger.info(
         '#################################################################'
     )
     self.logger.info("#欢迎来到sml2h3的QQ机器人,下面是我的菜单,请回复菜单编号后回车 #")
     self.logger.info(
         "#作者:sml2h3 Github:https://github.com/sml2h3                   #")
     self.logger.info(
         "#作者博客:https://www.fkgeek.com                                #")
     self.logger.info(
         "#1、启动QQ机器人                                                #")
     self.logger.info(
         '#2、访问作者博客                                                #')
     self.logger.info(
         '#################################################################'
     )
     flag = True
     while flag:
         command = raw_input('>>')
         if command == '1':
             flag = False
             self._run()
Example #11
    def __init__(self, env_fn, save_dir, ac_kwargs=dict(), seed=0, tensorboard_logdir = None,
         replay_size=int(1e6), gamma=0.99, 
         tau=0.995, pi_lr=1e-3, q_lr=1e-3, batch_size=100, start_steps=10000, 
         update_after=1000, update_every=50, act_noise=0.1, num_test_episodes=10, 
         max_ep_len=1000, logger_kwargs=dict(), save_freq=1, ngpu=1):    
        '''
        Deep Deterministic Policy Gradients (DDPG)
        Args:
            env_fn: function to create the gym environment
            save_dir: path to save directory
            actor_critic: Class for the actor-critic pytorch module
            ac_kwargs (dict): any keyword argument for the actor_critic
                        (1) hidden_sizes=(256, 256)
                        (2) activation=nn.ReLU
                        (3) device='cpu'
            seed (int): seed for random generators
            replay_size (int): Maximum length of replay buffer.
            gamma (float): Discount factor. (Always between 0 and 1.)
            tau (float): Interpolation factor in polyak averaging for target 
                networks.
            pi_lr (float): Learning rate for policy.
            q_lr (float): Learning rate for Q-networks.
            batch_size (int): Minibatch size for SGD.
            start_steps (int): Number of steps for uniform-random action selection,
                before running real policy. Helps exploration.
            update_after (int): Number of env interactions to collect before
                starting to do gradient descent updates. Ensures replay buffer
                is full enough for useful updates.
            update_every (int): Number of env interactions that should elapse
                between gradient descent updates. Note: Regardless of how long 
                you wait between updates, the ratio of env steps to gradient steps 
                is locked to 1.
            act_noise (float): Stddev for Gaussian exploration noise added to 
                policy at training time. (At test time, no noise is added.)
            num_test_episodes (int): Number of episodes to test the deterministic
                policy at the end of each epoch.
            max_ep_len (int): Maximum length of trajectory / episode / rollout.
            logger_kwargs (dict): Keyword args for Logger. 
                        (1) output_dir = None
                        (2) output_fname = 'progress.pickle'
            save_freq (int): How often (in terms of gap between episodes) to save
                the current policy and value function.
        '''
        # logger stuff
        self.logger = Logger(**logger_kwargs)

        torch.manual_seed(seed)
        np.random.seed(seed)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.env = env_fn()

        # Action Limit for clamping
        self.act_limit = self.env.action_space.high[0]

        # Create actor-critic module
        self.ngpu = ngpu
        self.actor_critic = get_actor_critic_module(ac_kwargs, 'ddpg')
        self.ac_kwargs = ac_kwargs
        self.ac = self.actor_critic(self.env.observation_space, self.env.action_space, device=self.device, ngpu=self.ngpu, **ac_kwargs)
        self.ac_targ = deepcopy(self.ac)

        # Freeze target networks with respect to optimizers
        for p in self.ac_targ.parameters():
            p.requires_grad = False
        
        # Experience buffer
        self.replay_size = replay_size
        self.replay_buffer = ReplayBuffer(int(replay_size))

        # Set up optimizers for actor and critic
        self.pi_lr = pi_lr
        self.q_lr = q_lr
        self.pi_optimizer = Adam(self.ac.pi.parameters(), lr=pi_lr)
        self.q_optimizer = Adam(self.ac.q.parameters(), lr=q_lr)

        self.gamma = gamma
        self.tau = tau
        self.act_noise = act_noise
        # self.obs_dim = self.env.observation_space.shape[0]
        self.act_dim = self.env.action_space.shape[0]
        self.num_test_episodes = num_test_episodes
        self.max_ep_len = self.env.spec.max_episode_steps if self.env.spec.max_episode_steps is not None else max_ep_len
        self.start_steps = start_steps
        self.update_after = update_after
        self.update_every = update_every
        self.batch_size = batch_size
        self.save_freq = save_freq

        self.best_mean_reward = -np.inf
        self.save_dir = save_dir
        self.tensorboard_logdir = tensorboard_logdir
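tau above is the polyak interpolation factor for the target networks. A minimal sketch of the soft update that such a DDPG implementation typically applies after each gradient step (an assumption about this class's update method, not code from it):

import torch

def polyak_update(network, target_network, tau=0.995):
    # theta_target <- tau * theta_target + (1 - tau) * theta
    with torch.no_grad():
        for p, p_targ in zip(network.parameters(), target_network.parameters()):
            p_targ.data.mul_(tau)
            p_targ.data.add_((1 - tau) * p.data)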
Example #12
    def __init__(self,
                 env_fn,
                 save_dir,
                 tensorboard_logdir=None,
                 optimizer_class=Adam,
                 weight_decay=0,
                 oc_kwargs=dict(),
                 logger_kwargs=dict(),
                 lr=1e-3,
                 optimization_epochs=5,
                 mini_batch_size=64,
                 ppo_ratio_clip=0.2,
                 gamma=0.99,
                 rollout_length=2048,
                 beta_weight=0,
                 entropy_weight=0.01,
                 gradient_clip=5,
                 gae_tau=0.95,
                 max_ep_len=2000,
                 save_freq=200,
                 seed=0,
                 **kwargs):

        self.seed = seed
        torch.manual_seed(seed)
        np.random.seed(seed)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.lr = lr
        self.env_fn = env_fn
        self.env = env_fn()
        self.oc_kwargs = oc_kwargs
        self.network_fn = self.get_network_fn(self.oc_kwargs)
        self.network = self.network_fn().to(self.device)
        self.optimizer_class = optimizer_class
        self.weight_decay = weight_decay
        self.optimizer = optimizer_class(self.network.parameters(),
                                         self.lr,
                                         weight_decay=self.weight_decay)
        self.gamma = gamma
        self.rollout_length = rollout_length
        self.num_options = oc_kwargs['num_options']
        self.beta_weight = beta_weight
        self.entropy_weight = entropy_weight
        self.gradient_clip = gradient_clip
        self.max_ep_len = max_ep_len
        self.save_freq = save_freq

        self.save_dir = save_dir
        self.logger = Logger(**logger_kwargs)
        self.tensorboard_logdir = tensorboard_logdir
        # self.tensorboard_logger = SummaryWriter(log_dir=tensorboard_logdir)

        self.is_initial_states = to_tensor(np.ones((1))).byte().to(self.device)
        self.prev_options = to_tensor(np.zeros((1))).long().to(self.device)

        self.best_mean_reward = -np.inf

        self.optimization_epochs = optimization_epochs
        self.mini_batch_size = mini_batch_size
        self.ppo_ratio_clip = ppo_ratio_clip
        self.gae_tau = gae_tau
        self.use_gae = self.gae_tau > 0
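self.use_gae above turns on Generalized Advantage Estimation with parameter gae_tau. A compact sketch of the GAE recursion that such rollout-based agents rely on (assumed helper, not code from this listing):

import numpy as np

def compute_gae(rewards, values, last_value, dones, gamma=0.99, gae_tau=0.95):
    # Backward recursion: A_t = delta_t + gamma * gae_tau * (1 - done_t) * A_{t+1}
    advantages = np.zeros(len(rewards))
    gae = 0.0
    next_value = last_value
    for t in reversed(range(len(rewards))):
        delta = rewards[t] + gamma * next_value * (1 - dones[t]) - values[t]
        gae = delta + gamma * gae_tau * (1 - dones[t]) * gae
        advantages[t] = gae
        next_value = values[t]
    returns = advantages + np.asarray(values, dtype=np.float64)
    return advantages, returns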
Example #13
from getpass import getpass
from Logger.logger import Logger
from fbchat import FBchatException, FBchatUserError
import json

if __name__ == "__main__":
	cookies = None
	try:
		with open("session", "r") as file:
			cookies = json.loads(file.read())
	except:
		pass
	try:
		try:
			if cookies:
				try:
					client = Logger("cookie", "cookie", session_cookies=cookies, max_tries=1)
				except FBchatUserError:
					client = Logger(input("Login: "******"Password: "******"Login: "******"Password: "******"Error during logging in, wrong password/bad cookies?"))
		client.listen()
	except:
		if cookies:
			client.logout(True)
		else:
			client.logout()
Example #14
    def __init__(self,
                 env_fn,
                 save_dir,
                 actor_critic=MLPActorCritic,
                 ac_kwargs=dict(),
                 seed=0,
                 replay_size=int(1e6),
                 gamma=0.99,
                 tau=0.995,
                 pi_lr=1e-3,
                 q_lr=1e-3,
                 batch_size=100,
                 start_steps=10000,
                 update_after=1000,
                 update_every=50,
                 act_noise=0.1,
                 num_test_episodes=10,
                 max_ep_len=1000,
                 logger_kwargs=dict(),
                 save_freq=1,
                 policy_delay=2):
        '''
        Twin Delayed Deep Deterministic Policy Gradients (TD3):
        An Extension of DDPG but with 3 tricks added:
            (1) Clipped Double Q-Learning: TD3 learns two Q-functions instead of one (hence “twin”),
                and uses the smaller of the two Q-values to form the targets in the Bellman error loss functions
            (2) “Delayed” Policy Updates. TD3 updates the policy (and target networks) less frequently 
                than the Q-function. The paper recommends one policy update for every two Q-function updates.
            (3) Target Policy Smoothing. TD3 adds noise to the target action, to make it harder for the policy
                to exploit Q-function errors by smoothing out Q along changes in action.
        Args:
            env_fn: function to create the gym environment
            save_dir: path to save directory
            actor_critic: Class for the actor-critic pytorch module
            ac_kwargs (dict): any keyword argument for the actor_critic
                        (1) hidden_sizes=(256, 256)
                        (2) activation=nn.ReLU
                        (3) device='cpu'
            seed (int): seed for random generators
            replay_size (int): Maximum length of replay buffer.
            gamma (float): Discount factor. (Always between 0 and 1.)
            tau (float): Interpolation factor in polyak averaging for target 
                networks.
            pi_lr (float): Learning rate for policy.
            q_lr (float): Learning rate for Q-networks.
            batch_size (int): Minibatch size for SGD.
            start_steps (int): Number of steps for uniform-random action selection,
                before running real policy. Helps exploration.
            update_after (int): Number of env interactions to collect before
                starting to do gradient descent updates. Ensures replay buffer
                is full enough for useful updates.
            update_every (int): Number of env interactions that should elapse
                between gradient descent updates. Note: Regardless of how long 
                you wait between updates, the ratio of env steps to gradient steps 
                is locked to 1.
            act_noise (float): Stddev for Gaussian exploration noise added to 
                policy at training time. (At test time, no noise is added.)
            num_test_episodes (int): Number of episodes to test the deterministic
                policy at the end of each epoch.
            max_ep_len (int): Maximum length of trajectory / episode / rollout.
            logger_kwargs (dict): Keyword args for Logger. 
                        (1) output_dir = None
                        (2) output_fname = 'progress.pickle'
            save_freq (int): How often (in terms of gap between episodes) to save
                    the current policy and value function.
            policy_delay (int): Policy will only be updated once every 
                                policy_delay times for each update of the Q-networks.
        '''
        # logger stuff
        self.logger = Logger(**logger_kwargs)

        torch.manual_seed(seed)
        np.random.seed(seed)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.env, self.test_env = env_fn(), env_fn()

        # Action Limit for clamping
        self.act_limit = self.env.action_space.high[0]

        # Create actor-critic module
        self.ac = actor_critic(self.env.observation_space,
                               self.env.action_space,
                               device=self.device,
                               **ac_kwargs)
        self.ac_targ = deepcopy(self.ac)

        # Freeze target networks with respect to optimizers
        for p in self.ac_targ.parameters():
            p.requires_grad = False

        # Experience buffer
        self.replay_buffer = ReplayBuffer(int(replay_size))

        # Set up optimizers for actor and critic
        self.pi_optimizer = Adam(self.ac.pi.parameters(), lr=pi_lr)
        self.q_optimizer = Adam(chain(self.ac.q1.parameters(),
                                      self.ac.q2.parameters()),
                                lr=q_lr)

        self.gamma = gamma
        self.tau = tau
        self.act_noise = act_noise
        self.obs_dim = self.env.observation_space.shape[0]
        self.act_dim = self.env.action_space.shape[0]
        self.num_test_episodes = num_test_episodes
        self.max_ep_len = self.env.max_episode_steps if self.env.max_episode_steps is not None else max_ep_len
        self.start_steps = start_steps
        self.update_after = update_after
        self.update_every = update_every
        self.batch_size = batch_size
        self.save_freq = save_freq
        self.policy_delay = policy_delay

        self.best_mean_reward = -np.inf
        self.save_dir = save_dir
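The three TD3 tricks listed in the docstring all show up in how the Bellman targets are built. A sketch of that target computation, assuming ac_targ.pi, ac_targ.q1 and ac_targ.q2 are callable modules as the optimizer setup above suggests; the target_noise and noise_clip values are assumptions:

import torch

def td3_target(ac_targ, next_obs, rewards, dones, act_limit,
               gamma=0.99, target_noise=0.2, noise_clip=0.5):
    with torch.no_grad():
        # (3) Target policy smoothing: perturb the target action with clipped noise.
        pi_targ = ac_targ.pi(next_obs)
        noise = (torch.randn_like(pi_targ) * target_noise).clamp(-noise_clip, noise_clip)
        next_act = (pi_targ + noise).clamp(-act_limit, act_limit)
        # (1) Clipped double Q-learning: use the smaller of the two target Q-values.
        q1_targ = ac_targ.q1(next_obs, next_act)
        q2_targ = ac_targ.q2(next_obs, next_act)
        backup = rewards + gamma * (1 - dones) * torch.min(q1_targ, q2_targ)
    return backup

# Trick (2), the delayed policy update, is governed by policy_delay in the class above.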
Example #15
def main():
    """
    The main method handles all command-line arguments.
    It triggers the two phases: extracting data and reporting data.
    usage: main.py [-h] [-d DEVICE] [-m METHOD] [--db_path DB_PATH]
               [--db_hash DB_HASH] [-l LOG] [-D] [-M] [--only_report]
    """
    parser = argparse.ArgumentParser(
        description=
        'The program was developed to extract data from common fitness trackers and show the findings in standardized reports. '
        'It works in two phases: first it obtains and stores information in a database; '
        'second, it starts a Flask application with HTML reports.')
    parser.add_argument('-d',
                        '--device',
                        help='The device you want to investigate.')
    parser.add_argument('-m',
                        '--method',
                        help='The method with which you want to investigate.')
    parser.add_argument(
        '--db_path',
        help='Change the path where the database should be stored.')
    parser.add_argument('--db_hash',
                        help='The sha256 hash of the given database.')
    parser.add_argument(
        '-l',
        '--log',
        help='Change the path where log files should be stored.')
    parser.add_argument('-D',
                        '--list_devices',
                        action='store_true',
                        help='Lists all available devices')
    parser.add_argument('-M',
                        '--list_methods',
                        action='store_true',
                        help='List all available methods')
    parser.add_argument(
        '--only_report',
        action='store_true',
        help='Skips the first phase and only starts the web server and report.')
    args = parser.parse_args()

    # Initial logger
    if args.log is not None:
        main_log = os.path.join(args.log, "main.log")
    else:
        main_log = "main.log"

    # Use the same log since flask writes both std.
    sys.stdout = Logger(main_log, err=False)
    sys.stderr = Logger(main_log, err=True)

    print("Start analysis: " + str(sys.argv))

    # List all devices and methods
    if args.list_devices:
        devices = collect_arg_options(False)
        for device in devices:
            print(device[0])
        sys.exit(0)

    if args.list_methods:
        devices = collect_arg_options(True)
        for device in devices:
            for i, method in enumerate(device):
                print(method if i == 0 else "\t" + method)
        sys.exit(0)

    # Optional database path
    if args.db_path:
        db_path = args.db_path
    else:
        db_path = "data.db"

    # Optional database hash, only useful if --only_report
    db_hash = ""
    if args.db_hash:
        db_hash = args.db_hash

    # Checks if method and device are correct
    if not args.only_report:
        if args.device is None and args.method is None:
            print(
                "Please add the device and method you want to use with --device and --method"
            )
            sys.exit(0)
        if args.device is None and args.method is not None:
            print(
                "You added a method. Please add a device you want to use with --device"
            )
            sys.exit(0)
        if args.device is not None and args.method is None:
            print(
                "You added a device. Please add a method you want to use with --method"
            )
            sys.exit(0)
        correct = False
        devices = collect_arg_options(True)
        for device in devices:
            if device[0] == args.device:
                for method in device:
                    if method == args.method:
                        correct = True
        if correct is False:
            print(
                "Your device or method is not available. Probably it is spelled wrong."
            )
            sys.exit(0)
        db_hash = extract_data(args.device, args.method, db_path)

    # Starts report
    reportDb = ReportRequests(db_path, db_hash)
    index.init(reportDb, main_log)
    index.app.run(host="localhost")
    print("Last database is: " + reportDb.origin_hash)
    print("End analysis")
Example #16
    def __init__(self,
                 env_fn,
                 save_dir,
                 ac_kwargs=dict(),
                 seed=0,
                 tensorboard_logdir=None,
                 steps_per_epoch=400,
                 batch_size=400,
                 gamma=0.99,
                 delta=0.01,
                 vf_lr=1e-3,
                 train_v_iters=80,
                 damping_coeff=0.1,
                 cg_iters=10,
                 backtrack_iters=10,
                 backtrack_coeff=0.8,
                 lam=0.97,
                 max_ep_len=1000,
                 logger_kwargs=dict(),
                 save_freq=10,
                 algo='trpo',
                 ngpu=1):
        """
        Trust Region Policy Optimization 
        (with support for Natural Policy Gradient)
        Args:
            env_fn : A function which creates a copy of the environment.
                The environment must satisfy the OpenAI Gym API.
            save_dir: path to save directory
            actor_critic: Class for the actor-critic pytorch module
            ac_kwargs (dict): Any kwargs appropriate for the actor_critic 
                function you provided to TRPO.
            seed (int): Seed for random number generators.
            steps_per_epoch (int): Number of steps of interaction (state-action pairs) 
                for the agent and the environment in each epoch.
            batch_size (int): The buffer is split into batches of batch_size to learn from
            gamma (float): Discount factor. (Always between 0 and 1.)
            delta (float): KL-divergence limit for TRPO / NPG update. 
                (Should be small for stability. Values like 0.01, 0.05.)
            vf_lr (float): Learning rate for value function optimizer.
            train_v_iters (int): Number of gradient descent steps to take on 
                value function per epoch.
            damping_coeff (float): Artifact for numerical stability, should be 
                smallish. Adjusts Hessian-vector product calculation:
                
                .. math:: Hv \\rightarrow (\\alpha I + H)v
                where :math:`\\alpha` is the damping coefficient. 
                Probably don't play with this hyperparameter.
            cg_iters (int): Number of iterations of conjugate gradient to perform. 
                Increasing this will lead to a more accurate approximation
                to :math:`H^{-1} g`, and possibly slightly-improved performance,
                but at the cost of slowing things down. 
                Also probably don't play with this hyperparameter.
            backtrack_iters (int): Maximum number of steps allowed in the 
                backtracking line search. Since the line search usually doesn't 
                backtrack, and usually only steps back once when it does, this
                hyperparameter doesn't often matter.
            backtrack_coeff (float): How far back to step during backtracking line
                search. (Always between 0 and 1, usually above 0.5.)
            lam (float): Lambda for GAE-Lambda. (Always between 0 and 1,
                close to 1.)
            max_ep_len (int): Maximum length of trajectory / episode / rollout.
            logger_kwargs (dict): Keyword args for Logger. 
                            (1) output_dir = None
                            (2) output_fname = 'progress.pickle'
            save_freq (int): How often (in terms of gap between epochs) to save
                the current policy and value function.
            algo: Either 'trpo' or 'npg': this code supports both, since they are 
                almost the same.
        """
        # logger stuff
        self.logger = Logger(**logger_kwargs)

        torch.manual_seed(seed)
        np.random.seed(seed)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.env = env_fn()
        self.vf_lr = vf_lr
        self.steps_per_epoch = steps_per_epoch  # if steps_per_epoch > self.env.spec.max_episode_steps else self.env.spec.max_episode_steps
        self.max_ep_len = max_ep_len
        self.train_v_iters = train_v_iters

        # Main network
        self.ngpu = ngpu
        self.actor_critic = get_actor_critic_module(ac_kwargs, 'trpo')
        self.ac_kwargs = ac_kwargs
        self.ac = self.actor_critic(self.env.observation_space,
                                    self.env.action_space,
                                    device=self.device,
                                    ngpu=self.ngpu,
                                    **ac_kwargs)

        # Create Optimizers
        self.v_optimizer = optim.Adam(self.ac.v.parameters(), lr=self.vf_lr)

        # GAE buffer
        self.gamma = gamma
        self.lam = lam
        self.obs_dim = self.env.observation_space.shape
        self.act_dim = self.env.action_space.shape
        self.buffer = GAEBuffer(self.obs_dim, self.act_dim,
                                self.steps_per_epoch, self.device, self.gamma,
                                self.lam)
        self.batch_size = batch_size

        self.cg_iters = cg_iters
        self.damping_coeff = damping_coeff
        self.delta = delta
        self.backtrack_coeff = backtrack_coeff
        self.algo = algo
        self.backtrack_iters = backtrack_iters
        self.best_mean_reward = -np.inf
        self.save_dir = save_dir
        self.save_freq = save_freq

        self.tensorboard_logdir = tensorboard_logdir
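cg_iters and damping_coeff above parameterize the conjugate-gradient solve for H^{-1} g using the damped Hessian-vector product (alpha I + H) v described in the docstring. A generic sketch of that solver (not this repository's implementation):

import torch

def conjugate_gradient(hvp, g, cg_iters=10, damping_coeff=0.1, eps=1e-8):
    # Approximately solve (H + damping_coeff * I) x = g, given a function hvp(v) = H @ v.
    x = torch.zeros_like(g)
    r = g.clone()
    p = g.clone()
    rdotr = torch.dot(r, r)
    for _ in range(cg_iters):
        Ap = hvp(p) + damping_coeff * p
        alpha = rdotr / (torch.dot(p, Ap) + eps)
        x = x + alpha * p
        r = r - alpha * Ap
        new_rdotr = torch.dot(r, r)
        p = r + (new_rdotr / (rdotr + eps)) * p
        rdotr = new_rdotr
    return x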
Example #17
    def __init__(self,
                 env_fn,
                 save_dir,
                 ac_kwargs=dict(),
                 seed=0,
                 tensorboard_logdir=None,
                 steps_per_epoch=400,
                 batch_size=400,
                 gamma=0.99,
                 clip_ratio=0.2,
                 vf_lr=1e-3,
                 pi_lr=3e-4,
                 train_v_iters=80,
                 train_pi_iters=80,
                 lam=0.97,
                 max_ep_len=1000,
                 target_kl=0.01,
                 logger_kwargs=dict(),
                 save_freq=10,
                 ngpu=1):
        """
        Proximal Policy Optimization 
        Args:
            env_fn : A function which creates a copy of the environment.
                The environment must satisfy the OpenAI Gym API.
            save_dir: path to save directory
            actor_critic: Class for the actor-critic pytorch module
            ac_kwargs (dict): Any kwargs appropriate for the actor_critic 
                function you provided to PPO.
            seed (int): Seed for random number generators.
            steps_per_epoch (int): Number of steps of interaction (state-action pairs) 
                for the agent and the environment in each epoch.
            batch_size (int): The buffer is split into batches of batch_size to learn from
            gamma (float): Discount factor. (Always between 0 and 1.)
            clip_ratio (float): Hyperparameter for clipping in the policy objective.
                Roughly: how far can the new policy go from the old policy while 
                still profiting (improving the objective function)? The new policy 
                can still go farther than the clip_ratio says, but it doesn't help
                on the objective anymore. (Usually small, 0.1 to 0.3.) Typically
                denoted by :math:`\epsilon`. 
            pi_lr (float): Learning rate for policy optimizer.
            vf_lr (float): Learning rate for value function optimizer.
            train_v_iters (int): Number of gradient descent steps to take on 
                value function per epoch.
            train_pi_iters (int): Maximum number of gradient descent steps to take 
                on policy loss per epoch. (Early stopping may cause optimizer
                to take fewer than this.)    
            lam (float): Lambda for GAE-Lambda. (Always between 0 and 1,
                close to 1.)
            max_ep_len (int): Maximum length of trajectory / episode / rollout.
            target_kl (float): Roughly what KL divergence we think is appropriate
                between new and old policies after an update. This will get used 
                for early stopping. (Usually small, 0.01 or 0.05.)
            logger_kwargs (dict): Keyword args for Logger. 
                            (1) output_dir = None
                            (2) output_fname = 'progress.pickle'
            save_freq (int): How often (in terms of gap between epochs) to save
                the current policy and value function.
        """
        # logger stuff
        self.logger = Logger(**logger_kwargs)

        torch.manual_seed(seed)
        np.random.seed(seed)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.env = env_fn()
        self.vf_lr = vf_lr
        self.pi_lr = pi_lr
        self.steps_per_epoch = steps_per_epoch  # if steps_per_epoch > self.env.spec.max_episode_steps else self.env.spec.max_episode_steps

        self.max_ep_len = max_ep_len
        # self.max_ep_len = self.env.spec.max_episode_steps if self.env.spec.max_episode_steps is not None else max_ep_len
        self.train_v_iters = train_v_iters
        self.train_pi_iters = train_pi_iters

        # Main network
        self.ngpu = ngpu
        self.actor_critic = get_actor_critic_module(ac_kwargs, 'ppo')
        self.ac_kwargs = ac_kwargs
        self.ac = self.actor_critic(self.env.observation_space,
                                    self.env.action_space,
                                    device=self.device,
                                    ngpu=self.ngpu,
                                    **ac_kwargs)

        # Create Optimizers
        self.v_optimizer = optim.Adam(self.ac.v.parameters(), lr=self.vf_lr)
        self.pi_optimizer = optim.Adam(self.ac.pi.parameters(), lr=self.pi_lr)

        # GAE buffer
        self.gamma = gamma
        self.lam = lam
        self.obs_dim = self.env.observation_space.shape
        self.act_dim = self.env.action_space.shape
        self.buffer = GAEBuffer(self.obs_dim, self.act_dim,
                                self.steps_per_epoch, self.device, self.gamma,
                                self.lam)
        self.batch_size = batch_size

        self.clip_ratio = clip_ratio
        self.target_kl = target_kl
        self.best_mean_reward = -np.inf
        self.save_dir = save_dir
        self.save_freq = save_freq

        self.tensorboard_logdir = tensorboard_logdir
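clip_ratio and target_kl above drive the clipped surrogate objective and the early-stopping check described in the docstring. A condensed sketch of that policy loss (assumed tensor shapes; not this class's actual update method):

import torch

def ppo_policy_loss(logp_new, logp_old, adv, clip_ratio=0.2):
    # Clipped surrogate objective: cap how much the new policy can profit per update.
    ratio = torch.exp(logp_new - logp_old)
    clipped_adv = torch.clamp(ratio, 1 - clip_ratio, 1 + clip_ratio) * adv
    loss_pi = -(torch.min(ratio * adv, clipped_adv)).mean()
    # Approximate KL divergence, compared against target_kl to stop the epoch early.
    approx_kl = (logp_old - logp_new).mean().item()
    return loss_pi, approx_kl

# Typical early-stopping check inside the update loop:
#     if approx_kl > 1.5 * target_kl: break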