def get_rj_env():
    dct = get_environment()
    dct['RJ'] = "{RJ_UNAME}@raijin.nci.org.au".format(**dct)
    dct['RJ_HOME'] = "{RJ}:/home/{REMOTE_DIR}/{RJ_UNAME}".format(**dct)
    dct['RJ_SHORT'] = "{RJ}:/short/{RJ_PROJ}/{RJ_UNAME}".format(**dct)
    dct['RJ_RECORDFILE'] = "{RJ_HOME}/.recordfile".format(**dct)
    dct['QSTAT'] = "/opt/pbs/default/bin/qstat"
    dct['QDEL'] = "/opt/pbs/default/bin/qdel"
    dct['QUEUED'] = "{QSTAT} -u {RJ_UNAME} | grep {RJ_UNAME}".format(**dct)
    dct['SSH'] = "ssh -t -q {RJ}".format(**dct)
    return dct

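# Hedged usage sketch (not from the original source): assuming get_rj_env()
# returns the mapping built above, its entries can be composed into shell
# commands, e.g. polling the PBS queue over SSH. The helper name
# list_queued_jobs is illustrative only.
import subprocess

def list_queued_jobs():
    env = get_rj_env()
    cmd = "{SSH} '{QUEUED}'".format(**env)
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    return result.stdout
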
def run(evaluator, seed=None):
    environment = get_environment(evaluator.config)
    if seed is not None:
        environment.seed(seed)
        set_all_seeds(seed)
    with torch.inference_mode():
        game = evaluator.play_game(environment)
    game.history.observations = []
    game.environment = None
    return game

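# Hedged sketch (an assumption, not from the original source): a typical
# set_all_seeds helper of the kind the snippet above relies on, seeding
# Python, NumPy and PyTorch so evaluation runs are reproducible.
import random

import numpy as np
import torch

def set_all_seeds(seed):
    if seed is None:
        return
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
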
def get_template_kwargs(self):
    self.dct = get_environment()
    self.dct["queue"] = self.queue
    self.dct['walltime'] = f"{self.walltime:d}:00:00"
    self.dct['ncpus'] = self.ncpus
    self.dct['ngpus'] = self.ngpus
    self.dct['jobfs'] = self.jobfs_mb
    self.dct['vmem'] = self.vmem_mb
    self.dct['base_directory'] = self.base_directory
    self.dct['base_name'] = self.base_name
    self.dct['out_extension'] = self.out_extension
    txt = " walltime={walltime}, jobfs={jobfs:d}MB, vmem={vmem:d}MB, queue={queue}, ncpus={ncpus:d}"
    if self.ngpus:
        txt += ", ngpus={ngpus:d}"
    printdarkcyan(txt.format(**self.dct))

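# Hedged sketch (assumption, not from the original class): the kwargs
# assembled above are presumably substituted into a PBS job-script template
# along these lines; the template text and the render_job_script helper are
# illustrative only.
PBS_TEMPLATE = """#!/bin/bash
#PBS -q {queue}
#PBS -l walltime={walltime}
#PBS -l ncpus={ncpus}
#PBS -l mem={vmem}MB
#PBS -l jobfs={jobfs}MB
cd {base_directory}
python {base_name}.py > {base_name}.{out_extension}
"""

def render_job_script(template_kwargs):
    return PBS_TEMPLATE.format(**template_kwargs)
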
def __init__(self, config):
    self.config = config
    if config.load_buffer:
        self.load_buffer()
    else:
        self.replay_buffer = []
    self.environment = get_environment(config)
    config.action_space = self.environment.action_space.n
    config.obs_space = self.environment.observation_space.shape
    self.actions = self.environment.action_space.n
    if config.seed is not None:
        self.environment.seed(config.seed)

def __init__(self, clear_home=False, clear_remote=False, clear_all=False,
             include_oe=False, **after):
    self.now = datetime.utcnow()
    self.include_oe = include_oe
    self.__dict__.update(get_environment())
    self.remote = f"{self.RJ_UNAME}@raijin.nci.org.au"
    self.remote_record = (
        f"{self.RJ_UNAME}@raijin.nci.org.au:"
        f"/home/{self.REMOTE_DIR}/{self.RJ_UNAME}/.recordfile"
    )
    self.review = []
    self.copy_files_back(after)
    if clear_home or clear_all:
        self.new_home_record()
    if clear_remote or clear_all:
        self.new_remote_record()

def launch(config, date, state=None):
    os.environ["OMP_NUM_THREADS"] = "1"
    ray.init()
    env = get_environment(config)
    config.action_space = env.action_space.n
    config.obs_space = env.observation_space.shape
    storage = SharedStorage.remote(config)
    replay_buffer = PrioritizedReplay.remote(config)
    actors = [
        Actor.remote(actor_key, config, storage, replay_buffer, state)
        for actor_key in range(config.num_actors)
    ]
    learner = Learner.remote(config, storage, replay_buffer, state)
    workers = [learner] + actors
    print_launch_message(config, date)
    ray.get([worker.launch.remote() for worker in workers])
    ray.shutdown()

def set_debug_aws() -> None:
    s3_id, s3_key, s3_bucket, s3_input, s3_output, s3_sync = get_environment()
    path_input, path_output = ('/dev/shm/gps/input', '/dev/shm/gps/output')
    path_data = '/dev/shm/gps/data'
    #print(f"DEBUG:\n\t{'S3_ID'.ljust(16)}{s3_id}"
    #      f"\n\t{'S3_KEY'.ljust(16)}{s3_key}"
    #      f"\n\t{'S3_BUCKET'.ljust(16)}{s3_bucket}"
    #      f"\n\t{'S3_INPUT'.ljust(16)}{s3_input}"
    #      f"\n\t{'S3_OUTPUT'.ljust(16)}{s3_output}"
    #      f"\n\t{'S3_SYNC'.ljust(16)}{s3_sync}")
    s3 = B3W(s3_bucket, s3_id, s3_key)

    # DEBUG: put test data to S3
    remove_from_aws(s3, s3_input + '/test')
    remove_from_aws(s3, s3_output + '/test')
    for filename in glob(os.path.join('data', '*')):
        #s3.put('data/geotiff.xml', '/'.join([s3_input, 'test', 'geotiff.xml']))
        s3o = '/'.join([s3_input, 'test', os.path.basename(filename)])
        s3.put(filename, s3o)
        print(f"DEBUG: '{filename}' -> '{s3o}'")
    #print(s3.ls(s3_input))
    #sys.exit(0)
    return None

def main():
    # Get environment path
    environment_path = utils.get_environment()

    # If no environment is defined, put user in root environment.
    if not environment_path:
        msg = "\n\nCould not find the \"environment.conf\" file in \"{path}\"."
        msg += "\nPlease create an environment pointer file and save it as "
        msg += "\"{path}/environment.conf\"."
        msg += "\nYou can also modify the included example "
        msg += "\"{path}/environment.conf.example\", and rename to "
        msg += "\"{path}/environment.conf\"."
        msg += "\n\nYou are in the root environment of Conda. "
        msg += "The \"conda\" command is available to use now."

        path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
        path = path.replace("\\", "/")

        print msg.format(path=path)
        return

    # If requested to put user into the root environment.
    if environment_path == "root":
        msg = "You are in the root environment of Conda. "
        msg += "The \"conda\" command is available to use now."

        path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
        path = path.replace("\\", "/")

        print msg.format(path=path)
        return

    # Add conda_git_deployment module to environment.
    # Also removing PYTHONPATH that conda root environment needs.
    path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
    os.environ["PYTHONPATH"] = path

    # Get environment data.
    environment_string = ""
    if os.path.exists(environment_path):
        f = open(environment_path, "r")
        environment_string = f.read()
        f.close()
    else:
        msg = "Could not find \"{0}\" on disk."
        print msg.format(environment_path)

    if not environment_string:
        environment_string = requests.get(environment_path).text

    environment_data = utils.read_yaml(environment_string)

    # Export environment
    if (utils.get_arguments()["export"] or
            utils.get_arguments()["export-without-commit"]):
        repositories_path = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "..", "repositories",
                         environment_data["name"]))

        # Get commit hash and name from repositories on disk.
        if not utils.check_executable("git"):
            subprocess.call(
                ["conda", "install", "-c", "anaconda", "git", "-y"])
        disk_repos = {}
        for repo in os.listdir(repositories_path):
            path = os.path.join(repositories_path, repo)
            if not os.path.exists(os.path.join(path, ".git")):
                continue

            commit_hash = subprocess.check_output(
                ["git", "rev-parse", "HEAD"], cwd=path).rsplit()[0]
            disk_repos[repo] = commit_hash

        # Construct new git dependencies.
        git_data = {"git": []}
        for item in environment_data["dependencies"]:
            if "git" in item:
                for repo in item["git"]:

                    # Get url from environment file.
                    url = ""
                    if isinstance(repo, str):
                        url = repo
                    if isinstance(repo, dict):
                        url = repo.keys()[0]

                    # Skip any repositories that aren't cloned yet.
                    name = url.split("/")[-1].replace(".git", "").split("@")[0]
                    if name not in disk_repos.keys():
                        continue

                    # Construct commit url if requested.
                    commit_url = url.split("@")[0]
                    if not utils.get_arguments()["export-without-commit"]:
                        commit_url += "@" + disk_repos[name]

                    if isinstance(repo, str):
                        git_data["git"].append(commit_url)
                    if isinstance(repo, dict):
                        git_data["git"].append({commit_url: repo[url]})

        # Replace git dependencies
        for item in environment_data["dependencies"]:
            if "git" in item:
                environment_data["dependencies"].remove(item)
        environment_data["dependencies"].append(git_data)

        # Write environment file
        utils.write_yaml(environment_data,
                         os.path.join(os.getcwd(), "environment.yml"))
        return

    # Writing original environment to disk
    data_file = os.path.join(tempfile.gettempdir(),
                             'data_%s.yml' % os.getpid())
    utils.write_yaml(environment_data, data_file)

    # Remove git from environment as it's not supported by conda (yet).
    for item in environment_data["dependencies"]:
        if "git" in item:
            index = environment_data["dependencies"].index(item)
            del environment_data["dependencies"][index]

    # Create environment file from passed environment.
    environment_filename = os.path.join(tempfile.gettempdir(),
                                        'env_%s.yml' % os.getpid())
    utils.write_yaml(environment_data, environment_filename)

    args = ["conda", "env", "create"]

    # Force environment update/rebuild when requested by command.
    if utils.get_arguments()["update-environment"]:
        args.append("--force")

    # Check whether the environment installed is different from the requested
    # environment, and whether the conda-git-deployment is different.
    # Force environment update/rebuild if different.
    environment_update = False
    if not utils.get_arguments()["suppress-environment-update"]:
        incoming_md5 = hashlib.md5(
            environment_string + "cwd: {0}".format(os.getcwd())).hexdigest()
        existing_md5 = ""

        md5_path = os.path.join(os.path.expanduser("~"), "AppData", "Local",
                                "Continuum", "Miniconda2",
                                environment_data["name"] + ".md5")
        if os.path.exists(md5_path):
            f = open(md5_path, "r")
            existing_md5 = f.read()
            f.close()

        if incoming_md5 != existing_md5:
            environment_update = True
            if "--force" not in args:
                args.append("--force")

        with open(md5_path, "w") as the_file:
            the_file.write(incoming_md5)

    # Create environment
    args.extend(["-f", environment_filename])
    return_code = subprocess.call(args)
    os.remove(environment_filename)

    # Spawning a new process to get the correct python executable and
    # passing data via file on disk.
    platform_script = "environment.sh"
    if platform.system().lower() == "windows":
        platform_script = "environment.bat"

    args = [
        os.path.join(os.path.dirname(__file__), platform_script),
        environment_data["name"],
        os.path.join(os.path.dirname(__file__), "install.py"),
        data_file
    ]
    args.extend(sys.argv[1:])

    # If it's the first installation, we need to pass update to install.py
    if not return_code:
        args.append("--update-environment")

    if platform.system().lower() != "windows":
        args.insert(0, "bash")

    if environment_update and "--update-environment" not in args:
        args.append("--update-environment")

    subprocess.call(args)

def __init__(self, actor_key, config, storage, replay_buffer, state=None):
    set_all_seeds(config.seed + actor_key if config.seed is not None else None)

    self.run_tag = config.run_tag
    self.group_tag = config.group_tag
    self.actor_key = actor_key
    self.config = deepcopy(config)
    self.storage = storage
    self.replay_buffer = replay_buffer

    self.environment = get_environment(config)
    self.environment.seed(config.seed)
    self.mcts = MCTS(config)

    if "actors" in self.config.use_gpu_for:
        if torch.cuda.is_available():
            if self.config.actors_gpu_device_ids is not None:
                device_id = self.config.actors_gpu_device_ids[self.actor_key]
                self.device = torch.device("cuda:{}".format(device_id))
            else:
                self.device = torch.device("cuda")
        else:
            raise RuntimeError(
                "GPU was requested but torch.cuda.is_available() is False.")
    else:
        self.device = torch.device("cpu")

    self.network = get_network(config, self.device)
    self.network.to(self.device)
    self.network.eval()

    if config.fixed_temperatures:
        self.temperature = config.fixed_temperatures[self.actor_key]
        self.worker_id = 'actors/temp={}'.format(round(self.temperature, 1))
    else:
        self.worker_id = 'actor-{}'.format(self.actor_key)

    if self.config.norm_obs:
        self.obs_min = np.array(self.config.obs_range[::2], dtype=np.float32)
        self.obs_max = np.array(self.config.obs_range[1::2], dtype=np.float32)
        self.obs_range = self.obs_max - self.obs_min

    if self.config.two_players:
        self.stats_to_log = defaultdict(int)

    self.experiences_collected = 0
    self.training_step = 0
    self.games_played = 0
    self.return_to_log = 0
    self.length_to_log = 0
    self.value_to_log = {'avg': 0, 'max': 0}

    if state is not None:
        self.load_state(state)

    Logger.__init__(self)

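# Hedged sketch (assumption, not taken from the original Actor): when
# config.norm_obs is set, observations are typically rescaled into [0, 1]
# with the obs_min/obs_range buffers initialised above; a helper of roughly
# this shape is what those attributes suggest.
import numpy as np

def normalize_observation(obs, obs_min, obs_range):
    obs = np.asarray(obs, dtype=np.float32)
    return (obs - obs_min) / obs_range
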
        'user': '',
        'name': '',
    },
    'PG': {
        'user': '',
        'name': '',
    },
    'DEV': {
        'user': '',
        'name': '',
        'host': '',
        'port': 5432,
    },
}

ENVIRONMENT = get_environment()

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.postgresql_psycopg2',
        'NAME': DBS[ENVIRONMENT]['name'],
        'USER': DBS[ENVIRONMENT]['user'],
        'PASSWORD': '',
        'HOST': DBS[ENVIRONMENT]['host'],
        'PORT': DBS[ENVIRONMENT]['port'],
        'OPTIONS': {
        }
    },
    'nhmmer_db': {
        'NAME': 'nhmmer_results',
        'ENGINE': 'django.db.backends.mysql',

def main(periodic: sched.scheduler) -> None:
    # Set working variables
    s3_id, s3_key, s3_bucket, s3_input, s3_output, s3_sync = get_environment()
    path_input, path_output = ('/dev/shm/gps/input', '/dev/shm/gps/output')
    path_data = '/dev/shm/gps/data'
    #print(f"\n=== Started input processing cycle ===\n")

    s3 = B3W(s3_bucket, s3_id, s3_key)

    # Get input files from S3
    files_input = get_from_aws(s3, s3_input, path_input)
    #print("DEBUG: input files -->")
    #print("\n".join([f"DEBUG: {filename}" for filename in files_input]))
    objects_output = check_in_aws(s3, s3_output, depth=1)
    #print("DEBUG: output sets -->")
    #print("\n".join([f"DEBUG: {name}" for name in objects_output]))

    # DEBUG: list sync objects in S3, remove output test set
    #objects_sync = check_in_aws(s3, s3_sync)  # don't uncomment - dangerous!
    #print("DEBUG: sync objects -->")
    #print("\n".join([f"DEBUG: {name}" for name in objects_sync]))

    # Initialize Copernicus Open Data Access Hub search object
    config = Config.load('config.yaml')
    data_hub = DataHub(config, limit=1000)

    # Cycle through all the data input sets: a set may contain multiple
    # input areas and shapes to process. Result will be a snapshot that is
    # cut with each shape (if any)
    for data_input in glob(os.path.join(path_input, '*')):
        if not os.path.isdir(data_input):
            #print(f"DEBUG: '{data_input}' is not a valid data input!")
            #print("TODO: unzip archived input sets...")
            continue
        data_name = os.path.basename(data_input)
        #print(f"DEBUG: 'data_input' basename = {data_name}")
        if data_name in objects_output:
            #print(f"Output set for '{data_input}' already exists. Skipping...")
            continue
        #print(f"DEBUG: input directory --->\n{os.listdir(data_input)}\n")
        areas = glob(os.path.join(data_input, '*.geojson'))
        shapes = glob(os.path.join(data_input, '*.shp'))
        #print(f"DEBUG: shapes = {shapes}")
        if not shapes:
            shapes.append(None)
        for area in areas:
            try:
                print(f"\n=== Processing '{area}' ===\n")
                polygon, properties = Polygons.read_geojson(area)
            except Exception as e:
                print(f"Failed to read '{area}'!\n{str(e)}")
                continue
            #print(f"DEBUG:\n{polygon}")

            # Set config key (search area)
            #print(f"DEBUG: config.search -->\n{config.search}")
            search = config.search.copy()
            search.update(properties)
            #config.search["footprint"] = f"\"Intersects({polygon})\""
            #print(f"DEBUG: config.search -->\n{config.search}")
            #print(f"Config 'search' section:\n{config.search}")
            snapshots = data_hub.search(search, area=polygon)
            snapshots = sorted(snapshots, key=lambda item: item.begin_position)
            print(f"\n=== {len(snapshots)} snapshots found ===\n")
            # print_snapshots(snapshots)  # DEBUG
            # break  # DEBUG

            print(f"\n=== Processing snapshots and shapes ===\n")
            for index, snapshot in enumerate(snapshots):
                filename = sync_with_aws(s3, s3_sync, data_hub, snapshot,
                                         path_data)
                if not filename:
                    print(f"\n'{snapshot.uuid}' not synced. Skipping...")
                    continue
                else:
                    print(f"\n{index:8d}: {snapshot.title}")
                try:
                    # Process each superposition of an area and a shape
                    #
                    # Process a snapshot
                    #
                    #print(f"DEBUG: search keys = {search.keys()}")
                    path_target = os.path.join(path_output, data_name)
                    #print(f"DEBUG: path_data = '{path_data}'")
                    if search['platformName'] == 'Sentinel-2':
                        filenames = process_sentinel2(filename, path_target,
                                                      area, shapes)
                    elif search['platformName'] == 'Sentinel-1':
                        filenames = process_sentinel1(filename, path_target,
                                                      area, shapes)
                    else:
                        filenames = []
                        print(f"NOT IMPLEMENTED: {snapshot.title}",
                              f"{config.search['platformName']}")
                    #print(f"DEBUG: exporting '{data_prefix}' to S3 -->")
                    # Put processing result (for each output set) to S3
                    result = put_to_aws(s3, s3_output, path_output)  # result...
                    for outfile in filenames:
                        remove(outfile)  # all files (TODO: file or directory)
                except Exception as e:
                    print(f"FAILED: {e}")
                    raise e
                remove(filename)  # remove snapshot
                #break  # DEBUG: the first snapshot only
            print(f"\n=== Done snapshots for '{area}' ===\n")

        # Clean up output set (there should remain only logs)
        try:
            rmtree(os.path.join(path_output, data_name))  # data output - prefix
        except FileNotFoundError as e:
            pass

    # Clean up
    for path in (path_data, path_input, path_output):
        try:
            #print(f"DEBUG: removing {path}")
            rmtree(path)
        except FileNotFoundError as e:
            pass
    #print(f"\n=== Completed input processing cycle ===\n")
    periodic.enter(INTERVAL, 1, main, (periodic,))
    return None

def main():
    conf = utils.read_yaml(utils.get_arguments()["unknown"][0])
    os.remove(utils.get_arguments()["unknown"][0])

    # Clone repositories. Using os.getcwd() because the drive letter needs to
    # be respected on Windows.
    repositories_path = os.path.abspath(
        os.path.join(os.getcwd(), "repositories", conf["name"]))

    os.environ["CONDA_ENVIRONMENT_REPOSITORIES"] = repositories_path

    # Kept for backwards compatibility
    os.environ["CONDA_GIT_REPOSITORY"] = repositories_path

    repositories = []
    for item in conf["dependencies"]:
        if "git" in item:
            for repo in item["git"]:

                repo_path = ""
                if isinstance(repo, str):
                    repo_path = repo
                if isinstance(repo, dict):
                    repo_path = repo.keys()[0]

                data = {"url": repo_path}

                name = repo_path.split("/")[-1].replace(".git", "")
                if not name:
                    name = repo_path.split("/")[-2]
                if "@" in name:
                    name = name.split("@")[0]
                    repo_path = repo_path.split("@")[0]
                data["name"] = name

                if not os.path.exists(repositories_path):
                    os.makedirs(repositories_path)

                if name not in os.listdir(repositories_path):
                    subprocess.call(["git", "clone", repo_path],
                                    cwd=repositories_path)

                data["path"] = os.path.join(repositories_path, name)

                data["commands"] = {"on_launch": [],
                                    "on_environment_update": []}
                if isinstance(repo, dict):
                    for item in repo[repo.keys()[0]]:
                        if isinstance(item, dict):
                            for event, commands in item.iteritems():
                                data["commands"][event].extend(commands)
                        else:
                            data["commands"]["on_launch"].append(item)

                repositories.append(data)

    # Update repositories.
    if utils.get_arguments()["update-repositories"]:
        for repo in repositories:
            print repo["name"]

            # Updating origin url
            subprocess.call(["git", "remote", "set-url", "origin",
                             repo["url"].split("@")[0]], cwd=repo["path"])

            # Update git repository
            subprocess.call(["git", "checkout", "master"], cwd=repo["path"])
            subprocess.call(["git", "pull"], cwd=repo["path"])
            subprocess.call(
                ["git", "submodule", "update", "--init", "--recursive"],
                cwd=repo["path"])
            subprocess.call(["git", "submodule", "update", "--recursive"],
                            cwd=repo["path"])

    # Checkout any commits/tags if there are newly cloned repositories or
    # updating the repositories.
    if utils.get_arguments()["update-repositories"]:
        for repo in repositories:
            if "@" in repo["url"]:
                tag = repo["url"].split("@")[1]
                if tag:
                    print repo["name"]
                    subprocess.call(["git", "checkout", tag],
                                    cwd=repo["path"])

    # Checkout environment repository
    environment_path = utils.get_environment()
    if not os.path.exists(environment_path):
        # Determine environment repositories by matching passed environment
        # with repositories
        environment_repo = None
        match = 0.0
        for repo in repositories:
            sequence_match = SequenceMatcher(None, repo["url"],
                                             environment_path).ratio()
            if match < sequence_match:
                environment_repo = repo

        print environment_repo["name"]
        branch = environment_path.split("/")[-2]
        subprocess.call(["git", "checkout", branch],
                        cwd=environment_repo["path"])

    # Install any setup.py if we are updating
    if (utils.get_arguments()["update-repositories"] or
            utils.get_arguments()["update-environment"]):
        for repo in repositories:
            if "setup.py" not in os.listdir(repo["path"]):
                continue

            args = ["python", "setup.py", "develop"]
            subprocess.call(args, cwd=repo["path"])

    # Add environment site packages to os.environ
    prefix = ""
    if platform.system().lower() == "windows":
        prefix = os.environ["CONDA_PREFIX"]
    else:
        prefix = os.environ["CONDA_ENV_PATH"]

    path = os.path.join(prefix, "lib", "site-packages")
    os.environ["PYTHONPATH"] += os.pathsep + path

    # Add sys.path to os.environ["PYTHONPATH"], because conda only modifies
    # sys.path which gets lost when launching any detached subprocesses.
    # This gets a little complicated due to being in a process that hasn't
    # picked up on the changes, hence going through a subprocess.
    python_file = os.path.join(os.path.dirname(__file__), "write_sys_path.py")
    data_file = os.path.join(tempfile.gettempdir(),
                             "data_%s.yml" % os.getpid())
    subprocess.call(["python", python_file, data_file])

    paths = []
    with open(data_file, "r") as f:
        paths += utils.read_yaml(f.read())
    os.remove(data_file)

    for path in paths:
        if path.lower().startswith(repositories_path.lower()):
            os.environ["PYTHONPATH"] += os.pathsep + path
        if path.endswith(".egg"):
            os.environ["PYTHONPATH"] += os.pathsep + path

    # Clean up any existing environment file
    if os.path.exists(utils.get_environment_path()):
        os.remove(utils.get_environment_path())

    # Ensure subprocess is detached so closing connect will not also
    # close launched applications.
    options = {}
    if not utils.get_arguments()["attached"]:
        if sys.platform == "win32":
            options["creationflags"] = subprocess.CREATE_NEW_CONSOLE
        else:
            options["preexec_fn"] = os.setsid

    # Setting update mode environment variable
    update_modes = []
    if utils.get_arguments()["update-environment"]:
        update_modes.append("environment")
    if utils.get_arguments()["update-repositories"]:
        update_modes.append("repositories")

    os.environ["CONDA_GIT_UPDATE"] = ""
    for mode in update_modes:
        os.environ["CONDA_GIT_UPDATE"] += mode + os.pathsep

    # Execute environment update commands.
    if utils.get_arguments()["update-environment"]:
        for repo in repositories:
            if "commands" in repo.keys():
                for cmd in repo["commands"]["on_environment_update"]:
                    os.environ.update(utils.read_environment())
                    cmd = cmd.replace("$REPO_PATH", repo["path"])
                    print "Executing: " + cmd
                    subprocess.call(cmd, shell=True, cwd=repo["path"],
                                    **options)

    # Execute launch commands.
    for repo in repositories:
        if "commands" in repo.keys():
            for cmd in repo["commands"]["on_launch"]:
                os.environ.update(utils.read_environment())
                cmd = cmd.replace("$REPO_PATH", repo["path"])
                print "Executing: " + cmd
                subprocess.call(cmd, shell=True, cwd=repo["path"], **options)

def run():
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', choices=['train', 'test'], default='train')
    parser.add_argument('--env-name', type=str, default='iemocap-rl-v3.1')
    parser.add_argument('--weights', type=str, default=None)
    parser.add_argument('--policy', type=str, default='EpsGreedyQPolicy')
    parser.add_argument(
        '--data-version',
        nargs='+',
        choices=[
            DataVersions.IEMOCAP, DataVersions.SAVEE, DataVersions.IMPROV,
            DataVersions.ESD, DataVersions.EMODB, DataVersions.KITCHEN_EMODB,
            DataVersions.KITCHEN_ESD, DataVersions.KITCHEN_ESD_DB0,
            DataVersions.KITCHEN_ESD_DBn5, DataVersions.KITCHEN_ESD_DBn10,
            DataVersions.KITCHEN_ESD_DBp5, DataVersions.KITCHEN_ESD_DBp10
        ],
        type=str2dataset,
        default=DataVersions.IEMOCAP)
    parser.add_argument('--data-split', nargs='+', type=float, default=None)
    parser.add_argument('--zeta-nb-steps', type=int, default=100000)
    parser.add_argument('--nb-steps', type=int, default=500000)
    parser.add_argument('--eps', type=float, default=0.1)
    parser.add_argument('--pre-train', type=str2bool, default=False)
    parser.add_argument('--pre-train-dataset',
                        choices=[
                            DataVersions.IEMOCAP, DataVersions.IMPROV,
                            DataVersions.SAVEE, DataVersions.ESD,
                            DataVersions.EMODB
                        ],
                        type=str2dataset,
                        default=DataVersions.IEMOCAP)
    parser.add_argument('--pre-train-data-split', type=float, default=None)
    parser.add_argument('--warmup-steps', type=int, default=50000)
    parser.add_argument('--pretrain-epochs', type=int, default=64)
    parser.add_argument(
        '--testing-dataset',
        type=str2dataset,
        default=None,
        choices=[
            DataVersions.IEMOCAP, DataVersions.IMPROV, DataVersions.SAVEE,
            DataVersions.ESD, DataVersions.COMBINED, DataVersions.EMODB,
            DataVersions.KITCHEN_EMODB, DataVersions.KITCHEN_ESD,
            DataVersions.KITCHEN_ESD_DB0, DataVersions.KITCHEN_ESD_DBn5,
            DataVersions.KITCHEN_ESD_DBn10, DataVersions.KITCHEN_ESD_DBp5,
            DataVersions.KITCHEN_ESD_DBp10
        ])
    parser.add_argument('--gpu', type=int, default=1)
    parser.add_argument('--wandb-disable', type=str2bool, default=False,
                        choices=[True, False])
    parser.add_argument('--wandb-mode', type=str, default='online',
                        choices=['online', 'offline'])
    parser.add_argument('--double-dqn', type=str2bool, default=False,
                        choices=[True, False])
    parser.add_argument('--dueling-network', type=str2bool, default=False,
                        choices=[True, False])
    parser.add_argument('--dueling-type', type=str, default='avg',
                        choices=['avg', 'max', 'naive'])
    parser.add_argument('--schedule-csv', type=str, default=None)
    parser.add_argument('--schedule-idx', type=int, default=None)

    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)

    print("Tensorflow version:", tf.__version__)

    if os.path.exists(f'{RESULTS_ROOT}/{time_str}'):
        raise RuntimeError(
            f'Results directory {RESULTS_ROOT}/{time_str} already exists')

    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        try:
            # Currently, memory growth needs to be the same across GPUs
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            logical_gpus = tf.config.list_logical_devices('GPU')
            print(len(gpus), "Physical GPUs,", len(logical_gpus),
                  "Logical GPUs")
        except RuntimeError as e:
            # Memory growth must be set before GPUs have been initialized
            print(e)

    tf.compat.v1.experimental.output_all_intermediates(True)

    policy = parse_policy(args)

    data_version_map = {}

    custom_data_split = []
    if args.data_split is not None:
        if len(args.data_split) == 1 and len(args.data_version) > 1:
            for i in range(len(args.data_version)):
                custom_data_split.append(args.data_split[0])
        elif 1 < len(args.data_split) != len(args.data_version) > 1:
            raise RuntimeError(
                "--data-split either should have one value or similar to --data-version")
        else:
            custom_data_split = args.data_split
    else:
        for i in range(len(args.data_version)):
            custom_data_split.append(None)

    if len(args.data_version) == 1:
        target_datastore = get_datastore(
            data_version=args.data_version[0],
            custom_split=None if args.data_split is None else args.data_split[0])
        data_version_map[args.data_version[0]] = target_datastore
        env = get_environment(
            data_version=args.data_version[0],
            datastore=target_datastore,
            custom_split=None if args.data_split is None else args.data_split[0])
    else:
        ds = []
        for i in range(len(args.data_version)):
            d = get_datastore(data_version=args.data_version[i],
                              custom_split=custom_data_split[i])
            data_version_map[args.data_version[i]] = d
            ds.append(d)
        target_datastore = combine_datastores(ds)
        env = get_environment(data_version=DataVersions.COMBINED,
                              datastore=target_datastore,
                              custom_split=None)

    for k in args.__dict__.keys():
        print("\t{} :\t{}".format(k, args.__dict__[k]))
        env.__setattr__("_" + k, args.__dict__[k])

    experiment_name = "P-{}-S-{}-e-{}-pt-{}".format(args.policy,
                                                    args.zeta_nb_steps,
                                                    args.eps, args.pre_train)
    if args.pre_train:
        experiment_name = "P-{}-S-{}-e-{}-pt-{}-pt-w-{}".format(
            args.policy, args.zeta_nb_steps, args.eps, args.pre_train,
            args.pre_train_dataset.name)

    env.__setattr__("_experiment", experiment_name)

    nb_actions = env.action_space.n

    input_layer = Input(shape=(1, NUM_MFCC, NO_features))
    model = models.get_model_9_rl(input_layer, model_name_prefix='mfcc')

    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)

    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   policy=policy,
                   memory=memory,
                   nb_steps_warmup=args.warmup_steps,
                   gamma=.99,
                   target_model_update=10000,
                   train_interval=4,
                   delta_clip=1.,
                   enable_double_dqn=args.double_dqn,
                   enable_dueling_network=args.dueling_network,
                   dueling_type=args.dueling_type)
    # dqn.compile(Adam(learning_rate=.00025), metrics=['mae', 'accuracy'])
    dqn.compile('adam', metrics=['mae', 'accuracy'])

    pre_train_datastore: Datastore = None
    if args.pre_train:
        if args.pre_train_dataset == args.data_version:
            raise RuntimeError(
                "Pre-Train and Target datasets cannot be the same")
        else:
            pre_train_datastore = get_datastore(
                data_version=args.pre_train_dataset,
                custom_split=args.pre_train_data_split)

        assert pre_train_datastore is not None
        (x_train, y_train, y_gen_train), _ = pre_train_datastore.get_data()

        pre_train_log_dir = f'{RESULTS_ROOT}/{time_str}/logs/pre_train'
        if not os.path.exists(pre_train_log_dir):
            os.makedirs(pre_train_log_dir)

        dqn.pre_train(x=x_train.reshape(
            (len(x_train), 1, NUM_MFCC, NO_features)),
                      y=y_train,
                      epochs=args.pretrain_epochs,
                      batch_size=128,
                      log_base_dir=pre_train_log_dir)

    if args.mode == 'train':
        models_dir = f'{RESULTS_ROOT}/{time_str}/models'
        log_dir = f'{RESULTS_ROOT}/{time_str}/logs'
        if not os.path.exists(models_dir):
            os.makedirs(models_dir)
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        print(f"Models: {models_dir}")

        # Okay, now it's time to learn something! We capture the interrupt
        # exception so that training can be prematurely aborted. Notice that
        # now you can use the built-in Keras callbacks!
        weights_filename = f'{models_dir}/dqn_{args.env_name}_weights.h5f'
        checkpoint_weights_filename = models_dir + '/dqn_' + args.env_name + '_weights_{step}.h5f'
        log_filename = log_dir + '/dqn_{}_log.json'.format(args.env_name)
        callbacks = [
            ModelIntervalCheckpoint(checkpoint_weights_filename,
                                    interval=250000)
        ]
        callbacks += [FileLogger(log_filename, interval=10)]

        if not args.wandb_disable:
            wandb_project_name = 'zeta-policy'
            wandb_dir = f'{RESULTS_ROOT}/{time_str}/wandb'
            if not os.path.exists(wandb_dir):
                os.makedirs(wandb_dir)
            callbacks += [
                WandbLogger(project=wandb_project_name,
                            name=args.env_name,
                            mode=args.wandb_mode,
                            dir=wandb_dir)
            ]

        dqn.fit(env,
                callbacks=callbacks,
                nb_steps=args.nb_steps,
                log_interval=10000)

        model = dqn.model

        # After training is done, we save the final weights one more time.
        dqn.save_weights(weights_filename, overwrite=True)

        # Testing with Labelled Data
        testing_dataset = args.testing_dataset
        if testing_dataset is not None:
            if testing_dataset == DataVersions.COMBINED:
                if pre_train_datastore is not None:
                    testing_datastore = combine_datastores(
                        [target_datastore, pre_train_datastore])
                else:
                    testing_datastore = target_datastore
            else:
                testing_datastore = data_version_map[testing_dataset]
        else:  # testing dataset is not defined
            if pre_train_datastore is not None:
                testing_datastore = combine_datastores(
                    [target_datastore, pre_train_datastore])
            else:
                testing_datastore = target_datastore

        x_test, y_test, _ = testing_datastore.get_testing_data()
        test_loss, test_mae, test_acc, test_mean_q = model.evaluate(
            x_test.reshape((len(x_test), 1, NUM_MFCC, NO_features)),
            y_test,
            verbose=1)
        print(f"Test\n\t Accuracy: {test_acc}")

        store_results(f"{log_dir}/results.txt",
                      args=args,
                      experiment=experiment_name,
                      time_str=time_str,
                      test_loss=test_loss,
                      test_acc=test_acc)

        # # Finally, evaluate our algorithm for 10 episodes.
        # dqn.test(env, nb_episodes=10, visualize=False)
    elif args.mode == 'test':
        weights_filename = f'rl-files/models/dqn_{args.env_name}_weights.h5f'
        if args.weights:
            weights_filename = args.weights
        dqn.load_weights(weights_filename)
        dqn.test(env, nb_episodes=10, visualize=True)

    if args.schedule_csv is not None:
        from scheduler_callback import callback
        callback(args.schedule_csv, args.schedule_idx)