Example #1
def ml_execute_update(**update_dict):
    keys = [
        'jobs', 'description', 'data_query_path', 'data_source', 'groups',
        'dates', 'time_indicator', 'feature', 'days'
    ]
    infos = {k: update_dict.get(k, None) for k in keys}
    jobs = infos['jobs']
    for j in jobs:
        jobs[j]['description'] = infos['description']
        jobs[j]['day'] = infos['days'][j] if infos['days'] else None
        jobs[j]['job_start_date'] = str(infos['dates'][j][0])[0:16] if infos['dates'] else None
        # only format the end date when one is supplied; otherwise leave it empty
        jobs[j]['job_end_date'] = str(infos['dates'][j][1])[0:16] if infos['dates'] and infos['dates'][j][1] else None
        e2 = []
        for e in jobs[j]['execute']:
            for p in e['params']:
                if p in infos:
                    e['params'][p] = str(infos[p])
                if p == 'time_period':
                    e['params'][p] = infos['days'][j] if infos['days'] else None
            e2.append(e)
        jobs[j]['execute'] = e2
    print("ml_execute.yaml is updated!!")
    write_yaml(conf('docs_main_path'), "ml_execute.yaml", jobs)
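A minimal call sketch for ml_execute_update, assuming `jobs` is the dictionary loaded from ml_execute.yaml with the project's own read_yaml; the job names, dates, and column names below are hypothetical placeholders.

jobs = read_yaml(conf('docs_main_path'), 'ml_execute.yaml')
ml_execute_update(
    jobs=jobs,
    description="nightly anomaly scoring",                    # placeholder text
    days={'train': 'Monday', 'prediction': 'daily'},          # keyed by job name, as the loop above expects
    dates={'train': ('2021-01-01 00:00:00', None),
           'prediction': ('2021-01-01 00:00:00', '2021-02-01 00:00:00')},
    time_indicator='event_time', feature='amount',            # placeholder column names
    data_query_path='data.csv', data_source='csv', groups='store_id'
)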
Example #2
 def update_api_yaml_file(self):
     if not self.master_node:
         self.api_file = {
             a: self.api_file[a]
             for a in list(self.api_file.keys()) if a != 'ml_execute'
         }
     write_yaml(join(self.folder, "docs"), "apis.yaml", self.api_file)
Example #3
    def reset_update_config_directory(self, env, reset=False):
        """
            - It allows us to run platform on given folder, otherwise directory will be the local path.
            - It updates 'instace.yaml' at package where it is installed.
            - When you run package first it checks the instance.yaml
              where it is installed in the folder you have installed the package.

        ** instance.yaml **
        Keeps all running and has been run models in same folder. It allows us to restart each platform
        from where it is ended.
            -   directory : where user have assigned on AnomalyDetection(path=....)
            -   absolute_path : It is the path that you have running the platform
            -   web : This is the port that available for web insterface
            -   id : random generated id
            -   active : is the platform active? are services callable?
            -   start_date : started date of instance

        ** Additional Information **
            -   When platform is closed or shutdown;
                instance.yaml is updating. active is being changed to False.
            -   If there has not been generated instance at absolute path before;
                It generates new instance with its parameters.
                Once the instance created, it can be used later when you have start it in same absolute folder.

        :param env: docker, local. if docker == True, no need to to update directory be
                    cause whole .py file are also copied to new folder
        :param reset: reset updates back to normal 'False' value to directory variable at instance.yaml
        """
        if env != 'docker':
            instances = read_yaml(init_directory, "instance.yaml")
            ins_updated = []
            for ins in instances['instances']:
                if ins['directory'] == self.folder:
                    if reset:
                        ins['active'] = False
                        print("instance is closed!!!")
                        print("is active ? ", False, "\n", "directory :",
                              ins['directory'], "\n", "start date :",
                              ins['start_date'], "\n")
                    else:
                        ins['active'] = True
                    ins_updated.append(ins)
            if len(ins_updated) == 0 and not reset:
                instances['instances'] += [{
                    'directory': self.folder,
                    'absolute_path': abspath(""),
                    'web': find_web_port(),
                    'web_host': '127.0.0.1' if env == 'local' else env,
                    'id': str(random.random()).replace(".", ""),
                    'active': True,
                    'start_date': datetime.now()
                }]
            write_yaml(init_directory, "instance.yaml", instances)
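A hedged usage sketch for the method above: activate this folder's entry on start-up and flip it back on shutdown. `platform` stands in for an instance of the surrounding class with `self.folder` already set.

# hypothetical calls; 'platform' is an instance of the class that owns this method
platform.reset_update_config_directory(env='local')              # registers or re-activates the instance
platform.reset_update_config_directory(env='local', reset=True)  # marks the instance inactive again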
Example #4
def make_terms(options):
    '''Main driver.'''
    config = utils.read_yaml(options.config)
    glossary = utils.read_yaml(options.glossary)
    glossary = convert_to_dict(glossary)
    per_file = get_all_keys(config)
    terms = keys_to_terms(glossary, options.language, per_file)
    utils.write_yaml(options.output, terms)
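The `options` object only needs `.config`, `.glossary`, `.language`, and `.output` attributes; a minimal sketch wiring it to argparse (the flag names are assumptions, not the project's actual CLI).

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--config', required=True)    # path to the YAML config
parser.add_argument('--glossary', required=True)  # path to the YAML glossary
parser.add_argument('--language', default='en')
parser.add_argument('--output', required=True)    # where write_yaml puts the terms
make_terms(parser.parse_args())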
Example #5
def start_job_and_update_job_active(jobs, job):
    jobs[job]['active'] = True
    write_yaml(conf('docs_main_path'), "ml_execute.yaml", jobs)
    ml_execute_api = read_yaml(conf('docs_main_path'),
                               'apis.yaml')['ml_execute']
    url = get_api_url(ml_execute_api['host'], ml_execute_api['port'],
                      ml_execute_api['api_name'])
    request_url(url, {'job': job})
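A hedged call sketch, assuming ml_execute.yaml has already been loaded with the project's read_yaml and that 'train' is one of the job names it defines.

jobs = read_yaml(conf('docs_main_path'), 'ml_execute.yaml')
start_job_and_update_job_active(jobs, 'train')  # 'train' is a placeholder job name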
Example #6
def make_index(options):
    '''Main driver.'''
    config = utils.read_yaml(options.config)
    entries = utils.get_entry_info(config)
    index = {}
    for entry in entries:
        collect_index_entries(entry['file'], entry['slug'], index)
    index = rearrange(entries, index)
    utils.write_yaml(options.output, index)
def make_numbering(options):
    '''Main driver.'''
    config = utils.read_yaml(options.config)
    entries = utils.get_entry_info(config)
    figures = {}
    tables = {}
    for entry in entries:
        text = utils.read_file(entry['file'], scrub=False)
        figures.update(get_inclusions(FIG_INC, entry, text))
        tables.update(get_inclusions(TBL_INC, entry, text))
    result = {'entries': entries, 'figures': figures, 'tables': tables}
    utils.write_yaml(options.output, result)
Example #8
def output_yaml(fn):
    o = {
            'branch': get_branch(),
            'commit': get_commit(),
            'manual_path': get_manual_path(),
            'date': str(datetime.date.today().year),
            'version_selector': get_versions(),
            'stable': conf.version.stable,
            'published_branches': conf.git.branches.published,
            'pdfs': []
    }

    write_yaml(o, fn)
Example #9
def output_yaml(fn):
    o = {
            'branch': get_branch(),
            'commit': get_commit(),
            'manual_path': get_manual_path(),
            'date': str(datetime.date.today().year),
            'version_selector': get_versions(),
            'stable': STABLE_RELEASE,
            'published_branches': PUBLISHED_BRANCHES,
            'pdfs': []
    }

    write_yaml(o, fn)
Example #10
def output_yaml(fn):
    o = {
            'branch': get_branch(),
            'commit': get_commit(),
            'manual_path': get_manual_path(),
            'date': str(datetime.date.today().year),
            'version_selector': get_versions(),
            'stable': STABLE_RELEASE,
            'published_branches': PUBLISHED_BRANCHES,
            'pdfs': []
    }

    write_yaml(o, fn)
Example #11
def output_yaml(fn):
    o = {
        'branch': get_branch(),
        'commit': get_commit(),
        'manual_path': get_manual_path(),
        'date': str(datetime.date.today().year),
        'version_selector': get_versions(),
        'stable': conf.version.stable,
        'published_branches': conf.git.branches.published,
        'pdfs': []
    }

    write_yaml(o, fn)
Example #12
 def get_time():
     try:
         print("browser time: ", request.args['time'])
         print("server time : ", time.strftime('%A %B, %d %Y %H:%M:%S'))
         jobs = read_yaml(conf('docs_main_path'), 'ml_execute.yaml')
         for j in jobs:
             jobs[j]['browser_time'] = str(
                 datetime.datetime.strptime(
                     " ".join(request.args['time'].split()[0:5]),
                     "%a %b %d %Y %H:%M:%S"))[0:13]
         write_yaml(conf('docs_main_path'), "ml_execute.yaml", jobs)
     except Exception as e:
         print(e)
     return "Done"
Example #13
 def stop_job(self, request=True):
     if self.job['active'] is True:  # if there is an active job, update ml_execute.yaml
         self.logger.regenerate_file()
         self.jobs_yaml[self.job_name]['active'] = False
         write_yaml(conf('docs_main_path'), "ml_execute.yaml",
                    self.jobs_yaml)
         for j in self.job['execute']:
             self.api_info = self.api_infos['model_' + j['params']['model']]
             self.url = get_api_url(host=self.api_info['host'],
                                    port=self.api_info['port'],
                                    api_name=self.api_info['api_name'])
             if request:
                 request_url(self.url, self.job['stop_job'])
Example #14
def ml_execute_reset(jobs):
    for j in jobs:
        jobs[j]['description'] = None
        jobs[j]['day'] = None
        jobs[j]['job_start_date'] = None
        jobs[j]['job_end_date'] = None
        e2 = []
        for e in jobs[j]['execute']:
            for p in e['params']:
                if p not in ['model', 'job']:
                    e['params'][p] = None
            e2.append(e)
        jobs[j]['execute'] = e2
    print("reset ml-execute.yaml !!!")
    write_yaml(conf('docs_main_path'), "ml_execute.yaml", jobs)
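A hedged usage sketch: load the current job definitions and blank out every field except the model and job identifiers.

jobs = read_yaml(conf('docs_main_path'), 'ml_execute.yaml')
ml_execute_reset(jobs)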
Example #15
def db_connection_update(**args):
    configs = read_yaml(conf('docs_main_path'), 'configs.yaml')
    configs['db_connection']['data_source'] = args['data_source']
    configs['db_connection']['is_from_db'] = False if args['data_source'] in [
        'csv', 'json', 'pickle'
    ] else True
    infos = {
        'db': args.get('db_name', None),
        'password': args.get('pw', None),
        'port': args.get('port', None),
        'server': args.get('host', None),
        'user': args.get('user', None)
    }
    for i in infos:
        configs['db_connection'][i] = infos[i]
    write_yaml(conf('docs_main_path'), "configs.yaml", configs)
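Two hedged calls for the function above; every credential value is a placeholder. A file-based source only needs `data_source`, since the remaining fields default to None via `.get`.

db_connection_update(data_source='postgresql', db_name='analytics',
                     user='ml_user', pw='secret', host='127.0.0.1', port=5432)
db_connection_update(data_source='csv')  # file-based: is_from_db becomes False, credentials reset to None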
Example #16
    def __init__(self, config, resume=None, run_id=None):

        """
        method description:
            class to parse configuration json file. Handles hyperparameters for training, initializations of modules, checkpoint saving

        Args:
            config (dict): project parameters
            resume (str): path to the checkpoint being loaded.
            run_id (str): Unique Identifier for training processes. Used to save checkpoints and training log. Timestamp is being used as default
        Returns:
            return_v (rtype): rtype description

        :Author:  jeffery
        :Create:  2020/8/2 10:00 下午
        """
        # load config file
        self._config = config
        self.resume = resume

        # set save_dir where trained model and log will be saved.
        save_dir = Path(self.config['trainer']['save_dir'])

        exper_name = self.config['experiment_name']
        if run_id is None:  # use timestamp as default run-id
            run_id = datetime.now().strftime(r'%m%d_%H%M%S')
        self._save_dir = save_dir / 'models' / exper_name / run_id
        self._log_dir = save_dir / 'log' / exper_name / run_id
        self._diff_dir = save_dir / 'diff' / exper_name / run_id

        # make directory for saving checkpoints and log.
        exist_ok = run_id == ''
        self.save_dir.mkdir(parents=True, exist_ok=exist_ok)
        self.log_dir.mkdir(parents=True, exist_ok=exist_ok)
        self.diff_dir.mkdir(parents=True, exist_ok=exist_ok)

        # save updated config file to the checkpoint dir
        write_yaml(self.config, self.save_dir / 'config.yml')

        # configure logging module
        setup_logging(self.log_dir)
        self.log_levels = {
            0: logging.WARNING,
            1: logging.INFO,
            2: logging.DEBUG
        }
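A minimal construction sketch. The class name `ConfigParser` and the config values are assumptions taken from context; the config dict only needs the keys read in __init__.

config = {
    'experiment_name': 'demo',          # placeholder experiment name
    'trainer': {'save_dir': 'saved/'},  # checkpoints and logs go under this root
}
cfg = ConfigParser(config)              # creates saved/models/demo/<run_id>/ etc. and writes config.yml
print(cfg.save_dir, cfg.log_dir)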
Example #17
def output_yaml(fn, conf=None):
    if conf is None:
        conf = load_conf()

    o = {
            'branch': get_branch(),
            'commit': get_commit(),
            'manual_path': get_manual_path(),
            'date': str(datetime.date.today().year),
            'version_selector': get_versions(),
            'stable': conf.version.stable,
            'upcoming': conf.version.upcoming,
            'published_branches': conf.git.branches.published,
            'pdfs': []
    }

    write_yaml(o, fn)
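A hedged invocation sketch for this variant: the output file name is a placeholder, and load_conf() is the same helper the function falls back to when no conf is passed.

output_yaml('meta.yaml')                    # loads the project conf itself
output_yaml('meta.yaml', conf=load_conf())  # or reuse an already-loaded conf object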
Example #18
 def create_job(self, job, **args):
     self.jobs = read_yaml(join(self.conf.folder, "docs"),
                           'ml_execute.yaml')
     for j in self.jobs:
         if j == job:
             self.jobs[j]['browser_time'] = str(datetime.now())[:13]
             self.jobs[j]['description'] = args['description']
             self.jobs[j]['day'] = args['days']
             self.jobs[j]['job_start_date'] = str(args['dates'])[0:16]
             e2 = []
             for e in self.jobs[j]['execute']:
                 for p in e['params']:
                     if p in args.keys():
                         e['params'][p] = str(args[p])
                 e2.append(e)
             self.jobs[j]['execute'] = e2
     write_yaml(join(self.conf.folder, "docs"), "ml_execute.yaml",
                self.jobs)
Example #19
def generate(class_type="classA", id_base=0):
    os.makedirs(f"{utils.folder}/envs", exist_ok=True)
    os.makedirs(f"{utils.folder}/params", exist_ok=True)
    xml = read_template("body-templates/ant.template.xml")
    conf = read_yaml("body-templates/ant.yaml")
    data = conf[class_type]
    write_xml(f"{utils.folder}/envs/{id_base}.mean.xml", data, xml)

    for i in range(5):
        data_clone = data.copy()
        for key in data_clone:
            data_clone[key] += np.random.normal(0, 0.01)
        
        data_clone["_initial_z"] = data_clone["size_torso"] + 0.5
        write_xml(f"{utils.folder}/envs/{i+id_base}.xml", data_clone, xml)
        params = {
            "size_torso": data_clone["size_torso"]
        }
        write_yaml(f"{utils.folder}/params/{i+id_base}.yml", params)
Example #20
    def get_connector(self):
        """
       query_string_change connection checks.
       tries for db connections (postgresql, RedShift, googlebigquery).
       If fials checks for
        """
        if self.data is None:
            config = conf('config')
            try:
                data_access_args = {
                    "data_source": self.data_source,
                    "data_query_path": self.data_query_path,
                    "time_indicator": self.time_indicator,
                    "feature": self.feature
                }

                for i in config['db_connection']:
                    if i != 'data_source':
                        config['db_connection'][i] = None
                        if self.data_source not in ["csv", "json"]:
                            config['db_connection'][i] = self.connector[i]
                    else:
                        config['db_connection']['data_source'] = self.data_source
                if self.data_source in ["csv", "json"]:
                    data_access_args['test'] = 10
                write_yaml(join(self.path, "docs"),
                           "configs.yaml",
                           config,
                           ignoring_aliases=False)
                source = GetData(**data_access_args)
                source.get_connection()
                if self.data_source in ["csv", "json"]:
                    source.data_execute()
                    return len(source.data) != 0
                else:
                    return True
            except Exception:
                return False
        else:
            return isinstance(self.data, DataFrame)
Example #21
 def create_docker_compose_file(self):
     if self.cd.check_for_directory():
         self.compose_file = read_yaml(self.folder, "docker-compose.yml")
         services = self.compose_file['services']
         self.check_for_ports(service_count=len(services))
         count = 0
         for s in services:
             services[s]['ports'] = [
                 str(self.ports[count]) + ":" + str(self.ports[count])
             ]
             print("available port for service :", s, " - ",
                   str(self.ports[count]))
             ## TODO: volumes change to data, models, logs, docs
             services[s]['volumes'] = [join(self.folder, "") + "/:/app"]
             count += 1
         if not self.master_node:
             services = {
                 s: services[s]
                 for s in list(services.keys())
                 if s != 'ml_executor - services'
             }
         self.compose_file['services'] = services
         write_yaml(self.folder, "docker-compose.yml", self.compose_file)
Example #22
    def __init__(self, config, resume=None, modification=None, run_id=None):
        """
        class to parse configuration yaml file. Handles hyperparameters for training, initializations of modules, checkpoint saving
        and logging module.
        :param config: Dict containing configurations, hyperparameters for training. contents of `config.yaml` file for example.
        :param resume: String, path to the checkpoint being loaded.
        :param modification: Dict keychain:value, specifying position values to be replaced from config dict.
        :param run_id: Unique Identifier for training processes. Used to save checkpoints and training log. Timestamp is being used as default
        """
        # load config file and apply modification
        self._config = _update_config(config, modification)
        self.resume = resume

        # set save_dir where trained model and log will be saved.
        save_dir = Path(self.config['trainer']['save_dir'])

        exper_name = self.config['name']
        if run_id is None:  # use timestamp as default run-id
            run_id = datetime.now().strftime(r'%m%d_%H%M%S')
        self._save_dir = save_dir / 'models' / exper_name / run_id
        self._log_dir = save_dir / 'log' / exper_name / run_id

        # make directory for saving checkpoints and log.
        exist_ok = run_id == ''
        self.save_dir.mkdir(parents=True, exist_ok=exist_ok)
        self.log_dir.mkdir(parents=True, exist_ok=exist_ok)

        # save updated config file to the checkpoint dir
        write_yaml(self.config, self.save_dir / 'config.yaml')

        # configure logging module
        setup_logging(self.log_dir)
        self.log_levels = {
            0: logging.WARNING,
            1: logging.INFO,
            2: logging.DEBUG
        }
Example #23
import sys

import utils

utils.write_yaml(sys.path, sys.argv[1])
Example #24
def main():

    # Get environment path
    environment_path = utils.get_environment()

    # If no environment is defined, put user in root environment.
    if not environment_path:

        msg = "\n\nCould not find the \"environment.conf\" file in \"{path}\"."
        msg += "\nPlease create an environment pointer file and save it as "
        msg += "\"{path}/environment.conf\"."
        msg += "\nYou can also modify the included example "
        msg += "\"{path}/environment.conf.example\", and rename to "
        msg += "\"{path}/environment.conf\"."
        msg += "\n\nYou are in the root environment of Conda. "
        msg += "The \"conda\" command is available to use now."
        path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
        path = path.replace("\\", "/")

        print msg.format(path=path)

        return

    # If requested to put user into the root environment.
    if environment_path == "root":

        msg = "You are in the root environment of Conda. "
        msg += "The \"conda\" command is available to use now."
        path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
        path = path.replace("\\", "/")

        print msg.format(path=path)

        return

    # Add conda_git_deployment module to environment.
    # Also removing PYTHONPATH that conda root environment needs.
    path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
    os.environ["PYTHONPATH"] = path

    # Get environment data.
    environment_string = ""
    if os.path.exists(environment_path):
        f = open(environment_path, "r")
        environment_string = f.read()
        f.close()
    else:
        msg = "Could not find \"{0}\" on disk."
        print msg.format(environment_path)

    if not environment_string:
        environment_string = requests.get(environment_path).text

    environment_data = utils.read_yaml(environment_string)

    # Export environment
    if (utils.get_arguments()["export"]
            or utils.get_arguments()["export-without-commit"]):
        repositories_path = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "..", "repositories",
                         environment_data["name"]))

        # Get commit hash and name from repositories on disk.
        if not utils.check_executable("git"):
            subprocess.call(
                ["conda", "install", "-c", "anaconda", "git", "-y"])
        disk_repos = {}
        for repo in os.listdir(repositories_path):
            path = os.path.join(repositories_path, repo)
            if not os.path.exists(os.path.join(path, ".git")):
                continue

            commit_hash = subprocess.check_output(["git", "rev-parse", "HEAD"],
                                                  cwd=path).rsplit()[0]

            disk_repos[repo] = commit_hash

        # Construct new git dependencies.
        git_data = {"git": []}
        for item in environment_data["dependencies"]:
            if "git" in item:
                for repo in item["git"]:

                    # Get url from environment file.
                    url = ""
                    if isinstance(repo, str):
                        url = repo
                    if isinstance(repo, dict):
                        url = repo.keys()[0]

                    # Skip any repositories that aren't cloned yet.
                    name = url.split("/")[-1].replace(".git", "").split("@")[0]
                    if name not in disk_repos.keys():
                        continue

                    # Construct commit url if requested.
                    commit_url = url.split("@")[0]
                    if not utils.get_arguments()["export-without-commit"]:
                        commit_url += "@" + disk_repos[name]

                    if isinstance(repo, str):
                        git_data["git"].append(commit_url)

                    if isinstance(repo, dict):
                        git_data["git"].append({commit_url: repo[url]})

        # Replace git dependencies
        for item in environment_data["dependencies"]:
            if "git" in item:
                environment_data["dependencies"].remove(item)

        environment_data["dependencies"].append(git_data)

        # Write environment file
        utils.write_yaml(environment_data,
                         os.path.join(os.getcwd(), "environment.yml"))

        return

    # Writing original environment to disk
    data_file = os.path.join(tempfile.gettempdir(),
                             'data_%s.yml' % os.getpid())
    utils.write_yaml(environment_data, data_file)

    # Remove git from environment as it's not supported by conda (yet).
    for item in environment_data["dependencies"]:
        if "git" in item:
            index = environment_data["dependencies"].index(item)
            del environment_data["dependencies"][index]

    # Create environment file from passed environment.
    environment_filename = os.path.join(tempfile.gettempdir(),
                                        'env_%s.yml' % os.getpid())

    utils.write_yaml(environment_data, environment_filename)

    args = ["conda", "env", "create"]

    # Force environment update/rebuild when requested by command.
    if utils.get_arguments()["update-environment"]:
        args.append("--force")

    # Check whether the environment installed is different from the requested
    # environment, and whether the conda-git-deployment is different.
    # Force environment update/rebuild if different.
    environment_update = False
    if not utils.get_arguments()["suppress-environment-update"]:
        incoming_md5 = hashlib.md5(environment_string +
                                   "cwd: {0}".format(os.getcwd())).hexdigest()
        existing_md5 = ""

        md5_path = os.path.join(os.path.expanduser("~"), "AppData", "Local",
                                "Continuum", "Miniconda2",
                                environment_data["name"] + ".md5")
        if os.path.exists(md5_path):
            f = open(md5_path, "r")
            existing_md5 = f.read()
            f.close()

        if incoming_md5 != existing_md5:
            environment_update = True
            if "--force" not in args:
                args.append("--force")

        with open(md5_path, "w") as the_file:
            the_file.write(incoming_md5)

    # Create environment
    args.extend(["-f", environment_filename])

    return_code = subprocess.call(args)

    os.remove(environment_filename)

    # Spawning a new process to get the correct python executable and
    # passing data via file on disk.
    platform_script = "environment.sh"
    if platform.system().lower() == "windows":
        platform_script = "environment.bat"

    args = [
        os.path.join(os.path.dirname(__file__), platform_script),
        environment_data["name"],
        os.path.join(os.path.dirname(__file__), "install.py"), data_file
    ]

    args.extend(sys.argv[1:])

    # If it's the first installation, we need to pass update to install.py
    if not return_code:
        args.append("--update-environment")

    if platform.system().lower() != "windows":
        args.insert(0, "bash")

    if environment_update and "--update-environment" not in args:
        args.append("--update-environment")

    subprocess.call(args)
Example #25
def main(config):
    # SET DEVICE
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
        str(gpu) for gpu in config["COMMON"]["GPUS"])
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    DATE = datetime.datetime.now().strftime("%Y_%m_%d/%H_%M_%S")

    SAVEPATH = os.path.join(config["COMMON"]["SAVEPATH"], DATE)
    config["COMMON"]["SAVEPATH"] = SAVEPATH
    os.makedirs(SAVEPATH)
    utils.set_logger(os.path.join(SAVEPATH, "train.log"))
    utils.write_yaml(os.path.join(SAVEPATH, "config.yaml"), config)

    # DATA LOADING
    logging.info(f'Loading {config["DATA"]["NAME"]} datasets')
    transform = [
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15)
    ]
    loader = trainer.Dataloader(config["DATA"])
    # check that the configuration matches the actual dataset
    num_classes = len(loader["train"].dataset.classes)
    assert num_classes == config["MODEL"]["NUMCLASSES"], \
        f'Number of classes does not match!\nIn Directory: {num_classes}\nIn Configuration: {config["MODEL"]["NUMCLASSES"]}'

    # PREPROCESSING
    # Add New class
    # Add New data

    # MODEL BUILD
    logging.info(f"Building model")
    net = model.Model(config["MODEL"]["BASEMODEL"],
                      config["MODEL"]["NUMCLASSES"],
                      config["MODEL"]["FREEZE"]).to(device)
    # net = model.Model(num_classes=config["MODEL"]["NUMCLASSES"]).to(device)

    if torch.cuda.is_available() and len(config["COMMON"]["GPUS"]) > 1:
        logging.info(f"Multi GPU mode")
        net = torch.nn.DataParallel(
            net, device_ids=config["COMMON"]["GPUS"]).to(device)

    criterion = model.loss_fn
    metrics = {"acc": model.accuracy}  # If classification
    # metrics = {}
    optm = optm_dict[config["TRAIN"]["OPTIMIZER"]](
        net.parameters(), lr=config["TRAIN"]["LEARNINGRATE"])

    # TRAINING
    EPOCHS = config["TRAIN"]["EPOCHS"]
    logging.info(f"Training start !")
    best_val_loss = np.inf
    for epoch in range(EPOCHS):

        metrics_summary = trainer.train(epoch, net, optm, criterion,
                                        loader["train"], metrics, device,
                                        config)
        metrics_summary.update(
            trainer.eval(epoch, net, optm, criterion, loader["validation"],
                         metrics, device, config))

        metrics_string = " ; ".join(f"{key}: {value:05.3f}"
                                    for key, value in metrics_summary.items())
        logging.info(f"[{epoch+1}/{EPOCHS}] Performance: {metrics_string}")

        is_best = metrics_summary['val_loss'] <= best_val_loss

        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': net.state_dict(),
                'optim_dict': optm.state_dict()
            },
            is_best=is_best,
            checkpoint=SAVEPATH)

        if is_best:
            logging.info("Found new best loss !")
            best_val_loss = metrics_summary['val_loss']

            best_json_path = os.path.join(SAVEPATH, "metrics_best.json")
            utils.save_dict_to_json(metrics_summary, best_json_path, is_best)

        last_json_path = os.path.join(SAVEPATH, "metrics_history.json")
        utils.save_dict_to_json(metrics_summary, last_json_path)

    # Data version control

    logging.info(f"Training done !")
Example #26
def db_connection_reset(configs):
    for arg in configs['db_connection']:
        configs['db_connection'][arg] = None
    print("reset db_connection on configs.yaml !! ")
    write_yaml(conf('docs_main_path'), "configs.yaml", configs)
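A hedged usage sketch: read the current configs with the project's read_yaml, then wipe the stored connection settings.

configs = read_yaml(conf('docs_main_path'), 'configs.yaml')
db_connection_reset(configs)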
Example #27
def start_experiment_multi(script):
    percentage_train = 0.8
    num_exp = args.replicates
    exp_path = f"outputs/{g_exp_name}"
    os.makedirs(exp_path, exist_ok=True)
    data = {
        "num-exp": num_exp,
        "num-bodies": g_total_bodies,
        "args": args,
    }
    write_yaml(f"{exp_path}/config.yml", data)
    repeated_train_bodies = {}
    for i in range(num_exp):  # no matter how many bodies there are in the dataset, we only run the random experiment num_exp times.
        train_bodies, test_bodies = [], []
        for j in range(100):  # retry up to 100 times if we have chosen the same training bodies before; this can happen when num-bodies is small.
            all_bodies = np.arange(0, g_total_bodies)
            np.random.shuffle(all_bodies)
            train_bodies = all_bodies[:int(percentage_train * g_total_bodies)]
            test_bodies = all_bodies[int(percentage_train * g_total_bodies):]

            train_bodies = np.sort(train_bodies)
            test_bodies = np.sort(test_bodies)
            _t = tuple(train_bodies)
            if _t not in repeated_train_bodies:
                repeated_train_bodies[_t] = 1
                break
        else:  # all retries produced an already-used split
            abort(
                "Can't generate enough unique replicates. Maybe there are not enough bodies in the dataset."
            )

        output(f"train_bodies {train_bodies}", 2)
        output(f"test_bodies {test_bodies}", 2)
        train_seed = args.num_bodies * 100000 + args.body_variation_range * 10000 + args.seed_bodies * 100 + i  # Unique seeds suggested by Sam
        data = {
            "train_bodies": train_bodies.tolist(),
            "not_train_bodies": test_bodies.tolist(),
            # "test_bodies": all_bodies.tolist(),
            "test_bodies": test_bodies.tolist(
            ),  # only evaluate on test set to save training time, so things can be done on partition short.
            "train_seed": train_seed,
        }
        write_yaml(f"{exp_path}/exp_multi_{i}_bodies.yml", data)

        str_train_bodies = np.array2string(train_bodies, separator=',')[1:-1]
        str_train_bodies = str_train_bodies.replace(' ', '').replace('\n', '')
        output(f"str_train_bodies: {str_train_bodies}", 2)

        str_test_bodies = np.array2string(test_bodies, separator=',')[1:-1]  # only evaluate on the test set to save training time, so things can be done on partition short.
        str_test_bodies = str_test_bodies.replace(' ', '').replace('\n', '')
        output(f"str_test_bodies: {str_test_bodies}", 2)
        # calculate an appropriate seed smaller than 2**32-1

        output(f"Starting {script} with exp-idx {i} seed {train_seed}", 1)
        if args.vacc:
            bash = "sbatch"
        else:
            bash = "bash"
        cmd_w = [
            bash, script, g_exp_name,
            str(i), str_train_bodies, str_test_bodies,
            str(train_seed), "--with-bodyinfo", f"{args.n_timesteps}"
        ]
        cmd_wo = [
            bash, script, g_exp_name,
            str(i), str_train_bodies, str_test_bodies,
            str(train_seed), "", f"{args.n_timesteps}"
        ]
        output(" ".join(cmd_w), 2)
        output(" ".join(cmd_wo), 2)
        if args.in_parallel:
            Popen(cmd_w)
            Popen(cmd_wo)
        else:
            call(cmd_w)
            call(cmd_wo)
Example #28
def models_reset(model_configuration):
    model_configuration2 = model_configuration
    for m in model_configuration['infos']:
        model_configuration2['infos'][m] = None
    write_yaml(conf('model_main_path'), "model_configuration.yaml",
               model_configuration2)
Example #29
def logs_reset(process):
    process2 = process
    for j in process:
        for m in process[j]:
            process2[j][m] = 0
    write_yaml(conf('log_main_path'), "process.yaml", process2)
Example #30
 def write(self):
     utils.write_yaml(self.data, self._yamlFile)
Example #31
def generate_bodies():
    assert args.seed_bodies < 100, "The way we combine real seeds only allows seed_bodies to be smaller than 100."
    assert args.body_variation_range % 10 == 0, "The way we combine real seeds only allows body_variation_range to be a multiple of 10, e.g. 10, 20, ... 90."
    assert args.num_bodies % 10 == 0, "The way we combine real seeds only allows num_bodies to be a multiple of 10, e.g. 10, 20, 30 ..."
    real_seed = args.num_bodies * 1000 + args.body_variation_range * 100 + args.seed_bodies  # if we only applied seed_bodies, the first 20 bodies of walker2d_30_10-v0 would be the same as walker2d_20_10-v0
    random.seed(real_seed)
    output(f"Start generating bodies {args.num_bodies} with seed {real_seed}", 1)

    # 1. Check templates
    template_files = check_templates()

    # 2. Create Folders
    dataset_path = create_folder()

    # 3. Generate variations and write body, param files
    body_xml = read_template(template_files[0])
    body_yaml = read_yaml(template_files[1])
    file_list, param_list = [], []
    for i in range(args.num_bodies):
        data = {}
        for key in body_yaml['variable']:
            data[key] = body_yaml['variable'][key] * ((random.random() * 2 - 1) * args.body_variation_range / 100 + 1.0)
            data[key] = significant_digits(data[key], 4)
        for key in body_yaml['fixed']:
            data[key] = body_yaml['fixed'][key]
        for key in body_yaml['combination']:
            data[key] = 0
            for key1 in body_yaml['combination'][key]:
                data[key] += data[key1]
            data[key] = significant_digits(data[key], 4)
        # Volume calculation
        for part in body_yaml['part']:
            data[f"volume_{part}"] = data[f"length_{part}"] * 3.14 * data[f"weight_{part}"] * data[f"weight_{part}"]

        write_xml(f"{dataset_path}/bodies/{i}.xml", data, body_xml)
        write_yaml(f"{dataset_path}/params/{i}.yaml", data)
        file_list.append(f"bodies/{i}.xml")
        param_list.append(f"params/{i}.yaml")

    # 4. Write config file
    env_id = f"{args.template_body}_{args.num_bodies}_{args.body_variation_range}-v{args.seed_bodies}"
    config_yaml = {
        "dataset_name": args.template_body,
        "real_seed": real_seed,
        "bodies": {
            "total": len(file_list),
            "files": file_list,
            "params": param_list,
        },
        "gym_env": {
            "env_id": env_id,
            "filename": f"{args.template_body}.py",
            "class": f"{args.template_body.capitalize()}Env",
        }
    }
    write_yaml(f"{dataset_path}/config.yaml", config_yaml)

    # 5. Copy over Gym Env Python file
    shutil.copyfile(template_files[2], f"{dataset_path}/{args.template_body}.py")

    return env_id, dataset_path
Example #32
                    print(f"classA {classA}, classB {classB}, classC {classC}")

                    test = classA[8:] + classB[8:] + classC[8:]

                    # train on test single
                    if False:
                        for test_body in test:
                            sub_exp(test_body, test_body, False)

                    # # train on 4 A's, test on the rest A and one B
                    # train = classA[:4]
                    # sub_exp(train, test, False)
                    # # train on 4 B's, test on the rest B and one A
                    # train = classB[:4]
                    # sub_exp(train, test, False)

                    # train on 8 A's, 8 B's, and 8 C's. test on the rest A, the rest B, and the rest C
                    train = classA[:8] + classB[:8] + classC[:8]
                    sub_exp(train, test, False, seed=iteration)
                    sub_exp(train, test, True, seed=iteration)

                    one_job = {
                        "train": train,
                        "test": test,
                    }
                    total_jobs.append(one_job)
                iteration += 1
    print(f"total iteration: {iteration}")
    print(f"total sbatch jobs: {total_sbatch}")
    utils.write_yaml(f"{utils.folder}/total_jobs.yml", total_jobs)