Example #1
0
 def executeExperiment(self):
     """Write the compiled experiment text to a file inside the project
     folder and submit it as a Foundations job.

     Prints status messages instead of raising; any exception is caught
     and printed so the caller never sees a failure.
     """
     print("execute")
     try:
         if self.executionText:
             # `with` guarantees the script file is closed even if write()
             # raises; the original open/close pair leaked the handle on error.
             with open(self.projectfolder + "/" + self.experimentfile, "w") as script_file:
                 script_file.write(self.executionText)
             import foundations
             foundations.submit(scheduler_config='scheduler',
                                job_directory=self.projectfolder,
                                command=[self.experimentfile])
             print("started")
         else:
             print("not complied yet")
             return None
     except Exception as e:
         # Best-effort: report and swallow so the UI flow continues.
         print(e)
     return None
Example #2
0
def submit_jobs(param_generator, command: str, number_jobs=1, project_name=None, job_directory='.',
                global_seed=23336666, ignore_exist=False):
    """Submit `number_jobs` Foundations jobs with hyper-parameters drawn
    from `param_generator`.

    Each drawn parameter dict is re-sampled until it is unique within this
    call and, when `ignore_exist` is True, until no existing targets match
    it (via `get_targets(dict_filter(...))`).

    Args:
        param_generator: zero-arg callable returning a hyper-parameter dict.
        command: job command passed through to `foundations.submit`.
        number_jobs: how many jobs to submit.
        project_name: default project; overridden by a 'project_name' key
            in the generated params.
        job_directory: working directory for each job.
        global_seed: seeds numpy's RNG for reproducible sampling.
        ignore_exist: when True, skip parameter sets that already have targets.
    """
    update_dirs()
    numpy.random.seed(global_seed)
    # Bug fix: this was initialised to [{}], so an empty parameter dict from
    # param_generator could never leave the dedup loop below.
    submitted_jobs = []
    # Hoisted out of the loop — re-importing on every iteration was redundant.
    from foundations import submit
    for idx in range(number_jobs):
        hyper_params = param_generator()
        while hyper_params in submitted_jobs or (len(get_targets(dict_filter(hyper_params))) > 0 and ignore_exist):
            hyper_params = param_generator()
        submitted_jobs.append(hyper_params.copy())

        # Fill in defaults only when the generator did not set them.
        hyper_params.setdefault('seed', 2018011328)
        hyper_params.setdefault('gpus', 1)

        name = hyper_params.get('project_name', project_name)
        submit(scheduler_config='scheduler', job_directory=job_directory, command=command,
               params=hyper_params, stream_job_logs=False,
               num_gpus=hyper_params["gpus"], project_name=name)

        print(f"Task {idx}, {hyper_params}")
    def test_local_run_job_bundle_is_same_as_remote(self):
        """A local run's artifact set must be a subset of the bundle produced
        by remotely submitting the same fixture."""
        import os
        import tarfile

        import foundations
        from acceptance.mixins.run_process import run_process
        from foundations_contrib.global_state import redis_connection
        from foundations_contrib.utils import foundations_home

        fixture_path = 'acceptance/fixtures/run_locally'
        self._deploy_job_file(fixture_path)
        local_job_id = redis_connection.get(
            'foundations_testing_job_id').decode()

        with self.unset_foundations_home():
            remote_job = foundations.submit(
                job_directory=fixture_path,
                command=['main.py'],
                num_gpus=0)
            remote_job.wait_for_deployment_to_complete()
            # Please forgive this hackery; we currently don't have an official
            # way of getting archives through the SDK.
            remote_job._deployment.get_job_archive()

        archive_root = os.path.expanduser(
            f'{foundations_home()}/job_data/archive')
        local_files = set(
            os.listdir(f'{archive_root}/{local_job_id}/artifacts/'))

        remote_job_id = remote_job.job_name()
        member_prefix = f'{remote_job_id}/'
        archive_name = f'{remote_job_id}.tgz'

        with tarfile.open(archive_name) as archive:
            remote_files = {
                member[len(member_prefix):]
                for member in archive.getnames()
                if member.startswith(member_prefix)
            }

        # Best-effort cleanup of the downloaded archive.
        try:
            os.remove(archive_name)
        except OSError:
            pass

        # Assert subset because the remote files actually contain an additional
        # file generated by the job submission process.
        self.assertTrue(local_files.issubset(remote_files))
            hyperparameters[hparam] = search_space.sample()
    return hyperparameters


# Load the sweep configuration for the weapon classifier.
with open('./src/config/weapon_classifier.config.yaml') as file:
    config = yaml.load(file, Loader=yaml.FullLoader)


# Search space for the hyper-parameter sweep: SearchSpace entries are sampled
# per job by sample_hyperparameters(); plain values pass through unchanged.
hyperparameter_ranges = {
    'num_epochs': 10,
    'batch_size': SearchSpace(16, 256, int),
    'learning_rate': SearchSpace(1e-5, 1e-2, float),
    'conv_layers': SearchSpace(0, 3, int),
    'conv_activation': 'relu',
    'conv_filters': [4, 8, 16],
    'conv_sizes': [(9, 9), (5, 5), (3, 3)],
    'pooling': SearchSpace(0, 1, int),
    'dense_layers': SearchSpace(0, 3, int),
    'dense_activation': 'relu',
    'dense_size': [64, 32, 16],
    'opt': SearchSpace(0, 1, int),
    'decay': SearchSpace(1e-7, 1e-5, float),
    }


# Default num_jobs to 0 when the config omits model_search/num_jobs; the
# original passed None to range() and crashed with a TypeError.
for _ in range(config.get('model_search', {}).get('num_jobs', 0)):
    hyperparameters = sample_hyperparameters(hyperparameter_ranges)
    foundations.submit(scheduler_config='scheduler',
                       command='src/weapon_class_train_driver.py',
                       params=hyperparameters)
import foundations

# Record a metric on the currently running job before chaining a follow-up.
foundations.log_metric('name', 'job1')

# Submit job2.py as a new Foundations job and block until its deployment
# completes, polling with a 10 second wait.
deployment = foundations.submit(command=["job2.py"])
deployment.wait_for_deployment_to_complete(wait_seconds=10)
Example #6
0
        'data_path':
        settings.DATA_DIR,
        'metadata_path':
        int(np.random.choice(list(range(len(settings.meta_data_path))))),
        'cache_path':
        settings.continue_cache_path,
    }

    return params


if __name__ == "__main__":
    # Fingerprints of parameter sets already submitted, to keep jobs unique.
    submitted_jobs = set()
    for job_index in range(NUM_JOBS):
        print(f"packaging job {job_index}")

        # Draw parameter sets until we hit one we have not submitted yet.
        hyper_params = generate_params()
        fingerprint = frozenset(hyper_params.items())
        while fingerprint in submitted_jobs:
            hyper_params = generate_params()
            fingerprint = frozenset(hyper_params.items())
        submitted_jobs.add(fingerprint)

        # Fresh random seed per job so identical params still differ by seed.
        hyper_params['seed'] = int(np.random.randint(2e9))
        print(hyper_params)

        launch_command = (
            f'-m torch.distributed.launch '
            f'--nproc_per_node={hyper_params["gpus"]} model5/main.py'
        )
        foundations.submit(
            scheduler_config='scheduler',
            job_directory='..',
            command=launch_command,
            params=hyper_params,
            stream_job_logs=False,
            num_gpus=hyper_params["gpus"])
Example #7
0
import foundations

NUM_JOBS = 100
from constants import generate_config

for job_num in range(NUM_JOBS):
    print(f'job number {job_num}')
    # generate_config() is called for its side effect (presumably writing
    # config.yml, per the message below — confirm); its return value was
    # bound to an unused local in the original, so we no longer bind it.
    generate_config()
    print('Finished writing config.yml file')

    foundations.submit(
        scheduler_config="scheduler",
        command=["main.py"],
        project_name="Fake-Audio-Detection",
    )
import os
# NOTE(review): presumably must be set before `import foundations` below so
# the SDK behaves as if invoked from its CLI — confirm against the SDK docs.
os.environ['FOUNDATIONS_COMMAND_LINE'] = 'True'

import foundations
import sys

# Usage: <script> <job_directory> <arg1> <arg2> <arg3>
# argv[1] is the job directory; argv[2:5] are forwarded verbatim to main.py.
# An IndexError is raised if fewer than four arguments are supplied.
deployment = foundations.submit(
    scheduler_config="scheduler",
    job_directory=sys.argv[1],
    command=["main.py", sys.argv[2], sys.argv[3], sys.argv[4]])
# Block until the remote deployment finishes, polling every 10 seconds.
deployment.wait_for_deployment_to_complete(wait_seconds=10)
Example #9
0
 def deployment(self):
     """Submit the fixture tensorboard job and return its deployment handle.

     NOTE(review): this call uses `job_dir=` and `entrypoint=` while other
     call sites of foundations.submit use `job_directory=`/`command=` —
     confirm these kwargs are valid for the SDK version in use.
     """
     return foundations.submit(project_name='tensorboard',
                               entrypoint='tensorboard_job',
                               job_dir='fixtures/tensorboard_job')
Example #10
0
        'RandomErasing':
        int(np.random.choice([0])),
        'data_path':
        settings.DATA_DIR,
        'metadata_path':
        int(np.random.choice(list(range(len(settings.meta_data_path))))),
        'cache_path':
        settings.img_cache_path,
    }

    return params


if __name__ == "__main__":
    # Fingerprints of already-submitted parameter sets; guarantees uniqueness.
    submitted_jobs = set()
    for job_idx in range(NUM_JOBS):
        print(f"packaging job {job_idx}")

        # Keep sampling until the parameter set has not been seen before.
        while True:
            hyper_params = generate_params()
            key = frozenset(hyper_params.items())
            if key not in submitted_jobs:
                break
        submitted_jobs.add(key)

        # Random per-job seed so repeated parameter draws still vary.
        hyper_params['seed'] = int(np.random.randint(2e9))
        print(hyper_params)
        foundations.submit(scheduler_config='scheduler',
                           job_directory='..',
                           command='model2/main.py',
                           params=hyper_params,
                           stream_job_logs=False,
                           num_gpus=1)
import os
# NOTE(review): presumably signals CLI mode to the Foundations SDK and must
# be set before the SDK is imported — confirm against the SDK documentation.
os.environ['FOUNDATIONS_COMMAND_LINE'] = 'True'

import foundations
import sys
import argparse

# Thin CLI wrapper around foundations.submit: four optional string flags plus
# everything remaining on the command line forwarded verbatim as the job command.
cli = argparse.ArgumentParser()
for flag in ("--project-name", "--scheduler", "--job-directory", "--entrypoint"):
    cli.add_argument(flag, type=str, default=None)
cli.add_argument("command", nargs=argparse.REMAINDER)

options = cli.parse_args(sys.argv[1:])

foundations.submit(scheduler_config=options.scheduler,
                   job_directory=options.job_directory,
                   project_name=options.project_name,
                   entrypoint=options.entrypoint,
                   command=options.command)
Example #12
0
import os
import foundations

NUM_JOBS = 10

# Launch NUM_JOBS identical jobs; any per-run variation must come from
# main.py itself, since no params are passed here.
for job_num in range(NUM_JOBS):
    print(f'job number {job_num}')
    foundations.submit(scheduler_config="scheduler",
                       command=["main.py"],
                       project_name="Trump_Twitter_ML_Experiments")
Example #13
0
              'max_lr': float(choice([0.005, 0.002])),
              'use_lr_scheduler': int(choice([0])),  # 0, 1 (2)
              'scheduler_gamma': float(choice([0.9])),  # 0.96, 0.95, 0.94 (0.96)
              'use_hidden_layer': int(choice([0])),  # 0, (1)
              'backbone': int(choice([7])),  # 1, 2, 3, 4, 5, 6, 7, 8, 9
              'same_transform': int(choice([0, 1])),  # TODO: Why
              'val_rate': 1,
              'data_path': settings.DATA_DIR,
              'metadata_path': int(choice(list(range(len(settings.meta_data_path))))),  # 0, 1, 2
              'bbox_path': settings.bbox_path,
              'cache_path': settings.video_cache_path,
              'diff_path': settings.diff_dict_path,
              }
    return params


if __name__ == "__main__":
    # Dead code removed: the dedup bookkeeping (`submitted_jobs` set and the
    # commented-out re-sampling loop) was disabled and the set never read.
    for job_ in range(NUM_JOBS):
        print(f"packaging job {job_}")
        hyper_params = generate_params()

        # Randomize the seed per job so runs differ even with equal params.
        hyper_params['seed'] = int(np.random.randint(2e9))
        print(hyper_params)
        foundations.submit(scheduler_config='scheduler', job_directory='/home/kailu/deepfake',
                           command='model1/main.py', params=hyper_params, stream_job_logs=False, num_gpus=1)