def executeExperiment(self):
    print("execute")
    try:
        if self.executionText:
            # Write the compiled experiment text into the project folder, then submit it.
            with open(self.projectfolder + "/" + self.experimentfile, "w") as f2:
                f2.write(self.executionText)
            import foundations
            foundations.submit(scheduler_config='scheduler',
                               job_directory=self.projectfolder,
                               command=[self.experimentfile])
            print("started")
        else:
            print("not compiled yet")
        return None
    except Exception as e:
        print(e)
        return None
def submit_jobs(param_generator, command: str, number_jobs=1, project_name=None,
                job_directory='.', global_seed=23336666, ignore_exist=False):
    update_dirs()
    numpy.random.seed(global_seed)
    submitted_jobs = [{}]
    for idx in range(number_jobs):
        # Resample until the parameter set has not been submitted in this run and,
        # when ignore_exist is set, has no target outputs on disk already.
        hyper_params = param_generator()
        while (hyper_params in submitted_jobs
               or (len(get_targets(dict_filter(hyper_params))) > 0 and ignore_exist)):
            hyper_params = param_generator()
        submitted_jobs.append(hyper_params.copy())

        if 'seed' not in hyper_params:
            hyper_params['seed'] = int(2018011328)
        if 'gpus' not in hyper_params:
            hyper_params['gpus'] = 1
        name = project_name if 'project_name' not in hyper_params else hyper_params['project_name']

        from foundations import submit
        submit(scheduler_config='scheduler',
               job_directory=job_directory,
               command=command,
               params=hyper_params,
               stream_job_logs=False,
               num_gpus=hyper_params["gpus"],
               project_name=name)
        print(f"Task {idx}, {hyper_params}")
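For context, a minimal sketch of how submit_jobs above might be driven. The generator name, search values, and train.py entrypoint are illustrative assumptions, and the module-level helpers it relies on (update_dirs, get_targets, dict_filter) are assumed to be defined alongside it.

import numpy

def random_params():
    # Hypothetical parameter generator: each call returns one candidate
    # configuration as a plain dict, which submit_jobs deduplicates.
    return {
        'learning_rate': float(numpy.random.choice([1e-4, 3e-4, 1e-3])),
        'batch_size': int(numpy.random.choice([32, 64, 128])),
        'gpus': 1,
    }

# Queue five jobs, each running an assumed train.py entrypoint.
submit_jobs(random_params, command='train.py', number_jobs=5,
            project_name='example-search', job_directory='.')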
def test_local_run_job_bundle_is_same_as_remote(self):
    import os
    from foundations_contrib.utils import foundations_home
    import tarfile
    from acceptance.mixins.run_process import run_process
    from foundations_contrib.global_state import redis_connection
    import foundations

    self._deploy_job_file('acceptance/fixtures/run_locally')
    local_job_id = redis_connection.get('foundations_testing_job_id').decode()

    with self.unset_foundations_home():
        remote_job = foundations.submit(
            job_directory='acceptance/fixtures/run_locally',
            command=['main.py'],
            num_gpus=0)
        remote_job.wait_for_deployment_to_complete()
        # Please forgive this hackery; we currently don't have an official way
        # of getting archives through the SDK
        remote_job._deployment.get_job_archive()

    root_archive_directory = os.path.expanduser(f'{foundations_home()}/job_data/archive')
    local_archive_directory = f'{root_archive_directory}/{local_job_id}/artifacts/'
    local_files = set(os.listdir(local_archive_directory))

    job_id = remote_job.job_name()
    job_id_prefix = f'{job_id}/'
    tar_file_name = f'{job_id}.tgz'

    tar = tarfile.open(tar_file_name)
    remote_files = set(
        name[len(job_id_prefix):]
        for name in tar.getnames()
        if name.startswith(job_id_prefix)
    )
    tar.close()

    try:
        os.remove(tar_file_name)
    except OSError:
        pass

    # Assert subset because the remote files actually contain an additional
    # file generated by the job submission process
    self.assertTrue(local_files.issubset(remote_files))
        # Tail of the sample_hyperparameters helper (its head is not shown in this snippet).
        hyperparameters[hparam] = search_space.sample()
    return hyperparameters


with open('./src/config/weapon_classifier.config.yaml') as file:
    config = yaml.load(file, Loader=yaml.FullLoader)

hyperparameter_ranges = {
    'num_epochs': 10,
    'batch_size': SearchSpace(16, 256, int),
    'learning_rate': SearchSpace(1e-5, 1e-2, float),
    'conv_layers': SearchSpace(0, 3, int),
    'conv_activation': 'relu',
    'conv_filters': [4, 8, 16],
    'conv_sizes': [(9, 9), (5, 5), (3, 3)],
    'pooling': SearchSpace(0, 1, int),
    'dense_layers': SearchSpace(0, 3, int),
    'dense_activation': 'relu',
    'dense_size': [64, 32, 16],
    'opt': SearchSpace(0, 1, int),
    'decay': SearchSpace(1e-7, 1e-5, float),
}

for _ in range(config.get('model_search', {}).get('num_jobs')):
    hyperparameters = sample_hyperparameters(hyperparameter_ranges)
    foundations.submit(scheduler_config='scheduler',
                       command='src/weapon_class_train_driver.py',
                       params=hyperparameters)
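The snippet above depends on a SearchSpace helper and a sample_hyperparameters function whose definitions are not shown. A minimal sketch of what they might look like, assuming uniform sampling between the given bounds and pass-through of fixed values; the exact behaviour in the original project may differ.

import numpy as np

class SearchSpace:
    """Hypothetical search-space helper: samples uniformly between the bounds
    and casts the result to the requested type (int or float)."""
    def __init__(self, minimum, maximum, value_type):
        self.minimum = minimum
        self.maximum = maximum
        self.value_type = value_type

    def sample(self):
        if self.value_type is int:
            return int(np.random.randint(self.minimum, self.maximum + 1))
        return float(np.random.uniform(self.minimum, self.maximum))

def sample_hyperparameters(hyperparameter_ranges):
    # Assumed behaviour: SearchSpace entries are sampled, fixed values
    # (numbers, strings, lists) are passed through unchanged.
    hyperparameters = {}
    for hparam, search_space in hyperparameter_ranges.items():
        if isinstance(search_space, SearchSpace):
            hyperparameters[hparam] = search_space.sample()
        else:
            hyperparameters[hparam] = search_space
    return hyperparameters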
import foundations

foundations.log_metric('name', 'job1')
deployment = foundations.submit(command=["job2.py"])
deployment.wait_for_deployment_to_complete(wait_seconds=10)
        'data_path': settings.DATA_DIR,
        'metadata_path': int(np.random.choice(list(range(len(settings.meta_data_path))))),
        'cache_path': settings.continue_cache_path,
    }
    return params


if __name__ == "__main__":
    submitted_jobs = set()
    for job_ in range(NUM_JOBS):
        print(f"packaging job {job_}")
        hyper_params = generate_params()
        while frozenset(hyper_params.items()) in submitted_jobs:
            hyper_params = generate_params()
        submitted_jobs.add(frozenset(hyper_params.items()))

        seed = np.random.randint(2e9)
        hyper_params['seed'] = int(seed)
        print(hyper_params)

        foundations.submit(
            scheduler_config='scheduler',
            job_directory='..',
            command=f'-m torch.distributed.launch --nproc_per_node={hyper_params["gpus"]} model5/main.py',
            params=hyper_params,
            stream_job_logs=False,
            num_gpus=hyper_params["gpus"])
import foundations
from constants import generate_config

NUM_JOBS = 100

for job_num in range(NUM_JOBS):
    print(f'job number {job_num}')
    config_dict = generate_config()
    print('Finished writing config.yml file')
    foundations.submit(
        scheduler_config="scheduler",
        command=["main.py"],
        project_name="Fake-Audio-Detection",
    )
import os
os.environ['FOUNDATIONS_COMMAND_LINE'] = 'True'

import foundations
import sys

deployment = foundations.submit(
    scheduler_config="scheduler",
    job_directory=sys.argv[1],
    command=["main.py", sys.argv[2], sys.argv[3], sys.argv[4]])
deployment.wait_for_deployment_to_complete(wait_seconds=10)
def deployment(self):
    return foundations.submit(project_name='tensorboard',
                              entrypoint='tensorboard_job',
                              job_directory='fixtures/tensorboard_job')
        'RandomErasing': int(np.random.choice([0])),
        'data_path': settings.DATA_DIR,
        'metadata_path': int(np.random.choice(list(range(len(settings.meta_data_path))))),
        'cache_path': settings.img_cache_path,
    }
    return params


if __name__ == "__main__":
    submitted_jobs = set()
    for job_ in range(NUM_JOBS):
        print(f"packaging job {job_}")
        hyper_params = generate_params()
        while frozenset(hyper_params.items()) in submitted_jobs:
            hyper_params = generate_params()
        submitted_jobs.add(frozenset(hyper_params.items()))

        seed = np.random.randint(2e9)
        hyper_params['seed'] = int(seed)
        print(hyper_params)

        foundations.submit(scheduler_config='scheduler',
                           job_directory='..',
                           command='model2/main.py',
                           params=hyper_params,
                           stream_job_logs=False,
                           num_gpus=1)
import os
os.environ['FOUNDATIONS_COMMAND_LINE'] = 'True'

import foundations
import sys
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--project-name", type=str, default=None)
parser.add_argument("--scheduler", type=str, default=None)
parser.add_argument("--job-directory", type=str, default=None)
parser.add_argument("--entrypoint", type=str, default=None)
parser.add_argument("command", nargs=argparse.REMAINDER)
args = parser.parse_args(sys.argv[1:])

foundations.submit(scheduler_config=args.scheduler,
                   job_directory=args.job_directory,
                   project_name=args.project_name,
                   entrypoint=args.entrypoint,
                   command=args.command)
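One possible way to drive the wrapper above; the script name (submit_cli.py), project name, and job arguments are illustrative assumptions. Everything after the recognised flags is collected by argparse.REMAINDER and forwarded unchanged as the job command.

import subprocess

# Hypothetical invocation of the wrapper, assumed to be saved as submit_cli.py;
# "main.py --epochs 10" lands in args.command and becomes the submitted job's command.
subprocess.run(
    ["python", "submit_cli.py",
     "--scheduler", "scheduler",
     "--job-directory", ".",
     "--project-name", "demo-project",
     "main.py", "--epochs", "10"],
    check=True)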
import os
import foundations

NUM_JOBS = 10

for job_num in range(NUM_JOBS):
    print(f'job number {job_num}')
    foundations.submit(scheduler_config="scheduler",
                       command=["main.py"],
                       project_name="Trump_Twitter_ML_Experiments")
        'max_lr': float(choice([0.005, 0.002])),
        'use_lr_scheduler': int(choice([0])),  # 0, 1 (2)
        'scheduler_gamma': float(choice([0.9])),  # 0.96, 0.95, 0.94 (0.96)
        'use_hidden_layer': int(choice([0])),  # 0, (1)
        'backbone': int(choice([7])),  # 1, 2, 3, 4, 5, 6, 7, 8, 9
        'same_transform': int(choice([0, 1])),  # TODO: Why
        'val_rate': 1,
        'data_path': settings.DATA_DIR,
        'metadata_path': int(choice(list(range(len(settings.meta_data_path))))),  # 0, 1, 2
        'bbox_path': settings.bbox_path,
        'cache_path': settings.video_cache_path,
        'diff_path': settings.diff_dict_path,
    }
    return params


if __name__ == "__main__":
    submitted_jobs = set()
    for job_ in range(NUM_JOBS):
        print(f"packaging job {job_}")
        hyper_params = generate_params()
        # while frozenset(hyper_params.items()) in submitted_jobs:
        #     hyper_params = generate_params()
        # submitted_jobs.add(frozenset(hyper_params.items()))

        seed = np.random.randint(2e9)
        hyper_params['seed'] = int(seed)
        print(hyper_params)

        foundations.submit(scheduler_config='scheduler',
                           job_directory='/home/kailu/deepfake',
                           command='model1/main.py',
                           params=hyper_params,
                           stream_job_logs=False,
                           num_gpus=1)