import os
import subprocess
import time

import dask
import numpy as np
import pandas as pd

# assemble_run_torque, MonitorJobs, replace_grid, and generate_topology are
# project helpers assumed to be defined elsewhere in this package.


def return_val_loss(command, torque, total_time, delay_time, job, gpu,
                    additional_command, additional_options):
    """Run command (via torque or locally) and block until the job monitor
    reports a validation loss; return that loss."""
    if torque:
        assemble_run_torque(command,
                            use_gpu=gpu,
                            additions=additional_command,
                            queue='gpuq' if gpu else 'normal',
                            time=np.ceil(total_time / 60.),
                            ngpu=1,
                            additional_options=additional_options)
    else:
        subprocess.call(command, shell=True)
    total_time *= 60.  # convert minutes to seconds for the monitor
    start_time = time.time()
    monitor = MonitorJobs(start_time, delay_time, total_time, job=job)
    monitor.run()
    while not monitor.stopped:
        time.sleep(delay_time)
    val_loss = monitor.return_val_loss()
    return val_loss
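# A minimal usage sketch (not from the original source): drive return_val_loss
# for one local run. The command string, job id, and timing values below are
# hypothetical placeholders; torque=False routes through subprocess instead of
# assemble_run_torque.
def _example_return_val_loss():
    return return_val_loss(
        command='methylcaps-hypjob hyperparameter_job --job 12345',  # hypothetical CLI call
        torque=False,
        total_time=60,  # minutes the monitor waits before giving up
        delay_time=30,  # seconds between validation-loss polls
        job=12345,  # hypothetical job id
        gpu=False,
        additional_command='',
        additional_options='')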
def coarse_scan(hyperparameter_input_csv,
                hyperparameter_output_log,
                generate_input,
                job_chunk_size,
                stratify_column,
                reset_all,
                torque,
                gpu,
                gpu_node,
                nohup,
                mlp=False,
                custom_jobs=None,
                model_complexity_factor=0.9,
                set_beta=-1.,
                n_jobs=4,
                categorical=True,
                add_softmax=False,
                additional_command="",
                cuda=True,
                new_grid=None,
                randomseed=42,
                additional_opts="",
                max_epochs=-1):
    """Perform a randomized hyperparameter grid search.

    Parameters
    ----------
    hyperparameter_input_csv : str
        CSV file containing hyperparameter inputs.
    hyperparameter_output_log : str
        CSV file containing prior runs.
    generate_input : bool
        Generate the hyperparameter input CSV.
    job_chunk_size : int
        Number of jobs to be launched at the same time.
    stratify_column : str or list of str
        Outcome column(s) to stratify/predict on.
    reset_all : bool
        Rerun all jobs previously scanned.
    torque : bool
        Run jobs using torque.
    gpu : int
        Which GPU to use; set to -1 to cycle over GPUs 0-3.
    gpu_node : int
        Which GPU node to use for torque submission; -1 is agnostic.
    nohup : bool
        Launch jobs using nohup.
    mlp : bool
        Run a prediction job (classification/regression) after the VAE.
    custom_jobs : list of pd.DataFrame
        Supply custom job parameters to be run.
    model_complexity_factor : float
        Degree of neural network model complexity for the hyperparameter
        search; lower values favor narrower networks, bounded between 0 and
        infinity.
    set_beta : float
        Fix beta (the KL-divergence weight) to this value instead of scanning
        over it; -1 scans the default beta grid.
    n_jobs : int
        Number of jobs to generate.
    categorical : bool
        Whether this is a classification task.
    add_softmax : bool
        Add a softmax layer at the end of the neural network.
    additional_command : str
        Additional commands for torque submission.
    cuda : bool
        Whether to use the GPU.
    new_grid : dict
        Optional replacement for the default hyperparameter grid.
    randomseed : int
        Seed for numpy's random number generator.
    additional_opts : str
        Additional options for torque submission.
    max_epochs : int
        Upper bound for the '--n_epochs' grid; -1 leaves it unchanged.
    """
    from itertools import cycle

    # avoid mutable default arguments
    if custom_jobs is None:
        custom_jobs = []
    if new_grid is None:
        new_grid = {}
    np.random.seed(randomseed)
    os.makedirs(os.path.dirname(hyperparameter_input_csv) or '.',
                exist_ok=True)
    generated_input = []
    if mlp:
        grid = {
            '--learning_rate_vae':
            [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1],
            '--learning_rate_mlp':
            [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1],
            '--weight_decay': [1e-4],
            '--n_epochs': [25, 50, 75, 100, 200, 500, 700],
            '--scheduler': ['warm_restarts', 'null'],
            '--t_max': [10],
            '--eta_min': [1e-7, 1e-6],
            '--t_mult': [1., 1.2, 1.5, 2],
            '--batch_size': [50, 100, 256, 512],
            '--dropout_p': [0., 0.1, 0.2, 0.3, 0.5],
            '--n_workers': [4],
            '--loss_reduction': ['sum']
        }
        topology_grid = [0, 100, 200, 300, 500, 1000, 2000, 3000, 4096]
    else:
        grid = {
            '--n_latent': [100, 150, 200, 300, 500],
            '--learning_rate':
            [5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1],
            '--weight_decay': [1e-4],
            '--n_epochs': [25, 50, 75, 100, 200, 500, 700],
            '--kl_warm_up': [0, 20],
            '--beta': [0., 0.5, 1, 10, 50, 100, 200, 500]
            if set_beta == -1. else [set_beta],
            '--scheduler': ['warm_restarts', 'null'],
            '--t_max': [10],
            '--eta_min': [1e-7, 1e-6],
            '--t_mult': [1., 1.2, 1.5, 2],
            '--batch_size': [50, 100, 256, 512],
            '--n_workers': [4],
            '--loss_reduction': ['sum']
        }
        topology_grid = [0, 100, 200, 300, 500, 1000, 2000]
    if new_grid:
        grid, topology_grid = replace_grid(grid, new_grid, topology_grid)
    if max_epochs > 0:
        grid['--n_epochs'] = [
            epoch for epoch in grid['--n_epochs'] if epoch <= max_epochs
        ]
    # draw 40 candidate hidden-layer topologies for the search
    grid['--hidden_layer_topology' if mlp else
         '--hidden_layer_encoder_topology'] = [
             generate_topology(
                 topology_grid,
                 probability_decay_factor=model_complexity_factor)
             for i in range(40)
         ]
    if generate_input:
        for i in range(n_jobs):
            generated_input.append(['False'] +
                                   [np.random.choice(grid[k]) for k in grid])
        generated_input = [
            pd.DataFrame(generated_input,
                         columns=['--job_name'] + list(grid.keys()))
        ]
    if custom_jobs:
        custom_jobs[0].loc[:, '--job_name'] = 'False'
        generated_input = custom_jobs
    if gpu == -1:
        gpus = cycle(range(4))
    else:
        gpus = cycle([gpu])
    if os.path.exists(hyperparameter_input_csv):
        df = pd.read_csv(hyperparameter_input_csv)
        df = [df[[col for col in list(df) if not col.startswith('Unnamed')]]]
    else:
        df = []
    df = pd.concat(df + generated_input,
                   axis=0)[['--job_name'] + list(grid.keys())].fillna('')
    print(df)
    if reset_all:
        df.loc[:, '--job_name'] = 'False'
    # rows still marked 'False' have not been assigned a job yet
    df_final = df[df['--job_name'].astype(str).str.lower() ==
                  'false'].reset_index(drop=True)[list(grid.keys())]
    commands = []
    for i in range(df_final.shape[0]):
        job_id = str(np.random.randint(0, 100000000))
        # keep only hyperparameters actually drawn for this row
        hyperparameter_args = ' '.join(
            '{} {}'.format(k2, df_final.loc[i, k2]) for k2 in list(df_final)
            if df_final.loc[i, k2] != '' and pd.notna(df_final.loc[i, k2]))
        if not mlp:
            commands.append(
                'sh -c "methylnet-embed perform_embedding -bce {} -v -j {} -hl {} -sc {} {} && '
                'pymethyl-visualize transform_plot -i embeddings/vae_methyl_arr.pkl '
                '-o visualizations/{}_vae_embed.html -c {} -nn 10 "'.format(
                    "-c" if cuda else "", job_id, hyperparameter_output_log,
                    stratify_column, hyperparameter_args, job_id,
                    stratify_column))
        else:
            commands.append(
                'sh -c "methylnet-predict make_prediction {} {} {} {} -v {} -j {} -hl {} {} && {}"'
                .format(
                    "-c" if cuda else "", '-sft' if add_softmax else '',
                    '-cat' if categorical else '',
                    ''.join(' -ic {}'.format(col) for col in stratify_column),
                    '-do' if stratify_column[0] == 'disease_only' else '',
                    job_id, hyperparameter_output_log, hyperparameter_args,
                    '&&'.join(
                        ' pymethyl-visualize transform_plot -i predictions/vae_mlp_methyl_arr.pkl '
                        '-o visualizations/{}_{}_mlp_embed.html -c {} -nn 8 '
                        .format(job_id, col, col)
                        for col in stratify_column)))
        # claim the first unassigned row for this job id
        df.loc[np.arange(df.shape[0]) == np.where(
            df['--job_name'].astype(str).str.lower() == 'false')[0][0],
               '--job_name'] = job_id
    for i in range(len(commands)):
        commands[i] = '{} {} {} {}'.format(
            'CUDA_VISIBLE_DEVICES="{}"'.format(next(gpus))
            if not torque else '', 'nohup' if nohup else '', commands[i],
            '&' if nohup else '')
    os.makedirs('visualizations', exist_ok=True)
    df.to_csv(hyperparameter_input_csv)
    if torque:
        for command in commands:
            # gpu_node could pin a host via additional_opts, e.g.
            # '' if gpu_node == -1 else ' -l hostlist=g0{}'.format(gpu_node)
            assemble_run_torque(command,
                                use_gpu=cuda,
                                additions=additional_command,
                                queue='gpuq' if cuda else 'normal',
                                time=4,
                                ngpu=1,
                                additional_options=additional_opts)
    else:
        if len(commands) == 1:
            subprocess.call(commands[0], shell=True)
        else:
            # run commands in chunks of roughly job_chunk_size
            commands = np.array_split(
                commands, max(1, len(commands) // job_chunk_size))
            for command_list in commands:
                if nohup:
                    for command in command_list:
                        print(command)
                        subprocess.call(command, shell=True)
                else:
                    if job_chunk_size <= 1:
                        for command in command_list:
                            subprocess.call(command, shell=True)
                    else:
                        # dispatch each chunk across parallel worker processes
                        dask.compute(*[
                            dask.delayed(lambda x: subprocess.call(
                                x, shell=True))(command)
                            for command in command_list
                        ],
                                     scheduler='processes')
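# A hedged usage sketch (paths and the column name are assumptions, not values
# from the package): draw four random VAE configurations and run them locally,
# two at a time via dask.
def _example_coarse_scan():
    coarse_scan(
        hyperparameter_input_csv='hyperparameters/hyperparameter_input.csv',  # hypothetical path
        hyperparameter_output_log='hyperparameters/hyperparameter_log.csv',  # hypothetical path
        generate_input=True,  # draw n_jobs new rows from the random grid
        job_chunk_size=2,  # dask executes two commands per chunk
        stratify_column='disease',  # hypothetical outcome column
        reset_all=False,
        torque=False,  # run locally instead of submitting to torque
        gpu=-1,  # cycle CUDA_VISIBLE_DEVICES over GPUs 0-3
        gpu_node=-1,
        nohup=False,
        mlp=False,  # scan the VAE grid rather than the MLP grid
        n_jobs=4)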
def hyperparameter_scan(train_methyl_array, val_methyl_array, interest_col,
                        n_bins, custom_loss, torque, search_strategy,
                        total_time, delay_time, gpu, additional_command,
                        additional_options, n_jobs, n_workers, update,
                        random_seed, optimize_time, capsule_choice,
                        custom_capsule_file, retrain_top_job, batch_size,
                        output_top_job_params, limited_capsule_names_file,
                        n_epochs, min_capsule_len_low_bound, gsea_superset,
                        tissue, number_sets, use_set, gene_context,
                        select_subtypes, custom_hyperparameters, min_capsules,
                        fit_spw, l1_l2):
    """Launch n_jobs randomized hyperparameter jobs through the
    methylcaps-hypjob command-line interface."""
    np.random.seed(random_seed)
    opts = dict(train_methyl_array=train_methyl_array,
                val_methyl_array=val_methyl_array,
                interest_col=interest_col,
                n_bins=n_bins,
                custom_loss=custom_loss,
                search_strategy=search_strategy,
                total_time=total_time,
                delay_time=delay_time,
                random_state=random_seed,
                batch_size=batch_size,
                n_epochs=n_epochs,
                min_capsule_len_low_bound=min_capsule_len_low_bound,
                number_sets=number_sets,
                custom_hyperparameters=custom_hyperparameters,
                min_capsules=min_capsules)
    # boolean flags are forwarded as bare options (empty-string values)
    if torque and not update:
        opts['torque'] = ''
    if use_set:
        opts['use_set'] = ''
    if gene_context:
        opts['gene_context'] = ''
    if fit_spw:
        opts['fit_spw'] = ''
    if l1_l2:
        opts['l1_l2'] = l1_l2
    if gsea_superset:
        opts['gsea_superset'] = gsea_superset
    if tissue:
        opts['tissue'] = tissue
    if gpu:
        opts['gpu'] = ''
    if optimize_time:
        opts['optimize_time'] = ''
    if capsule_choice:
        opts['capsule_choice'] = ' -cc '.join(
            list(filter(None, capsule_choice)))
    select_subtypes = list(filter(None, select_subtypes))
    if select_subtypes:
        opts['select_subtypes'] = ' -ss '.join(select_subtypes)
    if limited_capsule_names_file:
        opts['limited_capsule_names_file'] = limited_capsule_names_file
    if retrain_top_job:
        # retraining the best prior job needs only a single run
        n_jobs = 1
        opts['retrain_top_job'] = ''
    if output_top_job_params:
        opts['output_top_job_params'] = ''
    if custom_capsule_file:
        opts['custom_capsule_file'] = custom_capsule_file
    additional_opts = dict(additional_command=additional_command,
                           additional_options=additional_options)
    for job in [np.random.randint(0, 10000000) for i in range(n_jobs)]:
        opts['job'] = job
        command = 'methylcaps-hypjob hyperparameter_job {} {}'.format(
            ' '.join('--{} {}'.format(k, v) for k, v in opts.items()),
            ' '.join('--{} "{}"'.format(k, v)
                     for k, v in additional_opts.items()))
        if update:
            command = '{} {}'.format(command, '-u')
        # background the command unless it is a torque update submission
        command = '{} {}'.format(command,
                                 '&' if not (torque and update) else '')
        if update:
            if torque:
                assemble_run_torque(
                    command,
                    use_gpu=gpu,
                    queue='gpuq' if gpu else 'normal',
                    time=int(np.ceil(total_time / 60.)),
                    ngpu=1,
                    additions=additional_opts['additional_command'],
                    additional_options=additional_opts['additional_options'])
            else:
                command = '{} {}'.format(
                    'CUDA_VISIBLE_DEVICES=0' if gpu else '', command)
                # run the GPU-prefixed update command locally
                subprocess.call(command, shell=True)
        else:
            subprocess.call(command, shell=True)
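# A hedged usage sketch (all file names, the outcome column, and the capsule
# choice are illustrative assumptions): queue four random MethylCapsNet
# hyperparameter jobs locally, each polled for up to an hour downstream.
def _example_hyperparameter_scan():
    hyperparameter_scan(
        train_methyl_array='train_val_test_sets/train_methyl_array.pkl',  # hypothetical path
        val_methyl_array='train_val_test_sets/val_methyl_array.pkl',  # hypothetical path
        interest_col='disease',  # hypothetical outcome column
        n_bins=0,
        custom_loss='none',
        torque=False,
        search_strategy='random',
        total_time=60,  # minutes per job
        delay_time=60,  # seconds between status checks
        gpu=False,
        additional_command='',
        additional_options='',
        n_jobs=4,
        n_workers=4,
        update=False,  # build and background the commands without -u
        random_seed=42,
        optimize_time=False,
        capsule_choice=('genomic_binned',),  # hypothetical capsule selection
        custom_capsule_file='',
        retrain_top_job=False,
        batch_size=16,
        output_top_job_params=False,
        limited_capsule_names_file='',
        n_epochs=10,
        min_capsule_len_low_bound=5,
        gsea_superset='',
        tissue='',
        number_sets=25,
        use_set=False,
        gene_context=False,
        select_subtypes=(),
        custom_hyperparameters='hyperparameters.yaml',  # hypothetical config
        min_capsules=5,
        fit_spw=False,
        l1_l2='')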