def gen_qsubs():
    print('Generating qsub scripts...')
    qsubs_dir = _config.QSUBS_DIR + NAME + '/'
    util.ensure_dir_exists(qsubs_dir)
    qsub_commands = []

    script_id = NAME.split('_')[0]
    num_per_run = get_num_per_run()

    num_scripts = 0
    for start in range(0, len(fns), num_per_run):
        end = start + num_per_run
        command = f'python {NAME}.py run_single {start} {end}'

        # Write shell scripts
        sh_fn = qsubs_dir + f'q_{script_id}_{start}_{end}.sh'
        with open(sh_fn, 'w') as f:
            f.write(f'#!/bin/bash\n{command}\n')
        num_scripts += 1

        qsub_commands.append(
            f'qsub -j y -V -P regevlab -l h_rt=1:00:00 -wd {_config.SRC_DIR} {sh_fn} &'
        )

    # Save commands
    commands_fn = qsubs_dir + '_commands.sh'
    with open(commands_fn, 'w') as f:
        f.write('\n'.join(qsub_commands))
    subprocess.check_output(f'chmod +x {commands_fn}', shell=True)
    print(f'Wrote {num_scripts} shell scripts to {qsubs_dir}')
    return
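Nearly every snippet in this collection assumes a small filesystem helper named `ensure_dir_exists` (imported variously as `util.ensure_dir_exists` or `u.ensure_dir_exists`) whose definition is not shown. A minimal sketch of what it presumably does, assuming a standard `os.makedirs` wrapper; returning the path matters because several callers below chain on the result:

import os

# Assumed implementation (not shown in this collection): create the
# directory if it is missing and return the path, so calls such as
# ensure_dir_exists("Cache/") can be used inline.
def ensure_dir_exists(directory):
    os.makedirs(directory, exist_ok=True)
    return directory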
def __init__(self, **options):
    """options: Override any of the defaults in settings.py by passing in
    key/value pairs where the key is in lower case. For instance:

        workspace_def_path=/tmp/my_file.xml

    will override the workspace_def_path default.
    """
    check_dependencies.check_dependencies()
    self._options = self._set_options(**options)
    self._command_processor = command_processor.CommandProcessor(self._options)
    util.ensure_dir_exists(self._options['workspace_cache_root'])
def gen_qsubs_remainder(NAME, chart_fnm, extension):
    # Only generate qsubs for unsubmitted jobs, detected by checking whether
    # the first output file of each batch already exists in out_dir.
    print('Generating qsub scripts...')
    qsubs_dir = _config.QSUBS_DIR + NAME + '/'
    util.ensure_dir_exists(qsubs_dir)
    qsub_commands = []

    script_id = NAME.split('_')[0]

    out_dir = _config.OUT_PLACE + NAME + '/'
    fns = set(os.listdir(out_dir))

    qdf = pd.read_csv(inp_dir + f'{chart_fnm}.csv', index_col=0)
    names = qdf['Name (unique)']
    num_per_run = len(names) // MAX_QSUB_PROCESSES
    if num_per_run * MAX_QSUB_PROCESSES < len(names):
        num_per_run += 1

    num_scripts = 0
    for i in range(0, len(qdf), num_per_run):
        out_fn = names[i] + extension
        if out_fn in fns:
            continue
        start = i
        end = start + num_per_run
        command = f'python {NAME}.py run_qsubs {chart_fnm} {start} {end}'

        # Write shell scripts
        sh_fn = qsubs_dir + f'q_{script_id}_{chart_fnm}_{start}_{end}.sh'
        with open(sh_fn, 'w') as f:
            f.write(f'#!/bin/bash\n{command}\n')
        num_scripts += 1

        # Write qsub commands
        if NAME == 'c_dijkstra':
            vmem = 'h_vmem=8G' if 'highlevel' in chart_fnm else 'h_vmem=4G'
        else:
            vmem = 'h_vmem=1G'
        qsub_commands.append(
            f'qsub -j y -V -P regevlab -l h_rt=1:00:00,{vmem} -wd {_config.SRC_DIR} {sh_fn} &'
        )

    # Save commands
    commands_fn = qsubs_dir + '_commands.sh'
    with open(commands_fn, 'w') as f:
        f.write('\n'.join(qsub_commands))
    subprocess.check_output(f'chmod +x {commands_fn}', shell=True)
    print(f'Wrote {num_scripts} shell scripts to {qsubs_dir}')
    return
def main():
    sns.set()
    plt.rcParams['figure.figsize'] = (16.0, 8.0)

    document = CollegeDocumentLoader().load(join_curdir("data", "data.meta"))
    geodata = GeoLoader() \
        .load(join_curdir("data", "ne_110m_admin_1_states_provinces.shp"))
    data = CollegeLoader(document).load(join_curdir("data", "data.csv"))

    ensure_dir_exists(join_curdir("results"))
    plot_p1(data, document)
    plot_p2(data, document, geodata)
    plot_p3(data)
def upload_file():
    try:
        filename = request.headers.get('FILE_NAME')
        if 'process' in request.args:
            auto_process_scan = request.args.get('process').lower() in ['true', '1']
        else:
            auto_process_scan = cfg.AUTOPROCESS
        log.info('Receiving %s, autoprocess=%s', filename, auto_process_scan)
        if allowed_file(filename):
            filename = secure_filename(filename)
            # Determine final staging path for file and check if the file already exists
            basename = os.path.splitext(filename)[0]
            stagingdir = os.path.join(cfg.STAGING_FOLDER, basename)
            stagingpath = os.path.join(stagingdir, filename)
            if os.path.exists(stagingpath):
                log.info('File already exists on server: %s', stagingpath)
                receive_file(request, filename)
                return util.ret_ok('File already exists on server')
            # Temp location to receive the stream
            tmppath = os.path.join(cfg.TEMP_FOLDER, filename)
            with open(tmppath, 'wb') as f:
                receive_file(request, filename, f)
            # Move to staging area dir and return
            util.ensure_dir_exists(stagingdir)
            shutil.move(tmppath, stagingpath)  # TODO: check if move succeeded and log error if not
            log.info('Staged %s to %s', filename, stagingdir)
            # If uploading is complete, try to trigger processing
            if scan_done_uploading(stagingdir):
                log.info('Scan done uploading to %s', stagingdir)
                if auto_process_scan:
                    # NOTE: Comment out lines below to disable automated scan processing trigger
                    indexThread = threading.Thread(target=preprocess, args=(basename, log))
                    indexThread.start()
                    processThread = threading.Thread(target=trigger_processing, args=(basename, log))
                    processThread.start()
            return util.ret_ok()
        else:
            log.error('File type not allowed: %s', filename)
            log.error(request)
            raise Error(message='File type not allowed: ' + filename, status_code=415)
    except Exception as e:
        log.error(traceback.format_exc())
        # raise Error(message=('Unknown exception encountered %s' % str(e)), status_code=500)
        raise e
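A hypothetical client call against this endpoint. The route path, HTTP method, and server address are assumptions (the Flask route decorator is not shown); what the code does establish is the FILE_NAME header, the optional process query argument, and a streamed request body:

import requests

# Hypothetical client: /upload, PUT, and localhost:5000 are assumptions.
with open('scan_0001.zip', 'rb') as f:
    resp = requests.put(
        'http://localhost:5000/upload',
        params={'process': 'true'},              # opt in to automatic processing
        headers={'FILE_NAME': 'scan_0001.zip'},  # name used for staging
        data=f,                                  # stream the file body
    )
print(resp.status_code, resp.text)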
def gen_qsubs(modelexp_nm=''):
    # Generate qsub shell scripts and commands for easy parallelization
    print('Generating qsub scripts...')
    qsubs_dir = _config.QSUBS_DIR + NAME + '/'
    util.ensure_dir_exists(qsubs_dir)
    qsub_commands = []

    if modelexp_nm == '':
        modelexp_nm = 'modelexp_simple'

    print(f'Writing qsubs for {modelexp_nm}. OK?')
    input()

    exp_design = pd.read_csv(_config.DATA_DIR + f'{modelexp_nm}.csv')
    hyperparam_cols = [col for col in exp_design.columns if col != 'Name']

    # Parse df into dict
    hyperparam_combinations = dict()
    for idx, row in exp_design.iterrows():
        nm = row['Name']
        hps = '+'.join([f'{hp}:{row[hp]}' for hp in hyperparam_cols])
        hyperparam_combinations[nm] = hps

    # Generate qsubs
    num_scripts = 0
    for hyperparam_nm in hyperparam_combinations:
        hyperparam_setting = hyperparam_combinations[hyperparam_nm]
        command = f'python {NAME}.py {modelexp_nm} {hyperparam_nm} {hyperparam_setting}'
        script_id = NAME.split('_')[0]

        # Write shell scripts
        sh_fn = qsubs_dir + f'q_{modelexp_nm}_{hyperparam_nm}.sh'
        with open(sh_fn, 'w') as f:
            f.write(f'#!/bin/bash\n{command}\n')
        num_scripts += 1

        # Write qsub commands
        qsub_commands.append(
            f'qsub -V -P regevlab -l h_rt=16:00:00,h_vmem=4G -l os=RedHat7 -wd {_config.SRC_DIR} {sh_fn} &'
        )

    # Save commands
    commands_fn = qsubs_dir + '_commands.sh'
    with open(commands_fn, 'w') as f:
        f.write('\n'.join(qsub_commands))
    subprocess.check_output(f'chmod +x {commands_fn}', shell=True)
    print(f'Wrote {num_scripts} shell scripts to {qsubs_dir}')
    return
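For reference, a minimal illustration of the hp:value+hp:value string the loop above builds from each experiment-design row; the CSV column names other than Name are hypothetical:

import io
import pandas as pd

# Hypothetical modelexp CSV; only the 'Name' column is assumed by the
# code above, the other columns are illustrative hyperparameters.
csv = io.StringIO('Name,learning_rate,batch_size\nexp1,0.001,64\n')
exp_design = pd.read_csv(csv)

hyperparam_cols = [col for col in exp_design.columns if col != 'Name']
row = exp_design.iloc[0]
hps = '+'.join([f'{hp}:{row[hp]}' for hp in hyperparam_cols])
print(hps)  # -> learning_rate:0.001+batch_size:64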
def gen_qsubs(NAME, chart_fnm):
    # Generate qsub shell scripts and commands for easy parallelization
    print('Generating qsub scripts...')
    qsubs_dir = _config.QSUBS_DIR + NAME + '/'
    util.ensure_dir_exists(qsubs_dir)
    qsub_commands = []

    script_id = NAME.split('_')[0]

    qdf = pd.read_csv(inp_dir + chart_fnm + '.csv', index_col=0)
    names = qdf['Name (unique)']
    num_per_run = len(names) // MAX_QSUB_PROCESSES
    if num_per_run * MAX_QSUB_PROCESSES < len(names):
        num_per_run += 1

    num_scripts = 0
    for start in range(0, len(qdf), num_per_run):
        end = start + num_per_run
        command = f'python {NAME}.py run_qsubs {chart_fnm} {start} {end}'

        # Write shell scripts
        sh_fn = qsubs_dir + f'q_{script_id}_{chart_fnm}_{start}_{end}.sh'
        with open(sh_fn, 'w') as f:
            f.write(f'#!/bin/bash\n{command}\n')
        num_scripts += 1

        # Write qsub commands
        if NAME == 'c_dijkstra':
            vmem = 'h_vmem=8G' if 'highlevel' in chart_fnm else 'h_vmem=4G'
        else:
            vmem = 'h_vmem=1G'
        qsub_commands.append(
            f'qsub -j y -V -P regevlab -l h_rt=1:00:00,{vmem} -wd {_config.SRC_DIR} {sh_fn} &'
        )

    # Save commands
    commands_fn = qsubs_dir + '_commands.sh'
    with open(commands_fn, 'w') as f:
        f.write('\n'.join(qsub_commands))
    subprocess.check_output(f'chmod +x {commands_fn}', shell=True)
    print(f'Wrote {num_scripts} shell scripts to {qsubs_dir}')
    return
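The generated scripts call back into a run_qsubs entry point that is not part of this snippet. A plausible minimal sketch, assuming it slices the same chart CSV by the [start, end) indices passed on the command line; process_chart is a hypothetical stand-in for the real per-chart worker:

# Hypothetical consumer of the scripts generated above: re-read the same
# chart CSV and process the [start, end) slice of rows assigned to this job.
def run_qsubs(chart_fnm, start, end):
    qdf = pd.read_csv(inp_dir + chart_fnm + '.csv', index_col=0)
    for _, row in qdf.iloc[int(start):int(end)].iterrows():
        process_chart(row['Name (unique)'])  # hypothetical per-chart worker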
def download(self, track):
    codec = self.extension[1:]
    bitrate = 192
    track.ym_track.download(ensure_dir_exists(track.cache_entry), codec, bitrate)
    return self.read(track)
def run_inference(batch_nm='', exp_nm='', custom_hyperparams=''):
    parse_custom_hyperparams(custom_hyperparams)
    set_random_seed()

    # Load data
    dataset_nm = hparams['dataset']
    print(f'Loading dataset {dataset_nm} ...')
    data, data_stats = load_data(dataset_nm)
    print(data_stats)

    # Set up environment; batches with no name go in an 'unnamed' folder
    global out_dir
    if batch_nm == '':
        batch_nm = 'unnamed'
    out_dir = out_dir + batch_nm + '/'
    util.ensure_dir_exists(out_dir)

    global fold_nm
    fold_nm = exp_nm

    # Set up model
    print('Setting up model ...')
    model = LowLevelGPModel(data_stats).to(device)
    print('Created parameters:')
    total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    for param in model.parameters():
        print(type(param.data), param.shape)

    # Set up optimizer
    sgd_optimizer = pyro.optim.SGD({
        'lr': hparams['learning_rate'],
        'momentum': hparams['momentum'],
        'weight_decay': hparams['weight_decay'],
    })
    optimizer = sgd_optimizer

    create_model_dir()
    copy_model_script()
    print_and_log(f'hparams: {custom_hyperparams}')
    print_and_log(f'Total num. model parameters: {total_params}')
    print_and_log(f'Custom folder name: {exp_nm}')
    print_and_log(f'Dataset name: {dataset_nm}')

    train_model(model, optimizer, data)
    return
def propose_genotypes(obs_marginals, out_fn, options=''):
    '''Proposes genotypes.'''
    if options:
        print(f'Using custom hyperparameters: {options}')
        parse_custom_hyperparams(options)

    setup(obs_marginals['Symbols and linkage group index'])
    groups = util.parse_read_groups(obs_marginals)

    print('Proposing genotypes ...')
    gts = get_default_genotypes(obs_marginals, groups)

    out_dir = os.path.dirname(out_fn)
    util.ensure_dir_exists(out_dir)
    print(f'Writing {len(gts)} genotypes to {out_fn} ...')
    with open(out_fn, 'w') as f:
        for gt in gts:
            f.write(f'{gt}\n')
    print('Done.')
    return gts
def command_package_init(*args):
    """
    admin package init package_name

    Initialize a new package of Sirikata. Packages are a build of Sirikata
    you might want to execute multiple services from. This command sets up
    the basic directory structure for a package, including a customizable
    configuration file which you probably want to edit after running this
    command.
    """
    if len(args) == 0:
        print('No package name specified')
        return 1
    packname = args[0]

    # Set up the package directory
    util.ensure_dir_exists(package_path(packname))

    # Touch an empty config.py where the user can adjust settings
    config_py_file = open(package_path(packname, 'config.py'), 'w')
    config_py_file.close()
    return 0
# if cmd_line_opts.mode in ['simo', 'mimo']:
#     ax_params.append({
#         "name": "num_models",
#         "type": "range",
#         "bounds": [2, 8],
#     })

ax = AxClient()
ax.create_experiment(
    name="ensemble_net_tuning",
    parameters=ax_params,
    objective_name="final_loss",
    minimize=True,
)

u.ensure_dir_exists("logs/%s" % cmd_line_opts.group)
log = open("logs/%s/ax_trials.tsv" % cmd_line_opts.group, "w")
print("trial_index\tparameters\truntime\tfinal_loss", file=log)

end_time = time.time() + cmd_line_opts.run_time_sec
while time.time() < end_time:
    parameters, trial_index = ax.get_next_trial()
    log_record = [trial_index, json.dumps(parameters)]
    print("starting", log_record)

    class Opts(object):
        pass
    opts = Opts()
fitness_log = u.Log(opts.fitness_log_file)

generation = 0
while not es.stop():
    # Fetch next set of trials
    trial_weights = es.ask()

    # Run eval
    fitnesses = []
    for member_idx, weights in enumerate(trial_weights):
        agent.set_weights_of_model(weights)
        fitness = cartpole.fitness(agent)
        fitnesses.append(fitness)

    # Update es
    # Note: CMA-ES is trying to minimise, so negate the fitnesses
    es.tell(trial_weights, -1 * np.array(fitnesses))

    # Save best weights
    if opts.weights_dir is not None:
        u.ensure_dir_exists(opts.weights_dir)
        np.save("%s/%05d.npy" % (opts.weights_dir, generation), es.result[0])

    # Report results
    generation += 1
    fitness_log.log(generation, np.max(fitnesses))
    es.result_pretty()
    sys.stdout.flush()
    es.disp()
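The es object consumed by the loop above is constructed elsewhere. A minimal sketch of that setup using the standard cma package API; the flat initial weight vector accessor and the step size are assumptions:

import cma

# Hypothetical setup for the `es` object used above; get_weights_of_model
# as a flat-vector accessor and sigma0 are assumptions.
initial_weights = agent.get_weights_of_model()
sigma0 = 0.5  # assumed initial standard deviation of the search distribution
es = cma.CMAEvolutionStrategy(initial_weights, sigma0)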
from PIL import Image, ImageDraw
import os
from data import H, W
import numpy as np
import util as u

# TODO: add opts

# Generate synthetic toy data where each frame is its own colour.
for frame_num, colour in enumerate(
        ['#ff0000', '#ffff00', '#00ff00', '#00ffff', '#0000ff', '#ff00ff']):
    for run_id in range(2):
        for camera_id in range(5):
            output_dir = "imgs/00_rgb/c%02d/r%02d/" % (camera_id, run_id)
            u.ensure_dir_exists(output_dir)
            img = Image.new('RGB', (W, H), (0, 0, 0))
            canvas = ImageDraw.Draw(img)
            # Choose a rectangle of minimum size
            rectangle_area = 0
            while rectangle_area < 1000:
                x0, x1 = np.random.randint(0, W, size=2)
                y0, y1 = np.random.randint(0, H, size=2)
                rectangle_area = np.abs((x1 - x0) * (y1 - y0))
            print("frame_num", frame_num, "rectangle_area", rectangle_area)
            canvas.rectangle([x0, y0, x1, y1], fill=colour)
            img.save("%s/f%03d.png" % (output_dir, frame_num))
#!/usr/bin/python3
from fuse import FUSE

from mpd_fs import MpdFilesystem
from util import ensure_dir_exists

if __name__ == "__main__":
    FUSE(MpdFilesystem(), ensure_dir_exists("Music/"), foreground=True)
'''
import _config, util
import sys, os, pickle, fnmatch, datetime, subprocess, functools, re
import numpy as np, pandas as pd
from collections import defaultdict, Counter

import b_graph, segment, _qsub
import _notelines, _movement, _stepcharts

# Default params
inp_dir_b = _config.OUT_PLACE + 'b_graph/'
inp_dir_segment = _config.OUT_PLACE + 'segment/'
inp_dir_c = _config.OUT_PLACE + 'c_dijkstra/'
NAME = util.get_fn(__file__)
out_dir = _config.OUT_PLACE + NAME + '/'
util.ensure_dir_exists(out_dir)

mover = None

import _annotate_local, _annotate_global, _annotate_post
annot_types = _annotate_local.annot_types
annot_types.update(_annotate_global.annot_types)
annot_types.update(_annotate_post.annot_types)

add_annots = {
    'Notes per second since downpress': float,
}
annot_types.update(add_annots)

'''
Parsing
def __init__(self):
    self.cache_dir = ensure_dir_exists("Cache/")
    self.extension = ".mp3"
feature_matrix_filename = "/var/shared/openstream/examples/jacobi-2d/14_11_1_instances.csv"

if experiment_folder.startswith("/"):
    output_schedule_filename = str(experiment_folder) + "/schedules/schedule.csv"
    MODEL_SAVE_FILENAME = experiment_folder + "/" + MODEL_SAVE_FILENAME
else:
    # Relative path: resolve against the current working directory
    p = subprocess.Popen("echo `pwd`/" + str(experiment_folder) + "/schedules/schedule.csv",
                         stdout=subprocess.PIPE, shell=True)
    output, err = p.communicate()
    output_schedule_filename = output.decode("utf-8").replace("\n", "")
    MODEL_SAVE_FILENAME = "./" + experiment_folder + "/" + MODEL_SAVE_FILENAME

util.ensure_dir_exists(output_schedule_filename)
util.ensure_dir_exists(MODEL_SAVE_FILENAME)

logging.debug("Initializing...")
logging.debug("Agent...")
agent = Agent(saved_execution_times_prefix,
              adjacency_matrix_filename,
              feature_matrix_filename,
              benchmark,
              execution_features,
              output_schedule_filename=output_schedule_filename,
              adjacency_is_sparse=SPARSE_ADJ,
              num_repeats=NUM_REPEATS)
logging.debug("Replay Memory...")
full_rgbs_t1.append(full_rgb)
full_dithers_t1.append(full_true_dither)

full_rgbs_t1 = np.stack(full_rgbs_t1)
full_dithers_t0 = np.stack(full_dithers_t0)
full_dithers_t1 = np.stack(full_dithers_t1)

# JIT the generator now (we'll use it for predicting against the full res
# images) and also the two loss fns
if JIT:
    generator = objax.Jit(generator)
    generator_loss = objax.Jit(generator_loss, generator.vars())
    discriminator_loss = objax.Jit(discriminator_loss, discriminator.vars())

# Set up output directory for full res samples
u.ensure_dir_exists("full_res_samples/%s" % RUN)
if os.path.exists("full_res_samples/latest"):
    os.remove("full_res_samples/latest")
os.symlink(RUN, "full_res_samples/latest")

# Init dataset iterator
dataset = data.dataset(manifest_file=opts.manifest_file,
                       batch_size=opts.batch_size,
                       patch_size=opts.patch_size)

# Set up ckpting for G and D
generator_ckpt = objax.io.Checkpoint(logdir=f"ckpts/{RUN}/generator/", keep_ckpts=20)
discriminator_ckpt = objax.io.Checkpoint(logdir=f"ckpts/{RUN}/discriminator/", keep_ckpts=20)
parser.add_argument('--epochs', type=int, default=20)
parser.add_argument('--steps-per-epoch', type=int, default=20)
parser.add_argument('--run', type=str, default='.',
                    help='run name to use as postfix for model saving, tb output')
parser.add_argument('--model-input', type=str, default=None,
                    help='if set, load weights from this model file')
opts = parser.parse_args()
print(opts)

u.ensure_dir_exists("runs/%s" % opts.run)

triplet_selector = triplet_selection.TripletSelection(opts.img_dir,
                                                      opts.negative_frame_range,
                                                      opts.negative_selection_mode)

examples = data.a_p_n_iterator(opts.batch_size, triplet_selector)

model, inputs, loss_fn = m.construct_model(opts.embedding_dim,
                                           opts.model_input,
                                           opts.learning_rate,
                                           opts.margin)

class NumZeroLossCB(callbacks.Callback):
    def __init__(self, batch_size=16):
        self.batch_size = batch_size
        self.sess = tf.Session()
def predict(obs_reads_df, proposed_gts, out_dir, options=''):
    '''Main public-facing function.'''
    read_segments = util.parse_read_groups(obs_reads_df)
    check_valid_input(obs_reads_df, read_segments, proposed_gts)

    if options:
        print(f'Using custom hyperparameters: {options}')
        parse_custom_hyperparams(options)
    set_random_seed()

    # Load data and set up
    dataset = MarginalDirectedEvolutionDataset(obs_reads_df, proposed_gts, read_segments)
    updated_proposed_genotypes = dataset.genotypes

    print('Setting up ...')
    model = MarginalFitnessModel(dataset.package).to(device)
    # for param in model.parameters():
    #     print(type(param.data), param.shape)

    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=hparams['learning_rate'],
        weight_decay=hparams['weight_decay'],
    )
    schedulers = {
        'plateau': torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            patience=hparams['plateau_patience'],
            threshold=hparams['plateau_threshold'],
            factor=hparams['plateau_factor'],
            verbose=True,
            threshold_mode='rel',
        )
    }

    # Normalize the trailing slash before recording model_dir so both
    # names refer to the same path
    global model_dir
    if out_dir[-1] != '/':
        out_dir += '/'
    model_dir = out_dir
    util.ensure_dir_exists(out_dir)

    global log_fn
    log_fn = out_dir + '_log.out'
    with open(log_fn, 'w') as f:
        pass
    print_and_log('model dir: ' + model_dir)

    # Inference
    print('Running inference ...')
    fitness, fq_mat, pred_marginals = train_model(model, optimizer, schedulers, dataset)

    package = {
        'fitness': fitness,
        'fq_mat': fq_mat,
        'pred_marginals': pred_marginals,
        'updated_proposed_gts': updated_proposed_genotypes,
        'num_updated_proposed_gts': len(updated_proposed_genotypes),
        'fitness_df': pd.DataFrame({
            'Genotype': list(updated_proposed_genotypes),
            'Inferred fitness': fitness,
        }),
        'genotype_matrix': pd.DataFrame(
            fq_mat.T,
            index=updated_proposed_genotypes,
            columns=dataset.timepoints,
        ),
    }

    # Save results
    for fn, df in {
        '_final_fitness.csv': package['fitness_df'],
        '_final_genotype_matrix.csv': package['genotype_matrix'],
    }.items():
        print(f'Saving {out_dir}{fn} ...')
        df.to_csv(out_dir + fn)

    out_fn = out_dir + '_final_package.pkl'
    print(f'Saving {out_fn} ...')
    with open(out_fn, 'wb') as f:
        pickle.dump(package, f)

    print('Done.')
    return package