def do_sim(config, scripts):
    """Run one pipeline configuration inside its own working directory.

    Creates a directory named after the config, chdirs into it, runs the
    pipeline, then restores the previous working directory.

    Args:
        config: mapping with at least the keys 'name', 'parameters' and
            'options'.
        scripts: forwarded unchanged to run_pipeline.

    Raises:
        OSError: if the directory already exists or cannot be created.
        Whatever run_pipeline raises; the working directory is restored
        regardless.
    """
    logger.info("###################### Running pipeline config '%s' "
                "######################", config['name'])
    logger.debug("Producing directory '%s'", config['name'])
    os.mkdir(config['name'])
    os.chdir(config['name'])
    logger.debug("CWD: %s", os.getcwd())
    try:
        run_pipeline(config['parameters'], config['options'], scripts=scripts)
    finally:
        # Restore the previous working directory even if the pipeline
        # fails; otherwise one failing config strands the process inside
        # its directory and breaks every subsequent config.
        os.chdir('..')
def run_test_changing_bitrate(video, parts, bitrate):
    """Run the bitrate-change transcoding test for *video*.

    Builds the bitrate-change task definition, executes the pipeline in a
    dedicated test directory, and removes that directory once the test
    has passed.
    """
    source_video = video
    params = utils.create_bitrate_change_params(source_video, parts, bitrate)
    work_dir = utils.build_test_directory_path(source_video, "change-bitrate")
    pipeline.run_pipeline(params, work_dir, utils.DOCKER_IMAGE)
    # Cleanup deliberately happens only after success, so a failing test
    # leaves its directory behind for the user to inspect.
    pipeline.clean_step(work_dir)
def change_codec_test(video, target_codec, parts):
    """Run the codec-change transcoding test for *video*.

    Builds the codec-change task definition for *target_codec*, executes
    the pipeline in a codec-specific test directory, and removes that
    directory once the test has passed.
    """
    source_video = video
    params = utils.create_codec_change_params(source_video, target_codec,
                                              parts)
    work_dir = utils.build_test_directory_path(
        source_video, "change-codec/" + target_codec)
    pipeline.run_pipeline(params, work_dir, utils.DOCKER_IMAGE)
    # Cleanup deliberately happens only after success, so a failing test
    # leaves its directory behind for the user to inspect.
    pipeline.clean_step(work_dir)
def main():
    """CLI entry point: parse arguments, run the citation pipeline, export.

    Parses the command line, configures logging, extracts citations from
    the input, saves them, and bundles the output into a zip archive.
    """
    cli_args = parse_command_line_args()
    setup_logging(cli_args.verbose)
    found_citations = run_pipeline(
        cli_args.input,
        cli_args.keyword_file,
        find_authors=cli_args.find_authors,
        resolve_repetitions=cli_args.resolve_repetitions,
    )
    save_citations(found_citations, cli_args.output)
    create_export_bundle(cli_args.output, cli_args.zip_output)
def kfold_run_pipeline(model_module, model_params, X, y, nb_features,
                       nb_classes, k, perm_indices, interpret_model,
                       out_directory, id_string):
    """Run the training/interpretation pipeline on each of k CV folds.

    For every fold index, partitions (X, y) via emr.get_k_fold_partition,
    creates a per-fold output subdirectory, runs run_pipeline on that
    partition, and accumulates per-fold metrics and contribution sums.

    Args:
        model_module: forwarded to run_pipeline.
        model_params: indexable per fold; model_params[k_idx] is the
            parameter set used for fold k_idx.
        X, y: full dataset, partitioned per fold.
        nb_features, nb_classes: forwarded to run_pipeline.
        k: number of folds.
        perm_indices: permutation used by emr.get_k_fold_partition.
        interpret_model: forwarded to run_pipeline.
        out_directory: parent directory; '<k_idx>_idx_partition'
            subdirectories are created under it.
        id_string: unused in this function body — TODO confirm whether it
            can be dropped or should be forwarded.

    Returns:
        ((losses, accs, runtimes),
         (list_contrib_sums_D, list_contrib_sums_D2, list_contrib_sums),
         pairs): each list has one entry per fold; pairs is asserted
        identical across all folds.
    """
    # NOTE(review): a second, nearly identical definition of this function
    # appears later in this file and will shadow this one at import time —
    # confirm which copy is intended.
    losses, accs, runtimes = [], [], []
    list_contrib_sums_D, list_contrib_sums_D2, list_contrib_sums = \
        [], [], []
    print('Starting trials ... k = {}'.format(k))
    print()
    pairs = None
    for k_idx in range(k):
        print('Partition k = {}'.format(k_idx))
        print()
        data_partition_dict = emr.get_k_fold_partition(
            X, y, k_idx=k_idx, k=k, perm_indices=perm_indices)
        sub_out_directory = '{}/{}_idx_partition'.format(out_directory,
                                                         k_idx)
        if not os.path.exists(sub_out_directory):
            os.makedirs(sub_out_directory)
        (loss, acc, runtime), (sums_D, sums_D2, sums_contribs, curr_pairs) = \
            run_pipeline(model_module, model_params[k_idx],
                         data_partition_dict, nb_features=nb_features,
                         nb_classes=nb_classes,
                         interpret_model=interpret_model,
                         out_directory=sub_out_directory)
        losses.append(loss)
        accs.append(acc)
        runtimes.append(runtime)
        list_contrib_sums_D.append(sums_D)
        list_contrib_sums_D2.append(sums_D2)
        list_contrib_sums.append(sums_contribs)
        # The interpreted feature pairs must agree across folds: capture
        # them from the first fold, then verify each later fold matches.
        if pairs is None:
            pairs = curr_pairs
        else:
            assert pairs == curr_pairs
        print('Finished with partition k = {}'.format(k_idx))
        print('=' * 72)
        print()
    return (losses, accs, runtimes), (list_contrib_sums_D,
                                      list_contrib_sums_D2,
                                      list_contrib_sums), pairs
def kfold_run_pipeline(model_module, model_params, X, y, nb_features,
                       nb_classes, k, perm_indices, interpret_model,
                       out_directory, id_string):
    """Execute run_pipeline over k cross-validation folds, collecting results.

    Each fold gets its own data partition (via emr.get_k_fold_partition)
    and its own output subdirectory. Per-fold losses, accuracies, runtimes
    and contribution sums are gathered into parallel lists; the feature
    pairs returned by the pipeline must be identical across folds.

    Returns:
        ((losses, accs, runtimes),
         (sums_D per fold, sums_D2 per fold, contribution sums per fold),
         pairs)
    """
    fold_losses = []
    fold_accs = []
    fold_runtimes = []
    sums_D_per_fold = []
    sums_D2_per_fold = []
    contrib_sums_per_fold = []
    shared_pairs = None

    print('Starting trials ... k = {}'.format(k))
    print()

    for fold in range(k):
        print('Partition k = {}'.format(fold))
        print()

        partition = emr.get_k_fold_partition(X, y, k_idx=fold, k=k,
                                             perm_indices=perm_indices)

        fold_dir = '{}/{}_idx_partition'.format(out_directory, fold)
        if not os.path.exists(fold_dir):
            os.makedirs(fold_dir)

        metrics, interpretation = run_pipeline(
            model_module, model_params[fold], partition,
            nb_features=nb_features, nb_classes=nb_classes,
            interpret_model=interpret_model, out_directory=fold_dir)
        loss, acc, runtime = metrics
        sums_D, sums_D2, sums_contribs, fold_pairs = interpretation

        fold_losses.append(loss)
        fold_accs.append(acc)
        fold_runtimes.append(runtime)
        sums_D_per_fold.append(sums_D)
        sums_D2_per_fold.append(sums_D2)
        contrib_sums_per_fold.append(sums_contribs)

        # Every fold must report the same feature pairs as the first one.
        if shared_pairs is None:
            shared_pairs = fold_pairs
        else:
            assert shared_pairs == fold_pairs

        print('Finished with partition k = {}'.format(fold))
        print('=' * 72)
        print()

    return ((fold_losses, fold_accs, fold_runtimes),
            (sums_D_per_fold, sums_D2_per_fold, contrib_sums_per_fold),
            shared_pairs)
def run(self):
    """Run the pipeline over the input sequence files, then close the trace.

    Side effects: pipeline.run_pipeline writes to self.trace_stream; a
    completion message naming the gene is printed on success; the trace
    stream is always closed, even if the pipeline raises.
    """
    try:
        pipeline.run_pipeline(self.options, self.in_seq_files, False,
                              self.trace_stream)
        # %-formatting keeps this line valid under both Python 2 and
        # Python 3 (the original used a Python-2-only print statement).
        print("%s processing completed" % (self.options.gene_name,))
    finally:
        # Close the stream even on failure so the trace file is not left
        # open/unflushed.
        self.trace_stream.close()
for index, row in tqdm(user_data.iterrows(), desc="Setting other phases"): if data.loc[index, 'phase'] == "": # Set phase to indicate the day of training data.loc[index, 'phase'] = self.get_experiment_day(row.DateTime, first_day, id_=id_) # # check for ambiguity on april 5th # if first_day in ['05', 5]: # if data.loc[index, 'phase'] == "day5": # data.loc[index, 'phase'] = key original_max_columns = pd.options.display.max_columns original_max_colwidth = pd.options.display.max_colwidth pd.options.display.max_columns = 30 # pd.options.display.max_rows = 400 pd.options.display.max_colwidth = 300 pd.options.display.width = 120 # print(data.loc[data.UserId == user]) pd.options.display.max_columns = original_max_columns pd.options.display.max_colwidth = original_max_colwidth return data if __name__ == "__main__": import pipeline pipeline.run_pipeline(False)