Example #1
import logging
import os

logger = logging.getLogger(__name__)

# run_pipeline is provided by the surrounding project module.
def do_sim(config, scripts):
    logger.info("###################### Running pipeline config '%s' "
                "######################", config['name'])
    logger.debug("Producing directory '%s'", config['name'])
    os.mkdir(config['name'])
    os.chdir(config['name'])
    logger.debug("CWD: %s", os.getcwd())
    run_pipeline(config['parameters'], config['options'], scripts=scripts)
    os.chdir('..')
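A minimal driver sketch for do_sim; the config keys ('name', 'parameters', 'options') are the ones the function reads, but the concrete values and the script list are placeholders, not values from the project:

# Hypothetical configs; the 'parameters'/'options' payloads depend on run_pipeline.
configs = [
    {'name': 'baseline', 'parameters': {}, 'options': {}},
    {'name': 'tuned', 'parameters': {}, 'options': {}},
]
for cfg in configs:
    do_sim(cfg, scripts=[])  # placeholder script list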
Example #2
def run_test_changing_bitrate(video, parts, bitrate):
    file_to_transcode = video

    task_def = utils.create_bitrate_change_params(file_to_transcode, parts, bitrate)
    tests_dir = utils.build_test_directory_path(file_to_transcode, "change-bitrate")

    pipeline.run_pipeline(task_def, tests_dir, utils.DOCKER_IMAGE)

    # Intentionally skipped if the test fails, so the user can inspect the test directory.
    pipeline.clean_step(tests_dir)
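A hedged call sketch; the file path, part count, and bitrate string below are placeholders, not values taken from the project:

run_test_changing_bitrate('samples/video.mp4', parts=2, bitrate='1000k')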
Example #3
def change_codec_test(video, target_codec, parts):
    file_to_transcode = video

    task_def = utils.create_codec_change_params(file_to_transcode,
                                                target_codec, parts)
    tests_dir = utils.build_test_directory_path(file_to_transcode,
                                                "change-codec/" + target_codec)

    pipeline.run_pipeline(task_def, tests_dir, utils.DOCKER_IMAGE)

    # Intentionally skipped if the test fails, so the user can inspect the test directory.
    pipeline.clean_step(tests_dir)
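An analogous hedged call; 'h264' is a placeholder codec name (the value is only used as a string in the test-directory path):

change_codec_test('samples/video.mp4', target_codec='h264', parts=2)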
Example #4
def main():
    args = parse_command_line_args()
    setup_logging(args.verbose)

    citations = run_pipeline(args.input,
                             args.keyword_file,
                             find_authors=args.find_authors,
                             resolve_repetitions=args.resolve_repetitions)
    save_citations(citations, args.output)
    create_export_bundle(args.output, args.zip_output)
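main() only shows which attributes the parsed arguments must carry. A plausible argparse sketch that satisfies them; all flag names and defaults are assumptions, not the project's actual CLI:

import argparse

def parse_command_line_args():
    # Hypothetical CLI; argparse maps '--zip-output' to args.zip_output, etc.
    parser = argparse.ArgumentParser(description='citation extraction pipeline')
    parser.add_argument('input')
    parser.add_argument('keyword_file')
    parser.add_argument('--output', default='citations.csv')
    parser.add_argument('--zip-output', default='citations.zip')
    parser.add_argument('--find-authors', action='store_true')
    parser.add_argument('--resolve-repetitions', action='store_true')
    parser.add_argument('--verbose', action='store_true')
    return parser.parse_args()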
Example #5
import os

# `emr` and `run_pipeline` are provided by the surrounding project.
def kfold_run_pipeline(model_module, model_params, X, y, nb_features,
                       nb_classes, k, perm_indices, interpret_model,
                       out_directory, id_string):
    # Per-fold metrics and per-fold interpretation sums.
    losses, accs, runtimes = [], [], []
    list_contrib_sums_D, list_contrib_sums_D2, list_contrib_sums = [], [], []

    print('Starting trials ... k = {}'.format(k))
    print()

    pairs = None
    for k_idx in range(k):
        print('Partition k = {}'.format(k_idx))
        print()
        data_partition_dict = emr.get_k_fold_partition(
            X, y, k_idx=k_idx, k=k, perm_indices=perm_indices)
        sub_out_directory = '{}/{}_idx_partition'.format(out_directory, k_idx)
        os.makedirs(sub_out_directory, exist_ok=True)

        (loss, acc, runtime), (sums_D, sums_D2, sums_contribs, curr_pairs) = \
            run_pipeline(model_module, model_params[k_idx],
                data_partition_dict, nb_features=nb_features,
                nb_classes=nb_classes, interpret_model=interpret_model,
                out_directory=sub_out_directory)

        losses.append(loss)
        accs.append(acc)
        runtimes.append(runtime)

        list_contrib_sums_D.append(sums_D)
        list_contrib_sums_D2.append(sums_D2)
        list_contrib_sums.append(sums_contribs)

        # `pairs` must be identical across folds.
        if pairs is None:
            pairs = curr_pairs
        else:
            assert pairs == curr_pairs

        print('Finished with partition k = {}'.format(k_idx))
        print('=' * 72)
        print()

    return (losses, accs,
            runtimes), (list_contrib_sums_D, list_contrib_sums_D2,
                        list_contrib_sums), pairs
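A hedged invocation sketch. Since the function indexes model_params[k_idx], model_params must be a sequence of length k; perm_indices is assumed to be a permutation of the sample indices, and model_module plus base_params are assumed to already exist in the project:

import numpy as np

X = np.random.rand(100, 8)        # placeholder data
y = np.random.randint(0, 2, 100)  # placeholder labels
k = 5
perm_indices = np.random.permutation(len(X))
(losses, accs, runtimes), contrib_sums, pairs = kfold_run_pipeline(
    model_module, [base_params] * k, X, y,
    nb_features=X.shape[1], nb_classes=2, k=k,
    perm_indices=perm_indices, interpret_model=False,
    out_directory='out', id_string='trial-0')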
Example #6
def run(self):
    pipeline.run_pipeline(self.options, self.in_seq_files, False,
                          self.trace_stream)
    print(self.options.gene_name, "processing completed")
    self.trace_stream.close()
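The enclosing class is not shown in the excerpt. A minimal sketch of what it might provide, with the attribute names taken from run() and everything else assumed:

class GeneProcessor:  # hypothetical name
    def __init__(self, options, in_seq_files, trace_path):
        self.options = options            # per-gene pipeline options
        self.in_seq_files = in_seq_files  # input sequence files
        self.trace_stream = open(trace_path, 'w')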
Example #7
            for index, row in tqdm(user_data.iterrows(),
                                   desc="Setting other phases"):
                if data.loc[index, 'phase'] == "":
                    # Set phase to indicate the day of training.
                    data.loc[index, 'phase'] = self.get_experiment_day(
                        row.DateTime, first_day, id_=id_)
                    # # check for ambiguity on april 5th
                    # if first_day in ['05', 5]:
                    #     if data.loc[index, 'phase'] == "day5":
                    #         data.loc[index, 'phase'] = key

            # Temporarily widen the display for debug inspection, then restore
            # the previous settings (including display.width, which the
            # original excerpt forgot to restore).
            original_max_columns = pd.options.display.max_columns
            original_max_colwidth = pd.options.display.max_colwidth
            original_width = pd.options.display.width
            pd.options.display.max_columns = 30
            # pd.options.display.max_rows = 400
            pd.options.display.max_colwidth = 300
            pd.options.display.width = 120
            # print(data.loc[data.UserId == user])

            pd.options.display.max_columns = original_max_columns
            pd.options.display.max_colwidth = original_max_colwidth
            pd.options.display.width = original_width
        return data
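The manual save/restore of display options above can also be written with pandas' option_context, which restores the previous values automatically. A behavior-equivalent sketch; the print target is hypothetical, since the original print is commented out:

import pandas as pd

with pd.option_context('display.max_columns', 30,
                       'display.max_colwidth', 300,
                       'display.width', 120):
    print(data.loc[data.UserId == user])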


Example #8
if __name__ == "__main__":
    import pipeline
    pipeline.run_pipeline(False)