Example #1
0
def pattern_selection(project_directory, **kwargs):
    """Run the pattern selection step and write its output files.

    The step resolves its parameters from ``kwargs``, validates the loci
    arguments, reads file paths and settings recorded in the existing
    preprocessing history, loads the pattern JSON, applies the requested
    site/strain filters, computes a minimum spanning set, and writes the
    haplotype, amplicon, pattern and summary outputs. Every output path and
    the total run time are recorded in a new pattern selection history.

    Args:
        project_directory: Object used to create output files
            (``make_new_file``) and to locate the preprocessing history
            (``get_parent_subdirectory_file``,
            ``get_parent_directory_timestamp``); its ``timestamp``
            attribute is stored in the new history.
        **kwargs: Forwarded unchanged to ``_set_parameters``. The resulting
            mapping is read for the keys ``max_loci``, ``required_loci``,
            ``exclude_loci``, ``exclude_strains``, ``res``, ``stop_at_res``,
            ``reps``, ``max_res`` and ``n_threads``.

    Returns:
        None. Results are written to files and to the history.

    Raises:
        KeyError: If the preprocessing "SEP" parameter is not one of
            ``comma``, ``space`` or ``tab``.
    """
    logger = logging.getLogger(__name__)
    logger.info("BEGIN Pattern Selection")
    params = _set_parameters(**kwargs)
    started_at = time.time()
    _check_inputs(
        params['max_loci'], params['required_loci'], params['exclude_loci'])

    history_path = project_directory.make_new_file(
        "history", "pattern_selection_history")
    run_history = History(
        history_path,
        "Pattern_Selection",
        project_directory.timestamp,
        param_dict=params)

    # Open the already-written preprocessing history; its file name embeds
    # the parent directory timestamp.
    prep_history_name = "preprocessing_history_{}.txt".format(
        project_directory.get_parent_directory_timestamp())
    prep_history = History(
        project_directory.get_parent_subdirectory_file(
            "history", prep_history_name),
        "Preprocessing",
        exists=True)

    # Inputs recorded by the preprocessing step.
    pattern_json = prep_history.get_path("PATTERN JSON")
    variant_matrix_path = prep_history.get_path("VARIANT SITE MATRIX FILE")
    separators = {'comma': ",", "space": " ", "tab": "\t"}
    delimiter = separators[prep_history.get_parameter("SEP")]

    flags_path = prep_history.get_path("PRIMER ZONE FLAGS")
    pz_size = prep_history.get_parameter("PZ_SIZE")

    run_history.add_path("PATTERN JSON", pattern_json)
    logger.info("Reading from pattern JSON: %s", pattern_json)

    # Load the patterns, then apply each optional filter only when the
    # corresponding parameter is non-empty.
    patterns = Patterns()
    patterns.load_patterns(pattern_json)

    excluded_loci = params['exclude_loci']
    if len(excluded_loci) > 0:
        patterns.remove_sites(excluded_loci)

    required_loci = params['required_loci']
    if len(required_loci) > 0:
        patterns.add_required_sites(required_loci)

    excluded_strains = params['exclude_strains']
    if len(excluded_strains) > 0:
        patterns.remove_strains(excluded_strains)

    patterns.set_resolution(params['res'], params['stop_at_res'])

    # The primer zone size is read again here and cast to int for the
    # spanning-set search; the uncast value above is what Haplotype receives.
    spanning_set = _get_minimum_spanning_set(
        patterns,
        params['reps'],
        params['max_loci'],
        params['max_res'],
        params['n_threads'],
        int(prep_history.get_parameter("PZ_SIZE")))

    def spanning_set_file(name, extension):
        # All spanning-set outputs are created under "minimum_spanning_set".
        return project_directory.make_new_file(
            "minimum_spanning_set", name, extension)

    haplotype_csv = spanning_set_file(".haplotype", "csv")
    amplicon_json_path = spanning_set_file(".amplicons", "json")
    haplotype_matrix_path = spanning_set_file("haplotypes", "csv")
    amplicon_matrix_path = spanning_set_file("amplicons", "csv")
    pattern_matrix_path = spanning_set_file("patterns", "csv")
    summary_path = project_directory.make_new_file("summary", "summary")

    haplotype = Haplotype(
        patterns, spanning_set, flags_path, pz_size,
        variant_matrix_path, delimiter)

    # Write each output and register its path in the history right after.
    haplotype.write_haplotype(haplotype_csv)
    run_history.add_path("Haplotype File", haplotype_csv)

    haplotype.write_json(amplicon_json_path)
    run_history.add_path("Amplicon JSON", amplicon_json_path)

    haplotype.write_summary(summary_path)
    run_history.add_path("Summary", summary_path)

    haplotype.write_output(
        haplotype_matrix_path, pattern_matrix_path, amplicon_matrix_path)
    matrix_outputs = (
        ("Haplotype Matrix", haplotype_matrix_path),
        ("Amplicon Matrix", amplicon_matrix_path),
        ("Pattern Matrix", pattern_matrix_path),
    )
    for label, path in matrix_outputs:
        run_history.add_path(label, path)

    logger.info("FINISHED Pattern Selection")
    elapsed = time.time() - started_at
    run_history.add_other("Run Time", elapsed)
    run_history.write()