def pattern_selection(project_directory, **kwargs):
    """Run the pattern selection step and write its outputs.

    Reads the pattern JSON, variant site matrix, primer zone flags and
    related parameters recorded in the preprocessing history, applies the
    requested locus/strain filters to the loaded patterns, obtains a
    minimum spanning set, and writes the haplotype, amplicon, pattern and
    summary files. Every produced path and the total run time are recorded
    in a new pattern selection history, which is written last.

    Args:
        project_directory: Object used to create output files
            (``make_new_file``) and to locate the parent preprocessing
            history (``get_parent_subdirectory_file``,
            ``get_parent_directory_timestamp``, ``timestamp``).
        **kwargs: Forwarded unchanged to ``_set_parameters``. The mapping
            it returns is read for the keys ``max_loci``,
            ``required_loci``, ``exclude_loci``, ``exclude_strains``,
            ``res``, ``stop_at_res``, ``reps``, ``max_res`` and
            ``n_threads``.

    Returns:
        None.
    """
    log = logging.getLogger(__name__)
    log.info("BEGIN Pattern Selection")

    params = _set_parameters(**kwargs)
    started_at = time.time()
    _check_inputs(
        params['max_loci'],
        params['required_loci'],
        params['exclude_loci'],
    )

    # History for this run.
    history_path = project_directory.make_new_file(
        "history", "pattern_selection_history")
    history = History(
        history_path,
        "Pattern_Selection",
        project_directory.timestamp,
        param_dict=params,
    )

    # Existing history written by the preprocessing step in the parent
    # directory.
    parent_timestamp = project_directory.get_parent_directory_timestamp()
    preprocessing_history_path = (
        project_directory.get_parent_subdirectory_file(
            "history",
            "preprocessing_history_{}.txt".format(parent_timestamp),
        )
    )
    preprocessing_history = History(
        preprocessing_history_path, "Preprocessing", exists=True)

    # Inputs recorded by the preprocessing step.
    pattern_json_path = preprocessing_history.get_path("PATTERN JSON")
    variant_matrix_path = preprocessing_history.get_path(
        "VARIANT SITE MATRIX FILE")
    separators = {'comma': ",", "space": " ", "tab": "\t"}
    separator = separators[preprocessing_history.get_parameter("SEP")]
    primer_flag_path = preprocessing_history.get_path("PRIMER ZONE FLAGS")
    # Kept exactly as stored; this raw value is what Haplotype receives.
    stored_pz_size = preprocessing_history.get_parameter("PZ_SIZE")

    history.add_path("PATTERN JSON", pattern_json_path)
    log.info("Reading from pattern JSON: %s", pattern_json_path)

    # Load the patterns, then apply the optional filters in order.
    patterns = Patterns()
    patterns.load_patterns(pattern_json_path)

    excluded_loci = params['exclude_loci']
    if len(excluded_loci) > 0:
        patterns.remove_sites(excluded_loci)

    required_loci = params['required_loci']
    if len(required_loci) > 0:
        patterns.add_required_sites(required_loci)

    excluded_strains = params['exclude_strains']
    if len(excluded_strains) > 0:
        patterns.remove_strains(excluded_strains)

    patterns.set_resolution(params['res'], params['stop_at_res'])

    # The spanning-set search is given the primer zone size as an int,
    # read from the preprocessing history again at this point.
    spanning_set = _get_minimum_spanning_set(
        patterns,
        params['reps'],
        params['max_loci'],
        params['max_res'],
        params['n_threads'],
        int(preprocessing_history.get_parameter("PZ_SIZE")),
    )

    # Create every output file before any of them is written.
    out_dir = "minimum_spanning_set"
    haplotype_path = project_directory.make_new_file(
        out_dir, ".haplotype", "csv")
    amplicon_json_path = project_directory.make_new_file(
        out_dir, ".amplicons", "json")
    haplotype_matrix_path = project_directory.make_new_file(
        out_dir, "haplotypes", "csv")
    amplicon_matrix_path = project_directory.make_new_file(
        out_dir, "amplicons", "csv")
    pattern_matrix_path = project_directory.make_new_file(
        out_dir, "patterns", "csv")
    summary_path = project_directory.make_new_file("summary", "summary")

    haplotype = Haplotype(
        patterns,
        spanning_set,
        primer_flag_path,
        stored_pz_size,
        variant_matrix_path,
        separator,
    )

    # Write each output and record its path straight afterwards.
    haplotype.write_haplotype(haplotype_path)
    history.add_path("Haplotype File", haplotype_path)

    haplotype.write_json(amplicon_json_path)
    history.add_path("Amplicon JSON", amplicon_json_path)

    haplotype.write_summary(summary_path)
    history.add_path("Summary", summary_path)

    haplotype.write_output(
        haplotype_matrix_path, pattern_matrix_path, amplicon_matrix_path)
    history.add_path("Haplotype Matrix", haplotype_matrix_path)
    history.add_path("Amplicon Matrix", amplicon_matrix_path)
    history.add_path("Pattern Matrix", pattern_matrix_path)

    log.info("FINISHED Pattern Selection")
    history.add_other("Run Time", time.time() - started_at)
    history.write()