def pre_process(self):
    # Import the raw capture and build the Arb ID and J1979 dictionaries.
    pre_processor = PreProcessor(self.path, pickle_arb_id_filename, pickle_j1979_filename, self.use_j1979)
    pid_dictionary = pre_processor.import_pid_dict(pid_file)
    id_dictionary, j1979_dictionary = pre_processor.generate_arb_id_dictionary(
        a_timer,
        tang_normalize_strategy,
        pid_dictionary,
        time_conversion,
        freq_analysis_accuracy,
        freq_synchronous_threshold,
        force_pre_processing)
    if dump_to_pickle:
        if force_pre_processing:
            # Forced re-processing invalidates any cached dictionaries on disk.
            if path.isfile(pickle_arb_id_filename):
                remove(pickle_arb_id_filename)
            if path.isfile(pickle_j1979_filename):
                remove(pickle_j1979_filename)
        # Lexical analysis will add additional information to the Arb ID dict. Don't dump if you're going to
        # immediately delete and replace pickle_arb_id_filename during Lexical Analysis.
        if not force_lexical_analysis:
            if not path.isfile(pickle_arb_id_filename) and id_dictionary:
                print("\nDumping arb ID dictionary for " + self.output_vehicle_dir + " to " + pickle_arb_id_filename)
                with open(pickle_arb_id_filename, "wb") as arb_id_file:
                    dump(id_dictionary, arb_id_file)
                print("\tComplete...")
            if not path.isfile(pickle_j1979_filename) and j1979_dictionary:
                print("\nDumping J1979 dictionary for " + self.output_vehicle_dir + " to " + pickle_j1979_filename)
                with open(pickle_j1979_filename, "wb") as j1979_file:
                    dump(j1979_dictionary, j1979_file)
                print("\tComplete...")
    return id_dictionary, j1979_dictionary, pid_dictionary
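
# Illustration only: the delete-then-dump caching pattern from pre_process() as
# a standalone helper. A minimal sketch assuming just the standard library;
# cache_dict and its arguments are hypothetical names, not part of this pipeline.
from os import path, remove
from pickle import dump


def cache_dict(d: dict, filename: str, force: bool = False) -> None:
    # A forced re-run invalidates any cache already on disk.
    if force and path.isfile(filename):
        remove(filename)
    # Only write when there is data and no existing cache file to preserve.
    if d and not path.isfile(filename):
        with open(filename, "wb") as f:
            dump(d, f)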
# Threshold parameters used during lexical analysis
tokenization_bit_distance: float = 0.2
tokenize_padding: bool = True

# Threshold parameters used during semantic analysis
subset_selection_size: float = 0.25
fuzzy_labeling: bool = True
min_correlation_threshold: float = 0.85

# A timer class to record timings throughout the pipeline.
a_timer = PipelineTimer(verbose=True)

#               DATA IMPORT AND PRE-PROCESSING                #
pre_processor = PreProcessor(can_data_filename, pickle_arb_id_filename, pickle_j1979_filename)
id_dictionary, j1979_dictionary = pre_processor.generate_arb_id_dictionary(
    a_timer,
    tang_normalize_strategy,
    time_conversion,
    freq_analysis_accuracy,
    freq_synchronous_threshold,
    force_pre_processing)
if j1979_dictionary:
    plot_j1979(a_timer, j1979_dictionary, force_j1979_plotting)

#                     LEXICAL ANALYSIS                        #
print("\n\t\t\t##### BEGINNING LEXICAL ANALYSIS #####")
tokenize_dictionary(
    a_timer,
    id_dictionary,
    force_lexical_analysis,
    include_padding=tokenize_padding,
    merge=True,
    max_distance=tokenization_bit_distance)
signal_dictionary = generate_signals(
    a_timer,
    id_dictionary,
    pickle_signal_filename,
    signal_normalize_strategy,