Example #1
from os import path, remove
from pickle import dump

# PreProcessor and the module-level settings referenced below
# (pickle_arb_id_filename, pickle_j1979_filename, pid_file, a_timer,
# tang_normalize_strategy, etc.) are defined elsewhere in the project
# (see Example #2 for several of them).

def pre_process(self):
    """Import the CAN data and build the Arb ID, J1979, and PID dictionaries."""
    pre_processor = PreProcessor(self.path, pickle_arb_id_filename,
                                 pickle_j1979_filename, self.use_j1979)
    pid_dictionary = pre_processor.import_pid_dict(pid_file)
    id_dictionary, j1979_dictionary = pre_processor.generate_arb_id_dictionary(
        a_timer, tang_normalize_strategy, pid_dictionary, time_conversion,
        freq_analysis_accuracy, freq_synchronous_threshold,
        force_pre_processing)
    if dump_to_pickle:
        # A forced re-run invalidates any existing pickles, so delete them first.
        if force_pre_processing:
            if path.isfile(pickle_arb_id_filename):
                remove(pickle_arb_id_filename)
            if path.isfile(pickle_j1979_filename):
                remove(pickle_j1979_filename)
        # Lexical analysis will add additional information to the Arb ID dict.
        # Don't dump if you're going to immediately delete and replace
        # pickle_arb_id_filename during lexical analysis.
        if not force_lexical_analysis:
            if not path.isfile(pickle_arb_id_filename) and id_dictionary:
                print("\nDumping arb ID dictionary for " +
                      self.output_vehicle_dir + " to " +
                      pickle_arb_id_filename)
                with open(pickle_arb_id_filename, "wb") as arb_id_file:
                    dump(id_dictionary, arb_id_file)
                print("\tComplete...")
            if not path.isfile(pickle_j1979_filename) and j1979_dictionary:
                print("\nDumping J1979 dictionary for " +
                      self.output_vehicle_dir + " to " +
                      pickle_j1979_filename)
                with open(pickle_j1979_filename, "wb") as j1979_file:
                    dump(j1979_dictionary, j1979_file)
                print("\tComplete...")
    return id_dictionary, j1979_dictionary, pid_dictionary
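
On a later run, these pickles can be loaded back instead of re-parsing the raw capture. A minimal sketch using only the standard library; it assumes the same module-level filenames as above:

from pickle import load

# Reload the dictionaries written by pre_process() on a previous run.
# Assumes pickle_arb_id_filename and pickle_j1979_filename are the same
# module-level settings used above.
with open(pickle_arb_id_filename, "rb") as arb_id_file:
    id_dictionary = load(arb_id_file)
with open(pickle_j1979_filename, "rb") as j1979_file:
    j1979_dictionary = load(j1979_file)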
Example #2
# Threshold parameters used during lexical analysis (tokenization)
tokenization_bit_distance: float = 0.2
tokenize_padding: bool = True

# Threshold parameters used during semantic analysis
subset_selection_size: float = 0.25
fuzzy_labeling: bool = True
min_correlation_threshold: float = 0.85

# A timer object used to record timings throughout the pipeline.
a_timer = PipelineTimer(verbose=True)

#            DATA IMPORT AND PRE-PROCESSING             #
pre_processor = PreProcessor(can_data_filename, pickle_arb_id_filename,
                             pickle_j1979_filename)
id_dictionary, j1979_dictionary = pre_processor.generate_arb_id_dictionary(
    a_timer, tang_normalize_strategy, time_conversion, freq_analysis_accuracy,
    freq_synchronous_threshold, force_pre_processing)
if j1979_dictionary:
    plot_j1979(a_timer, j1979_dictionary, force_j1979_plotting)

#                 LEXICAL ANALYSIS                     #
print("\n\t\t\t##### BEGINNING LEXICAL ANALYSIS #####")
tokenize_dictionary(a_timer,
                    id_dictionary,
                    force_lexical_analysis,
                    include_padding=tokenize_padding,
                    merge=True,
                    max_distance=tokenization_bit_distance)
signal_dictionary = generate_signals(a_timer, id_dictionary,
                                     pickle_signal_filename,
                                     signal_normalize_strategy,
                                     force_lexical_analysis)  # final argument assumed; the original excerpt ends mid-call
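
The a_timer object above is created once and passed through every stage of the pipeline. Its real interface isn't shown in these excerpts, so the sketch below is a hypothetical stand-in (the start/stop method names are assumptions, not the project's API) illustrating how a verbose per-stage timer like this could work:

from time import perf_counter


class PipelineTimer:
    """Hypothetical stand-in for the timer used above; records named
    stage durations and optionally prints them as they complete."""

    def __init__(self, verbose: bool = False):
        self.verbose = verbose
        self.timings = {}    # stage name -> elapsed seconds
        self._starts = {}    # stage name -> perf_counter() at start

    def start(self, stage: str):
        self._starts[stage] = perf_counter()

    def stop(self, stage: str):
        elapsed = perf_counter() - self._starts.pop(stage)
        self.timings[stage] = elapsed
        if self.verbose:
            print(f"\t{stage}: {elapsed:.3f}s")


# Usage mirroring the excerpt: create one timer and thread it through the stages.
a_timer = PipelineTimer(verbose=True)
a_timer.start("pre-processing")
# ... generate_arb_id_dictionary(...) would run here ...
a_timer.stop("pre-processing")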