Ejemplo n.º 1
0
     mkdir(args.output_dir)
 chdir(args.output_dir)
 print('Reading sample ' + file)
 sample = Sample(make="",
                 model="",
                 year="",
                 sample_number=0, # NOTE we only process one file
                 sample_path=file,
                 kfold_n=args.kfold_n)
 try:
     # TODO: The note below was inherited from the original code; confirm whether it means that
     # the validation step later in the process is not working.
     # Original note: cross-validation parameters for finding an optimal tokenization inversion
     # distance threshold -- NOT WORKING?
     print("\nData import and Pre-Processing for " + sample.output_vehicle_dir)
     id_dict, j1979_dict, pid_dict = sample.pre_process()
     if j1979_dict:
         sample.plot_j1979(j1979_dict, vehicle_number=args.vehicle_name)
     #                 LEXICAL ANALYSIS                     #
     print("\n\t##### BEGINNING LEXICAL ANALYSIS OF " + sample.output_vehicle_dir + " #####")
     sample.tokenize_dictionary(id_dict)
     signal_dict = sample.generate_signals(id_dict, bool(j1979_dict))
     sample.plot_arb_ids(id_dict, signal_dict, vehicle_number=args.vehicle_name)
     #                 SEMANTIC ANALYSIS                    #
     print("\n\t##### BEGINNING SEMANTIC ANALYSIS OF " + sample.output_vehicle_dir + " #####")
     corr_matrix, combined_df = sample.generate_correlation_matrix(signal_dict)
     if j1979_dict:
         signal_dict, j1979_correlation = sample.j1979_labeling(j1979_dict, signal_dict, combined_df)
     cluster_dict, linkage_matrix = sample.cluster_signals(corr_matrix)
     sample.plot_clusters(cluster_dict, signal_dict, bool(j1979_dict), vehicle_number=args.vehicle_name)
     sample.plot_dendrogram(linkage_matrix, vehicle_number=args.vehicle_name)
 except KeyboardInterrupt:
     print("Keyboard interupt pressed program quit")