Ejemplo n.º 1
0
    chdir(args.output_dir)
    print('Reading sample ' + file)
    sample = Sample(make="",
                    model="",
                    year="",
                    sample_number=0, # NOTE we only process one file
                    sample_path=file,
                    kfold_n=args.kfold_n)
    try:
        # TODO: The note below came with the original code; it may mean that the
        # validation step later in the process is not working.
        # Cross-validation parameters for finding an optimal tokenization inversion distance threshold -- NOT WORKING?
        print("\nData import and Pre-Processing for " + sample.output_vehicle_dir)
        id_dict, j1979_dict, pid_dict = sample.pre_process()
        if j1979_dict:
            sample.plot_j1979(j1979_dict, vehicle_number=args.vehicle_name)
        #                 LEXICAL ANALYSIS                     #
        print("\n\t##### BEGINNING LEXICAL ANALYSIS OF " + sample.output_vehicle_dir + " #####")
        sample.tokenize_dictionary(id_dict)
        signal_dict = sample.generate_signals(id_dict, bool(j1979_dict))
        sample.plot_arb_ids(id_dict, signal_dict, vehicle_number=args.vehicle_name)
        #                 SEMANTIC ANALYSIS                    #
        print("\n\t##### BEGINNING SEMANTIC ANALYSIS OF " + sample.output_vehicle_dir + " #####")
        corr_matrix, combined_df = sample.generate_correlation_matrix(signal_dict)
        if j1979_dict:
            signal_dict, j1979_correlation = sample.j1979_labeling(j1979_dict, signal_dict, combined_df)
        cluster_dict, linkage_matrix = sample.cluster_signals(corr_matrix)
        sample.plot_clusters(cluster_dict, signal_dict, bool(j1979_dict), vehicle_number=args.vehicle_name)
        sample.plot_dendrogram(linkage_matrix, vehicle_number=args.vehicle_name)
    except KeyboardInterrupt:
        print("Keyboard interupt pressed program quit")