# Example 1
def clustering_pro(args):
    """Cluster the sampled traces and write statistics to args.output_folder."""
    print(args.output_folder)
    lib.init_dir(args.output_folder)

    # Build the feature matrix X together with the traces it came from.
    (X, generated_traces, additional_val_traces,
     method_list) = parse_sampled_traces(args.generated_traces_folder, 'd')
    print("Training data:", len(generated_traces))

    # Validation set = seed traces on disk, merged with extras from sampling.
    validation_traces = parse_validation_traces(args.validation_traces_folder,
                                                'seed_')
    validation_traces |= additional_val_traces
    print("Validating:", len(validation_traces), "data")

    # Guard: clustering needs strictly more instances than clusters.
    if len(X) <= args.num_cluster:
        print("WARNING: number of clusters must be < number of instances!",
              args.num_cluster, len(X))
        sys.exit(0)

    print("Length of X", len(X))

    # Fit the clustering model, then derive the FSM/DFA artifacts and stats.
    estimator = do_clustering(args, X)
    compute_statistics(X, method_list, args, estimator, generated_traces,
                       validation_traces)
# Example 2
def when_ending_method_available(ending_methods,
                                 fsm,
                                 output_folder,
                                 make_dfa=False):
    """Extend *fsm* with the given ending methods and persist the results.

    Writes the extended FSM — and its DFA, plus the minimized DFA when
    *make_dfa* is True — as text files and dot drawings under
    ``output_folder + '/extended_endings_fsm'``.

    Args:
        ending_methods: ending methods used by extending_ending_states().
        fsm: the automaton to extend.
        output_folder: base directory for the output subdirectory.
        make_dfa: also minimize the DFA and write/draw it when True.
    """
    extended_fsm_dir = output_folder + '/extended_endings_fsm'
    lib.init_dir(extended_fsm_dir)

    extended_fsm = extending_ending_states(fsm, ending_methods)
    # Use context managers: the original open(...).write(...) never closed
    # the handles, relying on refcounting to flush them.
    with open(extended_fsm_dir + '/fsm.txt', 'w') as writer:
        writer.write(extended_fsm.to_string())
    drawing_dot(extended_fsm, extended_fsm_dir + '/fsm')

    extended_dfa = extended_fsm.nfa2dfa()
    with open(extended_fsm_dir + '/dfa.txt', 'w') as writer:
        writer.write(extended_dfa.to_string())
    drawing_dot(extended_dfa, extended_fsm_dir + '/dfa')

    if make_dfa:
        extended_mindfa = graph_lib.minimize_dfa(extended_dfa)
        with open(extended_fsm_dir + '/mindfa.txt', 'w') as writer:
            writer.write(extended_mindfa.to_string())
        drawing_dot(extended_mindfa, extended_fsm_dir + '/mindfa')
# Example 3
def create_fsm_for_unit_traces(elementID2cluster, training_traces,
                               output_folder):
    """Build and persist one FSM/DFA/minDFA triple per training trace.

    Each trace gets its own subdirectory ``fsm_d<i>`` (1-based) under
    *output_folder*, containing text dumps and dot drawings of the FSM,
    its DFA, and the minimized DFA.

    Args:
        elementID2cluster: mapping used by create_fsm() to label elements.
        training_traces: iterable of traces; one FSM is built per trace.
        output_folder: directory that receives the per-trace subdirectories.
    """
    lib.init_dir(output_folder)

    # enumerate() replaces the manual unit_id counter.
    for unit_id, one_trace in enumerate(training_traces, start=1):
        unit_dir = output_folder + '/fsm_d' + str(unit_id)
        lib.init_dir(unit_dir)
        fsm, log_fsm = create_fsm(elementID2cluster, [one_trace])

        dfa = fsm.nfa2dfa()
        mindfa = graph_lib.minimize_dfa(dfa)

        # Context managers close the handles; open(...).write(...) leaked them.
        with open(unit_dir + '/fsm.txt', 'w') as writer:
            writer.write(fsm.to_string())
        with open(unit_dir + '/dfa.txt', 'w') as writer:
            writer.write(dfa.to_string())
        with open(unit_dir + '/mindfa.txt', 'w') as writer:
            writer.write(mindfa.to_string())

        drawing_dot(fsm, unit_dir + '/fsm')
        drawing_dot(dfa, unit_dir + '/dfa')
        drawing_dot(mindfa, unit_dir + '/mindfa')
# Example 4
def compute_statistics(X,
                       method_list,
                       args,
                       estimator,
                       generated_traces,
                       validation_traces,
                       output_folder=None,
                       X_id_mapping=None,
                       create_fsm_per_unit_trace=False,
                       ending_methods=None,
                       minimize_dfa=True,
                       ktails=False,
                       check_accepted_traces=True):
    """Derive cluster/FSM/DFA artifacts from a fitted estimator and write stats.

    Reads cluster assignments (from *estimator*, or via k-tails when
    *ktails* is True and *estimator* is None), builds an FSM over
    *generated_traces*, converts it to a DFA (optionally minimized),
    validates it against *validation_traces*, and writes all artifacts plus
    a ``statistic.txt`` summary into *output_folder* (defaults to
    ``args.output_folder``). Exits the process if neither an estimator nor
    k-tails mode is available.
    """
    if output_folder is None:
        output_folder = args.output_folder
    lib.init_dir(output_folder)

    # Resolve cluster assignments from whichever source is available.
    if estimator is not None:
        elementID2cluster, centroids, X_labels = read_clusters(
            estimator, X, X_id_mapping=X_id_mapping)
    elif ktails:
        # ktails
        elementID2cluster, centroids, X_labels = read_ktails_clusters(
            X, X_id_mapping=X_id_mapping)
    else:
        print("ERROR: no estimators!")
        sys.exit(0)

    # write cluster info
    write_cluster(elementID2cluster,
                  X,
                  output_folder + '/resultant_cluster.gz',
                  X_id_mapping=X_id_mapping)

    # write centroids
    write_centroids_to_file(centroids, output_folder + '/centroids.txt')

    # write distance to centroid of each element in each cluster
    write_cluster_contents_distance(
        elementID2cluster, X, centroids,
        output_folder + '/cluster_element_distances.txt')

    if create_fsm_per_unit_trace:
        create_fsm_for_unit_traces(elementID2cluster, generated_traces,
                                   output_folder + '/unit_fsms')

    # create FSM
    fsm, log_fsm = create_fsm(elementID2cluster, generated_traces)

    # write info of data contained inside each cluster
    write_trace_cluster_info(elementID2cluster, generated_traces,
                             output_folder + '/trace_cluster_info.txt')

    # write fsm log to file
    write_log_to_file(log_fsm, output_folder + '/debug_fsm.txt')

    # DFA
    dfa = fsm.nfa2dfa()
    if check_accepted_traces:
        dfa_num_accepted_traces = count_accepted_traces(
            dfa,
            validation_traces,
            output_file=output_folder + '/dfa_uncovered_traces.txt')
    else:
        # Sentinel: validation skipped (still written to statistic.txt).
        dfa_num_accepted_traces = -1
    print("Finished validating DFA:", dfa_num_accepted_traces,
          "validation traces accepted by DFA")

    if minimize_dfa:
        # MinDFA
        mindfa = graph_lib.minimize_dfa(dfa)
    else:
        mindfa = None

    # Context managers instead of open(...).write(...), which leaked handles.
    with open(output_folder + '/fsm.txt', 'w') as writer:
        writer.write(fsm.to_string())
    drawing_dot(fsm, output_folder + '/fsm')

    with open(output_folder + '/dfa.txt', 'w') as writer:
        writer.write(dfa.to_string())

    if minimize_dfa:
        with open(output_folder + '/mindfa.txt', 'w') as writer:
            writer.write(mindfa.to_string())

    drawing_dot(dfa, output_folder + '/dfa')

    if minimize_dfa:
        drawing_dot(mindfa, output_folder + '/mindfa')

    print("after drawing dot")
    print(output_folder)

    # Best-effort: serialization failures must not abort the statistics run.
    try:
        fsm.serialize(output_folder + "/serialized_fsa.json")
    except Exception as e:
        print("Serialization problem:")
        print(e)

    # NOTE: FSM/MinDFA validation and silhouette-score computation were
    # removed here as long-dead commented-out code; only the DFA validation
    # above feeds into statistic.txt.

    # write statistics
    with open(output_folder + '/statistic.txt', 'w') as writer:
        writer.write('FSM_size:' + '\t' + str(len(fsm.states)) + '\n')
        if dfa is not None:
            writer.write('DFA_size:' + '\t' + str(len(dfa.states)) + '\n')
        if mindfa is not None:
            writer.write('MinDFA_size:' + '\t' + str(len(mindfa.states)) +
                         '\n')
        if dfa_num_accepted_traces is not None:
            writer.write('DFA_validation:' + '\t' +
                         str(dfa_num_accepted_traces) + '\n')
        if hasattr(estimator, 'n_clusters'):
            writer.write('num_cluster:\t' + str(estimator.n_clusters) + '\n')
        else:
            # DBSCAN-style labels: -1 marks noise, not a cluster.
            n_clusters_ = len(set(X_labels)) - (1 if -1 in X_labels else 0)

            writer.write('num_cluster:\t' + str(n_clusters_) + '\n')
        writer.write('total_validation_traces:\t' +
                     str(len(validation_traces)) + '\n')
        if dfa_num_accepted_traces is not None:
            possible_recall = float(dfa_num_accepted_traces) / float(
                len(validation_traces))
            writer.write('recall:\t' + str(possible_recall) + '\n')

    print("after writing stats")
    ########################
    if ending_methods is not None:
        when_ending_method_available(ending_methods,
                                     fsm,
                                     output_folder,
                                     make_dfa=minimize_dfa)