Ejemplo n.º 1
0
def update(input_option):
    """Merge the old FSM with the freshly mined one, minimize, and persist.

    Reads the previous FSM from ``input_option.args.old_fsm`` and the local
    result from ``<work_dir>/FINAL_mindfa.txt``, merges them, minimizes the
    determinized merge, and writes the minimized model to
    ``<work_dir>/UPDATED_mindfa.txt`` (text) and ``UPDATED_mindfa.dot`` (dot).

    Args:
        input_option: option object whose ``args`` exposes ``old_fsm`` and
            ``work_dir`` attributes.

    Returns:
        The merged (pre-minimization) FSM object.
    """
    old_fsm_path = input_option.args.old_fsm
    local_fsm_path = input_option.args.work_dir + '/FINAL_mindfa.txt'
    updated_fsm_path = input_option.args.work_dir + '/UPDATED_mindfa.txt'
    updated_dot_path = input_option.args.work_dir + '/UPDATED_mindfa.dot'

    # Merge the two FSMs.
    updated_fsm = merge_fsms(old_fsm_path, local_fsm_path)

    # Determinize, then minimize, before persisting.
    minimized_fsm = graph_lib.minimize_dfa(updated_fsm.nfa2dfa())

    # exist_ok=True avoids the race between the old isdir() check and makedirs().
    os.makedirs(os.path.dirname(updated_fsm_path), exist_ok=True)

    with open(updated_fsm_path, 'w') as writer:
        writer.write(minimized_fsm.to_string() + '\n')

    minimized_fsm.to_dot(updated_dot_path)

    # NOTE(review): the minimized model is what gets written to disk, yet the
    # un-minimized merge is what callers receive — confirm this is intended.
    return updated_fsm
Ejemplo n.º 2
0
def predict_accuracy(fsm_file, stat_file, input_file, prediction_file):
    """Estimate precision/recall/F1 of an FSM without ground-truth labels.

    Precision is approximated as the fraction of event bigrams (adjacent
    pairs) in ``training ∪ fsm`` that were actually observed in training;
    recall is read from a previously computed statistics file.

    Args:
        fsm_file: path to the FSM description; its parent directory name
            encodes the cluster count as ``S_<k>``.
        stat_file: path to a ``key: value`` statistics file containing a
            ``recall`` entry.
        input_file: training traces, one whitespace-separated trace per line.
        prediction_file: output path; precision, recall and F1 are written
            one per line.

    Returns:
        Tuple ``(fsm_file, num_cluster, predicted_precision, recall,
        predicted_f1)``.
    """
    print(fsm_file)
    num_cluster = int(
        os.path.basename(os.path.dirname(fsm_file)).replace('S_', ''))

    # Collect every adjacent event pair (bigram) seen in the training traces.
    training_pairs = set()
    with open(input_file, 'r') as reader:
        lines = [l.strip().split() for l in reader]
        for tr in lines:
            training_pairs |= set([(tr[i], tr[i + 1])
                                   for i in range(len(tr) - 1)])
    ######################################
    # Collect every label bigram realizable by two consecutive FSM transitions.
    fsm_pairs = set()
    the_fsm = graph_lib.parse_fsm_file(fsm_file)
    adjlst = the_fsm.create_adjacent_list()
    for a in adjlst:
        for (b, label_one) in adjlst[a]:
            if b not in adjlst:
                # b has no outgoing transitions; no second label to pair with.
                continue
            for (c, label_two) in adjlst[b]:
                fsm_pairs.add((label_one, label_two))
    ######################################
    # Guard against an empty union (empty traces AND an edgeless FSM), which
    # previously raised ZeroDivisionError.
    union_size = len(training_pairs | fsm_pairs)
    predicted_precision = (float(len(training_pairs)) / float(union_size)
                           if union_size else 0.0)
    print("Predicted Precision:", predicted_precision, "unseen pairs:",
          len(fsm_pairs - training_pairs), "training pairs:",
          len(training_pairs))
    ######################################
    with open(stat_file, 'r') as reader:
        lines = [l.strip().split(':') for l in reader]
        recall = list(filter(lambda x: x[0].strip() == 'recall',
                             lines))[0][-1].strip()
        recall = float(recall)
    print("Predicted Recall:", recall)
    ######################################
    # Harmonic mean of precision and recall, guarded against the 0/0 case.
    if predicted_precision + recall > 0.0:
        predicted_f1 = float(2.0 * predicted_precision *
                             recall) / float(predicted_precision + recall)
    else:
        predicted_f1 = 0.0
    print("Predicted F-measure", predicted_f1)
    with open(prediction_file, 'w') as writer:
        writer.write(str(predicted_precision) + '\n')
        writer.write(str(recall) + '\n')
        writer.write(str(predicted_f1) + '\n')
    return (fsm_file, num_cluster, predicted_precision, recall, predicted_f1)
Ejemplo n.º 3
0
def when_ending_method_available(ending_methods,
                                 fsm,
                                 output_folder,
                                 make_dfa=False):
    """Extend the FSM's ending states with ``ending_methods`` and persist it.

    Writes text + dot renderings of the extended FSM and its determinized
    DFA into ``<output_folder>/extended_endings_fsm``; when ``make_dfa`` is
    true, also minimizes the DFA and writes the MinDFA artifacts.

    Args:
        ending_methods: collection of methods marking valid trace endings.
        fsm: the FSM object to extend.
        output_folder: base directory for the generated artifacts.
        make_dfa: when True, additionally produce the minimized DFA.
    """
    extended_fsm_dir = output_folder + '/extended_endings_fsm'
    lib.init_dir(extended_fsm_dir)

    extended_fsm = extending_ending_states(fsm, ending_methods)
    # 'with' guarantees the handles are closed (the originals leaked them).
    with open(extended_fsm_dir + '/fsm.txt', 'w') as writer:
        writer.write(extended_fsm.to_string())
    drawing_dot(extended_fsm, extended_fsm_dir + '/fsm')

    extended_dfa = extended_fsm.nfa2dfa()
    with open(extended_fsm_dir + '/dfa.txt', 'w') as writer:
        writer.write(extended_dfa.to_string())
    drawing_dot(extended_dfa, extended_fsm_dir + '/dfa')

    if make_dfa:
        extended_mindfa = graph_lib.minimize_dfa(extended_dfa)
        with open(extended_fsm_dir + '/mindfa.txt', 'w') as writer:
            writer.write(extended_mindfa.to_string())
        drawing_dot(extended_mindfa, extended_fsm_dir + '/mindfa')
Ejemplo n.º 4
0
def create_fsm_for_unit_traces(elementID2cluster, training_traces,
                               output_folder):
    """Build and persist a separate FSM/DFA/MinDFA for each single trace.

    For trace ``i`` (1-based), artifacts go to ``<output_folder>/fsm_d<i>``:
    ``fsm.txt``/``dfa.txt``/``mindfa.txt`` plus dot renderings of each model.

    Args:
        elementID2cluster: mapping from element IDs to cluster labels.
        training_traces: iterable of traces; one FSM is built per trace.
        output_folder: base directory for the per-trace subdirectories.
    """
    lib.init_dir(output_folder)

    # enumerate replaces the manual unit_id counter.
    for unit_id, one_trace in enumerate(training_traces, start=1):
        unit_dir = output_folder + '/fsm_d' + str(unit_id)
        lib.init_dir(unit_dir)
        fsm, log_fsm = create_fsm(elementID2cluster, [one_trace])

        dfa = fsm.nfa2dfa()
        mindfa = graph_lib.minimize_dfa(dfa)

        # 'with' guarantees the handles are closed (the originals leaked them).
        with open(unit_dir + '/fsm.txt', 'w') as writer:
            writer.write(fsm.to_string())
        with open(unit_dir + '/dfa.txt', 'w') as writer:
            writer.write(dfa.to_string())
        with open(unit_dir + '/mindfa.txt', 'w') as writer:
            writer.write(mindfa.to_string())

        drawing_dot(fsm, unit_dir + '/fsm')
        drawing_dot(dfa, unit_dir + '/dfa')
        drawing_dot(mindfa, unit_dir + '/mindfa')
Ejemplo n.º 5
0
def compute_statistics(X,
                       method_list,
                       args,
                       estimator,
                       generated_traces,
                       validation_traces,
                       output_folder=None,
                       X_id_mapping=None,
                       create_fsm_per_unit_trace=False,
                       ending_methods=None,
                       minimize_dfa=True,
                       ktails=False,
                       check_accepted_traces=True):
    """Cluster X, build FSM/DFA(/MinDFA) models, validate and persist them.

    Reads cluster assignments from ``estimator`` (or k-tails when ``ktails``
    is set), builds an FSM from ``generated_traces``, determinizes (and
    optionally minimizes) it, counts how many ``validation_traces`` the DFA
    accepts, and writes all artifacts — cluster info, centroids, model text
    and dot files, serialized FSA, and ``statistic.txt`` — into
    ``output_folder`` (defaults to ``args.output_folder``).

    Exits the process if neither ``estimator`` nor ``ktails`` is provided.
    """
    if output_folder is None:
        output_folder = args.output_folder
    lib.init_dir(output_folder)
    if estimator is not None:
        ###
        elementID2cluster, centroids, X_labels = read_clusters(
            estimator, X, X_id_mapping=X_id_mapping)
    elif ktails:
        # ktails
        elementID2cluster, centroids, X_labels = read_ktails_clusters(
            X, X_id_mapping=X_id_mapping)
    else:
        print("ERROR: no estimators!")
        sys.exit(0)

    # write cluster info
    write_cluster(elementID2cluster,
                  X,
                  output_folder + '/resultant_cluster.gz',
                  X_id_mapping=X_id_mapping)

    # write centroids
    write_centroids_to_file(centroids, output_folder + '/centroids.txt')

    # write distance to centroid of each element in each cluster
    write_cluster_contents_distance(
        elementID2cluster, X, centroids,
        output_folder + '/cluster_element_distances.txt')

    if create_fsm_per_unit_trace:
        create_fsm_for_unit_traces(elementID2cluster, generated_traces,
                                   output_folder + '/unit_fsms')

    # create FSM
    fsm, log_fsm = create_fsm(elementID2cluster, generated_traces)

    # write info of data contained inside each cluster
    write_trace_cluster_info(elementID2cluster, generated_traces,
                             output_folder + '/trace_cluster_info.txt')

    # write fsm log to file
    write_log_to_file(log_fsm, output_folder + '/debug_fsm.txt')

    # DFA
    dfa = fsm.nfa2dfa()
    if check_accepted_traces:
        dfa_num_accepted_traces = count_accepted_traces(
            dfa,
            validation_traces,
            output_file=output_folder + '/dfa_uncovered_traces.txt')
    else:
        dfa_num_accepted_traces = -1
    print("Finished validating DFA:", dfa_num_accepted_traces,
          "validation traces accepted by DFA")

    if minimize_dfa:
        # MinDFA
        mindfa = graph_lib.minimize_dfa(dfa)
    else:
        mindfa = None

    # 'with' guarantees the handles are closed (the originals leaked them).
    with open(output_folder + '/fsm.txt', 'w') as writer:
        writer.write(fsm.to_string())
    drawing_dot(fsm, output_folder + '/fsm')

    with open(output_folder + '/dfa.txt', 'w') as writer:
        writer.write(dfa.to_string())

    if minimize_dfa:
        with open(output_folder + '/mindfa.txt', 'w') as writer:
            writer.write(mindfa.to_string())

    drawing_dot(dfa, output_folder + '/dfa')

    if minimize_dfa:
        drawing_dot(mindfa, output_folder + '/mindfa')

    print("after drawing dot")
    print(output_folder)

    # Serialization is best-effort: a failure must not abort the statistics.
    try:
        fsm.serialize(output_folder + "/serialized_fsa.json")
    except Exception as e:
        print("Serialization problem:")
        print(e)

    # Number of accepted data; size of DFA, MinDFA, FSM;

    # fsm_num_accepted_traces = count_accepted_traces(fsm, validation_traces, debug=True)
    # print "Finished validating FSM:", fsm_num_accepted_traces, "data"

    ###

    # mindfa_num_accepted_traces = count_accepted_traces(mindfa, validation_traces)
    # print "Finished validating MinDFA:", mindfa_num_accepted_traces, "data"

    ##### compute silhouete ####

    # try:
    #     import signal
    #     signal.signal(signal.SIGALRM, lib.handler)
    #     signal.alarm(waiting_time())
    #     silhouette_avg = silhouette_score(np.array(X), estimator.labels_,
    #                                       sample_size=min(
    #                                           args.silhouette_sample_size if args.silhouette_sample_size is not None else len(
    #                                               X), len(X)),
    #                                       random_state=args.seed)
    #     print "silhouette_avg:", silhouette_avg
    #
    #     signal.alarm(0)
    # except TimeoutError:
    #     print "silhouette computation runs too long!"
    #     silhouette_avg = -1
    # except ValueError as e:
    #     print e
    #     silhouette_avg = -1
    # finally:
    #     signal.alarm(0)

    # write statistics
    with open(output_folder + '/statistic.txt', 'w') as writer:
        writer.write('FSM_size:' + '\t' + str(len(fsm.states)) + '\n')
        if dfa is not None:
            writer.write('DFA_size:' + '\t' + str(len(dfa.states)) + '\n')
        if mindfa is not None:
            writer.write('MinDFA_size:' + '\t' + str(len(mindfa.states)) +
                         '\n')
        # writer.write('FSM_validation:' + '\t' + str(fsm_num_accepted_traces) + '\n')
        if dfa_num_accepted_traces is not None:
            writer.write('DFA_validation:' + '\t' +
                         str(dfa_num_accepted_traces) + '\n')
        # writer.write('MinDFA_validation:' + '\t' + str(mindfa_num_accepted_traces) + '\n')
        # writer.write('silhouette_avg:' + '\t' + str(silhouette_avg) + '\n')
        if hasattr(estimator, 'n_clusters'):
            writer.write('num_cluster:\t' + str(estimator.n_clusters) + '\n')
        else:
            # DBSCAN-style labels: -1 marks noise, not a cluster.
            n_clusters_ = len(set(X_labels)) - (1 if -1 in X_labels else 0)

            writer.write('num_cluster:\t' + str(n_clusters_) + '\n')
        writer.write('total_validation_traces:\t' +
                     str(len(validation_traces)) + '\n')
        if dfa_num_accepted_traces is not None:
            possible_recall = float(dfa_num_accepted_traces) / float(
                len(validation_traces))
            writer.write('recall:\t' + str(possible_recall) + '\n')

    print("after writing stats")
    ########################
    if ending_methods is not None:
        when_ending_method_available(ending_methods,
                                     fsm,
                                     output_folder,
                                     make_dfa=minimize_dfa)
Ejemplo n.º 6
0
    if predicted_precision + predicted_recall != 0:
        fmeasure = 2.0 * predicted_precision * predicted_recall / (
            predicted_precision + predicted_recall)
    else:
        fmeasure = 0.0

    if verbose:
        print("Unseen pairs:", unseen_pairs)

    return predicted_precision, predicted_recall, fmeasure


if __name__ == '__main__':
    args = read_args()

    the_fsm = graph_lib.parse_fsm_file(args.fsm)

    #######

    # Load traces, dropping the artificial <START> token when present.
    # (Removed the unused 'training_pairs' set the original built here.)
    with open(args.traces, 'r') as reader:
        traces = [l.strip().split() for l in reader]
        # Guard 't and' prevents an IndexError on blank lines in the file.
        traces = [t[1:] if t and t[0] == '<START>' else t for t in traces]

    precision, recall, fmeasure = predict(the_fsm,
                                          traces,
                                          verbose=args.verbose)

    print('Precision:', precision)
    print('Recall:', recall)
    print('F-measure:', fmeasure)