Example #1
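All of the snippets on this page assume the imports below, plus a readreverse helper and a handful of module-level settings that the original source does not show. The sketch here is a minimal stand-in (the real helper may be something like file_read_backwards.FileReadBackwards, and the configuration values are placeholders):

import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


class readreverse:
    """Context manager whose readline() returns a text file's lines last-to-first."""

    def __init__(self, path, encoding="utf-8"):
        with open(path, encoding=encoding) as f:
            self._lines = f.readlines()  # simple approach: read everything, pop from the end

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        return False

    def readline(self):
        return self._lines.pop() if self._lines else ""


# Module-level configuration referenced by the snippets; values are placeholders.
iteration = 30            # optimizer iterations logged per edge
threshold = 0.5           # switch-value acceptance threshold
generate_figure = True    # toggle the matplotlib plots
examine_edge_option = 0   # set to 1 to plot per-edge convergence curves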
def process():
    #folder_path = "/home/amber/stew/slam++/bin"
    #
    #inlier_quantity = 20 # manhattan: 2097 garage: 4615 mit: 20 intel: 256
    #outlier_quantity = 20

    # read input file, find last N edges (where N=outlier_quantity+inlier_quantity)

    print(sys.argv)

    if len(sys.argv) > 1:
        posegraph_input_path = sys.argv[1]
        output_path = sys.argv[2]
        #rejected_loops_path = sys.argv[3]
        inlier_quantity = int(sys.argv[3])
        outlier_quantity = int(sys.argv[4])

    #posegraph_input_path = "/home/amber/stew/slam++/bin/input.g2o"
    #output_path = "/home/amber/stew/slam++/bin/clustering_results.txt"
    lc_edge_quantity = inlier_quantity + outlier_quantity

    #df_full = pd.read_csv(output_path, delimiter = " ", header = None, names = ['vertex_from', 'vertex_to', 'ofc', 'score'])

    # In[]:

    i = 0
    lc_pair_search_phrases = [
    ]  # list of [str0, str1], each strpair represents one loop closure
    outlier_pair_search_phrases = [
    ]  # list of [str0, str1], each strpair represents one loop closure
    inlier_pair_search_phrases = [
    ]  # list of [str0, str1], each strpair represents one loop closure
    with readreverse(posegraph_input_path, encoding="utf-8") as obj:
        while i < lc_edge_quantity:
            line = obj.readline()
            line_stripped = line.strip('\n')
            line_splitted = line_stripped.split(' ')
            #if [line_splitted[1], line_splitted[2]] in lc_pair_search_phrases:
                #print('redundant: '+ line_splitted[1] +' '+line_splitted[2] )
            lc_pair_search_phrases.append([line_splitted[1], line_splitted[2]])
            if i < outlier_quantity:
                outlier_pair_search_phrases.append(
                    [line_splitted[1], line_splitted[2]])
            else:
                inlier_pair_search_phrases.append(
                    [line_splitted[1], line_splitted[2]])

            i += 1

    edge_twice = [
    ]  # edges present in both the inlier and outlier lists are treated as inliers
    for e in outlier_pair_search_phrases:
        if e in inlier_pair_search_phrases:
            edge_twice.append(e)

    outlier_pair_search_phrases = [
        i for i in outlier_pair_search_phrases if i not in edge_twice
    ]

    # In[]:

    #read output file, find clusters

    clusters = [[]]
    edge_counter = 0
    cluster_index = 0

    with open(output_path, encoding="utf-8") as obj:
        while edge_counter < lc_edge_quantity:  # upper bound; some edges may have been dropped during the incremental clustering
            line = obj.readline()
            if line != '\n':
                edge_counter += 1
                line_stripped = line.strip('\n')
                line_splitted = line_stripped.split(' ')
                if line_splitted[0] == 'CLUSTER':
                    clusters[-1].append(
                        [int(line_splitted[1]),
                         int(line_splitted[2])])
                #print('edge counter: ', edge_counter)

            else:
                #print('cluster', cluster_index, ' ended')
                if line_splitted[0] != 'CLUSTER_R':  # line_splitted still holds the previous non-blank line
                    clusters.append([])
                    cluster_index += 1

    clusters.remove([])

    # In[]:

    #read rejected loops from the output file

    rejected_loops = []
    edge_counter = 0
    cluster_index = 0

    with open(output_path, 'r') as content:
        lines = content.readlines()
    for line in lines:

        line_stripped = line.strip('\n')
        line_splitted = line_stripped.split(' ')
        #print(line_splitted)
        if line_splitted[0] == 'CLUSTER_R':
            rejected_loops.append([line_splitted[1], line_splitted[2]])
        #print('edge counter: ', edge_counter)

    rejected_outliers = []
    rejected_inliers = []

    for loop in rejected_loops:
        if loop in outlier_pair_search_phrases:
            rejected_outliers.append(loop)
        else:
            rejected_inliers.append(loop)

    # In[]:

    clusters_array_list = []
    for cluster in clusters:
        np_array = np.array(cluster)
        empty_column = np.zeros([len(cluster), 1],
                                dtype=int)  # prepare space for later decisions
        np_array = np.append(np_array, empty_column, axis=1)
        clusters_array_list.append(np_array)

    # In[]:

    for i in range(0, len(clusters_array_list)):
        for j in range(0, len(clusters_array_list[i])):
            if [str(clusters_array_list[i][j][0]),
                    str(clusters_array_list[i][j][1])] in outlier_pair_search_phrases:

                clusters_array_list[i][j][2] = -1
            else:
                clusters_array_list[i][j][2] = 1

    # In[]:
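    # A cluster is consistent when all of its edges carry the same label:
    # column 2 holds +1 (inlier) or -1 (outlier), so |sum(column 2)| equals
    # the cluster size exactly when every label agrees.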

    decision = []
    cluster_nature = []
    inconsistent_cluster = []
    for i in range(0, len(clusters_array_list)):
        if abs(sum(clusters_array_list[i][:, 2])) == len(clusters_array_list[i]):

            decision.append(True)
            if sum(clusters_array_list[i][:, 2]) > 0:
                cluster_nature.append(1)  # cluster full of inliers
            else:
                cluster_nature.append(-1)  # cluster full of outliers

        else:
            decision.append(False)
            inconsistent_cluster.append(clusters_array_list[i])

    wrong_clusters = []

    print('rejected outliers (', len(rejected_outliers), ') includes: ',
          rejected_outliers)
    print('rejected inliers (', len(rejected_inliers), ') includes: ',
          rejected_inliers)

    print('consistent clusters/all clusters: ', sum(decision), '/',
          len(decision))
    if sum(decision) != len(decision):
        for i in range(0, len(decision)):
            if not decision[i]:
                print('wrong cluster: ', i)
                wrong_clusters.append(clusters_array_list[i])
                print('includes ', clusters_array_list[i])
    print('sum of all edges',
          sum([len(cluster) for cluster in clusters_array_list]))
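A hypothetical invocation of the snippet above (script name, paths, and counts are illustrative only):

sys.argv = ["cluster_check.py", "input.g2o", "clustering_results.txt", "20", "20"]
process()  # argv: input posegraph, clustering results, inlier count, outlier count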
Example #2

def process():

    #inlier_quantity = 256
    #outlier_quantity = 256

    print(sys.argv)

    if len(sys.argv) > 1:
        inlier_quantity = int(sys.argv[1])
        outlier_quantity = int(sys.argv[2])

    posegraph_input_path = "input.g2o"  #"/home/amber/stew/test_backend/MIT_random10_dcs50/mit_seed_1/input.g2o"#
    text_output_path = "s_value.txt"
    lc_edge_quantity = inlier_quantity + outlier_quantity

    # In[]:

    # read input file, find last N edges
    #find all edge ID-pair and outlier ID-pair
    i = 0
    all_lc_edge_pair_search_phrases = [
    ]  # list of [str0, str1], each strpair represents one loop closure
    outlier_pair_search_phrases = [
    ]  # list of [str0, str1], each strpair represents one loop closure
    all_lc_edge_value = pd.DataFrame(columns=range(0, lc_edge_quantity),
                                     index=range(0, iteration + 2))
    # 0th row: vertex_from
    # 1st row: vertex_to
    # rows 2 .. iteration+1: scale value per iteration

    with readreverse(posegraph_input_path, encoding="utf-8") as obj:
        while i < outlier_quantity + inlier_quantity:
            line = obj.readline()
            line_stripped = line.strip('\n')
            line_splitted = line_stripped.split(' ')
            all_lc_edge_pair_search_phrases.append(
                [line_splitted[1], line_splitted[2]])
            all_lc_edge_value.loc[0, lc_edge_quantity - i - 1] = int(
                line_splitted[1])  # fill columns from the last one backwards
            all_lc_edge_value.loc[1, lc_edge_quantity - i - 1] = int(
                line_splitted[2])
            if i < outlier_quantity:
                outlier_pair_search_phrases.append(
                    [line_splitted[1], line_splitted[2]])

            i += 1

    all_lc_edge_pair_search_phrases = all_lc_edge_pair_search_phrases[::-1]  # reverse
    outlier_pair_search_phrases = outlier_pair_search_phrases[::-1]  # reverse
    # In[]:

    all_lc_edge_value = all_lc_edge_value.sort_values(by=0, axis=1)
    input_edge_sequence = pd.DataFrame(all_lc_edge_value.columns)
    # record the sorted column order so the sorting can be undone later
    # In[]:
    all_lc_edge_value.columns = range(0, lc_edge_quantity)  # refresh the columns

    # DataFrame.append was removed in pandas 2.0; concat is equivalent here
    all_lc_edge_value = pd.concat(
        [all_lc_edge_value, input_edge_sequence.transpose()], ignore_index=True)

    # In[]:

    #read s_value_file

    f = open(text_output_path, "r")
    i = 0
    while i < lc_edge_quantity * iteration:
        line = f.readline()
        line_stripped = line.strip('\n')
        line_splitted = line_stripped.split(' ')
        if len(line_splitted) > 1:
            # s-values from iteration k land in row k + 2
            all_lc_edge_value.loc[i // lc_edge_quantity + 2,
                                  i % lc_edge_quantity] = float(line_splitted[1])

        i += 1

    # In[]:
    # undo the sorting via the column-order row appended earlier (row iteration + 2)
    all_lc_edge_value = all_lc_edge_value.sort_values(by=iteration + 2, axis=1)
    # In[]:

    #for i in range(2, iteration+2): # starts from 2nd row, 0th and 1st row are for intpair
    inlier_value = all_lc_edge_value.iloc[iteration + 1, 0:inlier_quantity]  # last iteration's values
    rejected_inlier = 0
    print('rejected inliers: ')
    for i in range(0, inlier_quantity):
        if inlier_value.iloc[i] < threshold:
            print([all_lc_edge_value.iloc[0, i], all_lc_edge_value.iloc[1, i]],
                  inlier_value.iloc[i])
            rejected_inlier += 1
    print('Num of rejected inliers: ', rejected_inlier)
    fig, ax = plt.subplots()
    plt.plot(range(0, inlier_quantity), inlier_value, 'o-')
    plt.legend(("inlier value", ))
    plt.savefig("inlier_value_iteration_" + str(31 - 1))
    plt.show()

    # In[]:
    accepted_outlier = 0
    print('accepted outliers: ')

    outlier_value = all_lc_edge_value.iloc[iteration + 1,
                                           inlier_quantity:lc_edge_quantity]
    for i in range(0, outlier_quantity):
        if outlier_value.iloc[i] > threshold:
            print([
                all_lc_edge_value.iloc[0, i + inlier_quantity],
                all_lc_edge_value.iloc[1, i + inlier_quantity]
            ], outlier_value.iloc[i])
            accepted_outlier += 1

    print('Num of accepted outliers: ', accepted_outlier)
    fig, ax = plt.subplots()
    plt.plot(range(0, outlier_quantity), outlier_value, 'ro-')
    plt.legend(("outlier value", ))
    plt.savefig("outlier_value_iteration_" + str(31 - 1))
    plt.show()
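The parser in this snippet fixes very little about s_value.txt: each non-empty line must expose the edge's scale value as its second whitespace-separated field, written once per edge per iteration. A hypothetical line consistent with that assumption:

s: 0.98731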
Example #3
def process(options):

    #    folder_path = "/home/amber/stew/test_backend/garage_seed_3"
    #    pose_quantity = 1661
    #    inlier_quantity = 4615
    #    outlier_quantity = 20
    #    options = "3d"

    print(sys.argv)

    if len(sys.argv) > 1:
        folder_path = sys.argv[1]
        pose_quantity = int(sys.argv[2])
        inlier_quantity = int(sys.argv[3])
        outlier_quantity = int(sys.argv[4])

    posegraph_input_path = folder_path + "/input.g2o"
    output_path = folder_path + "/output.g2o"
    text_output_path = folder_path + "/output.txt"
    lc_edge_quantity = inlier_quantity + outlier_quantity

    # In[]:

    # read input file, find last N edges
    #find all edge ID-pair and outlier ID-pair
    i = 0
    all_lc_edge_pair_search_phrases = [
    ]  # list of [str0, str1], each strpair represents one loop closure
    outlier_pair_search_phrases = [
    ]  # list of [str0, str1], each strpair represents one loop closure
    with readreverse(posegraph_input_path, encoding="utf-8") as obj:
        while i < outlier_quantity + inlier_quantity:
            line = obj.readline()
            line_stripped = line.strip('\n')
            line_splitted = line_stripped.split(' ')
            if i < outlier_quantity:
                outlier_pair_search_phrases.append(
                    [line_splitted[1], line_splitted[2]])
            if abs(
                    int(line_splitted[1]) - int(float(line_splitted[2]))
            ) != 1:  # deals with garage dataset, where edges are already sorted
                all_lc_edge_pair_search_phrases.append(
                    [line_splitted[1],
                     str(int(float(line_splitted[2])))])
                i += 1  # only count non-sequential edges, i.e. loop closures

    # In[]:
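    # Assumed layout of a switchable-edge line in output.g2o; the parser below
    # relies only on this field order and ignores the measurement tail:
    #   EDGE_SE2_SWITCHABLE <vertex_from> <vertex_to> <switch_counter> ...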

    #read output.g2o, find outlier ID-pair

    f = open(output_path, "r")
    outlier_id_pair = np.zeros((outlier_quantity, 3), dtype=int)
    #   array of [int0, int1, int2], int0: switchcounter, int1: vertex_from, int2: vertex_to

    if options == "2d":
        prefix = "EDGE_SE2_SWITCHABLE"
    elif options == "3d":
        prefix = "EDGE_SE3_SWITCHABLE"

    i = 0
    while i < outlier_quantity:
        x = f.readline()
        x_stripped = x.strip('\n')
        x_splitted = x_stripped.split(' ')
        if x_splitted[0] == prefix:
            if [x_splitted[1], x_splitted[2]] in outlier_pair_search_phrases:
                outlier_id_pair[i] = np.array([
                    int(x_splitted[3]),
                    int(x_splitted[1]),
                    int(x_splitted[2])
                ])
                #print(all_switch_edge[i])
                i += 1

    outlier_pair_df = pd.DataFrame(columns=outlier_id_pair[:, 0], index=[0])
    # put data in dataframe for easier access
    # DF column names: int (switch counter)
    #        0th row: [int0, int1], int0: vertex_from, int1: vertex_to
    for i in range(0, outlier_quantity):
        outlier_id = outlier_id_pair[:, 0][i]
        outlier_pair_df.at[0, outlier_id] = outlier_id_pair[i, 1:3]

    outlier_id_search_phrases = [
    ]  # list of ['s<switch counter>:'] for searching in the output.txt

    for rows in outlier_id_pair:
        outlier_id_search_phrases.append("s" + str(rows[0]) + ":")

    # In[]:
    #find all_edge ID-pair

    f = open(output_path, "r")
    all_edge_id_pair = np.zeros((lc_edge_quantity, 3), dtype=int)
    #   array of [int0, int1, int2], int0: switchcounter, int1: vertex_from, int2: vertex_to

    i = 0
    while i < lc_edge_quantity:
        x = f.readline()
        x_stripped = x.strip('\n')
        x_splitted = x_stripped.split(' ')
        if x_splitted[0] == prefix:  # prefix was chosen above from `options`
            if [x_splitted[1],
                    x_splitted[2]] in all_lc_edge_pair_search_phrases:
                all_edge_id_pair[i] = np.array([
                    int(x_splitted[3]),
                    int(x_splitted[1]),
                    int(x_splitted[2])
                ])
                #print(all_switch_edge[i])
                i += 1

    ## In[]:
    all_edge_pair_df = pd.DataFrame(columns=all_edge_id_pair[:, 0], index=[0])
    # put data in dataframe for easier access
    # DF column names: int (switch counter)
    #        0th row: [int0, int1], int0: vertex_from, int1: vertex_to

    for i in range(0, lc_edge_quantity):
        e_id = all_edge_id_pair[:, 0][i]
        all_edge_pair_df.at[0, e_id] = all_edge_id_pair[i, 1:3]

    # In[]:

    # read output.txt, find the value of all LC edges after the last iteration

    lc_edge_search_phrases = [
    ]  # list of ['s<switch counter>:'] for searching in the output.txt
    for i in range(pose_quantity, pose_quantity + lc_edge_quantity):
        # pose zero will be dropped in rtabmap
        lc_edge_search_phrases.append("s" + str(i) + ":")

    all_lc_edge_value = pd.DataFrame(columns=lc_edge_search_phrases,
                                     index=[0, 1, 2])
    #   column names:   's<switch counter>:'
    #   0th row:        switch variable value
    #   1st row:        [vertex_from, vertex_to]
    #   2nd row:        bool, whether the switch is on or off given the threshold

    i = 0
    with readreverse(text_output_path, encoding="utf-8") as obj:
        while i < lc_edge_quantity:  # only get the last iteration
            line = obj.readline()

            line_stripped = line.strip('\n')
            line_splitted = line_stripped.split(' ')
            if len(line_splitted) > 1:
                if line_splitted[1] in lc_edge_search_phrases:
                    label = line_splitted[1]  # e.g. 's5644:'
                    all_lc_edge_value.at[0, label] = float(line_splitted[4])
                    id_number_only = int(label.strip('s:'))
                    all_lc_edge_value.at[
                        1, label] = all_edge_pair_df[id_number_only].iloc[0]
                    i += 1

    # In[]:

    # calculate precision/recall rate of outlier/inlier

    for column in all_lc_edge_value:
        all_lc_edge_value.at[2, column] = (all_lc_edge_value.at[0, column] >
                                           threshold)

    outlier_analysis = all_lc_edge_value[outlier_id_search_phrases]
    #outlier_analysis = outlier_analysis.drop('s5644:',1)

    if generate_figure:
        fig, ax = plt.subplots()
        plt.plot(range(0, outlier_quantity), (outlier_analysis.iloc[0]), 'ro-')
        plt.legend(("outlier value", ))
        plt.savefig("outlier_variance")
        plt.show()

    pd.set_option('display.max_columns', None)  # or 1000
    pd.set_option('display.max_rows', None)  # or 1000
    pd.set_option('display.max_colwidth', None)  # or 199
    print("threshold: ", threshold)
    print("outliers acceptted: ", sum(outlier_analysis.iloc[2]), "/",
          outlier_quantity)
    print("accepted outliers: ",
          outlier_analysis.loc[:, outlier_analysis.iloc[2] == True].columns)
    print("accepted outlier: ")
    print(outlier_analysis.loc[:, outlier_analysis.iloc[2] == True])

    inlier_analysis = all_lc_edge_value.drop(columns=outlier_id_search_phrases)

    if generate_figure:
        fig, ax = plt.subplots()
        plt.plot(range(0, inlier_quantity), (inlier_analysis.iloc[0]), 'o-')
        plt.legend(("inlier value", ))
        plt.savefig("inlier_variance")
        plt.show()

    print("inliers acceptted: ", sum(inlier_analysis.iloc[2]), "/",
          lc_edge_quantity - outlier_quantity)
    print("rejected inlier: ")
    print(inlier_analysis.loc[:, inlier_analysis.iloc[2] == False])
    print()

    # In[]:

    # get the convergence curves of all LC edge values:
    if examine_edge_option == 1:

        f = open(text_output_path, "r")

        lc_edge_value_convergence = pd.DataFrame(
            columns=lc_edge_search_phrases, index=[0])
        #   column names:   's<switch counter>:'
        #       0st row:    list of values over all iterations
        #

        i = 0
        while i < lc_edge_quantity * (iteration + 1):  # including the initial estimate
            line = f.readline()
            line_stripped = line.strip('\n')
            line_splitted = line_stripped.split(' ')
            if len(line_splitted) > 1:
                if line_splitted[1] in lc_edge_search_phrases:
                    label = line_splitted[1]
                    if not isinstance(lc_edge_value_convergence.at[0, label],
                                      list):
                        lc_edge_value_convergence.at[0, label] = []

                    lc_edge_value_convergence.at[0, label].append(
                        float(line_splitted[4]))
                    i += 1
        # In[]:

        # examine a specified edge value convergence

        examine_edge = []  #3625, 3781, 4139, 4291, 6441
        new_search_phrases = []
        if len(sys.argv) > 5:
            for i in range(5, len(sys.argv)):
                examine_edge.append(sys.argv[i])

        for edge in examine_edge:
            new_search_phrases.append("s" + str(edge) + ":")

        for j in new_search_phrases:

            plt.plot(lc_edge_value_convergence[j][0])
            plt.title(j + str(all_lc_edge_value[j][1][0]) + ',' +
                      str(all_lc_edge_value[j][1][1]))
            plt.savefig(j)
            plt.show()
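The output.txt parser above constrains only two fields per line: the switch label "s<counter>:" must be the second whitespace-separated token and the switch value the fifth. A hypothetical line consistent with that (the surrounding words are illustrative):

switch s5644: converged to 0.98731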
Example #4
def process():

    #    inlier_quantity = 256
    #    outlier_quantity = 256
    #

    print(sys.argv)

    if len(sys.argv) > 1:
        inlier_quantity = int(sys.argv[1])
        outlier_quantity = int(sys.argv[2])

    posegraph_input_path = "input.g2o"  #"/home/amber/stew/test_backend/MIT_random10_dcs50/mit_seed_1/input.g2o"#
    text_output_path = "s_value.txt"
    lc_edge_quantity = inlier_quantity + outlier_quantity

    # In[]:

    # read input file, find last N edges
    #find all edge ID-pair and outlier ID-pair
    i = 0
    all_lc_edge_pair_search_phrases = [
    ]  # list of [str0, str1], each strpair represents one loop closure
    outlier_pair_search_phrases = [
    ]  # list of [str0, str1], each strpair represents one loop closure
    # 0th row: vertex_from
    # 1st row: vertex_to
    # 2nd row: scale value

    with readreverse(posegraph_input_path, encoding="utf-8") as obj:
        while i < outlier_quantity + inlier_quantity:
            line = obj.readline()
            line_stripped = line.strip('\n')
            line_splitted = line_stripped.split(' ')
            all_lc_edge_pair_search_phrases.append('e'.join(
                [line_splitted[1],
                 str(int(float(line_splitted[2])))]))
            #all_lc_edge_value[lc_edge_quantity - i - 1].iloc[0] = int(line_splitted[1]) # fill from the last column
            #all_lc_edge_value[lc_edge_quantity - i - 1].iloc[1] = int(line_splitted[2]) # fill from the last column
            if i < outlier_quantity:
                outlier_pair_search_phrases.append('e'.join(
                    [line_splitted[1],
                     str(int(float(line_splitted[2])))]))

            i += 1

    all_lc_edge_pair_search_phrases = all_lc_edge_pair_search_phrases[::-1]  # reverse
    outlier_pair_search_phrases = outlier_pair_search_phrases[::-1]  # reverse

    all_lc_edge_set = set(all_lc_edge_pair_search_phrases)
    outlier_set = set(outlier_pair_search_phrases)
    if len(outlier_set) != outlier_quantity:
        print('possible redundant edges')
    inlier_set = all_lc_edge_set - outlier_set
    if len(inlier_set) != inlier_quantity:
        print('possible redundant edges')
    inlier_pair_search_phrases = list(inlier_set)

    #all_lc_edge_value = all_lc_edge_value.sort_values(by=0, axis=1)
    #input_edge_sequence = pd.DataFrame(all_lc_edge_value.columns)
    # append the sorted edge_sequence to last row, so later can be used to reverse the sorting

    #all_lc_edge_value.columns = range(0, lc_edge_quantity) # refresh the columns

    #all_lc_edge_value = all_lc_edge_value.append(input_edge_sequence.transpose(), ignore_index=True)

    # In[]:

    #read s_value_file

    all_lc_edge_value = pd.DataFrame(columns=all_lc_edge_pair_search_phrases,
                                     index=range(0, 1))

    f = open(text_output_path, "r")
    i = 0
    while i < (lc_edge_quantity) * iteration:
        line = f.readline()
        line_stripped = line.strip('\n')
        line_splitted = line_stripped.split(' ')

        if len(line_splitted) > 1:
            edge_joined = 'e'.join([line_splitted[1], line_splitted[2]])
            if not isinstance(all_lc_edge_value[edge_joined].iloc[0], list):
                all_lc_edge_value.at[0, edge_joined] = [float(line_splitted[4])]
            else:
                all_lc_edge_value.at[0, edge_joined].append(
                    float(line_splitted[4]))

        i += 1

    # revert the sorting
    #all_lc_edge_value = all_lc_edge_value.sort_values(by=32, axis=1)
    # In[]:

    #
    inlier_value = all_lc_edge_value[inlier_pair_search_phrases]
    inlier_final_value = []
    rejected_inlier = 0
    print('rejected inliers: ')
    for column in inlier_value.columns:
        final_value = inlier_value.loc[0, column][-1]
        inlier_final_value.append(final_value)
        if final_value < threshold:
            edge_list = column.split('e')
            print(edge_list, final_value)

            rejected_inlier += 1
    print('Num of rejected inliers: ', rejected_inlier)
    if generate_figure:
        fig, ax = plt.subplots()
        plt.plot(range(0, inlier_quantity), inlier_final_value, 'o-')
        plt.legend(("inlier value", ))
        plt.savefig("inlier_value_iteration_" +
                    str(len(all_lc_edge_value.iloc[0, 0])))
        plt.show()

    # In[]:
    accepted_outlier = 0
    print('accepted outliers: ')

    outlier_value = all_lc_edge_value[outlier_pair_search_phrases]
    outlier_final_value = []
    for column in outlier_value.columns:
        final_value = outlier_value.loc[0, column][-1]
        outlier_final_value.append(final_value)
        if final_value >= threshold:
            edge_list = column.split('e')
            print(edge_list, final_value)

            accepted_outlier += 1

    print('Num of accepted outliers: ', accepted_outlier)
    if generate_figure:
        fig, ax = plt.subplots()
        plt.plot(range(0, outlier_quantity), outlier_final_value, 'ro-')
        plt.legend(("outlier value", ))
        plt.savefig("outlier_value_iteration_" +
                    str(len(all_lc_edge_value.iloc[0, 0])))
        plt.show()

    # In[]:
    if examine_edge_option == 1:
        examine_edge = []  #3625, 3781, 4139, 4291, 6441
        if len(sys.argv) > 3:
            for i in range(3, len(sys.argv), 2):
                examine_edge.append('e'.join([sys.argv[i], sys.argv[i + 1]]))

        for j in examine_edge:

            plt.plot(all_lc_edge_value.loc[0, j])
            plt.title(j)
            plt.savefig(j)
            plt.show()
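This snippet keys each loop closure as '<vertex_from>e<vertex_to>' so edges can be deduplicated with sets and used directly as DataFrame column labels; a quick illustration with hypothetical vertex ids:

key = 'e'.join(['12', str(int(float('345.000000')))])  # -> '12e345'
vertices = key.split('e')                              # -> ['12', '345']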
Example #5

def process():
    #folder_path = "/home/amber/stew/test_backend/analysis_chamber"
    #threshold = 0.01
    #outlier_quantity = 10
    #
    #pose_quantity = 808 # manhattan: 3499 garage: 1660 mit: 808 intel: 1228
    #inlier_quantity = 20 # manhattan: 2097 garage: 4615 mit: 20 intel: 256

    # read input file, find last N edges (where N=outlier_quantity+inlier_quantity)
    
    print(sys.argv)
    
    if len(sys.argv) > 1:
        folder_path = sys.argv[1]
        #pose_quantity = int(sys.argv[2])
        inlier_quantity = int(sys.argv[2])
        outlier_quantity = int(sys.argv[3])   
        threshold = float(sys.argv[4])
        zero_out_threshold = float(sys.argv[5])
        print('threshold value: ', threshold)


    posegraph_input_path = folder_path+"/input.g2o"
    output_path = folder_path+"/full_analysis.txt" 
    lc_edge_quantity = inlier_quantity + outlier_quantity 
    
    df_full = pd.read_csv(output_path, delimiter = " ", header = None, names = ['vertex_from', 'vertex_to', 'ofc', 'score'])
    #df_full = df_full.sort_values(by=['vertex_to'])
    #df_full = df_full.reset_index(drop=True)
    
    
    #fig = plt.figure()
    #plt.plot(df_full['ofc'], 'ro')
    #plt.legend(("ofc",))
    #plt.savefig("ofc_incremental")
    #plt.show()
    
    #fig = plt.figure()
    #plt.plot(df_full['score'], 'o')
    #plt.savefig("score_incremental")
    #plt.show()
    
    #df_full
    
    # In[]:
    
    i = 0
    lc_pair_search_phrases = []         # list of [str0, str1], each strpair represents one loop closure
    outlier_pair_search_phrases = []    # list of [str0, str1], each strpair represents one loop closure
    inlier_pair_search_phrases = []     # list of [str0, str1], each strpair represents one loop closure
    with readreverse(posegraph_input_path, encoding="utf-8") as obj:
        while i < lc_edge_quantity:
            line = obj.readline()
            line_stripped = line.strip('\n')
            line_splitted = line_stripped.split(' ')
            #if [line_splitted[1], line_splitted[2]] in lc_pair_search_phrases:
                #print('redundant: '+ line_splitted[1] +' '+line_splitted[2] )
            lc_pair_search_phrases.append([line_splitted[1], line_splitted[2]])
            if i < outlier_quantity:
                outlier_pair_search_phrases.append([line_splitted[1], line_splitted[2]])
            else:
                inlier_pair_search_phrases.append([line_splitted[1], line_splitted[2]])
                
            i += 1
    
    edge_twice = []        # edges present in both the inlier and outlier lists are treated as inliers
    for e in outlier_pair_search_phrases:
        if e in inlier_pair_search_phrases:
            edge_twice.append(e)
            
    outlier_pair_search_phrases = [i for i in outlier_pair_search_phrases if i not in edge_twice]
            
        
            
    # In[]:
            
    #read output file, find ID-value pairs
            
    
    lc_id_score_pair = np.zeros((lc_edge_quantity, 4), dtype = float)
    lc_id_ofc_pair = np.zeros((lc_edge_quantity, 4), dtype = float)
    #   array of [float0, float1, float2, float3], f0: vertex_from, f1: vertex_to, f2: ofc or score, f3: incremental step
    
    
    for i in df_full.index:
        
        if [str(df_full['vertex_from'].iloc[i]), str(df_full['vertex_to'].iloc[i])] in lc_pair_search_phrases:
            
            lc_id_ofc_pair[i] = np.array([df_full['vertex_from'].iloc[i], df_full['vertex_to'].iloc[i], df_full['ofc'].iloc[i], float(i)])
            if float(df_full['score'].iloc[i]) > zero_out_threshold:            
                
                lc_id_score_pair[i] = np.array([df_full['vertex_from'].iloc[i], df_full['vertex_to'].iloc[i], df_full['score'].iloc[i], float(i)])
            else:
            
                lc_id_score_pair[i] = np.array([df_full['vertex_from'].iloc[i], df_full['vertex_to'].iloc[i], 0, float(i)])
            #print(all_switch_edge[i])
           
    print('std_score of all edges: ', lc_id_score_pair[:,2].std())
          
    # In[]:
    inlier_id_ofc_pair = np.zeros((inlier_quantity+len(edge_twice), 4), dtype = float) # change the quantity based on overlap
    outlier_id_ofc_pair = np.zeros((outlier_quantity-len(edge_twice), 4), dtype = float) # change the quantity based on overlap
    #   array of [float0, float1, float2, float3], f0: vertex_from, f1: vertex_to, f2: ofc or score, f3: incremental step
    
    
    j=0
    k=0
    for i in range(0, lc_id_ofc_pair.shape[0]):
        if [str(int(lc_id_ofc_pair[i, 0])), str(int(lc_id_ofc_pair[i,1]))] in outlier_pair_search_phrases:
            outlier_id_ofc_pair[j] = lc_id_ofc_pair[i]
            j+=1
        elif [str(int(lc_id_ofc_pair[i, 0])), str(int(lc_id_ofc_pair[i,1]))] in inlier_pair_search_phrases:
            inlier_id_ofc_pair[k] = lc_id_ofc_pair[i]
            k+=1
            
    # In[]:
    print('outlier_ofc_minimum: ', outlier_id_ofc_pair[:,2].min())
    print('outlier_ofc_maximum: ', outlier_id_ofc_pair[:,2].max())
    print('inlier_ofc_minimum: ', inlier_id_ofc_pair[:,2].min())
    print('inlier_ofc_maximum: ', inlier_id_ofc_pair[:,2].max())
    print(' ')
    
    #    fig = plt.figure()
    #    plt.plot(outlier_id_ofc_pair[:,2], 'ro-')
    #    plt.savefig("ofc_outlier")
    #    plt.show()
    
    #    fig = plt.figure()
    #    plt.plot(inlier_id_ofc_pair[:,2], 'o-')
    #    plt.savefig("ofc_inlier")
    #    plt.show()
    
    #fig = plt.figure()
    #plt.plot(outlier_id_ofc_pair[:,3], outlier_id_ofc_pair[:,2], 'ro')
    #plt.plot(inlier_id_ofc_pair[:,3], inlier_id_ofc_pair[:,2], 'o')
    #plt.savefig("combined_ofc_plot")
    #plt.show()
    
    
    
    # In[]:
    inlier_id_score_pair = np.zeros((inlier_quantity+len(edge_twice), 4), dtype = float) # change the quantity based on overlap
    outlier_id_score_pair = np.zeros((outlier_quantity-len(edge_twice), 4), dtype = float) # change the quantity based on overlap
    j=0
    k=0
    for i in range(0, lc_id_score_pair.shape[0]):
        if [str(int(lc_id_score_pair[i, 0])), str(int(lc_id_score_pair[i,1]))] in outlier_pair_search_phrases:
            outlier_id_score_pair[j] = lc_id_score_pair[i]
            j+=1
        elif [str(int(lc_id_score_pair[i, 0])), str(int(lc_id_score_pair[i,1]))] in inlier_pair_search_phrases:
            inlier_id_score_pair[k] = lc_id_score_pair[i]
            k+=1
            
    print('outlier_score_minimum: ', outlier_id_score_pair[:,2].min())
    print('outlier_score_maximum: ', outlier_id_score_pair[:,2].max())
    print('inlier_score_minimum: ', inlier_id_score_pair[:,2].min())
    print('inlier_score_maximum: ', inlier_id_score_pair[:,2].max())
    
    #    fig = plt.figure()
    #    plt.plot(outlier_id_score_pair[:,2], 'ro-')
    #    plt.savefig("score_outlier")
    #    plt.show()
    
    
    #    fig = plt.figure()
    #    plt.plot(inlier_id_score_pair[:,2], 'o-')
    #    plt.savefig("score_inlier")
    #    plt.show()
    
    fig = plt.figure()
    plt.plot(outlier_id_score_pair[:,3], outlier_id_score_pair[:,2], 'ro')
    plt.plot(inlier_id_score_pair[:,3], inlier_id_score_pair[:,2], 'o')
    plt.savefig("combined_score_plot")
    plt.show()
    
    # In[]:
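    # Incremental 1-D clustering over the score sequence: the next edge joins
    # the current cluster unless its score departs from the running cluster
    # mean by more than `threshold` or it lies on the other side of the 0.99
    # on/off boundary; either condition starts a new cluster.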
                
    clusters = [[lc_id_score_pair[0]]]
    for i in range(0, len(lc_id_score_pair)-1):
        last_cluster = clusters[-1]
        sumsup = 0
        for j in last_cluster:
            sumsup += j[2]
        ave = sumsup / len(last_cluster)
        #print('ave: ', ave)
        #print('new score: ', lc_id_score_pair[i+1, 2])
        if abs(ave - lc_id_score_pair[i+1, 2]) > threshold or ((lc_id_score_pair[i+1, 2] > 0.99) != (ave > 0.99)):
            clusters.append([lc_id_score_pair[i+1]]) 

        else:
            clusters[-1].append(lc_id_score_pair[i+1])
    
    for i in range(0, len(clusters)):
        print('cluster begins: ', clusters[i][0][0], ' ', clusters[i][0][1])
            
    # In[]:
    for i in range(0, len(clusters)):
        for j in range(0, len(clusters[i])):
            if [str(int(clusters[i][j][0])), str(int(clusters[i][j][1]))] in outlier_pair_search_phrases:
                clusters[i][j] = np.append(clusters[i][j], -1)  # append one value for decision status
            else:
                clusters[i][j] = np.append(clusters[i][j], 1)
    
    decision = []
    inconsistent_cluster = []
    for i in range(0, len(clusters)):   
        if abs(sum(np.array(clusters[i])[:,4])) == len(clusters[i]):
            
            decision.append(True)
        else:
            decision.append(False)
            inconsistent_cluster.append(clusters[i])
            
    print('consistent clusters/all clusters: ', sum(decision), '/', len(decision))
    if sum(decision) != len(decision):
        for i in range(0, len(decision)):
            if not decision[i]:
                print('wrong cluster: ', i)
                print('includes ', clusters[i])
    print('sum of all edges', sum([len(cluster) for cluster in clusters]))
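
A hypothetical invocation of the snippet above (folder, counts, and thresholds are illustrative only):

sys.argv = ["full_analysis.py", "/path/to/run_folder", "20", "10", "0.01", "0.001"]
process()  # argv: folder, inlier count, outlier count, threshold, zero-out threshold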