import sys
import pickle


def main():
    action = sys.argv[1]
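    # Dispatch on the sub-command in sys.argv[1]; the remaining arguments are positional.
    # Actions that transform a graph print the updated pickled (graph, filenames) pair
    # to stdout, so the output of one action can be saved and fed to the next.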

    if action == "create":
        indexfile = sys.argv[2]
        filelist = load_listfile(sys.argv[3])
        create_graph(indexfile, filelist)

    if action == "create_feature":
        indexfile = sys.argv[2]
        level_filelist = load_listfile(sys.argv[3])
        feature_filelist = load_listfile(sys.argv[4])
        create_graph_feature(indexfile, level_filelist, feature_filelist)

    if action == "create2":
        indexfile = sys.argv[2]
        filelist = load_listfile(sys.argv[3])
        create_graph2(indexfile, filelist)

    if action == "create_correlation":
        indexfile = sys.argv[2]
        filelist = load_listfile(sys.argv[3])
        create_graph_correlation(indexfile, filelist)

    if action == "print":
        print_graph(sys.argv[2])

    if action == "correlate":
        find_duplicates(sys.argv[2], float(sys.argv[3]))

    if action == "dot":
        print_dot(sys.argv[2])

    if action == "rebase_log":
        graph, filenames = load_graph(sys.argv[2])
        n = float(sys.argv[3])

        for s_index in xrange(len(graph)):
            for e_i in xrange(len(graph[s_index])):
                e_index, correlation = graph[s_index][e_i]
                graph[s_index][e_i] = (e_index, rebase_value_log(n, correlation))

        print pickle.dumps((graph, filenames))

    if action == "rebase_exp":
        graph, filenames = load_graph(sys.argv[2])
        n = float(sys.argv[3])

        for s_index in xrange(len(graph)):
            for e_i in xrange(len(graph[s_index])):
                e_index, correlation = graph[s_index][e_i]
                graph[s_index][e_i] = (e_index, rebase_value_exp(n, correlation))

        print pickle.dumps((graph, filenames))

    if action == "adjust":
        graph, filenames = load_graph(sys.argv[2])
        values = [float(v) for v in open(sys.argv[3], "r").readlines()]
        value_index = 0
        for i in xrange(len(filenames)):
            for j in xrange(len(filenames)):
                if i != j:
                    edges = graph[i]
                    for k, (e_index, correlation) in enumerate(edges):
                        if e_index == j:
                            edges[k] = (e_index, values[value_index])
                            break
                    value_index += 1

        print pickle.dumps((graph, filenames))

    if action == "extend":
        graph, filenames = load_graph(sys.argv[2])
        graph = fill_graph(graph, float(sys.argv[3]))
        print pickle.dumps((graph, filenames))

    if action == "level_update":
        graph, filenames = load_graph(sys.argv[2])
        level_signatures = pickle.loads(open(sys.argv[3], "r").read())
        c_value = float(sys.argv[4])
        l_value = float(sys.argv[5])
        import signature_make

        result_graph = []
        for s_index, edges in enumerate(graph):
            result_edges = []
            for e_index, correlation in edges:
                l_correlation = signature_make.signature_matrix_compare(level_signatures[s_index], level_signatures[e_index])
                if l_correlation >= l_value and correlation >= c_value:
                    result_edges.append((e_index, l_correlation))
            result_graph.append(result_edges)

        print pickle.dumps((result_graph, filenames))

    if action == "clique":
        graph, filenames = load_graph(sys.argv[2])

        r_sets = find_all_cliques([[e_index for e_index, correlation in edges] for edges in graph])
        r_sets.sort(key=lambda x: -len(x))

        for r_set in r_sets:
            print list(r_set)
            print [filenames[i] for i in r_set]

    if action == "auto_clique":
        graph, filenames = load_graph(sys.argv[2])
        weak_threshold = float(sys.argv[3])
        strong_threshold = float(sys.argv[4])
        weak_graph = [[e_index for e_index, correlation in edges if correlation >= weak_threshold] for edges in graph]
        strong_graph = [[(e_index, correlation) for e_index, correlation in edges if correlation >= strong_threshold] for edges in graph]

        print pickle.dumps((strong_graph, filenames))
Example 2

import sys
import pickle


def main():
    action = sys.argv[1]

    if action == "create":
        indexfile = sys.argv[2]
        filelist = load_listfile(sys.argv[3])
        create_graph(indexfile, filelist)

    if action == "create_feature":
        indexfile = sys.argv[2]
        level_filelist = load_listfile(sys.argv[3])
        feature_filelist = load_listfile(sys.argv[4])
        create_graph_feature(indexfile, level_filelist, feature_filelist)

    if action == "create2":
        indexfile = sys.argv[2]
        filelist = load_listfile(sys.argv[3])
        create_graph2(indexfile, filelist)

    if action == "create_correlation":
        indexfile = sys.argv[2]
        filelist = load_listfile(sys.argv[3])
        create_graph_correlation(indexfile, filelist)

    if action == "print":
        print_graph(sys.argv[2])

    if action == "correlate":
        find_duplicates(sys.argv[2], float(sys.argv[3]))

    if action == "dot":
        print_dot(sys.argv[2])

    if action == "rebase_log":
        graph, filenames = load_graph(sys.argv[2])
        n = float(sys.argv[3])

        for s_index in xrange(len(graph)):
            for e_i in xrange(len(graph[s_index])):
                e_index, correlation = graph[s_index][e_i]
                graph[s_index][e_i] = (e_index, rebase_value_log(n, correlation))

        print pickle.dumps((graph, filenames))

    if action == "rebase_exp":
        graph, filenames = load_graph(sys.argv[2])
        n = float(sys.argv[3])

        for s_index in xrange(len(graph)):
            for e_i in xrange(len(graph[s_index])):
                e_index, correlation = graph[s_index][e_i]
                graph[s_index][e_i] = (e_index, rebase_value_exp(n, correlation))

        print pickle.dumps((graph, filenames))

    if action == "adjust":
        graph, filenames = load_graph(sys.argv[2])
        values = [float(v) for v in open(sys.argv[3], "r").readlines()]
        value_index = 0
        for i in xrange(len(filenames)):
            for j in xrange(len(filenames)):
                if i != j:
                    edges = graph[i]
                    for k, (e_index, correlation) in enumerate(edges):
                        if e_index == j:
                            edges[k] = (e_index, values[value_index])
                            break
                    value_index += 1

        print pickle.dumps((graph, filenames))

    if action == "pagerank":
        import graph_reweight as rw
        graph, filenames = load_graph(sys.argv[2])
        # graph = rw.make_agraph(rw.reweight(rw.make_graph(threshold_graph(graph, 0.1))))
        graph = rw.make_agraph(rw.reweight(rw.make_graph(graph)))
        print pickle.dumps((graph, filenames))

    if action == "extend":
        graph, filenames = load_graph(sys.argv[2])
        graph = fill_graph(graph, float(sys.argv[3]))
        print pickle.dumps((graph, filenames))

    if action == "level_update":
        graph, filenames = load_graph(sys.argv[2])
        level_signatures = pickle.loads(open(sys.argv[3], "r").read())
        c_value = float(sys.argv[4])
        l_value = float(sys.argv[5])
        import signature_make

        result_graph = []
        for s_index, edges in enumerate(graph):
            result_edges = []
            for e_index, correlation in edges:
                l_correlation = signature_make.signature_matrix_compare(level_signatures[s_index], level_signatures[e_index])
                if l_correlation >= l_value and correlation >= c_value:
                    result_edges.append((e_index, l_correlation))
            result_graph.append(result_edges)

        print pickle.dumps((result_graph, filenames))