Ejemplo n.º 1
0
def parse_nobal_ilp_sol(filename, genome_a, genome_b):
    """Summarise an ILP solution that was produced without balancing edges.

    The graph of matching edges (read from the solution file) and adjacency
    edges (read from both genomes) is rebuilt, every open path is closed
    according to the genomes of its two end vertices, and the components of
    the resulting "closing" graph are counted per genome.

    :param filename: path of the solution file; the log file name is obtained
        by replacing "sol" with "log" in this string.
    :param genome_a: genome labelled "A"; passed to ``add_capping_genes``
        before its adjacencies are read.
    :param genome_b: genome labelled "B"; handled like ``genome_a``.
    :return: dict with the keys "dcj_distance", "dup_a", "dup_b", "time"
        and "gap".
    """

    def gene_edge(vertex):
        # Head--tail edge of the gene copy that `vertex` is an extremity of.
        name, gene, copy_idx, _ = vertex
        return (name, gene, copy_idx, Ext.HEAD), (name, gene, copy_idx, Ext.TAIL)

    # Capping genes have to be present before the adjacencies are listed.
    add_capping_genes(genome_a, genome_b)

    master_graph = nx.Graph()

    # Matching edges: one per extremity of every matched pair of copies.
    obj, matching = obj_and_matching_from_sol(filename)
    for gene, c_a, c_b in matching:
        for ext in (Ext.HEAD, Ext.TAIL):
            master_graph.add_edge(("A", gene, c_a, ext), ("B", gene, c_b, ext))

    # Adjacency edges of both genomes.
    for name, genome in (("A", genome_a), ("B", genome_b)):
        for (g_i, c_i, e_i), (g_j, c_j, e_j) in genome.adjacency_iter_with_copies():
            master_graph.add_edge((name, g_i, c_i, e_i), (name, g_j, c_j, e_j))

    # The graph is now expected to be a set of cycles and paths. Paths end in
    # two degree-one vertices, whose genomes classify the path as AA, BB or AB.
    G = nx.Graph()
    ab_components = []
    for comp in connected_components(master_graph):
        ends = sorted(v for v in comp if master_graph.degree(v) == 1)
        if len(ends) <= 1:
            continue
        assert len(ends) == 2
        if ends[0][0] != ends[1][0]:
            # AB-path: paired up after all components have been seen.
            ab_components.append(ends)
            continue
        # AA- or BB-path: close it and add the gene edges of both ends.
        G.add_edge(*ends)
        for end in ends:
            G.add_edge(*gene_edge(end))

    # AB-paths are joined two at a time, in the order they were found
    # (an unpaired last one is left out).
    stream = iter(ab_components)
    for first, second in zip(stream, stream):
        G.add_edge(first[0], second[0])
        G.add_edge(first[1], second[1])
        for end in (first[0], first[1], second[0], second[1]):
            G.add_edge(*gene_edge(end))

    # Each component of the closing graph counts once, for the genome of the
    # first vertex it yields.
    indels = {"A": 0, "B": 0}
    for comp in connected_components(G):
        name, _gene, _copy, _ext = next(iter(comp))
        indels[name] += 1

    gap, elapsed = parse_log_file(filename.replace("sol", "log"))

    # The ILP counts each AB-component as a full cycle although it is only
    # worth half of one, so the objective is corrected here.
    obj += len(ab_components) / 2

    return {"dcj_distance": obj, "dup_a": indels["A"], "dup_b": indels["B"],
            "time": elapsed, "gap": gap}
Ejemplo n.º 2
0
def parse_ilp_sol(filename):
    """Extract distance, indel counts, run time and gap from an ILP solution.

    Every line of the solution file is tested against ``obj_regexp`` (the
    objective value) and ``balancing_regexp`` (a balancing-edge variable).
    Balancing edges whose value is at least 0.9 are added to a graph together
    with the head--tail edges of both genes involved; the connected
    components of that graph are then counted per genome.

    :param filename: path of the solution file; the log file name is obtained
        by replacing "sol" with "log" in this string.
    :return: dict with the keys "dcj_distance", "dup_a", "dup_b", "time"
        and "gap".
    """
    G = nx.Graph()
    obj = 0
    with open(filename) as handle:
        for raw in handle:
            line = raw.strip()

            found = obj_regexp.match(line)
            if found is not None:
                obj = int(round(float(found.group(1)), 0))

            found = balancing_regexp.match(line)
            if found is None:
                continue
            genome_1, gene_1, copy_1, ext_1, genome_2, gene_2, copy_2, ext_2, val = found.groups()
            # Solver output is not exact: values such as 1e-12, 0.99999 or
            # 1.000000001 occur, hence the threshold instead of "== 1".
            if float(val) >= 0.9:
                # gene edges (head to tail) of both endpoints ...
                G.add_edge((genome_1, gene_1, copy_1, Ext.HEAD), (genome_1, gene_1, copy_1, Ext.TAIL))
                G.add_edge((genome_2, gene_2, copy_2, Ext.HEAD), (genome_2, gene_2, copy_2, Ext.TAIL))
                # ... and the balancing edge itself.
                G.add_edge((genome_1, gene_1, copy_1, ext_1), (genome_2, gene_2, copy_2, ext_2))

    # Each component counts once, for the genome of the first vertex it yields.
    indels = {"A": 0, "B": 0}
    for comp in connected_components(G):
        name, _gene, _copy, _ext = next(iter(comp))
        indels[name] += 1

    gap, elapsed = parse_log_file(filename.replace("sol", "log"))

    return {"dcj_distance": obj, "dup_a": indels["A"], "dup_b": indels["B"],
            "time": elapsed, "gap": gap}
Ejemplo n.º 3
0
    def __init__(self, width, height):
        """Set up an empty ``width`` x ``height`` board.

        Builds the grid graph, clears every node's 'barrier' flag, sets every
        edge weight to ``min_int`` and initialises the wall/box/bomb/player
        containers, the boost counters and the bomb-range bookkeeping.
        """
        self.width = width
        self.height = height

        # Grid graph with default node and edge attributes.
        self.g = grid_2d_graph(width, height)
        for _, node_attrs in self.g.nodes(data=True):
            node_attrs['barrier'] = False
        for _, _, edge_attrs in self.g.edges(data=True):
            edge_attrs['weight'] = min_int

        # Board contents.
        self.walls = set()
        self.boxes = set()
        self.bombs = {}  # mapping from bomb to rounds
        self.players = set()
        self.enemies = set()

        # Boost bookkeeping.
        self.boost_remain = 0
        self.boost_renew = 0

        # Distance between two opposite corners of the board.
        max_radius = distance((0, 0), (self.width-1, self.height-1))
        self.in_boost_sequence = [False] * max_radius
        self.radii = range(max_radius)

        # Every board coordinate, with x as the outer loop.
        self.points = [(x, y) for x in range(self.width) for y in range(self.height)]
        self.point_bomb_rounds = dict.fromkeys(self.points, max_int)

        # Bomb-range graph and its (initially empty) components.
        self.bomb_ranges_graph = Graph()
        self.bomb_ranges_sub_graphs = connected_components(self.bomb_ranges_graph)
Ejemplo n.º 4
0
def calculate_features(queue, g_file, pairs):
    """Compute the feature dict of every node pair and push it onto ``queue``.

    The graph is read from ``g_file`` and reduced to its largest connected
    component before a ``Featurator`` is built on it. For each pair the
    feature dict is extended with the keys 'pair', 'h1' and 'h2' (the two
    nodes in ascending order) and put on the queue; the string 'done' is put
    last as an end-of-stream marker.

    :param queue: object with a ``put`` method that receives the results.
    :param g_file: path of the GraphML file to load.
    :param pairs: sized collection of indexable pairs ``(h1, h2)``.
    """
    print("Started!")
    # Read the file that was passed in, not the module-level `graph_file`.
    G = nx.read_graphml(g_file)
    # connected_components yields components in no particular size order, so
    # the largest one has to be selected explicitly.
    mainLst = max(nxa.connected_components(G), key=len)
    G = G.subgraph(mainLst)
    f = Featurator(G)
    print("File: " + g_file)
    print("Pairs: " + str(len(pairs)))

    count = 0

    for pair in pairs:
        count += 1
        h1 = pair[0]
        h2 = pair[1]
        res = f.get_feature_dict(h1, h2)
        # Normalise the pair so that the smaller node always comes first.
        first, second = (h1, h2) if h1 < h2 else (h2, h1)
        res['pair'] = first + second
        res['h1'] = first
        res['h2'] = second
        queue.put(res)
    print("Calculated everyone! - " + str(count))
    queue.put('done')
Ejemplo n.º 5
0
def calculate_features(queue, g_file, pairs):
    """Compute the feature dict of every node pair and push it onto ``queue``.

    The graph is read from ``g_file`` and reduced to its largest connected
    component before a ``Featurator`` is built on it. For each pair the
    feature dict is extended with the keys 'pair', 'h1' and 'h2' (the two
    nodes in ascending order) and put on the queue; the string 'done' is put
    last as an end-of-stream marker.

    :param queue: object with a ``put`` method that receives the results.
    :param g_file: path of the GraphML file to load.
    :param pairs: sized collection of indexable pairs ``(h1, h2)``.
    """
    print("Started!")
    # Read the file that was passed in, not the module-level `graph_file`.
    G = nx.read_graphml(g_file)
    # connected_components yields components in no particular size order, so
    # the largest one has to be selected explicitly.
    mainLst = max(nxa.connected_components(G), key=len)
    G = G.subgraph(mainLst)
    f = Featurator(G)
    print("File: " + g_file)
    print("Pairs: " + str(len(pairs)))

    count = 0

    for pair in pairs:
        count += 1
        h1 = pair[0]
        h2 = pair[1]
        res = f.get_feature_dict(h1, h2)
        # Normalise the pair so that the smaller node always comes first.
        first, second = (h1, h2) if h1 < h2 else (h2, h1)
        res['pair'] = first + second
        res['h1'] = first
        res['h2'] = second
        queue.put(res)
    print("Calculated everyone! - " + str(count))
    queue.put('done')
Ejemplo n.º 6
0
 def update_bomb_rounds(self):
     """Recompute the bomb-range components and the rounds value of their points.

     Every point of a component receives the smallest ``self.bombs`` value
     found among the component's points, or ``max_int`` when none of them is
     a bomb with a smaller value.
     """
     self.bomb_ranges_sub_graphs = connected_components(self.bomb_ranges_graph)
     for component in self.bomb_ranges_sub_graphs:
         # max_int comes first so it wins ties and is the result for a
         # component without bombs.
         candidates = [max_int] + [self.bombs[p] for p in component if p in self.bombs]
         earliest = min(candidates)
         for p in component:
             self.point_bomb_rounds[p] = earliest
Ejemplo n.º 7
0
import csv
import sys

# Command line: graph_file output_file [#processes]
if len(sys.argv) < 3:
    print("Usage: ./generate_csv.py graph_file output_file [#processes]")
    sys.exit()

graph_file = sys.argv[1]
output_file = sys.argv[2]
# The number of worker processes is optional and defaults to 1.
processes = 1 if len(sys.argv) < 4 else int(sys.argv[3])

print("Loading graph file...")
G = nx.read_graphml(graph_file)

print("Obtaining largest connected component...")
gen = nxa.connected_components(G)
# The components are yielded in no particular size order, so taking the first
# one is not enough: pick the largest explicitly.
mainLst = max(gen, key=len)
G = G.subgraph(mainLst)

f = Featurator(G)

# CSV columns: pair identification followed by the featurator's features.
csv_fields = ['pair', 'h1', 'h2'] + f.feature_list()

csvfile = open(output_file, 'w')
writer = csv.DictWriter(csvfile, fieldnames=csv_fields)
writer.writeheader()

count = 0

# Every unordered pair of distinct nodes, in node order: each node is combined
# with the nodes that come after it.
nodes = list(G.nodes())
pairs = [(h1, h2) for i, h1 in enumerate(nodes) for h2 in nodes[i + 1:]]
Ejemplo n.º 8
0
import csv
import sys

# Command line: graph_file output_file [#processes]
if len(sys.argv) < 3:
    print("Usage: ./generate_csv.py graph_file output_file [#processes]")
    sys.exit()

graph_file = sys.argv[1]
output_file = sys.argv[2]
# The number of worker processes is optional and defaults to 1.
processes = 1 if len(sys.argv) < 4 else int(sys.argv[3])

print("Loading graph file...")
G = nx.read_graphml(graph_file)

print("Obtaining largest connected component...")
gen = nxa.connected_components(G)
# The components are yielded in no particular size order, so taking the first
# one is not enough: pick the largest explicitly.
mainLst = max(gen, key=len)
G = G.subgraph(mainLst)

f = Featurator(G)

# CSV columns: pair identification followed by the featurator's features.
csv_fields = ['pair', 'h1', 'h2'] + f.feature_list()

csvfile = open(output_file, 'w')
writer = csv.DictWriter(csvfile, fieldnames=csv_fields)
writer.writeheader()

count = 0

# Every unordered pair of distinct nodes, in node order: each node is combined
# with the nodes that come after it.
nodes = list(G.nodes())
pairs = [(h1, h2) for i, h1 in enumerate(nodes) for h2 in nodes[i + 1:]]
Ejemplo n.º 9
0
Archivo: ilp.py Proyecto: ANekhai/RINGO
def dcj_dupindel_ilp(genome_a, genome_b, output):
    """Write the DCJ-dupindel ILP for two genomes to a text file.

    Steps, in order:

    1. Work on deep copies of both genomes. Every non-circular chromosome
       gets a capping gene 0 appended and is flagged circular, and the genome
       with fewer chromosomes is padded with circular chromosomes ``[0]``.
    2. Build a graph with the matching edges of genes that have a single
       possible partner plus all adjacency edges, and rescan its connected
       components until nothing new can be fixed: cycles fix their y/z
       labels, paths ending on homologous extremities fix a matching, and
       AA-/BB-paths ending on two balancing extremities fix a balancing edge.
    3. Generate the remaining constraints, bounds and variable lists and
       write them to ``output`` under the headers "Minimize", "Subject to",
       "Bounds", "Binary" and "General".

    This is Python 2 code (print statement, ``iteritems``, ``xrange``).

    :param genome_a: genome labelled "A"; the code uses its
        ``n_chromosomes()``, ``chromosomes``, ``add_chromosome()`` and
        ``gene_count()`` members.
    :param genome_b: genome labelled "B"; used like ``genome_a``.
    :param output: path of the file to write.
    :return: None. Two summary counts are printed to stdout.
    """
    # copy genomes to possibly make some changes:
    genome_a = copy.deepcopy(genome_a)
    genome_b = copy.deepcopy(genome_b)
    max_chromosomes = max(genome_a.n_chromosomes(), genome_b.n_chromosomes())

    # add capping genes:
    for genome in [genome_a, genome_b]:
        for c in genome.chromosomes:
            if not c.circular:
                c.gene_order.append(0)
                c.circular = True
        for i in range(genome.n_chromosomes(), max_chromosomes):
            genome.add_chromosome(DupChromosome([0], circular=True))

    # count of each gene on each genome
    gene_count = {"A": genome_a.gene_count(), "B": genome_b.gene_count()}

    # for all genes ,the total "balanced" count:
    total_gene_count = {g: max(gene_count["A"][g], gene_count["B"][g]) for g in
                        set(gene_count["A"].keys()).union(set(gene_count["B"].keys()))}

    # define the y labels -> integer 1..n
    y_label = define_y_label(total_gene_count)

    # list of possible edges for each vertex:
    # edges[(gene, copy in A)] = set of copies in B it may still be matched to.
    edges = {}
    for gene, copies in total_gene_count.iteritems():
        for i in xrange(1, copies + 1):
            edges[(gene, i)] = set(range(1, copies + 1))

    # try to fix variables:
    # Build the BP graph of fixed elements to try to find more variables to fix:
    master_graph = nx.Graph()
    # fixed vars:
    y_fix = {}
    z_fix = {}
    balancing_fix = {"A": {}, "B": {}}

    # add matching edges of genes with single copy:
    for (gene, copy_a), set_y in edges.iteritems():
        if len(set_y) == 1:
            copy_b = list(set_y)[0]
            for ext in [Ext.HEAD, Ext.TAIL]:
                master_graph.add_edge(("A", gene, copy_a, ext), ("B", gene, copy_b, ext))

    # add adjacency edges:
    for genome, genome_name in [(genome_a, "A"), (genome_b, "B")]:
        for (g_i, copy_a, e_i), (g_j, copy_b, e_j) in adjacency_list(genome, total_gene_count):
            master_graph.add_edge((genome_name, g_i, copy_a, e_i), (genome_name, g_j, copy_b, e_j))

    # Search components to fix:
    # Graph changes found during one scan are only applied at the start of the
    # next one, so the graph is never modified while its components are iterated.
    rescan = True
    edges_to_add = []
    vertices_to_remove = []
    while rescan:
        rescan = False
        master_graph.add_edges_from(edges_to_add)
        master_graph.remove_nodes_from(vertices_to_remove)
        edges_to_add = []
        vertices_to_remove = []

        # check each connected component:
        for comp in connected_components(master_graph):
            # get degree-1 vertices:
            degree_one = [v for v in comp if master_graph.degree(v) == 1]

            # if two degree one vertices, it is a path;
            if len(degree_one) == 2:
                genome_i, g_i, copy_a, e_i = degree_one[0]
                genome_j, g_j, copy_b, e_j = degree_one[1]
                # 1 - check if nodes are balancing, to find AA-, BB- and AB- paths that can be fixed.
                # A copy index above the genome's own count of the gene marks a balancing copy.
                i_is_balancing = g_i != 0 and copy_a > gene_count[genome_i][g_i]
                j_is_balancing = g_j != 0 and copy_b > gene_count[genome_j][g_j]
                if i_is_balancing and j_is_balancing:
                    if genome_i == genome_j:  # AA- or BB-path, close it
                        balancing_fix[genome_i][degree_one[0][1:]] = degree_one[1][1:]
                        balancing_fix[genome_i][degree_one[1][1:]] = degree_one[0][1:]
                        degree_one = []
                    else:
                        # TODO: deal with AB-components;
                        pass

                # if the path has homologous genes at the ends, I can join:
                elif genome_i != genome_j and g_i == g_j:
                    # invert to put genome A always in variables _i :
                    if genome_j == "A":
                        genome_i, g_i, copy_a, e_i, genome_j, g_j, copy_b, e_j = genome_j, g_j, copy_b, e_j, genome_i, g_i, copy_a, e_i
                    # check conflict, only add edge if ok:
                    if copy_b in edges[(g_i, copy_a)]:
                        edges[(g_i, copy_a)] = {copy_b}
                        # save edges to add to graph:
                        for ext in [Ext.HEAD, Ext.TAIL]:
                            edges_to_add.append((("A", g_i, copy_a, ext), ("B", g_i, copy_b, ext)))
                        # new edges, re-scan:
                        rescan = True

                        # remove possible edges from other copies:
                        for idx in xrange(1, total_gene_count[g_i] + 1):
                            if idx == copy_a:
                                continue
                            try:
                                # if not there already, exception is thrown, that' ok
                                edges[(g_i, idx)].remove(copy_b)
                                # Add new edges to graph, if the removal created degree 1 vertices:
                                if len(edges[(g_i, idx)]) == 1:
                                    idx_c = list(edges[(g_i, idx)])[0]
                                    for ext in [Ext.HEAD, Ext.TAIL]:
                                        edges_to_add.append((("A", g_i, idx, ext), ("B", g_i, idx_c, ext)))
                            except KeyError:
                                pass
            # if no degree one vertices, it is a cycle, I can fix the y_i:
            elif len(degree_one) == 0:
                # get indexes of the y_i:
                indexes = [(v, y_label[vertex_name(*v)]) for v in comp]
                min_label = min([x[1] for x in indexes])
                # every vertex of the cycle takes the smallest label; only that
                # label keeps z = 1.
                for v, label in indexes:
                    y_fix[label] = min_label
                    z_fix[label] = 0
                z_fix[min_label] = 1
                vertices_to_remove.extend(comp)

    # DRAW?
    # nx.draw_circular(master_graph, font_size=8, width=0.5, node_shape="8", node_size=1, with_labels=True)
    # nx.draw_spring(master_graph, font_size=8, width=0.5, node_shape="8", node_size=20, with_labels=True)
    # nx.draw_spectral(master_graph, font_size=8, width=0.5, node_shape="8", node_size=20, with_labels=True)
    # nx.draw_graphviz(master_graph, font_size=8, width=0.5, node_shape="8", node_size=20, with_labels=True)
    # plt.savefig('graph.pdf', bbox_inches='tight')

    # all fixed, generate ILP:
    # Entries that start with a backslash are written to the file as comment lines.
    constraints = []

    # consistency and matching 1-to-1
    constraints.append("\ Matching and consistency constraints")
    # sorting just to make it nicer looking:
    for (gene, copy_a) in sorted(edges):
        copy_set_b = edges[(gene, copy_a)]
        if len(copy_set_b) > 1:
            for copy_b in copy_set_b:
                constraints.append("%s - %s = 0" % (
                    matching_edge_name(gene, copy_a, copy_b, Ext.TAIL),
                    matching_edge_name(gene, copy_a, copy_b, Ext.HEAD)))
            constraints.append(
                " + ".join([matching_edge_name(gene, copy_a, copy_b, Ext.TAIL) for copy_b in copy_set_b]) + " = 1")

    constraints.append("\ Balancing:")
    # For each gene, the copy indexes that exist only to balance the counts of the two genomes.
    balancing_genes_A = {g: range(gene_count["A"][g] + 1, gene_count["B"][g] + 1) for g in total_gene_count.iterkeys()
                         if gene_count["A"][g] < gene_count["B"][g]}
    balancing_genes_B = {g: range(gene_count["B"][g] + 1, gene_count["A"][g] + 1) for g in total_gene_count.iterkeys()
                         if gene_count["B"][g] < gene_count["A"][g]}

    for genome, balancing in [("A", balancing_genes_A), ("B", balancing_genes_B)]:
        constraints.append("\ Genome %s" % genome)
        for gene_i, copy_i, ext_i in balancing_extremities(balancing):
            # check if fixed:
            if (gene_i, copy_i, ext_i) in balancing_fix[genome]:
                gene_j, copy_j, ext_j = balancing_fix[genome][(gene_i, copy_i, ext_i)]
                # balancing_fix stores both directions; emit the constraint once.
                if (gene_i, copy_i, ext_i) < (gene_j, copy_j, ext_j):
                    constraints.append(
                        "%s = 1" % balancing_edge_name(genome, gene_i, copy_i, ext_i, gene_j, copy_j, ext_j))
            # if not, matching 1-to-1:
            else:
                constraints.append(
                    " + ".join([balancing_edge_name(genome, gene_i, copy_i, ext_i, gene_j, copy_j, ext_j) for
                                gene_j, copy_j, ext_j in
                                balancing_extremities(balancing, exclude=balancing_fix[genome].keys()) if
                                (gene_i, copy_i, ext_i) != (gene_j, copy_j, ext_j)]) + " = 1")
    constraints.append("\ Labelling")

    #
    # for each adjacency, fix label:
    constraints.append("\\ Adjacency have the same label:")
    for genome, genome_name in [(genome_a, "A"), (genome_b, "B")]:
        for i, j in adjacency_list(genome, total_gene_count):
            v_i = vertex_name(genome_name, *i)
            v_j = vertex_name(genome_name, *j)
            # if already fixed, skip
            if y_label[v_i] in y_fix and y_label[v_j] in y_fix:
                continue
            # if the edge is 0 for sure, also skip:
            constraints.append("y_%s - y_%s = 0 \\ %s <-> %s " % (y_label[v_i], y_label[v_j], v_i, v_j))
    #
    constraints.append("\\ Matching edges with the same label:")
    for (gene, copy_a) in sorted(edges):
        copy_set_b = edges[(gene, copy_a)]
        for ext in [Ext.HEAD, Ext.TAIL]:
            y_i = y_label[vertex_name("A", gene, copy_a, ext)]
            # if edge is set, just make the y_i's equal;
            if len(copy_set_b) == 1:
                y_j = y_label[vertex_name("B", gene, list(copy_set_b)[0], ext)]
                # skip if this y_i's are already fixed
                if y_i in y_fix and y_j in y_fix:
                    continue
                constraints.append("y_%s - y_%s = 0 " % (y_i, y_j))
            else:
                # if edge not set, add both ineqs.
                for copy_b in copy_set_b:
                    y_j = y_label[vertex_name("B", gene, copy_b, ext)]
                    constraints.append(
                        "y_%s - y_%s + %s %s <= %d" % (
                        y_i, y_j, y_i, matching_edge_name(gene, copy_a, copy_b, ext), y_i))
                    constraints.append(
                        "y_%s - y_%s + %s %s <= %d" % (
                        y_j, y_i, y_j, matching_edge_name(gene, copy_a, copy_b, ext), y_j))

    constraints.append("\\ Balancing edges with same label:")
    for genome, balancing in [("A", balancing_genes_A), ("B", balancing_genes_B)]:
        constraints.append("\\ Genome %s" % genome)
        for gene_i, copy_i, ext_i in balancing_extremities(balancing, exclude=balancing_fix[genome].keys()):
            for gene_j, copy_j, ext_j in balancing_extremities(balancing, exclude=balancing_fix[genome].keys()):
                # handle each unordered pair once
                if (gene_i, copy_i, ext_i) >= (gene_j, copy_j, ext_j):
                    continue
                y_i = y_label[vertex_name(genome, gene_i, copy_i, ext_i)]
                y_j = y_label[vertex_name(genome, gene_j, copy_j, ext_j)]
                # should not have someone here if I'm excluding fixed edges:
                if y_i in y_fix and y_j in y_fix:
                    continue
                constraints.append("y_%s - y_%s + %s %s <= %d" % (
                    y_i, y_j, y_i, balancing_edge_name(genome, gene_i, copy_i, ext_i, gene_j, copy_j, ext_j), y_i))
                constraints.append("y_%s - y_%s + %s %s <= %d" % (
                    y_j, y_i, y_j, balancing_edge_name(genome, gene_i, copy_i, ext_i, gene_j, copy_j, ext_j), y_j))

    # z variables: since all cycles have to contains vertices from both genomes, we only add z variables
    # for genome A, that have smallest labels, so a genome B z variable will never be =1.
    constraints.append("\\ Z variables")
    for vertex, i in sorted(y_label.items(), key=operator.itemgetter(1)):
        if vertex[0] == "A":
            # if i in z_fix and z_fix[i] == 0:
            #     continue
            # if i in z_fix and z_fix[i] == 1:
            #     constraints.append("z_%s = 1" % i)
            if i not in z_fix:
                constraints.append("%d z_%s - y_%s <= 0" % (i, i, i))
    #
    # # number of genes, to fix distance:
    constraints.append("n = %d" % (sum(total_gene_count.itervalues())))
    # # number of fixed cycles
    constraints.append("c = %d" % (sum(z_fix.itervalues())))
    # for g in sorted(total_gene_count):
    #     print g,total_gene_count[g]

    #
    # # bounds:
    bounds = []
    for i in sorted(y_label.itervalues()):
        if i not in y_fix:
            bounds.append("y_%d <= %d" % (i, i))
    #
    # # variables:
    binary = []
    #
    # # matching edges
    # matching edges, skipping fixed pairs.
    matching = ["\ match"]
    # for vertex, i in sorted(y_label.items(), key=operator.itemgetter(1)):
    for (gene, copy_a), copy_set_b in sorted(edges.items(), key=operator.itemgetter(0)):
        if len(copy_set_b) > 1:
            for copy_b in copy_set_b:
                for ext in [Ext.HEAD, Ext.TAIL]:
                    matching.append(matching_edge_name(gene, copy_a, copy_b, ext))

    # NOTE(review): `matching` starts with the "\ match" comment entry, so the
    # number printed here is one larger than the number of matching variables.
    print "%d matching edges" % len(matching)
    # print "Potentially %d matching edges" % sum([2*x ** 2 for x in gene_count.itervalues()])
    binary.extend(matching)
    #
    # balancing edges:
    balancing_edges = [balancing_edge_name(genome, gene_i, copy_i, ext_i, gene_j, copy_j, ext_j) for genome, balancing
                       in [("A", balancing_genes_A), ("B", balancing_genes_B)] for gene_i, copy_i, ext_i in
                       balancing_extremities(balancing, exclude=balancing_fix[genome].keys()) for gene_j, copy_j, ext_j
                       in balancing_extremities(balancing, exclude=balancing_fix[genome].keys()) if
                       (gene_i, copy_i, ext_i) < (gene_j, copy_j, ext_j)]
    print "%d balancing edges" % len(balancing_edges)
    binary.extend(balancing_edges)
    #
    # z cycles:
    for vertex, i in sorted(y_label.items(), key=operator.itemgetter(1)):
        if i in z_fix:  # and z_fix[i] == 0:
            continue
        if vertex[0] == "B":
            continue
        binary.append("z_%d" % i)
    #
    # # Y label are general:
    # TODO: remove unused y' and z's from model. If y=1, it can be removed, just set z=1.
    general = []
    for vertex, i in sorted(y_label.items(), key=operator.itemgetter(1)):
        if i not in y_fix:
            general.append("y_%d" % i)
    #
    # # number of genes and fixed cycles:
    general.append("n")
    general.append("c")
    # # objective function:
    # n - c - (sum of the z variables of genome A that were not fixed)
    objective = ["obj: n - c - " + " - ".join(
        ["z_%d" % i for vertex, i in sorted(y_label.items(), key=operator.itemgetter(1)) if
         vertex[0] == "A" and i not in z_fix])]

    # write:
    # one section per header, each followed by its lines
    with open(output, "w") as f:
        for header, lines in [("Minimize", objective), ("Subject to", constraints),
                              ("Bounds", bounds), ("Binary", binary), ("General", general)]:
            print >> f, header
            print >> f, "\n".join(lines)
Ejemplo n.º 10
0
    def solve_ilp(timelimit=60):
        """Solve the LP file ``filename`` with Gurobi and report on the result.

        ``filename``, ``genome_a``, ``genome_b`` and ``skip_balancing`` are
        taken from the enclosing scope.

        If the model is infeasible, an IIS is computed, the model is written
        to "unfeasible.lp" and the names of the IIS constraints are printed.
        Otherwise the solution is written to ``filename + '.sol'``, the
        matching is extracted and compared with the correct orthology, and
        the open paths of the resulting graph are classified as A, B or AB.

        :param timelimit: solver time limit in seconds.
        :return: the gurobipy model.
        :raises ValueError: if a component of the solution graph has a number
            of degree-one vertices other than 0 or 2.
        """
        # import here, so only if actually solving we will need gurobi.
        from gurobipy import read, GRB

        # Binary variables come back as floats (0.99999, 1.000000001, ...),
        # so "is set" is a threshold test, never an equality test.
        is_set = 0.9

        model = read(filename)

        # time limit in seconds:
        model.params.timeLimit = timelimit
        # MIP focus, from 0 to 3:
        model.params.MIPFocus = 1  # best solutions, less focus on bounds.
        model.optimize()

        if model.status == GRB.Status.INFEASIBLE:
            model.computeIIS()
            model.write("unfeasible.lp")
            print('\nThe following constraint(s) cannot be satisfied:')
            for constr in model.getConstrs():
                if constr.IISConstr:
                    print('%s' % constr.constrName)
            return model

        print('FINISHED: Best objective: %g' % model.objVal)
        print('Optimization ended with status %d' % model.status)
        model.write(filename + '.sol')

        z = n = c = 0
        solution_matching = collections.defaultdict(list)
        matching_regexp = re.compile(r"x_A(\d+)_(\d+)h,B(\d+)_(\d+)h")
        # get basic vars and matching:
        for v in model.getVars():
            if v.varName == "n":
                n = v.x
            elif v.varName == "c":
                c = v.x
            elif v.varName.startswith("z") and v.x >= is_set:
                z += 1
            else:
                m = matching_regexp.match(v.varName)
                # same threshold as for the z variables; "== 1" would drop
                # matching edges reported as 0.99999...
                if m is not None and v.x >= is_set:
                    g_a, c_a, g_b, c_b = map(int, m.groups())
                    solution_matching[g_a].append((c_a, c_b))

        from parse_orthology import build_correct_matching, parse_orthology_quality
        correct_matching = build_correct_matching(genome_a, genome_b)
        tp, fp, fn = parse_orthology_quality(solution_matching, correct_matching)

        print("N: %d  cycles:%d (%d fixed, %d from opt)" % (n, z + c, c, z))
        print("Orthology. TP:%d  FP:%d  FN:%d" % (len(tp), len(fp), len(fn)))

        # Now, analyse the BP graph, for the incomplete matching model, to find
        # AA-, BB- and AB- components:
        master_graph = nx.Graph()
        # matching edges of the solution:
        for gene, pair_list in solution_matching.items():
            for copy_a, copy_b in pair_list:
                for ext in [Ext.HEAD, Ext.TAIL]:
                    master_graph.add_edge(("A", gene, copy_a, ext), ("B", gene, copy_b, ext))

        # add adjacency edges:
        for genome, genome_name in [(genome_a, "A"), (genome_b, "B")]:
            for (g_i, copy_a, e_i), (g_j, copy_j, e_j) in genome.adjacency_iter_with_copies():
                master_graph.add_edge((genome_name, g_i, copy_a, e_i), (genome_name, g_j, copy_j, e_j))

        count = {"A": 0, "B": 0, "AB": 0}
        for comp in connected_components(master_graph):
            degree_one = [v for v in comp if master_graph.degree(v) == 1]
            if not degree_one:
                # no open ends: a cycle, nothing to classify
                continue
            if len(degree_one) != 2:
                # a path must have exactly two ends; anything else means the
                # graph is not a collection of cycles and paths.
                raise ValueError(
                    "component with %d degree-one vertices, expected 0 or 2" % len(degree_one))
            if degree_one[0][0] == degree_one[1][0]:
                count[degree_one[0][0]] += 1
            else:
                count["AB"] += 1
        print(count)
        if skip_balancing:
            # each pair of AB-components adds one to the distance (integer division).
            print("Corrected distance: %d" % (model.objVal + count["AB"] // 2))

        return model
Ejemplo n.º 11
0
def dcj_dupindel_ilp(genome_a, genome_b, output, skip_balancing=False, fix_vars=True, solve=False, all_vs_all=False):
    def solve_ilp(timelimit=60):
        # import here, so only if actually solving we will need gurobi.
        from gurobipy import read, GRB
        # pycharm complains of gurobi commands, cannot see them from the import
        model = read(filename)

        # set some options:
        # time limit in seconds:
        model.params.timeLimit = timelimit

        # not verbose:
        # model.setParam('OutputFlag', False)
        # MIP focus, from 0 to 3:
        model.params.MIPFocus = 1  # best solutions, less focus on bounds.
        model.optimize()

        if model.status != GRB.Status.INFEASIBLE:
            print('FINISHED: Best objective: %g' % model.objVal)
            print('Optimization ended with status %d' % model.status)
            model.write(filename + '.sol')

        if model.status == GRB.INFEASIBLE:
            model.computeIIS()
            model.write("unfeasible.lp")
            print('\nThe following constraint(s) cannot be satisfied:')
            for c in model.getConstrs():
                if c.IISConstr:
                    print('%s' % c.constrName)
        else:

            z = n = c = 0
            solution_matching = collections.defaultdict(list)
            matching_regexp = re.compile("x_A(\d+)_(\d+)h,B(\d+)_(\d+)h")
            # get basic vars and matching:
            for v in model.getVars():
                if v.varName == "n":
                    n = v.x
                elif v.varName == "c":
                    c = v.x
                elif v.varName.startswith("z") and v.x >= 0.9:
                    z += 1
                else:
                    m = matching_regexp.match(v.varName)
                    if m is not None and v.x == 1:
                        g_a, c_a, g_b, c_b = map(int, m.groups())
                        solution_matching[g_a].append((c_a, c_b))

            from parse_orthology import build_correct_matching, parse_orthology_quality
            correct_matching = build_correct_matching(genome_a, genome_b)
            tp, fp, fn = parse_orthology_quality(solution_matching, correct_matching)

            print "N: %d  cycles:%d (%d fixed, %d from opt)" % (n, z + c, c, z)
            print "Orthology. TP:%d  FP:%d  FN:%d" % (len(tp), len(fp), len(fn))
            # print match_edges
            # Now, analyse the BP graph, for the incomplete matching model, to find AA-, BB- and AB- components:
            master_graph = nx.Graph()
            # fixed vars:
            # add matching edges of genes with single copy:
            # for (gene, copy_a), copy_j in match_edges.iteritems():
            for gene, pair_list in solution_matching.iteritems():
                for copy_a, copy_b in pair_list:
                    for ext in [Ext.HEAD, Ext.TAIL]:
                        master_graph.add_edge(("A", gene, copy_a, ext), ("B", gene, copy_b, ext))

            # add adjacency edges:
            for genome, genome_name in [(genome_a, "A"), (genome_b, "B")]:
                for (g_i, copy_a, e_i), (g_j, copy_j, e_j) in genome.adjacency_iter_with_copies():
                    master_graph.add_edge((genome_name, g_i, copy_a, e_i), (genome_name, g_j, copy_j, e_j))

            count = {"A": 0, "B": 0, "AB": 0}
            c = 0
            # print "C:", len([x for x in connected_components(master_graph)])
            for comp in connected_components(master_graph):
                degree_one = [v for v in comp if master_graph.degree(v) == 1]
                if len(degree_one) == 0:
                    c += 1
                else:
                    if len(degree_one) != 2:
                        import ipdb;
                        ipdb.set_trace()
                    if degree_one[0][0] == degree_one[1][0]:
                        count[degree_one[0][0]] += 1
                    else:
                        count["AB"] += 1
            print count
            if skip_balancing:
                print "Corrected distance: %d" % (model.objVal + count["AB"] / 2)

        return model

    # copy genomes to possibly make some changes:
    genome_a = copy.deepcopy(genome_a)
    genome_b = copy.deepcopy(genome_b)

    add_capping_genes(genome_a, genome_b)

    # since the gene set might be different for each genome, find all genes:
    all_genes = genome_a.gene_set().union(genome_b.gene_set())
    # find all gene copies
    gene_copies = build_gene_copies_dict(all_genes, genome_a, genome_b)
    # count balancing genes:
    bal = {
        g: sum([len([c for c in gene_copies[g][gene].itervalues() if c == CopyType.BALANCING]) for gene in all_genes])
        for g in ["A", "B"]}

    print "Balancing genes:A=%(A)d, B=%(B)d" % bal
    # define the y labels (vertex = genome,gene,copy,ext) -> integer 1..n
    y_label = define_y_label(gene_copies)

    # store all possible matchings (edges) from each family:
    fixed_matching = {}
    possible_matching = {}
    for gene in all_genes:
        # if only 1 copy, matching is fixed:
        if len(gene_copies["A"][gene]) == 1:
            # fix the matching, then remove from the available copies
            copy_a, type_a = gene_copies["A"][gene].items()[0]
            copy_j, type_b = gene_copies["B"][gene].items()[0]
            fixed_matching[(gene, copy_a)] = copy_j
        else:
            possible_matching[gene] = {"A": {copy_i for copy_i, type_i in gene_copies["A"][gene].items()},
                                       "B": {copy_i for copy_i, type_i in gene_copies["B"][gene].items()}}

    # Build the BP graph of fixed matchings to try to find more variables to fix:
    y_fix = {}
    z_fix = {}
    balancing_fix = {"A": {}, "B": {}}

    if fix_vars:
        master_graph = nx.Graph()
        # fixed vars:

        # add matching edges of genes with single copy:
        for (gene, copy_a), copy_j in fixed_matching.iteritems():
            for ext in [Ext.HEAD, Ext.TAIL]:
                master_graph.add_edge(("A", gene, copy_a, ext), ("B", gene, copy_j, ext))

        # add adjacency edges:
        for genome, genome_name in [(genome_a, "A"), (genome_b, "B")]:
            for (g_i, copy_a, e_i), (g_j, copy_j, e_j) in genome.adjacency_iter_with_copies():
                master_graph.add_edge((genome_name, g_i, copy_a, e_i), (genome_name, g_j, copy_j, e_j))

        # Search components to fix:
        rescan = True
        edges_to_add = []
        vertices_to_remove = []
        ab_components = set()
        while rescan:
            rescan = False
            # Pre-scan:
            # add and remove edges detected from previous rounds:
            master_graph.add_edges_from(edges_to_add)
            master_graph.remove_nodes_from(vertices_to_remove)
            edges_to_add = []
            vertices_to_remove = []
            # fix AB-components; while I have at least 2, join pairs arbitrarily:
            while len(ab_components) > 1:
                a_i, b_i = ab_components.pop()
                a_j, b_j = ab_components.pop()
                master_graph.add_edge(a_i, a_j)
                balancing_fix["A"][a_i[1:]] = a_j[1:]
                balancing_fix["A"][a_j[1:]] = a_i[1:]
                master_graph.add_edge(b_i, b_j)
                balancing_fix["B"][b_i[1:]] = b_j[1:]
                balancing_fix["B"][b_j[1:]] = b_i[1:]

            # Now I search for vertices that have only balancing vertices as matching
            # candidates. If that is the case, I can fix them arbitrarly.
            fix_only_bal = True
            if fix_only_bal:
                for gene in sorted(possible_matching):
                    set_a = possible_matching[gene]["A"]
                    set_b = possible_matching[gene]["B"]
                    if all([gene_copies["A"][gene][copy_a] == CopyType.BALANCING for copy_a in set_a]) or all(
                            [gene_copies["B"][gene][copy_b] == CopyType.BALANCING for copy_b in set_b]):
                        for copy_a, copy_b in zip(set_a, set_b):
                            fixed_matching[(gene, copy_a)] = copy_b
                            # save edges to add to graph:
                            for ext in [Ext.HEAD, Ext.TAIL]:
                                # edges_to_add.append((("A", gene, copy_a, ext), ("B", gene, copy_b, ext)))
                                master_graph.add_edge(("A", gene, copy_a, ext), ("B", gene, copy_b, ext))
                        rescan = True
                        # remove from possible matching:
                        del possible_matching[gene]

            # now loop for each connected component, fixing cycles and trying to close paths to cycles when possible.
            for comp in connected_components(master_graph):
                # can only consider even components;
                if len(comp) % 2 != 0:
                    continue
                # get degree-1 vertices:
                degree_one = [v for v in comp if master_graph.degree(v) == 1]
                # if two degree one vertices, it is a path;
                if len(degree_one) == 2:
                    genome_i, g_i, copy_a, e_i = degree_one[0]
                    genome_j, g_j, copy_j, e_j = degree_one[1]

                    # 1 - check if both nodes are balancing, to find AA-, BB- and AB- paths that can be fixed.
                    i_is_balancing = g_i != 0 and gene_copies[genome_i][g_i][copy_a] == CopyType.BALANCING
                    j_is_balancing = g_j != 0 and gene_copies[genome_j][g_j][copy_j] == CopyType.BALANCING

                    if i_is_balancing and j_is_balancing:
                        # open-path, both ends are balancing.
                        # If AA- or BB-path, close it to a cycle:
                        if genome_i == genome_j:
                            # fix the cycle:
                            fix_cycle_y_z(comp, y_label, y_fix, z_fix, vertices_to_remove)
                            # fix the balancing variables if we have them:
                            if not skip_balancing:
                                balancing_fix[genome_i][degree_one[0][1:]] = degree_one[1][1:]
                                balancing_fix[genome_i][degree_one[1][1:]] = degree_one[0][1:]
                        else:
                            # If not, it is AB-, add to the list to try to make pairs.
                            if skip_balancing:  # if not using balancing edges, I can fix the AB directly, instead of
                                # doing the merge in pairs;
                                fix_cycle_y_z(comp, y_label, y_fix, z_fix, vertices_to_remove)
                            else:
                                # merge in pairs:
                                ab_components.add(tuple(sorted(degree_one)))
                                if len(ab_components) > 1:
                                    rescan = True
                    # Not open path; then, check if the path has homologous extremities at both ends, so I can close
                    # to a path:
                    elif genome_i != genome_j and g_i == g_j and e_i == e_j:
                        # invert to put genome A always in variables _i :
                        if genome_j == "A":
                            genome_i, g_i, copy_a, e_i, genome_j, g_j, copy_j, e_j = genome_j, g_j, copy_j, e_j, genome_i, g_i, copy_a, e_i

                        # check conflict, only add edge if it's in the allowed edges:
                        if g_i in possible_matching and copy_a in possible_matching[g_i]["A"] and copy_j in \
                                possible_matching[g_i]["B"]:
                            # new edges, re-scan:
                            rescan = True
                            fix_new_matching(fixed_matching, edges_to_add, possible_matching, g_i, copy_a, copy_j)

                # if there are no degree one vertices, it is a cycle; I can fix the y_i and z_i for this cycle:
                elif len(degree_one) == 0:
                    fix_cycle_y_z(comp, y_label, y_fix, z_fix, vertices_to_remove)
                    rescan = True

    # DRAW:
    draw_bp = False
    if draw_bp:
        plot_bp('graph.pdf', master_graph, gene_copies, possible_matching)


    # all fixed, generate ILP

    # to make it easier to find the matching edges, specially when limiting edges from balancing genes,
    # I will build a gene connections graph;
    gene_connection = nx.DiGraph()  # make it directed, so the vertex of A is always 1st on the edge tuple.
    for gene in possible_matching.iterkeys():
        set_a = possible_matching[gene]["A"]
        set_b = possible_matching[gene]["B"]
        # All vs all model:
        if all_vs_all:
            for copy_a in set_a:
                for copy_b in set_b:
                    gene_connection.add_edge(("A", gene, copy_a), ("B", gene, copy_b))
        else:
            # try to minimise needed matching edges for balancing nodes:
            real_a = [cp for cp in set_a if gene_copies["A"][gene][cp] == CopyType.REAL]
            real_b = [cp for cp in set_b if gene_copies["B"][gene][cp] == CopyType.REAL]

            # all real, then all-vs-all:
            if len(real_a) == len(real_b):
                for copy_a in set_a:
                    for copy_b in set_b:
                        gene_connection.add_edge(("A", gene, copy_a), ("B", gene, copy_b))
            # a has balancing:
            if len(real_a) < len(real_b):
                balancing_a = [cp for cp in set_a if gene_copies["A"][gene][cp] == CopyType.BALANCING]
                # the real in A match the real in B (which are all)
                for copy_a in real_a:
                    for copy_b in set_b:
                        gene_connection.add_edge(("A", gene, copy_a), ("B", gene, copy_b))
                # then, the balancing in A have len(real_a)+1 incident edges
                list_b = list(set_b)
                for idx, copy_a in enumerate(balancing_a):
                    for j in range(len(real_a) + 1):
                        gene_connection.add_edge(("A", gene, copy_a), ("B", gene, list_b[idx + j]))
            # b has balancing:
            else:
                balancing_b = [cp for cp in set_b if gene_copies["B"][gene][cp] == CopyType.BALANCING]
                # the real in B match the real in A (which are all)
                for copy_b in real_b:
                    for copy_a in set_a:
                        gene_connection.add_edge(("A", gene, copy_a), ("B", gene, copy_b))
                # then, the balancing in B have len(real_b)+1 incident edges
                list_a = list(set_a)
                for idx, copy_b in enumerate(balancing_b):
                    for j in range(len(real_b) + 1):
                        gene_connection.add_edge(("A", gene, list_a[idx + j]), ("B", gene, copy_b))

    # Start building constraints:
    constraints = []

    # consistency and matching 1-to-1

    # Fixed matching:
    # sorting just to make it nicer looking:
    constraints.append("\ Fixed matching:")
    for (gene, copy_a), copy_b in sorted(fixed_matching.items(), key=lambda pair: pair[0]):
        constraints.append("%s = 1" % matching_edge_name(gene, copy_a, copy_b, Ext.TAIL))
        constraints.append("%s = 1" % matching_edge_name(gene, copy_a, copy_b, Ext.HEAD))

    # HEAD TAIL consistency:
    constraints.append("\ Head/Tail consistency:")
    for (_, gene_a, copy_a), (_, gene_b, copy_b) in gene_connection.edges_iter():
        constraints.append("%s - %s = 0" % (
            matching_edge_name(gene_a, copy_a, copy_b, Ext.TAIL),
            matching_edge_name(gene_a, copy_a, copy_b, Ext.HEAD)))

    # 1 Matching per node :
    constraints.append("\ Degree 1 per node (Matching):")
    # for all vertices:
    for v in gene_connection.nodes_iter():
        # find the incident edges:
        if v[0] == "A":
            edges = gene_connection.out_edges_iter
        else:
            edges = gene_connection.in_edges_iter
        incident = [matching_edge_name(gene_a, copy_a, copy_b, Ext.TAIL) for
                    (_, gene_a, copy_a), (_, gene_b, copy_b) in edges(v)]
        # sum of incidents is 1:
        constraints.append("%s = 1" % (" + ".join(incident)))

    if not skip_balancing:
        constraints.append("\ Balancing:")

        for genome in ["A", "B"]:
            constraints.append("\ Genome %s" % genome)
            for gene_i, copy_a, ext_i in balancing_extremities(gene_copies[genome]):
                # check if fixed:
                if (gene_i, copy_a, ext_i) in balancing_fix[genome]:
                    gene_j, copy_j, ext_j = balancing_fix[genome][(gene_i, copy_a, ext_i)]
                    if (gene_i, copy_a, ext_i) < (gene_j, copy_j, ext_j):
                        constraints.append(
                            "%s = 1" % balancing_edge_name(genome, gene_i, copy_a, ext_i, gene_j, copy_j, ext_j))
                # if not, matching 1-to-1:
                else:
                    constraints.append(
                        " + ".join([balancing_edge_name(genome, gene_i, copy_a, ext_i, gene_j, copy_j, ext_j) for
                                    gene_j, copy_j, ext_j in
                                    balancing_extremities(gene_copies[genome], exclude=balancing_fix[genome].keys())
                                    if
                                    (gene_i, copy_a, ext_i) != (gene_j, copy_j, ext_j)]) + " = 1")

    constraints.append("\ Labelling")
    # for each adjacency, fix the label of adjacent genes:

    constraints.append("\\ Adjacent nodes have the same label:")
    for genome, genome_name in [(genome_a, "A"), (genome_b, "B")]:
        for (g_i, copy_a, ext_i), (g_j, copy_j, ext_j) in genome.adjacency_iter_with_copies():
            v_i = genome_name, g_i, copy_a, ext_i
            v_j = genome_name, g_j, copy_j, ext_j
            # if already fixed, skip
            if y_label[v_i] in y_fix and y_label[v_j] in y_fix:
                continue
            # if the edge is 0 for sure, also skip:
            constraints.append("y_%s - y_%s = 0 \\ %s <-> %s " % (y_label[v_i], y_label[v_j], v_i, v_j))
    #
    constraints.append("\\ Matching extremities have the same label:")

    # if extremities are matched, but I don't know the y_i (cycle was not closed in the fixing phase),
    # then I know that the y_i's of these extremities are equal:
    constraints.append("\\ Fixed matching without fixed y_i:")
    for (gene, copy_a) in sorted(fixed_matching):
        copy_j = fixed_matching[(gene, copy_a)]
        for ext in [Ext.HEAD, Ext.TAIL]:
            y_i = y_label[("A", gene, copy_a, ext)]
            y_j = y_label[("B", gene, copy_j, ext)]
            # only add if this y_i's aren't already fixed
            if y_i not in y_fix and y_j not in y_fix:
                constraints.append("y_%s - y_%s = 0 " % (y_i, y_j))

    # for the "open" matching, for each edge I add the "y fixing" restrictions, that force the y_i's
    # to be equal whenever the edge variable is set to 1.
    constraints.append("\\ Open matching:")
    for (_, gene_a, copy_a), (_, gene_b, copy_b) in gene_connection.edges_iter():
        for ext in [Ext.HEAD, Ext.TAIL]:
            y_a = y_label[("A", gene_a, copy_a, ext)]
            y_b = y_label[("B", gene_b, copy_b, ext)]
            constraints.append(
                "y_%s - y_%s + %s %s <= %d" % (
                    y_a, y_b, y_a, matching_edge_name(gene_a, copy_a, copy_b, ext), y_a))
            constraints.append(
                "y_%s - y_%s + %s %s <= %d" % (
                    y_b, y_a, y_b, matching_edge_name(gene_a, copy_a, copy_b, ext), y_b))

    if not skip_balancing:
        constraints.append("\\ Balancing edges have same label:")
        for genome in ["A", "B"]:
            constraints.append("\\ Genome %s" % genome)
            for gene_i, copy_a, ext_i in balancing_extremities(gene_copies[genome],
                                                               exclude=balancing_fix[genome].keys()):
                for gene_j, copy_j, ext_j in balancing_extremities(gene_copies[genome],
                                                                   exclude=balancing_fix[genome].keys()):
                    if (gene_i, copy_a, ext_i) >= (gene_j, copy_j, ext_j):
                        continue
                    y_i = y_label[(genome, gene_i, copy_a, ext_i)]
                    y_j = y_label[(genome, gene_j, copy_j, ext_j)]
                    # should not have someone here if I'm excluding fixed edges:
                    if y_i in y_fix and y_j in y_fix:
                        continue
                    constraints.append("y_%s - y_%s + %s %s <= %d" % (
                        y_i, y_j, y_i, balancing_edge_name(genome, gene_i, copy_a, ext_i, gene_j, copy_j, ext_j), y_i))
                    constraints.append("y_%s - y_%s + %s %s <= %d" % (
                        y_j, y_i, y_j, balancing_edge_name(genome, gene_i, copy_a, ext_i, gene_j, copy_j, ext_j), y_j))

    # z variables: since all cycles have to contains vertices from both genomes, we only add z variables
    # for genome A, that have smallest labels, so a genome B z variable will never be =1.
    constraints.append("\\ Z variables")
    for vertex, i in sorted(y_label.items(), key=operator.itemgetter(1)):
        if vertex[0] == "A":
            if i not in z_fix:
                constraints.append("%d z_%s - y_%s <= 0" % (i, i, i))
    #
    # # number of genes, to fix distance:
    n_genes = sum([len(copies) for copies in gene_copies["A"].itervalues()])
    constraints.append("n = %d" % n_genes)
    # # number of fixed cycles
    constraints.append("c = %d" % (sum(z_fix.itervalues())))

    #
    # # bounds:
    bounds = []
    for i in sorted(y_label.itervalues()):
        if i not in y_fix:
            bounds.append("y_%d <= %d" % (i, i))
    #
    # # variables:
    binary = []
    #
    # # matching edges
    matching = ["\ Fixed matching:"]
    for (gene, copy_a), copy_b in fixed_matching.iteritems():
        matching.append(matching_edge_name(gene, copy_a, copy_b, Ext.TAIL))
        matching.append(matching_edge_name(gene, copy_a, copy_b, Ext.HEAD))

    matching.append("\ Open matching:")
    for (_, gene_a, copy_a), (_, gene_b, copy_b) in gene_connection.edges_iter():
        for ext in [Ext.HEAD, Ext.TAIL]:
            matching.append(matching_edge_name(gene_a, copy_a, copy_b, ext))

    print "%d fixed matching edges" % (len(fixed_matching) * 2)
    print "%d open matching edges" % (len(gene_connection.edges()) * 2)
    binary.extend(matching)
    if not skip_balancing:
        # balancing edges:
        balancing_edges = [balancing_edge_name(genome, gene_i, copy_a, ext_i, gene_j, copy_j, ext_j) for
                           genome
                           in ["A", "B"] for gene_i, copy_a, ext_i in
                           balancing_extremities(gene_copies[genome], exclude=balancing_fix[genome].keys()) for
                           gene_j, copy_j, ext_j
                           in balancing_extremities(gene_copies[genome], exclude=balancing_fix[genome].keys()) if
                           (gene_i, copy_a, ext_i) < (gene_j, copy_j, ext_j)]
        print "%d balancing edges" % len(balancing_edges)
        binary.extend(balancing_edges)
    #
    # z cycles:
    for vertex, i in sorted(y_label.items(), key=operator.itemgetter(1)):
        if i in z_fix:  # and z_fix[i] == 0:
            continue
        if vertex[0] == "B":
            continue
        binary.append("z_%d" % i)
    #
    # # Y label are general:
    # TODO: remove unused y' and z's from model. If y=1, it can be removed, just set z=1.
    general = []
    for vertex, i in sorted(y_label.items(), key=operator.itemgetter(1)):
        if i not in y_fix:
            general.append("y_%d" % i)
    #
    # # number of genes and fixed cycles:
    general.append("n")
    general.append("c")
    # # objective function:
    z_obj = " - ".join(["z_%d" % i for vertex, i in sorted(y_label.items(), key=operator.itemgetter(1)) if
                        vertex[0] == "A" and i not in z_fix])

    objective = ["obj: n - c %s" % ("- " + z_obj if len(z_obj) > 0 else "")]
    # write ILP:
    with open(output, "w") as f:
        for header, lines in [("Minimize", objective), ("Subject to", constraints),
                              ("Bounds", bounds), ("Binary", binary), ("General", general)]:
            print >> f, header
            print >> f, "\n".join(lines)

    if solve:
        model = solve_ilp(timelimit=60)
        return model
Ejemplo n.º 12
0
def plot_bp(filename, master_graph, gene_copies, possible_matching, simplified=True):
    """
    Draw the BP graph ``master_graph`` and save the figure to ``filename``.

    Balancing extremities whose gene copy is still in ``possible_matching`` are first added to
    ``master_graph`` as isolated vertices (this modifies the graph passed in by the caller).

    :param filename: path of the image file written by ``plt.savefig``.
    :param master_graph: graph whose vertices are (genome, gene, copy, extremity) tuples.
    :param gene_copies: per-genome gene copies, as consumed by ``balancing_extremities``.
    :param possible_matching: gene -> {"A": copies, "B": copies} of the still open matchings.
    :param simplified: if true, each path is collapsed to a single edge between its two
        degree-one vertices before drawing; isolated vertices are kept as they are.
    """
    # add isolated vertices (balancing extremities that are not fixed already)
    for genome_i in ["A", "B"]:
        for gene_i, copy_i, ext_i in balancing_extremities(gene_copies[genome_i]):
            if gene_i in possible_matching and copy_i in possible_matching[gene_i][genome_i]:
                master_graph.add_node((genome_i, gene_i, copy_i, ext_i))
    # simplified:
    if simplified:
        edges = []
        vertices = []

        for comp in connected_components(master_graph):
            if len(comp) == 1:
                vertices.append(comp.pop())
                continue
            degree_one = tuple([v for v in comp if master_graph.degree(v) == 1])
            # Only a path (exactly two endpoints) can be collapsed to an edge. A component with
            # no degree-one vertex (a cycle) would give an empty tuple, which add_edges_from
            # rejects, so such components are left out of the simplified drawing.
            if len(degree_one) == 2:
                edges.append(degree_one)
        master_graph = nx.Graph()
        master_graph.add_edges_from(edges)
        master_graph.add_nodes_from(vertices)

    # Relabel nodes to make it easier to read:
    mapping = {}
    normal = []
    balancing = []
    # sets, so that the membership test in the loop below is not a linear scan per node:
    be = {genome_i: set(balancing_extremities(gene_copies[genome_i])) for genome_i in ["A", "B"]}
    for v in master_graph.nodes():
        genome_i, gene_i, copy_i, ext_i = v
        mapping[v] = "$%s%s_{(%s)}^%s$" % v
        if (gene_i, copy_i, ext_i) in be[genome_i]:
            balancing.append(mapping[v])
        else:
            normal.append(mapping[v])
    master_graph = nx.relabel_nodes(master_graph, mapping)

    # Graphviz position:
    # pos = nx.nx_agraph.graphviz_layout(master_graph, prog="fdp")

    # custom position:
    # after relabelling, a node is the string "$<genome>...", so v[1] is its genome letter.
    x_pos = 0
    y_pos = {"A": 1, "B": 0}
    pos = {}
    for comp in sorted(connected_components(master_graph), key=lambda c: (-len(c), min(c))):
        last_v = None
        for v in sort_component(master_graph, comp, fmt=False):
            # two consecutive vertices of the same genome: move one column to the right
            if last_v == v[1]:
                x_pos += 1
            last_v = v[1]
            pos[v] = (x_pos, y_pos[v[1]])
        x_pos += 1
        # start a new pair of rows once the current one is more than 7 columns wide:
        if x_pos > 7:
            x_pos = 0
            y_pos["A"] -= 2
            y_pos["B"] -= 2

    # draw and save:
    for nodelist, color in [(normal, "lightgray"), (balancing, "lightblue")]:
        nx.draw(master_graph, pos, font_size=5, nodelist=nodelist, node_color=color, linewidths=0.1, width=0.5,
                node_size=400,
                with_labels=True)
    plt.savefig(filename, bbox_inches='tight')
 def getConnectedComponents(self, G):
     """Return the result of ``nalgos.connected_components`` for the graph ``G``."""
     components = nalgos.connected_components(G)
     return components
Ejemplo n.º 14
0
# Tutorial script: load an edge list, draw it, then inspect its connected components
# and triangle counts with networkx.
from networkx import read_edgelist
import networkx as nx

G = read_edgelist('hartford_drug.edgelist')
print(G.number_of_nodes())
print(G.number_of_edges())

import matplotlib.pyplot as plt
nx.draw(G)
plt.show()

# Find communities / connected subgraphs
from networkx.algorithms import number_connected_components, connected_components

print(number_connected_components(G))
for subG in connected_components(G):
    print(subG)

# Get the graph structure of each connected subgraph
try:
    from networkx.algorithms import connected_component_subgraphs
except ImportError:
    # connected_component_subgraphs was removed in networkx 2.4; build the
    # per-component subgraphs from connected_components instead.
    def connected_component_subgraphs(graph):
        """Yield a copy of the subgraph induced by each connected component of ``graph``."""
        for nodes in connected_components(graph):
            yield graph.subgraph(nodes).copy()

for i, subG in enumerate(connected_component_subgraphs(G)):
    print('G%s' % i, subG.number_of_nodes(), subG.number_of_edges())

# Strengthen community detection with triangle computations:
# triangle counts and the clustering coefficient measure how tightly knit a community/subgraph is
from networkx.algorithms import triangles, transitivity, average_clustering

# Triangle counts
print(triangles(G))
# 平均三角计数