Example #1
 def test_predecessor(self):
     assert_equal(nx.dfs_predecessors(self.G, source=0), {
         1: 0,
         2: 1,
         3: 4,
         4: 2
     })
     assert_equal(nx.dfs_predecessors(self.D), {1: 0, 3: 2})
Example #2
 def test_predecessor(self):
     assert (nx.dfs_predecessors(self.G, source=0) == {
         1: 0,
         2: 1,
         3: 4,
         4: 2
     })
     assert nx.dfs_predecessors(self.D) == {1: 0, 3: 2}
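
For reference, nx.dfs_predecessors returns a dict that maps every node reached by the depth-first search (except the source) to its parent in the DFS tree. A minimal sketch on a path graph, independent of the test fixtures above:

import networkx as nx

G = nx.path_graph(4)                      # 0 - 1 - 2 - 3
print(nx.dfs_predecessors(G, source=0))   # {1: 0, 2: 1, 3: 2}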
Example #3
 def dls_test_predecessor(self):
     assert_equal(nx.dfs_predecessors(self.G, source=0, depth_limit=3), {
         1: 0,
         2: 1,
         3: 2,
         7: 2
     })
     assert_equal(nx.dfs_predecessors(self.D, source=2, depth_limit=3), {
         8: 7,
         9: 8,
         3: 2,
         7: 2
     })
Example #4
 def test_dls_predecessor(self):
     assert (nx.dfs_predecessors(self.G, source=0, depth_limit=3) == {
         1: 0,
         2: 1,
         3: 2,
         7: 2
     })
     assert (nx.dfs_predecessors(self.D, source=2, depth_limit=3) == {
         8: 7,
         9: 8,
         3: 2,
         7: 2
     })
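
With depth_limit set, the search stops expanding nodes beyond that many edges from the source, so only the predecessors discovered within the limit appear in the result. A minimal sketch on a path graph:

import networkx as nx

G = nx.path_graph(5)                                     # 0 - 1 - 2 - 3 - 4
print(nx.dfs_predecessors(G, source=0, depth_limit=2))   # {1: 0, 2: 1}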
Example #5
def _find_missing_input(calc_inp_nodes, graph):
    '''Search for *tentatively* missing data.'''
    calc_inp_nodes = set(calc_inp_nodes)  # for efficiency below
    missing_input_nodes = []
    for node in nx.dfs_predecessors(graph):
        if (node not in calc_inp_nodes and graph.out_degree(node) == 0):
            missing_input_nodes.append(node)
    return missing_input_nodes
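
A hedged usage sketch of the helper above, on a hypothetical two-sink graph: sink nodes that are not declared as calculation inputs are reported as tentatively missing.

import networkx as nx

g = nx.DiGraph([("calc", "a"), ("calc", "b")])   # 'a' and 'b' are sinks
print(_find_missing_input(["a"], g))             # ['b'] under these assumptions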
Example #6
def _find_missing_input(calc_inp_nodes, graph):
    '''Search for *tentatively* missing data.'''
    calc_inp_nodes = set(calc_inp_nodes) # for efficiency below
    missing_input_nodes = []
    for node in nx.dfs_predecessors(graph):
        if ( node not in calc_inp_nodes and graph.out_degree(node) == 0):
            missing_input_nodes.append(node)
    return missing_input_nodes
Example #7
 def part1():
     # Reverse edges
     RG = G.reverse()
     # Get predecessors
     predecessors = nx.dfs_predecessors(RG, 'shiny gold')
     # Count predecessors
     for p in predecessors:
         print(p)
     return len(predecessors)
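
The snippet above reverses the containment edges so that a DFS from 'shiny gold' reaches every bag that can eventually contain it; the keys of the returned dict are exactly those ancestors. A minimal sketch with hypothetical bag names:

import networkx as nx

G = nx.DiGraph([('light red', 'shiny gold'), ('dark orange', 'light red')])
RG = G.reverse()
print(len(nx.dfs_predecessors(RG, 'shiny gold')))   # 2: both bags can hold it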
Example #8
    def get_all_predecessors(self, cfgnode):
        """
        Get all predecessors of a specific node on the control flow graph.

        :param CFGNode cfgnode: The CFGNode object
        :return: A list of predecessors in the CFG
        :rtype: list
        """

        return networkx.dfs_predecessors(self._graph, cfgnode)
Example #9
def gather_relevant_nodes(G, s):
    subnodes = nx.dfs_successors(G, source=s)
    ret = []
    for l in subnodes.values():
        ret.extend(l)
    for node in ret:
        prednodes = nx.dfs_predecessors(G, node)
        for l in prednodes.keys():
            ret.append(l)
    return ret
Example #10
    def get_all_predecessors(self, cfgnode):
        """
        Get all predecessors of a specific node on the control flow graph.

        :param CFGNode cfgnode: The CFGNode object
        :return: A list of predecessors in the CFG
        :rtype: list
        """

        return networkx.dfs_predecessors(self._graph, cfgnode)
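
Note that nx.dfs_predecessors returns a dict mapping each reached node to its DFS-tree parent, not a plain list, so the method above actually hands back that mapping despite the ':rtype: list' annotation; Example #14 further down materializes an explicit node list instead. A minimal sketch of that conversion, assuming the same self._graph and cfgnode names:

preds = networkx.dfs_predecessors(self._graph, cfgnode)
pred_nodes = list(set(preds) | set(preds.values()))   # both endpoints of every DFS-tree edge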
Example #11
def link2pred(linkdata,lookup_data):
    #########################################################
    # convert sub to graph to get upscaled reconstruction
    #########################################################
    numsegments = len(linkdata)
    linkdata_con = np.concatenate(linkdata,axis=0)
    edges = []
    # radius_estimate_around_trace
    for ix in range(numsegments):
        edge1 = linkdata[ix][:-1,-1]
        edge2 = linkdata[ix][1:,-1]
        rad = (linkdata[ix][1:,-2]+linkdata[ix][:-1,-2])/2
        edges.append(np.concatenate((edge1[:,None],edge2[:,None],rad[:,None]),axis=1))

    edges = np.concatenate(edges,axis=0)
    # [keepthese, ia, ic] = unique(edges(:, [1 2]));
    # [subs(:, 1), subs(:, 2), subs(:, 3)] = ind2sub(outsiz([1 2 3]), keepthese);
    # edges_ = reshape(ic, [], 2);
    # weights_ = edges(ia, 3:end);

    # in order to go back to original index: unique_edges[edges_reduced[0,0]]
    unique_edges,unique_indicies,unique_inverse = np.unique(edges[:,:2], return_index=True,return_inverse=True)
    edges_reduced = np.reshape(unique_inverse,(edges.shape[0],2))

    # connectivity graph
    dat = np.ones((edges_reduced.shape[0],1)).flatten()
    e1 = edges_reduced[:,0]
    e2 = edges_reduced[:,1]

    sM = csr_matrix((dat,(e1,e2)), shape=(np.max(edges_reduced)+1,np.max(edges_reduced)+1))
    # build shortest spanning tree from seed
    seed_index = edges_reduced[0,0]

    nxsM = nx.from_scipy_sparse_matrix(sM)

    preds = nx.dfs_predecessors(nxsM,seed_index)
    orderlist = nx.dfs_preorder_nodes(nxsM, seed_index)
    orderlist = np.array(list(orderlist))
    seed_vals = lookup_data[unique_edges[seed_index]]

    swc_data=[]
    swc_list={}
    # iterate over orderlist (set first column based on this)
    for ix, idx_trace in enumerate(orderlist):
        swc_list[idx_trace] = ix + 1
        if ix==0:
            target = -1
        else:
            target = swc_list[preds[idx_trace]]

        loc_xyzr = lookup_data[unique_edges[idx_trace]]
        swc_data.append([ix+1,1,loc_xyzr[0],loc_xyzr[1],loc_xyzr[2],loc_xyzr[3],target])

    return swc_data
Example #12
def ruleScore(graph):
    # recursive search
    scoreFunction = {}
    for n1 in list(graph.nodes()):
        # print(n1)
        preds_n1 = graph.predecessors(n1)
        # print(preds_n1)
        if (len(preds_n1) >= 1):
            scoreFunction[n1] = 0
            for pred1 in preds_n1:
                for pred2 in preds_n1:
                    if pred1 != pred2:
                        temp1 = set(
                            nx.dfs_predecessors(graph, pred1).keys() +
                            nx.dfs_predecessors(graph, pred1).values())
                        temp2 = set(
                            nx.dfs_predecessors(graph, pred2).keys() +
                            nx.dfs_predecessors(graph, pred2).values())
                        scoreFunction[n1] = scoreFunction[n1] + len(
                            list(temp1.intersection(temp2)))
                        if pred1 in nx.dfs_predecessors(graph, pred2):
                            scoreFunction[n1] = scoreFunction[n1] + 1
                        if pred2 in nx.dfs_predecessors(graph, pred1):
                            scoreFunction[n1] = scoreFunction[n1] + 1
        else:
            scoreFunction[n1] = 0
    return (scoreFunction)  # find the end of a node in the bitstring
Example #13
def ruleScore6(graph):

    #Function to calculate total ancestor overlap as described in the main text

    # recursive search
    scoreFunction = {}
    for n1 in list(graph.nodes()):
        preds_n1_dict = graph.predecessors(n1)
        preds_n1 = list(flatten(preds_n1_dict))
        if (len(preds_n1) >= 1):
            scoreFunction[n1] = 0
            for pred1 in preds_n1:
                for pred2 in preds_n1:
                    if pred1 != pred2:
                        temp1 = set(
                            nx.dfs_predecessors(graph, pred1).keys() +
                            nx.dfs_predecessors(graph, pred1).values())
                        temp2 = set(
                            nx.dfs_predecessors(graph, pred2).keys() +
                            nx.dfs_predecessors(graph, pred2).values())
                        scoreFunction[n1] = scoreFunction[n1] + len(
                            list(temp1.intersection(temp2)))
                        if pred1 in nx.dfs_predecessors(graph, pred2):
                            scoreFunction[n1] = scoreFunction[n1] + 1
                        if pred2 in nx.dfs_predecessors(graph, pred1):
                            scoreFunction[n1] = scoreFunction[n1] + 1
        else:
            scoreFunction[n1] = 0
    return (scoreFunction)
Example #14
    def get_all_predecessors(self, cfgnode):
        """
        Get all predecessors of a specific node on the control flow graph.

        :param CFGNode cfgnode: The CFGNode object
        :return: A list of predecessors in the CFG
        :rtype: list
        """
        s = set()
        for child, parent in networkx.dfs_predecessors(self.graph, cfgnode).items():
            s.add(child)
            s.add(parent)
        return list(s)
Example #15
    def traversal(self):
        rslt = {}

        rslt['dfs_predecessors'] = nx.dfs_predecessors(self.graph)
        rslt['dfs_successors'] = nx.dfs_successors(self.graph)
        #rslt['dfs_preorder_nodes']=nx.dfs_preorder_nodes(self.graph)
        #rslt['dfs_postorder_nodes']=nx.dfs_postorder_nodes(self.graph)
        #rslt['dfs_labeled_edges']=nx.dfs_labeled_edges(self.graph)
        #rslt['edge_dfs']=nx.edge_dfs(self.graph)
        #rslt['dfs_edges']=nx.dfs_edges(self.graph)
        #rslt['dfs_tree']=nx.dfs_tree(self.graph)

        fname_traversal = self.DIR + '/traversal.json'
        with open(fname_traversal, "w") as f:
            json.dump(rslt, f, cls=SetEncoder, indent=2)
        print(fname_traversal)
Example #16
    def _resolve_grouping_node(group_node, group_tree, group_graph,
                               target_graph):
        """
        Resolves the predecessors of a grouping node and adds them to the
        target graph. This function can be used to enrich connectivity
        and discourse graphs with information from the grouping graph
        by fetching the nodes that participate in a visual group.

        Parameters:
            group_node: A string with the identifier of the grouping node.
            group_tree: A depth-first search tree for the grouping graph.
            group_graph: An AI2D-RST grouping graph.
            target_graph: A NetworkX graph which contains the grouping node
                          to resolve.

        Returns:
             An updated target graph with diagram element nodes added
             under the grouping node.
        """

        # Get the predecessors of the grouping node
        preds = nx.dfs_predecessors(group_tree, group_node)

        # Get a list of unique node identifiers among predecessors. These are
        # the nodes on which a subgraph will be induced.
        preds = list(set(list(preds.keys()) + list(preds.values())))

        # Induce a subgraph based on the nodes
        pred_group = group_graph.subgraph(preds).copy()

        # Set up edge dictionary
        edge_attrs = {}

        # Encode edge type information
        for s, t in pred_group.edges():

            # Add edge attributes to the dictionary
            edge_attrs[(s, t)] = {'kind': 'grouping'}

        # Set edge attributes
        nx.set_edge_attributes(pred_group, edge_attrs)

        # Add the nodes and edges from the subgraph to the connectivity graph
        target_graph.add_nodes_from(pred_group.nodes(data=True))
        target_graph.add_edges_from(pred_group.edges(data=True))
Example #17
    def get_next_groups(self, processed_nodes):
        """Get nodes that have predecessors in processed_nodes list.
        All predecessors should be taken into account, not only direct
        parents

        :param processed_nodes: set of nodes names
        :returns: list of nodes names
        """
        result = []
        for node in self.nodes():
            if node in processed_nodes:
                continue

            predecessors = nx.dfs_predecessors(self.reverse(), node)
            if (set(predecessors.keys()) <= processed_nodes):
                result.append(node)

        return result
Example #18
    def get_next_groups(self, processed_nodes):
        """Get nodes that have predecessors in processed_nodes list.
        All predecessors should be taken into account, not only direct
        parents

        :param processed_nodes: set of nodes names
        :returns: list of nodes names
        """
        result = []
        for node in self.nodes():
            if node in processed_nodes:
                continue

            predecessors = nx.dfs_predecessors(self.reverse(), node)
            if (set(predecessors.keys()) <= processed_nodes):
                result.append(node)

        return result
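
Because the graph is reversed before the search, the keys of the dfs_predecessors result are all ancestors of node, direct or transitive, which is what lets the method require every ancestor to be processed. A minimal sketch of the same idea on a toy DiGraph (nx.ancestors yields this set directly):

import networkx as nx

G = nx.DiGraph([('a', 'b'), ('b', 'c')])
print(set(nx.dfs_predecessors(G.reverse(), 'c')))   # {'a', 'b'}
print(nx.ancestors(G, 'c'))                         # {'a', 'b'}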
Example #19
def cheapestSuccessorConnection(G, H):
    nodes = H.nodes()
    candidates = []
    tmp_weight = 0
    counter = 0
    while not udah_belom(H):
        for x in nodes:
            candidates = []
            counter += 1
            x_neigh = H.neighbors(x)
            if len(x_neigh) > 2:
                for y in x_neigh:
                    dummyGraph = H.copy()
                    #remove edge
                    removeNeighborColor(dummyGraph, x, y)
                    tmp_weight = G[x][y]['weight']  #save weight
                    dummyGraph.remove_edge(x, y)

                    anak_anak = nx.dfs_predecessors(dummyGraph, x)
                    for anak in anak_anak:
                        if validColor(dummyGraph, anak, y):
                            tmp = (anak, y, G[anak][y]['weight'])
                            candidates.append(tmp)
                    dummyGraph.add_edge(x, y, weight=tmp_weight)
                    addNeighborColor(dummyGraph, x, y)

                if len(candidates) > 0:
                    candidates = sorted(candidates, key=lambda z: z[2])
                    fro, tom, wei = candidates[0]
                    removeNeighborColor(dummyGraph, x, tom)
                    dummyGraph.remove_edge(x, tom)

                    addNeighborColor(dummyGraph, fro, tom)
                    dummyGraph.add_edge(fro, tom, weight=wei)
                    H = dummyGraph.copy()
            if counter == 100001:
                #drawGraph(H)
                print "reached 100000 iterations in Cheapest Successor Connection"
                #os.system('say "reached 100000 iterations in cheapest Successor Connection"')
                print 'Moving on to Cheapest Leaf Connection'
                #os.system('say "Moving on to cheapest Leaf Connection"')
                H = cheapestLeafConnection(G, H)
                return H
    return H
Example #20
def cheapestSuccessorConnection(G, H):
    nodes = H.nodes()
    candidates = []
    tmp_weight = 0
    counter = 0
    while not udah_belom(H):
        for x in nodes:
            candidates=[]
            counter += 1
            x_neigh = H.neighbors(x)
            if len(x_neigh) > 2:
                for y in x_neigh:
                    dummyGraph = H.copy()
                    #remove edge
                    removeNeighborColor(dummyGraph, x, y)
                    tmp_weight = G[x][y]['weight']      #save weight
                    dummyGraph.remove_edge(x,y)

                    anak_anak = nx.dfs_predecessors(dummyGraph, x)
                    for anak in anak_anak:
                        if validColor(dummyGraph, anak, y):
                            tmp = (anak, y, G[anak][y]['weight'])
                            candidates.append(tmp)
                    dummyGraph.add_edge(x, y, weight = tmp_weight)
                    addNeighborColor(dummyGraph, x, y)

                if len(candidates)>0:
                    candidates = sorted(candidates, key = lambda z: z[2])
                    fro, tom, wei = candidates[0]
                    removeNeighborColor(dummyGraph, x, tom)
                    dummyGraph.remove_edge(x,tom)

                    addNeighborColor(dummyGraph, fro, tom)
                    dummyGraph.add_edge(fro,tom, weight=wei)
                    H = dummyGraph.copy()
            if counter==100001:
                #drawGraph(H)
                print "reached 100000 iterations in Cheapest Successor Connection"
                #os.system('say "reached 100000 iterations in cheapest Successor Connection"')
                print 'Moving on to Cheapest Leaf Connection'
                #os.system('say "Moving on to cheapest Leaf Connection"')
                H = cheapestLeafConnection(G, H)
                return H
    return H
Example #21
def directLeafConnection(G, H):
    nodes = H.nodes()
    counter = 0
    while not udah_belom(H):
        if time.time() - waktu1 > 300:
            return H
        for x in nodes:
            counter += 1
            x_neigh = H.neighbors(x)
            if len(x_neigh) > 2:
                for y in x_neigh:
                    dummyGraph = H.copy()

                    dummyGraph.node[x]['neighbor_color'].remove(
                        dummyGraph.node[y]['color'])  #remove neighbor color
                    dummyGraph.node[y]['neighbor_color'].remove(
                        dummyGraph.node[x]['color'])  #remove neighbor color
                    dummyGraph.remove_edge(x, y)

                    tree = nx.dfs_successors(dummyGraph, x)
                    anak_anak = nx.dfs_predecessors(dummyGraph, x)
                    for anak in anak_anak:
                        if anak not in tree:
                            if validColor(dummyGraph, anak, y):
                                dummyGraph.add_edge(
                                    anak, y, weight=G[anak][y]['weight'])
                                addNeighborColor(dummyGraph, y, anak)
                                H = dummyGraph.copy()
                                #drawHraph(H)
                                break
            if counter % 100000 == 0:
                print 'iteration: ', counter, ' in Direct Leaf Connection'
            if counter == 500001:
                #os.system('say "Redo from scratch"')
                mst = kruskal_mst(G)
                H = G.copy()
                H.remove_edges_from(H.edges())
                H.add_edges_from(mst)

                H = directLeafConnection(G, H)
                return H
    return H
Example #22
def directLeafConnection(G, H):
    nodes = H.nodes()
    counter = 0
    while not udah_belom(H):
        if time.time()-waktu1 > 300:
            return H
        for x in nodes:
            counter += 1
            x_neigh = H.neighbors(x)
            if len(x_neigh) > 2:
                for y in x_neigh:
                    dummyGraph = H.copy()
    
                    dummyGraph.node[x]['neighbor_color'].remove(dummyGraph.node[y]['color'])        #remove neighbor color
                    dummyGraph.node[y]['neighbor_color'].remove(dummyGraph.node[x]['color'])        #remove neighbor color
                    dummyGraph.remove_edge(x,y)
    
                    tree = nx.dfs_successors(dummyGraph, x)
                    anak_anak = nx.dfs_predecessors(dummyGraph, x)
                    for anak in anak_anak:
                        if anak not in tree:
                            if validColor(dummyGraph, anak, y):
                                dummyGraph.add_edge(anak,y, weight=G[anak][y]['weight'])
                                addNeighborColor(dummyGraph, y, anak)
                                H = dummyGraph.copy()
                                #drawHraph(H)
                                break
            if counter%100000==0:
                print 'iteration: ', counter, ' in Direct Leaf Connection'
            if counter == 500001:
                #os.system('say "Redo from scratch"')
                mst = kruskal_mst(G)
                H = G.copy()
                H.remove_edges_from(H.edges())
                H.add_edges_from(mst)

                H = directLeafConnection(G, H)
                return H
    return H
Example #23
def hamilton(scene):
    involvedRoomIds = []
    views = []
    # load existing views.
    for fn in os.listdir(f'./latentspace/autoview/{scene["origin"]}'):
        if '.json' not in fn:
            continue
        with open(f'./latentspace/autoview/{scene["origin"]}/{fn}') as f:
            views.append(json.load(f))
    for view in views:
        view['isVisited'] = False
        if view['roomId'] not in involvedRoomIds:
            involvedRoomIds.append(view['roomId'])
    print(involvedRoomIds)
    res = []
    # deciding connections of a floorplan.
    G = nx.Graph()
    for room in scene['rooms']:
        room['isVisited'] = False
        floorMeta = p2d(
            '.', '/dataset/room/{}/{}f.obj'.format(room['origin'],
                                                   room['modelId']))
        try:
            H = sk.getWallHeight(
                f"./dataset/room/{room['origin']}/{room['modelId']}w.obj")
        except:
            continue
        for door in room['objList']:
            if 'coarseSemantic' not in door:
                continue
            if door['coarseSemantic'] not in ['Door', 'door']:
                continue
            if len(door['roomIds']) < 2:
                continue
            # if door['roomIds'][0] not in involvedRoomIds and door['roomIds'][1] not in involvedRoomIds:
            #     continue
            x = (door['bbox']['min'][0] + door['bbox']['max'][0]) / 2
            z = (door['bbox']['min'][2] + door['bbox']['max'][2]) / 2
            DIS = np.Inf
            for wallIndex in range(floorMeta.shape[0]):
                wallIndexNext = (wallIndex + 1) % floorMeta.shape[0]
                dis = sk.pointToLineDistance(np.array([x, z]),
                                             floorMeta[wallIndex, 0:2],
                                             floorMeta[wallIndexNext, 0:2])
                if dis < DIS:
                    DIS = dis
                    direction = np.array(
                        [floorMeta[wallIndex, 2], 0, floorMeta[wallIndex, 3]])
            translate = np.array([x, H / 2, z])
            G.add_edge(door['roomIds'][0],
                       door['roomIds'][1],
                       translate=translate,
                       direction=direction,
                       directionToRoom=room['roomId'])
    pre = nx.dfs_predecessors(G)
    suc = nx.dfs_successors(G)
    print(pre, suc)
    # decide the s and t which are the start point and end point respectively.
    # ndproom = list(nx.dfs_successors(G).keys())[0]
    # ndproom = views[0]['roomId']
    ndproom = involvedRoomIds[0]
    roomOrder = []
    while ndproom != -1:
        roomOrder.append(ndproom)
        scene['rooms'][ndproom]['isVisited'] = True
        ndproom = hamiltonNextRoom(ndproom, pre, suc, scene)
    for room in scene['rooms']:
        room['isVisited'] = False
    print(roomOrder)

    def subPath(s):
        if s == len(roomOrder) - 1:
            return (True, s)
        state = False
        start = roomOrder[s]
        s += 1
        while s < len(roomOrder) and roomOrder[s] != start:
            if roomOrder[s] in involvedRoomIds and not scene['rooms'][
                    roomOrder[s]]['isVisited']:
                state = True
            s += 1
        return (state, s)

    i = 0
    while i < len(roomOrder):
        state, s = subPath(i)
        if not state:
            roomOrder = roomOrder[0:i + 1] + roomOrder[s + 1:]
            i -= 1
        else:
            scene['rooms'][roomOrder[i]]['isVisited'] = True
        i += 1
    print(roomOrder)
    ndproom = roomOrder[0]
    for view in views:
        if view['roomId'] == ndproom:
            ndpNext = view
    # perform the algorithm of Angluin and Valiant.
    for i in range(1, len(roomOrder) + 1):
        while ndpNext is not None:
            ndp = ndpNext
            res.append(ndp)
            ndp['isVisited'] = True
            ndpNext = hamiltonNext(ndp, views, scene)
        if i == len(roomOrder):
            break
        lastndproom = roomOrder[i - 1]
        ndproom = roomOrder[i]
        edge = G[lastndproom][ndproom]
        # if edge['direction'].dot(edge['translate'] - ndp['probe']) < 0:
        if edge['directionToRoom'] != ndproom:
            edge['direction'] = -edge['direction']
        ndpNext = {
            'roomId': ndproom,
            'probe': edge['translate'],
            'origin': edge['translate'].tolist(),
            'target': (edge['translate'] + edge['direction']).tolist(),
            'direction': edge['direction'].tolist()
        }
    with open(f'./latentspace/autoview/{scene["origin"]}/path', 'w') as f:
        json.dump(res, f, default=sk.jsonDumpsDefault)
    return res
Example #24
def cheapestLeafConnection(G, H):
    nodes = H.nodes()
    candidates = []
    tmp_weight = 0
    counter = 0
    while not udah_belom(H):
        #counter = 0
        for x in nodes:
            candidates = []
            counter += 1
            x_neigh = H.neighbors(x)
            if len(x_neigh) > 2:
                for y in x_neigh:
                    #make a copy of current mst
                    dummyGraph = H.copy()

                    #remove edge
                    removeNeighborColor(dummyGraph, x, y)
                    tmp_weight = G[x][y]['weight']  #save weight
                    dummyGraph.remove_edge(x, y)

                    tree = nx.dfs_successors(dummyGraph, x)
                    anak_anak = nx.dfs_predecessors(dummyGraph, x)
                    for anak in anak_anak:
                        if anak not in tree:  #if leaf
                            if validColor(dummyGraph, anak, y):
                                tmp = (anak, y, G[anak][y]['weight'])
                                candidates.append(tmp)
                    dummyGraph.add_edge(x, y, weight=tmp_weight)
                    addNeighborColor(dummyGraph, x, y)

            if len(candidates) > 0:
                candidates = sorted(candidates, key=lambda z: z[2])
                fro, tom, wei = candidates[0]
                dummyGraph.add_edge(fro, tom, weight=wei)
                removeNeighborColor(dummyGraph, x, tom)
                dummyGraph.remove_edge(x, tom)
                addNeighborColor(dummyGraph, fro, tom)
                H = dummyGraph.copy()
                #drawHraph(H)
            elif (counter % 10000) == 0:
                if counter % 100000 == 0:
                    print 'no candidates,', counter, ' iterations in Cheapest Leaf Connection'
                #drawGraph(dummyGraph)
                for x in nodes:
                    x_neigh = dummyGraph.neighbors(x)
                    if len(x_neigh) == 1:
                        dummyGraph.remove_edge(x, x_neigh[0])
                        removeNeighborColor(dummyGraph, x, x_neigh[0])
                        for y in dummyGraph.neighbors(x_neigh[0]):
                            if validColor(
                                    dummyGraph, x,
                                    y) and len(dummyGraph.neighbors(y)) < 3:
                                dummyGraph.add_edge(x, y)
                                addNeighborColor(dummyGraph, x, y)
                                H = dummyGraph.copy()
                                break
                        break
            elif counter == 250001:
                print 'Using Cheapest Leaf Connection failed miserably =('
                #os.system('say "Using candidates failed miserably..."')
                print 'Try using Direct Leaf Connection'
                #os.system('say "Try using direct leaf connection"')
                H = directLeafConnection(G, dummyGraph)
                return H
    return H
Example #25
def minibatch_(w, loss__,alphas,learning_rate,test_imgs, train_imgs,minibatch,subsamples,sum_x,n_samples,sum_sq_x,mean,variance, mode):
    X_p, y_p, inv = get_data(class_, test_imgs, train_imgs, minibatch, minibatch + 1, 'training', c)                
    if X_p != []:
        boxes = []
        ground_truth = inv[0][2]
        img_nr = inv[0][0]
        if os.path.isfile('/home/stahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt'):
            f = open('/home/stahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt', 'r')
        else:
            print 'warning'
        for line, y in zip(f, inv):
            tmp = line.split(',')
            coord = []
            for s in tmp:
                coord.append(float(s))
            boxes.append([coord, y[2]])
        assert(len(boxes)<500)
        boxes, y_p, X_p = sort_boxes(boxes, y_p, X_p, 0,500)
        
        if os.path.isfile('/home/stahl/GroundTruth/sheep_coords_for_features/'+ (format(img_nr, "06d")) +'.txt'):
            gr = open('/home/stahl/GroundTruth/sheep_coords_for_features/'+ (format(img_nr, "06d")) +'.txt', 'r')
        ground_truths = []
        for line in gr:
           tmp = line.split(',')
           ground_truth = []
           for s in tmp:
              ground_truth.append(int(s))
           ground_truths.append(ground_truth)
        
        #prune boxes
        pruned_x = []
        pruned_y = []
        pruned_boxes = []
        for i, y_ in enumerate(y_p):
            if y_ > 0:
                pruned_x.append(X_p[i])
                pruned_y.append(y_p[i])
                pruned_boxes.append(boxes[i])
        
        if subsampling and len(pruned_boxes) > subsamples:
            pruned_x = pruned_x[0:subsamples]
            pruned_y = pruned_y[0:subsamples]
            pruned_boxes = pruned_boxes[0:subsamples]
            
        if mode == 'mean_variance':
            sum_x += np.array(pruned_x).sum(axis=0)
            n_samples += len(pruned_x)
            sum_sq_x +=  (np.array(pruned_x)**2).sum(axis=0)
            return sum_x,n_samples,sum_sq_x
            
        # create_tree
        G, levels = create_tree(pruned_boxes)
        norm_x = []
        
        #normalize
        for p_x in pruned_x:
            norm_x.append((p_x-mean)/variance)
        data = (G, levels, pruned_y, norm_x, pruned_boxes, ground_truths, alphas)
        sucs = nx.dfs_successors(G)
        
        predecs = nx.dfs_predecessors(G)
        
        #preprocess: node - children
        children = {}
        last = -1
        for node,children_ in zip(sucs.keys(),sucs.values()):
            if node != last+1:
                for i in range(last+1,node):
                    children[i] = []
                children[node] = children_
            elif node == last +1:
                children[node] = children_
            last = node
        if mode == 'train':
            nodes = list(G.nodes())[1:]
            for node in nodes:
                w = like_scikit(w,norm_x[node],pruned_y[node],learning_rate,alphas)
                #w = update_weights(w,data,predecs,children,node, learning_rate)
        else:
            loss__.append(loss_simple(w,data))#(w, data, predecs, children))
    if mode == 'train':
        return w, len(pruned_y)
    else:
        return loss__
Example #26
def resample_states(T, root, node_to_pmap, nstates,
        root_distn=None, P_default=None):
    """
    This function applies to a tree for which nodes will be assigned states.

    Parameters
    ----------
    T : undirected acyclic networkx graph
        A tree whose edges are annotated with transition matrices P.
    root : integer
        The root node.
    node_to_pmap : dict
        A map from a node to an array that gives the subtree likelihood
        for each state.
    nstates : integer
        Number of states.
    root_distn : dict, optional
        A dense array giving a finite distribution or weights over root states.
        Values should be positive but are not required to sum to 1.
        If the distribution is not provided,
        then it will be assumed to have values of 1 for each possible state.
    P_default : 2d ndarray, optional
        If an edge is not annotated with a transition matrix P,
        then this default transition matrix will be used.

    Returns
    -------
    node_to_sampled_state : dict
        A map from each node of T to its state.
        If the state was not defined by the node_to_state argument,
        then the state will have been sampled.

    """
    # Get the root pmap.
    root_pmap = node_to_pmap[root]

    # Try to compute the likelihood.
    # This will raise an informative exception if no path is possible.
    # If the likelihood is numerically zero then raise a different exception.
    likelihood = _mc0_dense.get_likelihood(root_pmap, root_distn=root_distn)
    if likelihood <= 0:
        raise _util.NumericalZeroProb(
                'numerically intractably small likelihood: %s' % likelihood)

    # Bookkeeping structure related to tree traversal.
    predecessors = nx.dfs_predecessors(T, root)

    # Sample the node states, beginning at the root.
    node_to_sampled_state = {}
    for node in nx.dfs_preorder_nodes(T, root):

        # Get the precomputed pmap associated with the node.
        # This is a sparse map from state to subtree likelihood.
        pmap = node_to_pmap[node]

        # Define a prior distribution.
        if node == root:
            prior = root_distn
        else:

            # Get the parent node and its state.
            parent_node = predecessors[node]
            parent_state = node_to_sampled_state[parent_node]

            # Get the transition probability matrix.
            P = T[parent_node][node].get('P', P_default)

            # Get the distribution of a non-root node.
            prior = P[parent_state]

        # Sample the state from the posterior distribution.
        if prior is None:
            dpost = pmap
        else:
            dpost = prior * pmap
        node_to_sampled_state[node] = _util.array_random_choice(dpost)

    # Return the map of sampled states.
    return node_to_sampled_state
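
The traversal bookkeeping used above is simply a preorder walk paired with a parent-lookup table. A minimal sketch on a toy tree (hypothetical node labels):

import networkx as nx

T = nx.Graph([(0, 1), (0, 2), (2, 3)])
parents = nx.dfs_predecessors(T, 0)        # {1: 0, 2: 0, 3: 2}
for node in nx.dfs_preorder_nodes(T, 0):   # 0, 1, 2, 3
    parent = parents.get(node)             # None at the root, handled separately above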
Example #27
    htgs = T['hashtags'][i].lower()
    if len(set(htgs.split(',')) & seed_htgs_CAF) != 0:
        CAF_ids.append(i)

# construct tweet-reply network to get related tweets
print('Listing relevant tweets ids...')
edges = list(N['tweet-reply'].keys())
G = nx.Graph()
G.add_edges_from(edges)

# using depth-first search algorithm to traverse through the tree network using the seed nodes
all_CAF_ids = []
for i in CAF_ids:
    all_CAF_ids.append(i)
    try:
        pred = nx.dfs_predecessors(G, source=i)
        all_CAF_ids.extend(list(pred.values()))
    except:
        pass
    try:
        succ = nx.dfs_successors(G, source=i)
        for j in succ.keys():
            all_CAF_ids.append(j)
            all_CAF_ids.extend(succ[j])
    except:
        pass
all_CAF_ids = np.unique(all_CAF_ids)
del G

# subset Tweets
print('Subsetting tweets...')
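
Since G here is undirected, the union collected from dfs_predecessors and dfs_successors is just the connected component containing the seed tweet; a hedged equivalent for a single seed id i:

related_ids = nx.node_connected_component(G, i)   # same set of tweet ids, seed included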
Example #28
def get_node_to_distn(T, root, node_to_pmap, nstates,
        root_distn=None, P_default=None):
    """
    Get marginal state distributions at nodes in a tree.

    This function is similar to the Rao-Teh state sampling function,
    except that instead of sampling a state at each node,
    this function computes marginal distributions over states at each node.
    Also, each edge of the input tree for this function has been
    annotated with its own transition probability matrix,
    whereas the Rao-Teh sampling function uses a single
    uniformized transition probability matrix for all edges.

    Parameters
    ----------
    T : undirected acyclic networkx graph
        A tree whose edges are annotated with transition matrices P.
    root : integer
        Root node.
    node_to_pmap : dict
        Map from a node to a 1d array giving subtree likelihoods per state.
        This map incorporates state restrictions.
    nstates : integer
        Number of states.
    root_distn : 1d ndarray, optional
        A finite distribution over root states.
    P_default : 2d ndarray, optional
        Default transition matrix.

    Returns
    -------
    node_to_distn : dict
        Sparse map from node to sparse map from state to probability.

    """
    if P_default is not None:
        _density.check_square_dense(P_default)
    if root_distn is not None:
        if root_distn.shape[0] != nstates:
            raise ValueError('inconsistent root distribution')

    # Bookkeeping.
    predecessors = nx.dfs_predecessors(T, root)

    # Get the distributions.
    node_to_distn = {}
    for node in nx.dfs_preorder_nodes(T, root):

        # Get the map from state to subtree likelihood.
        pmap = node_to_pmap[node]
        if pmap.shape[0] != nstates:
            raise ValueError('inconsistent pmap')

        # Compute the prior distribution at the root separately.
        # If the prior distribution is not provided,
        # then treat it as uninformative.
        if node == root:
            distn = get_normalized_ndarray_distn(pmap, root_distn)
        else:
            parent_node = predecessors[node]
            parent_distn = node_to_distn[parent_node]

            # Get the transition matrix associated with this edge.
            P = T[parent_node][node].get('P', P_default)
            _density.check_square_dense(P)
            if P.shape[0] != nstates:
                raise Exception('internal inconsistency')

            # For each parent state,
            # get the distribution over child states;
            # this distribution will include both the P matrix
            # and the pmap of the child node.
            distn = np.zeros(nstates, dtype=float)
            for sa in range(nstates):
                pa = parent_distn[sa]
                if pa:

                    # Construct the conditional transition probabilities.
                    sb_weights = P[sa] * pmap
                    sb_distn = get_normalized_ndarray_distn(sb_weights)

                    # Add to the marginal distn.
                    for sb in range(nstates):
                        distn[sb] += pa * sb_distn[sb]

        # Set the node_to_distn.
        node_to_distn[node] = distn

    # Return the marginal state distributions at nodes.
    return node_to_distn
Example #29
 def dls_test_predecessor(self):
     assert_equal(nx.dfs_predecessors(self.G, source=0, depth_limit=3),
                  {1: 0, 2: 1, 3: 2, 7: 2})
     assert_equal(nx.dfs_predecessors(self.D, source=2, depth_limit=3),
                  {8: 7, 9: 8, 3: 2, 7: 2})
Example #30
def ScenarioTreeModelFromNetworkX(
        tree,
        node_name_attribute=None,
        edge_probability_attribute='probability',
        stage_names=None,
        scenario_name_attribute=None):
    """
    Create a scenario tree model from a networkx tree.  The
    height of the tree must be at least 1 (meaning at least
    2 stages).

    Optional Arguments:
      - node_name_attribute:
           By default, node names are the same as the node
           hash in the networkx tree. This keyword can be
           set to the name of some property of nodes in the
           graph that will be used for their name in the
           PySP scenario tree.
      - edge_probability_attribute:
           Can be set to the name of some property of edges
           in the graph that defines the conditional
           probability of that branch (default: 'probability').
           If this keyword is set to None, then all branches
           leaving a node are assigned equal conditional
           probabilities.
      - stage_names:
           Can define a list of stage names to use (assumed
           in time order). The length of this list must
           match the number of stages in the tree.
      - scenario_name_attribute:
           By default, scenario names are the same as the
           leaf-node hash in the networkx tree. This keyword
           can be set to the name of some property of
           leaf-nodes in the graph that will be used for
           their corresponding scenario in the PySP scenario
           tree.

    Examples:

      - A 2-stage scenario tree with 10 scenarios:
           G = networkx.DiGraph()
           G.add_node("Root")
           N = 10
           for i in range(N):
               node_name = "Leaf"+str(i)
               G.add_node(node_name)
               G.add_edge("Root",node_name,probability=1.0/N)
           model = ScenarioTreeModelFromNetworkX(G)

       - A 4-stage scenario tree with 125 scenarios:
           branching_factor = 5
           height = 3
           G = networkx.balanced_tree(
                    branching_factor,
                   height,
                   networkx.DiGraph())
           model = ScenarioTreeModelFromNetworkX(
                       G,
                       edge_probability_attribute=None)
    """

    if not has_networkx:
        raise ValueError("networkx module is not available")

    if not networkx.is_tree(tree):
        raise TypeError(
            "object is not a tree (see networkx.is_tree)")

    if not networkx.is_directed(tree):
        raise TypeError(
            "object is not directed (see networkx.is_directed)")

    if not networkx.is_branching(tree):
        raise TypeError(
            "object is not a branching (see networkx.is_branching")

    if not networkx.is_arborescence(tree):
            raise TypeError("Object must be a directed, rooted tree "
                            "in which all edges point away from the "
                            "root (see networkx.is_arborescence)")

    root = [u for u,d in tree.in_degree().items() if d == 0]
    assert len(root) == 1
    root = root[0]
    num_stages = networkx.eccentricity(tree, v=root) + 1
    if num_stages < 2:
        raise ValueError(
            "The number of stages must be at least 2")
    m = CreateAbstractScenarioTreeModel()
    if stage_names is not None:
        unique_stage_names = set()
        for cnt, stage_name in enumerate(stage_names,1):
            m.Stages.add(stage_name)
            unique_stage_names.add(stage_name)
        if cnt != num_stages:
            raise ValueError(
                "incorrect number of stages names (%s), should be %s"
                % (cnt, num_stages))
        if len(unique_stage_names) != cnt:
            raise ValueError("all stage names were not unique")
    else:
        for i in range(num_stages):
            m.Stages.add('Stage'+str(i+1))
    node_to_name = {}
    node_to_scenario = {}
    def _setup(u, succ):
        if node_name_attribute is not None:
            if node_name_attribute not in tree.node[u]:
                raise KeyError(
                    "node '%s' missing name attribute: '%s'"
                    % (u, node_name_attribute))
            node_name = tree.node[u][node_name_attribute]
        else:
            node_name = u
        node_to_name[u] = node_name
        m.Nodes.add(node_name)
        if u in succ:
            for v in succ[u]:
                _setup(v, succ)
        else:
            # a leaf node
            if scenario_name_attribute is not None:
                if scenario_name_attribute not in tree.node[u]:
                    raise KeyError(
                        "node '%s' missing attribute: '%s'"
                        % (u, scenario_name_attribute))
                scenario_name = tree.node[u][scenario_name_attribute]
            else:
                scenario_name = u
            node_to_scenario[u] = scenario_name
            m.Scenarios.add(scenario_name)

    _setup(root,
           networkx.dfs_successors(tree, root))
    m = m.create_instance()
    def _add_node(u, stage, succ, pred):
        if node_name_attribute is not None:
            if node_name_attribute not in tree.node[u]:
                raise KeyError(
                    "node '%s' missing name attribute: '%s'"
                    % (u, node_name_attribute))
            node_name = tree.node[u][node_name_attribute]
        else:
            node_name = u
        m.NodeStage[node_name] = m.Stages[stage]
        if u == root:
            m.ConditionalProbability[node_name] = 1.0
        else:
            assert u in pred
            edge = tree.edge[pred[u]][u]
            probability = None
            if edge_probability_attribute is not None:
                if edge_probability_attribute not in edge:
                    raise KeyError(
                        "edge '(%s, %s)' missing probability attribute: '%s'"
                        % (pred[u], u, edge_probability_attribute))
                probability = edge[edge_probability_attribute]
            else:
                probability = 1.0/len(succ[pred[u]])
            m.ConditionalProbability[node_name] = probability
        if u in succ:
            child_names = []
            for v in succ[u]:
                child_names.append(
                    _add_node(v, stage+1, succ, pred))
            total_probability = 0.0
            for child_name in child_names:
                m.Children[node_name].add(child_name)
                total_probability += \
                    value(m.ConditionalProbability[child_name])
            if abs(total_probability - 1.0) > 1e-5:
                raise ValueError(
                    "edge probabilities leaving node '%s' "
                    "do not sum to 1 (total=%r)"
                    % (u, total_probability))
        else:
            # a leaf node
            scenario_name = node_to_scenario[u]
            m.ScenarioLeafNode[scenario_name] = node_name
            m.Children[node_name].clear()

        return node_name

    _add_node(root,
              1,
              networkx.dfs_successors(tree, root),
              networkx.dfs_predecessors(tree, root))

    return m
Example #31
def ScenarioTreeModelFromNetworkX(
        tree,
        node_name_attribute=None,
        edge_probability_attribute='probability',
        stage_names=None,
        scenario_name_attribute=None):
    """
    Create a scenario tree model from a networkx tree.  The
    height of the tree must be at least 1 (meaning at least
    2 stages).

    Optional Arguments:
      - node_name_attribute:
           By default, node names are the same as the node
           hash in the networkx tree. This keyword can be
           set to the name of some property of nodes in the
           graph that will be used for their name in the
           PySP scenario tree.
      - edge_probability_attribute:
           Can be set to the name of some property of edges
           in the graph that defines the conditional
           probability of that branch (default: 'probability').
           If this keyword is set to None, then all branches
           leaving a node are assigned equal conditional
           probabilities.
      - stage_names:
           Can define a list of stage names to use (assumed
           in time order). The length of this list must
           match the number of stages in the tree.
      - scenario_name_attribute:
           By default, scenario names are the same as the
           leaf-node hash in the networkx tree. This keyword
           can be set to the name of some property of
           leaf-nodes in the graph that will be used for
           their corresponding scenario in the PySP scenario
           tree.

    Examples:

      - A 2-stage scenario tree with 10 scenarios:
           G = networkx.DiGraph()
           G.add_node("Root")
           N = 10
           for i in range(N):
               node_name = "Leaf"+str(i)
               G.add_node(node_name)
               G.add_edge("Root",node_name,probability=1.0/N)
           model = ScenarioTreeModelFromNetworkX(G)

       - A 4-stage scenario tree with 125 scenarios:
           branching_factor = 5
           height = 3
           G = networkx.balanced_tree(
                    branching_factor,
                   height,
                   networkx.DiGraph())
           model = ScenarioTreeModelFromNetworkX(
                       G,
                       edge_probability_attribute=None)
    """

    if not has_networkx:
        raise ValueError("networkx module is not available")

    if not networkx.is_tree(tree):
        raise TypeError(
            "object is not a tree (see networkx.is_tree)")

    if not networkx.is_directed(tree):
        raise TypeError(
            "object is not directed (see networkx.is_directed)")

    if not networkx.is_branching(tree):
        raise TypeError(
            "object is not a branching (see networkx.is_branching")

    if not networkx.is_arborescence(tree):
            raise TypeError("Object must be a directed, rooted tree "
                            "in which all edges point away from the "
                            "root (see networkx.is_arborescence)")

    in_degree_items = tree.in_degree()
    # Prior to networkx ~2.0, in_degree() returned a dictionary.
    # Now it is a view on items, so only call .items() for the old case
    if hasattr(in_degree_items, 'items'):
        in_degree_items = in_degree_items.items()
    root = [u for u,d in in_degree_items if d == 0]
    assert len(root) == 1
    root = root[0]
    num_stages = networkx.eccentricity(tree, v=root) + 1
    if num_stages < 2:
        raise ValueError(
            "The number of stages must be at least 2")
    m = CreateAbstractScenarioTreeModel()
    if stage_names is not None:
        unique_stage_names = set()
        for cnt, stage_name in enumerate(stage_names,1):
            m.Stages.add(stage_name)
            unique_stage_names.add(stage_name)
        if cnt != num_stages:
            raise ValueError(
                "incorrect number of stages names (%s), should be %s"
                % (cnt, num_stages))
        if len(unique_stage_names) != cnt:
            raise ValueError("all stage names were not unique")
    else:
        for i in range(num_stages):
            m.Stages.add('Stage'+str(i+1))
    node_to_name = {}
    node_to_scenario = {}
    def _setup(u, succ):
        if node_name_attribute is not None:
            if node_name_attribute not in tree.node[u]:
                raise KeyError(
                    "node '%s' missing name attribute: '%s'"
                    % (u, node_name_attribute))
            node_name = tree.node[u][node_name_attribute]
        else:
            node_name = u
        node_to_name[u] = node_name
        m.Nodes.add(node_name)
        if u in succ:
            for v in succ[u]:
                _setup(v, succ)
        else:
            # a leaf node
            if scenario_name_attribute is not None:
                if scenario_name_attribute not in tree.node[u]:
                    raise KeyError(
                        "node '%s' missing attribute: '%s'"
                        % (u, scenario_name_attribute))
                scenario_name = tree.node[u][scenario_name_attribute]
            else:
                scenario_name = u
            node_to_scenario[u] = scenario_name
            m.Scenarios.add(scenario_name)

    _setup(root,
           networkx.dfs_successors(tree, root))
    m = m.create_instance()
    def _add_node(u, stage, succ, pred):
        if node_name_attribute is not None:
            if node_name_attribute not in tree.node[u]:
                raise KeyError(
                    "node '%s' missing name attribute: '%s'"
                    % (u, node_name_attribute))
            node_name = tree.node[u][node_name_attribute]
        else:
            node_name = u
        m.NodeStage[node_name] = m.Stages[stage]
        if u == root:
            m.ConditionalProbability[node_name] = 1.0
        else:
            assert u in pred
            # prior to networkx ~2.0, we used a .edge attribute on DiGraph,
            # which no longer exists.
            if hasattr(tree, 'edge'):
                edge = tree.edge[pred[u]][u]
            else:
                edge = tree.edges[pred[u],u]
            probability = None
            if edge_probability_attribute is not None:
                if edge_probability_attribute not in edge:
                    raise KeyError(
                        "edge '(%s, %s)' missing probability attribute: '%s'"
                        % (pred[u], u, edge_probability_attribute))
                probability = edge[edge_probability_attribute]
            else:
                probability = 1.0/len(succ[pred[u]])
            m.ConditionalProbability[node_name] = probability
        if u in succ:
            child_names = []
            for v in succ[u]:
                child_names.append(
                    _add_node(v, stage+1, succ, pred))
            total_probability = 0.0
            for child_name in child_names:
                m.Children[node_name].add(child_name)
                total_probability += \
                    value(m.ConditionalProbability[child_name])
            if abs(total_probability - 1.0) > 1e-5:
                raise ValueError(
                    "edge probabilities leaving node '%s' "
                    "do not sum to 1 (total=%r)"
                    % (u, total_probability))
        else:
            # a leaf node
            scenario_name = node_to_scenario[u]
            m.ScenarioLeafNode[scenario_name] = node_name
            m.Children[node_name].clear()

        return node_name

    _add_node(root,
              1,
              networkx.dfs_successors(tree, root),
              networkx.dfs_predecessors(tree, root))

    return m
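
Both variants of the function drive the recursive _add_node with the same two maps; a minimal sketch of that bookkeeping on a toy two-stage tree (hypothetical node names):

import networkx

G = networkx.DiGraph()
G.add_edge("Root", "Leaf0", probability=0.5)
G.add_edge("Root", "Leaf1", probability=0.5)
succ = networkx.dfs_successors(G, "Root")     # {'Root': ['Leaf0', 'Leaf1']}
pred = networkx.dfs_predecessors(G, "Root")   # {'Leaf0': 'Root', 'Leaf1': 'Root'}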
Example #32
def minibatch_(clf,scaler,w, loss__,mse,hinge1,hinge2,full_image,alphas,learning_rate,test_imgs, train_imgs,minibatch,subsamples,sum_x,n_samples,sum_sq_x,mean,variance, mode):
    if mode == 'loss_test' or mode == 'loss_scikit_test' or mode == 'levels_test':
        X_p, y_p, inv = get_data(class_, test_imgs, train_imgs, minibatch, minibatch + 1, 'test', c)                
    else:
        X_p, y_p, inv = get_data(class_, test_imgs, train_imgs, minibatch, minibatch + 1, 'training', c)        
    if X_p != []:
        boxes = []
        ground_truth = inv[0][2]
        img_nr = inv[0][0]
        if less_features:
            X_p = [fts[0:features_used] for fts in X_p]
        if os.path.isfile('/home/stahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt'):
            f = open('/home/stahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt', 'r')
        else:
            print 'warning'
        for line, y in zip(f, inv):
            tmp = line.split(',')
            coord = []
            for s in tmp:
                coord.append(float(s))
            boxes.append([coord, y[2]])
        assert(len(boxes)<500)
        boxes, y_p, X_p = sort_boxes(boxes, y_p, X_p, 0,500)
        
        if os.path.isfile('/home/stahl/GroundTruth/sheep_coords_for_features/'+ (format(img_nr, "06d")) +'.txt'):
            gr = open('/home/stahl/GroundTruth/sheep_coords_for_features/'+ (format(img_nr, "06d")) +'.txt', 'r')
        ground_truths = []
        for line in gr:
           tmp = line.split(',')
           ground_truth = []
           for s in tmp:
              ground_truth.append(int(s))
           ground_truths.append(ground_truth)
        
        #prune boxes
        pruned_x = []
        pruned_y = []
        pruned_boxes = []
        if prune:
            for i, y_ in enumerate(y_p):
                if y_ > 0:
                    pruned_x.append(X_p[i])
                    pruned_y.append(y_p[i])
                    pruned_boxes.append(boxes[i])
        else:
            pruned_x = X_p
            pruned_y = y_p
            pruned_boxes = boxes
        
        if subsampling and len(pruned_boxes) > subsamples:
            pruned_x = pruned_x[0:subsamples]
            pruned_y = pruned_y[0:subsamples]
            pruned_boxes = pruned_boxes[0:subsamples]
            
        if mode == 'mean_variance':
            sum_x += np.array(pruned_x).sum(axis=0)
            n_samples += len(pruned_x)
            sum_sq_x +=  (np.array(pruned_x)**2).sum(axis=0)
            scaler.partial_fit(pruned_x)  # Don't cheat - fit only on training data
            return sum_x,n_samples,sum_sq_x, scaler
            
        # create_tree
        G, levels = create_tree(pruned_boxes)
        
        #normalize
        norm_x = []
        if normalize:
#            for p_x in pruned_x:
#                norm_x.append((p_x-mean)/variance)
            norm_x = scaler.transform(pruned_x)
        else:
            norm_x = pruned_x
        data = (G, levels, pruned_y, norm_x, pruned_boxes, ground_truths, alphas)
        sucs = nx.dfs_successors(G)
        
        predecs = nx.dfs_predecessors(G)
        
        #preprocess: node - children
        children = {}
        last = -1
        for node,children_ in zip(sucs.keys(),sucs.values()):
            if node != last+1:
                for i in range(last+1,node):
                    children[i] = []
                children[node] = children_
            elif node == last +1:
                children[node] = children_
            last = node
        if mode == 'train':
            nodes = list(G.nodes())
            for node in nodes:
                print node
                if node == 0:
                    w = learn_root(w,norm_x[0],pruned_y[0],learning_rate,alphas)
                else:
                    w = like_scikit(scaler,w,norm_x,pruned_y,node,predecs,children,pruned_boxes,learning_rate,alphas,img_nr)
            return w, len(pruned_y)
        elif mode == 'scikit_train':
            clf.partial_fit(norm_x,pruned_y)
            return clf
        elif mode == 'loss_train' or mode == 'loss_test':
            loss__.append(loss(scaler, w, data, predecs, children,img_nr,-1))
            mse.append(((data[2] - np.dot(w,np.array(data[3]).T)) ** 2).sum())
            a2 = alphas[2]
            data = (G, levels, pruned_y, norm_x, pruned_boxes, ground_truths, [0,0,a2,0])
            hinge1.append(loss(scaler, w, data, predecs, children,img_nr,-1))
            a3 = alphas[3]
            data = (G, levels, pruned_y, norm_x, pruned_boxes, ground_truths, [0,0,0,a3])
            hinge2.append(loss(scaler, w, data, predecs, children,img_nr,-1))
            full_image.append([pruned_y[0],np.dot(w,np.array(norm_x[0]).T)])
            return loss__, mse,hinge1,hinge2,full_image
        elif mode == 'loss_scikit_test' or mode == 'loss_scikit_train':
            loss__.append(((clf.predict(norm_x) - pruned_y)**2).sum())
            return loss__ 
        elif mode == 'finite_differences':
            feature = random.sample(range(4096),1)[0]
            #1. Pick an example z.
            example = random.sample(range(len(norm_x[1:])),1)[0]
            #2. Compute the loss Q(z, w) for the current w.
            Q = loss(scaler, w,data, predecs, children,img_nr,example)
            #3. Compute the gradient g = ∇w Q(z, w).
            g = gradient(w,norm_x,pruned_y,example,predecs,children,boxes,alphas,img_nr,feature)
            #4. Apply a slight perturbation w0 = w +δ. For instance, change a single weight
            #by a small increment, or use δ = −γg with γ small enough.
            w0 = w.copy()
            w0[feature] = w0[feature] + delta
            #5. Compute the new loss Q(z, w0
            #) and verify that Q(z, w0) ≈ Q(z, w) + δg
            # Q(z, w + delta*e_i) ≈ ( Q(z, w) + delta * g_i )
            Q_ = loss(scaler, w0,data, predecs, children,img_nr,example)
            #print Q, Q_, g
            #print abs(Q_ - (Q + delta*g)) < 0.001
            #raw_input()
        elif mode == 'levels_train' or mode == 'levels_test':
            im = mpimg.imread('/home/stahl/Images/'+ (format(img_nr, "06d")) +'.jpg')
            plt.imshow(im)
            preds = []
            for i,x_ in enumerate(norm_x):
                preds.append(np.dot(w, x_))
            cpls = []
            truelvls = []
            used_boxes_ = []
            # to get prediction min and max for colorbar
            min_pred = 10
            max_pred = -5
            for level in levels:
                cpl,used_boxes = count_per_level(scaler,w, preds, img_nr, pruned_boxes,levels[level], '')
                if used_boxes is not None:
                    used_b_preds = [x[1] for x in used_boxes]
                    if min(used_b_preds) < min_pred:
                        min_pred = min(used_b_preds)
                    if max(used_b_preds) > max_pred:
                        max_pred = max(used_b_preds)
            if min(preds) < min_pred:
                min_pred = min(preds)
            if max(preds) > max_pred:
                max_pred = max(preds)
            print 'minmax of intersections: ', min_pred, max_pred
            cNorm  = colors.Normalize(vmin=min_pred, vmax=max_pred)
            scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=pl.cm.jet)
            scalarMap.set_array(range(int(round(min_pred - 0.5)), int(round(max_pred + 0.5))))
            for pr_box, pr in zip(pruned_boxes,preds):
                pru_box = pr_box[0]
                colorVal = scalarMap.to_rgba(pr)
                ax = plt.gca()
                ax.add_patch(Rectangle((int(pru_box[0]), int(pru_box[1])), int(pru_box[2] - pru_box[0]), int(pru_box[3] - pru_box[1]), alpha=0.1, facecolor = colorVal, edgecolor = 'black'))
            for level in levels:
                #tru and truelvls was in order to check if count_per_level method is correct
                cpl,used_boxes = count_per_level(scaler,w, preds, img_nr, pruned_boxes,levels[level], '')
                #tru = count_per_level(None, pruned_y, img_nr, pruned_boxes,levels[level], 'gt')
                cpls.append(cpl)
                #plot image and predictions as color - only for debugging/testing
                if used_boxes is not None:
                    for u_box in used_boxes:
                        pru_box = u_box[0]
                        colorVal = scalarMap.to_rgba(u_box[1])
                        #print u_box[0],u_box[1]
                        ax = plt.gca()
                        ax.add_patch(Rectangle((int(pru_box[0]), int(pru_box[1])), int(pru_box[2] - pru_box[0]), int(pru_box[3] - pru_box[1]), alpha=0.1, facecolor = colorVal, edgecolor = 'black'))
                #truelvls.append(tru)
            #print 'truth: ', pruned_y[0]
            matplotlib.pylab.colorbar(scalarMap, shrink=0.9)
            plt.draw()
            plt.savefig('/home/stahl/'+str(img_nr))
            plt.clf()
            return cpls, truelvls, used_boxes_,pruned_boxes,preds
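
# A minimal sketch of the tree bookkeeping used above, with a hypothetical 4-node graph
# standing in for the box tree returned by create_tree (node 0 plays the full-image root):
import networkx as nx

toy = nx.DiGraph()
toy.add_edges_from([(0, 1), (0, 2), (2, 3)])

sucs = nx.dfs_successors(toy, source=0)       # {0: [1, 2], 2: [3]}
predecs = nx.dfs_predecessors(toy, source=0)  # {1: 0, 2: 0, 3: 2}

# same effect as the gap-filling loop above: every node gets a (possibly empty) child list
children = dict((node, sucs.get(node, [])) for node in toy.nodes())
print children  # {0: [1, 2], 1: [], 2: [3], 3: []}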
Ejemplo n.º 33
0
import re
import sys
import networkx as nx

node_regex = re.compile(r"(\w+ \w+) bags contain ")
bags_regex = re.compile(r"(\d) (\w+ \w+) bags?[,\.] ?")
graph = nx.DiGraph()

for line in sys.stdin:
    bag_name = node_regex.match(line).groups()[0]
    content = bags_regex.findall(line) if "no other bags" not in line else []

    graph.add_node(bag_name) # idempotent
    for count_str, next_bag in content:
        graph.add_node(next_bag) # still idempotent
        count = int(count_str)
        graph.add_edge(next_bag, bag_name, weight=count)

my_bag = "shiny gold"
print(len(list(nx.dfs_predecessors(graph, my_bag))))
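
# For a hypothetical two-rule input
#   light red bags contain 1 bright white bag.
#   bright white bags contain 1 shiny gold bag.
# the edges run from contained bag to container (shiny gold -> bright white -> light red),
# so nx.dfs_predecessors(graph, my_bag) == {'bright white': 'shiny gold', 'light red': 'bright white'}
# and the printed answer is 2: every bag color that can eventually hold a shiny gold bag.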
Ejemplo n.º 34
0
def main():
    test_imgs, train_imgs = get_seperation()
    # learn
#    if os.path.isfile('/home/stahl/Models/'+class_+c+'normalized_constrained.pickle'):
#        with open('/home/stahl/Models/'+class_+c+'normalized_constrained.pickle', 'rb') as handle:
#            w = pickle.load(handle)
#    else:
    loss_ = {}
    weights = {}
    gamma = 0.5
    epochs = 50
    images = 10
    subsamples = 10
    weights_visualization = {}
    learning_rates = [math.pow(10,-3),math.pow(10,-4),math.pow(10,-5),math.pow(10,-6)]
    learning_rates_ = {}
    weights_sample = random.sample(range(4096), 10)
    all_alphas = [0,math.pow(10,-1),math.pow(10,-2),math.pow(10,-3)]
    sum_x = np.zeros(4096)
    n_samples = 0.0
    sum_sq_x = np.zeros(4096)
    #TODO: normalize
    for minibatch in range(0,images):
        sum_x,n_samples,sum_sq_x = minibatch_([], [],[],[],test_imgs, train_imgs,minibatch,subsamples,sum_x,n_samples,sum_sq_x,None,None,'mean_variance')
    mean = sum_x/n_samples
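    # unbiased sample variance recovered from the running sums: (sum_sq - sum**2/n) / (n - 1)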
    variance = (sum_sq_x - (sum_x * sum_x) / n_samples) / (n_samples - 1)
    print mean,variance
    raw_input()
    for alpha1 in all_alphas:
        for learning_rate0 in learning_rates:
            learning_rate = learning_rate0
            alphas = [alpha1,0,0]
            w = np.zeros(4096)
            change = -100000
            for epoch in range(epochs):
                print epoch, learning_rate
                if learning_rate0 in learning_rates_:
                    learning_rates_[learning_rate0].append(learning_rate)
                else:
                    learning_rates_[learning_rate0] = [learning_rate]
                #shuffle images, not boxes!
                shuffled = range(0,images)
                random.shuffle(shuffled)
                for minibatch in shuffled:
                    w,t = minibatch_(w, [],alphas,learning_rate,test_imgs, train_imgs,minibatch,subsamples,sum_x,n_samples,sum_sq_x,mean,variance,'train')
                            
                #update learning_rate
                learning_rate = learning_rate0 * (1+learning_rate0*gamma*t)**-1
                #compute average loss on training set
                loss__ = []
                for minibatch in range(0,images):
                    loss__ = minibatch_(w, loss__,alphas,learning_rate,test_imgs, train_imgs,minibatch,subsamples,sum_x,n_samples,sum_sq_x,mean,variance,'test')
                # save avg loss for plotting
                temp_label = [alphas[0], learning_rate0]
                llloss = sum(loss__)/len(loss__)
                if tuple(temp_label) in loss_:
                    change = llloss - loss_[alphas[0], learning_rate0][-1]
                    print 'avg', llloss, ' change: ', change
                    loss_[alphas[0], learning_rate0].append(llloss)
                else:
                    loss_[alphas[0], learning_rate0] = [llloss]
                          
                # save sample weights for plotting
                ww_ = []
                for w_ in weights_sample:
                    ww_.append(w[w_])
                temp_label = [alphas[0],learning_rate0]
                if tuple(temp_label) in weights_visualization:
                    weights_visualization[alphas[0],learning_rate0].append(ww_)
                else:
                    weights_visualization[alphas[0],learning_rate0] = [ww_]
                    
                #TODO: update learning rate
            
            
            
            #save final weights
            weights[alphas[0], learning_rate0] = w
                        
    final_model_losses = [x[-1] for x in loss_.values()]
    best_model_index = final_model_losses.index(min(final_model_losses))
    a1, learning_rate_0 = loss_.keys()[best_model_index]
    print a1, learning_rate_0
    w_best = weights[a1, learning_rate_0]
    #plot
    for i,l in zip(loss_.keys(),loss_.values()):
        to_plot = [math.log(a) for a in loss_[i]]
        labl = "alpha1=%s"%i[0]
        plt.plot(range(len(loss_[i])),to_plot,'-', label=labl)
        plt.title('Learning rate=%s'%learning_rate_0)
    plt.xlabel('Iterations')
    plt.ylabel('Log(Loss)')
    plt.legend()
    plt.savefig('/home/stahl/debugwithalphas.png')
    
    plt.figure()
    for l in learning_rates_:
        plt.plot(range(len(learning_rates_[l])),learning_rates_[l],label='start=%s'%l)
    plt.xlabel('Iterations')
    plt.ylabel('Learning rate')
    plt.legend()
    plt.savefig('/home/stahl/learning_ratewithalphas.png')
    
    plt.figure()
    for in_ in range(len(weights_sample)):
        refactor = [weights_visualization[a1,learning_rate_0][x][in_] for x in range(len(weights_visualization[a1,learning_rate_0]))]
        plt.plot(range(len(refactor)),refactor,'-')
    plt.xlabel('Iterations')
    plt.ylabel('Weights')
    plt.savefig('/home/stahl/best_weights_pruned_withalphas.png')
    print "model learned"
    with open('/home/stahl/Models/'+class_+c+'normalized_constrained_best.pickle', 'wb') as handle:
        pickle.dump(w_best, handle)
        
    sys.exit("Error message")
    #TODO: compute average loss test set using best configuration on hold out set
    loss__ = []
    for minibatch in range(0,100,1):
        print alphas, learning_rate, minibatch
        X_p, y_p, inv = get_data(class_, test_imgs, train_imgs, minibatch, minibatch + 1, 'test', c)                
        if X_p != []:
            #TODO: prune?
            boxes = []
            ground_truth = inv[0][2]
            img_nr = inv[0][0]
            if os.path.isfile('/home/stahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt'):
                f = open('/home/stahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt', 'r')
            else:
                print 'warning: no Coords_prop_windows file for image', img_nr
            for line, y in zip(f, inv):
                tmp = line.split(',')
                coord = []
                for s in tmp:
                    coord.append(float(s))
                boxes.append([coord, y[2]])
            assert(len(boxes)<500)
            boxes, y_p, X_p = sort_boxes(boxes, y_p, X_p, 0,500)

            if os.path.isfile('/home/stahl/GroundTruth/sheep_coords_for_features/'+ (format(img_nr, "06d")) +'.txt'):
                gr = open('/home/stahl/GroundTruth/sheep_coords_for_features/'+ (format(img_nr, "06d")) +'.txt', 'r')
            ground_truths = []
            for line in gr:
               tmp = line.split(',')
               ground_truth = []
               for s in tmp:
                  ground_truth.append(int(s))
               ground_truths.append(ground_truth)

            # create_tree
            G, levels = create_tree(boxes)
            # normalize
            new_matrix = preprocessing.normalize(X_p, norm='l2', axis=0)
            root = boxes[0]
            boxes_ = boxes[1:]
            for box, y,i_ in zip(boxes_, y_p[1:],range(1,len(boxes))):
                if i_ in G.nodes():
                   parent = nx.dfs_predecessors(G)[i_]
                   for i, b_i in enumerate(levels.values()):
                      if any(x == parent for x in b_i):
                          level = i
                          break
                else:
                   print len(new_matrix), i_
                   if i_ >= len(new_matrix):
                      new_matrix = new_matrix[0:-1]
                      y_p = y_p[0:-1]
                   else:
                      new_matrix = np.vstack((new_matrix[0:i_],new_matrix[i_+1:]))
                      y_p = y_p[0:i_] + y_p[i_+1:]
  #              y_p = y_p[0:8]
  #              new_matrix = new_matrix[0:8]
            data = (G, levels, y_p, new_matrix, boxes, ground_truths, alphas)
            sucs = nx.dfs_successors(G)
            predecs = nx.dfs_predecessors(G)
            #preprocess: node - children
            children = {}
            last = -1
            #print sucs, predecs
            for node,children_ in zip(sucs.keys(),sucs.values()):
                #print node, children_, predecs.values()[node-1]
                #print node,children_, last+1
                #raw_input()
                if node != last+1:
                    for i in range(last+1,node):
                        children[i] = []
                    children[node] = children_
                elif node == last +1:
                    children[node] = children_
                last = node
            loss__.append(loss(w, data, predecs, children))

    temp_label = [alphas[0],alphas[1],alphas[2],learning_rate]
    if tuple(temp_label) in loss_:
        loss_[alphas[0],alphas[1],alphas[2],learning_rate].append(sum(loss__)/len(loss__))
    else:
        loss_[alphas[0],alphas[1],alphas[2],learning_rate] = [sum(loss__)/len(loss__)]
Ejemplo n.º 35
0
def part_one():
    RG = G.reverse()
    predecessors = sorted(nx.dfs_predecessors(RG, 'shiny gold'))
    print(predecessors)
    print(len(predecessors))
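    # assuming G's edges point from a bag to the bags it contains, reversing G lets
    # dfs_predecessors walk upward from 'shiny gold' to every bag that can eventually hold it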
Ejemplo n.º 36
0
def main():
    # Command line arguments
    parser = argparse.ArgumentParser(
        description='Extract model subsets from the National Hydrologic Model')
    parser.add_argument('-O',
                        '--output_dir',
                        help='Output directory for subset')
    parser.add_argument('-p',
                        '--param_filename',
                        help='Name of output parameter file')
    parser.add_argument('-s',
                        '--streamflow_filename',
                        help='Name of streamflow data file')
    parser.add_argument('-P',
                        '--paramdb_dir',
                        help='Location of parameter database')
    parser.add_argument('-M',
                        '--merged_paramdb_dir',
                        help='Location of merged parameter database')
    parser.add_argument('-C', '--cbh_dir', help='Location of CBH files')
    parser.add_argument('-g',
                        '--geodatabase_filename',
                        help='Full path to NHM geodatabase')
    parser.add_argument('-j', '--job', help='Job directory to work in')
    parser.add_argument('-v',
                        '--verbose',
                        help='Output additional information',
                        action='store_true')
    parser.add_argument('--check_DAG',
                        help='Verify the streamflow network',
                        action='store_true')
    parser.add_argument('--output_cbh',
                        help='Output CBH files for subset',
                        action='store_true')
    parser.add_argument('--output_shapefiles',
                        help='Output shapefiles for subset',
                        action='store_true')
    parser.add_argument('--output_streamflow',
                        help='Output streamflows for subset',
                        action='store_true')
    parser.add_argument('--cbh_netcdf',
                        help='Enable netCDF output for CBH files',
                        action='store_true')
    parser.add_argument('--param_netcdf',
                        help='Enable netCDF output for parameter file',
                        action='store_true')
    parser.add_argument(
        '--add_gages',
        metavar="KEY=VALUE",
        nargs='+',
        help=
        'Add arbitrary streamgages to POIs of form gage_id=segment. Segment must exist in the model subset. Additional streamgages are marked as poi_type=0.'
    )
    parser.add_argument(
        '--no_filter_params',
        help='Output all parameters regardless of modules selected',
        action='store_true')
    args = parser.parse_args()

    stdir = os.getcwd()

    # TODO: Add to command line arguments
    single_poi = False

    if args.job:
        if os.path.exists(args.job):
            # Change into job directory before running extraction
            os.chdir(args.job)
            # print('Working in directory: {}'.format(args.job))
        else:
            print('ERROR: Invalid jobs directory: {}'.format(args.job))
            exit(-1)

    # Setup the logging
    bandit_log = logging.getLogger('bandit')
    bandit_log.setLevel(logging.DEBUG)

    log_fmt = logging.Formatter('%(levelname)s: %(name)s: %(message)s')

    # Handler for file logs
    flog = logging.FileHandler('bandit.log')
    flog.setLevel(logging.DEBUG)
    flog.setFormatter(log_fmt)

    # Handler for console logs
    clog = logging.StreamHandler()
    clog.setLevel(logging.ERROR)
    clog.setFormatter(log_fmt)

    bandit_log.addHandler(flog)
    bandit_log.addHandler(clog)

    bandit_log.info('========== START {} =========='.format(
        datetime.datetime.now().isoformat()))

    addl_gages = None
    if args.add_gages:
        addl_gages = parse_gages(args.add_gages)
        bandit_log.info('Additional streamgages specified on command line')

    config = bc.Cfg('bandit.cfg')

    # Override configuration variables with any command line parameters
    for kk, vv in iteritems(args.__dict__):
        if kk not in [
                'job', 'verbose', 'cbh_netcdf', 'add_gages', 'param_netcdf',
                'no_filter_params'
        ]:
            if vv:
                bandit_log.info(
                    'Overriding configuration for {} with {}'.format(kk, vv))
                config.update_value(kk, vv)

    # Where to output the subset
    outdir = config.output_dir

    # The control file to use
    control_filename = config.control_filename

    # What to name the output parameter file
    param_filename = config.param_filename

    # Location of the NHM parameter database
    paramdb_dir = config.paramdb_dir

    # Location of the merged parameter database
    merged_paramdb_dir = config.merged_paramdb_dir

    streamgage_file = config.streamgage_file

    # List of outlets
    # dsmost_seg = config.outlets

    # List of upstream cutoffs
    # uscutoff_seg = config.cutoffs

    # List of additional HRUs (have no route to segment within subset)
    # hru_noroute = config.hru_noroute

    # List of output variables to subset
    try:
        include_model_output = config.include_model_output
        output_vars_dir = config.output_vars_dir
        output_vars = config.output_vars
    except KeyError:
        include_model_output = False

    # Control what is checked and output for subset
    check_dag = config.check_DAG

    try:
        output_cbh = config.output_cbh

        # Location of the NHM CBH files
        cbh_dir = config.cbh_dir
    except KeyError:
        output_cbh = False

    try:
        output_streamflow = config.output_streamflow

        # What to name the streamflow output file
        obs_filename = config.streamflow_filename
    except KeyError:
        output_streamflow = False

    try:
        output_shapefiles = config.output_shapefiles

        # Full path and filename to the geodatabase to use for outputting shapefile subsets
        geo_file = config.geodatabase_filename
    except KeyError:
        output_shapefiles = False

    # Load the control file
    ctl = ControlFile(control_filename)

    if ctl.has_dynamic_parameters:
        if config.dyn_params_dir:
            if os.path.exists(config.dyn_params_dir):
                dyn_params_dir = config.dyn_params_dir
            else:
                bandit_log.error('dyn_params_dir: {}, does not exist.'.format(
                    config.dyn_params_dir))
                exit(2)
        else:
            bandit_log.error(
                'Control file has dynamic parameters but dyn_params_dir is not specified in the config file'
            )
            exit(2)

    # Load master list of valid parameters
    vpdb = ValidParams()

    # Build list of parameters required for the selected control file modules
    required_params = vpdb.get_params_for_modules(modules=ctl.modules.values())

    # TODO: make sure dynamic parameter filenames are correct
    # Write an updated control file
    # ctl.write('somefile')

    # Date range for pulling NWIS streamgage observations
    if isinstance(config.start_date, datetime.date):
        st_date = config.start_date
    else:
        st_date = datetime.datetime(
            *[int(x) for x in re.split('-| |:', config.start_date)])

    if isinstance(config.end_date, datetime.date):
        en_date = config.end_date
    else:
        en_date = datetime.datetime(
            *[int(x) for x in re.split('-| |:', config.end_date)])

    # ===============================================================
    params_file = '{}/{}'.format(merged_paramdb_dir, PARAMETERS_XML)

    # Output revision of NhmParamDb and the revision used by merged paramdb
    nhmparamdb_revision = git_version(paramdb_dir)
    bandit_log.info('Parameters based on NhmParamDb revision: {}'.format(
        nhmparamdb_revision))

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Read hru_nhm_to_local and hru_nhm_to_region
    # Create segment_nhm_to_local and segment_nhm_to_region

    # TODO: since hru_nhm_to_region and nhru_nhm_to_local are only needed for
    #       CBH files we should 'soft-fail' if the files are missing and just
    #       output a warning and turn off CBH output if it was selected.
    # hru_nhm_to_region = get_parameter('{}/hru_nhm_to_region.msgpack'.format(cbh_dir))
    # hru_nhm_to_local = get_parameter('{}/hru_nhm_to_local.msgpack'.format(cbh_dir))

    # Load the NHMparamdb
    print('Loading NHM ParamDb')
    pdb = ParamDb(merged_paramdb_dir)
    nhm_params = pdb.parameters
    nhm_global_dimensions = pdb.dimensions

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Get tosegment_nhm
    # NOTE: tosegment is now tosegment_nhm and the regional tosegment is gone.
    tosegment = nhm_params.get('tosegment').data
    nhm_seg = nhm_params.get('nhm_seg').data

    if args.verbose:
        print('Generating stream network from tosegment_nhm')

    # Build the stream network
    dag_ds = nx.DiGraph()
    for ii, vv in enumerate(tosegment):
        #     dag_ds.add_edge(ii+1, vv)
        if vv == 0:
            dag_ds.add_edge(ii + 1, 'Out_{}'.format(ii + 1))
        else:
            dag_ds.add_edge(ii + 1, vv)

    # nx.draw_networkx(dag_ds)
    bandit_log.debug('Number of NHM downstream nodes: {}'.format(
        dag_ds.number_of_nodes()))
    bandit_log.debug('Number of NHM downstream edges: {}'.format(
        dag_ds.number_of_edges()))

    if check_dag:
        if not nx.is_directed_acyclic_graph(dag_ds):
            bandit_log.error('Cycles and/or loops found in stream network')

            for xx in nx.simple_cycles(dag_ds):
                bandit_log.error('Cycle found for segment {}'.format(xx))

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Build dictionary which maps poi_gage_id to poi_gage_segment
    # poi_gage_segment_tmp = get_parameter('{}/poi_gage_segment.msgpack'.format(merged_paramdb_dir))['data']
    # poi_gage_id_tmp = get_parameter('{}/poi_gage_id.msgpack'.format(merged_paramdb_dir))['data']
    poi_gage_segment_tmp = nhm_params.get('poi_gage_segment').data
    poi_gage_id_tmp = nhm_params.get('poi_gage_id').data

    # Create dictionary to lookup nhm_segment for a given poi_gage_id
    poi_id_to_seg = dict(zip(poi_gage_id_tmp, poi_gage_segment_tmp))

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Read streamgage ids from file - one streamgage id per row
    with open(streamgage_file, 'r') as fhdl:
        streamgages = fhdl.read().splitlines()

    # =====================================
    # dag_ds should not change below here
    # For each streamgage:
    #   1) lookup nhm_segment (if any) and use as outlet
    #   2) create output directory
    #   3) subset the stream network, HRUs, params, etc

    uscutoff_seg = []

    for sg in streamgages:
        print('Working on streamgage {}'.format(sg))

        while True:
            # Create the upstream graph
            dag_us = dag_ds.reverse()
            bandit_log.debug('Number of NHM upstream nodes: {}'.format(
                dag_us.number_of_nodes()))
            bandit_log.debug('Number of NHM upstream edges: {}'.format(
                dag_us.number_of_edges()))

            # Trim the u/s graph to remove segments above the u/s cutoff segments
            try:
                for xx in uscutoff_seg:
                    try:
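                        # everything dfs_predecessors finds on the upstream (reversed) graph
                        # lies above the cutoff segment xx, so dropping those nodes trims the
                        # network upstream of the cutoff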
                        dag_us.remove_nodes_from(
                            nx.dfs_predecessors(dag_us, xx))

                        # Also remove the cutoff segment itself
                        dag_us.remove_node(xx)
                    except KeyError:
                        print(
                            'WARNING: nhm_segment {} does not exist in stream network'
                            .format(xx))
            except TypeError:
                bandit_log.error(
                    '\nSelected cutoffs should at least be an empty list instead of NoneType. ({})'
                    .format(outdir))
                exit(200)

            bandit_log.debug(
                'Number of NHM upstream nodes (trimmed): {}'.format(
                    dag_us.number_of_nodes()))
            bandit_log.debug(
                'Number of NHM upstream edges (trimmed): {}'.format(
                    dag_us.number_of_edges()))

            # Lookup the outlet for the current streamgage
            try:
                dsmost_seg = [poi_id_to_seg[sg]]

                if dsmost_seg[0] == 0:
                    # POI stream segment was never properly assigned in paramdb
                    bandit_log.error(
                        'Streamgage {} has segment = 0. Skipping.'.format(sg))
                    break
                elif len(dsmost_seg) > 1:
                    # Should never have more than one segment per streamgage
                    bandit_log.info(
                        'Streamgage {} has more than one stream segment.'.
                        format(sg))
                    break
            except KeyError:
                bandit_log.error(
                    'Streamgage {} does not exist in poi_gage_id'.format(sg))
                break

            sg_dir = '{}/{}'.format(outdir, sg)

            try:
                os.makedirs(sg_dir)
            except OSError as exception:
                if exception.errno != errno.EEXIST:
                    raise
                else:
                    pass

            # =======================================
            # Given a d/s segment (dsmost_seg) create a subset of u/s segments
            if args.verbose:
                print('\tExtracting model subset')

            # Get all unique segments u/s of the starting segment
            uniq_seg_us = set()
            if dsmost_seg:
                for xx in dsmost_seg:
                    try:
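                        # pred maps every segment reached upstream of xx to its DFS parent;
                        # keys plus values therefore cover all upstream segments, including
                        # the outlet segment xx itself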
                        pred = nx.dfs_predecessors(dag_us, xx)
                        uniq_seg_us = uniq_seg_us.union(
                            set(pred.keys()).union(set(pred.values())))
                    except KeyError:
                        bandit_log.error(
                            'KeyError: Segment {} does not exist in stream network'
                            .format(xx))
                # print('\nKeyError: Segment {} does not exist in stream network'.format(xx))

                # Get a subgraph in the dag_ds graph and return the edges
                dag_ds_subset = dag_ds.subgraph(uniq_seg_us).copy()

                # 2018-02-13 PAN: It is possible to have outlets specified which are not truly
                #                 outlets in the most conservative sense (e.g. a point where
                #                 the stream network exits the study area). This occurs when
                #                 doing headwater extractions where all segments for a headwater
                #                 are specified in the configuration file. Instead of creating
                #                 output edges for all specified 'outlets' the set difference
                #                 between the specified outlets and nodes in the graph subset
                #                 which have no edges is performed first to reduce the number of
                #                 outlets to the 'true' outlets of the system.
                node_outlets = [ee[0] for ee in dag_ds_subset.edges()]
                true_outlets = set(dsmost_seg).difference(set(node_outlets))
                bandit_log.debug('node_outlets: {}'.format(','.join(
                    map(str, node_outlets))))
                bandit_log.debug('true_outlets: {}'.format(','.join(
                    map(str, true_outlets))))

                # Add the downstream segments that exit the subgraph
                for xx in true_outlets:
                    dag_ds_subset.add_edge(xx, 'Out_{}'.format(xx))
            else:
                # No outlets specified so pull the CONUS
                dag_ds_subset = dag_ds

            # Create list of toseg ids for the model subset
            try:
                # networkx 1.x
                toseg_idx = list(
                    set(xx[0] for xx in dag_ds_subset.edges_iter()))
            except AttributeError:
                # networkx 2.x
                toseg_idx = list(set(xx[0] for xx in dag_ds_subset.edges))

            toseg_idx0 = [xx - 1
                          for xx in toseg_idx]  # 0-based version of toseg_idx

            bandit_log.info('Number of segments in subset: {}'.format(
                len(toseg_idx)))

            # NOTE: With monolithic nhmParamDb files hru_segment becomes hru_segment_nhm and the regional hru_segments are gone.
            # 2019-09-16 PAN: This initially assumed hru_segment in the monolithic paramdb was ALWAYS
            #                 ordered 1..nhru. This is not always the case so the nhm_id parameter
            #                 needs to be loaded and used to map the nhm HRU ids to their
            #                 respective indices.
            hru_segment = nhm_params.get('hru_segment').data
            nhm_id = nhm_params.get('nhm_id').data

            nhm_id_to_idx = {}
            for ii, vv in enumerate(nhm_id):
                # keys are 1-based, values are 0-based
                nhm_id_to_idx[vv] = ii

            bandit_log.info('Number of NHM hru_segment entries: {}'.format(
                len(hru_segment)))

            # Create a dictionary mapping segments to HRUs
            seg_to_hru = {}
            for ii, vv in enumerate(hru_segment):
                # keys are 1-based, values in arrays are 1-based
                seg_to_hru.setdefault(vv, []).append(ii + 1)

            # Get HRU ids ordered by the segments in the model subset - entries are 1-based
            hru_order_subset = []
            for xx in toseg_idx:
                if xx in seg_to_hru:
                    for yy in seg_to_hru[xx]:
                        hru_order_subset.append(yy)
                else:
                    bandit_log.warning(
                        'Stream segment {} has no HRUs connected to it.'.
                        format(xx))
                    # raise ValueError('Stream segment has no HRUs connected to it.')

            # Append the additional non-routed HRUs to the list
            # if len(hru_noroute) > 0:
            #     for xx in hru_noroute:
            #         if hru_segment[xx-1] == 0:
            #             bandit_log.info('User-supplied HRU {} is not connected to any stream segment'.format(xx))
            #             hru_order_subset.append(xx)
            #         else:
            #             bandit_log.error('User-supplied HRU {} routes to stream segment {} - Skipping.'.format(xx,
            #                                                                                            hru_segment[xx-1]))

            hru_order_subset0 = [xx - 1 for xx in hru_order_subset]

            bandit_log.info('Number of HRUs in subset: {}'.format(
                len(hru_order_subset)))

            # Use hru_order_subset to pull selected indices for parameters with nhru dimensions
            # hru_order_subset contains the in-order indices for the subset of hru_segments
            # toseg_idx contains the in-order indices for the subset of tosegments

            # Renumber the tosegment list
            new_tosegment = []

            # Map old DAG_subds indices to new
            for xx in toseg_idx:
                if list(dag_ds_subset.neighbors(xx))[0] in toseg_idx:
                    new_tosegment.append(
                        toseg_idx.index(list(dag_ds_subset.neighbors(xx))[0]) +
                        1)
                else:
                    # Outlets should be assigned zero
                    new_tosegment.append(0)
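            # e.g. (hypothetical ids) with toseg_idx == [482, 483, 490] and segment 483
            # flowing into 490, the entry for 483 becomes toseg_idx.index(490) + 1 == 3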

            # Renumber the hru_segments for the subset
            new_hru_segment = []

            for xx in toseg_idx:
                # if DAG_subds.neighbors(xx)[0] in toseg_idx:
                if xx in seg_to_hru:
                    for _ in seg_to_hru[xx]:
                        # The new indices should be 1-based from PRMS
                        new_hru_segment.append(toseg_idx.index(xx) + 1)

            # Append zeroes to new_hru_segment for each additional non-routed HRU
            # if len(hru_noroute) > 0:
            #     for xx in hru_noroute:
            #         if hru_segment[xx-1] == 0:
            #             new_hru_segment.append(0)

            bandit_log.info('Size of hru_segment for subset: {}'.format(
                len(new_hru_segment)))

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Subset hru_deplcrv
            hru_deplcrv = nhm_params.get('hru_deplcrv').data

            bandit_log.info('Size of NHM hru_deplcrv: {}'.format(
                len(hru_deplcrv)))

            # Get subset of hru_deplcrv using hru_order
            # A single snarea_curve can be referenced by multiple HRUs
            hru_deplcrv_subset = np.array(hru_deplcrv)[
                tuple(hru_order_subset0), ]
            uniq_deplcrv = list(set(hru_deplcrv_subset))
            uniq_deplcrv0 = [xx - 1 for xx in uniq_deplcrv]

            # Create new hru_deplcrv and renumber
            new_hru_deplcrv = [
                uniq_deplcrv.index(cc) + 1 for cc in hru_deplcrv_subset
            ]
            bandit_log.info('Size of hru_deplcrv for subset: {}'.format(
                len(new_hru_deplcrv)))

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Subset poi_gage_segment
            new_poi_gage_segment = []
            new_poi_gage_id = []
            new_poi_type = []

            if nhm_params.exists('poi_gage_segment'):
                poi_gage_segment = nhm_params.get('poi_gage_segment').tolist()
                bandit_log.info('Size of NHM poi_gage_segment: {}'.format(
                    len(poi_gage_segment)))

                poi_gage_id = nhm_params.get('poi_gage_id').data
                poi_type = nhm_params.get('poi_type').data

                # We want to get the indices of the poi_gage_segments that match the
                # segments that are part of the subset. We can then use these
                # indices to subset poi_gage_id and poi_type.
                # The poi_gage_segment will need to be renumbered for the subset of segments.

                # To subset poi_gage_segment we have to lookup each segment in the subset

                # Reset the cutoff list
                uscutoff_seg = []

                # for ss in uniq_seg_us:
                try:
                    # networkx 1.x
                    for ss in nx.nodes_iter(dag_ds_subset):
                        if ss in poi_gage_segment:
                            new_poi_gage_segment.append(
                                toseg_idx.index(ss) + 1)
                            new_poi_gage_id.append(
                                poi_gage_id[poi_gage_segment.index(ss)])
                            new_poi_type.append(
                                poi_type[poi_gage_segment.index(ss)])
                except AttributeError:
                    # networkx 2.x
                    for ss in dag_ds_subset.nodes:
                        if ss in poi_gage_segment:
                            new_poi_gage_segment.append(
                                toseg_idx.index(ss) + 1)
                            new_poi_gage_id.append(
                                poi_gage_id[poi_gage_segment.index(ss)])
                            new_poi_type.append(
                                poi_type[poi_gage_segment.index(ss)])

                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # Add any valid user-specified streamgage, nhm_seg pairs
                if addl_gages:
                    for ss, vv in iteritems(addl_gages):
                        if ss in new_poi_gage_id:
                            idx = new_poi_gage_id.index(ss)
                            bandit_log.warning(
                                'Existing NHM POI, {}, overridden on commandline (was {}, now {})'
                                .format(ss, new_poi_gage_segment[idx],
                                        toseg_idx.index(vv) + 1))
                            new_poi_gage_segment[idx] = toseg_idx.index(vv) + 1
                            new_poi_type[idx] = 0
                        elif toseg_idx.index(vv) + 1 in new_poi_gage_segment:
                            sidx = new_poi_gage_segment.index(
                                toseg_idx.index(vv) + 1)
                            bandit_log.warning(
                                'User-specified streamgage ({}) has same nhm_seg ({}) as existing POI ({}), replacing streamgage ID'
                                .format(ss,
                                        toseg_idx.index(vv) + 1,
                                        new_poi_gage_id[sidx]))
                            new_poi_gage_id[sidx] = ss
                            new_poi_type[sidx] = 0
                        elif vv not in seg_to_hru.keys():
                            bandit_log.warning(
                                'User-specified streamgage ({}) has nhm_seg={} which is not part of the model subset - Skipping.'
                                .format(ss, vv))
                        else:
                            new_poi_gage_id.append(ss)
                            new_poi_gage_segment.append(
                                toseg_idx.index(vv) + 1)
                            new_poi_type.append(0)
                            bandit_log.info(
                                'Added user-specified POI streamgage ({}) at nhm_seg={}'
                                .format(ss, vv))

            # ==================================================================
            # ==================================================================
            # Process the parameters and create a parameter file for the subset
            params = list(nhm_params.keys())

            # Remove the POI-related parameters if we have no POIs
            if len(new_poi_gage_segment) == 0:
                bandit_log.warning(
                    'No POI gages found for subset; removing POI-related parameters.'
                )

                for rp in ['poi_gage_id', 'poi_gage_segment', 'poi_type']:
                    # params.pop(rp, None)
                    try:
                        params.remove(rp)
                    except ValueError:
                        print('ERROR: unable to remove {}'.format(rp))
                        pass

            params.sort()

            dims = {}
            for kk in nhm_global_dimensions.values():
                dims[kk.name] = kk.size

            # Resize dimensions to the model subset
            crap_dims = dims.copy()  # need a copy since we modify dims
            for dd, dv in iteritems(crap_dims):
                # dimensions 'nmonths' and 'one' are never changed
                if dd in HRU_DIMS:
                    dims[dd] = len(hru_order_subset0)
                elif dd == 'nsegment':
                    dims[dd] = len(toseg_idx0)
                elif dd == 'ndeplval':
                    dims[dd] = len(uniq_deplcrv0) * 11
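                    # each snow depletion curve holds 11 values, hence the factor of 11
                    # (matches the reshape((-1, 11)) used when the parameter is written out)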
                    # if 'ndepl' not in dims:
                    dims['ndepl'] = len(uniq_deplcrv0)
                elif dd == 'npoigages':
                    dims[dd] = len(new_poi_gage_segment)

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Build a ParameterSet for output
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            new_ps = ParameterSet()

            for dd, dv in iteritems(dims):
                new_ps.dimensions.add(dd, dv)

                if dd == 'npoigages':
                    # 20170217 PAN: nobs is missing from the paramdb but is necessary
                    new_ps.dimensions.add('nobs', dv)

            new_params = list(required_params)

            # WARNING: 2019-04-23 PAN
            #          Very hacky way to remove parameters that shouldn't always get
            #          included. Need to figure out a better way.
            check_list = [
                'basin_solsta', 'gvr_hru_id', 'hru_solsta', 'humidity_percent',
                'irr_type', 'obsout_segment', 'rad_conv', 'rain_code',
                'hru_lon'
            ]

            for xx in check_list:
                if xx in new_params:
                    if xx in ['basin_solsta', 'hru_solsta', 'rad_conv']:
                        if not new_ps.dimensions.exists('nsol'):
                            new_params.remove(xx)
                        elif new_ps.dimensions.get('nsol') == 0:
                            new_params.remove(xx)
                    elif xx == 'humidity_percent':
                        if not new_ps.dimensions.exists('nhumid'):
                            new_params.remove(xx)
                        elif new_ps.dimensions.get('nhumid') == 0:
                            new_params.remove(xx)
                    elif xx == 'irr_type':
                        if not new_ps.dimensions.exists('nwateruse'):
                            new_params.remove(xx)
                        elif new_ps.dimensions.get('nwateruse') == 0:
                            new_params.remove(xx)
                    elif xx == 'gvr_hru_id':
                        if ctl.get('mapOutON_OFF').values == 0:
                            new_params.remove(xx)
                    elif xx in [
                            'hru_lat',
                            'hru_lon',
                    ]:
                        if not nhm_params.exists(xx):
                            new_params.remove(xx)

            new_params.sort()
            for pp in params:
                if pp in new_params or args.no_filter_params:
                    cparam = nhm_params.get(pp).tostructure()

                    new_ps.parameters.add(cparam['name'])

                    ndims = len(cparam['dimensions'])
                    if args.verbose:
                        sys.stdout.write(
                            '\r                                       ')
                        sys.stdout.write('\rProcessing {} '.format(
                            cparam['name']))
                        sys.stdout.flush()

                    # Get order of dimensions and total size for parameter
                    dim_order = [None] * ndims

                    for dd, dv in iteritems(cparam['dimensions']):
                        dim_order[dv['position']] = dd

                    for dd in dim_order:
                        # self.parameters.get(varname).dimensions.add(dd, self.dimensions.get(dd).size)
                        new_ps.parameters.get(cparam['name']).dimensions.add(
                            dd,
                            new_ps.dimensions.get(dd).size)

                        new_ps.parameters.get(
                            cparam['name']).datatype = cparam['datatype']

                    first_dimension = dim_order[0]

                    if ndims == 2:
                        second_dimension = dim_order[1]

                    # Write out the data for the parameter
                    if ndims == 1:
                        # 1D Parameters
                        if first_dimension == 'one':
                            outdata = np.array(cparam['data'])
                        elif first_dimension == 'nsegment':
                            if pp in ['tosegment']:
                                outdata = np.array(new_tosegment)
                            else:
                                outdata = np.array(
                                    cparam['data'])[tuple(toseg_idx0), ]
                        elif first_dimension == 'ndeplval':
                            # This is really a 2D in disguise, however, it is stored in C-order unlike
                            # other 2D arrays
                            outdata = np.array(cparam['data']).reshape(
                                (-1, 11))[tuple(uniq_deplcrv0), :]
                        elif first_dimension == 'npoigages':
                            if pp == 'poi_gage_segment':
                                outdata = np.array(new_poi_gage_segment)
                            elif pp == 'poi_gage_id':
                                outdata = np.array(new_poi_gage_id)
                            elif pp == 'poi_type':
                                outdata = np.array(new_poi_type)
                            else:
                                bandit_log.error(
                                    'Unknown parameter, {}, with dimensions {}'.
                                    format(pp, first_dimension))
                        elif first_dimension in HRU_DIMS:
                            if pp == 'hru_deplcrv':
                                outdata = np.array(new_hru_deplcrv)
                            elif pp == 'hru_segment':
                                outdata = np.array(new_hru_segment)
                            else:
                                outdata = np.array(
                                    cparam['data'])[tuple(hru_order_subset0), ]
                        else:
                            bandit_log.error(
                                'No rules to handle dimension {}'.format(
                                    first_dimension))
                    elif ndims == 2:
                        # 2D Parameters
                        outdata = np.array(cparam['data']).reshape(
                            (-1, dims[second_dimension]), order='F')

                        if first_dimension == 'nsegment':
                            outdata = outdata[tuple(toseg_idx0), :]
                        elif first_dimension in HRU_DIMS:
                            outdata = outdata[tuple(hru_order_subset0), :]
                        else:
                            bandit_log.error(
                                'No rules to handle 2D parameter, {}, which contains dimension {}'
                                .format(pp, first_dimension))

                    # Convert outdata to a list for writing
                    if first_dimension == 'ndeplval':
                        outlist = outdata.ravel().tolist()
                    else:
                        outlist = outdata.ravel(order='F').tolist()

                    new_ps.parameters.get(cparam['name']).data = outlist

            # Write the new parameter file
            header = [
                'Written by Bandit version {}'.format(__version__),
                'NhmParamDb revision: {}'.format(nhmparamdb_revision)
            ]
            if args.param_netcdf:
                base_filename = os.path.splitext(param_filename)[0]
                param_filename = '{}.nc'.format(base_filename)
                new_ps.write_netcdf('{}/{}'.format(sg_dir, param_filename))
            else:
                new_ps.write_parameter_file('{}/{}'.format(
                    sg_dir, param_filename),
                                            header=header)

            ctl.get('param_file').values = param_filename

            if args.verbose:
                sys.stdout.write('\n')
                # sys.stdout.write('\r                                       ')
                # sys.stdout.write('\r\tParameter file written: {}\n'.format('{}/{}'.format(outdir, param_filename)))
                sys.stdout.flush()

            # 2019-09-16 PAN: Nasty hack to handle parameter databases that may not have
            #                 a one-to-one match between index value and nhm_id.
            cparam = nhm_params.get('nhm_id').tostructure()
            hru_order_subset_nhm_id = np.array(
                cparam['data'])[tuple(hru_order_subset0), ].ravel(
                    order='F').tolist()

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Write CBH files
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            if output_cbh:
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # Subset the cbh files for the selected HRUs
                if len(hru_order_subset) > 0:
                    if args.verbose:
                        print('Processing CBH files')

                    if os.path.splitext(cbh_dir)[1] == '.nc':
                        cbh_hdl = CbhNetcdf(src_path=cbh_dir,
                                            st_date=st_date,
                                            en_date=en_date,
                                            nhm_hrus=hru_order_subset_nhm_id)
                        # nhm_hrus=hru_order_subset)
                    else:
                        # Subset the hru_nhm_to_local mapping
                        # TODO: This section will not work with the monolithic paramdb - remove
                        hru_order_ss = OrderedDict()
                        for kk in hru_order_subset:
                            hru_order_ss[kk] = hru_nhm_to_local[kk]

                        cbh_hdl = CbhAscii(src_path=cbh_dir,
                                           st_date=st_date,
                                           en_date=en_date,
                                           nhm_hrus=hru_order_subset,
                                           indices=hru_order_ss,
                                           mapping=hru_nhm_to_region)

                    if args.cbh_netcdf:
                        # Pull the filename prefix off of the first file found in the
                        # source netcdf CBH directory.
                        file_it = glob.iglob(cbh_dir)
                        cbh_prefix = os.path.basename(
                            next(file_it)).split('_')[0]

                        cbh_outfile = '{}/{}.nc'.format(outdir, cbh_prefix)
                        cbh_hdl.write_netcdf(cbh_outfile)
                        ctl.get('tmax_day').values = os.path.basename(
                            cbh_outfile)
                        ctl.get('tmin_day').values = os.path.basename(
                            cbh_outfile)
                        ctl.get('precip_day').values = os.path.basename(
                            cbh_outfile)
                    else:
                        cbh_hdl.write_ascii(pathname=sg_dir)
                    # bandit_log.info('{} written to: {}'.format(vv, '{}/{}.cbh'.format(outdir, vv)))
                else:
                    bandit_log.error('No HRUs associated with the segments')

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Write output variables
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # 2019-08-07 PAN: first prototype for extractions of output variables
            if include_model_output:
                if len(hru_order_subset) > 0:
                    try:
                        os.makedirs(f'{sg_dir}/model_output')
                        print(
                            'Creating directory model_output, for model output variables'
                        )
                    except OSError:
                        print(
                            'Using existing model_output directory for output variables'
                        )

                    for vv in output_vars:
                        if args.verbose:
                            sys.stdout.write(
                                '\r                                                  '
                            )
                            sys.stdout.write(
                                f'\rProcessing output variable: {vv} ')
                            sys.stdout.flush()

                        filename = f'{output_vars_dir}/{vv}.nc'

                        if vv[0:3] == 'seg':
                            mod_out = ModelOutput(filename=filename,
                                                  varname=vv,
                                                  startdate=st_date,
                                                  enddate=en_date,
                                                  nhm_segs=toseg_idx)
                        else:
                            mod_out = ModelOutput(
                                filename=filename,
                                varname=vv,
                                startdate=st_date,
                                enddate=en_date,
                                nhm_hrus=hru_order_subset_nhm_id)

                        mod_out.write_csv(f'{sg_dir}/model_output')
                    sys.stdout.write('\n')
                    sys.stdout.flush()

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Write dynamic parameters
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            if ctl.has_dynamic_parameters:
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # Add dynamic parameters
                for cparam in ctl.dynamic_parameters:
                    param_name = 'dyn_{}'.format(cparam)
                    input_file = '{}/{}.nc'.format(dyn_params_dir, param_name)
                    output_file = '{}/{}.param'.format(sg_dir, param_name)

                    if not os.path.exists(input_file):
                        bandit_log.warning(
                            'WARNING: CONUS dynamic parameter file: {}, does not exist... skipping'
                            .format(input_file))
                    else:
                        if args.verbose:
                            print(
                                'Writing dynamic parameter {}'.format(cparam))

                        mydyn = dyn_params.DynamicParameters(
                            input_file, cparam, st_date, en_date,
                            hru_order_subset_nhm_id)
                        # mydyn = dyn_params.DynamicParameters(input_file, cparam, st_date, en_date, hru_order_subset)

                        mydyn.read_netcdf()
                        out_order = [kk for kk in hru_order_subset_nhm_id]
                        # out_order = [kk for kk in hru_order_subset]
                        for cc in ['day', 'month', 'year']:
                            out_order.insert(0, cc)

                        header = ' '.join(map(str, out_order))

                        # Output ASCII files
                        out_ascii = open(output_file, 'w')
                        out_ascii.write('{}\n'.format(cparam))
                        out_ascii.write('{}\n'.format(header))
                        out_ascii.write('####\n')
                        mydyn.data.to_csv(out_ascii,
                                          columns=out_order,
                                          na_rep='-999',
                                          sep=' ',
                                          index=False,
                                          header=False,
                                          encoding=None,
                                          chunksize=50)
                        out_ascii.close()

            # Write an updated control file to the output directory
            ctl.write('{}.bandit'.format('{}/{}'.format(
                sg_dir, control_filename)))

            if output_streamflow:
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # Download the streamgage information from NWIS
                if args.verbose:
                    print(
                        'Downloading NWIS streamgage observations for {} stations'
                        .format(len(new_poi_gage_id)))

                streamflow = prms_nwis.NWIS(gage_ids=new_poi_gage_id,
                                            st_date=st_date,
                                            en_date=en_date,
                                            verbose=args.verbose)
                streamflow.get_daily_streamgage_observations()
                streamflow.write_prms_data(
                    filename='{}/{}'.format(sg_dir, obs_filename))

            # *******************************************
            # Create a shapefile of the selected HRUs
            if output_shapefiles:
                if args.verbose:
                    print('-' * 40)
                    print('Writing shapefiles for model subset')

                if not os.path.isdir(geo_file):
                    bandit_log.error(
                        'File geodatabase, {}, does not exist. Shapefiles will not be created'
                        .format(geo_file))
                else:
                    geo_shp = prms_geo.Geo(geo_file)

                    # Create GIS sub-directory if it doesn't already exist
                    gis_dir = '{}/GIS'.format(sg_dir)
                    try:
                        os.makedirs(gis_dir)
                    except OSError as exception:
                        if exception.errno != errno.EEXIST:
                            raise
                        else:
                            pass

                    # Output a shapefile of the selected HRUs
                    # print('\tHRUs')
                    # geo_shp.select_layer('nhruNationalIdentifier')
                    geo_shp.select_layer('nhru')
                    geo_shp.write_shapefile(
                        '{}/GIS/HRU_subset.shp'.format(sg_dir),
                        'hru_id_nat',
                        hru_order_subset_nhm_id,
                        included_fields=[
                            'nhm_id', 'model_idx', 'region', 'hru_id_nat'
                        ])

                    # geo_shp.write_shapefile3('{}/GIS/HRU_subset.gdb'.format(outdir), 'hru_id_nat', hru_order_subset)

                    # geo_shp.filter_by_attribute('hru_id_nat', hru_order_subset)
                    # geo_shp.write_shapefile2('{}/HRU_subset.shp'.format(outdir))
                    # geo_shp.write_kml('{}/HRU_subset.kml'.format(outdir))

                    # Output a shapefile of the selected stream segments
                    # print('\tSegments')
                    geo_shp.select_layer('nsegmentNationalIdentifier')
                    geo_shp.write_shapefile(
                        '{}/GIS/Segments_subset.shp'.format(sg_dir),
                        'seg_id_nat',
                        toseg_idx,
                        included_fields=['seg_id_nat', 'model_idx', 'region'])

                    # geo_shp.filter_by_attribute('seg_id_nat', uniq_seg_us)
                    # geo_shp.write_shapefile2('{}/Segments_subset.shp'.format(outdir))

                    del geo_shp

            break  # break out of while True loop

    bandit_log.info('========== END {} =========='.format(
        datetime.datetime.now().isoformat()))

    os.chdir(stdir)
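
A minimal standalone sketch (not part of the original script) of the ASCII layout the dynamic-parameter block writes: the parameter name, a space-delimited header of year/month/day plus the NHM HRU ids, a '####' separator, then the data rows. The DataFrame contents and the file name below are placeholders.

import pandas as pd

data = pd.DataFrame({'year': [1980], 'month': [1], 'day': [1], 1001: [0.5]})
out_order = ['year', 'month', 'day', 1001]

with open('dyn_example.param', 'w') as out_ascii:
    out_ascii.write('example_param\n')                      # parameter name
    out_ascii.write(' '.join(map(str, out_order)) + '\n')   # header line
    out_ascii.write('####\n')                               # separator
    data.to_csv(out_ascii, columns=out_order, na_rep='-999', sep=' ',
                index=False, header=False)
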
Ejemplo n.º 37
0
def get_node_to_distn(T, root, node_to_pmap, root_distn=None, P_default=None):
    """
    Get marginal state distributions at nodes in a tree.

    This function is similar to the Rao-Teh state sampling function,
    except that instead of sampling a state at each node,
    this function computes marginal distributions over states at each node.
    Also, each edge of the input tree for this function has been
    annotated with its own transition probability matrix,
    whereas the Rao-Teh sampling function uses a single
    uniformized transition probability matrix for all edges.

    Parameters
    ----------
    T : undirected acyclic networkx graph
        A tree whose edges are annotated with transition matrices P.
    root : integer
        Root node.
    node_to_pmap : dict
        Map from node to a map from a state to the subtree likelihood.
        This map incorporates state restrictions.
    root_distn : dict, optional
        A finite distribution over root states.
    P_default : weighted directed networkx graph, optional
        Default transition matrix.

    Returns
    -------
    node_to_distn : dict
        Sparse map from node to sparse map from state to probability.

    """
    # Bookkeeping.
    predecessors = nx.dfs_predecessors(T, root)

    # Get the distributions.
    node_to_distn = {}
    for node in nx.dfs_preorder_nodes(T, root):

        # Get the map from state to subtree likelihood.
        pmap = node_to_pmap[node]

        # Compute the prior distribution at the root separately.
        # If the prior distribution is not provided,
        # then treat it as uninformative.
        if node == root:
            distn = get_normalized_dict_distn(pmap, root_distn)
        else:
            parent_node = predecessors[node]
            parent_distn = node_to_distn[parent_node]

            # Get the transition matrix associated with this edge.
            P = T[parent_node][node].get('P', P_default)
            if P is None:
                raise ValueError('no transition matrix is available')

            # For each parent state,
            # get the distribution over child states;
            # this distribution will include both the P matrix
            # and the pmap of the child node.
            distn = defaultdict(float)
            for sa, pa in parent_distn.items():

                # Construct the conditional transition probabilities.
                feasible_sb = set(P[sa]) & set(node_to_pmap[node])
                sb_weights = {}
                for sb in feasible_sb:
                    a = P[sa][sb]['weight']
                    b = node_to_pmap[node][sb]
                    sb_weights[sb] = a*b
                sb_distn = get_normalized_dict_distn(sb_weights)

                # Add to the marginal distn.
                for sb, pb in sb_distn.items():
                    distn[sb] += pa * pb

        # Set the node_to_distn.
        node_to_distn[node] = distn

    # Return the marginal state distributions at nodes.
    return node_to_distn
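
A standalone sketch (not from the original module) of the bookkeeping pattern used above: nx.dfs_predecessors gives a child-to-parent map, so a preorder walk can always look up the distribution already computed for a node's parent.

import networkx as nx

T = nx.Graph()
T.add_edges_from([(0, 1), (0, 2), (2, 3)])   # toy tree
root = 0

predecessors = nx.dfs_predecessors(T, root)  # {1: 0, 2: 0, 3: 2}
for node in nx.dfs_preorder_nodes(T, root):
    if node == root:
        print(node, 'is the root')
    else:
        print(node, 'has parent', predecessors[node])
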
def cheapestLeafConnection(G, H):
    nodes = H.nodes()
    candidates = []
    tmp_weight = 0
    counter = 0
    while not udah_belom(H):
        #counter = 0
        for x in nodes:
            candidates=[]
            counter += 1
            x_neigh = H.neighbors(x)
            if len(x_neigh) > 2:
                for y in x_neigh:
                    #make a copy of current mst
                    dummyGraph = H.copy()
                    
                    #remove edge
                    removeNeighborColor(dummyGraph, x, y)
                    tmp_weight = G[x][y]['weight']      #save weight
                    dummyGraph.remove_edge(x,y)
    
                    tree = nx.dfs_successors(dummyGraph, x)
                    anak_anak = nx.dfs_predecessors(dummyGraph, x)
                    for anak in anak_anak:
                        if anak not in tree:            #if leaf
                            if validColor(dummyGraph, anak, y):
                                tmp = (anak, y, G[anak][y]['weight'])
                                candidates.append(tmp)
                    dummyGraph.add_edge(x, y, weight = tmp_weight)
                    addNeighborColor(dummyGraph, x, y)

            if len(candidates)>0:
                candidates = sorted(candidates, key = lambda z: z[2])
                fro, tom, wei = candidates[0]
                dummyGraph.add_edge(fro,tom, weight=wei)
                removeNeighborColor(dummyGraph, x, tom)
                dummyGraph.remove_edge(x,tom)
                addNeighborColor(dummyGraph, fro, tom)
                H = dummyGraph.copy()
                #drawGraph(H)
            elif (counter%10000) ==0:
                if counter%100000==0:
                    print 'no candidates,', counter, ' iterations in Cheapest Leaf Connection'
                #drawGraph(dummyGraph)
                for x in nodes:
                    x_neigh = dummyGraph.neighbors(x)
                    if len(x_neigh)==1:
                        dummyGraph.remove_edge(x, x_neigh[0])
                        removeNeighborColor(dummyGraph, x, x_neigh[0])
                        for y in dummyGraph.neighbors(x_neigh[0]):
                            if validColor(dummyGraph, x, y) and len(dummyGraph.neighbors(y))<3:
                                dummyGraph.add_edge(x,y)
                                addNeighborColor(dummyGraph, x, y)
                                H = dummyGraph.copy()
                                break
                        break
            elif counter == 250001:
                print 'Using Cheapest Leaf Connection failed miserably =('
                #os.system('say "Using candidates failed miserably..."')
                print 'Try using Direct Leaf Connection'
                #os.system('say "Try using direct leaf connection"')
                H = directLeafConnection(G, dummyGraph)
                return H
    return H
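
A standalone sketch of the leaf test used inside cheapestLeafConnection: a node that shows up in dfs_predecessors but has no entry in dfs_successors has no children in the DFS tree rooted at x.

import networkx as nx

H = nx.Graph([(0, 1), (1, 2), (1, 3)])
x = 0
tree = nx.dfs_successors(H, x)         # {0: [1], 1: [2, 3]}
anak_anak = nx.dfs_predecessors(H, x)  # {1: 0, 2: 1, 3: 1}
leaves = [n for n in anak_anak if n not in tree]
print(leaves)                          # [2, 3]
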
Ejemplo n.º 39
0
def minibatch_(functions, clf,scaler,w, loss__,mse,hinge1,hinge2,full_image,img_nr,alphas,learning_rate,subsamples, mode):
    X_p, y_p, inv = get_data_from_img_nr(class_,img_nr, subsamples)
    if X_p != []:
        boxes = []
        ground_truth = inv[0][2]
        img_nr = inv[0][0]
        print img_nr
        if less_features:
            X_p = [fts[0:features_used] for fts in X_p]
        if os.path.isfile('/var/node436/local/tstahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt'):
            f = open('/var/node436/local/tstahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt', 'r')
        else:
            print 'warning'
        for line, y in zip(f, inv):
            tmp = line.split(',')
            coord = []
            for s in tmp:
                coord.append(float(s))
            boxes.append([coord, y[2]])
        #assert(len(boxes)<500)
        boxes, y_p, X_p = sort_boxes(boxes, y_p, X_p, 0,5000)
        
        if os.path.isfile('/var/node436/local/tstahl/GroundTruth/%s/%s.txt'%(class_,format(img_nr, "06d"))):
            gr = open('/var/node436/local/tstahl/GroundTruth/%s/%s.txt'%(class_,format(img_nr, "06d")), 'r')
        else:
            gr = []
        ground_truths = []
        for line in gr:
           tmp = line.split(',')
           ground_truth = []
           for s in tmp:
              ground_truth.append(int(s))
           ground_truths.append(ground_truth)
        
        #prune boxes
        pruned_x = []
        pruned_y = []
        pruned_boxes = []
        if prune:
            for i, y_ in enumerate(y_p):
                if y_ > 0:
                    pruned_x.append(X_p[i])
                    pruned_y.append(y_p[i])
                    pruned_boxes.append(boxes[i])
        else:
            pruned_x = X_p
            pruned_y = y_p
            pruned_boxes = boxes
        
        if subsampling and len(pruned_boxes) > subsamples:
            pruned_x = pruned_x[0:subsamples]
            pruned_y = pruned_y[0:subsamples]
            pruned_boxes = pruned_boxes[0:subsamples]
            
            
        # create_tree
        G, levels = create_tree(pruned_boxes)
        
        #prune tree to only have levels which fully cover the image, tested
        if prune_fully_covered:
            nr_levels_covered = 100
            total_size = surface_area(pruned_boxes, levels[0])
            for level in levels:
                sa = surface_area(pruned_boxes, levels[level])
                sa_co = sa/total_size
                if sa_co != 1.0:
                    G.remove_nodes_from(levels[level])
                else:
                    nr_levels_covered = level
            levels = {k: levels[k] for k in range(0,nr_levels_covered + 1)}
            
        # prune levels, speedup + performance 
        levels_tmp = {k:v for k,v in levels.iteritems() if k<prune_tree_levels}
        levels_gone = {k:v for k,v in levels.iteritems() if k>=prune_tree_levels}
        levels = levels_tmp
        #prune tree as well, for patches training
        for trash_level in levels_gone.values():
            G.remove_nodes_from(trash_level)
        
        coords = []
        features = []
        f_c = []
        f = []
        
        #either subsampling or prune_fully_covered
        #assert(subsampling != prune_fully_covered)
        
        if subsampling:
            if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples)):
                f_c = open('/var/node436/local/tstahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'r+')
            else:
                if mode == 'extract_train' or mode == 'extract_test':                
                    print 'coords for %s with %s samples have to be extracted'%(img_nr,subsamples)
                    f_c = open('/var/node436/local/tstahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'w')
                    for level in levels:
                        levl_boxes = extract_coords(levels[level], pruned_boxes)
                        if levl_boxes != []:
                            for lvl_box in levl_boxes:
                                if lvl_box not in coords:
                                    coords.append(lvl_box)
                                    f_c.write('%s,%s,%s,%s'%(lvl_box[0],lvl_box[1],lvl_box[2],lvl_box[3]))
                                    f_c.write('\n')
                    f_c.close()
                    print 'features for %s with %s samples have to be extracted'%(img_nr,subsamples)
                    os.system('export PATH=$PATH:/home/koelma/impala/lib/x86_64-linux-gcc')
                    os.system('export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/koelma/impala/third.13.03/x86_64-linux/lib')
                    #print "EuVisual /var/node436/local/tstahl/Images/%s.jpg /var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep_%s_%s.txt --eudata /home/koelma/EuDataBig --imageroifile /var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_%s.txt"%((format(img_nr, "06d")),format(img_nr, "06d"),subsamples,format(img_nr, "06d"),subsamples)
                    os.system("EuVisual /var/node436/local/tstahl/Images/%s.jpg /var/node436/local/tstahl/Features_prop_windows/Features_upper/%s_%s_%s.txt --eudata /home/koelma/EuDataBig --imageroifile /var/node436/local/tstahl/Features_prop_windows/upper_levels/%s_%s_%s.txt"%(class_,(format(img_nr, "06d")),format(img_nr, "06d"),subsamples,class_,format(img_nr, "06d"),subsamples))
                    if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples)):
                        f_c = open('/var/node436/local/tstahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'r')
                    else:
                        f_c = []
            coords = []
                
            if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/Features_upper/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples)):
                f = open('/var/node436/local/tstahl/Features_prop_windows/Features_upper/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'r') 
                
                
        elif prune_fully_covered:
            if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d"))):
                f_c = open('/var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d")), 'r+')
                
                
            else:
                if mode == 'extract_train' or mode == 'extract_test':                
                    print 'coords for %s with fully_cover_tree samples have to be extracted'%(img_nr)
                    f_c = open('/var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d")), 'w')
                    for level in levels:
                        levl_boxes = extract_coords(levels[level], pruned_boxes)
                        if levl_boxes != []:
                            for lvl_box in levl_boxes:
                                if lvl_box not in coords:
                                    coords.append(lvl_box)
                                    f_c.write('%s,%s,%s,%s'%(lvl_box[0],lvl_box[1],lvl_box[2],lvl_box[3]))
                                    f_c.write('\n')
                    f_c.close()
                    print 'features for %s with fully_cover_tree samples have to be extracted'%(img_nr)
                    os.system('export PATH=$PATH:/home/koelma/impala/lib/x86_64-linux-gcc')
                    os.system('export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/koelma/impala/third.13.03/x86_64-linux/lib')
                    #print "EuVisual /var/node436/local/tstahl/Images/%s.jpg /var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep_%s_%s.txt --eudata /home/koelma/EuDataBig --imageroifile /var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_%s.txt"%((format(img_nr, "06d")),format(img_nr, "06d"),subsamples,format(img_nr, "06d"),subsamples)
                    print "EuVisual /var/node436/local/tstahl/Images/%s.jpg /var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep_%s_fully_cover_tree.txt --eudata /home/koelma/EuDataBig --imageroifile /var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt"%((format(img_nr, "06d")),format(img_nr, "06d"),format(img_nr, "06d"))
                    os.system("EuVisual /var/node436/local/tstahl/Images/%s.jpg /var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep_%s_fully_cover_tree.txt --eudata /home/koelma/EuDataBig --imageroifile /var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt"%((format(img_nr, "06d")),format(img_nr, "06d"),format(img_nr, "06d")))
                    if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d"))):
                        f_c = open('/var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d")), 'r')
                    else:
                        f_c = []
            coords = []
                
            if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d"))):
                f = open('/var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d")), 'r') 
                        
                
        else:
            if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep%s.txt'%(format(img_nr, "06d"))):
                f = open('/var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep%s.txt'%(format(img_nr, "06d")), 'r') 
            if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep%s.txt'%(format(img_nr, "06d"))):
                f_c = open('/var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep%s.txt'%(format(img_nr, "06d")), 'r+')
                
        if f_c != []:
            for i,line in enumerate(f_c):
                str_ = line.rstrip('\n').split(',')
                cc = []
                for s in str_:
                   cc.append(float(s))
                coords.append(cc)
        if f != []:
            for i,line in enumerate(f):
                str_ = line.rstrip('\n').split(',')  
                ff = []
                for s in str_:
                   ff.append(float(s))
                features.append(ff)
        #assert len(coords) == len(features)
        
        # append x,y of intersections
        if learn_intersections:
            for inters,coord in zip(features,coords):
#                if inters not in pruned_x:
                pruned_x.append(inters)
                ol = 0.0
                ol = get_intersection_count(coord, ground_truths)
                pruned_y.append(ol)
                
        if mode == 'mean_variance':
            print 'normalizing'
            sum_x += np.array(pruned_x).sum(axis=0)
            n_samples += len(pruned_x)
            sum_sq_x +=  (np.array(pruned_x)**2).sum(axis=0)
            scaler.partial_fit(pruned_x)  # Don't cheat - fit only on training data
            return sum_x,n_samples,sum_sq_x, scaler
            
        if less_features:
            features = [fts[0:features_used] for fts in features]
        #normalize
        norm_x = []
        if normalize and (mode != 'extract_train' and mode != 'extract_test'):
#            for p_x in pruned_x:
#                norm_x.append((p_x-mean)/variance)
            norm_x = scaler.transform(pruned_x)
            if features != []:
                features = scaler.transform(features)
        else:
            norm_x = pruned_x
        data = (G, levels, pruned_y, norm_x, pruned_boxes, ground_truths, alphas)
        sucs = nx.dfs_successors(G)
        
        predecs = nx.dfs_predecessors(G)
        
        #preprocess: node - children
        children = {}
        last = -1
        for node,children_ in zip(sucs.keys(),sucs.values()):
            if node != last+1:
                for i in range(last+1,node):
                    children[i] = []
                children[node] = children_
            elif node == last +1:
                children[node] = children_
            last = node
        if mode == 'training':
            if alphas[0] == 0: #if we don't learn the proposals, we learn just the levels: better, because every level has same importance and faster
                print 'training levels', img_nr
                for level in levels:
                    print 'level' , level
                    if img_nr in functions:
                        if level in functions[img_nr]:
                            function = functions[img_nr][level]
                        else:
                            function = []
                    else:
                        functions[img_nr] = {}
                        function = []
                    w, function = tree_level_regression(class_,function,levels,level,features,coords,scaler,w,norm_x,pruned_y,None,predecs,children,pruned_boxes,learning_rate,alphas,img_nr,jans_idea)
                    if level not in functions[img_nr]:
                        functions[img_nr][level] = function
                return w, len(pruned_y), len(levels)
            else: #if we learn proposals, levels with more proposals have more significance...., slow - need to change
                print 'training patches', img_nr
                print predecs
                nodes = list(G.nodes())
                for node in nodes:
                    print node
                    if node == 0:
                        w = learn_root(w,norm_x[0],pruned_y[0],learning_rate,alphas)
                    else:
                        for num,n in enumerate(levels.values()):
                            if node in n:
                                level = num
                                break
                        if img_nr in functions:
                            if level in functions[img_nr]:
                                function = functions[img_nr][level]
                            else:
                                function = []
                        else:
                            functions[img_nr] = {}
                            function = []
                        #w, function = tree_level_regression(class_,function,levels,level,features,coords,scaler,w,norm_x,pruned_y,node,predecs,children,pruned_boxes,learning_rate,alphas,img_nr)
                        w, function = constrained_regression(class_,function,features,coords,scaler,w,norm_x,pruned_y,node,predecs,children,pruned_boxes,learning_rate,alphas,img_nr,squared_hinge_loss)
                        #TODO: train regressor/classifier that predicts/chooses level. Features: level, number of proposals, number of intersections, avg size of proposal, predictions(for regressor), etc.
                        if level not in functions[img_nr]:
                            functions[img_nr][level] = function
                return w, len(pruned_y), len(G.nodes())
        elif mode == 'scikit_train':
            clf.partial_fit(norm_x,pruned_y)
            return clf
        elif mode == 'loss_train':
            if alphas[0] == 0: #levels
                loss__.append(tree_level_loss(class_,features,coords,scaler, w, data, predecs, children,img_nr,-1,functions))
                return loss__
            else:
                loss__.append(loss(class_,squared_hinge_loss,features,coords,scaler,w, data, predecs, children,img_nr, -1))
        elif mode == 'loss_test' or mode == 'loss_eval':
            print mode, loss__
            if alphas[0] == 0: #levels
                loss__.append(tree_level_loss(class_,features,coords,scaler, w, data, predecs, children,img_nr,-1,functions))
                cpl = max(0, np.dot(w,np.array(norm_x[0]).T))
                full_image.append([pruned_y[0],cpl])
                return loss__,full_image
            else:
                loss__.append(loss(class_,squared_hinge_loss,features,coords,scaler,w, data, predecs, children,img_nr, -1))
                cpl = max(0, np.dot(w,np.array(norm_x[0]).T))
                full_image.append([pruned_y[0],cpl])
                return loss__,full_image
        elif mode == 'loss_scikit_test' or mode == 'loss_scikit_train':
            loss__.append(((clf.predict(norm_x) - pruned_y)**2).sum())
            return loss__ 
        elif mode == 'levels_train' or mode == 'levels_test':
            preds = []
            for i,x_ in enumerate(norm_x):
                preds.append(np.dot(w, x_))
            cpls = []
            truelvls = []
            used_boxes_ = []
            total_size = surface_area(pruned_boxes, levels[0])
            fully_covered_score = 0.0
            fully_covered_score_lvls = 0.0
            covered_levels = []
            print mode, len(levels)
            for level in levels:
                function = functions[img_nr][level]
                cpl,used_boxes,_ = count_per_level([],class_,features,coords,scaler,w, preds, img_nr, pruned_boxes,levels[level], '',function)
                # clipp negative predictions
                cpl = max(0,cpl)
                if used_boxes != []:
                    used_boxes_.append(used_boxes[0][1])
                tru = y_p[0]
                cpls.append(cpl)
                sa = surface_area(pruned_boxes, levels[level])
                sa_co = sa/total_size
                if sa_co == 1.0:
                   fully_covered_score += cpl
                   fully_covered_score_lvls += 1
                   covered_levels.append(cpl)
                truelvls.append(tru)
            return cpls,truelvls
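
A standalone sketch of the parent/children bookkeeping in minibatch_: dfs_successors gives node -> children and dfs_predecessors gives node -> parent for the proposal tree. The dict comprehension is a simpler equivalent of the gap-filling loop above, assuming arbitrary hashable node labels.

import networkx as nx

G = nx.Graph([(0, 1), (0, 2), (2, 3)])
sucs = nx.dfs_successors(G, 0)       # {0: [1, 2], 2: [3]}
predecs = nx.dfs_predecessors(G, 0)  # {1: 0, 2: 0, 3: 2}
children = {n: sucs.get(n, []) for n in G.nodes()}  # nodes without children map to []
print(children, predecs)
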
Ejemplo n.º 40
0
    def trim(self, by_inflow=True, by_outflow=True, min_count=0):
        """
        Trims a graph to delete nodes that are not connected to the main
        component, which is the component containing the most-sampled node (MSN)
        by counts.

        by_inflow: whether to delete nodes that are not connected to the MSN by inflow

        by_outflow: whether to delete nodes that are not connected to the MSN by outflow

        min_count: nodes that do not have a count > min_count will be deleted

        Trimmed graph is saved as self.trim_graph. The trimmed transition matrix 
        is saved as self.trim_transmat, and the count matrix is saved as 
        self.trim_countmat.

        The mapping from the nodes in the trimmed set to the full set is given by
        self.trim_indices.
        """

        totcounts = self.countmat.sum(axis=1)
        msn = totcounts.argmax()

        mask = np.ones(self.nnodes, dtype=bool)
        oldmask = np.zeros(self.nnodes, dtype=bool)

        if min_count > 0:
            mask[[i for i in range(self.nnodes)
                  if totcounts[i] < min_count]] = False

        while (mask != oldmask).any():

            oldmask = mask.copy()
            self.trim_indices = [
                i for i in range(self.nnodes) if mask[i]
            ]
            self.trim_graph = self.graph.subgraph(self.trim_indices)

            if by_outflow:
                downstream = nx.dfs_successors(self.trim_graph, msn).values()
                dlist = list(itertools.chain(*downstream)) + [msn]
                mask[[i for i in range(self.nnodes) if i not in dlist]] = False

            if by_inflow:
                upstream = list(
                    nx.dfs_predecessors(self.trim_graph, msn).keys()) + [msn]
                mask[[i for i in range(self.nnodes)
                      if i not in upstream]] = False

        # count all transitions to masked states and add these as self-transitions
        to_add = {}
        rows = self.countmat.row
        cols = self.countmat.col
        data = self.countmat.data

        for i in range(len(data)):
            # identity checks against numpy bools never match; test truth values instead
            if not mask[rows[i]] and mask[cols[i]]:
                if cols[i] in to_add:
                    to_add[cols[i]] += data[i]
                else:
                    to_add[cols[i]] = data[i]

        tmp_arr = self.countmat.toarray()[mask, ...][..., mask]

        for ind, full_ind in enumerate(self.trim_indices):
            if full_ind in to_add:
                tmp_arr[ind][ind] += to_add[full_ind]

        self.trim_countmat = scipy.sparse.coo_matrix(tmp_arr)
        if self.symmetrize:
            self.trim_countmat = symmetrize_matrix(self.trim_countmat)

        self.trim_nnodes = self.trim_countmat.shape[0]
        self.trim_transmat = count_to_trans(self.trim_countmat)
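
A standalone sketch, on a toy graph, of the two reachability queries used in trim(): chaining the dfs_successors values gives every node reachable from the most-sampled node, and the dfs_predecessors keys list every node visited from it (excluding the start itself).

import itertools
import networkx as nx

g = nx.Graph([(0, 1), (1, 2), (0, 3)])
msn = 0

downstream = nx.dfs_successors(g, msn).values()
dlist = list(itertools.chain(*downstream)) + [msn]           # reachable from msn
upstream = list(nx.dfs_predecessors(g, msn).keys()) + [msn]  # visited from msn
print(sorted(dlist), sorted(upstream))                       # [0, 1, 2, 3] [0, 1, 2, 3]
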
Ejemplo n.º 41
0
def ScenarioTreeModelFromNetworkX(
        tree,
        node_name_attribute=None,
        edge_probability_attribute='weight',
        stage_names=None,
        scenario_name_attribute=None):
    """
    Create a scenario tree model from a networkx tree.  The
    height of the tree must be at least 1 (meaning at least
    2 stages).

    Required node attributes:
        - cost (str): A string identifying a component on
              the model whose value indicates the cost at
              the time stage of the node for any scenario
              traveling through it.

    Optional node attributes:
        - variables (list): A list of variable identifiers
              that will be tracked by the node. If the node
              is not a leaf node, these indicate variables
              with non-anticipativity constraints.
        - derived_variables (list): A list of variable or
              expression identifiers that will be tracked by
              the node (but will never have
              non-anticipativity constraints enforced).
        - bundle: A bundle identifier for the scenario
              defined by a leaf-stage node. This attribute
              is ignored on non-terminal tree nodes. If this
              attribute appears on at least one leaf-stage
              node (and is not set to :const:`None`), then
              it must be set on all leaf-stage nodes (to
              something other than :const:`None`);
              otherwise, an exception will be raised.

    Optional edge attributes:
        - weight (float): Indicates the conditional
              probability of moving from the parent node to
              the child node in the directed edge. If not
              present, it will be assumed that all edges
              leaving the parent node have equal probability
              (normalized to sum to one).

    Args:
        stage_names: Can define a list of stage names to use
           (assumed in time order). The length of this list
           must match the number of stages in the tree. The
           default value of :const:`None` indicates that
           stage names should be automatically generated
           in the form ['Stage1','Stage2',...].
        node_name_attribute: By default, node names are the
           same as the node hash in the networkx tree. This
           keyword can be set to the name of some property
           of nodes in the graph that will be used for their
           name in the PySP scenario tree.
        scenario_name_attribute: By default, scenario names
           are the same as the leaf-node hash in the
           networkx tree. This keyword can be set to the
           name of some property of leaf-nodes in the graph
           that will be used for their corresponding
           scenario name in the PySP scenario tree.
        edge_probability_attribute: Can be set to the name
           of some property of edges in the graph that
           defines the conditional probability of that
           branch (default: 'weight'). If this keyword is
           set to :const:`None`, then all branches leaving a
           node are assigned equal conditional
           probabilities.

    Examples:

        A 2-stage scenario tree with 10 scenarios grouped
        into 2 bundles:

        >>> G = networkx.DiGraph()
        >>> G.add_node("root", variables=["x"])
        >>> N = 10
        >>> for i in range(N):
        >>>     node_name = "s"+str(i)
        >>>     bundle_name = "b"+str(i%2)
        >>>     G.add_node(node_name, bundle=bundle_name)
        >>>     G.add_edge("root", node_name, weight=1.0/N)
        >>> model = ScenarioTreeModelFromNetworkX(G)

        A 4-stage scenario tree with 125 scenarios:

        >>> branching_factor = 5
        >>> height = 3
        >>> G = networkx.balanced_tree(
                   branching_factor,
                   height,
                   networkx.DiGraph())
        >>> model = ScenarioTreeModelFromNetworkX(G)
    """

    if not has_networkx:                          #pragma:nocover
        raise ValueError(
            "networkx>=2.0 module is not available")

    if not networkx.is_tree(tree):
        raise TypeError(
            "Graph object is not a tree "
            "(see networkx.is_tree)")

    if not networkx.is_directed(tree):
        raise TypeError(
            "Graph object is not directed "
            "(see networkx.is_directed)")

    if not networkx.is_branching(tree):
        raise TypeError(
            "Grapn object is not a branching "
            "(see networkx.is_branching")

    in_degree_items = tree.in_degree()
    # Prior to networkx ~2.0, in_degree() returned a dictionary.
    # Now it is a view on items, so only call .items() for the old case
    if hasattr(in_degree_items, 'items'):
        in_degree_items = in_degree_items.items()
    root = [u for u,d in in_degree_items if d == 0]
    assert len(root) == 1
    root = root[0]
    num_stages = networkx.eccentricity(tree, v=root) + 1
    if num_stages < 2:
        raise ValueError(
            "The number of stages must be at least 2")
    m = CreateAbstractScenarioTreeModel()
    if stage_names is not None:
        unique_stage_names = set()
        for cnt, stage_name in enumerate(stage_names,1):
            m.Stages.add(stage_name)
            unique_stage_names.add(stage_name)
        if cnt != num_stages:
            raise ValueError(
                "incorrect number of stages names (%s), should be %s"
                % (cnt, num_stages))
        if len(unique_stage_names) != cnt:
            raise ValueError("all stage names were not unique")
    else:
        for i in range(num_stages):
            m.Stages.add('Stage'+str(i+1))
    node_to_name = {}
    node_to_scenario = {}
    scenario_bundle = {}
    def _setup(u, succ):
        if node_name_attribute is not None:
            if node_name_attribute not in tree.nodes[u]:
                raise KeyError(
                    "node '%s' missing node name "
                    "attribute: '%s'"
                    % (u, node_name_attribute))
            node_name = tree.nodes[u][node_name_attribute]
        else:
            node_name = u
        node_to_name[u] = node_name
        m.Nodes.add(node_name)
        if u in succ:
            for v in succ[u]:
                _setup(v, succ)
        else:
            # a leaf node
            if scenario_name_attribute is not None:
                if scenario_name_attribute not in tree.nodes[u]:
                    raise KeyError(
                        "node '%s' missing scenario name "
                        "attribute: '%s'"
                        % (u, scenario_name_attribute))
                scenario_name = tree.nodes[u][scenario_name_attribute]
            else:
                scenario_name = u
            node_to_scenario[u] = scenario_name
            m.Scenarios.add(scenario_name)
            scenario_bundle[scenario_name] = \
                tree.nodes[u].get('bundle', None)
    _setup(root,
           networkx.dfs_successors(tree, root))
    m = m.create_instance()
    def _add_node(u, stage, succ, pred):
        node_name = node_to_name[u]
        m.NodeStage[node_name] = m.Stages[stage]
        if u == root:
            m.ConditionalProbability[node_name] = 1.0
        else:
            assert u in pred
            # prior to networkx ~2.0, we used a .edge attribute on DiGraph,
            # which no longer exists.
            if hasattr(tree, 'edge'):
                edge = tree.edge[pred[u]][u]
            else:
                edge = tree.edges[pred[u],u]
            probability = None
            if edge_probability_attribute is not None:
                if edge_probability_attribute not in edge:
                    raise KeyError(
                        "edge '(%s, %s)' missing probability attribute: '%s'"
                        % (pred[u], u, edge_probability_attribute))
                probability = edge[edge_probability_attribute]
            else:
                probability = 1.0/len(succ[pred[u]])
            m.ConditionalProbability[node_name] = probability
        # get node variables
        if "variables" in tree.nodes[u]:
            node_variables = tree.nodes[u]["variables"]
            assert type(node_variables) in [tuple, list]
            for varstring in node_variables:
                m.NodeVariables[node_name].add(varstring)
        if "derived_variables" in tree.nodes[u]:
            node_derived_variables = tree.nodes[u]["derived_variables"]
            assert type(node_derived_variables) in [tuple, list]
            for varstring in node_derived_variables:
                m.NodeDerivedVariables[node_name].add(varstring)
        if "cost" in tree.nodes[u]:
            assert isinstance(tree.nodes[u]["cost"], six.string_types)
            m.NodeCost[node_name].value = tree.nodes[u]["cost"]
        if u in succ:
            child_names = []
            for v in succ[u]:
                child_names.append(
                    _add_node(v, stage+1, succ, pred))
            total_probability = 0.0
            for child_name in child_names:
                m.Children[node_name].add(child_name)
                total_probability += \
                    pyomo.core.value(m.ConditionalProbability[child_name])
            if abs(total_probability - 1.0) > 1e-5:
                raise ValueError(
                    "edge probabilities leaving node '%s' "
                    "do not sum to 1 (total=%r)"
                    % (u, total_probability))
        else:
            # a leaf node
            scenario_name = node_to_scenario[u]
            m.ScenarioLeafNode[scenario_name] = node_name
            m.Children[node_name].clear()

        return node_name

    _add_node(root,
              1,
              networkx.dfs_successors(tree, root),
              networkx.dfs_predecessors(tree, root))

    if any(_b is not None for _b in scenario_bundle.values()):
        if any(_b is None for _b in scenario_bundle.values()):
            raise ValueError("Incomplete bundle specification. "
                             "All scenarios require a bundle "
                             "identifier.")
        m.Bundling.value = True
        bundle_scenarios = {}
        for bundle_name in sorted(set(scenario_bundle.values())):
            m.Bundles.add(bundle_name)
            bundle_scenarios[bundle_name] = []
        for scenario_name in m.Scenarios:
            bundle_scenarios[scenario_bundle[scenario_name]].\
                append(scenario_name)
        for bundle_name in m.Bundles:
            for scenario_name in sorted(bundle_scenarios[bundle_name]):
                m.BundleScenarios[bundle_name].add(scenario_name)

    return m
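
A standalone sketch of the succ/pred maps handed to _setup and _add_node above, computed for a tiny two-stage tree.

import networkx as nx

t = nx.DiGraph()
t.add_edge('root', 's0', weight=0.5)
t.add_edge('root', 's1', weight=0.5)

succ = nx.dfs_successors(t, 'root')    # {'root': ['s0', 's1']}
pred = nx.dfs_predecessors(t, 'root')  # {'s0': 'root', 's1': 'root'}
print(succ, pred)
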
Ejemplo n.º 42
0
T = nx.DiGraph()
for e in edges:
	T.add_edge(e[0], e[1])
draw("T", T)

# reverse the edges so we have bottom up.
edges.reverse()

for n in nx.dfs_postorder_nodes(BC):
	print n
sys.exit()
"""

# compute orderings.
root = cnodes[0]
preds = nx.dfs_predecessors(BC, root)
nlist = [x for x in nx.dfs_postorder_nodes(BC, root)]

# test solving.
solved = set()
for p in nlist:

    # get parent.
    parent = preds[p]

    # flip if is cut node.
    t = BC.node[p]["type"]
    if t == "C":
        continue

        # simplify.
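
A standalone sketch of the ordering bookkeeping above: a DFS postorder visits children before their parents, and dfs_predecessors provides the parent lookup (guarded here with .get, since the root has no predecessor).

import networkx as nx

BC = nx.Graph([('root', 'a'), ('root', 'b'), ('a', 'c')])
root = 'root'

preds = nx.dfs_predecessors(BC, root)
for p in nx.dfs_postorder_nodes(BC, root):
    parent = preds.get(p)              # None for the root
    print(p, '->', parent)
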
Ejemplo n.º 43
0
def ScenarioTreeModelFromNetworkX(
        tree,
        node_name_attribute=None,
        edge_probability_attribute='weight',
        stage_names=None,
        scenario_name_attribute=None):
    """
    Create a scenario tree model from a networkx tree.  The
    height of the tree must be at least 1 (meaning at least
    2 stages).

    Required node attributes:
        - cost (str): A string identifying a component on
              the model whose value indicates the cost at
              the time stage of the node for any scenario
              traveling through it.

    Optional node attributes:
        - variables (list): A list of variable identifiers
              that will be tracked by the node. If the node
              is not a leaf node, these indicate variables
              with non-anticipativity constraints.
        - derived_variables (list): A list of variable or
              expression identifiers that will be tracked by
              the node (but will never have
              non-anticipativity constraints enforced).
        - bundle: A bundle identifier for the scenario
              defined by a leaf-stage node. This attribute
              is ignored on non-terminal tree nodes. If this
              attribute appears on at least one leaf-stage
              node (and is not set to :const:`None`), then
              it must be set on all leaf-stage nodes (to
              something other than :const:`None`);
              otherwise, an exception will be raised.

    Optional edge attributes:
        - weight (float): Indicates the conditional
              probability of moving from the parent node to
              the child node in the directed edge. If not
              present, it will be assumed that all edges
              leaving the parent node have equal probability
              (normalized to sum to one).

    Args:
        stage_names: Can define a list of stage names to use
           (assumed in time order). The length of this list
           must match the number of stages in the tree. The
           default value of :const:`None` indicates that
           stage names should be automatically generated
           in the form ['Stage1','Stage2',...].
        node_name_attribute: By default, node names are the
           same as the node hash in the networkx tree. This
           keyword can be set to the name of some property
           of nodes in the graph that will be used for their
           name in the PySP scenario tree.
        scenario_name_attribute: By default, scenario names
           are the same as the leaf-node hash in the
           networkx tree. This keyword can be set to the
           name of some property of leaf-nodes in the graph
           that will be used for their corresponding
           scenario name in the PySP scenario tree.
        edge_probability_attribute: Can be set to the name
           of some property of edges in the graph that
           defines the conditional probability of that
           branch (default: 'weight'). If this keyword is
           set to :const:`None`, then all branches leaving a
           node are assigned equal conditional
           probabilities.

    Examples:

        A 2-stage scenario tree with 10 scenarios grouped
        into 2 bundles:

        >>> G = networkx.DiGraph()
        >>> G.add_node("root", variables=["x"])
        >>> N = 10
        >>> for i in range(N):
        >>>     node_name = "s"+str(i)
        >>>     bundle_name = "b"+str(i%2)
        >>>     G.add_node(node_name, bundle=bundle_name)
        >>>     G.add_edge("root", node_name, weight=1.0/N)
        >>> model = ScenarioTreeModelFromNetworkX(G)

        A 4-stage scenario tree with 125 scenarios:

        >>> branching_factor = 5
        >>> height = 3
        >>> G = networkx.balanced_tree(
                   branching_factor,
                   height,
                   networkx.DiGraph())
        >>> model = ScenarioTreeModelFromNetworkX(G)
    """

    if not has_networkx:                          #pragma:nocover
        raise ValueError(
            "networkx module is not available")

    if not networkx.is_tree(tree):
        raise TypeError(
            "Graph object is not a tree "
            "(see networkx.is_tree)")

    if not networkx.is_directed(tree):
        raise TypeError(
            "Graph object is not directed "
            "(see networkx.is_directed)")

    if not networkx.is_branching(tree):
        raise TypeError(
            "Grapn object is not a branching "
            "(see networkx.is_branching")

    in_degree_items = tree.in_degree()
    # Prior to networkx ~2.0, in_degree() returned a dictionary.
    # Now it is a view on items, so only call .items() for the old case
    if hasattr(in_degree_items, 'items'):
        in_degree_items = in_degree_items.items()
    root = [u for u,d in in_degree_items if d == 0]
    assert len(root) == 1
    root = root[0]
    num_stages = networkx.eccentricity(tree, v=root) + 1
    if num_stages < 2:
        raise ValueError(
            "The number of stages must be at least 2")
    m = CreateAbstractScenarioTreeModel()
    if stage_names is not None:
        unique_stage_names = set()
        for cnt, stage_name in enumerate(stage_names,1):
            m.Stages.add(stage_name)
            unique_stage_names.add(stage_name)
        if cnt != num_stages:
            raise ValueError(
                "incorrect number of stages names (%s), should be %s"
                % (cnt, num_stages))
        if len(unique_stage_names) != cnt:
            raise ValueError("all stage names were not unique")
    else:
        for i in range(num_stages):
            m.Stages.add('Stage'+str(i+1))
    node_to_name = {}
    node_to_scenario = {}
    scenario_bundle = {}
    def _setup(u, succ):
        if node_name_attribute is not None:
            if node_name_attribute not in tree.node[u]:
                raise KeyError(
                    "node '%s' missing node name "
                    "attribute: '%s'"
                    % (u, node_name_attribute))
            node_name = tree.node[u][node_name_attribute]
        else:
            node_name = u
        node_to_name[u] = node_name
        m.Nodes.add(node_name)
        if u in succ:
            for v in succ[u]:
                _setup(v, succ)
        else:
            # a leaf node
            if scenario_name_attribute is not None:
                if scenario_name_attribute not in tree.node[u]:
                    raise KeyError(
                        "node '%s' missing scenario name "
                        "attribute: '%s'"
                        % (u, scenario_name_attribute))
                scenario_name = tree.node[u][scenario_name_attribute]
            else:
                scenario_name = u
            node_to_scenario[u] = scenario_name
            m.Scenarios.add(scenario_name)
            scenario_bundle[scenario_name] = \
                tree.node[u].get('bundle', None)
    _setup(root,
           networkx.dfs_successors(tree, root))
    m = m.create_instance()
    def _add_node(u, stage, succ, pred):
        node_name = node_to_name[u]
        m.NodeStage[node_name] = m.Stages[stage]
        if u == root:
            m.ConditionalProbability[node_name] = 1.0
        else:
            assert u in pred
            # prior to networkx ~2.0, we used a .edge attribute on DiGraph,
            # which no longer exists.
            if hasattr(tree, 'edge'):
                edge = tree.edge[pred[u]][u]
            else:
                edge = tree.edges[pred[u],u]
            probability = None
            if edge_probability_attribute is not None:
                if edge_probability_attribute not in edge:
                    raise KeyError(
                        "edge '(%s, %s)' missing probability attribute: '%s'"
                        % (pred[u], u, edge_probability_attribute))
                probability = edge[edge_probability_attribute]
            else:
                probability = 1.0/len(succ[pred[u]])
            m.ConditionalProbability[node_name] = probability
        # get node variables
        if "variables" in tree.node[u]:
            node_variables = tree.node[u]["variables"]
            assert type(node_variables) in [tuple, list]
            for varstring in node_variables:
                m.NodeVariables[node_name].add(varstring)
        if "derived_variables" in tree.node[u]:
            node_derived_variables = tree.node[u]["derived_variables"]
            assert type(node_derived_variables) in [tuple, list]
            for varstring in node_derived_variables:
                m.NodeDerivedVariables[node_name].add(varstring)
        if "cost" in tree.node[u]:
            assert isinstance(tree.node[u]["cost"], six.string_types)
            m.NodeCost[node_name].value = tree.node[u]["cost"]
        if u in succ:
            child_names = []
            for v in succ[u]:
                child_names.append(
                    _add_node(v, stage+1, succ, pred))
            total_probability = 0.0
            for child_name in child_names:
                m.Children[node_name].add(child_name)
                total_probability += \
                    pyomo.core.value(m.ConditionalProbability[child_name])
            if abs(total_probability - 1.0) > 1e-5:
                raise ValueError(
                    "edge probabilities leaving node '%s' "
                    "do not sum to 1 (total=%r)"
                    % (u, total_probability))
        else:
            # a leaf node
            scenario_name = node_to_scenario[u]
            m.ScenarioLeafNode[scenario_name] = node_name
            m.Children[node_name].clear()

        return node_name

    _add_node(root,
              1,
              networkx.dfs_successors(tree, root),
              networkx.dfs_predecessors(tree, root))

    if any(_b is not None for _b in scenario_bundle.values()):
        if any(_b is None for _b in scenario_bundle.values()):
            raise ValueError("Incomplete bundle specification. "
                             "All scenarios require a bundle "
                             "identifier.")
        m.Bundling.value = True
        bundle_scenarios = {}
        for bundle_name in sorted(set(scenario_bundle.values())):
            m.Bundles.add(bundle_name)
            bundle_scenarios[bundle_name] = []
        for scenario_name in m.Scenarios:
            bundle_scenarios[scenario_bundle[scenario_name]].\
                append(scenario_name)
        for bundle_name in m.Bundles:
            for scenario_name in sorted(bundle_scenarios[bundle_name]):
                m.BundleScenarios[bundle_name].add(scenario_name)

    return m
Ejemplo n.º 44
0
 def test_predecessor(self):
     assert_equal(nx.dfs_predecessors(self.G,source=0),
                  {1: 0, 2: 1, 3: 4, 4: 2})
     assert_equal(nx.dfs_predecessors(self.D), {1: 0, 3: 2})
    def solveSteinerTreeDTH(self):
        print("start node: ", self.start_loc)
        print("home nodes: ", self.homes)
        # print("Traversal actual with homes: ", traversal_ordering)
        leaf_homes = self.getLeafNodes()
        preorder_nodes = dfs_preorder_nodes(self.steiner_tree,
                                            source=self.start_loc)
        traversal_ordering = [n for n in preorder_nodes if n in leaf_homes]
        print("Traversal ordering of the leaf: ", traversal_ordering)
        # """Remove non-leaf nodes from the order."""
        # for home in traversal_ordering:
        #     if home not in leaf_homes:
        #         print(home)
        #         traversal_ordering.remove(home)
        # current_loc = self.start_loc
        # """ Needs to start and end at root"""
        # print("Traversal_ordering: ", traversal_ordering)
        # # traversal_ordering.insert(0, current_loc)
        # # traversal_ordering.append(current_loc)
        """Hash map of the dropoffs"""
        dropoffs = dict()
        for i in range(len(traversal_ordering) - 1):
            current_leaf_home = traversal_ordering[i]
            next_leaf_home = traversal_ordering[i + 1]
            """Shortest path between current and next leaf home on the graph"""
            shortest_path = netx.shortest_path(self.netx_graph,
                                               source=current_leaf_home,
                                               target=next_leaf_home)
            print("Shortest path between ", current_leaf_home, " and ",
                  next_leaf_home, shortest_path)

            for node in shortest_path:
                """Check if any node in the shortest node is a part of the Steiner Tree """
                if node != current_leaf_home and node != next_leaf_home and node in self.steiner_tree:
                    #print("Node : ", node)
                    """Shares a common ancestor."""
                    """Drop off curr_node at curr->parent"""
                    #print("Steiner tree: ", set(self.steiner_tree))
                    print(
                        "Dfs ",
                        networkx.dfs_predecessors(self.steiner_tree,
                                                  source=self.start_loc))
                    dropoffs[current_leaf_home] = networkx.dfs_predecessors(
                        self.steiner_tree,
                        source=self.start_loc)[current_leaf_home]
        print("Dropoffs ", dropoffs)

        new_candidate_dropoff = set()
        for home in self.homes:
            if home in dropoffs.keys():
                new_candidate_dropoff.add(dropoffs[home])
            else:
                new_candidate_dropoff.add(home)
        print("homes ", self.homes)
        new_candidate_dropoff = list(new_candidate_dropoff)
        """Add source to the candidate dropoff list to create the steiner tree."""
        new_candidate_dropoff.append(self.start_loc)
        #print("Candidate_dropoffs ", new_candidate_dropoff)
        steiner_tree_candidate_dropoffs = steiner_tree(self.netx_graph,
                                                       new_candidate_dropoff,
                                                       weight='weight')
        preorder_nodes = dfs_preorder_nodes(steiner_tree_candidate_dropoffs,
                                            source=self.start_loc)
        preorder_nodes = list(preorder_nodes)
        final_order = [
            n for n in preorder_nodes if n in steiner_tree_candidate_dropoffs
        ]
        #print("final order pre", final_order)

        for elem in final_order:
            if elem in dropoffs.values():
                keys = [k for k, v in dropoffs.items() if v == elem]
                if elem in self.homes or elem == self.start_loc:
                    for i in keys:
                        final_order.insert(
                            final_order.index(elem) + 1, elem + " " + i)
                else:
                    index = final_order.index(elem)
                    for i in keys:
                        final_order[index] = elem + " " + i
                        index = index + 1
        print("final order ", final_order)
        return self.get_cost_params(final_order)
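The dropoff logic above uses nx.dfs_predecessors of the Steiner tree, rooted at the start location, as a parent-lookup table. A minimal sketch of that idea on a hypothetical tree (not the solver's actual graph or home data):

import networkx as nx

tree = nx.Graph([("root", "a"), ("a", "leaf1"), ("root", "b"), ("b", "leaf2")])
parents = nx.dfs_predecessors(tree, source="root")
print(parents["leaf1"], parents["leaf2"])   # a b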
Ejemplo n.º 46
0
def unaccelerated_get_node_to_pset(T, root,
        node_to_allowed_states=None, P_default=None):
    """
    This is unused, but could possibly be used for unit testing.
    """

    # Bookkeeping.
    successors = nx.dfs_successors(T, root)
    predecessors = nx.dfs_predecessors(T, root)

    # Compute the map from node to set.
    node_to_pset = {}
    for nb in nx.dfs_postorder_nodes(T, root):

        # If a parent node is available, get a set of states
        # involved in the transition matrix associated with the parent edge.
        # A more complicated implementation would use only the sink
        # states of that transition matrix.
        na_set = None
        if nb in predecessors:
            na = predecessors[nb]
            P = T[na][nb].get('P', P_default)
            na_set = set(P)

        # Use the set of allowed states for the current node,
        # if it is known.
        nb_set = None
        if node_to_allowed_states is not None and nb in node_to_allowed_states:
            nb_set = set(node_to_allowed_states[nb])

        # If a child node is available, get the set of states
        # that have transition to child states
        # for which the child subtree likelihoods are positive.
        nc_set = None
        if nb in successors:
            for nc in successors[nb]:
                allowed_set = set()
                P = T[nb][nc].get('P', P_default)
                for sb, sc in P.edges():
                    if sc in node_to_pset[nc]:
                        allowed_set.add(sb)
                if nc_set is None:
                    nc_set = allowed_set
                else:
                    nc_set.intersection_update(allowed_set)

        # Take the intersection of informative constraints due to
        # possible parent transitions,
        # possible direct constraints on the node state,
        # and possible child node state constraints.
        pset = None
        for constraint_set in (na_set, nb_set, nc_set):
            if constraint_set is not None:
                if pset is None:
                    pset = constraint_set
                else:
                    pset.intersection_update(constraint_set)

        # If the pset is still None,
        # then as a last attempt to get a node set,
        # try using the states in P_default if it is available.
        if pset is None:
            if P_default is not None:
                pset = set(P_default)

        # This value should not be None unless there has been some problem.
        if pset is None:
            raise ValueError('internal error')

        # Define the pset for the node.
        node_to_pset[nb] = pset

    # Return the node_to_pset map.
    return node_to_pset
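The loop above treats each edge attribute P (or P_default) as a sparse transition matrix encoded as a networkx DiGraph: iterating the graph yields the state space, and P.edges() yields the allowed (source, sink) transitions. A minimal sketch of that convention, with hypothetical states:

import networkx as nx

P = nx.DiGraph()
P.add_weighted_edges_from([('A', 'A', 0.9), ('A', 'B', 0.1), ('B', 'B', 1.0)])
print(set(P))            # {'A', 'B'}  -- the state space
print(list(P.edges()))   # [('A', 'A'), ('A', 'B'), ('B', 'B')]  -- allowed transitions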
Ejemplo n.º 47
0
def add_trajectories(T, root, trajectories, edge_to_event_times=None):
    """
    Construct a tree with merged trajectories.

    Parameters
    ----------
    T : undirected weighted networkx graph
        A weighted base tree.
    root : integer
        Root node common to all trajectories.
    trajectories : sequence of undirected weighted networkx graphs
        Edges should be annotated with 'weight' and with 'state'.
        The state should change only at nodes of degree two.
    edge_to_event_times : dict, optional
        If available, this provides a map from an edge of T
        to a collection of times.
        Each edge key is an ordered pair of nodes of T,
        ordered away from the root (bfs orientation).
        The times are with respect to the node that is closer to the root.
        The purpose of this arg is to optionally break the
        trajectory history into pieces for resampling the trajectory
        of a variable that depends on the trajectories of the variables
        provided to this function.

    Returns
    -------
    T_merged : undirected weighted networkx graph
        A new tree with more nodes.
        Edges are annotated with 'states' which gives a state
        for each trajectory.
    dummy_nodes : set of integers
        A set of dummy nodes added to the tree.
        These correspond to edge_to_event_times provided as input
        to this function.

    """
    # Bookkeeping.
    predecessors = nx.dfs_predecessors(T, root)
    successors = nx.dfs_successors(T, root)
    T_bfs_edges = list(tuple(x) for x in nx.bfs_edges(T, root))

    # Check that the trajectories have correct shape.
    for traj in trajectories:
        traj_specific_nodes = set(traj) - set(T)
        traj_skeleton = remove_selected_degree_two_nodes(
                traj, root, traj_specific_nodes)
        base_tree_edges = set(T_bfs_edges)
        traj_tree_edges = set(nx.bfs_edges(traj_skeleton, root))
        extra_base_tree_edges = base_tree_edges - traj_tree_edges
        extra_traj_tree_edges = traj_tree_edges - base_tree_edges
        if extra_base_tree_edges or extra_traj_tree_edges:
            msg = ('expected the trajectory '
                    'to follow the basic shape of the base tree; ')
            if extra_base_tree_edges:
                msg += 'extra base tree edges: %s ' % extra_base_tree_edges
            if extra_traj_tree_edges:
                msg += 'extra traj tree edges: %s ' % extra_traj_tree_edges
            raise ValueError(msg)

    # Check that the trajectories have the correct total edge weight.
    total_base_edge_weight = T.size(weight='weight')
    for traj in trajectories:
        traj_weight = traj.size(weight='weight')
        if not np.allclose(traj_weight, total_base_edge_weight):
            raise ValueError('each trajectory should have '
                    'the same total weight as the base tree\n'
                    'base tree weight: %s\n'
                    'traj tree weight: %s' % (
                        total_base_edge_weight, traj_weight))

    # For each trajectory get the map from base node to state.
    traj_node_to_state = []
    for traj in trajectories:
        query_nodes = set(T)
        node_to_state = get_node_to_state(traj, query_nodes)
        traj_node_to_state.append(node_to_state)

    # For each directed edge of the base tree,
    # maintain a priority queue of interleaved transitions along trajectories.
    base_edge_to_q = {}
    for na, nb in T_bfs_edges:
        base_edge = (na, nb)
        base_edge_to_q[base_edge] = []

    # Put dummy events into the edge-specific priority queues.
    if edge_to_event_times is not None:
        for edge, times in edge_to_event_times.items():
            q = base_edge_to_q[edge]
            for tm in times:
                q_item = (tm, None, None)
                heapq.heappush(q, q_item)

    # For each trajectory, put events in the priority queue of each edge.
    for traj_index, traj in enumerate(trajectories):

        # Get the timing and nature of events along the edges.
        event_map = get_event_map(T, root, traj, predecessors=predecessors)

        # Add the event information to the priority queue
        # of the appropriate edge.
        for base_edge, events in event_map.items():
            base_na, base_nb = base_edge
            for tm, traj_edge_object in events:
                traj_state = traj_edge_object['state']
                q_item = (tm, traj_index, traj_state)
                heapq.heappush(base_edge_to_q[base_edge], q_item)

    # Initialize the return values.
    T_merged = nx.Graph()
    dummy_nodes = set()

    # For each edge of the original tree,
    # add segments to the merged tree, such that no trajectory
    # transition occurs within any segment.
    # Annotate every segment with the state of every trajectory.
    next_new_node = max(T) + 1
    for base_edge in T_bfs_edges:

        # Unpack the edge endpoints.
        base_na, base_nb = base_edge

        # Get the edge weight from the base tree.
        base_edge_weight = T[base_na][base_nb]['weight']

        # Initialize the most recent segment node.
        prev_node = base_na

        # Define the trajectory states at the beginning of the edge.
        current_states = []
        for node_to_state in traj_node_to_state:
            current_states.append(node_to_state[base_na])

        # Iterate through the priority queue, adding an edge
        # when a transition is encountered on any trajectory.
        q = base_edge_to_q[base_edge]
        tm = 0
        while q:
            tm_event, traj_index, traj_state = heapq.heappop(q)
            T_merged.add_edge(
                    prev_node, next_new_node,
                    weight=tm_event-tm,
                    states=list(current_states))

            # If the traj_index is None then it means that
            # the event is a dummy event.
            # Dummy events do not change the state.
            if traj_index is None:
                dummy_nodes.add(next_new_node)
            else:
                current_states[traj_index] = traj_state

            # Update for the next iteration.
            prev_node = next_new_node
            next_new_node += 1
            tm = tm_event

        # Check that we have reached the states
        # that we had expected to reach.
        for traj_index, node_to_state in enumerate(traj_node_to_state):
            if node_to_state[base_nb] != current_states[traj_index]:
                raise Exception('internal error')

        # Add the final segment.
        T_merged.add_edge(
                prev_node, base_nb,
                weight=base_edge_weight-tm,
                states=list(current_states))

    # Return the merged tree and the set of dummy nodes.
    return T_merged, dummy_nodes
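As a small illustration of the edge_to_event_times format described in the docstring above (hypothetical values only): keys are base-tree edges ordered away from the root, and each time is measured from the endpoint of that edge closer to the root.

edge_to_event_times = {
    (0, 1): [0.25, 0.70],
    (1, 2): [0.10],
}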
Ejemplo n.º 48
0
def minibatch_(all_train_imgs, all_test_imgs, clf, scaler, w, loss__, mse,
               hinge1, hinge2, full_image, alphas, learning_rate, test_imgs,
               train_imgs, minibatch, subsamples, sum_x, n_samples, sum_sq_x,
               mean, variance, mode, mous):
    if mode == 'loss_test' or mode == 'loss_scikit_test' or mode == 'levels_test':
        X_p, y_p, inv = get_data(class_, test_imgs, train_imgs, minibatch, minibatch + 1, 'test', c, subsamples)
    else:
        X_p, y_p, inv = get_data(class_, test_imgs, train_imgs, minibatch, minibatch + 1, 'training', c, subsamples)        
    if X_p != []:
        boxes = []
        ground_truth = inv[0][2]
        img_nr = inv[0][0]
        if less_features:
            X_p = [fts[0:features_used] for fts in X_p]
        if os.path.isfile('/home/stahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt'):
            f = open('/home/stahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt', 'r')
        else:
            print('warning')
        for line, y in zip(f, inv):
            tmp = line.split(',')
            coord = []
            for s in tmp:
                coord.append(float(s))
            boxes.append([coord,y[2]])
        assert(len(boxes)<1500)
        boxes, y_p, X_p = sort_boxes(boxes, y_p, X_p, 0,1500)
        
        gr = []
        if os.path.isfile('/home/stahl/GroundTruth/%s/%s.txt'%(class_,format(img_nr, "06d"))):
            gr = open('/home/stahl/GroundTruth/%s/%s.txt'%(class_,format(img_nr, "06d")), 'r')
        ground_truths = []
        if gr != []:  # if there is no class image there is no ground truth (probably only needed for learn_intersections)
            for line in gr:
               tmp = line.split(',')
               ground_truth = []
               for s in tmp:
                  ground_truth.append(int(s))
               ground_truths.append(ground_truth)
        
        if mode == 'mean_variance':
            scaler.partial_fit(X_p)  # Don't cheat - fit only on training data
            return scaler
            
        # create_tree
        G, levels = create_tree(boxes)
        
        #prune tree to only have levels which fully cover the image
        # tested
        if prune_fully_covered:
            nr_levels_covered = 100
            total_size = surface_area(boxes, levels[0])
            for level in levels:
                sa = surface_area(boxes, levels[level])
                sa_co = sa/total_size
                if sa_co != 1.0:
                    G.remove_nodes_from(levels[level])
                else:
                    nr_levels_covered = level
            levels = {k: levels[k] for k in range(0,nr_levels_covered + 1)}
        
        #either subsampling or prune_fully_covered
        #assert(subsampling != prune_fully_covered)
        
        # prune levels, speedup + performance 
        levels = {k: v for k, v in levels.items() if k < prune_tree_levels}
        
        #prune G in order to remove nodes of the lower levels
        remaining_nodes = []
        for lev in levels.values():
            remaining_nodes.extend(lev)
        for g_node in G.nodes():
            if g_node not in remaining_nodes:
                G.remove_node(g_node)
        
        coords = []
        features = []
        f_c = []
        f = []
        
        if learn_intersections and not prune_fully_covered:
            if os.path.isfile('/home/stahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples)):
                f_c = open('/home/stahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'r+')
            else:
                if mode == 'extract_train' or mode == 'extract_test':                
                    print('coords for %s with %s samples have to be extracted' % (img_nr, subsamples))
                    f_c = open('/home/stahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'w')
                    for level in levels:
                        levl_boxes = extract_coords(levels[level], boxes)
                        if levl_boxes != []:
                            for lvl_box in levl_boxes:
                                if lvl_box not in coords:
                                    coords.append(lvl_box)
                                    f_c.write('%s,%s,%s,%s'%(lvl_box[0],lvl_box[1],lvl_box[2],lvl_box[3]))
                                    f_c.write('\n')
                    f_c.close()
                    print('features for %s with %s samples have to be extracted' % (img_nr, subsamples))
                    os.system('export PATH=$PATH:/home/koelma/impala/lib/x86_64-linux-gcc')
                    os.system('export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/koelma/impala/third.13.03/x86_64-linux/lib')
                    #print "EuVisual /home/stahl/Images/%s.jpg /home/stahl/Features_prop_windows/Features_upper/sheep_%s_%s.txt --eudata /home/koelma/EuDataBig --imageroifile /home/stahl/Features_prop_windows/upper_levels/sheep_%s_%s.txt"%((format(img_nr, "06d")),format(img_nr, "06d"),subsamples,format(img_nr, "06d"),subsamples)
                    os.system("EuVisual /home/stahl/Images/%s.jpg /home/stahl/Features_prop_windows/Features_upper/%s_%s_%s.txt --eudata /home/koelma/EuDataBig --imageroifile /home/stahl/Features_prop_windows/upper_levels/%s_%s_%s.txt"%(class_,(format(img_nr, "06d")),format(img_nr, "06d"),subsamples,class_,format(img_nr, "06d"),subsamples))
                    if os.path.isfile('/home/stahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples)):
                        f_c = open('/home/stahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'r')
                    else:
                        f_c = []
            coords = []
                
            if os.path.isfile('/home/stahl/Features_prop_windows/Features_upper/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples)):
                f = open('/home/stahl/Features_prop_windows/Features_upper/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'r') 
                
                
        elif prune_fully_covered:
            if os.path.isfile('/home/stahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d"))):
                f_c = open('/home/stahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d")), 'r+')
                
                
            else:
                if mode == 'extract_train' or mode == 'extract_test':                
                    print('coords for %s with fully_cover_tree samples have to be extracted' % img_nr)
                    f_c = open('/home/stahl/Features_prop_windows/upper_levels/%s_%s_fully_cover_tree.txt'%(class_,format(img_nr, "06d")), 'w')
                    for level in levels:
                        levl_boxes = extract_coords(levels[level], boxes)
                        if levl_boxes != []:
                            for lvl_box in levl_boxes:
                                if lvl_box not in coords:
                                    coords.append(lvl_box)
                                    f_c.write('%s,%s,%s,%s'%(lvl_box[0],lvl_box[1],lvl_box[2],lvl_box[3]))
                                    f_c.write('\n')
                    f_c.close()
                    print('features for %s with fully_cover_tree samples have to be extracted' % img_nr)
                    os.system('export PATH=$PATH:/home/koelma/impala/lib/x86_64-linux-gcc')
                    os.system('export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/koelma/impala/third.13.03/x86_64-linux/lib')
                    #print "EuVisual /home/stahl/Images/%s.jpg /home/stahl/Features_prop_windows/Features_upper/sheep_%s_%s.txt --eudata /home/koelma/EuDataBig --imageroifile /home/stahl/Features_prop_windows/upper_levels/sheep_%s_%s.txt"%((format(img_nr, "06d")),format(img_nr, "06d"),subsamples,format(img_nr, "06d"),subsamples)
                    os.system("EuVisual /home/stahl/Images/%s.jpg /home/stahl/Features_prop_windows/Features_upper/%s_%s_fully_cover_tree.txt --eudata /home/koelma/EuDataBig --imageroifile /home/stahl/Features_prop_windows/upper_levels/%s_%s_fully_cover_tree.txt"%(class_,(format(img_nr, "06d")),format(img_nr, "06d"),class_,format(img_nr, "06d")))
                    if os.path.isfile('/home/stahl/Features_prop_windows/upper_levels/%s_%s_fully_cover_tree.txt'%(class_,format(img_nr, "06d"))):
                        f_c = open('/home/stahl/Features_prop_windows/upper_levels/%s_%s_fully_cover_tree.txt'%(class_,format(img_nr, "06d")), 'r')
                    else:
                        f_c = []
            coords = []
                
            if os.path.isfile('/home/stahl/Features_prop_windows/Features_upper/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d"))):
                f = open('/home/stahl/Features_prop_windows/Features_upper/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d")), 'r') 
                        
                
#        else: #we don't need to load intersections
#            if os.path.isfile('/home/stahl/Features_prop_windows/Features_upper/%s%s.txt'%(class_,format(img_nr, "06d"))):
#                f = open('/home/stahl/Features_prop_windows/Features_upper/%s%s.txt'%(class_,format(img_nr, "06d")), 'r') 
#            if os.path.isfile('/home/stahl/Features_prop_windows/upper_levels/%s%s.txt'%(class_,format(img_nr, "06d"))):
#                f_c = open('/home/stahl/Features_prop_windows/upper_levels/%s%s.txt'%(class_,format(img_nr, "06d")), 'r+')
#            else:
#                print '/home/stahl/Features_prop_windows/upper_levels/%s%s.txt does not exist'%(class_,format(img_nr, "06d"))
        for i,line in enumerate(f_c):
            str_ = line.rstrip('\n').split(',')
            cc = []
            for s in str_:
               cc.append(float(s))
            coords.append(cc)
        for i,line in enumerate(f):
            str_ = line.rstrip('\n').split(',')  
            ff = []
            for s in str_:
               ff.append(float(s))
            features.append(ff)
        if less_features:
            features = [fts[0:features_used] for fts in features]
        if normalize and features != []:
            features = scaler.transform(features)
        
        print(len(y_p), len(X_p))
        print(len(features), len(coords))
        assert len(coords) == len(features)
        
        # append x,y of intersections
        #if learn_intersections:
        #    for inters,coord in zip(features,coords):
#                if inters not in pruned_x:
        #        X_p.append(inters)
        #        ol = 0.0
        #        ol = get_intersection_count(coord, ground_truths)
        #        y_p.append(ol)
        print(len(y_p), len(X_p))
        #normalize
        norm_x = []
        if normalize:
#            for p_x in pruned_x:
#                norm_x.append((p_x-mean)/variance)
            norm_x = scaler.transform(X_p)
        else:
            norm_x = X_p
        data = (G, levels, y_p, norm_x, boxes, ground_truths, alphas)
        sucs = nx.dfs_successors(G)
        
        predecs = nx.dfs_predecessors(G)
        
        #preprocess: node - children
        children = {}
        last = -1
        for node, children_ in sucs.items():
            if node != last+1:
                for i in range(last+1,node):
                    children[i] = []
                children[node] = children_
            elif node == last +1:
                children[node] = children_
            last = node
        if mode == 'train':
            if alphas[2] == 0 and alphas[3] == 0: #just learn proposals and intersections
                # only use proposals and intersections used in pruned tree
                used_ind = get_used_proposals(G, boxes, coords, levels)
                used_x = []
                used_y = []
                for ind in used_ind['prop']:
                    used_x.append(norm_x[ind])
                    used_y.append(y_p[ind])
                for ind in used_ind['inters']:
                    used_x.append(features[ind])
                    used_y.append(get_intersection_count(coords[ind], ground_truths))
                print(len(used_x), len(used_y))
                for x_i,y_i in zip(used_x,used_y):
                    w = learn_root(w,x_i,y_i,learning_rate,alphas)
            else:
                nodes = list(G.nodes())
                for node in nodes:
                    if node == 0:
                        if alphas[0] != 0:
                            w = learn_root(w,norm_x[0],y_p[0],learning_rate,alphas)
                        else:
                            print('learn nothing')
                    else:
                        w = constrained_regression(class_,features,coords,scaler,w,norm_x,y_p,node,predecs,children,boxes,learning_rate,alphas,img_nr, squared_hinge_loss)
            return w, len(y_p), len(G.nodes())
        elif mode == 'scikit_train':
            print(norm_x, y_p)
            clf.partial_fit(norm_x,y_p)
            return clf
        elif mode == 'loss_train' or mode == 'loss_test':
            loss__.append(loss(class_,squared_hinge_loss,features,coords,scaler, w, data, predecs, children,img_nr,-1))
#            mse.append(((data[2] - np.dot(w,np.array(data[3]).T)) ** 2).sum())
#            a2 = alphas[2]
#            data = (G, levels, y_p, norm_x, boxes, ground_truths, [0,0,a2,0])
#            hinge1.append(loss(class_,squared_hinge_loss,features,coords,scaler, w, data, predecs, children,img_nr,-1))
#            a3 = alphas[3]
#            data = (G, levels, y_p, norm_x, boxes, ground_truths, [0,0,0,a3])
#            hinge2.append(loss(class_,squared_hinge_loss,features,coords,scaler, w, data, predecs, children,img_nr,-1))
            full_image.append([y_p[0],max(0,np.dot(w,np.array(norm_x[0]).T))])
            return loss__, full_image
        elif mode == 'loss_scikit_test' or mode == 'loss_scikit_train':
            loss__.append(((clf.predict(norm_x) - y_p)**2).sum())
            return loss__ 
        elif mode == 'levels_train' or mode == 'levels_test':
            #im = mpimg.imread('/home/stahl/Images/'+ (format(img_nr, "06d")) +'.jpg')
            preds = []
            for i,x_ in enumerate(norm_x):
                preds.append(np.dot(w, x_))
            cpls = []
            truelvls = []
            used_boxes_ = []
            total_size = surface_area(boxes, levels[0])
            fully_covered_score = 0.0
            fully_covered_score_lvls = 0.0
            covered_levels = []
            for level in levels:
                # tru and truelvls were used to check that the count_per_level method is correct
                cpl,used_boxes,_ = count_per_level(class_,features,coords,scaler,w, preds, img_nr, boxes,levels[level], '',[])
                cpl = max(0,cpl)
                if used_boxes != []:
                    used_boxes_.append(used_boxes[0][1])
                tru,_,_ = count_per_level(class_,features,coords,scaler,w, preds, img_nr, boxes,levels[level], 'gt',[])
                cpls.append(cpl)
                sa = surface_area(boxes, levels[level])
                sa_co = sa/total_size
                if sa_co == 1.0:
                   fully_covered_score += cpl
                   fully_covered_score_lvls += 1
                   covered_levels.append(cpl)
                truelvls.append(tru)
            return cpls, truelvls, used_boxes_,boxes,preds,fully_covered_score/fully_covered_score_lvls,covered_levels
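The parent/child bookkeeping above is driven by nx.dfs_successors and nx.dfs_predecessors. A minimal sketch of both calls on a toy tree with integer nodes (not the proposal tree built by create_tree):

import networkx as nx

G = nx.Graph([(0, 1), (0, 2), (2, 3)])
print(nx.dfs_successors(G, 0))      # {0: [1, 2], 2: [3]}
print(nx.dfs_predecessors(G, 0))    # {1: 0, 2: 0, 3: 2}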
Ejemplo n.º 49
0
def preorder_st_traversal(H, T):
    """
    Conducts a pre-ordered traversal of tree **T** with an **s-t** orientation.

    **s** is the source node and **t** is the target node for the PDP.
    Traversal of an **s-t** oriented tree visits nodes in pre-order,
    but at each branching point the branch containing **t** is left
    to be visited last.

    Parameters
    ----------
    H: graph
        The :ref:`request graph<Request (PDP) Graph>`.

    T: graph
        A spanning tree of **H**.

    Returns
    -------
    P: graph
        :ref:`PDP tour<Tour Graph>` solution.
    """

    s = T.graph['s']
    t = T.graph['t']
    requests = T.graph['requests']

    P = nx.Graph()
    P.graph.update(T.graph)

    # Find successors and predecessors of nodes in T for traversing.
    V = set(T.nodes())
    T_successors = nx.dfs_successors(T, s)
    successors = set(T_successors.keys())
    # Update for nodes with empty successors.
    for j in V.difference(successors):
        T_successors[j] = []
    T_predecessors = nx.dfs_predecessors(T, s)
    # Branch with t must be visited last. Store the nodes on this branch
    # for identification of traversal priority.
    last_branch = {t}
    b = T_predecessors[t]
    while not b == s:
        last_branch.add(b)
        b = T_predecessors[b]

    # boundary_nodes will keep track of the next nodes to be visited.
    boundary_nodes = []
    # If a node is visited and added to P, its request number is added.
    visited_requests = {requests[s][0]}
    # Initialize with source, s.
    P.add_nodes_from([(s, T.nodes[s])])
    u = s
    # While P does not contain all nodes, traverse T and add every
    # feasible node in order.
    while P.number_of_nodes() < T.number_of_nodes():
        v = s
        neighbors = T_successors[v].copy()
        # For each node w branching from v, determine if w is to be
        # visited last, if so, relocate to front of boundary_nodes
        # so that it is popped (visited) last.
        for w in neighbors:
            if w in last_branch:
                boundary_nodes = [w] + boundary_nodes
            else:
                boundary_nodes = boundary_nodes + [w]
        # Consider nodes while there are nodes in boundary_nodes.
        while len(boundary_nodes) > 0:
            v = boundary_nodes.pop()
            # If v has not been added to P, and:
            #   a) v is an origin type,
            #   b) v is a destination type with corresponding origin in P, or
            #   c) v is t and all other nodes are in P
            # Then add v to P and make an edge from last added u node.
            if v not in P and (
                    requests[v][1] == 'o' or
                (requests[v][1] == 'd' and requests[v][0] in visited_requests)
                    or
                (v == t and P.number_of_nodes() == T.number_of_nodes() - 1)):
                visited_requests.add(requests[v][0])
                P.add_nodes_from([(v, T.nodes[v])])
                P.add_edges_from([(u, v, H.edges[u, v])])
                u = v
            neighbors = T_successors[v].copy()
            # Update the boundary nodes.
            for w in neighbors:
                if w in last_branch:
                    boundary_nodes = [w] + boundary_nodes
                else:
                    boundary_nodes = boundary_nodes + [w]

    # Add in final edge from t to s to complete cycle.
    P.add_edges_from([(t, s, H.edges[t, s])])
    P.graph['dist'] = P.size(weight='weight')

    return P
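One detail worth noting in the bookkeeping above: leaf nodes never appear as keys of nx.dfs_successors, which is why T_successors is padded with empty lists. A minimal sketch of that behaviour on a toy path (not PDP data):

import networkx as nx

T = nx.Graph([(0, 1), (1, 2)])
succ = nx.dfs_successors(T, 0)
print(succ)                                      # {0: [1], 1: [2]}  -- leaf 2 has no key
succ.update({j: [] for j in set(T) - set(succ)})
print(succ[2])                                   # []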
Ejemplo n.º 50
0
def get_node_to_pset(T, root, node_to_state=None, P_default=None):
    """
    For each node, get the set of states that give positive subtree likelihood.

    This function is analogous to get_node_to_pmap.

    Parameters
    ----------
    T : undirected unweighted acyclic networkx graph
        A tree whose edges are optionally annotated
        with edge-specific state transition probability matrix P.
    root : integer
        The root node.
    node_to_state : dict, optional
        A sparse map from a node to its known state if any.
        Nodes in this map are assumed to have completely known state.
        Nodes not in this map are assumed to have completely missing state.
        If this map is not provided,
        all states information will be assumed to be completely missing.
        Entries of this dict that correspond to nodes not in the tree
        will be silently ignored.
    P_default : networkx directed weighted graph, optional
        Sparse transition matrix to be used for edges
        which are not annotated with an edge-specific transition matrix.

    Returns
    -------
    node_to_pset : dict
        A map from a node to the set of states with positive subtree likelihood.

    """
    if len(set(T)) == 1:
        if root not in T:
            raise ValueError('unrecognized root')
        if (node_to_state is not None) and (root in node_to_state):
            root_state = node_to_state[root]
            root_pset = {root_state}
        else:
            all_states = set(P_default)
            root_pset = all_states
        return {root : root_pset}

    # Bookkeeping.
    successors = nx.dfs_successors(T, root)
    predecessors = nx.dfs_predecessors(T, root)

    # Compute the map from node to set.
    node_to_pset = {}
    for nb in nx.dfs_postorder_nodes(T, root):

        # If a parent node is available, get a set of states
        # involved in the transition matrix associated with the parent edge.
        # A more complicated implementation would use only the sink
        # states of that transition matrix.
        na_set = None
        if nb in predecessors:
            na = predecessors[nb]
            P = T[na][nb].get('P', P_default)
            na_set = set(P)

        # If the state of the current node is known,
        # define the set containing only that state.
        nb_set = None
        if node_to_state is not None and nb in node_to_state:
            nb_set = {node_to_state[nb]}

        # If a child node is available, get the set of states
        # that have transition to child states
        # for which the child subtree likelihoods are positive.
        nc_set = None
        if nb in successors:
            for nc in successors[nb]:
                allowed_set = set()
                P = T[nb][nc].get('P', P_default)
                for sb, sc in P.edges():
                    if sc in node_to_pset[nc]:
                        allowed_set.add(sb)
                if nc_set is None:
                    nc_set = allowed_set
                else:
                    nc_set.intersection_update(allowed_set)

        # Take the intersection of informative constraints due to
        # possible parent transitions,
        # possible direct constraints on the node state,
        # and possible child node state constraints.
        pset = None
        for constraint_set in (na_set, nb_set, nc_set):
            if constraint_set is not None:
                if pset is None:
                    pset = constraint_set
                else:
                    pset.intersection_update(constraint_set)

        # This value should not be None unless there has been some problem.
        if pset is None:
            raise ValueError('internal error')

        # Define the pset for the node.
        node_to_pset[nb] = pset

    # Return the node_to_pset map.
    return node_to_pset
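A small worked example of get_node_to_pset, assuming the function above is in scope. The tree is a hypothetical path 0-1-2, both edges fall back to a default transition matrix over states 'A' and 'B', and only the state of node 2 is observed:

import networkx as nx

T = nx.Graph([(0, 1), (1, 2)])
P_default = nx.DiGraph()
P_default.add_weighted_edges_from([('A', 'A', 0.9), ('A', 'B', 0.1), ('B', 'B', 1.0)])
print(get_node_to_pset(T, 0, node_to_state={2: 'A'}, P_default=P_default))
# {2: {'A'}, 1: {'A'}, 0: {'A'}}  -- only 'A' can reach the observed leaf state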
Ejemplo n.º 51
0
# --- 🖥 Data analysis packages 🖥 --- #
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import collections
from collections import deque
import time

# --- 📃 Web scraping packages 📃 --- #
import requests
from bs4 import BeautifulSoup
import re

# --- 📅 Search algorithms and network analysis package 📅 --- #
import networkx as nx
# Create a new graph for the next problems
G = nx.Graph()
G.add_edges_from([
    ("A", "B"), ("A", "S"), ("B", "A"), ("C", "D"), ("C", "E"), ("C", "F"),
    ("C", "S"), ("D", "C"), ("E", "C"), ("E", "H"), ("F", "C"), ("F", "G"),
    ("H", "E"), ("H", "G"), ("S", "A"), ("S", "C"), ("S", "G"),
])

nx.draw_networkx(G, with_labels=True)
plt.title('A more complex graph')
#plt.show();
print(nx.dfs_predecessors(G, source='C'))
print("The average shortest path is:", round(nx.average_shortest_path_length(G)))
print("Shortest path scenarios:", [p for p in nx.all_shortest_paths(G, source='A', target='H')])
print("The shortest path length is", nx.shortest_path_length(G, source='A', target='H'))
Ejemplo n.º 52
0
def find_outer_bags(graph, root):
    rev_graph = graph.reverse()
    predecessors = nx.dfs_predecessors(rev_graph, root)
    return len(predecessors)
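A hedged usage sketch for find_outer_bags on a hypothetical miniature rules graph, where an edge u -> v means bag u directly contains bag v:

import networkx as nx

rules = nx.DiGraph([("light red", "shiny gold"),
                    ("dark orange", "light red"),
                    ("shiny gold", "dark olive")])
print(find_outer_bags(rules, "shiny gold"))   # 2  (light red and dark orange)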
Ejemplo n.º 53
0
def get_event_map(T, root, traj, predecessors=None):
    """

    Parameters
    ----------
    T : undirected weighted networkx graph
        The base tree.
    root : integer
        The root node.
        This is used as the root for both the base tree
        and for the trajectory tree.
    traj : undirected weighted networkx graph
        Like the base tree, but with some extra degree-2 nodes.
        The naming of this argument is inspired by the term trajectory,
        although it is not required to be a trajectory in any technical sense.
        In particular its edges do not need to be annotated with states.
        On the other hand its edges need to be annotated with weights.
    predecessors : dict, optional
        Map from a node in the base tree T to its predecessor in the base tree.
        The directionality of the predecessor vs. successor
        is determined by the root.

    Returns
    -------
    event_map : dict
        Map from an edge of the base tree to a list of
        (time, augmented tree edge object) pairs.
        The edge of the base tree is represented by an ordered pair,
        with the ordering being in the direction away from the root.
        The edge object of the augmented tree
        corresponds to the edge following the transition event.

    """
    # Initialize the event map.
    event_map = defaultdict(list)

    # Bookkeeping.
    if predecessors is None:
        predecessors = nx.dfs_predecessors(T, root)
    traj_successors = nx.dfs_successors(traj, root)

    # Associate each trajectory edge to a base tree edge.
    traj_edge_to_base_edge = {}
    traj_preorder_edges = list(tuple(x) for x in nx.bfs_edges(traj, root))
    for traj_edge in reversed(traj_preorder_edges):
        traj_na, traj_nb = traj_edge
        if traj_nb in T:
            base_nb = traj_nb
            base_na = predecessors[base_nb]
            base_edge = (base_na, base_nb)
        else:
            traj_nb_successors = traj_successors[traj_nb]
            if len(traj_nb_successors) != 1:
                raise Exception('internal error')
            traj_nb_successor = traj_nb_successors[0]
            traj_child_edge = (traj_nb, traj_nb_successor)
            base_edge = traj_edge_to_base_edge[traj_child_edge]
        traj_edge_to_base_edge[traj_edge] = base_edge
    
    # Each traj node that is not in T is a traj transition event.
    # Put each transition event into the priority queue
    # of the corresponding edge of the base tree.
    base_edge_to_tm = {}
    for traj_edge in traj_preorder_edges:

        # Unpack the endpoints of the trajectory edge.
        traj_na, traj_nb = traj_edge

        # If there is no event on this edge then continue.
        if (traj_na in T) and (traj_nb in T):
            continue

        # Map the trajectory event back to an edge of the base tree.
        base_edge = traj_edge_to_base_edge[traj_edge]

        # Get the timing of the current event along the edge.
        tm = base_edge_to_tm.get(base_edge, 0)

        # Define the networkx edge
        # corresponding to the segment of the trajectory.
        traj_edge_object = traj[traj_na][traj_nb]

        # If traj_na is a transition event,
        # then add its information to the event map.
        if traj_na not in T:
            event_info = (tm, traj_edge_object)
            event_map[base_edge].append(event_info)

        # Update the timing along the edge.
        traj_weight = traj_edge_object['weight']
        base_edge_to_tm[base_edge] = tm + traj_weight

    # Return the event map.
    return event_map
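A small worked example of get_event_map, assuming the function above and its imports (networkx as nx, defaultdict) are in scope. The base tree is a single weighted edge 0-1 and the trajectory splits that edge at a hypothetical transition node 10:

import networkx as nx

T = nx.Graph()
T.add_edge(0, 1, weight=1.0)
traj = nx.Graph()
traj.add_edge(0, 10, weight=0.4, state='A')    # first 0.4 of the edge in state 'A'
traj.add_edge(10, 1, weight=0.6, state='B')    # remaining 0.6 in state 'B'
print(dict(get_event_map(T, 0, traj)))
# {(0, 1): [(0.4, {'weight': 0.6, 'state': 'B'})]}  -- one transition at time 0.4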