def test_rdflib_to_graphtool():
    """Convert a four-triple RDF graph with ``rdflib_to_graphtool`` and check it.

    The graph holds three triples with predicate ``p`` and one with ``q``.
    The test is skipped when ``graph_tool`` cannot be imported.
    """
    try:
        from graph_tool import util as gt_util
    except ImportError:
        raise SkipTest("couldn't find graph_tool")
    from rdflib.extras.external_graph_libs import rdflib_to_graphtool

    def count_edges(graph, prop, value):
        # number of edges whose property ``prop`` matches ``value``
        return len(list(gt_util.find_edge(graph, prop, value)))

    g = Graph()
    a, b, lit = URIRef('a'), URIRef('b'), Literal('l')
    p, q = URIRef('p'), URIRef('q')
    for triple in ((a, p, b), (a, q, b), (b, p, a), (b, p, lit)):
        g.add(triple)

    # default conversion: terms are stored in the 'term' properties
    mdg = rdflib_to_graphtool(g)
    assert len(list(mdg.edges())) == 4

    vertex_terms = mdg.vertex_properties['term']
    # every term must be findable; indexing [0] fails if a vertex is missing
    va, vb, _vl = [
        gt_util.find_vertex(mdg, vertex_terms, term)[0]
        for term in (a, b, lit)
    ]
    endpoints = [(e.source(), e.target()) for e in mdg.edges()]
    assert (va, vb) in endpoints

    edge_terms = mdg.edge_properties['term']
    assert count_edges(mdg, edge_terms, p) == 3
    assert count_edges(mdg, edge_terms, q) == 1

    # custom conversion: predicates are stored as text under 'name'
    name_key = text_type('name')
    mdg = rdflib_to_graphtool(
        g,
        e_prop_names=[name_key],
        transform_p=lambda s, p, o: {name_key: text_type(p)})
    edge_names = mdg.edge_properties['name']
    assert count_edges(mdg, edge_names, text_type(p)) == 3
    assert count_edges(mdg, edge_names, text_type(q)) == 1
def test_rdflib_to_graphtool():
    """Exercise ``rdflib_to_graphtool`` on a tiny graph of four triples.

    Checks the edge count, that the ``a -> b`` edge exists, and the number
    of edges per predicate, first with the default ``term`` properties and
    then with a custom ``name`` edge property filled by ``transform_p``.
    Skipped if ``graph_tool`` is not installed.
    """
    try:
        from graph_tool import util as gt_util
    except ImportError:
        raise SkipTest("couldn't find graph_tool")
    from rdflib.extras.external_graph_libs import rdflib_to_graphtool

    a, b, lit = URIRef("a"), URIRef("b"), Literal("l")
    p, q = URIRef("p"), URIRef("q")
    g = Graph()
    for triple in [(a, p, b), (a, q, b), (b, p, a), (b, p, lit)]:
        g.add(triple)

    mdg = rdflib_to_graphtool(g)
    all_edges = list(mdg.edges())
    assert len(all_edges) == 4

    vpterm = mdg.vertex_properties["term"]
    va = gt_util.find_vertex(mdg, vpterm, a)[0]
    vb = gt_util.find_vertex(mdg, vpterm, b)[0]
    # the literal must be present as a vertex too (IndexError otherwise)
    gt_util.find_vertex(mdg, vpterm, lit)[0]
    assert (va, vb) in [(e.source(), e.target()) for e in mdg.edges()]

    # three triples use predicate p, one uses q
    epterm = mdg.edge_properties["term"]
    for predicate, expected in ((p, 3), (q, 1)):
        assert len(list(gt_util.find_edge(mdg, epterm, predicate))) == expected

    def predicate_as_name(s, p, o):
        # store the predicate as plain text under the 'name' key
        return {str("name"): str(p)}

    mdg = rdflib_to_graphtool(
        g,
        e_prop_names=[str("name")],
        transform_p=predicate_as_name)
    epterm = mdg.edge_properties["name"]
    for predicate, expected in ((p, 3), (q, 1)):
        matches = list(gt_util.find_edge(mdg, epterm, str(predicate)))
        assert len(matches) == expected
Esempio n. 3
0
def run(filename, output, header_bool, sub, obj, pred, props, undirected,
        strong):
    """Label the connected components of a tab-separated edge file.

    The file is loaded with ``graph_tool.load_graph_from_csv`` using the
    ``sub`` and ``obj`` columns as edge endpoints.  When ``props`` is given,
    only edges whose predicate value is one of the listed properties are
    kept before the components are computed.  One row
    ``<vertex name>\\tconnected_component\\t<component>`` is written per
    vertex, preceded by a ``node1 label node2`` header.

    Args:
        filename: path of the input file.
        output: path of the output file; if falsy, rows go to stdout.
        header_bool: passed negated as ``skip_first`` to the loader.
        sub: column index of the edge source.
        obj: column index of the edge target.
        pred: column index of the predicate.
        props: comma-separated predicate values to keep, or a falsy value
            to keep all edges.
        undirected: if true, the graph is loaded as undirected.
        strong: passed as ``directed`` to ``label_components``.

    Raises:
        KGTKException: if loading, labelling or writing fails; the original
            error is attached as the cause.
    """
    # import modules locally
    import csv
    import sys
    from graph_tool import load_graph_from_csv
    from graph_tool.util import find_edge
    from graph_tool.topology import label_components
    from kgtk.exceptions import KGTKException
    from kgtk.cli_argparse import KGTKArgumentParser

    def find_pred_position(sub, pred, obj):
        # Shift the predicate index down by one for each endpoint column
        # that precedes it (ties fall into the last branch, as before).
        if pred < sub and pred < obj:
            return pred
        elif (pred > sub and pred < obj) or (pred < sub and pred > obj):
            return pred - 1
        else:
            return pred - 2

    try:
        header = ['node1', 'label', 'node2']
        # presumably the name the loader gives the predicate column's edge
        # property ("c<index>") -- verify against graph_tool
        label = 'c' + str(find_pred_position(sub, pred, obj))
        g = load_graph_from_csv(filename,
                                not (undirected),
                                skip_first=not (header_bool),
                                hashed=True,
                                csv_options={'delimiter': '\t'},
                                ecols=(sub, obj))
        if props:
            es = []
            for e in props.split(','):
                es.extend(find_edge(g, g.edge_properties[label], e))
            # rebuild the graph with only the selected edges
            g.clear_edges()
            g.add_edge_list(list(set(es)))
        comp, hist = label_components(g, directed=strong)
        if output:
            # ``with`` closes the file even if writing fails; newline='' is
            # what the csv module expects of files handed to csv.writer.
            with open(output, 'w', newline='') as f:
                # quotechar=None is the valid spelling of "no quote
                # character"; an empty string is rejected by newer Pythons.
                # NOTE(review): a newline escapechar is unusual -- confirm
                # it is intended.
                wr = csv.writer(f,
                                quoting=csv.QUOTE_NONE,
                                delimiter="\t",
                                escapechar="\n",
                                quotechar=None)
                wr.writerow(header)
                for v, c in enumerate(comp):
                    wr.writerow(
                        [g.vertex_properties['name'][v],
                         'connected_component', c])
        else:
            sys.stdout.write('%s\t%s\t%s\n' % ('node1', 'label', 'node2'))
            for v, c in enumerate(comp):
                sys.stdout.write('%s\t%s\t%s\n' %
                                 (g.vertex_properties['name'][v],
                                  'connected_component', str(c)))
    except Exception as e:
        # Keep the failure reason instead of raising a bare exception class,
        # and let KeyboardInterrupt/SystemExit propagate untouched.
        raise KGTKException(str(e)) from e
Esempio n. 4
0
 def _get_edges(self, flows):
     """Return, for each flow in ``flows``, the first graph edge whose
     ``id`` edge property matches ``flow.id`` (same order as ``flows``).

     Raises ``Exception`` as soon as a flow has no matching edge.
     """
     found = []
     for flow in flows:
         matches = util.find_edge(self.graph, self.graph.ep['id'], flow.id)
         if len(matches) == 0:
             # shouldn't happen if graph is up to date
             raise Exception(f'graph is missing flow {flow.id}')
         found.append(matches[0])
     return found
Esempio n. 5
0
    def translate_to_db(self):
        """Write the attributes of all edges flagged ``changed`` to the database.

        For each changed edge the related ``FractionFlow`` is loaded by id.
        If that flow already belongs to a strategy it is updated in place;
        otherwise the values go into the ``StrategyFractionFlow`` of
        ``self.strategy`` for that flow -- an existing one is overwritten,
        a missing one is created (all creations are done in one bulk insert
        at the end).
        """
        # ToDo: filter for changes
        graph = self.graph
        to_create = []

        for edge in util.find_edge(graph, graph.ep['changed'], True):
            amount = graph.ep.amount[edge]
            # get the related FractionFlow
            flow = FractionFlow.objects.get(id=graph.ep.id[edge])
            material = graph.ep.material[edge]
            process = graph.ep.process[edge]
            # -1 in the graph stands for "no process"
            if process == -1:
                process = None
            values = {
                'amount': amount,
                'material_id': material,
                'waste': graph.ep.waste[edge],
                'hazardous': graph.ep.hazardous[edge],
                'process_id': process,
            }

            if flow.strategy is not None:
                # new flow is marked with strategy relation
                # (no separate strategy fraction flow needed)
                target = flow
            else:
                # changed flow gets a related strategy fraction flow
                # holding the changes
                existing = StrategyFractionFlow.objects.filter(
                    fractionflow=flow, strategy=self.strategy)
                found = len(existing)
                if found > 1:
                    raise Exception('more than StrategyFractionFlow '
                                    'found per flow. This should not happen.')
                if found == 0:
                    to_create.append(
                        StrategyFractionFlow(strategy=self.strategy,
                                             fractionflow=flow,
                                             **values))
                    continue
                # if there already was a modification, overwrite it
                target = existing[0]

            for attr, value in values.items():
                setattr(target, attr, value)
            target.save()

        StrategyFractionFlow.objects.bulk_create(to_create)
Esempio n. 6
0
    def process(self):
        """Compute connected components of the input file and write them out.

        The input is opened with ``KgtkReader`` to find its key columns,
        then loaded as a graph with ``load_graph_from_csv`` using the first
        and third key column as edge endpoints.  If ``self.properties`` (a
        comma-separated string) is set, only edges whose label matches one
        of the listed values are kept.  One row
        ``[<vertex name>, 'connected_component', <component>]`` per vertex
        is written through a ``KgtkWriter``.

        Both the writer and the reader are closed when the method finishes,
        also on error, so that buffered output is not lost.
        """
        input_kr: KgtkReader = KgtkReader.open(
            self.input_file_path,
            error_file=self.error_file,
            who="input",
            options=self.input_reader_options,
            value_options=self.value_options,
            verbose=self.verbose,
            very_verbose=self.very_verbose,
        )
        try:
            input_key_columns: typing.List[int] = self.get_key_columns(
                input_kr, "input")
            label_col_idx = input_key_columns[1]
            # presumably the edge-property name the loader assigns to the
            # label column -- verify against graph_tool
            label = '{}{}'.format('c', label_col_idx)

            g = load_graph_from_csv(str(input_kr.file_path),
                                    not (self.undirected),
                                    skip_first=not (self.no_header),
                                    hashed=True,
                                    csv_options={'delimiter': '\t'},
                                    ecols=(input_key_columns[0],
                                           input_key_columns[2]))

            header = ['node1', 'label', 'node2']
            if self.properties:
                es = []
                for e in self.properties.split(','):
                    es.extend(find_edge(g, g.edge_properties[label], e))
                # rebuild the graph with only the selected edges
                g.clear_edges()
                g.add_edge_list(list(set(es)))
            comp, hist = label_components(g, directed=self.strong)

            ew: KgtkWriter = KgtkWriter.open(header,
                                             self.output_file_path,
                                             mode=input_kr.mode,
                                             require_all_columns=False,
                                             prohibit_extra_columns=True,
                                             fill_missing_columns=True,
                                             gzip_in_parallel=False,
                                             verbose=self.verbose,
                                             very_verbose=self.very_verbose)
            try:
                for v, c in enumerate(comp):
                    ew.write([
                        g.vertex_properties['name'][v], 'connected_component',
                        str(c)
                    ])
            finally:
                # flush and release the output even if a write fails
                ew.close()
        finally:
            input_kr.close()
Esempio n. 7
0
    def _chain_flows(self,
                     referenced_flows,
                     possible_new_targets,
                     formula,
                     new_material=None,
                     new_process=None,
                     prepend=True,
                     new_waste=-1,
                     new_hazardous=-1):
        '''
        creates a new flow (database row and graph edge) for each referenced
        flow and chains it in front of (prepend==True) or behind
        (prepend==False) the referenced flow; the other end of the new flow
        is the closest actor out of possible_new_targets

        referenced flows without a close actor or without an edge in the
        graph are skipped

        new_waste / new_hazardous override the copied values when >= 0
        (1 means True)

        returns the new flows and the deltas in the same order

        raises ValueError if the formula is absolute or a flow id occurs
        more than once in the graph

        ToDo: almost the same as shift_flows(), generalize!
        '''
        if formula.is_absolute:
            raise ValueError(
                'Formula for PrependFlow and AppendFlow must be relative')

        # actors at the end of the referenced flows the new flows relate to
        kept_field = 'destination' if prepend else 'origin'
        actors_kept = Actor.objects.filter(
            id__in=referenced_flows.values_list(kept_field))

        closest_dict = self.find_closest_actor(actors_kept,
                                               possible_new_targets)

        created_flows = []
        created_deltas = []

        # create new flows and add corresponding edges
        for flow in referenced_flows:
            if prepend:
                kept_id = flow.destination_id
            else:
                kept_id = flow.origin_id

            # no target actor found within range
            if kept_id not in closest_dict:
                continue

            # get new target out of dictionary
            new_id = closest_dict[kept_id]
            new_vertex = self._get_vertex(new_id)
            delta = formula.calculate_delta(flow.strategy_amount)

            # the edge corresponding to the referenced flow
            edges = util.find_edge(self.graph, self.graph.ep['id'], flow.id)
            edge_count = len(edges)
            if edge_count > 1:
                raise ValueError("FractionFlow.id ", flow.id,
                                 " is not unique in the graph")
            if edge_count == 0:
                print("Cannot find FractionFlow.id ", flow.id, " in the graph")
                continue
            edge = edges[0]

            if prepend:
                endpoints = [new_vertex, edge.source()]
            else:
                endpoints = [edge.target(), new_vertex]
            # lookup of an already existing edge between the endpoints;
            # NOTE(review): the result is not used anywhere below
            self.graph.edge(*endpoints)

            # create a new fractionflow for the implementation flow in db,
            # setting id to None creates new one when saving
            # while keeping attributes of original model;
            # the new flow is added with zero amount and to be changed
            # by calculated delta
            new_flow = copy_django_model(flow)
            new_flow.id = None
            new_flow.amount = 0
            if prepend:
                new_flow.destination_id = new_flow.origin_id
                new_flow.origin_id = new_id
            else:
                new_flow.origin_id = new_flow.destination_id
                new_flow.destination_id = new_id
            if new_material:
                new_flow.material = new_material
            if new_process:
                new_flow.process = new_process
            if new_waste >= 0:
                new_flow.waste = new_waste == 1
            if new_hazardous >= 0:
                new_flow.hazardous = new_hazardous == 1

            # strategy marks flow as new flow
            new_flow.strategy = self.strategy
            new_flow.save()

            # process doesn't have to be set, missing attributes
            # are marked with -1 in graph (if i remember correctly?)
            if new_flow.process is not None:
                process_id = new_flow.process.id
            else:
                process_id = -1

            # create the edge in the graph and mirror the flow attributes
            new_edge = self.graph.add_edge(*endpoints)
            self.graph.ep.id[new_edge] = new_flow.id
            self.graph.ep.amount[new_edge] = 0
            self.graph.ep.material[new_edge] = new_flow.material.id
            self.graph.ep.process[new_edge] = process_id
            self.graph.ep.waste[new_edge] = new_flow.waste
            self.graph.ep.hazardous[new_edge] = new_flow.hazardous

            created_flows.append(new_flow)
            created_deltas.append(delta)

        return created_flows, created_deltas
Esempio n. 8
0
    def _shift_flows(self,
                     referenced_flows,
                     possible_new_targets,
                     formula,
                     new_material=None,
                     new_process=None,
                     shift_origin=True,
                     reduce_reference=True,
                     new_waste=-1,
                     new_hazardous=-1):
        '''
        creates a new flow (database row and graph edge) for each referenced
        flow, with either the origin (shift_origin==True) or the destination
        replaced by the closest actor out of possible_new_targets

        referenced flows without a close actor or without an edge in the
        graph are skipped; the delta of a flow is capped at its
        strategy_amount

        if reduce_reference is True the referenced flows are returned as
        well, paired with the negated deltas, otherwise they stay untouched

        new_waste / new_hazardous override the copied values when >= 0
        (1 means True)

        returns the flows to be changed (new flows first) and the deltas to
        be added to each flow in the walker algorithm, in the same order

        raises ValueError if a flow id occurs more than once in the graph
        '''
        # the actors to keep (not shifted)
        kept_field = 'destination' if shift_origin else 'origin'
        actors_kept = Actor.objects.filter(
            id__in=referenced_flows.values_list(kept_field))

        # actors in possible new targets that are closest
        closest_dict = self.find_closest_actor(actors_kept,
                                               possible_new_targets)
        if formula.is_absolute:
            formula.set_total(referenced_flows)

        created_flows = []
        created_deltas = []
        reduced_flows = []
        reduced_deltas = []

        # create new flows and add corresponding edges
        for flow in referenced_flows:
            if shift_origin:
                kept_id = flow.destination_id
            else:
                kept_id = flow.origin_id

            # no target actor found within range
            if kept_id not in closest_dict:
                continue

            # get new target out of dictionary
            new_id = closest_dict[kept_id]
            new_vertex = self._get_vertex(new_id)

            # never shift more than the flow currently carries
            delta = formula.calculate_delta(flow.strategy_amount)
            delta = min(delta, flow.strategy_amount)

            # the edge corresponding to the referenced flow
            # (the one to be shifted)
            edges = util.find_edge(self.graph, self.graph.ep['id'], flow.id)
            edge_count = len(edges)
            if edge_count > 1:
                raise ValueError("FractionFlow.id ", flow.id,
                                 " is not unique in the graph")
            if edge_count == 0:
                print("Cannot find FractionFlow.id ", flow.id, " in the graph")
                continue
            edge = edges[0]

            if shift_origin:
                endpoints = [new_vertex, edge.target()]
            else:
                endpoints = [edge.source(), new_vertex]
            # lookup of an already existing edge between the endpoints;
            # NOTE(review): the result is not used anywhere below
            self.graph.edge(*endpoints)

            # create a new fractionflow for the implementation flow in db,
            # setting id to None creates new one when saving
            # while keeping attributes of original model;
            # the new flow is added with zero amount and to be changed
            # by calculated delta
            new_flow = copy_django_model(flow)
            new_flow.id = None
            new_flow.amount = 0
            if shift_origin:
                new_flow.origin_id = new_id
            else:
                new_flow.destination_id = new_id
            if new_material:
                new_flow.material = new_material
            if new_process:
                new_flow.process = new_process
            if new_waste >= 0:
                new_flow.waste = new_waste == 1
            if new_hazardous >= 0:
                new_flow.hazardous = new_hazardous == 1

            # strategy marks flow as new flow
            new_flow.strategy = self.strategy
            new_flow.save()

            # process doesn't have to be set, missing attributes
            # are marked with -1 in graph (if i remember correctly?)
            if new_flow.process is not None:
                process_id = new_flow.process.id
            else:
                process_id = -1

            # create the edge in the graph and mirror the flow attributes
            new_edge = self.graph.add_edge(*endpoints)
            self.graph.ep.id[new_edge] = new_flow.id
            self.graph.ep.amount[new_edge] = 0
            self.graph.ep.material[new_edge] = new_flow.material.id
            self.graph.ep.process[new_edge] = process_id
            self.graph.ep.waste[new_edge] = new_flow.waste
            self.graph.ep.hazardous[new_edge] = new_flow.hazardous

            created_flows.append(new_flow)
            created_deltas.append(delta)

            # reduce (resp. increase) the referenced flow by the same amount
            if reduce_reference:
                reduced_flows.append(flow)
                reduced_deltas.append(-delta)

        # new flows shall be created before modifying the existing ones
        all_flows = created_flows + reduced_flows
        all_deltas = created_deltas + reduced_deltas
        return all_flows, all_deltas
Esempio n. 9
0
 def get_edges_by_edge_prop(g, p, v):
     """Look up the edge property named ``p`` on ``g`` and return the
     result of ``find_edge`` for the value ``v``."""
     edge_prop = g.properties[('e', p)]
     return find_edge(g, prop=edge_prop, match=v)
Esempio n. 10
0
    def makeGraph(self,img,dia,xScale,yScale):
        """Build an undirected graph from the True pixels of ``img``.

        Every pixel ``(j, i)`` with ``img == True`` becomes a vertex whose
        'object' property is a dict with the keys 'imgIdx' ``(j, i)``,
        'coord' ``(i*yScale, j*xScale)``, 'nrOfPaths' (0) and 'diameter'
        (``dia[j][i]`` times the mean of ``xScale`` and ``yScale``).
        A pixel is linked to each of its neighbours ``(j, i+1)``,
        ``(j, i-1)``, ``(j+1, i)`` and ``(j-1, i)`` that is True as well.
        Edges carry an 'object' property (dict with 'coord1', 'coord2',
        'weight', 'RTP') and an int32 weight property.

        The property maps are registered on the graph as "ep", "w" and "vp".
        Progress and summary information is printed to stdout.

        Returns a ``gt.GraphView`` of the graph filtered by the result of
        ``gt.label_largest_component``.

        NOTE(review): Python 2 code (print statements).
        """
        print 'Building Graph Data Structure'
        start=time.time()
        G = Graph(directed=False)
        vprop=G.new_vertex_property('object')
        eprop=G.new_edge_property('object')
        epropW=G.new_edge_property("int32_t")
        # mean of both scales; used to scale the diameters
        avgScale=(xScale+yScale)/2

        # index arrays of all True pixels: test[0] is used as j, test[1] as i
        test=np.where(img==True)
        ss = np.shape(test)
        # cccc counts processed pixels, percentOld is the last printed progress
        cccc=0
        percentOld=0.0
        print str(np.round(percentOld,1))+'%'
        for (i,j) in zip(test[1],test[0]):
                cccc+=1
                percent=(float(cccc)/float(ss[1]))*100
                # print progress whenever it advanced by more than 10 points
                if percentOld+10< percent: 
                    print str(np.round(percent,1))+'%'
                    percentOld=percent
                nodeNumber1 = (float(i)*yScale,float(j)*xScale)
                # reuse the vertex of the current pixel if one with exactly this
                # property dict exists, otherwise create it
                if gu.find_vertex(G, vprop, {'imgIdx':(j,i),'coord':nodeNumber1, 'nrOfPaths':0, 'diameter':float(dia[j][i])*avgScale}):
                            v1=gu.find_vertex(G, vprop, {'imgIdx':(j,i),'coord':nodeNumber1, 'nrOfPaths':0, 'diameter':float(dia[j][i])*avgScale})[0]
                else:
                    v1=G.add_vertex()
                    vprop[G.vertex(v1)]={'imgIdx':(j,i),'coord':nodeNumber1, 'nrOfPaths':0, 'diameter':float(dia[j][i])*avgScale}
                # neighbour (j, i+1); the bare except silently drops every error
                # raised in the block (e.g. an index past the image border)
                try:
                    
                    if img[j,i+1] == True:
                        nodeNumber2 = (float(i+1)*yScale,float(j)*xScale)
                        if gu.find_vertex(G, vprop, {'imgIdx':(j,i+1),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j][i+1])*avgScale}):
                            v2=gu.find_vertex(G, vprop, {'imgIdx':(j,i+1),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j][i+1])*avgScale})[0]
                            # the edge is searched with coord1/coord2 swapped compared
                            # to how it is stored below, i.e. as it would have been
                            # stored when the neighbour was the current pixel
                            if gu.find_edge(G, eprop, {'coord1':vprop[v2]['coord'], 'coord2':vprop[v1]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False}):
                                pass
                            else:
                                e = G.add_edge(v1, v2)
                                # int32 weight: mean diameter in unscaled units, to the 4th power
                                epropW[e]=(((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)/avgScale)**4
                                eprop[e]={'coord1':vprop[v1]['coord'], 'coord2':vprop[v2]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False}
                        else:
                            # neighbour has no vertex yet: create it and connect it
                            v2=G.add_vertex()
                            vprop[G.vertex(v2)]={'imgIdx':(j,i+1),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j][i+1])*avgScale}
                            e = G.add_edge(v1, v2)
                            epropW[e]=(((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)/avgScale)**4
                            eprop[e]={'coord1':vprop[v1]['coord'], 'coord2':vprop[v2]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False}
                except:
                    pass
                # neighbour (j, i-1), same procedure
                # NOTE(review): for i == 0 the index -1 presumably wraps around to the
                # last column instead of raising (if img is a numpy array) -- confirm
                try:
                    if img[j,i-1] == True:
                        nodeNumber2 = (float(i-1)*yScale,float(j)*xScale)
                        if gu.find_vertex(G, vprop, {'imgIdx':(j,i-1),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j][i-1])*avgScale}):
                            v2=gu.find_vertex(G, vprop, {'imgIdx':(j,i-1),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j][i-1])*avgScale})[0]
                            if gu.find_edge(G, eprop, {'coord1':vprop[v2]['coord'], 'coord2':vprop[v1]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False}):
                                pass
                            else:
                                e = G.add_edge(v1, v2)
                                epropW[e]=(((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)/avgScale)**4
                                eprop[e]={'coord1':vprop[v1]['coord'], 'coord2':vprop[v2]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False}
                        else:
                            v2=G.add_vertex()
                            vprop[G.vertex(v2)]={'imgIdx':(j,i-1),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j][i-1])*avgScale}
                            e = G.add_edge(v1, v2)
                            epropW[e]=(((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)/avgScale)**4
                            eprop[e]={'coord1':vprop[v1]['coord'], 'coord2':vprop[v2]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False}
                except:pass
                # neighbour (j+1, i), same procedure
                try:
                    if img[j + 1,i] == True:
                        nodeNumber2 = (float(i)*yScale,float(j+1)*xScale)
                        if gu.find_vertex(G, vprop, {'imgIdx':(j+1,i),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j+1][i])*avgScale}):
                            v2=gu.find_vertex(G, vprop, {'imgIdx':(j+1,i),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j+1][i])*avgScale})[0]
                            if gu.find_edge(G, eprop, {'coord1':vprop[v2]['coord'], 'coord2':vprop[v1]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False}):
                                pass
                            else:
                                e = G.add_edge(v1, v2)
                                epropW[e]=(((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)/avgScale)**4
                                eprop[e]={'coord1':vprop[v1]['coord'], 'coord2':vprop[v2]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False}
                        else:
                            v2=G.add_vertex()
                            vprop[G.vertex(v2)]={'imgIdx':(j+1,i),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j+1][i])*avgScale}
                            e = G.add_edge(v1, v2)
                            epropW[e]=(((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)/avgScale)**4
                            eprop[e]={'coord1':vprop[v1]['coord'], 'coord2':vprop[v2]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False}
                except:pass
                # neighbour (j-1, i), same procedure (same wrap-around caveat for j == 0)
                try:
                    if img[j - 1,i] == True:
                        nodeNumber2 = (float(i)*yScale,float(j-1)*xScale)
                        if gu.find_vertex(G, vprop, {'imgIdx':(j-1,i),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j-1][i])*avgScale}):
                            v2=gu.find_vertex(G, vprop, {'imgIdx':(j-1,i),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j-1][i])*avgScale})[0]
                            if gu.find_edge(G, eprop, {'coord1':vprop[v2]['coord'], 'coord2':vprop[v1]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False}):
                                pass
                            else:
                                e = G.add_edge(v1, v2)
                                epropW[e]=(((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)/avgScale)**4
                                eprop[e]={'coord1':vprop[v1]['coord'], 'coord2':vprop[v2]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False}
                        else:
                            v2=G.add_vertex()
                            vprop[G.vertex(v2)]={'imgIdx':(j-1,i),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j-1][i])*avgScale}
                            e = G.add_edge(v1, v2)
                            epropW[e]=(((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)/avgScale)**4
                            eprop[e]={'coord1':vprop[v1]['coord'], 'coord2':vprop[v2]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False}
                except: pass
#                    
        print '100.0%'
        print 'selecting largest connected component'
        # register the property maps on the graph under fixed names
        G.edge_properties["ep"] = eprop
        G.edge_properties["w"] = epropW
        G.vertex_properties["vp"] = vprop
        # l is used below as the vertex filter of the returned view
        l = gt.label_largest_component(G)
        print(l.a)
        u = gt.GraphView(G, vfilt=l)
        print '# vertices'
        print(u.num_vertices())
        print(G.num_vertices())
        print '# edges'
        print(u.num_edges())
        print 'building graph finished in: '+str(time.time()-start)+'s'
        return u 
Esempio n. 11
0
def gn(g, odir, focus=None):
    """
    Run a Girvan-Newman style edge-removal loop on ``g`` and dump the
    clusters that appear along the way.

    The edge with the highest betweenness is removed repeatedly until the
    graph has no edges left.  Whenever the cluster structure changes, one
    "<iteration>_<cluster>.json" graph dump and one
    "<iteration>_<cluster>.txt" member list are written per cluster, and an
    "index.csv" matrix (vertex name x iteration -> cluster number, -1 where
    nothing was recorded) is written at the end.
    -----------------------------------------
    g:     graph_tool graph with a 'weight' edge property and a 'name'
           vertex property; it is modified in place (edges are removed)
    odir:  output directory, created if it does not exist
    focus: optional vertex name; when given, only the cluster reported by
           gt_cc_out(g, focus) is tracked and written (always as cluster 0)

    Raises KeyboardInterrupt when ``focus`` is given but is not found in
    the 'name' property (kept as-is for backward compatibility).
    """

    total_edges = g.num_edges()
    vprint(INFO,
           'Applying Girvan Newman algorithm on %i edges...' % (total_edges))
    if focus:
        vprint(INFO, 'Focused on %s' % (focus))

    ### Pull some properties of the graph out
    weight = g.ep['weight']
    name = g.vp['name']

    ### If focus, make sure focus actually there!
    # NOTE(review): KeyboardInterrupt is an odd choice for "focus not found",
    # but callers may rely on it, so the exception type is left unchanged.
    if focus and focus not in name:
        raise KeyboardInterrupt

    ### Initialize output
    vprint(INFO, 'Output: %s' % (os.path.abspath(odir)))
    if not os.path.exists(odir):
        vprint(INFO, '\tDirectory did not exist!  Created.')
        os.mkdir(odir)
    json_name = os.path.join(odir, "%i_%i.json")
    text_name = os.path.join(odir, "%i_%i.txt")
    idx_name = os.path.join(odir, "index.csv")

    ### Create new property for graph & configure for fast
    g.ep["ebc"] = g.new_edge_property("float")
    g.set_fast_edge_removal(True)

    ### Initialize variables for tracking connected components
    ### and indexing.  cc_cts holds one entry per recorded iteration, so its
    ### length doubles as the next iteration number.
    cc_cts = [g.num_vertices()] if focus else [0]
    index = {name[v]: [(0, 0)] for v in g.vertices()}

    ### Begin Girvan Newman algorithm
    gt_bt(g, eprop=g.ep["ebc"], weight=weight, norm=False)
    while g.num_edges() != 0:
        ### Get & remove edge of max() betweenness; recalc betweenness
        ebc = g.ep["ebc"]
        maxedge = find_edge(g, ebc, ebc.a.max())[0]
        g.ep["ebc"] = g.new_edge_property("float")
        g.remove_edge(maxedge)
        gt_bt(g, eprop=g.ep["ebc"], weight=weight, norm=False)

        ### If we're in a focused situation, find relevant cc
        if focus:
            cc_lbl = gt_cc_out(g, focus)
            cc_ct = cc_lbl.a.sum()
            g.set_vertex_filter(cc_lbl)  # Mask all other edge/verts

            ### If edge removed creates new clusters
            if cc_ct != cc_cts[-1]:
                iter_num = len(cc_cts)
                cc_cts.append(cc_ct)

                ### Write sections
                gt_to_json(g, json_name % (iter_num, 0))
                # The text file shares the JSON file's (iteration, 0) stem.
                # The original formatted it with an unbound local `i`.
                with open(text_name % (iter_num, 0), 'w') as out:
                    for v in g.vertices():
                        if cc_lbl[v]:
                            out.write(name[v] + '\n')
                            index[name[v]].append((iter_num, 0))

        ### Otherwise...
        else:
            ### First, gather connected components
            cc_lbl, _ = gt_cc(g, directed=False)
            cc_ct = cc_lbl.a.max()
            ### If we've generated new clusters...
            if cc_ct != cc_cts[-1]:
                iter_num = len(cc_cts)
                cc_cts.append(cc_ct)

                ### Group the vertices by component label
                members = {}
                for v in g.vertices():
                    members.setdefault(cc_lbl[v], []).append(v)

                for lbl in sorted(members):
                    verts = members[lbl]
                    if len(verts) == 1:
                        # Singleton cluster: only indexed, no files written.
                        # Use the cluster's own vertex, not a loop leftover.
                        index[name[verts[0]]].append((iter_num, lbl))
                        continue

                    ### Mask everything but this cluster while dumping it
                    mask = g.new_vertex_property("bool")
                    for v in verts:
                        mask[v] = True
                    g.set_vertex_filter(mask)
                    try:
                        gt_to_json(g, json_name % (iter_num, lbl))
                        with open(text_name % (iter_num, lbl), 'w') as out:
                            for v in verts:
                                out.write(name[v] + '\n')
                                index[name[v]].append((iter_num, lbl))
                    finally:
                        g.set_vertex_filter(None)

        # Progress bar
        progress_bar(total_edges - g.num_edges(), total_edges, 100)

    ### Final step: break down index into legible file
    progress_bar_complete(total_edges)

    vprint(INFO, 'Making index file.')

    # One row per vertex name (sorted), one column per recorded iteration.
    m = np.ones((len(index), len(cc_cts))) * -1
    ordered_keys = sorted(index.keys())
    for row, key in enumerate(ordered_keys):
        for (iter_num, clust_num) in index[key]:
            m[row, iter_num] = clust_num
    write_mtx(idx_name, m, ordered_keys, range(len(cc_cts)))

    vprint(INFO, 'Girvan Newman Complete!')

    return None
Esempio n. 12
0
    def process(self):
        """Compute connected components of the input graph and write them out.

        Reads the edge file at ``self.input_file_path``, builds a graph from
        it (undirected when ``self.undirected`` is set), optionally keeps only
        the edges whose label-column value is listed in the comma-separated
        ``self.properties``, labels the components (``self.strong`` is passed
        as ``directed`` to ``label_components``), drops clusters with fewer
        than ``self.minimum_cluster_size`` members, renames the survivors via
        ``self.name_clusters`` and writes one
        ``(name, 'connected_component', cluster_id)`` row per member to
        ``self.output_file_path``.

        The writer is closed even if clustering, naming or writing raises.
        """
        input_kr: KgtkReader = KgtkReader.open(
            self.input_file_path,
            error_file=self.error_file,
            who="input",
            options=self.input_reader_options,
            value_options=self.value_options,
            verbose=self.verbose,
            very_verbose=self.very_verbose,
        )

        input_key_columns: typing.List[int] = self.get_key_columns(
            input_kr, "input")
        # The second key column is used as the label column.
        label_col_idx = input_key_columns[1]
        label = input_kr.column_names[label_col_idx]

        g = load_graph_from_kgtk(input_kr, directed=not self.undirected)

        if self.properties:
            # Rebuild the edge set from only the requested label values.
            es = []
            for wanted in self.properties.split(','):
                es += find_edge(g, g.edge_properties[label], wanted)
            g.clear_edges()
            g.add_edge_list(list(set(es)))
        comp, _hist = label_components(g, directed=self.strong)

        ew: KgtkWriter = KgtkWriter.open(['node1', 'label', 'node2'],
                                         self.output_file_path,
                                         mode=input_kr.mode,
                                         require_all_columns=False,
                                         prohibit_extra_columns=True,
                                         fill_missing_columns=True,
                                         gzip_in_parallel=False,
                                         verbose=self.verbose,
                                         very_verbose=self.very_verbose)
        try:
            # Group vertex names by the string form of their component label.
            clusters: typing.MutableMapping[str, typing.List[str]] = dict()
            for v, c in enumerate(comp):
                clusters.setdefault(str(c), []).append(
                    g.vertex_properties['name'][v])

            trimmed_clusters: typing.MutableMapping[str, typing.List[str]] = {
                cluster_id: members
                for cluster_id, members in clusters.items()
                if len(members) >= self.minimum_cluster_size
            }

            named_clusters: typing.MutableMapping[
                str, typing.List[str]] = self.name_clusters(trimmed_clusters)
            # Sorted ids and sorted members keep the output order stable.
            for cluster_id in sorted(named_clusters.keys()):
                for name in sorted(named_clusters[cluster_id]):
                    ew.write([name, 'connected_component', cluster_id])
        finally:
            ew.close()
Esempio n. 13
0
    def makeGraph(self, img, dia, xScale, yScale):
        """Build a 4-connected pixel graph and return its largest component.

        Every pixel with ``img[row, col] == True`` becomes a vertex whose
        'vp' property is a dict with keys 'imgIdx', 'coord', 'nrOfPaths' and
        'diameter'.  Horizontally/vertically adjacent True pixels are joined
        by one undirected edge carrying an 'ep' dict ('coord1', 'coord2',
        'weight', 'RTP') and a 'w' value stored in an int32_t property.

        img:    2-D array indexed as img[row, col]
                (presumably a boolean skeleton mask - confirm with caller)
        dia:    per-pixel diameters indexed as dia[row][col]
        xScale: scale factor applied to the row index in 'coord'
        yScale: scale factor applied to the column index in 'coord'
                NOTE(review): column*yScale / row*xScale looks swapped but is
                kept exactly as the original computed it.

        Returns a GraphView of the graph filtered to its largest component.
        """
        print('Building Graph Data Structure')
        start = time.time()
        G = Graph(directed=False)
        vprop = G.new_vertex_property('object')
        eprop = G.new_edge_property('object')
        epropW = G.new_edge_property("int32_t")
        avgScale = (xScale + yScale) / 2

        test = np.where(img == True)  # noqa: E712 - element-wise comparison
        ss = np.shape(test)
        percentOld = 0.0
        print(str(np.round(percentOld, 1)) + '%')

        # (row offset, column offset) in the original probing order:
        # right, left, below, above.
        neighbourOffsets = ((0, 1), (0, -1), (1, 0), (-1, 0))

        for cccc, (i, j) in enumerate(zip(test[1], test[0]), 1):
            percent = (float(cccc) / float(ss[1])) * 100
            if percentOld + 10 < percent:
                print(str(np.round(percent, 1)) + '%')
                percentOld = percent

            v1, _ = self._findOrAddVertex(
                G, vprop,
                self._pixelRecord(j, i, dia, xScale, yScale, avgScale))

            for dj, di in neighbourOffsets:
                nj, ni = j + dj, i + di
                if nj < 0 or ni < 0:
                    # A negative index would not raise; it would wrap around
                    # to the opposite image border and create a bogus vertex.
                    continue
                try:
                    if img[nj, ni] == True:  # noqa: E712
                        v2, created = self._findOrAddVertex(
                            G, vprop,
                            self._pixelRecord(nj, ni, dia, xScale, yScale,
                                              avgScale))
                        # A brand-new vertex cannot have an edge yet, so the
                        # duplicate-edge scan is only needed for known ones.
                        self._linkPixels(G, vprop, eprop, epropW, v1, v2,
                                         avgScale, checkExisting=not created)
                except Exception:
                    # Best-effort, as in the original: a neighbour beyond the
                    # bottom/right border raises IndexError and is skipped.
                    # Narrowed from a bare except so Ctrl-C still works.
                    pass

        print('100.0%')
        print('selecting largest connected component')
        G.edge_properties["ep"] = eprop
        G.edge_properties["w"] = epropW
        G.vertex_properties["vp"] = vprop
        largest = gt.label_largest_component(G)
        print(largest.a)
        u = gt.GraphView(G, vfilt=largest)
        print('# vertices')
        print(u.num_vertices())
        print(G.num_vertices())
        print('# edges')
        print(u.num_edges())
        print('building graph finished in: ' + str(time.time() - start) + 's')
        return u

    def _pixelRecord(self, j, i, dia, xScale, yScale, avgScale):
        """Return the vertex property dict for the pixel at row j, column i."""
        return {
            'imgIdx': (j, i),
            'coord': (float(i) * yScale, float(j) * xScale),
            'nrOfPaths': 0,
            'diameter': float(dia[j][i]) * avgScale
        }

    def _findOrAddVertex(self, G, vprop, record):
        """Return (vertex, created) for the vertex whose property equals record."""
        matches = gu.find_vertex(G, vprop, record)
        if matches:
            return matches[0], False
        v = G.add_vertex()
        vprop[v] = record
        return v, True

    def _linkPixels(self, G, vprop, eprop, epropW, v1, v2, avgScale,
                    checkExisting):
        """Add the edge v1-v2 unless checkExisting finds it stored as v2-v1."""
        meanDiameter = (vprop[v1]['diameter'] + vprop[v2]['diameter']) / 2
        weight = meanDiameter**4
        if checkExisting and gu.find_edge(
                G, eprop, {
                    'coord1': vprop[v2]['coord'],
                    'coord2': vprop[v1]['coord'],
                    'weight': weight,
                    'RTP': False
                }):
            # Already added when v2 was the centre pixel.
            return
        e = G.add_edge(v1, v2)
        epropW[e] = (meanDiameter / avgScale)**4
        eprop[e] = {
            'coord1': vprop[v1]['coord'],
            'coord2': vprop[v2]['coord'],
            'weight': weight,
            'RTP': False
        }
Esempio n. 14
0
def gn(g, odir, focus=None):
    """
    Takes graph and uses Girvan Newman to slowly break graph down: the edge
    with the highest betweenness is removed until no edges remain.
    Can operate in faster mode that constrains view to single cluster.
    Creates output directory "odir" (if missing) in which
    "<iteration>_<cluster>.json" graphs and "<iteration>_<cluster>.txt"
    member lists are placed, plus "index.csv" mapping each vertex name to
    its cluster number per recorded iteration (-1 where nothing was recorded).
    -----------------------------------------
    g:     graph_tool graph with 'weight' edge property and 'name' vertex
           property; modified in place (edges are removed)
    odir:  output_directory
    focus: optional vertex name; if given, only the cluster returned by
           gt_cc_out(g, focus) is tracked, always written as cluster 0

    Raises KeyboardInterrupt if focus is given but not found in 'name'
    (exception type kept for backward compatibility).
    """

    total_edges = g.num_edges()
    vprint(INFO, 'Applying Girvan Newman algorithm on %i edges...' % (total_edges))
    if focus:
        vprint(INFO, 'Focused on %s' % (focus))

    ### Pull some properties of the graph out
    weight = g.ep['weight']
    name = g.vp['name']

    ### If focus, make sure focus actually there!
    # NOTE(review): unusual exception type, left unchanged for callers.
    if focus and focus not in name:
        raise KeyboardInterrupt

    ### Initialize output
    vprint(INFO, 'Output: %s' % (os.path.abspath(odir)))
    if not os.path.exists(odir):
        vprint(INFO, '\tDirectory did not exist!  Created.')
        os.mkdir(odir)
    json_name = os.path.join(odir, "%i_%i.json")
    text_name = os.path.join(odir, "%i_%i.txt")
    idx_name = os.path.join(odir, "index.csv")

    ### Create new property for graph & configure for fast
    g.ep["ebc"] = g.new_edge_property("float")
    g.set_fast_edge_removal(True)

    ### Initialize variables for tracking connected components
    ### and indexing; len(cc_cts) is the next iteration number
    if focus:
        cc_cts = [g.num_vertices()]
    else:
        cc_cts = [0]
    index = {name[v]: [(0, 0)] for v in g.vertices()}

    ### Begin Girvan Newman algorithm
    gt_bt(g, eprop=g.ep["ebc"], weight=weight, norm=False)
    while g.num_edges() != 0:
        ### Get & remove edge of max() betweenness; recalc betweenness
        betweenness = g.ep["ebc"]
        maxedge = find_edge(g, betweenness, betweenness.a.max())[0]
        g.ep["ebc"] = g.new_edge_property("float")
        g.remove_edge(maxedge)
        gt_bt(g, eprop=g.ep["ebc"], weight=weight, norm=False)

        if focus:
            ### Focused situation: find relevant cc
            cc_lbl = gt_cc_out(g, focus)
            cc_ct = cc_lbl.a.sum()
            g.set_vertex_filter(cc_lbl)  # Mask all other edge/verts

            ### If edge removed creates new clusters
            if cc_ct != cc_cts[-1]:
                iter_num = len(cc_cts)
                cc_cts.append(cc_ct)

                ### Write sections
                gt_to_json(g, json_name % (iter_num, 0))
                # Same (iteration, 0) stem as the JSON dump; the original
                # used an unbound local `i` here and never closed the file.
                with open(text_name % (iter_num, 0), 'w') as out:
                    for v in g.vertices():
                        if cc_lbl[v]:
                            out.write(name[v] + '\n')
                            index[name[v]].append((iter_num, 0))
        else:
            ### Otherwise: gather all connected components
            cc_lbl, _ = gt_cc(g, directed=False)
            cc_ct = cc_lbl.a.max()
            ### If we've generated new clusters...
            if cc_ct != cc_cts[-1]:
                iter_num = len(cc_cts)
                cc_cts.append(cc_ct)

                ### Collect the vertices of each cc label
                by_label = {}
                for v in g.vertices():
                    by_label.setdefault(cc_lbl[v], []).append(v)

                for lbl in sorted(by_label):
                    cluster = by_label[lbl]
                    if len(cluster) == 1:
                        # Singletons are indexed only.  Record the cluster's
                        # own vertex rather than a stale loop variable.
                        index[name[cluster[0]]].append((iter_num, lbl))
                        continue

                    ### Filter the graph down to this cluster while dumping
                    mask = g.new_vertex_property("bool")
                    for v in cluster:
                        mask[v] = True
                    g.set_vertex_filter(mask)
                    try:
                        gt_to_json(g, json_name % (iter_num, lbl))
                        with open(text_name % (iter_num, lbl), 'w') as out:
                            for v in cluster:
                                out.write(name[v] + '\n')
                                index[name[v]].append((iter_num, lbl))
                    finally:
                        g.set_vertex_filter(None)

        # Progress bar
        progress_bar(total_edges - g.num_edges(), total_edges, 100)

    ### Final step: break down index into legible file
    progress_bar_complete(total_edges)

    vprint(INFO, 'Making index file.')

    # Rows follow the sorted vertex names, columns the recorded iterations.
    m = np.ones((len(index), len(cc_cts))) * -1
    ordered_keys = sorted(index.keys())
    for row, key in enumerate(ordered_keys):
        for (iter_num, clust_num) in index[key]:
            m[row, iter_num] = clust_num
    write_mtx(idx_name, m, ordered_keys, range(len(cc_cts)))

    vprint(INFO, 'Girvan Newman Complete!')

    return None