Esempio n. 1
0
    def setUp(self):
        """Build the fixture models and delete leftover temporary model files.

        A PageRank model and a connected-components model are each created
        from an empty ``SGraph``.  Afterwards three temporary model paths,
        formatted with the externally defined ``temp_number``, are handed to
        the private ``__remove_file`` helper.
        """
        self.pr_model = gl.pagerank.create(gl.SGraph())
        self.cc_model = gl.connected_components.create(gl.SGraph())

        stale_path_templates = (
            './tmp_model-%d',
            '/tmp/tmp_model-%d',
            '/tmp/tmp_model2-%d',
        )
        for template in stale_path_templates:
            self.__remove_file(template % temp_number)
def pagerank_triple_apply(input_graph, reset_prob=0.15, threshold=1e-3,
                          max_iterations=20):
    """Run an iterative PageRank on a copy of ``input_graph`` via triple_apply.

    Args:
        input_graph: graph whose ``vertices`` and ``edges`` seed the working
            copy; the argument itself is not reassigned.
        reset_prob: value blended into every score after each pass.
        threshold: the loop stops once the summed absolute score change is
            no longer greater than this value.
        max_iterations: upper bound on the number of passes.

    Returns:
        The working graph with a 'pagerank' vertex field (the helper fields
        'total_weight' and 'prev_pagerank' are removed; 'l1_delta' is left
        in place).
    """
    g = gl.SGraph(input_graph.vertices, input_graph.edges)

    # Two passes over the edges using the externally defined sum_weight and
    # normalize_weight callbacks; the temporary vertex field is dropped
    # once the 'weight' edge field has been rewritten.
    g.vertices['total_weight'] = 0.0
    g = g.triple_apply(sum_weight, ['total_weight'])
    g = g.triple_apply(normalize_weight, ['weight'])
    del g.vertices['total_weight']

    # Every vertex starts from a score of 1.0.
    g.vertices['prev_pagerank'] = 1.0
    iteration = 0
    l1_change = len(g.vertices)
    started_at = time.time()
    while l1_change > threshold and iteration < max_iterations:
        g.vertices['pagerank'] = 0.0
        g = g.triple_apply(pagerank_update_fn, ['pagerank'])

        damped = g.vertices['pagerank'] * (1 - reset_prob) + reset_prob
        g.vertices['pagerank'] = damped

        difference = g.vertices['pagerank'] - g.vertices['prev_pagerank']
        g.vertices['l1_delta'] = difference.apply(lambda d: abs(d))
        l1_change = g.vertices['l1_delta'].sum()

        g.vertices['prev_pagerank'] = g.vertices['pagerank']
        print('Iteration %d: total pagerank changed in L1 = %f'
              % (iteration, l1_change))
        iteration += 1

    print('Triple apply pagerank finished in: %f secs'
          % (time.time() - started_at))
    del g.vertices['prev_pagerank']
    return g
Esempio n. 3
0
def select_subgraph(movie1='Point Break', movie2='Shrek'):
    """Return a new graph made of the edges that end at the two given ids.

    The full graph comes from ``construct_graph()``; only edges whose
    destination id is ``movie1`` or ``movie2`` are copied into a fresh
    ``SGraph``.
    """
    wanted_destinations = [movie1, movie2]
    empty_graph = graphlab.SGraph()
    incoming_edges = construct_graph().get_edges(dst_ids=wanted_destinations)
    return empty_graph.add_edges(incoming_edges,
                                 src_field='__src_id',
                                 dst_field='__dst_id')
Esempio n. 4
0
def make_graph(target_id, followers, apis, update=False):
    """Build a follower graph around ``target_id``.

    Args:
        target_id: identifier of the central account; passed to
            ``make_verts`` and ``make_edges``.
        followers: follower identifiers; passed to ``make_edges`` and
            iterated to create one vertex per follower.
        apis: currently unused; kept so existing callers keep working.
        update: currently unused; kept so existing callers keep working.

    Returns:
        An ``SGraph`` holding the created vertices and edges.
    """
    # NOTE(review): an on-disk cache (load '<target_id>.graph' when present
    # unless ``update`` is set, save after building) was sketched here but
    # never enabled; ``update`` therefore has no effect yet.

    verts = [make_verts(target_id)]
    # The previous code passed the builtin ``id`` to make_verts, which can
    # never be a follower identifier.
    # NOTE(review): assumes ``followers`` iterates over follower ids and that
    # make_verts/make_edges return values accepted by add_vertices/add_edges
    # - confirm against their definitions.
    verts.extend(make_verts(follower_id) for follower_id in followers)

    edges = make_edges(followers, target_id)

    # The previous code used ``g - graphlab.SGraph()`` (a subtraction on an
    # undefined name) and never attached the vertices or edges to the graph.
    g = graphlab.SGraph().add_vertices(verts).add_edges(edges)
    return g
def triple_apply_knn(features):
    """Score every pair of rows in ``features`` and collect top-k neighbours.

    Each row becomes a vertex keyed by its row number, every unordered pair
    of rows becomes an edge, and a triple_apply pass stores a symmetric
    divergence (built from ``entropy`` on L1-normalised 'X1' vectors) in the
    edge field 'distance'.  For each vertex the ``N`` edges selected by
    ``topk('distance', k=N, reverse=True)`` are reduced to the id of the
    vertex at the other end.

    ``N`` is not defined in this function and must exist in the enclosing
    scope.

    Returns:
        A ``gl.SArray`` built from the per-vertex neighbour results.
    """

    def jensen_shannon_on_edge(src, edge, dst):
        # L1-normalise both endpoint vectors before comparing them.
        p_unit = src['X1'] / norm(src['X1'], ord=1)
        q_unit = dst['X1'] / norm(dst['X1'], ord=1)
        midpoint = 0.5 * (p_unit + q_unit)
        edge['distance'] = 0.5 * (entropy(p_unit, midpoint) +
                                  entropy(q_unit, midpoint))
        return (src, edge, dst)

    row_count = len(features)
    vertex_frame = gl.SFrame(features).add_row_number('row_id')
    graph = gl.SGraph().add_vertices(vertex_frame, vid_field='row_id')

    # One placeholder edge per unordered pair of row numbers.
    pair_edges = []
    for u, v in itertools.combinations(range(row_count), 2):
        pair_edges.append(gl.Edge(u, v, attr={'distance': None}))
    graph = graph.add_edges(pair_edges)

    scored = graph.triple_apply(jensen_shannon_on_edge,
                                mutated_fields=['distance'])

    top_neighbors = []
    for idx in xrange(row_count):
        # Edges touching idx on either side.
        touching = scored.get_edges(src_ids=[idx, None], dst_ids=[None, idx])
        selected = touching.topk('distance', k=N, reverse=True)
        other_end = selected.apply(
            lambda row: row['__src_id'] if row['__dst_id'] == idx
            else row['__dst_id'])
        top_neighbors.append(other_end)

    return gl.SArray(top_neighbors)
Esempio n. 6
0
def _wrap_function_return(val):
    """
    Recursively walk ``val``, opening lists and dictionaries, and convert
    every occurrence of UnityGraphProxy to an SGraph, UnitySFrameProxy to an
    SFrame, and UnitySArrayProxy to an SArray.

    A _UnityModel whose '__uid__' field is registered in
    ``class_uid_to_class`` is wrapped in the registered class; if that lookup
    or construction fails the model is returned unchanged.  Any other value
    is returned as-is.

    Type checks compare ``type(val)`` exactly, so subclasses of list/dict
    (and of the proxy types) are passed through untouched.
    """
    if type(val) == _UnityGraphProxy:
        return _gl.SGraph(_proxy=val)
    elif type(val) == _UnitySFrameProxy:
        return _gl.SFrame(_proxy=val)
    elif type(val) == _UnitySArrayProxy:
        return _gl.SArray(_proxy=val)
    elif type(val) == _UnityModel:
        # we need to cast it up to the appropriate type
        try:
            if '__uid__' in val.list_fields():
                uid = val.get('__uid__')
                if uid in class_uid_to_class:
                    return class_uid_to_class[uid](_proxy=val)
        except Exception:
            # Best effort: fall back to the raw model.  Catching Exception
            # (not a bare except) lets KeyboardInterrupt/SystemExit through.
            pass
        return val
    elif type(val) == list:
        return [_wrap_function_return(i) for i in val]
    elif type(val) == dict:
        return {i: _wrap_function_return(val[i]) for i in val}
    else:
        return val
Esempio n. 7
0
def get_sgraph_from_neo4j_json(json_filename):
    '''
    Build an SGraph from a JSON file exported by Neo4j.

    Args:
        json_filename: The name of the JSON file created by Neo4j.

    Returns:
        SGraph whose edges come from the 'relationships' entities (linked
        through their 'startNode'/'endNode' fields) and whose vertices come
        from the 'nodes' entities (keyed by 'id').
    '''
    # Each line of the file is parsed as a dict into the single column 'X1'.
    raw = gl.SFrame.read_csv(json_filename,
                             header=False,
                             column_type_hints=dict,
                             verbose=False)

    # Spread the dict keys into columns, then emit one row per 'data' item.
    unpacked = raw.unpack('X1', column_name_prefix='')
    data_rows = unpacked[['data']].stack('data', new_column_name='data')

    nodes_sf = extract_entities(data_rows, 'nodes')
    edges_sf = extract_entities(data_rows, 'relationships')

    with_edges = gl.SGraph().add_edges(edges_sf,
                                       src_field='startNode',
                                       dst_field='endNode')
    return with_edges.add_vertices(nodes_sf, vid_field='id')
Esempio n. 8
0
def load_graph_task(task):
    """Read a space-delimited edge list and store the graph on ``task``.

    The file named by ``task.params['csv']`` is read without a header and
    with ``long`` as the column type hint; columns 'X1' and 'X2' are used as
    edge source and destination.  The resulting graph is written to
    ``task.outputs['graph']``.
    """
    edge_rows = gl.SFrame.read_csv(task.params['csv'],
                                   header=False,
                                   delimiter=' ',
                                   column_type_hints=long)
    graph = gl.SGraph().add_edges(edge_rows, src_field='X1', dst_field='X2')
    task.outputs['graph'] = graph
Esempio n. 9
0
    def test_shortest_path(self):
        """Exercise shortest_path: save/load, get_path and option reporting.

        Nothing is checked when the "sssp" toolkit function is not listed by
        ``get_unity().list_toolkit_functions()``.
        """
        if "sssp" not in get_unity().list_toolkit_functions():
            return

        first_model = gl.shortest_path.create(self.graph, source_vid=0)
        print(first_model)
        first_model.summary()
        self.__test_model_save_load_helper__(first_model)

        second_model = gl.shortest_path.create(self.graph, source_vid=0)
        print(second_model)
        self.__test_model_save_load_helper__(second_model)

        # get_path on a chain 0 -> 1 -> ... -> 10: vertex k sits at
        # distance k from the source.
        chain_edges = [gl.Edge(v, v + 1) for v in range(10)]
        chain_graph = gl.SGraph().add_edges(chain_edges)
        chain_model = gl.shortest_path.create(chain_graph, source_vid=0)
        for end in range(10):
            expected_path = [(hop, float(hop)) for hop in range(end + 1)]
            self.assertSequenceEqual(chain_model.get_path(end), expected_path)

        # get_path on a star centred at 0 with leaves 1..10.
        star_edges = [gl.Edge(0, leaf) for leaf in range(1, 11)]
        star_graph = gl.SGraph().add_edges(star_edges)
        star_model = gl.shortest_path.create(star_graph, source_vid=0)
        for leaf in range(1, 11):
            self.assertSequenceEqual(star_model.get_path(leaf),
                                     [(0, 0.0), (leaf, 1.0)])

        # A pre-existing 'distance' vertex field must not affect the result.
        star_graph.vertices['distance'] = 0
        rerun_model = gl.shortest_path.create(star_graph, source_vid=0)
        for leaf in range(1, 11):
            self.assertSequenceEqual(rerun_model.get_path(leaf),
                                     [(0, 0.0), (leaf, 1.0)])

        defaults = gl.shortest_path.get_default_options()
        print(defaults)
        self.assertTrue(len(defaults.keys()) == 2)
        self.assertTrue(defaults['weight_field'] == "")
        self.assertTrue(defaults['max_distance'] == 1e30)

        capped_model = gl.shortest_path.create(chain_graph,
                                               source_vid=0,
                                               max_distance=3)
        current = capped_model.get_current_options()
        print(current)
        self.assertTrue(len(current.keys()) == 2)
        self.assertTrue(current['weight_field'] == "")
        self.assertTrue(current['max_distance'] == 3)
Esempio n. 10
0
def loadData():
    """Load the edge SFrame stored at ``sframeDataFolder`` and build a graph.

    Returns:
        An SGraph whose edges are taken from the loaded SFrame, using its
        'src' and 'dst' columns as endpoints.
    """
    edgesData = gl.load_sframe(sframeDataFolder)
    print('num_rows:%d ' % edgesData.num_rows())

    # create graph
    G = gl.SGraph()
    G = G.add_edges(edges=edgesData, src_field='src', dst_field='dst')
    # The previous code spelled this statement 'pritn', a syntax error.
    print('create graph done!')
    return G
Esempio n. 11
0
 def __init__(self, vertices=None, edges=None, child=None):
     """Record the child link and, when data is given, build the graph.

     ``self.parent`` always starts as None.  ``self.g`` is only assigned
     when both ``vertices`` and ``edges`` are truthy; the frames are
     expected to use the '__id', '__src_id' and '__dst_id' columns.
     """
     self.parent = None
     self.child = child
     if not (vertices and edges):
         return
     graph_columns = {
         'vid_field': '__id',
         'src_field': '__src_id',
         'dst_field': '__dst_id',
     }
     self.g = gl.SGraph(vertices=vertices, edges=edges, **graph_columns)
Esempio n. 12
0
def networkx_to_graphlab(g):
    """Convert a networkx graph into a GraphLab ``SGraph``.

    Node and edge attribute dicts are carried over as vertex/edge
    attributes.  When ``g.is_directed()`` is false every edge is added a
    second time with its endpoints swapped.
    """
    converted = gl.SGraph()

    vertex_list = [gl.Vertex(node, attr=attrs)
                   for node, attrs in g.nodes(data=True)]
    converted = converted.add_vertices(vertex_list)

    forward_edges = [gl.Edge(u, v, attr=attrs)
                     for u, v, attrs in g.edges(data=True)]
    converted = converted.add_edges(forward_edges)

    if g.is_directed():
        return converted

    backward_edges = [gl.Edge(v, u, attr=attrs)
                      for u, v, attrs in g.edges(data=True)]
    return converted.add_edges(backward_edges)
Esempio n. 13
0
def load_data(dpath, maxn=None):
    """Load the citation graph from ``dpath``, dropping self-citations.

    Args:
        dpath: path prefix; 'cites.csv' and 'papers.csv' are appended to it.
        maxn: when not None, keep only papers with ``id < maxn`` and
            citations whose 'p1' and 'p2' are both below ``maxn``.

    Returns:
        SGraph with papers as vertices (keyed by 'id') and citations as
        edges from 'p1' to 'p2'.
    """
    cites = gl.SFrame.read_csv(dpath + 'cites.csv',
                               column_type_hints=[int, int])
    paper = gl.SFrame.read_csv(dpath + 'papers.csv',
                               column_type_hints=[int, int])

    # Flag rows that cite themselves, report how many, then drop them.
    is_self_cite = cites.apply(lambda row: row['p1'] == row['p2'])
    print('%d self-citation detected, delete' % is_self_cite.sum())
    cites = cites[is_self_cite == 0]

    if maxn is not None:
        paper = paper[paper['id'] < maxn]
        both_in_range = cites.apply(
            lambda row: row['p1'] < maxn and row['p2'] < maxn)
        cites = cites[both_in_range]

    return gl.SGraph(vertices=paper,
                     edges=cites,
                     vid_field='id',
                     src_field='p1',
                     dst_field='p2')
Esempio n. 14
0
def create_test_objects():
    """Create a small graph, an SFrame of its edges and a PageRank model.

    Returns:
        Tuple ``(graph, sframe, model)``: a three-edge SGraph built from
        pandas frames, an SFrame made from the edge frame, and the PageRank
        model created on the graph.
    """
    vertex_table = pandas.DataFrame({
        'vid': ['1', '2', '3'],
        'color': ['g', 'r', 'b'],
        'vec': [[.1, .1, .1] for _ in range(3)],
    })
    edge_table = pandas.DataFrame({
        'src_id': ['1', '2', '3'],
        'dst_id': ['2', '3', '4'],
        'weight': [0., 0.1, 1.],
    })

    graph = graphlab.SGraph()
    graph = graph.add_vertices(vertex_table, 'vid')
    graph = graph.add_edges(edge_table, 'src_id', 'dst_id')

    sframe = graphlab.SFrame(edge_table)
    model = graphlab.pagerank.create(graph)
    return (graph, sframe, model)
Esempio n. 15
0
def loadSubData():
    """Load every SFrame stored under '/home/lt/sframe' into one graph.

    All entries of the directory are loaded and concatenated, columns
    'X1'/'X2' are renamed to 'src'/'dst', and the rows are added as edges.

    Returns:
        An SGraph built from the combined edge rows.
    """
    sframe_dir = '/home/lt/sframe'

    # The previous code called the non-existent gl.sFrame, passed the
    # directory and entry name to load_sframe as two arguments, and threw
    # away the result of append() (which returns the combined frame).
    edgesData = None
    for entry in os.listdir(sframe_dir):
        part = gl.load_sframe(os.path.join(sframe_dir, entry))
        edgesData = part if edgesData is None else edgesData.append(part)
    if edgesData is None:
        # Empty directory: keep the original starting point.
        edgesData = gl.SFrame()
    edgesData = edgesData.rename({'X1': 'src', 'X2': 'dst'})

    # create graph
    G = gl.SGraph()
    G = G.add_edges(edges=edgesData, src_field='src', dst_field='dst')
    # The previous code spelled this statement 'pritn', a syntax error.
    print('create graph done!')
    return G
Esempio n. 16
0
def load_data(dpath, maxn=None):
    """Load papers and citations from ``dpath`` into an SGraph.

    Args:
        dpath: path prefix; 'cites.csv' and 'papers.csv' are appended to it.
        maxn: when not None, keep only papers with ``id < maxn`` and
            citations whose 'p1' and 'p2' are both below ``maxn``.

    Returns:
        SGraph with papers as vertices (keyed by 'id') and citations as
        edges from 'p1' to 'p2'.
    """

    def read_int_pairs(file_name):
        # Both files are read with two integer column type hints.
        return gl.SFrame.read_csv(dpath + file_name,
                                  column_type_hints=[int, int])

    cites = read_int_pairs('cites.csv')
    paper = read_int_pairs('papers.csv')

    if maxn is not None:
        paper = paper[paper['id'] < maxn]
        keep_mask = cites.apply(
            lambda row: row['p1'] < maxn and row['p2'] < maxn)
        cites = cites[keep_mask]

    return gl.SGraph(vertices=paper, edges=cites,
                     vid_field='id', src_field='p1', dst_field='p2')
Esempio n. 17
0
    def test_compute_shortest_path(self):
        """Check _compute_shortest_path on a string-id and a weighted graph."""
        find_path = gl.shortest_path._compute_shortest_path

        # String ids: src1 -> a -> dst and src2 -> b -> c -> dst.
        string_edges = gl.SFrame({
            '__src_id': ['src1', 'src2', 'a', 'b', 'c'],
            '__dst_id': ['a', 'b', 'dst', 'c', 'dst'],
        })
        string_graph = gl.SGraph().add_edges(string_edges)

        from_both = list(find_path(string_graph, ["src1", "src2"], "dst"))
        self.assertEquals(from_both, [["src1", "a", "dst"]])

        from_src2 = list(find_path(string_graph, "src2", "dst"))
        self.assertEquals(from_src2, [["src2", "b", "c", "dst"]])

        # Integer ids with a 'weights' edge column.
        weighted_edges = gl.SFrame({
            '__src_id': [0, 1, 2, 3, 4],
            '__dst_id': [2, 3, 5, 4, 5],
            'weights': [1, 0.1, 1, 0.1, 0.1]
        })
        weighted_graph = gl.SGraph(edges=weighted_edges)
        weighted_path = find_path(weighted_graph, [0, 1], [5], "weights")
        self.assertEquals(weighted_path, [1, 3, 4, 5])
Esempio n. 18
0
def build_sptgraph(sg, create_self_edges, baseid_name, layer_name):
    """Expand ``sg`` into a layered graph and tag each vertex.

    Args:
        sg: input graph; a temporary 'sp_edges' field is written to its
            edges (and to its vertices when ``create_self_edges`` is set)
            and deleted again afterwards.
        create_self_edges: when truthy, also add edges expanded from each
            vertex's own 'layers' field.
        baseid_name: name of the vertex column receiving the base id.
        layer_name: name of the vertex column receiving the layer index.

    Returns:
        The new SGraph built from the expanded 'source'/'dest' pairs.
    """
    # Used to generate node ids in the layered graph.
    # IMPORTANT: base ids start at 1 and not 0.
    max_id = sg.vertices['__id'].max()

    def expand_edge_layers(edge_row):
        """Closure over max_id: expand one edge's bitfield."""
        bits = int(edge_row['sp_edges'])
        return expand_causal_edges_from_bitfield(
            bits, edge_row['__src_id'], edge_row['__dst_id'], max_id)

    def expand_vertex_layers(vertex_row):
        """Closure over max_id: expand one vertex against itself."""
        vid = vertex_row['__id']
        bits = shift_and_bitstrings(vertex_row['layers'],
                                    vertex_row['layers'])
        return expand_causal_edges_from_bitfield(bits, vid, vid, max_id)

    def generate_sp_edge_sframe(row):
        return make_list(row['sp_edges'])

    def id_pairs_from(frame):
        # Turn each row's 'sp_edges' value into ('source', 'dest') rows.
        return frame.flat_map(['source', 'dest'],
                              generate_sp_edge_sframe,
                              column_types=[int, int])

    # Placeholder edge field, filled by the triple_apply pass and then
    # replaced by its expanded string form.
    sg.edges['sp_edges'] = ''
    sg = sg.triple_apply(create_causal_edges_bitstring,
                         mutated_fields=['sp_edges'])
    sg.edges['sp_edges'] = sg.edges.apply(expand_edge_layers, dtype=str)

    edge_pairs = id_pairs_from(sg.edges)
    layered = gl.SGraph().add_edges(edge_pairs,
                                    src_field='source',
                                    dst_field='dest')
    del sg.edges['sp_edges']

    if create_self_edges:
        sg.vertices['sp_edges'] = ''
        sg.vertices['sp_edges'] = sg.vertices.apply(expand_vertex_layers,
                                                    dtype=str)
        self_pairs = id_pairs_from(sg.vertices)
        layered = layered.add_edges(self_pairs,
                                    src_field='source',
                                    dst_field='dest')
        del sg.vertices['sp_edges']

    # layer = (id - 1) // max_id ; base id = id - layer * max_id
    layered.vertices[layer_name] = layered.vertices.apply(
        lambda v: (v['__id'] - 1) // max_id, dtype=int)
    layered.vertices[baseid_name] = layered.vertices.apply(
        lambda v: v['__id'] - (v[layer_name] * max_id), dtype=int)

    return layered
Esempio n. 19
0
def create_spatio_temporal_graph(g,
                                 data,
                                 create_self_edges=True,
                                 baseid_name='baseID',
                                 layer_name='layer',
                                 verbose=True,
                                 force_python=False,
                                 excluded_ids=None):
    """Build the spatio-temporal graph for ``g`` and the signal in ``data``.

    The signal is turned into a per-node signal, merged onto the graph,
    expanded with ``build_sptgraph`` and finally joined back onto the
    resulting vertices.

    Args:
        g: input graph, forwarded to ``merge_signal_on_graph``.
        data: signal data; wrapped in a ``gl.SFrame``.
        create_self_edges: forwarded to ``build_sptgraph``.
        baseid_name: name of the base-id column.
        layer_name: name of the layer column.
        verbose: log start/end messages and forward verbosity to helpers.
        force_python: use the pure-Python helpers even when
            ``HAS_FAST_MODULE`` is true.
        excluded_ids: forwarded to ``merge_signal_on_graph``.

    Returns:
        An SGraph whose vertices are the built graph's vertices joined with
        the signal on (``baseid_name``, ``layer_name``) and whose edges are
        the built graph's edges.
    """
    start = time.time()
    if verbose:
        LOGGER.info('Start spatio-temporal graph creation')

    signal = gl.SFrame(data)

    if HAS_FAST_MODULE and not force_python:
        node_signal = create_node_signal_fast(signal,
                                              baseid_name,
                                              layer_name,
                                              verbose=verbose)
    else:
        node_signal = create_node_signal(signal,
                                         baseid_name,
                                         layer_name,
                                         verbose=verbose)

    sg = merge_signal_on_graph(g,
                               node_signal,
                               baseid_name,
                               layer_name,
                               excluded_ids=excluded_ids,
                               verbose=verbose)
    # Create graph
    if HAS_FAST_MODULE and not force_python:
        h = fast.build_sptgraph(sg,
                                baseid_name,
                                layer_name,
                                create_self_edges,
                                verbose=False)
    else:
        h = sptgraph_impl.build_sptgraph(sg, create_self_edges, baseid_name,
                                         layer_name)

    # Join on the configured column names.  The previous code hard-coded
    # 'page_id' and 'layer', which only matched when the caller happened to
    # pass exactly those names.
    k = gl.SGraph(h.vertices.join(signal, [baseid_name, layer_name]), h.edges)

    if verbose:
        LOGGER.info('Spatio-temporal graph created in: %s seconds',
                    time.time() - start)

    return k
def calc_pagerank():
    """Compute PageRank over the citation CSV and save the scores as CSV.

    Reads '../data/full_citation.csv', builds a graph with edges running
    from the 'Citation' column to the 'Patent' column, and writes the
    'Patent' and 'pagerank' columns to '../data/full_pagerank.csv'.
    """
    citations = gl.SFrame.read_csv('../data/full_citation.csv',
                                   delimiter=',',
                                   error_bad_lines=True)

    graph = gl.SGraph(citations,
                      vid_field='Patent',
                      src_field='Citation',
                      dst_field='Patent')
    graph = graph.add_edges(citations,
                            src_field='Citation',
                            dst_field='Patent')

    model = gl.pagerank.create(graph)
    scores = model['pagerank'].rename({'__id': 'Patent'})
    scores = scores[['Patent', 'pagerank']]
    scores.save('../data/full_pagerank.csv', format='csv')
Esempio n. 21
0
 def run_shortest_path(self, analytic):
     """Load the analytic's edge file, run shortest_path, return distances.

     Each of the three stages (load, run, fetch result) is wrapped so that
     any failure is re-raised as ``Exception(<stage code>, <message>)``
     using LOAD_FILE, RUN_ALGOS and PROC_FINA respectively.
     """
     try:
         csv_path = analytic.dump.get_data_file_path()
         edge_table = graphlab.SFrame.read_csv(csv_path)
         graph = graphlab.SGraph().add_edges(edge_table, 'src', 'dest')
     except Exception:
         raise Exception(LOAD_FILE, "Error loading the file")

     try:
         # NOTE(review): no source vertex is passed here - confirm that
         # shortest_path.create can run without one.
         model = graphlab.shortest_path.create(graph)
     except Exception:
         raise Exception(RUN_ALGOS, "Error executing the task")

     try:
         return model.get('distance')
     except Exception as err:
         raise Exception(PROC_FINA, "Error finishing the task: " + str(err))
Esempio n. 22
0
 def run_connected_components(self, analytic):
     """Load the analytic's edge file and return its component ids.

     Failures while loading, running or reading the result are re-raised
     as ``Exception(<stage code>, <message>)`` with LOAD_FILE, RUN_ALGOS
     and PROC_FINA respectively.
     """
     try:
         source_file = analytic.dump.get_data_file_path()
         rows = graphlab.SFrame.read_csv(source_file)
         graph = graphlab.SGraph()
         graph = graph.add_edges(rows, 'src', 'dest')
     except Exception:
         raise Exception(LOAD_FILE, "Error loading the file")

     try:
         components_model = graphlab.connected_components.create(graph)
     except Exception:
         raise Exception(RUN_ALGOS, "Error executing the task")

     try:
         result = components_model.get('componentid')
     except Exception as err:
         raise Exception(PROC_FINA, "Error finishing the task: " + str(err))
     return result
def sssp_triple_apply(input_graph, src_vid, max_distance=1e30):
    """Single-source shortest paths computed with repeated triple_apply.

    Args:
        input_graph: graph whose vertices and edges seed the working copy.
        src_vid: id of the source vertex (initial distance 0.0).
        max_distance: initial distance assigned to every other vertex.

    Returns:
        The working graph with 'distance' and 'changed' vertex fields, once
        a pass reports no changed vertices.
    """
    g = gl.SGraph(input_graph.vertices, input_graph.edges)

    def initial_distance(vid):
        if vid != src_vid:
            return max_distance
        return 0.0

    g.vertices['distance'] = g.vertices['__id'].apply(initial_distance)

    rounds = 0
    changed_count = len(g.vertices)
    started_at = time.time()
    while changed_count > 0:
        # Reset the per-pass flag; sssp_update_fn updates both fields.
        g.vertices['changed'] = 0
        g = g.triple_apply(sssp_update_fn, ['distance', 'changed'])
        changed_count = g.vertices['changed'].sum()
        print('Iteration %d: num_vertices changed = %d'
              % (rounds, changed_count))
        rounds += 1

    print('Triple apply sssp finished in: %f secs'
          % (time.time() - started_at))
    return g
Esempio n. 24
0
def construct_graph():
    """Build the actor/film graph from ``get_sframe()`` and print a summary.

    Returns:
        An SGraph with an edge in each direction between every
        'actor_name' / 'film_name' pair of the source frame.
    """
    sf = get_sframe()
    actors = sf['actor_name'].unique()
    g = graphlab.SGraph()

    # Edges are added in both directions as a workaround for SGraph being
    # directed; the result behaves like an undirected graph.
    g = g.add_edges(sf, src_field='actor_name', dst_field='film_name')
    g = g.add_edges(sf, src_field='film_name', dst_field='actor_name')

    # The previous code printed this header but discarded the sample itself.
    print("Actor vertex sample:")
    print(g.get_vertices(ids=actors).tail(5))

    print("Movie graph summary:\n%s\n" % (g.summary(),))
    return g
Esempio n. 25
0
    def label_communities(self):
        """Attach scored labels and the top five labels to each community.

        Vertex descriptions of the child graph are grouped per
        'community_id', turned into word counts, trimmed, weighted with
        tf-idf and divided by the community's member count.  The result is
        joined onto this graph's vertices ('__id' against 'community_id')
        and ``self.g`` is rebuilt from the joined frame.
        """
        described = self.child.g.vertices.join(self.verticy_descriptions,
                                               '__id',
                                               how='left')
        aggregations = {
            "labels": gl.aggregate.CONCAT("description"),
            "member_count": gl.aggregate.COUNT("__id")
        }
        frame = described.groupby('community_id', aggregations)

        def remove_dups(text):
            # Keep the first occurrence of each word, in original order.
            tokens = text.split()
            return " ".join(sorted(set(tokens), key=tokens.index))

        def merge_descriptions(descriptions):
            return ' '.join([remove_dups(entry) for entry in descriptions])

        labels = frame['labels'].apply(merge_descriptions)
        labels = gl.text_analytics.count_words(labels)
        labels = labels.dict_trim_by_values(3)

        ignored_words = gl.text_analytics.stopwords()
        ignored_words.update(['http', 'https'])
        labels = labels.dict_trim_by_keys(ignored_words, exclude=True)
        frame['labels'] = gl.text_analytics.tf_idf(labels)

        def label_score(row):
            # Scale every label weight by the size of its community.
            members = row['member_count']
            return {label: weight / members
                    for label, weight in row['labels'].items()}

        frame['labels'] = frame.apply(label_score)

        def top_labels(labels_dict):
            ranked = sorted(labels_dict.items(),
                            key=lambda pair: pair[1],
                            reverse=True)
            return [label for label, _ in ranked[:5]]

        frame['top_labels'] = frame['labels'].apply(top_labels)

        labelled_vertices = self.g.vertices.join(frame,
                                                 {'__id': 'community_id'})
        self.g = gl.SGraph(vertices=labelled_vertices,
                           edges=self.g.get_edges(),
                           vid_field='__id',
                           src_field='__src_id',
                           dst_field='__dst_id')
Esempio n. 26
0
def merge_signal_on_graph(g,
                          node_signal,
                          baseid_name,
                          layer_name,
                          excluded_ids=None,
                          use_fast=True,
                          verbose=True,
                          remove_self=True):
    """Reduce ``g`` to the nodes present in ``node_signal``.

    Args:
        g: a GraphLab graph, or a networkx graph (converted first).
        node_signal: frame of node signals keyed by ``baseid_name``.
        baseid_name: name of the id column in ``node_signal``.
        layer_name: accepted for interface symmetry; not read here.
        excluded_ids: ids to drop from the kept nodes, if truthy.
        use_fast: when true, ``node_signal`` itself is used as the node
            frame and ``rename`` is called on it to map ``baseid_name`` to
            '__id'; otherwise the graph's vertices are inner-joined with
            ``node_signal``.
        verbose: log progress through ``LOGGER``.
        remove_self: drop edges whose source and destination ids are equal.

    Returns:
        An SGraph made of the kept nodes and the edges between them.
    """
    began = time.time()
    if verbose:
        LOGGER.info('Start reducing graph to minimum')

    p = g
    if isinstance(g, (nx.Graph, nx.DiGraph)):  # convert if needed
        conversion_began = time.time()
        if verbose:
            LOGGER.info('Start networkx to graphlab conversion')
        p = utils.networkx_to_graphlab(g)
        if verbose:
            LOGGER.info('Conversion done in: %s',
                        time.time() - conversion_began)

    if not use_fast:
        good_nodes = p.vertices.join(node_signal,
                                     on={'__id': baseid_name},
                                     how='inner')
    else:
        node_signal.rename({baseid_name: '__id'})
        good_nodes = node_signal

    if excluded_ids:
        good_nodes = good_nodes.filter_by(excluded_ids, '__id', exclude=True)

    # Keep only edges whose destination AND source are kept nodes.
    kept_ids = good_nodes['__id']
    into_kept = p.get_edges(dst_ids=kept_ids)
    good_edges = into_kept.filter_by(kept_ids, '__src_id')

    # Remove self-edges
    if remove_self:
        endpoints_differ = (good_edges['__src_id'] -
                            good_edges['__dst_id']) != 0
        good_edges = good_edges[endpoints_differ]

    if verbose:
        LOGGER.info('Graph reduction done in: %s seconds', time.time() - began)

    return gl.SGraph(good_nodes, good_edges)
Esempio n. 27
0
def createGraph():
    """Build the sub-graph, attach the Zhima user columns and save it.

    Edges are loaded from 'subgraph_zhima_2' under ``resultDataFolder``
    (columns 'src' and 'dst'); the frame returned by ``loadZhima()`` is
    left-joined onto the vertices through its 'snwb' column, and the
    labelled graph is saved as 'subgraph_zhima' in the same folder.
    """
    zhima_usr = loadZhima()

    subgraph_edges = gl.load_sframe(
        os.path.join(resultDataFolder, 'subgraph_zhima_2'))
    # create graph
    sub_G = gl.SGraph()
    sub_G = sub_G.add_edges(edges=subgraph_edges,
                            src_field='src',
                            dst_field='dst')

    # Join the labels onto the vertices.  The previous code joined on '_id',
    # a column the vertex frame does not have (its id column is '__id'), and
    # discarded the join result, so the saved graph never carried labels.
    labelled_vertices = sub_G.vertices.join(zhima_usr,
                                            on={'__id': 'snwb'},
                                            how='left')
    sub_G = gl.SGraph(vertices=labelled_vertices,
                      edges=sub_G.get_edges(),
                      vid_field='__id',
                      src_field='__src_id',
                      dst_field='__dst_id')

    print('save graph')
    sub_G.save(os.path.join(resultDataFolder, 'subgraph_zhima'))
Esempio n. 28
0
    def show_recomnedations(self, recoms, existing):
        """Display the recommended pairs as a graph with shortened names.

        Both input lists are sorted in place.  Every recommendation becomes
        an edge between the shortened forms of its first two elements, and
        the graph is shown with vertex ids as labels.
        """

        def shorten_name(name):
            # "First Last" -> "F. Last"
            parts = name.split(" ")
            return "{0}. {1}".format(parts[0][0], parts[1])

        g = gl.SGraph()
        existing.sort()
        recoms.sort()

        # 1/0 flags recording which recommendations already exist; the list
        # is built but not used further in this method.
        list_existing = []
        for recom in recoms:
            edge = gl.Edge(shorten_name(recom[0]), shorten_name(recom[1]))
            g = g.add_edges(edge)
            list_existing.append(1 if recom in existing else 0)

        g.show(vlabel="id")
        print
def run_graph_analytics(data, data_name):
    """Print basic statistics for ``data`` and export degree/PageRank CSVs.

    Args:
        data: frame with 'input_address', 'output_address', 'year' and
            'month' columns.
        data_name: sub-directory name under 'analytics/' for the CSV files.
    """
    # Graph of input address -> output address.
    g = gl.SGraph().add_edges(data,
                              src_field='input_address',
                              dst_field='output_address')
    print(g)

    # Row counts per (year, month), oldest first, with a "month/year" label.
    transaction_count = data.groupby(['year', 'month'], agg.COUNT)
    transaction_count = transaction_count.sort(['year', 'month'],
                                               ascending=True)
    n_month = transaction_count.num_rows()
    month_text = transaction_count['month'].astype(str)
    year_text = transaction_count['year'].astype(str)
    transaction_count['label'] = month_text + "/" + year_text
    print(transaction_count)

    output_prefix = 'analytics/' + data_name

    # degree_counting returns a model whose 'graph' entry is a new SGraph
    # with degree data attached to each vertex.
    deg_graph = degree_counting.create(g)['graph']
    in_degree = deg_graph.vertices[['__id', 'in_degree']]
    out_degree = deg_graph.vertices[['__id', 'out_degree']]
    in_degree.export_csv(output_prefix + '/in_degree.csv', delimiter=',')
    out_degree.export_csv(output_prefix + '/out_degree.csv', delimiter=',')

    pagerank_scores = gl.pagerank.create(g)['pagerank']
    pagerank_scores.export_csv(output_prefix + '/pr_out.csv', delimiter=',')
Esempio n. 30
0
    def test_directed_no_self_edge(self):
        """Directed graph without self-edges: 1 component, then 2 after a cut."""

        def count_components(graph):
            labelled = components.find_connected_components(graph)
            component_frame = components.create_component_sframe(labelled)
            return len(components.extract_components(labelled,
                                                     component_frame))

        # Directed no self-edge
        g = sptgraph.create_spatio_temporal_graph(gen_graph(True),
                                                  gen_signal(),
                                                  False,
                                                  verbose=False)

        # The graph has only 1 connected component
        self.assertEqual(1, count_components(g))

        # Remove the edge (7, 12) to create 2 weakly connected components.
        # NOTE(review): 'eid' is read from g.get_edges(...) although the
        # column is added to the numbered copy below - confirm that
        # get_edges exposes it.
        vertex_frame = g.vertices
        numbered_edges = g.edges.add_row_number('eid')
        doomed = g.get_edges(7, 12)
        doomed_eid = doomed['eid'][0]
        remaining_edges = numbered_edges[numbered_edges['eid'] != doomed_eid]
        g = gl.SGraph(vertex_frame, remaining_edges)

        self.assertEqual(2, count_components(g))