def CC():
    """Run connected components over the BerkStan edge list.

    Reads a headerless, tab-separated two-column integer edge file from a
    fixed local path, builds an SGraph from it, creates a
    connected-components model with verbose output enabled, and calls the
    model's ``summary()``. Nothing is returned.
    """
    edge_file = '/home/gengl/Datasets/CC/BerkStan/edge.txt'
    edges = SFrame.read_csv(
        edge_file,
        delimiter='\t',
        header=False,
        column_type_hints=[int, int],
    )

    # The file has no header row; the two columns are addressed below as
    # 'X1' (edge source) and 'X2' (edge destination).
    empty_graph = SGraph()
    graph = empty_graph.add_edges(edges, src_field='X1', dst_field='X2')

    model = connected_components.create(graph, verbose=True)
    model.summary()
Beispiel #2
0
 def get_number_weakly_connected_components(self, g):
     """Return the length of the model's 'component_size' entry for graph g.

     A connected-components model is created for ``g`` and the number of
     entries in its 'component_size' field is returned (presumably one
     entry per component -- verify against the library documentation).
     """
     model = connected_components.create(g)
     component_sizes = model['component_size']
     return len(component_sizes)
Beispiel #3
0
# Script: create a Hadoop cluster handle, load an edge list from HDFS into an
# SGraph, then run triangle counting, PageRank and connected components on it.
import datetime

# Create cluster
# Creates a cluster object named 'test-cluster' with 3 containers, pointing at
# an HDFS distribution path and a local Hadoop configuration directory.
# NOTE(review): `gl` is not imported in the visible code -- presumably
# `import graphlab as gl` happens elsewhere; confirm.
# NOTE(review): `c` is only printed below; none of the visible algorithm calls
# receive it as an argument.
c = gl.deploy.hadoop_cluster.create(name='test-cluster',dato_dist_path='hdfs://ec2-54-215-136-187.us-west-1.compute.amazonaws.com:9000/dato/tmp',hadoop_conf_dir='/usr/local/hadoop/etc/hadoop',num_containers=3)
# Python 2 print statement.
print c

from graphlab import SFrame, SGraph
# Load the tab-separated, headerless edge list from HDFS.
url = 'hdfs://ec2-54-215-136-187.us-west-1.compute.amazonaws.com:9000/data/pokec.txt'
data = SFrame.read_csv(url, delimiter='\t',header=False)
# Note the column order: 'X2' is used as the edge source and 'X1' as the
# destination.
g = SGraph().add_edges(data, src_field='X2', dst_field='X1')


# triangle counting
from graphlab import triangle_counting
tc = triangle_counting.create(g)
# Keep the model's 'triangle_count' field for later inspection.
tc_out = tc['triangle_count']


#pagerank
from graphlab import pagerank
# The return value of now() is neither stored nor printed here.
# NOTE(review): looks like timestamps meant to be echoed in an interactive
# session around the call -- confirm.
datetime.datetime.now()
pr = pagerank.create(g,threshold=0.001)
datetime.datetime.now()


# Connected Components
from graphlab import connected_components
# Same pattern as above: the now() results are discarded.
datetime.datetime.now()
cc = connected_components.create(g)
datetime.datetime.now()
Beispiel #4
0
# Script: for every scale in [START_SCALE, realEndScale], read that scale's
# adjacency CSV, compute connected-component ids, and export them as CSV under
# <OUTPUT_PATH>/tmp/ConnectedComponents/<scale>.

# Output root and first scale come from the environment.
# NOTE(review): int() raises TypeError here if START_SCALE is unset.
outputPath = os.environ.get("OUTPUT_PATH")
startScale = int(os.environ.get("START_SCALE"))

# './tmp' holds a single comma-separated line; fields 1 and 2 are parsed as
# integers (maxScale is parsed but not used in the visible code).
tagFile = './tmp'
with open(tagFile, 'r') as f:
    infor = f.readline().strip().split(",")
    maxScale = int(infor[1])
    realEndScale = int(infor[2])

# Inclusive range of scales to process.
scaleRange = range(startScale, realEndScale + 1)

for scale in scaleRange:

    inputPath = os.path.join(outputPath, 'tmp', 'AdjacentRelationships',
                             str(scale))
    url = inputPath
    data = SFrame.read_csv(url, header=False)
    if data.num_rows() == 0:
        # No edges at this scale: export an empty table with the same two
        # column names used in the non-empty case.
        cc_ids = SFrame({"__id": [], "component_id": []})
    else:
        # The first two columns of the CSV are the edge endpoints.
        g = SGraph().add_edges(data,
                               src_field=data.column_names()[0],
                               dst_field=data.column_names()[1])
        cc = connected_components.create(g)
        cc_ids = cc.get('component_id')
    path = os.path.join(outputPath, 'tmp', 'ConnectedComponents', str(scale))
    # Logical `not`, not bitwise `~`: ~True == -2 and ~False == -1 are both
    # truthy, so the old check always called makedirs and raised on re-runs
    # when the directory already existed.
    if not os.path.exists(path):
        os.makedirs(path)

    # NOTE(review): the export target is the directory path itself -- confirm
    # this is what export_csv expects.
    SFrame.export_csv(cc_ids, path)