def CC(url='/home/gengl/Datasets/CC/BerkStan/edge.txt'):
    """Run connected components on a tab-separated edge list and print a summary.

    Args:
        url: Location of a headerless, tab-delimited edge file with two
            integer columns (source vertex, destination vertex). Defaults to
            the BerkStan edge list path this function originally hard-coded.
    """
    # With header=False the columns get the auto-generated names X1 and X2,
    # which are used as the edge endpoints below.
    edges = SFrame.read_csv(
        url,
        delimiter='\t',
        header=False,
        column_type_hints=[int, int],
    )
    graph = SGraph().add_edges(edges, src_field='X1', dst_field='X2')
    cc_model = connected_components.create(graph, verbose=True)
    cc_model.summary()
def get_number_weakly_connected_components(self, g):
    """Return the length of the ``component_size`` table computed for ``g``.

    Args:
        g: Graph handed unchanged to ``connected_components.create``.

    Returns:
        ``len()`` of the model's ``'component_size'`` entry, which this
        method reports as the number of weakly connected components.
    """
    model = connected_components.create(g)
    component_sizes = model['component_size']
    return len(component_sizes)
import datetime

from graphlab import SFrame, SGraph
from graphlab import connected_components, pagerank, triangle_counting

# Create cluster.
# NOTE(review): `c` is only printed; none of the toolkit calls below receive
# it. Confirm whether they were meant to be submitted to this cluster.
c = gl.deploy.hadoop_cluster.create(
    name='test-cluster',
    dato_dist_path='hdfs://ec2-54-215-136-187.us-west-1.compute.amazonaws.com:9000/dato/tmp',
    hadoop_conf_dir='/usr/local/hadoop/etc/hadoop',
    num_containers=3,
)
print(c)

# Load the headerless, tab-delimited edge list; columns are auto-named X1, X2.
url = 'hdfs://ec2-54-215-136-187.us-west-1.compute.amazonaws.com:9000/data/pokec.txt'
data = SFrame.read_csv(url, delimiter='\t', header=False)
# Edges run from column X2 to column X1, as in the original script.
# NOTE(review): confirm this direction is intended for this dataset.
g = SGraph().add_edges(data, src_field='X2', dst_field='X1')

# Triangle counting
tc = triangle_counting.create(g)
tc_out = tc['triangle_count']

# PageRank. The original evaluated datetime.now() before and after the call
# but discarded both values, so nothing was measured when run as a script;
# the elapsed time is now captured and printed.
pr_start = datetime.datetime.now()
pr = pagerank.create(g, threshold=0.001)
pr_elapsed = datetime.datetime.now() - pr_start
print('pagerank elapsed: %s' % pr_elapsed)

# Connected components, timed the same way.
cc_start = datetime.datetime.now()
cc = connected_components.create(g)
cc_elapsed = datetime.datetime.now() - cc_start
print('connected_components elapsed: %s' % cc_elapsed)
# For every scale from START_SCALE up to the end scale recorded in ./tmp,
# read that scale's adjacency relationships, compute connected components,
# and export the vertex -> component mapping as CSV.
outputPath = os.environ.get("OUTPUT_PATH")
startScale = int(os.environ.get("START_SCALE"))
tagFile = './tmp'
with open(tagFile, 'r') as f:
    # The first line is comma-separated; fields 1 and 2 are parsed as
    # integer scales below.
    infor = f.readline().strip().split(",")
maxScale = int(infor[1])
realEndScale = int(infor[2])
scaleRange = range(startScale, realEndScale + 1)

for scale in scaleRange:
    inputPath = os.path.join(outputPath, 'tmp', 'AdjacentRelationships', str(scale))
    url = inputPath
    data = SFrame.read_csv(url, header=False)
    if data.num_rows() == 0:
        # No edges at this scale: emit an empty table with the same columns.
        cc_ids = SFrame({"__id": [], "component_id": []})
    else:
        # The first two columns are the edge endpoints.
        columns = data.column_names()
        g = SGraph().add_edges(data, src_field=columns[0], dst_field=columns[1])
        cc = connected_components.create(g)
        cc_ids = cc.get('component_id')

    path = os.path.join(outputPath, 'tmp', 'ConnectedComponents', str(scale))
    # The original test was `~os.path.exists(path)`. Bitwise NOT of a bool is
    # -1 or -2, both truthy, so makedirs ran even when the directory already
    # existed and raised OSError. Logical `not` is what was intended.
    if not os.path.exists(path):
        os.makedirs(path)
    # NOTE(review): `path` is the directory ensured just above and is also
    # the export target; confirm export_csv is meant to write to this path.
    SFrame.export_csv(cc_ids, path)