def test_parallel_evaluation(self):
    """Parallel lambda evaluation must match serial results; log timing info."""
    value = 33
    repeat = 8

    # Baseline: evaluate the whole batch in a single process.
    t0 = time.time()
    glconnect.get_unity().eval_lambda(lambda x: [fib(i) for i in x],
                                      [value] * repeat)
    single_thread_time = time.time() - t0
    logging.info("Single thread lambda eval takes %s secs" % single_thread_time)

    # Same workload, fanned out across worker processes.
    t0 = time.time()
    ans_list = glconnect.get_unity().parallel_eval_lambda(
        lambda x: fib(x), [value] * repeat)
    multi_thread_time = time.time() - t0
    logging.info("Multi thread lambda eval takes %s secs" % multi_thread_time)

    # Warn (do not fail) when the parallel path shows no meaningful speedup.
    nproc = multiprocessing.cpu_count()
    if (nproc > 1 and multi_thread_time > (single_thread_time / 1.5)):
        logging.warning(
            "Slow parallel processing: single thread takes %s secs, multithread on %s procs takes %s secs"
            % (single_thread_time, nproc, multi_thread_time))

    # Every parallel answer must equal the serial reference value.
    expected = fib(value)
    for a in ans_list:
        self.assertEqual(a, expected)
def test_exception(self):
    """A division-by-zero inside a lambda must surface as RuntimeError for
    both serial and parallel evaluation paths."""
    numerator = 3
    self.assertRaises(RuntimeError,
                      glconnect.get_unity().eval_lambda,
                      lambda y: numerator / y, 0)
    self.assertRaises(RuntimeError,
                      glconnect.get_unity().parallel_eval_lambda,
                      lambda y: numerator / y, [0 for i in range(10)])
def test_simple_evaluation(self):
    """eval_lambda must handle closures over locals, string methods, and a
    plain named function."""
    x = 3
    # (callable, argument, expected result)
    cases = [
        (lambda y: y + x, 0, 3),
        (lambda y: y + x, 1, 4),
        (lambda s: s.upper(), 'abc', 'ABC'),
        (lambda s: s.lower(), 'ABC', 'abc'),
        (fib, 1, 1),
    ]
    for fn, arg, expected in cases:
        self.assertEqual(glconnect.get_unity().eval_lambda(fn, arg), expected)
def save(self, location):
    """
    Save the model. The model is saved as a directory which can then be
    loaded using the :py:func:`~turicreate.load_model` method.

    Parameters
    ----------
    location : string
        Target destination for the model. Can be a local path or remote URL.

    See Also
    ----------
    turicreate.load_model

    Examples
    ----------
    >>> model.save('my_model_file')
    >>> loaded_model = tc.load_model('my_model_file')
    """
    import copy
    native_state = self._get_native_state()
    # Shallow-copy so tagging the version below does not mutate the
    # model's own state dictionary.
    payload = copy.copy(native_state)
    payload['model_version'] = self._get_version()
    name = self.__class__._native_name()
    return glconnect.get_unity().save_model2(name, location, payload)
def test_degree_count(self):
    """Validate in/out/total degree fields against groupby-derived counts."""
    if "degree_count" not in get_unity().list_toolkit_functions():
        return
    m = tc.degree_counting.create(self.graph)
    m.summary()
    self.__test_model_save_load_helper__(m)
    g = m.graph

    def expected_degree(id_col):
        # Count edges per vertex on the given endpoint column, right-join
        # against all vertices so isolated vertices get an explicit 0.
        deg = g.edges.groupby(id_col, {'expected': tc.aggregate.COUNT})
        deg = deg.join(g.vertices[['__id']],
                       on={id_col: "__id"},
                       how="right").fillna("expected", 0)
        return deg.sort(id_col)['expected']

    expected_out_deg = expected_degree('__src_id')
    expected_in_deg = expected_degree('__dst_id')

    sf = g.vertices.sort('__id')
    actual_out_deg = sf['out_degree']
    actual_in_deg = sf['in_degree']
    actual_all_deg = sf['total_degree']
    self.assertEqual((expected_in_deg - actual_in_deg).sum(), 0)
    self.assertEqual((expected_out_deg - actual_out_deg).sum(), 0)
    self.assertEqual(
        (actual_all_deg - (actual_out_deg + actual_in_deg)).sum(), 0)
def test_graph_coloring(self):
    """Smoke-test graph coloring; the coloring itself is non-deterministic,
    so only model creation and save/load round-trip are checked."""
    if "graph_coloring" not in get_unity().list_toolkit_functions():
        return
    model = tc.graph_coloring.create(self.graph)
    print(model)
    model.summary()
    # coloring is non-deterministic, so we cannot verify the result here
    self.__test_model_save_load_helper__(model)
def test_triangle_counting(self):
    """The fixture graph has a known triangle count of 934."""
    if "triangle_counting" not in get_unity().list_toolkit_functions():
        return
    model = tc.triangle_counting.create(self.graph)
    print(model)
    model.summary()
    self.__test_model_save_load_helper__(model)
    self.assertEqual(model.num_triangles, 934)
def test_pagerank(self):
    """Check pagerank output shape, known aggregate sums, and that a
    different reset_probability produces a different ranking."""
    if "pagerank" not in get_unity().list_toolkit_functions():
        return
    m = tc.pagerank.create(self.graph)
    print(m)
    m.summary()
    self.assertEqual((m.pagerank.num_rows(), m.pagerank.num_columns()),
                     (self.graph.summary()['num_vertices'], 3))
    self.assertEqual(int(m.pagerank['pagerank'].sum()), 2727)
    self.__test_model_save_load_helper__(m)

    m2 = tc.pagerank.create(self.graph, reset_probability=0.5)
    print(m2)
    self.assertEqual((m2.pagerank.num_rows(), m2.pagerank.num_columns()),
                     (self.graph.summary()['num_vertices'], 3))
    self.assertAlmostEqual(m2.pagerank['pagerank'].sum(), 7087.08, delta=1e-2)
    # Different reset probabilities must yield different top-k rankings.
    with self.assertRaises(Exception):
        assert_frame_equal(m.pagerank.topk('pagerank'),
                           m2.pagerank.topk('pagerank'))

    pr_out = m2['pagerank']
    with self.assertRaises(Exception):
        assert_frame_equal(m.pagerank.topk('pagerank'),
                           pr_out.topk('pagerank'))

    self.__test_model_save_load_helper__(m2)
def test_shortest_path(self):
    """SSSP on the fixture graph plus exact get_path checks on a chain
    graph and a star graph."""
    if "sssp" not in get_unity().list_toolkit_functions():
        return
    m = tc.shortest_path.create(self.graph, source_vid=0)
    print(m)
    m.summary()
    self.__test_model_save_load_helper__(m)

    m2 = tc.shortest_path.create(self.graph, source_vid=0)
    print(m2)
    self.__test_model_save_load_helper__(m2)

    # Test get_path function on a simple chain graph and star graph
    chain_graph = tc.SGraph().add_edges(
        [tc.Edge(i, i + 1) for i in range(10)])
    m3 = tc.shortest_path.create(chain_graph, source_vid=0)
    for i in range(10):
        # On a chain, the path to vertex i visits 0..i with distance == id.
        expected = [(j, float(j)) for j in range(i + 1)]
        self.assertSequenceEqual(m3.get_path(i), expected)

    star_graph = tc.SGraph().add_edges(
        [tc.Edge(0, i + 1) for i in range(10)])
    m4 = tc.shortest_path.create(star_graph, source_vid=0)
    for i in range(1, 11):
        self.assertSequenceEqual(m4.get_path(i), [(0, 0.0), (i, 1.0)])

    # Test that get_path with the 'show' parameter set to True doesn't
    # break.
    #
    # Showing is problematic when there is actually a browser.
    # This will pause scripts.
    # m4.get_path(i, show=True)

    # Test sssp ignoring the existing distance field
    star_graph.vertices['distance'] = 0
    m5 = tc.shortest_path.create(star_graph, source_vid=0)
    for i in range(1, 11):
        self.assertSequenceEqual(m5.get_path(i), [(0, 0.0), (i, 1.0)])
def test_crash_recovery(self):
    """Workers killed mid-task must raise RuntimeError to the caller, and
    the worker pool must recover so a subsequent evaluation succeeds.

    Fixes: removed the unused ``import time, sys`` and the redundant
    lambda wrappers around ``good_fun``/``bad_fun``.
    """
    ls = list(range(1000))

    def good_fun(x):
        return x

    def bad_fun(x):
        # Kill the worker process on every 251st element to simulate a crash.
        if (x + 1) % 251 == 0:
            cy_test_utils.force_exit_fun()  # this will force the worker process to exit
        return x

    # The crashed workers should surface as a RuntimeError to the caller.
    self.assertRaises(RuntimeError,
                      glconnect.get_unity().parallel_eval_lambda,
                      bad_fun, ls)
    # The pool must have recovered: this evaluation should run cleanly.
    glconnect.get_unity().parallel_eval_lambda(good_fun, ls)
def test_connected_component(self):
    """The fixture graph must form exactly one connected component."""
    if "connected_component" not in get_unity().list_toolkit_functions():
        return
    model = tc.connected_components.create(self.graph)
    print(model)
    model.summary()
    print(model.component_id)
    print(model.component_size)
    # A single component means component_size has exactly one row.
    self.assertEqual(model.component_size.num_rows(), 1)
    self.__test_model_save_load_helper__(model)
def test_kcore(self):
    """The largest k-core of the fixture graph has known id (6) and size (4492)."""
    if "kcore" not in get_unity().list_toolkit_functions():
        return
    model = tc.kcore.create(self.graph)
    print(model)
    model.summary()
    biggest_core = model.core_id.groupby(
        'core_id', tc.aggregate.COUNT).topk('Count').head(1)
    self.assertEqual(biggest_core['core_id'][0], 6)
    self.assertEqual(biggest_core['Count'][0], 4492)
    self.__test_model_save_load_helper__(model)
def test_label_propagation(self):
    """End-to-end label propagation test: per-vertex probability invariants,
    weighted/undirected options, early termination, and the all-equal case.

    Fixes: deprecated ``assertEquals``/``assertEqual(x, None)`` replaced
    with ``assertIsNone``; the probability-key list is hoisted out of the
    per-row loop.
    """
    if "label_propagation" not in get_unity().list_toolkit_functions():
        return
    g = self.graph.copy()
    num_vertices = len(g.vertices)
    num_classes = 2

    def get_label(vid):
        # Seed low ids with class 0, high ids with class 1, and leave the
        # middle unlabeled (None) for the algorithm to fill in.
        if vid < 100:
            return 0
        elif vid > num_vertices - 100:
            return 1
        else:
            return None

    g.vertices['label'] = g.vertices['__id'].apply(get_label, int)
    m = tc.label_propagation.create(g, label_field='label')
    m.summary()
    self.__test_model_save_load_helper__(m)

    prob_keys = ['P%d' % i for i in range(num_classes)]  # loop-invariant
    for row in m.graph.vertices:
        predicted_label = row['predicted_label']
        if predicted_label is None:
            # No winning class: probabilities must be uniform.
            for k in prob_keys:
                self.assertAlmostEqual(row[k], 1.0 / num_classes)
        else:
            # The predicted class must dominate every other class, and the
            # distribution must sum to 1.
            sum_of_prob = 0.0
            for k in prob_keys:
                sum_of_prob += row[k]
                self.assertGreaterEqual(row['P%d' % predicted_label], row[k])
            self.assertAlmostEqual(sum_of_prob, 1.0)

    # Add more options: weighted edges, change self weight, and undirected edges
    def get_edge_weight(vid):
        return float(vid) * 10 / num_vertices

    g.edges['weight'] = g.edges['__src_id'].apply(get_edge_weight, float)
    m = tc.label_propagation.create(g,
                                    label_field='label',
                                    threshold=1e-2,
                                    weight_field='weight',
                                    self_weight=0.5,
                                    undirected=True)

    # Test early termination using max_iteration
    max_iter = 3
    m = tc.label_propagation.create(g,
                                    label_field='label',
                                    threshold=1e-10,
                                    max_iterations=max_iter)
    self.assertEqual(m.num_iterations, max_iter)

    # Test that the predict class should be None if all class probabilities
    # are equal (the isolated vertex -1 has no labeled neighbors).
    g = g.add_vertices(tc.SFrame({'__id': [-1]}))
    m = tc.label_propagation.create(g,
                                    label_field='label',
                                    threshold=1e-10,
                                    max_iterations=max_iter)
    result = m.graph.vertices
    self.assertIsNone(result[result['__id'] == -1]['predicted_label'][0])
def save(self, location):
    """
    Save the model. The model is saved as a directory which can then be
    loaded using the :py:func:`~turicreate.load_model` method.

    Parameters
    ----------
    location : string
        Target destination for the model. Can be a local path or remote URL.

    See Also
    ----------
    turicreate.load_model

    Examples
    ----------
    >>> model.save('my_model_file')
    >>> loaded_model = turicreate.load_model('my_model_file')
    """
    # Normalize the destination (local path or remote URL) before handing
    # it to the unity backend.
    target_url = _make_internal_url(location)
    return glconnect.get_unity().save_model(self, target_url)
def load_model(location):
    """
    Load any Turi Create model that was previously saved.

    This function assumes the model (can be any model) was previously saved in
    Turi Create model format with model.save(filename).

    Parameters
    ----------
    location : string
        Location of the model to load. Can be a local path or a remote URL.
        Because models are saved as directories, there is no file extension.

    Examples
    ----------
    >>> model.save('my_model_file')
    >>> loaded_model = tc.load_model('my_model_file')
    """
    # Check if the location is a dir_archive, if not, use glunpickler to load
    # as pure python model
    # If the location is a http location, skip the check, and directly proceed
    # to load model as dir_archive. This is because
    # 1) exists() does not work with http protocol, and
    # 2) GLUnpickler does not support http
    protocol = file_util.get_protocol(location)
    dir_archive_exists = False
    if protocol == '':
        # Plain local path: resolve it and probe for the archive marker file.
        model_path = file_util.expand_full_path(location)
        dir_archive_exists = file_util.exists(
            os.path.join(model_path, 'dir_archive.ini'))
    else:
        model_path = location
        if protocol in ['http', 'https']:
            # Cannot probe over HTTP; optimistically assume a dir_archive.
            dir_archive_exists = True
        else:
            # Remote (non-HTTP) protocols use POSIX-style path joining
            # regardless of the local OS.
            import posixpath
            dir_archive_exists = file_util.exists(
                posixpath.join(model_path, 'dir_archive.ini'))
    if not dir_archive_exists:
        raise IOError("Directory %s does not exist" % location)

    _internal_url = _make_internal_url(location)
    saved_state = glconnect.get_unity().load_model(_internal_url)
    # The archive version could be both bytes/unicode
    key = u'archive_version'
    archive_version = saved_state[key] if key in saved_state else saved_state[
        key.encode()]
    if archive_version < 0:
        raise ToolkitError("File does not appear to be a Turi Create model.")
    elif archive_version > 1:
        # Archive written by a newer Turi Create than this one understands.
        raise ToolkitError(
            "Unable to load model.\n\n"
            "This model looks to have been saved with a future version of Turi Create.\n"
            "Please upgrade Turi Create before attempting to load this model file."
        )
    elif archive_version == 1:
        # Current format: dispatch on the saved model name.
        cls = MODEL_NAME_MAP[saved_state['model_name']]
        if 'model' in saved_state:
            # this is a native model
            return cls(saved_state['model'])
        else:
            # this is a CustomModel
            model_data = saved_state['side_data']
            model_version = model_data['model_version']
            del model_data['model_version']
            return cls._load_version(model_data, model_version)
    else:
        # very legacy model format. Attempt pickle loading
        import sys
        sys.stderr.write(
            "This model was saved in a legacy model format. Compatibility cannot be guaranteed in future versions.\n"
        )
        if _six.PY3:
            raise ToolkitError(
                "Unable to load legacy model in Python 3.\n\n"
                "To migrate a model, try loading it using Turi Create 4.0 or\n"
                "later in Python 2 and then re-save it. The re-saved model should\n"
                "work in Python 3.")

        if 'graphlab' not in sys.modules:
            # Legacy pickles reference the old 'graphlab' package name;
            # alias the turicreate modules under that name so unpickling
            # can resolve them.
            sys.modules['graphlab'] = sys.modules['turicreate']
            # backward compatibility. Otherwise old pickles will not load
            sys.modules["turicreate_util"] = sys.modules['turicreate.util']
            sys.modules["graphlab_util"] = sys.modules['turicreate.util']

            # More backwards compatibility with the turicreate namespace code.
            for k, v in list(sys.modules.items()):
                if 'turicreate' in k:
                    sys.modules[k.replace('turicreate', 'graphlab')] = v
        #legacy loader
        # NOTE(review): pickle.loads on a model file is only safe for
        # trusted archives — pickle can execute arbitrary code.
        import pickle
        model_wrapper = pickle.loads(saved_state[b'model_wrapper'])
        return model_wrapper(saved_state[b'model_base'])