def load_phlist(self, wordlist):
    """Load the words from an existing ``PyHuskyList``.

    Builds a ``Word#load_phlist_py`` operation keyed by this object's
    ``list_name``, passes ``wordlist.pending_op`` as the single entry of the
    operation's third argument (presumably its dependency list -- confirm
    against ``Operation``), submits it through ``scheduler.compute`` and then
    flags this object as loaded.

    :param wordlist: the ``PyHuskyList`` to load from.
    :raises AssertionError: if this object is already loaded, or if
        ``wordlist`` is not a ``PyHuskyList``.
    """
    # Test the flag's truthiness instead of comparing it to the ``False``
    # literal.
    assert not self._loaded, "The words were already loaded"
    assert isinstance(wordlist, PyHuskyList)

    param = {OperationParam.list_str: self.list_name}
    op = Operation("Word#load_phlist_py", param, [wordlist.pending_op])
    scheduler.compute(op)
    self._loaded = True
def wordcount(self):
    """Compute the word count using the C++ library.

    Submits a ``Word#wordcount_py`` operation (parameter ``"Type"`` set to
    ``"cpp"``) for this object's ``list_name`` through ``scheduler.compute``
    and then sets ``self._computed`` to ``True``.

    :raises AssertionError: if the words have not been loaded yet.
    """
    # Test the flag's truthiness instead of comparing it to the ``True``
    # literal.
    assert self._loaded, "Words are not loaded"

    param = {OperationParam.list_str: self.list_name, "Type": "cpp"}
    op = Operation("Word#wordcount_py", param, [])
    scheduler.compute(op)
    self._computed = True
def load_edgelist_phlist(self, edgelist):
    """Load the graph from an edge list held in a ``PyHuskyList``.

    Builds a ``Graph#load_edgelist_phlist_py`` operation keyed by this
    object's ``list_name``, passes ``edgelist.pending_op`` as the single entry
    of the operation's third argument (presumably its dependency list --
    confirm against ``Operation``), submits it through ``scheduler.compute``
    and then flags this object as loaded.

    :param edgelist: the ``PyHuskyList`` to load from.
    :raises AssertionError: if the graph is already loaded, or if
        ``edgelist`` is not a ``PyHuskyList``.
    """
    # Test the flag's truthiness instead of comparing it to the ``False``
    # literal.
    assert not self._loaded, "The graph was already loaded"
    assert isinstance(edgelist, PyHuskyList)

    param = {OperationParam.list_str: self.list_name}
    op = Operation("Graph#load_edgelist_phlist_py", param, [edgelist.pending_op])
    scheduler.compute(op)
    self._loaded = True
def write_to_hdfs(self, url):
    """Submit a ``Functional#write_to_hdfs_py`` operation for this list.

    The operation receives ``url`` under ``OperationParam.url_str`` and this
    object's ``list_name`` under ``OperationParam.list_str``;
    ``self.pending_op`` is passed as the single entry of its third argument.

    :param url: value forwarded as the operation's URL parameter.
    :returns: always ``None``.
    """
    args = {}
    args[OperationParam.url_str] = url
    args[OperationParam.list_str] = self.list_name
    compute(Operation("Functional#write_to_hdfs_py", args, [self.pending_op]))
    return None
def uncache(self):
    """Submit ``Functional#uncache_py`` unless already un-materialized.

    Nothing happens when ``self.pending_op.is_materialized`` is exactly
    ``False``.  Otherwise the operation is submitted for this object's
    ``list_name`` and the flag is reset to ``False`` afterwards.

    :returns: always ``None``.
    """
    # Identity comparison on purpose: only the literal ``False`` skips work.
    if self.pending_op.is_materialized is not False:
        args = {OperationParam.list_str: self.list_name}
        compute(Operation("Functional#uncache_py", args, []))
        self.pending_op.is_materialized = False
    return None
def cache(self):
    """Submit ``Functional#cache_py`` unless already materialized.

    Nothing is submitted when ``self.pending_op.is_materialized`` is exactly
    ``True``.  Otherwise the operation is submitted for this object's
    ``list_name`` with ``self.pending_op`` as the single entry of its third
    argument, and the flag is set to ``True`` afterwards.

    :returns: ``self`` in both cases, so calls can be chained.
    """
    # Identity comparison on purpose: only the literal ``True`` skips work.
    if self.pending_op.is_materialized is not True:
        args = {OperationParam.list_str: self.list_name}
        compute(Operation("Functional#cache_py", args, [self.pending_op]))
        self.pending_op.is_materialized = True
    return self
def compute_pagerank(self, iter):
    """Submit a ``Graph#pagerank_py`` operation for the loaded graph.

    The operation receives ``str(iter)`` under ``"iter"``, this object's
    ``list_name`` and ``"Type": "cpp"``.  After ``scheduler.compute`` returns,
    ``self._computed`` is set to ``True``.

    :param iter: forwarded as a string; presumably the number of PageRank
        iterations -- confirm against the backend.
    :raises AssertionError: if the graph has not been loaded yet.
    """
    # NOTE: ``iter`` shadows the builtin inside this method; the name is kept
    # because it is part of the public signature.
    # Test the flag's truthiness instead of comparing it to the ``True``
    # literal.
    assert self._loaded, "The graph is not loaded"

    param = {
        "iter": str(iter),
        OperationParam.list_str: self.list_name,
        "Type": "cpp",
    }
    op = Operation("Graph#pagerank_py", param, [])
    scheduler.compute(op)
    self._computed = True
def load_pyhlist(self, xy_list):
    """Load this model's data from a ``PyHuskyList``.

    For a ``PyHuskyList`` argument, a ``SVMModel#SVM_load_pyhlist_py``
    operation is stored in ``self.pending_op`` (with ``xy_list.pending_op`` as
    the single entry of its third argument), submitted through
    ``scheduler.compute``, and ``self.loaded`` is set to ``True``.

    :param xy_list: the ``PyHuskyList`` to load from.
    :returns: ``NotImplemented`` when ``xy_list`` is not a ``PyHuskyList``;
        otherwise ``None``.
    :raises AssertionError: if the model is already loaded.
    """
    assert not self.loaded

    # Unsupported input: bail out before touching any state.
    if not isinstance(xy_list, PyHuskyList):
        return NotImplemented

    args = {OperationParam.list_str: self.list_name}
    self.pending_op = Operation(
        "SVMModel#SVM_load_pyhlist_py", args, [xy_list.pending_op])
    scheduler.compute(self.pending_op)
    self.loaded = True
def load_hdfs(self, url):
    """Load the words from ``url`` via a ``Word#load_hdfs_py`` operation.

    The operation receives this object's ``list_name``, ``url`` under
    ``"url"`` and ``"Type": "cpp"``.  After ``scheduler.compute`` returns, this
    object is flagged as loaded.

    :param url: location to load from; must be a ``str`` (subclasses are
        accepted).
    :raises AssertionError: if words were already loaded, or if ``url`` is
        not a string.
    """
    # Test the flag's truthiness instead of comparing it to the ``False``
    # literal.
    assert not self._loaded, "The words were already loaded"
    # ``isinstance`` rather than an exact ``type(...) is str`` match, so that
    # str subclasses are accepted like in the other ``load_hdfs`` methods.
    assert isinstance(url, str)

    param = {
        OperationParam.list_str: self.list_name,
        "url": url,
        "Type": "cpp",
    }
    op = Operation("Word#load_hdfs_py", param, [])
    scheduler.compute(op)
    self._loaded = True
def load_adjlist_hdfs(self, url):
    """Load the graph from ``url`` via ``Graph#load_adjlist_hdfs_py``.

    The operation receives this object's ``list_name``, ``url`` under
    ``"url"`` and ``"Type": "cpp"``.  After ``scheduler.compute`` returns, this
    object is flagged as loaded.

    :param url: location to load from; must be a ``str`` (subclasses are
        accepted).
    :raises AssertionError: if ``url`` is not a string, or if the graph was
        already loaded.
    """
    # ``isinstance`` rather than an exact ``type(...) is str`` match, so that
    # str subclasses are accepted like in the other HDFS loaders.
    assert isinstance(url, str)
    # Test the flag's truthiness instead of comparing it to the ``False``
    # literal.
    assert not self._loaded, "The graph was already loaded"

    param = {
        OperationParam.list_str: self.list_name,
        "url": url,
        "Type": "cpp",
    }
    op = Operation("Graph#load_adjlist_hdfs_py", param, [])
    scheduler.compute(op)
    self._loaded = True
def foreach(self, func):
    """Submit a ``Functional#foreach_py`` operation carrying ``func``.

    ``func`` is placed in the operation parameters under
    ``OperationParam.lambda_str`` together with this object's ``list_name``;
    ``self.pending_op`` is passed as the single entry of the operation's third
    argument.

    :param func: the callable to ship with the operation.
    :returns: ``None`` after the operation was submitted, or
        ``NotImplemented`` when ``func`` is not callable.
    """
    # ``callable`` checks the object's type, so an object that merely carries
    # an instance attribute named ``__call__`` is correctly rejected.
    if not callable(func):
        return NotImplemented

    param = {
        OperationParam.lambda_str: func,
        OperationParam.list_str: self.list_name,
    }
    op = Operation("Functional#foreach_py", param, [self.pending_op])
    compute(op)
    return
def load_hdfs(self, url):
    """Load this model's data from ``url`` via ``SVMModel#SVM_load_hdfs_py``.

    The operation receives ``url`` under ``"url"``, this object's
    ``list_name`` and ``"Type": "cpp"``.  After ``scheduler.compute`` returns,
    ``self.loaded`` is set to ``True``.

    :param url: location to load from; must be a ``str``.
    :raises AssertionError: if ``url`` is not a string, or if the model is
        already loaded.
    """
    assert isinstance(url, str)
    assert not self.loaded

    args = {"url": url}
    args[OperationParam.list_str] = self.list_name
    args["Type"] = "cpp"
    scheduler.compute(Operation("SVMModel#SVM_load_hdfs_py", args, []))
    self.loaded = True
def load_hdfs(self, url, is_sparse=0, fmat="tsv"):
    """Load this model's data from ``url``.

    Submits a ``LinearRegressionModel#LinearR_load_hdfs_py`` operation whose
    parameters are ``url``, this object's ``list_name``, ``str(is_sparse)``
    under ``"is_sparse"``, ``fmat`` under ``"format"`` and ``"Type": "cpp"``.
    After ``scheduler.compute`` returns, ``self.loaded`` is set to ``True``.

    :param url: location to load from; must be a ``str``.
    :param is_sparse: forwarded as a string; defaults to ``0``.
    :param fmat: forwarded as the ``"format"`` parameter; defaults to
        ``"tsv"``.
    :raises AssertionError: if ``url`` is not a string, or if the model is
        already loaded.
    """
    assert isinstance(url, str)
    assert not self.loaded

    args = {"url": url}
    args[OperationParam.list_str] = self.list_name
    args["is_sparse"] = str(is_sparse)
    args["format"] = fmat
    args["Type"] = "cpp"
    scheduler.compute(
        Operation("LinearRegressionModel#LinearR_load_hdfs_py", args, []))
    self.loaded = True
def load_pyhlist(self, xy_list, is_sparse=1):
    """Load this model's data from a ``PyHuskyList``.

    For a ``PyHuskyList`` argument, a
    ``LogisticRegressionModel#LogisticR_load_pyhlist_py`` operation is stored
    in ``self.pending_op`` (with ``xy_list.pending_op`` as the single entry of
    its third argument), submitted through ``scheduler.compute``, and
    ``self.loaded`` is set to ``True``.

    :param xy_list: the ``PyHuskyList`` to load from.
    :param is_sparse: forwarded as a string; defaults to ``1``.
    :returns: ``NotImplemented`` when ``xy_list`` is not a ``PyHuskyList``;
        otherwise ``None``.
    :raises AssertionError: if the model is already loaded.
    """
    assert not self.loaded

    # Unsupported input: bail out before touching any state.
    if not isinstance(xy_list, PyHuskyList):
        return NotImplemented

    args = {OperationParam.list_str: self.list_name}
    args["is_sparse"] = str(is_sparse)
    self.pending_op = Operation(
        "LogisticRegressionModel#LogisticR_load_pyhlist_py",
        args,
        [xy_list.pending_op])
    scheduler.compute(self.pending_op)
    self.loaded = True
def __init__(self, n_feature=-1):
    """Initialise the model and submit ``SVMModel#SVM_init_py``.

    After the parent initialiser has run, ``"SVM"`` is appended to
    ``self.list_name``, the ``loaded`` and ``trained`` flags are cleared, and
    ``param`` and ``intercept`` are reset to ``None``.  The init operation is
    then submitted with ``str(n_feature)``, the list name and
    ``"Type": "cpp"``.

    :param n_feature: must be an ``int``; forwarded as a string.  Defaults to
        ``-1`` (presumably "unknown" -- confirm against the backend).
    :raises AssertionError: if ``n_feature`` is not an ``int``.
    """
    assert isinstance(n_feature, int)
    super(SVMModel, self).__init__()

    self.list_name += "SVM"
    self.loaded = self.trained = False
    self.param = self.intercept = None

    args = {"n_feature": str(n_feature)}
    args[OperationParam.list_str] = self.list_name
    args["Type"] = "cpp"
    scheduler.compute(Operation("SVMModel#SVM_init_py", args, []))