コード例 #1
0
ファイル: word.py プロジェクト: fagan2888/pyhusky
 def load_phlist(self, wordlist):
     """Load words from a PyHuskyList into this word list.

     Builds a ``Word#load_phlist_py`` operation keyed by ``self.list_name``,
     passing ``[wordlist.pending_op]`` as the operation's third argument
     (presumably its upstream dependencies -- confirm against ``Operation``),
     submits it through ``scheduler.compute`` and marks this object as loaded.

     Args:
         wordlist: a ``PyHuskyList`` whose ``pending_op`` is handed to the
             load operation.

     Raises:
         AssertionError: if the words were already loaded, or if
             ``wordlist`` is not a ``PyHuskyList``.
     """
     assert not self._loaded, "The words were already loaded"
     assert isinstance(wordlist, PyHuskyList)
     param = {OperationParam.list_str: self.list_name}
     op = Operation("Word#load_phlist_py", param, [wordlist.pending_op])
     scheduler.compute(op)
     # Only flag the list as loaded once the operation has been submitted.
     self._loaded = True
コード例 #2
0
ファイル: word.py プロジェクト: fagan2888/pyhusky
 def wordcount(self):
     """Compute the word count of the loaded words using the C++ library.

     Submits a ``Word#wordcount_py`` operation for ``self.list_name``
     (tagged ``"Type": "cpp"``) through ``scheduler.compute`` and then sets
     ``self._computed`` to ``True``.

     Raises:
         AssertionError: if no words have been loaded yet.
     """
     assert self._loaded, "Words are not loaded"
     param = {OperationParam.list_str: self.list_name, "Type": "cpp"}
     op = Operation("Word#wordcount_py", param, [])
     scheduler.compute(op)
     self._computed = True
コード例 #3
0
ファイル: graph.py プロジェクト: fagan2888/pyhusky
 def load_edgelist_phlist(self, edgelist):
     """Load this graph from an edge list held in a PyHuskyList.

     Builds a ``Graph#load_edgelist_phlist_py`` operation keyed by
     ``self.list_name``, passing ``[edgelist.pending_op]`` as the operation's
     third argument (presumably its upstream dependencies -- confirm against
     ``Operation``), submits it through ``scheduler.compute`` and marks the
     graph as loaded.

     Args:
         edgelist: a ``PyHuskyList`` whose ``pending_op`` is handed to the
             load operation.

     Raises:
         AssertionError: if the graph was already loaded, or if ``edgelist``
             is not a ``PyHuskyList``.
     """
     assert not self._loaded, "The graph was already loaded"
     assert isinstance(edgelist, PyHuskyList)
     param = {OperationParam.list_str: self.list_name}
     op = Operation("Graph#load_edgelist_phlist_py", param,
                    [edgelist.pending_op])
     scheduler.compute(op)
     # Only flag the graph as loaded once the operation has been submitted.
     self._loaded = True
コード例 #4
0
 def write_to_hdfs(self, url):
     """Submit an operation that writes this list to ``url``.

     Builds a ``Functional#write_to_hdfs_py`` operation carrying ``url`` and
     ``self.list_name``, with ``[self.pending_op]`` as its third argument,
     and hands it to ``compute``.

     Args:
         url: destination passed through under ``OperationParam.url_str``.

     Returns:
         None.
     """
     settings = {}
     settings[OperationParam.url_str] = url
     settings[OperationParam.list_str] = self.list_name
     write_op = Operation("Functional#write_to_hdfs_py", settings,
                          [self.pending_op])
     compute(write_op)
コード例 #5
0
 def uncache(self):
     """Drop the cached copy of this list, if one is flagged.

     When ``self.pending_op.is_materialized`` is exactly ``False`` nothing is
     done. Otherwise a ``Functional#uncache_py`` operation for
     ``self.list_name`` is passed to ``compute`` and the flag is reset to
     ``False``.

     Returns:
         None in every case.
     """
     if self.pending_op.is_materialized is not False:
         settings = {OperationParam.list_str: self.list_name}
         uncache_op = Operation("Functional#uncache_py", settings, [])
         compute(uncache_op)
         # Record that the list is no longer materialized.
         self.pending_op.is_materialized = False
コード例 #6
0
 def cache(self):
     """Materialize this list unless it is already flagged as materialized.

     When ``self.pending_op.is_materialized`` is exactly ``True`` no
     operation is issued. Otherwise a ``Functional#cache_py`` operation for
     ``self.list_name``, with ``[self.pending_op]`` as its third argument,
     is passed to ``compute`` and the flag is set to ``True``.

     Returns:
         ``self``, so calls can be chained.
     """
     if self.pending_op.is_materialized is not True:
         settings = {OperationParam.list_str: self.list_name}
         cache_op = Operation("Functional#cache_py", settings,
                              [self.pending_op])
         compute(cache_op)
         # Record that the list is now materialized.
         self.pending_op.is_materialized = True
     return self
コード例 #7
0
ファイル: graph.py プロジェクト: fagan2888/pyhusky
 def compute_pagerank(self, iter):
     """Run PageRank on the loaded graph.

     Submits a ``Graph#pagerank_py`` operation for ``self.list_name``
     (tagged ``"Type": "cpp"``) through ``scheduler.compute`` and then sets
     ``self._computed`` to ``True``.

     Args:
         iter: sent as ``str(iter)`` under the ``"iter"`` key; presumably the
             number of PageRank iterations -- confirm against the backend.
             The name shadows the ``iter`` builtin but is kept so keyword
             callers continue to work.

     Raises:
         AssertionError: if the graph has not been loaded.
     """
     assert self._loaded, "The graph is not loaded"
     param = {
         "iter": str(iter),
         OperationParam.list_str: self.list_name,
         "Type": "cpp"
     }
     op = Operation("Graph#pagerank_py", param, [])
     scheduler.compute(op)
     self._computed = True
コード例 #8
0
ファイル: svm.py プロジェクト: fagan2888/pyhusky
    def load_pyhlist(self, xy_list):
        """Load SVM training data from a PyHuskyList.

        For a ``PyHuskyList`` argument, stores a
        ``SVMModel#SVM_load_pyhlist_py`` operation in ``self.pending_op``
        (with ``[xy_list.pending_op]`` as its third argument), submits it
        through ``scheduler.compute`` and sets ``self.loaded``.

        Args:
            xy_list: the ``PyHuskyList`` to load from.

        Returns:
            ``NotImplemented`` when ``xy_list`` is not a ``PyHuskyList``;
            otherwise None.

        Raises:
            AssertionError: if data has already been loaded.
        """
        assert not self.loaded

        # Unsupported input types are reported via the return value.
        if not isinstance(xy_list, PyHuskyList):
            return NotImplemented

        settings = {OperationParam.list_str: self.list_name}
        self.pending_op = Operation("SVMModel#SVM_load_pyhlist_py", settings,
                                    [xy_list.pending_op])
        scheduler.compute(self.pending_op)
        self.loaded = True
コード例 #9
0
ファイル: word.py プロジェクト: fagan2888/pyhusky
 def load_hdfs(self, url):
     """Load words from the location named by ``url``.

     Submits a ``Word#load_hdfs_py`` operation carrying ``self.list_name``
     and ``url`` (tagged ``"Type": "cpp"``) through ``scheduler.compute`` and
     marks this object as loaded.

     Args:
         url: location to read from; must be a ``str`` (subclasses are
             accepted).

     Raises:
         AssertionError: if the words were already loaded, or if ``url`` is
             not a string.
     """
     assert not self._loaded, "The words were already loaded"
     assert isinstance(url, str)
     param = {
         OperationParam.list_str: self.list_name,
         "url": url,
         "Type": "cpp"
     }
     op = Operation("Word#load_hdfs_py", param, [])
     scheduler.compute(op)
     self._loaded = True
コード例 #10
0
ファイル: graph.py プロジェクト: fagan2888/pyhusky
 def load_adjlist_hdfs(self, url):
     """Load this graph from an adjacency list at ``url``.

     Submits a ``Graph#load_adjlist_hdfs_py`` operation carrying
     ``self.list_name`` and ``url`` (tagged ``"Type": "cpp"``) through
     ``scheduler.compute`` and marks the graph as loaded.

     Args:
         url: location to read from; must be a ``str`` (subclasses are
             accepted).

     Raises:
         AssertionError: if ``url`` is not a string, or if the graph was
             already loaded.
     """
     assert isinstance(url, str)
     assert not self._loaded, "The graph was already loaded"
     param = {
         OperationParam.list_str: self.list_name,
         "url": url,
         "Type": "cpp"
     }
     op = Operation("Graph#load_adjlist_hdfs_py", param, [])
     scheduler.compute(op)
     self._loaded = True
コード例 #11
0
 def foreach(self, func):
     """Submit ``func`` to be applied to this list.

     For an object exposing ``__call__``, builds a ``Functional#foreach_py``
     operation carrying ``func`` and ``self.list_name`` (with
     ``[self.pending_op]`` as its third argument) and passes it to
     ``compute``.

     Args:
         func: the callable to submit.

     Returns:
         ``NotImplemented`` when ``func`` has no ``__call__`` attribute;
         otherwise None.
     """
     # Non-callables are reported via the return value.
     if not hasattr(func, '__call__'):
         return NotImplemented

     settings = {
         OperationParam.lambda_str: func,
         OperationParam.list_str: self.list_name
     }
     foreach_op = Operation("Functional#foreach_py", settings,
                            [self.pending_op])
     compute(foreach_op)
コード例 #12
0
ファイル: svm.py プロジェクト: fagan2888/pyhusky
    def load_hdfs(self, url):
        """Load SVM training data from the location named by ``url``.

        Submits a ``SVMModel#SVM_load_hdfs_py`` operation carrying ``url``
        and ``self.list_name`` (tagged ``"Type": "cpp"``) through
        ``scheduler.compute`` and then sets ``self.loaded``.

        Args:
            url: location to read from; must be a ``str``.

        Raises:
            AssertionError: if ``url`` is not a string, or if data has
                already been loaded.
        """
        assert isinstance(url, str)
        assert not self.loaded

        settings = {}
        settings["url"] = url
        settings[OperationParam.list_str] = self.list_name
        settings["Type"] = "cpp"

        load_op = Operation("SVMModel#SVM_load_hdfs_py", settings, [])
        scheduler.compute(load_op)
        self.loaded = True
コード例 #13
0
    def load_hdfs(self, url, is_sparse=0, fmat="tsv"):
        """Load linear-regression training data from ``url``.

        Submits a ``LinearRegressionModel#LinearR_load_hdfs_py`` operation
        (tagged ``"Type": "cpp"``) through ``scheduler.compute`` and then
        sets ``self.loaded``.

        Args:
            url: location to read from; must be a ``str``.
            is_sparse: sent as ``str(is_sparse)`` under ``"is_sparse"``.
            fmat: sent unchanged under the ``"format"`` key.

        Raises:
            AssertionError: if ``url`` is not a string, or if data has
                already been loaded.
        """
        assert isinstance(url, str)
        assert not self.loaded

        settings = {}
        settings["url"] = url
        settings[OperationParam.list_str] = self.list_name
        settings["is_sparse"] = str(is_sparse)
        settings["format"] = fmat
        settings["Type"] = "cpp"

        load_op = Operation("LinearRegressionModel#LinearR_load_hdfs_py",
                            settings, [])
        scheduler.compute(load_op)
        self.loaded = True
コード例 #14
0
    def load_pyhlist(self, xy_list, is_sparse=1):
        """Load logistic-regression training data from a PyHuskyList.

        For a ``PyHuskyList`` argument, stores a
        ``LogisticRegressionModel#LogisticR_load_pyhlist_py`` operation in
        ``self.pending_op`` (with ``[xy_list.pending_op]`` as its third
        argument), submits it through ``scheduler.compute`` and sets
        ``self.loaded``.

        Args:
            xy_list: the ``PyHuskyList`` to load from.
            is_sparse: sent as ``str(is_sparse)`` under ``"is_sparse"``.

        Returns:
            ``NotImplemented`` when ``xy_list`` is not a ``PyHuskyList``;
            otherwise None.

        Raises:
            AssertionError: if data has already been loaded.
        """
        assert not self.loaded

        # Unsupported input types are reported via the return value.
        if not isinstance(xy_list, PyHuskyList):
            return NotImplemented

        settings = {}
        settings[OperationParam.list_str] = self.list_name
        settings["is_sparse"] = str(is_sparse)
        self.pending_op = Operation(
            "LogisticRegressionModel#LogisticR_load_pyhlist_py", settings,
            [xy_list.pending_op])
        scheduler.compute(self.pending_op)
        self.loaded = True
コード例 #15
0
ファイル: svm.py プロジェクト: fagan2888/pyhusky
    def __init__(self, n_feature=-1):
        """Create the model and register it with the backend.

        After the base-class initializer runs, ``"SVM"`` is appended to
        ``self.list_name``, the state flags and learned values are reset,
        and a ``SVMModel#SVM_init_py`` operation (tagged ``"Type": "cpp"``)
        is submitted through ``scheduler.compute``.

        Args:
            n_feature: must be an ``int``; sent as ``str(n_feature)`` under
                the ``"n_feature"`` key. Defaults to ``-1``.

        Raises:
            AssertionError: if ``n_feature`` is not an ``int``.
        """
        assert isinstance(n_feature, int)
        super(SVMModel, self).__init__()

        self.list_name += "SVM"

        # Nothing is loaded or trained yet.
        self.loaded = False
        self.trained = False

        # Learned values stay unset for now.
        self.param = None
        self.intercept = None

        settings = {}
        settings["n_feature"] = str(n_feature)
        settings[OperationParam.list_str] = self.list_name
        settings["Type"] = "cpp"

        init_op = Operation("SVMModel#SVM_init_py", settings, [])
        scheduler.compute(init_op)