Example #1
    def setUp(self):
        # Row here is pyspark.sql.Row; create_spark_for_test is the project's
        # shared test helper (not shown on this page).
        data = [Row(to_index="to_index%d" % i, value=i) for i in range(10)]
        self.data = data
        self.session = create_spark_for_test()
        self.data_rdd = self.session.sparkContext \
            .parallelize(range(len(data))) \
            .map(lambda x: data[x])
Example #2
File: test_tfidf.py  Project: y1026/ml
    def setUp(self):
        self.session = create_spark_for_test()

        # Build a DocumentFrequencies model and wrap it in a TFIDF transformer
        # bound to the test Spark context.
        df = DocumentFrequencies().construct(10, {str(i): i for i in range(1, 5)})
        self.docs = df.docs
        self.tfidf = TFIDF(df, df.docs, self.session.sparkContext)

        class Columns:
            """
            Stores column names for the return value.
            """
            token = "t"
            document = "d"
            value = "v"

        self.tfidf.Columns = Columns
Example #3
    def setUp(self):
        self.sc = create_spark_for_test()
        self.bag2tf = BagFeatures2TermFreq()  # transformer under test
Example #4
    def setUp(self):
        self.sc = create_spark_for_test()
Example #5
    def setUp(self):
        self.sc = create_spark_for_test()
        self.bag2df = BagFeatures2DocFreq()  # transformer under test
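
Every example above calls create_spark_for_test(), a helper from the project's test utilities that is not reproduced on this page. A minimal sketch of what such a helper could look like, assuming a plain local SparkSession is enough for these tests (the real helper may set additional configuration):

from pyspark.sql import SparkSession


def create_spark_for_test(app_name="test"):
    # Build (or reuse) a SparkSession that runs locally on all available cores.
    # Examples #1 and #2 access .sparkContext on the returned object, so the
    # helper is assumed to return a SparkSession rather than a bare SparkContext.
    return (SparkSession.builder
            .master("local[*]")
            .appName(app_name)
            .getOrCreate())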