Ejemplo n.º 1
0
 def setUp(self):
     """
     Prepare the fixtures: ten sample rows, a Spark handle, and a distributed
     collection that yields those same rows.
     """
     rows = [Row(to_index="to_index%d" % idx, value=idx) for idx in range(10)]
     self.data = rows
     self.session = create_spark_for_test()
     # Only the positions are distributed; every position is then mapped back
     # to its row through the closure over ``rows``.
     positions = self.session.sparkContext.parallelize(range(len(rows)))
     self.data_rdd = positions.map(lambda pos: rows[pos])
Ejemplo n.º 2
0
    def setUp(self):
        """
        Prepare the fixtures: a Spark handle, the documents taken from a freshly
        constructed DocumentFrequencies, and a TFIDF instance whose ``Columns``
        attribute is overridden with short single-letter names.
        """
        self.session = create_spark_for_test()

        builder = DocumentFrequencies()
        # Maps "1".."4" to the integers 1..4.
        frequencies = {str(n): n for n in range(1, 5)}
        doc_freqs = builder.construct(10, frequencies)
        self.docs = doc_freqs.docs
        self.tfidf = TFIDF(
            doc_freqs,
            doc_freqs.docs,
            self.session.sparkContext,
        )

        class Columns:
            """
            Holds the column names for the return value.
            """
            token = "t"
            document = "d"
            value = "v"

        # Replace the column names on this TFIDF instance only.
        self.tfidf.Columns = Columns
Ejemplo n.º 3
0
 def setUp(self):
     """
     Prepare the fixtures: the object returned by ``create_spark_for_test()``
     and a fresh ``BagFeatures2TermFreq`` instance.
     """
     spark = create_spark_for_test()
     self.sc = spark
     transformer = BagFeatures2TermFreq()
     self.bag2tf = transformer
Ejemplo n.º 4
0
 def setUp(self):
     """
     Prepare the fixture: store the object returned by
     ``create_spark_for_test()`` on the test case.
     """
     spark = create_spark_for_test()
     self.sc = spark
Ejemplo n.º 5
0
 def setUp(self):
     """
     Prepare the fixtures: the object returned by ``create_spark_for_test()``
     and a fresh ``BagFeatures2DocFreq`` instance.
     """
     spark = create_spark_for_test()
     self.sc = spark
     transformer = BagFeatures2DocFreq()
     self.bag2df = transformer