def setUp(self):
    # Ten sample rows, also mirrored in an RDD for the tests below.
    data = [Row(to_index="to_index%d" % i, value=i) for i in range(10)]
    self.data = data
    self.session = create_spark_for_test()
    self.data_rdd = self.session.sparkContext \
        .parallelize(range(len(data))) \
        .map(lambda x: data[x])
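
# Example use of the fixture above: a round-trip check using only standard
# PySpark RDD methods; the test name is hypothetical.
def test_data_rdd_roundtrip(self):
    # collect() preserves the parallelize() ordering, so the RDD should
    # yield exactly the rows it was built from.
    self.assertEqual(self.data_rdd.collect(), self.data)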

def setUp(self):
    self.session = create_spark_for_test()
    df = DocumentFrequencies().construct(10, {str(i): i for i in range(1, 5)})
    self.docs = df.docs
    self.tfidf = TFIDF(df, df.docs, self.session.sparkContext)

    class Columns:
        """
        Stores column names for the return value.
        """
        token = "t"
        document = "d"
        value = "v"

    self.tfidf.Columns = Columns
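
# A hedged sketch of how this fixture might be used, assuming the TFIDF
# transformer follows the callable-Transformer pattern and consumes an RDD
# of rows keyed by the Columns names set above. The call signature, input
# values, and test name are assumptions, not confirmed by this file.
def test_tfidf_sketch(self):
    rows = self.session.sparkContext.parallelize([
        Row(t="1", d="doc1", v=4),  # token "1" has document frequency 1
    ])
    result = self.tfidf(rows)  # assumed __call__ API of the transformer
    self.assertIsNotNone(result)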

def setUp(self):
    self.sc = create_spark_for_test()
    self.bag2tf = BagFeatures2TermFreq()
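
# A hedged sketch of exercising the fixture, assuming BagFeatures2TermFreq
# is callable on an RDD of (token, document, value) bag entries and that
# create_spark_for_test() returns a SparkSession (so the context lives at
# self.sc.sparkContext, matching the self.session usage above). The input
# shape and test name are assumptions.
def test_bag2tf_sketch(self):
    bags = self.sc.sparkContext.parallelize([
        ("token_a", "doc1", 2),
        ("token_b", "doc1", 2),
    ])
    term_freqs = self.bag2tf(bags)  # assumed __call__ API
    self.assertIsNotNone(term_freqs)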

def setUp(self):
    self.sc = create_spark_for_test()
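
# A minimal sanity check for this fixture, assuming create_spark_for_test()
# returns a SparkSession as in the fixtures above; only standard PySpark
# APIs are used, and the test name is hypothetical.
def test_spark_is_alive(self):
    rdd = self.sc.sparkContext.parallelize(range(5))
    self.assertEqual(rdd.count(), 5)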

def setUp(self):
    self.sc = create_spark_for_test()
    self.bag2df = BagFeatures2DocFreq()
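
# A hedged sketch of exercising the fixture, assuming BagFeatures2DocFreq
# is callable on an RDD of (token, document, value) bag entries and counts
# the distinct documents per token (inferred from the class name; the call
# signature, input shape, and test name are all assumptions).
def test_bag2df_sketch(self):
    bags = self.sc.sparkContext.parallelize([
        ("token_a", "doc1", 1),
        ("token_a", "doc2", 1),
        ("token_b", "doc1", 3),
    ])
    doc_freqs = self.bag2df(bags)  # assumed __call__ API
    self.assertIsNotNone(doc_freqs)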