Example #1
0
 def test_set_rdd(self):
     """Assigning ``rdd`` must be visible through both ``dataframe`` and ``rdd``."""
     # Arrange: wrap a one-row DataFrame, and build an RDD to replace it with.
     spark = self.context.spark
     initial_dataframe = spark.createDataFrame([['a']], 'column1: string')
     replacement_rdd = spark.sparkContext.parallelize([Row(column1='aa')])
     wrapper = CardoDataFrame(initial_dataframe, '6')

     # Act: swap the wrapped data through the ``rdd`` setter.
     wrapper.rdd = replacement_rdd

     # Assert: both views now expose the replacement rows (order-insensitive).
     self.assertItemsEqual(replacement_rdd.collect(),
                           wrapper.dataframe.collect())
     self.assertItemsEqual(replacement_rdd.collect(),
                           wrapper.rdd.collect())
Example #2
0
    def test_unpersist_rdd(self):
        """The originally wrapped RDD must not be cached after ``unpersist``.

        Scenario: wrap an RDD, persist the wrapper, replace its ``rdd`` with a
        different RDD, then unpersist the wrapper.
        """
        # Arrange: persist a wrapper around one RDD, then swap in another.
        spark_context = self.context.spark.sparkContext
        original_rdd = spark_context.parallelize([Row(column1='aa')])
        replacement_rdd = spark_context.parallelize([Row(column1='bb')])
        wrapper = CardoDataFrame(original_rdd, '')
        wrapper.persist()
        wrapper.rdd = replacement_rdd

        # Act
        wrapper.unpersist()

        # Assert: the RDD the wrapper started with is no longer cached.
        self.assertFalse(original_rdd.is_cached)