Exemple #1
0
def run_tests():
    """
    Set up and run the doc tests.

    A PSparkContext is created and exposed to the doctests under the name
    ``psc``. The doctests of this module are run with ``doctest.ELLIPSIS``
    enabled, a colored one-line summary is printed, and ``SystemExit`` with
    code -1 is raised if any doctest failed.
    """
    import doctest
    import sys
    from sparklingpandas.pcontext import PSparkContext

    globs = globals().copy()
    # The small batch size here ensures that we see multiple batches,
    # even in these small test examples:
    psc = PSparkContext.simple('local[4]', 'PythonTest', batchSize=2)
    globs['psc'] = psc
    try:
        failure_count, test_count = doctest.testmod(
            globs=globs, optionflags=doctest.ELLIPSIS)
    finally:
        # Stop the Spark context even when doctest itself blows up, so a
        # crashed run does not leave a context behind.
        psc.sc.stop()

    msg = "{0} test ran {1} failures".format(test_count, failure_count)

    # termcolor is optional; only the import is guarded so that an
    # ImportError raised anywhere else is not silently swallowed.
    try:
        from termcolor import colored
    except ImportError:
        colored = None

    if colored is not None:
        msg = colored(msg, 'red' if failure_count else 'green')
    else:
        # Fall back to raw ANSI escapes: bright red / bright green, then
        # reset the terminal attributes.
        prefix = '\033[91m' if failure_count else '\033[92m'
        msg = prefix + msg + '\033[0m'
    # Single-argument parenthesised form works as a Python 2 print
    # statement and as a Python 3 function call.
    print(msg)

    if failure_count:
        # sys.exit instead of the site-provided exit() helper, which is not
        # guaranteed to exist (e.g. when running with -S).
        sys.exit(-1)
Exemple #2
0
 def setUp(self):
     """Prepare the basic panda spark test case.

     Snapshots ``sys.path``, creates a PSparkContext named after the test
     class and builds the shared fixtures. Each input list is loaded once
     through ``self.psc.DataFrame`` (``*pframe``) and once through
     ``pandas.DataFrame`` (``*frame``).
     """
     self._old_sys_path = sys.path[:]
     self.psc = PSparkContext.simple(
         'local[4]', self.__class__.__name__, batchSize=2)

     # Two string columns that most tests can reuse.
     text_rows = [
         ("tea", "happy"),
         ("water", "sad"),
         ("coffee", "happiest"),
         ("tea", "water"),
     ]
     self.basicinput = text_rows
     self.basiccolumns = ['magic', 'thing']
     self.basicpframe = self.psc.DataFrame(
         text_rows, columns=self.basiccolumns)
     self.basicframe = pandas.DataFrame(
         text_rows, columns=self.basiccolumns)

     # Two integer columns.
     pair_rows = [
         (1, 2), (3, 4), (1, 3), (2, 6), (3, 100), (3, 20), (8, 9)]
     self.numericinput = pair_rows
     self.numericpframe = self.psc.DataFrame(
         pair_rows, columns=['a', 'b'])
     self.numericframe = pandas.DataFrame(
         pair_rows, columns=['a', 'b'])

     # Three numeric columns; the third mixes floats and ints.
     triple_rows = [
         (1, 2, -100.5),
         (3, 4, 93),
         (1, 3, 100.2),
         (2, 6, 0.5),
         (3, 100, 1.5),
         (3, 20, 80),
         (8, 9, 20),
     ]
     self.numericthreeinput = triple_rows
     self.numericthreepframe = self.psc.DataFrame(
         triple_rows, columns=['a', 'b', 'c'])
     self.numericthreeframe = pandas.DataFrame(
         triple_rows, columns=['a', 'b', 'c'])

     # Two integer columns followed by a string column.
     mixed_rows = [(1, 2, "coffee"), (4, 5, "cheese")]
     self.mixedinput = mixed_rows
     self.mixedpframe = self.psc.DataFrame(
         mixed_rows, columns=['a', 'b', 'c'])
     self.mixedframe = pandas.DataFrame(
         mixed_rows, columns=['a', 'b', 'c'])
 def setUp(self):
     """
     Prepare the basic panda spark test case.

     Keeps a copy of the current ``sys.path`` on the instance and creates
     a PSparkContext whose application name is the test class name.
     """
     self._old_sys_path = sys.path[:]
     self.psc = PSparkContext.simple(
         'local[4]', self.__class__.__name__, batchSize=2)
 def setUp(self):
     """Set up the basic panda spark test case.

     Builds a SparkConf with a ``local[4]`` master, creates the
     PSparkContext from it and then prepares the shared fixtures. Every
     ``*input`` list is loaded twice: through ``self.psc.DataFrame``
     (``*pframe``) and through ``pandas.DataFrame`` (``*frame``).
     """
     logging.info("Setting up spark context")
     # Snapshot of sys.path; presumably restored on teardown -- confirm.
     self._old_sys_path = list(sys.path)
     class_name = self.__class__.__name__
     conf = SparkConf()
     conf.set("spark.cores.max", "4")
     conf.set("spark.master", "local[4]")
     # The Spark property for the application name is "spark.app.name";
     # the previous "spark.app-name" key is not a Spark setting, so the
     # class name never became the application name.
     conf.set("spark.app.name", class_name)
     # NOTE(review): presumably needed because each test case creates its
     # own context -- confirm.
     conf.set("spark.driver.allowMultipleContexts", "true")
     self.psc = PSparkContext.simple(conf=conf)
     # Add a common basic input and basicpframe we can reuse in testing
     self.basicinput = [
         ("tea", "happy"),
         ("water", "sad"),
         ("coffee", "happiest"),
         ("tea", "water")]
     self.basiccolumns = ['magic', 'thing']
     self.basicpframe = self.psc.DataFrame(
         self.basicinput, columns=self.basiccolumns)
     self.basicframe = pandas.DataFrame(
         self.basicinput, columns=self.basiccolumns)
     # Add a numeric frame
     self.numericinput = [
         (1, 2), (3, 4), (1, 3), (2, 6), (3, 100), (3, 20), (8, 9)]
     self.numericpframe = self.psc.DataFrame(
         self.numericinput, columns=['a', 'b'])
     self.numericframe = pandas.DataFrame(
         self.numericinput, columns=['a', 'b'])
     # A three column numeric frame
     self.numericthreeinput = [
         (1, 2, -100.5),
         (3, 4, 93),
         (1, 3, 100.2),
         (2, 6, 0.5),
         (3, 100, 1.5),
         (3, 20, 80),
         (8, 9, 20)]
     self.numericthreepframe = self.psc.DataFrame(
         self.numericthreeinput, columns=['a', 'b', 'c'])
     self.numericthreeframe = pandas.DataFrame(
         self.numericthreeinput, columns=['a', 'b', 'c'])
     # Two integer columns followed by a string column
     self.mixedinput = [(1, 2, "coffee"), (4, 5, "cheese")]
     self.mixedpframe = self.psc.DataFrame(self.mixedinput,
                                           columns=['a', 'b', 'c'])
     self.mixedframe = pandas.DataFrame(self.mixedinput,
                                        columns=['a', 'b', 'c'])
     # Mixed NA frame: column 'd' is None in every row
     self.mixednainput = [(1, 2, "coffee", None), (4, 5, "cheese", None)]
     self.mixednapframe = self.psc.DataFrame(self.mixednainput,
                                             columns=['a', 'b', 'c', 'd'])
     self.mixednaframe = pandas.DataFrame(self.mixednainput,
                                          columns=['a', 'b', 'c', 'd'])
 def setUp(self):
     """Prepare the basic panda spark test case.

     Creates a PSparkContext named after the test class and fills in the
     shared fixtures. For each ``*input`` list there is a ``*pframe``
     built with ``self.psc.DataFrame`` and a ``*frame`` built with
     ``pandas.DataFrame``.
     """
     self._old_sys_path = sys.path[:]
     app_name = self.__class__.__name__
     self.psc = PSparkContext.simple('local[4]', app_name, batchSize=2)

     # Short handles for the two constructors used by every fixture.
     new_pframe = self.psc.DataFrame
     new_frame = pandas.DataFrame

     # Common string input and frames we can reuse in testing.
     self.basicinput = [("tea", "happy"), ("water", "sad"),
                        ("coffee", "happiest"), ("tea", "water")]
     self.basiccolumns = ['magic', 'thing']
     self.basicpframe = new_pframe(self.basicinput,
                                   columns=self.basiccolumns)
     self.basicframe = new_frame(self.basicinput,
                                 columns=self.basiccolumns)

     # Two-column numeric fixture.
     self.numericinput = [(1, 2), (3, 4), (1, 3), (2, 6), (3, 100),
                          (3, 20), (8, 9)]
     self.numericpframe = new_pframe(self.numericinput, columns=['a', 'b'])
     self.numericframe = new_frame(self.numericinput, columns=['a', 'b'])

     # Three-column numeric fixture.
     self.numericthreeinput = [(1, 2, -100.5), (3, 4, 93), (1, 3, 100.2),
                               (2, 6, 0.5), (3, 100, 1.5), (3, 20, 80),
                               (8, 9, 20)]
     self.numericthreepframe = new_pframe(self.numericthreeinput,
                                          columns=['a', 'b', 'c'])
     self.numericthreeframe = new_frame(self.numericthreeinput,
                                        columns=['a', 'b', 'c'])

     # Integers mixed with a string column.
     self.mixedinput = [(1, 2, "coffee"), (4, 5, "cheese")]
     self.mixedpframe = new_pframe(self.mixedinput,
                                   columns=['a', 'b', 'c'])
     self.mixedframe = new_frame(self.mixedinput,
                                 columns=['a', 'b', 'c'])

     # Same as the mixed fixture plus a column 'd' that is None everywhere.
     self.mixednainput = [(1, 2, "coffee", None), (4, 5, "cheese", None)]
     self.mixednapframe = new_pframe(self.mixednainput,
                                     columns=['a', 'b', 'c', 'd'])
     self.mixednaframe = new_frame(self.mixednainput,
                                   columns=['a', 'b', 'c', 'd'])
Exemple #6
0
 def setUp(self):
     """Set up the basic panda spark test case.

     Builds a SparkConf with a ``local[4]`` master, creates the
     PSparkContext from it and then prepares the shared fixtures. Every
     ``*input`` list is loaded twice: through ``self.psc.DataFrame``
     (``*pframe``) and through ``pandas.DataFrame`` (``*frame``). The
     module-level ``merge`` callable is also exposed as ``self.merge``.
     """
     logging.info("Setting up spark context")
     # Snapshot of sys.path; presumably restored on teardown -- confirm.
     self._old_sys_path = list(sys.path)
     class_name = self.__class__.__name__
     conf = SparkConf()
     conf.set("spark.cores.max", "4")
     conf.set("spark.master", "local[4]")
     # The Spark property for the application name is "spark.app.name";
     # the previous "spark.app-name" key is not a Spark setting, so the
     # class name never became the application name.
     conf.set("spark.app.name", class_name)
     # NOTE(review): presumably needed because each test case creates its
     # own context -- confirm.
     conf.set("spark.driver.allowMultipleContexts", "true")
     self.psc = PSparkContext.simple(conf=conf)
     # Add a common basic input and basicpframe we can reuse in testing
     self.basicinput = [("tea", "happy"), ("water", "sad"),
                        ("coffee", "happiest"), ("tea", "water")]
     self.basiccolumns = ['magic', 'thing']
     self.basicpframe = self.psc.DataFrame(self.basicinput,
                                           columns=self.basiccolumns)
     self.basicframe = pandas.DataFrame(self.basicinput,
                                        columns=self.basiccolumns)
     # Add a numeric frame
     self.numericinput = [(1, 2), (3, 4), (1, 3), (2, 6), (3, 100), (3, 20),
                          (8, 9)]
     self.numericpframe = self.psc.DataFrame(self.numericinput,
                                             columns=['a', 'b'])
     self.numericframe = pandas.DataFrame(self.numericinput,
                                          columns=['a', 'b'])
     # A three column numeric frame
     self.numericthreeinput = [(1, 2, -100.5), (3, 4, 93), (1, 3, 100.2),
                               (2, 6, 0.5), (3, 100, 1.5), (3, 20, 80),
                               (8, 9, 20)]
     self.numericthreepframe = self.psc.DataFrame(self.numericthreeinput,
                                                  columns=['a', 'b', 'c'])
     self.numericthreeframe = pandas.DataFrame(self.numericthreeinput,
                                               columns=['a', 'b', 'c'])
     # Two integer columns followed by a string column
     self.mixedinput = [(1, 2, "coffee"), (4, 5, "cheese")]
     self.mixedpframe = self.psc.DataFrame(self.mixedinput,
                                           columns=['a', 'b', 'c'])
     self.mixedframe = pandas.DataFrame(self.mixedinput,
                                        columns=['a', 'b', 'c'])
     # Mixed NA frame: column 'd' is None in every row
     self.mixednainput = [(1, 2, "coffee", None), (4, 5, "cheese", None)]
     self.mixednapframe = self.psc.DataFrame(self.mixednainput,
                                             columns=['a', 'b', 'c', 'd'])
     self.mixednaframe = pandas.DataFrame(self.mixednainput,
                                          columns=['a', 'b', 'c', 'd'])
     # Expose the merge callable on the instance for the tests to call.
     self.merge = merge