Example #1
 def __init__(self, dt_index, rdd, jtsrdd=None, sc=None):
     """Wrap a time-series RDD over the given date-time index.

     Two construction paths:
     - ``jtsrdd is None``: build the JVM-side JavaTimeSeriesRDD from the
       Python ``rdd`` and ``dt_index``.
     - otherwise: wrap an existing JVM object; ``sc`` must then be the
       Python SparkContext it belongs to.

     :param dt_index: Python DateTimeIndex (exposes ``_jdt_index``).
     :param rdd: Python RDD of (key, series) records; used when jtsrdd is None.
     :param jtsrdd: optional py4j JavaObject for a JavaTimeSeriesRDD.
     :param sc: optional SparkContext, required together with jtsrdd.
     """
     # PEP 8: compare to the None singleton with `is`, not `==`.
     if jtsrdd is None:
         # Construct from a Python RDD object and a Python DateTimeIndex
         jvm = rdd.ctx._jvm
         # Re-serialize so the JVM side can decode each record into a
         # (key, series) pair.
         jrdd = rdd._reserialize(_TimeSeriesSerializer())._jrdd.mapToPair(
             jvm.com.cloudera.sparkts.BytesToKeyAndSeries())
         self._jtsrdd = jvm.com.cloudera.sparkts.api.java.JavaTimeSeriesRDDFactory.timeSeriesRDD(
             dt_index._jdt_index, jrdd)
         RDD.__init__(self, rdd._jrdd, rdd.ctx)
     else:
         # Construct from a py4j.JavaObject pointing to a JavaTimeSeriesRDD
         # and a Python SparkContext.
         jvm = sc._jvm
         jrdd = jtsrdd.map(
             jvm.com.cloudera.sparkts.KeyAndSeriesToBytes())
         RDD.__init__(self, jrdd, sc, _TimeSeriesSerializer())
         self._jtsrdd = jtsrdd
 def __init__(self, dt_index, rdd, jtsrdd=None, sc=None):
     """Wrap a time-series RDD over the given date-time index.

     Two construction paths:
     - ``jtsrdd is None``: build the JVM-side (Scala) TimeSeriesRDD from
       the Python ``rdd`` and ``dt_index``.
     - otherwise: wrap an existing JVM object; ``sc`` must then be the
       Python SparkContext it belongs to.

     :param dt_index: Python DateTimeIndex (exposes ``_jdt_index``).
     :param rdd: Python RDD of (key, series) records; used when jtsrdd is None.
     :param jtsrdd: optional py4j JavaObject for a Scala TimeSeriesRDD.
     :param sc: optional SparkContext, required together with jtsrdd.
     """
     # PEP 8: compare to the None singleton with `is`, not `==`.
     if jtsrdd is None:
         # Construct from a Python RDD object and a Python DateTimeIndex
         jvm = rdd.ctx._jvm
         jrdd = rdd._reserialize(_TimeSeriesSerializer())._jrdd.map(
             jvm.com.cloudera.sparkts.BytesToKeyAndSeries())
         # .rdd() unwraps the JavaRDD into the Scala RDD the Scala
         # constructor expects.
         self._jtsrdd = jvm.com.cloudera.sparkts.TimeSeriesRDD(
             dt_index._jdt_index, jrdd.rdd())
         RDD.__init__(self, rdd._jrdd, rdd.ctx)
     else:
         # Construct from a py4j.JavaObject pointing to a TimeSeriesRDD
         # and a Python SparkContext. The Scala RDD is first re-wrapped
         # as a JavaRDD so .map() is callable from Python.
         jvm = sc._jvm
         jrdd = jvm.org.apache.spark.api.java.JavaRDD(jtsrdd, None).map(
             jvm.com.cloudera.sparkts.KeyAndSeriesToBytes())
         RDD.__init__(self, jrdd, sc, _TimeSeriesSerializer())
         self._jtsrdd = jtsrdd
    def __init__(self, rdd, file_type='CSV', t_rdd=None, sc=None):
        """Wrap either a Python RDD or an existing JVM transformable RDD.

        When ``rdd`` is given, a JVM-side JavaTransformableRDD is built
        from it. When ``rdd`` is None, ``t_rdd`` (a JVM object) and
        ``sc`` (its SparkContext) are wrapped directly.

        :param rdd: Python RDD to wrap, or None.
        :param file_type: record format name forwarded to __set_file_type
                          (default 'CSV').
        :param t_rdd: optional JVM transformable RDD, used when rdd is None.
        :param sc: SparkContext, required together with t_rdd.
        """
        if rdd is None:
            # Wrap a pre-existing JVM-side transformable RDD.
            jvm = sc._jvm
            java_import(jvm, ClassNames.STRING_TO_BYTES)
            self.spark_context = sc
            self.__set_file_type(jvm, file_type)
            self._transformable_rdd = t_rdd
            byte_rdd = t_rdd.map(jvm.StringToBytes())
            RDD.__init__(self, byte_rdd, sc, BuddySerializer())
        else:
            # Build the JVM-side transformable RDD from the Python one.
            jvm = rdd.ctx._jvm
            java_import(jvm, ClassNames.BYTES_TO_STRING)
            java_import(jvm, ClassNames.TRANSFORMABLE_RDD)

            self.__set_file_type(jvm, file_type)
            self.spark_context = rdd.ctx
            string_rdd = rdd._reserialize(BuddySerializer())._jrdd.map(jvm.BytesToString())
            self._transformable_rdd = jvm.JavaTransformableRDD(string_rdd, self.__file_type)
            RDD.__init__(self, rdd._jrdd, rdd.ctx)
    def __init__(self, rdd, file_type='CSV', t_rdd=None, sc=None):
        """Initialize from a Python RDD, or from a JVM transformable RDD.

        Exactly one of the two paths runs: a non-None ``rdd`` is mirrored
        into a JVM JavaTransformableRDD; otherwise ``t_rdd`` (JVM object)
        and ``sc`` (its SparkContext) are adopted as-is.

        :param rdd: Python RDD to wrap, or None.
        :param file_type: record format name forwarded to __set_file_type
                          (default 'CSV').
        :param t_rdd: optional JVM transformable RDD, used when rdd is None.
        :param sc: SparkContext, required together with t_rdd.
        """
        if rdd is not None:
            context = rdd.ctx
            jvm = context._jvm
            java_import(jvm, ClassNames.BYTES_TO_STRING)
            java_import(jvm, ClassNames.TRANSFORMABLE_RDD)

            self.__set_file_type(jvm, file_type)
            self.spark_context = context
            # Re-serialize so each record reaches the JVM as a plain string.
            reserialized = rdd._reserialize(BuddySerializer())
            as_strings = reserialized._jrdd.map(jvm.BytesToString())
            self._transformable_rdd = jvm.JavaTransformableRDD(
                as_strings, self.__file_type)
            RDD.__init__(self, rdd._jrdd, context)
        else:
            jvm = sc._jvm
            java_import(jvm, ClassNames.STRING_TO_BYTES)
            self.spark_context = sc
            self.__set_file_type(jvm, file_type)
            self._transformable_rdd = t_rdd
            as_bytes = t_rdd.map(jvm.StringToBytes())
            RDD.__init__(self, as_bytes, sc, BuddySerializer())