def __init__(self, dt_index, rdd, jtsrdd=None, sc=None):
    """Initialise from a Python RDD or from an existing JVM time-series RDD.

    The construction path is selected by ``jtsrdd``:

    * ``jtsrdd is None`` -- build from a Python RDD object (``rdd``) and a
      Python DateTimeIndex (``dt_index``). ``rdd`` is reserialized with
      ``_TimeSeriesSerializer`` and mapped on the JVM side through
      ``BytesToKeyAndSeries`` before being handed, together with
      ``dt_index._jdt_index``, to ``JavaTimeSeriesRDDFactory.timeSeriesRDD``.
    * otherwise -- wrap ``jtsrdd``, a py4j.JavaObject pointing to a
      JavaTimeSeriesRDD. ``sc`` must then be the Python SparkContext;
      ``dt_index`` and ``rdd`` are not read on this path.

    :param dt_index: Python DateTimeIndex exposing ``_jdt_index``.
    :param rdd: Python RDD to convert (first path only).
    :param jtsrdd: optional py4j.JavaObject for an existing JavaTimeSeriesRDD.
    :param sc: Python SparkContext (second path only).
    """
    # Identity test: ``== None`` would dispatch to the ``__eq__`` of whatever
    # object was passed in (here a py4j proxy) instead of simply checking
    # whether the argument was supplied.
    if jtsrdd is None:
        # Construct from a Python RDD object and a Python DateTimeIndex
        jvm = rdd.ctx._jvm
        jrdd = rdd._reserialize(_TimeSeriesSerializer())._jrdd.mapToPair(
            jvm.com.cloudera.sparkts.BytesToKeyAndSeries())
        self._jtsrdd = jvm.com.cloudera.sparkts.api.java.JavaTimeSeriesRDDFactory.timeSeriesRDD(
            dt_index._jdt_index, jrdd)
        # The Python-side base keeps the caller's original (non-reserialized)
        # Java RDD and context.
        RDD.__init__(self, rdd._jrdd, rdd.ctx)
    else:
        # Construct from a py4j.JavaObject pointing to a JavaTimeSeriesRDD
        # and a Python SparkContext
        jvm = sc._jvm
        jrdd = jtsrdd.map(jvm.com.cloudera.sparkts.KeyAndSeriesToBytes())
        RDD.__init__(self, jrdd, sc, _TimeSeriesSerializer())
        self._jtsrdd = jtsrdd
def __init__(self, dt_index, rdd, jtsrdd=None, sc=None):
    """Initialise from a Python RDD or from an existing JVM TimeSeriesRDD.

    The construction path is selected by ``jtsrdd``:

    * ``jtsrdd is None`` -- build from a Python RDD object (``rdd``) and a
      Python DateTimeIndex (``dt_index``). ``rdd`` is reserialized with
      ``_TimeSeriesSerializer``, mapped on the JVM side through
      ``BytesToKeyAndSeries``, and its underlying ``rdd()`` is passed with
      ``dt_index._jdt_index`` to the ``com.cloudera.sparkts.TimeSeriesRDD``
      constructor.
    * otherwise -- wrap ``jtsrdd``, a py4j.JavaObject pointing to a
      TimeSeriesRDD. ``sc`` must then be the Python SparkContext;
      ``dt_index`` and ``rdd`` are not read on this path.

    :param dt_index: Python DateTimeIndex exposing ``_jdt_index``.
    :param rdd: Python RDD to convert (first path only).
    :param jtsrdd: optional py4j.JavaObject for an existing TimeSeriesRDD.
    :param sc: Python SparkContext (second path only).
    """
    # Identity test: ``== None`` would dispatch to the ``__eq__`` of whatever
    # object was passed in (here a py4j proxy) instead of simply checking
    # whether the argument was supplied.
    if jtsrdd is None:
        # Construct from a Python RDD object and a Python DateTimeIndex
        jvm = rdd.ctx._jvm
        jrdd = rdd._reserialize(_TimeSeriesSerializer())._jrdd.map(
            jvm.com.cloudera.sparkts.BytesToKeyAndSeries())
        self._jtsrdd = jvm.com.cloudera.sparkts.TimeSeriesRDD(
            dt_index._jdt_index, jrdd.rdd())
        # The Python-side base keeps the caller's original (non-reserialized)
        # Java RDD and context.
        RDD.__init__(self, rdd._jrdd, rdd.ctx)
    else:
        # Construct from a py4j.JavaObject pointing to a TimeSeriesRDD and a
        # Python SparkContext. The object is wrapped in a JavaRDD (second
        # constructor argument passed as None) so that ``map`` can be called
        # with the KeyAndSeriesToBytes function object.
        jvm = sc._jvm
        jrdd = jvm.org.apache.spark.api.java.JavaRDD(jtsrdd, None).map(
            jvm.com.cloudera.sparkts.KeyAndSeriesToBytes())
        RDD.__init__(self, jrdd, sc, _TimeSeriesSerializer())
        self._jtsrdd = jtsrdd
def __init__(self, rdd, file_type='CSV', t_rdd=None, sc=None):
    """Set up the wrapper from a Python RDD or from a JVM transformable RDD.

    When ``rdd`` is given, it is reserialized with ``BuddySerializer``,
    mapped through the JVM ``BytesToString`` function and wrapped in a
    ``JavaTransformableRDD``. When ``rdd`` is ``None``, ``t_rdd`` is taken
    as the already-built JVM object and ``sc`` as the Spark context.

    :param rdd: Python RDD, or ``None`` to wrap ``t_rdd`` instead.
    :param file_type: forwarded to ``__set_file_type`` (default ``'CSV'``).
    :param t_rdd: JVM-side transformable RDD, read only when ``rdd`` is None.
    :param sc: Spark context, read only when ``rdd`` is None.
    """
    if rdd is None:
        # Wrap an existing JVM object: expose its rows to Python as bytes.
        gateway = sc._jvm
        java_import(gateway, ClassNames.STRING_TO_BYTES)
        self.spark_context = sc
        self.__set_file_type(gateway, file_type)
        self._transformable_rdd = t_rdd
        bytes_rdd = t_rdd.map(gateway.StringToBytes())
        RDD.__init__(self, bytes_rdd, sc, BuddySerializer())
        return

    # Build the JVM counterpart from a Python RDD.
    gateway = rdd.ctx._jvm
    java_import(gateway, ClassNames.BYTES_TO_STRING)
    java_import(gateway, ClassNames.TRANSFORMABLE_RDD)
    # Presumably populates self.__file_type, which is read below -- confirm
    # against __set_file_type.
    self.__set_file_type(gateway, file_type)
    self.spark_context = rdd.ctx
    reserialized = rdd._reserialize(BuddySerializer())
    string_rdd = reserialized._jrdd.map(gateway.BytesToString())
    self._transformable_rdd = gateway.JavaTransformableRDD(
        string_rdd, self.__file_type)
    RDD.__init__(self, rdd._jrdd, rdd.ctx)
def __init__(self, rdd, file_type='CSV', t_rdd=None, sc=None):
    """Create the instance from one of two sources.

    * ``rdd`` supplied: a Python RDD whose reserialized contents are
      converted on the JVM with ``BytesToString`` and wrapped in a new
      ``JavaTransformableRDD``.
    * ``rdd`` is ``None``: ``t_rdd`` is an existing JVM transformable RDD
      and ``sc`` the Spark context; its contents are mapped through
      ``StringToBytes`` for the Python side.

    :param rdd: source Python RDD, or ``None``.
    :param file_type: file type passed on to ``__set_file_type``.
    :param t_rdd: existing JVM transformable RDD (used when ``rdd`` is None).
    :param sc: Spark context (used when ``rdd`` is None).
    """
    built_from_jvm_object = rdd is None

    if built_from_jvm_object:
        java_vm = sc._jvm
        java_import(java_vm, ClassNames.STRING_TO_BYTES)
        self.spark_context = sc
        self.__set_file_type(java_vm, file_type)
        self._transformable_rdd = t_rdd
        to_bytes = java_vm.StringToBytes()
        RDD.__init__(self, t_rdd.map(to_bytes), sc, BuddySerializer())
    else:
        java_vm = rdd.ctx._jvm
        for class_name in (ClassNames.BYTES_TO_STRING,
                           ClassNames.TRANSFORMABLE_RDD):
            java_import(java_vm, class_name)
        # Presumably sets self.__file_type, consumed a few lines down --
        # verify against __set_file_type.
        self.__set_file_type(java_vm, file_type)
        self.spark_context = rdd.ctx
        to_string = java_vm.BytesToString()
        java_rdd = rdd._reserialize(BuddySerializer())._jrdd.map(to_string)
        self._transformable_rdd = java_vm.JavaTransformableRDD(
            java_rdd, self.__file_type)
        # The base class keeps the caller's original Java RDD and context.
        RDD.__init__(self, rdd._jrdd, rdd.ctx)