def test_get_mapped_events_event_fields_dictionary_none(spark_context):
    rdd = spark_context.parallelize([
        "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20",
        "10,20,30,40,50,60,70,80,90,100,110,120,130,140,150,160,170,180,190,200",
        "100,200,300,400,500,600,700,800,900,1000,1100,1200,1300,1400,1500,1600,1700,1800,1900,2000"])
    event_fields = MapEvents.get_event_fields(rdd)
    header = None
    # With no dictionary and no header, map_events should bail out with None.
    mapped_events = MapEvents.map_events(event_fields, None, header)
    assert mapped_events is None
def test_get_mapped_events(spark_context):
    rdd = spark_context.parallelize([
        "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20",
        "10,20,30,40,50,60,70,80,90,100,110,120,130,140,150,160,170,180,190,200",
        "100,200,300,400,500,600,700,800,900,1000,1100,1200,1300,1400,1500,1600,1700,1800,1900,2000"])
    event_fields = MapEvents.get_event_fields(rdd)
    header = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
    # One mapped record per input line is expected, even with an empty dictionary.
    mapped_events = MapEvents.map_events(event_fields, {}, header).collect()
    assert len(mapped_events) == 3
# Requires a module-level `from pyspark import RDD` for the isinstance checks below.
def map_events(self):
    if self.rdd is None or self.dictionary is None:
        raise Exception("Invalid RDD or Dictionary")
    if not isinstance(self.rdd, RDD):
        raise Exception("RDD passed is not an instance of RDD")
    events_fields = MapEvents.get_event_fields(self.rdd)
    if events_fields is None or not isinstance(events_fields, RDD):
        raise Exception("events_fields is None or not an instance of RDD")
    # The first record's value tuple doubles as the header row.
    header = events_fields.first()[1]
    mapped_events = MapEvents.map_events(events_fields, self.dictionary, header)
    if mapped_events is None or not isinstance(mapped_events, RDD):
        raise Exception("mapped_events is None or not an instance of RDD")
    # Concatenate the mapped field lists of records that share a key.
    return mapped_events.reduceByKey(lambda r1, r2: r1 + r2)
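The method above and the tests in this module lean on two static helpers on MapEvents that are not shown here. Below is a minimal sketch consistent with the test expectations; the exact splitting and dictionary-lookup logic is an assumption, and in the real class the instance-level map_events above would need to live on another class or under another name to coexist with the static one.

from pyspark import RDD

class MapEvents(object):

    @staticmethod
    def get_event_fields(rdd):
        # Tests expect None for a missing or non-RDD input.
        if rdd is None or not isinstance(rdd, RDD):
            return None
        # Key each CSV line by its first field and keep the rest as a tuple
        # (matches test_get_event_fields).
        return rdd.map(lambda line: (line.split(",")[0],
                                     tuple(line.split(",")[1:])))

    @staticmethod
    def map_events(event_fields, dictionary, header):
        # Tests expect None when any argument is missing or when
        # event_fields is not an RDD.
        if (event_fields is None or not isinstance(event_fields, RDD)
                or dictionary is None or header is None):
            return None
        # Translate each field through the dictionary, falling back to the
        # raw value when no mapping exists (assumed behaviour).
        return event_fields.map(
            lambda kv: (kv[0], [dictionary.get(f, f) for f in kv[1]]))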
def test_get_event_fields(spark_context):
    rdd = spark_context.parallelize(
        ["1,2,3,4,5,6,7", "10,20,30,40,50,60,70", "100,200,300,400,500,600,700"])
    rdd_new = MapEvents.get_event_fields(rdd).collect()
    for r in rdd_new:
        if r[0] == "1":
            assert r[1] == ("2", "3", "4", "5", "6", "7")
        if r[0] == "10":
            assert r[1] == ("20", "30", "40", "50", "60", "70")
        if r[0] == "100":
            assert r[1] == ("200", "300", "400", "500", "600", "700")
def test_get_mapped_events_event_fields_not_rdd(spark_context):
    header = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
    # A plain dict is not an RDD, so map_events should return None.
    mapped_events = MapEvents.map_events({}, {}, header)
    assert mapped_events is None
def test_get_event_fields_not_rdd():
    rdd = {}
    rdd_new = MapEvents.get_event_fields(rdd)
    assert rdd_new is None
def test_get_event_fields_null_rdd():
    rdd = None
    rdd_new = MapEvents.get_event_fields(rdd)
    assert rdd_new is None
def test_get_event_fields_empty_rdd(spark_context):
    rdd = spark_context.emptyRDD()
    rdd_new = MapEvents.get_event_fields(rdd).collect()
    assert len(rdd_new) == 0
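These tests assume a spark_context pytest fixture. If the project does not already provide one (for example via the pytest-spark plugin), a session-scoped fixture along these lines would work; the fixture name, master setting, and app name here are assumptions.

import pytest
from pyspark import SparkConf, SparkContext

@pytest.fixture(scope="session")
def spark_context(request):
    # Local two-core Spark context shared across the whole test session.
    conf = SparkConf().setMaster("local[2]").setAppName("map-events-tests")
    sc = SparkContext(conf=conf)
    # Stop the context when the test session ends.
    request.addfinalizer(lambda: sc.stop())
    return sc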