def test_count_distinct(self):
    """Run ``Calculator.count_distinct`` over 'data/data.csv' and check the count.

    The CSV is read through a local SparkContext, the header row is dropped,
    and rows are kept only when their last column is one of the months
    returned by ``adpb.get_12_months('2015Q2')`` and their fourth column is
    in ``adpb.all_states``. Each surviving row becomes a
    ``(first five columns joined by ",", fourth column)`` pair, which is what
    gets handed to ``count_distinct``. The second element of the first
    result record is expected to be 51.
    """
    conf = SparkConf().setMaster('local')
    sc = SparkContext(conf=conf)
    # Always stop the context: a leaked SparkContext prevents any later test
    # in the same process from creating its own.
    try:
        combo = '0,0,0,1'
        quarter = '2015Q2'
        all_months = adpb.get_12_months(quarter)

        lines = sc.textFile('data/data.csv')
        header = lines.first()
        data = (
            lines
            .filter(lambda line: line != header)
            # Split each record once instead of re-splitting in every step.
            .map(lambda line: line.strip().split(","))
            .filter(lambda fields: fields[-1] in all_months)
            .filter(lambda fields: fields[3] in adpb.all_states)
            .map(lambda fields: (",".join(fields[:5]), fields[3]))
        )

        result = Calculator(combo=combo).count_distinct(data=data)
        self.assertEqual(result.first()[1], 51)
    finally:
        sc.stop()
def test_get_12_months(self):
    """Check the month list that ``adpb.get_12_months`` returns for '2015Q2'.

    The expected value is twelve strings running from '201506' down to
    '201407', in that order.
    """
    months_2015 = ['201506', '201505', '201504', '201503', '201502', '201501']
    months_2014 = ['201412', '201411', '201410', '201409', '201408', '201407']
    self.assertEqual(adpb.get_12_months('2015Q2'), months_2015 + months_2014)
def test_get_12_months(self):
    """Verify ``adpb.get_12_months('2015Q2')`` against a fixed list of twelve strings."""
    # NOTE(review): a method with this same name and the same assertion is
    # defined immediately before this one. If both sit in the same class
    # body, this later definition replaces the earlier one, so one of the
    # two is redundant -- confirm and drop or rename.
    actual = adpb.get_12_months('2015Q2')
    expected = [
        '201506',
        '201505',
        '201504',
        '201503',
        '201502',
        '201501',
        '201412',
        '201411',
        '201410',
        '201409',
        '201408',
        '201407',
    ]
    self.assertEqual(actual, expected)