def test_count_distinct(self):
        """Check ``Calculator.count_distinct`` against the CSV fixture.

        Rows of ``data/data.csv`` (minus the header line) are kept when their
        last column is one of the months returned by
        ``adpb.get_12_months('2015Q2')`` and their column at index 3 is in
        ``adpb.all_states``.  Each surviving row becomes a pair of
        (first five columns joined by commas, column at index 3), and the
        second element of the first result record is expected to be 51.
        NOTE(review): 51 presumably corresponds to the number of distinct
        states in the fixture -- confirm against the data file.
        """
        conf = SparkConf().setMaster('local')
        sc = SparkContext(conf=conf)
        # Only one SparkContext may be active at a time, so always stop it,
        # even when the assertion fails; otherwise later tests cannot create
        # their own context.
        try:
            lines = sc.textFile('data/data.csv')
            combo = '0,0,0,1'
            quarter = '2015Q2'
            all_months = adpb.get_12_months(quarter)
            header = lines.first()

            # Split each line once and reuse the fields in every later step.
            pairs = (
                lines
                .filter(lambda line: line != header)
                .map(lambda line: line.strip().split(","))
                .filter(lambda fields: fields[-1] in all_months)
                .filter(lambda fields: fields[3] in adpb.all_states)
                .map(lambda fields: (",".join(fields[:5]), fields[3]))
            )

            result = Calculator(combo=combo).count_distinct(data=pairs)

            # Evaluate the RDD while the context is still alive.
            self.assertEqual(result.first()[1], 51)
        finally:
            sc.stop()
    def test_count_distinct(self):
        """Check ``Calculator.count_distinct`` against the CSV fixture.

        Rows of ``data/data.csv`` (minus the header line) are kept when their
        last column is one of the months returned by
        ``adpb.get_12_months('2015Q2')`` and their column at index 3 is in
        ``adpb.all_states``.  Each surviving row becomes a pair of
        (first five columns joined by commas, column at index 3), and the
        second element of the first result record is expected to be 51.
        NOTE(review): 51 presumably corresponds to the number of distinct
        states in the fixture -- confirm against the data file.
        """
        conf = SparkConf().setMaster('local')
        sc = SparkContext(conf=conf)
        # Only one SparkContext may be active at a time, so always stop it,
        # even when the assertion fails; otherwise later tests cannot create
        # their own context.
        try:
            lines = sc.textFile('data/data.csv')
            combo = '0,0,0,1'
            quarter = '2015Q2'
            all_months = adpb.get_12_months(quarter)
            header = lines.first()

            # Split each line once and reuse the fields in every later step.
            pairs = (
                lines
                .filter(lambda line: line != header)
                .map(lambda line: line.strip().split(","))
                .filter(lambda fields: fields[-1] in all_months)
                .filter(lambda fields: fields[3] in adpb.all_states)
                .map(lambda fields: (",".join(fields[:5]), fields[3]))
            )

            result = Calculator(combo=combo).count_distinct(data=pairs)

            # Evaluate the RDD while the context is still alive.
            self.assertEqual(result.first()[1], 51)
        finally:
            sc.stop()
 def test_get_12_months(self):
     """Check the month list produced by ``adpb.get_12_months('2015Q2')``.

     The expected value is twelve 'YYYYMM' strings running from 201506
     back to 201407, in that order.
     """
     months_2015 = ['201506', '201505', '201504', '201503', '201502', '201501']
     months_2014 = ['201412', '201411', '201410', '201409', '201408', '201407']
     self.assertEqual(adpb.get_12_months('2015Q2'), months_2015 + months_2014)
 def test_get_12_months(self):
     """Check that quarter '2015Q2' maps to the months 201506 down to 201407."""
     wanted = [
         '201506', '201505', '201504',
         '201503', '201502', '201501',
         '201412', '201411', '201410',
         '201409', '201408', '201407',
     ]
     actual = adpb.get_12_months('2015Q2')
     self.assertEqual(actual, wanted)