def test_transform_raises_MissingDatSetError_when_one_dataset_given(self):
    """Transforming a single dataset must raise MissingDatasetError."""
    only_one = Extract.from_files(Constants._NYT_DATA_GOOD).get_datasets()
    transformer = Transform(only_one)
    with self.assertRaises(MissingDatasetError):
        transformer.transform_data()
def test_extract_from_two_urls_returns_two_datasets(self):
    """Loading two URLs should yield exactly two datasets."""
    loaded = Extract.from_urls(Constants._JH_URL,
                               Constants._NYT_URL).get_datasets()
    assert len(loaded) == 2
def test_transform_raises_InvalidDatasetError_when_column_is_missing(self):
    """A dataset missing a required column must raise InvalidDatasetError."""
    broken = Extract.from_files(
        Constants._NYT_DATA_MISSING_COLUMN).get_datasets()
    transformer = Transform(broken)
    with self.assertRaises(InvalidDatasetError):
        transformer.transform_data()
def test_transform_raises_InvalidDatasetError_when_date_cannot_be_parsed(
        self):
    """An unparseable date in either input must raise InvalidDatasetError."""
    mixed = Extract.from_files(Constants._NYT_DATA_BAD_DATE,
                               Constants._JH_DATA_GOOD).get_datasets()
    transformer = Transform(mixed)
    with self.assertRaises(InvalidDatasetError):
        transformer.transform_data()
def test_extract_from_two_files_returns_two_datasets(self):
    """Loading two local files should yield exactly two datasets.

    This loading path is reused by the other transform tests.
    """
    loaded = Extract.from_files(Constants._JH_DATA_GOOD,
                                Constants._NYT_DATA_GOOD).get_datasets()
    assert len(loaded) == 2
def extract(options):
    """Kick off a "map" Extract described by *options*.

    Args:
        options: mapping that may contain "url" (location used to resolve
            the mapping file via get_mapping) and "ref" (opaque reference
            forwarded to Extract).
    """
    # dict.get collapses the LBYL "if key in options" chains; missing keys
    # still yield None, exactly as before.
    url = options.get("url")
    ref = options.get("ref")
    # Only resolve a mapping path when a URL was supplied; otherwise
    # map_path stays None and Extract is still constructed with it —
    # preserved from the original; confirm that is intended.
    map_path = get_mapping(url) if url else None
    # NOTE(review): the Extract instance is discarded — presumably its
    # constructor performs the work. The literal 16 is undocumented here;
    # verify its meaning against Extract's signature.
    Extract("map", map_path, 16, ref)
def test_transform_with_valid_data_returns_correct_date_range(self):
    """Merged output covers only the dates common to both inputs.

    The JH input spans 2020-01-22..2020-02-03 and the NYT input spans
    2020-01-21..2020-02-03, so the merged range must be
    2020-01-22..2020-02-03.
    """
    datasets = Extract.from_files(Constants._NYT_DATA_GOOD,
                                  Constants._JH_DATA_GOOD).get_datasets()
    merged = Transform(datasets).transform_data()
    dates = [row['date'] for row in merged]
    assert min(dates) == date(2020, 1, 22)
    assert max(dates) == date(2020, 2, 3)
def start_extract_map(self):
    """Close the current window and start the map extract flow."""
    # Tear down the existing Tk root before handing off — presumably the
    # Extract flow builds its own UI; confirm against Extract("dw").
    self.root.destroy()
    # Constructed purely for side effects; "dw" appears to select the
    # extract mode — TODO confirm against Extract's constructor.
    Extract("dw")
from src.extract import Extract
from src.report import Report

if __name__ == "__main__":
    # Ingest event data from the fixed input file.
    extract = Extract()
    data = extract.get_events_data_from_file("../input/input.txt")
    extract.ingest(data)

    # Report the top 10 customers by simple LTV.
    report = Report()
    report.TopXSimpleLTVCustomers(10)
from src.extract import Extract

if __name__ == "__main__":
    # Entry point: construct the extractor and start it.
    Extract().boot()