def check_dataset(dataset):
    """Check one dataset for changes and record the outcome.

    The md5 of the dataset is computed (by downloading into a temporary
    directory when ``dataset_type(dataset)`` is ``'spatial'``, otherwise via
    ``get_dataset_md5``) and compared with the md5 stored for
    ``dataset.name`` in ``dataset_details.json`` inside ``file_location``.
    When there is no stored entry, or the md5 differs, ``diff_generator`` is
    called. The entry for ``dataset.name`` is then rewritten with the keys
    ``md5``, ``status``, ``reason`` and ``diff``.

    Any ``Exception`` raised while checking is not propagated; it is stored
    as ``reason`` with ``status`` set to ``False``.

    Side effect: the process working directory is changed to
    ``file_location``.

    :param dataset: object exposing a ``name`` attribute; also passed to
        ``dataset_type``, ``get_dataset_md5`` and ``diff_generator``.
    """
    # All file names below are relative to file_location.
    os.chdir(file_location)
    md5 = None
    status = None
    reason = None
    diff = None
    try:
        try:
            with open('dataset_details.json', 'r') as json_file:
                dataset_detail = json.load(json_file)
        except FileNotFoundError:
            # First run: create an empty details file.
            dataset_detail = dict()
            with open("dataset_details.json", 'w') as json_file:
                json.dump(dataset_detail, json_file)

        if dataset_type(dataset) == 'spatial':
            workdir = None
            try:
                workdir = mkdtemp(dir=file_location)
                download(dataset.name, path=workdir)
                md5 = getmd5(workdir, data_type='dir')
            finally:
                # Remove the temporary download even when the check fails.
                if workdir:
                    rmtree(workdir)
        else:
            md5 = get_dataset_md5(dataset)

        if dataset.name not in dataset_detail \
                or md5 != dataset_detail[dataset.name]['md5']:
            diff = diff_generator(dataset)
        status = True
    except Exception as e:
        reason = str(e)
        status = False
    finally:
        # Code in the try block may have changed the working directory.
        os.chdir(file_location)
        with FileLock('dataset_details.json.lock'):
            # Re-read under the lock so the update is applied to the
            # current on-disk contents rather than the earlier snapshot.
            try:
                with open('dataset_details.json', 'r') as details_read:
                    json_file_details = json.load(details_read)
            except (OSError, ValueError):
                # Missing or corrupt file (json.JSONDecodeError is a
                # ValueError): start over instead of failing to record.
                json_file_details = {}
            json_file_details[dataset.name] = {
                "md5": md5,
                "status": status,
                "reason": reason,
                "diff": diff
            }
            # Close (and therefore flush) the file before the lock is
            # released so other lock holders never read a partial write.
            with open('dataset_details.json', 'w') as details_write:
                json.dump(json_file_details, details_write,
                          sort_keys=True, indent=4)
def check_dataset(dataset):
    """Check one dataset for changes and record the outcome.

    The md5 of the dataset is computed (by downloading into a temporary
    directory when ``dataset_type(dataset)`` is ``'spatial'``, otherwise via
    ``get_dataset_md5``) and compared with the md5 previously recorded for
    ``dataset.name``. When there is no previous record, or the md5 differs,
    ``diff_generator`` is called.

    The result is written to ``dataset_details.json`` inside
    ``file_location``. That file holds a ``"dataset_details"`` mapping of
    dataset name to ``{"md5", "status", "reason", "diff"}`` and a
    ``"last_checked_on"`` UTC date string formatted as ``"%d %b %Y"``.

    Any ``Exception`` raised while checking is not propagated; it is stored
    as ``reason`` with ``status`` set to ``False``.

    :param dataset: object exposing a ``name`` attribute; also passed to
        ``dataset_type``, ``get_dataset_md5`` and ``diff_generator``.
    """
    md5 = None
    status = None
    reason = None
    diff = None
    details_path = os.path.join(file_location, "dataset_details.json")

    try:
        with open(details_path, 'r') as json_file:
            dataset_detail = json.load(json_file)
    except (OSError, ValueError):
        # Missing, unreadable or corrupt file (json.JSONDecodeError is a
        # ValueError): start from an empty record set.
        dataset_detail = None
    if not isinstance(dataset_detail, dict):
        dataset_detail = dict()
    # Per-dataset records live under "dataset_details"; create the mapping
    # when the file is new or lacks the key.
    records = dataset_detail.setdefault("dataset_details", {})

    try:
        if dataset_type(dataset) == 'spatial':
            workdir = None
            try:
                workdir = mkdtemp(dir=file_location)
                download(dataset.name, path=workdir)
                md5 = getmd5(workdir, data_type='dir')
            finally:
                # Remove the temporary download even when the check fails.
                if workdir:
                    rmtree(workdir)
        else:
            md5 = get_dataset_md5(dataset)

        # Compare against the nested records, not the top-level dict: the
        # top level only holds "dataset_details" and "last_checked_on", so
        # looking there would regenerate the diff on every run.
        if dataset.name not in records \
                or md5 != records[dataset.name]['md5']:
            diff = diff_generator(dataset)
        status = True
    except Exception as e:
        reason = str(e)
        status = False
    finally:
        records[dataset.name] = {
            "md5": md5,
            "status": status,
            "reason": reason,
            "diff": diff}
        dataset_detail["last_checked_on"] = \
            datetime.now(timezone.utc).strftime("%d %b %Y")
        # The context manager closes the file even if json.dump raises.
        with open(details_path, 'w') as dataset_details_write:
            json.dump(dataset_detail, dataset_details_write,
                      sort_keys=True, indent=4)
def test_download_regression(dataset, expected):
    """Test download regression.

    Downloads ``dataset`` twice, first with only ``path`` and then with
    ``path`` plus ``sub_dir``, and checks that the md5 of the downloaded
    directory equals ``expected`` each time. The ``test_raw_data`` directory
    is removed after each phase, including when the phase fails, so stale
    files cannot leak into the directory md5 of a later run.

    :param dataset: dataset name passed to ``rt.download``.
    :param expected: expected md5 of the downloaded directory.
    """
    base_path = 'test_raw_data'
    path = os.path.join(base_path, dataset)
    data_dir = "test_temp"
    data = os.path.normpath(
        os.path.join(retriever_root_dir, path, data_dir))

    # download using path only
    os.chdir(retriever_root_dir)
    try:
        rt.download(dataset, path=path)
        current_md5 = getmd5(data=path, data_type='dir')
        assert current_md5 == expected
    finally:
        # Guarded so a download that failed before creating the directory
        # is not masked by an error from rmtree.
        if os.path.isdir(base_path):
            shutil.rmtree(base_path)

    # download using path and sub_dir
    os.chdir(retriever_root_dir)
    try:
        rt.download(dataset, path=path, sub_dir=data_dir)
        current_md5 = getmd5(data=data, data_type='dir')
        assert current_md5 == expected
    finally:
        if os.path.isdir(base_path):
            shutil.rmtree(base_path)
def test_download_regression(dataset, expected):
    """Check that a downloaded dataset directory has the expected md5.

    The dataset is downloaded into ``raw_data/<dataset>`` relative to
    ``retriever_root_dir`` and the md5 of that directory is compared with
    ``expected``.
    """
    os.chdir(retriever_root_dir)
    target_dir = "raw_data/{0}".format(dataset)
    download(dataset, target_dir)
    assert getmd5(data=target_dir, data_type='dir') == expected