Esempio n. 1
0
def check_dataset(dataset):
    """Check one dataset and record the outcome in ``dataset_details.json``.

    A dataset for which ``dataset_type(dataset)`` is ``'spatial'`` is
    downloaded into a temporary directory created under ``file_location``;
    the md5 of that directory is taken and the directory is always removed.
    For any other dataset the md5 comes from ``get_dataset_md5`` and
    ``diff_generator`` is called when the dataset has no stored record or
    its stored md5 differs from the new one.

    Any ``Exception`` raised while checking is caught: ``status`` is
    recorded as ``False`` and ``reason`` holds ``str(exception)``. On
    success ``status`` is ``True``. The record is written under
    ``dataset.name`` inside a ``FileLock('dataset_details.json.lock')``
    context.

    Side effect: the process working directory is changed to
    ``file_location``.

    Args:
        dataset: Object exposing a ``name`` attribute; it is also passed to
            ``dataset_type``, ``get_dataset_md5`` and ``diff_generator``.
    """
    os.chdir(file_location)
    md5 = None
    status = None
    reason = None
    diff = None
    try:
        try:
            with open('dataset_details.json', 'r') as json_file:
                dataset_detail = json.load(json_file)
        except FileNotFoundError:
            # First run: start from an empty record set and create the file.
            dataset_detail = dict()
            with open("dataset_details.json", 'w') as json_file:
                json.dump(dataset_detail, json_file)

        if dataset_type(dataset) == 'spatial':
            workdir = None
            try:
                workdir = mkdtemp(dir=file_location)
                download(dataset.name, path=workdir)
                md5 = getmd5(workdir, data_type='dir')
            finally:
                # Remove the scratch directory whether or not the download
                # succeeded; the exception (if any) still propagates.
                if workdir:
                    rmtree(workdir)
        else:
            md5 = get_dataset_md5(dataset)
            if dataset.name not in dataset_detail \
                    or md5 != dataset_detail[dataset.name]['md5']:
                diff = diff_generator(dataset)
        status = True
    except Exception as e:
        reason = str(e)
        status = False
    finally:
        # The relative file names below must resolve against file_location
        # again, in case the working directory changed during the check.
        os.chdir(file_location)
        with FileLock('dataset_details.json.lock'):
            try:
                with open('dataset_details.json', 'r') as json_file:
                    json_file_details = json.load(json_file)
            except (FileNotFoundError, ValueError):
                # Missing or unparsable file (JSONDecodeError is a
                # ValueError): start over instead of failing every run.
                json_file_details = {}
            json_file_details[dataset.name] = {
                "md5": md5,
                "status": status,
                "reason": reason,
                "diff": diff
            }
            # Close (and therefore flush) the file before the lock is
            # released so other lock holders never see a partial write.
            with open('dataset_details.json', 'w') as json_file:
                json.dump(json_file_details,
                          json_file,
                          sort_keys=True,
                          indent=4)
Esempio n. 2
0
def check_dataset(dataset):
    """Check one dataset and record the outcome in ``dataset_details.json``.

    The details file lives at ``file_location/dataset_details.json`` and
    holds per-dataset records under the ``"dataset_details"`` key plus a
    ``"last_checked_on"`` date string. A missing or unparsable file is
    treated as empty.

    A dataset for which ``dataset_type(dataset)`` is ``'spatial'`` is
    downloaded into a temporary directory created under ``file_location``;
    the md5 of that directory is taken and the directory is always removed.
    For any other dataset the md5 comes from ``get_dataset_md5`` and
    ``diff_generator`` is called when the dataset has no stored record or
    its stored md5 differs from the new one.

    Any ``Exception`` raised while checking is caught: ``status`` is
    recorded as ``False`` and ``reason`` holds ``str(exception)``. On
    success ``status`` is ``True``.

    Args:
        dataset: Object exposing a ``name`` attribute; it is also passed to
            ``dataset_type``, ``get_dataset_md5`` and ``diff_generator``.
    """
    md5 = None
    status = None
    reason = None
    diff = None
    details_path = os.path.join(file_location, "dataset_details.json")

    try:
        with open(details_path, 'r') as json_file:
            dataset_detail = json.load(json_file)
    except (OSError, JSONDecodeError):
        dataset_detail = dict()
    if not isinstance(dataset_detail, dict):
        # Valid JSON but not an object: nothing usable to update in place.
        dataset_detail = dict()
    # Per-dataset records live one level down; create the container when the
    # file is new or was written without it.
    records = dataset_detail.setdefault('dataset_details', {})

    try:
        if dataset_type(dataset) == 'spatial':
            workdir = None
            try:
                workdir = mkdtemp(dir=file_location)
                download(dataset.name, path=workdir)
                md5 = getmd5(workdir, data_type='dir')
            finally:
                # Remove the scratch directory whether or not the download
                # succeeded; the exception (if any) still propagates.
                if workdir:
                    rmtree(workdir)
        else:
            md5 = get_dataset_md5(dataset)
            # Compare against the stored record for this dataset, i.e. the
            # same place the result is written to below.
            if dataset.name not in records \
                    or md5 != records[dataset.name]['md5']:
                diff = diff_generator(dataset)
        status = True
    except Exception as e:
        reason = str(e)
        status = False
    finally:
        records[dataset.name] = {
            "md5": md5,
            "status": status,
            "reason": reason,
            "diff": diff}
        dataset_detail["last_checked_on"] = datetime.now(timezone.utc).strftime("%d %b %Y")
        # The context manager closes the file even if serialization fails.
        with open(details_path, 'w') as dataset_details_write:
            json.dump(dataset_detail, dataset_details_write,
                      sort_keys=True, indent=4)
Esempio n. 3
0
def test_download_regression(dataset, expected):
    """Test download regression.

    Downloads ``dataset`` twice -- once with only ``path`` and once with
    ``path`` plus ``sub_dir`` -- and checks that the md5 of the resulting
    directory equals ``expected`` each time. The ``test_raw_data`` tree is
    removed after each step, including when the step fails.

    Args:
        dataset: Dataset name passed to ``rt.download``.
        expected: md5 value the downloaded directory must hash to.
    """
    base_path = 'test_raw_data'
    path = os.path.join(base_path, dataset)
    data_dir = "test_temp"
    data = os.path.normpath(os.path.join(retriever_root_dir, path, data_dir))

    # Each scenario: (extra keyword arguments for download, directory to hash).
    scenarios = (
        ({}, path),
        # download using path and sub_dir
        ({"sub_dir": data_dir}, data),
    )
    for extra_kwargs, hashed_dir in scenarios:
        os.chdir(retriever_root_dir)
        try:
            rt.download(dataset, path=path, **extra_kwargs)
            current_md5 = getmd5(data=hashed_dir, data_type='dir')
            assert current_md5 == expected
        finally:
            # Clean up even when the download or the assertion fails, so a
            # failing run cannot leave stale files for the next one.
            # base_path is relative, so resolve it from the root directory
            # in case the working directory was left elsewhere.
            os.chdir(retriever_root_dir)
            if os.path.isdir(base_path):
                shutil.rmtree(base_path)
Esempio n. 4
0
def test_download_regression(dataset, expected):
    """Test download regression.

    Downloads ``dataset`` twice -- once with only ``path`` and once with
    ``path`` plus ``sub_dir`` -- and checks that the md5 of the resulting
    directory equals ``expected`` each time. The ``test_raw_data`` tree is
    removed after each step, including when the step fails.

    Args:
        dataset: Dataset name passed to ``rt.download``.
        expected: md5 value the downloaded directory must hash to.
    """
    base_path = 'test_raw_data'
    path = os.path.join(base_path, dataset)
    data_dir = "test_temp"
    data = os.path.normpath(os.path.join(retriever_root_dir, path, data_dir))

    # Each scenario: (extra keyword arguments for download, directory to hash).
    scenarios = (
        ({}, path),
        # download using path and sub_dir
        ({"sub_dir": data_dir}, data),
    )
    for extra_kwargs, hashed_dir in scenarios:
        os.chdir(retriever_root_dir)
        try:
            rt.download(dataset, path=path, **extra_kwargs)
            current_md5 = getmd5(data=hashed_dir, data_type='dir')
            assert current_md5 == expected
        finally:
            # Clean up even when the download or the assertion fails, so a
            # failing run cannot leave stale files for the next one.
            # base_path is relative, so resolve it from the root directory
            # in case the working directory was left elsewhere.
            os.chdir(retriever_root_dir)
            if os.path.isdir(base_path):
                shutil.rmtree(base_path)
Esempio n. 5
0
def test_download_regression(dataset, expected):
    """Check that a downloaded dataset hashes to the expected md5.

    The dataset is downloaded into ``raw_data/<dataset>`` (relative to
    ``retriever_root_dir``, which becomes the working directory) and the
    md5 of that directory is compared with ``expected``.
    """
    os.chdir(retriever_root_dir)
    target_dir = "raw_data/{0}".format(dataset)
    download(dataset, target_dir)
    observed_md5 = getmd5(data=target_dir, data_type='dir')
    assert observed_md5 == expected
Esempio n. 6
0
def test_download_regression(dataset, expected):
    """Check that a downloaded dataset hashes to the expected md5.

    The dataset is downloaded into ``raw_data/<dataset>`` (relative to
    ``retriever_root_dir``, which becomes the working directory) and the
    md5 of that directory is compared with ``expected``.
    """
    os.chdir(retriever_root_dir)
    target_dir = "raw_data/{0}".format(dataset)
    download(dataset, target_dir)
    observed_md5 = getmd5(data=target_dir, data_type='dir')
    assert observed_md5 == expected