import json
import os

from datetime import datetime, timezone
from shutil import rmtree
from tempfile import mkdtemp

# Project-specific helpers and globals (create_dirs, get_script_module,
# getmd5, diff_generator, diff_generator_spatial, sqlite_engine,
# file_location, test_files_location, etc.) are assumed to be imported
# from the surrounding status-dashboard modules.


def test_status_dashboard():
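    """Install the sample dataset twice (old and modified versions) and
    check that the status dashboard produces a CSV snapshot and an HTML
    diff for it."""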
    if not os.path.exists(os.path.join(file_location, 'test_dir')):
        os.makedirs(os.path.join(file_location, 'test_dir'))
        create_dirs(test_files_location)
        os.makedirs(
            os.path.join(file_location, 'test_dir', 'old', 'sample-dataset'))
    os.chdir(os.path.join(test_files_location, 'old', 'sample-dataset'))
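    # Install the unmodified sample dataset into SQLite and export it to
    # CSV; this export is the "old" snapshot the diff is computed against.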
    script_module = get_script_module('sample_dataset')
    sqlite_engine.opts = {
        'install': 'sqlite',
        'file': 'test_db.sqlite3',
        'table_name': '{db}_{table}',
        'data_dir': '.'
    }
    sqlite_engine.use_cache = False
    script_module.download(engine=sqlite_engine)
    script_module.engine.final_cleanup()
    script_module.engine.to_csv()
    os.remove('test_db.sqlite3')

    # Finding the md5 of the modified dataset
    setattr(script_module.tables['main'], 'path', modified_dataset_path)
    workdir = mkdtemp(dir=test_files_location)
    os.chdir(workdir)
    sqlite_engine.use_cache = False
    script_module.download(engine=sqlite_engine)
    script_module.engine.final_cleanup()
    script_module.engine.to_csv()
    os.remove('test_db.sqlite3')
    calculated_md5 = getmd5(os.getcwd(), data_type='dir')
    rmtree(workdir)

    # If the md5 of the modified dataset doesn't match the precalculated
    # md5, install the current dataset and generate the diff
    if calculated_md5 != precalculated_md5:
        os.chdir(os.path.join(test_files_location, 'current'))
        sqlite_engine.opts = {
            'install': 'sqlite',
            'file': 'test_db_new.sqlite3',
            'table_name': '{db}_{table}',
            'data_dir': '.'
        }
        sqlite_engine.use_cache = False
        script_module.download(engine=sqlite_engine)
        script_module.engine.final_cleanup()
        script_module.engine.to_csv()
        os.remove('test_db_new.sqlite3')
        diff_generator(script_module, location=test_files_location)

    diff_exist = os.path.isfile(
        os.path.join(test_files_location, 'diffs',
                     'sample_dataset_main.html'))
    csv_exist = os.path.isfile(
        os.path.join(test_files_location, 'old', 'sample-dataset',
                     'sample_dataset_main.csv'))
    os.chdir(file_location)
    rmtree(test_files_location)
    assert diff_exist
    assert csv_exist


def check_dataset(dataset):
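    """Install a dataset, compare its md5 with the stored record, and
    regenerate its diff when the data has changed; the outcome is always
    written back to the dataset-details JSON file."""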
    md5 = None
    status = None
    reason = None
    diff = None
    dataset_detail = None
    previous_md5 = ""

    try:
        dataset_detail = load_dataset_details()
        # Guard against a missing "dataset_details" key, which would
        # otherwise raise a KeyError before the membership check ran.
        previous_detail_records = dataset_detail.get("dataset_details")
        dataset_has_record = bool(previous_detail_records) and \
            dataset.name in previous_detail_records
        if dataset_has_record:
            previous_md5 = previous_detail_records[dataset.name]['md5']

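        # Spatial datasets are installed into Postgres and hashed from
        # their data directory; all other datasets are hashed via
        # get_dataset_md5().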
        if dataset_type(dataset) == 'spatial':
            install_postgres(dataset)
            dir_path = DATASET_DATA_FOLDER.format(dataset_name=dataset.name)
            md5 = getmd5(dir_path, data_type='dir')
            if not dataset_has_record or md5 != previous_md5:
                diff = diff_generator_spatial(dataset)
            else:
                remove_old_diff(dataset)
            data_shift(dataset, is_spatial=True)
        else:
            md5 = get_dataset_md5(dataset)
            if not dataset_has_record or md5 != previous_md5:
                diff = diff_generator(dataset)
            else:
                remove_old_diff(dataset)
            data_shift(dataset)
        status = True
    except Exception as e:
        reason = str(e)
        status = False
    finally:
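        # Record the outcome whether the check succeeded or failed, so
        # the dashboard always has a current entry for this dataset.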
        # dataset_detail is None if load_dataset_details() raised above;
        # fall back to a fresh record so the write below still succeeds.
        json_file_details = dataset_detail if dataset_detail else {}
        json_file_details.setdefault("dataset_details", {})
        json_file_details["dataset_details"][dataset.name] = {
            "md5": md5,
            "status": status,
            "reason": reason,
            "diff": diff
        }
        json_file_details["last_checked_on"] = datetime.now(
            timezone.utc).strftime("%d %b %Y")
        with open(DATASET_DETAIL_JSON, 'w') as dataset_details_write:
            json.dump(json_file_details,
                      dataset_details_write,
                      sort_keys=True,
                      indent=4)
        delete_raw_data(dataset)