예제 #1
0
def test_program_extract_json():
    """CLI `extract --json` prints the same data that `extract()` returns."""
    command = "extract data/table.csv --json"
    result = runner.invoke(program, command)
    assert result.exit_code == 0
    actual = json.loads(result.stdout)
    expected = extract("data/table.csv")
    assert actual == expected
예제 #2
0
def test_program_extract_field_names():
    """CLI `--field-names 'a,b'` matches a Detector with field_names ["a", "b"]."""
    command = "extract data/table.csv --json --field-names 'a,b'"
    result = runner.invoke(program, command)
    assert result.exit_code == 0
    actual = json.loads(result.stdout)
    # The comma-separated CLI value is expected to map onto a list of names.
    detector = Detector(field_names=["a", "b"])
    expected = extract("data/table.csv", detector=detector)
    assert actual == expected
예제 #3
0
def test_program_extract_yaml():
    """CLI `extract --yaml` prints YAML that parses to what `extract()` returns.

    The command must request YAML output explicitly. Requesting `--json` here
    would still parse with `yaml.safe_load` (JSON is valid YAML), so the test
    would pass without ever exercising the YAML serializer.
    """
    result = runner.invoke(program, "extract data/table.csv --yaml")
    assert result.exit_code == 0
    assert yaml.safe_load(result.stdout) == extract("data/table.csv")
예제 #4
0
def test_extract_resource_process():
    """`extract()` on a resource descriptor applies `process` to each row."""

    def process(row):
        # Turn each row into a plain list of its cell values.
        return row.to_list()

    expected = [
        [1, "english"],
        [2, "中国人"],
    ]
    assert extract("data/resource.json", process=process) == expected
예제 #5
0
def test_program_extract_field_type():
    """CLI `--field-type string` matches a Detector with field_type="string"."""
    command = "extract data/table.csv --json --field-type string"
    result = runner.invoke(program, command)
    assert result.exit_code == 0
    actual = json.loads(result.stdout)
    detector = Detector(field_type="string")
    expected = extract("data/table.csv", detector=detector)
    assert actual == expected
예제 #6
0
def test_program_extract_header_rows():
    """CLI `--header-rows '1,2'` matches the layout option headerRows=[1, 2]."""
    command = "extract data/table.csv --json --header-rows '1,2'"
    result = runner.invoke(program, command)
    assert result.exit_code == 0
    actual = json.loads(result.stdout)
    layout = {"headerRows": [1, 2]}
    expected = extract("data/table.csv", layout=layout)
    assert actual == expected
예제 #7
0
def test_program_extract_offset_fields():
    """CLI `--offset-fields 1` matches the layout option offsetFields=1."""
    command = "extract data/table.csv --json --offset-fields 1"
    result = runner.invoke(program, command)
    assert result.exit_code == 0
    actual = json.loads(result.stdout)
    layout = {"offsetFields": 1}
    expected = extract("data/table.csv", layout=layout)
    assert actual == expected
예제 #8
0
def test_to_str_with_doublequotes():
    """`Row.to_str()` keeps double quotes around cells that contain a comma."""
    source = 'id,name\n1,"english,UK"\n2,"german,GE"'
    rows = extract(source, scheme="text", format="csv")
    # Expected serialization per row index, checked in order.
    expected_lines = ['1,"english,UK"', '2,"german,GE"']
    for index, expected in enumerate(expected_lines):
        assert rows[index].to_str() == expected
예제 #9
0
def test_extract_table_process():
    """`extract()` on a CSV table applies `process` to each row."""

    def process(row):
        # Turn each row into a plain list of its cell values.
        return row.to_list()

    expected = [
        [1, "english"],
        [2, "中国人"],
    ]
    assert extract("data/table.csv", process=process) == expected
예제 #10
0
#############################
### Shell syntax.
### NOTE(review): the leading `!` presumably marks IPython/Jupyter shell
### escapes -- these lines are not plain Python; confirm the intended runner.
#############################

# Print only lines 1-4 of the raw data file
# (sed deletes every line outside that range).
! cat merops_peptidase_families.csv | sed '1,4!d'

# Describe the data file, i.e. show the schema frictionless infers for it.
! frictionless describe merops_peptidase_families.csv

# Extract the data and keep only the first 10 lines of the command's output.
# NOTE(review): presumably the extracted data is normalized against the
# inferred schema (e.g. invalid cells removed) -- confirm against the
# frictionless documentation.
! frictionless extract merops_peptidase_families.csv | sed '1,10!d'

# Validate the data file.
! frictionless validate merops_peptidase_families.csv

#############################
### Python syntax.
#############################

# Load the data file with pandas and display the first 4 rows of the
# resulting DataFrame.
df = pd.read_csv("merops_peptidase_families.csv")
display(df.head(4))

# Describe the data file, i.e. show the schema frictionless infers for it.
frictionless.describe("merops_peptidase_families.csv")

# Extract the data and keep only the first 4 items of the result.
# NOTE(review): presumably the extracted data is normalized against the
# inferred schema (e.g. invalid cells removed) -- confirm against the
# frictionless documentation.
frictionless.extract("merops_peptidase_families.csv")[:4]

# Validate the data file.
frictionless.validate("merops_peptidase_families.csv")
예제 #11
0
def test_to_str():
    """`Row.to_str()` joins a row's cells with commas."""
    rows = extract("data/table.csv")
    # Expected serialization per row index, checked in order.
    expected_lines = ["1,english", "2,中国人"]
    for index, expected in enumerate(expected_lines):
        assert rows[index].to_str() == expected
예제 #12
0
def test_extract_limit_rows():
    """CLI `--limit-rows 1` matches the layout option limitRows=1."""
    command = "extract data/table.csv --json --limit-rows 1"
    result = runner.invoke(program, command)
    assert result.exit_code == 0
    actual = json.loads(result.stdout)
    layout = {"limitRows": 1}
    expected = extract("data/table.csv", layout=layout)
    assert actual == expected
예제 #13
0
def test_extract_field_missing_values():
    """CLI `--field-missing-values 1` matches Detector(field_missing_values=["1"])."""
    command = "extract data/table.csv --json --field-missing-values 1"
    result = runner.invoke(program, command)
    assert result.exit_code == 0
    actual = json.loads(result.stdout)
    # The CLI value is compared against its string form on the Python side.
    detector = Detector(field_missing_values=["1"])
    expected = extract("data/table.csv", detector=detector)
    assert actual == expected
예제 #14
0
def test_extract_schema():
    """CLI `--schema <path>` matches passing the same path as `schema=`."""
    command = "extract data/table.csv --json --schema data/schema.json"
    result = runner.invoke(program, command)
    assert result.exit_code == 0
    actual = json.loads(result.stdout)
    expected = extract("data/table.csv", schema="data/schema.json")
    assert actual == expected
예제 #15
0
def test_program_extract_dialect_sheet_option():
    """CLI `--sheet <name>` matches the dialect option {"sheet": <name>}."""
    file = "data/sheet2.xls"
    sheet = "Sheet2"
    command = f"extract {file} --sheet {sheet} --json"
    result = runner.invoke(program, command)
    assert result.exit_code == 0
    actual = json.loads(result.stdout)
    expected = extract(file, dialect={"sheet": sheet})
    assert actual == expected
예제 #16
0
def test_extract_source_type():
    """`source_type="package"` returns a dict of rows keyed by resource path."""
    expected = {
        "data/table.csv": [{"id": 1, "name": "english"}, {"id": 2, "name": "中国人"}]
    }
    assert extract("data/package.json", source_type="package") == expected
예제 #17
0
def test_program_extract_dialect_keyed_option():
    """CLI `--path <file> --keyed <flag>` matches dialect {"keyed": <flag>}."""
    file = "data/table.keyed.json"
    keyed = True
    # The boolean is interpolated as the text "True" in the command line.
    command = f"extract --path {file} --keyed {keyed} --json"
    result = runner.invoke(program, command)
    assert result.exit_code == 0
    actual = json.loads(result.stdout)
    expected = extract(path=file, dialect={"keyed": keyed})
    assert actual == expected
예제 #18
0
def test_extract():
    """`extract()` on a CSV path returns one dict per data row."""
    expected = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    assert extract("data/table.csv") == expected
예제 #19
0
def test_program_extract_skip_fields():
    """CLI `--skip-fields 'id'` matches the layout option skipFields=["id"]."""
    command = "extract data/table.csv --json --skip-fields 'id'"
    result = runner.invoke(program, command)
    assert result.exit_code == 0
    actual = json.loads(result.stdout)
    layout = {"skipFields": ["id"]}
    expected = extract("data/table.csv", layout=layout)
    assert actual == expected
예제 #20
0
def test_extract_package():
    """`extract()` on a package descriptor returns rows keyed by resource path."""
    expected = {
        "data/table.csv": [{"id": 1, "name": "english"}, {"id": 2, "name": "中国人"}]
    }
    assert extract("data/package.json") == expected
예제 #21
0
def test_program_extract_skip_rows():
    """CLI `--skip-rows 1` matches the layout option skipRows=[1]."""
    command = "extract data/table.csv --json --skip-rows 1"
    result = runner.invoke(program, command)
    assert result.exit_code == 0
    actual = json.loads(result.stdout)
    layout = {"skipRows": [1]}
    expected = extract("data/table.csv", layout=layout)
    assert actual == expected
예제 #22
0
def test_extract_package_descriptor_type_package():
    """`extract(descriptor=<datapackage.json>)` returns a dict."""
    descriptor_path = "data/package/datapackage.json"
    extracted = extract(descriptor=descriptor_path)
    assert isinstance(extracted, dict)