def test_program_extract_json():
    """CLI `extract --json` must print JSON equal to the `extract` API result."""
    command = "extract data/table.csv --json"
    outcome = runner.invoke(program, command)
    assert outcome.exit_code == 0
    printed = json.loads(outcome.stdout)
    assert printed == extract("data/table.csv")
def test_program_extract_field_names():
    """CLI `--field-names 'a,b'` must match `Detector(field_names=["a", "b"])`."""
    command = "extract data/table.csv --json --field-names 'a,b'"
    outcome = runner.invoke(program, command)
    assert outcome.exit_code == 0
    printed = json.loads(outcome.stdout)
    # Build the detector only after the CLI output has been parsed.
    detector = Detector(field_names=["a", "b"])
    assert printed == extract("data/table.csv", detector=detector)
def test_program_extract_yaml():
    """CLI `extract --yaml` must print YAML equal to the `extract` API result.

    The command has to request `--yaml`: JSON output is (mostly) valid YAML, so
    parsing `--json` output with `yaml.safe_load` would also succeed and the
    YAML serialization path would go untested.
    """
    result = runner.invoke(program, "extract data/table.csv --yaml")
    assert result.exit_code == 0
    # `safe_load` is used so the CLI output is parsed without arbitrary object construction.
    assert yaml.safe_load(result.stdout) == extract("data/table.csv")
def test_extract_resource_process():
    """`extract` on a resource descriptor applies the `process` callback to each row."""
    # The callback turns every row into a plain list via `row.to_list()`.
    rows = extract("data/resource.json", process=lambda row: row.to_list())
    expected = [
        [1, "english"],
        [2, "中国人"],
    ]
    assert rows == expected
def test_program_extract_field_type():
    """CLI `--field-type string` must match `Detector(field_type="string")`."""
    command = "extract data/table.csv --json --field-type string"
    outcome = runner.invoke(program, command)
    assert outcome.exit_code == 0
    printed = json.loads(outcome.stdout)
    detector = Detector(field_type="string")
    assert printed == extract("data/table.csv", detector=detector)
def test_program_extract_header_rows():
    """CLI `--header-rows '1,2'` must match the `headerRows: [1, 2]` layout."""
    command = "extract data/table.csv --json --header-rows '1,2'"
    outcome = runner.invoke(program, command)
    assert outcome.exit_code == 0
    printed = json.loads(outcome.stdout)
    layout = {"headerRows": [1, 2]}
    assert printed == extract("data/table.csv", layout=layout)
def test_program_extract_offset_fields():
    """CLI `--offset-fields 1` must match the `offsetFields: 1` layout."""
    command = "extract data/table.csv --json --offset-fields 1"
    outcome = runner.invoke(program, command)
    assert outcome.exit_code == 0
    printed = json.loads(outcome.stdout)
    layout = {"offsetFields": 1}
    assert printed == extract("data/table.csv", layout=layout)
def test_to_str_with_doublequotes():
    """`Row.to_str()` keeps the double quotes around cells that contain a comma."""
    source = 'id,name\n1,"english,UK"\n2,"german,GE"'
    rows = extract(source, scheme="text", format="csv")
    expected_lines = ['1,"english,UK"', '2,"german,GE"']
    # Check the rows in order, one assertion per row, indexing as the data rows appear.
    for index, expected in enumerate(expected_lines):
        assert rows[index].to_str() == expected
def test_extract_table_process():
    """`extract` on a CSV path applies the `process` callback to each row."""
    # The callback turns every row into a plain list via `row.to_list()`.
    rows = extract("data/table.csv", process=lambda row: row.to_list())
    expected = [
        [1, "english"],
        [2, "中国人"],
    ]
    assert rows == expected
# View first 4 lines of data file. ! cat merops_peptidase_families.csv | sed '1,4!d' # Describe data file's inferred schema. ! frictionless describe merops_peptidase_families.csv # Extract normalized data that conforms to inferred schema. # E.g. invalid cells removed. ! frictionless extract merops_peptidase_families.csv | sed '1,10!d' # Validate data file. ! frictionless validate merops_peptidase_families.csv ############################# ### Python syntax. ############################# # View first 4 lines of data file. df = pd.read_csv("merops_peptidase_families.csv") display(df.head(4)) # Describe data file's inferred schema. frictionless.describe("merops_peptidase_families.csv") # Extract normalized data that conforms to inferred schema. # E.g. invalid cells removed. frictionless.extract("merops_peptidase_families.csv")[:4] # Validate data file. frictionless.validate("merops_peptidase_families.csv")
def test_to_str():
    """`Row.to_str()` renders each extracted row as a comma-separated string."""
    rows = extract("data/table.csv")
    expected_lines = ["1,english", "2,中国人"]
    # Check the rows in order, one assertion per row.
    for index, expected in enumerate(expected_lines):
        assert rows[index].to_str() == expected
def test_extract_limit_rows():
    """CLI `--limit-rows 1` must match the `limitRows: 1` layout."""
    command = "extract data/table.csv --json --limit-rows 1"
    outcome = runner.invoke(program, command)
    assert outcome.exit_code == 0
    printed = json.loads(outcome.stdout)
    layout = {"limitRows": 1}
    assert printed == extract("data/table.csv", layout=layout)
def test_extract_field_missing_values():
    """CLI `--field-missing-values 1` must match `Detector(field_missing_values=["1"])`."""
    command = "extract data/table.csv --json --field-missing-values 1"
    outcome = runner.invoke(program, command)
    assert outcome.exit_code == 0
    printed = json.loads(outcome.stdout)
    # The CLI value is compared against the string "1", not the integer 1.
    detector = Detector(field_missing_values=["1"])
    assert printed == extract("data/table.csv", detector=detector)
def test_extract_schema():
    """CLI `--schema data/schema.json` must match `extract(..., schema=...)`."""
    command = "extract data/table.csv --json --schema data/schema.json"
    outcome = runner.invoke(program, command)
    assert outcome.exit_code == 0
    printed = json.loads(outcome.stdout)
    assert printed == extract("data/table.csv", schema="data/schema.json")
def test_program_extract_dialect_sheet_option():
    """CLI `--sheet` must match passing `dialect={"sheet": ...}` to `extract`."""
    file, sheet = "data/sheet2.xls", "Sheet2"
    command = f"extract {file} --sheet {sheet} --json"
    outcome = runner.invoke(program, command)
    assert outcome.exit_code == 0
    printed = json.loads(outcome.stdout)
    dialect = {"sheet": sheet}
    assert printed == extract(file, dialect=dialect)
def test_extract_source_type():
    """`extract` with `source_type="package"` returns rows keyed by resource path."""
    extracted = extract("data/package.json", source_type="package")
    expected = {
        "data/table.csv": [
            {"id": 1, "name": "english"},
            {"id": 2, "name": "中国人"},
        ]
    }
    assert extracted == expected
def test_program_extract_dialect_keyed_option():
    """CLI `--keyed` must match passing `dialect={"keyed": ...}` to `extract`."""
    file, keyed = "data/table.keyed.json", True
    # The boolean is interpolated as the text "True" in the command line.
    command = f"extract --path {file} --keyed {keyed} --json"
    outcome = runner.invoke(program, command)
    assert outcome.exit_code == 0
    printed = json.loads(outcome.stdout)
    dialect = {"keyed": keyed}
    assert printed == extract(path=file, dialect=dialect)
def test_extract():
    """`extract` on a CSV path returns one dict per data row."""
    rows = extract("data/table.csv")
    expected = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    assert rows == expected
def test_program_extract_skip_fields():
    """CLI `--skip-fields 'id'` must match the `skipFields: ["id"]` layout."""
    command = "extract data/table.csv --json --skip-fields 'id'"
    outcome = runner.invoke(program, command)
    assert outcome.exit_code == 0
    printed = json.loads(outcome.stdout)
    layout = {"skipFields": ["id"]}
    assert printed == extract("data/table.csv", layout=layout)
def test_extract_package():
    """`extract` on a package descriptor returns rows keyed by resource path."""
    extracted = extract("data/package.json")
    expected = {
        "data/table.csv": [
            {"id": 1, "name": "english"},
            {"id": 2, "name": "中国人"},
        ]
    }
    assert extracted == expected
def test_program_extract_skip_rows():
    """CLI `--skip-rows 1` must match the `skipRows: [1]` layout."""
    command = "extract data/table.csv --json --skip-rows 1"
    outcome = runner.invoke(program, command)
    assert outcome.exit_code == 0
    printed = json.loads(outcome.stdout)
    layout = {"skipRows": [1]}
    assert printed == extract("data/table.csv", layout=layout)
def test_extract_package_descriptor_type_package():
    """`extract(descriptor=...)` on a datapackage descriptor returns a dict."""
    assert isinstance(extract(descriptor="data/package/datapackage.json"), dict)