Esempio n. 1
0
def test_key_field_already_exists_b():
    """
    Test that specifying a key when one already exists doesn't result in an error.

    (overwrite = True).
    """
    csv_rows = read_csv("startrek.csv")
    # Must not raise: the existing "name" column may be reused as the key
    # because overwrite is enabled.
    validate_csv(csv_rows, key_field="name", overwrite=True)
Esempio n. 2
0
def test_missing_key_field():
    """Test that missing key fields are handled properly."""
    csv_rows = read_csv("startrek_no_key_field.csv")
    expected = UnsupportedTable().dict()

    with pytest.raises(ValidationFailed) as excinfo:
        validate_csv(csv_rows, key_field="_key", overwrite=False)

    reported = excinfo.value.errors
    # Exactly one error, and it is the UnsupportedTable error.
    assert len(reported) == 1
    assert reported[0] == expected
Esempio n. 3
0
def test_invalid_key_field():
    """Test that specifying a missing key field results in an error."""
    csv_rows = read_csv("startrek.csv")
    missing_key = "invalid"
    expected = KeyFieldDoesNotExist(key=missing_key).dict()

    with pytest.raises(ValidationFailed) as excinfo:
        validate_csv(csv_rows, key_field=missing_key, overwrite=False)

    reported = excinfo.value.errors
    # Exactly one error, naming the nonexistent key field.
    assert len(reported) == 1
    assert reported[0] == expected
Esempio n. 4
0
def test_duplicate_keys():
    """Test that duplicate keys are handled properly."""
    csv_rows = read_csv("clubs_invalid_duplicate_keys.csv")

    with pytest.raises(ValidationFailed) as excinfo:
        validate_csv(csv_rows, key_field="_key", overwrite=False)

    reported = excinfo.value.errors
    expected = [
        DuplicateKey(key="2").dict(),
        DuplicateKey(key="5").dict(),
    ]
    # Both duplicated keys must be reported (order not asserted).
    assert all(entry in reported for entry in expected)
Esempio n. 5
0
    def create_aql_table(self, table: str, aql_query: str) -> Table:
        """Create a table in this workspace from an aql query."""
        if self.has_table(table):
            raise AlreadyExists("table", table)

        # In the future, the result of this validation can be
        # used to determine dependencies in virtual tables
        query_rows = list(self.run_query(aql_query))
        validate_csv(query_rows, "_key", False)

        new_table = self.create_table(table, False)
        new_table.insert(query_rows)
        return new_table
Esempio n. 6
0
def test_invalid_headers():
    """Test that invalid headers are handled properly."""
    csv_rows = read_csv("membership_invalid_syntax.csv")

    with pytest.raises(ValidationFailed) as excinfo:
        validate_csv(csv_rows, key_field="_key", overwrite=False)

    reported = excinfo.value.errors
    bad_rows = [
        InvalidRow(row=3, columns=["_from"]),
        InvalidRow(row=4, columns=["_to"]),
        InvalidRow(row=5, columns=["_from", "_to"]),
    ]
    expected = [bad.dict() for bad in bad_rows]
    # Each malformed row must appear in the error list (order not asserted).
    assert all(entry in reported for entry in expected)
Esempio n. 7
0
def upload(workspace: str,
           table: str,
           key: str = "_key",
           overwrite: bool = False) -> Any:
    """
    Store a CSV file into the database as a node or edge table.

    `workspace` - the target workspace
    `table` - the target table
    `key` - the CSV field to use as the table key (defaults to "_key")
    `overwrite` - whether an existing "_key" field may be overwritten by `key`

    The CSV data is passed in the request body. If it contains `_from` and
    `_to` fields, it will be treated as an edge table.

    Raises `AlreadyExists` if the table exists, `CSVReadError` if the body is
    not parseable CSV, and whatever `validate_csv` raises on invalid content.
    """
    loaded_workspace = Workspace(workspace)

    if loaded_workspace.has_table(table):
        raise AlreadyExists("table", table)

    app.logger.info("Bulk Loading")

    # Read the request body into CSV format
    body = decode_data(request.data)

    try:
        # Type to a Dict rather than an OrderedDict
        rows: List[Dict[str, str]] = list(csv.DictReader(StringIO(body)))
    except csv.Error as err:
        # Chain the original parse error so the root cause is preserved.
        raise CSVReadError() from err

    # Perform validation.
    validate_csv(rows, key, overwrite)

    # Once we reach here, we know that the specified key field must be present,
    # and either:
    #   key == "_key"   # noqa: E800
    #   or key != "_key" and the "_key" field is not present
    #   or key != "_key" and "_key" is present, but overwrite = True
    if key != "_key":
        rows = set_table_key(rows, key)

    # Check if it's an edge table or not
    fieldnames = rows[0].keys()
    edges = "_from" in fieldnames and "_to" in fieldnames

    # Create table and insert the data
    loaded_table = loaded_workspace.create_table(table, edges)
    results = loaded_table.insert(rows)

    return {"count": len(results)}
Esempio n. 8
0
def test_key_field_already_exists_a():
    """
    Test that specifying a key when one already exists results in an error.

    (overwrite = False)
    """
    csv_rows = read_csv("startrek.csv")
    chosen_key = "name"
    expected = KeyFieldAlreadyExists(key=chosen_key).dict()

    with pytest.raises(ValidationFailed) as excinfo:
        validate_csv(csv_rows, key_field=chosen_key, overwrite=False)

    reported = excinfo.value.errors
    # Exactly one error: the key field clash.
    assert len(reported) == 1
    assert reported[0] == expected
def test_missing_key_field():
    """Test that missing key fields are handled properly."""
    csv_rows = read_csv("startrek_no_key_field.csv")
    expected = UnsupportedTable().dict()

    # This API variant returns the error list instead of raising.
    reported = validate_csv(csv_rows, key_field="_key", overwrite=False)

    assert len(reported) == 1
    assert reported[0] == expected
def test_invalid_key_field():
    """Test that specifying a missing key field results in an error."""
    csv_rows = read_csv("startrek.csv")
    missing_key = "invalid"
    expected = KeyFieldDoesNotExist(key=missing_key).dict()

    # This API variant returns the error list instead of raising.
    reported = validate_csv(csv_rows, key_field=missing_key, overwrite=False)

    assert len(reported) == 1
    assert reported[0] == expected
def test_duplicate_keys():
    """Test that duplicate keys are handled properly."""
    csv_rows = read_csv("clubs_invalid_duplicate_keys.csv")

    # This API variant returns the error list instead of raising.
    reported = validate_csv(csv_rows, key_field="_key", overwrite=False)

    expected = [
        DuplicateKey(key="2").dict(),
        DuplicateKey(key="5").dict(),
    ]
    # Both duplicated keys must be reported (order not asserted).
    assert all(entry in reported for entry in expected)
def test_invalid_headers():
    """Test that invalid headers are handled properly."""
    csv_rows = read_csv("membership_invalid_syntax.csv")

    # This API variant returns the error list instead of raising.
    reported = validate_csv(csv_rows, key_field="_key", overwrite=False)

    bad_rows = [
        InvalidRow(row=3, columns=["_from"]),
        InvalidRow(row=4, columns=["_to"]),
        InvalidRow(row=5, columns=["_from", "_to"]),
    ]
    expected = [bad.dict() for bad in bad_rows]
    # Each malformed row must appear in the error list (order not asserted).
    assert all(entry in reported for entry in expected)
def test_key_field_already_exists_a():
    """
    Test that specifying a key when one already exists results in an error.

    (overwrite = False)
    """
    csv_rows = read_csv("startrek.csv")
    chosen_key = "name"
    expected = KeyFieldAlreadyExists(key=chosen_key).dict()

    # This API variant returns the error list instead of raising.
    reported = validate_csv(csv_rows, key_field=chosen_key, overwrite=False)

    assert len(reported) == 1
    assert reported[0] == expected
Esempio n. 14
0
def upload(
    workspace: str,
    table: str,
    key: str = "_key",
    overwrite: bool = False,
    metadata: Optional[str] = None,
) -> Any:
    """
    Store a CSV file into the database as a node or edge table.

    `workspace` - the target workspace
    `table` - the target table
    `key` - the CSV field to use as the table key (defaults to "_key")
    `overwrite` - whether an existing "_key" field may be overwritten by `key`
    `metadata` - optional JSON string of table metadata

    The CSV data is passed in the request body. If it contains `_from` and
    `_to` fields, it will be treated as an edge table.

    Raises `AlreadyExists` if the table exists, `CSVReadError` if the body is
    not parseable CSV, `BadQueryArgument` if `metadata` is not valid JSON, and
    `ValidationFailed` if any metadata or CSV validation errors are found.
    """
    loaded_workspace = Workspace(workspace)

    if loaded_workspace.has_table(table):
        raise AlreadyExists("table", table)

    app.logger.info("Bulk Loading")

    # Read the request body into CSV format
    body = decode_data(request.data)

    try:
        # Type to a Dict rather than an OrderedDict
        csv_rows: List[UnprocessedTableRow] = list(csv.DictReader(StringIO(body)))
    except csv.Error as err:
        # Chain the original parse error so the root cause is preserved.
        raise CSVReadError() from err

    # TODO: This temporarily needs to be done here, so that validation of the metadata
    # can be done before the table is actually created. Once the API is updated, this
    # will change.
    # https://github.com/multinet-app/multinet-server/issues/493
    metadata_dict = {}
    if metadata:
        try:
            metadata_dict = json.loads(metadata)
        except json.decoder.JSONDecodeError as err:
            # Chain the original decode error so the root cause is preserved.
            raise BadQueryArgument("metadata", metadata) from err

    table_metadata = table_metadata_from_dict(metadata_dict)
    rows, metadata_validation_errors = process_rows(csv_rows, table_metadata.columns)

    # Perform validation.
    csv_validation_errors = validate_csv(rows, key, overwrite)

    # Combine both error sources; fail if anything was reported.
    validation_errors = [*metadata_validation_errors, *csv_validation_errors]
    if validation_errors:
        raise ValidationFailed(errors=validation_errors)

    # Once we reach here, we know that the specified key field must be present,
    # and either:
    #   key == "_key"   # noqa: E800
    #   or key != "_key" and the "_key" field is not present
    #   or key != "_key" and "_key" is present, but overwrite = True
    if key != "_key":
        rows = set_table_key(rows, key)

    # Create table and insert the data
    loaded_table = loaded_workspace.create_table(table, edge=is_edge_table(rows))

    # Set table metadata
    loaded_table.set_metadata(metadata_dict)

    results = loaded_table.insert(rows)
    return {"count": len(results)}