Beispiel #1
0
def test_reshape(config):
    """Check that reshaping keeps only the properties listed in ``keep``.

    Two cases are exercised: a scalar ``type`` reshaped with
    ``versioned=False`` and a list-valued ``type`` reshaped with
    ``versioned=True``. In both, the JSON form of the result is expected
    to contain ``type`` only.
    """
    forge = KnowledgeGraphForge(config)
    reshaper = Reshaper(
        versioned_id_template="{x.id}?_version={x._store_metadata.version}"
    )

    # Case 1: scalar type, no versioning requested.
    scalar_typed = Resource(type="Experiment", url="file.gz")
    reshaped_scalar = reshaper.reshape(
        scalar_typed, keep=["type"], versioned=False
    )
    assert {"type": "Experiment"} == forge.as_json(reshaped_scalar)

    # Case 2: list-valued type, versioning requested.
    list_typed = Resource(type=["Experiment"], url="file.gz")
    reshaped_list = reshaper.reshape(
        list_typed, keep=["type"], versioned=True
    )
    assert {"type": ["Experiment"]} == forge.as_json(reshaped_list)
Beispiel #2
0
def download_from_nexus(uri, config_file_path, output_path, nexus_endpoint,
                        nexus_bucket, unzip=False):
    """Retrieve a resource by id and download its distribution file.

    A forge session is opened with ``config_file_path`` against the given
    endpoint and bucket, the resource identified by ``uri`` is retrieved,
    and the file referenced by ``distribution.contentUrl`` is downloaded
    into ``output_path`` (overwriting any existing file).

    When ``unzip`` is true, the downloaded file is additionally opened as
    a zip archive and extracted into ``output_path``.

    Returns the retrieved resource.
    """
    session = KnowledgeGraphForge(
        config_file_path, endpoint=nexus_endpoint, bucket=nexus_bucket)
    resource = session.retrieve(id=uri)

    # Expected location of the file once the download has finished.
    target_file = os.path.join(output_path, resource.distribution.name)
    print(f"Downloading the file to '{target_file}'")
    session.download(
        resource, path=output_path, overwrite=True,
        follow="distribution.contentUrl")

    if not unzip:
        return resource

    print("Decompressing ...")
    with zipfile.ZipFile(target_file, 'r') as archive:
        archive.extractall(output_path)
    return resource
Beispiel #3
0
def test_download(config):
    """Check that the store raises ``DownloadingError`` for a bad follow path.

    The resource only defines ``type`` and ``url``, and the download is
    asked to follow ``"fake.path"``.
    """
    simple = Resource(type="Experiment", url="file.gz")
    # Build the forge outside the ``raises`` context so that an error raised
    # during setup cannot be mistaken for the expected download failure.
    forge = KnowledgeGraphForge(config)
    with pytest.raises(DownloadingError):
        forge._store.download(simple,
                              "fake.path",
                              "./",
                              overwrite=False,
                              cross_bucket=False)
Beispiel #4
0
def forge():
    """Return a ``KnowledgeGraphForge`` built from an inline configuration.

    The configuration names ``DemoModel`` (origin ``directory``, sourced
    from ``tests/data/demo-model/``) as the model and ``DemoStore`` as the
    store.
    """
    model_settings = {
        "name": "DemoModel",
        "origin": "directory",
        "source": "tests/data/demo-model/",
    }
    store_settings = {"name": "DemoStore"}
    return KnowledgeGraphForge(
        {"Model": model_settings, "Store": store_settings}
    )
Beispiel #5
0
def _preprocess_data(data, data_type, auth=None):
    """Preprocess input data according to the specified type.

    Possible data types are:

    - "raw": use the data exactly as provided in the request
    - "json_pgframe": create a PandasPGFrame from the provided JSON repr
    - "nexus_dataset": download a JSON dataset from Nexus and create a
      PandasPGFrame from this representation; ``data`` must then provide
      the keys "endpoint", "bucket" and "resource_id"

    NOTE: building a property graph from a collection of Nexus resources
    is not handled here.

    Parameters
    ----------
    data
        Payload to preprocess; its expected form depends on ``data_type``.
    data_type : str
        One of the data types listed above.
    auth : optional
        Token passed to the forge session; required for "nexus_dataset".

    Returns
    -------
    The input itself for "raw", otherwise the loaded PandasPGFrame.

    Raises
    ------
    ValueError
        If ``data_type`` is unknown, or if "nexus_dataset" is requested
        without ``auth``.
    """
    if data_type == "raw":
        # Use passed data as is
        return data

    if data_type == "json_pgframe":
        return PandasPGFrame.from_json(data)

    if data_type != "nexus_dataset":
        raise ValueError("Unknown data type")

    if auth is None:
        raise ValueError(
            "To use Nexus-hosted property graph as the dataset "
            "authentication token should be provided in the "
            "request header")

    forge = KnowledgeGraphForge(app.config["FORGE_CONFIG"],
                                endpoint=data["endpoint"],
                                bucket=data["bucket"],
                                token=auth)
    resource = forge.retrieve(data["resource_id"])
    forge.download(resource, "distribution.contentUrl",
                   app.config["DOWNLOAD_DIR"])
    downloaded_file = os.path.join(app.config["DOWNLOAD_DIR"],
                                   resource.distribution.name)
    try:
        return PandasPGFrame.load_json(downloaded_file)
    finally:
        # Always clean up the temporary download, even when loading fails.
        # The existence check keeps a missing file from masking the
        # original loading error.
        if os.path.exists(downloaded_file):
            os.remove(downloaded_file)
Beispiel #6
0
def test_freeze(config, store_metadata_value):
    """Check provenance links on a dataset before and after freezing.

    A dataset is linked (unversioned) to a derivation, a generation, an
    invalidation and a contribution. The JSON form of each link is
    compared with an expected literal, then the dataset is frozen through
    the store and the ids of the dataset and of every linked resource are
    asserted to carry the ``?_version=1`` suffix.
    """

    def _json_roundtrip(payload):
        # Pass the literal through the JSON encoder/decoder before comparing.
        return json.loads(json.dumps(payload))

    forge = KnowledgeGraphForge(config, debug=True)

    # --- Resources the dataset will point to --------------------------------
    derived_from = Dataset(forge, type="Dataset", name="A derivation dataset")
    derived_from.id = "http://derivation1"
    derived_from._store_metadata = wrap_dict(store_metadata_value)

    generated_by = Dataset(forge, type="Dataset", name="A generation dataset")
    generated_by.id = "http://generation1"
    generated_by._store_metadata = wrap_dict(store_metadata_value)

    invalidated_by = Dataset(
        forge, type="Activity", name="An invalidation activity"
    )
    invalidated_by.id = "http://invalidation1"
    invalidated_by._store_metadata = wrap_dict(store_metadata_value)

    contributor = Resource(type="Person", name="A contributor")
    contributor.id = "http://contribution1"
    contributor._store_metadata = wrap_dict(store_metadata_value)

    # --- Dataset under test, linked without versioning ----------------------
    dataset = Dataset(forge, type="Dataset", name="A dataset")
    dataset._store_metadata = wrap_dict(store_metadata_value)
    dataset.add_derivation(derived_from, versioned=False)
    dataset.add_generation(generated_by, versioned=False)
    dataset.add_invalidation(invalidated_by, versioned=False)
    dataset.add_contribution(contributor, versioned=False)

    # --- JSON form of each link before freezing -----------------------------
    expected_derivation = _json_roundtrip({
        "type": "Derivation",
        "entity": {
            "id": "http://derivation1",
            "type": "Dataset",
            "name": "A derivation dataset",
        },
    })
    assert forge.as_json(dataset.derivation) == expected_derivation

    expected_generation = _json_roundtrip({
        "type": "Generation",
        "activity": {"id": "http://generation1", "type": "Dataset"},
    })
    assert forge.as_json(dataset.generation) == expected_generation

    expected_contribution = _json_roundtrip({
        "type": "Contribution",
        "agent": {"id": "http://contribution1", "type": "Person"},
    })
    assert forge.as_json(dataset.contribution) == expected_contribution

    expected_invalidation = _json_roundtrip({
        "type": "Invalidation",
        "activity": {"id": "http://invalidation1", "type": "Activity"},
    })
    assert forge.as_json(dataset.invalidation) == expected_invalidation

    # --- Freeze and verify the versioned identifiers ------------------------
    dataset.id = "http://dataset"
    dataset._synchronized = True
    forge._store.freeze(dataset)

    version_suffix = "?_version=1"
    assert dataset.id == "http://dataset" + version_suffix
    assert dataset.derivation.entity.id == "http://derivation1" + version_suffix
    assert dataset.generation.activity.id == "http://generation1" + version_suffix
    assert dataset.contribution.agent.id == "http://contribution1" + version_suffix
    assert dataset.invalidation.activity.id == "http://invalidation1" + version_suffix
 def test_initialization(self, config):
     """Check the class names of the forge's model, store and resolver."""
     instance = KnowledgeGraphForge(config)

     model_class_name = type(instance._model).__name__
     assert model_class_name == MODEL

     store_class_name = type(instance._store).__name__
     assert store_class_name == STORE

     resolver = instance._resolvers[SCOPE][RESOLVER]
     assert type(resolver).__name__ == RESOLVER
 def test_resolver_returns_correct_dictionary(self, config):
     """Check the exact dictionary returned by ``resolvers(output="dict")``."""
     expected_resolvers = {
         'terms': {
             'sex': {'bucket': 'sex.json'},
         },
     }
     instance = KnowledgeGraphForge(config)
     actual_resolvers = instance.resolvers(output="dict")
     assert actual_resolvers == expected_resolvers
 def test_resolver_does_not_return_if_print(self, config):
     """Check that ``resolvers(output="print")`` returns ``None``."""
     instance = KnowledgeGraphForge(config)
     returned = instance.resolvers(output="print")
     assert returned is None
Beispiel #10
0
 def test_resolver_returns_dict(self, config):
     """Check that ``resolvers(output="dict")`` returns exactly a ``dict``."""
     instance = KnowledgeGraphForge(config)
     returned = instance.resolvers(output="dict")
     # Exact type check on purpose: subclasses of dict would not pass.
     assert type(returned) is dict
Beispiel #11
0
                    pipeline_path = os.path.join(app.config["DOWNLOAD_DIR"],
                                                 path)
                    app.models[model_name]["object"] = EmbeddingPipeline.load(
                        pipeline_path,
                        embedder_interface=GraphElementEmbedder,
                        embedder_ext="zip")
            break


# Create the Flask application and load its settings from the config file.
app = Flask(__name__)
app.config.from_pyfile('configs/app_config.py')

# A forge session is created only when the LOCAL setting is exactly False;
# in that case the token is read from the NEXUS_TOKEN environment variable
# (a missing variable raises KeyError). Otherwise no forge is attached.
if app.config["LOCAL"] is not False:
    app.forge = None
else:
    TOKEN = os.environ["NEXUS_TOKEN"]
    app.forge = KnowledgeGraphForge(app.config["FORGE_CONFIG"], token=TOKEN)

# Start from an empty model registry, then hand over to _retrieve_models.
app.models = {}
_retrieve_models(app.config["LOCAL"])

# --------------- Handlers ----------------


def _respond_success():
    """Return a ``(body, status, headers)`` tuple signalling success.

    The body is the JSON document ``{"success": true}``, the status is 200
    and the headers declare the body as ``application/json``.
    """
    # The HTTP header is spelled "Content-Type"; the former "ContentType"
    # key was sent as an unrelated header and did not label the body as JSON.
    return (json.dumps({"success": True}), 200, {
        'Content-Type': 'application/json'
    })

def test_from_resource(config, person, organization, store_metadata_value):
    """Check ``Dataset.from_resource`` for single resources and for lists.

    Three scenarios are covered: a resource without store metadata, a
    resource carrying store metadata converted with ``store_metadata=True``,
    and a list of two resources.
    """
    forge = KnowledgeGraphForge(config)
    # Expected JSON form of the ``person`` fixture.
    data = {
        'id': 'c51f4e4e-2b30-41f4-8f7c-aced85632b03',
        'type': ['Person', 'Agent'],
        'name': 'Jami Booth'
    }
    # Scenario 1: the dataset serializes like the source resource and has
    # no store metadata.
    assert isinstance(person, Resource)
    dataset = Dataset.from_resource(forge, person)
    assert isinstance(dataset, Dataset)
    assert forge.as_json(dataset) == forge.as_json(person)
    assert forge.as_json(dataset) == data
    assert dataset._store_metadata is None

    # Scenario 2: a copy of the person with store metadata attached; the
    # metadata must be carried over when store_metadata=True is requested.
    person_with_store_metadata = Resource(**forge.as_json(person))
    person_with_store_metadata._store_metadata = store_metadata_value
    dataset = Dataset.from_resource(forge,
                                    person_with_store_metadata,
                                    store_metadata=True)
    assert isinstance(dataset, Dataset)
    person_with_store_metadata_json = forge.as_json(person_with_store_metadata)
    assert forge.as_json(dataset) == person_with_store_metadata_json
    assert forge.as_json(dataset) == data
    assert dataset._store_metadata == person_with_store_metadata._store_metadata
    # Serializing with and without store metadata must give different output.
    assert forge.as_json(dataset, store_metadata=False) != forge.as_json(
        person_with_store_metadata, store_metadata=True)
    assert forge.as_json(dataset, store_metadata=True) == forge.as_json(
        person_with_store_metadata, store_metadata=True)

    # Scenario 3: a list of resources yields a List with one entry per input.
    assert isinstance(organization, Resource)
    dataset = Dataset.from_resource(forge, [person, organization])
    assert isinstance(dataset, List)
    assert len(dataset) == 2