コード例 #1
0
def look_up(schema_template: SchemaTemplate, header, context_concrete_type, context=None,
            order_of_occurrence=1):
    """Build the ColumnSpecification describing ``header`` within a given context.

    The context is the field (or type) the header is being specified for. The same
    property can be specified slightly differently depending on it: for example,
    `project.contributors.email` has no multivalue parent in the context of
    `project.contributors` (each of the contributors is being defined), whereas it
    has one in the context of `project` (the project is being defined).
    When no context is supplied, ``context_concrete_type`` is used as the context.
    """
    effective_context = context or context_concrete_type

    # The parent's specification is only looked up when the parent is not the context itself.
    parent_field, *_ = utils.split_field_chain(header)
    if parent_field != effective_context:
        parent_spec = schema_template.lookup(parent_field)
    else:
        parent_spec = None

    field_spec = schema_template.lookup(header)
    value_type = field_spec.get('value_type')

    # The root field of the header is the actual concrete type the column represents.
    # When the column is a linking detail to another type it differs from
    # context_concrete_type: it is the "inherent" type of the column, whichever
    # context the column is specified in.
    root_type = utils.extract_root_field(header)
    root_schema = schema_template.lookup(root_type).get('schema')
    domain_type, *_ = root_schema.get('domain_entity').split('/')

    return ColumnSpecification(
        header, context_concrete_type, domain_type, value_type,
        identity=field_spec.get('identifiable'),
        external_reference=field_spec.get('external_reference'),
        multivalue=field_spec.get('multivalue'),
        # A missing (or empty) parent specification is passed through unchanged.
        multivalue_parent=(parent_spec.get('multivalue') if parent_spec else parent_spec),
        order_of_occurrence=order_of_occurrence)
コード例 #2
0
def get_ontology_schemas(key, schema_object=None):
    """Return the JSON schema that declares the last segment of a dotted ``key``.

    The first segment of ``key`` selects a schema by its "name". Every following
    segment except the last is resolved by fetching the "$ref" of the matching
    property (for array properties, the "$ref" of its items, or of the first item
    when items is a list) and continuing in the fetched schema.

    Returns None when no schema carries the requested name. When ``schema_object``
    is not given, a SchemaTemplate for the archive ingest API is created.
    """
    path = key.split(".")
    if not schema_object:
        schema_object = SchemaTemplate(
            ingest_api_url='https://api.ingest.archive.data.humancellatlas.org/'
        )
    schema_object._get_json_objs_from_metadata_schema_urls()

    for schema in schema_object.json_schemas:
        if "name" not in schema or path[0] != schema["name"]:
            continue

        path.pop(0)
        # Walk down until only the final property name is left.
        while len(path) > 1:
            prop = schema["properties"][path[0]]
            if prop["type"] == "array":
                items = prop["items"]
                ref_holder = items[0] if isinstance(items, list) else items
            else:
                ref_holder = prop
            # NOTE(review): `re` is presumably an HTTP client alias (not the regex module) - confirm.
            schema = re.get(ref_holder["$ref"]).json()
            path.pop(0)
        return schema
コード例 #3
0
def build(schemas, ingest_api) -> TemplateManager:
    """Create a TemplateManager backed by a SchemaTemplate for ``ingest_api.url``.

    ``schemas`` is forwarded as ``metadata_schema_urls`` only when it is non-empty;
    otherwise the SchemaTemplate is created from the ingest API URL alone.
    """
    template_kwargs = {'ingest_api_url': ingest_api.url}
    if schemas:
        template_kwargs['metadata_schema_urls'] = schemas

    return TemplateManager(SchemaTemplate(**template_kwargs), ingest_api)
コード例 #4
0
    def test__lookup_property_in_schema__success(
            self, property_migrations_request_mock):
        """Looking up `timecourse.unit` matches the descriptor built from the same attributes."""
        # Make the injected request mock answer OK with an empty list of migrations.
        migrations_response = Mock(ok=True)
        migrations_response.json.return_value = {'migrations': []}
        property_migrations_request_mock.return_value = migrations_response

        timecourse_schema = {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "$id": "https://schema.humancellatlas.org/module/biomaterial/2.0.2/timecourse",
            "description": "Information relating to a timecourse.",
            "type": "object",
            "properties": {
                "value": {
                    "description": "The numerical value in Timecourse unit",
                    "pattern": "^[0-9]+\\.?[0-9]*-?[0-9]*\\.?[0-9]*$",
                    "type": "string",
                    "example": "2; 5.5-10.5",
                    "user_friendly": "Timecourse value",
                    "guidelines": "Enter either a single value or a range of values. Indicate a range using a hyphen.",
                },
                "unit": {
                    "description": "The unit in which the Timecourse value is expressed.",
                    "type": "object",
                    "user_friendly": "Timecourse unit",
                },
            },
        }
        template = SchemaTemplate(json_schema_docs=[timecourse_schema])

        expected_descriptor = SimplePropertyDescriptor({
            "description": "The unit in which the Timecourse value is expressed.",
            "type": "object",
            "user_friendly": "Timecourse unit",
        })

        looked_up = template.lookup_property_attributes_in_metadata("timecourse.unit")
        self.assertEqual(
            looked_up,
            expected_descriptor.get_dictionary_representation_of_descriptor())
コード例 #5
0
 def _map_key_to_spec(schema_template: SchemaTemplate, key):
     """Look up ``key`` in the template, falling back to an empty specification.

     An UnknownKeySchemaException raised by the lookup is logged as a warning and
     an empty dict is returned in its place.
     """
     try:
         return schema_template.lookup_property_from_template(key)
     except UnknownKeySchemaException:
         _LOGGER.warning(f'[{key}] not found in the schema.')
         return {}
コード例 #6
0
    def test_no_schemas(self):
        """Build a spreadsheet from one hand-written schema document and check its header rows.

        The generated workbook is written to ``foo.xlsx`` and read back; the first
        column of the "Donor organism" sheet must contain the upper-cased user
        friendly name, the description, the example and the fully qualified key in
        rows 1 to 4 respectively.
        """
        data = {
            "id": self.donorUri,
            "properties": {
                "foo_bar": {
                    "user_friendly": "Foo bar",
                    "description": "this is a foo bar",
                    "example": "e.g. foo"
                }
            }
        }

        file = "foo.xlsx"
        spreadsheet_builder = VanillaSpreadsheetBuilder(file)
        template = SchemaTemplate(json_schema_docs=[data],
                                  property_migrations=[])
        try:
            spreadsheet_builder.build(template)
            spreadsheet_builder.save_spreadsheet()

            reader = Reader(file)
            sheet = reader["Donor organism"]

            self.assertEqual("this is a foo bar",
                             sheet.cell(row=2, column=1).value)
            self.assertEqual("FOO BAR", sheet.cell(row=1, column=1).value)
            self.assertEqual("For example: e.g. foo",
                             sheet.cell(row=3, column=1).value.strip())
            self.assertEqual("donor_organism.foo_bar",
                             sheet.cell(row=4, column=1).value)
        finally:
            # Clean up even when an assertion fails, so a failing run does not leave
            # the workbook behind. The existence check avoids masking the original
            # error when the file was never written.
            if os.path.exists(file):
                os.remove(file)
コード例 #7
0
def get_template_for_json(mock_urlopen, data="{}"):
    """Return a SchemaTemplate for 'test_url' while ``mock_urlopen`` serves ``data``.

    ``mock_urlopen`` is configured to return a mock that works as a context
    manager yielding itself, reports status code 200 and whose ``read()`` returns
    ``data`` encoded to bytes.
    """
    response = MagicMock()
    response.__enter__.return_value = response
    response.getcode.return_value = 200
    response.read.return_value = data.encode()
    mock_urlopen.return_value = response

    return SchemaTemplate(list_of_schema_urls=['test_url'])
コード例 #8
0
    def test__generate_yaml_full_schema_template__success(self):
        """The full YAML dump of a template loads back to its dictionary representation."""
        timecourse_schema = {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "$id": "https://schema.humancellatlas.org/module/biomaterial/2.0.2/timecourse",
            "description": "Information relating to a timecourse.",
            "type": "object",
            "properties": {
                "value": {
                    "description": "The numerical value in Timecourse unit",
                    "pattern": "^[0-9]+\\.?[0-9]*-?[0-9]*\\.?[0-9]*$",
                    "type": "string",
                    "example": "2; 5.5-10.5",
                    "user_friendly": "Timecourse value",
                    "guidelines": "Enter either a single value or a range of values. Indicate a range using a hyphen.",
                },
            },
        }
        value_migration = {
            "source_schema": "timecourse",
            "property": "value",
            "target_schema": "fancy_new_timecourse",
            "replaced_by": "fancy_new_value",
            "effective_from": "3.0.0",
            "reason": "For fun",
            "type": "renamed property",
        }
        template = SchemaTemplate(json_schema_docs=[timecourse_schema],
                                  property_migrations=[value_migration])

        yaml_text = template.generate_yaml_representation_of_spreadsheets(
            tabs_only=False)

        # Compare as sorted dictionaries so that key order does not matter.
        loaded_from_yaml = SortedDict(
            yaml.load(yaml_text, Loader=yaml.FullLoader))
        expected = SortedDict(template.get_dictionary_representation())
        self.assertEqual(loaded_from_yaml, expected)
コード例 #9
0
    def list_of_latest_attributes(self):
        """Return `<type>.<column>` for every column of every tab of a default SchemaTemplate."""
        template = SchemaTemplate()

        qualified_names = []
        for tab in template.tabs:
            for entity_type, content in tab.items():
                qualified_names.extend(
                    entity_type + '.' + column
                    for column in content.get('columns'))
        return qualified_names
コード例 #10
0
    def test__lookup_next_latest_key_migration_simple_migration__success(self):
        """A renamed property resolves to `<target_schema>.<replaced_by>` of its migration."""
        timecourse_schema = {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "$id": "https://schema.humancellatlas.org/module/biomaterial/2.0.2/timecourse",
            "description": "Information relating to a timecourse.",
            "type": "object",
            "properties": {
                "value": {
                    "description": "The numerical value in Timecourse unit",
                    "pattern": "^[0-9]+\\.?[0-9]*-?[0-9]*\\.?[0-9]*$",
                    "type": "string",
                    "example": "2; 5.5-10.5",
                    "user_friendly": "Timecourse value",
                    "guidelines": "Enter either a single value or a range of values. Indicate a range using a hyphen.",
                },
            },
        }
        rename_migration = {
            "source_schema": "timecourse",
            "property": "value",
            "target_schema": "fancy_new_timecourse",
            "replaced_by": "fancy_new_value",
            "effective_from": "3.0.0",
            "reason": "For fun",
            "type": "renamed property",
        }
        template = SchemaTemplate(json_schema_docs=[timecourse_schema],
                                  property_migrations=[rename_migration])

        replacement_key = template.lookup_next_latest_key_migration(
            "timecourse.value")

        self.assertEqual(replacement_key,
                         "fancy_new_timecourse.fancy_new_value")
コード例 #11
0
    def test__lookup_unknown_property_in_schema__throws_exception(
            self, property_migrations_request_mock):
        """Looking up a property the schema does not declare raises UnknownKeySchemaException."""
        # Make the injected request mock answer OK with an empty list of migrations.
        migrations_response = Mock(ok=True)
        migrations_response.json.return_value = {'migrations': []}
        property_migrations_request_mock.return_value = migrations_response

        # The schema only declares `value`; `unit` is deliberately absent.
        timecourse_schema = {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "$id": "https://schema.humancellatlas.org/module/biomaterial/2.0.2/timecourse",
            "description": "Information relating to a timecourse.",
            "type": "object",
            "properties": {
                "value": {
                    "description": "The numerical value in Timecourse unit",
                    "pattern": "^[0-9]+\\.?[0-9]*-?[0-9]*\\.?[0-9]*$",
                    "type": "string",
                    "example": "2; 5.5-10.5",
                    "user_friendly": "Timecourse value",
                    "guidelines": "Enter either a single value or a range of values. Indicate a range using a hyphen.",
                },
            },
        }
        template = SchemaTemplate(json_schema_docs=[timecourse_schema])

        with self.assertRaisesRegex(UnknownKeySchemaException,
                                    "Cannot find key"):
            template.lookup_property_attributes_in_metadata("timecourse.unit")
コード例 #12
0
 def test_correct_description_used(self):
     """Check the description and example text returned for `enrichment_protocol.method.text`.

     Both values are fetched through ``get_value_for_column`` against a default
     SchemaTemplate and compared with the expected wording.
     """
     file = "uf_test.xlsx"
     template = SchemaTemplate()
     builder = VanillaSpreadsheetBuilder(file)
     test_field = "enrichment_protocol.method.text"

     returned_description = builder.get_value_for_column(
         template=template, column_name=test_field, property="description")
     print("returned description: " + returned_description)
     expected_description = "A term that may be associated with a process-related ontology term."

     returned_example_text = builder.get_value_for_column(
         template=template, column_name=test_field, property="example")
     print("returned_example_text: " + returned_example_text)
     expected_example_text = "enzymatic dissociation; blood draw"

     self.assertEqual(expected_description, returned_description)
     # The example text was fetched and an expectation prepared, but the two were
     # never compared; assert it so the second lookup is actually verified.
     self.assertEqual(expected_example_text, returned_example_text)
コード例 #13
0
    def test__creation_of_template_with_urls_and_jsons__throws_exception(self):
        """Passing both schema URLs and schema documents to SchemaTemplate must raise."""
        schema_url = "https://schema.humancellatlas.org/bundle/5.0.0/biomaterial"
        schema_doc = {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "$id": "https://schema.humancellatlas.org/module/biomaterial/2.0.2/timecourse",
        }
        expected_message = (
            "Only one of function arguments metadata_schema_urls or json_schema_docs [^/]* "
            "may be populated")

        with self.assertRaisesRegex(Exception, expected_message):
            SchemaTemplate(metadata_schema_urls=[schema_url],
                           json_schema_docs=[schema_doc])
コード例 #14
0
 def test_vanilla_spreadsheet(self):
     """Generate a spreadsheet for five pinned schema versions and check it reads back as a Workbook."""
     file = "vanilla_test.xlsx"
     template = SchemaTemplate(metadata_schema_urls=[
         "https://schema.humancellatlas.org/type/biomaterial/15.5.0/donor_organism",
         "https://schema.humancellatlas.org/type/biomaterial/10.4.0/specimen_from_organism",
         "https://schema.humancellatlas.org/type/biomaterial/13.3.0/cell_suspension",
         "https://schema.humancellatlas.org/type/protocol/sequencing/6.2.0/library_preparation_protocol",
         "https://schema.humancellatlas.org/type/file/9.2.0/sequence_file"
     ])
     builder = VanillaSpreadsheetBuilder(file)
     try:
         builder.generate_spreadsheet(schema_template=template)
         builder.save_spreadsheet()
         reader = Reader(file)
         self.assertIsInstance(reader, Workbook)
     finally:
         # Clean up even when generation or the assertion fails, so a failing run
         # does not leave the workbook behind. The existence check avoids masking
         # the original error when the file was never written.
         if os.path.exists(file):
             os.remove(file)
コード例 #15
0
    def setUp(self):
        """Create ``self.schema_template`` from a single in-memory `someschema` document."""
        schema_properties = {
            "protocol_id": {
                "type": "integer",
                "multivalue": False,
            },
            "value": {
                "description": "The numerical value in Timecourse unit",
                "pattern": "^[0-9]+\\.?[0-9]*-?[0-9]*\\.?[0-9]*$",
                "type": "string",
                "example": "2; 5.5-10.5",
                "user_friendly": "Timecourse value",
                "guidelines": "Enter either a single value or a range of values. Indicate a range using a hyphen.",
            },
            "required_property": {
                "description": "Some generic required property",
                "type": "object",
                "user_friendly": "Required property",
            },
            "multivalue_property": {
                "description": "Some generic multivalue property",
                "type": "array",
                "multivalue": True,
                "items": {"type": "integer"},
            },
        }
        schema_doc = {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "$id": "https://schema.humancellatlas.org/module/somedomain/2.0.2/someschema",
            "description": "Just a plain old test schema",
            "required": ["required_property"],
            "type": "object",
            "properties": schema_properties,
        }

        # The migrations fetch is mocked before the template is constructed.
        self._mock_fetching_of_property_migrations()

        self.schema_template = SchemaTemplate(json_schema_docs=[schema_doc])
コード例 #16
0
    def test_determine_converter_for_multivalue_type(self):
        """An array property of each base type yields a ListConverter with that base type."""
        field_name = "someschema.multivalue_property"

        for base_type in (DataType.BOOLEAN, DataType.INTEGER,
                          DataType.STRING, DataType.UNDEFINED):
            multivalue_property = {
                "description": "Some generic multivalue property",
                "type": "array",
                "multivalue": True,
                "items": {"type": base_type.value},
            }
            schema_doc = {
                "$schema": "http://json-schema.org/draft-07/schema#",
                "$id": "https://schema.humancellatlas.org/module/protocol/2.0.2/someschema",
                "description": "Just a plain old test schema",
                "type": "object",
                "properties": {"multivalue_property": multivalue_property},
            }
            template = SchemaTemplate(json_schema_docs=[schema_doc])

            specification = ColumnSpecification(template, field_name,
                                                "someschema")

            self.assertEqual(specification.field_name, field_name)
            self.assertTrue(specification.is_multivalue())
            self.assertIsInstance(specification.determine_converter(),
                                  ListConverter)
            self.assertEqual(specification.determine_converter().base_type,
                             base_type)
コード例 #17
0
    def test_user_friendly(self):
        """Each listed key maps to its expected user friendly column name."""
        expected_names = {
            "donor_organism.human_specific.body_mass_index": "Body mass index",
            "specimen_from_organism.purchased_specimen.manufacturer": "Purchased specimen - Manufacturer",
            "donor_organism.organism_age_unit.text": "Age unit",
            "donor_organism.organism_age_unit.ontology": "Age unit ontology ID",
            "library_preparation_protocol.cell_barcode.barcode_length": "Cell barcode - Barcode length",
            "project.contributors.project_role.ontology_label": "Contributor role ontology label",
            "donor_organism.human_specific.ethnicity.text": "Ethnicity",
            "collection_protocol.reagents.retail_name": "Retail name",
            "imaging_protocol.probe.probe_reagents.catalog_number": "Catalog number",
            "donor_organism.genus_species.text": "Genus species",
            "donor_organism.genus_species.ontology": "Genus species ontology ID",
            "donor_organism.genus_species.ontology_label": "Genus species ontology label",
            "cell_suspension.cell_morphology.cell_size_unit.text": "Cell size unit",
            "specimen_from_organism.preservation_storage.storage_time_unit.text": "Storage time unit",
            "cell_suspension.timecourse.unit.text": "Timecourse unit",
        }

        file = "uf_test.xlsx"
        template = SchemaTemplate()
        builder = VanillaSpreadsheetBuilder(file)

        for key, expected_name in expected_names.items():
            actual_name = builder.get_user_friendly_column_name(template, key)
            print("from method: " + actual_name)
            print("expected: " + expected_name)
            self.assertEqual(expected_name, actual_name)

        delete_file("uf_test.xlsx")
コード例 #18
0
    def test_look_up_nested_object_field(self):
        """A child of a multivalue parent is a single-valued field of a list element."""
        parent_property = {
            "description": "A parent property",
            "type": "array",
            "multivalue": True,
            "items": {"type": "integer"},
            "properties": {
                "some_child_property": {
                    "description": "A child property",
                    "type": "string",
                },
            },
        }
        nested_schema_doc = {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "$id": "https://schema.humancellatlas.org/module/somedomain/2.0.2/someschema",
            "description": "Just a plain old test schema",
            "required": [],
            "type": "object",
            "properties": {"some_parent_property": parent_property},
        }
        template = SchemaTemplate(json_schema_docs=[nested_schema_doc])

        specification = ColumnSpecification(
            template,
            "someschema.some_parent_property.some_child_property",
            "someschema")

        self.assertFalse(specification.multivalue)
        self.assertTrue(specification.is_field_of_list_element())
        self.assertEqual(ConversionType.FIELD_OF_LIST_ELEMENT,
                         specification.get_conversion_type())
コード例 #19
0
    def generate_metadata_spreadsheet(self, metadata_absolute_file_path,
                                      number_of_bundles):
        """Create a spreadsheet for the latest metadata schemas and fill it with bogus data.

        The ingest API and property-migrations URLs are derived from the
        ``CI_COMMIT_REF_NAME`` environment variable. The spreadsheet is written to
        ``metadata_absolute_file_path`` and then populated with bogus rows for
        ``number_of_bundles`` bundles.
        """
        deployment = os.environ['CI_COMMIT_REF_NAME']
        ingest_api_url = "http://api.ingest." + deployment + ".data.humancellatlas.org"
        property_migrations_url = ("https://schema." + deployment
                                   + ".data.humancellatlas.org/property_migrations")

        # One template object encapsulates the latest set of metadata schemas.
        latest_schemas_template = SchemaTemplate(
            ingest_api_url=ingest_api_url,
            migrations_url=property_migrations_url)

        # Turn the template into an (empty) spreadsheet on disk.
        builder = VanillaSpreadsheetBuilder(
            output_file=metadata_absolute_file_path, hide_row=False)
        builder.generate_spreadsheet(schema_template=latest_schemas_template)
        builder.save_spreadsheet()

        # Add as many bogus lines of data to the spreadsheet as there are bundles.
        self._populate_spreadsheet_with_bogus_data(
            metadata_absolute_file_path,
            latest_schemas_template,
            number_of_bundles)
コード例 #20
0
    def test_determine_converter_for_single_value(self):
        """Each single-valued data type maps to its converter and to MEMBER_FIELD conversion."""
        cases = (
            (DataType.BOOLEAN, BooleanConverter),
            (DataType.INTEGER, IntegerConverter),
            (DataType.STRING, StringConverter),
            (DataType.UNDEFINED, DefaultConverter),
        )

        for data_type, expected_converter in cases:
            schema_doc = {
                "$schema": "http://json-schema.org/draft-07/schema#",
                "$id": "https://schema.humancellatlas.org/module/protocol/2.0.2/someschema",
                "description": "Just a plain old test schema",
                "type": "object",
                "properties": {
                    "some_property": {
                        "description": "Some generic property",
                        "type": data_type.value,
                        "multivalue": False,
                    },
                },
            }
            template = SchemaTemplate(json_schema_docs=[schema_doc])

            specification = ColumnSpecification(
                template, "someschema.some_property", "someschema")

            self.assertIsInstance(specification.determine_converter(),
                                  expected_converter)
            self.assertEqual(specification.get_conversion_type(),
                             ConversionType.MEMBER_FIELD)
コード例 #21
0
def main(spreadsheet_name):
    """Load the workbook at ``spreadsheet_name`` and pass it to ``parse_wb``.

    The schema information handed to ``parse_wb`` is a SchemaTemplate created for
    the dev archive ingest API URL.
    """
    workbook = openpyxl.load_workbook(spreadsheet_name)
    dev_archive_template = SchemaTemplate(
        ingest_api_url="http://api.ingest.dev.archive.data.humancellatlas.org")
    parse_wb(spreadsheet_name, workbook, dev_archive_template)
コード例 #22
0
 def from_ref(ref: SchemaRef):
     """Create a Schema for ``ref`` from the template built for ``ref.url_string`` alone."""
     schema_urls = [ref.url_string]
     template = SchemaTemplate(list_of_schema_urls=schema_urls).get_template()
     return Schema(ref, template)
コード例 #23
0

# main launcher for running the app locally
if __name__ == '__main__':
    # Entry point when the module is executed directly: configure logging, read
    # config.ini, build the module-level SCHEMA_TEMPLATE and start the app.

    # Send log records of level INFO and above to stdout.
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    # if '/generator' in dir:
    #     dir = dir.replace('/generator', '')
    # base_uri = dir + "/"

    CONFIG_FILE = _loadConfig('config.ini')

    # Optional comma-separated [blacklist] schema_list entry. When it is absent,
    # EXCLUDED_SCHEMAS is not assigned here.
    if 'blacklist' in CONFIG_FILE and 'schema_list' in CONFIG_FILE['blacklist']:
        EXCLUDED_SCHEMAS = CONFIG_FILE['blacklist']['schema_list'].split(',')

    # The environment name defaults to '' when [system] environment is not configured.
    env = ''
    if 'system' in CONFIG_FILE and 'environment' in CONFIG_FILE['system']:
        env = CONFIG_FILE['system']['environment']

    # For 'prod' the "{env}." placeholder is removed together with its trailing dot;
    # for any other value "{env}" is replaced by the environment name.
    # NOTE(review): with an empty env the placeholder is replaced by '' while its
    # dot stays - confirm INGEST_API_URL still forms a valid URL in that case.
    if env == 'prod':
        api_url = INGEST_API_URL.replace("{env}.", '')
    else:
        api_url = INGEST_API_URL.replace("{env}", env)

    SCHEMA_TEMPLATE = SchemaTemplate(
        ingest_api_url=api_url,
        migrations_url='https://schema.humancellatlas.org/property_migrations')

    # Serve on host 0.0.0.0, port 5000.
    app.run(host='0.0.0.0', port=5000)
コード例 #24
0
    "https://schema.humancellatlas.org/type/biomaterial/5.1.0/cell_suspension",
    "https://schema.humancellatlas.org/type/biomaterial/5.1.0/specimen_from_organism",
    "https://schema.humancellatlas.org/type/biomaterial/5.1.1/donor_organism",
    "https://schema.humancellatlas.org/type/file/5.1.0/sequence_file",
    "https://schema.humancellatlas.org/type/process/biomaterial_collection/5.1.0/collection_process",
    "https://schema.humancellatlas.org/type/process/biomaterial_collection/5.1.0/dissociation_process",
    "https://schema.humancellatlas.org/type/process/biomaterial_collection/5.1.0/enrichment_process",
    "https://schema.humancellatlas.org/type/process/sequencing/5.1.0/library_preparation_process",
    "https://schema.humancellatlas.org/type/process/sequencing/5.1.0/sequencing_process",
    "https://schema.humancellatlas.org/type/protocol/5.1.0/protocol",
    "https://schema.humancellatlas.org/type/protocol/biomaterial/5.1.0/biomaterial_collection_protocol",
    "https://schema.humancellatlas.org/type/protocol/sequencing/5.1.0/sequencing_protocol",
    "https://schema.humancellatlas.org/type/process/1.0.0/process",
]

# Build one template from the schema URLs listed in `schemas`.
template = SchemaTemplate(list_of_schema_urls=schemas)

# Get the key for a user friendly name, restricted to the "Cell suspension" tab.

# tabs = TabConfig().load("tabs_human_10x.yaml")

print(template.get_key_for_label("Biomaterial name", tab="Cell suspension"))

# Look up where to submit this entity (the domain entity of cell_suspension).

print(template.lookup("cell_suspension.schema.domain_entity"))

# lookup text field for donor_organism.human_specific.ethnicity.text

print(
    template.get_key_for_label("donor_organism.human_specific.ethnicity.text",
コード例 #25
0
ファイル: schema_demod.py プロジェクト: ebi-ait/ingest-client
Script to demodularise the HCA metadata schemas by resolving all $refs
"""
__author__ = "jupp"
__license__ = "Apache 2.0"
__date__ = "11/01/2019"

from ingest.template.schema_template import SchemaTemplate
from ingest.template.schema_template import SchemaParser
from yaml import dump as yaml_dump
from yaml import load as yaml_load

import urllib.request
import json
import jsonref

# Module-level setup, executed on import: a default SchemaTemplate and a
# SchemaParser constructed from it.
template = SchemaTemplate()
parser = SchemaParser(template)

# Base URL of the ingest API that is queried for the schema list below.
INGESTAPI = "http://api.ingest.dev.data.humancellatlas.org"

# URLs of the latest submittable schemas, as returned by the template for INGESTAPI.
list_of_schema_urls = template.get_latest_submittable_schema_urls(INGESTAPI)


def get_data(uri):

    print("getting " + uri)

    with urllib.request.urlopen(uri) as url:
        data = json.loads(url.read().decode())

        if parser.get_high_level_entity_from_url(uri) != 'type':
コード例 #26
0
    def test__creation_of_template_with_json__success(
            self, property_migrations_request_mock):
        """A template built from one JSON document exposes the expected version, properties, labels and tabs."""
        # Make the injected request mock answer OK with an empty list of migrations.
        migrations_response = Mock(ok=True)
        migrations_response.json.return_value = {'migrations': []}
        property_migrations_request_mock.return_value = migrations_response

        timecourse_schema = {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "$id": "https://schema.humancellatlas.org/module/biomaterial/2.0.2/timecourse",
            "description": "Information relating to a timecourse.",
            "required": ["unit"],
            "type": "object",
            "properties": {
                "value": {
                    "description": "The numerical value in Timecourse unit",
                    "pattern": "^[0-9]+\\.?[0-9]*-?[0-9]*\\.?[0-9]*$",
                    "type": "string",
                    "example": "2; 5.5-10.5",
                    "user_friendly": "Timecourse value",
                    "guidelines": "Enter either a single value or a range of values. Indicate a range using a hyphen.",
                },
                "unit": {
                    "description": "The unit in which the Timecourse value is expressed.",
                    "type": "object",
                    "user_friendly": "Timecourse unit",
                },
            },
        }
        template = SchemaTemplate(json_schema_docs=[timecourse_schema])

        expected_properties = {
            "timecourse": SchemaParser(timecourse_schema).schema_dictionary,
        }
        # Labels are keyed both by the full key and by the lower-cased user friendly name.
        expected_labels = {
            "timecourse.value": ["timecourse.value"],
            "timecourse.unit": ["timecourse.unit"],
            "timecourse value": ["timecourse.value"],
            "timecourse unit": ["timecourse.unit"],
        }
        expected_tabs = [{
            "timecourse": {
                "display_name": "Timecourse",
                "columns": ["timecourse.value", "timecourse.unit"],
            },
        }]

        self.assertEqual(template.template_version, "1.0.0")
        self.assertEqual(template.meta_data_properties, expected_properties)
        self.assertEqual(template.labels, expected_labels)
        self.assertEqual(template.tabs, expected_tabs)
コード例 #27
0
def main(args):
    """Write `config.yaml` and a spreadsheet from the latest type schemas of an environment.

    Args:
        args: parsed command-line arguments. ``args.env`` selects the ingest API
            ("prod", "integration" or "develop"), ``args.sheet`` is handed to
            SpreadsheetBuilder and ``args.yaml`` is used as the tabs template.

    Raises:
        SystemExit: with status 1 when ``args.env`` is not a supported environment.
    """
    ingest_api_roots = {
        "prod": "http://api.ingest.humancellatlas.org",
        "integration": "http://api.ingest.integration.data.humancellatlas.org",
        "develop": "http://api.ingest.dev.data.humancellatlas.org",
    }

    env = args.env
    if env not in ingest_api_roots:
        print('--env must be "prod", "develop" or "integration"')
        print(args.env)
        # Exit with a non-zero status so callers can tell the run failed.
        sys.exit(1)
    ingest_api_root = ingest_api_roots[env]

    schemas_api = ingest_api_root + "/schemas"
    latest_type_schemas_search = schemas_api + "/search/filterLatestSchemas?highLevelEntity=type"

    ingestapi = IngestApi(ingest_api_root)

    latest_schemas_resources = ingestapi._getAllObjectsFromSet(
        latest_type_schemas_search, "schemas", 50)
    latest_schema_urls = [
        schema_resource['_links']['json-schema']['href']
        for schema_resource in latest_schemas_resources
    ]

    # Known irrelevant types to drop; a set gives constant-time membership tests
    # and makes duplicate entries harmless.
    irrelevant_schema_urls = {
        'https://schema.humancellatlas.org/type/protocol/analysis/7.0.0/analysis_protocol',
        'https://schema.humancellatlas.org/type/file/5.2.1/analysis_file',
        'https://schema.humancellatlas.org/type/process/analysis/5.1.0/analysis_process',
        'https://schema.humancellatlas.org/type/process/3.0.0/process',
        'https://schema.humancellatlas.org/type/protocol/6.1.1/protocol',
        'https://schema.humancellatlas.org/type/process/imaging/5.1.0/imaging_process',
        'https://schema.humancellatlas.org/type/process/biomaterial_collection/5.1.0/collection_process',
        'https://schema.humancellatlas.org/type/process/biomaterial_collection/5.1.0/enrichment_process',
        'https://schema.humancellatlas.org/type/process/sequencing/5.1.0/library_preparation_process',
        'https://schema.humancellatlas.org/type/process/sequencing/5.1.0/sequencing_process',
        'https://schema.humancellatlas.org/type/process/biomaterial_collection/5.1.0/dissociation_process',
    }
    latest_schemas = [
        url for url in latest_schema_urls if url not in irrelevant_schema_urls
    ]

    print(
        '\nMake sure you have git pulled ingest-client locally for most up to date version!!!\n'
    )

    print('\n\nUsing schemas:\n')
    # print(latest_schemas)
    print("\n latest_schemas size: " + str(len(latest_schemas)))

    # make yaml
    template = SchemaTemplate(list_of_schema_urls=latest_schemas)
    with open('config.yaml', 'w') as o:
        print(template.yaml_dump(tabs_only=True), file=o)

    # make spreadsheet
    # NOTE(review): the workbook is generated from args.yaml, not from the
    # config.yaml written above - confirm this is intended.
    spreadsheet_builder = SpreadsheetBuilder(args.sheet)
    spreadsheet_builder.generate_workbook(tabs_template=args.yaml)
    spreadsheet_builder.save_workbook()
コード例 #28
0
            if schema[dp] and schema[dp]['value_type'] and schema[dp][
                    'value_type'] == 'object' and dp not in references.keys():
                if schema[dp]['required']:
                    references[dp] = "required"
                else:
                    references[dp] = "not required"

                print(dp + " is an object property")

    structure["references"] = references
    return structure


if __name__ == '__main__':
    # Entry point when the module is executed directly: configure logging, read
    # config.ini, build the module-level SCHEMA_TEMPLATE and start the app.

    # Send log records of level INFO and above to stdout.
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    # if '/generator' in dir:
    #     dir = dir.replace('/generator', '')
    # base_uri = dir + "/"

    CONFIG_FILE = _loadConfig('config.ini')

    # The environment name defaults to '' when [system] environment is not configured.
    env = ''
    if 'system' in CONFIG_FILE and 'environment' in CONFIG_FILE['system']:
        env = CONFIG_FILE['system']['environment']
    # NOTE(review): "{env}" is replaced verbatim, also when env is '' - confirm
    # INGEST_API_URL still forms a valid URL in that case.
    api_url = INGEST_API_URL.replace("{env}", env)

    SCHEMA_TEMPLATE = SchemaTemplate(ingest_api_url=api_url)

    # Serve on host 0.0.0.0, port 5000.
    app.run(host='0.0.0.0', port=5000)