Code Example #1
    def setUp(self):
        self.tmpdir = os.path.join(tempfile.gettempdir(), 'schemaorg-test')

        if not os.path.exists(self.tmpdir):
            os.mkdir(self.tmpdir)

        self.here = os.path.abspath(os.path.dirname(__file__))
        recipe_yml = os.path.join(self.here, "recipe.yml")
        self.recipe = RecipeParser(recipe_yml)
        self.dataset = Schema("Dataset")
        self.templates = [
            'google/dataset-table.html',      # bootstrap
            'google/visual-dataset.html',     # default
            'google/dataset.html',            # json only
            'google/dataset-vue-table.html',  # vue js
        ]

        person = make_person(name="Dinosaur Pancakes",
                             description='Dataset maintainer',
                             url='https://www.github.com/vsoch',
                             contact_type='customer support',
                             telephone='999-999-9999')
        self.dataset.add_property('creator', person)

        self.dataset.add_property('version', "1.0.0")
        self.dataset.add_property('description', "This is the best dataset.")
        self.dataset.add_property('name', "Dinosaur Dataset")
        self.dataset.add_property(
            'thumbnailUrl',
            'https://vsoch.github.io/datasets/assets/img/avocado.png')
        self.dataset.add_property('about', "This is a dataset")

        download = Schema('DataDownload')
        download.add_property(
            'contentUrl',
            'https://vsoch.github.io/datasets/assets/img/avocado.png')
        download.add_property('encodingFormat', 'CSV')
        self.dataset.add_property('distribution', [download])

        self.recipe.validate(self.dataset)
Code Example #2
## Thing > CreativeWork > SoftwareSourceCode > ContainerRecipe
################################################################################

import os

# Schema and RecipeParser are assumed to come from the schemaorg Python
# library, as in the other examples in this listing
from schemaorg.main import Schema
from schemaorg.main.parse import RecipeParser

# Step 0. Define absolute paths to our Dockerfile, recipe, output

here = os.path.abspath(os.path.dirname(__file__))
recipe_yml = os.path.join(here, "recipe.yml")
index_html = os.path.join(here, "index.html")
spec_yml = os.path.join(here, "specification.yml")
dockerfile = os.path.join(os.path.dirname(here), "Dockerfile")

# Step 1: Read in the (custom) yaml file as a custom (under development) Schema

containerRecipe = Schema(spec_yml)

# Step 2: Show required and recommended fields from recipe

recipe = RecipeParser(recipe_yml)
print(recipe.loaded)

# Step 3: Extract Container Things! First, the recipe file

from spython.main.parse.parsers import DockerParser

parser = DockerParser(dockerfile).parse()

# containerRecipe.properties

containerRecipe.add_property('version', containerRecipe.version)
Code Example #3
## Thing > CreativeWork > SoftwareSourceCode > ContainerRecipe
################################################################################

import os

# Schema and RecipeParser are assumed to come from the schemaorg Python
# library, as in the other examples in this listing
from schemaorg.main import Schema
from schemaorg.main.parse import RecipeParser

# Step 0. Define absolute paths to our Dockerfile, recipe, output

here = os.path.abspath(os.path.dirname(__file__))
recipe_yml = os.path.join(here, "recipe.yml")
index_html = os.path.join(here, "index.html")
spec_yml = os.path.join(here, "specification.yml")
dockerfile = os.path.join(here, "Dockerfile")

# Step 1: Read in the (custom) yaml file as a custom (under development) Schema

containerRecipe = Schema(spec_yml)

# Step 2: Show required and recommended fields from recipe

recipe = RecipeParser(recipe_yml)
print(recipe.loaded)

# Step 3: Extract Container Things! First, the recipe file

from spython.main.parse.parsers import DockerParser
parser = DockerParser(dockerfile).parse()

# See definitions at containerRecipe._properties.keys()

# When you add, they are found at:
# containerRecipe.properties
Code Example #4
recipe = RecipeParser(recipe_yml)
print(recipe.loaded)

# Step 2: Generate a Person (these are Google Helper functions)
from schemaorg.templates.google import (make_person, make_dataset)

# make_person(name, description, url="", telephone="", email="", contact_type="customer support")
person = make_person(name="@vsoch",
                     description='research software engineer, dinosaur')

# Step 3: Create SoftwareSourceCode

from spython.main.parse.parsers import DockerParser
parser = DockerParser(dockerfile).parse()

sourceCode = Schema("SoftwareSourceCode")

# sourceCode.properties

sourceCode.add_property('creator', person)
sourceCode.add_property('version', sourceCode.version)
sourceCode.add_property('description', 'A Dockerfile build recipe')
sourceCode.add_property('name', parser.fromHeader)

# Step 4: Validate Data Structure

recipe.validate(sourceCode)

# Step 5a: Add additional fields (extra parsing!)
#         Since this is a demo, we won't do this here (we don't have URI)
#         I'll do a separate example for this using vsoch/dockerfiles on Github
Code Example #5
def make_person(name,
                description,
                url="",
                telephone="",
                email="",
                contact_type="customer support"):

    # Create an individual (persona)
    person = Schema('Person')
    person.add_property('name', name)
    contactPoint = Schema('ContactPoint')

    # Update the contact point
    contactPoint.add_property('telephone', telephone)
    contactPoint.add_property('email', email)
    contactPoint.add_property('url', url)
    contactPoint.add_property('contactType', contact_type)

    # Update the person with it
    person.add_property('contactPoint', contactPoint)
    return person
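
A minimal usage sketch of the helper above (all values are hypothetical). The returned Person is itself a Schema, so it can be attached to another schema as a property, exactly as the Dataset examples in this listing do with 'creator'. The import paths assume the schemaorg Python library layout used in Code Example #4.

from schemaorg.main import Schema
from schemaorg.templates.google import make_person  # same helper as above

# Build a Person with an embedded ContactPoint (hypothetical values)
person = make_person(name="@vsoch",
                     description="Dataset maintainer",
                     url="https://www.github.com/vsoch",
                     email="someone@example.com")

# Attach it to any other schema as a property
dataset = Schema("Dataset")
dataset.add_property("creator", person)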
Code Example #6
File: extract.py  Project: aiinsights/extractors
def extract(name, url, description,
            about=None, thumbnail=None, output_file=None, **kwargs):
    ''' extract a DataCatalog to describe some dataset(s). To add more
        properties, just add them via additional keyword args (kwargs)
    
        Parameters
        ==========
        output_file: An html output file to write catalog to (optional)
        url: the url to get the catalog
        name: the name of the DataCatalog
        description: a description of the DataCatalog
        thumbnail: an image thumbnail (web url)
        about: text about the data catalog (optional).
    '''

    # Step 0. Define absolute paths to our Dockerfile, recipe, output
    here = os.path.abspath(os.path.dirname(__file__))
    recipe_yml = os.path.join(here, "recipe.yml")
    
    # Step 1: Create the DataCatalog
    catalog = Schema("DataCatalog")

    # Step 2: Show required and recommended fields from recipe
    recipe = RecipeParser(recipe_yml)

    # Step 3: Add the catalog properties
    catalog.add_property('url', url)
    catalog.add_property('name', name)
    catalog.add_property('description', description)
    catalog.add_property('thumbnailUrl', thumbnail)
    catalog.add_property('about', about)

    # Additional properties (won't be added if not part of the schema)
    for key, value in kwargs.items():
        catalog.add_property(key, value)

    # Step 4: Validate Data Structure
    recipe.validate(catalog)

    # Step 5: If the user wants to write to html, do it before return
    if output_file is not None:
        make_dataset(catalog, output_file)

    return catalog
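
A hypothetical call of the extractor above (argument values are made up), assuming the function is in scope. Extra keyword arguments are passed through to add_property and are silently dropped when they are not part of the DataCatalog schema.

catalog = extract(name="Dinosaur Datasets",
                  url="https://vsoch.github.io/datasets",
                  description="A small catalog of example datasets",
                  output_file="catalog.html")

# The returned object is a validated Schema; dump_json gives the raw metadata
print(catalog.dump_json(pretty_print=True))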
Code Example #7
def extract(name,
            url=None,
            telephone=None,
            email=None,
            output_file=None,
            contact_type="customer_service",
            **kwargs):
    ''' extract an Organization. Some of the fields required are for
        a "ContactPoint" included within.
 
        Parameters
        ==========
        output_file: An html output file to write the Organization to (optional)
        url: the url for the Organization
        name: the name of the Organization
        contact_type: the type of contact for the ContactPoint
        telephone: the telephone of the ContactPoint
        email: the email of the ContactPoint
    '''

    # Step 0. Define absolute paths to our Dockerfile, recipe, output
    here = os.path.abspath(os.path.dirname(__file__))
    recipe_yml = os.path.join(here, "recipe.yml")

    # Step 1: Create Contact Point
    contact = Schema("ContactPoint")
    contact.add_property('contactType', contact_type)
    contact.add_property('telephone', telephone)
    contact.add_property('email', email)

    # Step 2: Show required and recommended fields from recipe
    recipe = RecipeParser(recipe_yml)

    # Step 3: Organization properties
    org = Schema("Organization")
    org.add_property('contactPoint', contact)
    org.add_property('url', url)
    org.add_property('name', name)

    # Additional (won't be added if not part of schema)
    for key, value in kwargs.items():
        org.add_property(key, value)

    # Step 4: Validate Data Structure
    recipe.validate(org)

    # Step 5: If the user wants to write to html, do it before return
    if output_file is not None:
        make_dataset(org, output_file)

    return org
Code Example #8
File: extract.py  Project: aiinsights/extractors
def extract(dockerfile, contact, container_name=None, output_html=True):
    '''extract a dataset from a given dockerfile, write to html output file.
       Use container-diff and spython to get information about the container.
    '''

    # Step 0. Define absolute paths to our Dockerfile, recipe, output
    here = os.path.abspath(os.path.dirname(__file__))
    recipe_yml = os.path.join(here, "recipe.yml")
    spec_yml = os.path.join(here, "specification.yml")

    # Step 1: Show required and recommended fields from recipe
    recipe = RecipeParser(recipe_yml)

    # Step 2: Create Dataset
    parser = DockerRecipe(dockerfile)
    image = Schema(spec_yml)

    # We can obtain these from the environment, or use reasonable defaults
    thumbnail = os.environ.get(
        'IMAGE_THUMBNAIL',
        'https://vsoch.github.io/datasets/assets/img/avocado.png')
    about = os.environ.get(
        'IMAGE_ABOUT',
        'This is a Dockerfile parsed by the openschemas/extractors container.')
    repository = os.environ.get('GITHUB_REPOSITORY', 'openschemas/extractors')
    description = os.environ.get('IMAGE_DESCRIPTION',
                                 'A Dockerfile build recipe')

    # Step 3: Generate a Person (these are Google Helper functions)
    contact = os.environ.get('GITHUB_ACTOR', contact)
    contact_url = os.environ.get('CONTACT_URL', repository)
    contact_description = os.environ.get('CONTACT_DESCRIPTION',
                                         'Dockerfile maintainer')
    contact_type = os.environ.get('CONTACT_TYPE', 'customer support')
    contact_telephone = os.environ.get('CONTACT_TELEPHONE')

    # Get the repository full url for contact
    if not contact_url.startswith('http'):
        contact_url = "https://www.github.com/%s" % contact_url

    if contact is not None:
        person = make_person(name=contact,
                             description=contact_description,
                             url=contact_url,
                             contact_type=contact_type,
                             telephone=contact_telephone)
        image.properties['creator'] = person
        image.properties['author'] = person

    # image.properties
    if len(parser.environ) > 0:
        image.properties['environment'] = parser.environ
    image.properties['entrypoint'] = parser.entrypoint
    image.properties['version'] = image.version
    image.properties['description'] = description
    image.properties['ContainerImage'] = parser.fromHeader
    image.properties['name'] = container_name

    # Fun properties :)
    image.properties['thumbnailUrl'] = thumbnail
    image.properties['sameAs'] = 'ImageDefinition'
    image.properties['about'] = about
    image.properties[
        'codeRepository'] = 'https://www.github.com/%s' % repository
    image.properties['runtime'] = 'Docker'

    # Generate temporary filename
    output_file = "%s.json" % get_tmpfile("image-definition")

    # Try using container name, if not available default to ContainerImage (FROM)
    layers = run_container_diff(container_name, parser.fromHeader, output_file)

    if len(layers) > 0:

        # softwareRequirements
        requires = []  # APT and PIP

        # note that the top level key here can be history, files, pip, apt, etc.
        for layer in layers:

            ## Pip and Apt will go into softwareRequirements
            if layer['AnalyzeType'] in ["Pip", "Apt"]:
                for pkg in layer['Analysis']:
                    requires.append(
                        '%s > %s==%s' %
                        (layer['AnalyzeType'], pkg['Name'], pkg['Version']))

        image.properties["softwareRequirements"] = requires

    if output_html:
        return make_dataset(image)
    return image.dump_json(pretty_print=True)
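
A hypothetical invocation of the extractor above (paths and names are made up). The CONTACT_* and IMAGE_* environment variables shown in the code override these defaults, and container-diff must be available for run_container_diff to return any layers.

import os

# Optional: override one of the defaults via the environment
os.environ['IMAGE_DESCRIPTION'] = 'An example Dockerfile build recipe'

html = extract(dockerfile="Dockerfile",
               contact="@vsoch",
               container_name="openschemas/extractors")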
Code Example #9
def extract(name, description=None, thumbnail=None, sameAs=None, version=None,
            about=None, output_file=None, person=None, repository=None,
            runtime=None, **kwargs):

    ''' extract a SoftwareSourceCode to describe a codebase. To add more
        properties, just add them via additional keyword args (kwargs)

        Parameters
        ==========
        output_file: An html output file to write the result to (optional)
        name: the name of the SoftwareSourceCode
        description: a description of the SoftwareSourceCode
        thumbnail: an image thumbnail (web url)
        sameAs: a related resource url (optional)
        about: text about the codebase (optional)
        person: a Person schema to set as the creator (optional)
        repository: the code repository url (optional)
        runtime: the runtime, e.g. Docker (optional)
        version: the software version. If not provided, uses the schemaorg version
    '''

    # Step 0. Define absolute paths to our Dockerfile, recipe, output
    here = os.path.abspath(os.path.dirname(__file__))
    recipe_yml = os.path.join(here, "recipe.yml")
    
    # Step 1: Show required and recommended fields from recipe
    recipe = RecipeParser(recipe_yml)
    
    # Step 2: Create SoftwareSourceCode
    ssc = Schema("SoftwareSourceCode")

    # dataset.properties
    ssc.add_property('creator', person)
    ssc.add_property('version', version or ssc.version)
    ssc.add_property('description', description)
    ssc.add_property('name', name)
    ssc.add_property('thumbnailUrl', thumbnail)
    ssc.add_property('sameAs', sameAs)
    ssc.add_property('about', about)
    ssc.add_property('codeRepository', repository)
    ssc.add_property('runtime', runtime)

    # Step 3: Additional (won't be added if not part of schema)
    for key, value in kwargs.items():
        ssc.add_property(key, value)

    # Step 4: Validate Data Structure
    recipe.validate(ssc)

    # Step 5: If the user wants to write to html, do it before return
    if output_file is not None:
        make_dataset(ssc, output_file)

    return ssc
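
A hypothetical usage sketch combining this extractor with the make_person helper shown earlier (names and urls are made up). When version is not supplied, the code above falls back to ssc.version.

person = make_person(name="@vsoch",
                     description="research software engineer, dinosaur")

ssc = extract(name="schemaorg-python",
              description="Python functions for applied schema.org",
              repository="https://www.github.com/openschemas/schemaorg",
              runtime="python",
              person=person,
              output_file="software.html")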
Code Example #10
import os
import tempfile
import unittest

# these import paths are assumed from the schemaorg Python library layout
from schemaorg.main import Schema
from schemaorg.main.parse import RecipeParser
from schemaorg.templates.google import make_dataset, make_person


class TestTemplates(unittest.TestCase):
    def setUp(self):
        self.tmpdir = os.path.join(tempfile.gettempdir(), 'schemaorg-test')

        if not os.path.exists(self.tmpdir):
            os.mkdir(self.tmpdir)

        self.here = os.path.abspath(os.path.dirname(__file__))
        recipe_yml = os.path.join(self.here, "recipe.yml")
        self.recipe = RecipeParser(recipe_yml)
        self.dataset = Schema("Dataset")
        self.templates = [
            'google/dataset-table.html',      # bootstrap
            'google/visual-dataset.html',     # default
            'google/dataset.html',            # json only
            'google/dataset-vue-table.html',  # vue js
        ]

        person = make_person(name="Dinosaur Pancakes",
                             description='Dataset maintainer',
                             url='https://www.github.com/vsoch',
                             contact_type='customer support',
                             telephone='999-999-9999')
        self.dataset.add_property('creator', person)
        self.dataset.add_property('version', "1.0.0")
        self.dataset.add_property('description', "This is the best dataset.")
        self.dataset.add_property('name', "Dinosaur Dataset")
        self.dataset.add_property(
            'thumbnailUrl',
            'https://vsoch.github.io/datasets/assets/img/avocado.png')
        self.dataset.add_property('about', "This is a dataset")
        download = Schema('DataDownload')
        download.add_property(
            'contentUrl',
            'https://vsoch.github.io/datasets/assets/img/avocado.png')
        download.add_property('encodingFormat', 'CSV')
        self.dataset.add_property('distribution', [download])
        self.recipe.validate(self.dataset)

    def tearDown(self):
        pass
        #shutil.rmtree(self.tmpdir)

    def test_templates(self):

        print('Testing templates generation...')
        for template in self.templates:
            prefix, ext = os.path.splitext(template)
            output_file = os.path.join(self.tmpdir,
                                       os.path.basename(prefix) + '.html')
            html = make_dataset(self.dataset, output_file, template=template)
            print(output_file)
Code Example #11
File: extract.py  Project: aiinsights/extractors
def extract(name,
            version=None,
            contact=None,
            output_html=True,
            description=None,
            thumbnail=None,
            sameAs=None,
            about=None,
            repository=None):
    ''' extract a Dataset to describe some GitHub repository.

        Parameters
        ==========
        name: the name of the Dataset
        version: the Dataset version (optional)
        contact: name of a person that is in charge of the dataset
        description: a description of the Dataset
        thumbnail: an image thumbnail (web url)
        sameAs: a related resource url (optional)
        about: text about the Dataset (optional)
        repository: the GitHub repository (defaults to GITHUB_REPOSITORY)
        output_html: if True return rendered html, otherwise return json
    '''

    # Step 0. Define absolute paths to our Dockerfile, recipe, output
    here = os.path.abspath(os.path.dirname(__file__))
    recipe_yml = os.path.join(here, "recipe.yml")

    # Step 1: Show required and recommended fields from recipe
    recipe = RecipeParser(recipe_yml)

    # Step 2: Create Dataset
    dataset = Schema("Dataset")

    # We can obtain these from the environment, or use reasonable defaults
    thumbnail = os.environ.get(
        'DATASET_THUMBNAIL', thumbnail
        or 'https://vsoch.github.io/datasets/assets/img/avocado.png')
    about = os.environ.get(
        'DATASET_ABOUT', about
        or 'This is a Dataset parsed by the openschemas/extractors container.')
    repository = os.environ.get('GITHUB_REPOSITORY', repository
                                or 'openschemas/extractors')
    description = os.environ.get('DATASET_DESCRIPTION',
                                 description or 'A Dataset')
    email = os.environ.get('DATASET_EMAIL')
    template = os.environ.get('DATASET_TEMPLATE', "google/dataset-table.html")

    # Can be one of:
    # google/dataset-table.html  (bootstrap)
    # google/visual-dataset.html (see vsoch.github.io/zenodo-ml)
    # google/dataset.html        (just blank page, json metadata)
    # google/dataset-vue-table.html
    # see https://openschemas.github.io/schemaorg#7-embed-in-html-with-json-ld

    # Contact metadata
    contact = os.environ.get('GITHUB_ACTOR', contact)
    contact_url = os.environ.get('CONTACT_URL', repository)
    contact_description = os.environ.get('CONTACT_DESCRIPTION',
                                         'Dataset maintainer')
    contact_type = os.environ.get('CONTACT_TYPE', 'customer support')
    contact_telephone = os.environ.get('CONTACT_TELEPHONE')
    contact = add_kwargs(contact, 'DATASET_DOWNLOAD_KWARGS')

    # Download Link
    download_link = os.environ.get('DATASET_DOWNLOAD_LINK')
    encoding = os.environ.get('DATASET_ENCODING_FORMAT')
    if download_link is not None:
        download = Schema('DataDownload')
        download.add_property('encodingFormat', encoding)
        download.add_property('contentUrl', download_link)
        download = add_kwargs(download, 'DATASET_DOWNLOAD_KWARGS')
        dataset.add_property('distribution', [download])

    # Get the repository full url for contact
    if not contact_url.startswith('http'):
        contact_url = "https://www.github.com/%s" % contact_url

    if contact is not None:
        person = make_person(name=contact,
                             description=contact_description,
                             url=contact_url,
                             contact_type=contact_type,
                             telephone=contact_telephone)
        person = add_kwargs(person, 'CONTACT_KWARGS')
        dataset.add_property('creator', person)

    # dataset.properties
    dataset.add_property('version', version)
    dataset.add_property('description', description)
    dataset.add_property('name', name)
    dataset.add_property('thumbnailUrl', thumbnail)
    dataset.add_property('about', about)
    dataset = add_kwargs(dataset, 'DATASET_KWARGS')

    # Step 5: Validate Data Structure
    recipe.validate(dataset)

    if output_html:
        return make_dataset(dataset, template=template)
    return dataset.dump_json(pretty_print=True)
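
A hypothetical call of the Dataset extractor above (values are made up). With output_html=False the function returns the json metadata string instead of rendered html, and the DATASET_* environment variables override the corresponding defaults.

metadata = extract(name="dinosaur-dataset",
                   contact="@vsoch",
                   description="An example dataset from a GitHub repository",
                   output_html=False)
print(metadata)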