Esempio n. 1
0
def extract(name, url, description,
            about=None, thumbnail=None, output_file=None, **kwargs):
    ''' extract a DataCatalog to describe some dataset(s). To add more
        properties, just add them via additional keyword args (kwargs)

        Parameters
        ==========
        name: the name of the DataCatalog
        url: the url to get the catalog
        description: a description of the DataCatalog
        about: text about the data catalog (optional).
        thumbnail: an image thumbnail (web url)
        output_file: An html output file to write catalog to (optional)
        kwargs: extra property name/value pairs, each handed to add_property

        Returns
        =======
        the DataCatalog Schema, after recipe.validate has been called on it
    '''

    # Step 0. Define absolute path to the recipe that sits next to this file
    here = os.path.abspath(os.path.dirname(__file__))
    recipe_yml = os.path.join(here, "recipe.yml")

    # Step 1: Create Data Catalog
    catalog = Schema("DataCatalog")

    # Step 2: Load required and recommended fields from recipe
    recipe = RecipeParser(recipe_yml)

    # Step 3: Add the catalog properties (found at catalog.properties).
    # The 'url' property must carry the url argument, not the about text.
    catalog.add_property('url', url)
    catalog.add_property('name', name)
    catalog.add_property('description', description)
    catalog.add_property('thumbnailUrl', thumbnail)
    catalog.add_property('about', about)

    # Additional (won't be added if not part of schema)
    for key, value in kwargs.items():
        catalog.add_property(key, value)

    # Step 4: Validate Data Structure
    recipe.validate(catalog)

    # Step 5: If the user wants to write to html, do it before return
    if output_file is not None:
        make_dataset(catalog, output_file)

    return catalog
Esempio n. 2
0
    def setUp(self):
        ''' prepare a scratch directory, the recipe next to this file, and a
            validated "Dataset" schema with a creator and one distribution.
        '''
        scratch = os.path.join(tempfile.gettempdir(), 'schemaorg-test')
        self.tmpdir = scratch
        if not os.path.exists(scratch):
            os.mkdir(scratch)

        self.here = os.path.abspath(os.path.dirname(__file__))
        self.recipe = RecipeParser(os.path.join(self.here, "recipe.yml"))
        self.dataset = Schema("Dataset")

        # Templates that the tests render the dataset into
        self.templates = [
            'google/dataset-table.html',      # bootstrap
            'google/visual-dataset.html',     # default
            'google/dataset.html',            # json only
            'google/dataset-vue-table.html',  # vue js
        ]

        avocado = 'https://vsoch.github.io/datasets/assets/img/avocado.png'

        maintainer = make_person(name="Dinosaur Pancakes",
                                 description='Dataset maintainer',
                                 url='https://www.github.com/vsoch',
                                 contact_type='customer support',
                                 telephone='999-999-9999')

        # Top level dataset properties, added in this fixed order
        for key, value in (('creator', maintainer),
                           ('version', "1.0.0"),
                           ('description', "This is the best dataset."),
                           ('name', "Dinosaur Dataset"),
                           ('thumbnailUrl', avocado),
                           ('about', "This is a dataset")):
            self.dataset.add_property(key, value)

        # A single downloadable distribution
        distribution = Schema('DataDownload')
        distribution.add_property('contentUrl', avocado)
        distribution.add_property('encodingFormat', 'CSV')
        self.dataset.add_property('distribution', [distribution])

        self.recipe.validate(self.dataset)
Esempio n. 3
0
# Script excerpt: describe a Dockerfile as a schema object and validate it.
# spec_yml, recipe_yml and dockerfile are used below but are not defined in
# this excerpt; they are expected to be set earlier in the script.
containerRecipe = Schema(spec_yml)

# Step 2: Show required and recommended fields from recipe

recipe = RecipeParser(recipe_yml)
print(recipe.loaded)

# Step 3: Extract Container Things! First, the recipe file

from spython.main.parse.parsers import DockerParser

# The parsed result is read below for labels, environ, entrypoint, fromHeader
parser = DockerParser(dockerfile).parse()

# Added properties are collected under containerRecipe.properties

containerRecipe.add_property('version', containerRecipe.version)
containerRecipe.add_property('labels', parser.labels)  # currently lists
containerRecipe.add_property('environment', parser.environ)  # currently a list
containerRecipe.add_property('entrypoint', parser.entrypoint)
containerRecipe.add_property('description', 'A Dockerfile build recipe')

# This would be extracted at build --> push time, so we know the uri.
# Here the name is hard coded for the example.
containerRecipe.add_property('name', "vsoch/salad")
containerRecipe.add_property('ContainerImage', parser.fromHeader)

# Step 4: Validate Data Structure

recipe.validate(containerRecipe)

# Step 5, get extra metadata we would get with container-diff!
# Kids don't run command line things from Python at home, it's just bad :)
Esempio n. 4
0
# Script excerpt: fill in and validate a container recipe schema.
# containerRecipe, recipe_yml and dockerfile are used below but are not
# defined in this excerpt; they are expected to be set earlier in the script.

# Step 2: Show required and recommended fields from recipe

recipe = RecipeParser(recipe_yml)
print(recipe.loaded)

# Step 3: Extract Container Things! First, the recipe file

from spython.main.parse.parsers import DockerParser
# The parsed result is read below for environ, entrypoint and fromHeader
parser = DockerParser(dockerfile).parse()

# See definitions at containerRecipe._properties.keys()

# When you add, they are found at:
# containerRecipe.properties

containerRecipe.add_property('version', containerRecipe.version)
containerRecipe.add_property('environment', parser.environ)  # currently a list
containerRecipe.add_property('entrypoint', parser.entrypoint)
containerRecipe.add_property('description', 'A Dockerfile build recipe')

# This would be extracted at build --> push time, so we know the uri.
# Here the name is hard coded for the example.
containerRecipe.add_property('name', "vanessa/sregistry")
containerRecipe.add_property('ContainerImage', parser.fromHeader)

# Step 4: Validate Data Structure

recipe.validate(containerRecipe)

# Step 5, get extra metadata we would get with container-diff!
# Kids don't run command line things from Python at home, it's just bad :)
Esempio n. 5
0
# Script excerpt: describe a Dockerfile as a SoftwareSourceCode schema.
# Schema, recipe and dockerfile are used below but are not defined in this
# excerpt; they are expected to be set earlier in the script.
from schemaorg.templates.google import (make_person, make_dataset)

# make_person(name, description, url="", telephone="", email="")
person = make_person(name="@vsoch",
                     description='research software engineer, dinosaur')

# Step 3: Create SoftwareSourceCode

from spython.main.parse.parsers import DockerParser
# Only fromHeader is read from the parsed result in this excerpt
parser = DockerParser(dockerfile).parse()

sourceCode = Schema("SoftwareSourceCode")

# Added properties are collected under sourceCode.properties

sourceCode.add_property('creator', person)
sourceCode.add_property('version', sourceCode.version)
sourceCode.add_property('description', 'A Dockerfile build recipe')
sourceCode.add_property('name', parser.fromHeader)

# Step 4: Validate Data Structure

recipe.validate(sourceCode)

# Step 5a: Add additional fields (extra parsing!)
#         Since this is a demo, we won't do this here (we don't have URI)
#         I'll do a separate example for this using vsoch/dockerfiles on Github

# Step 5b: Generate dataset, meaning writing metadata into template.
#          If we provide an output file, it would write to it.
Esempio n. 6
0
def make_person(name,
                description,
                url="",
                telephone="",
                email="",
                contact_type="customer support"):
    ''' create a "Person" schema that carries a nested "ContactPoint".

        Parameters
        ==========
        name: the name of the person
        description: a description of the person
        url: a url for the contact point
        telephone: the telephone of the contact point
        email: the email of the contact point
        contact_type: the contactType of the contact point

        Returns
        =======
        the Person Schema with name, description and contactPoint added
    '''

    # Create an individual (persona)
    person = Schema('Person')
    person.add_property('name', name)

    # The description is a required argument, so record it on the person
    # instead of silently discarding it.
    person.add_property('description', description)

    contactPoint = Schema('ContactPoint')

    # Update the contact point
    contactPoint.add_property('telephone', telephone)
    contactPoint.add_property('email', email)
    contactPoint.add_property('url', url)
    contactPoint.add_property('contactType', contact_type)

    # Update the person with it
    person.add_property('contactPoint', contactPoint)
    return person
Esempio n. 7
0
def extract(name,
            url=None,
            telephone=None,
            email=None,
            output_file=None,
            contact_type="customer_service",
            **kwargs):
    ''' build and validate an "Organization" schema that includes a
        nested "ContactPoint".

        Parameters
        ==========
        name: the name of the Organization
        url: the url of the Organization
        telephone: the telephone of the ContactPoint
        email: the email of the ContactPoint
        output_file: An html output file; when given, make_dataset is
                     called with the Organization and this path (optional)
        contact_type: the type of contact for the ContactPoint
        kwargs: extra property name/value pairs for the Organization

        Returns
        =======
        the Organization Schema
    '''

    # The recipe lives next to this file
    recipe_path = os.path.join(
        os.path.abspath(os.path.dirname(__file__)), "recipe.yml")

    # Contact point first, it is embedded in the organization below
    contact_point = Schema("ContactPoint")
    for field, content in (('contactType', contact_type),
                           ('telephone', telephone),
                           ('email', email)):
        contact_point.add_property(field, content)

    # Required and recommended fields come from the recipe
    recipe = RecipeParser(recipe_path)

    organization = Schema("Organization")
    for field, content in (('contactPoint', contact_point),
                           ('url', url),
                           ('name', name)):
        organization.add_property(field, content)

    # Additional (won't be added if not part of schema)
    for field in kwargs:
        organization.add_property(field, kwargs[field])

    recipe.validate(organization)

    # Without an output file there is nothing to render
    if output_file is None:
        return organization

    make_dataset(organization, output_file)
    return organization
Esempio n. 8
0
def extract(name, description=None, thumbnail=None, sameAs=None, version=None,
            about=None, output_file=None, person=None, repository=None,
            runtime=None, **kwargs):
    ''' build and validate a "SoftwareSourceCode" schema for a codebase.
        Further properties can be supplied as keyword args (kwargs).

        Parameters
        ==========
        name: the name of the software
        description: a description of the software
        thumbnail: an image thumbnail (web url), stored as thumbnailUrl
        sameAs: value for the sameAs property
        version: the software version. When falsy, the version attribute
                 of the Schema object is used instead
        about: text about the software (optional).
        output_file: An html output file; when given, make_dataset is
                     called with the schema and this path (optional)
        person: value for the creator property
        repository: value for the codeRepository property
        runtime: value for the runtime property

        Returns
        =======
        the SoftwareSourceCode Schema
    '''

    # The recipe lives next to this file
    recipe_path = os.path.join(
        os.path.abspath(os.path.dirname(__file__)), "recipe.yml")

    # Required and recommended fields come from the recipe
    recipe = RecipeParser(recipe_path)

    source = Schema("SoftwareSourceCode")
    add = source.add_property

    add('creator', person)
    add('version', version or source.version)
    add('description', description)
    add('name', name)
    add('thumbnailUrl', thumbnail)
    add('sameAs', sameAs)
    add('about', about)
    add('codeRepository', repository)
    add('runtime', runtime)

    # Additional (won't be added if not part of schema)
    for field in kwargs:
        add(field, kwargs[field])

    recipe.validate(source)

    # Without an output file there is nothing to render
    if output_file is None:
        return source

    make_dataset(source, output_file)
    return source
Esempio n. 9
0
class TestTemplates(unittest.TestCase):
    ''' render a sample "Dataset" schema into each of the listed templates.
    '''

    def setUp(self):
        ''' prepare a scratch directory, the recipe next to this file, and a
            validated "Dataset" schema with a creator and one distribution.
        '''
        scratch = os.path.join(tempfile.gettempdir(), 'schemaorg-test')
        self.tmpdir = scratch
        if not os.path.exists(scratch):
            os.mkdir(scratch)

        self.here = os.path.abspath(os.path.dirname(__file__))
        self.recipe = RecipeParser(os.path.join(self.here, "recipe.yml"))
        self.dataset = Schema("Dataset")

        # Templates that test_templates renders the dataset into
        self.templates = [
            'google/dataset-table.html',      # bootstrap
            'google/visual-dataset.html',     # default
            'google/dataset.html',            # json only
            'google/dataset-vue-table.html',  # vue js
        ]

        avocado = 'https://vsoch.github.io/datasets/assets/img/avocado.png'

        maintainer = make_person(name="Dinosaur Pancakes",
                                 description='Dataset maintainer',
                                 url='https://www.github.com/vsoch',
                                 contact_type='customer support',
                                 telephone='999-999-9999')

        # Top level dataset properties, added in this fixed order
        for key, value in (('creator', maintainer),
                           ('version', "1.0.0"),
                           ('description', "This is the best dataset."),
                           ('name', "Dinosaur Dataset"),
                           ('thumbnailUrl', avocado),
                           ('about', "This is a dataset")):
            self.dataset.add_property(key, value)

        # A single downloadable distribution
        distribution = Schema('DataDownload')
        distribution.add_property('contentUrl', avocado)
        distribution.add_property('encodingFormat', 'CSV')
        self.dataset.add_property('distribution', [distribution])

        self.recipe.validate(self.dataset)

    def tearDown(self):
        ''' intentionally a no-op: the scratch directory is left in place.
        '''
        # Removal of self.tmpdir (shutil.rmtree) is disabled here.
        pass

    def test_templates(self):
        ''' write one html file per template into the scratch directory.
        '''
        print('Testing templates generation...')
        for template in self.templates:
            # Output is named after the template file, minus its extension
            stem = os.path.basename(os.path.splitext(template)[0])
            output_file = os.path.join(self.tmpdir, stem + '.html')
            make_dataset(self.dataset, output_file, template=template)
            print(output_file)
Esempio n. 10
0
def extract(name,
            version=None,
            contact=None,
            output_html=True,
            description=None,
            thumbnail=None,
            sameAs=None,
            about=None,
            repository=None):
    ''' extract a Dataset to describe some Github repository. Several
        values are read from the environment, and an environment variable
        that is set takes precedence over the matching argument.

        Parameters
        ==========
        name: the name of the Dataset
        version: the version of the Dataset
        contact: name of a person that is in charge of the dataset
                 (overridden by GITHUB_ACTOR)
        output_html: if True return the result of make_dataset for the
                     chosen template, otherwise return pretty printed json
        description: a description of the Dataset
                     (overridden by DATASET_DESCRIPTION)
        thumbnail: an image thumbnail (web url)
                   (overridden by DATASET_THUMBNAIL)
        sameAs: value for the sameAs property (optional)
        about: text about the dataset (optional, overridden by DATASET_ABOUT)
        repository: the repository, used as the default contact url
                    (overridden by GITHUB_REPOSITORY)

        Environment
        ===========
        DATASET_TEMPLATE: the template to render, one of
            google/dataset-table.html  (bootstrap)
            google/visual-dataset.html (see vsoch.github.io/zenodo-ml)
            google/dataset.html        (just blank page, json metadata)
            google/dataset-vue-table.html
            see https://openschemas.github.io/schemaorg#7-embed-in-html-with-json-ld
        DATASET_EMAIL, CONTACT_URL, CONTACT_DESCRIPTION, CONTACT_TYPE,
        CONTACT_TELEPHONE: contact metadata
        DATASET_DOWNLOAD_LINK, DATASET_ENCODING_FORMAT: the distribution
        DATASET_KWARGS, DATASET_DOWNLOAD_KWARGS, CONTACT_KWARGS: names
            handed to add_kwargs for the dataset, download and person
    '''

    # Step 0. Define absolute path to the recipe that sits next to this file
    here = os.path.abspath(os.path.dirname(__file__))
    recipe_yml = os.path.join(here, "recipe.yml")

    # Step 1: Show required and recommended fields from recipe
    recipe = RecipeParser(recipe_yml)

    # Step 2: Create Dataset
    dataset = Schema("Dataset")

    # We can obtain these from the environment, or use reasonable defaults
    thumbnail = os.environ.get(
        'DATASET_THUMBNAIL', thumbnail
        or 'https://vsoch.github.io/datasets/assets/img/avocado.png')
    about = os.environ.get(
        'DATASET_ABOUT', about
        or 'This is a Dataset parsed by the openschemas/extractors container.')
    repository = os.environ.get('GITHUB_REPOSITORY', repository
                                or 'openschemas/extractors')
    # Honor the description argument before falling back to the default
    description = os.environ.get('DATASET_DESCRIPTION',
                                 description or 'A Dataset')
    email = os.environ.get('DATASET_EMAIL')
    template = os.environ.get('DATASET_TEMPLATE', "google/dataset-table.html")

    # Contact metadata. The contact here is only a name; extra contact
    # properties are applied to the Person schema further below.
    contact = os.environ.get('GITHUB_ACTOR', contact)
    contact_url = os.environ.get('CONTACT_URL', repository)
    contact_description = os.environ.get('CONTACT_DESCRIPTION',
                                         'Dataset maintainer')
    contact_type = os.environ.get('CONTACT_TYPE', 'customer support')
    contact_telephone = os.environ.get('CONTACT_TELEPHONE')

    # Download Link: only describe a distribution when a link is provided
    download_link = os.environ.get('DATASET_DOWNLOAD_LINK')
    encoding = os.environ.get('DATASET_ENCODING_FORMAT')
    if download_link is not None:
        download = Schema('DataDownload')
        download.add_property('encodingFormat', encoding)
        download.add_property('contentUrl', download_link)
        download = add_kwargs(download, 'DATASET_DOWNLOAD_KWARGS')
        dataset.add_property('distribution', [download])

    # Get the repository full url for contact
    if not contact_url.startswith('http'):
        contact_url = "https://www.github.com/%s" % contact_url

    if contact is not None:
        # An unset DATASET_EMAIL falls back to the make_person default ("")
        person = make_person(name=contact,
                             description=contact_description,
                             url=contact_url,
                             contact_type=contact_type,
                             telephone=contact_telephone,
                             email=email or "")
        person = add_kwargs(person, 'CONTACT_KWARGS')
        dataset.add_property('creator', person)

    # dataset.properties
    dataset.add_property('version', version)
    dataset.add_property('description', description)
    dataset.add_property('name', name)
    dataset.add_property('thumbnailUrl', thumbnail)
    dataset.add_property('about', about)
    # Only touch sameAs when the caller actually supplied one
    if sameAs is not None:
        dataset.add_property('sameAs', sameAs)
    dataset = add_kwargs(dataset, 'DATASET_KWARGS')

    # Step 3: Validate Data Structure
    recipe.validate(dataset)

    if output_html:
        return make_dataset(dataset, template=template)
    return dataset.dump_json(pretty_print=True)