예제 #1
0
def _transform_scraped_resource(target_dept, resource):
    """Build a ``Resource`` distribution from one scraped resource record.

    Args:
        target_dept: Not read by this function; retained so existing
            callers keep working.
        resource: Mapping-like object queried with ``.get()`` for the keys
            ``url``, ``source_url``, ``name``, ``description`` and
            ``format``.

    Returns:
        The populated ``Resource`` instance. ``downloadURL`` and ``title``
        are always assigned; ``description``, ``resource_format`` and
        ``mediaType`` are only overwritten when a value is available.
    """
    distribution = Resource()

    url = resource.get('url')
    if urlparse(url).scheme:
        download_url = url
    else:
        # No scheme present: resolve the link against ``source_url``.
        download_url = urljoin(resource.get('source_url'), url)

    # Remove spaces in links by percent-encoding them.
    distribution.downloadURL = download_url.replace(' ', '%20')

    raw_name = resource.get('name')
    # str(None) is the truthy string 'None', which used to bypass the
    # fallback below and leave the title set to None. Treat a missing name
    # as empty so the title is derived from the URL instead.
    name_text = '' if raw_name is None else str(raw_name)
    if name_text and name_text.lower() not in resources_common_names:
        distribution.title = raw_name
    else:
        distribution.title = h.extract_resource_name_from_url(
            distribution.downloadURL)

    description = resource.get('description')
    if description:
        distribution.description = description

    # Prefer the declared format; otherwise try to infer one from the URL.
    resource_format = resource.get('format')
    if not resource_format:
        resource_format = h.extract_resource_format_from_url(
            distribution.downloadURL)
    if resource_format:
        distribution.resource_format = resource_format
        distribution.mediaType = h.get_media_type(resource_format)

    return distribution
예제 #2
0
 def __init__(self):
     """Initialise the instance with placeholder default values.

     The media type is looked up via ``h.get_media_type`` from the default
     resource format, and ``headerMetadata`` starts as an empty dict.
     """
     default_format = "txt"

     self.resource_type = "dcat:Distribution"
     self.description = "n/a"
     self.resource_format = default_format
     # Derive the media type from the same default format set above.
     self.mediaType = h.get_media_type(default_format)
     self.headerMetadata = {}