Python compact_and_validate Examples, mediagoblin.tools.metadata.compact_and_validate Python Examples

Example #1

0

Show file

File: test_metadata.py Project: shahidge4/mediagoblin

    def testCompactAndValidate(self):
        """Exercise compact_and_validate with valid and invalid metadata.

        A well-formed dictionary must be returned with the expected
        '@context' and without its free-floating 'location' node; a
        dictionary whose license is not a URI must raise ValidationError.
        """
        # First, test out a well formatted piece of metadata
        ######################################################
        test_metadata = {
            'dc:title': 'My Pet Bunny',
            'dc:description': 'A picture displaying how cute my pet bunny is.',
            'location': '/home/goblin/Pictures/bunny.png',
            'license': 'http://www.gnu.org/licenses/gpl.txt'
        }
        jsonld_metadata = compact_and_validate(test_metadata)
        assert jsonld_metadata
        assert jsonld_metadata.get('dc:title') == 'My Pet Bunny'
        # Free floating nodes should be removed
        assert jsonld_metadata.get('location') is None
        assert jsonld_metadata.get('@context') == \
            u"http://www.w3.org/2013/json-ld-context/rdfa11"

        # Next, make sure that various badly formatted metadata
        # will be rejected.
        #######################################################
        #,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.
        # Metadata with a non-URI license should fail :
        #`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'
        metadata_fail_1 = {
            'dc:title': 'My Pet Bunny',
            'dc:description': 'A picture displaying how cute my pet bunny is.',
            'location': '/home/goblin/Pictures/bunny.png',
            'license': 'All Rights Reserved.'
        }
        jsonld_fail_1 = None
        try:
            jsonld_fail_1 = compact_and_validate(metadata_fail_1)
        # "except ... as" is accepted by Python 2.6+ and Python 3 alike.
        except ValidationError as e:
            assert e.message == "'All Rights Reserved.' is not a 'uri'"
        # Without this check the test would pass silently if the invalid
        # metadata were accepted and no exception raised at all.
        assert jsonld_fail_1 is None

Example #2

0

Show file

File: test_metadata.py Project: vasilenkomike/mediagoblin

    def testCompactAndValidate(self):
        """Check compact_and_validate against good and bad metadata.

        The valid dictionary must keep its title, lose the free-floating
        "location" node and carry the expected "@context".  The dictionary
        with a non-URI license must be rejected with ValidationError.
        """
        # First, test out a well formatted piece of metadata
        ######################################################
        test_metadata = {
            "dc:title": "My Pet Bunny",
            "dc:description": "A picture displaying how cute my pet bunny is.",
            "location": "/home/goblin/Pictures/bunny.png",
            "license": "http://www.gnu.org/licenses/gpl.txt",
        }
        jsonld_metadata = compact_and_validate(test_metadata)
        assert jsonld_metadata
        assert jsonld_metadata.get("dc:title") == "My Pet Bunny"
        # Free floating nodes should be removed
        assert jsonld_metadata.get("location") is None
        assert jsonld_metadata.get("@context") == u"http://www.w3.org/2013/json-ld-context/rdfa11"

        # Next, make sure that various badly formatted metadata
        # will be rejected.
        #######################################################
        # ,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.
        # Metadata with a non-URI license should fail :
        # `'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'
        metadata_fail_1 = {
            "dc:title": "My Pet Bunny",
            "dc:description": "A picture displaying how cute my pet bunny is.",
            "location": "/home/goblin/Pictures/bunny.png",
            "license": "All Rights Reserved.",
        }
        jsonld_fail_1 = None
        try:
            jsonld_fail_1 = compact_and_validate(metadata_fail_1)
        # The comma form of "except" is Python 2 only; "as" works on 2.6+ and 3.
        except ValidationError as e:
            assert e.message == "'All Rights Reserved.' is not a 'uri'"
        # Guard against the call succeeding: with no exception raised the
        # except-branch assertion above never runs.
        assert jsonld_fail_1 is None

Example #3

0

Show file

File: test_metadata.py Project: ausbin/mediagoblin

    def testCompactAndValidate(self):
        """Verify compact_and_validate accepts good and rejects bad metadata.

        Covers one valid dictionary plus two invalid ones: a license that is
        not a URI and a dc:created value that is not a date-time.  Each
        invalid dictionary must raise ValidationError and yield no result.
        """
        # First, test out a well formatted piece of metadata
        ######################################################
        test_metadata = {
            'dc:title': 'My Pet Bunny',
            'dc:description': 'A picture displaying how cute my pet bunny is.',
            'location': '/home/goblin/Pictures/bunny.png',
            'license': 'http://www.gnu.org/licenses/gpl.txt'
        }
        jsonld_metadata = compact_and_validate(test_metadata)
        assert jsonld_metadata
        assert jsonld_metadata.get('dc:title') == 'My Pet Bunny'
        # Free floating nodes should be removed
        assert jsonld_metadata.get('location') is None
        assert jsonld_metadata.get('@context') == \
            u"http://www.w3.org/2013/json-ld-context/rdfa11"

        # Next, make sure that various badly formatted metadata
        # will be rejected.
        #######################################################
        #,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.
        # Metadata with a non-URI license should fail :
        #`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'
        metadata_fail_1 = {
            'dc:title': 'My Pet Bunny',
            'dc:description': 'A picture displaying how cute my pet bunny is.',
            'location': '/home/goblin/Pictures/bunny.png',
            'license': 'All Rights Reserved.'
        }
        jsonld_fail_1 = None
        try:
            jsonld_fail_1 = compact_and_validate(metadata_fail_1)
        except ValidationError as e:
            assert e.message == "'All Rights Reserved.' is not a 'uri'"
        # Identity comparison is the idiomatic (PEP 8) check against None.
        assert jsonld_fail_1 is None
        #,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,
        # Metadata with an invalid date-time dc:created should fail :
        #`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`''
        metadata_fail_2 = {
            'dc:title': 'My Pet Bunny',
            'dc:description': 'A picture displaying how cute my pet bunny is.',
            'location': '/home/goblin/Pictures/bunny.png',
            'license': 'http://www.gnu.org/licenses/gpl.txt',
            'dc:created': 'The other day'
        }
        jsonld_fail_2 = None
        try:
            jsonld_fail_2 = compact_and_validate(metadata_fail_2)
        except ValidationError as e:
            assert e.message == "'The other day' is not a 'date-time'"
        assert jsonld_fail_2 is None

Example #4

0

Show file

File: views.py Project: eliroca/mediagoblin-mirror

def edit_metadata(request, media):
    """Show and process the metadata editing form for ``media``.

    Responds with ``render_404`` unless ``media.state`` is 'processed'.
    A valid POST stores the compacted, validated metadata on ``media`` and
    redirects to it.  Otherwise the form is rendered; when it has no
    metadata rows it is first filled from the stored metadata, leaving out
    the "@context" entry.
    """
    # Only fully processed media can have its metadata edited.
    if media.state != 'processed':
        return render_404(request)

    is_post = request.method == 'POST'
    # A POST with non-empty form data binds the form to it; every other
    # request (including an empty POST) builds the form from None.
    form_data = (is_post and request.form) or None
    form = forms.EditMetaDataForm(form_data)

    if is_post and form.validate():
        submitted = {}
        for row in form.media_metadata.data:
            submitted[row['identifier']] = row['value']
        media.media_metadata = compact_and_validate(submitted)
        media.save()
        return redirect_obj(request, media)

    if len(form.media_metadata) == 0:
        for identifier, value in media.media_metadata.items():
            if identifier != "@context":
                entry = {'identifier': identifier, 'value': value}
                form.media_metadata.append_entry(entry)

    template_context = {'form': form, 'media': media}
    return render_to_response(
        request, 'mediagoblin/edit/metadata.html', template_context)

Example #5

0

Show file

File: views.py Project: incorpusyehtee/mediagoblin

def edit_metadata(request, media):
    """Show and process the metadata editing form for ``media``.

    A valid POST replaces ``media.media_metadata`` with the result of
    ``compact_and_validate`` and redirects to the media object.  Otherwise
    the form is rendered; if it holds no metadata rows it is pre-filled from
    the stored metadata, skipping the "@context" entry.
    """
    form = forms.EditMetaDataForm(request.form)
    if request.method == "POST" and form.validate():
        # A generator avoids building an intermediate list of pairs.
        metadata_dict = dict(
            (row['identifier'], row['value'])
            for row in form.media_metadata.data)
        media.media_metadata = compact_and_validate(metadata_dict)
        media.save()
        return redirect_obj(request, media)

    if len(form.media_metadata) == 0:
        # .items() exists on both Python 2 and 3 (iteritems() is 2-only).
        for identifier, value in media.media_metadata.items():
            if identifier == "@context":
                continue
            form.media_metadata.append_entry({
                'identifier': identifier,
                'value': value})

    return render_to_response(
        request,
        'mediagoblin/edit/metadata.html',
        {'form': form,
         'media': media})

Example #6

0

Show file

File: views.py Project: sherlockliu/mediagoblin

def edit_metadata(request, media):
    """Render the metadata edit form, saving the metadata on a valid POST.

    On a valid POST the submitted identifier/value rows are passed through
    ``compact_and_validate``, stored on ``media`` and the user is redirected
    to the media object.  On any other request the form is rendered, seeded
    from the stored metadata (minus "@context") when it has no rows yet.
    """
    form = forms.EditMetaDataForm(request.form)
    if request.method == "POST" and form.validate():
        # Generator expression: no throwaway list of tuples is built.
        metadata_dict = dict((row['identifier'], row['value'])
                             for row in form.media_metadata.data)
        media.media_metadata = compact_and_validate(metadata_dict)
        media.save()
        return redirect_obj(request, media)

    if len(form.media_metadata) == 0:
        # items() is available on Python 2 and 3; iteritems() only on 2.
        for identifier, value in media.media_metadata.items():
            if identifier == "@context":
                continue
            form.media_metadata.append_entry({
                'identifier': identifier,
                'value': value
            })

    return render_to_response(request, 'mediagoblin/edit/metadata.html', {
        'form': form,
        'media': media
    })

Example #7

0

Show file

File: batchaddmedia.py Project: incorpusyehtee/mediagoblin

def batchaddmedia(args):
    """Submit the media files described by a metadata CSV for one user.

    Reads ``args.celery``, ``args.username`` and ``args.metadata_path``.
    Each entry's metadata is validated with ``compact_and_validate``; the
    media itself is fetched over HTTP(S) or opened from the local filesystem
    (relative paths are resolved against the CSV's directory) and handed to
    ``submit_media``.  Outcomes are printed; nothing is returned.

    Every ``print(...)`` below takes a single argument, so it behaves the
    same as a Python 2 print statement and as a Python 3 function call.
    """
    # Run eagerly unless explicitly set not to
    if not args.celery:
        os.environ['CELERY_ALWAYS_EAGER'] = 'true'

    app = commands_util.setup_app(args)

    files_uploaded, files_attempted = 0, 0

    # get the user
    user = app.db.User.query.filter_by(username=args.username.lower()).first()
    if user is None:
        print(_(u"Sorry, no user by username '{username}' exists".format(
            username=args.username)))
        return

    upload_limit, max_file_size = get_upload_file_limits(user)

    if not os.path.isfile(args.metadata_path):
        print(_(u'File at {path} not found, use -h flag for help'.format(
            path=args.metadata_path)))
        return

    abs_metadata_filename = os.path.abspath(args.metadata_path)
    abs_metadata_dir = os.path.dirname(abs_metadata_filename)

    def maybe_unicodeify(some_string):
        # this is kinda terrible
        if some_string is None:
            return None
        else:
            return unicode(some_string)

    with codecs.open(
            abs_metadata_filename, 'r', encoding='utf-8') as all_metadata:
        contents = all_metadata.read()
        media_metadata = parse_csv_file(contents)

    for media_id, file_metadata in media_metadata.iteritems():
        files_attempted += 1

        original_location = file_metadata['location']

        ### Pull the important media information for mediagoblin from the
        ### metadata, if it is provided.
        title = file_metadata.get('title') or file_metadata.get('dc:title')
        description = (file_metadata.get('description') or
            file_metadata.get('dc:description'))

        license = file_metadata.get('license')
        try:
            json_ld_metadata = compact_and_validate(file_metadata)
        except ValidationError as exc:
            error = _(u"""Error with media '{media_id}' value '{error_path}': {error_msg}
Metadata was not uploaded.""".format(
                media_id=media_id,
                error_path=exc.path[0],
                error_msg=exc.message))
            print(error)
            continue

        url = urlparse(original_location)
        # NOTE(review): split() with no argument splits on whitespace, not
        # on '/', so this is normally the whole path -- confirm intended.
        filename = url.path.split()[-1]

        if url.scheme in ('http', 'https'):
            res = requests.get(url.geturl(), stream=True)
            media_file = res.raw

        elif url.scheme == '':
            path = url.path
            if os.path.isabs(path):
                file_abs_path = os.path.abspath(path)
            else:
                file_path = os.path.join(abs_metadata_dir, path)
                file_abs_path = os.path.abspath(file_path)
            try:
                # Media is binary data, so open it in binary mode.
                media_file = open(file_abs_path, 'rb')
            except IOError:
                print(_(u"""\
FAIL: Local file {filename} could not be accessed.
{filename} will not be uploaded.""".format(filename=filename)))
                continue

        else:
            # Without this branch an unsupported scheme would reuse the
            # previous entry's file, or fail on an unbound name.
            print(_(
                u"FAIL: Unsupported URL scheme '{scheme}'; "
                u"{filename} will not be uploaded.".format(
                    scheme=url.scheme, filename=filename)))
            continue

        try:
            submit_media(
                mg_app=app,
                user=user,
                submitted_file=media_file,
                filename=filename,
                title=maybe_unicodeify(title),
                description=maybe_unicodeify(description),
                license=maybe_unicodeify(license),
                metadata=json_ld_metadata,
                tags_string=u"",
                upload_limit=upload_limit, max_file_size=max_file_size)
            print(_(u"""Successfully submitted {filename}!
Be sure to look at the Media Processing Panel on your website to be sure it
uploaded successfully.""".format(filename=filename)))
            files_uploaded += 1
        except FileUploadLimit:
            print(_(
u"FAIL: This file is larger than the upload limits for this site."))
        except UserUploadLimit:
            print(_(
"FAIL: This file will put this user past their upload limits."))
        except UserPastUploadLimit:
            print(_("FAIL: This user is already past their upload limits."))
        finally:
            # Release the file handle / HTTP stream for every entry.
            media_file.close()

Example #8

0

Show file

def batchaddmedia(args):
    """Submit the media files listed in a metadata CSV on behalf of a user.

    Reads ``args.celery``, ``args.username`` and ``args.metadata_path``.
    For every CSV row the metadata is validated with
    ``compact_and_validate``, rows whose slug already exists for the user
    are skipped, and the media (downloaded to a temporary file for http(s)
    locations, opened directly for local paths) is passed to
    ``submit_media``.  Progress and a final summary are printed.

    Raises:
        NotImplementedError: a remote media response carries a
            content-encoding header.
    """
    # Run eagerly unless explicitly set not to
    if not args.celery:
        os.environ['CELERY_ALWAYS_EAGER'] = 'true'

    app = commands_util.setup_app(args)

    files_uploaded, files_attempted = 0, 0

    # get the user
    user = app.db.LocalUser.query.filter(
        LocalUser.username == args.username.lower()).first()
    if user is None:
        print(
            _("Sorry, no user by username '{username}' exists".format(
                username=args.username)))
        return

    if not os.path.isfile(args.metadata_path):
        error = _('File at {path} not found, use -h flag for help'.format(
            path=args.metadata_path))
        print(error)
        return

    abs_metadata_filename = os.path.abspath(args.metadata_path)
    abs_metadata_dir = os.path.dirname(abs_metadata_filename)

    # Read every row up front so the CSV handle is closed before any upload
    # starts, instead of staying open (and never being closed) for the
    # whole run.
    with open(abs_metadata_filename, 'r') as all_metadata:
        media_metadata = list(csv.DictReader(all_metadata))

    for index, file_metadata in enumerate(media_metadata):
        if six.PY2:
            file_metadata = {
                k.decode('utf-8'): v.decode('utf-8')
                for k, v in file_metadata.items()
            }

        files_attempted += 1

        original_location = file_metadata['location']

        ### Pull the important media information for mediagoblin from the
        ### metadata, if it is provided.
        slug = file_metadata.get('slug')
        title = file_metadata.get('title') or file_metadata.get('dc:title')
        description = (file_metadata.get('description')
                       or file_metadata.get('dc:description'))
        collection_slug = file_metadata.get('collection-slug')

        license = file_metadata.get('license')
        try:
            json_ld_metadata = compact_and_validate(file_metadata)
        except ValidationError as exc:
            media_id = file_metadata.get('id') or index
            error = _(
                """Error with media '{media_id}' value '{error_path}': {error_msg}
Metadata was not uploaded.""".format(media_id=media_id,
                                     error_path=exc.path[0],
                                     error_msg=exc.message))
            print(error)
            continue

        if slug and MediaEntry.query.filter_by(actor=user.id,
                                               slug=slug).count():
            # Avoid re-importing media from a previous batch run. Note that this
            # check isn't quite robust enough, since it requires that a slug is
            # specified. Probably needs to be based on "location" since this is
            # the only required field.
            error = '{}: {}'.format(
                slug,
                _('An entry with that slug already exists for this user.'))
            print(error)
            continue

        url = urlparse(original_location)
        # NOTE(review): split() with no argument splits on whitespace, not
        # on '/', so this is normally the whole path -- confirm intended.
        filename = url.path.split()[-1]

        if url.scheme.startswith('http'):
            res = requests.get(url.geturl(), stream=True)
            if res.headers.get('content-encoding'):
                # The requests library's "raw" method does not deal with content
                # encoding. Alternative could be to use iter_content(), and
                # write chunks to the temporary file.
                raise NotImplementedError(
                    'URL-based media with content-encoding (eg. gzip) are not currently supported.'
                )

            # To avoid loading the media into memory all at once, we write it to
            # a file before importing. This currently requires free space up to
            # twice the size of the media file. Memory use can be tested by
            # running something like `ulimit -Sv 200000` before running
            # `batchaddmedia` to upload a file larger than 200MB.
            media_file = tempfile.TemporaryFile()
            shutil.copyfileobj(res.raw, media_file)
            # NOTE(review): the rewind only happens on Python 2; presumably
            # the Python 3 consumer seeks for itself -- confirm.
            if six.PY2:
                media_file.seek(0)

        elif url.scheme == '':
            path = url.path
            if os.path.isabs(path):
                file_abs_path = os.path.abspath(path)
            else:
                file_path = os.path.join(abs_metadata_dir, path)
                file_abs_path = os.path.abspath(file_path)
            try:
                media_file = open(file_abs_path, 'rb')
            except IOError:
                print(
                    _("""\
FAIL: Local file {filename} could not be accessed.
{filename} will not be uploaded.""".format(filename=filename)))
                continue

        else:
            # Without this branch an unsupported scheme would resubmit (and
            # re-close) the previous row's file, or fail on an unbound name.
            print(
                _("FAIL: Unsupported URL scheme '{scheme}'; "
                  "{filename} will not be uploaded.".format(
                      scheme=url.scheme, filename=filename)))
            continue

        try:
            entry = submit_media(mg_app=app,
                                 user=user,
                                 submitted_file=media_file,
                                 filename=filename,
                                 title=title,
                                 description=description,
                                 collection_slug=collection_slug,
                                 license=license,
                                 metadata=json_ld_metadata,
                                 tags_string="")
            if slug:
                # Slug is automatically set by submit_media, so overwrite it
                # with the desired slug.
                entry.slug = slug
                entry.save()
            print(
                _("""Successfully submitted {filename}!
Be sure to look at the Media Processing Panel on your website to be sure it
uploaded successfully.""".format(filename=filename)))
            files_uploaded += 1
        except FileUploadLimit:
            print(
                _("FAIL: This file is larger than the upload limits for this site."
                  ))
        except UserUploadLimit:
            print(
                _("FAIL: This file will put this user past their upload limits."
                  ))
        except UserPastUploadLimit:
            print(_("FAIL: This user is already past their upload limits."))
        finally:
            media_file.close()
    print(
        _("{files_uploaded} out of {files_attempted} files successfully submitted"
          .format(files_uploaded=files_uploaded,
                  files_attempted=files_attempted)))

Example #9

0

Show file

def batchaddmedia(args):
    """Submit the media files described by a metadata CSV for one user.

    Reads ``args.celery``, ``args.username`` and ``args.metadata_path``.
    Each entry's metadata is validated with ``compact_and_validate``; the
    media is fetched over HTTP(S) or opened from the local filesystem
    (relative paths are resolved against the CSV's directory) and handed to
    ``submit_media``.  Progress and a final summary are printed; nothing is
    returned.
    """
    # Run eagerly unless explicitly set not to
    if not args.celery:
        os.environ['CELERY_ALWAYS_EAGER'] = 'true'

    app = commands_util.setup_app(args)

    files_uploaded, files_attempted = 0, 0

    # get the user
    user = app.db.LocalUser.query.filter(
        LocalUser.username == args.username.lower()).first()
    if user is None:
        print(
            _(u"Sorry, no user by username '{username}' exists".format(
                username=args.username)))
        return

    if not os.path.isfile(args.metadata_path):
        error = _(u'File at {path} not found, use -h flag for help'.format(
            path=args.metadata_path))
        print(error)
        return

    abs_metadata_filename = os.path.abspath(args.metadata_path)
    abs_metadata_dir = os.path.dirname(abs_metadata_filename)

    def maybe_unicodeify(some_string):
        # this is kinda terrible
        if some_string is None:
            return None
        else:
            return six.text_type(some_string)

    with codecs.open(abs_metadata_filename, 'r',
                     encoding='utf-8') as all_metadata:
        contents = all_metadata.read()
        media_metadata = parse_csv_file(contents)

    # six.iteritems keeps the loop working on Python 3, where the mapping
    # has no .iteritems() method.
    for media_id, file_metadata in six.iteritems(media_metadata):
        files_attempted += 1

        original_location = file_metadata['location']

        ### Pull the important media information for mediagoblin from the
        ### metadata, if it is provided.
        title = file_metadata.get('title') or file_metadata.get('dc:title')
        description = (file_metadata.get('description')
                       or file_metadata.get('dc:description'))

        license = file_metadata.get('license')
        try:
            json_ld_metadata = compact_and_validate(file_metadata)
        except ValidationError as exc:
            error = _(
                u"""Error with media '{media_id}' value '{error_path}': {error_msg}
Metadata was not uploaded.""".format(media_id=media_id,
                                     error_path=exc.path[0],
                                     error_msg=exc.message))
            print(error)
            continue

        url = urlparse(original_location)
        # NOTE(review): split() with no argument splits on whitespace, not
        # on '/', so this is normally the whole path -- confirm intended.
        filename = url.path.split()[-1]

        if url.scheme in ('http', 'https'):
            res = requests.get(url.geturl(), stream=True)
            media_file = res.raw

        elif url.scheme == '':
            path = url.path
            if os.path.isabs(path):
                file_abs_path = os.path.abspath(path)
            else:
                file_path = os.path.join(abs_metadata_dir, path)
                file_abs_path = os.path.abspath(file_path)
            try:
                # file() does not exist on Python 3; open() is equivalent on
                # Python 2.  Media is binary data, hence 'rb'.
                media_file = open(file_abs_path, 'rb')
            except IOError:
                print(
                    _(u"""\
FAIL: Local file {filename} could not be accessed.
{filename} will not be uploaded.""".format(filename=filename)))
                continue

        else:
            # Without this branch an unsupported scheme would reuse the
            # previous entry's file, or fail on an unbound name.
            print(
                _(u"FAIL: Unsupported URL scheme '{scheme}'; "
                  u"{filename} will not be uploaded.".format(
                      scheme=url.scheme, filename=filename)))
            continue

        try:
            submit_media(mg_app=app,
                         user=user,
                         submitted_file=media_file,
                         filename=filename,
                         title=maybe_unicodeify(title),
                         description=maybe_unicodeify(description),
                         license=maybe_unicodeify(license),
                         metadata=json_ld_metadata,
                         tags_string=u"")
            print(
                _(u"""Successfully submitted {filename}!
Be sure to look at the Media Processing Panel on your website to be sure it
uploaded successfully.""".format(filename=filename)))
            files_uploaded += 1
        except FileUploadLimit:
            print(
                _(u"FAIL: This file is larger than the upload limits for this site."
                  ))
        except UserUploadLimit:
            print(
                _("FAIL: This file will put this user past their upload limits."
                  ))
        except UserPastUploadLimit:
            print(_("FAIL: This user is already past their upload limits."))
        finally:
            # Release the file handle / HTTP stream for every entry.
            media_file.close()
    print(
        _("{files_uploaded} out of {files_attempted} files successfully submitted"
          .format(files_uploaded=files_uploaded,
                  files_attempted=files_attempted)))