def testCompactAndValidate(self):
    """Check that compact_and_validate() accepts well-formed metadata
    and rejects metadata whose 'license' is not a URI."""
    # First, test out a well formatted piece of metadata
    ######################################################
    test_metadata = {
        'dc:title': 'My Pet Bunny',
        'dc:description': 'A picture displaying how cute my pet bunny is.',
        'location': '/home/goblin/Pictures/bunny.png',
        'license': 'http://www.gnu.org/licenses/gpl.txt'
    }
    jsonld_metadata = compact_and_validate(test_metadata)
    assert jsonld_metadata
    assert jsonld_metadata.get('dc:title') == 'My Pet Bunny'
    # Free floating nodes should be removed
    assert jsonld_metadata.get('location') is None
    assert jsonld_metadata.get('@context') == \
        u"http://www.w3.org/2013/json-ld-context/rdfa11"

    # Next, make sure that various badly formatted metadata
    # will be rejected.
    #######################################################

    #,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.
    # Metadata with a non-URI license should fail :
    #`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'
    metadata_fail_1 = {
        'dc:title': 'My Pet Bunny',
        'dc:description': 'A picture displaying how cute my pet bunny is.',
        'location': '/home/goblin/Pictures/bunny.png',
        'license': 'All Rights Reserved.'
    }
    jsonld_fail_1 = None
    try:
        jsonld_fail_1 = compact_and_validate(metadata_fail_1)
    except ValidationError as e:
        # was `except ValidationError, e` -- Python 2-only syntax
        assert e.message == "'All Rights Reserved.' is not a 'uri'"
    # Validation must actually have raised; without this assertion a
    # silently-accepted bad license would let the test pass.
    assert jsonld_fail_1 is None
def testCompactAndValidate(self):
    """Check that compact_and_validate() accepts well-formed metadata
    and rejects metadata whose 'license' is not a URI."""
    # First, test out a well formatted piece of metadata
    ######################################################
    test_metadata = {
        "dc:title": "My Pet Bunny",
        "dc:description": "A picture displaying how cute my pet bunny is.",
        "location": "/home/goblin/Pictures/bunny.png",
        "license": "http://www.gnu.org/licenses/gpl.txt",
    }
    jsonld_metadata = compact_and_validate(test_metadata)
    assert jsonld_metadata
    assert jsonld_metadata.get("dc:title") == "My Pet Bunny"
    # Free floating nodes should be removed
    assert jsonld_metadata.get("location") is None
    assert jsonld_metadata.get("@context") == u"http://www.w3.org/2013/json-ld-context/rdfa11"

    # Next, make sure that various badly formatted metadata
    # will be rejected.
    #######################################################

    # ,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.
    # Metadata with a non-URI license should fail :
    # `'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'
    metadata_fail_1 = {
        "dc:title": "My Pet Bunny",
        "dc:description": "A picture displaying how cute my pet bunny is.",
        "location": "/home/goblin/Pictures/bunny.png",
        "license": "All Rights Reserved.",
    }
    jsonld_fail_1 = None
    try:
        jsonld_fail_1 = compact_and_validate(metadata_fail_1)
    except ValidationError as e:
        # was `except ValidationError, e` -- Python 2-only syntax
        assert e.message == "'All Rights Reserved.' is not a 'uri'"
    # Validation must actually have raised; without this assertion a
    # silently-accepted bad license would let the test pass.
    assert jsonld_fail_1 is None
def testCompactAndValidate(self):
    """Check that compact_and_validate() accepts well-formed metadata
    and rejects a non-URI license and a malformed dc:created date."""
    # First, test out a well formatted piece of metadata
    ######################################################
    test_metadata = {
        'dc:title': 'My Pet Bunny',
        'dc:description': 'A picture displaying how cute my pet bunny is.',
        'location': '/home/goblin/Pictures/bunny.png',
        'license': 'http://www.gnu.org/licenses/gpl.txt'
    }
    jsonld_metadata = compact_and_validate(test_metadata)
    assert jsonld_metadata
    assert jsonld_metadata.get('dc:title') == 'My Pet Bunny'
    # Free floating nodes should be removed
    assert jsonld_metadata.get('location') is None
    assert jsonld_metadata.get('@context') == \
        u"http://www.w3.org/2013/json-ld-context/rdfa11"

    # Next, make sure that various badly formatted metadata
    # will be rejected.
    #######################################################

    #,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.
    # Metadata with a non-URI license should fail :
    #`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'
    metadata_fail_1 = {
        'dc:title': 'My Pet Bunny',
        'dc:description': 'A picture displaying how cute my pet bunny is.',
        'location': '/home/goblin/Pictures/bunny.png',
        'license': 'All Rights Reserved.'
    }
    jsonld_fail_1 = None
    try:
        jsonld_fail_1 = compact_and_validate(metadata_fail_1)
    except ValidationError as e:
        assert e.message == "'All Rights Reserved.' is not a 'uri'"
    # `is None`, not `== None` (PEP 8 identity comparison)
    assert jsonld_fail_1 is None

    #,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,
    # Metadata with an ivalid date-time dc:created should fail :
    #`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`'`''
    metadata_fail_2 = {
        'dc:title': 'My Pet Bunny',
        'dc:description': 'A picture displaying how cute my pet bunny is.',
        'location': '/home/goblin/Pictures/bunny.png',
        'license': 'http://www.gnu.org/licenses/gpl.txt',
        'dc:created': 'The other day'
    }
    jsonld_fail_2 = None
    try:
        jsonld_fail_2 = compact_and_validate(metadata_fail_2)
    except ValidationError as e:
        assert e.message == "'The other day' is not a 'date-time'"
    assert jsonld_fail_2 is None
def edit_metadata(request, media):
    """View for adding/editing the JSON-LD metadata of a media entry.

    Returns 404 unless the media entry has finished processing; on a
    valid POST the metadata is validated, saved, and the client is
    redirected back to the media page.
    """
    # If media is not processed, return NotFound.
    if not media.state == 'processed':
        return render_404(request)

    # Bind POSTed data, or build an unbound form on GET.
    # (was the `cond and a or b` idiom, which silently yields None
    # whenever request.form is empty/falsy -- use an explicit
    # conditional expression instead)
    form = forms.EditMetaDataForm(
        request.form if request.method == 'POST' else None)

    if request.method == "POST" and form.validate():
        metadata_dict = {
            row['identifier']: row['value']
            for row in form.media_metadata.data}
        # (removed the dead `json_ld_metadata = None` pre-assignment)
        json_ld_metadata = compact_and_validate(metadata_dict)
        media.media_metadata = json_ld_metadata
        media.save()
        return redirect_obj(request, media)

    if len(form.media_metadata) == 0:
        # Pre-populate the form from existing metadata, skipping the
        # JSON-LD @context bookkeeping node.
        for identifier, value in media.media_metadata.items():
            if identifier == "@context":
                continue
            form.media_metadata.append_entry({
                'identifier': identifier,
                'value': value
            })

    return render_to_response(request,
                              'mediagoblin/edit/metadata.html', {
                                  'form': form,
                                  'media': media
                              })
def edit_metadata(request, media):
    """View for adding/editing the JSON-LD metadata of a media entry."""
    form = forms.EditMetaDataForm(request.form)
    if request.method == "POST" and form.validate():
        # was dict([(k, v) for ...]) -- a dict comprehension avoids
        # building the intermediate list (flake8-comprehensions C404)
        metadata_dict = {row['identifier']: row['value']
                         for row in form.media_metadata.data}
        # (removed the dead `json_ld_metadata = None` pre-assignment)
        json_ld_metadata = compact_and_validate(metadata_dict)
        media.media_metadata = json_ld_metadata
        media.save()
        return redirect_obj(request, media)

    if len(form.media_metadata) == 0:
        # `.items()` works on both Python 2 and 3; `.iteritems()` is
        # Python 2-only.
        for identifier, value in media.media_metadata.items():
            if identifier == "@context":
                continue
            form.media_metadata.append_entry({
                'identifier': identifier,
                'value': value})

    return render_to_response(
        request,
        'mediagoblin/edit/metadata.html',
        {'form': form,
         'media': media})
def edit_metadata(request, media):
    """View for adding/editing the JSON-LD metadata of a media entry."""
    form = forms.EditMetaDataForm(request.form)
    if request.method == "POST" and form.validate():
        # was dict([(k, v) for ...]) -- a dict comprehension avoids
        # building the intermediate list (flake8-comprehensions C404)
        metadata_dict = {row['identifier']: row['value']
                         for row in form.media_metadata.data}
        # (removed the dead `json_ld_metadata = None` pre-assignment)
        json_ld_metadata = compact_and_validate(metadata_dict)
        media.media_metadata = json_ld_metadata
        media.save()
        return redirect_obj(request, media)

    if len(form.media_metadata) == 0:
        # `.items()` works on both Python 2 and 3; `.iteritems()` is
        # Python 2-only.
        for identifier, value in media.media_metadata.items():
            if identifier == "@context":
                continue
            form.media_metadata.append_entry({
                'identifier': identifier,
                'value': value
            })

    return render_to_response(request,
                              'mediagoblin/edit/metadata.html', {
                                  'form': form,
                                  'media': media
                              })
def batchaddmedia(args):
    """Command-line tool: batch-submit media described by a CSV file.

    Each CSV row supplies a media 'location' (local path or http URL)
    plus metadata, which is validated as JSON-LD before submission on
    behalf of ``args.username``.
    """
    # Run eagerly unless explicetly set not to
    if not args.celery:
        os.environ['CELERY_ALWAYS_EAGER'] = 'true'

    app = commands_util.setup_app(args)

    files_uploaded, files_attempted = 0, 0

    # get the user
    user = app.db.User.query.filter_by(username=args.username.lower()).first()
    if user is None:
        print(_(u"Sorry, no user by username '{username}' exists".format(
            username=args.username)))
        return

    # Fetched once here; the original called get_upload_file_limits()
    # a second time below with the same arguments for no effect.
    upload_limit, max_file_size = get_upload_file_limits(user)

    # (removed unused `temp_files = []`)
    if os.path.isfile(args.metadata_path):
        metadata_path = args.metadata_path
    else:
        error = _(u'File at {path} not found, use -h flag for help'.format(
            path=args.metadata_path))
        print(error)
        return

    abs_metadata_filename = os.path.abspath(metadata_path)
    abs_metadata_dir = os.path.dirname(abs_metadata_filename)

    def maybe_unicodeify(some_string):
        # this is kinda terrible
        if some_string is None:
            return None
        return unicode(some_string)

    with codecs.open(
            abs_metadata_filename, 'r', encoding='utf-8') as all_metadata:
        contents = all_metadata.read()
        media_metadata = parse_csv_file(contents)

    for media_id, file_metadata in media_metadata.iteritems():
        files_attempted += 1
        # In case the metadata was not uploaded initialize an empty dictionary.
        json_ld_metadata = compact_and_validate({})

        # Get all metadata entries starting with 'media' as variables and then
        # delete them because those are for internal use only.
        original_location = file_metadata['location']

        ### Pull the important media information for mediagoblin from the
        ### metadata, if it is provided.
        title = file_metadata.get('title') or file_metadata.get('dc:title')
        description = (file_metadata.get('description') or
                       file_metadata.get('dc:description'))
        license = file_metadata.get('license')

        try:
            json_ld_metadata = compact_and_validate(file_metadata)
        except ValidationError as exc:
            # was `except ValidationError, exc` -- Python 2-only syntax
            error = _(u"""Error with media '{media_id}' value '{error_path}': {error_msg}
Metadata was not uploaded.""".format(
                media_id=media_id,
                error_path=exc.path[0],
                error_msg=exc.message))
            print(error)
            continue

        url = urlparse(original_location)
        # NOTE(review): whitespace split looks odd here; presumably the
        # path never contains spaces -- confirm against callers.
        filename = url.path.split()[-1]

        if url.scheme == 'http':
            res = requests.get(url.geturl(), stream=True)
            media_file = res.raw
        elif url.scheme == '':
            path = url.path
            if os.path.isabs(path):
                file_abs_path = os.path.abspath(path)
            else:
                file_path = os.path.join(abs_metadata_dir, path)
                file_abs_path = os.path.abspath(file_path)
            try:
                # `open` instead of the Python 2-only `file` builtin
                media_file = open(file_abs_path, 'r')
            except IOError:
                # restored the `{filename}` placeholders; the original
                # string had none, making .format() a no-op
                print(_(u"""\
FAIL: Local file {filename} could not be accessed.
{filename} will not be uploaded.""".format(filename=filename)))
                continue
        try:
            submit_media(
                mg_app=app,
                user=user,
                submitted_file=media_file,
                filename=filename,
                title=maybe_unicodeify(title),
                description=maybe_unicodeify(description),
                license=maybe_unicodeify(license),
                metadata=json_ld_metadata,
                tags_string=u"",
                upload_limit=upload_limit,
                max_file_size=max_file_size)
            print(_(u"""Successfully submitted {filename}!
Be sure to look at the Media Processing Panel on your website to be sure it
uploaded successfully.""".format(filename=filename)))
            files_uploaded += 1
        except FileUploadLimit:
            print(_(
                u"FAIL: This file is larger than the upload limits for this site."))
        except UserUploadLimit:
            print(_(
                "FAIL: This file will put this user past their upload limits."))
        except UserPastUploadLimit:
            print(_("FAIL: This user is already past their upload limits."))

    # Report the overall outcome; the counters were previously tracked
    # but never reported.
    print(_(u"{files_uploaded} out of {files_attempted} files successfully submitted".format(
        files_uploaded=files_uploaded,
        files_attempted=files_attempted)))
def batchaddmedia(args):
    """Command-line tool: batch-submit media described by a CSV file.

    Each CSV row supplies a media 'location' (local path or http(s)
    URL) plus metadata, which is validated as JSON-LD before
    submission on behalf of ``args.username``.
    """
    # Run eagerly unless explicetly set not to
    if not args.celery:
        os.environ['CELERY_ALWAYS_EAGER'] = 'true'

    app = commands_util.setup_app(args)

    files_uploaded, files_attempted = 0, 0

    # get the user
    user = app.db.LocalUser.query.filter(
        LocalUser.username == args.username.lower()).first()
    if user is None:
        print(
            _("Sorry, no user by username '{username}' exists".format(
                username=args.username)))
        return

    if os.path.isfile(args.metadata_path):
        metadata_path = args.metadata_path
    else:
        error = _('File at {path} not found, use -h flag for help'.format(
            path=args.metadata_path))
        print(error)
        return

    abs_metadata_filename = os.path.abspath(metadata_path)
    abs_metadata_dir = os.path.dirname(abs_metadata_filename)

    # Keep the CSV handle open only for the duration of the loop; the
    # previous version leaked the file object.
    with open(abs_metadata_filename, 'r') as all_metadata:
        media_metadata = csv.DictReader(all_metadata)
        for index, file_metadata in enumerate(media_metadata):
            if six.PY2:
                file_metadata = {
                    k.decode('utf-8'): v.decode('utf-8')
                    for k, v in file_metadata.items()
                }
            files_attempted += 1

            # In case the metadata was not uploaded initialize an empty
            # dictionary.
            json_ld_metadata = compact_and_validate({})

            # Get all metadata entries starting with 'media' as variables and
            # then delete them because those are for internal use only.
            original_location = file_metadata['location']

            ### Pull the important media information for mediagoblin from the
            ### metadata, if it is provided.
            slug = file_metadata.get('slug')
            title = file_metadata.get('title') or file_metadata.get('dc:title')
            description = (file_metadata.get('description') or
                           file_metadata.get('dc:description'))
            collection_slug = file_metadata.get('collection-slug')
            license = file_metadata.get('license')

            try:
                json_ld_metadata = compact_and_validate(file_metadata)
            except ValidationError as exc:
                media_id = file_metadata.get('id') or index
                error = _(
                    """Error with media '{media_id}' value '{error_path}': {error_msg}
Metadata was not uploaded.""".format(
                        media_id=media_id,
                        error_path=exc.path[0],
                        error_msg=exc.message))
                print(error)
                continue

            if slug and MediaEntry.query.filter_by(
                    actor=user.id, slug=slug).count():
                # Avoid re-importing media from a previous batch run. Note
                # that this check isn't quite robust enough, since it requires
                # that a slug is specified. Probably needs to be based on
                # "location" since this is the only required field.
                error = '{}: {}'.format(
                    slug,
                    _('An entry with that slug already exists for this user.'))
                print(error)
                continue

            url = urlparse(original_location)
            # NOTE(review): whitespace split looks odd here; presumably the
            # path never contains spaces -- confirm against callers.
            filename = url.path.split()[-1]

            if url.scheme.startswith('http'):
                res = requests.get(url.geturl(), stream=True)
                if res.headers.get('content-encoding'):
                    # The requests library's "raw" method does not deal with
                    # content encoding. Alternative could be to use
                    # iter_content(), and write chunks to the temporary file.
                    raise NotImplementedError(
                        'URL-based media with content-encoding (eg. gzip) are not currently supported.'
                    )

                # To avoid loading the media into memory all at once, we
                # write it to a file before importing. This currently
                # requires free space up to twice the size of the media file.
                # Memory use can be tested by running something like
                # `ulimit -Sv 200000` before running `batchaddmedia` to
                # upload a file larger than 200MB.
                media_file = tempfile.TemporaryFile()
                shutil.copyfileobj(res.raw, media_file)
                if six.PY2:
                    media_file.seek(0)
            elif url.scheme == '':
                path = url.path
                if os.path.isabs(path):
                    file_abs_path = os.path.abspath(path)
                else:
                    file_path = os.path.join(abs_metadata_dir, path)
                    file_abs_path = os.path.abspath(file_path)
                try:
                    media_file = open(file_abs_path, 'rb')
                except IOError:
                    # restored the `{filename}` placeholders; the original
                    # string had none, making .format() a no-op
                    print(
                        _("""\
FAIL: Local file {filename} could not be accessed.
{filename} will not be uploaded.""".format(filename=filename)))
                    continue
            else:
                # Previously an unrecognised scheme fell through with
                # media_file unbound, crashing with NameError at submit time.
                print(_("FAIL: Unsupported URL scheme '{scheme}'; {filename} "
                        "will not be uploaded.".format(
                            scheme=url.scheme, filename=filename)))
                continue

            try:
                entry = submit_media(mg_app=app,
                                     user=user,
                                     submitted_file=media_file,
                                     filename=filename,
                                     title=title,
                                     description=description,
                                     collection_slug=collection_slug,
                                     license=license,
                                     metadata=json_ld_metadata,
                                     tags_string="")
                if slug:
                    # Slug is automatically set by submit_media, so overwrite
                    # it with the desired slug.
                    entry.slug = slug
                    entry.save()
                print(
                    _("""Successfully submitted {filename}!
Be sure to look at the Media Processing Panel on your website to be sure it
uploaded successfully.""".format(filename=filename)))
                files_uploaded += 1
            except FileUploadLimit:
                print(
                    _("FAIL: This file is larger than the upload limits for this site."
                      ))
            except UserUploadLimit:
                print(
                    _("FAIL: This file will put this user past their upload limits."
                      ))
            except UserPastUploadLimit:
                print(_("FAIL: This user is already past their upload limits."))
            finally:
                media_file.close()

    print(
        _("{files_uploaded} out of {files_attempted} files successfully submitted"
          .format(files_uploaded=files_uploaded,
                  files_attempted=files_attempted)))
def batchaddmedia(args):
    """Command-line tool: batch-submit media described by a CSV file.

    Each CSV row supplies a media 'location' (local path or http URL)
    plus metadata, which is validated as JSON-LD before submission on
    behalf of ``args.username``. Targets Python 2 and 3 via six.
    """
    # Run eagerly unless explicetly set not to
    if not args.celery:
        os.environ['CELERY_ALWAYS_EAGER'] = 'true'

    app = commands_util.setup_app(args)

    files_uploaded, files_attempted = 0, 0

    # get the user
    user = app.db.LocalUser.query.filter(
        LocalUser.username == args.username.lower()).first()
    if user is None:
        print(
            _(u"Sorry, no user by username '{username}' exists".format(
                username=args.username)))
        return

    # (removed unused `temp_files = []`)
    if os.path.isfile(args.metadata_path):
        metadata_path = args.metadata_path
    else:
        error = _(u'File at {path} not found, use -h flag for help'.format(
            path=args.metadata_path))
        print(error)
        return

    abs_metadata_filename = os.path.abspath(metadata_path)
    abs_metadata_dir = os.path.dirname(abs_metadata_filename)

    def maybe_unicodeify(some_string):
        # this is kinda terrible
        if some_string is None:
            return None
        return six.text_type(some_string)

    with codecs.open(abs_metadata_filename, 'r',
                     encoding='utf-8') as all_metadata:
        contents = all_metadata.read()
        media_metadata = parse_csv_file(contents)

    # `.items()` works on both Python 2 and 3; `.iteritems()` raised
    # AttributeError on Python 3 even though this function otherwise
    # targets both via six.
    for media_id, file_metadata in media_metadata.items():
        files_attempted += 1
        # In case the metadata was not uploaded initialize an empty dictionary.
        json_ld_metadata = compact_and_validate({})

        # Get all metadata entries starting with 'media' as variables and then
        # delete them because those are for internal use only.
        original_location = file_metadata['location']

        ### Pull the important media information for mediagoblin from the
        ### metadata, if it is provided.
        title = file_metadata.get('title') or file_metadata.get('dc:title')
        description = (file_metadata.get('description') or
                       file_metadata.get('dc:description'))
        license = file_metadata.get('license')

        try:
            json_ld_metadata = compact_and_validate(file_metadata)
        except ValidationError as exc:
            error = _(
                u"""Error with media '{media_id}' value '{error_path}': {error_msg}
Metadata was not uploaded.""".format(
                    media_id=media_id,
                    error_path=exc.path[0],
                    error_msg=exc.message))
            print(error)
            continue

        url = urlparse(original_location)
        # NOTE(review): whitespace split looks odd here; presumably the
        # path never contains spaces -- confirm against callers.
        filename = url.path.split()[-1]

        if url.scheme == 'http':
            res = requests.get(url.geturl(), stream=True)
            media_file = res.raw
        elif url.scheme == '':
            path = url.path
            if os.path.isabs(path):
                file_abs_path = os.path.abspath(path)
            else:
                file_path = os.path.join(abs_metadata_dir, path)
                file_abs_path = os.path.abspath(file_path)
            try:
                # `open` instead of the Python 2-only `file` builtin
                media_file = open(file_abs_path, 'r')
            except IOError:
                # restored the `{filename}` placeholders; the original
                # string had none, making .format() a no-op
                print(
                    _(u"""\
FAIL: Local file {filename} could not be accessed.
{filename} will not be uploaded.""".format(filename=filename)))
                continue
        try:
            submit_media(mg_app=app,
                         user=user,
                         submitted_file=media_file,
                         filename=filename,
                         title=maybe_unicodeify(title),
                         description=maybe_unicodeify(description),
                         license=maybe_unicodeify(license),
                         metadata=json_ld_metadata,
                         tags_string=u"")
            print(
                _(u"""Successfully submitted {filename}!
Be sure to look at the Media Processing Panel on your website to be sure it
uploaded successfully.""".format(filename=filename)))
            files_uploaded += 1
        except FileUploadLimit:
            print(
                _(u"FAIL: This file is larger than the upload limits for this site."
                  ))
        except UserUploadLimit:
            print(
                _("FAIL: This file will put this user past their upload limits."
                  ))
        except UserPastUploadLimit:
            print(_("FAIL: This user is already past their upload limits."))

    print(
        _("{files_uploaded} out of {files_attempted} files successfully submitted"
          .format(files_uploaded=files_uploaded,
                  files_attempted=files_attempted)))