def getuploadedfile(self, req, form):
    """
    Stream a file previously uploaded by the current user.

    Only files located below
    <CFG_WEBSUBMIT_STORAGEDIR>/<indir>/<doctype>/<access>/files/<uid>/
    (or .../icons/<uid>/ when C{icon} is set) are served, so that a file
    is only ever streamed to the user who uploaded it.

    @param req: request object; used to identify the user and to stream
        the file
    @param form: request arguments. C{indir}, C{doctype}, C{access},
        C{key} and C{filename} locate the file; C{icon} (int) selects the
        icons directory instead of the files one; C{nowait} (int)
        disables the retry loop.
    @return: whatever C{stream_file} returns for the requested file
    @raise apache.SERVER_RETURN: C{HTTP_BAD_REQUEST} when one of the
        washed arguments is C{None}; C{HTTP_NOT_FOUND} when the path
        escapes the user's directory or the file does not exist.
    """
    def _is_inside(path, root):
        """Tell whether the normalised C{path} lies strictly below C{root}."""
        # Append the separator so that "/a/storage-evil" is not accepted
        # as being inside "/a/storage".
        root = os.path.abspath(root).rstrip(os.sep) + os.sep
        return os.path.abspath(path).startswith(root)

    argd = wash_urlargd(
        form, {
            'indir': (str, None),
            'doctype': (str, None),
            'access': (str, None),
            'icon': (int, 0),
            'key': (str, None),
            'filename': (str, None),
            'nowait': (int, 0)
        })

    if None in argd.values():
        raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)

    uid = getUid(req)

    subdir = 'icons' if argd['icon'] else 'files'
    user_dir = os.path.abspath(
        os.path.join(CFG_WEBSUBMIT_STORAGEDIR, argd['indir'],
                     argd['doctype'], argd['access'], subdir, str(uid)))
    abs_file_path = os.path.abspath(
        os.path.join(user_dir, argd['key'], argd['filename']))

    # All path components come from the request: make sure that, once
    # normalised, the target is still inside the storage area AND inside
    # the requesting user's own directory ("key"/"filename" containing
    # ".." must not reach another user's files).
    if _is_inside(user_dir, CFG_WEBSUBMIT_STORAGEDIR) and \
            _is_inside(abs_file_path, user_dir):
        # Check if file exist. Note that icon might not yet have
        # been created, hence the retries (one per second).
        attempts = 1 if argd['nowait'] else 5
        for attempt in range(attempts):
            if attempt:
                time.sleep(1)
            if os.path.exists(abs_file_path):
                return stream_file(req, abs_file_path)

    # Send error 404 in all other cases
    raise apache.SERVER_RETURN(apache.HTTP_NOT_FOUND)
def uploadfile(self, req, form):
    """
    Similar to /submit, but only consider files. Nice for
    asynchronous Javascript uploads. Should be used to upload a
    single file.

    Also try to create an icon, and return URL to file(s) + icon(s)

    Authentication is performed based on session ID passed as
    parameter instead of cookie-based authentication, due to the
    use of this URL by the Flash plugin (to upload multiple files
    at once), which does not route cookies.

    FIXME: consider adding /deletefile and /modifyfile functions +
    parsing of additional parameters to rename files, add
    comments, restrictions, etc.

    @param req: request object
    @param form: request arguments; C{indir}, C{doctype} and C{access}
        are mandatory and locate the submission directory, every field
        carrying a C{filename} attribute is stored as an uploaded file.
    @return: JSON-encoded dict mapping each form key to
        C{{'name': ..., 'iconName': ...}} (C{iconName} is omitted when no
        icon could be created) when C{CFG_JSON_AVAILABLE} is true,
        C{None} otherwise.
    @raise apache.SERVER_RETURN: C{HTTP_BAD_REQUEST} (missing argument or
        empty file name), C{HTTP_UNAUTHORIZED} (user may not submit),
        C{HTTP_FORBIDDEN} (path escapes the storage area or a directory
        cannot be created).
    """
    def _is_inside(path, root):
        """Tell whether the normalised C{path} lies strictly below C{root}."""
        # Append the separator so that "/a/storage-evil" is not accepted
        # as being inside "/a/storage".
        root = os.path.abspath(root).rstrip(os.sep) + os.sep
        return os.path.abspath(path).startswith(root)

    def _read_or_default(path, default):
        """Return the content of C{path}, or C{default} if unreadable."""
        try:
            with open(path) as fd:
                return fd.read()
        except (IOError, OSError):
            return default

    argd = wash_urlargd(
        form, {
            'doctype': (str, ''),
            'access': (str, ''),
            'indir': (str, ''),
            'session_id': (str, ''),
            'rename': (str, ''),
        })

    if "indir" not in form or \
            "doctype" not in form or \
            "access" not in form:
        raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
    curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, argd['indir'],
                          argd['doctype'], argd['access'])

    user_info = collect_user_info(req)
    # Default to the user attached to the request, so that `uid` is
    # always bound, also for regular (cookie-based) uploads.
    uid = user_info['uid']
    if "session_id" in form:
        # Are we uploading using Flash, which does not transmit
        # cookie? Then expect to receive session_id as a form
        # parameter.
        # NOTE(review): no IP address check is performed here although
        # the original comment announced one -- confirm.
        uid = session.uid
        user_info = collect_user_info(uid)

    action = _read_or_default(os.path.join(curdir, 'act'), "")
    recid = _read_or_default(os.path.join(curdir, 'SN'), '')
    categ = _read_or_default(
        os.path.join(curdir, 'combo%s' % argd['doctype']), '*')

    user_is_owner = False
    if recid:
        user_is_owner = is_user_owner_of_record(user_info, recid)

    # Is user authorized to perform this action?
    auth_code = acc_authorize_action(
        uid,
        "submit",
        authorized_if_no_roles=not isGuestUser(uid),
        verbose=0,
        doctype=argd['doctype'],
        act=action,
        categ=categ)[0]
    if acc_is_role("submit", doctype=argd['doctype'], act=action) and \
            auth_code != 0 and not user_is_owner:
        # User cannot submit
        raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED)

    # Process the upload and get the response
    added_files = {}
    for key, formfields in form.items():
        if not (hasattr(formfields, "filename") and formfields.filename):
            # Not a file field
            continue

        # File (directly in curdir) that records the name under which
        # the upload of this form field was stored.
        file_to_open = os.path.join(curdir, key.replace("[]", ""))
        dir_to_open = os.path.abspath(
            os.path.join(curdir, 'files', str(user_info['uid']), key))

        # `key` comes from the request: refuse anything that would make
        # us write outside of the storage area. An explicit test is used
        # rather than `assert`, which is stripped under `python -O`.
        if not (_is_inside(dir_to_open, CFG_WEBSUBMIT_STORAGEDIR) and
                _is_inside(file_to_open, CFG_WEBSUBMIT_STORAGEDIR)):
            register_exception(req=req,
                               prefix='curdir="%s", key="%s"' %
                               (curdir, key))
            raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

        if not os.path.exists(dir_to_open):
            try:
                os.makedirs(dir_to_open)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    # If the issue is only that directory
                    # already exists, then continue, else
                    # report
                    register_exception(req=req, alert_admin=True)
                    raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

        ## Before saving the file to disc, wash the filename (in particular
        ## washing away UNIX and Windows (e.g. DFS) paths):
        filename = os.path.basename(formfields.filename.split('\\')[-1])
        filename = filename.strip()
        if filename == "":
            raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)

        # Check that file does not already exist; if so, pick the next
        # free document name.
        while os.path.exists(os.path.join(dir_to_open, filename)):
            dummy_dir, name, extension = decompose_file(filename)
            filename = propose_next_docname(name) + extension

        # Copy by chunks so that big uploads are never entirely loaded
        # in memory; binary mode keeps the content untouched.
        with open(os.path.join(dir_to_open, filename), "wb") as fp:
            while True:
                chunk = formfields.file.read(65536)
                if not chunk:
                    break
                fp.write(chunk)
        with open(os.path.join(curdir, "lastuploadedfile"), "w") as fp:
            fp.write(filename)
        with open(file_to_open, "w") as fp:
            fp.write(filename)

        try:
            # Create icon
            (icon_path, icon_name) = create_icon({
                'input-file': os.path.join(dir_to_open, filename),
                'icon-name': filename,  # extension stripped automatically
                'icon-file-format': 'gif',
                'multipage-icon': False,
                'multipage-icon-delay': 100,
                'icon-scale': "300>",  # Resize only if width > 300
                'verbosity': 0,
            })

            icons_dir = os.path.join(curdir, 'icons',
                                     str(user_info['uid']), key)
            if not os.path.exists(icons_dir):
                # Create uid/icons dir if needed
                try:
                    os.makedirs(icons_dir)
                except OSError as e:
                    if e.errno != errno.EEXIST:
                        # If the issue is only that
                        # directory already exists,
                        # then continue, else report
                        register_exception(req=req, alert_admin=True)
                        raise apache.SERVER_RETURN(
                            apache.HTTP_FORBIDDEN)
            os.rename(os.path.join(icon_path, icon_name),
                      os.path.join(icons_dir, icon_name))
            added_files[key] = {
                'name': filename,
                'iconName': icon_name
            }
        except InvenioWebSubmitIconCreatorError:
            # We could not create the icon: report the file alone
            added_files[key] = {'name': filename}

    # Send our response
    if CFG_JSON_AVAILABLE:
        return json.dumps(added_files)
def managedocfilesasync(self, req, form):
    """
    Upload file and returns upload interface.

    Two entry modes are supported:
      - through WebSubmit, when C{doctype}, C{indir} and C{access} are
        all present in the form: the record ID and the action are read
        from the submission directory and the user must be allowed to
        "submit" (or no role must be plugged for that doctype/action);
      - through the admin interface otherwise: the user must be
        authorized for 'runbibdocfile' and C{recid} is taken from the
        form.

    @param req: request object
    @param form: request arguments (C{ln}, C{recid}, C{doctype},
        C{access}, C{indir})
    @return: second element of the value returned by
        C{create_file_upload_interface}
    @raise apache.SERVER_RETURN: C{HTTP_UNAUTHORIZED} when the working
        directory is not a normalised path inside the storage area or
        when the user is not authorized.
    """
    argd = wash_urlargd(
        form, {
            'ln': (str, ''),
            'recid': (int, 1),
            'doctype': (str, ''),
            'access': (str, ''),
            'indir': (str, ''),
        })

    user_info = collect_user_info(req)
    include_headers = False
    # User submitted either through WebSubmit, or admin interface.
    if 'doctype' in form and 'indir' in form \
            and 'access' in form:
        # Submitted through WebSubmit. Check rights
        include_headers = True
        working_dir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR,
                                   argd['indir'], argd['doctype'],
                                   argd['access'])
        # Reject any path that is not already normalised (e.g. contains
        # "..") or that leaves the storage area (os.path.join drops the
        # storage prefix when a component is absolute). An explicit test
        # is used rather than `assert`, which is stripped under
        # `python -O`.
        storage_root = CFG_WEBSUBMIT_STORAGEDIR.rstrip(os.sep) + os.sep
        if working_dir != os.path.abspath(working_dir) or \
                not working_dir.startswith(storage_root):
            raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED)

        try:
            # Retrieve recid from working_dir, safer.
            with open(os.path.join(working_dir, 'SN')) as recid_fd:
                recid = int(recid_fd.read())
        except (IOError, OSError, ValueError):
            # No (valid) record ID recorded for this submission yet
            recid = ""

        try:
            with open(os.path.join(working_dir, 'act')) as act_fd:
                action = act_fd.read()
        except (IOError, OSError):
            action = ""

        # Is user authorized to perform this action?
        auth_code = acc_authorize_action(
            user_info,
            "submit",
            authorized_if_no_roles=not isGuestUser(getUid(req)),
            doctype=argd['doctype'],
            act=action)[0]
        if auth_code and not acc_is_role(
                "submit", doctype=argd['doctype'], act=action):
            # There is NO authorization plugged. User should have access
            auth_code = 0
    else:
        # User must be allowed to attach files
        auth_code = acc_authorize_action(user_info, 'runbibdocfile')[0]
        recid = argd['recid']

    if auth_code:
        raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED)

    return create_file_upload_interface(recid=recid,
                                        ln=argd['ln'],
                                        print_outside_form_tag=False,
                                        print_envelope=False,
                                        form=form,
                                        include_headers=include_headers,
                                        sbm_indir=argd['indir'],
                                        sbm_access=argd['access'],
                                        sbm_doctype=argd['doctype'],
                                        uid=user_info['uid'])[1]
def upload_video(self, req, form):
    """
    A clone of uploadfile but for (large) videos.
    Does not copy the uploaded file to the websubmit directory.
    Instead, the path to the file is stored inside the submission directory.

    For the first file field found in the form, the temporary upload is
    renamed (so that it survives the request), its path and washed name
    are recorded in the submission directory, sample frames are
    extracted and the aspect ratio plus some container metadata are
    collected.

    @param req: request object
    @param form: request arguments; C{indir}, C{doctype} and C{access}
        are mandatory and locate the submission directory.
    @return: JSON-encoded dict with the keys C{frames}, C{aspx},
        C{aspy}, C{meta_title}, C{meta_description}, C{meta_year} and
        C{meta_author} when C{CFG_JSON_AVAILABLE} is true (the same
        string is also stored in the submission directory), C{None}
        otherwise.
    @raise apache.SERVER_RETURN: C{HTTP_BAD_REQUEST} (missing argument or
        empty file name), C{HTTP_UNAUTHORIZED} (user may not submit),
        C{HTTP_FORBIDDEN} (path escapes the storage area, directory
        cannot be created, or the file is not a usable video).
    """
    def gcd(a, b):
        """ the euclidean algorithm """
        while a:
            a, b = b % a, a
        return b

    def _is_inside(path, root):
        """Tell whether the normalised C{path} lies strictly below C{root}."""
        # Append the separator so that "/a/storage-evil" is not accepted
        # as being inside "/a/storage".
        root = os.path.abspath(root).rstrip(os.sep) + os.sep
        return os.path.abspath(path).startswith(root)

    from invenio.modules.encoder.extract import extract_frames
    from invenio.modules.encoder.config import CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR, CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_FNAME
    from invenio.modules.encoder.encode import determine_aspect
    from invenio.modules.encoder.utils import probe
    from invenio.modules.encoder.metadata import ffprobe_metadata
    from invenio.legacy.websubmit.config import CFG_WEBSUBMIT_TMP_VIDEO_PREFIX

    ## Image and audio formats that are readable by ffprobe but must not
    ## be accepted as videos (extensions WITHOUT the leading dot).
    ## NOTE(review): 'mp4' was part of the original list, but the list
    ## never matched anything (it was compared against '.ext'), so MP4
    ## videos have always been accepted; it is left out here so that
    ## repairing the comparison does not start rejecting them -- confirm.
    non_video_extensions = (
        'jpg', 'jpeg', 'gif', 'tiff', 'bmp', 'png', 'tga', 'jp2', 'j2k',
        'jpf', 'jpm', 'mj2', 'biff', 'cgm', 'exif', 'img', 'mng', 'pic',
        'pict', 'raw', 'wmf', 'jpe', 'jif', 'jfif', 'jfi', 'tif', 'webp',
        'svg', 'ai', 'ps', 'psd', 'wav', 'mp3', 'pcm', 'aiff', 'au',
        'flac', 'wma', 'm4a', 'wv', 'oga', 'm4b', 'm4p', 'm4r', 'aac',
        'vox', 'amr', 'snd',
    )

    argd = wash_urlargd(
        form, {
            'doctype': (str, ''),
            'access': (str, ''),
            'indir': (str, ''),
            'session_id': (str, ''),
            'rename': (str, ''),
        })

    if "indir" not in form or \
            "doctype" not in form or \
            "access" not in form:
        raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
    curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, argd['indir'],
                          argd['doctype'], argd['access'])

    user_info = collect_user_info(req)
    # Default to the user attached to the request, so that `uid` is
    # always bound, also for regular (cookie-based) uploads.
    uid = user_info['uid']
    if "session_id" in form:
        # Are we uploading using Flash, which does not transmit
        # cookie? Then expect to receive session_id as a form
        # parameter.
        # NOTE(review): no IP address check is performed here although
        # the original comment announced one -- confirm.
        uid = session.uid
        user_info = collect_user_info(uid)

    try:
        with open(os.path.join(curdir, 'act')) as act_fd:
            action = act_fd.read()
    except (IOError, OSError):
        action = ""

    # Is user authorized to perform this action?
    auth_code = acc_authorize_action(
        uid,
        "submit",
        authorized_if_no_roles=not isGuestUser(uid),
        verbose=0,
        doctype=argd['doctype'],
        act=action)[0]
    if acc_is_role("submit", doctype=argd['doctype'],
                   act=action) and auth_code != 0:
        # User cannot submit
        raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED)

    # Process the upload and get the response
    json_response = {}
    for key, formfields in form.items():
        if not (hasattr(formfields, "filename") and formfields.filename):
            # Not a file field
            continue

        dir_to_open = os.path.abspath(
            os.path.join(curdir, 'files', str(user_info['uid']), key))
        # `key` comes from the request: refuse anything that would make
        # us write outside of the storage area. An explicit test is used
        # rather than `assert`, which is stripped under `python -O`.
        if not _is_inside(dir_to_open, CFG_WEBSUBMIT_STORAGEDIR):
            register_exception(req=req,
                               prefix='curdir="%s", key="%s"' %
                               (curdir, key))
            raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

        if not os.path.exists(dir_to_open):
            try:
                os.makedirs(dir_to_open)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    # If the issue is only that directory
                    # already exists, then continue, else
                    # report
                    register_exception(req=req, alert_admin=True)
                    raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

        ## Before saving the file to disc, wash the filename (in particular
        ## washing away UNIX and Windows (e.g. DFS) paths):
        filename = os.path.basename(formfields.filename.split('\\')[-1])
        filename = filename.strip()
        if filename == "":
            ## Empty file name
            raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)

        # Check that file does not already exist; if so, pick the next
        # free document name.
        while os.path.exists(os.path.join(dir_to_open, filename)):
            dummy_dir, name, extension = decompose_file(filename)
            filename = propose_next_docname(name) + extension

        #-------------#
        # VIDEO STUFF #
        #-------------#

        tmp_path = formfields.file.name
        tmp_dir, tmp_name = os.path.split(tmp_path)

        ## Remove all previous uploads of this submission. An empty
        ## access number would match (and delete) every file of the
        ## temporary directory, so skip the clean-up in that case.
        if argd['access']:
            for afile in os.listdir(tmp_dir):
                if argd['access'] in afile:
                    os.remove(os.path.join(tmp_dir, afile))

        ## Check if the file is a readable video
        ## We must exclude all image and audio formats that are readable by ffprobe
        ## (splitext() keeps the leading dot: strip it before comparing)
        extension = os.path.splitext(filename)[1].lstrip('.').lower()
        if extension in non_video_extensions or not probe(tmp_path):
            formfields.file.close()
            raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

        ## New name of the temporary file, for the garbage collector
        new_tmp_fullpath = tmp_dir + "/" + \
            CFG_WEBSUBMIT_TMP_VIDEO_PREFIX + argd['access'] + \
            "_" + tmp_name

        ## We have no "delete" attribute in Python 2.4
        if sys.hexversion < 0x2050000:
            ## We need to rename first and create a dummy file, which
            ## is the one removed when the temporary file is closed
            os.rename(tmp_path, new_tmp_fullpath)
            dummy = open(tmp_path, "w")
            dummy.close()
            formfields.file.close()
        else:
            # Mark the NamedTemporaryFile as not to be deleted
            formfields.file.delete = False
            formfields.file.close()
            os.rename(tmp_path, new_tmp_fullpath)

        # Write the path to the temp file to a file in STORAGEDIR
        with open(os.path.join(dir_to_open, "filepath"), "w") as fp:
            fp.write(new_tmp_fullpath)
        with open(os.path.join(dir_to_open, "filename"), "w") as fp:
            fp.write(filename)

        ## We are going to extract some thumbnails for websubmit ##
        sample_dir = os.path.join(
            curdir, 'files', str(user_info['uid']),
            CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR)
        try:
            ## Remove old thumbnails
            shutil.rmtree(sample_dir)
        except OSError:
            register_exception(req=req, alert_admin=False)
        try:
            # sample_dir is already the complete path: do not join it
            # once more with curdir/files/uid.
            os.makedirs(sample_dir)
        except OSError:
            register_exception(req=req, alert_admin=False)
        try:
            extract_frames(
                input_file=new_tmp_fullpath,
                output_file=os.path.join(
                    sample_dir,
                    CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_FNAME),
                size="600x600",
                numberof=5)
            json_response['frames'] = []
            for extracted_frame in os.listdir(sample_dir):
                json_response['frames'].append(extracted_frame)
        except Exception:
            ## If the frame extraction fails, something was bad with the video
            os.remove(new_tmp_fullpath)
            register_exception(req=req, alert_admin=False)
            raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

        ## Try to detect the aspect. if this fails, the video is not readable
        ## or a wrong file might have been uploaded
        try:
            (aspect, width, height) = determine_aspect(new_tmp_fullpath)
            if aspect:
                aspx, aspy = aspect.split(':')
            else:
                # Reduce width:height; floor division keeps integers
                the_gcd = gcd(width, height)
                aspx = str(width // the_gcd)
                aspy = str(height // the_gcd)
            json_response['aspx'] = aspx
            json_response['aspy'] = aspy
        except (TypeError, ValueError, ZeroDivisionError):
            ## If the aspect detection completely fails (nothing
            ## returned, malformed "x:y" string or zero-sized video)
            os.remove(new_tmp_fullpath)
            register_exception(req=req, alert_admin=False)
            raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

        ## Try to extract some metadata from the video container
        metadata = ffprobe_metadata(new_tmp_fullpath)
        json_response['meta_title'] = metadata['format'].get('TAG:title')
        json_response['meta_description'] = metadata['format'].get(
            'TAG:description')
        json_response['meta_year'] = metadata['format'].get('TAG:year')
        json_response['meta_author'] = metadata['format'].get(
            'TAG:author')

        ## We found our file, we can break the loop
        break

    # Send our response
    if CFG_JSON_AVAILABLE:
        dumped_response = json.dumps(json_response)

        # store the response in the websubmit directory
        # this is needed if the submission is not finished and continued later
        response_dir = os.path.join(curdir, 'files',
                                    str(user_info['uid']), "response")
        try:
            os.makedirs(response_dir)
        except OSError:
            # Best effort: the directory most likely exists already
            pass
        with open(os.path.join(response_dir, "response"), "w") as fp:
            fp.write(dumped_response)

        return dumped_response
def redirect_to_url(req, url, redirection_type=None, norobot=False):
    """
    Redirect current page to url.

    The redirection is currently performed by raising
    apache.SERVER_RETURN around a Flask redirect response; the legacy
    implementation that follows the raise is unreachable (see FIXME).

    @param req: request as received from apache
    @param url: url to redirect to
    @param redirection_type: what kind of redirection is required:
        e.g.: apache.HTTP_MULTIPLE_CHOICES             = 300
              apache.HTTP_MOVED_PERMANENTLY            = 301
              apache.HTTP_MOVED_TEMPORARILY            = 302
              apache.HTTP_SEE_OTHER                    = 303
              apache.HTTP_NOT_MODIFIED                 = 304
              apache.HTTP_USE_PROXY                    = 305
              apache.HTTP_TEMPORARY_REDIRECT           = 307
        The default is apache.HTTP_MOVED_TEMPORARILY
    @param norobot: whether to instruct crawlers and robots such as
        GoogleBot not to index past this point (only honoured by the
        disabled legacy implementation).
    @raise apache.SERVER_RETURN: always; carries the redirect response.
    @see: http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3
    """
    url = url.strip()
    if redirection_type is None:
        redirection_type = apache.HTTP_MOVED_TEMPORARILY

    from flask import redirect
    r = redirect(url, code=redirection_type)
    # Call form instead of the Python-2-only "raise cls, value" statement:
    # same exception instance, but also valid Python 3 syntax.
    raise apache.SERVER_RETURN(r)

    # FIXME enable code below.
    # Everything from here on is UNREACHABLE: it is the legacy
    # implementation, intentionally kept until it gets re-enabled.
    del req.headers_out["Cache-Control"]
    req.headers_out["Cache-Control"] = "no-cache, private, no-store, " \
        "must-revalidate, post-check=0, pre-check=0, max-age=0"
    req.headers_out["Pragma"] = "no-cache"

    if norobot:
        req.headers_out[
            "X-Robots-Tag"] = "noarchive, nosnippet, noindex, nocache"

    user_agent = req.headers_in.get('User-Agent', '')
    if 'Microsoft Office Existence Discovery' in user_agent or 'ms-office' in user_agent:
        ## HACK: this is to workaround Microsoft Office trying to be smart
        ## when users click on URLs in Office documents that require
        ## authentication. Office will check the validity of the URL
        ## but will pass the browser the redirected URL rather than
        ## the original one. This is incompatible with e.g. Shibboleth
        ## based SSO since the referer would be lost.
        ## See: http://support.microsoft.com/kb/899927
        req.status = 200
        req.content_type = 'text/html'
        if req.method != 'HEAD':
            req.write(
                """
<html>
    <head>
        <title>Intermediate page for URLs clicked on MS Office Documents</title>
        <meta http-equiv="REFRESH" content="5;url=%(url)s"></meta>
    </head>
    <body>
        <p>You are going to be redirected to the desired content within 5 seconds. If the redirection does not happen automatically please click on <a href="%(url)s">%(url_ok)s</a>.</p>
    </body>
</html>""" % {
                    'url': escape(req.unparsed_uri, True),
                    'url_ok': escape(req.unparsed_uri)
                })
        raise apache.SERVER_RETURN(apache.DONE)

    req.headers_out["Location"] = url

    if req.response_sent_p:
        raise IOError("Cannot redirect after headers have already been sent.")

    req.status = redirection_type
    req.write('<p>Please go to <a href="%s">here</a></p>\n' % url)

    raise apache.SERVER_RETURN(apache.DONE)