def get_uploaded_datasets(self, trans, context, override_name=None, override_info=None):
    def get_data_file_filename(data_file, override_name=None, override_info=None, purge=True):
        dataset_name = override_name

        def get_file_name(file_name):
            file_name = file_name.split('\\')[-1]
            file_name = file_name.split('/')[-1]
            return file_name
        try:
            # Use the existing file
            if not dataset_name and 'filename' in data_file:
                dataset_name = get_file_name(data_file['filename'])
            return Bunch(type='file', path=data_file['local_filename'], name=dataset_name, purge_source=purge)
        except Exception:
            # The uploaded file should've been persisted by the upload tool action
            return Bunch(type=None, path=None, name=None)

    def get_url_paste_urls_or_filename(group_incoming, override_name=None, override_info=None):
        url_paste_file = group_incoming.get('url_paste', None)
        if url_paste_file is not None:
            url_paste = open(url_paste_file).read()

            def start_of_url(content):
                start_of_url_paste = content.lstrip()[0:10].lower()
                looks_like_url = False
                for url_prefix in URI_PREFIXES:
                    if start_of_url_paste.startswith(url_prefix):
                        looks_like_url = True
                        break

                return looks_like_url

            if start_of_url(url_paste):
                url_paste = url_paste.replace('\r', '').split('\n')
                for line in url_paste:
                    line = line.strip()
                    if line:
                        if not start_of_url(line):
                            continue  # non-url line, ignore
                        if "file://" in line:
                            if not trans.user_is_admin:
                                raise AdminRequiredException()
                            elif not trans.app.config.allow_path_paste:
                                raise ConfigDoesNotAllowException()
                            upload_path = line[len("file://"):]
                            dataset_name = os.path.basename(upload_path)
                        else:
                            dataset_name = line
                        if override_name:
                            dataset_name = override_name
                        yield Bunch(type='url', path=line, name=dataset_name)
            else:
                dataset_name = 'Pasted Entry'  # we need to differentiate between various url pastes here
                if override_name:
                    dataset_name = override_name
                yield Bunch(type='file', path=url_paste_file, name=dataset_name)

    def get_one_filename(context):
        data_file = context['file_data']
        url_paste = context['url_paste']
        ftp_files = context['ftp_files']
        name = context.get('NAME', None)
        info = context.get('INFO', None)
        uuid = context.get('uuid', None) or None  # Turn '' to None
        file_type = context.get('file_type', None)
        dbkey = self.get_dbkey(context)
        warnings = []
        to_posix_lines = False
        if context.get('to_posix_lines', None) not in ["None", None, False]:
            to_posix_lines = True
        auto_decompress = False
        if context.get('auto_decompress', None) not in ["None", None, False]:
            auto_decompress = True
        space_to_tab = False
        if context.get('space_to_tab', None) not in ["None", None, False]:
            space_to_tab = True
        file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
        if file_bunch.path:
            if url_paste is not None and url_paste.strip():
                warnings.append("All file contents specified in the paste box were ignored.")
            if ftp_files:
                warnings.append("All FTP uploaded file selections were ignored.")
        elif url_paste is not None and url_paste.strip():  # we need to use url_paste
            for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
                if file_bunch.path:
                    break
        if file_bunch.path and ftp_files is not None:
            warnings.append("All FTP uploaded file selections were ignored.")
        elif ftp_files is not None and trans.user is not None:
            # look for files uploaded via FTP
            user_ftp_dir = trans.user_ftp_dir
            assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
            for dirpath, _dirnames, filenames in os.walk(user_ftp_dir):
                for filename in filenames:
                    for ftp_filename in ftp_files:
                        if ftp_filename == filename:
                            path = relpath(os.path.join(dirpath, filename), user_ftp_dir)
                            if not os.path.islink(os.path.join(dirpath, filename)):
                                ftp_data_file = {'local_filename': os.path.abspath(os.path.join(user_ftp_dir, path)),
                                                 'filename': os.path.basename(path)}
                                purge = getattr(trans.app.config, 'ftp_upload_purge', True)
                                file_bunch = get_data_file_filename(
                                    ftp_data_file,
                                    override_name=name,
                                    override_info=info,
                                    purge=purge,
                                )
                                if file_bunch.path:
                                    break
                    if file_bunch.path:
                        break
                if file_bunch.path:
                    break
        file_bunch.to_posix_lines = to_posix_lines
        file_bunch.auto_decompress = auto_decompress
        file_bunch.space_to_tab = space_to_tab
        file_bunch.uuid = uuid
        if file_type is not None:
            file_bunch.file_type = file_type
        if dbkey is not None:
            file_bunch.dbkey = dbkey
        return file_bunch, warnings

    def get_filenames(context):
        rval = []
        data_file = context['file_data']
        ftp_files = context['ftp_files']
        uuid = context.get('uuid', None) or None  # Turn '' to None
        name = context.get('NAME', None)
        info = context.get('INFO', None)
        file_type = context.get('file_type', None)
        dbkey = self.get_dbkey(context)
        to_posix_lines = False
        if context.get('to_posix_lines', None) not in ["None", None, False]:
            to_posix_lines = True
        auto_decompress = False
        if context.get('auto_decompress', None) not in ["None", None, False]:
            auto_decompress = True
        space_to_tab = False
        if context.get('space_to_tab', None) not in ["None", None, False]:
            space_to_tab = True
        file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
        file_bunch.uuid = uuid
        if file_bunch.path:
            file_bunch.to_posix_lines = to_posix_lines
            file_bunch.auto_decompress = auto_decompress
            file_bunch.space_to_tab = space_to_tab
            if file_type is not None:
                file_bunch.file_type = file_type
            if dbkey is not None:
                file_bunch.dbkey = dbkey
            rval.append(file_bunch)
        for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
            if file_bunch.path:
                file_bunch.uuid = uuid
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.auto_decompress = auto_decompress
                file_bunch.space_to_tab = space_to_tab
                if file_type is not None:
                    file_bunch.file_type = file_type
                if dbkey is not None:
                    file_bunch.dbkey = dbkey
                rval.append(file_bunch)
        # look for files uploaded via FTP
        valid_files = []
        if ftp_files is not None:
            # Normalize input paths to ensure utf-8 encoding is normal form c.
            # This allows for comparison when the filesystem uses a different encoding than the browser.
            ftp_files = [unicodedata.normalize('NFC', f) for f in ftp_files if isinstance(f, str)]
            if trans.user is None:
                log.warning('Anonymous user passed values in ftp_files: %s' % ftp_files)
                ftp_files = []
                # TODO: warning to the user (could happen if session has become invalid)
            else:
                user_ftp_dir = trans.user_ftp_dir
                assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
                for dirpath, _dirnames, filenames in os.walk(user_ftp_dir):
                    for filename in filenames:
                        path = relpath(os.path.join(dirpath, filename), user_ftp_dir)
                        if not os.path.islink(os.path.join(dirpath, filename)):
                            # Normalize filesystem paths
                            if isinstance(path, str):
                                valid_files.append(unicodedata.normalize('NFC', path))
                            else:
                                valid_files.append(path)
        else:
            ftp_files = []
        for ftp_file in ftp_files:
            if ftp_file not in valid_files:
                log.warning('User passed an invalid file path in ftp_files: %s' % ftp_file)
                continue
                # TODO: warning to the user (could happen if file is already imported)
            ftp_data_file = {'local_filename': os.path.abspath(os.path.join(user_ftp_dir, ftp_file)),
                             'filename': os.path.basename(ftp_file)}
            purge = getattr(trans.app.config, 'ftp_upload_purge', True)
            file_bunch = get_data_file_filename(ftp_data_file, override_name=name, override_info=info, purge=purge)
            if file_bunch.path:
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.auto_decompress = auto_decompress
                file_bunch.space_to_tab = space_to_tab
                if file_type is not None:
                    file_bunch.file_type = file_type
                if dbkey is not None:
                    file_bunch.dbkey = dbkey
                rval.append(file_bunch)
        return rval

    file_type = self.get_file_type(context)
    file_count = self.get_file_count(trans, context)
    d_type = self.get_datatype(trans, context)
    dbkey = self.get_dbkey(context)
    tag_using_filenames = context.get('tag_using_filenames', False)
    tags = context.get('tags', False)
    force_composite = asbool(context.get('force_composite', 'False'))
    writable_files = d_type.writable_files
    writable_files_offset = 0
    groups_incoming = [None for _ in range(file_count)]
    for group_incoming in context.get(self.name, []):
        i = int(group_incoming['__index__'])
        groups_incoming[i] = group_incoming
    if d_type.composite_type is not None or force_composite:
        # handle uploading of composite datatypes
        # Only one Dataset can be created
        dataset = Bunch()
        dataset.type = 'composite'
        dataset.file_type = file_type
        dataset.dbkey = dbkey
        dataset.datatype = d_type
        dataset.warnings = []
        dataset.metadata = {}
        dataset.composite_files = {}
        dataset.uuid = None
        dataset.tag_using_filenames = None
        dataset.tags = None
        # load metadata
        files_metadata = context.get(self.metadata_ref, {})
        metadata_name_substition_default_dict = {composite_file.substitute_name_with_metadata: d_type.metadata_spec[composite_file.substitute_name_with_metadata].default
                                                 for composite_file in d_type.composite_files.values() if composite_file.substitute_name_with_metadata}
        for meta_name, meta_spec in d_type.metadata_spec.items():
            if meta_spec.set_in_upload:
                if meta_name in files_metadata:
                    meta_value = files_metadata[meta_name]
                    if meta_name in metadata_name_substition_default_dict:
                        meta_value = sanitize_for_filename(meta_value, default=metadata_name_substition_default_dict[meta_name])
                    dataset.metadata[meta_name] = meta_value
        dataset.name = self.get_composite_dataset_name(context)
        if dataset.datatype.composite_type == 'auto_primary_file':
            # replace sniff here with just creating an empty file
            temp_name = sniff.stream_to_file(io.StringIO(d_type.generate_primary_file(dataset)), prefix='upload_auto_primary_file')
            dataset.primary_file = temp_name
            dataset.to_posix_lines = True
            dataset.auto_decompress = True
            dataset.space_to_tab = False
        else:
            file_bunch, warnings = get_one_filename(groups_incoming[0])
            writable_files_offset = 1
            dataset.primary_file = file_bunch.path
            dataset.to_posix_lines = file_bunch.to_posix_lines
            dataset.auto_decompress = file_bunch.auto_decompress
            dataset.space_to_tab = file_bunch.space_to_tab
            if file_bunch.file_type:
                dataset.file_type = file_type
            if file_bunch.dbkey:
                dataset.dbkey = dbkey
            dataset.warnings.extend(warnings)
        if dataset.primary_file is None:  # remove this before finish, this should create an empty dataset
            raise Exception('No primary dataset file was available for composite upload')
        if not force_composite:
            keys = [value.name for value in writable_files.values()]
        else:
            keys = [str(index) for index in range(file_count)]
        for i, group_incoming in enumerate(groups_incoming[writable_files_offset:]):
            key = keys[i + writable_files_offset]
            if not force_composite and group_incoming is None and not writable_files[list(writable_files.keys())[keys.index(key)]].optional:
                dataset.warnings.append("A required composite file (%s) was not specified." % (key))
                dataset.composite_files[key] = None
            else:
                file_bunch, warnings = get_one_filename(group_incoming)
                dataset.warnings.extend(warnings)
                if file_bunch.path:
                    if force_composite:
                        key = group_incoming.get("NAME") or i
                    dataset.composite_files[key] = file_bunch.__dict__
                elif not force_composite:
                    dataset.composite_files[key] = None
                    if not writable_files[list(writable_files.keys())[keys.index(key)]].optional:
                        dataset.warnings.append("A required composite file (%s) was not specified." % (key))
        return [dataset]
    else:
        rval = []
        for i, file_contexts in enumerate(context[self.name]):
            datasets = get_filenames(file_contexts)
            for dataset in datasets:
                override_file_type = self.get_file_type(context[self.name][i], parent_context=context)
                d_type = self.get_datatype(trans, context[self.name][i], parent_context=context)
                dataset.file_type = override_file_type
                dataset.datatype = d_type
                dataset.ext = self.get_datatype_ext(trans, context[self.name][i], parent_context=context)
                dataset.dbkey = self.get_dbkey(context[self.name][i], parent_context=context)
                dataset.tag_using_filenames = tag_using_filenames
                dataset.tags = tags
                rval.append(dataset)
        return rval
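
# Illustrative sketch (not part of the original source): the variants above pass
# around ``Bunch`` objects as lightweight attribute bags. Galaxy imports its own
# ``Bunch`` from ``galaxy.util.bunch``; the minimal stand-in below only
# approximates that behaviour so the helper logic can be experimented with in
# isolation.
class Bunch:
    """Simple attribute container: Bunch(type='file', path='/tmp/x') -> b.type, b.path."""

    def __init__(self, **kwargs):
        # Store every keyword argument as an attribute so callers can use dot access.
        self.__dict__.update(kwargs)

    def __repr__(self):
        return "Bunch(%s)" % ", ".join("%s=%r" % item for item in self.__dict__.items())
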
def get_uploaded_datasets( self, trans, context, override_name=None, override_info=None ):
    def get_data_file_filename( data_file, override_name=None, override_info=None ):
        dataset_name = override_name
        dataset_info = override_info

        def get_file_name( file_name ):
            file_name = file_name.split( '\\' )[-1]
            file_name = file_name.split( '/' )[-1]
            return file_name
        try:
            # Use the existing file
            if not dataset_name and 'filename' in data_file:
                dataset_name = get_file_name( data_file['filename'] )
            if not dataset_info:
                dataset_info = 'uploaded file'
            return Bunch( type='file', path=data_file['local_filename'], name=dataset_name )
            # return 'file', data_file['local_filename'], get_file_name( data_file.filename ), dataset_name, dataset_info
        except:
            # The uploaded file should've been persisted by the upload tool action
            return Bunch( type=None, path=None, name=None )
            # return None, None, None, None, None

    def get_url_paste_urls_or_filename( group_incoming, override_name=None, override_info=None ):
        url_paste_file = group_incoming.get( 'url_paste', None )
        if url_paste_file is not None:
            url_paste = open( url_paste_file, 'r' ).read( 1024 )
            if url_paste.lstrip().lower().startswith( 'http://' ) or url_paste.lstrip().lower().startswith( 'ftp://' ) or url_paste.lstrip().lower().startswith( 'https://' ):
                url_paste = url_paste.replace( '\r', '' ).split( '\n' )
                for line in url_paste:
                    line = line.strip()
                    if line:
                        if not line.lower().startswith( 'http://' ) and not line.lower().startswith( 'ftp://' ) and not line.lower().startswith( 'https://' ):
                            continue  # non-url line, ignore
                        dataset_name = override_name
                        if not dataset_name:
                            dataset_name = line
                        dataset_info = override_info
                        if not dataset_info:
                            dataset_info = 'uploaded url'
                        yield Bunch( type='url', path=line, name=dataset_name )
                        # yield ( 'url', line, precreated_name, dataset_name, dataset_info )
            else:
                dataset_name = dataset_info = precreated_name = 'Pasted Entry'  # we need to differentiate between various url pastes here
                if override_name:
                    dataset_name = override_name
                if override_info:
                    dataset_info = override_info
                yield Bunch( type='file', path=url_paste_file, name=precreated_name )
                # yield ( 'file', url_paste_file, precreated_name, dataset_name, dataset_info )

    def get_one_filename( context ):
        data_file = context['file_data']
        url_paste = context['url_paste']
        ftp_files = context['ftp_files']
        name = context.get( 'NAME', None )
        info = context.get( 'INFO', None )
        uuid = context.get( 'uuid', None ) or None  # Turn '' to None
        warnings = []
        to_posix_lines = False
        if context.get( 'to_posix_lines', None ) not in [ "None", None, False ]:
            to_posix_lines = True
        space_to_tab = False
        if context.get( 'space_to_tab', None ) not in [ "None", None, False ]:
            space_to_tab = True
        file_bunch = get_data_file_filename( data_file, override_name=name, override_info=info )
        if file_bunch.path:
            if url_paste is not None and url_paste.strip():
                warnings.append( "All file contents specified in the paste box were ignored." )
            if ftp_files:
                warnings.append( "All FTP uploaded file selections were ignored." )
        elif url_paste is not None and url_paste.strip():  # we need to use url_paste
            for file_bunch in get_url_paste_urls_or_filename( context, override_name=name, override_info=info ):
                if file_bunch.path:
                    break
        if file_bunch.path and ftp_files is not None:
            warnings.append( "All FTP uploaded file selections were ignored." )
        elif ftp_files is not None and trans.user is not None:
            # look for files uploaded via FTP
            user_ftp_dir = trans.user_ftp_dir
            for ( dirpath, dirnames, filenames ) in os.walk( user_ftp_dir ):
                for filename in filenames:
                    for ftp_filename in ftp_files:
                        if ftp_filename == filename:
                            path = relpath( os.path.join( dirpath, filename ), user_ftp_dir )
                            if not os.path.islink( os.path.join( dirpath, filename ) ):
                                ftp_data_file = { 'local_filename' : os.path.abspath( os.path.join( user_ftp_dir, path ) ),
                                                  'filename' : os.path.basename( path ) }
                                file_bunch = get_data_file_filename( ftp_data_file, override_name=name, override_info=info )
                                if file_bunch.path:
                                    break
                    if file_bunch.path:
                        break
                if file_bunch.path:
                    break
        file_bunch.to_posix_lines = to_posix_lines
        file_bunch.space_to_tab = space_to_tab
        file_bunch.uuid = uuid
        return file_bunch, warnings

    def get_filenames( context ):
        rval = []
        data_file = context['file_data']
        ftp_files = context['ftp_files']
        uuid = context.get( 'uuid', None ) or None  # Turn '' to None
        name = context.get( 'NAME', None )
        info = context.get( 'INFO', None )
        to_posix_lines = False
        if context.get( 'to_posix_lines', None ) not in [ "None", None, False ]:
            to_posix_lines = True
        space_to_tab = False
        if context.get( 'space_to_tab', None ) not in [ "None", None, False ]:
            space_to_tab = True
        file_bunch = get_data_file_filename( data_file, override_name=name, override_info=info )
        file_bunch.uuid = uuid
        if file_bunch.path:
            file_bunch.to_posix_lines = to_posix_lines
            file_bunch.space_to_tab = space_to_tab
            rval.append( file_bunch )
        for file_bunch in get_url_paste_urls_or_filename( context, override_name=name, override_info=info ):
            if file_bunch.path:
                file_bunch.uuid = uuid
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.space_to_tab = space_to_tab
                rval.append( file_bunch )
        # look for files uploaded via FTP
        valid_files = []
        if ftp_files is not None:
            # Normalize input paths to ensure utf-8 encoding is normal form c.
            # This allows for comparison when the filesystem uses a different encoding than the browser.
            ftp_files = [unicodedata.normalize('NFC', f) for f in ftp_files if isinstance(f, unicode)]
            if trans.user is None:
                log.warning( 'Anonymous user passed values in ftp_files: %s' % ftp_files )
                ftp_files = []
                # TODO: warning to the user (could happen if session has become invalid)
            else:
                user_ftp_dir = trans.user_ftp_dir
                for ( dirpath, dirnames, filenames ) in os.walk( user_ftp_dir ):
                    for filename in filenames:
                        path = relpath( os.path.join( dirpath, filename ), user_ftp_dir )
                        if not os.path.islink( os.path.join( dirpath, filename ) ):
                            # Normalize filesystem paths
                            if isinstance(path, unicode):
                                valid_files.append(unicodedata.normalize('NFC', path ))
                            else:
                                valid_files.append(path)
        else:
            ftp_files = []
        for ftp_file in ftp_files:
            if ftp_file not in valid_files:
                log.warning( 'User passed an invalid file path in ftp_files: %s' % ftp_file )
                continue
                # TODO: warning to the user (could happen if file is already imported)
            ftp_data_file = { 'local_filename' : os.path.abspath( os.path.join( user_ftp_dir, ftp_file ) ),
                              'filename' : os.path.basename( ftp_file ) }
            file_bunch = get_data_file_filename( ftp_data_file, override_name=name, override_info=info )
            if file_bunch.path:
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.space_to_tab = space_to_tab
                rval.append( file_bunch )
        return rval

    file_type = self.get_file_type( context )
    d_type = self.get_datatype( trans, context )
    dbkey = context.get( 'dbkey', None )
    writable_files = d_type.writable_files
    writable_files_offset = 0
    groups_incoming = [ None for _ in writable_files ]
    for group_incoming in context.get( self.name, [] ):
        i = int( group_incoming['__index__'] )
        groups_incoming[ i ] = group_incoming
    if d_type.composite_type is not None:
        # handle uploading of composite datatypes
        # Only one Dataset can be created
        dataset = Bunch()
        dataset.type = 'composite'
        dataset.file_type = file_type
        dataset.dbkey = dbkey
        dataset.datatype = d_type
        dataset.warnings = []
        dataset.metadata = {}
        dataset.composite_files = {}
        dataset.uuid = None
        # load metadata
        files_metadata = context.get( self.metadata_ref, {} )
        metadata_name_substition_default_dict = dict( [ ( composite_file.substitute_name_with_metadata, d_type.metadata_spec[ composite_file.substitute_name_with_metadata ].default )
                                                        for composite_file in d_type.composite_files.values() if composite_file.substitute_name_with_metadata ] )
        for meta_name, meta_spec in d_type.metadata_spec.iteritems():
            if meta_spec.set_in_upload:
                if meta_name in files_metadata:
                    meta_value = files_metadata[ meta_name ]
                    if meta_name in metadata_name_substition_default_dict:
                        meta_value = sanitize_for_filename( meta_value, default=metadata_name_substition_default_dict[ meta_name ] )
                    dataset.metadata[ meta_name ] = meta_value
        dataset.precreated_name = dataset.name = self.get_composite_dataset_name( context )
        if dataset.datatype.composite_type == 'auto_primary_file':
            # replace sniff here with just creating an empty file
            temp_name, is_multi_byte = sniff.stream_to_file( StringIO.StringIO( d_type.generate_primary_file( dataset ) ), prefix='upload_auto_primary_file' )
            dataset.primary_file = temp_name
            dataset.to_posix_lines = True
            dataset.space_to_tab = False
        else:
            file_bunch, warnings = get_one_filename( groups_incoming[ 0 ] )
            writable_files_offset = 1
            dataset.primary_file = file_bunch.path
            dataset.to_posix_lines = file_bunch.to_posix_lines
            dataset.space_to_tab = file_bunch.space_to_tab
            dataset.warnings.extend( warnings )
        if dataset.primary_file is None:  # remove this before finish, this should create an empty dataset
            raise Exception( 'No primary dataset file was available for composite upload' )
        keys = [ value.name for value in writable_files.values() ]
        for i, group_incoming in enumerate( groups_incoming[ writable_files_offset : ] ):
            key = keys[ i + writable_files_offset ]
            if group_incoming is None and not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional:
                dataset.warnings.append( "A required composite file (%s) was not specified." % ( key ) )
                dataset.composite_files[ key ] = None
            else:
                file_bunch, warnings = get_one_filename( group_incoming )
                dataset.warnings.extend( warnings )
                if file_bunch.path:
                    dataset.composite_files[ key ] = file_bunch.__dict__
                else:
                    dataset.composite_files[ key ] = None
                    if not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional:
                        dataset.warnings.append( "A required composite file (%s) was not specified." % ( key ) )
        return [ dataset ]
    else:
        datasets = get_filenames( context[ self.name ][0] )
        rval = []
        for dataset in datasets:
            dataset.file_type = file_type
            dataset.datatype = d_type
            dataset.ext = self.get_datatype_ext( trans, context )
            dataset.dbkey = dbkey
            rval.append( dataset )
        return rval
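
# Illustrative sketch (not part of the original source): the per-group ``context``
# dict that get_one_filename()/get_filenames() read from in the variants above.
# The keys below are the ones the code actually looks up; the values are
# made-up examples for experimentation only.
example_group_context = {
    'file_data': {'filename': 'reads.fastq', 'local_filename': '/tmp/upload_123.dat'},
    'url_paste': None,        # or the path of a temp file holding pasted text/URLs
    'ftp_files': [],          # filenames previously uploaded to the user's FTP directory
    'NAME': None,             # optional override for the dataset name
    'INFO': None,             # optional override for the dataset info field
    'uuid': '',               # empty string is coerced to None by the code above
    'to_posix_lines': 'Yes',  # any value outside ["None", None, False] enables the option
    'space_to_tab': None,
}
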
def get_uploaded_datasets(self, trans, context, override_name=None, override_info=None):
    def get_data_file_filename(data_file, override_name=None, override_info=None):
        dataset_name = override_name
        dataset_info = override_info

        def get_file_name(file_name):
            file_name = file_name.split('\\')[-1]
            file_name = file_name.split('/')[-1]
            return file_name
        try:
            # Use the existing file
            if not dataset_name and 'filename' in data_file:
                dataset_name = get_file_name(data_file['filename'])
            if not dataset_info:
                dataset_info = 'uploaded file'
            return Bunch(type='file', path=data_file['local_filename'], name=dataset_name)
            # return 'file', data_file['local_filename'], get_file_name( data_file.filename ), dataset_name, dataset_info
        except:
            # The uploaded file should've been persisted by the upload tool action
            return Bunch(type=None, path=None, name=None)
            # return None, None, None, None, None

    def get_url_paste_urls_or_filename(group_incoming, override_name=None, override_info=None):
        filenames = []
        url_paste_file = group_incoming.get('url_paste', None)
        if url_paste_file is not None:
            url_paste = open(url_paste_file, 'r').read(1024)
            if url_paste.lstrip().lower().startswith('http://') or url_paste.lstrip().lower().startswith('ftp://') or url_paste.lstrip().lower().startswith('https://'):
                url_paste = url_paste.replace('\r', '').split('\n')
                for line in url_paste:
                    line = line.strip()
                    if line:
                        if not line.lower().startswith('http://') and not line.lower().startswith('ftp://') and not line.lower().startswith('https://'):
                            continue  # non-url line, ignore
                        dataset_name = override_name
                        if not dataset_name:
                            dataset_name = line
                        dataset_info = override_info
                        if not dataset_info:
                            dataset_info = 'uploaded url'
                        yield Bunch(type='url', path=line, name=dataset_name)
                        # yield ( 'url', line, precreated_name, dataset_name, dataset_info )
            else:
                dataset_name = dataset_info = precreated_name = 'Pasted Entry'  # we need to differentiate between various url pastes here
                if override_name:
                    dataset_name = override_name
                if override_info:
                    dataset_info = override_info
                yield Bunch(type='file', path=url_paste_file, name=precreated_name)
                # yield ( 'file', url_paste_file, precreated_name, dataset_name, dataset_info )

    def get_one_filename(context):
        data_file = context['file_data']
        url_paste = context['url_paste']
        ftp_files = context['ftp_files']
        name = context.get('NAME', None)
        info = context.get('INFO', None)
        uuid = context.get('uuid', None) or None  # Turn '' to None
        warnings = []
        to_posix_lines = False
        if context.get('to_posix_lines', None) not in ["None", None, False]:
            to_posix_lines = True
        space_to_tab = False
        if context.get('space_to_tab', None) not in ["None", None, False]:
            space_to_tab = True
        file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
        if file_bunch.path:
            if url_paste is not None and url_paste.strip():
                warnings.append("All file contents specified in the paste box were ignored.")
            if ftp_files:
                warnings.append("All FTP uploaded file selections were ignored.")
        elif url_paste is not None and url_paste.strip():  # we need to use url_paste
            for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
                if file_bunch.path:
                    break
        if file_bunch.path and ftp_files is not None:
            warnings.append("All FTP uploaded file selections were ignored.")
        elif ftp_files is not None and trans.user is not None:
            # look for files uploaded via FTP
            user_ftp_dir = trans.user_ftp_dir
            for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                for filename in filenames:
                    for ftp_filename in ftp_files:
                        if ftp_filename == filename:
                            path = relpath(os.path.join(dirpath, filename), user_ftp_dir)
                            if not os.path.islink(os.path.join(dirpath, filename)):
                                ftp_data_file = {'local_filename': os.path.abspath(os.path.join(user_ftp_dir, path)),
                                                 'filename': os.path.basename(path)}
                                file_bunch = get_data_file_filename(ftp_data_file, override_name=name, override_info=info)
                                if file_bunch.path:
                                    break
                    if file_bunch.path:
                        break
                if file_bunch.path:
                    break
        file_bunch.to_posix_lines = to_posix_lines
        file_bunch.space_to_tab = space_to_tab
        file_bunch.uuid = uuid
        return file_bunch, warnings

    def get_filenames(context):
        rval = []
        data_file = context['file_data']
        url_paste = context['url_paste']
        ftp_files = context['ftp_files']
        uuid = context.get('uuid', None) or None  # Turn '' to None
        name = context.get('NAME', None)
        info = context.get('INFO', None)
        to_posix_lines = False
        if context.get('to_posix_lines', None) not in ["None", None, False]:
            to_posix_lines = True
        space_to_tab = False
        if context.get('space_to_tab', None) not in ["None", None, False]:
            space_to_tab = True
        warnings = []
        file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
        file_bunch.uuid = uuid
        if file_bunch.path:
            file_bunch.to_posix_lines = to_posix_lines
            file_bunch.space_to_tab = space_to_tab
            rval.append(file_bunch)
        for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
            if file_bunch.path:
                file_bunch.uuid = uuid
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.space_to_tab = space_to_tab
                rval.append(file_bunch)
        # look for files uploaded via FTP
        valid_files = []
        if ftp_files is not None:
            # Normalize input paths to ensure utf-8 encoding is normal form c.
            # This allows for comparison when the filesystem uses a different encoding than the browser.
            ftp_files = [unicodedata.normalize('NFC', f) for f in ftp_files if isinstance(f, unicode)]
            if trans.user is None:
                log.warning('Anonymous user passed values in ftp_files: %s' % ftp_files)
                ftp_files = []
                # TODO: warning to the user (could happen if session has become invalid)
            else:
                user_ftp_dir = trans.user_ftp_dir
                for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                    for filename in filenames:
                        path = relpath(os.path.join(dirpath, filename), user_ftp_dir)
                        if not os.path.islink(os.path.join(dirpath, filename)):
                            # Normalize filesystem paths
                            if isinstance(path, unicode):
                                valid_files.append(unicodedata.normalize('NFC', path))
                            else:
                                valid_files.append(path)
        else:
            ftp_files = []
        for ftp_file in ftp_files:
            if ftp_file not in valid_files:
                log.warning('User passed an invalid file path in ftp_files: %s' % ftp_file)
                continue
                # TODO: warning to the user (could happen if file is already imported)
            ftp_data_file = {'local_filename': os.path.abspath(os.path.join(user_ftp_dir, ftp_file)),
                             'filename': os.path.basename(ftp_file)}
            file_bunch = get_data_file_filename(ftp_data_file, override_name=name, override_info=info)
            if file_bunch.path:
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.space_to_tab = space_to_tab
                rval.append(file_bunch)
        return rval

    file_type = self.get_file_type(context)
    d_type = self.get_datatype(trans, context)
    dbkey = context.get('dbkey', None)
    writable_files = d_type.writable_files
    writable_files_offset = 0
    groups_incoming = [None for filename in writable_files]
    for group_incoming in context.get(self.name, []):
        i = int(group_incoming['__index__'])
        groups_incoming[i] = group_incoming
    if d_type.composite_type is not None:
        # handle uploading of composite datatypes
        # Only one Dataset can be created
        dataset = Bunch()
        dataset.type = 'composite'
        dataset.file_type = file_type
        dataset.dbkey = dbkey
        dataset.datatype = d_type
        dataset.warnings = []
        dataset.metadata = {}
        dataset.composite_files = {}
        # load metadata
        files_metadata = context.get(self.metadata_ref, {})
        metadata_name_substition_default_dict = dict([(composite_file.substitute_name_with_metadata, d_type.metadata_spec[composite_file.substitute_name_with_metadata].default)
                                                      for composite_file in d_type.composite_files.values() if composite_file.substitute_name_with_metadata])
        for meta_name, meta_spec in d_type.metadata_spec.iteritems():
            if meta_spec.set_in_upload:
                if meta_name in files_metadata:
                    meta_value = files_metadata[meta_name]
                    if meta_name in metadata_name_substition_default_dict:
                        meta_value = sanitize_for_filename(meta_value, default=metadata_name_substition_default_dict[meta_name])
                    dataset.metadata[meta_name] = meta_value
        dataset.precreated_name = dataset.name = self.get_composite_dataset_name(context)
        if dataset.datatype.composite_type == 'auto_primary_file':
            # replace sniff here with just creating an empty file
            temp_name, is_multi_byte = sniff.stream_to_file(StringIO.StringIO(d_type.generate_primary_file(dataset)), prefix='upload_auto_primary_file')
            dataset.primary_file = temp_name
            dataset.to_posix_lines = True
            dataset.space_to_tab = False
        else:
            file_bunch, warnings = get_one_filename(groups_incoming[0])
            writable_files_offset = 1
            dataset.primary_file = file_bunch.path
            dataset.to_posix_lines = file_bunch.to_posix_lines
            dataset.space_to_tab = file_bunch.space_to_tab
            dataset.warnings.extend(warnings)
        if dataset.primary_file is None:
            # remove this before finish, this should create an empty dataset
            raise Exception('No primary dataset file was available for composite upload')
        keys = [value.name for value in writable_files.values()]
        for i, group_incoming in enumerate(groups_incoming[writable_files_offset:]):
            key = keys[i + writable_files_offset]
            if group_incoming is None and not writable_files[writable_files.keys()[keys.index(key)]].optional:
                dataset.warnings.append("A required composite file (%s) was not specified." % (key))
                dataset.composite_files[key] = None
            else:
                file_bunch, warnings = get_one_filename(group_incoming)
                dataset.warnings.extend(warnings)
                if file_bunch.path:
                    dataset.composite_files[key] = file_bunch.__dict__
                else:
                    dataset.composite_files[key] = None
                    if not writable_files[writable_files.keys()[keys.index(key)]].optional:
                        dataset.warnings.append("A required composite file (%s) was not specified." % (key))
        return [dataset]
    else:
        datasets = get_filenames(context[self.name][0])
        rval = []
        for dataset in datasets:
            dataset.file_type = file_type
            dataset.datatype = d_type
            dataset.ext = self.get_datatype_ext(trans, context)
            dataset.dbkey = dbkey
            rval.append(dataset)
        return rval
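
# Illustrative sketch (not part of the original source): how the composite branch
# above pairs each expected component of a composite datatype with an incoming
# upload group and warns about missing required pieces. ``CompositeFile`` is a
# hypothetical stand-in carrying only the two attributes the code reads.
from collections import OrderedDict, namedtuple

CompositeFile = namedtuple('CompositeFile', ['name', 'optional'])

writable_files = OrderedDict([
    ('main_file', CompositeFile(name='main_file', optional=False)),
    ('index_file', CompositeFile(name='index_file', optional=True)),
])

keys = [value.name for value in writable_files.values()]
groups_incoming = [{'__index__': 0}, None]  # the optional second component was not supplied

warnings = []
for i, group_incoming in enumerate(groups_incoming):
    key = keys[i]
    composite_file = writable_files[list(writable_files.keys())[keys.index(key)]]
    if group_incoming is None and not composite_file.optional:
        warnings.append("A required composite file (%s) was not specified." % key)

assert warnings == []  # only the optional component was missing, so no warning is emitted
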
def get_uploaded_datasets(self, trans, context, override_name=None, override_info=None):
    def get_data_file_filename(data_file, override_name=None, override_info=None, purge=True):
        dataset_name = override_name

        def get_file_name(file_name):
            file_name = file_name.split('\\')[-1]
            file_name = file_name.split('/')[-1]
            return file_name
        try:
            # Use the existing file
            if not dataset_name and 'filename' in data_file:
                dataset_name = get_file_name(data_file['filename'])
            return Bunch(type='file', path=data_file['local_filename'], name=dataset_name, purge_source=purge)
        except Exception:
            # The uploaded file should've been persisted by the upload tool action
            return Bunch(type=None, path=None, name=None)

    def get_url_paste_urls_or_filename(group_incoming, override_name=None, override_info=None):
        url_paste_file = group_incoming.get('url_paste', None)
        if url_paste_file is not None:
            url_paste = open(url_paste_file, 'r').read()

            def start_of_url(content):
                start_of_url_paste = content.lstrip()[0:8].lower()
                looks_like_url = False
                for url_prefix in ["http://", "https://", "ftp://", "file://"]:
                    if start_of_url_paste.startswith(url_prefix):
                        looks_like_url = True
                        break

                return looks_like_url

            if start_of_url(url_paste):
                url_paste = url_paste.replace('\r', '').split('\n')
                for line in url_paste:
                    line = line.strip()
                    if line:
                        if not start_of_url(line):
                            continue  # non-url line, ignore
                        if "file://" in line:
                            if not trans.user_is_admin:
                                raise AdminRequiredException()
                            elif not trans.app.config.allow_path_paste:
                                raise ConfigDoesNotAllowException()
                            upload_path = line[len("file://"):]
                            dataset_name = os.path.basename(upload_path)
                        else:
                            dataset_name = line
                        if override_name:
                            dataset_name = override_name
                        yield Bunch(type='url', path=line, name=dataset_name)
            else:
                dataset_name = 'Pasted Entry'  # we need to differentiate between various url pastes here
                if override_name:
                    dataset_name = override_name
                yield Bunch(type='file', path=url_paste_file, name=dataset_name)

    def get_one_filename(context):
        data_file = context['file_data']
        url_paste = context['url_paste']
        ftp_files = context['ftp_files']
        name = context.get('NAME', None)
        info = context.get('INFO', None)
        uuid = context.get('uuid', None) or None  # Turn '' to None
        file_type = context.get('file_type', None)
        dbkey = self.get_dbkey(context)
        warnings = []
        to_posix_lines = False
        if context.get('to_posix_lines', None) not in ["None", None, False]:
            to_posix_lines = True
        auto_decompress = False
        if context.get('auto_decompress', None) not in ["None", None, False]:
            auto_decompress = True
        space_to_tab = False
        if context.get('space_to_tab', None) not in ["None", None, False]:
            space_to_tab = True
        file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
        if file_bunch.path:
            if url_paste is not None and url_paste.strip():
                warnings.append("All file contents specified in the paste box were ignored.")
            if ftp_files:
                warnings.append("All FTP uploaded file selections were ignored.")
        elif url_paste is not None and url_paste.strip():
            # we need to use url_paste
            for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
                if file_bunch.path:
                    break
        if file_bunch.path and ftp_files is not None:
            warnings.append("All FTP uploaded file selections were ignored.")
        elif ftp_files is not None and trans.user is not None:
            # look for files uploaded via FTP
            user_ftp_dir = trans.user_ftp_dir
            assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
            for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                for filename in filenames:
                    for ftp_filename in ftp_files:
                        if ftp_filename == filename:
                            path = relpath(os.path.join(dirpath, filename), user_ftp_dir)
                            if not os.path.islink(os.path.join(dirpath, filename)):
                                ftp_data_file = {'local_filename' : os.path.abspath(os.path.join(user_ftp_dir, path)),
                                                 'filename' : os.path.basename(path)}
                                purge = getattr(trans.app.config, 'ftp_upload_purge', True)
                                file_bunch = get_data_file_filename(
                                    ftp_data_file,
                                    override_name=name,
                                    override_info=info,
                                    purge=purge,
                                )
                                if file_bunch.path:
                                    break
                    if file_bunch.path:
                        break
                if file_bunch.path:
                    break
        file_bunch.to_posix_lines = to_posix_lines
        file_bunch.auto_decompress = auto_decompress
        file_bunch.space_to_tab = space_to_tab
        file_bunch.uuid = uuid
        if file_type is not None:
            file_bunch.file_type = file_type
        if dbkey is not None:
            file_bunch.dbkey = dbkey
        return file_bunch, warnings

    def get_filenames(context):
        rval = []
        data_file = context['file_data']
        ftp_files = context['ftp_files']
        uuid = context.get('uuid', None) or None  # Turn '' to None
        name = context.get('NAME', None)
        info = context.get('INFO', None)
        file_type = context.get('file_type', None)
        dbkey = self.get_dbkey(context)
        to_posix_lines = False
        if context.get('to_posix_lines', None) not in ["None", None, False]:
            to_posix_lines = True
        auto_decompress = False
        if context.get('auto_decompress', None) not in ["None", None, False]:
            auto_decompress = True
        space_to_tab = False
        if context.get('space_to_tab', None) not in ["None", None, False]:
            space_to_tab = True
        file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
        file_bunch.uuid = uuid
        if file_bunch.path:
            file_bunch.to_posix_lines = to_posix_lines
            file_bunch.auto_decompress = auto_decompress
            file_bunch.space_to_tab = space_to_tab
            if file_type is not None:
                file_bunch.file_type = file_type
            if dbkey is not None:
                file_bunch.dbkey = dbkey
            rval.append(file_bunch)
        for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
            if file_bunch.path:
                file_bunch.uuid = uuid
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.auto_decompress = auto_decompress
                file_bunch.space_to_tab = space_to_tab
                if file_type is not None:
                    file_bunch.file_type = file_type
                if dbkey is not None:
                    file_bunch.dbkey = dbkey
                rval.append(file_bunch)
        # look for files uploaded via FTP
        valid_files = []
        if ftp_files is not None:
            # Normalize input paths to ensure utf-8 encoding is normal form c.
            # This allows for comparison when the filesystem uses a different encoding than the browser.
            ftp_files = [unicodedata.normalize('NFC', f) for f in ftp_files if isinstance(f, text_type)]
            if trans.user is None:
                log.warning('Anonymous user passed values in ftp_files: %s' % ftp_files)
                ftp_files = []
                # TODO: warning to the user (could happen if session has become invalid)
            else:
                user_ftp_dir = trans.user_ftp_dir
                assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
                for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                    for filename in filenames:
                        path = relpath(os.path.join(dirpath, filename), user_ftp_dir)
                        if not os.path.islink(os.path.join(dirpath, filename)):
                            # Normalize filesystem paths
                            if isinstance(path, text_type):
                                valid_files.append(unicodedata.normalize('NFC', path))
                            else:
                                valid_files.append(path)
        else:
            ftp_files = []
        for ftp_file in ftp_files:
            if ftp_file not in valid_files:
                log.warning('User passed an invalid file path in ftp_files: %s' % ftp_file)
                continue
                # TODO: warning to the user (could happen if file is already imported)
            ftp_data_file = {'local_filename' : os.path.abspath(os.path.join(user_ftp_dir, ftp_file)),
                             'filename' : os.path.basename(ftp_file)}
            purge = getattr(trans.app.config, 'ftp_upload_purge', True)
            file_bunch = get_data_file_filename(ftp_data_file, override_name=name, override_info=info, purge=purge)
            if file_bunch.path:
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.auto_decompress = auto_decompress
                file_bunch.space_to_tab = space_to_tab
                if file_type is not None:
                    file_bunch.file_type = file_type
                if dbkey is not None:
                    file_bunch.dbkey = dbkey
                rval.append(file_bunch)
        return rval

    file_type = self.get_file_type(context)
    file_count = self.get_file_count(trans, context)
    d_type = self.get_datatype(trans, context)
    dbkey = self.get_dbkey(context)
    tag_using_filenames = context.get('tag_using_filenames', False)
    force_composite = asbool(context.get('force_composite', 'False'))
    writable_files = d_type.writable_files
    writable_files_offset = 0
    groups_incoming = [None for _ in range(file_count)]
    for group_incoming in context.get(self.name, []):
        i = int(group_incoming['__index__'])
        groups_incoming[i] = group_incoming
    if d_type.composite_type is not None or force_composite:
        # handle uploading of composite datatypes
        # Only one Dataset can be created
        dataset = Bunch()
        dataset.type = 'composite'
        dataset.file_type = file_type
        dataset.dbkey = dbkey
        dataset.datatype = d_type
        dataset.warnings = []
        dataset.metadata = {}
        dataset.composite_files = {}
        dataset.uuid = None
        dataset.tag_using_filenames = None
        # load metadata
        files_metadata = context.get(self.metadata_ref, {})
        metadata_name_substition_default_dict = dict((composite_file.substitute_name_with_metadata, d_type.metadata_spec[composite_file.substitute_name_with_metadata].default)
                                                     for composite_file in d_type.composite_files.values() if composite_file.substitute_name_with_metadata)
        for meta_name, meta_spec in d_type.metadata_spec.items():
            if meta_spec.set_in_upload:
                if meta_name in files_metadata:
                    meta_value = files_metadata[meta_name]
                    if meta_name in metadata_name_substition_default_dict:
                        meta_value = sanitize_for_filename(meta_value, default=metadata_name_substition_default_dict[meta_name])
                    dataset.metadata[meta_name] = meta_value
        dataset.name = self.get_composite_dataset_name(context)
        if dataset.datatype.composite_type == 'auto_primary_file':
            # replace sniff here with just creating an empty file
            temp_name = sniff.stream_to_file(StringIO(d_type.generate_primary_file(dataset)), prefix='upload_auto_primary_file')
            dataset.primary_file = temp_name
            dataset.to_posix_lines = True
            dataset.auto_decompress = True
            dataset.space_to_tab = False
        else:
            file_bunch, warnings = get_one_filename(groups_incoming[0])
            writable_files_offset = 1
            dataset.primary_file = file_bunch.path
            dataset.to_posix_lines = file_bunch.to_posix_lines
            dataset.auto_decompress = file_bunch.auto_decompress
            dataset.space_to_tab = file_bunch.space_to_tab
            if file_bunch.file_type:
                dataset.file_type = file_type
            if file_bunch.dbkey:
                dataset.dbkey = dbkey
            dataset.warnings.extend(warnings)
        if dataset.primary_file is None:  # remove this before finish, this should create an empty dataset
            raise Exception('No primary dataset file was available for composite upload')
        if not force_composite:
            keys = [value.name for value in writable_files.values()]
        else:
            keys = [str(index) for index in range(file_count)]
        for i, group_incoming in enumerate(groups_incoming[writable_files_offset:]):
            key = keys[i + writable_files_offset]
            if not force_composite and group_incoming is None and not writable_files[list(writable_files.keys())[keys.index(key)]].optional:
                dataset.warnings.append("A required composite file (%s) was not specified." % (key))
                dataset.composite_files[key] = None
            else:
                file_bunch, warnings = get_one_filename(group_incoming)
                dataset.warnings.extend(warnings)
                if file_bunch.path:
                    if force_composite:
                        key = group_incoming.get("NAME") or i
                    dataset.composite_files[key] = file_bunch.__dict__
                elif not force_composite:
                    dataset.composite_files[key] = None
                    if not writable_files[list(writable_files.keys())[keys.index(key)]].optional:
                        dataset.warnings.append("A required composite file (%s) was not specified." % (key))
        return [dataset]
    else:
        rval = []
        for i, file_contexts in enumerate(context[self.name]):
            datasets = get_filenames(file_contexts)
            for dataset in datasets:
                override_file_type = self.get_file_type(context[self.name][i], parent_context=context)
                d_type = self.get_datatype(trans, context[self.name][i], parent_context=context)
                dataset.file_type = override_file_type
                dataset.datatype = d_type
                dataset.ext = self.get_datatype_ext(trans, context[self.name][i], parent_context=context)
                dataset.dbkey = self.get_dbkey(context[self.name][i], parent_context=context)
                dataset.tag_using_filenames = tag_using_filenames
                rval.append(dataset)
        return rval
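
# Illustrative sketch (not part of the original source): how the newer variants
# classify the "url_paste" box. If the pasted text starts with a known URI
# prefix, each URL-looking line becomes its own upload; otherwise the whole
# paste is treated as a single pasted file. URI_PREFIXES here is an assumed
# stand-in for the constant the real module imports.
URI_PREFIXES = ("http://", "https://", "ftp://", "file://")


def classify_url_paste(content):
    """Return ('urls', [lines]) or ('pasted_file', content), mimicking the upload logic above."""
    def looks_like_url(text):
        # str.startswith accepts a tuple of prefixes, so one call covers all schemes.
        return text.lstrip()[0:10].lower().startswith(URI_PREFIXES)

    if looks_like_url(content):
        lines = [line.strip() for line in content.replace('\r', '').split('\n')]
        return 'urls', [line for line in lines if line and looks_like_url(line)]
    return 'pasted_file', content


# Example: two URLs plus a stray non-URL line -> only the URLs survive.
kind, payload = classify_url_paste("https://example.org/a.txt\nnot a url\nftp://example.org/b.txt\n")
assert kind == 'urls' and payload == ["https://example.org/a.txt", "ftp://example.org/b.txt"]
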