Example #1
    def get_uploaded_datasets(self,
                              trans,
                              context,
                              override_name=None,
                              override_info=None):
        def get_data_file_filename(data_file,
                                   override_name=None,
                                   override_info=None,
                                   purge=True):
            dataset_name = override_name

            def get_file_name(file_name):
                file_name = file_name.split('\\')[-1]
                file_name = file_name.split('/')[-1]
                return file_name

            try:
                # Use the existing file
                if not dataset_name and 'filename' in data_file:
                    dataset_name = get_file_name(data_file['filename'])
                return Bunch(type='file',
                             path=data_file['local_filename'],
                             name=dataset_name,
                             purge_source=purge)
            except Exception:
                # The uploaded file should've been persisted by the upload tool action
                return Bunch(type=None, path=None, name=None)

        def get_url_paste_urls_or_filename(group_incoming,
                                           override_name=None,
                                           override_info=None):
            url_paste_file = group_incoming.get('url_paste', None)
            if url_paste_file is not None:
                url_paste = open(url_paste_file).read()

                def start_of_url(content):
                    start_of_url_paste = content.lstrip()[0:10].lower()
                    looks_like_url = False
                    for url_prefix in URI_PREFIXES:
                        if start_of_url_paste.startswith(url_prefix):
                            looks_like_url = True
                            break

                    return looks_like_url

                if start_of_url(url_paste):
                    url_paste = url_paste.replace('\r', '').split('\n')
                    for line in url_paste:
                        line = line.strip()
                        if line:
                            if not start_of_url(line):
                                continue  # non-url line, ignore

                            if "file://" in line:
                                if not trans.user_is_admin:
                                    raise AdminRequiredException()
                                elif not trans.app.config.allow_path_paste:
                                    raise ConfigDoesNotAllowException()
                                upload_path = line[len("file://"):]
                                dataset_name = os.path.basename(upload_path)
                            else:
                                dataset_name = line

                            if override_name:
                                dataset_name = override_name
                            yield Bunch(type='url',
                                        path=line,
                                        name=dataset_name)
                else:
                    dataset_name = 'Pasted Entry'  # we need to differentiate between various url pastes here
                    if override_name:
                        dataset_name = override_name
                    yield Bunch(type='file',
                                path=url_paste_file,
                                name=dataset_name)

        def get_one_filename(context):
            data_file = context['file_data']
            url_paste = context['url_paste']
            ftp_files = context['ftp_files']
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            uuid = context.get('uuid', None) or None  # Turn '' to None
            file_type = context.get('file_type', None)
            dbkey = self.get_dbkey(context)
            warnings = []
            to_posix_lines = False
            if context.get('to_posix_lines',
                           None) not in ["None", None, False]:
                to_posix_lines = True
            auto_decompress = False
            if context.get('auto_decompress',
                           None) not in ["None", None, False]:
                auto_decompress = True
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None, False]:
                space_to_tab = True
            file_bunch = get_data_file_filename(data_file,
                                                override_name=name,
                                                override_info=info)
            if file_bunch.path:
                if url_paste is not None and url_paste.strip():
                    warnings.append(
                        "All file contents specified in the paste box were ignored."
                    )
                if ftp_files:
                    warnings.append(
                        "All FTP uploaded file selections were ignored.")
            elif url_paste is not None and url_paste.strip():  # we need to use url_paste
                for file_bunch in get_url_paste_urls_or_filename(
                        context, override_name=name, override_info=info):
                    if file_bunch.path:
                        break
                if file_bunch.path and ftp_files is not None:
                    warnings.append(
                        "All FTP uploaded file selections were ignored.")
            elif ftp_files is not None and trans.user is not None:  # look for files uploaded via FTP
                user_ftp_dir = trans.user_ftp_dir
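                # Never follow symlinks: the user's FTP root must not be a
                # symlink, and symlinked files inside it are skipped below.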
                assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
                for dirpath, _dirnames, filenames in os.walk(user_ftp_dir):
                    for filename in filenames:
                        for ftp_filename in ftp_files:
                            if ftp_filename == filename:
                                path = relpath(os.path.join(dirpath, filename),
                                               user_ftp_dir)
                                if not os.path.islink(
                                        os.path.join(dirpath, filename)):
                                    ftp_data_file = {
                                        'local_filename':
                                        os.path.abspath(
                                            os.path.join(user_ftp_dir, path)),
                                        'filename':
                                        os.path.basename(path)
                                    }
                                    purge = getattr(trans.app.config,
                                                    'ftp_upload_purge', True)
                                    file_bunch = get_data_file_filename(
                                        ftp_data_file,
                                        override_name=name,
                                        override_info=info,
                                        purge=purge,
                                    )
                                    if file_bunch.path:
                                        break
                        if file_bunch.path:
                            break
                    if file_bunch.path:
                        break
            file_bunch.to_posix_lines = to_posix_lines
            file_bunch.auto_decompress = auto_decompress
            file_bunch.space_to_tab = space_to_tab
            file_bunch.uuid = uuid
            if file_type is not None:
                file_bunch.file_type = file_type
            if dbkey is not None:
                file_bunch.dbkey = dbkey
            return file_bunch, warnings

        def get_filenames(context):
            rval = []
            data_file = context['file_data']
            ftp_files = context['ftp_files']
            uuid = context.get('uuid', None) or None  # Turn '' to None
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            file_type = context.get('file_type', None)
            dbkey = self.get_dbkey(context)
            to_posix_lines = False
            if context.get('to_posix_lines',
                           None) not in ["None", None, False]:
                to_posix_lines = True
            auto_decompress = False
            if context.get('auto_decompress',
                           None) not in ["None", None, False]:
                auto_decompress = True
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None, False]:
                space_to_tab = True
            file_bunch = get_data_file_filename(data_file,
                                                override_name=name,
                                                override_info=info)
            file_bunch.uuid = uuid
            if file_bunch.path:
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.auto_decompress = auto_decompress
                file_bunch.space_to_tab = space_to_tab
                if file_type is not None:
                    file_bunch.file_type = file_type
                if dbkey is not None:
                    file_bunch.dbkey = dbkey

                rval.append(file_bunch)
            for file_bunch in get_url_paste_urls_or_filename(
                    context, override_name=name, override_info=info):
                if file_bunch.path:
                    file_bunch.uuid = uuid
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.auto_decompress = auto_decompress
                    file_bunch.space_to_tab = space_to_tab
                    if file_type is not None:
                        file_bunch.file_type = file_type
                    if dbkey is not None:
                        file_bunch.dbkey = dbkey

                    rval.append(file_bunch)
            # look for files uploaded via FTP
            valid_files = []
            if ftp_files is not None:
                # Normalize input paths to ensure utf-8 encoding is normal form c.
                # This allows for comparison when the filesystem uses a different encoding than the browser.
                ftp_files = [
                    unicodedata.normalize('NFC', f) for f in ftp_files
                    if isinstance(f, str)
                ]
                if trans.user is None:
                    log.warning(
                        'Anonymous user passed values in ftp_files: %s' %
                        ftp_files)
                    ftp_files = []
                    # TODO: warning to the user (could happen if session has become invalid)
                else:
                    user_ftp_dir = trans.user_ftp_dir
                    assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
                    for dirpath, _dirnames, filenames in os.walk(user_ftp_dir):
                        for filename in filenames:
                            path = relpath(os.path.join(dirpath, filename),
                                           user_ftp_dir)
                            if not os.path.islink(
                                    os.path.join(dirpath, filename)):
                                # Normalize filesystem paths
                                if isinstance(path, str):
                                    valid_files.append(
                                        unicodedata.normalize('NFC', path))
                                else:
                                    valid_files.append(path)

            else:
                ftp_files = []
            for ftp_file in ftp_files:
                if ftp_file not in valid_files:
                    log.warning(
                        'User passed an invalid file path in ftp_files: %s' %
                        ftp_file)
                    continue
                    # TODO: warning to the user (could happen if file is already imported)
                ftp_data_file = {
                    'local_filename':
                    os.path.abspath(os.path.join(user_ftp_dir, ftp_file)),
                    'filename':
                    os.path.basename(ftp_file)
                }
                purge = getattr(trans.app.config, 'ftp_upload_purge', True)
                file_bunch = get_data_file_filename(ftp_data_file,
                                                    override_name=name,
                                                    override_info=info,
                                                    purge=purge)
                if file_bunch.path:
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.auto_decompress = auto_decompress
                    file_bunch.space_to_tab = space_to_tab
                    if file_type is not None:
                        file_bunch.file_type = file_type
                    if dbkey is not None:
                        file_bunch.dbkey = dbkey
                    rval.append(file_bunch)
            return rval

        file_type = self.get_file_type(context)
        file_count = self.get_file_count(trans, context)
        d_type = self.get_datatype(trans, context)
        dbkey = self.get_dbkey(context)
        tag_using_filenames = context.get('tag_using_filenames', False)
        tags = context.get('tags', False)
        force_composite = asbool(context.get('force_composite', 'False'))
        writable_files = d_type.writable_files
        writable_files_offset = 0
        groups_incoming = [None for _ in range(file_count)]
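        # Place each incoming repeat group at the position given by its '__index__'.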
        for group_incoming in context.get(self.name, []):
            i = int(group_incoming['__index__'])
            groups_incoming[i] = group_incoming
        if d_type.composite_type is not None or force_composite:
            # handle uploading of composite datatypes
            # Only one Dataset can be created
            dataset = Bunch()
            dataset.type = 'composite'
            dataset.file_type = file_type
            dataset.dbkey = dbkey
            dataset.datatype = d_type
            dataset.warnings = []
            dataset.metadata = {}
            dataset.composite_files = {}
            dataset.uuid = None
            dataset.tag_using_filenames = None
            dataset.tags = None
            # load metadata
            files_metadata = context.get(self.metadata_ref, {})
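            # Defaults for metadata fields that can be substituted into composite
            # file names (used as the fallback for sanitize_for_filename below).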
            metadata_name_substition_default_dict = {
                composite_file.substitute_name_with_metadata:
                d_type.metadata_spec[
                    composite_file.substitute_name_with_metadata].default
                for composite_file in d_type.composite_files.values()
                if composite_file.substitute_name_with_metadata
            }
            for meta_name, meta_spec in d_type.metadata_spec.items():
                if meta_spec.set_in_upload:
                    if meta_name in files_metadata:
                        meta_value = files_metadata[meta_name]
                        if meta_name in metadata_name_substition_default_dict:
                            meta_value = sanitize_for_filename(
                                meta_value,
                                default=metadata_name_substition_default_dict[
                                    meta_name])
                        dataset.metadata[meta_name] = meta_value
            dataset.name = self.get_composite_dataset_name(context)
            if dataset.datatype.composite_type == 'auto_primary_file':
                # replace sniff here with just creating an empty file
                temp_name = sniff.stream_to_file(
                    io.StringIO(d_type.generate_primary_file(dataset)),
                    prefix='upload_auto_primary_file')
                dataset.primary_file = temp_name
                dataset.to_posix_lines = True
                dataset.auto_decompress = True
                dataset.space_to_tab = False
            else:
                file_bunch, warnings = get_one_filename(groups_incoming[0])
                writable_files_offset = 1
                dataset.primary_file = file_bunch.path
                dataset.to_posix_lines = file_bunch.to_posix_lines
                dataset.auto_decompress = file_bunch.auto_decompress
                dataset.space_to_tab = file_bunch.space_to_tab
                if file_bunch.file_type:
                    dataset.file_type = file_type
                if file_bunch.dbkey:
                    dataset.dbkey = dbkey
                dataset.warnings.extend(warnings)
            if dataset.primary_file is None:  # remove this before finish, this should create an empty dataset
                raise Exception(
                    'No primary dataset file was available for composite upload'
                )
            if not force_composite:
                keys = [value.name for value in writable_files.values()]
            else:
                keys = [str(index) for index in range(file_count)]
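            # Pair each remaining incoming group with its composite-file key,
            # warning when a required (non-optional) composite file is missing.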
            for i, group_incoming in enumerate(
                    groups_incoming[writable_files_offset:]):
                key = keys[i + writable_files_offset]
                if not force_composite and group_incoming is None and not writable_files[
                        list(writable_files.keys())[keys.index(key)]].optional:
                    dataset.warnings.append(
                        "A required composite file (%s) was not specified." %
                        (key))
                    dataset.composite_files[key] = None
                else:
                    file_bunch, warnings = get_one_filename(group_incoming)
                    dataset.warnings.extend(warnings)
                    if file_bunch.path:
                        if force_composite:
                            key = group_incoming.get("NAME") or i
                        dataset.composite_files[key] = file_bunch.__dict__
                    elif not force_composite:
                        dataset.composite_files[key] = None
                        if not writable_files[list(writable_files.keys())[
                                keys.index(key)]].optional:
                            dataset.warnings.append(
                                "A required composite file (%s) was not specified."
                                % (key))
            return [dataset]
        else:
            rval = []
            for i, file_contexts in enumerate(context[self.name]):
                datasets = get_filenames(file_contexts)
                for dataset in datasets:
                    override_file_type = self.get_file_type(
                        context[self.name][i], parent_context=context)
                    d_type = self.get_datatype(trans,
                                               context[self.name][i],
                                               parent_context=context)
                    dataset.file_type = override_file_type
                    dataset.datatype = d_type
                    dataset.ext = self.get_datatype_ext(trans,
                                                        context[self.name][i],
                                                        parent_context=context)
                    dataset.dbkey = self.get_dbkey(context[self.name][i],
                                                   parent_context=context)
                    dataset.tag_using_filenames = tag_using_filenames
                    dataset.tags = tags
                    rval.append(dataset)
            return rval
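
Example #1 relies on names imported elsewhere in Galaxy (URI_PREFIXES, Bunch, sniff, relpath, sanitize_for_filename), so the listing is not runnable on its own. A minimal, self-contained sketch of the URL detection performed by start_of_url, assuming a plausible prefix list, might look like:

# Sketch only: URI_PREFIXES is assumed here; the real list is imported from
# Galaxy's utilities and may contain additional schemes.
URI_PREFIXES = ["http://", "https://", "ftp://", "file://"]


def looks_like_url(content):
    """Return True when pasted content appears to start with a URI."""
    start = content.lstrip()[0:10].lower()
    return any(start.startswith(prefix) for prefix in URI_PREFIXES)


print(looks_like_url("https://example.org/data.txt"))  # True
print(looks_like_url("chr1\t100\t200"))                # False
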
Example #2
    def get_uploaded_datasets( self, trans, context, override_name=None, override_info=None ):
        def get_data_file_filename( data_file, override_name=None, override_info=None ):
            dataset_name = override_name
            dataset_info = override_info

            def get_file_name( file_name ):
                file_name = file_name.split( '\\' )[-1]
                file_name = file_name.split( '/' )[-1]
                return file_name
            try:
                # Use the existing file
                if not dataset_name and 'filename' in data_file:
                    dataset_name = get_file_name( data_file['filename'] )
                if not dataset_info:
                    dataset_info = 'uploaded file'
                return Bunch( type='file', path=data_file['local_filename'], name=dataset_name )
                # return 'file', data_file['local_filename'], get_file_name( data_file.filename ), dataset_name, dataset_info
            except:
                # The uploaded file should've been persisted by the upload tool action
                return Bunch( type=None, path=None, name=None )
                # return None, None, None, None, None

        def get_url_paste_urls_or_filename( group_incoming, override_name=None, override_info=None ):
            url_paste_file = group_incoming.get( 'url_paste', None )
            if url_paste_file is not None:
                url_paste = open( url_paste_file, 'r' ).read( 1024 )
                if url_paste.lstrip().lower().startswith( 'http://' ) or url_paste.lstrip().lower().startswith( 'ftp://' ) or url_paste.lstrip().lower().startswith( 'https://' ):
                    url_paste = url_paste.replace( '\r', '' ).split( '\n' )
                    for line in url_paste:
                        line = line.strip()
                        if line:
                            if not line.lower().startswith( 'http://' ) and not line.lower().startswith( 'ftp://' ) and not line.lower().startswith( 'https://' ):
                                continue  # non-url line, ignore
                            dataset_name = override_name
                            if not dataset_name:
                                dataset_name = line
                            dataset_info = override_info
                            if not dataset_info:
                                dataset_info = 'uploaded url'
                            yield Bunch( type='url', path=line, name=dataset_name )
                            # yield ( 'url', line, precreated_name, dataset_name, dataset_info )
                else:
                    dataset_name = dataset_info = precreated_name = 'Pasted Entry'  # we need to differentiate between various url pastes here
                    if override_name:
                        dataset_name = override_name
                    if override_info:
                        dataset_info = override_info
                    yield Bunch( type='file', path=url_paste_file, name=precreated_name )
                    # yield ( 'file', url_paste_file, precreated_name, dataset_name, dataset_info )

        def get_one_filename( context ):
            data_file = context['file_data']
            url_paste = context['url_paste']
            ftp_files = context['ftp_files']
            name = context.get( 'NAME', None )
            info = context.get( 'INFO', None )
            uuid = context.get( 'uuid', None ) or None  # Turn '' to None
            warnings = []
            to_posix_lines = False
            if context.get( 'to_posix_lines', None ) not in [ "None", None, False ]:
                to_posix_lines = True
            space_to_tab = False
            if context.get( 'space_to_tab', None ) not in [ "None", None, False ]:
                space_to_tab = True
            file_bunch = get_data_file_filename( data_file, override_name=name, override_info=info )
            if file_bunch.path:
                if url_paste is not None and url_paste.strip():
                    warnings.append( "All file contents specified in the paste box were ignored." )
                if ftp_files:
                    warnings.append( "All FTP uploaded file selections were ignored." )
            elif url_paste is not None and url_paste.strip():  # we need to use url_paste
                for file_bunch in get_url_paste_urls_or_filename( context, override_name=name, override_info=info ):
                    if file_bunch.path:
                        break
                if file_bunch.path and ftp_files is not None:
                    warnings.append( "All FTP uploaded file selections were ignored." )
            elif ftp_files is not None and trans.user is not None:  # look for files uploaded via FTP
                user_ftp_dir = trans.user_ftp_dir
                for ( dirpath, dirnames, filenames ) in os.walk( user_ftp_dir ):
                    for filename in filenames:
                        for ftp_filename in ftp_files:
                            if ftp_filename == filename:
                                path = relpath( os.path.join( dirpath, filename ), user_ftp_dir )
                                if not os.path.islink( os.path.join( dirpath, filename ) ):
                                    ftp_data_file = { 'local_filename' : os.path.abspath( os.path.join( user_ftp_dir, path ) ),
                                                      'filename' : os.path.basename( path ) }
                                    file_bunch = get_data_file_filename( ftp_data_file, override_name=name, override_info=info )
                                    if file_bunch.path:
                                        break
                        if file_bunch.path:
                            break
                    if file_bunch.path:
                        break
            file_bunch.to_posix_lines = to_posix_lines
            file_bunch.space_to_tab = space_to_tab
            file_bunch.uuid = uuid
            return file_bunch, warnings

        def get_filenames( context ):
            rval = []
            data_file = context['file_data']
            ftp_files = context['ftp_files']
            uuid = context.get( 'uuid', None ) or None  # Turn '' to None
            name = context.get( 'NAME', None )
            info = context.get( 'INFO', None )
            to_posix_lines = False
            if context.get( 'to_posix_lines', None ) not in [ "None", None, False ]:
                to_posix_lines = True
            space_to_tab = False
            if context.get( 'space_to_tab', None ) not in [ "None", None, False ]:
                space_to_tab = True
            file_bunch = get_data_file_filename( data_file, override_name=name, override_info=info )
            file_bunch.uuid = uuid
            if file_bunch.path:
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.space_to_tab = space_to_tab
                rval.append( file_bunch )
            for file_bunch in get_url_paste_urls_or_filename( context, override_name=name, override_info=info ):
                if file_bunch.path:
                    file_bunch.uuid = uuid
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.space_to_tab = space_to_tab
                    rval.append( file_bunch )
            # look for files uploaded via FTP
            valid_files = []
            if ftp_files is not None:
                # Normalize input paths to ensure utf-8 encoding is normal form c.
                # This allows for comparison when the filesystem uses a different encoding than the browser.
                ftp_files = [unicodedata.normalize('NFC', f) for f in ftp_files if isinstance(f, unicode)]
                if trans.user is None:
                    log.warning( 'Anonymous user passed values in ftp_files: %s' % ftp_files )
                    ftp_files = []
                    # TODO: warning to the user (could happen if session has become invalid)
                else:
                    user_ftp_dir = trans.user_ftp_dir
                    for ( dirpath, dirnames, filenames ) in os.walk( user_ftp_dir ):
                        for filename in filenames:
                            path = relpath( os.path.join( dirpath, filename ), user_ftp_dir )
                            if not os.path.islink( os.path.join( dirpath, filename ) ):
                                # Normalize filesystem paths
                                if isinstance(path, unicode):
                                    valid_files.append(unicodedata.normalize('NFC', path ))
                                else:
                                    valid_files.append(path)

            else:
                ftp_files = []
            for ftp_file in ftp_files:
                if ftp_file not in valid_files:
                    log.warning( 'User passed an invalid file path in ftp_files: %s' % ftp_file )
                    continue
                    # TODO: warning to the user (could happen if file is already imported)
                ftp_data_file = { 'local_filename' : os.path.abspath( os.path.join( user_ftp_dir, ftp_file ) ),
                                  'filename' : os.path.basename( ftp_file ) }
                file_bunch = get_data_file_filename( ftp_data_file, override_name=name, override_info=info )
                if file_bunch.path:
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.space_to_tab = space_to_tab
                    rval.append( file_bunch )
            return rval
        file_type = self.get_file_type( context )
        d_type = self.get_datatype( trans, context )
        dbkey = context.get( 'dbkey', None )
        writable_files = d_type.writable_files
        writable_files_offset = 0
        groups_incoming = [ None for _ in writable_files ]
        for group_incoming in context.get( self.name, [] ):
            i = int( group_incoming['__index__'] )
            groups_incoming[ i ] = group_incoming
        if d_type.composite_type is not None:
            # handle uploading of composite datatypes
            # Only one Dataset can be created
            dataset = Bunch()
            dataset.type = 'composite'
            dataset.file_type = file_type
            dataset.dbkey = dbkey
            dataset.datatype = d_type
            dataset.warnings = []
            dataset.metadata = {}
            dataset.composite_files = {}
            dataset.uuid = None
            # load metadata
            files_metadata = context.get( self.metadata_ref, {} )
            metadata_name_substition_default_dict = dict( [ ( composite_file.substitute_name_with_metadata, d_type.metadata_spec[ composite_file.substitute_name_with_metadata ].default ) for composite_file in d_type.composite_files.values() if composite_file.substitute_name_with_metadata ] )
            for meta_name, meta_spec in d_type.metadata_spec.iteritems():
                if meta_spec.set_in_upload:
                    if meta_name in files_metadata:
                        meta_value = files_metadata[ meta_name ]
                        if meta_name in metadata_name_substition_default_dict:
                            meta_value = sanitize_for_filename( meta_value, default=metadata_name_substition_default_dict[ meta_name ] )
                        dataset.metadata[ meta_name ] = meta_value
            dataset.precreated_name = dataset.name = self.get_composite_dataset_name( context )
            if dataset.datatype.composite_type == 'auto_primary_file':
                # replace sniff here with just creating an empty file
                temp_name, is_multi_byte = sniff.stream_to_file( StringIO.StringIO( d_type.generate_primary_file( dataset ) ), prefix='upload_auto_primary_file' )
                dataset.primary_file = temp_name
                dataset.to_posix_lines = True
                dataset.space_to_tab = False
            else:
                file_bunch, warnings = get_one_filename( groups_incoming[ 0 ] )
                writable_files_offset = 1
                dataset.primary_file = file_bunch.path
                dataset.to_posix_lines = file_bunch.to_posix_lines
                dataset.space_to_tab = file_bunch.space_to_tab
                dataset.warnings.extend( warnings )
            if dataset.primary_file is None:  # remove this before finish, this should create an empty dataset
                raise Exception( 'No primary dataset file was available for composite upload' )
            keys = [ value.name for value in writable_files.values() ]
            for i, group_incoming in enumerate( groups_incoming[ writable_files_offset : ] ):
                key = keys[ i + writable_files_offset ]
                if group_incoming is None and not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional:
                    dataset.warnings.append( "A required composite file (%s) was not specified." % ( key ) )
                    dataset.composite_files[ key ] = None
                else:
                    file_bunch, warnings = get_one_filename( group_incoming )
                    dataset.warnings.extend( warnings )
                    if file_bunch.path:
                        dataset.composite_files[ key ] = file_bunch.__dict__
                    else:
                        dataset.composite_files[ key ] = None
                        if not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional:
                            dataset.warnings.append( "A required composite file (%s) was not specified." % ( key ) )
            return [ dataset ]
        else:
            datasets = get_filenames( context[ self.name ][0] )
            rval = []
            for dataset in datasets:
                dataset.file_type = file_type
                dataset.datatype = d_type
                dataset.ext = self.get_datatype_ext( trans, context )
                dataset.dbkey = dbkey
                rval.append( dataset )
            return rval
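
Both of the variants above build Bunch objects describing each upload source (type, path, name, and so on). The real Bunch class is imported from Galaxy's utilities; a minimal stand-in, assumed here purely for illustration, behaves like this:

class Bunch:
    """Simple attribute container, standing in for galaxy.util.bunch.Bunch."""

    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)


# Hypothetical upload entry, as yielded by get_data_file_filename above.
upload = Bunch(type='file', path='/tmp/upload_1.dat', name='upload_1.dat')
print(upload.type, upload.path, upload.name)
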
Example #3
    def get_uploaded_datasets(self,
                              trans,
                              context,
                              override_name=None,
                              override_info=None):
        def get_data_file_filename(data_file,
                                   override_name=None,
                                   override_info=None):
            dataset_name = override_name
            dataset_info = override_info

            def get_file_name(file_name):
                file_name = file_name.split('\\')[-1]
                file_name = file_name.split('/')[-1]
                return file_name

            try:
                # Use the existing file
                if not dataset_name and 'filename' in data_file:
                    dataset_name = get_file_name(data_file['filename'])
                if not dataset_info:
                    dataset_info = 'uploaded file'
                return Bunch(type='file',
                             path=data_file['local_filename'],
                             name=dataset_name)
                #return 'file', data_file['local_filename'], get_file_name( data_file.filename ), dataset_name, dataset_info
            except:
                # The uploaded file should've been persisted by the upload tool action
                return Bunch(type=None, path=None, name=None)
                #return None, None, None, None, None
        def get_url_paste_urls_or_filename(group_incoming,
                                           override_name=None,
                                           override_info=None):
            filenames = []
            url_paste_file = group_incoming.get('url_paste', None)
            if url_paste_file is not None:
                url_paste = open(url_paste_file, 'r').read(1024)
                if url_paste.lstrip().lower().startswith(('http://', 'ftp://', 'https://')):
                    url_paste = url_paste.replace('\r', '').split('\n')
                    for line in url_paste:
                        line = line.strip()
                        if line:
                            if not line.lower().startswith(('http://', 'ftp://', 'https://')):
                                continue  # non-url line, ignore
                            dataset_name = override_name
                            if not dataset_name:
                                dataset_name = line
                            dataset_info = override_info
                            if not dataset_info:
                                dataset_info = 'uploaded url'
                            yield Bunch(type='url',
                                        path=line,
                                        name=dataset_name)
                            #yield ( 'url', line, precreated_name, dataset_name, dataset_info )
                else:
                    dataset_name = dataset_info = precreated_name = 'Pasted Entry'  #we need to differentiate between various url pastes here
                    if override_name:
                        dataset_name = override_name
                    if override_info:
                        dataset_info = override_info
                    yield Bunch(type='file',
                                path=url_paste_file,
                                name=precreated_name)
                    #yield ( 'file', url_paste_file, precreated_name, dataset_name, dataset_info )
        def get_one_filename(context):
            data_file = context['file_data']
            url_paste = context['url_paste']
            ftp_files = context['ftp_files']
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            uuid = context.get('uuid', None) or None  # Turn '' to None
            warnings = []
            to_posix_lines = False
            if context.get('to_posix_lines',
                           None) not in ["None", None, False]:
                to_posix_lines = True
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None, False]:
                space_to_tab = True
            file_bunch = get_data_file_filename(data_file,
                                                override_name=name,
                                                override_info=info)
            if file_bunch.path:
                if url_paste is not None and url_paste.strip():
                    warnings.append(
                        "All file contents specified in the paste box were ignored."
                    )
                if ftp_files:
                    warnings.append(
                        "All FTP uploaded file selections were ignored.")
            elif url_paste is not None and url_paste.strip():  # we need to use url_paste
                for file_bunch in get_url_paste_urls_or_filename(
                        context, override_name=name, override_info=info):
                    if file_bunch.path:
                        break
                if file_bunch.path and ftp_files is not None:
                    warnings.append(
                        "All FTP uploaded file selections were ignored.")
            elif ftp_files is not None and trans.user is not None:  # look for files uploaded via FTP
                user_ftp_dir = trans.user_ftp_dir
                for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                    for filename in filenames:
                        for ftp_filename in ftp_files:
                            if ftp_filename == filename:
                                path = relpath(os.path.join(dirpath, filename),
                                               user_ftp_dir)
                                if not os.path.islink(
                                        os.path.join(dirpath, filename)):
                                    ftp_data_file = {
                                        'local_filename':
                                        os.path.abspath(
                                            os.path.join(user_ftp_dir, path)),
                                        'filename':
                                        os.path.basename(path)
                                    }
                                    file_bunch = get_data_file_filename(
                                        ftp_data_file,
                                        override_name=name,
                                        override_info=info)
                                    if file_bunch.path:
                                        break
                        if file_bunch.path:
                            break
                    if file_bunch.path:
                        break
            file_bunch.to_posix_lines = to_posix_lines
            file_bunch.space_to_tab = space_to_tab
            file_bunch.uuid = uuid
            return file_bunch, warnings

        def get_filenames(context):
            rval = []
            data_file = context['file_data']
            url_paste = context['url_paste']
            ftp_files = context['ftp_files']
            uuid = context.get('uuid', None) or None  # Turn '' to None
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            to_posix_lines = False
            if context.get('to_posix_lines',
                           None) not in ["None", None, False]:
                to_posix_lines = True
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None, False]:
                space_to_tab = True
            warnings = []
            file_bunch = get_data_file_filename(data_file,
                                                override_name=name,
                                                override_info=info)
            file_bunch.uuid = uuid
            if file_bunch.path:
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.space_to_tab = space_to_tab
                rval.append(file_bunch)
            for file_bunch in get_url_paste_urls_or_filename(
                    context, override_name=name, override_info=info):
                if file_bunch.path:
                    file_bunch.uuid = uuid
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.space_to_tab = space_to_tab
                    rval.append(file_bunch)
            # look for files uploaded via FTP
            valid_files = []
            if ftp_files is not None:
                # Normalize input paths to ensure utf-8 encoding is normal form c.
                # This allows for comparison when the filesystem uses a different encoding than the browser.
                ftp_files = [
                    unicodedata.normalize('NFC', f) for f in ftp_files
                    if isinstance(f, unicode)
                ]
                if trans.user is None:
                    log.warning(
                        'Anonymous user passed values in ftp_files: %s' %
                        ftp_files)
                    ftp_files = []
                    # TODO: warning to the user (could happen if session has become invalid)
                else:
                    user_ftp_dir = trans.user_ftp_dir
                    for (dirpath, dirnames,
                         filenames) in os.walk(user_ftp_dir):
                        for filename in filenames:
                            path = relpath(os.path.join(dirpath, filename),
                                           user_ftp_dir)
                            if not os.path.islink(
                                    os.path.join(dirpath, filename)):
                                # Normalize filesystem paths
                                if isinstance(path, unicode):
                                    valid_files.append(
                                        unicodedata.normalize('NFC', path))
                                else:
                                    valid_files.append(path)

            else:
                ftp_files = []
            for ftp_file in ftp_files:
                if ftp_file not in valid_files:
                    log.warning(
                        'User passed an invalid file path in ftp_files: %s' %
                        ftp_file)
                    continue
                    # TODO: warning to the user (could happen if file is already imported)
                ftp_data_file = {
                    'local_filename':
                    os.path.abspath(os.path.join(user_ftp_dir, ftp_file)),
                    'filename':
                    os.path.basename(ftp_file)
                }
                file_bunch = get_data_file_filename(ftp_data_file,
                                                    override_name=name,
                                                    override_info=info)
                if file_bunch.path:
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.space_to_tab = space_to_tab
                    rval.append(file_bunch)
            return rval

        file_type = self.get_file_type(context)
        d_type = self.get_datatype(trans, context)
        dbkey = context.get('dbkey', None)
        writable_files = d_type.writable_files
        writable_files_offset = 0
        groups_incoming = [None for filename in writable_files]
        for group_incoming in context.get(self.name, []):
            i = int(group_incoming['__index__'])
            groups_incoming[i] = group_incoming
        if d_type.composite_type is not None:
            #handle uploading of composite datatypes
            #Only one Dataset can be created
            dataset = Bunch()
            dataset.type = 'composite'
            dataset.file_type = file_type
            dataset.dbkey = dbkey
            dataset.datatype = d_type
            dataset.warnings = []
            dataset.metadata = {}
            dataset.composite_files = {}
            #load metadata
            files_metadata = context.get(self.metadata_ref, {})
            metadata_name_substition_default_dict = dict([
                (composite_file.substitute_name_with_metadata,
                 d_type.metadata_spec[
                     composite_file.substitute_name_with_metadata].default)
                for composite_file in d_type.composite_files.values()
                if composite_file.substitute_name_with_metadata
            ])
            for meta_name, meta_spec in d_type.metadata_spec.iteritems():
                if meta_spec.set_in_upload:
                    if meta_name in files_metadata:
                        meta_value = files_metadata[meta_name]
                        if meta_name in metadata_name_substition_default_dict:
                            meta_value = sanitize_for_filename(
                                meta_value,
                                default=metadata_name_substition_default_dict[
                                    meta_name])
                        dataset.metadata[meta_name] = meta_value
            dataset.precreated_name = dataset.name = self.get_composite_dataset_name(
                context)
            if dataset.datatype.composite_type == 'auto_primary_file':
                #replace sniff here with just creating an empty file
                temp_name, is_multi_byte = sniff.stream_to_file(
                    StringIO.StringIO(d_type.generate_primary_file(dataset)),
                    prefix='upload_auto_primary_file')
                dataset.primary_file = temp_name
                dataset.to_posix_lines = True
                dataset.space_to_tab = False
            else:
                file_bunch, warnings = get_one_filename(groups_incoming[0])
                writable_files_offset = 1
                dataset.primary_file = file_bunch.path
                dataset.to_posix_lines = file_bunch.to_posix_lines
                dataset.space_to_tab = file_bunch.space_to_tab
                dataset.warnings.extend(warnings)
            if dataset.primary_file is None:  #remove this before finish, this should create an empty dataset
                raise Exception(
                    'No primary dataset file was available for composite upload'
                )
            keys = [value.name for value in writable_files.values()]
            for i, group_incoming in enumerate(
                    groups_incoming[writable_files_offset:]):
                key = keys[i + writable_files_offset]
                if group_incoming is None and not writable_files[
                        writable_files.keys()[keys.index(key)]].optional:
                    dataset.warnings.append(
                        "A required composite file (%s) was not specified." %
                        (key))
                    dataset.composite_files[key] = None
                else:
                    file_bunch, warnings = get_one_filename(group_incoming)
                    dataset.warnings.extend(warnings)
                    if file_bunch.path:
                        dataset.composite_files[key] = file_bunch.__dict__
                    else:
                        dataset.composite_files[key] = None
                        if not writable_files[writable_files.keys()[keys.index(
                                key)]].optional:
                            dataset.warnings.append(
                                "A required composite file (%s) was not specified."
                                % (key))
            return [dataset]
        else:
            datasets = get_filenames(context[self.name][0])
            rval = []
            for dataset in datasets:
                dataset.file_type = file_type
                dataset.datatype = d_type
                dataset.ext = self.get_datatype_ext(trans, context)
                dataset.dbkey = dbkey
                rval.append(dataset)
            return rval
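
The get_filenames helpers above normalize both the browser-supplied FTP selections and the on-disk file names to Unicode normal form C (NFC) before comparing them. A short, self-contained illustration with hypothetical file names shows why this matters:

import unicodedata

# 'é' may arrive precomposed from the browser but decomposed from the
# filesystem (for example on macOS), so raw string comparison fails.
browser_name = "r\u00e9sultats.txt"   # single precomposed code point U+00E9
disk_name = "re\u0301sultats.txt"     # 'e' followed by combining acute accent

print(browser_name == disk_name)                  # False
print(unicodedata.normalize('NFC', browser_name) ==
      unicodedata.normalize('NFC', disk_name))    # True
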
Example #4
    def get_uploaded_datasets(self, trans, context, override_name=None, override_info=None):
        def get_data_file_filename(data_file, override_name=None, override_info=None, purge=True):
            dataset_name = override_name

            def get_file_name(file_name):
                file_name = file_name.split('\\')[-1]
                file_name = file_name.split('/')[-1]
                return file_name
            try:
                # Use the existing file
                if not dataset_name and 'filename' in data_file:
                    dataset_name = get_file_name(data_file['filename'])
                return Bunch(type='file', path=data_file['local_filename'], name=dataset_name, purge_source=purge)
            except Exception:
                # The uploaded file should've been persisted by the upload tool action
                return Bunch(type=None, path=None, name=None)

        def get_url_paste_urls_or_filename(group_incoming, override_name=None, override_info=None):
            url_paste_file = group_incoming.get('url_paste', None)
            if url_paste_file is not None:
                url_paste = open(url_paste_file, 'r').read()

                def start_of_url(content):
                    start_of_url_paste = content.lstrip()[0:8].lower()
                    looks_like_url = False
                    for url_prefix in ["http://", "https://", "ftp://", "file://"]:
                        if start_of_url_paste.startswith(url_prefix):
                            looks_like_url = True
                            break

                    return looks_like_url

                if start_of_url(url_paste):
                    url_paste = url_paste.replace('\r', '').split('\n')
                    for line in url_paste:
                        line = line.strip()
                        if line:
                            if not start_of_url(line):
                                continue  # non-url line, ignore

                            if "file://" in line:
                                if not trans.user_is_admin:
                                    raise AdminRequiredException()
                                elif not trans.app.config.allow_path_paste:
                                    raise ConfigDoesNotAllowException()
                                upload_path = line[len("file://"):]
                                dataset_name = os.path.basename(upload_path)
                            else:
                                dataset_name = line

                            if override_name:
                                dataset_name = override_name
                            yield Bunch(type='url', path=line, name=dataset_name)
                else:
                    dataset_name = 'Pasted Entry'  # we need to differentiate between various url pastes here
                    if override_name:
                        dataset_name = override_name
                    yield Bunch(type='file', path=url_paste_file, name=dataset_name)

        def get_one_filename(context):
            data_file = context['file_data']
            url_paste = context['url_paste']
            ftp_files = context['ftp_files']
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            uuid = context.get('uuid', None) or None  # Turn '' to None
            file_type = context.get('file_type', None)
            dbkey = self.get_dbkey(context)
            warnings = []
            to_posix_lines = False
            if context.get('to_posix_lines', None) not in ["None", None, False]:
                to_posix_lines = True
            auto_decompress = False
            if context.get('auto_decompress', None) not in ["None", None, False]:
                auto_decompress = True
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None, False]:
                space_to_tab = True
            file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
            if file_bunch.path:
                if url_paste is not None and url_paste.strip():
                    warnings.append("All file contents specified in the paste box were ignored.")
                if ftp_files:
                    warnings.append("All FTP uploaded file selections were ignored.")
            elif url_paste is not None and url_paste.strip():  # we need to use url_paste
                for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
                    if file_bunch.path:
                        break
                if file_bunch.path and ftp_files is not None:
                    warnings.append("All FTP uploaded file selections were ignored.")
            elif ftp_files is not None and trans.user is not None:  # look for files uploaded via FTP
                user_ftp_dir = trans.user_ftp_dir
                assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
                for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                    for filename in filenames:
                        for ftp_filename in ftp_files:
                            if ftp_filename == filename:
                                path = relpath(os.path.join(dirpath, filename), user_ftp_dir)
                                if not os.path.islink(os.path.join(dirpath, filename)):
                                    ftp_data_file = {'local_filename' : os.path.abspath(os.path.join(user_ftp_dir, path)),
                                                     'filename' : os.path.basename(path)}
                                    purge = getattr(trans.app.config, 'ftp_upload_purge', True)
                                    file_bunch = get_data_file_filename(
                                        ftp_data_file,
                                        override_name=name,
                                        override_info=info,
                                        purge=purge,
                                    )
                                    if file_bunch.path:
                                        break
                        if file_bunch.path:
                            break
                    if file_bunch.path:
                        break
            file_bunch.to_posix_lines = to_posix_lines
            file_bunch.auto_decompress = auto_decompress
            file_bunch.space_to_tab = space_to_tab
            file_bunch.uuid = uuid
            if file_type is not None:
                file_bunch.file_type = file_type
            if dbkey is not None:
                file_bunch.dbkey = dbkey
            return file_bunch, warnings

        def get_filenames(context):
            rval = []
            data_file = context['file_data']
            ftp_files = context['ftp_files']
            uuid = context.get('uuid', None) or None  # Turn '' to None
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            file_type = context.get('file_type', None)
            dbkey = self.get_dbkey(context)
            to_posix_lines = False
            if context.get('to_posix_lines', None) not in ["None", None, False]:
                to_posix_lines = True
            auto_decompress = False
            if context.get('auto_decompress', None) not in ["None", None, False]:
                auto_decompress = True
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None, False]:
                space_to_tab = True
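            # Unlike get_one_filename above, every valid source (uploaded file,
            # pasted URLs and FTP selections) contributes its own dataset here.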
            file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
            file_bunch.uuid = uuid
            if file_bunch.path:
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.auto_decompress = auto_decompress
                file_bunch.space_to_tab = space_to_tab
                if file_type is not None:
                    file_bunch.file_type = file_type
                if dbkey is not None:
                    file_bunch.dbkey = dbkey

                rval.append(file_bunch)
            for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
                if file_bunch.path:
                    file_bunch.uuid = uuid
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.auto_decompress = auto_decompress
                    file_bunch.space_to_tab = space_to_tab
                    if file_type is not None:
                        file_bunch.file_type = file_type
                    if dbkey is not None:
                        file_bunch.dbkey = dbkey

                    rval.append(file_bunch)
            # look for files uploaded via FTP
            valid_files = []
            if ftp_files is not None:
                # Normalize input paths to ensure utf-8 encoding is normal form c.
                # This allows for comparison when the filesystem uses a different encoding than the browser.
                ftp_files = [unicodedata.normalize('NFC', f) for f in ftp_files if isinstance(f, text_type)]
                if trans.user is None:
                    log.warning('Anonymous user passed values in ftp_files: %s' % ftp_files)
                    ftp_files = []
                    # TODO: warning to the user (could happen if session has become invalid)
                else:
                    user_ftp_dir = trans.user_ftp_dir
                    assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
                    for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                        for filename in filenames:
                            path = relpath(os.path.join(dirpath, filename), user_ftp_dir)
                            if not os.path.islink(os.path.join(dirpath, filename)):
                                # Normalize filesystem paths
                                if isinstance(path, text_type):
                                    valid_files.append(unicodedata.normalize('NFC', path))
                                else:
                                    valid_files.append(path)

            else:
                ftp_files = []
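            # Only FTP selections that were actually found (after NFC normalization)
            # under the user's FTP directory are accepted.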
            for ftp_file in ftp_files:
                if ftp_file not in valid_files:
                    log.warning('User passed an invalid file path in ftp_files: %s' % ftp_file)
                    # TODO: warning to the user (could happen if file is already imported)
                    continue
                ftp_data_file = {
                    'local_filename': os.path.abspath(os.path.join(user_ftp_dir, ftp_file)),
                    'filename': os.path.basename(ftp_file),
                }
                purge = getattr(trans.app.config, 'ftp_upload_purge', True)
                file_bunch = get_data_file_filename(ftp_data_file, override_name=name, override_info=info, purge=purge)
                if file_bunch.path:
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.auto_decompress = auto_decompress
                    file_bunch.space_to_tab = space_to_tab
                    if file_type is not None:
                        file_bunch.file_type = file_type
                    if dbkey is not None:
                        file_bunch.dbkey = dbkey
                    rval.append(file_bunch)
            return rval
        file_type = self.get_file_type(context)
        file_count = self.get_file_count(trans, context)
        d_type = self.get_datatype(trans, context)
        dbkey = self.get_dbkey(context)
        tag_using_filenames = context.get('tag_using_filenames', False)
        force_composite = asbool(context.get('force_composite', 'False'))
        writable_files = d_type.writable_files
        writable_files_offset = 0
        groups_incoming = [None for _ in range(file_count)]
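        # Re-order the incoming repeat groups by their declared __index__.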
        for group_incoming in context.get(self.name, []):
            i = int(group_incoming['__index__'])
            groups_incoming[i] = group_incoming
        if d_type.composite_type is not None or force_composite:
            # handle uploading of composite datatypes
            # Only one Dataset can be created
            dataset = Bunch()
            dataset.type = 'composite'
            dataset.file_type = file_type
            dataset.dbkey = dbkey
            dataset.datatype = d_type
            dataset.warnings = []
            dataset.metadata = {}
            dataset.composite_files = {}
            dataset.uuid = None
            dataset.tag_using_filenames = None
            # load metadata
            files_metadata = context.get(self.metadata_ref, {})
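            # Map each metadata name that substitutes into composite file names to its
            # datatype-declared default, used to sanitize user-supplied values below.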
            metadata_name_substition_default_dict = dict(
                (composite_file.substitute_name_with_metadata,
                 d_type.metadata_spec[composite_file.substitute_name_with_metadata].default)
                for composite_file in d_type.composite_files.values()
                if composite_file.substitute_name_with_metadata
            )
            for meta_name, meta_spec in d_type.metadata_spec.items():
                if meta_spec.set_in_upload:
                    if meta_name in files_metadata:
                        meta_value = files_metadata[meta_name]
                        if meta_name in metadata_name_substition_default_dict:
                            meta_value = sanitize_for_filename(meta_value, default=metadata_name_substition_default_dict[meta_name])
                        dataset.metadata[meta_name] = meta_value
            dataset.name = self.get_composite_dataset_name(context)
            if dataset.datatype.composite_type == 'auto_primary_file':
                # replace sniff here with just creating an empty file
                temp_name = sniff.stream_to_file(StringIO(d_type.generate_primary_file(dataset)), prefix='upload_auto_primary_file')
                dataset.primary_file = temp_name
                dataset.to_posix_lines = True
                dataset.auto_decompress = True
                dataset.space_to_tab = False
            else:
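                # The first repeat group supplies the primary composite file; the
                # remaining groups supply the auxiliary composite files below.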
                file_bunch, warnings = get_one_filename(groups_incoming[0])
                writable_files_offset = 1
                dataset.primary_file = file_bunch.path
                dataset.to_posix_lines = file_bunch.to_posix_lines
                dataset.auto_decompress = file_bunch.auto_decompress
                dataset.space_to_tab = file_bunch.space_to_tab
                if file_bunch.file_type:
                    dataset.file_type = file_type
                if file_bunch.dbkey:
                    dataset.dbkey = dbkey
                dataset.warnings.extend(warnings)
            if dataset.primary_file is None:  # remove this before finish, this should create an empty dataset
                raise Exception('No primary dataset file was available for composite upload')
            if not force_composite:
                keys = [value.name for value in writable_files.values()]
            else:
                keys = [str(index) for index in range(file_count)]
            for i, group_incoming in enumerate(groups_incoming[writable_files_offset:]):
                key = keys[i + writable_files_offset]
                if (not force_composite and group_incoming is None
                        and not writable_files[list(writable_files.keys())[keys.index(key)]].optional):
                    dataset.warnings.append("A required composite file (%s) was not specified." % (key))
                    dataset.composite_files[key] = None
                else:
                    file_bunch, warnings = get_one_filename(group_incoming)
                    dataset.warnings.extend(warnings)
                    if file_bunch.path:
                        if force_composite:
                            key = group_incoming.get("NAME") or i
                        dataset.composite_files[key] = file_bunch.__dict__
                    elif not force_composite:
                        dataset.composite_files[key] = None
                        if not writable_files[list(writable_files.keys())[keys.index(key)]].optional:
                            dataset.warnings.append("A required composite file (%s) was not specified." % (key))
            return [dataset]
        else:
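            # Non-composite upload: each repeat group may yield several datasets
            # (uploaded file, pasted URLs and FTP selections), each with its own
            # per-group file type, datatype and dbkey.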
            rval = []
            for i, file_contexts in enumerate(context[self.name]):
                datasets = get_filenames(file_contexts)
                for dataset in datasets:
                    override_file_type = self.get_file_type(context[self.name][i], parent_context=context)
                    d_type = self.get_datatype(trans, context[self.name][i], parent_context=context)
                    dataset.file_type = override_file_type
                    dataset.datatype = d_type
                    dataset.ext = self.get_datatype_ext(trans, context[self.name][i], parent_context=context)
                    dataset.dbkey = self.get_dbkey(context[self.name][i], parent_context=context)
                    dataset.tag_using_filenames = tag_using_filenames
                    rval.append(dataset)
            return rval
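
The FTP-selection handling above depends on two details that are easy to miss: the requested names and the on-disk paths are both normalized to Unicode NFC before they are compared, and symbolic links are skipped while walking the user's FTP directory. The following is a minimal, standalone sketch of that matching logic; the helper names list_valid_ftp_files and select_ftp_files are hypothetical and do not appear in the code above.

import os
import unicodedata


def list_valid_ftp_files(user_ftp_dir):
    """Collect NFC-normalized relative paths of regular files under user_ftp_dir."""
    valid = []
    for dirpath, _dirnames, filenames in os.walk(user_ftp_dir):
        for filename in filenames:
            full_path = os.path.join(dirpath, filename)
            if os.path.islink(full_path):
                continue  # symbolic links are never offered for upload
            rel_path = os.path.relpath(full_path, user_ftp_dir)
            valid.append(unicodedata.normalize('NFC', rel_path))
    return valid


def select_ftp_files(requested, user_ftp_dir):
    """Keep only the requested names that actually exist in the FTP directory."""
    valid = set(list_valid_ftp_files(user_ftp_dir))
    requested = [unicodedata.normalize('NFC', name) for name in requested]
    return [name for name in requested if name in valid]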