Example #1
    def to_path(path_or_url):
        is_url = path_or_url.find('://') != -1  # todo fixme
        if is_url:
            try:
                temp_name = sniff.stream_to_file(urlopen(path_or_url), prefix='url_paste')
            except Exception as e:
                raise UploadProblemException('Unable to fetch %s\n%s' % (path_or_url, unicodify(e)))

            return temp_name, is_url

        return path_or_url, is_url
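
Example #1 decides whether its argument is remote purely by scanning for a '://' separator (hence the '# todo fixme'). A minimal standalone sketch of a slightly stricter check, using only the standard library; the name looks_like_url is illustrative and not part of Galaxy's API:

from urllib.parse import urlparse


def looks_like_url(path_or_url):
    # Stricter than the "'://' in path_or_url" test above: require both a
    # scheme and a network location before treating the value as a URL.
    parts = urlparse(path_or_url)
    return bool(parts.scheme and parts.netloc)


assert looks_like_url('https://example.org/data.tsv')
assert not looks_like_url('/galaxy/database/files/dataset_1.dat')
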
Example #2
def add_composite_file(dataset, output_path, files_path):
    if dataset.composite_files:
        os.mkdir(files_path)
        for name, value in dataset.composite_files.items():
            value = util.bunch.Bunch(**value)
            if dataset.composite_file_paths[
                    value.name] is None and not value.optional:
                raise UploadProblemException(
                    'A required composite data file was not provided (%s)' %
                    name)
            elif dataset.composite_file_paths[value.name] is not None:
                dp = dataset.composite_file_paths[value.name]['path']
                isurl = dp.find('://') != -1  # todo fixme
                if isurl:
                    try:
                        temp_name = sniff.stream_to_file(urlopen(dp),
                                                         prefix='url_paste')
                    except Exception as e:
                        raise UploadProblemException('Unable to fetch %s\n%s' %
                                                     (dp, str(e)))
                    dataset.path = temp_name
                    dp = temp_name
                if not value.is_binary:
                    tmpdir = output_adjacent_tmpdir(output_path)
                    tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id
                    if dataset.composite_file_paths[value.name].get(
                            'space_to_tab', value.space_to_tab):
                        sniff.convert_newlines_sep2tabs(dp,
                                                        tmp_dir=tmpdir,
                                                        tmp_prefix=tmp_prefix)
                    else:
                        sniff.convert_newlines(dp,
                                               tmp_dir=tmpdir,
                                               tmp_prefix=tmp_prefix)
                shutil.move(dp, os.path.join(files_path, name))
    # Move the dataset to its "real" path
    shutil.move(dataset.primary_file, output_path)
    # Write the job info
    return dict(type='dataset',
                dataset_id=dataset.dataset_id,
                stdout='uploaded %s file' % dataset.file_type)
Example #3
    def to_path(path_or_url):
        is_url = path_or_url.find('://') != -1  # todo fixme
        if is_url:
            try:
                temp_name = sniff.stream_url_to_file(
                    path_or_url, file_sources=get_file_sources())
            except Exception as e:
                raise UploadProblemException('Unable to fetch %s\n%s' %
                                             (path_or_url, unicodify(e)))

            return temp_name, is_url

        return path_or_url, is_url
Example #4
    def to_path(path_or_url):
        isa_url = is_url(path_or_url)
        file_sources = get_file_sources()
        if isa_url or file_sources and file_sources.looks_like_uri(
                path_or_url):
            try:
                temp_name = sniff.stream_url_to_file(path_or_url,
                                                     file_sources=file_sources)
            except Exception as e:
                raise UploadProblemException('Unable to fetch %s\n%s' %
                                             (path_or_url, unicodify(e)))

            return temp_name, isa_url

        return path_or_url, isa_url
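
Example #4 swaps the substring test for is_url() plus, when a file_sources object is configured, file_sources.looks_like_uri(). The condition relies on Python operator precedence: 'a or b and c' parses as 'a or (b and c)', so looks_like_uri() is only consulted when file_sources is truthy. A hedged sketch with a stand-in file sources object (the gxfiles:// prefix is just an illustrative URI scheme):

class FakeFileSources:
    # Illustrative stand-in for Galaxy's file sources object; only the
    # looks_like_uri() method matters for this sketch.
    def looks_like_uri(self, value):
        return value.startswith('gxfiles://')


def should_fetch(isa_url, file_sources, path_or_url):
    # Same boolean expression as in Example #4.
    return bool(isa_url or file_sources and file_sources.looks_like_uri(path_or_url))


assert should_fetch(True, None, 'https://example.org/x.txt')
assert not should_fetch(False, None, 'gxfiles://library/x.txt')   # no file sources configured
assert should_fetch(False, FakeFileSources(), 'gxfiles://library/x.txt')
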
Example #5
def add_file(dataset, registry, output_path):
    ext = None
    compression_type = None
    line_count = None
    link_data_only_str = dataset.get('link_data_only', 'copy_files')
    if link_data_only_str not in ['link_to_files', 'copy_files']:
        raise UploadProblemException(
            "Invalid setting '%s' for option link_data_only - upload request misconfigured"
            % link_data_only_str)
    link_data_only = link_data_only_str == 'link_to_files'

    # run_as_real_user is estimated from the Galaxy config (an external chmod of the inputs indicates the job executes as the real user).
    # If this is True we always purge supplied upload inputs so they are cleaned up and we reuse their
    # paths during data conversions since this user already owns that path.
    # Older in_place check for upload jobs created before 18.01, TODO remove in 19.XX. xref #5206
    run_as_real_user = dataset.get('run_as_real_user', False) or dataset.get(
        "in_place", False)

    # purge_source defaults to True unless this is an FTP import and
    # ftp_upload_purge has been overridden to False in Galaxy's config.
    # We set purge_source to False if:
    # - the job does not have write access to the file, e.g. when running as the
    #   real user
    # - the files are uploaded from external paths.
    purge_source = dataset.get(
        'purge_source',
        True) and not run_as_real_user and dataset.type not in ('server_dir',
                                                                'path_paste')

    # in_place is True unless we are running as a real user or importing external paths (i.e.
    # this is a real upload and not a path paste or ftp import).
    # in_place should always be False if running as real user because the uploaded file will
    # be owned by Galaxy and not the user and it should be False for external paths so Galaxy doesn't
    # modify files not controlled by Galaxy.
    in_place = not run_as_real_user and dataset.type not in ('server_dir',
                                                             'path_paste',
                                                             'ftp_import')

    # Based on the check_upload_content Galaxy config option (on by default), this enables some
    # security-related checks on the uploaded content, but can prevent uploads from working in some cases.
    check_content = dataset.get('check_content', True)

    # auto_decompress is a request flag that can be switched off to prevent Galaxy from automatically
    # decompressing archive files before sniffing.
    auto_decompress = dataset.get('auto_decompress', True)
    try:
        dataset.file_type
    except AttributeError:
        raise UploadProblemException(
            'Unable to process uploaded file, missing file_type parameter.')

    if dataset.type == 'url':
        try:
            dataset.path = sniff.stream_url_to_file(
                dataset.path, file_sources=get_file_sources())
        except Exception as e:
            raise UploadProblemException('Unable to fetch %s\n%s' %
                                         (dataset.path, unicodify(e)))

    # Make sure the uploaded temporary file exists
    if not os.path.exists(dataset.path):
        raise UploadProblemException(
            'Uploaded temporary file (%s) does not exist.' % dataset.path)

    stdout, ext, datatype, is_binary, converted_path = handle_upload(
        registry=registry,
        path=dataset.path,
        requested_ext=dataset.file_type,
        name=dataset.name,
        tmp_prefix='data_id_%s_upload_' % dataset.dataset_id,
        tmp_dir=output_adjacent_tmpdir(output_path),
        check_content=check_content,
        link_data_only=link_data_only,
        in_place=in_place,
        auto_decompress=auto_decompress,
        convert_to_posix_lines=dataset.to_posix_lines,
        convert_spaces_to_tabs=dataset.space_to_tab,
    )

    # Strip compression extension from name
    if compression_type and not getattr(
            datatype, 'compressed',
            False) and dataset.name.endswith('.' + compression_type):
        dataset.name = dataset.name[:-len('.' + compression_type)]

    # Move dataset
    if link_data_only:
        # Never alter a file that will not be copied to Galaxy's local file store.
        if datatype.dataset_content_needs_grooming(dataset.path):
            err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
                '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
            raise UploadProblemException(err_msg)
    if not link_data_only:
        # Move the dataset to its "real" path. converted_path is a tempfile so we move it even if purge_source is False.
        if purge_source or converted_path:
            try:
                # The user has indicated the original file should be purged and we have a converted_path tempfile
                if purge_source and converted_path:
                    shutil.move(converted_path, output_path)
                    os.remove(dataset.path)
                else:
                    shutil.move(converted_path or dataset.path, output_path)
            except OSError as e:
                # We may not have permission to remove the input
                if e.errno != errno.EACCES:
                    raise
        else:
            shutil.copy(dataset.path, output_path)

    # Write the job info
    stdout = stdout or 'uploaded %s file' % ext
    info = dict(type='dataset',
                dataset_id=dataset.dataset_id,
                ext=ext,
                stdout=stdout,
                name=dataset.name,
                line_count=line_count)
    if dataset.get('uuid', None) is not None:
        info['uuid'] = dataset.get('uuid')
    # FIXME: does this belong here? also not output-adjacent-tmpdir aware =/
    if not link_data_only and datatype and datatype.dataset_content_needs_grooming(
            output_path):
        # Groom the dataset content if necessary
        datatype.groom_dataset_content(output_path)
    return info
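
The purge_source and in_place flags in Example #5 are pure boolean derivations from the request. A small sketch that reproduces that logic with illustrative names, useful for reasoning about the different upload modes:

def derive_flags(dataset_type, run_as_real_user, purge_source_requested=True):
    # Mirrors the boolean logic in add_file() above; names are illustrative.
    external = dataset_type in ('server_dir', 'path_paste')
    purge_source = purge_source_requested and not run_as_real_user and not external
    in_place = not run_as_real_user and dataset_type not in ('server_dir', 'path_paste', 'ftp_import')
    return purge_source, in_place


# A plain browser/HTTP upload keeps both behaviours on ...
assert derive_flags('url', run_as_real_user=False) == (True, True)
# ... while a path paste never purges or modifies the source file.
assert derive_flags('path_paste', run_as_real_user=False) == (False, False)
# Running as the real user disables both as well.
assert derive_flags('ftp_import', run_as_real_user=True) == (False, False)
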
Example #6
def add_composite_file(dataset, registry, output_path, files_path):
    datatype = None

    # Find data type
    if dataset.file_type is not None:
        datatype = registry.get_datatype_by_extension(dataset.file_type)

    def to_path(path_or_url):
        is_url = path_or_url.find('://') != -1  # todo fixme
        if is_url:
            try:
                temp_name = sniff.stream_url_to_file(
                    path_or_url, file_sources=get_file_sources())
            except Exception as e:
                raise UploadProblemException('Unable to fetch %s\n%s' %
                                             (path_or_url, unicodify(e)))

            return temp_name, is_url

        return path_or_url, is_url

    def make_files_path():
        safe_makedirs(files_path)

    def stage_file(name, composite_file_path, is_binary=False):
        dp = composite_file_path['path']
        path, is_url = to_path(dp)
        if is_url:
            dataset.path = path
            dp = path

        auto_decompress = composite_file_path.get('auto_decompress', True)
        if auto_decompress and not datatype.composite_type and CompressedFile.can_decompress(
                dp):
            # It isn't an explicitly composite datatype, so these are just extra files to attach
            # as composite data. It'd be better if Galaxy was communicating this to the tool
            # a little more explicitly so we didn't need to dispatch on the datatype and so we
            # could attach arbitrary extra composite data to an existing composite datatype
            # if need be? Perhaps that would be a mistake though.
            CompressedFile(dp).extract(files_path)
        else:
            tmpdir = output_adjacent_tmpdir(output_path)
            tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id
            sniff.handle_composite_file(
                datatype,
                dp,
                files_path,
                name,
                is_binary,
                tmpdir,
                tmp_prefix,
                composite_file_path,
            )

    # Do we have pre-defined composite files from the datatype definition?
    if dataset.composite_files:
        make_files_path()
        for name, value in dataset.composite_files.items():
            value = bunch.Bunch(**value)
            if value.name not in dataset.composite_file_paths:
                raise UploadProblemException(
                    "Failed to find file_path %s in %s" %
                    (value.name, dataset.composite_file_paths))
            if dataset.composite_file_paths[
                    value.name] is None and not value.optional:
                raise UploadProblemException(
                    'A required composite data file was not provided (%s)' %
                    name)
            elif dataset.composite_file_paths[value.name] is not None:
                composite_file_path = dataset.composite_file_paths[value.name]
                stage_file(name, composite_file_path, value.is_binary)

    # Do we have ad-hoc, user-supplied composite files?
    elif dataset.composite_file_paths:
        make_files_path()
        for key, composite_file in dataset.composite_file_paths.items():
            stage_file(key, composite_file)  # TODO: replace these defaults

    # Move the dataset to its "real" path
    primary_file_path, _ = to_path(dataset.primary_file)
    shutil.move(primary_file_path, output_path)

    # Write the job info
    return dict(type='dataset',
                dataset_id=dataset.dataset_id,
                stdout='uploaded %s file' % dataset.file_type)
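
The composite-file loop in Example #6 enforces one invariant before staging anything: every non-optional composite file declared by the datatype must have a supplied path. A minimal sketch of that validation over plain dicts (UploadProblem is a stand-in for UploadProblemException):

class UploadProblem(Exception):
    # Stand-in for Galaxy's UploadProblemException, for illustration only.
    pass


def validate_composite(composite_files, composite_file_paths):
    # composite_files: name -> {"optional": bool}
    # composite_file_paths: name -> supplied path (or None when missing)
    staged = {}
    for name, spec in composite_files.items():
        supplied = composite_file_paths.get(name)
        if supplied is None and not spec.get("optional", False):
            raise UploadProblem('A required composite data file was not provided (%s)' % name)
        if supplied is not None:
            staged[name] = supplied
    return staged


assert validate_composite({'index': {'optional': True}}, {'index': None}) == {}
assert validate_composite({'data': {'optional': False}}, {'data': '/tmp/part1'}) == {'data': '/tmp/part1'}
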
Example #7
def add_file(dataset, registry, output_path):
    ext = None
    compression_type = None
    line_count = None
    converted_path = None
    stdout = None
    link_data_only_str = dataset.get('link_data_only', 'copy_files')
    if link_data_only_str not in ['link_to_files', 'copy_files']:
        raise UploadProblemException(
            "Invalid setting '%s' for option link_data_only - upload request misconfigured"
            % link_data_only_str)
    link_data_only = link_data_only_str == 'link_to_files'

    # run_as_real_user is estimated from the Galaxy config (an external chmod of the inputs indicates the job executes as the real user).
    # If this is True we always purge supplied upload inputs so they are cleaned up and we reuse their
    # paths during data conversions since this user already owns that path.
    # Older in_place check for upload jobs created before 18.01, TODO remove in 19.XX. xref #5206
    run_as_real_user = dataset.get('run_as_real_user', False) or dataset.get(
        "in_place", False)

    # purge_source defaults to True unless this is an FTP import and
    # ftp_upload_purge has been overridden to False in Galaxy's config.
    # We set purge_source to False if:
    # - the job does not have write access to the file, e.g. when running as the
    #   real user
    # - the files are uploaded from external paths.
    purge_source = dataset.get(
        'purge_source',
        True) and not run_as_real_user and dataset.type not in ('server_dir',
                                                                'path_paste')

    # in_place is True unless we are running as a real user or importing external paths (i.e.
    # this is a real upload and not a path paste or ftp import).
    # in_place should always be False if running as real user because the uploaded file will
    # be owned by Galaxy and not the user and it should be False for external paths so Galaxy doesn't
    # modify files not controlled by Galaxy.
    in_place = not run_as_real_user and dataset.type not in ('server_dir',
                                                             'path_paste',
                                                             'ftp_import')

    # Based on the check_upload_content Galaxy config option (on by default), this enables some
    # security-related checks on the uploaded content, but can prevent uploads from working in some cases.
    check_content = dataset.get('check_content', True)

    # auto_decompress is a request flag that can be switched off to prevent Galaxy from automatically
    # decompressing archive files before sniffing.
    auto_decompress = dataset.get('auto_decompress', True)
    try:
        dataset.file_type
    except AttributeError:
        raise UploadProblemException(
            'Unable to process uploaded file, missing file_type parameter.')

    if dataset.type == 'url':
        try:
            dataset.path = sniff.stream_url_to_file(dataset.path)
        except Exception as e:
            raise UploadProblemException('Unable to fetch %s\n%s' %
                                         (dataset.path, str(e)))

    # See if we have an empty file
    if not os.path.exists(dataset.path):
        raise UploadProblemException(
            'Uploaded temporary file (%s) does not exist.' % dataset.path)

    if not os.path.getsize(dataset.path) > 0:
        raise UploadProblemException('The uploaded file is empty')

    # Does the first 1K contain a null?
    is_binary = check_binary(dataset.path)

    # Decompress if needed/desired and determine/validate filetype. If a keep-compressed datatype is explicitly selected
    # or if autodetection is selected and the file sniffs as a keep-compressed datatype, it will not be decompressed.
    if not link_data_only:
        if is_zip(dataset.path) and not is_single_file_zip(dataset.path):
            stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.'
        try:
            ext, converted_path, compression_type = sniff.handle_uploaded_dataset_file(
                dataset.path,
                registry,
                ext=dataset.file_type,
                tmp_prefix='data_id_%s_upload_' % dataset.dataset_id,
                tmp_dir=output_adjacent_tmpdir(output_path),
                in_place=in_place,
                check_content=check_content,
                is_binary=is_binary,
                auto_decompress=auto_decompress,
                uploaded_file_ext=os.path.splitext(
                    dataset.name)[1].lower().lstrip('.'),
                convert_to_posix_lines=dataset.to_posix_lines,
                convert_spaces_to_tabs=dataset.space_to_tab,
            )
        except sniff.InappropriateDatasetContentError as exc:
            raise UploadProblemException(str(exc))
    elif dataset.file_type == 'auto':
        # Link mode can't decompress anyway, so enable sniffing for keep-compressed datatypes even when auto_decompress
        # is enabled
        os.environ['GALAXY_SNIFFER_VALIDATE_MODE'] = '1'
        ext = sniff.guess_ext(dataset.path,
                              registry.sniff_order,
                              is_binary=is_binary)
        os.environ.pop('GALAXY_SNIFFER_VALIDATE_MODE')

    # The converted path will be the same as the input path if no conversion was done (or in-place conversion is used)
    converted_path = None if converted_path == dataset.path else converted_path

    # Validate datasets where the filetype was explicitly set using the filetype's sniffer (if any)
    if dataset.file_type != 'auto':
        datatype = registry.get_datatype_by_extension(dataset.file_type)
        # Enable sniffer "validate mode" (prevents certain sniffers from disabling themselves)
        os.environ['GALAXY_SNIFFER_VALIDATE_MODE'] = '1'
        if hasattr(datatype, 'sniff') and not datatype.sniff(dataset.path):
            stdout = (
                "Warning: The file 'Type' was set to '{ext}' but the file does not appear to be of that"
                " type".format(ext=dataset.file_type))
        os.environ.pop('GALAXY_SNIFFER_VALIDATE_MODE')

    # Handle unsniffable binaries
    if is_binary and ext == 'binary':
        upload_ext = os.path.splitext(dataset.name)[1].lower().lstrip('.')
        if registry.is_extension_unsniffable_binary(upload_ext):
            stdout = (
                "Warning: The file's datatype cannot be determined from its contents and was guessed based on"
                " its extension, to avoid this warning, manually set the file 'Type' to '{ext}' when uploading"
                " this type of file".format(ext=upload_ext))
            ext = upload_ext
        else:
            stdout = (
                "The uploaded binary file format cannot be determined automatically, please set the file 'Type'"
                " manually")

    datatype = registry.get_datatype_by_extension(ext)

    # Strip compression extension from name
    if compression_type and not getattr(
            datatype, 'compressed',
            False) and dataset.name.endswith('.' + compression_type):
        dataset.name = dataset.name[:-len('.' + compression_type)]

    # Move dataset
    if link_data_only:
        # Never alter a file that will not be copied to Galaxy's local file store.
        if datatype.dataset_content_needs_grooming(dataset.path):
            err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
                '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
            raise UploadProblemException(err_msg)
    if not link_data_only:
        # Move the dataset to its "real" path. converted_path is a tempfile so we move it even if purge_source is False.
        if purge_source or converted_path:
            try:
                shutil.move(converted_path or dataset.path, output_path)
            except OSError as e:
                # We may not have permission to remove the input
                if e.errno != errno.EACCES:
                    raise
        else:
            shutil.copy(dataset.path, output_path)

    # Write the job info
    stdout = stdout or 'uploaded %s file' % ext
    info = dict(type='dataset',
                dataset_id=dataset.dataset_id,
                ext=ext,
                stdout=stdout,
                name=dataset.name,
                line_count=line_count)
    if dataset.get('uuid', None) is not None:
        info['uuid'] = dataset.get('uuid')
    # FIXME: does this belong here? also not output-adjacent-tmpdir aware =/
    if not link_data_only and datatype and datatype.dataset_content_needs_grooming(
            output_path):
        # Groom the dataset content if necessary
        datatype.groom_dataset_content(output_path)
    return info
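
The "Strip compression extension from name" block appears in both Example #5 and Example #7. A standalone sketch of that rule (the function name is illustrative):

def strip_compression_suffix(name, compression_type, datatype_is_compressed=False):
    # Same rule as the "Strip compression extension from name" block above:
    # drop the trailing ".<compression_type>" unless the datatype itself is compressed.
    suffix = '.' + compression_type if compression_type else ''
    if suffix and not datatype_is_compressed and name.endswith(suffix):
        return name[:-len(suffix)]
    return name


assert strip_compression_suffix('reads.fastq.gz', 'gz') == 'reads.fastq'
assert strip_compression_suffix('archive.tar.gz', 'gz', datatype_is_compressed=True) == 'archive.tar.gz'
assert strip_compression_suffix('table.tsv', None) == 'table.tsv'
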
Example #8
    def _resolve_src(item):
        converted_path = None

        name, path = _has_src_to_path(item)
        dbkey = item.get("dbkey", "?")
        requested_ext = item.get("ext", "auto")
        info = item.get("info", None)
        object_id = item.get("object_id", None)
        link_data_only = upload_config.link_data_only
        if "link_data_only" in item:
            # Allow overriding this on a per file basis.
            link_data_only = _link_data_only(item)
        to_posix_lines = upload_config.get_option(item, "to_posix_lines")
        space_to_tab = upload_config.get_option(item, "space_to_tab")
        in_place = item.get("in_place", False)
        purge_source = item.get("purge_source", True)

        # Follow upload.py logic but without the auto-decompress logic.
        registry = upload_config.registry
        check_content = upload_config.check_content
        data_type, ext = None, requested_ext

        is_binary = check_binary(path)
        if is_binary:
            data_type, ext = handle_sniffable_binary_check(
                data_type, ext, path, registry)
        if data_type is None:
            root_datatype = registry.get_datatype_by_extension(ext)
            if getattr(root_datatype, 'compressed', False):
                data_type = 'compressed archive'
                ext = ext
            elif is_binary:
                data_type, ext = handle_unsniffable_binary_check(
                    data_type, ext, path, name, is_binary, requested_ext,
                    check_content, registry)
        if not data_type and check_content and check_html(path):
            raise UploadProblemException(
                'The uploaded file contains inappropriate HTML content')

        if data_type != 'binary':
            if not link_data_only:
                if to_posix_lines:
                    if space_to_tab:
                        line_count, converted_path = sniff.convert_newlines_sep2tabs(
                            path, in_place=in_place, tmp_dir=".")
                    else:
                        line_count, converted_path = sniff.convert_newlines(
                            path, in_place=in_place, tmp_dir=".")
                else:
                    if space_to_tab:
                        line_count, converted_path = sniff.sep2tabs(
                            path, in_place=in_place, tmp_dir=".")

            if requested_ext == 'auto':
                ext = sniff.guess_ext(converted_path or path,
                                      registry.sniff_order)
            else:
                ext = requested_ext

            data_type = ext

        if ext == 'auto' and data_type == 'binary':
            ext = 'data'
        if ext == 'auto' and requested_ext:
            ext = requested_ext
        if ext == 'auto':
            ext = 'data'

        datatype = registry.get_datatype_by_extension(ext)
        if link_data_only:
            # Never alter a file that will not be copied to Galaxy's local file store.
            if datatype.dataset_content_needs_grooming(path):
                err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
                    '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
                raise UploadProblemException(err_msg)

        # If this file is not in the workdir make sure it gets there.
        if not link_data_only and converted_path:
            path = upload_config.ensure_in_working_directory(
                converted_path, purge_source, in_place)
        elif not link_data_only:
            path = upload_config.ensure_in_working_directory(
                path, purge_source, in_place)

        if not link_data_only and datatype and datatype.dataset_content_needs_grooming(
                path):
            # Groom the dataset content if necessary
            datatype.groom_dataset_content(path)

        rval = {
            "name": name,
            "filename": path,
            "dbkey": dbkey,
            "ext": ext,
            "link_data_only": link_data_only
        }
        if info is not None:
            rval["info"] = info
        if object_id is not None:
            rval["object_id"] = object_id
        return rval
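
Example #8 ends its type detection with a chain of 'auto' fallbacks. Reproduced here as a standalone function with a few checks:

def resolve_ext(ext, data_type, requested_ext):
    # Reproduces the fallback chain at the end of _resolve_src() above.
    if ext == 'auto' and data_type == 'binary':
        ext = 'data'
    if ext == 'auto' and requested_ext:
        ext = requested_ext
    if ext == 'auto':
        ext = 'data'
    return ext


assert resolve_ext('auto', 'binary', 'auto') == 'data'
assert resolve_ext('auto', 'txt', 'tabular') == 'tabular'
assert resolve_ext('fastq', 'fastq', 'auto') == 'fastq'
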
Example #9
    def _resolve_item_with_primary(item):
        error_message = None
        converted_path = None

        name, path = _has_src_to_path(upload_config, item, is_dataset=True)
        sources = []

        url = item.get("url")
        if url:
            sources.append({"source_uri": url})
        hashes = item.get("hashes", [])
        for hash_dict in hashes:
            hash_function = hash_dict.get("hash_function")
            hash_value = hash_dict.get("hash_value")
            try:
                _handle_hash_validation(upload_config, hash_function,
                                        hash_value, path)
            except Exception as e:
                error_message = str(e)
                item["error_message"] = error_message

        dbkey = item.get("dbkey", "?")
        link_data_only = upload_config.link_data_only
        if "link_data_only" in item:
            # Allow overriding this on a per file basis.
            link_data_only = _link_data_only(item)

        ext = "data"
        staged_extra_files = None
        if not error_message:
            requested_ext = item.get("ext", "auto")
            to_posix_lines = upload_config.get_option(item, "to_posix_lines")
            space_to_tab = upload_config.get_option(item, "space_to_tab")
            auto_decompress = upload_config.get_option(item, "auto_decompress")
            in_place = item.get("in_place", False)
            purge_source = item.get("purge_source", True)

            registry = upload_config.registry
            check_content = upload_config.check_content

            stdout, ext, datatype, is_binary, converted_path = handle_upload(
                registry=registry,
                path=path,
                requested_ext=requested_ext,
                name=name,
                tmp_prefix='data_fetch_upload_',
                tmp_dir=".",
                check_content=check_content,
                link_data_only=link_data_only,
                in_place=in_place,
                auto_decompress=auto_decompress,
                convert_to_posix_lines=to_posix_lines,
                convert_spaces_to_tabs=space_to_tab,
            )

            if link_data_only:
                # Never alter a file that will not be copied to Galaxy's local file store.
                if datatype.dataset_content_needs_grooming(path):
                    err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
                        '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
                    raise UploadProblemException(err_msg)

            # If this file is not in the workdir make sure it gets there.
            if not link_data_only and converted_path:
                path = upload_config.ensure_in_working_directory(
                    converted_path, purge_source, in_place)
            elif not link_data_only:
                path = upload_config.ensure_in_working_directory(
                    path, purge_source, in_place)

            extra_files = item.get("extra_files")
            if extra_files:
                # TODO: optimize to just copy the whole directory to extra files instead.
                assert not upload_config.link_data_only, "linking composite dataset files not yet implemented"
                extra_files_path = path + "_extra"
                staged_extra_files = extra_files_path
                os.mkdir(extra_files_path)

                def walk_extra_files(items, prefix=""):
                    for item in items:
                        if "elements" in item:
                            name = item.get("name")
                            if not prefix:
                                item_prefix = name
                            else:
                                item_prefix = os.path.join(prefix, name)
                            walk_extra_files(item.get("elements"),
                                             prefix=item_prefix)
                        else:
                            name, src_path = _has_src_to_path(
                                upload_config, item)
                            if prefix:
                                rel_path = os.path.join(prefix, name)
                            else:
                                rel_path = name

                            file_output_path = os.path.join(
                                staged_extra_files, rel_path)
                            parent_dir = os.path.dirname(file_output_path)
                            if not os.path.exists(parent_dir):
                                safe_makedirs(parent_dir)
                            shutil.move(src_path, file_output_path)

                walk_extra_files(extra_files.get("elements", []))

            # TODO:
            # in galaxy json add 'extra_files' and point at target derived from extra_files:
            if not link_data_only and datatype and datatype.dataset_content_needs_grooming(
                    path):
                # Groom the dataset content if necessary
                datatype.groom_dataset_content(path)

        rval = {
            "name": name,
            "filename": path,
            "dbkey": dbkey,
            "ext": ext,
            "link_data_only": link_data_only,
            "sources": sources,
            "hashes": hashes
        }
        if staged_extra_files:
            rval["extra_files"] = os.path.abspath(staged_extra_files)
        return _copy_and_validate_simple_attributes(item, rval)
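
walk_extra_files() in Example #9 recurses over an "elements" tree, joining the nested names into a relative path before moving each file under the extra-files directory. A hedged sketch of just the path-building recursion, over plain dicts with an illustrative "src_path" key in place of _has_src_to_path():

import os


def collect_relative_paths(elements, prefix=""):
    # Flattens a nested "elements" tree into (relative path, source) pairs,
    # following the same prefix-joining rule as walk_extra_files() above.
    pairs = []
    for element in elements:
        name = element.get("name")
        if "elements" in element:
            child_prefix = os.path.join(prefix, name) if prefix else name
            pairs.extend(collect_relative_paths(element["elements"], child_prefix))
        else:
            rel_path = os.path.join(prefix, name) if prefix else name
            pairs.append((rel_path, element.get("src_path")))
    return pairs


tree = [{"name": "genome", "elements": [{"name": "ref.fa", "src_path": "/tmp/ref.fa"}]}]
assert collect_relative_paths(tree) == [(os.path.join("genome", "ref.fa"), "/tmp/ref.fa")]
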
Example #10
    def _resolve_src(item):
        converted_path = None

        name, path = _has_src_to_path(item)
        dbkey = item.get("dbkey", "?")
        requested_ext = item.get("ext", "auto")
        info = item.get("info", None)
        object_id = item.get("object_id", None)
        link_data_only = upload_config.link_data_only
        if "link_data_only" in item:
            # Allow overriding this on a per file basis.
            link_data_only = _link_data_only(item)
        to_posix_lines = upload_config.get_option(item, "to_posix_lines")
        space_to_tab = upload_config.get_option(item, "space_to_tab")
        auto_decompress = upload_config.get_option(item, "auto_decompress")
        in_place = item.get("in_place", False)
        purge_source = item.get("purge_source", True)

        registry = upload_config.registry
        check_content = upload_config.check_content

        stdout, ext, datatype, is_binary, converted_path = handle_upload(
            registry=registry,
            path=path,
            requested_ext=requested_ext,
            name=name,
            tmp_prefix='data_fetch_upload_',
            tmp_dir=".",
            check_content=check_content,
            link_data_only=link_data_only,
            in_place=in_place,
            auto_decompress=auto_decompress,
            convert_to_posix_lines=to_posix_lines,
            convert_spaces_to_tabs=space_to_tab,
        )

        if link_data_only:
            # Never alter a file that will not be copied to Galaxy's local file store.
            if datatype.dataset_content_needs_grooming(path):
                err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
                    '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
                raise UploadProblemException(err_msg)

        # If this file is not in the workdir make sure it gets there.
        if not link_data_only and converted_path:
            path = upload_config.ensure_in_working_directory(
                converted_path, purge_source, in_place)
        elif not link_data_only:
            path = upload_config.ensure_in_working_directory(
                path, purge_source, in_place)

        if not link_data_only and datatype and datatype.dataset_content_needs_grooming(
                path):
            # Groom the dataset content if necessary
            datatype.groom_dataset_content(path)

        rval = {
            "name": name,
            "filename": path,
            "dbkey": dbkey,
            "ext": ext,
            "link_data_only": link_data_only
        }
        if info is not None:
            rval["info"] = info
        if object_id is not None:
            rval["object_id"] = object_id
        return rval
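
Examples #8 through #10 share the same staging decision after type detection: when linking, the file is left where it is; otherwise the converted tempfile, if one exists, is what gets moved into the working directory. A compact sketch of that choice (path_to_stage is an illustrative name):

def path_to_stage(path, converted_path, link_data_only):
    # Mirrors the "make sure it gets there" branch above: when linking, nothing
    # is staged; otherwise the converted tempfile (if any) wins over the original path.
    if link_data_only:
        return None
    return converted_path or path


assert path_to_stage('/ftp/a.txt', None, link_data_only=True) is None
assert path_to_stage('/ftp/a.txt', '/tmp/conv123', link_data_only=False) == '/tmp/conv123'
assert path_to_stage('/ftp/a.txt', None, link_data_only=False) == '/ftp/a.txt'
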