def add_composite_file(dataset, json_file, output_path, files_path):
    if dataset.composite_files:
        os.mkdir(files_path)
        for name, value in dataset.composite_files.items():
            value = util.bunch.Bunch(**value)
            if dataset.composite_file_paths[value.name] is None and not value.optional:
                raise UploadProblemException('A required composite data file was not provided (%s)' % name)
            elif dataset.composite_file_paths[value.name] is not None:
                dp = dataset.composite_file_paths[value.name]['path']
                isurl = dp.find('://') != -1  # todo fixme
                if isurl:
                    try:
                        temp_name = sniff.stream_to_file(urlopen(dp), prefix='url_paste')
                    except Exception as e:
                        raise UploadProblemException('Unable to fetch %s\n%s' % (dp, str(e)))
                    dataset.path = temp_name
                    dp = temp_name
                if not value.is_binary:
                    tmpdir = output_adjacent_tmpdir(output_path)
                    tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id
                    if dataset.composite_file_paths[value.name].get('space_to_tab', value.space_to_tab):
                        sniff.convert_newlines_sep2tabs(dp, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
                    else:
                        sniff.convert_newlines(dp, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
                shutil.move(dp, os.path.join(files_path, name))
    # Move the dataset to its "real" path
    shutil.move(dataset.primary_file, output_path)
    # Write the job info
    info = dict(type='dataset', dataset_id=dataset.dataset_id, stdout='uploaded %s file' % dataset.file_type)
    json_file.write(dumps(info) + "\n")
def stage_file(name, composite_file_path, is_binary=False):
    dp = composite_file_path['path']
    path, is_url = to_path(dp)
    if is_url:
        dataset.path = path
        dp = path
    auto_decompress = composite_file_path.get('auto_decompress', True)
    if auto_decompress and not datatype.composite_type and CompressedFile.can_decompress(dp):
        # It isn't an explicitly composite datatype, so these are just extra files to attach
        # as composite data. It'd be better if Galaxy was communicating this to the tool
        # a little more explicitly so we didn't need to dispatch on the datatype and so we
        # could attach arbitrary extra composite data to an existing composite datatype
        # if need be? Perhaps that would be a mistake though.
        CompressedFile(dp).extract(files_path)
    else:
        if not is_binary:
            tmpdir = output_adjacent_tmpdir(output_path)
            tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id
            if composite_file_path.get('space_to_tab'):
                sniff.convert_newlines_sep2tabs(dp, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
            else:
                sniff.convert_newlines(dp, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
        file_output_path = os.path.join(files_path, name)
        shutil.move(dp, file_output_path)
        # groom the dataset file content if required by the corresponding datatype definition
        if datatype.dataset_content_needs_grooming(file_output_path):
            datatype.groom_dataset_content(file_output_path)
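# Hedged sketch (not taken verbatim from any snippet here): stage_file above is
# presumably driven by a loop over the dataset's declared composite files, along
# the lines of the add_composite_file variants elsewhere in this listing. Names
# such as `dataset`, `util`, `files_path` and UploadProblemException are
# borrowed from those surrounding snippets.
#
#     for name, value in dataset.composite_files.items():
#         value = util.bunch.Bunch(**value)
#         composite_file_path = dataset.composite_file_paths[value.name]
#         if composite_file_path is None and not value.optional:
#             raise UploadProblemException('A required composite data file was not provided (%s)' % name)
#         elif composite_file_path is not None:
#             stage_file(name, composite_file_path, value.is_binary)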
def add_composite_file(dataset, registry, json_file, output_path, files_path):
    if dataset.composite_files:
        os.mkdir(files_path)
        for name, value in dataset.composite_files.iteritems():
            value = util.bunch.Bunch(**value)
            if dataset.composite_file_paths[value.name] is None and not value.optional:
                file_err('A required composite data file was not provided (%s)' % name, dataset, json_file)
                break
            elif dataset.composite_file_paths[value.name] is not None:
                dp = dataset.composite_file_paths[value.name]['path']
                isurl = dp.find('://') != -1  # todo fixme
                if isurl:
                    try:
                        temp_name, dataset.is_multi_byte = sniff.stream_to_file(urllib.urlopen(dp), prefix='url_paste')
                    except Exception, e:
                        file_err('Unable to fetch %s\n%s' % (dp, str(e)), dataset, json_file)
                        return
                    dataset.path = temp_name
                    dp = temp_name
                if not value.is_binary:
                    if dataset.composite_file_paths[value.name].get('space_to_tab', value.space_to_tab):
                        sniff.convert_newlines_sep2tabs(dp)
                    else:
                        sniff.convert_newlines(dp)
                shutil.move(dp, os.path.join(files_path, name))
def add_composite_file(dataset, json_file, output_path, files_path):
    if dataset.composite_files:
        os.mkdir(files_path)
        for name, value in dataset.composite_files.items():
            value = util.bunch.Bunch(**value)
            if dataset.composite_file_paths[value.name] is None and not value.optional:
                file_err('A required composite data file was not provided (%s)' % name, dataset, json_file)
                break
            elif dataset.composite_file_paths[value.name] is not None:
                dp = dataset.composite_file_paths[value.name]['path']
                isurl = dp.find('://') != -1  # todo fixme
                if isurl:
                    try:
                        temp_name, dataset.is_multi_byte = sniff.stream_to_file(urlopen(dp), prefix='url_paste')
                    except Exception as e:
                        file_err('Unable to fetch %s\n%s' % (dp, str(e)), dataset, json_file)
                        return
                    dataset.path = temp_name
                    dp = temp_name
                if not value.is_binary:
                    tmpdir = output_adjacent_tmpdir(output_path)
                    tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id
                    if dataset.composite_file_paths[value.name].get('space_to_tab', value.space_to_tab):
                        sniff.convert_newlines_sep2tabs(dp, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
                    else:
                        sniff.convert_newlines(dp, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
                shutil.move(dp, os.path.join(files_path, name))
    # Move the dataset to its "real" path
    shutil.move(dataset.primary_file, output_path)
    # Write the job info
    info = dict(type='dataset', dataset_id=dataset.dataset_id, stdout='uploaded %s file' % dataset.file_type)
    json_file.write(dumps(info) + "\n")
def add_composite_file(dataset, json_file, output_path, files_path):
    if dataset.composite_files:
        os.mkdir(files_path)
        for name, value in dataset.composite_files.iteritems():
            value = util.bunch.Bunch(**value)
            if dataset.composite_file_paths[value.name] is None and not value.optional:
                file_err('A required composite data file was not provided (%s)' % name, dataset, json_file)
                break
            elif dataset.composite_file_paths[value.name] is not None:
                if not value.is_binary:
                    if uploaded_dataset.composite_files[value.name].space_to_tab:
                        sniff.convert_newlines_sep2tabs(dataset.composite_file_paths[value.name]['path'])
                    else:
                        sniff.convert_newlines(dataset.composite_file_paths[value.name]['path'])
                shutil.move(dataset.composite_file_paths[value.name]['path'], os.path.join(files_path, name))
    # Move the dataset to its "real" path
    shutil.move(dataset.primary_file, output_path)
    # Write the job info
    info = dict(type='dataset', dataset_id=dataset.dataset_id, stdout='uploaded %s file' % dataset.file_type)
    json_file.write(to_json_string(info) + "\n")
def add_composite_file(dataset, registry, json_file, output_path, files_path):
    if dataset.composite_files:
        os.mkdir(files_path)
        for name, value in dataset.composite_files.iteritems():
            value = util.bunch.Bunch(**value)
            if dataset.composite_file_paths[value.name] is None and not value.optional:
                file_err('A required composite data file was not provided (%s)' % name, dataset, json_file)
                break
            elif dataset.composite_file_paths[value.name] is not None:
                dp = dataset.composite_file_paths[value.name]['path']
                isurl = dp.find('://') != -1  # todo fixme
                if isurl:
                    try:
                        temp_name, dataset.is_multi_byte = sniff.stream_to_file(urllib.urlopen(dp), prefix='url_paste')
                    except Exception, e:
                        file_err('Unable to fetch %s\n%s' % (dp, str(e)), dataset, json_file)
                        return
                    dataset.path = temp_name
                    dp = temp_name
                if not value.is_binary:
                    tmpdir = output_adjacent_tmpdir(output_path)
                    tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id
                    if dataset.composite_file_paths[value.name].get('space_to_tab', value.space_to_tab):
                        sniff.convert_newlines_sep2tabs(dp, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
                    else:
                        sniff.convert_newlines(dp, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
                move_copy(dp, os.path.join(files_path, name))
def assert_converts_to_1234_convert(content, block_size=1024):
    with tempfile.NamedTemporaryFile(delete=False, mode='w') as tf:
        tf.write(content)
    rval = convert_newlines(tf.name, tmp_prefix="gxtest", tmp_dir=tempfile.gettempdir(), block_size=block_size)
    actual_contents = open(tf.name).read()
    assert '1 2\n3 4\n' == actual_contents, actual_contents
    assert rval[0:2] == (2, None), f"rval != {rval} for {content}"
def test_convert_newlines_non_utf():
    fname = get_test_fname("dosimzml")
    rval = convert_newlines(fname, tmp_prefix="gxtest", tmp_dir=tempfile.gettempdir(), in_place=False)
    new_file = rval[1]
    assert open(new_file, "rb").read() == open(get_test_fname("1imzml"), "rb").read()
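# A minimal, self-contained sketch (an illustrative stand-in, not Galaxy's
# sniff.convert_newlines) of the behaviour the two tests above exercise:
# normalize '\r\n' and '\r' to '\n' and return a (line_count, converted_path)
# tuple, where converted_path is None when the file is rewritten in place.
import re
import tempfile


def convert_newlines_sketch(path):
    with open(path, 'rb') as fh:
        data = fh.read()
    normalized = re.sub(rb'\r\n|\r', b'\n', data)
    with open(path, 'wb') as fh:
        fh.write(normalized)
    return normalized.count(b'\n'), None


if __name__ == '__main__':
    # Mirrors the '1 2\n3 4\n' expectation asserted by the tests above.
    with tempfile.NamedTemporaryFile(delete=False, mode='wb') as tf:
        tf.write(b'1 2\r\n3 4\r')
    assert convert_newlines_sketch(tf.name) == (2, None)
    assert open(tf.name, 'rb').read() == b'1 2\n3 4\n'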
def add_composite_file(dataset, json_file):
    if dataset.composite_files:
        os.mkdir(dataset.extra_files_path)
        for name, value in dataset.composite_files.iteritems():
            value = util.bunch.Bunch(**value)
            if dataset.composite_file_paths[value.name] is None and not value.optional:
                file_err('A required composite data file was not provided (%s)' % name, dataset, json_file)
                break
            elif dataset.composite_file_paths[value.name] is not None:
                if not value.is_binary:
                    if uploaded_dataset.composite_files[value.name].space_to_tab:
                        sniff.convert_newlines_sep2tabs(dataset.composite_file_paths[value.name]['path'])
                    else:
                        sniff.convert_newlines(dataset.composite_file_paths[value.name]['path'])
                shutil.move(dataset.composite_file_paths[value.name]['path'], os.path.join(dataset.extra_files_path, name))
    info = dict(type='dataset', dataset_id=dataset.dataset_id, path=dataset.primary_file)
    json_file.write(to_json_string(info) + "\n")
def assert_converts_to_1234_convert(content, block_size=1024):
    fname = get_test_fname('temp2.txt')
    with open(fname, 'w') as fh:
        fh.write(content)
    rval = convert_newlines(fname, tmp_prefix="gxtest", tmp_dir=tempfile.gettempdir(), block_size=block_size)
    actual_contents = open(fname).read()
    assert '1 2\n3 4\n' == actual_contents, actual_contents
    assert rval == (2, None), "rval != %s for %s" % (rval, content)
def add_composite_file(dataset, json_file, output_path, files_path):
    if dataset.composite_files:
        os.mkdir(files_path)
        for name, value in dataset.composite_files.iteritems():
            value = util.bunch.Bunch(**value)
            if dataset.composite_file_paths[value.name] is None and not value.optional:
                file_err("A required composite data file was not provided (%s)" % name, dataset, json_file)
                break
            elif dataset.composite_file_paths[value.name] is not None:
                if not value.is_binary:
                    if uploaded_dataset.composite_files[value.name].space_to_tab:
                        sniff.convert_newlines_sep2tabs(dataset.composite_file_paths[value.name]["path"])
                    else:
                        sniff.convert_newlines(dataset.composite_file_paths[value.name]["path"])
                shutil.move(dataset.composite_file_paths[value.name]["path"], os.path.join(files_path, name))
    # Move the dataset to its "real" path
    shutil.move(dataset.primary_file, output_path)
    # Write the job info
    info = dict(type="dataset", dataset_id=dataset.dataset_id, stdout="uploaded %s file" % dataset.file_type)
    json_file.write(to_json_string(info) + "\n")
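# For reference, the json_file.write(...) calls in the add_composite_file
# variants above emit one JSON object per line describing the upload job; with
# illustrative values (the dataset_id and file_type are made up) a record
# looks roughly like:
#
#     {"type": "dataset", "dataset_id": 42, "stdout": "uploaded fastqsanger file"}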
def add_file(self, trans, file_obj, file_name, file_type, dbkey, info):
    temp_name = sniff.stream_to_file(file_obj)
    sniff.convert_newlines(temp_name)
    if file_type == 'auto':
        ext = sniff.guess_ext(temp_name)
    else:
        ext = file_type
    data = trans.app.model.Dataset()
    data.name = file_name
    data.extension = ext
    data.dbkey = dbkey
    data.info = info
    data.flush()
    shutil.move(temp_name, data.file_name)
    data.state = data.states.OK
    data.init_meta()
    data.set_peek()
    if isinstance(data.datatype, datatypes.interval.Interval):
        if data.missing_meta():
            data.extension = 'tabular'
    trans.history.add_dataset(data)
    trans.app.model.flush()
    return data
def add_file(dataset, registry, json_file, output_path): data_type = None line_count = None converted_path = None stdout = None link_data_only = dataset.get('link_data_only', 'copy_files') run_as_real_user = in_place = dataset.get('in_place', True) purge_source = dataset.get('purge_source', True) # in_place is True if there is no external chmod in place, # however there are other instances where modifications should not occur in_place: # when a file is added from a directory on the local file system (ftp import folder or any other path). if dataset.type in ('server_dir', 'path_paste', 'ftp_import'): in_place = False check_content = dataset.get('check_content' , True) auto_decompress = dataset.get('auto_decompress', True) try: ext = dataset.file_type except AttributeError: file_err('Unable to process uploaded file, missing file_type parameter.', dataset, json_file) return if dataset.type == 'url': try: page = urlopen(dataset.path) # page will be .close()ed by sniff methods temp_name, dataset.is_multi_byte = sniff.stream_to_file(page, prefix='url_paste', source_encoding=util.get_charset_from_http_headers(page.headers)) except Exception as e: file_err('Unable to fetch %s\n%s' % (dataset.path, str(e)), dataset, json_file) return dataset.path = temp_name # See if we have an empty file if not os.path.exists(dataset.path): file_err('Uploaded temporary file (%s) does not exist.' % dataset.path, dataset, json_file) return if not os.path.getsize(dataset.path) > 0: file_err('The uploaded file is empty', dataset, json_file) return if not dataset.type == 'url': # Already set is_multi_byte above if type == 'url' try: dataset.is_multi_byte = multi_byte.is_multi_byte(codecs.open(dataset.path, 'r', 'utf-8').read(100)) except UnicodeDecodeError as e: dataset.is_multi_byte = False # Is dataset an image? i_ext = get_image_ext(dataset.path) if i_ext: ext = i_ext data_type = ext # Is dataset content multi-byte? elif dataset.is_multi_byte: data_type = 'multi-byte char' ext = sniff.guess_ext(dataset.path, registry.sniff_order, is_multi_byte=True) # Is dataset content supported sniffable binary? 
else: # FIXME: This ignores the declared sniff order in datatype_conf.xml # resulting in improper behavior type_info = Binary.is_sniffable_binary(dataset.path) if type_info: data_type = type_info[0] ext = type_info[1] if not data_type: root_datatype = registry.get_datatype_by_extension(dataset.file_type) if getattr(root_datatype, 'compressed', False): data_type = 'compressed archive' ext = dataset.file_type else: # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress is_gzipped, is_valid = check_gzip(dataset.path, check_content=check_content) if is_gzipped and not is_valid: file_err('The gzipped uploaded file contains inappropriate content', dataset, json_file) return elif is_gzipped and is_valid and auto_decompress: if link_data_only == 'copy_files': # We need to uncompress the temp_name file, but BAM files must remain compressed in the BGZF format CHUNK_SIZE = 2 ** 20 # 1Mb fd, uncompressed = tempfile.mkstemp(prefix='data_id_%s_upload_gunzip_' % dataset.dataset_id, dir=os.path.dirname(output_path), text=False) gzipped_file = gzip.GzipFile(dataset.path, 'rb') while 1: try: chunk = gzipped_file.read(CHUNK_SIZE) except IOError: os.close(fd) os.remove(uncompressed) file_err('Problem decompressing gzipped data', dataset, json_file) return if not chunk: break os.write(fd, chunk) os.close(fd) gzipped_file.close() # Replace the gzipped file with the decompressed file if it's safe to do so if not in_place: dataset.path = uncompressed else: shutil.move(uncompressed, dataset.path) os.chmod(dataset.path, 0o644) dataset.name = dataset.name.rstrip('.gz') data_type = 'gzip' if not data_type: # See if we have a bz2 file, much like gzip is_bzipped, is_valid = check_bz2(dataset.path, check_content) if is_bzipped and not is_valid: file_err('The gzipped uploaded file contains inappropriate content', dataset, json_file) return elif is_bzipped and is_valid and auto_decompress: if link_data_only == 'copy_files': # We need to uncompress the temp_name file CHUNK_SIZE = 2 ** 20 # 1Mb fd, uncompressed = tempfile.mkstemp(prefix='data_id_%s_upload_bunzip2_' % dataset.dataset_id, dir=os.path.dirname(output_path), text=False) bzipped_file = bz2.BZ2File(dataset.path, 'rb') while 1: try: chunk = bzipped_file.read(CHUNK_SIZE) except IOError: os.close(fd) os.remove(uncompressed) file_err('Problem decompressing bz2 compressed data', dataset, json_file) return if not chunk: break os.write(fd, chunk) os.close(fd) bzipped_file.close() # Replace the bzipped file with the decompressed file if it's safe to do so if not in_place: dataset.path = uncompressed else: shutil.move(uncompressed, dataset.path) os.chmod(dataset.path, 0o644) dataset.name = dataset.name.rstrip('.bz2') data_type = 'bz2' if not data_type: # See if we have a zip archive is_zipped = check_zip(dataset.path) if is_zipped and auto_decompress: if link_data_only == 'copy_files': CHUNK_SIZE = 2 ** 20 # 1Mb uncompressed = None uncompressed_name = None unzipped = False z = zipfile.ZipFile(dataset.path) for name in z.namelist(): if name.endswith('/'): continue if unzipped: stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.' 
break fd, uncompressed = tempfile.mkstemp(prefix='data_id_%s_upload_zip_' % dataset.dataset_id, dir=os.path.dirname(output_path), text=False) if sys.version_info[:2] >= (2, 6): zipped_file = z.open(name) while 1: try: chunk = zipped_file.read(CHUNK_SIZE) except IOError: os.close(fd) os.remove(uncompressed) file_err('Problem decompressing zipped data', dataset, json_file) return if not chunk: break os.write(fd, chunk) os.close(fd) zipped_file.close() uncompressed_name = name unzipped = True else: # python < 2.5 doesn't have a way to read members in chunks(!) try: outfile = open(uncompressed, 'wb') outfile.write(z.read(name)) outfile.close() uncompressed_name = name unzipped = True except IOError: os.close(fd) os.remove(uncompressed) file_err('Problem decompressing zipped data', dataset, json_file) return z.close() # Replace the zipped file with the decompressed file if it's safe to do so if uncompressed is not None: if not in_place: dataset.path = uncompressed else: shutil.move(uncompressed, dataset.path) os.chmod(dataset.path, 0o644) dataset.name = uncompressed_name data_type = 'zip' if not data_type: # TODO refactor this logic. check_binary isn't guaranteed to be # correct since it only looks at whether the first 100 chars are # printable or not. If someone specifies a known unsniffable # binary datatype and check_binary fails, the file gets mangled. if check_binary(dataset.path) or Binary.is_ext_unsniffable(dataset.file_type): # We have a binary dataset, but it is not Bam, Sff or Pdf data_type = 'binary' # binary_ok = False parts = dataset.name.split(".") if len(parts) > 1: ext = parts[-1].strip().lower() if check_content and not Binary.is_ext_unsniffable(ext): file_err('The uploaded binary file contains inappropriate content', dataset, json_file) return elif Binary.is_ext_unsniffable(ext) and dataset.file_type != ext: err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % (ext.capitalize(), ext) file_err(err_msg, dataset, json_file) return if not data_type: # We must have a text file if check_content and check_html(dataset.path): file_err('The uploaded file contains inappropriate HTML content', dataset, json_file) return if data_type != 'binary': if link_data_only == 'copy_files' and data_type not in ('gzip', 'bz2', 'zip'): # Convert universal line endings to Posix line endings if to_posix_lines is True # and the data is not binary or gzip-, bz2- or zip-compressed. if dataset.to_posix_lines: tmpdir = output_adjacent_tmpdir(output_path) tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id if dataset.space_to_tab: line_count, converted_path = sniff.convert_newlines_sep2tabs(dataset.path, in_place=in_place, tmp_dir=tmpdir, tmp_prefix=tmp_prefix) else: line_count, converted_path = sniff.convert_newlines(dataset.path, in_place=in_place, tmp_dir=tmpdir, tmp_prefix=tmp_prefix) if dataset.file_type == 'auto': ext = sniff.guess_ext(dataset.path, registry.sniff_order) else: ext = dataset.file_type data_type = ext # Save job info for the framework if ext == 'auto' and data_type == 'binary': ext = 'data' if ext == 'auto' and dataset.ext: ext = dataset.ext if ext == 'auto': ext = 'data' datatype = registry.get_datatype_by_extension(ext) if dataset.type in ('server_dir', 'path_paste') and link_data_only == 'link_to_files': # Never alter a file that will not be copied to Galaxy's local file store. 
if datatype.dataset_content_needs_grooming(dataset.path): err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \ '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.' file_err(err_msg, dataset, json_file) return if link_data_only == 'copy_files' and converted_path: # Move the dataset to its "real" path try: shutil.move(converted_path, output_path) except OSError as e: # We may not have permission to remove converted_path if e.errno != errno.EACCES: raise elif link_data_only == 'copy_files': if purge_source and not run_as_real_user: # if the upload tool runs as a real user the real user # can't move dataset.path as this path is owned by galaxy. shutil.move(dataset.path, output_path) else: shutil.copy(dataset.path, output_path) # Write the job info stdout = stdout or 'uploaded %s file' % data_type info = dict(type='dataset', dataset_id=dataset.dataset_id, ext=ext, stdout=stdout, name=dataset.name, line_count=line_count) if dataset.get('uuid', None) is not None: info['uuid'] = dataset.get('uuid') json_file.write(dumps(info) + "\n") if link_data_only == 'copy_files' and datatype and datatype.dataset_content_needs_grooming(output_path): # Groom the dataset content if necessary datatype.groom_dataset_content(output_path)
def add_file( self, trans, temp_name, file_name, file_type, is_multi_byte, dbkey, info=None, space_to_tab=False, precreated_dataset=None ): def dataset_no_data_error( data, message = 'there was an error uploading your file' ): data.info = "No data: %s." % message data.state = data.states.ERROR if data.extension is None: data.extension = 'data' return data data_type = None if precreated_dataset is not None: data = precreated_dataset else: data = trans.app.model.HistoryDatasetAssociation( history = trans.history, create_dataset = True ) trans.app.security_agent.set_all_dataset_permissions( data.dataset, trans.app.security_agent.history_get_default_permissions( trans.history ) ) # See if we have an empty file if not os.path.getsize( temp_name ) > 0: return dataset_no_data_error( data, message = 'you attempted to upload an empty file' ) #raise BadFileException( "you attempted to upload an empty file." ) if is_multi_byte: ext = sniff.guess_ext( temp_name, is_multi_byte=True ) else: if not data_type: # See if we have a gzipped file, which, if it passes our restrictions, # we'll decompress on the fly. is_gzipped, is_valid = self.check_gzip( temp_name ) if is_gzipped and not is_valid: return dataset_no_data_error( data, message = 'you attempted to upload an inappropriate file' ) #raise BadFileException( "you attempted to upload an inappropriate file." ) elif is_gzipped and is_valid: # We need to uncompress the temp_name file CHUNK_SIZE = 2**20 # 1Mb fd, uncompressed = tempfile.mkstemp() gzipped_file = gzip.GzipFile( temp_name ) while 1: try: chunk = gzipped_file.read( CHUNK_SIZE ) except IOError: os.close( fd ) os.remove( uncompressed ) return dataset_no_data_error( data, message = 'problem decompressing gzipped data' ) #raise BadFileException( 'problem decompressing gzipped data.' ) if not chunk: break os.write( fd, chunk ) os.close( fd ) gzipped_file.close() # Replace the gzipped file with the decompressed file shutil.move( uncompressed, temp_name ) file_name = file_name.rstrip( '.gz' ) data_type = 'gzip' ext = '' if not data_type: # See if we have a zip archive is_zipped, is_valid, test_ext = self.check_zip( temp_name ) if is_zipped and not is_valid: return dataset_no_data_error( data, message = 'you attempted to upload an inappropriate file' ) #raise BadFileException( "you attempted to upload an inappropriate file." ) elif is_zipped and is_valid: # Currently, we force specific tools to handle this case. We also require the user # to manually set the incoming file_type if ( test_ext == 'ab1' or test_ext == 'scf' ) and file_type != 'binseq.zip': return dataset_no_data_error( data, message = "Invalid 'File Format' for archive consisting of binary files - use 'Binseq.zip'" ) #raise BadFileException( "Invalid 'File Format' for archive consisting of binary files - use 'Binseq.zip'." ) elif test_ext == 'txt' and file_type != 'txtseq.zip': return dataset_no_data_error( data, message = "Invalid 'File Format' for archive consisting of text files - use 'Txtseq.zip'" ) #raise BadFileException( "Invalid 'File Format' for archive consisting of text files - use 'Txtseq.zip'." ) if not ( file_type == 'binseq.zip' or file_type == 'txtseq.zip' ): return dataset_no_data_error( data, message = "you must manually set the 'File Format' to either 'Binseq.zip' or 'Txtseq.zip' when uploading zip files" ) #raise BadFileException( "you must manually set the 'File Format' to either 'Binseq.zip' or 'Txtseq.zip' when uploading zip files." 
) data_type = 'zip' ext = file_type if not data_type: if self.check_binary( temp_name ): parts = file_name.split( "." ) if len( parts ) > 1: ext = parts[1].strip().lower() if not( ext == 'ab1' or ext == 'scf' ): return dataset_no_data_error( data, message = "you attempted to upload an inappropriate file" ) #raise BadFileException( "you attempted to upload an inappropriate file." ) if ext == 'ab1' and file_type != 'ab1': return dataset_no_data_error( data, message = "you must manually set the 'File Format' to 'Ab1' when uploading ab1 files" ) #raise BadFileException( "you must manually set the 'File Format' to 'Ab1' when uploading ab1 files." ) elif ext == 'scf' and file_type != 'scf': return dataset_no_data_error( data, message = "you must manually set the 'File Format' to 'Scf' when uploading scf files" ) #raise BadFileException( "you must manually set the 'File Format' to 'Scf' when uploading scf files." ) data_type = 'binary' if not data_type: # We must have a text file if trans.app.datatypes_registry.get_datatype_by_extension( file_type ).composite_type != 'auto_primary_file' and self.check_html( temp_name ): return dataset_no_data_error( data, message = "you attempted to upload an inappropriate file" ) #raise BadFileException( "you attempted to upload an inappropriate file." ) if data_type != 'binary' and data_type != 'zip': if space_to_tab: self.line_count = sniff.convert_newlines_sep2tabs( temp_name ) else: self.line_count = sniff.convert_newlines( temp_name ) if file_type == 'auto': ext = sniff.guess_ext( temp_name, sniff_order=trans.app.datatypes_registry.sniff_order ) else: ext = file_type data_type = ext if info is None: info = 'uploaded %s file' %data_type data.extension = ext data.name = file_name data.dbkey = dbkey data.info = info data.flush() shutil.move( temp_name, data.file_name ) data.state = data.states.OK data.set_size() data.init_meta() if self.line_count is not None: try: if is_multi_byte: data.set_multi_byte_peek( line_count=self.line_count ) else: data.set_peek( line_count=self.line_count ) except: if is_multi_byte: data.set_multi_byte_peek() else: data.set_peek() else: if is_multi_byte: data.set_multi_byte_peek() else: data.set_peek() # validate incomming data # Commented by greg on 3/14/07 # for error in data.datatype.validate( data ): # data.add_validation_error( # model.ValidationError( message=str( error ), err_type=error.__class__.__name__, attributes=util.object_to_string( error.__dict__ ) ) ) if data.missing_meta(): data.datatype.set_meta( data ) dbkey_to_store = dbkey if type( dbkey_to_store ) == type( [] ): dbkey_to_store = dbkey[0] if precreated_dataset is not None: trans.history.genome_build = dbkey_to_store else: trans.history.add_dataset( data, genome_build=dbkey_to_store ) trans.app.model.flush() trans.log_event( "Added dataset %d to history %d" %( data.id, trans.history.id ), tool_id="upload" ) return data
file_err( 'The uploaded file contains inappropriate HTML content', dataset, json_file ) return if data_type != 'binary': if link_data_only == 'copy_files': if dataset.type in ( 'server_dir', 'path_paste' ) and data_type not in [ 'gzip', 'bz2', 'zip' ]: in_place = False # Convert universal line endings to Posix line endings, but allow the user to turn it off, # so that is becomes possible to upload gzip, bz2 or zip files with binary data without # corrupting the content of those files. if dataset.to_posix_lines: tmpdir = output_adjacent_tmpdir( output_path ) tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id if dataset.space_to_tab: line_count, converted_path = sniff.convert_newlines_sep2tabs( dataset.path, in_place=in_place, tmp_dir=tmpdir, tmp_prefix=tmp_prefix ) else: line_count, converted_path = sniff.convert_newlines( dataset.path, in_place=in_place, tmp_dir=tmpdir, tmp_prefix=tmp_prefix ) if dataset.file_type == 'auto': ext = sniff.guess_ext( dataset.path, registry.sniff_order ) else: ext = dataset.file_type data_type = ext # Save job info for the framework if ext == 'auto' and dataset.ext: ext = dataset.ext if ext == 'auto': ext = 'data' datatype = registry.get_datatype_by_extension( ext ) if dataset.type in ( 'server_dir', 'path_paste' ) and link_data_only == 'link_to_files': # Never alter a file that will not be copied to Galaxy's local file store. if datatype.dataset_content_needs_grooming( dataset.path ): err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
def execute( self, tool, trans, incoming={}, set_output_hid = True ): dataset_upload_inputs = [] for input_name, input in tool.inputs.iteritems(): if input.type == "upload_dataset": dataset_upload_inputs.append( input ) assert dataset_upload_inputs, Exception( "No dataset upload groups were found." ) # Get any precreated datasets (when using asynchronous uploads) async_datasets = [] self.precreated_datasets = [] if incoming.get( 'async_datasets', None ) not in ["None", "", None]: async_datasets = incoming['async_datasets'].split(',') for id in async_datasets: try: data = trans.app.model.HistoryDatasetAssociation.get( int( id ) ) except: log.exception( 'Unable to load precreated dataset (%s) sent in upload form' % id ) continue if trans.user is None and trans.galaxy_session.current_history != data.history: log.error( 'Got a precreated dataset (%s) but it does not belong to anonymous user\'s current session (%s)' % ( data.id, trans.galaxy_session.id ) ) elif data.history.user != trans.user: log.error( 'Got a precreated dataset (%s) but it does not belong to current user (%s)' % ( data.id, trans.user.id ) ) else: self.precreated_datasets.append( data ) data_list = [] for dataset_upload_input in dataset_upload_inputs: uploaded_datasets = dataset_upload_input.get_uploaded_datasets( trans, incoming ) for uploaded_dataset in uploaded_datasets: precreated_dataset = self.get_precreated_dataset( uploaded_dataset.precreated_name ) dataset = self.add_file( trans, uploaded_dataset.primary_file, uploaded_dataset.name, uploaded_dataset.file_type, uploaded_dataset.is_multi_byte, uploaded_dataset.dbkey, space_to_tab = uploaded_dataset.space_to_tab, info = uploaded_dataset.info, precreated_dataset = precreated_dataset ) if uploaded_dataset.composite_files: os.mkdir( dataset.extra_files_path ) #make extra files path for name, value in uploaded_dataset.composite_files.iteritems(): #what about binary files here, need to skip converting newlines if value is None and not dataset.datatype.writable_files[ name ].optional: dataset.info = "A required composite data file was not provided (%s)" % name dataset.state = dataset.states.ERROR break elif value is not None: if value.space_to_tab: sniff.convert_newlines_sep2tabs( value.filename ) else: sniff.convert_newlines( value.filename ) shutil.move( value.filename, os.path.join( dataset.extra_files_path, name ) ) data_list.append( dataset ) #clean up extra temp names uploaded_dataset.clean_up_temp_files() #cleanup unclaimed precreated datasets: for data in self.precreated_datasets: log.info( 'Cleaned up unclaimed precreated dataset (%s).' % ( data.id ) ) data.state = data.states.ERROR data.info = 'No file contents were available.' if data_list: trans.app.model.flush() # Create the job object job = trans.app.model.Job() job.session_id = trans.get_galaxy_session().id job.history_id = trans.history.id job.tool_id = tool.id try: # For backward compatibility, some tools may not have versions yet. job.tool_version = tool.version except: job.tool_version = "1.0.1" job.state = trans.app.model.Job.states.UPLOAD job.flush() log.info( 'tool %s created job id %d' % ( tool.id, job.id ) ) trans.log_event( 'created job id %d' % job.id, tool_id=tool.id ) #if we could make a 'real' job here, then metadata could be set before job.finish() is called hda = data_list[0] #only our first hda is being added as output for the job, why? 
job.state = trans.app.model.Job.states.OK file_size_str = datatypes.data.nice_size( hda.dataset.file_size ) job.info = "%s, size: %s" % ( hda.info, file_size_str ) job.add_output_dataset( hda.name, hda ) job.flush() log.info( 'job id %d ended ok, file size: %s' % ( job.id, file_size_str ) ) trans.log_event( 'job id %d ended ok, file size: %s' % ( job.id, file_size_str ), tool_id=tool.id ) return dict( output=hda )
file_err( 'The uploaded file contains inappropriate HTML content', dataset, json_file) return if data_type != 'binary': if link_data_only == 'copy_files': if dataset.type in ('server_dir', 'path_paste') and data_type not in [ 'gzip', 'bz2', 'zip' ]: in_place = False if dataset.space_to_tab: line_count, converted_path = sniff.convert_newlines_sep2tabs( dataset.path, in_place=in_place) else: line_count, converted_path = sniff.convert_newlines( dataset.path, in_place=in_place) if dataset.file_type == 'auto': ext = sniff.guess_ext(dataset.path, registry.sniff_order) else: ext = dataset.file_type data_type = ext # Save job info for the framework if ext == 'auto' and dataset.ext: ext = dataset.ext if ext == 'auto': ext = 'data' datatype = registry.get_datatype_by_extension(ext) if dataset.type in ('server_dir', 'path_paste') and link_data_only == 'link_to_files': # Never alter a file that will not be copied to Galaxy's local file store. if datatype.dataset_content_needs_grooming(dataset.path):
def add_file( self, trans, folder_id, file_obj, name, file_format, dbkey, roles, info='no info', space_to_tab=False, replace_dataset=None, library_item_info_template=None, template_elements={}, message=None ): folder = trans.app.model.LibraryFolder.get( folder_id ) data_type = None line_count = 0 temp_name, is_multi_byte = sniff.stream_to_file( file_obj ) # See if we have an empty file if not os.path.getsize( temp_name ) > 0: raise BadFileException( "you attempted to upload an empty file." ) if is_multi_byte: ext = sniff.guess_ext( temp_name, is_multi_byte=True ) else: if not data_type: # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress on the fly. is_gzipped, is_valid = self.check_gzip( temp_name ) if is_gzipped and not is_valid: raise BadFileException( "you attempted to upload an inappropriate file." ) elif is_gzipped and is_valid: # We need to uncompress the temp_name file CHUNK_SIZE = 2**20 # 1Mb fd, uncompressed = tempfile.mkstemp() gzipped_file = gzip.GzipFile( temp_name ) while 1: try: chunk = gzipped_file.read( CHUNK_SIZE ) except IOError: os.close( fd ) os.remove( uncompressed ) raise BadFileException( 'problem uncompressing gzipped data.' ) if not chunk: break os.write( fd, chunk ) os.close( fd ) gzipped_file.close() # Replace the gzipped file with the decompressed file shutil.move( uncompressed, temp_name ) name = name.rstrip( '.gz' ) data_type = 'gzip' ext = '' if not data_type: # See if we have a zip archive is_zipped, is_valid, test_ext = self.check_zip( temp_name ) if is_zipped and not is_valid: raise BadFileException( "you attempted to upload an inappropriate file." ) elif is_zipped and is_valid: # Currently, we force specific tools to handle this case. We also require the user # to manually set the incoming file_format if ( test_ext == 'ab1' or test_ext == 'scf' ) and file_format != 'binseq.zip': raise BadFileException( "Invalid 'File Format' for archive consisting of binary files - use 'Binseq.zip'." ) elif test_ext == 'txt' and file_format != 'txtseq.zip': raise BadFileException( "Invalid 'File Format' for archive consisting of text files - use 'Txtseq.zip'." ) if not ( file_format == 'binseq.zip' or file_format == 'txtseq.zip' ): raise BadFileException( "you must manually set the 'File Format' to either 'Binseq.zip' or 'Txtseq.zip' when uploading zip files." ) data_type = 'zip' ext = file_format if not data_type: if self.check_binary( temp_name ): try: ext = name.split( "." )[1].strip().lower() except: ext = '' if not( ext == 'ab1' or ext == 'scf' ): raise BadFileException( "you attempted to upload an inappropriate file." ) if ext == 'ab1' and file_format != 'ab1': raise BadFileException( "you must manually set the 'File Format' to 'Ab1' when uploading ab1 files." ) elif ext == 'scf' and file_format != 'scf': raise BadFileException( "you must manually set the 'File Format' to 'Scf' when uploading scf files." ) data_type = 'binary' if not data_type: # We must have a text file if self.check_html( temp_name ): raise BadFileException( "you attempted to upload an inappropriate file." 
) if data_type != 'binary' and data_type != 'zip': if space_to_tab: line_count = sniff.convert_newlines_sep2tabs( temp_name ) elif os.stat( temp_name ).st_size < 262144000: # 250MB line_count = sniff.convert_newlines( temp_name ) else: if sniff.check_newlines( temp_name ): line_count = sniff.convert_newlines( temp_name ) else: line_count = None if file_format == 'auto': ext = sniff.guess_ext( temp_name, sniff_order=trans.app.datatypes_registry.sniff_order ) else: ext = file_format data_type = ext if info is None: info = 'uploaded %s file' % data_type if file_format == 'auto': data_type = sniff.guess_ext( temp_name, sniff_order=trans.app.datatypes_registry.sniff_order ) else: data_type = file_format if replace_dataset: # The replace_dataset param ( when not None ) refers to a LibraryDataset that is being replaced with a new version. library_dataset = replace_dataset else: # If replace_dataset is None, the Library level permissions will be taken from the folder and applied to the new # LibraryDataset, and the current user's DefaultUserPermissions will be applied to the associated Dataset. library_dataset = trans.app.model.LibraryDataset( folder=folder, name=name, info=info ) library_dataset.flush() trans.app.security_agent.copy_library_permissions( folder, library_dataset ) ldda = trans.app.model.LibraryDatasetDatasetAssociation( name=name, info=info, extension=data_type, dbkey=dbkey, library_dataset=library_dataset, user=trans.get_user(), create_dataset=True ) ldda.message = message ldda.flush() # Permissions must be the same on the LibraryDatasetDatasetAssociation and the associated LibraryDataset trans.app.security_agent.copy_library_permissions( library_dataset, ldda ) if replace_dataset: # Copy the Dataset level permissions from replace_dataset to the new LibraryDatasetDatasetAssociation.dataset trans.app.security_agent.copy_dataset_permissions( replace_dataset.library_dataset_dataset_association.dataset, ldda.dataset ) else: # Copy the current user's DefaultUserPermissions to the new LibraryDatasetDatasetAssociation.dataset trans.app.security_agent.set_all_dataset_permissions( ldda.dataset, trans.app.security_agent.user_get_default_permissions( trans.get_user() ) ) folder.add_library_dataset( library_dataset, genome_build=dbkey ) folder.flush() library_dataset.library_dataset_dataset_association_id = ldda.id library_dataset.flush() # Handle any templates included in the upload form if library_item_info_template: user = trans.get_user() library_item_info = trans.app.model.LibraryItemInfo( user=user ) library_item_info.library_item_info_template = library_item_info_template library_item_info.flush() trans.app.security_agent.copy_library_permissions( library_item_info_template, library_item_info ) for template_element in library_item_info_template.elements: info_element_value = template_elements.get( "info_element_%s_%s" % ( library_item_info_template.id, template_element.id ), None ) info_element = trans.app.model.LibraryItemInfoElement() info_element.contents = info_element_value info_element.library_item_info_template_element = template_element info_element.library_item_info = library_item_info info_element.flush() library_item_info_association = trans.app.model.LibraryDatasetDatasetInfoAssociation( user=user ) library_item_info_association.set_library_item( ldda ) library_item_info_association.library_item_info = library_item_info library_item_info_association.flush() # If roles were selected upon upload, restrict access to the Dataset to those roles if roles: for role in roles: dp = 
trans.app.model.DatasetPermissions( RBACAgent.permitted_actions.DATASET_ACCESS.action, ldda.dataset, role ) dp.flush() shutil.move( temp_name, ldda.dataset.file_name ) ldda.state = ldda.states.OK ldda.init_meta() if line_count: try: if is_multi_byte: ldda.set_multi_byte_peek( line_count=line_count ) else: ldda.set_peek( line_count=line_count ) except: if is_multi_byte: ldda.set_multi_byte_peek() else: ldda.set_peek() else: if is_multi_byte: ldda.set_multi_byte_peek() else: ldda.set_peek() ldda.set_size() if ldda.missing_meta(): ldda.datatype.set_meta( ldda ) ldda.flush() return ldda
def add_file(dataset, registry, json_file, output_path): data_type = None line_count = None converted_path = None stdout = None link_data_only = dataset.get('link_data_only', 'copy_files') in_place = dataset.get('in_place', True) purge_source = dataset.get('purge_source', True) try: ext = dataset.file_type except AttributeError: file_err( 'Unable to process uploaded file, missing file_type parameter.', dataset, json_file) return if dataset.type == 'url': try: page = urlopen( dataset.path) # page will be .close()ed by sniff methods temp_name, dataset.is_multi_byte = sniff.stream_to_file( page, prefix='url_paste', source_encoding=util.get_charset_from_http_headers( page.headers)) except Exception as e: file_err('Unable to fetch %s\n%s' % (dataset.path, str(e)), dataset, json_file) return dataset.path = temp_name # See if we have an empty file if not os.path.exists(dataset.path): file_err('Uploaded temporary file (%s) does not exist.' % dataset.path, dataset, json_file) return if not os.path.getsize(dataset.path) > 0: file_err('The uploaded file is empty', dataset, json_file) return if not dataset.type == 'url': # Already set is_multi_byte above if type == 'url' try: dataset.is_multi_byte = multi_byte.is_multi_byte( codecs.open(dataset.path, 'r', 'utf-8').read(100)) except UnicodeDecodeError as e: dataset.is_multi_byte = False # Is dataset an image? i_ext = get_image_ext(dataset.path) if i_ext: ext = i_ext data_type = ext # Is dataset content multi-byte? elif dataset.is_multi_byte: data_type = 'multi-byte char' ext = sniff.guess_ext(dataset.path, registry.sniff_order, is_multi_byte=True) # Is dataset content supported sniffable binary? else: # FIXME: This ignores the declared sniff order in datatype_conf.xml # resulting in improper behavior type_info = Binary.is_sniffable_binary(dataset.path) if type_info: data_type = type_info[0] ext = type_info[1] if not data_type: root_datatype = registry.get_datatype_by_extension(dataset.file_type) if getattr(root_datatype, 'compressed', False): data_type = 'compressed archive' ext = dataset.file_type else: # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress is_gzipped, is_valid = check_gzip(dataset.path) if is_gzipped and not is_valid: file_err( 'The gzipped uploaded file contains inappropriate content', dataset, json_file) return elif is_gzipped and is_valid: if link_data_only == 'copy_files': # We need to uncompress the temp_name file, but BAM files must remain compressed in the BGZF format CHUNK_SIZE = 2**20 # 1Mb fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_gunzip_' % dataset.dataset_id, dir=os.path.dirname(output_path), text=False) gzipped_file = gzip.GzipFile(dataset.path, 'rb') while 1: try: chunk = gzipped_file.read(CHUNK_SIZE) except IOError: os.close(fd) os.remove(uncompressed) file_err('Problem decompressing gzipped data', dataset, json_file) return if not chunk: break os.write(fd, chunk) os.close(fd) gzipped_file.close() # Replace the gzipped file with the decompressed file if it's safe to do so if dataset.type in ('server_dir', 'path_paste') or not in_place: dataset.path = uncompressed else: shutil.move(uncompressed, dataset.path) os.chmod(dataset.path, 0o644) dataset.name = dataset.name.rstrip('.gz') data_type = 'gzip' if not data_type and bz2 is not None: # See if we have a bz2 file, much like gzip is_bzipped, is_valid = check_bz2(dataset.path) if is_bzipped and not is_valid: file_err( 'The gzipped uploaded file contains inappropriate content', dataset, json_file) return elif is_bzipped 
and is_valid: if link_data_only == 'copy_files': # We need to uncompress the temp_name file CHUNK_SIZE = 2**20 # 1Mb fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_bunzip2_' % dataset.dataset_id, dir=os.path.dirname(output_path), text=False) bzipped_file = bz2.BZ2File(dataset.path, 'rb') while 1: try: chunk = bzipped_file.read(CHUNK_SIZE) except IOError: os.close(fd) os.remove(uncompressed) file_err( 'Problem decompressing bz2 compressed data', dataset, json_file) return if not chunk: break os.write(fd, chunk) os.close(fd) bzipped_file.close() # Replace the bzipped file with the decompressed file if it's safe to do so if dataset.type in ('server_dir', 'path_paste') or not in_place: dataset.path = uncompressed else: shutil.move(uncompressed, dataset.path) os.chmod(dataset.path, 0o644) dataset.name = dataset.name.rstrip('.bz2') data_type = 'bz2' if not data_type: # See if we have a zip archive is_zipped = check_zip(dataset.path) if is_zipped: if link_data_only == 'copy_files': CHUNK_SIZE = 2**20 # 1Mb uncompressed = None uncompressed_name = None unzipped = False z = zipfile.ZipFile(dataset.path) for name in z.namelist(): if name.endswith('/'): continue if unzipped: stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.' break fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_zip_' % dataset.dataset_id, dir=os.path.dirname(output_path), text=False) if sys.version_info[:2] >= (2, 6): zipped_file = z.open(name) while 1: try: chunk = zipped_file.read(CHUNK_SIZE) except IOError: os.close(fd) os.remove(uncompressed) file_err( 'Problem decompressing zipped data', dataset, json_file) return if not chunk: break os.write(fd, chunk) os.close(fd) zipped_file.close() uncompressed_name = name unzipped = True else: # python < 2.5 doesn't have a way to read members in chunks(!) try: outfile = open(uncompressed, 'wb') outfile.write(z.read(name)) outfile.close() uncompressed_name = name unzipped = True except IOError: os.close(fd) os.remove(uncompressed) file_err( 'Problem decompressing zipped data', dataset, json_file) return z.close() # Replace the zipped file with the decompressed file if it's safe to do so if uncompressed is not None: if dataset.type in ('server_dir', 'path_paste') or not in_place: dataset.path = uncompressed else: shutil.move(uncompressed, dataset.path) os.chmod(dataset.path, 0o644) dataset.name = uncompressed_name data_type = 'zip' if not data_type: # TODO refactor this logic. check_binary isn't guaranteed to be # correct since it only looks at whether the first 100 chars are # printable or not. If someone specifies a known unsniffable # binary datatype and check_binary fails, the file gets mangled. if check_binary(dataset.path) or Binary.is_ext_unsniffable( dataset.file_type): # We have a binary dataset, but it is not Bam, Sff or Pdf data_type = 'binary' # binary_ok = False parts = dataset.name.split(".") if len(parts) > 1: ext = parts[-1].strip().lower() if not Binary.is_ext_unsniffable(ext): file_err( 'The uploaded binary file contains inappropriate content', dataset, json_file) return elif Binary.is_ext_unsniffable( ext) and dataset.file_type != ext: err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." 
% ( ext.capitalize(), ext) file_err(err_msg, dataset, json_file) return if not data_type: # We must have a text file if check_html(dataset.path): file_err( 'The uploaded file contains inappropriate HTML content', dataset, json_file) return if data_type != 'binary': if link_data_only == 'copy_files': if dataset.type in ('server_dir', 'path_paste') and data_type not in [ 'gzip', 'bz2', 'zip' ]: in_place = False # Convert universal line endings to Posix line endings, but allow the user to turn it off, # so that is becomes possible to upload gzip, bz2 or zip files with binary data without # corrupting the content of those files. if dataset.to_posix_lines: tmpdir = output_adjacent_tmpdir(output_path) tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id if dataset.space_to_tab: line_count, converted_path = sniff.convert_newlines_sep2tabs( dataset.path, in_place=in_place, tmp_dir=tmpdir, tmp_prefix=tmp_prefix) else: line_count, converted_path = sniff.convert_newlines( dataset.path, in_place=in_place, tmp_dir=tmpdir, tmp_prefix=tmp_prefix) if dataset.file_type == 'auto': ext = sniff.guess_ext(dataset.path, registry.sniff_order) else: ext = dataset.file_type data_type = ext # Save job info for the framework if ext == 'auto' and dataset.ext: ext = dataset.ext if ext == 'auto': ext = 'data' datatype = registry.get_datatype_by_extension(ext) if dataset.type in ('server_dir', 'path_paste') and link_data_only == 'link_to_files': # Never alter a file that will not be copied to Galaxy's local file store. if datatype.dataset_content_needs_grooming(dataset.path): err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \ '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.' file_err(err_msg, dataset, json_file) return if link_data_only == 'copy_files' and dataset.type in ( 'server_dir', 'path_paste') and data_type not in ['gzip', 'bz2', 'zip']: # Move the dataset to its "real" path if converted_path is not None: shutil.copy(converted_path, output_path) try: os.remove(converted_path) except: pass else: # This should not happen, but it's here just in case shutil.copy(dataset.path, output_path) elif link_data_only == 'copy_files': if purge_source: shutil.move(dataset.path, output_path) else: shutil.copy(dataset.path, output_path) # Write the job info stdout = stdout or 'uploaded %s file' % data_type info = dict(type='dataset', dataset_id=dataset.dataset_id, ext=ext, stdout=stdout, name=dataset.name, line_count=line_count) if dataset.get('uuid', None) is not None: info['uuid'] = dataset.get('uuid') json_file.write(dumps(info) + "\n") if link_data_only == 'copy_files' and datatype.dataset_content_needs_grooming( output_path): # Groom the dataset content if necessary datatype.groom_dataset_content(output_path)
elif ext == 'scf' and dataset.file_type != 'scf': file_err( "You must manually set the 'File Format' to 'Scf' when uploading scf files.", dataset, json_file ) return else: ext = 'binary' data_type = 'binary' if not data_type: # We must have a text file if check_html( dataset.path ): file_err( 'The uploaded file contains inappropriate content', dataset, json_file ) return if data_type != 'binary' and data_type != 'zip': if dataset.space_to_tab: line_count = sniff.convert_newlines_sep2tabs( dataset.path ) else: line_count = sniff.convert_newlines( dataset.path ) if dataset.file_type == 'auto': ext = sniff.guess_ext( dataset.path ) else: ext = dataset.file_type data_type = ext # Save job info for the framework if ext == 'auto' and dataset.ext: ext = dataset.ext if ext == 'auto': ext = 'data' info = dict( type = 'dataset', dataset_id = dataset.dataset_id, path = dataset.path, ext = ext, name = dataset.name,
def add_file(dataset, registry, json_file, output_path): data_type = None line_count = None converted_path = None stdout = None link_data_only = dataset.get('link_data_only', 'copy_files') != 'copy_files' # run_as_real_user is estimated from galaxy config (external chmod indicated of inputs executed) # If this is True we always purge supplied upload inputs so they are cleaned up and we reuse their # paths during data conversions since this user already owns that path. # Older in_place check for upload jobs created before 18.01, TODO remove in 19.XX. xref #5206 run_as_real_user = dataset.get('run_as_real_user', False) or dataset.get("in_place", False) # purge_source defaults to True unless this is an FTP import and # ftp_upload_purge has been overridden to False in Galaxy's config. # We set purge_source to False if: # - the job does not have write access to the file, e.g. when running as the # real user # - the files are uploaded from external paths. purge_source = dataset.get('purge_source', True) and not run_as_real_user and dataset.type not in ('server_dir', 'path_paste') # in_place is True unless we are running as a real user or importing external paths (i.e. # this is a real upload and not a path paste or ftp import). # in_place should always be False if running as real user because the uploaded file will # be owned by Galaxy and not the user and it should be False for external paths so Galaxy doesn't # modify files not controlled by Galaxy. in_place = not run_as_real_user and dataset.type not in ('server_dir', 'path_paste', 'ftp_import') # Base on the check_upload_content Galaxy config option and on by default, this enables some # security related checks on the uploaded content, but can prevent uploads from working in some cases. check_content = dataset.get('check_content' , True) # auto_decompress is a request flag that can be swapped off to prevent Galaxy from automatically # decompressing archive files before sniffing. auto_decompress = dataset.get('auto_decompress', True) try: ext = dataset.file_type except AttributeError: raise UploadProblemException('Unable to process uploaded file, missing file_type parameter.') if dataset.type == 'url': try: page = urlopen(dataset.path) # page will be .close()ed by sniff methods temp_name = sniff.stream_to_file(page, prefix='url_paste', source_encoding=util.get_charset_from_http_headers(page.headers)) except Exception as e: raise UploadProblemException('Unable to fetch %s\n%s' % (dataset.path, str(e))) dataset.path = temp_name # See if we have an empty file if not os.path.exists(dataset.path): raise UploadProblemException('Uploaded temporary file (%s) does not exist.' % dataset.path) if not os.path.getsize(dataset.path) > 0: raise UploadProblemException('The uploaded file is empty') # Is dataset content supported sniffable binary? 
is_binary = check_binary(dataset.path) if is_binary: # Sniff the data type guessed_ext = sniff.guess_ext(dataset.path, registry.sniff_order) # Set data_type only if guessed_ext is a binary datatype datatype = registry.get_datatype_by_extension(guessed_ext) if isinstance(datatype, Binary): data_type = guessed_ext ext = guessed_ext if not data_type: root_datatype = registry.get_datatype_by_extension(dataset.file_type) if getattr(root_datatype, 'compressed', False): data_type = 'compressed archive' ext = dataset.file_type else: # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress is_gzipped, is_valid = check_gzip(dataset.path, check_content=check_content) if is_gzipped and not is_valid: raise UploadProblemException('The gzipped uploaded file contains inappropriate content') elif is_gzipped and is_valid and auto_decompress: if not link_data_only: # We need to uncompress the temp_name file, but BAM files must remain compressed in the BGZF format CHUNK_SIZE = 2 ** 20 # 1Mb fd, uncompressed = tempfile.mkstemp(prefix='data_id_%s_upload_gunzip_' % dataset.dataset_id, dir=os.path.dirname(output_path), text=False) gzipped_file = gzip.GzipFile(dataset.path, 'rb') while 1: try: chunk = gzipped_file.read(CHUNK_SIZE) except IOError: os.close(fd) os.remove(uncompressed) raise UploadProblemException('Problem decompressing gzipped data') if not chunk: break os.write(fd, chunk) os.close(fd) gzipped_file.close() # Replace the gzipped file with the decompressed file if it's safe to do so if not in_place: dataset.path = uncompressed else: shutil.move(uncompressed, dataset.path) os.chmod(dataset.path, 0o644) dataset.name = dataset.name.rstrip('.gz') data_type = 'gzip' if not data_type: # See if we have a bz2 file, much like gzip is_bzipped, is_valid = check_bz2(dataset.path, check_content) if is_bzipped and not is_valid: raise UploadProblemException('The gzipped uploaded file contains inappropriate content') elif is_bzipped and is_valid and auto_decompress: if not link_data_only: # We need to uncompress the temp_name file CHUNK_SIZE = 2 ** 20 # 1Mb fd, uncompressed = tempfile.mkstemp(prefix='data_id_%s_upload_bunzip2_' % dataset.dataset_id, dir=os.path.dirname(output_path), text=False) bzipped_file = bz2.BZ2File(dataset.path, 'rb') while 1: try: chunk = bzipped_file.read(CHUNK_SIZE) except IOError: os.close(fd) os.remove(uncompressed) raise UploadProblemException('Problem decompressing bz2 compressed data') if not chunk: break os.write(fd, chunk) os.close(fd) bzipped_file.close() # Replace the bzipped file with the decompressed file if it's safe to do so if not in_place: dataset.path = uncompressed else: shutil.move(uncompressed, dataset.path) os.chmod(dataset.path, 0o644) dataset.name = dataset.name.rstrip('.bz2') data_type = 'bz2' if not data_type: # See if we have a zip archive is_zipped = check_zip(dataset.path) if is_zipped and auto_decompress: if not link_data_only: CHUNK_SIZE = 2 ** 20 # 1Mb uncompressed = None uncompressed_name = None unzipped = False z = zipfile.ZipFile(dataset.path) for name in z.namelist(): if name.endswith('/'): continue if unzipped: stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.' 
                                break
                            fd, uncompressed = tempfile.mkstemp(prefix='data_id_%s_upload_zip_' % dataset.dataset_id, dir=os.path.dirname(output_path), text=False)
                            if sys.version_info[:2] >= (2, 6):
                                zipped_file = z.open(name)
                                while 1:
                                    try:
                                        chunk = zipped_file.read(CHUNK_SIZE)
                                    except IOError:
                                        os.close(fd)
                                        os.remove(uncompressed)
                                        raise UploadProblemException('Problem decompressing zipped data')
                                    if not chunk:
                                        break
                                    os.write(fd, chunk)
                                os.close(fd)
                                zipped_file.close()
                                uncompressed_name = name
                                unzipped = True
                            else:
                                # python < 2.6 doesn't have a way to read members in chunks(!)
                                try:
                                    with open(uncompressed, 'wb') as outfile:
                                        outfile.write(z.read(name))
                                    uncompressed_name = name
                                    unzipped = True
                                except IOError:
                                    os.close(fd)
                                    os.remove(uncompressed)
                                    raise UploadProblemException('Problem decompressing zipped data')
                        z.close()
                        # Replace the zipped file with the decompressed file if it's safe to do so
                        if uncompressed is not None:
                            if not in_place:
                                dataset.path = uncompressed
                            else:
                                shutil.move(uncompressed, dataset.path)
                            os.chmod(dataset.path, 0o644)
                        dataset.name = uncompressed_name
                        data_type = 'zip'
    if not data_type:
        if is_binary or registry.is_extension_unsniffable_binary(dataset.file_type):
            # We have a binary dataset, but it is not Bam, Sff or Pdf
            data_type = 'binary'
            parts = dataset.name.split(".")
            if len(parts) > 1:
                ext = parts[-1].strip().lower()
                is_ext_unsniffable_binary = registry.is_extension_unsniffable_binary(ext)
                if check_content and not is_ext_unsniffable_binary:
                    raise UploadProblemException('The uploaded binary file contains inappropriate content')
                elif is_ext_unsniffable_binary and dataset.file_type != ext:
                    err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % (ext, ext)
                    raise UploadProblemException(err_msg)
    if not data_type:
        # We must have a text file
        if check_content and check_html(dataset.path):
            raise UploadProblemException('The uploaded file contains inappropriate HTML content')
    if data_type != 'binary':
        if not link_data_only and data_type not in ('gzip', 'bz2', 'zip'):
            # Convert universal line endings to Posix line endings if to_posix_lines is True
            # and the data is not binary or gzip-, bz2- or zip-compressed.
            if dataset.to_posix_lines:
                tmpdir = output_adjacent_tmpdir(output_path)
                tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id
                if dataset.space_to_tab:
                    line_count, converted_path = sniff.convert_newlines_sep2tabs(dataset.path, in_place=in_place, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
                else:
                    line_count, converted_path = sniff.convert_newlines(dataset.path, in_place=in_place, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
        if dataset.file_type == 'auto':
            ext = sniff.guess_ext(converted_path or dataset.path, registry.sniff_order)
        else:
            ext = dataset.file_type
        data_type = ext
    # Save job info for the framework
    if ext == 'auto' and data_type == 'binary':
        ext = 'data'
    if ext == 'auto' and dataset.ext:
        ext = dataset.ext
    if ext == 'auto':
        ext = 'data'
    datatype = registry.get_datatype_by_extension(ext)
    if dataset.type in ('server_dir', 'path_paste') and link_data_only:
        # Never alter a file that will not be copied to Galaxy's local file store.
        if datatype.dataset_content_needs_grooming(dataset.path):
            err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
                      '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
            raise UploadProblemException(err_msg)
    if not link_data_only and converted_path:
        # Move the dataset to its "real" path
        try:
            shutil.move(converted_path, output_path)
        except OSError as e:
            # We may not have permission to remove converted_path
            if e.errno != errno.EACCES:
                raise
    elif not link_data_only:
        if purge_source:
            shutil.move(dataset.path, output_path)
        else:
            shutil.copy(dataset.path, output_path)
    # Write the job info
    stdout = stdout or 'uploaded %s file' % data_type
    info = dict(type='dataset', dataset_id=dataset.dataset_id, ext=ext, stdout=stdout, name=dataset.name, line_count=line_count)
    if dataset.get('uuid', None) is not None:
        info['uuid'] = dataset.get('uuid')
    json_file.write(dumps(info) + "\n")
    if not link_data_only and datatype and datatype.dataset_content_needs_grooming(output_path):
        # Groom the dataset content if necessary
        datatype.groom_dataset_content(output_path)
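The gzip and bz2 branches above both stream the archive into a temporary file created next to the upload output, reading a fixed-size chunk at a time so arbitrarily large uploads never have to fit in memory, and only then swap the decompressed copy into place. A minimal, self-contained sketch of that streaming pattern follows; the helper name and prefix are illustrative and not part of upload.py.

import gzip
import os
import shutil
import tempfile


def gunzip_to_tempfile(gzipped_path, work_dir, chunk_size=2 ** 20):
    # Stream-decompress gzipped_path into a temp file inside work_dir,
    # copying chunk_size bytes (1 MB by default) at a time, and return
    # the path of the decompressed copy; the temp file is removed on error.
    fd, uncompressed = tempfile.mkstemp(prefix='upload_gunzip_', dir=work_dir, text=False)
    try:
        with os.fdopen(fd, 'wb') as dst, gzip.open(gzipped_path, 'rb') as src:
            shutil.copyfileobj(src, dst, length=chunk_size)
    except OSError:
        os.remove(uncompressed)
        raise
    return uncompressed

upload.py additionally chmods the result and, depending on in_place, either re-points dataset.path at the temporary file or moves it over the original upload.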
def _resolve_src(item):
    converted_path = None
    name, path = _has_src_to_path(item)
    dbkey = item.get("dbkey", "?")
    requested_ext = item.get("ext", "auto")
    info = item.get("info", None)
    object_id = item.get("object_id", None)
    link_data_only = upload_config.link_data_only
    if "link_data_only" in item:
        # Allow overriding this on a per file basis.
        link_data_only = _link_data_only(item)
    to_posix_lines = upload_config.get_option(item, "to_posix_lines")
    space_to_tab = upload_config.get_option(item, "space_to_tab")
    in_place = item.get("in_place", False)
    purge_source = item.get("purge_source", True)

    # Follow upload.py logic but without the auto-decompress logic.
    registry = upload_config.registry
    check_content = upload_config.check_content
    data_type, ext = None, requested_ext
    is_binary = check_binary(path)
    if is_binary:
        data_type, ext = handle_sniffable_binary_check(data_type, ext, path, registry)
    if data_type is None:
        root_datatype = registry.get_datatype_by_extension(ext)
        if getattr(root_datatype, 'compressed', False):
            data_type = 'compressed archive'
        elif is_binary:
            data_type, ext = handle_unsniffable_binary_check(data_type, ext, path, name, is_binary, requested_ext, check_content, registry)
    if not data_type and check_content and check_html(path):
        raise UploadProblemException('The uploaded file contains inappropriate HTML content')

    if data_type != 'binary':
        if not link_data_only:
            if to_posix_lines:
                if space_to_tab:
                    line_count, converted_path = sniff.convert_newlines_sep2tabs(path, in_place=in_place, tmp_dir=".")
                else:
                    line_count, converted_path = sniff.convert_newlines(path, in_place=in_place, tmp_dir=".")
            else:
                if space_to_tab:
                    line_count, converted_path = sniff.sep2tabs(path, in_place=in_place, tmp_dir=".")
        if requested_ext == 'auto':
            ext = sniff.guess_ext(converted_path or path, registry.sniff_order)
        else:
            ext = requested_ext
        data_type = ext

    if ext == 'auto' and data_type == 'binary':
        ext = 'data'
    if ext == 'auto' and requested_ext:
        ext = requested_ext
    if ext == 'auto':
        ext = 'data'

    datatype = registry.get_datatype_by_extension(ext)
    if link_data_only:
        # Never alter a file that will not be copied to Galaxy's local file store.
        if datatype.dataset_content_needs_grooming(path):
            err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
                      '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
            raise UploadProblemException(err_msg)

    # If this file is not in the workdir make sure it gets there.
    if not link_data_only and converted_path:
        path = upload_config.ensure_in_working_directory(converted_path, purge_source, in_place)
    elif not link_data_only:
        path = upload_config.ensure_in_working_directory(path, purge_source, in_place)

    if not link_data_only and datatype and datatype.dataset_content_needs_grooming(path):
        # Groom the dataset content if necessary
        datatype.groom_dataset_content(path)

    rval = {"name": name, "filename": path, "dbkey": dbkey, "ext": ext, "link_data_only": link_data_only}
    if info is not None:
        rval["info"] = info
    if object_id is not None:
        rval["object_id"] = object_id
    return rval
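_resolve_src returns a plain dict describing the staged file, with info and object_id added only when supplied. Purely for illustration (the values below are made up, not produced by a real run), a pasted tab-separated file uploaded with default options might resolve to something like:

# Illustrative only -- not taken from an actual upload.
example_rval = {
    "name": "sample.tsv",        # display name derived from the source item
    "filename": "sample.tsv",    # path inside the job working directory
    "dbkey": "?",                # genome build; "?" when unspecified
    "ext": "tabular",            # sniffed or explicitly requested extension
    "link_data_only": False,     # file was copied into Galaxy, not linked
}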
                    file_err( err_msg, dataset, json_file )
                    return
    #if not data_type:
        # We must have a text file
        #if check_html( dataset.path ):
            #file_err( 'The uploaded file contains inappropriate HTML content', dataset, json_file )
            #return
    if data_type != 'binary':
        if link_data_only == 'copy_files':
            in_place = True
            if dataset.type in ( 'server_dir', 'path_paste' ):
                in_place = False
            if dataset.space_to_tab:
                line_count, converted_path = sniff.convert_newlines_sep2tabs( dataset.path, in_place=in_place )
            else:
                line_count, converted_path = sniff.convert_newlines( dataset.path, in_place=in_place )
        if dataset.file_type == 'auto':
            ext = sniff.guess_ext( dataset.path, registry.sniff_order )
        else:
            ext = dataset.file_type
        data_type = ext
    # Save job info for the framework
    if ext == 'auto' and dataset.ext:
        ext = dataset.ext
    if ext == 'auto':
        ext = 'data'
    datatype = registry.get_datatype_by_extension( ext )
    if dataset.type in ( 'server_dir', 'path_paste' ) and link_data_only == 'link_to_files':
        # Never alter a file that will not be copied to Galaxy's local file store.
        if datatype.dataset_content_needs_grooming( output_path ):
            err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \