Example 1
 def __init__(self, **kwd):
     """
         A Fastsearch Index consists of a binary file with the fingerprints
         and a pointer to the actual molecule file.
     """
     Binary.__init__(self, **kwd)
     self.add_composite_file('molecule.fs',
                             is_binary=True,
                             description='OpenBabel Fastsearch Index')
     self.add_composite_file('molecule.sdf',
                             optional=True,
                             is_binary=False,
                             description='Molecule File')
     self.add_composite_file('molecule.smi',
                             optional=True,
                             is_binary=False,
                             description='Molecule File')
     self.add_composite_file('molecule.inchi',
                             optional=True,
                             is_binary=False,
                             description='Molecule File')
     self.add_composite_file('molecule.mol2',
                             optional=True,
                             is_binary=False,
                             description='Molecule File')
     self.add_composite_file('molecule.cml',
                             optional=True,
                             is_binary=False,
                             description='Molecule File')
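
The declaration above fixes the component layout of the dataset: one required binary index plus several optional molecule files. A minimal standalone sketch (a hypothetical helper, not part of Galaxy) that mirrors that layout check against a directory on disk:

import os

FASTSEARCH_REQUIRED = ['molecule.fs']
FASTSEARCH_OPTIONAL = ['molecule.sdf', 'molecule.smi', 'molecule.inchi',
                       'molecule.mol2', 'molecule.cml']

def check_fastsearch_dir(path):
    # Return (missing_required, optional_present) for a composite directory.
    present = set(os.listdir(path))
    missing = [name for name in FASTSEARCH_REQUIRED if name not in present]
    found = [name for name in FASTSEARCH_OPTIONAL if name in present]
    return missing, found
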
Example 2
 def __init__(self, **kwd):
     Binary.__init__(self, **kwd)
     # Binary model
     self.add_composite_file('model.hmm.h3m', is_binary=True)
     # SSI index for binary model
     self.add_composite_file('model.hmm.h3i', is_binary=True)
     # Profiles (MSV part)
     self.add_composite_file('model.hmm.h3f', is_binary=True)
     # Profiles (remainder)
     self.add_composite_file('model.hmm.h3p', is_binary=True)
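
The four components above match what HMMER's hmmpress tool writes for a pressed profile database. A minimal completeness check, assuming the standard hmmpress suffixes:

import os

def hmmpress_complete(prefix):
    # prefix is the path of the source .hmm file, e.g. 'Pfam-A.hmm'
    return all(os.path.exists(prefix + suffix)
               for suffix in ('.h3m', '.h3i', '.h3f', '.h3p'))
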
Example 3
 def __init__(self, **kwd):
     Binary.__init__(self, **kwd)
     """The metadata"""
     self.add_composite_file('imzml',
                             description='The imzML metadata component.',
                             is_binary=False)
     """The mass spectral data"""
     self.add_composite_file(
         'ibd',
         description='The mass spectral data component.',
         is_binary=True)
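
imzML splits a dataset into a text XML metadata file and a binary .ibd file, which is why only the ibd component is flagged is_binary=True. As a side note grounded in the imzML specification rather than this snippet, the .ibd file begins with a 16-byte UUID that should match the one recorded in the XML; a sketch for reading it:

import uuid

def ibd_uuid(path):
    # First 16 bytes of an imzML .ibd file hold the dataset UUID.
    with open(path, 'rb') as f:
        return uuid.UUID(bytes=f.read(16))
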
Example 4
    def __init__(self, **kwd):
        Binary.__init__(self, **kwd)

        self.add_composite_file(
            'wiff',
            description='AB SCIEX files in .wiff format. This can contain all needed information or only metadata.',
            is_binary=True)

        self.add_composite_file(
            'wiff_scan',
            description='AB SCIEX spectra file (wiff.scan), if the corresponding .wiff file only contains metadata.',
            optional=True, is_binary=True)
Example 5
    def __init__(self, **kwd):
        Binary.__init__(self, **kwd)

        """The metadata"""
        self.add_composite_file(
            'imzml',
            description='The imzML metadata component.',
            is_binary=False)

        """The mass spectral data"""
        self.add_composite_file(
            'ibd',
            description='The mass spectral data component.',
            is_binary=True)
Example 6
 def __init__(self, **kwd):
     Binary.__init__(self, **kwd)
     """The header file. Provides information about dimensions, identification, and processing history."""
     self.add_composite_file('hdr',
                             description='The Analyze75 header file.',
                             is_binary=False)
     """The image file.  Image data, whose data type and ordering are described by the header file."""
     self.add_composite_file('img',
                             description='The Analyze75 image file.',
                             is_binary=True)
     """The optional t2m file."""
     self.add_composite_file('t2m',
                             description='The Analyze75 t2m file.',
                             optional=True,
                             is_binary=True)
Example 7
def handle_uploaded_dataset_file(filename,
                                 datatypes_registry,
                                 ext='auto',
                                 is_multi_byte=False):
    is_valid, ext = handle_compressed_file(filename,
                                           datatypes_registry,
                                           ext=ext)

    if not is_valid:
        raise InappropriateDatasetContentError(
            'The compressed uploaded file contains inappropriate content.')

    if ext in AUTO_DETECT_EXTENSIONS:
        ext = guess_ext(filename,
                        sniff_order=datatypes_registry.sniff_order,
                        is_multi_byte=is_multi_byte)

    if check_binary(filename):
        if not Binary.is_ext_unsniffable(
                ext) and not datatypes_registry.get_datatype_by_extension(
                    ext).sniff(filename):
            raise InappropriateDatasetContentError(
                'The binary uploaded file contains inappropriate content.')
    elif check_html(filename):
        raise InappropriateDatasetContentError(
            'The uploaded file contains inappropriate HTML content.')
    return ext
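
A hypothetical call site for the function above, assuming a configured datatypes registry and a logger are in scope; it returns the resolved extension or raises InappropriateDatasetContentError when the content checks fail:

try:
    ext = handle_uploaded_dataset_file('/tmp/upload_1.dat', datatypes_registry)
except InappropriateDatasetContentError as err:
    log.warning('Upload rejected: %s', err)
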
Example 8
 def __init__(self, **kwd):
     Binary.__init__(self, **kwd)
     self.add_composite_file('shapefile.shp', description='Geometry File (shp)', is_binary=True, optional=False)
     self.add_composite_file('shapefile.shx', description='Geometry index File (shx)', is_binary=True, optional=False)
     self.add_composite_file('shapefile.dbf', description='Columnar attributes for each shape (dbf)', is_binary=True, optional=False)
     # optional
     self.add_composite_file('shapefile.prj', description='Projection description (prj)', is_binary=False, optional=True)
     self.add_composite_file('shapefile.sbn', description='Spatial index of the features (sbn)', is_binary=True, optional=True)
     self.add_composite_file('shapefile.sbx', description='Spatial index of the features (sbx)', is_binary=True, optional=True)
     self.add_composite_file('shapefile.fbn', description='Read only spatial index of the features (fbn)', is_binary=True, optional=True)
     self.add_composite_file('shapefile.fbx', description='Read only spatial index of the features (fbx)', is_binary=True, optional=True)
     self.add_composite_file('shapefile.ain', description='Attribute index of the active fields in a table (ain)', is_binary=True, optional=True)
     self.add_composite_file('shapefile.aih', description='Attribute index of the active fields in a table (aih)', is_binary=True, optional=True)
     self.add_composite_file('shapefile.atx', description='Attribute index for the dbf file (atx)', is_binary=True, optional=True)
     self.add_composite_file('shapefile.ixs', description='Geocoding index (ixs)', is_binary=True, optional=True)
     self.add_composite_file('shapefile.mxs', description='Geocoding index in ODB format (mxs)', is_binary=True, optional=True)
     self.add_composite_file('shapefile.shp.xml', description='Geospatial metadata in XML format (xml)', is_binary=False, optional=True)
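
Only the shp/shx/dbf trio is declared non-optional above, matching the ESRI shapefile convention that these three files are mandatory. The same minimum-completeness rule as a standalone sketch:

import os

def has_required_shapefile_parts(directory):
    required = ('shapefile.shp', 'shapefile.shx', 'shapefile.dbf')
    return all(os.path.exists(os.path.join(directory, name)) for name in required)
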
Example 9
 def __init__(self, **kwd):
     """
         A Fastsearch Index consists of a binary file with the fingerprints
         and a pointer to the actual molecule file.
     """
     Binary.__init__(self, **kwd)
     self.add_composite_file('molecule.fs', is_binary=True,
                             description='OpenBabel Fastsearch Index')
     self.add_composite_file('molecule.sdf', optional=True,
                             is_binary=False, description='Molecule File')
     self.add_composite_file('molecule.smi', optional=True,
                             is_binary=False, description='Molecule File')
     self.add_composite_file('molecule.inchi', optional=True,
                             is_binary=False, description='Molecule File')
     self.add_composite_file('molecule.mol2', optional=True,
                             is_binary=False, description='Molecule File')
     self.add_composite_file('molecule.cml', optional=True,
                             is_binary=False, description='Molecule File')
Example 10
    def __init__(self, **kwd):
        Binary.__init__(self, **kwd)

        """The header file. Provides information about dimensions, identification, and processing history."""
        self.add_composite_file(
            'hdr',
            description='The Analyze75 header file.',
            is_binary=False)

        """The image file.  Image data, whose data type and ordering are described by the header file."""
        self.add_composite_file(
            'img',
            description='The Analyze75 image file.',
            is_binary=True)

        """The optional t2m file."""
        self.add_composite_file(
            't2m',
            description='The Analyze75 t2m file.',
            optional=True, is_binary=True)
Example 11
def handle_uploaded_dataset_file(filename, datatypes_registry, ext='auto', is_multi_byte=False):
    is_valid, ext = handle_compressed_file(filename, datatypes_registry, ext=ext)

    if not is_valid:
        raise InappropriateDatasetContentError('The compressed uploaded file contains inappropriate content.')

    if ext in AUTO_DETECT_EXTENSIONS:
        ext = guess_ext(filename, sniff_order=datatypes_registry.sniff_order, is_multi_byte=is_multi_byte)

    if check_binary(filename):
        if not Binary.is_ext_unsniffable(ext) and not datatypes_registry.get_datatype_by_extension(ext).sniff(filename):
            raise InappropriateDatasetContentError('The binary uploaded file contains inappropriate content.')
    elif check_html(filename):
        raise InappropriateDatasetContentError('The uploaded file contains inappropriate HTML content.')
    return ext
Example 12
            opt_text = ''
            if composite_file.optional:
                opt_text = ' (optional)'
            if composite_file.get('description'):
                rval.append(
                    '<li><a href="%s" type="text/plain">%s (%s)</a>%s</li>' %
                    (fn, fn, composite_file.get('description'), opt_text))
            else:
                rval.append(
                    '<li><a href="%s" type="text/plain">%s</a>%s</li>' %
                    (fn, fn, opt_text))
        rval.append('</ul></div></html>')
        return "\n".join(rval)


Binary.register_sniffable_binary_format("wiff", "wiff", Wiff)


class PepXmlReport(Tabular):
    """pepxml converted to tabular report"""
    edam_data = "data_2536"
    file_ext = "pepxml.tsv"

    def __init__(self, **kwd):
        super(PepXmlReport, self).__init__(**kwd)
        self.column_names = [
            'Protein', 'Peptide', 'Assumed Charge',
            'Neutral Pep Mass (calculated)', 'Neutral Mass', 'Retention Time',
            'Start Scan', 'End Scan', 'Search Engine',
            'PeptideProphet Probability', 'Interprophet Probability'
        ]
Example 13
 def __init__( self, **kwd ):
     Binary.__init__( self, **kwd )
Example 14
 def __init__(self, **kwd):
     Binary.__init__(self, **kwd)
Example 15
        """
        Checking if the file is in FCS format. Should read FCS2.0, FCS3.0
        and FCS3.1
        """
        r.packages.importr("flowCore")
        rlib = r.packages.packages
        try:
            fcsobject = rlib.flowCore.isFCSfile(filename)
            return list(fcsobject)[0]
        except:
            return False

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'application/octet-stream'
Binary.register_sniffable_binary_format("fcs","fcs",FCS)

class FlowText(Tabular):
    """Class describing an Flow Text file"""
    file_ext = "flowtext"

    def set_peek(self, dataset, is_multi_byte=False):
        if not dataset.dataset.purged:
            dataset.peek = "Text Flow file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        try:
Example 16
class Pdf( Image ):
    edam_format = "format_3508"
    file_ext = "pdf"

    def sniff(self, filename):
        """Determine if the file is in pdf format."""
        headers = get_headers(filename, None, 1)
        try:
            if headers[0][0].startswith("%PDF"):
                return True
            else:
                return False
        except IndexError:
            return False

Binary.register_sniffable_binary_format("pdf", "pdf", Pdf)

def create_applet_tag_peek( class_name, archive, params ):
    text = """
<object classid="java:%s"
      type="application/x-java-applet"
      height="30" width="200" align="center" >
      <param name="archive" value="%s"/>""" % ( class_name, archive )
    for name, value in params.iteritems():
        text += """<param name="%s" value="%s"/>""" % ( name, value )
    text += """
<object classid="clsid:8AD9C840-044E-11D1-B3E9-00805F499D93"
        height="30" width="200" >
        <param name="code" value="%s" />
        <param name="archive" value="%s"/>""" % ( class_name, archive )
    for name, value in params.iteritems():
Example 17
"""
k-mer count and presence
"""

from galaxy.datatypes.binary import Binary

import logging

log = logging.getLogger(__name__)


class Count(Binary):
    def __init__(self, **kwd):
        Binary.__init__(self, **kwd)


class Presence(Binary):
    def __init__(self, **kwd):
        Binary.__init__(self, **kwd)


Binary.register_unsniffable_binary_ext("ct")
Binary.register_unsniffable_binary_ext("pt")
Example 18
def add_file(dataset, registry, json_file, output_path):
    data_type = None
    line_count = None
    converted_path = None
    stdout = None
    link_data_only = dataset.get('link_data_only', 'copy_files')
    in_place = dataset.get('in_place', True)
    purge_source = dataset.get('purge_source', True)
    try:
        ext = dataset.file_type
    except AttributeError:
        file_err(
            'Unable to process uploaded file, missing file_type parameter.',
            dataset, json_file)
        return

    if dataset.type == 'url':
        try:
            page = urlopen(
                dataset.path)  # page will be .close()ed by sniff methods
            temp_name, dataset.is_multi_byte = sniff.stream_to_file(
                page,
                prefix='url_paste',
                source_encoding=util.get_charset_from_http_headers(
                    page.headers))
        except Exception as e:
            file_err('Unable to fetch %s\n%s' % (dataset.path, str(e)),
                     dataset, json_file)
            return
        dataset.path = temp_name
    # See if we have an empty file
    if not os.path.exists(dataset.path):
        file_err('Uploaded temporary file (%s) does not exist.' % dataset.path,
                 dataset, json_file)
        return
    if not os.path.getsize(dataset.path) > 0:
        file_err('The uploaded file is empty', dataset, json_file)
        return
    if not dataset.type == 'url':
        # Already set is_multi_byte above if type == 'url'
        try:
            dataset.is_multi_byte = multi_byte.is_multi_byte(
                codecs.open(dataset.path, 'r', 'utf-8').read(100))
        except UnicodeDecodeError as e:
            dataset.is_multi_byte = False
    # Is dataset an image?
    i_ext = get_image_ext(dataset.path)
    if i_ext:
        ext = i_ext
        data_type = ext
    # Is dataset content multi-byte?
    elif dataset.is_multi_byte:
        data_type = 'multi-byte char'
        ext = sniff.guess_ext(dataset.path,
                              registry.sniff_order,
                              is_multi_byte=True)
    # Is dataset content supported sniffable binary?
    else:
        # FIXME: This ignores the declared sniff order in datatype_conf.xml
        # resulting in improper behavior
        type_info = Binary.is_sniffable_binary(dataset.path)
        if type_info:
            data_type = type_info[0]
            ext = type_info[1]
    if not data_type:
        root_datatype = registry.get_datatype_by_extension(dataset.file_type)
        if getattr(root_datatype, 'compressed', False):
            data_type = 'compressed archive'
            ext = dataset.file_type
        else:
            # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress
            is_gzipped, is_valid = check_gzip(dataset.path)
            if is_gzipped and not is_valid:
                file_err(
                    'The gzipped uploaded file contains inappropriate content',
                    dataset, json_file)
                return
            elif is_gzipped and is_valid:
                if link_data_only == 'copy_files':
                    # We need to uncompress the temp_name file, but BAM files must remain compressed in the BGZF format
                    CHUNK_SIZE = 2**20  # 1Mb
                    fd, uncompressed = tempfile.mkstemp(
                        prefix='data_id_%s_upload_gunzip_' %
                        dataset.dataset_id,
                        dir=os.path.dirname(output_path),
                        text=False)
                    gzipped_file = gzip.GzipFile(dataset.path, 'rb')
                    while 1:
                        try:
                            chunk = gzipped_file.read(CHUNK_SIZE)
                        except IOError:
                            os.close(fd)
                            os.remove(uncompressed)
                            file_err('Problem decompressing gzipped data',
                                     dataset, json_file)
                            return
                        if not chunk:
                            break
                        os.write(fd, chunk)
                    os.close(fd)
                    gzipped_file.close()
                    # Replace the gzipped file with the decompressed file if it's safe to do so
                    if dataset.type in ('server_dir',
                                        'path_paste') or not in_place:
                        dataset.path = uncompressed
                    else:
                        shutil.move(uncompressed, dataset.path)
                    os.chmod(dataset.path, 0o644)
                if dataset.name.endswith('.gz'):
                    dataset.name = dataset.name[:-len('.gz')]
                data_type = 'gzip'
            if not data_type and bz2 is not None:
                # See if we have a bz2 file, much like gzip
                is_bzipped, is_valid = check_bz2(dataset.path)
                if is_bzipped and not is_valid:
                    file_err(
                        'The bz2 compressed uploaded file contains inappropriate content',
                        dataset, json_file)
                    return
                elif is_bzipped and is_valid:
                    if link_data_only == 'copy_files':
                        # We need to uncompress the temp_name file
                        CHUNK_SIZE = 2**20  # 1Mb
                        fd, uncompressed = tempfile.mkstemp(
                            prefix='data_id_%s_upload_bunzip2_' %
                            dataset.dataset_id,
                            dir=os.path.dirname(output_path),
                            text=False)
                        bzipped_file = bz2.BZ2File(dataset.path, 'rb')
                        while 1:
                            try:
                                chunk = bzipped_file.read(CHUNK_SIZE)
                            except IOError:
                                os.close(fd)
                                os.remove(uncompressed)
                                file_err(
                                    'Problem decompressing bz2 compressed data',
                                    dataset, json_file)
                                return
                            if not chunk:
                                break
                            os.write(fd, chunk)
                        os.close(fd)
                        bzipped_file.close()
                        # Replace the bzipped file with the decompressed file if it's safe to do so
                        if dataset.type in ('server_dir',
                                            'path_paste') or not in_place:
                            dataset.path = uncompressed
                        else:
                            shutil.move(uncompressed, dataset.path)
                        os.chmod(dataset.path, 0o644)
                    if dataset.name.endswith('.bz2'):
                        dataset.name = dataset.name[:-len('.bz2')]
                    data_type = 'bz2'
            if not data_type:
                # See if we have a zip archive
                is_zipped = check_zip(dataset.path)
                if is_zipped:
                    if link_data_only == 'copy_files':
                        CHUNK_SIZE = 2**20  # 1Mb
                        uncompressed = None
                        uncompressed_name = None
                        unzipped = False
                        z = zipfile.ZipFile(dataset.path)
                        for name in z.namelist():
                            if name.endswith('/'):
                                continue
                            if unzipped:
                                stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.'
                                break
                            fd, uncompressed = tempfile.mkstemp(
                                prefix='data_id_%s_upload_zip_' %
                                dataset.dataset_id,
                                dir=os.path.dirname(output_path),
                                text=False)
                            if sys.version_info[:2] >= (2, 6):
                                zipped_file = z.open(name)
                                while 1:
                                    try:
                                        chunk = zipped_file.read(CHUNK_SIZE)
                                    except IOError:
                                        os.close(fd)
                                        os.remove(uncompressed)
                                        file_err(
                                            'Problem decompressing zipped data',
                                            dataset, json_file)
                                        return
                                    if not chunk:
                                        break
                                    os.write(fd, chunk)
                                os.close(fd)
                                zipped_file.close()
                                uncompressed_name = name
                                unzipped = True
                            else:
                                # python < 2.6 doesn't have a way to read members in chunks(!)
                                try:
                                    outfile = open(uncompressed, 'wb')
                                    outfile.write(z.read(name))
                                    outfile.close()
                                    uncompressed_name = name
                                    unzipped = True
                                except IOError:
                                    os.close(fd)
                                    os.remove(uncompressed)
                                    file_err(
                                        'Problem decompressing zipped data',
                                        dataset, json_file)
                                    return
                        z.close()
                        # Replace the zipped file with the decompressed file if it's safe to do so
                        if uncompressed is not None:
                            if dataset.type in ('server_dir',
                                                'path_paste') or not in_place:
                                dataset.path = uncompressed
                            else:
                                shutil.move(uncompressed, dataset.path)
                            os.chmod(dataset.path, 0o644)
                            dataset.name = uncompressed_name
                    data_type = 'zip'
            if not data_type:
                # TODO refactor this logic.  check_binary isn't guaranteed to be
                # correct since it only looks at whether the first 100 chars are
                # printable or not.  If someone specifies a known unsniffable
                # binary datatype and check_binary fails, the file gets mangled.
                if check_binary(dataset.path) or Binary.is_ext_unsniffable(
                        dataset.file_type):
                    # We have a binary dataset, but it is not Bam, Sff or Pdf
                    data_type = 'binary'
                    # binary_ok = False
                    parts = dataset.name.split(".")
                    if len(parts) > 1:
                        ext = parts[-1].strip().lower()
                        if not Binary.is_ext_unsniffable(ext):
                            file_err(
                                'The uploaded binary file contains inappropriate content',
                                dataset, json_file)
                            return
                        elif Binary.is_ext_unsniffable(
                                ext) and dataset.file_type != ext:
                            err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % (
                                ext.capitalize(), ext)
                            file_err(err_msg, dataset, json_file)
                            return
            if not data_type:
                # We must have a text file
                if check_html(dataset.path):
                    file_err(
                        'The uploaded file contains inappropriate HTML content',
                        dataset, json_file)
                    return
            if data_type != 'binary':
                if link_data_only == 'copy_files':
                    if dataset.type in ('server_dir',
                                        'path_paste') and data_type not in [
                                            'gzip', 'bz2', 'zip'
                                        ]:
                        in_place = False
                    # Convert universal line endings to Posix line endings, but allow the user to turn it off,
                    # so that it becomes possible to upload gzip, bz2 or zip files with binary data without
                    # corrupting the content of those files.
                    if dataset.to_posix_lines:
                        tmpdir = output_adjacent_tmpdir(output_path)
                        tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id
                        if dataset.space_to_tab:
                            line_count, converted_path = sniff.convert_newlines_sep2tabs(
                                dataset.path,
                                in_place=in_place,
                                tmp_dir=tmpdir,
                                tmp_prefix=tmp_prefix)
                        else:
                            line_count, converted_path = sniff.convert_newlines(
                                dataset.path,
                                in_place=in_place,
                                tmp_dir=tmpdir,
                                tmp_prefix=tmp_prefix)
                if dataset.file_type == 'auto':
                    ext = sniff.guess_ext(dataset.path, registry.sniff_order)
                else:
                    ext = dataset.file_type
                data_type = ext
    # Save job info for the framework
    if ext == 'auto' and dataset.ext:
        ext = dataset.ext
    if ext == 'auto':
        ext = 'data'
    datatype = registry.get_datatype_by_extension(ext)
    if dataset.type in ('server_dir',
                        'path_paste') and link_data_only == 'link_to_files':
        # Never alter a file that will not be copied to Galaxy's local file store.
        if datatype.dataset_content_needs_grooming(dataset.path):
            err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
                '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
            file_err(err_msg, dataset, json_file)
            return
    if link_data_only == 'copy_files' and dataset.type in (
            'server_dir',
            'path_paste') and data_type not in ['gzip', 'bz2', 'zip']:
        # Move the dataset to its "real" path
        if converted_path is not None:
            shutil.copy(converted_path, output_path)
            try:
                os.remove(converted_path)
            except:
                pass
        else:
            # This should not happen, but it's here just in case
            shutil.copy(dataset.path, output_path)
    elif link_data_only == 'copy_files':
        if purge_source:
            shutil.move(dataset.path, output_path)
        else:
            shutil.copy(dataset.path, output_path)
    # Write the job info
    stdout = stdout or 'uploaded %s file' % data_type
    info = dict(type='dataset',
                dataset_id=dataset.dataset_id,
                ext=ext,
                stdout=stdout,
                name=dataset.name,
                line_count=line_count)
    if dataset.get('uuid', None) is not None:
        info['uuid'] = dataset.get('uuid')
    json_file.write(dumps(info) + "\n")

    if link_data_only == 'copy_files' and datatype.dataset_content_needs_grooming(
            output_path):
        # Groom the dataset content if necessary
        datatype.groom_dataset_content(output_path)
Example 19
            return "Augustus model (%s)" % (nice_size(dataset.get_size()))

    def sniff(self, filename):
        """
        Augustus archives always contain the same files
        """
        try:
            if filename and tarfile.is_tarfile(filename):
                with tarfile.open(filename, 'r') as temptar:
                    for f in temptar:
                        if not f.isfile():
                            continue
                        if f.name.endswith('_exon_probs.pbl') \
                           or f.name.endswith('_igenic_probs.pbl') \
                           or f.name.endswith('_intron_probs.pbl') \
                           or f.name.endswith('_metapars.cfg') \
                           or f.name.endswith('_metapars.utr.cfg') \
                           or f.name.endswith('_parameters.cfg') \
                           or f.name.endswith('_parameters.cgp.cfg') \
                           or f.name.endswith('_utr_probs.pbl') \
                           or f.name.endswith('_weightmatrix.txt'):
                            return True
                        else:
                            return False
        except Exception as e:
            log.warning('%s, sniff Exception: %s', self, e)
        return False


Binary.register_sniffable_binary_format("augustus", "augustus", Augustus)
Example 20
 image = check_image(dataset.path)
 if image:
     if not PIL:
         image = None
     # get_image_ext() returns None if not a supported Image type
     ext = get_image_ext(dataset.path, image)
     data_type = ext
 # Is dataset content multi-byte?
 elif dataset.is_multi_byte:
     data_type = 'multi-byte char'
     ext = sniff.guess_ext(dataset.path, is_multi_byte=True)
 # Is dataset content supported sniffable binary?
 else:
     # FIXME: This ignores the declared sniff order in datatype_conf.xml
     # resulting in improper behavior
     type_info = Binary.is_sniffable_binary(dataset.path)
     if type_info:
         data_type = type_info[0]
         ext = type_info[1]
 if not data_type:
     root_datatype = registry.get_datatype_by_extension(dataset.file_type)
     if getattr(root_datatype, 'compressed', False):
         data_type = 'compressed archive'
         ext = dataset.file_type
     else:
         # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress
         is_gzipped, is_valid = check_gzip(dataset.path)
         if is_gzipped and not is_valid:
             file_err(
                 'The gzipped uploaded file contains inappropriate content',
                 dataset, json_file)
Example 21
"""
CEL datatype sniffer for Command Console version 1 format (binary files).
http://media.affymetrix.com/support/developer/powertools/changelog/gcos-agcc/cel.html#calvin
http://media.affymetrix.com/support/developer/powertools/changelog/gcos-agcc/generic.html

"""
import data
from galaxy.datatypes.binary import Binary

class CelCc1( Binary ):
    file_ext = "celcc1"

    def sniff(self, filename):
        # Determine if the file is in CEL Command Console version 1 format.
        # Filename is in the format 'upload_file_data_jqRiCG', therefore we must check the header bytes.
        # Get the first 2 'UBYTE' (8bit unsigned). First is magic number 59, second is version number (always 1).

        with open(filename, "rb") as f:
            byte = f.read(2)

        try:
            if byte[0:2] == b'\x3B\x01':
                return True
            else:
                return False
        except IndexError:
            return False

Binary.register_sniffable_binary_format("celcc1", "celcc1", CelCc1)

Example 22
            return "Augustus model (%s)" % (nice_size(dataset.get_size()))

    def sniff(self, filename):
        """
        Augustus archives always contain the same files
        """
        try:
            if filename and tarfile.is_tarfile(filename):
                with tarfile.open(filename, 'r') as temptar:
                    for f in temptar:
                        if not f.isfile():
                            continue
                        if f.name.endswith('_exon_probs.pbl') \
                           or f.name.endswith('_igenic_probs.pbl') \
                           or f.name.endswith('_intron_probs.pbl') \
                           or f.name.endswith('_metapars.cfg') \
                           or f.name.endswith('_metapars.utr.cfg') \
                           or f.name.endswith('_parameters.cfg') \
                           or f.name.endswith('_parameters.cgp.cfg') \
                           or f.name.endswith('_utr_probs.pbl') \
                           or f.name.endswith('_weightmatrix.txt'):
                            return True
                        else:
                            return False
        except Exception as e:
            log.warning('%s, sniff Exception: %s', self, e)
        return False


Binary.register_sniffable_binary_format("augustus", "augustus", Augustus)
Example 23
    def __init__(self, **kwd):
        Binary.__init__(self, **kwd)

        self.add_composite_file('analysis.baf',
                                description='analysis.baf file.',
                                optional=False,
                                is_binary=True)

        self.add_composite_file('analysis.baf_idx',
                                description='analysis.baf_idx file.',
                                optional=False,
                                is_binary=True)

        self.add_composite_file('analysis.baf_xtr',
                                description='analysis.baf_xtr file.',
                                optional=False,
                                is_binary=True)

        self.add_composite_file('analysis.content',
                                description='analysis.content file.',
                                optional=False,
                                is_binary=True)

        self.add_composite_file(
            'analysis.0.DataAnalysis.method',
            description='analysis.0.DataAnalysis.method file.',
            optional=False,
            is_binary=True)

        self.add_composite_file('analysis.0.result_c',
                                description='analysis.0.result_c file.',
                                optional=False,
                                is_binary=True)

        self.add_composite_file('calib.bin',
                                description='calib.bin file.',
                                optional=False,
                                is_binary=True)

        self.add_composite_file(
            'desktop.ini',
            description='desktop.ini file that contains some metadata.',
            optional=True,
            is_binary=False)

        self.add_composite_file('ms-waters-pda.hss',
                                description='ms-waters-pda.hss file.',
                                optional=False,
                                is_binary=False)

        self.add_composite_file('*.hdx',
                                description='any .hdx file.',
                                optional=False,
                                is_binary=True)

        self.add_composite_file('*.u2',
                                description='any .u2 file.',
                                optional=False,
                                is_binary=True)

        self.add_composite_file('*.und',
                                description='any .und file.',
                                optional=False,
                                is_binary=True)

        self.add_composite_file('*.m',
                                description='any .m folder.',
                                optional=False,
                                is_binary=True)

        self.add_composite_file('*.m/DataAnalysis.Method',
                                description='DataAnalysis.Method file.',
                                optional=False,
                                is_binary=True)

        self.add_composite_file('*.m/desktop.ini',
                                description='*.m/desktop.ini file.',
                                optional=True,
                                is_binary=False)

        self.add_composite_file('*.m/hystar.method',
                                description='*.m/hystar.method file.',
                                optional=True,
                                is_binary=True)

        self.add_composite_file(
            '*.m/microTOFQAcquisition.method',
            description='*.m/microTOFQAcquisition.method file.',
            optional=False,
            is_binary=True)

        self.add_composite_file('*.m/submethods.xml',
                                description='*.m/submethods.xml file.',
                                optional=False,
                                is_binary=True)

        self.add_composite_file('*.mcf',
                                description='any .mcf file.',
                                optional=False,
                                is_binary=True)

        self.add_composite_file('*.mcf_idx',
                                description='any .mcf_idx file.',
                                optional=False,
                                is_binary=True)

        self.add_composite_file('Storage.mcf_idx',
                                description='Storage.mcf_idx file.',
                                optional=False,
                                is_binary=True)

        self.add_composite_file(
            'SampleInfo.xml',
            description='SampleInfo.xml file that contains some metadata.',
            optional=False,
            is_binary=False)

        self.add_composite_file('NuGenesisTemplate.txt',
                                description='NuGenesisTemplate.txt file.',
                                optional=True,
                                is_binary=False)

        self.add_composite_file('LCParms.txt',
                                description='LCParms.txt file.',
                                optional=False,
                                is_binary=False)

        self.add_composite_file('HS_columns.xmc',
                                description='HS_columns.xmc file.',
                                optional=False,
                                is_binary=True)

        self.add_composite_file('BackgroundLineNeg.ami',
                                description='BackgroundLineNeg.ami file.',
                                optional=True,
                                is_binary=True)

        self.add_composite_file('BackgroundUV.ami',
                                description='BackgroundUV.ami file.',
                                optional=True,
                                is_binary=True)

        self.add_composite_file('Calibrator.ami',
                                description='Calibrator.ami file.',
                                optional=False,
                                is_binary=True)

        self.add_composite_file('DensViewNeg.ami',
                                description='DensViewNeg.ami file.',
                                optional=True,
                                is_binary=True)

        self.add_composite_file('DensViewNegBgnd.ami',
                                description='DensViewNegBgnd.ami file.',
                                optional=True,
                                is_binary=True)
Example 24
        try:
            rscript = 'checkFCS.R'
            fcs_check = subprocess.check_output([rscript, filename])
            if re.search('TRUE', str(fcs_check)):
                return True
            else:
                return False
        except:
            return False

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'application/octet-stream'


Binary.register_sniffable_binary_format("fcs", "fcs", FCS)


class FlowFrame( Binary ):
    """R Object containing flowFrame saved with saveRDS"""
    file_ext = 'flowframe'

    def set_peek(self, dataset, is_multi_byte=False):
        if not dataset.dataset.purged:
            dataset.peek = "Binary RDS flowFrame file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
Example 25
from galaxy.datatypes.binary import Binary
from galaxy.datatypes.xml import GenericXml


class Group(Binary):
    """Class describing a ProteinPilot group files"""
    file_ext = "group"


Binary.register_unsniffable_binary_ext('group')


class ProteinPilotXml(GenericXml):
    file_ext = "proteinpilot.xml"
Example 26
"""
no_unzip_datatypes

A perfect clone of the prims masscomb datatype FileSet
"""

import logging
import zipfile

from galaxy.datatypes.binary import Binary

log = logging.getLogger(__name__)


class NoUnzip(Binary):
    """FileSet containing N files"""
    file_ext = "no_unzip.zip"
    blurb = "(zipped) FileSet containing multiple files"

    def sniff(self, filename):
        # If the zip archive contains more than one member, treat it as a FileSet
        with zipfile.ZipFile(filename) as zf:
            return len(zf.infolist()) > 1


# the if is just for backwards compatibility...could remove this at some point
if hasattr(Binary, 'register_sniffable_binary_format'):
    Binary.register_sniffable_binary_format('NoUnzip', 'no_unzip.zip', NoUnzip)
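
One way to exercise the sniff rule above (more than one zip member) is to build a two-member archive by hand; the path below is purely illustrative:

import zipfile

with zipfile.ZipFile('/tmp/pair.no_unzip.zip', 'w') as zf:
    zf.writestr('a.txt', 'first')
    zf.writestr('b.txt', 'second')

print(NoUnzip().sniff('/tmp/pair.no_unzip.zip'))  # expected: True
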
Example 27
            else:
                rval.append(
                    '<li><a href="%s" type="text/plain">%s</a>%s</li>' %
                    (fn, fn, opt_text))
        rval.append('</ul></div></html>')
        return "\n".join(rval)

    def set_peek(self, dataset, is_multi_byte=False):
        if not dataset.dataset.purged:
            dataset.peek = "Bruker MS1 RAW file"
            dataset.blurb = nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        try:
            return dataset.peek
        except:
            return "Bruker MS1 RAW file (%s)" % (nice_size(dataset.get_size()))


Binary.register_sniffable_binary_format("bruker.d", "d", BrukerMS1RAW)


class nmrML(MetabolomicsXml):
    """nmrML data"""
    file_ext = "nmrml"
    blurb = 'nmrML NMR data'
    root = "nmrML"
Example 28
            header = open(filename, 'rb').read(8)
            if binascii.b2a_hex(header) == binascii.hexlify(b'NCBI.sra'):
                return True
            else:
                return False
        except:
            return False

    def set_peek(self, dataset, is_multi_byte=False):
        if not dataset.dataset.purged:
            dataset.peek = 'Binary sra file'
            dataset.blurb = nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        try:
            return dataset.peek
        except:
            return 'Binary sra file (%s)' % (nice_size(dataset.get_size()))


Binary.register_sniffable_binary_format('sra', 'sra', Sra)
Example 29
 image = check_image(dataset.path)
 if image:
     if not PIL:
         image = None
     # get_image_ext() returns None if not a supported Image type
     ext = get_image_ext(dataset.path, image)
     data_type = ext
 # Is dataset content multi-byte?
 elif dataset.is_multi_byte:
     data_type = 'multi-byte char'
     ext = sniff.guess_ext(dataset.path, is_multi_byte=True)
 # Is dataset content supported sniffable binary?
 else:
     # FIXME: This ignores the declared sniff order in datatype_conf.xml
     # resulting in improper behavior
     type_info = Binary.is_sniffable_binary(dataset.path)
     if type_info:
         data_type = type_info[0]
         ext = type_info[1]
 if not data_type:
     root_datatype = registry.get_datatype_by_extension(dataset.file_type)
     if getattr(root_datatype, 'compressed', False):
         data_type = 'compressed archive'
         ext = dataset.file_type
     else:
         # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress
         is_gzipped, is_valid = check_gzip(dataset.path)
         if is_gzipped and not is_valid:
             file_err(
                 'The gzipped uploaded file contains inappropriate content',
                 dataset, json_file)
Example 30
from galaxy.datatypes.binary import Binary


class Sf3(Binary):
    """Class describing a Scaffold SF3 files"""
    file_ext = "sf3"

Binary.register_unsniffable_binary_ext('sf3')
Example 31
    def sniff(self, filename):
        return super(PlyAscii, self).sniff(filename, subtype='ascii')


class PlyBinary(Ply, Binary):

    file_ext = "plybinary"

    def __init__(self, **kwd):
        Binary.__init__(self, **kwd)

    def sniff(self, filename):
        return super(PlyBinary, self).sniff(filename, subtype='binary')


Binary.register_sniffable_binary_format("plybinary", "plybinary", PlyBinary)


class Vtk(object):
    """
    The Visualization Toolkit provides a number of source and writer objects to
    read and write popular data file formats. The Visualization Toolkit also
    provides some of its own file formats.

    There are two different styles of file formats available in VTK. The simplest
    are the legacy, serial formats that are easy to read and write either by hand
    or programmatically. However, these formats are less flexible than the XML
    based file formats which support random access, parallel I/O, and portable
    data compression and are preferred to the serial VTK file formats whenever
    possible.
Example 32
    def sniff(self, filename):
        """ The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is binary.
        For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure
        """
        try:
            header = open(filename, 'rb').read(8)
            if binascii.b2a_hex(header) == binascii.hexlify(b"NCBI.sra"):
                return True
            else:
                return False
        except:
            return False

    def set_peek(self, dataset, is_multi_byte=False):
        if not dataset.dataset.purged:
            dataset.peek = "Binary sra file"
            dataset.blurb = nice_size(dataset.get_size())
        else:
            dataset.peek = "file does not exist"
            dataset.blurb = "file purged from disk"

    def display_peek(self, dataset):
        try:
            return dataset.peek
        except:
            return "Binary sra file (%s)" % (nice_size(dataset.get_size()))


Binary.register_sniffable_binary_format("sra", "sra", Sra)
Example 33
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
    def display_peek( self, dataset ):
        try:
            return dataset.peek
        except:
            return "Matlab Binary file (%s)" % ( data.nice_size( dataset.get_size() ) )

    def display_data(self, trans, dataset, preview=False, filename=None, to_ext=None, size=None, offset=None, **kwd):
        if preview:
            return "MATLAB data files cannot be previewed."
        else:
            return super(Matlab, self).display_data( trans, dataset, preview, filename, to_ext, size, offset, **kwd)
    
Binary.register_sniffable_binary_format("mat", "mat", Matlab)

class Wav(Binary):

    file_ext = "wav"
    def __init__( self, **kwd ):
        Binary.__init__( self, **kwd )
 
    def sniff( self, filename ):
        try:
            header = open( filename, 'rb' ).read(4)
            if header.startswith( b"RIFF" ):
                return True
            else: 
                return False
        except:
Example 34
"""
k-mer count and presence
"""

from galaxy.datatypes.binary import Binary

import logging

log = logging.getLogger(__name__)


class Count(Binary):

    def __init__(self, **kwd):
        Binary.__init__(self, **kwd)


class Presence(Binary):

    def __init__(self, **kwd):
        Binary.__init__(self, **kwd)

Binary.register_unsniffable_binary_ext("ct")
Binary.register_unsniffable_binary_ext("pt")
Example 35
 def __init__( self, **kwd ):
     Binary.__init__( self, **kwd )
     log.info('Creating cummeRbund CuffDataDB')
Example 36
        rval = ['<html><head><title>Wiff Composite Dataset </title></head><p/>']
        rval.append('<div>This composite dataset is composed of the following files:<p/><ul>')
        for composite_name, composite_file in self.get_composite_files(dataset=dataset).items():
            fn = composite_name
            opt_text = ''
            if composite_file.optional:
                opt_text = ' (optional)'
            if composite_file.get('description'):
                rval.append('<li><a href="%s" type="text/plain">%s (%s)</a>%s</li>' % (fn, fn, composite_file.get('description'), opt_text))
            else:
                rval.append('<li><a href="%s" type="text/plain">%s</a>%s</li>' % (fn, fn, opt_text))
        rval.append('</ul></div></html>')
        return "\n".join(rval)


Binary.register_sniffable_binary_format("wiff", "wiff", Wiff )


class PepXmlReport(Tabular):
    """pepxml converted to tabular report"""
    edam_data = "data_2536"
    file_ext = "pepxml.tsv"

    def __init__(self, **kwd):
        super(PepXmlReport, self).__init__(**kwd)
        self.column_names = ['Protein', 'Peptide', 'Assumed Charge', 'Neutral Pep Mass (calculated)', 'Neutral Mass', 'Retention Time', 'Start Scan', 'End Scan', 'Search Engine', 'PeptideProphet Probability', 'Interprophet Probability']

    def display_peek(self, dataset):
        """Returns formated html of peek"""
        return self.make_html_table(dataset, column_names=self.column_names)
Example 37
from galaxy.datatypes import data
from galaxy.datatypes.binary import Binary


class Cel(Binary):
    """Class for generic CEL binary format"""
    file_ext = "cel"


Binary.register_unsniffable_binary_ext("cel")
Example 38
 def __init__(self, **kwd):
     """Initialize binary datatype"""
     Binary.__init__(self, **kwd)
Example 39
            return dataset.peek
        except:
            return "HMMER3 database (multiple files)"

    def __init__(self, **kwd):
        Binary.__init__(self, **kwd)
        # Binary model
        self.add_composite_file('model.hmm.h3m', is_binary=True)
        # SSI index for binary model
        self.add_composite_file('model.hmm.h3i', is_binary=True)
        # Profiles (MSV part)
        self.add_composite_file('model.hmm.h3f', is_binary=True)
        # Profiles (remainder)
        self.add_composite_file('model.hmm.h3p', is_binary=True)

Binary.register_unsniffable_binary_ext("hmmpress")


class Stockholm_1_0( Text ):
    edam_data = "data_0863"
    edam_format = "format_1961"
    file_ext = "stockholm"

    MetadataElement( name="number_of_models", default=0, desc="Number of multiple alignments", readonly=True, visible=True, optional=True, no_value=0 )

    def set_peek( self, dataset, is_multi_byte=False ):
        if not dataset.dataset.purged:
            dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            if (dataset.metadata.number_of_models == 1):
                dataset.blurb = "1 alignment"
            else:
Example 40
""" Datatypes for Galaxy-M.
"""

from galaxy.datatypes.binary import (
    Binary,
    SQlite,
)


class SQliteSPS(SQlite):
    file_ext = "sps.sqlite"

Binary.register_sniffable_binary_format("sps.sqlite", "sps.sqlite", SQliteSPS)


class SQliteTM(SQlite):
    file_ext = "tm.sqlite"

Binary.register_sniffable_binary_format("tm.sqlite", "tm.sqlite", SQliteTM)


class SQliteEFS(SQlite):
    file_ext = "efs.sqlite"

Binary.register_sniffable_binary_format("efs.sqlite", "efs.sqlite", SQliteEFS)


class SQlitePPS(SQlite):
    file_ext = "pps.sqlite"

Binary.register_sniffable_binary_format("pps.sqlite", "pps.sqlite", SQlitePPS)
Example 41
        if match:
            x = match.group('resx')
            y = match.group('resy')
            fps = match.group('fps')
        else:
            x = y = fps = 0
        return x, y, fps

    def set_meta(self, dataset, **kwd):
        (x, y, fps) = self._get_resolution( dataset.file_name )
        dataset.metadata.resolution_y = y
        dataset.metadata.resolution_x = x
        dataset.metadata.fps = fps

class Mp4( Video ):
    file_ext = "mp4"

    def sniff(self, filename):
        with magic.Magic(flags=magic.MAGIC_MIME_TYPE) as m:
            return m.id_filename(filename) == 'video/mp4'
#Binary.register_unsniffable_binary_ext("mp4")

class Flv( Video ):
    file_ext = "flv"

    def sniff(self, filename):
        with magic.Magic(flags=magic.MAGIC_MIME_TYPE) as m:
            return m.id_filename(filename) == 'video/x-flv'

Binary.register_unsniffable_binary_ext("flv")
Example 42
        rval = ['<html><head><title>Wiff Composite Dataset </title></head><p/>']
        rval.append('<div>This composite dataset is composed of the following files:<p/><ul>')
        for composite_name, composite_file in self.get_composite_files(dataset=dataset).iteritems():
            fn = composite_name
            opt_text = ''
            if composite_file.optional:
                opt_text = ' (optional)'
            if composite_file.get('description'):
                rval.append('<li><a href="%s" type="text/plain">%s (%s)</a>%s</li>' % (fn, fn, composite_file.get('description'), opt_text))
            else:
                rval.append('<li><a href="%s" type="text/plain">%s</a>%s</li>' % (fn, fn, opt_text))
        rval.append('</ul></div></html>')
        return "\n".join(rval)

if hasattr(Binary, 'register_unsniffable_binary_ext'):
    Binary.register_unsniffable_binary_ext('wiff')


class IdpDB(Binary):
    file_ext = "idpDB"

if hasattr(Binary, 'register_unsniffable_binary_ext'):
    Binary.register_unsniffable_binary_ext('idpDB')


class PepXmlReport(Tabular):
    """pepxml converted to tabular report"""
    file_ext = "tsv"

    def __init__(self, **kwd):
        Tabular.__init__(self, **kwd)
Example 43
 image = check_image( dataset.path )
 if image:
     if not PIL:
         image = None
     # get_image_ext() returns None if not a supported Image type
     ext = get_image_ext( dataset.path, image )
     data_type = ext
 # Is dataset content multi-byte?
 elif dataset.is_multi_byte:
     data_type = 'multi-byte char'
     ext = sniff.guess_ext( dataset.path, is_multi_byte=True )
 # Is dataset content supported sniffable binary?
 else:
     # FIXME: This ignores the declared sniff order in datatype_conf.xml
     # resulting in improper behavior
     type_info = Binary.is_sniffable_binary( dataset.path )
     if type_info:
         data_type = type_info[0]
         ext = type_info[1]
 if not data_type:
     root_datatype = registry.get_datatype_by_extension( dataset.file_type )
     if getattr( root_datatype, 'compressed', False ):
         data_type = 'compressed archive'
         ext = dataset.file_type
     else:
         # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress
         is_gzipped, is_valid = check_gzip( dataset.path )
         if is_gzipped and not is_valid:
             file_err( 'The gzipped uploaded file contains inappropriate content', dataset, json_file )
             return
         elif is_gzipped and is_valid:
Example #44
0
        try:
            rscript = './checkFCS.R'
            fcs_check = subprocess.check_output([rscript, filename])
            if re.search('TRUE', str(fcs_check)):
                return True
            else:
                return False
        except Exception:
            # The R helper could not be run or rejected the file; not FCS.
            return False

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'application/octet-stream'


Binary.register_sniffable_binary_format("fcs", "fcs", FCS)


class FlowText(Tabular):
    """Class describing an Flow Text file"""
    file_ext = "flowtext"

    def set_peek(self, dataset, is_multi_byte=False):
        if not dataset.dataset.purged:
            dataset.peek = "Text Flow file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
Example #45
0
def add_file(dataset, registry, json_file, output_path):
    data_type = None
    line_count = None
    converted_path = None
    stdout = None
    link_data_only = dataset.get('link_data_only', 'copy_files')
    run_as_real_user = in_place = dataset.get('in_place', True)
    purge_source = dataset.get('purge_source', True)
    # in_place is True if there is no external chmod in place;
    # however, there are other cases where modifications should not occur in place,
    # e.g. when a file is added from a directory on the local file system (the ftp import folder or any other path).
    if dataset.type in ('server_dir', 'path_paste', 'ftp_import'):
        in_place = False
    check_content = dataset.get('check_content', True)
    auto_decompress = dataset.get('auto_decompress', True)
    try:
        ext = dataset.file_type
    except AttributeError:
        file_err('Unable to process uploaded file, missing file_type parameter.', dataset, json_file)
        return

    if dataset.type == 'url':
        try:
            page = urlopen(dataset.path)  # page will be .close()ed by sniff methods
            temp_name, dataset.is_multi_byte = sniff.stream_to_file(page, prefix='url_paste', source_encoding=util.get_charset_from_http_headers(page.headers))
        except Exception as e:
            file_err('Unable to fetch %s\n%s' % (dataset.path, str(e)), dataset, json_file)
            return
        dataset.path = temp_name
    # See if we have an empty file
    if not os.path.exists(dataset.path):
        file_err('Uploaded temporary file (%s) does not exist.' % dataset.path, dataset, json_file)
        return
    if not os.path.getsize(dataset.path) > 0:
        file_err('The uploaded file is empty', dataset, json_file)
        return
    if not dataset.type == 'url':
        # Already set is_multi_byte above if type == 'url'
        try:
            dataset.is_multi_byte = multi_byte.is_multi_byte(codecs.open(dataset.path, 'r', 'utf-8').read(100))
        except UnicodeDecodeError as e:
            dataset.is_multi_byte = False
    # Is dataset an image?
    i_ext = get_image_ext(dataset.path)
    if i_ext:
        ext = i_ext
        data_type = ext
    # Is dataset content multi-byte?
    elif dataset.is_multi_byte:
        data_type = 'multi-byte char'
        ext = sniff.guess_ext(dataset.path, registry.sniff_order, is_multi_byte=True)
    # Is dataset content supported sniffable binary?
    else:
        # FIXME: This ignores the declared sniff order in datatype_conf.xml
        # resulting in improper behavior
        type_info = Binary.is_sniffable_binary(dataset.path)
        if type_info:
            data_type = type_info[0]
            ext = type_info[1]
    if not data_type:
        root_datatype = registry.get_datatype_by_extension(dataset.file_type)
        if getattr(root_datatype, 'compressed', False):
            data_type = 'compressed archive'
            ext = dataset.file_type
        else:
            # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress
            is_gzipped, is_valid = check_gzip(dataset.path, check_content=check_content)
            if is_gzipped and not is_valid:
                file_err('The gzipped uploaded file contains inappropriate content', dataset, json_file)
                return
            elif is_gzipped and is_valid and auto_decompress:
                if link_data_only == 'copy_files':
                    # We need to uncompress the temp_name file, but BAM files must remain compressed in the BGZF format
                    CHUNK_SIZE = 2 ** 20  # 1Mb
                    fd, uncompressed = tempfile.mkstemp(prefix='data_id_%s_upload_gunzip_' % dataset.dataset_id, dir=os.path.dirname(output_path), text=False)
                    gzipped_file = gzip.GzipFile(dataset.path, 'rb')
                    while 1:
                        try:
                            chunk = gzipped_file.read(CHUNK_SIZE)
                        except IOError:
                            os.close(fd)
                            os.remove(uncompressed)
                            file_err('Problem decompressing gzipped data', dataset, json_file)
                            return
                        if not chunk:
                            break
                        os.write(fd, chunk)
                    os.close(fd)
                    gzipped_file.close()
                    # Replace the gzipped file with the decompressed file if it's safe to do so
                    if not in_place:
                        dataset.path = uncompressed
                    else:
                        shutil.move(uncompressed, dataset.path)
                    os.chmod(dataset.path, 0o644)
                # str.rstrip strips characters, not a suffix, so remove '.gz' explicitly
                if dataset.name.endswith('.gz'):
                    dataset.name = dataset.name[:-len('.gz')]
                data_type = 'gzip'
            if not data_type:
                # See if we have a bz2 file, much like gzip
                is_bzipped, is_valid = check_bz2(dataset.path, check_content)
                if is_bzipped and not is_valid:
                    file_err('The bz2 compressed uploaded file contains inappropriate content', dataset, json_file)
                    return
                elif is_bzipped and is_valid and auto_decompress:
                    if link_data_only == 'copy_files':
                        # We need to uncompress the temp_name file
                        CHUNK_SIZE = 2 ** 20  # 1Mb
                        fd, uncompressed = tempfile.mkstemp(prefix='data_id_%s_upload_bunzip2_' % dataset.dataset_id, dir=os.path.dirname(output_path), text=False)
                        bzipped_file = bz2.BZ2File(dataset.path, 'rb')
                        while 1:
                            try:
                                chunk = bzipped_file.read(CHUNK_SIZE)
                            except IOError:
                                os.close(fd)
                                os.remove(uncompressed)
                                file_err('Problem decompressing bz2 compressed data', dataset, json_file)
                                return
                            if not chunk:
                                break
                            os.write(fd, chunk)
                        os.close(fd)
                        bzipped_file.close()
                        # Replace the bzipped file with the decompressed file if it's safe to do so
                        if not in_place:
                            dataset.path = uncompressed
                        else:
                            shutil.move(uncompressed, dataset.path)
                        os.chmod(dataset.path, 0o644)
                    # As above, remove the '.bz2' suffix explicitly rather than with rstrip
                    if dataset.name.endswith('.bz2'):
                        dataset.name = dataset.name[:-len('.bz2')]
                    data_type = 'bz2'
            if not data_type:
                # See if we have a zip archive
                is_zipped = check_zip(dataset.path)
                if is_zipped and auto_decompress:
                    if link_data_only == 'copy_files':
                        CHUNK_SIZE = 2 ** 20  # 1Mb
                        uncompressed = None
                        uncompressed_name = None
                        unzipped = False
                        z = zipfile.ZipFile(dataset.path)
                        for name in z.namelist():
                            if name.endswith('/'):
                                continue
                            if unzipped:
                                stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.'
                                break
                            fd, uncompressed = tempfile.mkstemp(prefix='data_id_%s_upload_zip_' % dataset.dataset_id, dir=os.path.dirname(output_path), text=False)
                            if sys.version_info[:2] >= (2, 6):
                                zipped_file = z.open(name)
                                while 1:
                                    try:
                                        chunk = zipped_file.read(CHUNK_SIZE)
                                    except IOError:
                                        os.close(fd)
                                        os.remove(uncompressed)
                                        file_err('Problem decompressing zipped data', dataset, json_file)
                                        return
                                    if not chunk:
                                        break
                                    os.write(fd, chunk)
                                os.close(fd)
                                zipped_file.close()
                                uncompressed_name = name
                                unzipped = True
                            else:
                                # python < 2.6 doesn't have a way to read members in chunks(!)
                                try:
                                    outfile = open(uncompressed, 'wb')
                                    outfile.write(z.read(name))
                                    outfile.close()
                                    uncompressed_name = name
                                    unzipped = True
                                except IOError:
                                    os.close(fd)
                                    os.remove(uncompressed)
                                    file_err('Problem decompressing zipped data', dataset, json_file)
                                    return
                        z.close()
                        # Replace the zipped file with the decompressed file if it's safe to do so
                        if uncompressed is not None:
                            if not in_place:
                                dataset.path = uncompressed
                            else:
                                shutil.move(uncompressed, dataset.path)
                            os.chmod(dataset.path, 0o644)
                            dataset.name = uncompressed_name
                    data_type = 'zip'
            if not data_type:
                # TODO refactor this logic.  check_binary isn't guaranteed to be
                # correct since it only looks at whether the first 100 chars are
                # printable or not.  If someone specifies a known unsniffable
                # binary datatype and check_binary fails, the file gets mangled.
                if check_binary(dataset.path) or Binary.is_ext_unsniffable(dataset.file_type):
                    # We have a binary dataset, but it is not Bam, Sff or Pdf
                    data_type = 'binary'
                    # binary_ok = False
                    parts = dataset.name.split(".")
                    if len(parts) > 1:
                        ext = parts[-1].strip().lower()
                        if check_content and not Binary.is_ext_unsniffable(ext):
                            file_err('The uploaded binary file contains inappropriate content', dataset, json_file)
                            return
                        elif Binary.is_ext_unsniffable(ext) and dataset.file_type != ext:
                            err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % (ext.capitalize(), ext)
                            file_err(err_msg, dataset, json_file)
                            return
            if not data_type:
                # We must have a text file
                if check_content and check_html(dataset.path):
                    file_err('The uploaded file contains inappropriate HTML content', dataset, json_file)
                    return
            if data_type != 'binary':
                if link_data_only == 'copy_files' and data_type not in ('gzip', 'bz2', 'zip'):
                    # Convert universal line endings to Posix line endings if to_posix_lines is True
                    # and the data is not binary or gzip-, bz2- or zip-compressed.
                    if dataset.to_posix_lines:
                        tmpdir = output_adjacent_tmpdir(output_path)
                        tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id
                        if dataset.space_to_tab:
                            line_count, converted_path = sniff.convert_newlines_sep2tabs(dataset.path, in_place=in_place, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
                        else:
                            line_count, converted_path = sniff.convert_newlines(dataset.path, in_place=in_place, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
                if dataset.file_type == 'auto':
                    ext = sniff.guess_ext(dataset.path, registry.sniff_order)
                else:
                    ext = dataset.file_type
                data_type = ext
    # Save job info for the framework
    if ext == 'auto' and data_type == 'binary':
        ext = 'data'
    if ext == 'auto' and dataset.ext:
        ext = dataset.ext
    if ext == 'auto':
        ext = 'data'
    datatype = registry.get_datatype_by_extension(ext)
    if dataset.type in ('server_dir', 'path_paste') and link_data_only == 'link_to_files':
        # Never alter a file that will not be copied to Galaxy's local file store.
        if datatype.dataset_content_needs_grooming(dataset.path):
            err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
                '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
            file_err(err_msg, dataset, json_file)
            return
    if link_data_only == 'copy_files' and converted_path:
        # Move the dataset to its "real" path
        try:
            shutil.move(converted_path, output_path)
        except OSError as e:
            # We may not have permission to remove converted_path
            if e.errno != errno.EACCES:
                raise
    elif link_data_only == 'copy_files':
        if purge_source and not run_as_real_user:
            # if the upload tool runs as a real user the real user
            # can't move dataset.path as this path is owned by galaxy.
            shutil.move(dataset.path, output_path)
        else:
            shutil.copy(dataset.path, output_path)
    # Write the job info
    stdout = stdout or 'uploaded %s file' % data_type
    info = dict(type='dataset',
                dataset_id=dataset.dataset_id,
                ext=ext,
                stdout=stdout,
                name=dataset.name,
                line_count=line_count)
    if dataset.get('uuid', None) is not None:
        info['uuid'] = dataset.get('uuid')
    json_file.write(dumps(info) + "\n")
    if link_data_only == 'copy_files' and datatype and datatype.dataset_content_needs_grooming(output_path):
        # Groom the dataset content if necessary
        datatype.groom_dataset_content(output_path)
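
The gzip, bz2, and zip branches above all repeat the same chunked copy-and-swap pattern. Distilled into a standalone sketch (the function name and arguments here are hypothetical):

import gzip
import os
import shutil
import tempfile

CHUNK_SIZE = 2 ** 20  # 1 MB

def gunzip_in_place(path, work_dir):
    # Decompress `path` chunk by chunk into a temp file, then swap it in.
    fd, uncompressed = tempfile.mkstemp(prefix='gunzip_', dir=work_dir)
    gzipped_file = gzip.GzipFile(path, 'rb')
    while True:
        chunk = gzipped_file.read(CHUNK_SIZE)
        if not chunk:
            break
        os.write(fd, chunk)
    os.close(fd)
    gzipped_file.close()
    shutil.move(uncompressed, path)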
Example #46
0
    def __init__(self, **kwd):
        Binary.__init__(self, **kwd)
        log.info('Creating cummeRbund CuffDataDB')
Example #47
0
"""
CEL datatype sniffer for v4 (binary files).
http://media.affymetrix.com/support/developer/powertools/changelog/gcos-agcc/cel.html

"""
import data
from galaxy.datatypes.binary import Binary

class Cel( Binary ):
    file_ext = "cel"

    def sniff(self, filename):
        # Determine if the file is in CEL v4 format. Upload file names carry no
        # extension hint (e.g. 'upload_file_data_jqRiCG'), so check the header bytes:
        # the first two little-endian 32-bit integers are the magic number (64)
        # and the version number (always 4).
        with open(filename, "rb") as f:
            header = f.read(8)
        return header == b'\x40\x00\x00\x00\x04\x00\x00\x00'

Binary.register_sniffable_binary_format("cel", "cel", Cel)

Example #48
0
from galaxy.datatypes.binary import Binary
from galaxy.datatypes.xml import GenericXml


class Group( Binary ):
    """Class describing a ProteinPilot group files"""
    file_ext = "group"

Binary.register_unsniffable_binary_ext('group')


class ProteinPilotXml( GenericXml ):
    file_ext = "proteinpilot.xml"


class PlyAscii(Ply, data.Text):
    file_ext = "plyascii"

    def sniff(self, filename):
        return super(PlyAscii, self).sniff(filename, subtype='ascii')


class PlyBinary(Ply, Binary):

    file_ext = "plybinary"

    def __init__(self, **kwd):
        Binary.__init__(self, **kwd)

    def sniff(self, filename):
        return super(PlyBinary, self).sniff(filename, subtype='binary')

Binary.register_sniffable_binary_format("plybinary", "plybinary", PlyBinary)


class Vtk(object):
    """
    The Visualization Toolkit provides a number of source and writer objects to
    read and write popular data file formats. The Visualization Toolkit also
    provides some of its own file formats.

    There are two different styles of file formats available in VTK. The simplest
    are the legacy, serial formats that are easy to read and write either by hand
    or programmatically. However, these formats are less flexible than the XML
    based file formats which support random access, parallel I/O, and portable
    data compression and are preferred to the serial VTK file formats whenever
    possible.
Example #50
0
        """

        fd = wave.open(dataset.dataset.file_name, 'rb')
        dataset.metadata.rate = fd.getframerate()
        dataset.metadata.nframes = fd.getnframes()
        dataset.metadata.sampwidth = fd.getsampwidth()
        dataset.metadata.nchannels = fd.getnchannels()
        #dataset.metadata.identifier = os.path.splitext(dataset.dataset.element_identifier)[0]
        fd.close()

    #def display_data(self, trans, dataset, preview=False, filename=None, to_ext=None, offset=None, ck_size=None, **kwd):

    #    return trans.fill_template( "/dataset/audio.mako", dataset=dataset)


Binary.register_sniffable_binary_format('wav', 'wav', WAV)
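
For reference, the same four properties can be read with nothing but the standard-library wave module (the file name is hypothetical):

import wave

fd = wave.open('recording.wav', 'rb')
rate = fd.getframerate()       # frames (samples) per second
nframes = fd.getnframes()      # total number of audio frames
sampwidth = fd.getsampwidth()  # bytes per sample
nchannels = fd.getnchannels()  # 1 = mono, 2 = stereo
fd.close()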


class TextGrid(Text):
    """Praat Textgrid file for speech annotations

    >>> from galaxy.datatypes.sniff import get_test_fname
    >>> fname = get_test_fname('1_1119_2_22_001.TextGrid')
    >>> TextGrid().sniff(fname)
    True

    >>> fname = get_test_fname('drugbank_drugs.cml')
    >>> TextGrid().sniff(fname)
    False

    """
Example #51
0
    edam_format = "format_3508"
    file_ext = "pdf"

    def sniff(self, filename):
        """Determine if the file is in pdf format."""
        headers = get_headers(filename, None, 1)
        try:
            if headers[0][0].startswith("%PDF"):
                return True
            else:
                return False
        except IndexError:
            return False


Binary.register_sniffable_binary_format("pdf", "pdf", Pdf)


def create_applet_tag_peek(class_name, archive, params):
    text = """
<object classid="java:%s"
      type="application/x-java-applet"
      height="30" width="200" align="center" >
      <param name="archive" value="%s"/>""" % (class_name, archive)
    for name, value in params.iteritems():
        text += """<param name="%s" value="%s"/>""" % (name, value)
    text += """
<object classid="clsid:8AD9C840-044E-11D1-B3E9-00805F499D93"
        height="30" width="200" >
        <param name="code" value="%s" />
        <param name="archive" value="%s"/>""" % (class_name, archive)


from galaxy.datatypes import data
from galaxy.datatypes.binary import Binary

class Cel( Binary ):
    """Class for generic CEL binary format"""
    file_ext = "cel"
Binary.register_unsniffable_binary_ext("cel")