Ejemplo n.º 1
0
        try:
            rscript = './checkFCS.R'
            fcs_check = subprocess.check_output([rscript, filename])
            if re.search('TRUE', str(fcs_check)):
                return True
            else:
                return False
        except:
            False

    def get_mime(self):
        """Return the MIME type string reported for this datatype."""
        mime_type = 'application/octet-stream'
        return mime_type


# Add FCS to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format("fcs", "fcs", FCS)


class FlowText(Tabular):
    """Class describing an Flow Text file"""
    file_ext = "flowtext"

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in the peek and blurb text of *dataset*.

        A purged dataset receives fixed placeholder messages. Otherwise the
        peek is a fixed label and the blurb is ``data.nice_size`` applied to
        ``dataset.get_size()``. *is_multi_byte* is accepted but not used here.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = "Text Flow file"
        dataset.blurb = data.nice_size(dataset.get_size())

    def display_peek(self, dataset):
    def sniff(self, filename):
        """Delegate to the parent class sniffer, requesting the 'ascii' subtype."""
        parent = super(PlyAscii, self)
        return parent.sniff(filename, subtype='ascii')


class PlyBinary(Ply, Binary):
    """Ply datatype variant whose sniffer requests the 'binary' subtype."""

    file_ext = "plybinary"

    def __init__(self, **kwd):
        """Pass every keyword argument on to ``Binary.__init__``."""
        Binary.__init__(self, **kwd)

    def sniff(self, filename):
        """Delegate to the parent class sniffer, requesting the 'binary' subtype."""
        parent = super(PlyBinary, self)
        return parent.sniff(filename, subtype='binary')

# Add PlyBinary to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format("plybinary", "plybinary", PlyBinary)


class Vtk(object):
    """
    The Visualization Toolkit provides a number of source and writer objects to
    read and write popular data file formats. The Visualization Toolkit also
    provides some of its own file formats.

    There are two different styles of file formats available in VTK. The simplest
    are the legacy, serial formats that are easy to read and write either by hand
    or programmatically. However, these formats are less flexible than the XML
    based file formats which support random access, parallel I/O, and portable
    data compression and are preferred to the serial VTK file formats whenever
    possible.
Ejemplo n.º 3
0
        """

        fd = wave.open(dataset.dataset.file_name, 'rb')
        dataset.metadata.rate = fd.getframerate()
        dataset.metadata.nframes = fd.getnframes()
        dataset.metadata.sampwidth = fd.getsampwidth()
        dataset.metadata.nchannels = fd.getnchannels()
        #dataset.metadata.identifier = os.path.splitext(dataset.dataset.element_identifier)[0]
        fd.close()

    #def display_data(self, trans, dataset, preview=False, filename=None, to_ext=None, offset=None, ck_size=None, **kwd):

    #    return trans.fill_template( "/dataset/audio.mako", dataset=dataset)


# Add WAV to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format('wav', 'wav', WAV)


class TextGrid(Text):
    """Praat Textgrid file for speech annotations

    >>> from galaxy.datatypes.sniff import get_test_fname
    >>> fname = get_test_fname('1_1119_2_22_001.TextGrid')
    >>> TextGrid().sniff(fname)
    True

    >>> fname = get_test_fname('drugbank_drugs.cml')
    >>> TextGrid().sniff(fname)
    False

    """
Ejemplo n.º 4
0
""" Datatypes for Galaxy-M.
"""

from galaxy.datatypes.binary import (
    Binary,
    SQlite,
)


class SQliteSPS(SQlite):
    """SQlite subclass whose file extension is "sps.sqlite"."""
    file_ext = "sps.sqlite"

# Add SQliteSPS to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format("sps.sqlite", "sps.sqlite", SQliteSPS)


class SQliteTM(SQlite):
    """SQlite subclass whose file extension is "tm.sqlite"."""
    file_ext = "tm.sqlite"

# Add SQliteTM to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format("tm.sqlite", "tm.sqlite", SQliteTM)


class SQliteEFS(SQlite):
    """SQlite subclass whose file extension is "efs.sqlite"."""
    file_ext = "efs.sqlite"

# Add SQliteEFS to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format("efs.sqlite", "efs.sqlite", SQliteEFS)


class SQlitePPS(SQlite):
    """SQlite subclass whose file extension is "pps.sqlite"."""
    file_ext = "pps.sqlite"

# Add SQlitePPS to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format("pps.sqlite", "pps.sqlite", SQlitePPS)
Ejemplo n.º 5
0
"""
CEL datatype sniffer for v4 (binary files).
http://media.affymetrix.com/support/developer/powertools/changelog/gcos-agcc/cel.html

"""
import data
from galaxy.datatypes.binary import Binary

class Cel(Binary):
    """Binary datatype for CEL v4 files, recognised by their 8-byte header."""

    file_ext = "cel"

    def sniff(self, filename):
        """Return True when *filename* starts with the CEL v4 header bytes.

        Uploaded files are named like 'upload_file_data_jqRiCG', so the name
        carries no format hint and the header bytes must be inspected. The
        header is two 32-bit integers: the magic number 64 followed by the
        version number (always 4).
        """
        with open(filename, "rb") as f:
            header = f.read(8)

        # A file shorter than 8 bytes just compares unequal. Comparing bytes
        # never raises IndexError, so no exception handling is needed here.
        return header == b'\x40\x00\x00\x00\x04\x00\x00\x00'

# Add Cel to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format("cel", "cel", Cel)

Ejemplo n.º 6
0
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
    def display_peek( self, dataset ):
        """Return the stored peek text of *dataset*, or a fallback label.

        If reading ``dataset.peek`` raises, a label built from
        ``data.nice_size(dataset.get_size())`` is returned instead.
        """
        try:
            return dataset.peek
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must
            # still propagate instead of being turned into a label.
            return "Matlab Binary file (%s)" % ( data.nice_size( dataset.get_size() ) )

    def display_data(self, trans, dataset, preview=False, filename=None, to_ext=None, size=None, offset=None, **kwd):
        """Return a fixed notice for previews, otherwise defer to the parent class.

        When *preview* is truthy a message saying previews are unavailable is
        returned. In every other case the call is forwarded unchanged to the
        parent implementation of ``display_data``.
        """
        if not preview:
            parent = super(Matlab, self)
            return parent.display_data(trans, dataset, preview, filename, to_ext, size, offset, **kwd)
        return "MATLAB data files cannot be previewed."
    
# Add Matlab to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format("mat", "mat", Matlab)

class Wav(Binary):

    file_ext = "wav"
    def __init__( self, **kwd ):
        """Initialise the datatype by passing every keyword argument to ``Binary.__init__``."""
        Binary.__init__( self, **kwd )
 
    def sniff( self, filename ):
        try:
            header = open( filename ).read()
            if header.starts_with("RIFF"):
                return True
            else: 
                return False
        except:
Ejemplo n.º 7
0
class Pdf(Image):
    """Image-derived datatype for files whose first header field starts with '%PDF'."""

    edam_format = "format_3508"
    file_ext = "pdf"

    def sniff(self, filename):
        """Determine if the file is in pdf format."""
        headers = get_headers(filename, None, 1)
        try:
            first_field = headers[0][0]
        except IndexError:
            # No header line, or an empty one: not a PDF.
            return False
        return first_field.startswith("%PDF")

# Add Pdf to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format("pdf", "pdf", Pdf)

def create_applet_tag_peek( class_name, archive, params ):
    text = """
<object classid="java:%s"
      type="application/x-java-applet"
      height="30" width="200" align="center" >
      <param name="archive" value="%s"/>""" % ( class_name, archive )
    for name, value in params.iteritems():
        text += """<param name="%s" value="%s"/>""" % ( name, value )
    text += """
<object classid="clsid:8AD9C840-044E-11D1-B3E9-00805F499D93"
        height="30" width="200" >
        <param name="code" value="%s" />
        <param name="archive" value="%s"/>""" % ( class_name, archive )
    for name, value in params.iteritems():
Ejemplo n.º 8
0
            return "Augustus model (%s)" % (nice_size(dataset.get_size()))

    def sniff(self, filename):
        """
        Augustus archives always contain the same files

        Only the first regular file found in the tar archive is examined: the
        result is True when its name ends with one of the expected suffixes
        and False otherwise. Any exception is logged as a warning and also
        yields False.
        """
        expected_suffixes = (
            '_exon_probs.pbl',
            '_igenic_probs.pbl',
            '_intron_probs.pbl',
            '_metapars.cfg',
            '_metapars.utr.cfg',
            '_parameters.cfg',
            '_parameters.cgp.cfg',
            '_utr_probs.pbl',
            '_weightmatrix.txt',
        )
        try:
            if filename and tarfile.is_tarfile(filename):
                with tarfile.open(filename, 'r') as archive:
                    for member in archive:
                        if member.isfile():
                            # The first regular member decides the outcome.
                            return member.name.endswith(expected_suffixes)
        except Exception as e:
            log.warning('%s, sniff Exception: %s', self, e)
        return False


# Add Augustus to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format("augustus", "augustus", Augustus)
"""
no_unzip_datatypes

A perfect clone of the prims masscomb datatype FileSet
"""

import logging
import zipfile

from galaxy.datatypes.binary import Binary

log = logging.getLogger(__name__)


class NoUnzip(Binary):
    """FileSet containing N files"""
    file_ext = "no_unzip.zip"
    blurb = "(zipped) FileSet containing multiple files"

    def sniff(self, filename):
        """Return True when *filename* is a zip archive with more than one entry.

        A file that is not a zip archive is reported as False rather than
        raising, so the sniffer always answers with a boolean.
        """
        # Non-zip input means "not this datatype", not an error.
        if not zipfile.is_zipfile(filename):
            return False
        # The context manager closes the archive handle once it is inspected.
        with zipfile.ZipFile(filename) as zf:
            return len(zf.infolist()) > 1


# Backwards compatibility: register only when this Binary class exposes
# register_sniffable_binary_format. The guard could be removed at some point.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
if hasattr(Binary, 'register_sniffable_binary_format'):
    Binary.register_sniffable_binary_format('NoUnzip', 'no_unzip.zip', NoUnzip)
Ejemplo n.º 10
0
            return "Augustus model (%s)" % (nice_size(dataset.get_size()))

    def sniff(self, filename):
        """
        Augustus archives always contain the same files

        The decision rests on the first regular file in the tar archive: True
        if its name carries one of the known suffixes, False if it does not.
        Exceptions are logged as warnings and produce False.
        """
        known_suffixes = (
            '_exon_probs.pbl',
            '_igenic_probs.pbl',
            '_intron_probs.pbl',
            '_metapars.cfg',
            '_metapars.utr.cfg',
            '_parameters.cfg',
            '_parameters.cgp.cfg',
            '_utr_probs.pbl',
            '_weightmatrix.txt',
        )
        try:
            if filename and tarfile.is_tarfile(filename):
                with tarfile.open(filename, 'r') as tar_handle:
                    for entry in tar_handle:
                        if not entry.isfile():
                            continue
                        # First regular entry settles the answer either way.
                        return entry.name.endswith(known_suffixes)
        except Exception as e:
            log.warning('%s, sniff Exception: %s', self, e)
        return False


# Add Augustus to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format("augustus", "augustus", Augustus)
Ejemplo n.º 11
0
"""
CEL datatype sniffer for Command Console version 1 format (binary files).
http://media.affymetrix.com/support/developer/powertools/changelog/gcos-agcc/cel.html#calvin
http://media.affymetrix.com/support/developer/powertools/changelog/gcos-agcc/generic.html

"""
import data
from galaxy.datatypes.binary import Binary

class CelCc1(Binary):
    """Binary datatype for CEL Command Console version 1 files, recognised by a 2-byte header."""

    file_ext = "celcc1"

    def sniff(self, filename):
        """Return True when *filename* starts with the Command Console v1 header.

        Uploaded files are named like 'upload_file_data_jqRiCG', so the name
        carries no format hint and the header bytes must be inspected. The
        header is two unsigned 8-bit values: the magic number 59 followed by
        the version number (always 1).
        """
        with open(filename, "rb") as f:
            header = f.read(2)

        # A file shorter than 2 bytes just compares unequal. Comparing bytes
        # never raises IndexError, so no exception handling is needed here.
        return header == b'\x3B\x01'

# Add CelCc1 to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format("celcc1", "celcc1", CelCc1)

Ejemplo n.º 12
0
            else:
                rval.append(
                    '<li><a href="%s" type="text/plain">%s</a>%s</li>' %
                    (fn, fn, opt_text))
        rval.append('</ul></div></html>')
        return "\n".join(rval)

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in the peek and blurb text of *dataset*.

        A purged dataset receives fixed placeholder messages. Otherwise the
        peek is a fixed label and the blurb is ``nice_size`` applied to
        ``dataset.get_size()``. *is_multi_byte* is accepted but not used here.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = "Bruker MS1 RAW file"
        dataset.blurb = nice_size(dataset.get_size())

    def display_peek(self, dataset):
        """Return the stored peek text of *dataset*, or a fallback label.

        If reading ``dataset.peek`` raises, a label built from
        ``nice_size(dataset.get_size())`` is returned instead.
        """
        try:
            return dataset.peek
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must
            # still propagate instead of being turned into a label.
            return "Bruker MS1 RAW file (%s)" % (nice_size(dataset.get_size()))


# Add BrukerMS1RAW to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format("bruker.d", "d", BrukerMS1RAW)


class nmrML(MetabolomicsXml):
    """nmrML data"""
    file_ext = "nmrml"
    blurb = 'nmrML NMR data'
    root = "nmrML"
Ejemplo n.º 13
0
        if not dataset.dataset.purged:
            dataset.peek = "Thermo Finnigan RAW file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek text of *dataset*, or a fallback label.

        If reading ``dataset.peek`` raises, a label built from
        ``data.nice_size(dataset.get_size())`` is returned instead.
        """
        try:
            return dataset.peek
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must
            # still propagate instead of being turned into a label.
            return "Thermo Finnigan RAW file (%s)" % (data.nice_size(dataset.get_size()))


# Register RAW only when this Binary class exposes register_sniffable_binary_format.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
if hasattr(Binary, 'register_sniffable_binary_format'):
    Binary.register_sniffable_binary_format('raw', 'raw', RAW)


class Msp(Text):
    """ Output of NIST MS Search Program chemdata.nist.gov/mass-spc/ftp/mass-spc/PepLib.pdf """
    file_ext = "msp"

    @staticmethod
    def next_line_starts_with(contents, prefix):
        next_line = contents.readline()
        return next_line is not None and next_line.startswith(prefix)

    def sniff(self, filename):
        """ Determines whether the file is a NIST MSP output file.

        >>> fname = get_test_fname('test.msp')
Ejemplo n.º 14
0
        try:
            rscript = 'checkFCS.R'
            fcs_check = subprocess.check_output([rscript, filename])
            if re.search('TRUE', str(fcs_check)):
                return True
            else:
                return False
        except:
            False

    def get_mime(self):
        """Return the MIME type string reported for this datatype."""
        mime = 'application/octet-stream'
        return mime


# Add FCS to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format("fcs", "fcs", FCS)


class FlowFrame( Binary ):
    """R Object containing flowFrame saved with saveRDS"""
    file_ext = 'flowframe'

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in the peek and blurb text of *dataset*.

        A purged dataset receives fixed placeholder messages. Otherwise the
        peek is a fixed label and the blurb is ``data.nice_size`` applied to
        ``dataset.get_size()``. *is_multi_byte* is accepted but not used here.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = "Binary RDS flowFrame file"
        dataset.blurb = data.nice_size(dataset.get_size())

    def display_peek(self, dataset):
Ejemplo n.º 15
0
        """
        Checking if the file is in FCS format. Should read FCS2.0, FCS3.0
        and FCS3.1
        """
        r.packages.importr("flowCore")
        rlib = r.packages.packages
        try:
            fcsobject = rlib.flowCore.isFCSfile(filename)
            return list(fcsobject)[0]
        except:
            return False

    def get_mime(self):
        """Return the MIME type string reported for this datatype."""
        octet_stream = 'application/octet-stream'
        return octet_stream
# Add FCS to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format("fcs","fcs",FCS)

class FlowText(Tabular):
    """Class describing an Flow Text file"""
    file_ext = "flowtext"

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in the peek and blurb text of *dataset*.

        A purged dataset receives fixed placeholder messages. Otherwise the
        peek is a fixed label and the blurb is ``data.nice_size`` applied to
        ``dataset.get_size()``. *is_multi_byte* is accepted but not used here.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = "Text Flow file"
        dataset.blurb = data.nice_size(dataset.get_size())

    def display_peek(self, dataset):
        try:
Ejemplo n.º 16
0
            header = open(filename).read(8)
            if binascii.b2a_hex(header) == binascii.hexlify('NCBI.sra'):
                #fp = open("/tmp/sra.py","w")
                #fp.write("inside true\n")
                #fp.close()
                return True
            else:
                #fp = open("/tmp/sra.py","w")
                #fp.write("inside true\n")
                #fp.close()
                return False
        except:
            return False

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in the peek and blurb text of *dataset*.

        A purged dataset receives fixed placeholder messages. Otherwise the
        peek is a fixed label and the blurb is ``nice_size`` applied to
        ``dataset.get_size()``. *is_multi_byte* is accepted but not used here.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = 'Binary sra file'
        dataset.blurb = nice_size(dataset.get_size())

    def display_peek(self, dataset):
        """Return the stored peek text of *dataset*, or a fallback label.

        If reading ``dataset.peek`` raises, a label built from
        ``nice_size(dataset.get_size())`` is returned instead.
        """
        try:
            return dataset.peek
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must
            # still propagate instead of being turned into a label.
            return 'Binary sra file (%s)' % (nice_size(dataset.get_size()))


# Add Sra to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format('sra', 'sra', Sra)
    def sniff(self, filename):
        """Delegate to the parent class sniffer, requesting the 'ascii' subtype."""
        parent = super(PlyAscii, self)
        return parent.sniff(filename, subtype='ascii')


class PlyBinary(Ply, Binary):
    """Ply datatype variant whose sniffer requests the 'binary' subtype."""

    file_ext = "plybinary"

    def __init__(self, **kwd):
        """Pass every keyword argument on to ``Binary.__init__``."""
        Binary.__init__(self, **kwd)

    def sniff(self, filename):
        """Delegate to the parent class sniffer, requesting the 'binary' subtype."""
        parent = super(PlyBinary, self)
        return parent.sniff(filename, subtype='binary')


# Add PlyBinary to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format("plybinary", "plybinary", PlyBinary)


class Vtk(object):
    """
    The Visualization Toolkit provides a number of source and writer objects to
    read and write popular data file formats. The Visualization Toolkit also
    provides some of its own file formats.

    There are two different styles of file formats available in VTK. The simplest
    are the legacy, serial formats that are easy to read and write either by hand
    or programmatically. However, these formats are less flexible than the XML
    based file formats which support random access, parallel I/O, and portable
    data compression and are preferred to the serial VTK file formats whenever
    possible.
Ejemplo n.º 18
0
    def sniff(self, filename):
        """ The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is binary.
        For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure

        Returns True when the file starts with that signature and False
        otherwise, including when the file cannot be opened or read.
        """
        try:
            # Read raw bytes and compare against a bytes literal. Text mode
            # may decode the header, and hexlify() on a str raises TypeError
            # on Python 3, which would make every file sniff as False.
            with open(filename, 'rb') as handle:
                return handle.read(8) == b'NCBI.sra'
        except Exception:
            return False

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in the peek and blurb text of *dataset*.

        A purged dataset receives fixed placeholder messages. Otherwise the
        peek is a fixed label and the blurb is ``nice_size`` applied to
        ``dataset.get_size()``. *is_multi_byte* is accepted but not used here.
        """
        if dataset.dataset.purged:
            dataset.peek = "file does not exist"
            dataset.blurb = "file purged from disk"
            return
        dataset.peek = "Binary sra file"
        dataset.blurb = nice_size(dataset.get_size())

    def display_peek(self, dataset):
        """Return the stored peek text of *dataset*, or a fallback label.

        If reading ``dataset.peek`` raises, a label built from
        ``nice_size(dataset.get_size())`` is returned instead.
        """
        try:
            return dataset.peek
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must
            # still propagate instead of being turned into a label.
            return "Binary sra file (%s)" % (nice_size(dataset.get_size()))


# Add Sra to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format("sra", "sra", Sra)
Ejemplo n.º 19
0
            opt_text = ''
            if composite_file.optional:
                opt_text = ' (optional)'
            if composite_file.get('description'):
                rval.append(
                    '<li><a href="%s" type="text/plain">%s (%s)</a>%s</li>' %
                    (fn, fn, composite_file.get('description'), opt_text))
            else:
                rval.append(
                    '<li><a href="%s" type="text/plain">%s</a>%s</li>' %
                    (fn, fn, opt_text))
        rval.append('</ul></div></html>')
        return "\n".join(rval)


# Add Wiff to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format("wiff", "wiff", Wiff)


class PepXmlReport(Tabular):
    """pepxml converted to tabular report"""
    edam_data = "data_2536"
    file_ext = "pepxml.tsv"

    def __init__(self, **kwd):
        """Initialise the parent class and record the report's column names.

        All keyword arguments are forwarded to the parent initialiser. The
        column names are stored on ``self.column_names``.
        """
        super(PepXmlReport, self).__init__(**kwd)
        report_columns = [
            'Protein',
            'Peptide',
            'Assumed Charge',
            'Neutral Pep Mass (calculated)',
            'Neutral Mass',
            'Retention Time',
            'Start Scan',
            'End Scan',
            'Search Engine',
            'PeptideProphet Probability',
            'Interprophet Probabaility',
        ]
        self.column_names = report_columns
Ejemplo n.º 20
0
        rval = ['<html><head><title>Wiff Composite Dataset </title></head><p/>']
        rval.append('<div>This composite dataset is composed of the following files:<p/><ul>')
        for composite_name, composite_file in self.get_composite_files(dataset=dataset).items():
            fn = composite_name
            opt_text = ''
            if composite_file.optional:
                opt_text = ' (optional)'
            if composite_file.get('description'):
                rval.append('<li><a href="%s" type="text/plain">%s (%s)</a>%s</li>' % (fn, fn, composite_file.get('description'), opt_text))
            else:
                rval.append('<li><a href="%s" type="text/plain">%s</a>%s</li>' % (fn, fn, opt_text))
        rval.append('</ul></div></html>')
        return "\n".join(rval)


# Add Wiff to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format("wiff", "wiff", Wiff )


class PepXmlReport(Tabular):
    """pepxml converted to tabular report"""
    edam_data = "data_2536"
    file_ext = "pepxml.tsv"

    def __init__(self, **kwd):
        """Initialise the parent class and record the report's column names.

        All keyword arguments are forwarded to the parent initialiser. The
        column names are stored on ``self.column_names``.
        """
        super(PepXmlReport, self).__init__(**kwd)
        report_columns = [
            'Protein', 'Peptide', 'Assumed Charge',
            'Neutral Pep Mass (calculated)', 'Neutral Mass', 'Retention Time',
            'Start Scan', 'End Scan', 'Search Engine',
            'PeptideProphet Probability', 'Interprophet Probabaility',
        ]
        self.column_names = report_columns

    def display_peek(self, dataset):
        """Return the result of ``make_html_table`` for *dataset*, passing this report's column names."""
        headings = self.column_names
        return self.make_html_table(dataset, column_names=headings)
Ejemplo n.º 21
0
    edam_format = "format_3508"
    file_ext = "pdf"

    def sniff(self, filename):
        """Determine if the file is in pdf format."""
        headers = get_headers(filename, None, 1)
        try:
            first_field = headers[0][0]
        except IndexError:
            # No header line, or an empty one: not a PDF.
            return False
        return first_field.startswith("%PDF")


# Add Pdf to Binary's sniffable-format registry.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
Binary.register_sniffable_binary_format("pdf", "pdf", Pdf)


def create_applet_tag_peek(class_name, archive, params):
    text = """
<object classid="java:%s"
      type="application/x-java-applet"
      height="30" width="200" align="center" >
      <param name="archive" value="%s"/>""" % (class_name, archive)
    for name, value in params.iteritems():
        text += """<param name="%s" value="%s"/>""" % (name, value)
    text += """
<object classid="clsid:8AD9C840-044E-11D1-B3E9-00805F499D93"
        height="30" width="200" >
        <param name="code" value="%s" />
        <param name="archive" value="%s"/>""" % (class_name, archive)
Ejemplo n.º 22
0
            dataset.peek = "Thermo Finnigan RAW file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek text of *dataset*, or a fallback label.

        If reading ``dataset.peek`` raises, a label built from
        ``data.nice_size(dataset.get_size())`` is returned instead.
        """
        try:
            return dataset.peek
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must
            # still propagate instead of being turned into a label.
            return "Thermo Finnigan RAW file (%s)" % (data.nice_size(
                dataset.get_size()))


# Register RAW only when this Binary class exposes register_sniffable_binary_format.
# NOTE(review): arguments appear to be (type name, extension, class) -- confirm against Binary.
if hasattr(Binary, 'register_sniffable_binary_format'):
    Binary.register_sniffable_binary_format('raw', 'raw', RAW)


class Msp(Text):
    """ Output of NIST MS Search Program chemdata.nist.gov/mass-spc/ftp/mass-spc/PepLib.pdf """
    file_ext = "msp"

    @staticmethod
    def next_line_starts_with(contents, prefix):
        next_line = contents.readline()
        return next_line is not None and next_line.startswith(prefix)

    def sniff(self, filename):
        """ Determines whether the file is a NIST MSP output file.

        >>> fname = get_test_fname('test.msp')