def __init__(self, **kwd):
    """
    A Fastsearch Index consists of a binary file with the fingerprints
    and a pointer to the actual molecule file.
    """
    Binary.__init__(self, **kwd)
    # The index itself is mandatory ...
    self.add_composite_file('molecule.fs', is_binary=True,
                            description='OpenBabel Fastsearch Index')
    # ... while the molecule file may arrive in any one supported format.
    for fmt in ('sdf', 'smi', 'inchi', 'mol2', 'cml'):
        self.add_composite_file('molecule.%s' % fmt, optional=True,
                                is_binary=False, description='Molecule File')
def __init__(self, **kwd):
    """Declare the four binary files produced by hmmpress for one profile DB."""
    Binary.__init__(self, **kwd)
    pressed_files = (
        'model.hmm.h3m',  # Binary model
        'model.hmm.h3i',  # SSI index for binary model
        'model.hmm.h3f',  # Profiles (MSV part)
        'model.hmm.h3p',  # Profiles (remained)
    )
    for fname in pressed_files:
        self.add_composite_file(fname, is_binary=True)
def __init__(self, **kwd):
    """Declare the two component files of an imzML dataset."""
    Binary.__init__(self, **kwd)
    # Textual metadata component.
    self.add_composite_file('imzml', description='The imzML metadata component.', is_binary=False)
    # Binary mass spectral data component.
    self.add_composite_file('ibd', description='The mass spectral data component.', is_binary=True)
def __init__(self, **kwd):
    """Declare the composite files of an AB SCIEX wiff dataset.

    The .wiff file is always required; the companion wiff.scan file is
    only needed when the .wiff file holds just metadata.
    """
    Binary.__init__(self, **kwd)
    self.add_composite_file(
        'wiff',
        description='AB SCIEX files in .wiff format. This can contain all needed information or only metadata.',
        is_binary=True)
    # FIX: optional was previously the *string* 'True'; both are truthy so
    # runtime behavior is unchanged, but the flag now has the intended type.
    self.add_composite_file(
        'wiff_scan',
        description='AB SCIEX spectra file (wiff.scan), if the corresponding .wiff file only contains metadata.',
        optional=True,
        is_binary=True)
def __init__(self, **kwd):
    """Register the imzML (metadata) and ibd (spectra) component files."""
    Binary.__init__(self, **kwd)
    components = (
        ('imzml', 'The imzML metadata component.', False),
        ('ibd', 'The mass spectral data component.', True),
    )
    for name, desc, binary in components:
        self.add_composite_file(name, description=desc, is_binary=binary)
def __init__(self, **kwd):
    """Declare the composite files of an Analyze 7.5 dataset."""
    Binary.__init__(self, **kwd)
    # Header: dimensions, identification, and processing history.
    self.add_composite_file('hdr', description='The Analyze75 header file.', is_binary=False)
    # Image data, whose data type and ordering are described by the header.
    self.add_composite_file('img', description='The Analyze75 image file.', is_binary=True)
    # Optional t2m file.  FIX: optional was the string 'True' rather than a
    # boolean (both are truthy, so runtime behavior is unchanged).
    self.add_composite_file('t2m', description='The Analyze75 t2m file.', optional=True, is_binary=True)
def handle_uploaded_dataset_file(filename, datatypes_registry, ext='auto', is_multi_byte=False):
    """Validate an uploaded file and return the extension to assign to it.

    Raises InappropriateDatasetContentError when the (possibly compressed)
    content is not acceptable for upload.
    """
    is_valid, ext = handle_compressed_file(filename, datatypes_registry, ext=ext)
    if not is_valid:
        raise InappropriateDatasetContentError('The compressed uploaded file contains inappropriate content.')
    # Auto-detect the extension when the caller did not pin one down.
    if ext in AUTO_DETECT_EXTENSIONS:
        ext = guess_ext(filename, sniff_order=datatypes_registry.sniff_order, is_multi_byte=is_multi_byte)
    if check_binary(filename):
        # A binary file is acceptable when its extension is registered as
        # unsniffable, or when the matching datatype sniffer recognizes it.
        unsniffable = Binary.is_ext_unsniffable(ext)
        if not unsniffable and not datatypes_registry.get_datatype_by_extension(ext).sniff(filename):
            raise InappropriateDatasetContentError('The binary uploaded file contains inappropriate content.')
    elif check_html(filename):
        raise InappropriateDatasetContentError('The uploaded file contains inappropriate HTML content.')
    return ext
def __init__(self, **kwd):
    """Declare the member files of an ESRI shapefile composite dataset."""
    Binary.__init__(self, **kwd)
    # (suffix, description, is_binary, optional)
    members = [
        ('shp', 'Geometry File (shp)', True, False),
        ('shx', 'Geometry index File (shx)', True, False),
        ('dbf', 'Columnar attributes for each shape (dbf)', True, False),
        ('prj', 'Projection description (prj)', False, True),
        ('sbn', 'Spatial index of the features (sbn)', True, True),
        ('sbx', 'Spatial index of the features (sbx)', True, True),
        ('fbn', 'Read only spatial index of the features (fbn)', True, True),
        ('fbx', 'Read only spatial index of the features (fbx)', True, True),
        ('ain', 'Attribute index of the active fields in a table (ain)', True, True),
        ('aih', 'Attribute index of the active fields in a table (aih)', True, True),
        ('atx', 'Attribute index for the dbf file (atx)', True, True),
        ('ixs', 'Geocoding index (ixs)', True, True),
        ('mxs', 'Geocoding index in ODB format (mxs)', True, True),
        ('shp.xml', 'Geospatial metadata in XML format (xml)', False, True),
    ]
    for suffix, desc, binary, opt in members:
        self.add_composite_file('shapefile.%s' % suffix, description=desc, is_binary=binary, optional=opt)
def __init__(self, **kwd):
    """Declare the composite files of an Analyze 7.5 dataset."""
    Binary.__init__(self, **kwd)
    # Header: dimensions, identification, and processing history.
    self.add_composite_file(
        'hdr', description='The Analyze75 header file.', is_binary=False)
    # Image data, whose layout is described by the header file.
    self.add_composite_file(
        'img', description='The Analyze75 image file.', is_binary=True)
    # Optional t2m file.  FIX: optional was the string 'True' rather than a
    # boolean (both are truthy, so runtime behavior is unchanged).
    self.add_composite_file(
        't2m', description='The Analyze75 t2m file.', optional=True, is_binary=True)
def handle_uploaded_dataset_file(filename, datatypes_registry, ext='auto', is_multi_byte=False):
    """Validate an uploaded (possibly compressed) file and return its extension.

    Raises InappropriateDatasetContentError for unacceptable compressed,
    binary, or HTML content.
    """
    is_valid, ext = handle_compressed_file(filename, datatypes_registry, ext=ext)
    if not is_valid:
        raise InappropriateDatasetContentError('The compressed uploaded file contains inappropriate content.')
    # Sniff a concrete extension when the caller asked for auto-detection.
    if ext in AUTO_DETECT_EXTENSIONS:
        ext = guess_ext(filename, sniff_order=datatypes_registry.sniff_order, is_multi_byte=is_multi_byte)
    if check_binary(filename):
        # Binary content is accepted only when the extension is registered as
        # unsniffable or the datatype's own sniffer recognizes the file.
        if not Binary.is_ext_unsniffable(ext) and not datatypes_registry.get_datatype_by_extension(ext).sniff(filename):
            raise InappropriateDatasetContentError('The binary uploaded file contains inappropriate content.')
    elif check_html(filename):
        raise InappropriateDatasetContentError('The uploaded file contains inappropriate HTML content.')
    return ext
opt_text = '' if composite_file.optional: opt_text = ' (optional)' if composite_file.get('description'): rval.append( '<li><a href="%s" type="text/plain">%s (%s)</a>%s</li>' % (fn, fn, composite_file.get('description'), opt_text)) else: rval.append( '<li><a href="%s" type="text/plain">%s</a>%s</li>' % (fn, fn, opt_text)) rval.append('</ul></div></html>') return "\n".join(rval) Binary.register_sniffable_binary_format("wiff", "wiff", Wiff) class PepXmlReport(Tabular): """pepxml converted to tabular report""" edam_data = "data_2536" file_ext = "pepxml.tsv" def __init__(self, **kwd): super(PepXmlReport, self).__init__(**kwd) self.column_names = [ 'Protein', 'Peptide', 'Assumed Charge', 'Neutral Pep Mass (calculated)', 'Neutral Mass', 'Retention Time', 'Start Scan', 'End Scan', 'Search Engine', 'PeptideProphet Probability', 'Interprophet Probabaility' ]
def __init__(self, **kwargs):
    """Delegate all initialization to the Binary base class."""
    Binary.__init__(self, **kwargs)
def __init__(self, **kwargs):
    """No datatype-specific setup; defer entirely to Binary."""
    Binary.__init__(self, **kwargs)
""" Checking if the file is in FCS format. Should read FCS2.0, FCS3.0 and FCS3.1 """ r.packages.importr("flowCore") rlib = r.packages.packages try: fcsobject = rlib.flowCore.isFCSfile(filename) return list(fcsobject)[0] except: return False def get_mime(self): """Returns the mime type of the datatype""" return 'application/octet-stream' Binary.register_sniffable_binary_format("fcs","fcs",FCS) class FlowText(Tabular): """Class describing an Flow Text file""" file_ext = "flowtext" def set_peek(self, dataset, is_multi_byte=False): if not dataset.dataset.purged: dataset.peek = "Text Flow file" dataset.blurb = data.nice_size(dataset.get_size()) else: dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' def display_peek(self, dataset): try:
class Pdf( Image ): edam_format = "format_3508" file_ext = "pdf" def sniff(self, filename): """Determine if the file is in pdf format.""" headers = get_headers(filename, None, 1) try: if headers[0][0].startswith("%PDF"): return True else: return False except IndexError: return False Binary.register_sniffable_binary_format("pdf", "pdf", Pdf) def create_applet_tag_peek( class_name, archive, params ): text = """ <object classid="java:%s" type="application/x-java-applet" height="30" width="200" align="center" > <param name="archive" value="%s"/>""" % ( class_name, archive ) for name, value in params.iteritems(): text += """<param name="%s" value="%s"/>""" % ( name, value ) text += """ <object classid="clsid:8AD9C840-044E-11D1-B3E9-00805F499D93" height="30" width="200" > <param name="code" value="%s" /> <param name="archive" value="%s"/>""" % ( class_name, archive ) for name, value in params.iteritems():
"""
k-mer count and presence
"""
import logging

from galaxy.datatypes.binary import Binary

log = logging.getLogger(__name__)


class Count(Binary):
    """Binary datatype for k-mer count tables."""

    def __init__(self, **kwargs):
        Binary.__init__(self, **kwargs)


class Presence(Binary):
    """Binary datatype for k-mer presence tables."""

    def __init__(self, **kwargs):
        Binary.__init__(self, **kwargs)


# Neither format can be sniffed; register them by extension only.
Binary.register_unsniffable_binary_ext("ct")
Binary.register_unsniffable_binary_ext("pt")
def add_file(dataset, registry, json_file, output_path):
    """Process one uploaded dataset: fetch, validate, decompress, sniff its
    type, stage it at output_path, and append a JSON status record.

    On any validation failure the error is reported through file_err() and
    the function returns early without writing a success record.
    """
    data_type = None
    line_count = None
    converted_path = None
    stdout = None
    # Upload options supplied by the tool form (with historical defaults).
    link_data_only = dataset.get('link_data_only', 'copy_files')
    in_place = dataset.get('in_place', True)
    purge_source = dataset.get('purge_source', True)
    try:
        ext = dataset.file_type
    except AttributeError:
        file_err(
            'Unable to process uploaded file, missing file_type parameter.',
            dataset, json_file)
        return
    if dataset.type == 'url':
        # Fetch remote content into a local temp file first.
        try:
            page = urlopen(
                dataset.path)  # page will be .close()ed by sniff methods
            temp_name, dataset.is_multi_byte = sniff.stream_to_file(
                page,
                prefix='url_paste',
                source_encoding=util.get_charset_from_http_headers(
                    page.headers))
        except Exception as e:
            file_err('Unable to fetch %s\n%s' % (dataset.path, str(e)),
                     dataset, json_file)
            return
        dataset.path = temp_name
    # See if we have an empty file
    if not os.path.exists(dataset.path):
        file_err('Uploaded temporary file (%s) does not exist.' % dataset.path,
                 dataset, json_file)
        return
    if not os.path.getsize(dataset.path) > 0:
        file_err('The uploaded file is empty', dataset, json_file)
        return
    if not dataset.type == 'url':
        # Already set is_multi_byte above if type == 'url'
        try:
            dataset.is_multi_byte = multi_byte.is_multi_byte(
                codecs.open(dataset.path, 'r', 'utf-8').read(100))
        except UnicodeDecodeError as e:
            dataset.is_multi_byte = False
    # Is dataset an image?
    i_ext = get_image_ext(dataset.path)
    if i_ext:
        ext = i_ext
        data_type = ext
    # Is dataset content multi-byte?
    elif dataset.is_multi_byte:
        data_type = 'multi-byte char'
        ext = sniff.guess_ext(dataset.path,
                              registry.sniff_order,
                              is_multi_byte=True)
    # Is dataset content supported sniffable binary?
    else:
        # FIXME: This ignores the declared sniff order in datatype_conf.xml
        # resulting in improper behavior
        type_info = Binary.is_sniffable_binary(dataset.path)
        if type_info:
            data_type = type_info[0]
            ext = type_info[1]
    if not data_type:
        root_datatype = registry.get_datatype_by_extension(dataset.file_type)
        if getattr(root_datatype, 'compressed', False):
            data_type = 'compressed archive'
            ext = dataset.file_type
        else:
            # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress
            is_gzipped, is_valid = check_gzip(dataset.path)
            if is_gzipped and not is_valid:
                file_err(
                    'The gzipped uploaded file contains inappropriate content',
                    dataset, json_file)
                return
            elif is_gzipped and is_valid:
                if link_data_only == 'copy_files':
                    # We need to uncompress the temp_name file, but BAM files must remain compressed in the BGZF format
                    CHUNK_SIZE = 2**20  # 1Mb
                    fd, uncompressed = tempfile.mkstemp(
                        prefix='data_id_%s_upload_gunzip_' % dataset.dataset_id,
                        dir=os.path.dirname(output_path),
                        text=False)
                    gzipped_file = gzip.GzipFile(dataset.path, 'rb')
                    while 1:
                        try:
                            chunk = gzipped_file.read(CHUNK_SIZE)
                        except IOError:
                            os.close(fd)
                            os.remove(uncompressed)
                            file_err('Problem decompressing gzipped data',
                                     dataset, json_file)
                            return
                        if not chunk:
                            break
                        os.write(fd, chunk)
                    os.close(fd)
                    gzipped_file.close()
                    # Replace the gzipped file with the decompressed file if it's safe to do so
                    if dataset.type in ('server_dir',
                                        'path_paste') or not in_place:
                        dataset.path = uncompressed
                    else:
                        shutil.move(uncompressed, dataset.path)
                    os.chmod(dataset.path, 0o644)
                dataset.name = dataset.name.rstrip('.gz')
                data_type = 'gzip'
            if not data_type and bz2 is not None:
                # See if we have a bz2 file, much like gzip
                is_bzipped, is_valid = check_bz2(dataset.path)
                if is_bzipped and not is_valid:
                    file_err(
                        'The gzipped uploaded file contains inappropriate content',
                        dataset, json_file)
                    return
                elif is_bzipped and is_valid:
                    if link_data_only == 'copy_files':
                        # We need to uncompress the temp_name file
                        CHUNK_SIZE = 2**20  # 1Mb
                        fd, uncompressed = tempfile.mkstemp(
                            prefix='data_id_%s_upload_bunzip2_' %
                            dataset.dataset_id,
                            dir=os.path.dirname(output_path),
                            text=False)
                        bzipped_file = bz2.BZ2File(dataset.path, 'rb')
                        while 1:
                            try:
                                chunk = bzipped_file.read(CHUNK_SIZE)
                            except IOError:
                                os.close(fd)
                                os.remove(uncompressed)
                                file_err(
                                    'Problem decompressing bz2 compressed data',
                                    dataset, json_file)
                                return
                            if not chunk:
                                break
                            os.write(fd, chunk)
                        os.close(fd)
                        bzipped_file.close()
                        # Replace the bzipped file with the decompressed file if it's safe to do so
                        if dataset.type in ('server_dir',
                                            'path_paste') or not in_place:
                            dataset.path = uncompressed
                        else:
                            shutil.move(uncompressed, dataset.path)
                        os.chmod(dataset.path, 0o644)
                    dataset.name = dataset.name.rstrip('.bz2')
                    data_type = 'bz2'
            if not data_type:
                # See if we have a zip archive
                is_zipped = check_zip(dataset.path)
                if is_zipped:
                    if link_data_only == 'copy_files':
                        CHUNK_SIZE = 2**20  # 1Mb
                        uncompressed = None
                        uncompressed_name = None
                        unzipped = False
                        z = zipfile.ZipFile(dataset.path)
                        for name in z.namelist():
                            if name.endswith('/'):
                                continue
                            if unzipped:
                                stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.'
                                break
                            fd, uncompressed = tempfile.mkstemp(
                                prefix='data_id_%s_upload_zip_' %
                                dataset.dataset_id,
                                dir=os.path.dirname(output_path),
                                text=False)
                            if sys.version_info[:2] >= (2, 6):
                                zipped_file = z.open(name)
                                while 1:
                                    try:
                                        chunk = zipped_file.read(CHUNK_SIZE)
                                    except IOError:
                                        os.close(fd)
                                        os.remove(uncompressed)
                                        file_err(
                                            'Problem decompressing zipped data',
                                            dataset, json_file)
                                        return
                                    if not chunk:
                                        break
                                    os.write(fd, chunk)
                                os.close(fd)
                                zipped_file.close()
                                uncompressed_name = name
                                unzipped = True
                            else:
                                # python < 2.5 doesn't have a way to read members in chunks(!)
                                try:
                                    outfile = open(uncompressed, 'wb')
                                    outfile.write(z.read(name))
                                    outfile.close()
                                    uncompressed_name = name
                                    unzipped = True
                                except IOError:
                                    os.close(fd)
                                    os.remove(uncompressed)
                                    file_err(
                                        'Problem decompressing zipped data',
                                        dataset, json_file)
                                    return
                        z.close()
                        # Replace the zipped file with the decompressed file if it's safe to do so
                        if uncompressed is not None:
                            if dataset.type in ('server_dir',
                                                'path_paste') or not in_place:
                                dataset.path = uncompressed
                            else:
                                shutil.move(uncompressed, dataset.path)
                            os.chmod(dataset.path, 0o644)
                            dataset.name = uncompressed_name
                    data_type = 'zip'
            if not data_type:
                # TODO refactor this logic. check_binary isn't guaranteed to be
                # correct since it only looks at whether the first 100 chars are
                # printable or not. If someone specifies a known unsniffable
                # binary datatype and check_binary fails, the file gets mangled.
                if check_binary(dataset.path) or Binary.is_ext_unsniffable(
                        dataset.file_type):
                    # We have a binary dataset, but it is not Bam, Sff or Pdf
                    data_type = 'binary'
                    # binary_ok = False
                    parts = dataset.name.split(".")
                    if len(parts) > 1:
                        ext = parts[-1].strip().lower()
                        if not Binary.is_ext_unsniffable(ext):
                            file_err(
                                'The uploaded binary file contains inappropriate content',
                                dataset, json_file)
                            return
                        elif Binary.is_ext_unsniffable(
                                ext) and dataset.file_type != ext:
                            err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % (
                                ext.capitalize(), ext)
                            file_err(err_msg, dataset, json_file)
                            return
            if not data_type:
                # We must have a text file
                if check_html(dataset.path):
                    file_err(
                        'The uploaded file contains inappropriate HTML content',
                        dataset, json_file)
                    return
        if data_type != 'binary':
            if link_data_only == 'copy_files':
                if dataset.type in ('server_dir',
                                    'path_paste') and data_type not in [
                                        'gzip', 'bz2', 'zip'
                                    ]:
                    in_place = False
                # Convert universal line endings to Posix line endings, but allow the user to turn it off,
                # so that is becomes possible to upload gzip, bz2 or zip files with binary data without
                # corrupting the content of those files.
                if dataset.to_posix_lines:
                    tmpdir = output_adjacent_tmpdir(output_path)
                    tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id
                    if dataset.space_to_tab:
                        line_count, converted_path = sniff.convert_newlines_sep2tabs(
                            dataset.path,
                            in_place=in_place,
                            tmp_dir=tmpdir,
                            tmp_prefix=tmp_prefix)
                    else:
                        line_count, converted_path = sniff.convert_newlines(
                            dataset.path,
                            in_place=in_place,
                            tmp_dir=tmpdir,
                            tmp_prefix=tmp_prefix)
            if dataset.file_type == 'auto':
                ext = sniff.guess_ext(dataset.path, registry.sniff_order)
            else:
                ext = dataset.file_type
            data_type = ext
    # Save job info for the framework
    if ext == 'auto' and dataset.ext:
        ext = dataset.ext
    if ext == 'auto':
        ext = 'data'
    datatype = registry.get_datatype_by_extension(ext)
    if dataset.type in ('server_dir',
                        'path_paste') and link_data_only == 'link_to_files':
        # Never alter a file that will not be copied to Galaxy's local file store.
        if datatype.dataset_content_needs_grooming(dataset.path):
            err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
                '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
            file_err(err_msg, dataset, json_file)
            return
    if link_data_only == 'copy_files' and dataset.type in (
            'server_dir',
            'path_paste') and data_type not in ['gzip', 'bz2', 'zip']:
        # Move the dataset to its "real" path
        if converted_path is not None:
            shutil.copy(converted_path, output_path)
            try:
                os.remove(converted_path)
            except:
                pass
        else:
            # This should not happen, but it's here just in case
            shutil.copy(dataset.path, output_path)
    elif link_data_only == 'copy_files':
        if purge_source:
            shutil.move(dataset.path, output_path)
        else:
            shutil.copy(dataset.path, output_path)
    # Write the job info
    stdout = stdout or 'uploaded %s file' % data_type
    info = dict(type='dataset',
                dataset_id=dataset.dataset_id,
                ext=ext,
                stdout=stdout,
                name=dataset.name,
                line_count=line_count)
    if dataset.get('uuid', None) is not None:
        info['uuid'] = dataset.get('uuid')
    json_file.write(dumps(info) + "\n")
    if link_data_only == 'copy_files' and datatype.dataset_content_needs_grooming(
            output_path):
        # Groom the dataset content if necessary
        datatype.groom_dataset_content(output_path)
return "Augustus model (%s)" % (nice_size(dataset.get_size())) def sniff(self, filename): """ Augustus archives always contain the same files """ try: if filename and tarfile.is_tarfile(filename): with tarfile.open(filename, 'r') as temptar: for f in temptar: if not f.isfile(): continue if f.name.endswith('_exon_probs.pbl') \ or f.name.endswith('_igenic_probs.pbl') \ or f.name.endswith('_intron_probs.pbl') \ or f.name.endswith('_metapars.cfg') \ or f.name.endswith('_metapars.utr.cfg') \ or f.name.endswith('_parameters.cfg') \ or f.name.endswith('_parameters.cgp.cfg') \ or f.name.endswith('_utr_probs.pbl') \ or f.name.endswith('_weightmatrix.txt'): return True else: return False except Exception as e: log.warning('%s, sniff Exception: %s', self, e) return False Binary.register_sniffable_binary_format("augustus", "augustus", Augustus)
image = check_image(dataset.path) if image: if not PIL: image = None # get_image_ext() returns None if nor a supported Image type ext = get_image_ext(dataset.path, image) data_type = ext # Is dataset content multi-byte? elif dataset.is_multi_byte: data_type = 'multi-byte char' ext = sniff.guess_ext(dataset.path, is_multi_byte=True) # Is dataset content supported sniffable binary? else: # FIXME: This ignores the declared sniff order in datatype_conf.xml # resulting in improper behavior type_info = Binary.is_sniffable_binary(dataset.path) if type_info: data_type = type_info[0] ext = type_info[1] if not data_type: root_datatype = registry.get_datatype_by_extension(dataset.file_type) if getattr(root_datatype, 'compressed', False): data_type = 'compressed archive' ext = dataset.file_type else: # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress is_gzipped, is_valid = check_gzip(dataset.path) if is_gzipped and not is_valid: file_err( 'The gzipped uploaded file contains inappropriate content', dataset, json_file)
"""
CEL datatype sniffer for Command Console version 1 format (binary files).

http://media.affymetrix.com/support/developer/powertools/changelog/gcos-agcc/cel.html#calvin
http://media.affymetrix.com/support/developer/powertools/changelog/gcos-agcc/generic.html
"""
import data
from galaxy.datatypes.binary import Binary


class CelCc1(Binary):
    file_ext = "celcc1"

    def sniff(self, filename):
        """Detect CEL Command Console v1 by its two-byte header.

        The first UBYTE is the magic number 59 (0x3B); the second is the
        version number, which is always 1.  The uploaded name is mangled
        (e.g. 'upload_file_data_jqRiCG'), so only the header can be trusted.
        """
        with open(filename, "rb") as handle:
            magic = handle.read(2)
        # A short read simply fails the comparison, matching the old
        # slice-based check.
        return magic == b'\x3B\x01'


Binary.register_sniffable_binary_format("celcc1", "celcc1", CelCc1)
def __init__(self, **kwd):
    """Declare the files of a Bruker acquisition directory as a composite dataset.

    BUG FIX: ``optional`` was previously passed as the *strings* 'False' and
    'True'.  The non-empty string 'False' is truthy, so every file that was
    meant to be mandatory was silently treated as optional.  Passing real
    booleans restores the intended required/optional split.
    """
    Binary.__init__(self, **kwd)
    # (path, description, optional, is_binary)
    members = [
        ('analysis.baf', 'analysis.baf file.', False, True),
        ('analysis.baf_idx', 'analysis.baf file.', False, True),
        ('analysis.baf_xtr', 'analysis.baf file.', False, True),
        ('analysis.content', 'analysis.content file.', False, True),
        ('analysis.0.DataAnalysis.method', 'analysis.0.DataAnalysis.method file.', False, True),
        ('analysis.0.result_c', 'analysis.0.result_c file.', False, True),
        ('calib.bin', 'calib.bin file.', False, True),
        ('desktop.ini', 'desktop.ini file that contains some metadata.', True, False),
        ('ms-waters-pda.hss', 'ms-waters-pda.hss file.', False, False),
        ('*.hdx', 'any .hdx file.', False, True),
        ('*.u2', 'any .u2 file.', False, True),
        ('*.und', 'any .und file.', False, True),
        ('*.m', 'any .m folder.', False, True),
        ('*.m/DataAnalysis.Method', 'DataAnalysis.Method file.', False, True),
        ('*.m/desktop.ini', '*.m/desktop.ini file.', True, False),
        ('*.m/hystar.method', '*.m/hystar.method file.', True, True),
        ('*.m/microTOFQAcquisition.method', '*.m/microTOFQAcquisition.method file.', False, True),
        ('*.m/submethods.xml', '*.m/submethods.xml file.', False, True),
        ('*.mcf', 'any .mcf file.', False, True),
        ('*.mcf_idx', 'any .mcf_idx file.', False, True),
        ('Storage.mcf_idx', 'Storage.mcf_idx file.', False, True),
        ('SampleInfo.xml', 'SampleInfo.xml file that contains some metadata.', False, False),
        ('NuGenesisTemplate.txt', 'NuGenesisTemplate.txt file.', True, False),
        ('LCParms.txt', 'LCParms.txt file.', False, False),
        ('HS_columns.xmc', 'HS_columns.xmc file.', False, True),
        ('BackgroundLineNeg.ami', 'BackgroundLineNeg.ami file.', True, True),
        ('BackgroundUV.ami', 'BackgroundUV.ami file.', True, True),
        ('Calibrator.ami', 'Calibrator.ami file.', False, True),
        ('DensViewNeg.ami', 'DensViewNeg.ami file.', True, True),
        ('DensViewNegBgnd.ami', 'DensViewNegBgnd.ami file.', True, True),
    ]
    for path, desc, opt, binary in members:
        self.add_composite_file(path, description=desc, optional=opt, is_binary=binary)
try: rscript = 'checkFCS.R' fcs_check = subprocess.check_output([rscript, filename]) if re.search('TRUE', str(fcs_check)): return True else: return False except: False def get_mime(self): """Returns the mime type of the datatype""" return 'application/octet-stream' Binary.register_sniffable_binary_format("fcs", "fcs", FCS) class FlowFrame( Binary ): """R Object containing flowFrame saved with saveRDS""" file_ext = 'flowframe' def set_peek(self, dataset, is_multi_byte=False): if not dataset.dataset.purged: dataset.peek = "Binary RDS flowFrame file" dataset.blurb = data.nice_size(dataset.get_size()) else: dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' def display_peek(self, dataset):
from galaxy.datatypes.binary import Binary
from galaxy.datatypes.xml import GenericXml


class Group(Binary):
    """Class describing a ProteinPilot group files"""
    file_ext = "group"


# Group files are opaque binaries; register the extension as unsniffable.
Binary.register_unsniffable_binary_ext('group')


class ProteinPilotXml(GenericXml):
    # ProteinPilot XML export; inherits all behavior from GenericXml.
    file_ext = "proteinpilot.xml"
"""
no_unzip_datatypes
A perfect clone of the prims masscomb datatype FileSet
"""
import logging
import zipfile

from galaxy.datatypes.binary import Binary

log = logging.getLogger(__name__)


class NoUnzip(Binary):
    """FileSet containing N files"""
    file_ext = "no_unzip.zip"
    blurb = "(zipped) FileSet containing multiple files"

    def sniff(self, filename):
        """Return True when the zip archive contains more than one member.

        BUG FIX: the ZipFile handle was never closed; a context manager now
        releases the file descriptor even if reading the archive raises.
        """
        with zipfile.ZipFile(filename) as zf:
            return len(zf.infolist()) > 1


# the if is just for backwards compatibility...could remove this at some point
if hasattr(Binary, 'register_sniffable_binary_format'):
    Binary.register_sniffable_binary_format('NoUnzip', 'no_unzip.zip', NoUnzip)
else: rval.append( '<li><a href="%s" type="text/plain">%s</a>%s</li>' % (fn, fn, opt_text)) rval.append('</ul></div></html>') return "\n".join(rval) def set_peek(self, dataset, is_multi_byte=False): if not dataset.dataset.purged: dataset.peek = "Bruker MS1 RAW file" dataset.blurb = nice_size(dataset.get_size()) else: dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' def display_peek(self, dataset): try: return dataset.peek except: return "Bruker MS1 RAW file (%s)" % (nice_size(dataset.get_size())) Binary.register_sniffable_binary_format("bruker.d", "d", BrukerMS1RAW) class nmrML(MetabolomicsXml): """nmrML data""" file_ext = "nmrml" blurb = 'nmrML NMR data' root = "nmrML"
header = open(filename).read(8) if binascii.b2a_hex(header) == binascii.hexlify('NCBI.sra'): #fp = open("/tmp/sra.py","w") #fp.write("inside true\n") #fp.close() return True else: #fp = open("/tmp/sra.py","w") #fp.write("inside true\n") #fp.close() return False except: return False def set_peek(self, dataset, is_multi_byte=False): if not dataset.dataset.purged: dataset.peek = 'Binary sra file' dataset.blurb = nice_size(dataset.get_size()) else: dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' def display_peek(self, dataset): try: return dataset.peek except: return 'Binary sra file (%s)' % (nice_size(dataset.get_size())) Binary.register_sniffable_binary_format('sra', 'sra', Sra)
from galaxy.datatypes.binary import Binary


class Sf3(Binary):
    """Scaffold SF3 file datatype (binary, identified by extension only)."""
    file_ext = "sf3"


Binary.register_unsniffable_binary_ext('sf3')
def sniff(self, filename): return super(PlyAscii, self).sniff(filename, subtype='ascii') class PlyBinary(Ply, Binary): file_ext = "plybinary" def __init__(self, **kwd): Binary.__init__(self, **kwd) def sniff(self, filename): return super(PlyBinary, self).sniff(filename, subtype='binary') Binary.register_sniffable_binary_format("plybinary", "plybinary", PlyBinary) class Vtk(object): """ The Visualization Toolkit provides a number of source and writer objects to read and write popular data file formats. The Visualization Toolkit also provides some of its own file formats. There are two different styles of file formats available in VTK. The simplest are the legacy, serial formats that are easy to read and write either by hand or programmatically. However, these formats are less flexible than the XML based file formats which support random access, parallel I/O, and portable data compression and are preferred to the serial VTK file formats whenever possible.
def sniff(self, filename): """ The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is binary. For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure """ try: header = open(filename).read(8) if binascii.b2a_hex(header) == binascii.hexlify("NCBI.sra"): return True else: return False except: return False def set_peek(self, dataset, is_multi_byte=False): if not dataset.dataset.purged: dataset.peek = "Binary sra file" dataset.blurb = nice_size(dataset.get_size()) else: dataset.peek = "file does not exist" dataset.blurb = "file purged from disk" def display_peek(self, dataset): try: return dataset.peek except: return "Binary sra file (%s)" % (nice_size(dataset.get_size())) Binary.register_sniffable_binary_format("sra", "sra", Sra)
else: dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' def display_peek( self, dataset ): try: return dataset.peek except: return "Matlab Binary file (%s)" % ( data.nice_size( dataset.get_size() ) ) def display_data(self, trans, dataset, preview=False, filename=None, to_ext=None, size=None, offset=None, **kwd): if preview: return ("MATLAB data files cannot be previewed.") else: return super(Matlab, self).display_data( trans, dataset, preview, filename, to_ext, size, offset, **kwd) Binary.register_sniffable_binary_format("mat", "mat", Matlab) class Wav(Binary): file_ext = "wav" def __init__( self, **kwd ): Binary.__init__( self, **kwd ) def sniff( self, filename ): try: header = open( filename ).read() if header.starts_with("RIFF"): return True else: return False except:
def __init__(self, **kwargs):
    """Initialize the datatype and log creation of the CuffDataDB."""
    Binary.__init__(self, **kwargs)
    log.info('Creating cummeRbund CuffDataDB')
rval = ['<html><head><title>Wiff Composite Dataset </title></head><p/>'] rval.append('<div>This composite dataset is composed of the following files:<p/><ul>') for composite_name, composite_file in self.get_composite_files(dataset=dataset).items(): fn = composite_name opt_text = '' if composite_file.optional: opt_text = ' (optional)' if composite_file.get('description'): rval.append('<li><a href="%s" type="text/plain">%s (%s)</a>%s</li>' % (fn, fn, composite_file.get('description'), opt_text)) else: rval.append('<li><a href="%s" type="text/plain">%s</a>%s</li>' % (fn, fn, opt_text)) rval.append('</ul></div></html>') return "\n".join(rval) Binary.register_sniffable_binary_format("wiff", "wiff", Wiff ) class PepXmlReport(Tabular): """pepxml converted to tabular report""" edam_data = "data_2536" file_ext = "pepxml.tsv" def __init__(self, **kwd): super(PepXmlReport, self).__init__(**kwd) self.column_names = ['Protein', 'Peptide', 'Assumed Charge', 'Neutral Pep Mass (calculated)', 'Neutral Mass', 'Retention Time', 'Start Scan', 'End Scan', 'Search Engine', 'PeptideProphet Probability', 'Interprophet Probabaility'] def display_peek(self, dataset): """Returns formated html of peek""" return self.make_html_table(dataset, column_names=self.column_names)
from galaxy.datatypes import data
from galaxy.datatypes.binary import Binary


class Cel(Binary):
    """Generic CEL binary data, handled as an opaque binary payload."""

    file_ext = "cel"


# No content sniffer is provided: the 'cel' extension alone identifies
# the datatype, so it is registered as unsniffable binary.
Binary.register_unsniffable_binary_ext("cel")
def __init__(self, **kwd):
    """Initialize binary datatype"""
    # Nothing datatype-specific to configure; defer entirely to Binary.
    Binary.__init__(self, **kwd)
return dataset.peek except: return "HMMER3 database (multiple files)" def __init__(self, **kwd): Binary.__init__(self, **kwd) # Binary model self.add_composite_file('model.hmm.h3m', is_binary=True) # SSI index for binary model self.add_composite_file('model.hmm.h3i', is_binary=True) # Profiles (MSV part) self.add_composite_file('model.hmm.h3f', is_binary=True) # Profiles (remained) self.add_composite_file('model.hmm.h3p', is_binary=True) Binary.register_unsniffable_binary_ext("hmmpress") class Stockholm_1_0( Text ): edam_data = "data_0863" edam_format = "format_1961" file_ext = "stockholm" MetadataElement( name="number_of_models", default=0, desc="Number of multiple alignments", readonly=True, visible=True, optional=True, no_value=0 ) def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) if (dataset.metadata.number_of_models == 1): dataset.blurb = "1 alignment" else:
""" Datatypes for Galaxy-M. """ from galaxy.datatypes.binary import ( Binary, SQlite, ) class SQliteSPS(SQlite): file_ext = "sps.sqlite" Binary.register_sniffable_binary_format("sps.sqlite", "sps.sqlite", SQliteSPS) class SQliteTM(SQlite): file_ext = "tm.sqlite" Binary.register_sniffable_binary_format("tm.sqlite", "tm.sqlite", SQliteTM) class SQliteEFS(SQlite): file_ext = "efs.sqlite" Binary.register_sniffable_binary_format("efs.sqlite", "efs.sqlite", SQliteEFS) class SQlitePPS(SQlite): file_ext = "pps.sqlite" Binary.register_sniffable_binary_format("pps.sqlite", "pps.sqlite", SQlitePPS)
if match: x = match.group('resx') y = match.group('resy') fps = match.group('fps') else: x = y = fps = 0 return x, y, fps def set_meta(self, dataset, **kwd): (x, y, fps) = self._get_resolution( dataset.file_name ) dataset.metadata.resolution_y = y dataset.metadata.resolution_x = x dataset.metadata.fps = fps class Mp4( Video ): file_ext = "mp4" def sniff(self, filename): with magic.Magic(flags=magic.MAGIC_MIME_TYPE) as m: return m.id_filename(filename) is 'video/mp4' #Binary.register_unsniffable_binary_ext("mp4") class Flv( Video ): file_ext = "flv" def sniff(self, filename): with magic.Magic(flags=magic.MAGIC_MIME_TYPE) as m: return m.id_filename(filename) is 'video/x-flv' Binary.register_unsniffable_binary_ext("flv")
rval = ['<html><head><title>Wiff Composite Dataset </title></head><p/>'] rval.append('<div>This composite dataset is composed of the following files:<p/><ul>') for composite_name, composite_file in self.get_composite_files(dataset=dataset).iteritems(): fn = composite_name opt_text = '' if composite_file.optional: opt_text = ' (optional)' if composite_file.get('description'): rval.append('<li><a href="%s" type="text/plain">%s (%s)</a>%s</li>' % (fn, fn, composite_file.get('description'), opt_text)) else: rval.append('<li><a href="%s" type="text/plain">%s</a>%s</li>' % (fn, fn, opt_text)) rval.append('</ul></div></html>') return "\n".join(rval) if hasattr(Binary, 'register_unsniffable_binary_ext'): Binary.register_unsniffable_binary_ext('wiff') class IdpDB(Binary): file_ext = "idpDB" if hasattr(Binary, 'register_unsniffable_binary_ext'): Binary.register_unsniffable_binary_ext('idpDB') class PepXmlReport(Tabular): """pepxml converted to tabular report""" file_ext = "tsv" def __init__(self, **kwd): Tabular.__init__(self, **kwd)
image = check_image( dataset.path ) if image: if not PIL: image = None # get_image_ext() returns None if nor a supported Image type ext = get_image_ext( dataset.path, image ) data_type = ext # Is dataset content multi-byte? elif dataset.is_multi_byte: data_type = 'multi-byte char' ext = sniff.guess_ext( dataset.path, is_multi_byte=True ) # Is dataset content supported sniffable binary? else: # FIXME: This ignores the declared sniff order in datatype_conf.xml # resulting in improper behavior type_info = Binary.is_sniffable_binary( dataset.path ) if type_info: data_type = type_info[0] ext = type_info[1] if not data_type: root_datatype = registry.get_datatype_by_extension( dataset.file_type ) if getattr( root_datatype, 'compressed', False ): data_type = 'compressed archive' ext = dataset.file_type else: # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress is_gzipped, is_valid = check_gzip( dataset.path ) if is_gzipped and not is_valid: file_err( 'The gzipped uploaded file contains inappropriate content', dataset, json_file ) return elif is_gzipped and is_valid:
try: rscript = './checkFCS.R' fcs_check = subprocess.check_output([rscript, filename]) if re.search('TRUE', str(fcs_check)): return True else: return False except: False def get_mime(self): """Returns the mime type of the datatype""" return 'application/octet-stream' Binary.register_sniffable_binary_format("fcs", "fcs", FCS) class FlowText(Tabular): """Class describing an Flow Text file""" file_ext = "flowtext" def set_peek(self, dataset, is_multi_byte=False): if not dataset.dataset.purged: dataset.peek = "Text Flow file" dataset.blurb = data.nice_size(dataset.get_size()) else: dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' def display_peek(self, dataset):
def add_file(dataset, registry, json_file, output_path):
    """
    Validate, (optionally) decompress and stage one uploaded dataset.

    Inspects the uploaded file at ``dataset.path``, determines its datatype
    (image / multi-byte text / sniffable binary / compressed archive / text),
    uncompresses gzip, bz2 and single-file zip payloads when permitted,
    normalises line endings for text data, moves or copies the result to
    ``output_path`` and appends a JSON job-info record to ``json_file``.

    Errors are reported via ``file_err(...)`` followed by ``return``; the
    function itself always returns ``None``.
    """
    data_type = None
    line_count = None
    converted_path = None
    stdout = None
    link_data_only = dataset.get('link_data_only', 'copy_files')
    # NOTE(review): run_as_real_user is seeded from the same 'in_place' flag
    # and keeps that initial value even after in_place is forced False below
    # -- confirm this aliasing is intentional.
    run_as_real_user = in_place = dataset.get('in_place', True)
    purge_source = dataset.get('purge_source', True)
    # in_place is True if there is no external chmod in place,
    # however there are other instances where modifications should not occur in_place:
    # when a file is added from a directory on the local file system (ftp import folder or any other path).
    if dataset.type in ('server_dir', 'path_paste', 'ftp_import'):
        in_place = False
    check_content = dataset.get('check_content', True)
    auto_decompress = dataset.get('auto_decompress', True)
    try:
        ext = dataset.file_type
    except AttributeError:
        file_err('Unable to process uploaded file, missing file_type parameter.', dataset, json_file)
        return
    if dataset.type == 'url':
        try:
            page = urlopen(dataset.path)  # page will be .close()ed by sniff methods
            temp_name, dataset.is_multi_byte = sniff.stream_to_file(page, prefix='url_paste', source_encoding=util.get_charset_from_http_headers(page.headers))
        except Exception as e:
            file_err('Unable to fetch %s\n%s' % (dataset.path, str(e)), dataset, json_file)
            return
        dataset.path = temp_name
    # See if we have an empty file
    if not os.path.exists(dataset.path):
        file_err('Uploaded temporary file (%s) does not exist.' % dataset.path, dataset, json_file)
        return
    if not os.path.getsize(dataset.path) > 0:
        file_err('The uploaded file is empty', dataset, json_file)
        return
    if not dataset.type == 'url':
        # Already set is_multi_byte above if type == 'url'
        try:
            # Close the probe handle deterministically instead of leaking it.
            with codecs.open(dataset.path, 'r', 'utf-8') as probe:
                dataset.is_multi_byte = multi_byte.is_multi_byte(probe.read(100))
        except UnicodeDecodeError:
            dataset.is_multi_byte = False
    # Is dataset an image?
    i_ext = get_image_ext(dataset.path)
    if i_ext:
        ext = i_ext
        data_type = ext
    # Is dataset content multi-byte?
    elif dataset.is_multi_byte:
        data_type = 'multi-byte char'
        ext = sniff.guess_ext(dataset.path, registry.sniff_order, is_multi_byte=True)
    # Is dataset content supported sniffable binary?
    else:
        # FIXME: This ignores the declared sniff order in datatype_conf.xml
        # resulting in improper behavior
        type_info = Binary.is_sniffable_binary(dataset.path)
        if type_info:
            data_type = type_info[0]
            ext = type_info[1]
    if not data_type:
        root_datatype = registry.get_datatype_by_extension(dataset.file_type)
        if getattr(root_datatype, 'compressed', False):
            data_type = 'compressed archive'
            ext = dataset.file_type
        else:
            # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress
            is_gzipped, is_valid = check_gzip(dataset.path, check_content=check_content)
            if is_gzipped and not is_valid:
                file_err('The gzipped uploaded file contains inappropriate content', dataset, json_file)
                return
            elif is_gzipped and is_valid and auto_decompress:
                if link_data_only == 'copy_files':
                    # We need to uncompress the temp_name file, but BAM files must remain compressed in the BGZF format
                    CHUNK_SIZE = 2 ** 20  # 1Mb
                    fd, uncompressed = tempfile.mkstemp(prefix='data_id_%s_upload_gunzip_' % dataset.dataset_id, dir=os.path.dirname(output_path), text=False)
                    gzipped_file = gzip.GzipFile(dataset.path, 'rb')
                    while True:
                        try:
                            chunk = gzipped_file.read(CHUNK_SIZE)
                        except IOError:
                            os.close(fd)
                            os.remove(uncompressed)
                            file_err('Problem decompressing gzipped data', dataset, json_file)
                            return
                        if not chunk:
                            break
                        os.write(fd, chunk)
                    os.close(fd)
                    gzipped_file.close()
                    # Replace the gzipped file with the decompressed file if it's safe to do so
                    if not in_place:
                        dataset.path = uncompressed
                    else:
                        shutil.move(uncompressed, dataset.path)
                        os.chmod(dataset.path, 0o644)
                # Drop the '.gz' suffix only; str.rstrip('.gz') would strip
                # any trailing '.', 'g' or 'z' chars (e.g. 'egg.gz' -> 'e').
                if dataset.name.endswith('.gz'):
                    dataset.name = dataset.name[:-len('.gz')]
                data_type = 'gzip'
            if not data_type:
                # See if we have a bz2 file, much like gzip
                is_bzipped, is_valid = check_bz2(dataset.path, check_content)
                if is_bzipped and not is_valid:
                    # Message previously said "gzipped" (copy-paste error).
                    file_err('The bz2 compressed uploaded file contains inappropriate content', dataset, json_file)
                    return
                elif is_bzipped and is_valid and auto_decompress:
                    if link_data_only == 'copy_files':
                        # We need to uncompress the temp_name file
                        CHUNK_SIZE = 2 ** 20  # 1Mb
                        fd, uncompressed = tempfile.mkstemp(prefix='data_id_%s_upload_bunzip2_' % dataset.dataset_id, dir=os.path.dirname(output_path), text=False)
                        bzipped_file = bz2.BZ2File(dataset.path, 'rb')
                        while True:
                            try:
                                chunk = bzipped_file.read(CHUNK_SIZE)
                            except IOError:
                                os.close(fd)
                                os.remove(uncompressed)
                                file_err('Problem decompressing bz2 compressed data', dataset, json_file)
                                return
                            if not chunk:
                                break
                            os.write(fd, chunk)
                        os.close(fd)
                        bzipped_file.close()
                        # Replace the bzipped file with the decompressed file if it's safe to do so
                        if not in_place:
                            dataset.path = uncompressed
                        else:
                            shutil.move(uncompressed, dataset.path)
                            os.chmod(dataset.path, 0o644)
                    # Suffix-safe removal of '.bz2' (see '.gz' note above).
                    if dataset.name.endswith('.bz2'):
                        dataset.name = dataset.name[:-len('.bz2')]
                    data_type = 'bz2'
            if not data_type:
                # See if we have a zip archive
                is_zipped = check_zip(dataset.path)
                if is_zipped and auto_decompress:
                    if link_data_only == 'copy_files':
                        CHUNK_SIZE = 2 ** 20  # 1Mb
                        uncompressed = None
                        uncompressed_name = None
                        unzipped = False
                        z = zipfile.ZipFile(dataset.path)
                        for name in z.namelist():
                            if name.endswith('/'):
                                continue
                            if unzipped:
                                stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.'
                                break
                            fd, uncompressed = tempfile.mkstemp(prefix='data_id_%s_upload_zip_' % dataset.dataset_id, dir=os.path.dirname(output_path), text=False)
                            if sys.version_info[:2] >= (2, 6):
                                zipped_file = z.open(name)
                                while True:
                                    try:
                                        chunk = zipped_file.read(CHUNK_SIZE)
                                    except IOError:
                                        os.close(fd)
                                        os.remove(uncompressed)
                                        file_err('Problem decompressing zipped data', dataset, json_file)
                                        return
                                    if not chunk:
                                        break
                                    os.write(fd, chunk)
                                os.close(fd)
                                zipped_file.close()
                                uncompressed_name = name
                                unzipped = True
                            else:
                                # python < 2.5 doesn't have a way to read members in chunks(!)
                                try:
                                    outfile = open(uncompressed, 'wb')
                                    outfile.write(z.read(name))
                                    outfile.close()
                                    uncompressed_name = name
                                    unzipped = True
                                except IOError:
                                    os.close(fd)
                                    os.remove(uncompressed)
                                    file_err('Problem decompressing zipped data', dataset, json_file)
                                    return
                        z.close()
                        # Replace the zipped file with the decompressed file if it's safe to do so
                        if uncompressed is not None:
                            if not in_place:
                                dataset.path = uncompressed
                            else:
                                shutil.move(uncompressed, dataset.path)
                                os.chmod(dataset.path, 0o644)
                            dataset.name = uncompressed_name
                    data_type = 'zip'
            if not data_type:
                # TODO refactor this logic.  check_binary isn't guaranteed to be
                # correct since it only looks at whether the first 100 chars are
                # printable or not.  If someone specifies a known unsniffable
                # binary datatype and check_binary fails, the file gets mangled.
                if check_binary(dataset.path) or Binary.is_ext_unsniffable(dataset.file_type):
                    # We have a binary dataset, but it is not Bam, Sff or Pdf
                    data_type = 'binary'
                    parts = dataset.name.split(".")
                    if len(parts) > 1:
                        ext = parts[-1].strip().lower()
                        if check_content and not Binary.is_ext_unsniffable(ext):
                            file_err('The uploaded binary file contains inappropriate content', dataset, json_file)
                            return
                        elif Binary.is_ext_unsniffable(ext) and dataset.file_type != ext:
                            err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % (ext.capitalize(), ext)
                            file_err(err_msg, dataset, json_file)
                            return
            if not data_type:
                # We must have a text file
                if check_content and check_html(dataset.path):
                    file_err('The uploaded file contains inappropriate HTML content', dataset, json_file)
                    return
            if data_type != 'binary':
                if link_data_only == 'copy_files' and data_type not in ('gzip', 'bz2', 'zip'):
                    # Convert universal line endings to Posix line endings if to_posix_lines is True
                    # and the data is not binary or gzip-, bz2- or zip-compressed.
                    if dataset.to_posix_lines:
                        tmpdir = output_adjacent_tmpdir(output_path)
                        tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id
                        if dataset.space_to_tab:
                            line_count, converted_path = sniff.convert_newlines_sep2tabs(dataset.path, in_place=in_place, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
                        else:
                            line_count, converted_path = sniff.convert_newlines(dataset.path, in_place=in_place, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
                if dataset.file_type == 'auto':
                    ext = sniff.guess_ext(dataset.path, registry.sniff_order)
                else:
                    ext = dataset.file_type
                data_type = ext
    # Save job info for the framework
    if ext == 'auto' and data_type == 'binary':
        ext = 'data'
    if ext == 'auto' and dataset.ext:
        ext = dataset.ext
    if ext == 'auto':
        ext = 'data'
    datatype = registry.get_datatype_by_extension(ext)
    if dataset.type in ('server_dir', 'path_paste') and link_data_only == 'link_to_files':
        # Never alter a file that will not be copied to Galaxy's local file store.
        if datatype.dataset_content_needs_grooming(dataset.path):
            err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
                      '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
            file_err(err_msg, dataset, json_file)
            return
    if link_data_only == 'copy_files' and converted_path:
        # Move the dataset to its "real" path
        try:
            shutil.move(converted_path, output_path)
        except OSError as e:
            # We may not have permission to remove converted_path
            if e.errno != errno.EACCES:
                raise
    elif link_data_only == 'copy_files':
        if purge_source and not run_as_real_user:
            # if the upload tool runs as a real user the real user
            # can't move dataset.path as this path is owned by galaxy.
            shutil.move(dataset.path, output_path)
        else:
            shutil.copy(dataset.path, output_path)
    # Write the job info
    stdout = stdout or 'uploaded %s file' % data_type
    info = dict(type='dataset',
                dataset_id=dataset.dataset_id,
                ext=ext,
                stdout=stdout,
                name=dataset.name,
                line_count=line_count)
    if dataset.get('uuid', None) is not None:
        info['uuid'] = dataset.get('uuid')
    json_file.write(dumps(info) + "\n")
    if link_data_only == 'copy_files' and datatype and datatype.dataset_content_needs_grooming(output_path):
        # Groom the dataset content if necessary
        datatype.groom_dataset_content(output_path)
def __init__(self, **kwd):
    # Delegates construction to the Binary base class and records, at info
    # level, that a cummeRbund CuffDataDB datatype is being created.
    Binary.__init__(self, **kwd)
    log.info('Creating cummeRbund CuffDataDB')
""" CEL datatype sniffer for v4 (binary files). http://media.affymetrix.com/support/developer/powertools/changelog/gcos-agcc/cel.html """ import data from galaxy.datatypes.binary import Binary class Cel( Binary ): file_ext = "cel" def sniff(self, filename): # Determine if the file is in CEL v4 format. # Filename is in the format 'upload_file_data_jqRiCG', therefore we must check the header bytes. # Get the first 2 integers (32bit). First is magic number 64, second is version number (always 4). with open(filename, "rb") as f: byte = f.read(8) try: if byte[0:8] == b'\x40\x00\x00\x00\x04\x00\x00\x00': return True else: return False except IndexError: return False Binary.register_sniffable_binary_format("cel", "cel", Cel)
from galaxy.datatypes.binary import Binary
from galaxy.datatypes.xml import GenericXml


class Group( Binary ):
    """Class describing a ProteinPilot group files"""
    file_ext = "group"


# No content sniffer is defined, so the 'group' extension is registered as
# unsniffable binary and must be declared explicitly on upload.
Binary.register_unsniffable_binary_ext('group')


class ProteinPilotXml( GenericXml ):
    """XML datatype for ProteinPilot output; behaviour inherited from GenericXml."""
    file_ext = "proteinpilot.xml"
""" fd = wave.open(dataset.dataset.file_name, 'rb') dataset.metadata.rate = fd.getframerate() dataset.metadata.nframes = fd.getnframes() dataset.metadata.sampwidth = fd.getsampwidth() dataset.metadata.nchannels = fd.getnchannels() #dataset.metadata.identifier = os.path.splitext(dataset.dataset.element_identifier)[0] fd.close() #def display_data(self, trans, dataset, preview=False, filename=None, to_ext=None, offset=None, ck_size=None, **kwd): # return trans.fill_template( "/dataset/audio.mako", dataset=dataset) Binary.register_sniffable_binary_format('wav', 'wav', WAV) class TextGrid(Text): """Praat Textgrid file for speech annotations >>> from galaxy.datatypes.sniff import get_test_fname >>> fname = get_test_fname('1_1119_2_22_001.TextGrid') >>> TextGrid().sniff(fname) True >>> fname = get_test_fname('drugbank_drugs.cml') >>> TextGrid().sniff(fname) False """
edam_format = "format_3508" file_ext = "pdf" def sniff(self, filename): """Determine if the file is in pdf format.""" headers = get_headers(filename, None, 1) try: if headers[0][0].startswith("%PDF"): return True else: return False except IndexError: return False Binary.register_sniffable_binary_format("pdf", "pdf", Pdf) def create_applet_tag_peek(class_name, archive, params): text = """ <object classid="java:%s" type="application/x-java-applet" height="30" width="200" align="center" > <param name="archive" value="%s"/>""" % (class_name, archive) for name, value in params.iteritems(): text += """<param name="%s" value="%s"/>""" % (name, value) text += """ <object classid="clsid:8AD9C840-044E-11D1-B3E9-00805F499D93" height="30" width="200" > <param name="code" value="%s" /> <param name="archive" value="%s"/>""" % (class_name, archive)
from galaxy.datatypes import data
from galaxy.datatypes.binary import Binary


class Cel(Binary):
    """Class for generic CEL binary format"""
    # NOTE(review): this duplicates a byte-identical Cel definition that
    # appears earlier in SOURCE -- confirm only one copy should be kept.
    file_ext = "cel"


# Registered as unsniffable: the 'cel' extension, not file content,
# identifies the datatype on upload.
Binary.register_unsniffable_binary_ext("cel")