def handle_uploaded_dataset_file(filename, datatypes_registry, ext='auto', is_multi_byte=False):
    """
    Check the content of an uploaded file and return the extension to use for it.

    The file is first passed through handle_compressed_file(), which reports
    whether the content is acceptable and may replace the extension. If the
    resulting extension is one of AUTO_DETECT_EXTENSIONS, it is replaced by the
    result of guess_ext() using the registry's sniff order.

    Raises InappropriateDatasetContentError when handle_compressed_file()
    rejects the file, when a binary file has a sniffable extension whose
    datatype does not sniff the file, or when a non-binary file is flagged by
    check_html().
    """
    content_ok, resolved_ext = handle_compressed_file(
        filename, datatypes_registry, ext=ext)
    if not content_ok:
        raise InappropriateDatasetContentError(
            'The compressed uploaded file contains inappropriate content.')

    if resolved_ext in AUTO_DETECT_EXTENSIONS:
        resolved_ext = guess_ext(
            filename,
            sniff_order=datatypes_registry.sniff_order,
            is_multi_byte=is_multi_byte)

    # Non-binary content: the only remaining check is for HTML.
    if not check_binary(filename):
        if check_html(filename):
            raise InappropriateDatasetContentError(
                'The uploaded file contains inappropriate HTML content.')
        return resolved_ext

    # Binary content with an unsniffable extension is accepted as-is.
    if Binary.is_ext_unsniffable(resolved_ext):
        return resolved_ext

    # Otherwise the datatype registered for the extension must sniff the file.
    datatype = datatypes_registry.get_datatype_by_extension(resolved_ext)
    if not datatype.sniff(filename):
        raise InappropriateDatasetContentError(
            'The binary uploaded file contains inappropriate content.')
    return resolved_ext
def handle_uploaded_dataset_file(filename, datatypes_registry, ext='auto', is_multi_byte=False):
    """
    Check the content of an uploaded file and return the extension to use for it.

    filename           -- path of the uploaded file to inspect.
    datatypes_registry -- registry object; its sniff_order attribute and
                          get_datatype_by_extension() method are used here.
    ext                -- requested extension, forwarded to
                          handle_compressed_file(), which may replace it.
    is_multi_byte      -- forwarded unchanged to guess_ext().

    Returns the extension after compressed-file handling and, when it is one of
    AUTO_DETECT_EXTENSIONS, after guessing it with guess_ext().

    Raises InappropriateDatasetContentError when handle_compressed_file()
    reports the file as invalid, when a binary file has a sniffable extension
    whose datatype does not sniff the file, or when a non-binary file is
    flagged by check_html().
    """
    is_valid, ext = handle_compressed_file(filename, datatypes_registry, ext=ext)
    if not is_valid:
        # The call form of raise is valid on both Python 2 and Python 3; the
        # old "raise Exc, 'msg'" statement is a SyntaxError on Python 3.
        raise InappropriateDatasetContentError(
            'The compressed uploaded file contains inappropriate content.')
    if ext in AUTO_DETECT_EXTENSIONS:
        ext = guess_ext(
            filename,
            sniff_order=datatypes_registry.sniff_order,
            is_multi_byte=is_multi_byte)
    if check_binary(filename):
        # Binary content is accepted only if its extension is unsniffable or
        # the datatype registered for that extension sniffs the file.
        if (not Binary.is_ext_unsniffable(ext)
                and not datatypes_registry.get_datatype_by_extension(ext).sniff(filename)):
            raise InappropriateDatasetContentError(
                'The binary uploaded file contains inappropriate content.')
    elif check_html(filename):
        raise InappropriateDatasetContentError(
            'The uploaded file contains inappropriate HTML content.')
    return ext
def is_data_index_sample_file(file_path):
    """
    Decide whether a .sample file should be copied to ~/tool-data when a tool
    shed repository is being installed into a Galaxy instance.

    Returns True for column-based files and for files that none of the content
    checkers below flag; returns False otherwise.
    """
    # Most data index files are tabular, so a column-based file is accepted
    # without running any of the other checks.
    if is_column_based(file_path):
        return True
    # Any single positive check vetoes the copy. The checks short-circuit in
    # this order, so later ones are not run once one has matched.
    should_not_copy = (
        checkers.check_html(file_path)
        or checkers.check_image(file_path)
        or checkers.check_binary(name=file_path)
        or checkers.is_bz2(file_path)
        or checkers.is_gzip(file_path)
        or checkers.check_zip(file_path)
    )
    # With no veto, the default is to copy the file.
    return not should_not_copy
z.close() # Replace the zipped file with the decompressed file if it's safe to do so if uncompressed is not None: if dataset.type in ( 'server_dir', 'path_paste' ) or not in_place: dataset.path = uncompressed else: shutil.move( uncompressed, dataset.path ) os.chmod(dataset.path, 0644) dataset.name = uncompressed_name data_type = 'zip' if not data_type: # TODO refactor this logic. check_binary isn't guaranteed to be # correct since it only looks at whether the first 100 chars are # printable or not. If someone specifies a known unsniffable # binary datatype and check_binary fails, the file gets mangled. if check_binary( dataset.path ) or Binary.is_ext_unsniffable(dataset.file_type): # We have a binary dataset, but it is not Bam, Sff or Pdf data_type = 'binary' # binary_ok = False parts = dataset.name.split( "." ) if len( parts ) > 1: ext = parts[-1].strip().lower() if not Binary.is_ext_unsniffable(ext): file_err( 'The uploaded binary file contains inappropriate content', dataset, json_file ) return elif Binary.is_ext_unsniffable(ext) and dataset.file_type != ext: err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % ( ext.capitalize(), ext ) file_err( err_msg, dataset, json_file ) return if not data_type: # We must have a text file
# Replace the zipped file with the decompressed file if it's safe to do so if uncompressed is not None: if dataset.type in ('server_dir', 'path_paste') or not in_place: dataset.path = uncompressed else: shutil.move(uncompressed, dataset.path) os.chmod(dataset.path, 0644) dataset.name = uncompressed_name data_type = 'zip' if not data_type: # TODO refactor this logic. check_binary isn't guaranteed to be # correct since it only looks at whether the first 100 chars are # printable or not. If someone specifies a known unsniffable # binary datatype and check_binary fails, the file gets mangled. if check_binary(dataset.path) or Binary.is_ext_unsniffable( dataset.file_type): # We have a binary dataset, but it is not Bam, Sff or Pdf data_type = 'binary' # binary_ok = False parts = dataset.name.split(".") if len(parts) > 1: ext = parts[-1].strip().lower() if not Binary.is_ext_unsniffable(ext): file_err( 'The uploaded binary file contains inappropriate content', dataset, json_file) return elif Binary.is_ext_unsniffable( ext) and dataset.file_type != ext: err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % (