Beispiel #1
0
def handle_uploaded_dataset_file(filename,
                                 datatypes_registry,
                                 ext='auto',
                                 is_multi_byte=False):
    """
    Vet the content of an uploaded file and return the extension to use for it.

    The file is first passed to handle_compressed_file(), which yields a
    validity flag and a (possibly updated) extension.  When that extension is
    one of AUTO_DETECT_EXTENSIONS it is replaced by the result of guess_ext()
    using the registry's sniff order.

    Raises InappropriateDatasetContentError when:
      - handle_compressed_file() reports the file as not valid,
      - the file is binary, its extension is not flagged unsniffable by
        Binary, and the datatype registered for the extension does not
        sniff the file, or
      - the file is not binary and check_html() matches it.
    """
    content_ok, ext = handle_compressed_file(
        filename, datatypes_registry, ext=ext)
    if not content_ok:
        raise InappropriateDatasetContentError(
            'The compressed uploaded file contains inappropriate content.')

    if ext in AUTO_DETECT_EXTENSIONS:
        ext = guess_ext(
            filename,
            sniff_order=datatypes_registry.sniff_order,
            is_multi_byte=is_multi_byte)

    # Non-binary uploads only need the HTML check.
    if not check_binary(filename):
        if check_html(filename):
            raise InappropriateDatasetContentError(
                'The uploaded file contains inappropriate HTML content.')
        return ext

    # Binary uploads with an unsniffable extension are accepted as-is.
    if Binary.is_ext_unsniffable(ext):
        return ext

    # Otherwise the datatype registered for the extension must recognise
    # the file.
    datatype = datatypes_registry.get_datatype_by_extension(ext)
    if not datatype.sniff(filename):
        raise InappropriateDatasetContentError(
            'The binary uploaded file contains inappropriate content.')
    return ext
def handle_uploaded_dataset_file( filename, datatypes_registry, ext = 'auto', is_multi_byte = False ):
    """
    Vet the content of an uploaded file and return the extension to use for it.

    handle_compressed_file() is consulted first and yields a validity flag
    plus a (possibly updated) extension.  If that extension is listed in
    AUTO_DETECT_EXTENSIONS it is replaced by the result of guess_ext() using
    the registry's sniff order.

    Raises InappropriateDatasetContentError when the compressed-file check
    reports the file as not valid, when a binary file with a sniffable
    extension is not recognised by the datatype registered for that
    extension, or when a non-binary file matches check_html().
    """
    is_valid, ext = handle_compressed_file( filename, datatypes_registry, ext = ext )

    # Exceptions are raised with the call form ``raise Exc( msg )``; the
    # ``raise Exc, msg`` statement form is a syntax error on Python 3.
    if not is_valid:
        raise InappropriateDatasetContentError( 'The compressed uploaded file contains inappropriate content.' )

    if ext in AUTO_DETECT_EXTENSIONS:
        ext = guess_ext( filename, sniff_order = datatypes_registry.sniff_order, is_multi_byte=is_multi_byte )

    if check_binary( filename ):
        # Binary content is accepted when the extension is flagged unsniffable,
        # or when the datatype registered for the extension sniffs the file.
        if not Binary.is_ext_unsniffable(ext) and not datatypes_registry.get_datatype_by_extension( ext ).sniff( filename ):
            raise InappropriateDatasetContentError( 'The binary uploaded file contains inappropriate content.' )
    elif check_html( filename ):
        raise InappropriateDatasetContentError( 'The uploaded file contains inappropriate HTML content.' )
    return ext
Beispiel #3
0
def is_data_index_sample_file(file_path):
    """
    Decide whether a .sample file should be copied to ~/tool-data when a tool
    shed repository is being installed into a Galaxy instance.

    Returns True when the file is column based, or when none of the
    disqualifying content checks (HTML, image, binary, bz2, gzip, zip) match;
    returns False otherwise.
    """
    # Most data index files are tabular, so a column-based file is accepted
    # immediately without running the remaining checks.
    if is_column_based(file_path):
        return True
    # Any one of these content types rules the file out.  The checks run in
    # this order and stop at the first match.
    disqualified = (
        checkers.check_html(file_path)
        or checkers.check_image(file_path)
        or checkers.check_binary(name=file_path)
        or checkers.is_bz2(file_path)
        or checkers.is_gzip(file_path)
        or checkers.check_zip(file_path)
    )
    # Copy the file by default when nothing above matched.
    return not disqualified
Beispiel #4
0
def is_data_index_sample_file(file_path):
    """
    Report whether a .sample file is suitable for copying to ~/tool-data while
    a tool shed repository is being installed into a Galaxy instance.

    A column-based file is always accepted.  Otherwise the file is rejected
    if it is detected as HTML, an image, binary, bz2, gzip or zip content,
    and accepted if none of those checks match.
    """
    # Tabular content is the common case for data index files; accept it
    # without looking any further.
    if is_column_based(file_path):
        return True
    # Each entry defers one content check so that checks run lazily, in order,
    # and evaluation stops at the first one that matches.
    content_checks = (
        lambda: checkers.check_html(file_path),
        lambda: checkers.check_image(file_path),
        lambda: checkers.check_binary(name=file_path),
        lambda: checkers.is_bz2(file_path),
        lambda: checkers.is_gzip(file_path),
        lambda: checkers.check_zip(file_path),
    )
    # The file is copied unless one of the checks above matched.
    return not any(run_check() for run_check in content_checks)
Beispiel #5
0
             z.close()
             # Replace the zipped file with the decompressed file if it's safe to do so
             if uncompressed is not None:
                 if dataset.type in ( 'server_dir', 'path_paste' ) or not in_place:
                     dataset.path = uncompressed
                 else:
                     shutil.move( uncompressed, dataset.path )
                 os.chmod(dataset.path, 0644)
                 dataset.name = uncompressed_name
         data_type = 'zip'
 if not data_type:
     # TODO refactor this logic.  check_binary isn't guaranteed to be
     # correct since it only looks at whether the first 100 chars are
     # printable or not.  If someone specifies a known unsniffable
     # binary datatype and check_binary fails, the file gets mangled.
     if check_binary( dataset.path ) or Binary.is_ext_unsniffable(dataset.file_type):
         # We have a binary dataset, but it is not Bam, Sff or Pdf
         data_type = 'binary'
         # binary_ok = False
         parts = dataset.name.split( "." )
         if len( parts ) > 1:
             ext = parts[-1].strip().lower()
             if not Binary.is_ext_unsniffable(ext):
                 file_err( 'The uploaded binary file contains inappropriate content', dataset, json_file )
                 return
             elif Binary.is_ext_unsniffable(ext) and dataset.file_type != ext:
                 err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % ( ext.capitalize(), ext )
                 file_err( err_msg, dataset, json_file )
                 return
 if not data_type:
     # We must have a text file
             # Replace the zipped file with the decompressed file if it's safe to do so
             if uncompressed is not None:
                 if dataset.type in ('server_dir',
                                     'path_paste') or not in_place:
                     dataset.path = uncompressed
                 else:
                     shutil.move(uncompressed, dataset.path)
                 os.chmod(dataset.path, 0644)
                 dataset.name = uncompressed_name
         data_type = 'zip'
 if not data_type:
     # TODO refactor this logic.  check_binary isn't guaranteed to be
     # correct since it only looks at whether the first 100 chars are
     # printable or not.  If someone specifies a known unsniffable
     # binary datatype and check_binary fails, the file gets mangled.
     if check_binary(dataset.path) or Binary.is_ext_unsniffable(
             dataset.file_type):
         # We have a binary dataset, but it is not Bam, Sff or Pdf
         data_type = 'binary'
         # binary_ok = False
         parts = dataset.name.split(".")
         if len(parts) > 1:
             ext = parts[-1].strip().lower()
             if not Binary.is_ext_unsniffable(ext):
                 file_err(
                     'The uploaded binary file contains inappropriate content',
                     dataset, json_file)
                 return
             elif Binary.is_ext_unsniffable(
                     ext) and dataset.file_type != ext:
                 err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % (