Esempio n. 1
0
def check_file_content_for_html_and_images( file_path ):
    """
    Report whether the file at file_path is flagged as HTML or as an image.

    The HTML check runs first; the image check is only consulted when the HTML
    check does not match.  Returns a one-line message (terminated by a newline)
    naming the kind of content found, or an empty string when neither check
    matches.
    """
    if checkers.check_html( file_path ):
        return 'The file "%s" contains HTML content.\n' % str( file_path )
    if checkers.check_image( file_path ):
        return 'The file "%s" contains image content.\n' % str( file_path )
    # Neither check matched, so there is nothing to report.
    return ''
Esempio n. 2
0
def check_file_content_for_html_and_images(file_path):
    """
    Build a warning message if the given file is detected as HTML or an image.

    HTML detection takes precedence over image detection.  An empty string is
    returned when neither detector matches.
    """
    if checkers.check_html(file_path):
        return 'The file "%s" contains HTML content.\n' % str(file_path)
    if checkers.check_image(file_path):
        return 'The file "%s" contains image content.\n' % str(file_path)
    # No match from either detector: empty message.
    return ''
Esempio n. 3
0
def handle_uploaded_dataset_file(filename,
                                 datatypes_registry,
                                 ext='auto',
                                 is_multi_byte=False):
    """
    Validate an uploaded dataset file and return the extension to use for it.

    The file is first passed through handle_compressed_file, which may replace
    the extension.  If the resulting extension is one of AUTO_DETECT_EXTENSIONS
    it is guessed with guess_ext using the registry's sniff order.

    Raises InappropriateDatasetContentError when:
      * handle_compressed_file reports the file as not valid,
      * the file is binary, its extension is sniffable, and the datatype
        registered for that extension fails to sniff the file,
      * the file is not binary and is detected as HTML.
    """
    compressed_ok, ext = handle_compressed_file(
        filename, datatypes_registry, ext=ext)
    if not compressed_ok:
        raise InappropriateDatasetContentError(
            'The compressed uploaded file contains inappropriate content.')

    if ext in AUTO_DETECT_EXTENSIONS:
        ext = guess_ext(
            filename,
            sniff_order=datatypes_registry.sniff_order,
            is_multi_byte=is_multi_byte)

    if not check_binary(filename):
        # Non-binary upload: the only remaining rejection reason is HTML.
        if check_html(filename):
            raise InappropriateDatasetContentError(
                'The uploaded file contains inappropriate HTML content.')
        return ext

    # Binary upload: unsniffable extensions are accepted without a sniff.
    if Binary.is_ext_unsniffable(ext):
        return ext

    datatype = datatypes_registry.get_datatype_by_extension(ext)
    if not datatype.sniff(filename):
        raise InappropriateDatasetContentError(
            'The binary uploaded file contains inappropriate content.')
    return ext
Esempio n. 4
0
 def __check_file_content( self, file_path ):
     """
     Return a message describing disallowed content found in file_path.

     The HTML check is tried first, then the image check.  An empty string is
     returned when neither check matches.
     """
     if checkers.check_html( file_path ):
         return 'The file "%s" contains HTML content.\n' % str( file_path )
     if checkers.check_image( file_path ):
         return 'The file "%s" contains image content.\n' % str( file_path )
     # Nothing was flagged.
     return ''
Esempio n. 5
0
 def __check_file_content(self, file_path):
     """
     Check file_path for HTML or image content and describe what was found.

     Returns a newline-terminated message for the first matching check (HTML
     before image), or an empty string if neither check matches.
     """
     if checkers.check_html(file_path):
         return 'The file "%s" contains HTML content.\n' % str(file_path)
     if checkers.check_image(file_path):
         return 'The file "%s" contains image content.\n' % str(file_path)
     # Neither check fired.
     return ''
def handle_uploaded_dataset_file( filename, datatypes_registry, ext = 'auto', is_multi_byte = False ):
    """
    Validate an uploaded dataset file and return the extension to use for it.

    filename            path of the uploaded file
    datatypes_registry  registry providing sniff_order and get_datatype_by_extension
    ext                 requested extension; replaced by the value returned from
                        handle_compressed_file and, if that value is in
                        AUTO_DETECT_EXTENSIONS, by the result of guess_ext
    is_multi_byte       passed through to guess_ext

    Raises InappropriateDatasetContentError if handle_compressed_file reports
    the file as not valid, if a binary file with a sniffable extension fails
    its datatype's sniff, or if a non-binary file is detected as HTML.
    """
    is_valid, ext = handle_compressed_file( filename, datatypes_registry, ext = ext )

    # The call form of raise is used throughout: the "raise Exc, msg" statement
    # form is Python 2-only and is a syntax error on Python 3.
    if not is_valid:
        raise InappropriateDatasetContentError( 'The compressed uploaded file contains inappropriate content.' )

    if ext in AUTO_DETECT_EXTENSIONS:
        ext = guess_ext( filename, sniff_order = datatypes_registry.sniff_order, is_multi_byte=is_multi_byte )

    if check_binary( filename ):
        # Binary files are accepted when the extension is unsniffable or when
        # the registered datatype's sniff succeeds.
        if not Binary.is_ext_unsniffable(ext) and not datatypes_registry.get_datatype_by_extension( ext ).sniff( filename ):
            raise InappropriateDatasetContentError( 'The binary uploaded file contains inappropriate content.' )
    elif check_html( filename ):
        raise InappropriateDatasetContentError( 'The uploaded file contains inappropriate HTML content.' )
    return ext
Esempio n. 7
0
def is_data_index_sample_file(file_path):
    """
    Decide whether a .sample file should be copied to ~/tool-data when a tool
    shed repository is installed into a Galaxy instance.

    Returns True for column-based files, and for any file that is not flagged
    as HTML, image, binary, bz2, gzip or zip.  Returns False otherwise.
    """
    # Most data index files are tabular, so a column-based file is accepted
    # immediately without running the other checks.
    if is_column_based(file_path):
        return True
    # Any single match below disqualifies the file; the checks short-circuit
    # in the order listed.
    unsuitable = (
        checkers.check_html(file_path)
        or checkers.check_image(file_path)
        or checkers.check_binary(name=file_path)
        or checkers.is_bz2(file_path)
        or checkers.is_gzip(file_path)
        or checkers.check_zip(file_path)
    )
    # With no match, the default is to copy the file.
    return not unsuitable
Esempio n. 8
0
def is_data_index_sample_file(file_path):
    """
    Tell whether a .sample file looks appropriate for copying to ~/tool-data
    while a tool shed repository is being installed into a Galaxy instance.

    Column-based files are always accepted.  Otherwise the file is rejected if
    it is detected as HTML, an image, binary, bz2, gzip or zip, and accepted
    if none of those checks match.
    """
    # Tabular files are the common case for data indexes; accept them first.
    if is_column_based(file_path):
        return True
    # Reject on the first content check that matches (evaluated in order).
    if (checkers.check_html(file_path)
            or checkers.check_image(file_path)
            or checkers.check_binary(name=file_path)
            or checkers.is_bz2(file_path)
            or checkers.is_gzip(file_path)
            or checkers.check_zip(file_path)):
        return False
    # Nothing objectionable was detected, so copying is allowed.
    return True
Esempio n. 9
0
         # We have a binary dataset, but it is not Bam, Sff or Pdf
         data_type = 'binary'
         # binary_ok = False
         parts = dataset.name.split( "." )
         if len( parts ) > 1:
             ext = parts[-1].strip().lower()
             if not Binary.is_ext_unsniffable(ext):
                 file_err( 'The uploaded binary file contains inappropriate content', dataset, json_file )
                 return
             elif Binary.is_ext_unsniffable(ext) and dataset.file_type != ext:
                 err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % ( ext.capitalize(), ext )
                 file_err( err_msg, dataset, json_file )
                 return
 if not data_type:
     # We must have a text file
     if check_html( dataset.path ):
         file_err( 'The uploaded file contains inappropriate HTML content', dataset, json_file )
         return
 if data_type != 'binary':
     if link_data_only == 'copy_files':
         if dataset.type in ( 'server_dir', 'path_paste' ) and data_type not in [ 'gzip', 'bz2', 'zip' ]:
             in_place = False
         # Convert universal line endings to Posix line endings, but allow the user to turn it off,
         # so that it becomes possible to upload gzip, bz2 or zip files with binary data without
         # corrupting the content of those files.
         if dataset.to_posix_lines:
             tmpdir = output_adjacent_tmpdir( output_path )
             tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id
             if dataset.space_to_tab:
                 line_count, converted_path = sniff.convert_newlines_sep2tabs( dataset.path, in_place=in_place, tmp_dir=tmpdir, tmp_prefix=tmp_prefix )
             else:
Esempio n. 10
0
         if len(parts) > 1:
             ext = parts[-1].strip().lower()
             if not Binary.is_ext_unsniffable(ext):
                 file_err(
                     'The uploaded binary file contains inappropriate content',
                     dataset, json_file)
                 return
             elif Binary.is_ext_unsniffable(
                     ext) and dataset.file_type != ext:
                 err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % (
                     ext.capitalize(), ext)
                 file_err(err_msg, dataset, json_file)
                 return
 if not data_type:
     # We must have a text file
     if check_html(dataset.path):
         file_err(
             'The uploaded file contains inappropriate HTML content',
             dataset, json_file)
         return
 if data_type != 'binary':
     if link_data_only == 'copy_files':
         if dataset.type in ('server_dir',
                             'path_paste') and data_type not in [
                                 'gzip', 'bz2', 'zip'
                             ]:
             in_place = False
         # Convert universal line endings to Posix line endings, but allow the user to turn it off,
         # so that it becomes possible to upload gzip, bz2 or zip files with binary data without
         # corrupting the content of those files.
         if dataset.to_posix_lines: