Example #1
0
def test_decode_encode_path():
    """Round-trip a non-ASCII filename through ``encode_path``/``decode_path``.

    A file with a non-ASCII name is created inside a fresh temporary
    directory, the directory is listed via its encoded path, and the first
    listed entry is decoded and compared with the original name.
    """
    expected_name = u"\xf6\xe4\xfc.txt"
    temp_dir = ckan_io.decode_path(tempfile.mkdtemp())
    try:
        file_path = os.path.join(temp_dir, expected_name)
        encoded_file = ckan_io.encode_path(file_path)
        with io.open(encoded_file, u"w", encoding=u"utf-8") as handle:
            handle.write(u"foo")
        # Force str return type
        encoded_dir = ckan_io.encode_path(temp_dir)
        listed = os.listdir(encoded_dir)
        assert ckan_io.decode_path(listed[0]) == expected_name
    finally:
        # Always remove the temporary directory, even if the check fails.
        shutil.rmtree(temp_dir)
Example #2
0
 def test_decode_encode_path(self):
     # Round-trip check: write a file with a non-ASCII name, list its
     # directory via the encoded path and make sure decoding the listed
     # entry gives back the original name.
     expected_name = u'\xf6\xe4\xfc.txt'
     temp_dir = ckan_io.decode_path(tempfile.mkdtemp())
     try:
         file_path = os.path.join(temp_dir, expected_name)
         encoded_file = ckan_io.encode_path(file_path)
         with io.open(encoded_file, u'w', encoding=u'utf-8') as handle:
             handle.write(u'foo')
         # Force str return type
         encoded_dir = ckan_io.encode_path(temp_dir)
         listed = os.listdir(encoded_dir)
         eq_(ckan_io.decode_path(listed[0]), expected_name)
     finally:
         # Always remove the temporary directory, even if the check fails.
         shutil.rmtree(temp_dir)
Example #3
0
 def test_decode_encode_path(self):
     # Round-trip check: write a file with a non-ASCII name, list its
     # directory via the encoded path and make sure decoding the listed
     # entry gives back the original name.
     expected_name = u'\xf6\xe4\xfc.txt'
     temp_dir = ckan_io.decode_path(tempfile.mkdtemp())
     try:
         file_path = os.path.join(temp_dir, expected_name)
         encoded_file = ckan_io.encode_path(file_path)
         with io.open(encoded_file, u'w', encoding=u'utf-8') as handle:
             handle.write(u'foo')
         # Force str return type
         encoded_dir = ckan_io.encode_path(temp_dir)
         listed = os.listdir(encoded_dir)
         eq_(ckan_io.decode_path(listed[0]), expected_name)
     finally:
         # Always remove the temporary directory, even if the check fails.
         shutil.rmtree(temp_dir)
Example #4
0
def munge_filename(filename):
    ''' Normalises a filename

    Any path in front of the name is dropped and the extension (e.g. .csv)
    is kept, truncated to MAX_FILENAME_EXTENSION_LENGTH characters.
    Input that is not already text is passed through decode_path first.

    Returns a Unicode string.
    '''
    if not isinstance(filename, text_type):
        filename = decode_path(filename)

    # Only the final path component is of interest
    basename = os.path.basename(filename)

    # Clean up: lower-case and trim, apply substitute_ascii_equivalents,
    # drop every character outside the allowed set, turn spaces into
    # hyphens and collapse runs of hyphens into a single one
    cleaned = substitute_ascii_equivalents(basename.lower().strip())
    cleaned = re.sub(u'[^a-zA-Z0-9_. -]', '', cleaned)
    cleaned = cleaned.replace(u' ', u'-')
    cleaned = re.sub(u'-+', u'-', cleaned)

    # Enforce length constraints: the extension is truncated first and the
    # limits for the stem are whatever remains of the total limits
    stem, ext = os.path.splitext(cleaned)
    ext = ext[:MAX_FILENAME_EXTENSION_LENGTH]
    min_stem_len = max(1, MIN_FILENAME_TOTAL_LENGTH - len(ext))
    max_stem_len = MAX_FILENAME_TOTAL_LENGTH - len(ext)
    return _munge_to_length(stem, min_stem_len, max_stem_len) + ext
Example #5
0
def munge_filename(filename):
    ''' Tidies a filename

    Keeps the filename extension (e.g. .csv), truncated to
    MAX_FILENAME_EXTENSION_LENGTH characters.
    Strips off any path on the front.
    Input that is not already Unicode is passed through decode_path first.

    Returns a Unicode string.
    '''
    # ``unicode`` only exists on Python 2; fall back to ``str`` so that the
    # function also works where text strings are already Unicode.
    try:
        unicode_type = unicode
    except NameError:
        unicode_type = str

    if not isinstance(filename, unicode_type):
        filename = decode_path(filename)

    # Ignore path
    filename = os.path.split(filename)[1]

    # Clean up
    filename = filename.lower().strip()
    filename = substitute_ascii_equivalents(filename)
    # The patterns contain no backslashes, so plain ``u''`` literals are
    # identical to the former ``ur''`` ones, which Python 3 rejects with a
    # SyntaxError.
    filename = re.sub(u'[^a-zA-Z0-9_. -]', '', filename).replace(u' ', u'-')
    filename = re.sub(u'-+', u'-', filename)

    # Enforce length constraints: the extension is truncated first and the
    # limits for the name are whatever remains of the total limits
    name, ext = os.path.splitext(filename)
    ext = ext[:MAX_FILENAME_EXTENSION_LENGTH]
    ext_len = len(ext)
    name = _munge_to_length(name, max(1, MIN_FILENAME_TOTAL_LENGTH - ext_len),
                            MAX_FILENAME_TOTAL_LENGTH - ext_len)
    filename = name + ext

    return filename
Example #6
0
 def test_decode_path_returns_unicode(self):
     # A byte string goes in; the decoded result must be a ``unicode``
     # instance.
     decoded = ckan_io.decode_path(b'just_a_str')
     ok_(isinstance(decoded, unicode))
Example #7
0
 def test_decode_path_fails_for_unicode(self):
     # Calls ``decode_path`` with an argument that is already Unicode.
     # NOTE(review): no assertion is visible in this body; presumably an
     # expected-exception decorator sits above the ``def`` -- confirm.
     unicode_path = u'just_a_unicode'
     ckan_io.decode_path(unicode_path)
Example #8
0
 def test_decode_path_returns_unicode(self):
     # A byte string goes in; the decoded result must be a ``unicode``
     # instance.
     decoded = ckan_io.decode_path(b'just_a_str')
     ok_(isinstance(decoded, unicode))
Example #9
0
 def test_decode_path_fails_for_unicode(self):
     # Calls ``decode_path`` with an argument that is already Unicode.
     # NOTE(review): no assertion is visible in this body; presumably an
     # expected-exception decorator sits above the ``def`` -- confirm.
     unicode_path = u'just_a_unicode'
     ckan_io.decode_path(unicode_path)
Example #10
0
def test_decode_path_returns_unicode():
    """``decode_path`` must return a ``text_type`` for a byte string."""
    decoded = ckan_io.decode_path(b"just_a_str")
    assert isinstance(decoded, text_type)
Example #11
0
def test_decode_path_fails_for_unicode():
    """``decode_path`` must raise ``TypeError`` for an already-Unicode path."""
    unicode_path = u"just_a_unicode"
    with pytest.raises(TypeError):
        ckan_io.decode_path(unicode_path)