Esempio n. 1
0
 def download(self):
     """Download the videos of every paragraph listed in ``self.paragraphs``.

     Each entry is a ``(course_name, i, j, chapter_name, par_name, url)``
     tuple.  A paragraph is skipped when a file matching
     ``DIRECTORY + <course>/<chapter>/II.JJ.*`` already exists.  Otherwise
     its page is fetched through ``self._br`` and every ``seq_contents``
     div that yields a video id is handed to ``self._fd.download``.

     Content blocks without video markup are skipped silently; a failure
     while naming or downloading a video is printed and the loop moves on
     to the next block.
     """
     for (course_name, i, j, chapter_name, par_name, url) in self.paragraphs:
         # One prefix feeds both the "already there?" glob and the output
         # template, so the two can never disagree on the directory layout.
         chapter_dir = DIRECTORY + sanitize_filename(course_name) + '/' \
                     + sanitize_filename(chapter_name) + '/'
         if glob.glob(chapter_dir + '%02i.%02i.*' % (i, j)):
             continue
         par = self._br.open(SITE_URL + url)
         par_soup = BeautifulSoup(par.read())
         k = 0  # running number of the videos found on this page
         for content in par_soup.findAll('div', 'seq_contents'):
             # The div's text is parsed as markup of its own.
             content_soup = BeautifulSoup(content.text)
             try:
                 video_type = content_soup.h2.text.strip()
                 video_stream = content_soup.find('div', 'video')['data-streams']
                 video_id = video_stream.split(':')[1]
             except (AttributeError, TypeError, KeyError, IndexError):
                 # Missing <h2>, video div, data-streams attribute or ':'
                 # separator: nothing to download in this block.
                 continue
             video_url = youtube_url + video_id
             k += 1
             try:
                 print('[%02i.%02i.%i] %s (%s)' % (i, j, k, par_name, video_type))
                 outtmpl = chapter_dir \
                         + '%02i.%02i.%i ' % (i, j, k) \
                         + sanitize_filename('%s (%s)' % (par_name, video_type)) + '.%(ext)s'
                 self._fd.params['outtmpl'] = outtmpl
                 self._fd.download([video_url])
             except Exception as err:
                 # Best effort: report the failure and carry on with the
                 # remaining videos instead of aborting the whole run.
                 # %r keeps the message printable whatever the error text is.
                 print('Error: %r' % (err,))
Esempio n. 2
0
 def test_sanitize_ids(self):
     """Check that ``sanitize_filename(..., is_id=True)`` returns these ids unchanged."""
     for video_id in ('_n_cd26wFpw', '_BD_eEpuzXw', 'N0Y__7-UOdI'):
         self.assertEqual(sanitize_filename(video_id, is_id=True), video_id)
Esempio n. 3
0
    def download(self):
        """Download the videos of every paragraph listed in ``self.paragraphs``.

        Each entry is a ``(course_name, i, j, chapter_name, par_name, url)``
        tuple.  Files are laid out as
        ``DIRECTORY/<course>/<chapter>/II.JJ.KK <paragraph> (<type>).<ext>``;
        a paragraph is skipped as soon as any ``II.JJ.*`` file exists in its
        chapter directory.

        Content blocks without video markup are skipped silently; a failure
        while naming or downloading a video is printed and the loop moves on
        to the next block.
        """
        print("\n-----------------------\nStart downloading\n-----------------------\n")
        for (course_name, i, j, chapter_name, par_name, url) in self.paragraphs:
            # Shared by the "already downloaded?" glob and the output template.
            chapter_dir = os.path.join(
                DIRECTORY,
                sanitize_filename(course_name, replace_space_with_underscore),
                sanitize_filename(chapter_name, replace_space_with_underscore),
            )
            nametmpl = os.path.join(chapter_dir, "%02i.%02i.*" % (i, j))

            if glob.glob(nametmpl):
                print("Processing of %s skipped" % nametmpl)
                continue
            print("Processing %s..." % nametmpl)
            par = self._br.open(base_url + url)
            par_soup = BeautifulSoup(par.read())
            k = 0  # running number of the videos found on this page
            for content in par_soup.findAll("div", "seq_contents"):
                # The div's text is parsed as markup of its own.
                content_soup = BeautifulSoup(content.text)
                try:
                    video_type = content_soup.h2.text.strip()
                    video_stream = content_soup.find("div", "video")["data-streams"]
                    video_id = video_stream.split(":")[1]
                except (AttributeError, TypeError, KeyError, IndexError):
                    # Missing <h2>, video div, data-streams attribute or ':'
                    # separator: nothing to download in this block.
                    continue
                video_url = youtube_url + video_id
                k += 1
                try:
                    print("[%02i.%02i.%02i] %s (%s)" % (i, j, k, par_name, video_type))
                    outtmpl = os.path.join(
                        chapter_dir,
                        "%02i.%02i.%02i " % (i, j, k)
                        + sanitize_filename("%s (%s)" % (par_name, video_type), replace_space_with_underscore)
                        + ".%(ext)s",
                    )
                    self._fd.params["outtmpl"] = outtmpl
                    self._fd.download([video_url])
                except Exception as err:
                    # Best effort: report the failure and carry on with the
                    # remaining videos instead of aborting the whole run.
                    # %r keeps the message printable whatever the error text is.
                    print("Error: %r" % (err,))
Esempio n. 4
0
    def get_youtube_url(self, youtube_url):
        """Fetch the media behind *youtube_url* and return its public URL.

        The file name is derived from the first entry of the extracted info
        and the media is downloaded into ``self.media_folder`` unless a file
        of that name is already there.

        Returns ``u'<self.media_url>/<file name>'``, or ``''`` when the
        metadata lookup or the download raises.
        """
        # determine the media file name
        filetmpl = u'%(id)s_%(uploader_id)s_%(title)s.%(ext)s'
        ydl = youtube_dl.YoutubeDL({
            'outtmpl': join(self.media_folder, filetmpl),
            'quiet': True,
            'restrictfilenames': True,
            'noplaylist': True,
            'continuedl': True,
            'nooverwrites': True,
            'retries': 3000,
            'fragment_retries': 3000,
            'ignoreerrors': True
        })
        ydl.add_default_info_extractors()
        try:
            result = ydl.extract_info(youtube_url, download=False)
            # NOTE(review): only results carrying an 'entries' list are
            # handled; anything else ends up in the handler below -- confirm
            # that this is intended for single-video URLs.
            media_filename = sanitize_filename(filetmpl % result['entries'][0], restricted=True)
        except Exception:
            # Deliberately broad (but no longer bare, so Ctrl-C still works):
            # any lookup problem means "no media available".
            return ''

        # check if a file with this name already exists; 'outtmpl' places the
        # download inside self.media_folder, so that is where to look rather
        # than in the current working directory.
        if not os.path.isfile(join(self.media_folder, media_filename)):
            try:
                ydl.extract_info(youtube_url, download=True)
            except Exception:
                return ''
        return u'%s/%s' % (self.media_url, split(media_filename)[1])
Esempio n. 5
0
 def download(self):
     """Download the videos of every paragraph listed in ``self.paragraphs``.

     Each entry is a ``(course_name, i, j, chapter_name, par_name, url)``
     tuple.  Files are laid out as
     ``DIRECTORY/<course>/<chapter>/II.JJ.K <paragraph> (<type>).<ext>``;
     a paragraph is skipped as soon as any ``II.JJ.*`` file exists in its
     chapter directory.

     Content blocks without video markup are skipped silently; a failure
     while naming or downloading a video is printed and the loop moves on
     to the next block.
     """
     print("\n-----------------------\nStart downloading\n-----------------------\n")
     for (course_name, i, j, chapter_name, par_name, url) in self.paragraphs:
         # Shared by the "already downloaded?" glob and the output template.
         chapter_dir = os.path.join(DIRECTORY,
                                    sanitize_filename(course_name),
                                    sanitize_filename(chapter_name))
         nametmpl = os.path.join(chapter_dir, '%02i.%02i.*' % (i, j))

         if glob.glob(nametmpl):
             print("Processing of %s skipped" % nametmpl)
             continue
         print("Processing %s..." % nametmpl)
         par = self._br.open(base_url + url)
         par_soup = BeautifulSoup(par.read())
         k = 0  # running number of the videos found on this page
         for content in par_soup.findAll('div', 'seq_contents'):
             # The div's text is parsed as markup of its own.
             content_soup = BeautifulSoup(content.text)
             try:
                 video_type = content_soup.h2.text.strip()
                 video_stream = content_soup.find('div', 'video')['data-streams']
                 video_id = video_stream.split(':')[1]
             except (AttributeError, TypeError, KeyError, IndexError):
                 # Missing <h2>, video div, data-streams attribute or ':'
                 # separator: nothing to download in this block.
                 continue
             video_url = youtube_url + video_id
             k += 1
             try:
                 print('[%02i.%02i.%i] %s (%s)' % (i, j, k, par_name, video_type))
                 outtmpl = os.path.join(
                     chapter_dir,
                     '%02i.%02i.%i ' % (i, j, k)
                     + sanitize_filename('%s (%s)' % (par_name, video_type)) + '.%(ext)s')
                 self._fd.params['outtmpl'] = outtmpl
                 self._fd.download([video_url])
             except Exception as err:
                 # Best effort: report the failure and carry on with the
                 # remaining videos instead of aborting the whole run.
                 # %r keeps the message printable whatever the error text is.
                 print('Error: %r' % (err,))
Esempio n. 6
0
 def test_sanitize_ids(self):
     """Ids sanitized with ``is_id=True`` are expected to come back verbatim."""
     untouched_ids = ["_n_cd26wFpw", "_BD_eEpuzXw", "N0Y__7-UOdI"]
     for expected in untouched_ids:
         actual = sanitize_filename(expected, is_id=True)
         self.assertEqual(actual, expected)
Esempio n. 7
0
    def test_sanitize_filename_restricted(self):
        """Check ``sanitize_filename`` output when called with ``restricted=True``."""

        def restricted(name):
            return sanitize_filename(name, restricted=True)

        # Names expected to pass through unchanged.
        for plain in ("abc", "abc_d-e", "123"):
            self.assertEqual(restricted(plain), plain)

        self.assertEqual("abc_de", restricted("abc/de"))
        self.assertFalse("/" in restricted("abc/de///"))

        # (expected, raw) pairs for names containing separators and punctuation.
        for expected, raw in (
            ("abc_de", "abc/<>\\*|de"),
            ("xxx", "xxx/<>\\*|"),
            ("yes_no", "yes? no"),
            ("this_-_that", "this: that"),
        ):
            self.assertEqual(expected, restricted(raw))

        non_ascii = _compat_str("a\xe4b\u4e2d\u56fd\u7684c")
        self.assertEqual(restricted(non_ascii), "a_b_c")
        # No empty filename
        self.assertTrue(restricted(_compat_str("\xf6")) != "")

        # Sanitizing any one of these characters must not yield any of them.
        banned_chars = "\"\0\\/&!: '\t\n()[]{}$;`^,#"
        for raw_char in banned_chars:
            for banned in banned_chars:
                self.assertTrue(banned not in restricted(raw_char))

        # Handle a common case more neatly
        for raw, expected in (
            (_compat_str("\u5927\u58f0\u5e26 - Song"), "Song"),
            (_compat_str("\u603b\u7edf: Speech"), "Speech"),
        ):
            self.assertEqual(restricted(raw), expected)
        # .. but make sure the file name is never empty
        for lone_separator in ("-", ":"):
            self.assertTrue(restricted(lone_separator) != "")
Esempio n. 8
0
    def test_sanitize_filename(self):
        """Check ``sanitize_filename`` when called without extra options."""
        # Inputs that must be returned exactly as given.
        for unchanged in ("abc", "abc_d-e", "123"):
            self.assertEqual(sanitize_filename(unchanged), unchanged)

        self.assertEqual("abc_de", sanitize_filename("abc/de"))
        self.assertFalse("/" in sanitize_filename("abc/de///"))

        # (expected, raw) pairs for inputs that get rewritten.
        rewritten = [
            ("abc_de", "abc/<>\\*|de"),
            ("xxx", "xxx/<>\\*|"),
            ("yes no", "yes? no"),
            ("this - that", "this: that"),
        ]
        for expected, raw in rewritten:
            self.assertEqual(expected, sanitize_filename(raw))

        # An ampersand and non-ASCII letters are asserted to survive as-is.
        self.assertEqual(sanitize_filename("AT&T"), "AT&T")
        a_umlaut = _compat_str("\xe4")
        self.assertEqual(sanitize_filename(a_umlaut), a_umlaut)
        cyrillic = _compat_str("\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430")
        self.assertEqual(sanitize_filename(cyrillic), cyrillic)

        # Sanitizing any one of these characters must not yield any of them.
        banned_chars = '"\0\\/'
        for raw_char in banned_chars:
            for banned in banned_chars:
                self.assertTrue(banned not in sanitize_filename(raw_char))
Esempio n. 9
0
    def test_sanitize_filename(self):
        """Check ``sanitize_filename`` in default mode, plus a few ``is_id=True`` cases."""
        # Inputs that must be returned exactly as given.
        for unchanged in ("abc", "abc_d-e", "123"):
            self.assertEqual(sanitize_filename(unchanged), unchanged)

        self.assertEqual("abc_de", sanitize_filename("abc/de"))
        self.assertFalse("/" in sanitize_filename("abc/de///"))

        # (expected, raw) pairs for inputs that get rewritten.
        for expected, raw in (
            ("abc_de", "abc/<>\\*|de"),
            ("xxx", "xxx/<>\\*|"),
            ("yes no", "yes? no"),
            ("this - that", "this: that"),
        ):
            self.assertEqual(expected, sanitize_filename(raw))

        # An ampersand and non-ASCII letters are asserted to survive as-is.
        self.assertEqual(sanitize_filename("AT&T"), "AT&T")
        a_umlaut = "ä"
        self.assertEqual(sanitize_filename(a_umlaut), a_umlaut)
        cyrillic = "\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430"
        self.assertEqual(sanitize_filename(cyrillic), cyrillic)

        timestamp_title = "New World record at 0:12:34"
        self.assertEqual(sanitize_filename(timestamp_title), "New World record at 0_12_34")

        # A leading "--" or "." is altered for plain names but kept for ids.
        for raw, as_plain_name in (("--gasdgf", "_-gasdgf"), (".gasdgf", "gasdgf")):
            self.assertEqual(sanitize_filename(raw), as_plain_name)
            self.assertEqual(sanitize_filename(raw, is_id=True), raw)

        # Sanitizing any one of these characters must not yield any of them.
        banned_chars = '"\0\\/'
        for raw_char in banned_chars:
            for banned in banned_chars:
                self.assertTrue(banned not in sanitize_filename(raw_char))
Esempio n. 10
0
    def download(self):
        """Download the videos of every paragraph listed in ``self.paragraphs``.

        Each entry is a ``(course_name, i, j, chapter_name, par_name, url)``
        tuple.  Files are laid out as
        ``DIRECTORY/<course>/<chapter>/II.JJ.KK <paragraph> (<type>).<ext>``;
        a paragraph is skipped as soon as any ``II.JJ.*`` file exists in its
        chapter directory.

        When ``self._config.pause_mode`` is set, every video is confirmed
        interactively.  When ``self._config.resume_mode`` is set, videos are
        skipped until the first answer other than "n", which also clears the
        flag.

        Content blocks without video markup are skipped silently; a failure
        while prompting, naming or downloading a video is printed and the
        loop moves on to the next block.
        """
        print("\n-----------------------\nStart downloading\n-----------------------\n")
        for (course_name, i, j, chapter_name, par_name, url) in self.paragraphs:
            # Shared by the "already downloaded?" glob and the output template.
            chapter_dir = os.path.join(DIRECTORY,
                                       sanitize_filename(course_name, replace_space_with_underscore),
                                       sanitize_filename(chapter_name, replace_space_with_underscore))
            nametmpl = os.path.join(chapter_dir, '%02i.%02i.*' % (i, j))

            if glob.glob(nametmpl):
                print("Processing of %s skipped" % nametmpl)
                continue
            print("Processing %s..." % nametmpl)
            par = self._br.open(base_url + url)
            par_soup = BeautifulSoup(par.read())
            k = 0  # running number of the videos found on this page
            for content in par_soup.findAll('div', 'seq_contents'):
                # The div's text is parsed as markup of its own.
                content_soup = BeautifulSoup(content.text)
                try:
                    video_type = content_soup.h2.text.strip()
                    video_stream = content_soup.find('div', 'video')['data-streams']
                    video_id = video_stream.split(':')[1]
                except (AttributeError, TypeError, KeyError, IndexError):
                    # Missing <h2>, video div, data-streams attribute or ':'
                    # separator: nothing to download in this block.
                    continue
                video_url = youtube_url + video_id
                k += 1
                try:
                    print('[%02i.%02i.%02i] %s (%s)' % (i, j, k, par_name, video_type))
                    outtmpl = os.path.join(
                        chapter_dir,
                        '%02i.%02i.%02i ' % (i, j, k)
                        + sanitize_filename('%s (%s)' % (par_name, video_type), replace_space_with_underscore)
                        + '.%(ext)s')

                    if self._config.pause_mode:
                        answer = raw_input('Download this video [%s - %s]? (y/n) ' % (chapter_name, outtmpl))
                        if answer.lower() == "n":
                            continue

                    if self._config.resume_mode:
                        answer = raw_input('Download video from this [%s - %s]? (y/n) ' % (chapter_name, outtmpl))
                        if answer.lower() == "n":
                            continue
                        # Any other answer resumes normal downloading from here on.
                        self._config.resume_mode = False

                    self._fd.params['outtmpl'] = outtmpl
                    self._fd.download([video_url])
                except Exception as err:
                    # Best effort: report the failure and carry on with the
                    # remaining videos instead of aborting the whole run.
                    # %r keeps the message printable whatever the error text is.
                    print('Error: %r' % (err,))
Esempio n. 11
0
    def test_sanitize_filename(self):
        """Check ``sanitize_filename`` in default mode, plus a few ``is_id=True`` cases."""
        passthrough = ['abc', 'abc_d-e', '123']
        for name in passthrough:
            self.assertEqual(sanitize_filename(name), name)

        self.assertEqual('abc_de', sanitize_filename('abc/de'))
        self.assertFalse('/' in sanitize_filename('abc/de///'))

        # (expected, raw) pairs for inputs that get rewritten.
        rewritten = [
            ('abc_de', 'abc/<>\\*|de'),
            ('xxx', 'xxx/<>\\*|'),
            ('yes no', 'yes? no'),
            ('this - that', 'this: that'),
        ]
        for wanted, given in rewritten:
            self.assertEqual(wanted, sanitize_filename(given))

        # An ampersand and non-ASCII letters are asserted to survive as-is.
        self.assertEqual(sanitize_filename('AT&T'), 'AT&T')
        a_umlaut = 'ä'
        self.assertEqual(sanitize_filename(a_umlaut), a_umlaut)
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430'
        self.assertEqual(sanitize_filename(cyrillic), cyrillic)

        with_timestamp = sanitize_filename('New World record at 0:12:34')
        self.assertEqual(with_timestamp, 'New World record at 0_12_34')

        # A leading "--" or "." is altered for plain names but kept for ids.
        for given, as_plain_name in [('--gasdgf', '_-gasdgf'), ('.gasdgf', 'gasdgf')]:
            self.assertEqual(sanitize_filename(given), as_plain_name)
            self.assertEqual(sanitize_filename(given, is_id=True), given)

        # Sanitizing any one of these characters must not yield any of them.
        forbidden = '"\0\\/'
        for source_char in forbidden:
            for banned_char in forbidden:
                self.assertTrue(banned_char not in sanitize_filename(source_char))
Esempio n. 12
0
	def test_sanitize_filename_restricted(self):
		"""Check ``sanitize_filename`` output when called with ``restricted=True``."""
		# Names expected to pass through unchanged.
		for plain in (u'abc', u'abc_d-e', u'123'):
			self.assertEqual(sanitize_filename(plain, restricted=True), plain)

		self.assertEqual(u'abc-de', sanitize_filename(u'abc/de', restricted=True))
		self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True))

		# (expected, raw) pairs for names containing separators and punctuation.
		for expected, raw in (
			(u'abc-de', u'abc/<>\\*|de'),
			(u'xxx', u'xxx/<>\\*|'),
			(u'yes_no', u'yes? no'),
			(u'this_-_that', u'this: that'),
		):
			self.assertEqual(expected, sanitize_filename(raw, restricted=True))

		# Sanitizing any one of these characters must not yield any of them.
		# (The former per-character debug print was dropped: it only added
		# noise to the test output.)
		forbidden = u'"\0\\/&: \'\t\n'
		for fc in forbidden:
			for fbc in forbidden:
				self.assertTrue(fbc not in sanitize_filename(fc, restricted=True))
Esempio n. 13
0
	def test_sanitize_filename(self):
		"""Check ``sanitize_filename`` when called without extra options."""
		# Inputs that must be returned exactly as given.
		for unchanged in (u'abc', u'abc_d-e', u'123'):
			self.assertEqual(sanitize_filename(unchanged), unchanged)

		self.assertEqual(u'abc-de', sanitize_filename(u'abc/de'))
		self.assertFalse(u'/' in sanitize_filename(u'abc/de///'))

		# (expected, raw) pairs for inputs that get rewritten.
		rewritten = [
			(u'abc-de', u'abc/<>\\*|de'),
			(u'xxx', u'xxx/<>\\*|'),
			(u'yes no', u'yes? no'),
			(u'this - that', u'this: that'),
		]
		for expected, raw in rewritten:
			self.assertEqual(expected, sanitize_filename(raw))

		# An ampersand and non-ASCII letters are asserted to survive as-is.
		for kept in (u'AT&T', u'ä', u'кириллица'):
			self.assertEqual(sanitize_filename(kept), kept)

		# Sanitizing any one of these characters must not yield any of them.
		banned_chars = u'"\0\\/'
		for raw_char in banned_chars:
			for banned in banned_chars:
				self.assertTrue(banned not in sanitize_filename(raw_char))
def main(args):
    """Synchronise a local directory with a YouTube playlist.

    ``args`` must hold exactly two items: the playlist URL and the path of an
    existing output directory.  Playlist entries that are missing from the
    directory are downloaded (with the audio extracted to ``m4a``), files
    listed in the directory's ``.ordering`` file that are no longer part of
    the playlist are removed, and ``.ordering`` is rewritten to list the
    playlist's file names in order.

    Exits through ``sys.exit`` with a message when the arguments are unusable
    and raises ``ValueError`` when a download does not produce the expected
    file.
    """
    # Parse arguments
    try:
        playlist_url, output_dirpath = args
    except ValueError:
        # Wrong number of arguments: fail with a readable message rather
        # than a bare unpacking traceback.
        sys.exit('expected 2 arguments: <playlist_url> <output_dirpath>')
    if not os.path.exists(output_dirpath):
        sys.exit('directory not found: %s' % output_dirpath)
    if not os.path.isdir(output_dirpath):
        sys.exit('not a directory: %s' % output_dirpath)

    # Default settings
    restrictfilenames = False
    # (TODO: Enable again when it plays nicely with 'extract_audio_for_itunes')
    writeinfojson = False
    extract_audio_for_itunes = True

    # Locate all videos already in the filesystem
    ordering_filepath = os.path.join(output_dirpath, '.ordering')
    filesystem_filenames = []
    if os.path.exists(ordering_filepath):
        # codecs.open() appends 'b' to the mode itself, so plain 'r' is used:
        # 'rt' would turn into the invalid 'rtb' on Python 3.
        with codecs.open(ordering_filepath, 'r', 'utf-8') as ordering_file:
            listed_filenames = [line.rstrip(u'\r\n') for line in ordering_file]

        # Ensure all referenced files actually exist
        for filename in listed_filenames:
            if os.path.exists(os.path.join(output_dirpath, filename)):
                filesystem_filenames.append(filename)
            else:
                # The whole message is parenthesised so that the formatting
                # happens before printing under both Python 2 and Python 3.
                print(('WARNING: Could not locate file "%s" referenced by ' +
                    '".ordering" file. Assuming deleted.') % filename)

    # Prepare downloader
    video_filename_template = u'%(title)s.%(ext)s'
    downloader = youtube_dl.FileDownloader({
        'outtmpl': os.path.join(
            # (Be robust against output_dirpath containing %)
            output_dirpath.replace('%', '%%'),
            video_filename_template),
        'restrictfilenames': restrictfilenames,
        'writeinfojson': writeinfojson,
    })
    if not extract_audio_for_itunes:
        final_filename_template = video_filename_template
    else:
        final_filename_template = video_filename_template.replace(u'%(ext)s', u'm4a')
        downloader.add_post_processor(FFmpegExtractAudioPP(
            preferredcodec='m4a',   # iTunes compatible.
            preferredquality=None,  # default audio quality
            keepvideo=False))

    # Locate all videos in the playlist
    video_infos = extract_youtube_playlist_info(playlist_url)
    playlist_filenames = [
        sanitize_filename(final_filename_template % cur_info, restrictfilenames)
        for cur_info in video_infos
    ]

    # Download videos to filesystem that are missing
    for cur_info, cur_filename in zip(video_infos, playlist_filenames):
        cur_filepath = os.path.join(output_dirpath, cur_filename)
        if os.path.exists(cur_filepath) or cur_info.get('deleted', False):
            continue

        # Download (and optionally extract the audio)
        downloader.process_info(cur_info)

        # Verify downloaded
        if not os.path.exists(cur_filepath):
            raise ValueError('Could not locate downloaded video: %s' % cur_filename)

    # Remove filesystem files not in playlist
    playlist_filename_set = set(playlist_filenames)
    for cur_filename in filesystem_filenames:
        if cur_filename in playlist_filename_set:
            continue

        # Remove video (if present)
        video_filepath = os.path.join(output_dirpath, cur_filename)
        if os.path.exists(video_filepath):
            os.remove(video_filepath)

        # Remove info json (if present)
        # TODO: This is not the correct path for the info json file
        #       if 'extract_audio_for_itunes' is True.
        #       (The info json will be preceded by the *video* extension,
        #        instead of the output audio file extension.)
        infojson_filepath = os.path.join(output_dirpath, cur_filename + u'.info.json')
        if os.path.exists(infojson_filepath):
            os.remove(infojson_filepath)

    # Rewrite the ordering file ('w' for the same reason as 'r' above)
    with codecs.open(ordering_filepath, 'w', 'utf-8') as ordering_file:
        for cur_filename in playlist_filenames:
            ordering_file.write(cur_filename)
            ordering_file.write(u'\n')
Esempio n. 15
0
    def test_sanitize_filename_restricted(self):
        """Check what ``sanitize_filename`` returns with ``restricted=True``."""

        def clean(name):
            return sanitize_filename(name, restricted=True)

        # Names expected to pass through unchanged.
        for same in ('abc', 'abc_d-e', '123'):
            self.assertEqual(clean(same), same)

        self.assertEqual('abc_de', clean('abc/de'))
        self.assertFalse('/' in clean('abc/de///'))

        # (expected, raw) pairs for names containing separators and punctuation.
        for wanted, given in [
            ('abc_de', 'abc/<>\\*|de'),
            ('xxx', 'xxx/<>\\*|'),
            ('yes_no', 'yes? no'),
            ('this_-_that', 'this: that'),
        ]:
            self.assertEqual(wanted, clean(given))

        mixed_script = 'aäb\u4e2d\u56fd\u7684c'
        self.assertEqual(clean(mixed_script), 'aab_c')
        # No empty filename
        self.assertTrue(clean('\xf6') != '')

        # Sanitizing any one of these characters must not yield any of them.
        forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
        for source_char in forbidden:
            for banned_char in forbidden:
                self.assertTrue(banned_char not in clean(source_char))

        # Handle a common case more neatly
        for given, wanted in [
            ('\u5927\u58f0\u5e26 - Song', 'Song'),
            ('\u603b\u7edf: Speech', 'Speech'),
        ]:
            self.assertEqual(clean(given), wanted)
        # .. but make sure the file name is never empty
        for lone_separator in ('-', ':'):
            self.assertTrue(clean(lone_separator) != '')

        # Accented letters and their expected ASCII replacements.
        accented = 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ'
        transliterated = 'AAAAAAAECEEEEIIIIDNOOOOOOOEUUUUYPssaaaaaaaeceeeeiiiionoooooooeuuuuypy'
        self.assertEqual(clean(accented), transliterated)
Esempio n. 16
0
    def test_sanitize_filename(self):
        """Check ``sanitize_filename`` when called without extra options."""
        passthrough = ['abc', 'abc_d-e', '123']
        for name in passthrough:
            self.assertEqual(sanitize_filename(name), name)

        self.assertEqual('abc_de', sanitize_filename('abc/de'))
        self.assertFalse('/' in sanitize_filename('abc/de///'))

        # (expected, raw) pairs for inputs that get rewritten.
        for wanted, given in [
            ('abc_de', 'abc/<>\\*|de'),
            ('xxx', 'xxx/<>\\*|'),
            ('yes no', 'yes? no'),
            ('this - that', 'this: that'),
        ]:
            self.assertEqual(wanted, sanitize_filename(given))

        # An ampersand and non-ASCII letters are asserted to survive as-is.
        self.assertEqual(sanitize_filename('AT&T'), 'AT&T')
        a_umlaut = _compat_str('\xe4')
        self.assertEqual(sanitize_filename(a_umlaut), a_umlaut)
        cyrillic = _compat_str('\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430')
        self.assertEqual(sanitize_filename(cyrillic), cyrillic)

        # Sanitizing any one of these characters must not yield any of them.
        forbidden = '"\0\\/'
        for source_char in forbidden:
            for banned_char in forbidden:
                self.assertTrue(banned_char not in sanitize_filename(source_char))
Esempio n. 17
0
    def test_sanitize_filename_restricted(self):
        """Check ``sanitize_filename`` output when called with ``restricted=True``."""

        def restricted(name):
            return sanitize_filename(name, restricted=True)

        # Names expected to pass through unchanged.
        for plain in ('abc', 'abc_d-e', '123'):
            self.assertEqual(restricted(plain), plain)

        self.assertEqual('abc_de', restricted('abc/de'))
        self.assertFalse('/' in restricted('abc/de///'))

        # (expected, raw) pairs for names containing separators and punctuation.
        for expected, raw in (
            ('abc_de', 'abc/<>\\*|de'),
            ('xxx', 'xxx/<>\\*|'),
            ('yes_no', 'yes? no'),
            ('this_-_that', 'this: that'),
        ):
            self.assertEqual(expected, restricted(raw))

        non_ascii = 'a\xe4b\u4e2d\u56fd\u7684c'
        self.assertEqual(restricted(non_ascii), 'a_b_c')
        # No empty filename
        self.assertTrue(restricted('\xf6') != '')

        # Sanitizing any one of these characters must not yield any of them.
        banned_chars = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
        for raw_char in banned_chars:
            for banned in banned_chars:
                self.assertTrue(banned not in restricted(raw_char))

        # Handle a common case more neatly
        for raw, expected in (
            ('\u5927\u58f0\u5e26 - Song', 'Song'),
            ('\u603b\u7edf: Speech', 'Speech'),
        ):
            self.assertEqual(restricted(raw), expected)
        # .. but make sure the file name is never empty
        for lone_separator in ('-', ':'):
            self.assertTrue(restricted(lone_separator) != '')
Esempio n. 18
0
    def test_sanitize_filename_restricted(self):
        """Check what ``sanitize_filename`` returns with ``restricted=True``."""

        def clean(name):
            return sanitize_filename(name, restricted=True)

        # Names expected to pass through unchanged.
        for same in ("abc", "abc_d-e", "123"):
            self.assertEqual(clean(same), same)

        self.assertEqual("abc_de", clean("abc/de"))
        self.assertFalse("/" in clean("abc/de///"))

        # (expected, raw) pairs for names containing separators and punctuation.
        for wanted, given in [
            ("abc_de", "abc/<>\\*|de"),
            ("xxx", "xxx/<>\\*|"),
            ("yes_no", "yes? no"),
            ("this_-_that", "this: that"),
        ]:
            self.assertEqual(wanted, clean(given))

        mixed_script = "aäb\u4e2d\u56fd\u7684c"
        self.assertEqual(clean(mixed_script), "aab_c")
        # No empty filename
        self.assertTrue(clean("\xf6") != "")

        # Sanitizing any one of these characters must not yield any of them.
        forbidden = "\"\0\\/&!: '\t\n()[]{}$;`^,#"
        for source_char in forbidden:
            for banned_char in forbidden:
                self.assertTrue(banned_char not in clean(source_char))

        # Handle a common case more neatly
        for given, wanted in [
            ("\u5927\u58f0\u5e26 - Song", "Song"),
            ("\u603b\u7edf: Speech", "Speech"),
        ]:
            self.assertEqual(clean(given), wanted)
        # .. but make sure the file name is never empty
        for lone_separator in ("-", ":"):
            self.assertTrue(clean(lone_separator) != "")

        # Accented letters and their expected ASCII replacements.
        accented = "ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ"
        transliterated = "AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy"
        self.assertEqual(clean(accented), transliterated)
Esempio n. 19
0
    def test_sanitize_filename_restricted(self):
        """Check ``sanitize_filename`` output when called with ``restricted=True``."""

        def restricted(name):
            return sanitize_filename(name, restricted=True)

        # Names expected to pass through unchanged.
        unchanged_names = ['abc', 'abc_d-e', '123']
        for plain in unchanged_names:
            self.assertEqual(restricted(plain), plain)

        self.assertEqual('abc_de', restricted('abc/de'))
        self.assertFalse('/' in restricted('abc/de///'))

        # (expected, raw) pairs for names containing separators and punctuation.
        rewritten = [
            ('abc_de', 'abc/<>\\*|de'),
            ('xxx', 'xxx/<>\\*|'),
            ('yes_no', 'yes? no'),
            ('this_-_that', 'this: that'),
        ]
        for expected, raw in rewritten:
            self.assertEqual(expected, restricted(raw))

        non_ascii = 'aäb\u4e2d\u56fd\u7684c'
        self.assertEqual(restricted(non_ascii), 'aab_c')
        # No empty filename
        self.assertTrue(restricted('\xf6') != '')

        # Sanitizing any one of these characters must not yield any of them.
        banned_chars = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
        for raw_char in banned_chars:
            for banned in banned_chars:
                self.assertTrue(banned not in restricted(raw_char))

        # Handle a common case more neatly
        prefixed_titles = [
            ('\u5927\u58f0\u5e26 - Song', 'Song'),
            ('\u603b\u7edf: Speech', 'Speech'),
        ]
        for raw, expected in prefixed_titles:
            self.assertEqual(restricted(raw), expected)
        # .. but make sure the file name is never empty
        for lone_separator in ('-', ':'):
            self.assertTrue(restricted(lone_separator) != '')

        # Accented letters and their expected ASCII replacements.
        accented = 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ'
        transliterated = 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy'
        self.assertEqual(restricted(accented), transliterated)
Esempio n. 20
0
	def test_sanitize_filename(self):
		"""Check ``sanitize_filename`` on plain, separator-containing and non-ASCII names."""
		# Inputs that must be returned exactly as given.
		self.assertEqual(sanitize_filename(u'abc'), u'abc')
		self.assertEqual(sanitize_filename(u'abc_d-e'), u'abc_d-e')

		self.assertEqual(sanitize_filename(u'123'), u'123')

		# A forward slash is replaced, never kept.
		self.assertEqual(u'abc_de', sanitize_filename(u'abc/de'))
		self.assertTrue(u'de' in sanitize_filename(u'abc/de'))
		self.assertFalse(u'/' in sanitize_filename(u'abc/de///'))

		# Same expectation for a backslash (this assertion used to be
		# repeated verbatim; one copy is enough).
		self.assertEqual(u'abc_de', sanitize_filename(u'abc\\de'))
		self.assertTrue(u'de' in sanitize_filename(u'abc\\de'))

		# Non-ASCII letters are asserted to survive as-is.
		self.assertEqual(sanitize_filename(u'ä'), u'ä')
		self.assertEqual(sanitize_filename(u'кириллица'), u'кириллица')