def test_relative_path(self):
    """Search a relative root and expect exactly the known '.js' sample."""
    expected = {self.known_sample_file_06_js}
    found = dirwalker.find_filenames_with_extensions(
        './tests/sample_dir/', ['.js'])
    self.assertEqual(found, expected)
def create_dirs():
    """Recreate under new_dir the folders that hold '.mp3' files in old_dir.

    Each '.mp3' found below the module-level old_dir is mapped to the same
    relative path below new_dir, and the parent directory of that target is
    created when it does not exist yet. No files are copied here.
    """
    for source in dirwalker.find_filenames_with_extensions(old_dir, ['.mp3']):
        target = os.path.join(new_dir, os.path.relpath(source, old_dir))
        parent = os.path.dirname(target)
        if os.path.exists(parent):
            continue
        os.makedirs(parent)
def convert_files():
    """Re-encode every '.mp3' under old_dir at 64k into new_dir.

    The output keeps the input's path relative to old_dir. A green progress
    line is written to io.stdout before ffmpeg is invoked for each file.
    """
    for source in dirwalker.find_filenames_with_extensions(old_dir, ['.mp3']):
        relative = os.path.relpath(source, old_dir)
        target = os.path.join(new_dir, relative)
        banner = 'Converting %s' % relative
        print >> io.stdout, tc.Green + banner + tc.Normal
        subprocess.call(['ffmpeg', '-i', source, '-ab', '64k', target])
def test_find_single_file_with_extension_period(self):
    """Match a single file when the extension includes its leading period."""
    expected = {self.known_sample_file_06_js}
    found = dirwalker.find_filenames_with_extensions(
        self.sample_dir, ['.js'])
    self.assertEqual(found, expected)
def convert_files():
    """Extract the audio of every '.mp4'/'.flv' below the cwd as a 320k MP3.

    Each output file is written flat into the module-level dest_dir, named
    after the input's basename with the extension replaced by '.mp3'. A green
    progress line is written to io.stdout before each ffmpeg run.

    ffmpeg's standard output is discarded; its standard error is inherited.
    """
    # subprocess.call() never reads from a PIPE, so a child that writes more
    # than the OS pipe buffer holds would block forever. Discard the stream
    # through the null device instead.
    with open(os.devnull, 'w') as devnull:
        for f in dirwalker.find_filenames_with_extensions(
                os.getcwd(), ['.mp4', '.flv']):
            rp = os.path.basename(f)
            msg = tc.Green + 'Converting %s' % rp + tc.Normal
            print >> io.stdout, msg
            out = os.path.splitext(os.path.join(dest_dir, rp))[0] + '.mp3'
            args = ['ffmpeg', '-i', f, '-f', 'mp3', '-ab', '320k', '-vn', out]
            subprocess.call(args, stdout=devnull, stderr=None)
def test_find_multiple_files_without_recursion(self):
    """With recurse=False only the two top-level 'txt' samples are found."""
    expected = {
        self.known_sample_file_01_txt,
        self.known_sample_file_02_txt,
    }
    found = dirwalker.find_filenames_with_extensions(
        self.sample_dir, ['txt'], recurse=False)
    self.assertEqual(found, expected)
def test_find_multiple_extensions(self):
    """Several extensions, with and without a period, match in one call."""
    expected = {
        self.known_sample_file_05_py,
        self.known_sample_file_06_js,
        self.known_sample_level_2_file_05_py,
    }
    found = dirwalker.find_filenames_with_extensions(
        self.sample_dir, ['js', '.py'])
    self.assertEqual(found, expected)
def test_find_multiple_files_without_extension_period(self):
    """A period-less 'txt' extension matches files in recursive directories."""
    expected = {
        self.known_sample_file_01_txt,
        self.known_sample_file_02_txt,
        self.known_sample_level_2_file_01_txt,
        self.known_sample_level_2_file_02_txt,
    }
    found = dirwalker.find_filenames_with_extensions(
        self.sample_dir, ['txt'])
    self.assertEqual(found, expected)
def prune_in_dest(dir):
    """Move files under *dir* whose SHA-512 is already in source_shas.

    Every file matching the module-level extns is hashed. When its digest is
    a key of source_shas, the file is moved into '<dir>/fringe' (created on
    first use) and its size is added to a running total, which is reported
    once all files have been checked.
    """
    fringe_dir = os.path.join(dir, 'fringe')
    total_size = 0
    for path in dirwalker.find_filenames_with_extensions(dir, extns):
        base = os.path.basename(path)
        with open(path, 'rb') as handle:
            print >> io.stdout, tc.Blue + 'Checking %s ...' % base + tc.Normal
            digest = hashlib.sha512(handle.read()).hexdigest()
            if digest not in source_shas:
                continue
            total_size += os.path.getsize(path)
            if not os.path.exists(fringe_dir):
                os.makedirs(fringe_dir)
            os.rename(path, os.path.join(fringe_dir, base))
            print >> io.stdout, tc.Red + 'To remove %s ...' % base + tc.Normal
    summary = 'Cleaned up %s of data' % hurry.filesize.size(total_size)
    print >> io.stdout, tc.Green + summary + tc.Normal
def extract_isbns(dir):
    """Rename files under *dir* after an ISBN found by OCR on their first pages.

    For every file matching the module-level extns, pages 0-4 are rendered
    through wimage, converted to JPEG, and passed to pytesseract. The first
    line containing 'isbn' supplies the new name: the text after the marker
    is stripped of colons, dashes and spaces, and '|', 'l' and 'L' are
    replaced by '1'. The file is then moved into out_dir (created if
    missing) under that value, upper-cased, with its original extension.

    Failures while rendering or OCR-ing a page are reported and do not stop
    the run.
    """
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    for file in dirwalker.find_filenames_with_extensions(dir, extns):
        print >> io.stdout, tc.Green + 'File: %s' % os.path.basename(file) + tc.Normal
        ext = os.path.splitext(os.path.basename(file))[1]
        isbn_found = False
        for num in range(5):
            if isbn_found:
                break
            new_image = None
            try:
                image = wimage(filename='%s[%d]' % (file, num), resolution=(300, 300))
                image_data = image.make_blob(format='jpg')
                new_image = pimage.open(StringIO(image_data))
                new_image.load()
            except Exception:
                # Exception, not a bare except, so Ctrl-C and SystemExit
                # still abort the run.
                print('error')
                continue
            if new_image is None:
                continue
            try:
                page_text = pytesseract.image_to_string(new_image)
                for line in page_text.split('\n'):
                    line = line.lower()
                    if 'isbn' not in line:
                        continue
                    idx = line.find('isbn')
                    line = line[idx+4:]
                    line = line.replace(':', ' ')
                    line = line.replace('-', '')
                    line = line.replace('—', '')
                    line = line.replace(' ', '')
                    line = re.sub('[|Ll]', '1', line)
                    line = line.strip()
                    if not line:
                        # Nothing usable after the marker: renaming would
                        # leave just the extension as the file name, so keep
                        # scanning the remaining lines and pages.
                        continue
                    new_name = os.path.join(out_dir, '%s%s' % (line.upper(), ext))
                    print('%s: -%s-' % (os.path.basename(file), line))
                    # NOTE(review): os.rename may replace an existing file of
                    # the same name in out_dir - confirm that is acceptable.
                    os.rename(file, new_name)
                    isbn_found = True
                    break
            except Exception:
                print('**** Error ****')
import re
import os
import sys
import dirwalker
import shutil
from IPython.utils.coloransi import TermColors as tc
from IPython.utils import io

# Directories that directly contain at least one audio file.
dirs = set()

# Collection folder inside the current working directory.
dest = os.path.join(os.getcwd(), 'music-done')
if not os.path.exists(dest):
    os.makedirs(dest)

dirs.update(
    os.path.dirname(found)
    for found in dirwalker.find_filenames_with_extensions(
        os.getcwd(), ['.mp3', '.flac', '.ape']))


def unique(name):
    """Return *name* lower-cased, with its non-empty pieces joined by '-'.

    Apostrophes and backslashes become spaces first; the name is then split
    on commas, colons, double quotes, slashes, underscores and spaces.
    """
    cleaned = name.replace("'", " ").replace("\\", " ")
    pieces = re.split('[,:"/_ ]', cleaned.lower())
    return '-'.join(piece for piece in pieces if len(piece) > 0)


def moveDir(d):
    """Move directory *d* into dest unless an entry of that name exists there."""
    destFile = os.path.join(dest, os.path.basename(d))
    shown = os.path.relpath(d)
    if os.path.exists(destFile):
        print >> io.stderr, tc.Red + 'Not moving %s' % shown + tc.Normal
        return
    print >> io.stdout, tc.Green + 'Moving %s' % shown + tc.Normal
    shutil.move(d, dest)


for music_dir in dirs:
    moveDir(music_dir)
else: return changeTitle(data['title']) except: return None def amazonSearch(isbn): pass def changeTitle(title): title = title.replace ("'", " ") title = title.replace ("\\", " ") title = re.split('[,:"/_ ]', title.lower()) title = filter(lambda x: len(x) > 0, title) return '-'.join(title) for f in dirwalker.find_filenames_with_extensions(os.getcwd(), ['.pdf', '.epub', '.mobi', '.djvu', '.chm', '.azw3', '.azw']): d = os.path.dirname(f) base,ext=os.path.splitext(f) dest = os.path.join(os.getcwd(), 'done') if not os.path.exists(dest): os.makedirs(dest) m = amazon_regex.search(f) amazon = False if m is not None: m = isbn_13_new_regex.search(f) else: amazon = True if m is None:
import os
import sys
import subprocess
import dirwalker

# Convert every '.ape' / '.wav' file below the current directory to FLAC,
# writing the result next to the source with the extension swapped.
for source in dirwalker.find_filenames_with_extensions(os.getcwd(), ['.ape', '.wav']):
    stem = os.path.splitext(source)[0]
    subprocess.call(['avconv', '-i', source, stem + '.flac'])
import requests
import json
import re
import os
import sys
import glob
from pyquery import PyQuery as pq
import dirwalker
import hurry.filesize
from IPython.utils.coloransi import TermColors as tc
from IPython.utils import io

# Index every '.epub' below the cwd by its extension-less base name.
files = {}
for f in dirwalker.find_filenames_with_extensions(os.getcwd(), ['.epub']):
    stem = os.path.basename(os.path.splitext(f)[0])
    files[stem] = {'epub': f, 'mobi': None, 'azw': None}

# Delete each '.mobi' / '.azw3' whose base name matches an indexed epub,
# accumulating the number of bytes removed.
total_size = 0
for f in dirwalker.find_filenames_with_extensions(os.getcwd(), ['.mobi', '.azw3']):
    stem = os.path.basename(os.path.splitext(f)[0])
    if files.get(stem, None) is None:
        continue
    print('Removing %s' % os.path.basename(f))
    total_size += os.path.getsize(f)
    os.remove(f)
import dirwalker
import argparse


def extractZip(f, list_files):
    """Extract zip archive *f* into the current directory.

    The archive is always opened. Its contents are extracted only when
    *list_files* is false; when it is true nothing further happens.
    """
    zf = zipfile.ZipFile(f, allowZip64=True)
    try:
        if not list_files:
            print('Extracting %s' % os.path.basename(f))
            zf.extractall()
    finally:
        # Release the archive's file handle even when extraction fails.
        zf.close()


def extractRar(f, list_files):
    """Extract rar archive *f* into the current directory.

    The archive is always opened. Its contents are extracted only when
    *list_files* is false; when it is true nothing further happens.
    """
    rf = rarfile.RarFile(f)
    try:
        if not list_files:
            print('Extracting %s' % os.path.basename(f))
            rf.extractall()
    finally:
        # Release the archive's file handle even when extraction fails.
        rf.close()


if __name__ == '__main__':
    parser = argparse.ArgumentParser(prog='unzip-all')
    parser.add_argument('-l', dest='list_files', required=False,
                        action='store_true')
    args = parser.parse_args()

    for f in dirwalker.find_filenames_with_extensions(os.getcwd(), ['.zip']):
        extractZip(f, args.list_files)

    for f in dirwalker.find_filenames_with_extensions(os.getcwd(), ['.rar']):
        extractRar(f, args.list_files)
import re
import os
import sys
import glob
from pyquery import PyQuery as pq
import dirwalker
import hashlib
import hurry.filesize
from IPython.utils.coloransi import TermColors as tc
from IPython.utils import io

# SHA-512 digest -> first path seen with that content.
files = {}
# Bytes occupied by the duplicates moved aside.
total_size = 0
fringe_dir = os.path.join(os.getcwd(), 'fringe')
extns = ['.epub', '.pdf', '.djvu', '.chm', '.mobi']

# Hash every matching file below the cwd. The first file with a given digest
# is remembered; later files with the same digest are moved into 'fringe'.
for path in dirwalker.find_filenames_with_extensions(os.getcwd(), extns):
    with open(path, 'rb') as handle:
        digest = hashlib.sha512(handle.read()).hexdigest()
        print >> io.stdout, tc.Green + 'Checking %s ...' % path + tc.Normal
        if digest not in files:
            files[digest] = path
            continue
        total_size += os.path.getsize(path)
        if not os.path.exists(fringe_dir):
            os.makedirs(fringe_dir)
        os.rename(path, os.path.join(fringe_dir, os.path.basename(path)))
        print >> io.stdout, tc.Red + 'To remove %s ...' % path + tc.Normal

# for f in dirwalker.find_filenames_with_extensions(os.getcwd(), ['.mobi', '.azw3']):
#     name,ext = os.path.splitext(f)
#     name = os.path.basename(name)
def gather_shas_in_dir(dir):
    """Record the SHA-512 of every matching file under *dir* in source_shas.

    Files are selected with the module-level extns. Each digest maps to the
    path it was computed from, so a later file with the same content replaces
    the earlier entry.
    """
    for path in dirwalker.find_filenames_with_extensions(dir, extns):
        with open(path, 'rb') as handle:
            label = 'Checking %s ...' % os.path.basename(path)
            print >> io.stdout, tc.Green + label + tc.Normal
            digest = hashlib.sha512(handle.read()).hexdigest()
            source_shas[digest] = path