def test_relative_path(self):
     """A relative directory path ('./tests/sample_dir/') finds the known ``.js`` sample."""
     expected = {self.known_sample_file_06_js}
     found = dirwalker.find_filenames_with_extensions(
         './tests/sample_dir/', ['.js'])
     self.assertEqual(found, expected)
Exemple #2
0
def create_dirs():
    """Create under ``new_dir`` the directory of every ``.mp3`` found in ``old_dir``.

    Each file's path relative to ``old_dir`` is re-rooted at ``new_dir``; the
    directory part of that path is created when it does not exist yet.
    """
    for src in dirwalker.find_filenames_with_extensions(old_dir, ['.mp3']):
        target = os.path.join(new_dir, os.path.relpath(src, old_dir))
        target_dir = os.path.dirname(target)
        if os.path.exists(target_dir):
            continue
        os.makedirs(target_dir)
Exemple #3
0
def convert_files():
    """Run ffmpeg on every ``.mp3`` under ``old_dir``, writing into ``new_dir``.

    The output keeps the input's path relative to ``old_dir`` and ffmpeg is
    invoked with ``-ab 64k``.  A green "Converting" line is printed per file.
    """
    for src in dirwalker.find_filenames_with_extensions(old_dir, ['.mp3']):
        rel = os.path.relpath(src, old_dir)
        dst = os.path.join(new_dir, rel)
        print >> io.stdout, tc.Green + 'Converting %s' % rel + tc.Normal
        subprocess.call(['ffmpeg', '-i', src, '-ab', '64k', dst])
 def test_find_single_file_with_extension_period(self):
     """An extension written with its leading period ('.js') finds the known sample."""
     expected = {self.known_sample_file_06_js}
     found = dirwalker.find_filenames_with_extensions(
         self.sample_dir, ['.js'])
     self.assertEqual(found, expected)
Exemple #5
0
def convert_files():
    """Run ffmpeg on every ``.mp4``/``.flv`` under the cwd to produce ``.mp3`` files.

    Each output goes to ``dest_dir`` under the input's base name with the
    extension replaced by ``.mp3``.  ffmpeg is invoked with
    ``-f mp3 -ab 320k -vn`` and its standard output is discarded; standard
    error is inherited from this process.
    """
    # stdout is sent to the null device rather than ``subprocess.PIPE``:
    # ``call`` never reads a pipe, so a child that fills the pipe buffer
    # would block forever.
    with open(os.devnull, 'wb') as devnull:
        for src in dirwalker.find_filenames_with_extensions(os.getcwd(), ['.mp4', '.flv']):
            name = os.path.basename(src)
            print >> io.stdout, tc.Green + 'Converting %s' % name + tc.Normal
            out = os.path.splitext(os.path.join(dest_dir, name))[0] + '.mp3'
            args = ['ffmpeg', '-i', src, '-f', 'mp3', '-ab', '320k', '-vn', out]
            subprocess.call(args, stdout=devnull)
 def test_find_multiple_files_without_recursion(self):
     """With ``recurse=False`` a ``txt`` search returns exactly samples 01 and 02."""
     expected = {
         self.known_sample_file_01_txt,
         self.known_sample_file_02_txt,
     }
     found = dirwalker.find_filenames_with_extensions(
         self.sample_dir, ['txt'], recurse=False)
     self.assertEqual(found, expected)
 def test_find_multiple_extensions(self):
     """Two extensions in one call, one without ('js') and one with ('.py') a period."""
     expected = {
         self.known_sample_file_05_py,
         self.known_sample_file_06_js,
         self.known_sample_level_2_file_05_py,
     }
     found = dirwalker.find_filenames_with_extensions(
         self.sample_dir, ['js', '.py'])
     self.assertEqual(found, expected)
 def test_find_multiple_files_without_extension_period(self):
     """A 'txt' search (no period) returns samples 01/02 and their ``level_2`` counterparts."""
     expected = {
         self.known_sample_file_01_txt,
         self.known_sample_file_02_txt,
         self.known_sample_level_2_file_01_txt,
         self.known_sample_level_2_file_02_txt,
     }
     found = dirwalker.find_filenames_with_extensions(
         self.sample_dir, ['txt'])
     self.assertEqual(found, expected)
def prune_in_dest(dir):
    """Move files under ``dir`` whose SHA-512 is a key of ``source_shas`` into ``dir/fringe``.

    Every file found under ``dir`` with one of the extensions in ``extns`` is
    hashed.  A file whose digest is present in ``source_shas`` is renamed
    into the ``fringe`` sub-directory (created on first use) and its size is
    added to the total reported at the end.

    ``dir`` shadows the builtin but is kept because it is the public
    parameter name.
    """
    total_size = 0
    fringe_dir = os.path.join(dir, 'fringe')
    for path in dirwalker.find_filenames_with_extensions(dir, extns):
        base = os.path.basename(path)
        digest = hashlib.sha512()
        with open(path, 'rb') as f:
            print >> io.stdout, tc.Blue + 'Checking %s ...' % base + tc.Normal
            # Hash in fixed-size chunks so large files are not loaded whole.
            for chunk in iter(lambda: f.read(1 << 20), b''):
                digest.update(chunk)
        # The handle is closed before the rename: renaming a file that is
        # still open fails on some platforms (e.g. Windows).
        if digest.hexdigest() not in source_shas:
            continue
        total_size += os.path.getsize(path)
        if not os.path.exists(fringe_dir):
            os.makedirs(fringe_dir)
        os.rename(path, os.path.join(fringe_dir, base))
        print >> io.stdout, tc.Red + 'To remove %s ...' % base + tc.Normal
    print >> io.stdout, tc.Green + 'Cleaned up %s of data' % hurry.filesize.size(total_size) + tc.Normal
Exemple #10
0
def extract_isbns(dir):
    """Rename files under ``dir`` after an ISBN recognised in their first images.

    For each file with an extension in ``extns``, indices 0-4 of the file
    are rendered at 300x300 resolution, converted to JPEG, run through
    ``pytesseract`` and scanned line by line for the text ``isbn``.  On the
    first usable hit the file is moved to ``out_dir`` (created if missing)
    as ``<ISBN><original extension>``.

    A failure while rendering or recognising one index is reported on
    stdout and the next index is tried.

    ``dir`` shadows the builtin but is kept because it is the public
    parameter name.
    """
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    for path in dirwalker.find_filenames_with_extensions(dir, extns):
        base = os.path.basename(path)
        ext = os.path.splitext(base)[1]
        print >> io.stdout, tc.Green + 'File: %s' % base + tc.Normal
        isbn_found = False
        for num in range(0, 5):
            if isbn_found:
                break
            new_image = None
            try:
                # "<file>[n]" selects index n of the input -- presumably the
                # page number for multi-page documents; confirm against wimage.
                image = wimage(filename='%s[%d]' % (path, num), resolution=(300, 300))
                image_data = image.make_blob(format='jpg')
                new_image = pimage.open(StringIO(image_data))
                new_image.load()
            except Exception:
                # Narrowed from a bare ``except`` so that KeyboardInterrupt
                # and SystemExit still stop the run.
                print 'error'
                continue

            if new_image is None:
                continue

            try:
                page_text = pytesseract.image_to_string(new_image)
                for line in page_text.split('\n'):
                    line = line.lower()
                    if 'isbn' not in line:
                        continue
                    # Keep only what follows "isbn" and drop separators.
                    line = line[line.find('isbn') + 4:]
                    line = line.replace(':', ' ')
                    line = line.replace('-', '')
                    line = line.replace('—', '')
                    line = line.replace(' ', '')
                    # Presumably compensates for OCR reading "1" as "|" or "l".
                    line = re.sub('[|Ll]', '1', line)
                    line = line.strip()
                    if not line:
                        # Nothing followed "isbn" on this line; renaming would
                        # produce a file named only by its extension.
                        continue

                    new_name = os.path.join(out_dir, '%s%s' % (line.upper(), ext))
                    print '%s: -%s-' % (base, line)
                    os.rename(path, new_name)
                    isbn_found = True
                    break
            except Exception:
                print '**** Error ****'
Exemple #11
0
import re, os, sys, dirwalker, shutil
from IPython.utils.coloransi import TermColors as tc
from IPython.utils import io

# Destination folder for the moved directories, created on first run.
dest = os.path.join(os.getcwd(), 'music-done')
if not os.path.exists(dest):
    os.makedirs(dest)

# The parent directory of every audio file found under the cwd.
dirs = set(
    os.path.dirname(audio)
    for audio in dirwalker.find_filenames_with_extensions(
        os.getcwd(), ['.mp3', '.flac', '.ape'])
)

def unique(name):
    """Return ``name`` as a lower-case, hyphen-separated string.

    Apostrophes and backslashes are treated as spaces; the text is then
    split on comma, colon, double quote, slash, underscore or space, and the
    non-empty pieces are joined with ``-``.
    """
    spaced = name.replace("'", " ").replace("\\", " ")
    pieces = [piece for piece in re.split('[,:"/_ ]', spaced.lower()) if piece]
    return '-'.join(pieces)

def moveDir(d):
    """Move directory ``d`` into ``dest`` unless an entry with its base name is already there.

    When the target exists, a red "Not moving" line is printed to
    ``io.stderr``; otherwise a green "Moving" line is printed to
    ``io.stdout`` and ``d`` is moved.
    """
    shown = os.path.relpath(d)
    if os.path.exists(os.path.join(dest, os.path.basename(d))):
        print >> io.stderr, tc.Red + 'Not moving %s' % shown + tc.Normal
        return
    print >> io.stdout, tc.Green + 'Moving %s' % shown + tc.Normal
    shutil.move(d, dest)
        
# A plain loop instead of ``map``: the calls are made only for their side
# effects, and ``map`` is lazy on Python 3, where nothing would be moved.
for d in dirs:
    moveDir(d)
Exemple #12
0
        else:
            return changeTitle(data['title'])
    except:
        return None

def amazonSearch(isbn):
    """Placeholder: performs no lookup for ``isbn`` and returns ``None``."""
    return None

def changeTitle(title):
    """Return ``title`` lower-cased with its words joined by hyphens.

    Apostrophes and backslashes become spaces first; words are whatever
    remains non-empty after splitting on comma, colon, double quote, slash,
    underscore or space.
    """
    for ch in ("'", "\\"):
        title = title.replace(ch, " ")
    words = re.split('[,:"/_ ]', title.lower())
    return '-'.join(word for word in words if len(word) > 0)

for f in dirwalker.find_filenames_with_extensions(os.getcwd(), ['.pdf', '.epub', '.mobi', '.djvu', '.chm', '.azw3', '.azw']):
    d = os.path.dirname(f)
    base,ext=os.path.splitext(f)
    dest = os.path.join(os.getcwd(), 'done')
    if not os.path.exists(dest):
        os.makedirs(dest)

    m = amazon_regex.search(f)
    amazon = False

    if m is not None:
        m = isbn_13_new_regex.search(f)
    else:
        amazon = True

    if m is None:
Exemple #13
0
import os, sys
import subprocess
import dirwalker

# Run avconv on every .ape/.wav under the cwd, writing a .flac with the same
# path and base name next to the input.
for src in dirwalker.find_filenames_with_extensions(os.getcwd(), ['.ape', '.wav']):
    flac = os.path.splitext(src)[0] + '.flac'
    subprocess.call(['avconv', '-i', src, flac])
Exemple #14
0
import requests, json
import re
import os,sys
import glob
from pyquery import PyQuery as pq
import dirwalker
import hurry.filesize
from IPython.utils.coloransi import TermColors as tc
from IPython.utils import io

# Index every .epub under the cwd by its base name (no directory, no extension).
files = {}
for epub in dirwalker.find_filenames_with_extensions(os.getcwd(), ['.epub']):
    stem = os.path.basename(os.path.splitext(epub)[0])
    files[stem] = {'epub': epub, 'mobi': None, 'azw': None}

total_size = 0
for f in dirwalker.find_filenames_with_extensions(os.getcwd(), ['.mobi', '.azw3']):
    name,ext = os.path.splitext(f)
    name = os.path.basename(name)
    cur = files.get(name, None)
    if cur is not None:
        print 'Removing %s' % os.path.basename(f)
        total_size = total_size + os.path.getsize(f)
        os.remove(f)
Exemple #15
0
import dirwalker
import argparse

def extractZip(f, list_files):
    zf = zipfile.ZipFile(f, allowZip64 = True)
    if not list_files:
        print 'Extracting %s' % os.path.basename(f)
        zf.extractall()
    else:
        pass

def extractRar(f, list_files):
    rf = rarfile.RarFile(f)
    if not list_files:
        print 'Extracting %s' % os.path.basename(f)
        rf.extractall()
    else:
        pass

if __name__ == '__main__':
    # Command line: the only option is the boolean flag -l (stored as list_files).
    parser = argparse.ArgumentParser(prog='unzip-all')
    parser.add_argument('-l', dest='list_files', action='store_true', required=False)
    args = parser.parse_args()

    # All .zip archives are handled first; the cwd is scanned again for
    # .rar archives only afterwards.
    handlers = (('.zip', extractZip), ('.rar', extractRar))
    for extension, extract in handlers:
        for archive in dirwalker.find_filenames_with_extensions(os.getcwd(), [extension]):
            extract(archive, args.list_files)
import re
import os,sys
import glob
from pyquery import PyQuery as pq
import dirwalker
import hashlib
import hurry.filesize
from IPython.utils.coloransi import TermColors as tc
from IPython.utils import io

# Maps SHA-512 hex digest -> first file seen with that content.
files = {}
# Combined size of the duplicates moved aside.
total_size = 0
fringe_dir = os.path.join(os.getcwd(), 'fringe')

extns = ['.epub', '.pdf', '.djvu', '.chm', '.mobi']
# The first file with a given digest is kept in place; every later file with
# the same digest is moved into fringe_dir (created on first use).
for path in dirwalker.find_filenames_with_extensions(os.getcwd(), extns):
    with open(path, 'rb') as f:
        sha = hashlib.sha512(f.read()).hexdigest()
    # The handle is closed before any rename: renaming a file that is still
    # open fails on some platforms (e.g. Windows).
    print >> io.stdout, tc.Green + 'Checking %s ...' % path + tc.Normal
    if sha not in files:
        files[sha] = path
        continue
    total_size += os.path.getsize(path)
    if not os.path.exists(fringe_dir):
        os.makedirs(fringe_dir)
    os.rename(path, os.path.join(fringe_dir, os.path.basename(path)))
    print >> io.stdout, tc.Red + 'To remove %s ...' % path + tc.Normal

# for f in dirwalker.find_filenames_with_extensions(os.getcwd(), ['.mobi', '.azw3']):
#     name,ext = os.path.splitext(f)
#     name = os.path.basename(name)
def gather_shas_in_dir(dir):
    """Record in ``source_shas`` the SHA-512 of every matching file under ``dir``.

    Files are those found under ``dir`` with an extension in ``extns``.  Each
    hex digest is mapped to the file's path; a later file with the same
    digest overwrites the earlier entry.
    """
    for path in dirwalker.find_filenames_with_extensions(dir, extns):
        with open(path, 'rb') as handle:
            print >> io.stdout, tc.Green + 'Checking %s ...' % os.path.basename(path) + tc.Normal
            hasher = hashlib.sha512()
            hasher.update(handle.read())
        source_shas[hasher.hexdigest()] = path