コード例 #1
0
ファイル: test_dirwalker.py プロジェクト: questrail/dirwalker
 def test_relative_path(self):
     """A relative directory path yields the known ``.js`` sample file."""
     expected = {self.known_sample_file_06_js}
     found = dirwalker.find_filenames_with_extensions(
         './tests/sample_dir/', ['.js'])
     self.assertEqual(found, expected)
コード例 #2
0
ファイル: to-low-bit-mp3.py プロジェクト: suryakiran/Scripts
def create_dirs():
    """Mirror the directories holding ``.mp3`` files from ``old_dir`` into ``new_dir``.

    Each file's path relative to ``old_dir`` is re-rooted at ``new_dir`` and
    the parent directory of that target path is created when it is missing.
    """
    for mp3_path in dirwalker.find_filenames_with_extensions(old_dir, ['.mp3']):
        target = os.path.join(new_dir, os.path.relpath(mp3_path, old_dir))
        target_dir = os.path.dirname(target)
        if os.path.exists(target_dir):
            continue
        os.makedirs(target_dir)
コード例 #3
0
ファイル: to-low-bit-mp3.py プロジェクト: suryakiran/Scripts
def convert_files():
    """Run ffmpeg with ``-ab 64k`` on every ``.mp3`` under ``old_dir``.

    The output path is the input's path relative to ``old_dir``, re-rooted at
    ``new_dir``. A coloured progress line is printed before each conversion.
    """
    for source in dirwalker.find_filenames_with_extensions(old_dir, ['.mp3']):
        relative = os.path.relpath(source, old_dir)
        target = os.path.join(new_dir, relative)
        print >> io.stdout, tc.Green + 'Converting %s' % relative + tc.Normal
        subprocess.call(['ffmpeg', '-i', source, '-ab', '64k', target])
コード例 #4
0
ファイル: test_dirwalker.py プロジェクト: questrail/dirwalker
 def test_find_single_file_with_extension_period(self):
     """An extension given with its leading period is matched"""
     expected = {self.known_sample_file_06_js}
     found = dirwalker.find_filenames_with_extensions(
         self.sample_dir, ['.js'])
     self.assertEqual(found, expected)
コード例 #5
0
ファイル: m4a-to-mp3.py プロジェクト: suryakiran/Scripts
def convert_files ():
    """Convert every ``.mp4``/``.flv`` under the current directory to MP3.

    Each input is passed to ffmpeg with ``-f mp3 -ab 320k -vn``. The output is
    written to ``dest_dir`` under the input's base name with its extension
    replaced by ``.mp3``. ffmpeg's standard output is discarded; its standard
    error is inherited from this process.
    """
    for src in dirwalker.find_filenames_with_extensions(os.getcwd(), ['.mp4','.flv']):
        rp = os.path.basename(src)
        out = os.path.splitext(os.path.join(dest_dir, rp))[0] + '.mp3'
        print >> io.stdout, tc.Green + 'Converting %s' % rp + tc.Normal
        args = ['ffmpeg', '-i', src, '-f', 'mp3', '-ab', '320k', '-vn', out]
        # stdout=subprocess.PIPE with call() is never read, so the child can
        # block once the pipe buffer fills. Discard the output instead.
        with open(os.devnull, 'w') as devnull:
            subprocess.call(args, stdout=devnull, stderr=None)
コード例 #6
0
ファイル: test_dirwalker.py プロジェクト: questrail/dirwalker
 def test_find_multiple_files_without_recursion(self):
     """With ``recurse=False`` only the two top-level txt samples are found"""
     expected = {
         self.known_sample_file_01_txt,
         self.known_sample_file_02_txt,
     }
     found = dirwalker.find_filenames_with_extensions(
         self.sample_dir, ['txt'], recurse=False)
     self.assertEqual(found, expected)
コード例 #7
0
ファイル: test_dirwalker.py プロジェクト: questrail/dirwalker
 def test_find_multiple_extensions(self):
     """Several extensions, with and without a period, can be searched at once"""
     expected = {
         self.known_sample_file_05_py,
         self.known_sample_file_06_js,
         self.known_sample_level_2_file_05_py,
     }
     found = dirwalker.find_filenames_with_extensions(
         self.sample_dir, ['js', '.py'])
     self.assertEqual(found, expected)
コード例 #8
0
ファイル: test_dirwalker.py プロジェクト: questrail/dirwalker
 def test_find_multiple_files_without_extension_period(self):
     """An extension given without a period matches files at both levels"""
     expected = {
         self.known_sample_file_01_txt,
         self.known_sample_file_02_txt,
         self.known_sample_level_2_file_01_txt,
         self.known_sample_level_2_file_02_txt,
     }
     found = dirwalker.find_filenames_with_extensions(
         self.sample_dir, ['txt'])
     self.assertEqual(found, expected)
コード例 #9
0
def prune_in_dest(dir):
    """Move files under ``dir`` whose SHA-512 is in ``source_shas`` to ``dir/fringe``.

    Every file matched by ``extns`` is hashed. When its digest is a key of
    ``source_shas`` the file is renamed into the ``fringe`` sub-directory
    (created on first use) under its base name, and its size is added to the
    total reported at the end.

    Args:
        dir: Root directory to scan.
    """
    total_size = 0
    fringe_dir = os.path.join(dir, 'fringe')
    for path in dirwalker.find_filenames_with_extensions(dir, extns):
        base = os.path.basename(path)
        digest = hashlib.sha512()
        with open(path, 'rb') as f:
            print >> io.stdout, tc.Blue + 'Checking %s ...' % base + tc.Normal
            # Hash in 1 MiB chunks so large files are not loaded whole.
            for chunk in iter(lambda: f.read(1024 * 1024), b''):
                digest.update(chunk)
        # The file is closed before the rename: renaming an open file fails
        # on Windows.
        if digest.hexdigest() not in source_shas:
            continue
        total_size = total_size + os.path.getsize(path)
        if not os.path.exists(fringe_dir):
            os.makedirs(fringe_dir)
        os.rename(path, os.path.join(fringe_dir, base))
        print >> io.stdout, tc.Red + 'To remove %s ...' % base + tc.Normal
    print >> io.stdout, tc.Green + 'Cleaned up %s of data' % hurry.filesize.size(total_size) + tc.Normal
コード例 #10
0
def extract_isbns(dir):
    """Rename files under ``dir`` after an ISBN found by OCR on their first pages.

    For every file matched by ``extns``, pages 0-4 are rendered one at a time
    and passed to ``pytesseract.image_to_string``. The first text line that
    contains "isbn" supplies the new name: the text after "isbn" is stripped
    of colons, dashes and spaces, upper-cased, and the file is renamed to
    ``<out_dir>/<ISBN><original extension>``. Pages that fail to render or to
    OCR are reported on stdout and skipped.

    Args:
        dir: Root directory to scan.
    """
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    for file in dirwalker.find_filenames_with_extensions(dir, extns):
        print >> io.stdout, tc.Green + 'File: %s' %  os.path.basename(file) + tc.Normal
        ext = os.path.splitext(os.path.basename(file))[1]
        isbn_found = False
        for num in range(0, 5):
            if isbn_found:
                break
            new_image = None
            try:
                # '<file>[<n>]' selects a single page of the document.
                # NOTE(review): wimage/pimage look like wand and PIL image
                # classes -- confirm against the file's imports.
                image = wimage(filename='%s[%d]' % (file, num), resolution = (300,300))
                image_data = image.make_blob(format='jpg')
                new_image = pimage.open(StringIO(image_data))
                new_image.load()
            except Exception:
                # Catch Exception rather than everything, so Ctrl-C and
                # SystemExit still stop the run.
                print('error')
                continue

            if new_image is None:
                continue

            try:
                page_text = pytesseract.image_to_string(new_image)
                for line in page_text.split('\n'):
                    line = line.lower()
                    if 'isbn' not in line:
                        continue
                    idx = line.find('isbn')
                    line = line[idx+4:]
                    line = line.replace(':', ' ')
                    line = line.replace('-', '')
                    line = line.replace('—', '')
                    line = line.replace (' ', '')
                    # Map characters commonly misread for the digit one.
                    line = re.sub('[|Ll]','1',line)
                    line = line.strip()
                    if not line:
                        # Nothing follows "isbn" on this line; renaming now
                        # would leave a file named only by its extension.
                        continue

                    new_name = os.path.join(out_dir, '%s%s' % (line.upper(), ext))
                    print('%s: -%s-' % (os.path.basename(file), line))
                    os.rename(file, new_name)
                    isbn_found = True
                    break
            except Exception:
                print('**** Error ****')
コード例 #11
0
ファイル: move-music.py プロジェクト: suryakiran/Scripts
import re, os, sys, dirwalker, shutil
from IPython.utils.coloransi import TermColors as tc
from IPython.utils import io

# Target folder ``music-done`` under the current directory, created when missing.
dest = os.path.join(os.getcwd(), 'music-done')
if not os.path.exists(dest):
    os.makedirs(dest)

# Every directory that directly contains at least one matching audio file.
dirs = set(
    os.path.dirname(f)
    for f in dirwalker.find_filenames_with_extensions(
        os.getcwd(), ['.mp3', '.flac', '.ape']))

def unique(name):
    """Return ``name`` lower-cased and reduced to dash-separated words.

    Apostrophes and backslashes are first turned into spaces. The text is
    then split on commas, colons, double quotes, slashes, underscores and
    spaces, and the non-empty pieces are joined with ``-``.
    """
    cleaned = name.replace("'", " ").replace("\\", " ").lower()
    return '-'.join(piece for piece in re.split('[,:"/_ ]', cleaned) if piece)

def moveDir(d):
    """Move directory ``d`` into ``dest`` unless its base name already exists there.

    A green "Moving" line goes to ``io.stdout`` when the move happens; a red
    "Not moving" line goes to ``io.stderr`` when the target already exists.
    """
    target = os.path.join(dest, os.path.basename(d))
    shown = os.path.relpath(d)
    if not os.path.exists(target):
        print >> io.stdout, tc.Green + 'Moving %s' % shown + tc.Normal
        shutil.move(d, dest)
        return
    print >> io.stderr, tc.Red + 'Not moving %s' % shown + tc.Normal
        
# Explicit loop rather than map(): map() used only for side effects builds a
# throwaway list on Python 2 and, being lazy, would do nothing on Python 3.
for d in dirs:
    moveDir(d)
コード例 #12
0
ファイル: book-names.py プロジェクト: suryakiran/Scripts
        else:
            return changeTitle(data['title'])
    except:
        return None

def amazonSearch(isbn):
    """Placeholder for a lookup by ``isbn``; not implemented, returns ``None``."""
    return None

def changeTitle(title):
    """Return ``title`` lower-cased and reduced to dash-separated words.

    Apostrophes and backslashes become spaces. The result is split on commas,
    colons, double quotes, slashes, underscores and spaces, and the non-empty
    pieces are joined with ``-``.
    """
    flattened = title.replace("'", " ").replace("\\", " ").lower()
    words = [word for word in re.split('[,:"/_ ]', flattened) if word]
    return '-'.join(words)

for f in dirwalker.find_filenames_with_extensions(os.getcwd(), ['.pdf', '.epub', '.mobi', '.djvu', '.chm', '.azw3', '.azw']):
    d = os.path.dirname(f)
    base,ext=os.path.splitext(f)
    dest = os.path.join(os.getcwd(), 'done')
    if not os.path.exists(dest):
        os.makedirs(dest)

    m = amazon_regex.search(f)
    amazon = False

    if m is not None:
        m = isbn_13_new_regex.search(f)
    else:
        amazon = True

    if m is None:
コード例 #13
0
ファイル: ape-to-flac.py プロジェクト: suryakiran/Scripts
import os, sys
import subprocess
import dirwalker

# Run avconv on every .ape/.wav under the current directory, writing the
# output next to the input with a .flac extension.
for source in dirwalker.find_filenames_with_extensions(os.getcwd(), ['.ape', '.wav']):
    target = os.path.splitext(source)[0] + '.flac'
    subprocess.call(['avconv', '-i', source, target])
コード例 #14
0
ファイル: remove-mobis.py プロジェクト: suryakiran/Scripts
import requests, json
import re
import os,sys
import glob
from pyquery import PyQuery as pq
import dirwalker
import hurry.filesize
from IPython.utils.coloransi import TermColors as tc
from IPython.utils import io

# Index every .epub under the current directory by its extension-less base name.
files = {}
for epub in dirwalker.find_filenames_with_extensions(os.getcwd(), ['.epub']):
    stem = os.path.basename(os.path.splitext(epub)[0])
    files[stem] = {'epub': epub, 'mobi': None, 'azw': None}

# Delete each .mobi/.azw3 whose base name matches an indexed .epub, adding
# up the bytes removed.
total_size = 0
for candidate in dirwalker.find_filenames_with_extensions(os.getcwd(), ['.mobi', '.azw3']):
    stem = os.path.basename(os.path.splitext(candidate)[0])
    if stem not in files:
        continue
    print('Removing %s' % os.path.basename(candidate))
    total_size += os.path.getsize(candidate)
    os.remove(candidate)
コード例 #15
0
ファイル: unzip-all.py プロジェクト: suryakiran/Scripts
import dirwalker
import argparse

def extractZip(f, list_files):
    """Extract the zip archive ``f`` into the current working directory.

    Args:
        f: Path to the zip archive.
        list_files: When true, the archive is opened but nothing is
            extracted (listing is not implemented).
    """
    # The context manager closes the archive handle even when extraction
    # raises; the handle was previously never closed.
    with zipfile.ZipFile(f, allowZip64 = True) as zf:
        if not list_files:
            print('Extracting %s' % os.path.basename(f))
            zf.extractall()

def extractRar(f, list_files):
    """Extract the rar archive ``f`` into the current working directory.

    Args:
        f: Path to the rar archive.
        list_files: When true, the archive is opened but nothing is
            extracted (listing is not implemented).
    """
    # The context manager releases the archive handle even when extraction
    # raises; the handle was previously never closed.
    with rarfile.RarFile(f) as rf:
        if not list_files:
            print('Extracting %s' % os.path.basename(f))
            rf.extractall()

if __name__ == '__main__':
    # -l sets list_files, which the extract helpers receive as their flag.
    parser = argparse.ArgumentParser(prog = 'unzip-all')
    parser.add_argument('-l', dest = 'list_files', required = False, action = 'store_true')
    args = parser.parse_args()

    # All zip archives are handled first, then the tree is walked again for
    # rar archives.
    for extension, extract in (('.zip', extractZip), ('.rar', extractRar)):
        for archive in dirwalker.find_filenames_with_extensions(os.getcwd(), [extension]):
            extract(archive, args.list_files)
コード例 #16
0
import re
import os,sys
import glob
from pyquery import PyQuery as pq
import dirwalker
import hashlib
import hurry.filesize
from IPython.utils.coloransi import TermColors as tc
from IPython.utils import io

# Maps SHA-512 hex digest -> first file seen with that content.
files = {}
total_size = 0
fringe_dir = os.path.join(os.getcwd(), 'fringe')

extns = ['.epub', '.pdf', '.djvu', '.chm', '.mobi']
# Any later file whose digest was already seen is moved into ./fringe
# (created on first use) and its size is added to total_size.
for path in dirwalker.find_filenames_with_extensions(os.getcwd(), extns):
    digest = hashlib.sha512()
    with open(path, 'rb') as f:
        # Hash in 1 MiB chunks so large files are not loaded whole.
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            digest.update(chunk)
    sha = digest.hexdigest()
    print >> io.stdout, tc.Green + 'Checking %s ...' % path + tc.Normal
    if sha not in files:
        files[sha] = path
        continue
    # The file is closed before the rename: renaming an open file fails on
    # Windows.
    total_size = total_size + os.path.getsize(path)
    if not os.path.exists(fringe_dir):
        os.makedirs(fringe_dir)
    os.rename(path, os.path.join(fringe_dir, os.path.basename(path)))
    print >> io.stdout, tc.Red + 'To remove %s ...' % path + tc.Normal

# for f in dirwalker.find_filenames_with_extensions(os.getcwd(), ['.mobi', '.azw3']):
#     name,ext = os.path.splitext(f)
#     name = os.path.basename(name)
コード例 #17
0
def gather_shas_in_dir (dir):
    """Record the SHA-512 of every file under ``dir`` matched by ``extns``.

    Each digest is stored in ``source_shas`` as ``hex digest -> file path``;
    a later file with the same content replaces the earlier entry.

    Args:
        dir: Root directory to scan.
    """
    for path in dirwalker.find_filenames_with_extensions(dir, extns):
        digest = hashlib.sha512()
        with open(path, 'rb') as f:
            print >> io.stdout, tc.Green + 'Checking %s ...' % os.path.basename(path) + tc.Normal
            # Hash in 1 MiB chunks so large files are not loaded whole.
            for chunk in iter(lambda: f.read(1024 * 1024), b''):
                digest.update(chunk)
        source_shas[digest.hexdigest()] = path