def urls_for_prefix(self, prefix='.'):
    """Build Django url() patterns for every .html template under *prefix*.

    index.html files are served at their directory URL (trailing slash),
    with a permanent redirect from the bare file name; all other templates
    map directly to their relative path. Paths starting with an underscore
    are treated as private and skipped.
    """
    url_patterns = []
    if self.path is None or not self.path.exists():
        return url_patterns
    prefixed_path = Path(self.path, prefix)
    for html_path in prefixed_path.walk():
        # Only route .html files.
        if not html_path.endswith('.html'):
            continue
        rel_path = self.path.rel_path_to(html_path)
        prefix_rel_path = prefixed_path.rel_path_to(html_path)
        # Underscore-prefixed paths are private templates.
        if rel_path.startswith('_'):
            continue
        view = SheerTemplateView.as_view(
            template_engine=self.slug,
            template_name=str(rel_path))
        if rel_path.name != 'index.html':
            url_patterns.append(url(r'^%s$' % prefix_rel_path, view))
            continue
        # index.html: serve the slash-terminated directory URL ...
        parent = prefix_rel_path.parent
        slash_regex = (r'^%s/$' % parent) if parent else r'^$'
        url_patterns.append(url(slash_regex, view))
        # ... and redirect the literal "index.html" URL to it.
        index_redirect = RedirectView.as_view(url='./', permanent=True)
        url_patterns.append(url(r'^%s$' % prefix_rel_path, index_redirect))
    return url_patterns
def process_directory(directory, context, variable_start_string='{<',
                      variable_end_string='>}', extensions=None,
                      filter=FILES_NO_LINKS):
    """Render every file under *directory* in place as a Jinja2 template.

    Each matching file is rendered with `{< ... >}` variable delimiters
    (and `{<% %>}` / `{<# #>}` block/comment delimiters) against *context*
    and written back UTF-8 encoded.

    :param directory: root directory to walk
    :param context: mapping passed to ``Template.render``
    :param extensions: if given, only files whose ``ext`` is listed
    :param filter: walk() filter (defaults to files, no symlinks)
    """
    directory = Path(directory)
    for f in directory.walk(filter=filter):
        if extensions and f.ext not in extensions:
            continue
        components = f.components()
        td, tf = Path(*components[:-1]), components[-1]
        jinja_env = Environment(loader=FileSystemLoader(str(td)),
                                variable_start_string=variable_start_string,
                                variable_end_string=variable_end_string,
                                block_start_string='{<%',
                                block_end_string='%>}',
                                comment_start_string='{<#',
                                comment_end_string='#>}',
                                )
        try:
            rendered = jinja_env.get_template(str(tf)).render(**context)
        except Exception as e:
            # Bug fix: only Jinja2 template errors carry filename/lineno;
            # other exceptions would raise AttributeError inside the
            # handler. Fall back to the file being processed.
            print("Cannot process file %s on line %s" % (
                getattr(e, 'filename', f), getattr(e, 'lineno', '?')))
            continue
        f.write_file(rendered.encode('utf-8'))
def get_by_name(cls, name, path, git):
    """Look up a user by *name* in a gitolite-admin checkout at *path*.

    Returns a new instance carrying the user's public keys and the repos
    whose conf mentions the user, or None when nothing matches.
    """
    # Keys: every file under keydir whose name ends with "<name>.pub".
    pub_suffix = '%s.pub' % name
    keys = []
    for candidate in Path(path, 'keydir').walk():
        if candidate.endswith(pub_suffix):
            keys.append(candidate)
    # Repos: every conf file whose contents mention the user's name.
    repos = []
    for conf_file in Path(path, 'conf/').walk():
        if conf_file.isdir():
            continue
        with open(str(conf_file)) as handle:
            contents = handle.read()
        if name in contents:
            repos.append(conf_file)
    if not (repos or keys):
        return None
    return cls(path, git, name, repos, keys)
def search_path(path, repo):
    """Case-insensitively search *repo* for an item matching *path*.

    Walks the repository tree and returns the first entry whose lowercased
    path equals the lowercased candidate path; returns False when nothing
    matches or the walk fails.
    """
    try:
        repo = Path(repo).absolute()
        # Hoisted loop invariant: the lowercased target path.
        target = Path(repo, path).lower()
        # Iterate the walk directly instead of materializing it in a list.
        for item in repo.walk():
            if item.lower() == target:
                return item
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # still propagate; any walk/IO failure falls through to False.
        pass
    return False
def get_by_name(cls, name, path, git):
    """Return an instance for *name* built from keys and repo confs.

    Collects the user's public keys from ``keydir`` and the repos whose
    conf file mentions the name; None when neither is found.
    """
    key_matches = [
        pub for pub in Path(path, 'keydir').walk()
        if pub.endswith('%s.pub' % name)
    ]
    matching_repos = []
    for entry in Path(path, 'conf/').walk():
        if entry.isdir():
            continue
        with open(str(entry)) as conf:
            mentioned = name in conf.read()
        if mentioned:
            matching_repos.append(entry)
    if matching_repos or key_matches:
        return cls(path, git, name, matching_repos, key_matches)
    return None
def all(self):
    """Return a User object for every public key file under keydir.

    :rtype: list
    """
    users = []
    key_dir = Path(self.path, 'keydir')
    # Compile once outside the loop; raw string and escaped dot so only
    # genuine "<name>.pub" files match (the old '(\w+.pub)' pattern let
    # "." match any character, e.g. "name0pub").
    pub_file = re.compile(r'(\w+\.pub)')
    for obj in key_dir.walk():
        if obj.isdir():
            continue
        users += pub_file.findall(str(obj))
    # user[:-4] strips the ".pub" suffix.
    return [User.get_by_name(user[:-4], self.path, self.git)
            for user in set(users)]
def all(self):
    """Return a Repository object for every conf file under conf/repos.

    :rtype: list
    """
    repos = []
    repo_dir = Path(self.path, 'conf/repos')
    # Compile once outside the loop; raw string and escaped dot so only
    # real "<name>.conf" files match (bare "." matched any character).
    conf_file = re.compile(r'(\w+\.conf$)')
    for obj in repo_dir.walk():
        if obj.isdir():
            continue
        repos += conf_file.findall(str(obj))
    # repo[:-5] strips the ".conf" suffix.
    return [Repository.get_by_name(repo[:-5], self.path, self.git)
            for repo in set(repos)]
def all(self):
    """Return a User object for every public key file under keydir.

    :rtype: list
    """
    users = []
    key_dir = Path(self.path, 'keydir')
    # Hoist the compile out of the loop and escape the dot: the previous
    # r'(\w+.pub)' let "." match any character before "pub".
    pub_file = re.compile(r'(\w+\.pub)')
    for obj in key_dir.walk():
        if obj.isdir():
            continue
        users += pub_file.findall(str(obj))
    # user[:-4] strips the ".pub" suffix.
    return [User.get_by_name(user[:-4], self.path, self.git)
            for user in set(users)]
def all(self):
    """Return a Repository object for every conf file under conf/repos.

    :rtype: list
    """
    repos = []
    repo_dir = Path(self.path, 'conf/repos')
    # Compile once, raw string, escaped dot (the old '(\w+.conf$)'
    # pattern matched any character where the dot should be).
    conf_file = re.compile(r'(\w+\.conf$)')
    for obj in repo_dir.walk():
        if obj.isdir():
            continue
        repos += conf_file.findall(str(obj))
    # repo[:-5] strips the ".conf" suffix.
    return [
        Repository.get_by_name(repo[:-5], self.path, self.git)
        for repo in set(repos)
    ]
def all(self):
    """
    Retrieve all the groups.

    :rtype: list
    """
    groups = []
    path = Path(self.path, os.path.join('conf', 'groups'))
    # Compile once outside the loop; \. so only genuine "<name>.conf"
    # files match (a bare "." matched any character).
    conf_file = re.compile(r'(\w+\.conf$)')
    for obj in path.walk():
        if obj.isdir():
            continue
        groups += conf_file.findall(str(obj))
    # group[:-5] strips the ".conf" suffix.
    return [
        Group.get(group[:-5], self.path, self.git)
        for group in set(groups)
    ]
def urls_for_prefix(self, prefix='.'):
    """Collect url() patterns for the routable .html files under *prefix*.

    Regular templates map straight to their relative path. index.html maps
    to its directory URL (trailing slash) and gets a permanent redirect
    from the literal file name. Underscore-prefixed paths are skipped.
    """
    if self.path is None or not self.path.exists():
        return []
    prefixed = Path(self.path, prefix)
    collected = []
    for found in prefixed.walk():
        # Skip anything that is not an .html template.
        if not found.endswith('.html'):
            continue
        template_path = self.path.rel_path_to(found)
        route_path = prefixed.rel_path_to(found)
        # Underscore paths hold private/partial templates.
        if template_path.startswith('_'):
            continue
        page_view = SheerTemplateView.as_view(
            template_engine=self.slug,
            template_name=str(template_path))
        if template_path.name != 'index.html':
            collected.append(url(r'^%s$' % route_path, page_view))
            continue
        if route_path.parent:
            slash_regex = r'^%s/$' % route_path.parent
        else:
            slash_regex = r'^$'
        collected.append(url(slash_regex, page_view))
        collected.append(url(
            r'^%s$' % route_path,
            RedirectView.as_view(url='./', permanent=True)))
    return collected
class SubtitleDownloader(object):
    """Scan a directory tree for media files and download missing subtitles."""

    def __init__(self, search_dir, search_all=False):
        """
        :param search_dir: root directory to scan; must be a directory
        :param search_all: search every file, not only recently modified ones
        :raises SubtitleDownloaderError: if search_dir is not a directory
        """
        self.search_dir = Path(search_dir)
        self.search_all = search_all
        # Make sure the search dir is a dir
        if not self.search_dir.isdir():
            raise SubtitleDownloaderError('Invalid search-dir {}'.format(
                search_dir))

    @staticmethod
    def relative_path(path, root_path):
        """Return the relative path of path in root_path"""
        relative_path = path.replace(root_path, '')
        if relative_path[0:1] == '/':
            return relative_path[1:]
        else:
            return relative_path

    def scan_for_search_files(self):
        """Scan search dir and return all files to search subtitles for"""
        log.debug('Searching for files in dir {}'.format(self.search_dir))
        search_files = []
        for file_path in self.search_dir.walk(filter=FILES, top_down=False):
            # Idiom fix: "x not in y" instead of "not x in y".
            if file_path.ext not in ('.mkv', '.avi'):
                continue
            subtitle_download = SubtitleDownload(file_path)
            # Search for subtitle if self.search_all is True or if the file
            # modified time is in the last week
            search_subtitle = self.search_all or \
                subtitle_download.time_since_modified < timedelta(weeks=1)
            if not search_subtitle:
                continue
            # Check if subtitle already exists
            if subtitle_download.subtitle_exist():
                log.debug('Subtitle for {} already exists'.format(
                    self.relative_path(file_path, self.search_dir)))
                continue
            search_files.append(subtitle_download)
        return search_files

    def scan_search(self):
        """Scan for files to download subtitles for and try to download
        subtitle.
        """
        search_files = self.scan_for_search_files()
        num_searches = len(search_files)
        for i, subtitle_download in enumerate(search_files):
            log.info('Subtitle search for {}'.format(subtitle_download.name))
            subtitle_download.search_download_subtitle()
            # Sleep between searches if it's not the last search file
            if i + 1 != num_searches:
                log.info('Sleeping for {} seconds'.format(SLEEP_TIME))
                sleep(SLEEP_TIME)

    def cleanup(self):
        """Remove subtitle files left over where the media file is removed"""
        log.debug('Running subtitle cleanup on dir {}'.format(self.search_dir))
        subtitle_extensions = ('.srt', '.sub', '.idx')
        for file_path in self.search_dir.walk(filter=FILES, top_down=False):
            if file_path.ext not in subtitle_extensions:
                continue
            # Remove the subtitle file if no media file exists in the same dir
            media_file_path_mkv = Path(file_path.parent, '{}.mkv'.format(
                file_path.stem))
            media_file_path_avi = Path(file_path.parent, '{}.avi'.format(
                file_path.stem))
            if (not media_file_path_mkv.exists()
                    and not media_file_path_avi.exists()):
                log.info('Removing leftover subtitle file {}'.format(
                    self.relative_path(file_path, self.search_dir)))
                file_path.remove()
class ReleaseSorter(object):
    """Sort TV release media files into per-series season folders."""

    def __init__(self, sort_dir):
        """
        :param sort_dir: directory to sort; the process chdirs into it
        :raises ReleaseSorterError: if sort_dir is not a directory
        """
        self.sort_dir = Path(sort_dir)
        # Make sure the sort dir is a dir and cd into it
        if not self.sort_dir.isdir():
            raise ReleaseSorterError('Invalid sort-dir {}'.format(sort_dir))
        os.chdir(sort_dir)
        # series name -> {unicode(sorter_file): sorter_file}
        self.files_to_sort = {}

    def relative_path(self, path, root_path):
        """Return *path* relative to *root_path* (plain string stripping)."""
        relative_path = path.replace(root_path, '')
        if relative_path[0:1] == '/':
            return relative_path[1:]
        else:
            return relative_path

    def check_extension(self, extension):
        """True if *extension* is a sortable media extension."""
        # Simplified from an if/else returning True/False literals.
        return extension in ('.mkv', '.avi')

    def check_modified_time(self, time_since_modified):
        """True if the file was last modified at least 20 minutes ago."""
        return time_since_modified >= timedelta(minutes=20)

    def create_series_folders(self, sorter_file):
        """Create the series and season dirs for *sorter_file* if missing."""
        if sorter_file.series_dir and not sorter_file.series_dir.exists():
            log.info('Creating series dir {}'.format(
                sorter_file.relative_path(sorter_file.series_dir)))
            sorter_file.series_dir.mkdir()
        if sorter_file.season_dir and not sorter_file.season_dir.exists():
            log.info('Creating season dir {}'.format(
                sorter_file.relative_path(sorter_file.season_dir)))
            sorter_file.season_dir.mkdir()

    def move_subtitle_files(self, sorter_file):
        """Check for existing subtitle files matching media file and move
        them to sort folder too.
        """
        for ext in ('.srt', '.sub', '.idx'):
            subtitle_path = Path(sorter_file.path.parent, '{}{}'.format(
                sorter_file.path.stem, ext))
            if subtitle_path.exists():
                log.info('Moving subtitle file {} to {}'.format(
                    self.relative_path(subtitle_path, self.sort_dir),
                    sorter_file.season_dir))
                subtitle_path.move(Path(self.sort_dir,
                                        sorter_file.season_dir))

    def move_sorter_file(self, sorter_file):
        """Move the media file into its season dir."""
        log.info('Moving {} to {}'.format(sorter_file.relative_path(),
                                          sorter_file.season_dir))
        sorter_file.path.move(Path(self.sort_dir, sorter_file.season_dir))

    def get_sorter_files(self):
        """List sort dir and find all files to sort"""
        log.debug('Sorting dir {}'.format(self.sort_dir))
        file_list = self.sort_dir.listdir(filter=FILES)
        for file in file_list:
            sorter_file = SorterFile(file, self.sort_dir)
            # File extension
            if not self.check_extension(sorter_file.extension):
                log.debug('Skipping {}, wrong file extension'.format(
                    sorter_file.relative_path()))
                continue
            # Modified time: only process files that haven't been modified
            # in the last 20 min
            time_since_modified = datetime.now() - sorter_file.mtime
            if not self.check_modified_time(time_since_modified):
                log.debug('Skipping {}, has been modified in the last 20 min '
                          '({})'.format(sorter_file.relative_path(),
                                        human(time_since_modified)))
                continue
            # Skip if file is not a TV release
            if not sorter_file.release.tv_release:
                log.debug('Skipping {}, not a TV release'.format(
                    sorter_file.relative_path()))
                continue
            # Add file to sorter list
            series_name = sorter_file.release.tv_series_data['series_name']
            series_episodes = self.files_to_sort.get(series_name)
            if not series_episodes:
                series_episodes = {}
            series_episodes[unicode(sorter_file)] = sorter_file
            self.files_to_sort[series_name] = series_episodes

    def sort_files(self):
        # If a season dir already exists use that when sorting. Else if
        # there is only one file found for the series skip it entirely.
        # Iterate a snapshot of the keys: entries are deleted inside the
        # loop (fix: iterating dict.keys() while deleting raises
        # RuntimeError on Python 3 views).
        for series in list(self.files_to_sort.keys()):
            series_episodes = self.files_to_sort[series]
            for episode_file in series_episodes:
                sorter_file = series_episodes[episode_file]
                # Episode already has a season dir
                if sorter_file.season_dir.exists():
                    log.info('Season dir for {} already exists {}'.format(
                        episode_file, sorter_file.season_dir))
                else:
                    # Skip if only one episode was found; episode_file is
                    # then the single key (replaces the Python-2-only
                    # iterkeys().next() call).
                    if len(series_episodes) < 2:
                        log.debug('Skipping {}, only one episode '
                                  'found'.format(episode_file))
                        del self.files_to_sort[series]
        # Loop remaining files for folder creation and moving
        for series in self.files_to_sort:
            series_episodes = self.files_to_sort[series]
            for episode_file in series_episodes:
                sorter_file = series_episodes[episode_file]
                # Create series folder if needed
                self.create_series_folders(sorter_file)
                # Move the file
                self.move_sorter_file(sorter_file)
                # Move subtitle files
                self.move_subtitle_files(sorter_file)

    def sort(self):
        """Run the full sort: collect candidates, then move them."""
        self.get_sorter_files()
        self.sort_files()

    def cleanup_empty_folders(self):
        """Remove empty directories under the sort dir (skipping _ dirs)."""
        log.debug('Cleanup empty folders in {}'.format(self.sort_dir))
        dirs_to_check_for_removal = []
        # 'folder' instead of 'dir' to avoid shadowing the builtin.
        for folder in self.sort_dir.walk(filter=DIRS, top_down=False):
            # Skip all dirs in _ dir
            if '/_' in folder:
                log.debug('Skipping cleanup on {}, _ dir'.format(folder))
                continue
            dirs_to_check_for_removal.append(folder)
        for folder in dirs_to_check_for_removal:
            # If dir is empty, remove it
            if folder.isdir() and len(folder.listdir()) == 0:
                log.info('Removing empty dir {}'.format(self.relative_path(
                    folder, self.sort_dir)))
                folder.rmtree()
sys.exit(1) img_file = sys.argv[1] word_file = sys.argv[2] logger.info('Loading pretrained models...') with open('tmp/svm.pickle', 'r') as f: svm = pickle.load(f) logger.info('Loading image and words file') img = cv2.imread(img_file, cv2.IMREAD_GRAYSCALE) img = prep.preprocess(img) xml = ET.parse(word_file).getroot() out_dir.mkdir() for f in out_dir.walk('*'): f.remove() logger.info('Starting to create a split file') for sentence in xml: for word in sentence: text = word.get('text') print(text) #if '@' in text or len(text) < 6: if text != 'buton': continue # Skip short words # Get the word image rect = {side: int(word.get(side)) for side in sides} word_img = img[rect['top']:rect['bottom'], rect['left']:rect['right']] word_img = cut_letters.removeWhitelines(word_img)
if changes: return dict( changes=changes, inserts=inserts, dels=dels ) else: return {} # ----- Main script begins here ----------- for root in GIT_DIRS: p = Path(root) change_dict = {} for gitdir in p.walk(filter=contains_git): repo = Repo(str(gitdir.absolute())) try: s = '%i-%i-%i' % (begins.year, begins.month, begins.day) for commit in repo.commits_since(since=s): if commit.author.email.lower() not in GIT_AUTHOR_EMAILS: continue cd = gen_commit_dict(commit) if cd: if repo not in change_dict: change_dict[repo] = {} change_dict[repo][commit] = cd except (GitCommandError, AttributeError) as e: pass if change_dict: title = 'SUMMARY OF GIT ACTIVITIES FROM %s to %s' % (str(begins.date()), str(now.date()))