def download(url, path, mtime):
    """Fetch url into path unless path was already handled or is current.

    A path is handled at most once: the first caller registers it in
    ``files`` while holding ``lock``; later callers return immediately.
    An existing file whose modification time equals ``mtime`` is left alone.
    Otherwise the body is streamed to a dot-prefixed sibling file, synced to
    disk, stamped with ``mtime``, passed to ``common.mkro`` and finally moved
    over ``path`` with ``common.rename``.
    """
    # Only the bookkeeping happens under the lock; the transfer does not.
    with lock:
        already_claimed = path in files
        if not already_claimed:
            files.add(path)
    if already_claimed:
        return

    is_current = os.path.exists(path) and os.path.getmtime(path) == mtime
    if is_current:
        return

    common.mkdirs(os.path.dirname(path))
    log(url + " -> " + path)
    response = get(url, stream=True)

    # Write next to the target under a hidden name so that the final
    # rename stays inside one directory.
    directory, filename = os.path.split(path)
    hidden_path = os.path.join(directory, "." + filename)
    with open(hidden_path, "wb") as out:
        for block in response.iter_content(4096):
            out.write(block)
        out.flush()
        os.fsync(out.fileno())

    os.utime(hidden_path, (mtime, mtime))
    common.mkro(hidden_path)
    common.rename(hidden_path, path)
def download(self):
    """Resume downloading self.href into self.tempname, then publish it.

    The request carries a ``Range`` header starting at the current size of
    the temp file, and received data is appended to that file.

    If fewer bytes arrive than the response's ``Content-Length`` announced,
    or if ``self.verify()`` fails, ``self.name`` is removed from ``fileset``
    and the method returns without publishing (per the inline comments, so
    that the download is retried). On verification failure the temp file is
    deleted as well.

    On success the temp file is renamed to ``self.name``, passed to
    ``common.mkro`` and, when the server sent ``Last-Modified``, stamped
    with that time.
    """
    # The context manager guarantees the handle is closed even when the
    # request or a read raises.
    with open(self.tempname, "ab") as f:
        size = os.fstat(f.fileno())[stat.ST_SIZE]
        response = requests.get(self.href, stream = True,
                                headers = {"Range": "bytes=%u-" % size})
        remaining = int(response.headers["Content-Length"])
        r = response.raw
        while True:
            data = r.read(4096)
            # Truthiness covers both "" and b""; comparing against "" never
            # matches a bytes object, which made this loop endless.
            if not data:
                break
            remaining -= len(data)
            f.write(data)
        f.flush()
        os.fsync(f.fileno())

    if remaining > 0:
        # download terminated early, retry
        fileset.remove(self.name)
        return
    if not self.verify():
        # download corrupt, delete and retry
        msg("WARN: verify failed for %s" % self.name)
        os.unlink(self.tempname)
        fileset.remove(self.name)
        return

    common.rename(self.tempname, self.name)
    common.mkro(self.name)
    if "Last-Modified" in response.headers:
        mtime = calendar.timegm(time.strptime(response.headers["Last-Modified"],
                                              "%a, %d %b %Y %H:%M:%S %Z"))
        os.utime(self.name, (mtime, mtime))
def download(url, path):
    """Fetch url into path unless path was already handled or is current.

    The first caller for a given path registers it in ``files`` under
    ``lock``; any later caller returns at once. When the file already
    exists, a HEAD request is issued and the download is skipped if both
    the modification time and the size match the response headers.
    Otherwise the body is streamed to a temp path, synced, stamped with the
    ``Last-Modified`` time of the GET response, passed to ``common.mkro``
    and renamed into place.
    """
    with lock:
        seen_before = path in files
        if not seen_before:
            files.add(path)
    if seen_before:
        return

    if os.path.exists(path):
        head = tls.s.head(url)
        remote_mtime = common.parse_last_modified(head.headers["Last-Modified"])
        # Size is only compared (and Content-Length only read) when the
        # timestamps already agree.
        if os.path.getmtime(path) == remote_mtime:
            if os.path.getsize(path) == int(head.headers["Content-Length"]):
                return

    common.mkdirs(os.path.dirname(path))
    log(url + " -> " + path)
    response = tls.s.get(url, stream=True)

    staging_path = common.mktemppath(path)
    with open(staging_path, "wb") as out:
        for block in response.iter_content(4096):
            out.write(block)
        out.flush()
        os.fsync(out.fileno())

    stamp = common.parse_last_modified(response.headers["Last-Modified"])
    os.utime(staging_path, (stamp, stamp))
    common.mkro(staging_path)
    common.rename(staging_path, path)
def download(url, path):
    """Download url to path, skipping paths already handled or up to date.

    A path is claimed once in ``files`` while ``lock`` is held; repeat
    calls for the same path are no-ops. For an existing file, the headers
    of a HEAD request decide whether to skip: both the modification time
    and the byte size have to match. A fresh download is written to a temp
    path, synced, given the ``Last-Modified`` time of the GET response,
    passed to ``common.mkro`` and then renamed onto ``path``.
    """
    with lock:
        claimed_earlier = path in files
        if not claimed_earlier:
            files.add(path)
    if claimed_earlier:
        return

    if os.path.exists(path):
        probe = tls.s.head(url)
        server_mtime = common.parse_last_modified(probe.headers["Last-Modified"])
        # Content-Length is consulted only after the mtime check passed.
        mtime_matches = os.path.getmtime(path) == server_mtime
        if mtime_matches and os.path.getsize(path) == int(probe.headers["Content-Length"]):
            return

    common.mkdirs(os.path.dirname(path))
    log(url + " -> " + path)
    reply = tls.s.get(url, stream=True)

    scratch_path = common.mktemppath(path)
    with open(scratch_path, "wb") as sink:
        for piece in reply.iter_content(4096):
            sink.write(piece)
        sink.flush()
        os.fsync(sink.fileno())

    modified = common.parse_last_modified(reply.headers["Last-Modified"])
    os.utime(scratch_path, (modified, modified))
    common.mkro(scratch_path)
    common.rename(scratch_path, path)
def download(url, path, mtime):
    """Download url to path once, skipping files that already carry mtime.

    The path is registered in ``files`` under ``lock`` by the first caller;
    subsequent calls for the same path do nothing. If ``path`` exists with
    a modification time equal to ``mtime`` nothing is fetched. Otherwise
    the response is streamed into a dot-prefixed file in the same
    directory, synced, stamped with ``mtime``, passed to ``common.mkro``
    and renamed onto ``path``.
    """
    with lock:
        claimed_earlier = path in files
        if not claimed_earlier:
            files.add(path)
    if claimed_earlier:
        return

    if os.path.exists(path) and os.path.getmtime(path) == mtime:
        return

    common.mkdirs(os.path.dirname(path))
    log(url + " -> " + path)
    reply = get(url, stream=True)

    # Hidden sibling of the target: same directory, name prefixed with ".".
    parent, leaf = os.path.split(path)
    scratch_path = os.path.join(parent, "." + leaf)
    with open(scratch_path, "wb") as sink:
        for piece in reply.iter_content(4096):
            sink.write(piece)
        sink.flush()
        os.fsync(sink.fileno())

    os.utime(scratch_path, (mtime, mtime))
    common.mkro(scratch_path)
    common.rename(scratch_path, path)
def extract(path):
    """Save the attachments of every message in the mbox at path.

    Does nothing unless ``config["attachments-enabled"]`` is ``"1"``. The
    mbox is read from ``config["lists-base"]/path`` and attachments are
    written below ``config["attachments-base"]/path`` as
    ``<message key>-<part index>-<clean file name>``.

    A MIME part is saved when it has a file name, its Content-Disposition
    does not start with ``inline`` and its content type is not one of the
    skipped types. With ``config["attachments-odponly"]`` set to ``"1"``
    only OpenDocument presentations (by extension or content type) are
    kept. Files that already exist are not rewritten.
    """
    if config["attachments-enabled"] != "1":
        return
    print("Extracting attachments from %s..." % path, file = sys.stderr)

    skipped_types = (
        'application/pgp-signature',
        'application/pkcs7-signature',
        'application/x-pkcs7-signature',
        'image/x-icon',
        'message/external-body',
        'message/rfc822',
        'text/calendar',
        'text/x-vcard',
    )

    mbox = mailbox.mbox(config["lists-base"] + "/" + path)
    try:
        for msg in mbox.keys():
            # index numbers every MIME part of the message, saved or not.
            index = 0
            for part in mbox[msg].walk():
                fn = part.get_filename()
                typ = part.get_content_type()
                if fn is not None \
                        and not mailindex.decode(part.get("Content-Disposition", "inline")).startswith("inline") \
                        and typ not in skipped_types:
                    p = config["attachments-base"] + "/" + path
                    try:
                        fn = cleanfilename(fn)
                        if config["attachments-odponly"] != "1" or \
                                fn.lower().endswith(".odp") or \
                                typ.lower().startswith("application/vnd.oasis.opendocument.presentation"):
                            common.mkdirs(p)
                            p += "/%03u-%03u-%s" % (msg, index, fn)
                            # get_payload(decode=True) yields None for
                            # multipart containers; nothing to write then.
                            payload = part.get_payload(decode = True)
                            if payload is not None and not os.path.exists(p):
                                temppath = common.mktemppath(p)
                                # "with" closes the handle even if the
                                # write or the sync fails.
                                with open(temppath, "wb") as f:
                                    f.write(payload)
                                    f.flush()
                                    os.fsync(f.fileno())
                                common.rename(temppath, p)
                                common.mkro(p)
                    except UnicodeEncodeError:
                        # Best effort: an attachment whose name cannot be
                        # encoded is skipped, the rest are still extracted.
                        pass
                index += 1
    finally:
        # Release the file handle held by the mailbox object.
        mbox.close()
def _get_segment(self):
    """Download self.segment to disk, hashing its content as it is written.

    The body is streamed into the "temp" path while a SHA-256 hash of the
    received chunks is accumulated. What happens next depends on the outcome:

    - HTTP 403 or 404: a warning is logged and the method returns before
      any file is created.
    - Any other exception: if the temp file had been created it is renamed
      to a "partial" path built from the hash so far and the "partial"
      metrics are incremented; the exception is then re-raised.
    - Success: the file is renamed to a "full" path, or a "suspect" path
      when the whole fetch took FETCH_SUSPECT_TIME or longer, the metrics
      for that type are incremented and the latest_segment gauge is raised
      to this segment's timestamp if that is newer.
    """
    # save current value of self.retry so we can't set any later instance
    # after a retry for this round has already occurred.
    retry = self.retry
    temp_path = self.make_path("temp")
    hash = hashlib.sha256()
    # Tracks whether there is anything on disk worth keeping as a partial.
    file_created = False
    try:
        self.logger.debug("Downloading segment {} to {}".format(self.segment, temp_path))
        start_time = monotonic()
        # Outer timeout spans the whole fetch, inner one only the wait for
        # headers. Both are handed retry.set as their callback.
        with soft_hard_timeout(self.logger, "getting and writing segment", self.FETCH_FULL_TIMEOUTS, retry.set):
            with soft_hard_timeout(self.logger, "getting segment headers", self.FETCH_HEADERS_TIMEOUTS, retry.set):
                resp = self.session.get(self.segment.uri, stream=True, metric_name='get_segment')
            # twitch returns 403 for expired segment urls, and 404 for very old urls where the original segment is gone.
            # the latter can happen if we have a network issue that cuts us off from twitch for some time.
            if resp.status_code in (403, 404):
                self.logger.warning("Got {} for segment, giving up: {}".format(resp.status_code, self.segment))
                # Returning from inside the try skips the else clause below.
                return
            resp.raise_for_status()
            common.ensure_directory(temp_path)
            with open(temp_path, 'wb') as f:
                file_created = True
                # We read chunk-wise in 8KiB chunks. Note that if the connection cuts halfway,
                # we may lose part of the last chunk even though we did receive it.
                # This is a small enough amount of data that we don't really care.
                for chunk in resp.iter_content(8192):
                    common.writeall(f.write, chunk)
                    hash.update(chunk)
    except Exception as e:
        if file_created:
            # Keep whatever was received, named after the hash of that data.
            partial_path = self.make_path("partial", hash)
            self.logger.warning("Saving partial segment {} as {}".format(temp_path, partial_path))
            common.rename(temp_path, partial_path)
            segments_downloaded.labels(type="partial", channel=self.channel, quality=self.quality).inc()
            segment_duration_downloaded.labels(type="partial", channel=self.channel, quality=self.quality).inc(self.segment.duration)
        raise e
    else:
        # A fetch that took FETCH_SUSPECT_TIME or longer is stored as "suspect" instead of "full".
        request_duration = monotonic() - start_time
        segment_type = "full" if request_duration < self.FETCH_SUSPECT_TIME else "suspect"
        full_path = self.make_path(segment_type, hash)
        self.logger.debug("Saving completed segment {} as {}".format(temp_path, full_path))
        common.rename(temp_path, full_path)
        segments_downloaded.labels(type=segment_type, channel=self.channel, quality=self.quality).inc()
        segment_duration_downloaded.labels(type=segment_type, channel=self.channel, quality=self.quality).inc(self.segment.duration)
        # Prom doesn't provide a way to compare value to gauge's existing value,
        # we need to reach into internals
        stat = latest_segment.labels(channel=self.channel, quality=self.quality)
        # Seconds between 1970-01-01 and self.date.
        timestamp = (self.date - datetime.datetime(1970, 1, 1)).total_seconds()
        stat.set(max(stat._value.get(), timestamp)) # NOTE: not thread-safe but is gevent-safe
def get_remote_segment(base_dir, node, channel, quality, hour, missing_segment,
                       logger, timeout=TIMEOUT):
    """Get a segment from a node.

    Fetches channel/quality/hour/missing_segment from node and puts it in
    base_dir/channel/quality/hour/missing_segment. If the segment already
    exists locally, this does not attempt to fetch it.

    The data is first written to a uniquely named temp file in the target
    directory while a SHA-256 hash is computed. If that hash differs from
    the hash parsed out of the segment file name, the temp file is deleted,
    the mismatch metric is incremented and the function returns without
    saving. If any exception occurs the temp file is removed and the
    exception is re-raised.
    """
    path = os.path.join(base_dir, channel, quality, hour, missing_segment)
    # check to see if file was created since we listed the local segments to
    # avoid unnecessarily copying
    if os.path.exists(path):
        logger.debug('Skipping existing segment {}'.format(path))
        return

    dir_name = os.path.dirname(path)
    date, duration, _ = os.path.basename(path).split('-', 2)
    temp_name = "-".join([date, duration, "temp", str(uuid.uuid4())])
    temp_path = os.path.join(dir_name, "{}.ts".format(temp_name))
    common.ensure_directory(temp_path)
    hasher = hashlib.sha256()

    try:
        logger.debug('Fetching segment {} from {}'.format(path, node))
        uri = '{}/segments/{}/{}/{}/{}'.format(node, channel, quality, hour, missing_segment)
        resp = requests.get(uri, stream=True, timeout=timeout, metric_name='get_remote_segment')
        resp.raise_for_status()

        # Binary mode: iter_content yields raw bytes, which a text-mode
        # file would reject or mangle.
        with open(temp_path, 'wb') as f:
            for chunk in resp.iter_content(8192):
                f.write(chunk)
                hasher.update(chunk)

        filename_hash = common.parse_segment_path(missing_segment).hash
        if filename_hash != hasher.digest():
            logger.warning(
                'Hash of segment {} does not match. Discarding segment'.format(
                    missing_segment))
            hash_mismatches.labels(remote=node, channel=channel, quality=quality, hour=hour).inc()
            os.remove(temp_path)
            return
    # try to get rid of the temp file if an exception is raised.
    except Exception:
        if os.path.exists(temp_path):
            os.remove(temp_path)
        raise

    logger.debug('Saving completed segment {} as {}'.format(temp_path, path))
    common.rename(temp_path, path)
    segments_backfilled.labels(remote=node, channel=channel, quality=quality, hour=hour).inc()
    logger.info('Segment {}/{}/{} backfilled'.format(quality, hour, missing_segment))
def modify_names(directory, action, position, input_string,
                 replace_string=None, recursive=False, exclude=None,
                 pattern=None, ignore_case=True, regex_syntax=False,
                 report_file=None, ignore_symlinks=False, strip_chars=None):
    """
        Add, remove or replace a user-defined string in the base name of
        the files found in the given directory.

        When a report file is given nothing is renamed; a simulation report
        is written to that file instead.
    """
    invalid_chars = common.get_invalid_chars

    pv.path(directory, "given", False, True)
    pv.compstr(action, "action", ["add", "remove", "replace"])
    pv.compstr(position, "position", ["any", "prefix", "suffix"])
    pv.string(input_string, "input string", False, invalid_chars())

    action = action.lower()
    position = position.lower()

    # The directory is kept absolute and always ends with a separator.
    directory = os.path.abspath(directory)
    if not directory.endswith(os.path.sep):
        directory += os.path.sep

    # Passing a report file switches the function into simulation mode.
    simulate = report_file is not None
    if simulate:
        pv.path(report_file, "report", True, False)
        report_file = os.path.abspath(report_file)

    if replace_string is not None:
        if action != "replace":
            raise Exception("The replace string argument can only be used "
                            "together with the action 'replace'.")
        pv.string(replace_string, "string False", False, invalid_chars())

    if action == "add" and position == "any":
        raise Exception("The position 'any' cannot be used together with "
                        "the action 'add'.")

    if len(input_string) == 0:
        raise Exception("The input string must not be empty.")
    pv.string(input_string, "input string", False, invalid_chars())

    if strip_chars is not None:
        pv.string(strip_chars, "strip chars string", False, invalid_chars())

    time_start = dt.now()
    list_renamed = []
    list_skipped = []

    regex = None
    if pattern is not None:
        regex = common.compile_regex(pattern, ignore_case, regex_syntax)

    list_content, list_excluded = common.get_files(
        directory, recursive, ignore_case, regex, exclude, ignore_symlinks)

    # Results are collected through the two lists handed to the helper;
    # its return value is not used.
    for item in list_content:
        __modify_names(item[1], list_renamed, list_skipped, action,
                       position, input_string, replace_string, strip_chars)

    if not simulate:
        common.rename(list_renamed)
        return

    if exclude is None:
        exclude = False
        explicit = False
    else:
        explicit = not exclude

    quoted_suffix = "\" (without double quotes)"

    list_header = [
        "Nomen File Name Modifier simulation report",
        ["Report file name:", report_file],
        ["Directory:", directory],
        ["Recursive:", recursive],
        ["Ignore symlinks:", ignore_symlinks],
        ["Action to perform:", action.capitalize()],
        ["Position:", position.capitalize()],
        ["Input string:", "\"" + input_string + quoted_suffix],
    ]
    if replace_string is not None:
        list_header.append(
            ["Replace string:", "\"" + replace_string + quoted_suffix])
    if strip_chars is None:
        list_header.append(["Strip chars:", "None"])
    else:
        list_header.append(
            ["Strip chars:", "\"" + strip_chars + quoted_suffix])
    list_header.extend([
        ["Exclude files:", exclude],
        ["Explicit files:", explicit],
        ["Pattern:", pattern],
        ["Ignore case:", ignore_case],
        ["Regex syntax:", regex_syntax],
    ])

    common.report(report_file, list_header, list_renamed, list_excluded,
                  list_skipped, time_start)
def rename_files(directory, rename_mode, separator=" ", recursive=False,
                 padding=0, exclude=None, pattern=None, ignore_case=True,
                 regex_syntax=False, report_file=None, ignore_symlinks=False,
                 ignore_file_ext=False, custom_name=None, step=1,
                 order_by=None):
    """
        Rename the base name of files based on the name of the directory
        where they are stored in and add a numeric ID.

        When a report file is given nothing is renamed; a simulation report
        is written to that file instead.

        Raises an exception if order_by is combined with a rename mode
        other than 'keep-order', or if the rename mode has no handler.
    """
    pv.path(directory, "given", False, True)
    pv.compstr(rename_mode, "rename mode",
               ["fill-gaps", "increase", "keep-order", "rename-new"])
    pv.intrange(padding, "padding", 0, 12, True)
    pv.string(separator, "seperator", False, common.get_invalid_chars())
    pv.intvalue(step, "step", True, False, False)

    # Normalise the mode before it is compared anywhere below, so that the
    # order-by check sees the same value as the mode dispatch.
    rename_mode = rename_mode.lower()

    if order_by is not None:
        pv.compstr(order_by, "order by", ["accessed", "created", "modified"])
        if rename_mode != "keep-order":
            raise Exception("The order-by argument can only be used in "
                            "combination with keep-order mode.")

    step = int(step)

    # The directory is kept absolute and always ends with a separator.
    directory = os.path.abspath(directory)
    if not directory.endswith(os.path.sep):
        directory += os.path.sep

    # Passing a report file switches the function into simulation mode.
    if report_file is None:
        simulate = False
    else:
        pv.path(report_file, "report", True, False)
        report_file = os.path.abspath(report_file)
        simulate = True

    if custom_name is not None:
        pv.string(custom_name, "custom file name", False,
                  common.get_invalid_chars())

    time_start = dt.now()
    list_renamed = []
    list_skipped = []

    regex = None
    if pattern is not None:
        regex = common.compile_regex(pattern, ignore_case, regex_syntax)

    list_content, list_excluded = \
        common.get_files(directory, recursive, ignore_case, regex, exclude,
                         ignore_symlinks, order_by)

    for item in list_content:
        list_files = item[1]
        if rename_mode == "fill-gaps":
            list_renamed, list_skipped = \
                __rename_files_fill(list_files, list_renamed, list_skipped,
                                    separator, padding, True,
                                    ignore_file_ext, custom_name, step)
        elif rename_mode == "rename-new":
            list_renamed, list_skipped = \
                __rename_files_fill(list_files, list_renamed, list_skipped,
                                    separator, padding, False,
                                    ignore_file_ext, custom_name, step)
        elif rename_mode == "keep-order":
            list_renamed, list_skipped = \
                __rename_files_keep_order(list_files, list_renamed,
                                          list_skipped, separator, padding,
                                          ignore_file_ext, custom_name, step,
                                          order_by)
        else:
            raise Exception("An invalid rename mode was given.")

    if not simulate:
        common.rename(list_renamed)
        return

    # From here on the values are only prepared for display in the report.
    if padding == 0:
        padding = "Set automatically"
    else:
        padding = str(padding)

    if exclude is None:
        exclude = False
        explicit = False
    elif exclude:
        explicit = False
    else:
        explicit = True

    if order_by == "accessed":
        order_by = "Access time"
    elif order_by == "created":
        order_by = "Creation time"
    elif order_by == "modified":
        order_by = "Modification time"
    else:
        order_by = "False"

    list_header = []
    list_header.append("Nomen File Renamer simulation report")
    list_header.append(["Report file name:", report_file])
    list_header.append(["Directory:", directory])
    list_header.append(["Recursive:", recursive])
    list_header.append(["Ignore symlinks:", ignore_symlinks])
    list_header.append(["Rename mode:", rename_mode.capitalize()])
    list_header.append(["Order by time:", order_by])
    list_header.append(["Separator:", "\"" + separator + "\" "
                        "(without double quotes)"])
    list_header.append(["Numeric padding:", padding])
    list_header.append(["Step size:", step])
    list_header.append(["Exclude files:", exclude])
    list_header.append(["Explicit files:", explicit])
    list_header.append(["Pattern:", pattern])
    list_header.append(["Ignore case:", ignore_case])
    list_header.append(["Regex syntax:", regex_syntax])

    common.report(report_file, list_header, list_renamed, list_excluded,
                  list_skipped, time_start)
list_header.append("Nomen File Name Case Converter simulation report") list_header.append(["Report file name:", report_file]) list_header.append(["Directory:", directory]) list_header.append(["Recursive:", recursive]) list_header.append(["Ignore symlinks:", ignore_symlinks]) list_header.append(["Conflict mode:", conflict_mode.capitalize()]) list_header.append(["Case:", case.capitalize()]) list_header.append(["Lowercase config:", cfg_lower]) list_header.append(["Mixed case config:", cfg_mixed]) list_header.append(["Title case config:", cfg_title]) list_header.append(["Uppercase config:", cfg_upper]) common.report(report_file, list_header, list_renamed, list_excluded, list_skipped, time_start) else: common.rename(list_renamed) def get_version(): """ Return the version of this module. """ return __version__ def modify_names(directory, action, position, input_string, replace_string=None, recursive=False, exclude=None, pattern=None, ignore_case=True, regex_syntax=False, report_file=None, ignore_symlinks=False, strip_chars=None): """ Modify the base name of files by adding, removing or replacing a user-defined string. """
def rename_extensions(directory, conflict_mode, extension, extension_target,
                      recursive=False, ignore_case=True, report_file=None,
                      ignore_symlinks=False):
    """
        Rename the file extensions in the given directory and all of its
        sub-directories (if requested).

        Several extensions can be given at once, separated by semicolons.
        When a report file is given nothing is renamed; a simulation report
        is written to that file instead.

        Raises an exception if a semicolon-separated extension list does
        not contain any extension.
    """
    pv.path(directory, "given", False, True)
    pv.compstr(conflict_mode, "conflict mode", ["rename", "skip"])
    pv.string(extension, "extension", False, common.get_invalid_chars())
    pv.string(extension_target, "target extension", False,
              common.get_invalid_chars())

    conflict_mode = conflict_mode.lower()

    # The directory is kept absolute and always ends with a separator.
    directory = os.path.abspath(directory)
    if not directory.endswith(os.path.sep):
        directory += os.path.sep

    # Passing a report file switches the function into simulation mode.
    if report_file is None:
        simulate = False
    else:
        pv.path(report_file, "report", True, False)
        report_file = os.path.abspath(report_file)
        simulate = True

    time_start = dt.now()
    list_renamed = []
    list_skipped = []

    if ";" in extension:
        # Dropping empty items also takes care of repeated semicolons.
        list_extensions = [ext for ext in extension.split(";") if ext != ""]
        if len(list_extensions) == 0:
            raise Exception("The given extension list does not contain any "
                            "extensions.")
    else:
        list_extensions = [extension]

    # Extensions are literal text, so they are escaped before being placed
    # into the pattern; otherwise input such as "c++" is not a valid regex.
    pattern = "|".join(r"(.*\." + re.escape(str(ext)) + "$)"
                       for ext in list_extensions)

    if ignore_case:
        regex = re.compile(pattern, re.IGNORECASE)
    else:
        regex = re.compile(pattern)

    list_content, list_excluded = \
        common.get_files(directory, recursive, ignore_case, regex, False,
                         ignore_symlinks)

    for item in list_content:
        list_files = item[1]
        list_renamed, list_skipped = \
            __rename_extensions(list_files, list_extensions, list_renamed,
                                list_skipped, conflict_mode, extension_target)

    if simulate:
        list_header = []
        list_header.append("Nomen Extension Renamer simulation report")
        list_header.append(["Report file name:", report_file])
        list_header.append(["Directory:", directory])
        list_header.append(["Recursive:", recursive])
        list_header.append(["Ignore symlinks:", ignore_symlinks])
        list_header.append(["Conflict mode:", conflict_mode.capitalize()])
        # List every extension that was processed, not just the last one.
        list_header.append(["Extensions:", ";".join(list_extensions)])
        list_header.append(["Target extension:", extension_target])
        list_header.append(["Ignore case:", ignore_case])

        common.report(report_file, list_header, list_renamed, list_excluded,
                      list_skipped, time_start)
    else:
        common.rename(list_renamed)