Example #1
0
def download(url, path, mtime):
    """Download ``url`` to ``path`` unless it was already handled or is current.

    Each ``path`` is processed at most once: it is recorded in the ``files``
    set (guarded by ``lock``) before any work is done. An existing file whose
    modification time equals ``mtime`` is left untouched.

    The body is streamed into a dot-prefixed temporary file next to ``path``,
    synced to disk, stamped with ``mtime``, passed to ``common.mkro``
    (presumably marks it read-only -- confirm) and finally moved into place
    with ``common.rename``. If any of these steps fails, the temporary file
    is removed and the original exception propagates.
    """
    with lock:
        if path in files:
            return
        files.add(path)

    if os.path.exists(path) and os.path.getmtime(path) == mtime:
        return

    common.mkdirs(os.path.dirname(path))

    log(url + " -> " + path)
    r = get(url, stream=True)

    dirname, basename = os.path.split(path)
    temppath = os.path.join(dirname, "." + basename)

    # A flag plus ``finally`` (rather than ``except: ...; raise``) keeps the
    # original exception intact even if the cleanup itself fails.
    completed = False
    try:
        with open(temppath, "wb") as f:
            for data in r.iter_content(4096):
                f.write(data)

            # Make sure the data is on disk before the file is moved into place.
            f.flush()
            os.fsync(f.fileno())

        os.utime(temppath, (mtime, mtime))
        common.mkro(temppath)
        common.rename(temppath, path)
        completed = True
    finally:
        if not completed:
            # Do not leave a partial download lying around.
            try:
                os.unlink(temppath)
            except OSError:
                pass
    def download(self):
        """Fetch, or resume fetching, ``self.href`` into ``self.tempname``.

        The temporary file is opened for appending and its current size is
        sent as the start of an HTTP ``Range`` request, so bytes stored by an
        earlier attempt are not requested again. ``Content-Length`` of the
        response is used to detect a transfer that ended early.

        On an early end or a failed ``self.verify()`` the name is removed from
        ``fileset`` (the comments below describe this as a retry) and the
        method returns; a file that fails verification is deleted as well.
        On success the temporary file is renamed to ``self.name``, passed to
        ``common.mkro`` and, if the server sent ``Last-Modified``, stamped
        with that time.
        """
        # ``with`` guarantees the handle is closed even if the transfer raises.
        with open(self.tempname, "ab") as f:
            size = os.fstat(f.fileno())[stat.ST_SIZE]
            response = requests.get(self.href, stream = True,
                                    headers = {"Range": "bytes=%u-" % size})
            remaining = int(response.headers["Content-Length"])
            r = response.raw
            while True:
                data = r.read(4096)
                # An empty read marks the end of the stream. Test truthiness
                # rather than comparing with "" so that an empty bytes object
                # terminates the loop too.
                if not data:
                    break
                remaining -= len(data)
                f.write(data)
            f.flush()
            os.fsync(f.fileno())

        if remaining > 0:
            # download terminated early, retry
            fileset.remove(self.name)
            return

        if not self.verify():
            # download corrupt, delete and retry
            msg("WARN: verify failed for %s" % self.name)
            os.unlink(self.tempname)
            fileset.remove(self.name)
            return

        common.rename(self.tempname, self.name)
        common.mkro(self.name)

        if "Last-Modified" in response.headers:
            mtime = calendar.timegm(time.strptime(response.headers["Last-Modified"],
                                                  "%a, %d %b %Y %H:%M:%S %Z"))
            os.utime(self.name, (mtime, mtime))
Example #3
0
def download(url, path):
    """Download ``url`` to ``path`` unless the local copy already matches.

    Each ``path`` is processed at most once: it is recorded in the ``files``
    set (guarded by ``lock``) before any work is done. If ``path`` exists, a
    HEAD request is issued and the download is skipped when both the
    ``Last-Modified`` time and the ``Content-Length`` equal the local file's
    modification time and size.

    Otherwise the body is streamed into a temporary path obtained from
    ``common.mktemppath``, synced to disk, stamped with the ``Last-Modified``
    time of the GET response, passed to ``common.mkro`` and moved into place
    with ``common.rename``. If any of these steps fails (for example because
    the response carries no ``Last-Modified`` header), the temporary file is
    removed and the original exception propagates.
    """
    with lock:
        if path in files:
            return
        files.add(path)

    if os.path.exists(path):
        r = tls.s.head(url)
        mtime = common.parse_last_modified(r.headers["Last-Modified"])

        if os.path.getmtime(path) == mtime and \
           os.path.getsize(path) == int(r.headers["Content-Length"]):
            return

    common.mkdirs(os.path.dirname(path))

    log(url + " -> " + path)
    r = tls.s.get(url, stream = True)

    temppath = common.mktemppath(path)

    # A flag plus ``finally`` (rather than ``except: ...; raise``) keeps the
    # original exception intact even if the cleanup itself fails.
    completed = False
    try:
        with open(temppath, "wb") as f:
            for data in r.iter_content(4096):
                f.write(data)

            # Make sure the data is on disk before the file is moved into place.
            f.flush()
            os.fsync(f.fileno())

        mtime = common.parse_last_modified(r.headers["Last-Modified"])
        os.utime(temppath, (mtime, mtime))
        common.mkro(temppath)
        common.rename(temppath, path)
        completed = True
    finally:
        if not completed:
            # Do not leave a partial download lying around.
            try:
                os.unlink(temppath)
            except OSError:
                pass
Example #4
0
def download(url, path):
    """Fetch ``url`` into ``path``, skipping work when nothing has changed.

    A ``path`` already present in the ``files`` set is ignored; otherwise it
    is added to the set while ``lock`` is held. For an existing file a HEAD
    request is made first, and the function returns early when the reported
    ``Last-Modified`` time and ``Content-Length`` both equal the local
    modification time and size.

    The download is written to a temporary path from ``common.mktemppath``,
    flushed and fsynced, given the ``Last-Modified`` time of the GET response,
    handed to ``common.mkro`` and then renamed over ``path``. Should anything
    in that sequence raise, the temporary file is deleted before the
    exception continues to the caller.
    """
    with lock:
        if path in files:
            return
        files.add(path)

    if os.path.exists(path):
        r = tls.s.head(url)
        mtime = common.parse_last_modified(r.headers["Last-Modified"])

        if os.path.getmtime(path) == mtime and \
           os.path.getsize(path) == int(r.headers["Content-Length"]):
            return

    common.mkdirs(os.path.dirname(path))

    log(url + " -> " + path)
    r = tls.s.get(url, stream=True)

    temppath = common.mktemppath(path)

    # Track success explicitly so the ``finally`` clause can tell a clean
    # finish from a failure without disturbing the exception in flight.
    finished = False
    try:
        with open(temppath, "wb") as f:
            for data in r.iter_content(4096):
                f.write(data)

            # Push the data to disk before the rename publishes the file.
            f.flush()
            os.fsync(f.fileno())

        mtime = common.parse_last_modified(r.headers["Last-Modified"])
        os.utime(temppath, (mtime, mtime))
        common.mkro(temppath)
        common.rename(temppath, path)
        finished = True
    finally:
        if not finished:
            # Remove the incomplete temporary file; ignore it if already gone.
            try:
                os.unlink(temppath)
            except OSError:
                pass
Example #5
0
def download(url, path, mtime):
  """Fetch ``url`` into ``path`` unless it is already handled or up to date.

  A ``path`` already present in the ``files`` set is ignored; otherwise it is
  added to the set while ``lock`` is held. An existing file whose
  modification time equals ``mtime`` is not downloaded again.

  The data is written to a dot-prefixed temporary file in the same directory,
  flushed and fsynced, stamped with ``mtime``, handed to ``common.mkro`` and
  then renamed over ``path``. Should anything in that sequence raise, the
  temporary file is deleted before the exception continues to the caller.
  """
  with lock:
    if path in files:
      return
    files.add(path)

  if os.path.exists(path) and os.path.getmtime(path) == mtime:
    return

  common.mkdirs(os.path.dirname(path))

  log(url + " -> " + path)
  r = get(url, stream = True)

  dirname, basename = os.path.split(path)
  temppath = os.path.join(dirname, "." + basename)

  # Track success explicitly so the ``finally`` clause can tell a clean
  # finish from a failure without disturbing the exception in flight.
  finished = False
  try:
    with open(temppath, "wb") as f:
      for data in r.iter_content(4096):
        f.write(data)

      # Push the data to disk before the rename publishes the file.
      f.flush()
      os.fsync(f.fileno())

    os.utime(temppath, (mtime, mtime))
    common.mkro(temppath)
    common.rename(temppath, path)
    finished = True
  finally:
    if not finished:
      # Remove the incomplete temporary file; ignore it if already gone.
      try:
        os.unlink(temppath)
      except OSError:
        pass
Example #6
0
def extract(path):
    """Save the attachments of every message in the mbox file ``path``.

    Does nothing unless ``config["attachments-enabled"]`` is ``"1"``. The
    mailbox is read from ``config["lists-base"]/path`` and attachments are
    written below ``config["attachments-base"]/path``.

    A MIME part is saved when it has a filename, its ``Content-Disposition``
    does not start with ``inline`` (a missing header counts as inline) and its
    content type is not one of the skipped types listed below. When
    ``config["attachments-odponly"]`` is ``"1"``, only parts whose filename
    ends in ``.odp`` or whose type starts with the OpenDocument presentation
    type are kept.

    Files are named ``<message key>-<part index>-<cleaned filename>`` and are
    never overwritten. A part whose handling raises ``UnicodeEncodeError`` is
    skipped silently.
    """
    if config["attachments-enabled"] != "1":
        return

    print("Extracting attachments from %s..." % path, file = sys.stderr)

    # Content types that are never stored as attachments.
    skipped_types = (
        'application/pgp-signature', 'application/pkcs7-signature',
        'application/x-pkcs7-signature', 'image/x-icon',
        'message/external-body', 'message/rfc822', 'text/calendar',
        'text/x-vcard')

    mbox = mailbox.mbox(config["lists-base"] + "/" + path)

    for msg in mbox.keys():
        index = 0
        for part in mbox[msg].walk():
            fn = part.get_filename()
            typ = part.get_content_type()
            if fn is not None \
                    and not mailindex.decode(part.get("Content-Disposition", "inline")).startswith("inline") \
                    and typ not in skipped_types:

                p = config["attachments-base"] + "/" + path

                try:
                    fn = cleanfilename(fn)

                    if config["attachments-odponly"] != "1" or \
                            fn.lower().endswith(".odp") or \
                            typ.lower().startswith("application/vnd.oasis.opendocument.presentation"):
                        common.mkdirs(p)
                        p += "/%03u-%03u-%s" % (msg, index, fn)

                        if not os.path.exists(p):
                            temppath = common.mktemppath(p)

                            # The context manager closes the handle even when
                            # writing the payload fails.
                            with open(temppath, "wb") as f:
                                f.write(part.get_payload(decode = True))

                                f.flush()
                                os.fsync(f.fileno())

                            common.rename(temppath, p)
                            common.mkro(p)

                except UnicodeEncodeError:
                    # Deliberate best-effort: skip attachments whose name
                    # cannot be encoded.
                    pass

            # Counts every walked part, not only the saved ones.
            index += 1
Example #7
0
	def _get_segment(self):
		"""Download self.segment to disk, keeping partial data if the fetch fails.

		The response body is streamed into a "temp" path while a SHA-256 of the
		received bytes is accumulated. A 403 or 404 response is logged and the
		method returns without writing anything. If an Exception is raised after
		the temp file was opened, whatever was written is renamed to a "partial"
		path, the "partial" counters are incremented and the exception is
		re-raised. On success the file is renamed to a "full" path, or to a
		"suspect" path when the request took FETCH_SUSPECT_TIME or longer, and
		the download counters and the latest_segment gauge are updated.
		"""
		# save current value of self.retry so we can't set any later instance
		# after a retry for this round has already occurred.
		retry = self.retry
		temp_path = self.make_path("temp")
		hash = hashlib.sha256()
		# Tracks whether temp_path exists, so the error handler knows if there
		# is anything to preserve.
		file_created = False
		try:
			self.logger.debug("Downloading segment {} to {}".format(self.segment, temp_path))
			start_time = monotonic()
			# NOTE(review): retry.set is handed to both timeout contexts; presumably it
			# is invoked when a timeout fires -- confirm against soft_hard_timeout.
			with soft_hard_timeout(self.logger, "getting and writing segment", self.FETCH_FULL_TIMEOUTS, retry.set):
				with soft_hard_timeout(self.logger, "getting segment headers", self.FETCH_HEADERS_TIMEOUTS, retry.set):
					resp = self.session.get(self.segment.uri, stream=True, metric_name='get_segment')
				# twitch returns 403 for expired segment urls, and 404 for very old urls where the original segment is gone.
				# the latter can happen if we have a network issue that cuts us off from twitch for some time.
				if resp.status_code in (403, 404):
					self.logger.warning("Got {} for segment, giving up: {}".format(resp.status_code, self.segment))
					return
				resp.raise_for_status()
				common.ensure_directory(temp_path)
				with open(temp_path, 'wb') as f:
					file_created = True
					# We read chunk-wise in 8KiB chunks. Note that if the connection cuts halfway,
					# we may lose part of the last chunk even though we did receive it.
					# This is a small enough amount of data that we don't really care.
					for chunk in resp.iter_content(8192):
						common.writeall(f.write, chunk)
						hash.update(chunk)
		except Exception as e:
			if file_created:
				# Keep what was received: the partial file is named from the hash
				# of the bytes written so far.
				partial_path = self.make_path("partial", hash)
				self.logger.warning("Saving partial segment {} as {}".format(temp_path, partial_path))
				common.rename(temp_path, partial_path)
				segments_downloaded.labels(type="partial", channel=self.channel, quality=self.quality).inc()
				segment_duration_downloaded.labels(type="partial", channel=self.channel, quality=self.quality).inc(self.segment.duration)
			raise e
		else:
			request_duration = monotonic() - start_time
			# A download that took FETCH_SUSPECT_TIME or longer is stored as "suspect"
			# instead of "full".
			segment_type = "full" if request_duration < self.FETCH_SUSPECT_TIME else "suspect"
			full_path = self.make_path(segment_type, hash)
			self.logger.debug("Saving completed segment {} as {}".format(temp_path, full_path))
			common.rename(temp_path, full_path)
			segments_downloaded.labels(type=segment_type, channel=self.channel, quality=self.quality).inc()
			segment_duration_downloaded.labels(type=segment_type, channel=self.channel, quality=self.quality).inc(self.segment.duration)
			# Prom doesn't provide a way to compare value to gauge's existing value,
			# we need to reach into internals
			stat = latest_segment.labels(channel=self.channel, quality=self.quality)
			# Seconds between 1970-01-01 and self.date; the gauge only ever moves forward.
			timestamp = (self.date - datetime.datetime(1970, 1, 1)).total_seconds()
			stat.set(max(stat._value.get(), timestamp)) # NOTE: not thread-safe but is gevent-safe
Example #8
0
def get_remote_segment(base_dir,
                       node,
                       channel,
                       quality,
                       hour,
                       missing_segment,
                       logger,
                       timeout=TIMEOUT):
    """Get a segment from a node.

    Fetches channel/quality/hour/missing_segment from node and puts it in
    base_dir/channel/quality/hour/missing_segment. If the segment already exists
    locally, this does not attempt to fetch it.

    The segment is streamed into a uniquely named temp file in the target
    directory while its SHA-256 is computed. If the digest differs from the
    hash parsed out of the segment's file name, the temp file is removed, the
    hash_mismatches counter is incremented and the function returns without
    saving. If the fetch raises, the temp file is removed (when present) and
    the exception is re-raised. Otherwise the temp file is renamed to the
    final path and the segments_backfilled counter is incremented.
    """

    path = os.path.join(base_dir, channel, quality, hour, missing_segment)
    # check to see if file was created since we listed the local segments to
    # avoid unnecessarily copying
    if os.path.exists(path):
        logging.debug('Skipping existing segment {}'.format(path))
        return

    dir_name = os.path.dirname(path)
    # The temp name keeps the first two dash-separated fields of the segment
    # name and swaps the remainder for "temp" plus a random UUID.
    date, duration, _ = os.path.basename(path).split('-', 2)
    temp_name = "-".join([date, duration, "temp", str(uuid.uuid4())])
    temp_path = os.path.join(dir_name, "{}.ts".format(temp_name))
    common.ensure_directory(temp_path)
    hash = hashlib.sha256()

    try:
        logging.debug('Fetching segment {} from {}'.format(path, node))
        uri = '{}/segments/{}/{}/{}/{}'.format(node, channel, quality, hour,
                                               missing_segment)
        resp = requests.get(uri,
                            stream=True,
                            timeout=timeout,
                            metric_name='get_remote_segment')

        resp.raise_for_status()

        # Segment data is binary: text mode would reject bytes on Python 3 and
        # could translate newlines on some platforms.
        with open(temp_path, 'wb') as f:
            for chunk in resp.iter_content(8192):
                f.write(chunk)
                hash.update(chunk)

        filename_hash = common.parse_segment_path(missing_segment).hash
        if filename_hash != hash.digest():
            logger.warning(
                'Hash of segment {} does not match. Discarding segment'.format(
                    missing_segment))
            hash_mismatches.labels(remote=node,
                                   channel=channel,
                                   quality=quality,
                                   hour=hour).inc()
            os.remove(temp_path)
            return

    #try to get rid of the temp file if an exception is raised.
    except Exception:
        if os.path.exists(temp_path):
            os.remove(temp_path)
        raise
    logging.debug('Saving completed segment {} as {}'.format(temp_path, path))
    common.rename(temp_path, path)
    segments_backfilled.labels(remote=node,
                               channel=channel,
                               quality=quality,
                               hour=hour).inc()
    logger.info('Segment {}/{}/{} backfilled'.format(quality, hour,
                                                     missing_segment))
Example #9
0
def modify_names(directory, action, position, input_string,
                 replace_string=None, recursive=False, exclude=None,
                 pattern=None, ignore_case=True, regex_syntax=False,
                 report_file=None, ignore_symlinks=False, strip_chars=None):
    """
        Modify the base name of files by adding, removing or replacing a
        user-defined string.

        The files are collected with common.get_files() and the planned
        renames are gathered by __modify_names(). If a report file is given,
        nothing is renamed and a simulation report is written with
        common.report() instead; otherwise the renames are carried out with
        common.rename().

        An Exception is raised if a replace string is given together with an
        action other than 'replace', if the action 'add' is combined with
        the position 'any', or if the input string is empty.
    """
    pv.path(directory, "given", False, True)
    pv.compstr(action, "action", ["add", "remove", "replace"])
    pv.compstr(position, "position", ["any", "prefix", "suffix"])
    pv.string(input_string, "input string", False, common.get_invalid_chars())

    action = action.lower()
    position = position.lower()
    directory = os.path.abspath(directory)
    if not directory.endswith(os.path.sep):
        directory += os.path.sep

    # A report file switches the function into simulation mode.
    simulate = report_file is not None
    if simulate:
        pv.path(report_file, "report", True, False)
        report_file = os.path.abspath(report_file)

    if replace_string is not None:
        if action != "replace":
            raise Exception("The replace string argument can only be used " \
                            "together with the action 'replace'.")
        pv.string(replace_string, "replace string", False,
                  common.get_invalid_chars())

    if action == "add" and position == "any":
        raise Exception("The position 'any' cannot be used together with " \
                        "the action 'add'.")

    # The input string itself was already validated above.
    if len(input_string) == 0:
        raise Exception("The input string must not be empty.")

    if strip_chars is not None:
        pv.string(strip_chars, "strip chars string", False,
                  common.get_invalid_chars())

    time_start = dt.now()

    list_renamed = []
    list_skipped = []
    regex = None
    if pattern is not None:
        regex = common.compile_regex(pattern, ignore_case, regex_syntax)

    list_content, list_excluded = \
        common.get_files(directory, recursive, ignore_case, regex, exclude,
                         ignore_symlinks)
    for item in list_content:
        list_files = item[1]
        # The return value is not used here, so list_renamed and list_skipped
        # are presumably filled in place -- confirm against __modify_names.
        __modify_names(list_files, list_renamed, list_skipped, action,
                       position, input_string, replace_string, strip_chars)

    if simulate:
        # "Explicit" is reported only when exclude was given and is false.
        explicit = exclude is not None and not exclude
        if exclude is None:
            exclude = False

        list_header = []
        list_header.append("Nomen File Name Modifier simulation report")
        list_header.append(["Report file name:", report_file])
        list_header.append(["Directory:", directory])
        list_header.append(["Recursive:", recursive])
        list_header.append(["Ignore symlinks:", ignore_symlinks])
        list_header.append(["Action to perform:", action.capitalize()])
        list_header.append(["Position:", position.capitalize()])
        list_header.append(["Input string:", "\"" + input_string + "\" " \
                            "(without double quotes)"])
        if replace_string is not None:
            list_header.append(["Replace string:", "\"" + replace_string + \
                                "\" (without double quotes)"])
        if strip_chars is None:
            list_header.append(["Strip chars:", "None"])
        else:
            list_header.append(["Strip chars:", "\"" + strip_chars + "\" " \
                                "(without double quotes)"])

        list_header.append(["Exclude files:", exclude])
        list_header.append(["Explicit files:", explicit])
        list_header.append(["Pattern:", pattern])
        list_header.append(["Ignore case:", ignore_case])
        list_header.append(["Regex syntax:", regex_syntax])

        common.report(report_file, list_header, list_renamed, list_excluded,
                      list_skipped, time_start)
    else:
        common.rename(list_renamed)
Example #10
0
def rename_files(directory, rename_mode, separator=" ", recursive=False,
                 padding=0, exclude=None, pattern=None, ignore_case=True,
                 regex_syntax=False, report_file=None, ignore_symlinks=False,
                 ignore_file_ext=False, custom_name=None, step=1,
                 order_by=None):
    """
        Rename the base name of files based on the name of the directory where
        they are stored in and add a numeric ID.

        The files are collected with common.get_files() and handed to the
        helper that belongs to the rename mode. If a report file is given,
        nothing is renamed and a simulation report is written with
        common.report() instead; otherwise the renames are carried out with
        common.rename().

        An Exception is raised if the order-by argument is given for a mode
        other than 'keep-order', or if files were found and the rename mode
        is not handled by any of the helpers.
    """
    pv.path(directory, "given", False, True)
    pv.compstr(rename_mode, "rename mode",
               ["fill-gaps", "increase", "keep-order", "rename-new"])
    pv.intrange(padding, "padding", 0, 12, True)
    pv.string(separator, "separator", False, common.get_invalid_chars())
    pv.intvalue(step, "step", True, False, False)

    # Normalise the case before the mode is compared below; otherwise a
    # mixed-case 'keep-order' would be rejected together with order-by.
    rename_mode = rename_mode.lower()

    if order_by is not None:
        pv.compstr(order_by, "order by", ["accessed", "created", "modified"])
        order_by = order_by.lower()
        if rename_mode != "keep-order":
            raise Exception("The order-by argument can only be used in " \
                            "combination with keep-order mode.")

    step = int(step)
    directory = os.path.abspath(directory)
    if not directory.endswith(os.path.sep):
        directory += os.path.sep

    # A report file switches the function into simulation mode.
    simulate = report_file is not None
    if simulate:
        pv.path(report_file, "report", True, False)
        report_file = os.path.abspath(report_file)

    if custom_name is not None:
        pv.string(custom_name, "custom file name", False,
                  common.get_invalid_chars())

    time_start = dt.now()

    list_renamed = []
    list_skipped = []
    regex = None
    if pattern is not None:
        regex = common.compile_regex(pattern, ignore_case, regex_syntax)

    list_content, list_excluded = \
        common.get_files(directory, recursive, ignore_case, regex, exclude,
                         ignore_symlinks, order_by)

    for item in list_content:
        list_files = item[1]
        if rename_mode == "fill-gaps":
            list_renamed, list_skipped = \
                __rename_files_fill(list_files, list_renamed, list_skipped,
                                    separator, padding, True, ignore_file_ext,
                                    custom_name, step)
        elif rename_mode == "rename-new":
            list_renamed, list_skipped = \
                __rename_files_fill(list_files, list_renamed, list_skipped,
                                    separator, padding, False,
                                    ignore_file_ext, custom_name, step)
        elif rename_mode == "keep-order":
            list_renamed, list_skipped = \
                __rename_files_keep_order(list_files, list_renamed,
                                          list_skipped, separator, padding,
                                          ignore_file_ext, custom_name, step,
                                          order_by)
        else:
            # NOTE(review): 'increase' passes the validation above but has no
            # branch here, so it ends up in this error -- confirm intent.
            raise Exception("An invalid rename mode was given.")

    if simulate:
        if padding == 0:
            padding = "Set automatically"
        else:
            padding = str(padding)

        # "Explicit" is reported only when exclude was given and is false.
        explicit = exclude is not None and not exclude
        if exclude is None:
            exclude = False

        order_by = {
            "accessed": "Access time",
            "created": "Creation time",
            "modified": "Modification time",
        }.get(order_by, "False")

        list_header = []
        list_header.append("Nomen File Renamer simulation report")
        list_header.append(["Report file name:", report_file])
        list_header.append(["Directory:", directory])
        list_header.append(["Recursive:", recursive])
        list_header.append(["Ignore symlinks:", ignore_symlinks])
        list_header.append(["Rename mode:", rename_mode.capitalize()])
        list_header.append(["Order by time:", order_by])
        list_header.append(["Separator:", "\"" + separator + "\" " \
                            "(without double quotes)"])
        list_header.append(["Numeric padding:", padding])
        list_header.append(["Step size:", step])
        list_header.append(["Exclude files:", exclude])
        list_header.append(["Explicit files:", explicit])
        list_header.append(["Pattern:", pattern])
        list_header.append(["Ignore case:", ignore_case])
        list_header.append(["Regex syntax:", regex_syntax])

        common.report(report_file, list_header, list_renamed, list_excluded,
                      list_skipped, time_start)
    else:
        common.rename(list_renamed)
Example #11
0
        list_header.append("Nomen File Name Case Converter simulation report")
        list_header.append(["Report file name:", report_file])
        list_header.append(["Directory:", directory])
        list_header.append(["Recursive:", recursive])
        list_header.append(["Ignore symlinks:", ignore_symlinks])
        list_header.append(["Conflict mode:", conflict_mode.capitalize()])
        list_header.append(["Case:", case.capitalize()])
        list_header.append(["Lowercase config:", cfg_lower])
        list_header.append(["Mixed case config:", cfg_mixed])
        list_header.append(["Title case config:", cfg_title])
        list_header.append(["Uppercase config:", cfg_upper])

        common.report(report_file, list_header, list_renamed, list_excluded,
                      list_skipped, time_start)
    else:
        common.rename(list_renamed)

def get_version():
    """
        Report this module's version, i.e. the value of __version__.
    """
    module_version = __version__
    return module_version

def modify_names(directory, action, position, input_string,
                 replace_string=None, recursive=False, exclude=None,
                 pattern=None, ignore_case=True, regex_syntax=False,
                 report_file=None, ignore_symlinks=False, strip_chars=None):
    """
        Modify the base name of files by adding, removing or replacing a
        user-defined string.
    """
Example #12
0
def rename_extensions(directory,
                      conflict_mode,
                      extension,
                      extension_target,
                      recursive=False,
                      ignore_case=True,
                      report_file=None,
                      ignore_symlinks=False):
    """
        Rename the file extensions in the given directory and all of its
        sub-directories (if requested).

        The extension argument may hold several extensions separated by
        semicolons; empty entries are ignored. Files are selected with a
        regular expression that matches names ending in a dot followed by
        one of the extensions, which are matched literally.

        If a report file is given, nothing is renamed and a simulation
        report is written with common.report() instead; otherwise the
        renames are carried out with common.rename().

        An Exception is raised if a semicolon-separated extension list
        contains no extensions at all.
    """
    pv.path(directory, "given", False, True)
    pv.compstr(conflict_mode, "conflict mode", ["rename", "skip"])
    pv.string(extension, "extension", False, common.get_invalid_chars())
    pv.string(extension_target, "target extension", False,
              common.get_invalid_chars())

    conflict_mode = conflict_mode.lower()
    directory = os.path.abspath(directory)
    if not directory.endswith(os.path.sep):
        directory += os.path.sep

    # A report file switches the function into simulation mode.
    simulate = report_file is not None
    if simulate:
        pv.path(report_file, "report", True, False)
        report_file = os.path.abspath(report_file)

    time_start = dt.now()

    list_renamed = []
    list_skipped = []

    if ";" in extension:
        # Dropping empty entries also takes care of repeated semicolons.
        list_extensions = [ext for ext in extension.split(";") if ext]
        if not list_extensions:
            raise Exception("The given extension list does not contain any " \
                            "extensions.")
    else:
        list_extensions = [extension]

    # Escape each extension so characters such as "." or "+" are matched
    # literally instead of being interpreted as regex syntax.
    pattern = "|".join("(.*\\." + re.escape(str(ext)) + "$)"
                       for ext in list_extensions)

    if ignore_case:
        regex = re.compile(pattern, re.IGNORECASE)
    else:
        regex = re.compile(pattern)

    list_content, list_excluded = \
        common.get_files(directory, recursive, ignore_case, regex, False,
                         ignore_symlinks)
    for item in list_content:
        list_files = item[1]
        list_renamed, list_skipped = \
            __rename_extensions(list_files, list_extensions, list_renamed,
                                list_skipped, conflict_mode, extension_target)

    if simulate:
        list_header = []
        list_header.append("Nomen Extension Renamer simulation report")
        list_header.append(["Report file name:", report_file])
        list_header.append(["Directory:", directory])
        list_header.append(["Recursive:", recursive])
        list_header.append(["Ignore symlinks:", ignore_symlinks])
        list_header.append(["Conflict mode:", conflict_mode.capitalize()])
        # Report every requested extension, not just the last one processed.
        list_header.append(["Extensions:", ";".join(list_extensions)])
        list_header.append(["Target extension:", extension_target])
        list_header.append(["Ignore case:", ignore_case])

        common.report(report_file, list_header, list_renamed, list_excluded,
                      list_skipped, time_start)
    else:
        common.rename(list_renamed)