Esempio n. 1
0
    def create_coverage_page(self, quality):
        nodes = {}
        try:
            connection = self.db_manager.get_conn()
            host = [
                s.split('=')[-1] for s in connection.dsn.split() if 'host' in s
            ][0]
            self.logger.info('Fetching list of nodes from {}'.format(host))
            results = database.query(
                connection, """
				SELECT name, url
				FROM nodes
				WHERE backfill_from""")
            for row in results:
                nodes[row.name] = row.url
        except:
            self.logger.exception('Getting nodes failed.', exc_info=True)
            return

        self.logger.info('Nodes fetched: {}'.format(list(nodes.keys())))

        html = """<!DOCTYPE html>
<html>
  <head>
    <meta charset="utf-8">
    <meta http-equiv="refresh" content="30"/>
    <title>{0} {1} Segment Coverage Maps</title>
      <style>
        html {{ background-color: #222;}}
        h1   {{ color: #eee;
               text-align: center;
               font-family: sans-serif;}}
        h3   {{ color: #eee;
               text-align: center;
               font-family: sans-serif;}}
       img  {{ display: block;
              margin-left: auto;
              margin-right: auto;}}
    </style>
  </head>
  <body>
    <h1>{0} {1}</h1>""".format(self.channel, quality)

        for node in sorted(nodes.keys()):
            html += """    <h3>{}</h3>
	<img src="{}/segments/coverage-maps/{}_{}_coverage.png" alt="{}">
""".format(node, nodes[node], self.channel, quality, node)

        html += """  </body>
</html>"""

        path_prefix = os.path.join(self.base_dir, 'coverage-maps',
                                   '{}_{}'.format(self.channel, quality))
        temp_path = '{}_{}.html'.format(path_prefix, uuid.uuid4())
        final_path = '{}_coverage.html'.format(path_prefix)
        common.ensure_directory(temp_path)
        with open(temp_path, 'w') as f:
            common.writeall(f.write, html)
        os.rename(temp_path, final_path)
        self.logger.info('Coverage page for {} created'.format(quality))
Esempio n. 2
0
	def _get_segment(self):
		"""Download self.segment to disk and file it according to the outcome.

		The body is streamed into the path given by self.make_path("temp") while
		a SHA-256 of the received bytes is accumulated. Afterwards the temp file
		is renamed to a path built by self.make_path from a type and that hash:

		- "full" when the download finished in under self.FETCH_SUSPECT_TIME,
		- "suspect" when it finished but took at least that long,
		- "partial" when an exception interrupted it after the file was opened
		  (the exception is then re-raised).

		A 403 or 404 response is logged and abandoned without writing anything.
		Download counters are incremented for each saved file, and on success
		the latest_segment gauge is raised to this segment's timestamp if that
		is newer than the value it currently holds.
		"""
		# Hold on to the retry event that is current right now. If a retry has
		# already been triggered for this round, self.retry may later point at
		# a newer instance, and we must not set that one.
		retry_event = self.retry
		download_path = self.make_path("temp")
		digest = hashlib.sha256()
		wrote_file = False
		try:
			self.logger.debug("Downloading segment {} to {}".format(self.segment, download_path))
			began = monotonic()
			with soft_hard_timeout(self.logger, "getting and writing segment", self.FETCH_FULL_TIMEOUTS, retry_event.set):
				with soft_hard_timeout(self.logger, "getting segment headers", self.FETCH_HEADERS_TIMEOUTS, retry_event.set):
					response = self.session.get(self.segment.uri, stream=True, metric_name='get_segment')
				# Twitch answers 403 for expired segment urls and 404 for very old urls
				# whose original segment is gone. The latter can happen when a network
				# issue cut us off from twitch for a while.
				status = response.status_code
				if status == 403 or status == 404:
					self.logger.warning("Got {} for segment, giving up: {}".format(status, self.segment))
					return
				response.raise_for_status()
				common.ensure_directory(download_path)
				with open(download_path, 'wb') as out:
					wrote_file = True
					# Read in 8KiB pieces. If the connection drops midway we may lose
					# part of the final piece even though it was received; that is
					# little enough data not to matter.
					for piece in response.iter_content(8192):
						common.writeall(out.write, piece)
						digest.update(piece)
		except Exception as e:
			if wrote_file:
				# Keep whatever we managed to fetch, filed under its own hash.
				partial_path = self.make_path("partial", digest)
				self.logger.warning("Saving partial segment {} as {}".format(download_path, partial_path))
				common.rename(download_path, partial_path)
				labels = {"channel": self.channel, "quality": self.quality}
				segments_downloaded.labels(type="partial", **labels).inc()
				segment_duration_downloaded.labels(type="partial", **labels).inc(self.segment.duration)
			raise e
		else:
			elapsed = monotonic() - began
			if elapsed < self.FETCH_SUSPECT_TIME:
				segment_type = "full"
			else:
				segment_type = "suspect"
			full_path = self.make_path(segment_type, digest)
			self.logger.debug("Saving completed segment {} as {}".format(download_path, full_path))
			common.rename(download_path, full_path)
			labels = {"channel": self.channel, "quality": self.quality}
			segments_downloaded.labels(type=segment_type, **labels).inc()
			segment_duration_downloaded.labels(type=segment_type, **labels).inc(self.segment.duration)
			# Prometheus offers no way to compare a new value against the gauge's
			# current one, so we have to reach into its internals.
			gauge = latest_segment.labels(**labels)
			# Seconds between 1970-01-01 and self.date.
			epoch_seconds = (self.date - datetime.datetime(1970, 1, 1)).total_seconds()
			gauge.set(max(gauge._value.get(), epoch_seconds)) # NOTE: not thread-safe but is gevent-safe
Esempio n. 3
0
def get_remote_segment(base_dir,
                       node,
                       channel,
                       quality,
                       hour,
                       missing_segment,
                       logger,
                       timeout=TIMEOUT):
    """Get a segment from a node.

    Fetches channel/quality/hour/missing_segment from node and puts it in
    base_dir/channel/quality/hour/missing_segment. If the segment already
    exists locally, this does not attempt to fetch it.

    The download is streamed into a uniquely named temporary file in the
    destination directory while a SHA-256 of the received bytes is computed.
    That digest is compared with the hash parsed from the segment's file name:
    on a mismatch the temporary file is deleted, the hash_mismatches counter
    is incremented and nothing is saved. Otherwise the temporary file is
    renamed to its final path and the segments_backfilled counter is
    incremented.

    base_dir -- root directory of the local segment store
    node -- base URL of the node to fetch from
    channel, quality, hour -- path components identifying where the segment lives
    missing_segment -- file name of the segment to fetch
    logger -- logger used for all messages emitted by this function
    timeout -- timeout passed through to the HTTP request

    Any exception raised while downloading is re-raised after a best-effort
    removal of the temporary file.
    """

    path = os.path.join(base_dir, channel, quality, hour, missing_segment)
    # check to see if file was created since we listed the local segments to
    # avoid unnecessarily copying
    if os.path.exists(path):
        logger.debug('Skipping existing segment {}'.format(path))
        return

    # The temporary name keeps the segment's leading date and duration fields
    # and replaces the remainder with "temp" plus a random UUID.
    dir_name = os.path.dirname(path)
    date, duration, _ = os.path.basename(path).split('-', 2)
    temp_name = "-".join([date, duration, "temp", str(uuid.uuid4())])
    temp_path = os.path.join(dir_name, "{}.ts".format(temp_name))
    common.ensure_directory(temp_path)
    content_hash = hashlib.sha256()

    try:
        logger.debug('Fetching segment {} from {}'.format(path, node))
        uri = '{}/segments/{}/{}/{}/{}'.format(node, channel, quality, hour,
                                               missing_segment)
        resp = requests.get(uri,
                            stream=True,
                            timeout=timeout,
                            metric_name='get_remote_segment')

        resp.raise_for_status()

        # Segment data is binary: the file must be opened in binary mode so
        # the chunks are written unmodified (and so that writing bytes works
        # at all on Python 3).
        with open(temp_path, 'wb') as f:
            for chunk in resp.iter_content(8192):
                f.write(chunk)
                content_hash.update(chunk)

        filename_hash = common.parse_segment_path(missing_segment).hash
        if filename_hash != content_hash.digest():
            logger.warning(
                'Hash of segment {} does not match. Discarding segment'.format(
                    missing_segment))
            hash_mismatches.labels(remote=node,
                                   channel=channel,
                                   quality=quality,
                                   hour=hour).inc()
            os.remove(temp_path)
            return

    # try to get rid of the temp file if an exception is raised.
    except Exception:
        if os.path.exists(temp_path):
            os.remove(temp_path)
        raise
    logger.debug('Saving completed segment {} as {}'.format(temp_path, path))
    common.rename(temp_path, path)
    segments_backfilled.labels(remote=node,
                               channel=channel,
                               quality=quality,
                               hour=hour).inc()
    logger.info('Segment {}/{}/{} backfilled'.format(quality, hour,
                                                     missing_segment))
Esempio n. 4
0
    def create_coverage_map(self,
                            quality,
                            all_hour_holes,
                            all_hour_partials,
                            pixel_length=2,
                            rows=300):
        """Create a PNG image showing segment coverage.

        Each pixel represents pixel_length seconds, with time increasing from
        top to bottom along each column then left to right. By default each
        pixel is 2 s and each column of the image represents 10 min. White
        pixels have no coverage, orange pixels only have coverage by partial
        segments and blue pixels have coverage by full segments. If any part
        of a pixel does not have coverage, it is marked as not having coverage.
        Likewise, if only a partial segment is available for any part of a
        pixel, it is marked as partial. Hours in the plotted range that are
        missing from all_hour_holes are drawn as having no coverage.

        The plotted range runs from self.first_hour to self.last_hour (each
        truncated to the start of its hour); where one of them is None, the
        smallest / largest key of all_hour_holes is used instead.

        The image is written to a uniquely named temporary file under
        <base_dir>/coverage-maps/ and then renamed to
        <channel>_<quality>_coverage.png. If all_hour_holes is empty a warning
        is logged and nothing is written.

        quality -- the stream quality, used in log messages and the file name
        all_hour_holes -- a dict mapping hours to lists of holes
        all_hour_partials -- a dict mapping hours to lists of partial segments
        pixel_length -- length of a pixel in seconds; must divide 3600 evenly
        rows -- the height of the image; must divide the total number of
                pixels (hours * 3600 / pixel_length) evenly
        """

        if not all_hour_holes:
            self.logger.warning('No hours to generate coverage map from')
            return

        if self.first_hour is None:
            first_hour = datetime.datetime.strptime(min(all_hour_holes.keys()),
                                                    HOUR_FMT)
        else:
            first_hour = self.first_hour.replace(minute=0,
                                                 second=0,
                                                 microsecond=0)
        if self.last_hour is None:
            last_hour = datetime.datetime.strptime(max(all_hour_holes.keys()),
                                                   HOUR_FMT)
        else:
            last_hour = self.last_hour.replace(minute=0,
                                               second=0,
                                               microsecond=0)
        self.logger.info('Creating coverage map for {} from {} to {}'.format(
            quality, first_hour.strftime(HOUR_FMT),
            last_hour.strftime(HOUR_FMT)))

        # every hour from first_hour to last_hour inclusive
        hours = []
        latest_hour = first_hour
        while latest_hour <= last_hour:
            hours.append(latest_hour)
            latest_hour += datetime.timedelta(hours=1)

        pixel_starts = np.arange(
            0, 3600,
            pixel_length)  # start times of the pixels in an hour in seconds
        pixel_ends = np.arange(
            pixel_length, 3601,
            pixel_length)  # end times of the pixels in an hour in seconds
        # Number of pixels in an hour. Floor division keeps this an int on
        # Python 3 as well; a float here breaks array sizes and slicing.
        pixel_count = 3600 // pixel_length
        coverage_mask = np.zeros(len(hours) * pixel_count, dtype=np.bool_)
        partial_mask = np.zeros(len(hours) * pixel_count, dtype=np.bool_)
        for i, hour in enumerate(hours):
            hour_str = hour.strftime(HOUR_FMT)
            if hour_str not in all_hour_holes:
                # no data for this hour: leave it marked as uncovered
                continue

            hour_coverage = np.ones(pixel_count, dtype=np.bool_)
            hour_partial = np.zeros(pixel_count, dtype=np.bool_)

            for hole in all_hour_holes[hour_str]:
                hole_start = np.floor(
                    (hole[0] - hour).total_seconds() / pixel_length
                ) * pixel_length  # the start of the pixel containing the start of the hole
                hole_end = np.ceil(
                    (hole[1] - hour).total_seconds() / pixel_length
                ) * pixel_length  # the end of the pixel containing the end of the hole
                # a pixel stays covered only if it lies entirely outside the
                # pixel-aligned extent of the hole
                hour_coverage = hour_coverage & (
                    (pixel_starts < hole_start) | (pixel_ends > hole_end))

            for partial in all_hour_partials[hour_str]:
                partial_start = np.floor(
                    (partial[0] - hour).total_seconds() / pixel_length
                ) * pixel_length  # the start of the pixel containing the start of the partial segment
                partial_end = np.ceil(
                    (partial[1] - hour).total_seconds() / pixel_length
                ) * pixel_length  # the end of the pixel containing the end of the partial segment
                # a pixel is partial if it lies inside the pixel-aligned
                # extent of any partial segment
                hour_partial = hour_partial | (
                    (pixel_starts >= partial_start) &
                    (pixel_ends <= partial_end))

            hour_slice = slice(i * pixel_count, (i + 1) * pixel_count)
            coverage_mask[hour_slice] = hour_coverage
            partial_mask[hour_slice] = hour_partial

        # convert the flat masks into 2-D arrays; consecutive runs of `rows`
        # pixels become successive columns of the image
        columns = coverage_mask.size // rows
        coverage_mask = coverage_mask.reshape((columns, rows)).T
        partial_mask = partial_mask.reshape((columns, rows)).T

        # use the masks to set the actual pixel colours (default is white)
        colours = np.ones((rows, columns, 3))
        colours[coverage_mask] = matplotlib.colors.to_rgb('tab:blue')
        colours[coverage_mask
                & partial_mask] = matplotlib.colors.to_rgb('tab:orange')
        # write the pixel array to a temporary file then atomically rename it
        path_prefix = os.path.join(self.base_dir, 'coverage-maps',
                                   '{}_{}'.format(self.channel, quality))
        temp_path = '{}_{}.png'.format(path_prefix, uuid.uuid4())
        final_path = '{}_coverage.png'.format(path_prefix)
        common.ensure_directory(temp_path)
        matplotlib.image.imsave(temp_path, colours)
        os.rename(temp_path, final_path)
        self.logger.info('Coverage map for {} created'.format(quality))