def create_coverage_page(self, quality):
    """Write an HTML page embedding every node's coverage map for one quality.

    The list of nodes (name and url of every row of the nodes table with
    backfill_from set) is read from the database. For each node, in name
    order, the page gets a heading and an image pointing at
    <node url>/segments/coverage-maps/<channel>_<quality>_coverage.png.
    The page asks the browser to refresh itself every 30 seconds.

    The page is written to a uniquely named temporary file under
    <base_dir>/coverage-maps/ and then renamed to
    <channel>_<quality>_coverage.html, so the final path is only ever
    replaced by a completely written file.

    If fetching the node list fails, the error is logged and nothing is
    written.

    quality -- the quality the page is for; used in the title, the image
               urls and the output file name"""
    nodes = {}
    try:
        connection = self.db_manager.get_conn()
        # The host is only needed for the log line below, so a DSN without a
        # host entry must not abort the page generation.
        host = next(
            (part.split('=')[-1] for part in connection.dsn.split() if 'host' in part),
            'unknown host',
        )
        self.logger.info('Fetching list of nodes from {}'.format(host))
        results = database.query(
            connection,
            """ SELECT name, url FROM nodes WHERE backfill_from""")
        for row in results:
            nodes[row.name] = row.url
    except Exception:
        # Deliberately not a bare except: interpreter exit, Ctrl-C and
        # greenlet kill signals must keep propagating.
        self.logger.exception('Getting nodes failed.')
        return

    self.logger.info('Nodes fetched: {}'.format(list(nodes.keys())))

    page_header = (
        '<!DOCTYPE html> <html> <head> <meta charset="utf-8"> '
        '<meta http-equiv="refresh" content="30"/> '
        '<title>{0} {1} Segment Coverage Maps</title> '
        '<style> '
        'html {{ background-color: #222;}} '
        'h1 {{ color: #eee; text-align: center; font-family: sans-serif;}} '
        'h3 {{ color: #eee; text-align: center; font-family: sans-serif;}} '
        'img {{ display: block; margin-left: auto; margin-right: auto;}} '
        '</style> </head> <body> '
        '<h1>{0} {1}</h1>'
    )
    node_section = ' <h3>{}</h3> <img src="{}/segments/coverage-maps/{}_{}_coverage.png" alt="{}"> '
    page_footer = ' </body> </html>'

    # Collect the pieces and join once instead of repeatedly growing a string.
    page_parts = [page_header.format(self.channel, quality)]
    for node in sorted(nodes):
        page_parts.append(
            node_section.format(node, nodes[node], self.channel, quality, node))
    page_parts.append(page_footer)
    page = ''.join(page_parts)

    # Write to a unique temporary name first, then rename over the final path.
    path_prefix = os.path.join(
        self.base_dir, 'coverage-maps', '{}_{}'.format(self.channel, quality))
    temp_path = '{}_{}.html'.format(path_prefix, uuid.uuid4())
    final_path = '{}_coverage.html'.format(path_prefix)
    common.ensure_directory(temp_path)
    with open(temp_path, 'w') as f:
        # NOTE(review): common.writeall presumably retries short writes until
        # everything is written -- confirm against the helper.
        common.writeall(f.write, page)
    os.rename(temp_path, final_path)
    self.logger.info('Coverage page for {} created'.format(quality))
def _get_segment(self):
    """Download self.segment to disk and record the outcome.

    The segment is streamed into a temporary path. If the download finishes,
    the file is renamed to a "full" path, or a "suspect" path when the whole
    request took FETCH_SUSPECT_TIME or longer, and the download metrics and
    the latest-segment gauge are updated. If an error occurs after the file
    was created, whatever was written is kept under a "partial" path and the
    error is re-raised. A 403 or 404 response is logged and the segment is
    given up on without raising.
    """
    # save current value of self.retry so we can't set any later instance
    # after a retry for this round has already occurred.
    retry = self.retry
    temp_path = self.make_path("temp")
    segment_hash = hashlib.sha256()
    file_created = False
    try:
        self.logger.debug("Downloading segment {} to {}".format(self.segment, temp_path))
        start_time = monotonic()
        with soft_hard_timeout(self.logger, "getting and writing segment", self.FETCH_FULL_TIMEOUTS, retry.set):
            with soft_hard_timeout(self.logger, "getting segment headers", self.FETCH_HEADERS_TIMEOUTS, retry.set):
                resp = self.session.get(self.segment.uri, stream=True, metric_name='get_segment')
            # twitch returns 403 for expired segment urls, and 404 for very old urls where the original segment is gone.
            # the latter can happen if we have a network issue that cuts us off from twitch for some time.
            if resp.status_code in (403, 404):
                self.logger.warning("Got {} for segment, giving up: {}".format(resp.status_code, self.segment))
                return
            resp.raise_for_status()
            common.ensure_directory(temp_path)
            with open(temp_path, 'wb') as f:
                file_created = True
                # We read chunk-wise in 8KiB chunks. Note that if the connection cuts halfway,
                # we may lose part of the last chunk even though we did receive it.
                # This is a small enough amount of data that we don't really care.
                for chunk in resp.iter_content(8192):
                    common.writeall(f.write, chunk)
                    segment_hash.update(chunk)
    except Exception:
        if file_created:
            # Keep what we managed to download; the hash covers only the
            # chunks that were written.
            partial_path = self.make_path("partial", segment_hash)
            self.logger.warning("Saving partial segment {} as {}".format(temp_path, partial_path))
            common.rename(temp_path, partial_path)
            segments_downloaded.labels(type="partial", channel=self.channel, quality=self.quality).inc()
            segment_duration_downloaded.labels(type="partial", channel=self.channel, quality=self.quality).inc(self.segment.duration)
        # Bare raise re-raises the active exception with its original traceback.
        raise
    else:
        request_duration = monotonic() - start_time
        segment_type = "full" if request_duration < self.FETCH_SUSPECT_TIME else "suspect"
        full_path = self.make_path(segment_type, segment_hash)
        self.logger.debug("Saving completed segment {} as {}".format(temp_path, full_path))
        common.rename(temp_path, full_path)
        segments_downloaded.labels(type=segment_type, channel=self.channel, quality=self.quality).inc()
        segment_duration_downloaded.labels(type=segment_type, channel=self.channel, quality=self.quality).inc(self.segment.duration)
        # Prom doesn't provide a way to compare value to gauge's existing value,
        # we need to reach into internals
        stat = latest_segment.labels(channel=self.channel, quality=self.quality)
        timestamp = (self.date - datetime.datetime(1970, 1, 1)).total_seconds()
        stat.set(max(stat._value.get(), timestamp))  # NOTE: not thread-safe but is gevent-safe
def get_remote_segment(base_dir, node, channel, quality, hour, missing_segment,
        logger, timeout=TIMEOUT):
    """Get a segment from a node.

    Fetches channel/quality/hour/missing_segment from node and puts it in
    base_dir/channel/quality/hour/missing_segment. If the segment already
    exists locally, this does not attempt to fetch it.

    The segment is downloaded to a temporary file next to its final location
    and only renamed into place once the SHA-256 of the downloaded bytes
    matches the hash parsed from the segment's file name. On a mismatch the
    download is discarded, a warning is logged and the function returns
    normally. If an exception is raised while fetching, the temporary file is
    removed and the exception propagates.

    base_dir -- local root directory of the segment tree
    node -- base url of the remote node
    channel, quality, hour -- path components identifying the segment's hour
    missing_segment -- file name of the segment to fetch
    logger -- logger used for all messages from this call
    timeout -- timeout passed to the HTTP request"""
    path = os.path.join(base_dir, channel, quality, hour, missing_segment)
    # check to see if file was created since we listed the local segments to
    # avoid unnecessarily copying
    if os.path.exists(path):
        logger.debug('Skipping existing segment {}'.format(path))
        return

    dir_name = os.path.dirname(path)
    date, duration, _ = os.path.basename(path).split('-', 2)
    temp_name = "-".join([date, duration, "temp", str(uuid.uuid4())])
    temp_path = os.path.join(dir_name, "{}.ts".format(temp_name))
    common.ensure_directory(temp_path)
    segment_hash = hashlib.sha256()

    try:
        logger.debug('Fetching segment {} from {}'.format(path, node))
        uri = '{}/segments/{}/{}/{}/{}'.format(node, channel, quality, hour, missing_segment)
        resp = requests.get(uri, stream=True, timeout=timeout, metric_name='get_remote_segment')
        resp.raise_for_status()

        # Segment data is binary, so the file must be opened in binary mode.
        with open(temp_path, 'wb') as f:
            for chunk in resp.iter_content(8192):
                f.write(chunk)
                segment_hash.update(chunk)

        filename_hash = common.parse_segment_path(missing_segment).hash
        if filename_hash != segment_hash.digest():
            logger.warning(
                'Hash of segment {} does not match. Discarding segment'.format(
                    missing_segment))
            hash_mismatches.labels(remote=node, channel=channel, quality=quality, hour=hour).inc()
            os.remove(temp_path)
            return
    # try to get rid of the temp file if an exception is raised.
    except Exception:
        if os.path.exists(temp_path):
            os.remove(temp_path)
        raise

    logger.debug('Saving completed segment {} as {}'.format(temp_path, path))
    common.rename(temp_path, path)
    segments_backfilled.labels(remote=node, channel=channel, quality=quality, hour=hour).inc()
    logger.info('Segment {}/{}/{} backfilled'.format(quality, hour, missing_segment))
def create_coverage_map(self, quality, all_hour_holes, all_hour_partials,
        pixel_length=2, rows=300):
    """Create a PNG image showing segment coverage.

    Each pixel represents pixel_length seconds, with time increasing from
    top to bottom along each column and then from left to right across
    columns. By default each pixel is 2 s and each column of the image
    represents 10 min. White pixels have no coverage, orange pixels only
    have coverage by partial segments and blue pixels have coverage by full
    segments. If any part of a pixel does not have coverage, it is marked as
    not having coverage. Likewise, if only a partial segment is available
    for any part of a pixel, it is marked as partial.

    The map spans self.first_hour to self.last_hour (truncated to the hour);
    when either is None, the earliest / latest hour in all_hour_holes is
    used instead. Hours in that range that are missing from all_hour_holes
    are drawn as having no coverage. If all_hour_holes is empty, a warning
    is logged and no image is written.

    The image is written to a uniquely named temporary file under
    <base_dir>/coverage-maps/ and then renamed to
    <channel>_<quality>_coverage.png.

    all_hour_holes -- a dict mapping hours to lists of holes
    all_hour_partials -- a dict mapping hours to lists of partial segments
    pixel_length -- length of a pixel in seconds; must divide 3600 evenly
    rows -- the height of the image; must divide the total number of pixels
            evenly"""
    if not all_hour_holes:
        self.logger.warning('No hours to generate coverage map from')
        return

    if self.first_hour is None:
        first_hour = datetime.datetime.strptime(min(all_hour_holes.keys()), HOUR_FMT)
    else:
        first_hour = self.first_hour.replace(minute=0, second=0, microsecond=0)
    if self.last_hour is None:
        last_hour = datetime.datetime.strptime(max(all_hour_holes.keys()), HOUR_FMT)
    else:
        last_hour = self.last_hour.replace(minute=0, second=0, microsecond=0)

    self.logger.info('Creating coverage map for {} from {} to {}'.format(
        quality, first_hour.strftime(HOUR_FMT), last_hour.strftime(HOUR_FMT)))

    hours = []
    latest_hour = first_hour
    while latest_hour <= last_hour:
        hours.append(latest_hour)
        latest_hour += datetime.timedelta(hours=1)

    # start times of the pixels in an hour in seconds
    pixel_starts = np.arange(0, 3600, pixel_length)
    # end times of the pixels in an hour in seconds
    pixel_ends = np.arange(pixel_length, 3601, pixel_length)
    # number of pixels in an hour; must be an int because it is used as an
    # array size and in slice bounds
    pixel_count = int(3600 // pixel_length)

    coverage_mask = np.zeros(len(hours) * pixel_count, dtype=np.bool_)
    partial_mask = np.zeros(len(hours) * pixel_count, dtype=np.bool_)
    for i, hour in enumerate(hours):
        hour_str = hour.strftime(HOUR_FMT)
        if hour_str not in all_hour_holes:
            # nothing known about this hour: leave it marked as uncovered
            continue

        hour_coverage = np.ones(pixel_count, dtype=np.bool_)
        hour_partial = np.zeros(pixel_count, dtype=np.bool_)

        for hole in all_hour_holes[hour_str]:
            # the start of the pixel containing the start of the hole
            hole_start = np.floor(
                (hole[0] - hour).total_seconds() / pixel_length) * pixel_length
            # the end of the pixel containing the end of the hole
            hole_end = np.ceil(
                (hole[1] - hour).total_seconds() / pixel_length) * pixel_length
            # a pixel stays covered only if it lies before or after the hole
            hour_coverage = hour_coverage & (
                (pixel_starts < hole_start) | (pixel_ends > hole_end))

        # An hour may have holes recorded but no entry for partial segments.
        for partial in all_hour_partials.get(hour_str, ()):
            # the start of the pixel containing the start of the partial segment
            partial_start = np.floor(
                (partial[0] - hour).total_seconds() / pixel_length) * pixel_length
            # the end of the pixel containing the end of the partial segment
            partial_end = np.ceil(
                (partial[1] - hour).total_seconds() / pixel_length) * pixel_length
            hour_partial = hour_partial | (
                (pixel_starts >= partial_start) & (pixel_ends <= partial_end))

        hour_slice = slice(i * pixel_count, (i + 1) * pixel_count)
        coverage_mask[hour_slice] = hour_coverage
        partial_mask[hour_slice] = hour_partial

    # convert the flat masks into 2-D arrays
    columns = coverage_mask.size // rows
    coverage_mask = coverage_mask.reshape((columns, rows)).T
    partial_mask = partial_mask.reshape((columns, rows)).T

    # use the masks to set the actual pixel colours
    colours = np.ones((rows, columns, 3))
    colours[coverage_mask] = matplotlib.colors.to_rgb('tab:blue')
    colours[coverage_mask & partial_mask] = matplotlib.colors.to_rgb('tab:orange')

    # write the pixel array to a temporary file then atomically rename it
    path_prefix = os.path.join(
        self.base_dir, 'coverage-maps', '{}_{}'.format(self.channel, quality))
    temp_path = '{}_{}.png'.format(path_prefix, uuid.uuid4())
    final_path = '{}_coverage.png'.format(path_prefix)
    common.ensure_directory(temp_path)
    matplotlib.image.imsave(temp_path, colours)
    os.rename(temp_path, final_path)
    self.logger.info('Coverage map for {} created'.format(quality))