Example #1
    def run(self):
        self.logger.info('Starting')
        failures = 0

        while not self.stopping.is_set():
            try:
                self.logger.info('Starting backfill')
                self.backfill()
                self.logger.info('Backfill complete')
                failures = 0  # reset failure count on a successful backfill
                if not self.run_once:
                    self.stopping.wait(common.jitter(self.WAIT_INTERVAL))

            except Exception:
                if failures < MAX_BACKOFF:
                    failures += 1
                delay = common.jitter(TIMEOUT * 2**failures)
                self.logger.exception(
                    'Backfill failed. Retrying in {:.0f} s'.format(delay))
                backfill_errors.labels(remote=self.node).inc()
                self.stopping.wait(delay)

            if self.run_once:
                break

        self.logger.info('Worker stopped')
        self.done.set()
        if self.node in self.manager.workers:
            del self.manager.workers[self.node]
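The retry delay above grows exponentially with the number of consecutive failures and stops growing once `failures` reaches MAX_BACKOFF; common.jitter() then randomizes it slightly so multiple backfillers don't retry in lockstep. A small sketch of how the delay sequence behaves, using hypothetical values for the module constants TIMEOUT and MAX_BACKOFF:

TIMEOUT = 5       # hypothetical base delay, seconds
MAX_BACKOFF = 7   # hypothetical cap on the exponent

failures = 0
delays = []
for _ in range(10):
    if failures < MAX_BACKOFF:
        failures += 1
    # common.jitter() would randomize this value a little before sleeping
    delays.append(TIMEOUT * 2 ** failures)

print(delays)  # [10, 20, 40, 80, 160, 320, 640, 640, 640, 640]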
Example #2
    def run(self):
        """Stop and start workers based on results of get_nodes.
		
		Regularly call get_nodes. Nodes returned by get_nodes not currently
		running are started and currently running nodes not returned by
		get_nodes are stopped. If self.run_once, only call nodes once. Calling
		stop will exit the loop."""
        self.logger.info('Starting')
        failures = 0

        while not self.stopping.is_set():
            try:
                new_nodes = set(self.get_nodes())
            except Exception:
                # To ensure a fresh slate and clear any DB-related errors, get a new conn on error.
                # This is heavy-handed but simple and effective.
                self.connection = None
                if failures < MAX_BACKOFF:
                    failures += 1
                delay = common.jitter(TIMEOUT * 2**failures)
                self.logger.exception(
                    'Getting nodes failed. Retrying in {:.0f} s'.format(delay))
                node_list_errors.inc()
                self.stopping.wait(delay)
                continue
            existing_nodes = set(self.workers.keys())
            to_start = new_nodes - existing_nodes
            for node in to_start:
                self.start_worker(node)
            to_stop = existing_nodes - new_nodes
            for node in to_stop:
                self.stop_worker(node)
            failures = 0  # reset failures on success
            if self.run_once:
                break

            # note that if get_nodes() raises an error, then deletes will not occur
            if self.delete_old and self.start:
                try:
                    self.delete_hours()
                except Exception:
                    self.logger.warning('Failed to delete old segments',
                                        exc_info=True)

            self.stopping.wait(common.jitter(self.NODE_INTERVAL))

        #wait for all workers to finish
        for worker in self.workers.values():
            worker.done.wait()
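The reconciliation above is plain set arithmetic: anything get_nodes() returned that isn't running yet gets started, and anything running that it no longer returned gets stopped. A standalone illustration with made-up node names:

existing_nodes = {"node-a", "node-b"}   # workers currently running
new_nodes = {"node-b", "node-c"}        # latest result of get_nodes()

to_start = new_nodes - existing_nodes   # {'node-c'}
to_stop = existing_nodes - new_nodes    # {'node-a'}
print(sorted(to_start), sorted(to_stop))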
Example #3
def main(dbconnect,
         sheets_creds_file,
         edit_url,
         bustime_start,
         sheet_id,
         worksheet_names,
         metrics_port=8005,
         backdoor_port=0,
         allocate_ids=False):
    """
	Sheet sync constantly scans a Google Sheets sheet and a database, copying inputs from the sheet
	to the DB and outputs from the DB to the sheet.

	With the exception of id allocation, all operations are idempotent and multiple sheet syncs
	may be run for redundancy.
	"""
    common.PromLogCountsHandler.install()
    common.install_stacksampler()
    prom.start_http_server(metrics_port)

    register_uuid()

    if backdoor_port:
        gevent.backdoor.BackdoorServer(('127.0.0.1', backdoor_port),
                                       locals=locals()).start()

    stop = gevent.event.Event()
    gevent.signal(signal.SIGTERM, stop.set)  # shut down on sigterm

    logging.info("Starting up")

    dbmanager = DBManager(dsn=dbconnect)
    while True:
        try:
            # Get a test connection so we know the database is up,
            # this produces a clearer error in cases where there's a connection problem.
            conn = dbmanager.get_conn()
        except Exception:
            delay = common.jitter(10)
            logging.info(
                'Cannot connect to database. Retrying in {:.0f} s'.format(
                    delay))
            stop.wait(delay)
        else:
            # put it back so it gets reused on next get_conn()
            dbmanager.put_conn(conn)
            break

    with open(sheets_creds_file) as f:
        sheets_creds = json.load(f)

    sheets = Sheets(
        client_id=sheets_creds['client_id'],
        client_secret=sheets_creds['client_secret'],
        refresh_token=sheets_creds['refresh_token'],
    )

    SheetSync(stop, dbmanager, sheets, sheet_id, worksheet_names, edit_url,
              bustime_start, allocate_ids).run()

    logging.info("Gracefully stopped")
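For reference, the sheets_creds_file read above is JSON and only the three keys passed to Sheets() are used. A placeholder example of its shape (these are not real credentials):

# contents of sheets_creds_file, shown here as the equivalent Python dict:
sheets_creds = {
    "client_id": "example-id.apps.googleusercontent.com",
    "client_secret": "example-client-secret",
    "refresh_token": "example-refresh-token",
}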
Example #4
 def _run(self):
     start = monotonic()
     self.logger.debug("Getter started at {}".format(start))
     while not self.exists():
         self.retry = gevent.event.Event()
         worker = gevent.spawn(self.get_segment)
         # wait until worker succeeds/fails or retry is set
         gevent.wait([worker, self.retry], count=1)
         # If worker has returned, and return value is true, we're done
         if worker.ready() and worker.value:
             break
         # If a large amount of time has elapsed since starting, our URL is stale
         # anyway so we might as well give up to avoid cpu and disk usage.
         elapsed = monotonic() - start
         if elapsed > self.GIVE_UP_TIMEOUT:
             self.logger.warning(
                 "Getter has been running for {}s, giving up as our URL has expired"
                 .format(elapsed))
             break
         # Create a new session, so we don't reuse a connection from the old session
         # which had an error / some other issue. This is mostly just out of paranoia.
         self.session = requests.Session()
         # if retry not set, wait for FETCH_RETRY first
         self.retry.wait(common.jitter(self.FETCH_RETRY))
     self.logger.debug("Getter is done")
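The gevent.wait([worker, self.retry], count=1) call is what lets the getter react to whichever happens first: the download greenlet finishing, or something else setting self.retry. A small self-contained demonstration of that pattern:

import gevent
import gevent.event

retry = gevent.event.Event()

def download():
    gevent.sleep(5)   # pretend this is a slow fetch
    return True

worker = gevent.spawn(download)
gevent.spawn_later(1, retry.set)   # someone requests a retry after 1s

# returns as soon as either the worker finishes or retry is set
gevent.wait([worker, retry], count=1)
print("worker done:", worker.ready(), "retry requested:", retry.is_set())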
Example #5
 def run(self):
     self.trigger_refresh()  # on first round, always go immediately
     while not self.stopping.is_set():
         # clamp the time until max worker age to non-negative, defaulting to 0 if no workers exist
         time_to_next_max_age = max(
             0,
             min([
                 self.MAX_WORKER_AGE - workers[-1].age()
                 for workers in self.stream_workers.values() if workers
             ] or [0]))
         self.logger.info(
             "Next master playlist refresh in at most {} sec".format(
                 time_to_next_max_age))
         # wait until refresh triggered, next max age reached, or we're stopping (whichever happens first)
         gevent.wait([self.stopping, self.refresh_needed],
                     timeout=time_to_next_max_age,
                     count=1)
         if not self.stopping.is_set():
             self.refresh_needed.clear()
             gevent.spawn(self.fetch_latest)
         # wait min retry interval with jitter, unless we're stopping
         self.stopping.wait(common.jitter(self.FETCH_MIN_INTERVAL))
     self.logger.info("Stopping workers")
     for workers in self.stream_workers.values():
         for worker in workers:
             worker.stop()
         for worker in workers:
             worker.done.wait()
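The time_to_next_max_age computation picks the soonest moment at which any stream's newest worker reaches MAX_WORKER_AGE, clamped to non-negative, falling back to 0 when there are no workers at all. A toy calculation with hypothetical numbers:

MAX_WORKER_AGE = 300   # hypothetical, seconds

# age in seconds of the newest worker per stream (workers[-1].age() above)
newest_worker_ages = {"source": 290, "480p": 120}

time_to_next_max_age = max(0, min(
    [MAX_WORKER_AGE - age for age in newest_worker_ages.values()] or [0]
))
print(time_to_next_max_age)  # 10: the 'source' worker hits max age first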
Example #6
	def run(self):
		try:
			while True:
				try:
					self._run()
				except Exception:
					self.logger.exception("Unexpected exception while getting segment {}, retrying".format(self.segment))
					gevent.sleep(common.jitter(self.UNEXPECTED_FAILURE_RETRY))
				else:
					break
		finally:
			self.done.set()
Example #7
 def wait(self, base, interval):
     """Wait until INTERVAL seconds after BASE."""
     now = monotonic()
     to_wait = base + common.jitter(interval) - now
     if to_wait > 0:
         self.stop.wait(to_wait)
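Unlike the plain wait(interval) helpers elsewhere, this variant measures from a caller-supplied monotonic() timestamp, so whatever time the work since `base` already consumed is subtracted from the sleep. A minimal standalone sketch of that pacing idea (the work and interval here are made up; the original would apply common.jitter to the interval):

from time import monotonic, sleep

def paced_passes(interval, passes=3):
    # each pass ends roughly `interval` seconds after it began,
    # no matter how long the work itself took
    for n in range(passes):
        base = monotonic()
        sleep(0.3)                               # pretend this is the work
        to_wait = base + interval - monotonic()  # jitter omitted for clarity
        if to_wait > 0:
            sleep(to_wait)
        print("pass", n, "finished on cadence")

paced_passes(1.0)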
Example #8
    def run(self):
        """Loop over available hours for each quality, checking segment coverage."""
        self.logger.info('Starting')

        while not self.stopping.is_set():

            for quality in self.qualities:
                if self.stopping.is_set():
                    break

                path = os.path.join(self.base_dir, self.channel, quality)
                try:
                    hours = [
                        name for name in os.listdir(path)
                        if not name.startswith('.')
                    ]
                except OSError as e:
                    if e.errno == errno.ENOENT:
                        self.logger.warning('{} does not exist'.format(path))
                        continue
                    # re-raise anything else; otherwise `hours` may be stale or undefined below
                    raise

                hours.sort()
                previous_hour_segments = None
                all_hour_holes = {}
                all_hour_partials = {}
                for hour in hours:
                    if self.stopping.is_set():
                        break
                    self.logger.info('Checking {}/{}'.format(quality, hour))

                    # based on common.segments.best_segments_by_start
                    # but more complicated to capture more detailed metrics
                    hour_path = os.path.join(self.base_dir, self.channel,
                                             quality, hour)
                    try:
                        segment_names = [
                            name for name in os.listdir(hour_path)
                            if not name.startswith('.')
                        ]
                    except OSError as e:
                        if e.errno == errno.ENOENT:
                            self.logger.warning(
                                'Hour {} was deleted between finding it and processing it, ignoring'
                                .format(hour))
                            continue
                        # re-raise anything else rather than reusing a stale segment_names
                        raise
                    segment_names.sort()
                    parsed = []
                    bad_segment_count = 0
                    for name in segment_names:
                        try:
                            parsed.append(
                                common.parse_segment_path(
                                    os.path.join(hour_path, name)))
                        except ValueError:
                            self.logger.warning(
                                "Failed to parse segment: {!r}".format(
                                    os.path.join(hour_path, name)),
                                exc_info=True)
                            bad_segment_count += 1

                    full_segment_count = 0
                    partial_segment_count = 0
                    full_segment_duration = datetime.timedelta()
                    partial_segment_duration = datetime.timedelta()
                    full_overlaps = 0
                    full_overlap_duration = datetime.timedelta()
                    partial_overlaps = 0
                    partial_overlap_duration = datetime.timedelta()
                    best_segments = []
                    holes = []
                    editable_holes = []
                    previous = None
                    previous_editable = None
                    coverage = datetime.timedelta()
                    editable_coverage = datetime.timedelta()
                    only_partials = []

                    # loop over all start times
                    # first select the best segment for a start time
                    # then update coverage
                    for start_time, segments in itertools.groupby(
                            parsed, key=lambda segment: segment.start):
                        full_segments = []
                        partial_segments = []
                        for segment in segments:
                            if segment.type == 'full':
                                full_segments.append(segment)
                                full_segment_count += 1
                                full_segment_duration += segment.duration
                            elif segment.type == 'partial':
                                partial_segments.append(segment)
                                partial_segment_count += 1
                                partial_segment_duration += segment.duration
                        if full_segments:
                            full_segments.sort(
                                key=lambda segment: (segment.duration))
                            best_segment = full_segments[-1]
                            for segment in full_segments[:-1]:
                                full_overlaps += 1
                                full_overlap_duration += segment.duration
                            for segment in partial_segments:
                                partial_overlaps += 1
                                partial_overlap_duration += segment.duration
                        elif partial_segments:
                            partial_segments.sort(key=lambda segment: os.stat(
                                segment.path).st_size)
                            best_segment = partial_segments[-1]
                            only_partials.append(
                                (best_segment.start,
                                 best_segment.start + best_segment.duration))
                            for segment in partial_segments[:-1]:
                                partial_overlaps += 1
                                partial_overlap_duration += segment.duration
                        else:
                            # ignore any start times with only temporary segments
                            continue
                        self.logger.debug(best_segment.path.split('/')[-1])
                        best_segments.append(best_segment)

                        # now update coverage, overlaps and holes
                        if previous is None:
                            coverage += best_segment.duration
                            editable_coverage += best_segment.duration
                            previous_editable = best_segment
                        else:
                            previous_end = previous.start + previous.duration
                            if segment.start < previous_end:
                                if segment.type == 'full':
                                    full_overlaps += 1
                                    full_overlap_duration += previous_end - segment.start
                                else:
                                    partial_overlaps += 1
                                    partial_overlap_duration += previous_end - segment.start
                                coverage += segment.start - previous_end + segment.duration
                            else:
                                coverage += segment.duration
                                editable_coverage += segment.duration

                                if segment.start > previous_end:
                                    holes.append((previous_end, segment.start))

                                previous_editable_end = previous_editable.start + previous_editable.duration
                                if segment.start > previous_editable_end:
                                    editable_holes.append(
                                        (previous_editable_end, segment.start))

                                previous_editable = best_segment

                        previous = best_segment

                    if best_segments:
                        start = best_segments[0].start
                        end = best_segments[-1].start + best_segments[
                            -1].duration
                        hole_duration = end - start - coverage
                        editable_hole_duration = end - start - editable_coverage

                        hour_start = datetime.datetime.strptime(hour, HOUR_FMT)
                        hour_end = hour_start + datetime.timedelta(hours=1)
                        # handle the case when there is a hole between the last segment of the previous hour and the first segment of this hour
                        if previous_hour_segments:
                            last_segment = previous_hour_segments[-1]
                            if best_segments[
                                    0].start > last_segment.start + last_segment.duration:
                                holes.append((hour_start, start))
                                hole_duration += start - hour_start
                                editable_holes.append((hour_start, start))
                                editable_hole_duration += start - hour_start

                        # handle the case when there is a hole between the last segment and the end of the hour if not the last hour
                        if hour != hours[-1] and end < hour_end:
                            holes.append((end, hour_end))
                            hole_duration += hour_end - end
                            editable_holes.append((end, hour_end))
                            editable_hole_duration += hour_end - end

                    # update the large number of Prometheus gauges
                    segment_count_gauge.labels(
                        channel=self.channel,
                        quality=quality,
                        hour=hour,
                        type='full').set(full_segment_count)
                    segment_count_gauge.labels(
                        channel=self.channel,
                        quality=quality,
                        hour=hour,
                        type='partial').set(partial_segment_count)
                    segment_count_gauge.labels(
                        channel=self.channel,
                        quality=quality,
                        hour=hour,
                        type='bad').set(bad_segment_count)
                    segment_duration_gauge.labels(
                        channel=self.channel,
                        quality=quality,
                        hour=hour,
                        type='full').set(full_segment_duration.total_seconds())
                    segment_duration_gauge.labels(
                        channel=self.channel,
                        quality=quality,
                        hour=hour,
                        type='partial').set(
                            partial_segment_duration.total_seconds())
                    raw_coverage_gauge.labels(channel=self.channel,
                                              quality=quality,
                                              hour=hour).set(
                                                  coverage.total_seconds())
                    editable_coverage_gauge.labels(
                        channel=self.channel, quality=quality,
                        hour=hour).set(editable_coverage.total_seconds())
                    raw_holes_gauge.labels(channel=self.channel,
                                           quality=quality,
                                           hour=hour).set(len(holes))
                    editable_holes_gauge.labels(channel=self.channel,
                                                quality=quality,
                                                hour=hour).set(
                                                    len(editable_holes))
                    overlap_count_gauge.labels(channel=self.channel,
                                               quality=quality,
                                               hour=hour,
                                               type='full').set(full_overlaps)
                    overlap_count_gauge.labels(
                        channel=self.channel,
                        quality=quality,
                        hour=hour,
                        type='partial').set(partial_overlaps)
                    overlap_duration_gauge.labels(
                        channel=self.channel,
                        quality=quality,
                        hour=hour,
                        type='full').set(full_overlap_duration.total_seconds())
                    overlap_duration_gauge.labels(
                        channel=self.channel,
                        quality=quality,
                        hour=hour,
                        type='partial').set(
                            partial_overlap_duration.total_seconds())

                    # log the same information
                    if best_segments:
                        self.logger.info(
                            '{}/{}: Start: {} End: {} ({} s)'.format(
                                quality, hour, start, end,
                                (end - start).total_seconds()))
                        self.logger.info(
                            '{}/{}: {} full segments totalling {} s'.format(
                                quality, hour, full_segment_count,
                                full_segment_duration.total_seconds()))
                        self.logger.info('{}/{}: {} bad segments'.format(
                            quality, hour, bad_segment_count))
                        self.logger.info(
                            '{}/{}: {} overlapping full segments totalling {} s'
                            .format(quality, hour, full_overlaps,
                                    full_overlap_duration.total_seconds()))
                        self.logger.info(
                            '{}/{}: {} partial segments totalling {} s'.format(
                                quality, hour, partial_segment_count,
                                partial_segment_duration.total_seconds()))
                        self.logger.info(
                            '{}/{}: {} overlapping partial segments totalling {} s'
                            .format(quality, hour, partial_overlaps,
                                    partial_overlap_duration.total_seconds()))
                        self.logger.info(
                            '{}/{}: raw coverage {} s, editable coverage {} s '
                            .format(quality, hour, coverage.total_seconds(),
                                    editable_coverage.total_seconds()))
                        self.logger.info(
                            '{}/{}: {} holes totalling {} s '.format(
                                quality, hour, len(holes),
                                hole_duration.total_seconds()))
                        self.logger.info(
                            '{}/{}: {} editable holes totalling {} s '.format(
                                quality, hour, len(editable_holes),
                                editable_hole_duration.total_seconds()))
                        self.logger.info('Checking {}/{} complete'.format(
                            quality, hour))

                        # add holes for the start and end hours for the
                        # coverage map. do this after updating gauges and
                        # logging as these aren't likely real holes, just the
                        # start and end of the stream.
                        if previous_hour_segments is None:
                            holes.append((hour_start, start))
                        if hour == hours[-1]:
                            holes.append((end, hour_end))

                        all_hour_holes[hour] = holes
                        all_hour_partials[hour] = only_partials

                        previous_hour_segments = best_segments

                    else:
                        self.logger.info('{}/{} is empty'.format(
                            quality, hour))

                self.create_coverage_map(quality, all_hour_holes,
                                         all_hour_partials)

            self.stopping.wait(common.jitter(self.CHECK_INTERVAL))
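The per-start-time selection buried in the loop above mirrors common.segments.best_segments_by_start: segments sharing a start time are grouped, and the best one is kept (the longest full segment, otherwise the largest partial). A stripped-down sketch of just that selection step, using a hypothetical Segment tuple in place of what common.parse_segment_path returns:

import itertools
from collections import namedtuple

# hypothetical stand-in for parsed segment objects
Segment = namedtuple('Segment', ['start', 'duration', 'type', 'path'])

def best_by_start(parsed):
    # parsed must already be sorted by start so groupby sees one group per start time
    best = []
    for start, group in itertools.groupby(parsed, key=lambda s: s.start):
        group = list(group)
        full = [s for s in group if s.type == 'full']
        partial = [s for s in group if s.type == 'partial']
        if full:
            best.append(max(full, key=lambda s: s.duration))
        elif partial:
            # the real code prefers the largest file on disk here
            best.append(max(partial, key=lambda s: s.duration))
        # start times with only temporary segments are skipped entirely
    return best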
Example #9
 def wait(self, interval):
     self.stop.wait(common.jitter(interval))
Example #10
 def wait(self, interval):
     """Wait for given interval with jitter, unless we're stopping"""
     self.stopping.wait(common.jitter(interval))
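common.jitter() appears in nearly every snippet here but is never shown. Its exact behaviour isn't part of these examples, so the following is only a plausible sketch of what such a helper usually does: scale the interval by a small random factor so that many workers sleeping on the same interval don't all wake at once.

import random

def jitter(interval):
    # hypothetical implementation: vary the interval by +/-10%;
    # the real common.jitter() may use a different spread
    return interval * random.uniform(0.9, 1.1)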
Example #11
def main(
    dbconnect,
    config,
    creds_file,
    name=None,
    base_dir=".",
    tags='',
    metrics_port=8003,
    backdoor_port=0,
):
    """dbconnect should be a postgres connection string, which is either a space-separated
	list of key=value pairs, or a URI like:
		postgresql://USER:PASSWORD@HOST/DBNAME?KEY=VALUE

	config should be a json blob mapping upload location names to a config object
	for that location. This config object should contain the keys:
		type:
			the name of the upload backend type
		no_transcode_check:
			bool. If true, won't check for when videos are done transcoding.
			This is useful when multiple upload locations actually refer to the
			same place just with different settings, and you only want one of them
			to actually do the check.
		cut_type:
			One of 'fast' or 'full'. Default 'fast'. This indicates whether to use
			fast_cut_segments() or full_cut_segments() for this location.
	along with any additional config options defined for that backend type.

	creds_file should contain any required credentials for the upload backends, as JSON.

	name defaults to hostname.

	tags should be a comma-seperated list of tags to attach to all videos.
	"""
    common.PromLogCountsHandler.install()
    common.install_stacksampler()
    prom.start_http_server(metrics_port)

    if backdoor_port:
        gevent.backdoor.BackdoorServer(('127.0.0.1', backdoor_port),
                                       locals=locals()).start()

    if name is None:
        name = socket.gethostname()

    tags = tags.split(',') if tags else []

    stop = gevent.event.Event()
    gevent.signal(signal.SIGTERM, stop.set)  # shut down on sigterm

    logging.info("Starting up")

    # We have two independent jobs to do - to perform cut jobs (cutter),
    # and to check the status of transcoding videos to see if they're done (transcode checker).
    # We want to error if either errors, and shut down if either exits.
    dbmanager = DBManager(dsn=dbconnect)
    while True:
        try:
            # Get a test connection so we know the database is up,
            # this produces a clearer error in cases where there's a connection problem.
            conn = dbmanager.get_conn()
        except Exception:
            delay = common.jitter(10)
            logging.warning(
                'Cannot connect to database. Retrying in {:.0f} s'.format(
                    delay),
                exc_info=True)
            stop.wait(delay)
        else:
            # put it back so it gets reused on next get_conn()
            dbmanager.put_conn(conn)
            break

    with open(creds_file) as f:
        credentials = json.load(f)

    config = json.loads(config)
    upload_locations = {}
    needs_transcode_check = {}
    for location, backend_config in config.items():
        backend_type = backend_config.pop('type')
        no_transcode_check = backend_config.pop('no_transcode_check', False)
        cut_type = backend_config.pop('cut_type', 'full')
        if backend_type == 'youtube':
            backend_type = Youtube
        elif backend_type == 'local':
            backend_type = Local
        else:
            raise ValueError(
                "Unknown upload backend type: {!r}".format(backend_type))
        backend = backend_type(credentials, **backend_config)
        if cut_type == 'fast':
            # mark for fast cut by clearing encoding settings
            backend.encoding_settings = None
        elif cut_type != 'full':
            raise ValueError("Unknown cut type: {!r}".format(cut_type))
        upload_locations[location] = backend
        if backend.needs_transcode and not no_transcode_check:
            needs_transcode_check[location] = backend

    cutter = Cutter(upload_locations, dbmanager, stop, name, base_dir, tags)
    transcode_checkers = [
        TranscodeChecker(location, backend, dbmanager, stop)
        for location, backend in needs_transcode_check.items()
    ]
    jobs = [gevent.spawn(cutter.run)] + [
        gevent.spawn(transcode_checker.run)
        for transcode_checker in transcode_checkers
    ]
    # Block until any one exits
    gevent.wait(jobs, count=1)
    # Stop the others if they aren't stopping already
    stop.set()
    # Block until all have exited
    gevent.wait(jobs)
    # Call get() for each one to re-raise if any errored
    for job in jobs:
        job.get()

    logging.info("Gracefully stopped")
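To make the docstring concrete, here is a hypothetical value for the `config` argument as it would look before json.loads(): two locations on the youtube backend with different cut types, plus a local backend. Only the keys documented in the docstring are shown; any backend-specific options are omitted.

config = '''
{
    "youtube-main": {"type": "youtube", "cut_type": "full"},
    "youtube-fast": {"type": "youtube", "cut_type": "fast", "no_transcode_check": true},
    "backup": {"type": "local", "cut_type": "fast"}
}
'''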
Example #12
 def wait(self, interval):
     """Wait for INTERVAL with jitter, unless we're stopping"""
     self.stop.wait(common.jitter(interval))