def main():
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)
        with SingleInstance(flavor_id=settings.args.filename):
            settings.run_interval = Duration(settings.run_interval)
            for u in settings.utility:
                u.discount = coalesce(u.discount, 0)
                # MARKUP drives WITH EXPECTED device MAPPING
                num_ephemeral_volumes = ephemeral_storage[
                    u.instance_type]["num"]
                for i, d in enumerate(d for d in u.drives if not d.device):
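                    # chr(98) == "b": ephemeral volumes occupy /dev/xvdb onward, so mapped drives get the next free letters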
                    letter = convert.ascii2char(98 + num_ephemeral_volumes + i)
                    d.device = "/dev/xvd" + letter

            settings.utility = UniqueIndex(["instance_type"],
                                           data=settings.utility)
            instance_manager = new_instance(settings.instance)
            m = SpotManager(instance_manager, kwargs=settings)

            if ENABLE_SIDE_EFFECTS:
                m.update_spot_requests()

            if m.watcher:
                m.watcher.join()
    except Exception as e:
        Log.warning("Problem with spot manager", cause=e)
    finally:
        Log.stop()
        MAIN_THREAD.stop()
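
Every example on this page follows the same mo-logs lifecycle: read settings, start logging, do the work inside try, and always call Log.stop() in finally so queued log messages are flushed. A minimal sketch of that skeleton, assuming the mo-logs package exposes Log, startup, and constants the way these examples use them:

from mo_logs import Log, constants, startup

def main():
    try:
        settings = startup.read_settings()  # parse --settings=<file> from the command line
        constants.set(settings.constants)   # override module constants from the config file
        Log.start(settings.debug)           # configure log destinations
        Log.note("work goes here")          # the application body
    except Exception as e:
        Log.warning("Problem with work", cause=e)  # cause= chains the original exception
    finally:
        Log.stop()                          # flush and shut down logging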
Example #2
def main():
    try:
        settings = startup.read_settings()
        with startup.SingleInstance(settings.args.filename):
            constants.set(settings.constants)
            Log.start(settings.debug)

            extractor = Extract(settings)

            def extract(please_stop):
                with MySQL(**settings.snowflake.database) as db:
                    with db.transaction():
                        for kwargs in extractor.queue:
                            if please_stop:
                                break
                            try:
                                extractor.extract(db=db,
                                                  please_stop=please_stop,
                                                  **kwargs)
                            except Exception as e:
                                Log.warning("Could not extract", cause=e)
                                extractor.queue.add(kwargs)

            for i in range(settings.extract.threads):
                Thread.run("extract #" + text_type(i), extract)

            please_stop = Signal()
            Thread.wait_for_shutdown_signal(please_stop=please_stop,
                                            allow_exit=True,
                                            wait_forever=False)
    except Exception as e:
        Log.warning("Problem with data extraction", e)
    finally:
        Log.stop()
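
The worker pattern here comes from mo-threads: Thread.run() hands each worker a please_stop Signal, failed work items are re-queued, and wait_for_shutdown_signal() blocks the main thread until interrupt. A minimal sketch under that assumption, with a one-second Till standing in for real work:

from mo_threads import Signal, Thread, Till

def worker(please_stop):
    while not please_stop:
        # one unit of work, then nap without blocking shutdown
        (please_stop | Till(seconds=1)).wait()

threads = [Thread.run("worker #" + str(i), worker) for i in range(4)]
Thread.wait_for_shutdown_signal(please_stop=Signal(), allow_exit=True, wait_forever=False)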
Example #3
def main():
    try:
        settings = startup.read_settings()
        Log.start(settings.debug)
        with SingleInstance(flavor_id=settings.args.filename):
            constants.set(settings.constants)
            settings.run_interval = Duration(settings.run_interval)
            for u in settings.utility:
                u.discount = coalesce(u.discount, 0)
                # MARKUP drives WITH EXPECTED device MAPPING
                num_ephemeral_volumes = ephemeral_storage[u.instance_type]["num"]
                for i, d in enumerate(d for d in u.drives if not d.device):
                    letter = convert.ascii2char(98 + num_ephemeral_volumes + i)
                    d.device = "/dev/xvd" + letter

            settings.utility = UniqueIndex(["instance_type"], data=settings.utility)
            instance_manager = new_instance(settings.instance)
            m = SpotManager(instance_manager, kwargs=settings)

            if ENABLE_SIDE_EFFECTS:
                m.update_spot_requests(instance_manager.required_utility())

            if m.watcher:
                m.watcher.join()
    except Exception as e:
        Log.warning("Problem with spot manager", cause=e)
    finally:
        Log.stop()
        MAIN_THREAD.stop()
Example #4
File: app.py Project: rv404674/TUID
 def run(self, *args, **kwargs):
     # ENSURE THE LOGGING IS CLEANED UP
     try:
         Flask.run(self, *args, **kwargs)
     except BaseException as e:  # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY, AND gunicorn WILL NOT REPORT
         Log.warning("TUID service shutdown!", cause=e)
     finally:
         Log.stop()
Example #5
 def tearDownClass(self):
     for i in ESUtils.indexes:
         try:
             self._es_cluster.delete_index(i)
             Log.note("remove index {{index}}", index=i)
         except Exception:
             pass
     Log.stop()
Example #6
 def run(self, *args, **kwargs):
     # ENSURE THE LOGGING IS CLEANED UP
     try:
         Flask.run(self, *args, **kwargs)
     except BaseException as e:  # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY, AND gunicorn WILL NOT REPORT
         Log.warning(APP_NAME + " service shutdown!", cause=e)
     finally:
         Log.stop()
Example #7
 def tearDownClass(self):
     cluster = elasticsearch.Cluster(test_jx.global_settings.backend_es)
     for i in ESUtils.indexes:
         try:
             cluster.delete_index(i)
             Log.note("remove index {{index}}", index=i)
         except Exception:
             pass
     Log.stop()
Example #8
def main():
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)
        ETL(settings).setup(settings.instance, settings.utility)
    except Exception as e:
        Log.warning("Problem with setup of ETL", cause=e)
    finally:
        Log.stop()
Example #10
def start():
    try:
        config = json2value(STDIN.readline().decode('utf8'))
        constants.set(config.constants)
        Log.start(set_default(config.debug, {"logs": [{"type": "raw"}]}))
        command_loop({"config": config})
    except Exception as e:
        Log.error("problem staring worker", cause=e)
    finally:
        Log.stop()
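
start() expects exactly one line of JSON on stdin, carrying constants and (optionally) debug settings. A hypothetical parent-side sketch, with worker.py as a stand-in filename, that spawns the worker and feeds it that line:

import json
import subprocess

proc = subprocess.Popen(["python", "worker.py"], stdin=subprocess.PIPE)
config = {"constants": {}, "debug": {"logs": [{"type": "raw"}]}}
proc.stdin.write((json.dumps(config) + "\n").encode("utf8"))
proc.stdin.flush()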
Example #11
def main():
    settings = startup.read_settings()
    Log.start(settings.debug)
    constants.set(settings.constants)

    try:
        _synch(settings)
    except Exception as e:
        Log.error("Problem with synch", e)
    finally:
        Log.stop()
Example #12
File: app.py Project: mars-f/ActiveData
 def run(self, *args, **kwargs):
     # ENSURE THE LOGGING IS CLEANED UP
     try:
         Flask.run(self, *args, **kwargs)
     except BaseException as e:  # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY, AND gunicorn WILL NOT REPORT
         if e.args and e.args[0] == 0:
             pass  # ASSUME NORMAL EXIT
         else:
             Log.warning("Serious problem with ActiveData service construction!  Shutdown!", cause=e)
     finally:
         Log.stop()
         stop_main_thread()
Example #13
def start():
    try:
        line = STDIN.readline().decode("utf8")
        config = json2value(line)
        constants.set(config.constants)
        Log.start(config.debug)
        Log.set_logger(RawLogger())
        command_loop({"config": config})
    except Exception as e:
        Log.error("problem staring worker", cause=e)
    finally:
        Log.stop()
Example #14
    def run(self, force=False, restart=False, start=None, merge=False):
        try:
            # SETUP LOGGING
            settings = startup.read_settings(filename=CONFIG_FILE)
            constants.set(settings.constants)
            Log.start(settings.debug)

            self.extract(settings, force, restart, start, merge)
        except Exception as e:
            Log.error("could not extract jobs", cause=e)
        finally:
            Log.stop()
Example #15
def main():
    try:
        settings = wrap({"elasticsearch":{
            "host": "http://activedata.allizom.org",
            "port": 9200,
            "debug": True
        }})

        Log.start(settings)
        move_shards(settings)
    except Exception as e:
        Log.error("Problem with assign of shards", e)
    finally:
        Log.stop()
Example #16
def main():

    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        branches = _get_branches_from_hg(settings.hg)

        es = elasticsearch.Cluster(kwargs=settings.hg.branches).get_or_create_index(kwargs=settings.hg.branches)
        es.add_alias()
        es.extend({"id": b.name + " " + b.locale, "value": b} for b in branches)
        Log.alert("DONE!")
    except Exception as e:
        Log.error("Problem with etl", e)
    finally:
        Log.stop()
Example #17
def main():
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        inject_secrets(config)

        with Timer("PATCH ADR: dd update() method to Configuration class"):

            def update(self, config):
                """
                Update the configuration object with new parameters
                :param config: dict of configuration
                """
                for k, v in config.items():
                    if v != None:
                        self._config[k] = v

                self._config["sources"] = sorted(
                    map(os.path.expanduser, set(self._config["sources"]))
                )

                # Use the NullStore by default. This allows us to control whether
                # caching is enabled or not at runtime.
                self._config["cache"].setdefault("stores", {"null": {"driver": "null"}})
                object.__setattr__(self, "cache", CacheManager(self._config["cache"]))
                self.cache.extend("null", lambda driver: NullStore())

            setattr(Configuration, "update", update)

        # UPDATE ADR CONFIGURATION
        adr.config.update(config.adr)

        Log.start(config.debug)

        # SHUNT ADR LOGGING TO MAIN LOGGING
        # https://loguru.readthedocs.io/en/stable/api/logger.html#loguru._logger.Logger.add
        loguru.logger.remove()
        loguru.logger.add(
            _logging, level="DEBUG", format="{message}", filter=lambda r: True,
        )

        Schedulers(config).process()
    except Exception as e:
        Log.warning("Problem with etl! Shutting down.", cause=e)
    finally:
        Log.stop()
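
loguru.logger.add() accepts a plain callable as a sink; loguru calls it with each formatted message, which is how this example shunts ADR's loguru output into mo-logs. A possible shape for the _logging sink (hypothetical; the real one lives in the source repo):

def _logging(message):
    # loguru hands the sink the formatted record; forward it to mo-logs
    Log.note("{{line}}", line=str(message).rstrip())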
Example #18
def main():
    try:
        args = startup.argparse({
            "name": ["--file", "--source"],
            "help": "directory or file with *.json schema files",
            "type": str,
            "dest": "source",
            "required": True,
        })
        _convert(File(args.source))

    except Exception as e:
        Log.error(
            "Serious problem with ActiveData service!  Shutdown completed!",
            cause=e)
    finally:
        Log.stop()
Example #20
def main(num):
    try:
        Log.start()
        results = []
        test_json(results, "mo-json encoder", json_encoder, num)
        test_json(results, "mo-json encoder (again)", json_encoder, num)
        test_json(results, "scrub before json.dumps",
                  cPythonJSONEncoder().encode, num)
        test_json(results, "override JSONEncoder.default()",
                  EnhancedJSONEncoder().encode, num)
        test_json(results, "default json.dumps", json.dumps,
                  num)  # WILL CRASH, CAN NOT HANDLE DIVERSITY OF TYPES
        test_json(results, "typed json", typed_encoder.encode, num)

        # test_json(results, "scrubbed ujson", ujson.dumps, num)  # THIS PLAIN CRASHES

        Log.note(u"\n{{summary}}", summary=convert.list2tab(results))
    finally:
        Log.stop()
Example #21
def main():

    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        hg = HgMozillaOrg(settings)
        todo = Queue()
        todo.add("97160a734959")
        least = 100000

        while todo:
            next_ = todo.pop()
            curr = hg.get_revision(
                wrap({
                    "changeset": {
                        "id": next_
                    },
                    "branch": {
                        "name": BRANCH
                    }
                }))
            if len(curr.changeset.files) > MIN_FILES:
                diff = hg._get_json_diff_from_hg(curr)
                num_changes = sum(len(d.changes) for d in diff)
                score = num_changes / len(diff)
                if score < least:
                    least = score
                    Log.note(
                        "smallest = {{rev}}, num_lines={{num}}, num_files={{files}}",
                        rev=curr.changeset.id,
                        num=num_changes,
                        files=len(diff))
            todo.extend(listwrap(curr.parents))

    except Exception as e:
        Log.error("Problem with scna", e)
    finally:
        Log.stop()
Example #22
File: main.py Project: mozilla/cia-tasks
def main():
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        # SHUNT PYTHON LOGGING TO MAIN LOGGING
        capture_logging()
        # SHUNT ADR LOGGING TO MAIN LOGGING
        # https://loguru.readthedocs.io/en/stable/api/logger.html#loguru._logger.Logger.add
        capture_loguru()

        if config.taskcluster:
            inject_secrets(config)

        @extend(Configuration)
        def update(self, config):
            """
            Update the configuration object with new parameters
            :param config: dict of configuration
            """
            for k, v in config.items():
                if v != None:
                    self._config[k] = v

            self._config["sources"] = sorted(
                map(os.path.expanduser, set(self._config["sources"])))

            # Use the NullStore by default. This allows us to control whether
            # caching is enabled or not at runtime.
            self._config["cache"].setdefault("stores",
                                             {"null": {
                                                 "driver": "null"
                                             }})
            object.__setattr__(self, "cache", CustomCacheManager(self._config))
            for _, store in self._config["cache"]["stores"].items():
                if store.path and not store.path.endswith("/"):
                    # REQUIRED, OTHERWISE FileStore._create_cache_directory() WILL LOOK AT PARENT DIRECTORY
                    store.path = store.path + "/"

        if SHOW_S3_CACHE_HIT:
            s3_get = S3Store._get

            @extend(S3Store)
            def _get(self, key):
                with Timer("get {{key}} from S3", {"key": key},
                           verbose=False) as timer:
                    output = s3_get(self, key)
                    if output is not None:
                        timer.verbose = True
                    return output

        # UPDATE ADR CONFIGURATION
        with Repeat("waiting for ADR", every="10second"):
            adr.config.update(config.adr)
            # DUMMY TO TRIGGER CACHE
            make_push_objects(from_date=Date.today().format(),
                              to_date=Date.now().format(),
                              branch="autoland")

        outatime = Till(seconds=Duration(MAX_RUNTIME).total_seconds())
        outatime.then(lambda: Log.alert("Out of time, exit early"))
        Schedulers(config).process(outatime)
    except Exception as e:
        Log.warning("Problem with etl! Shutting down.", cause=e)
    finally:
        Log.stop()
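
The time-box at the end works because Till is a Signal that becomes true after the given delay, and .then() registers a callback to fire at that moment. A minimal sketch assuming mo-threads, with a short delay standing in for MAX_RUNTIME:

import time
from mo_threads import Till

outatime = Till(seconds=5)                   # Signal that trips after 5 seconds
outatime.then(lambda: print("out of time"))  # runs once, when the signal trips
while not outatime:
    time.sleep(1)                            # stand-in for one chunk of real work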
Example #23
    any_flask_app.add_url_rule(str("/<path:path>"), None, relay_get, methods=[str("GET")])
    any_flask_app.add_url_rule(str("/<path:path>"), None, relay_post, methods=[str("POST")])
    any_flask_app.add_url_rule(str("/"), None, relay_get, methods=[str("GET")])
    any_flask_app.add_url_rule(str("/"), None, relay_post, methods=[str("POST")])


if __name__ in ("__main__",):
    Log.note("Starting " + APP_NAME + " Service App...")
    flask_app = RelayApp(__name__)

    try:
        config = startup.read_settings(filename=os.environ.get("HG_RELAY_CONFIG"))
        constants.set(config.constants)
        Log.start(config.debug)

        add(flask_app)
        Log.note("Started " + APP_NAME + " Service")
    except BaseException as e:  # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY, AND gunicorn WILL NOT REPORT
        try:
            Log.error(
                "Serious problem with " + APP_NAME + " service construction!  Shutdown!", cause=e
            )
        finally:
            Log.stop()

    if config.flask:
        if config.flask.port and config.args.process_num:
            config.flask.port += config.args.process_num
        Log.note("Running Flask...")
        flask_app.run(**config.flask)
Example #24
 def tearDownClass(cls):
     Log.stop()
Example #25
    def extract(self, settings, force, restart, merge):
        if not settings.extractor.app_name:
            Log.error("Expecting an extractor.app_name in config file")

        # SETUP DESTINATION
        destination = bigquery.Dataset(
            dataset=settings.extractor.app_name,
            kwargs=settings.destination).get_or_create_table(
                settings.destination)

        try:
            if merge:
                with Timer("merge shards"):
                    destination.merge_shards()

            # RECOVER LAST SQL STATE
            redis = Redis.from_url(REDIS_URL)
            state = redis.get(settings.extractor.key)

            if restart or not state:
                state = (0, 0)
                redis.set(settings.extractor.key,
                          value2json(state).encode("utf8"))
            else:
                state = json2value(state.decode("utf8"))

            last_modified, alert_id = state
            last_modified = Date(last_modified)

            # SCAN SCHEMA, GENERATE EXTRACTION SQL
            extractor = MySqlSnowflakeExtractor(settings.source)
            canonical_sql = extractor.get_sql(SQL("SELECT 0"))

            # ENSURE SCHEMA HAS NOT CHANGED SINCE LAST RUN
            old_sql = redis.get(settings.extractor.sql)
            if old_sql and old_sql.decode("utf8") != canonical_sql.sql:
                if force:
                    Log.warning("Schema has changed")
                else:
                    Log.error("Schema has changed")
            redis.set(settings.extractor.sql, canonical_sql.sql.encode("utf8"))

            # SETUP SOURCE
            source = MySQL(settings.source.database)

            while True:
                Log.note(
                    "Extracting alerts for last_modified={{last_modified|datetime|quote}}, alert.id={{alert_id}}",
                    last_modified=last_modified,
                    alert_id=alert_id,
                )
                last_year = Date.today() - YEAR + DAY  # ONLY YOUNG RECORDS CAN GO INTO BIGQUERY

                get_ids = SQL(
                    "SELECT s.id " +
                    "\nFROM treeherder.performance_alert_summary s" +
                    "\nLEFT JOIN treeherder.performance_alert a ON s.id=a.summary_id"
                    + "\nWHERE s.created>" + quote_value(last_year).sql +
                    " AND (s.last_updated > " +
                    quote_value(last_modified).sql + "\nOR a.last_updated > " +
                    quote_value(last_modified).sql + ")" + "\nGROUP BY s.id" +
                    "\nORDER BY s.id" + "\nLIMIT " +
                    quote_value(settings.extractor.chunk_size).sql)
                sql = extractor.get_sql(get_ids)

                # PULL FROM source, AND PUSH TO destination
                acc = []
                with source.transaction():
                    cursor = source.query(sql, stream=True, row_tuples=True)
                    extractor.construct_docs(cursor, acc.append, False)
                if not acc:
                    break
                destination.extend(acc)

                # RECORD THE STATE
                last_doc = acc[-1]
                last_modified, alert_id = last_doc.created, last_doc.id
                redis.set(
                    settings.extractor.key,
                    value2json((last_modified, alert_id)).encode("utf8"),
                )

                if len(acc) < settings.extractor.chunk_size:
                    break

        except Exception as e:
            Log.warning("problem with extraction", cause=e)

        Log.note("done alert extraction")

        try:
            with Timer("merge shards"):
                destination.merge_shards()
        except Exception as e:
            Log.warning("problem with merge", cause=e)

        Log.note("done alert merge")
        Log.stop()
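
The extraction loop above is restartable because it keeps its high-water mark (last_modified, alert_id) as JSON in Redis, advancing it only after each chunk lands in the destination. The trick distilled into a hedged sketch, using plain redis-py and a hypothetical key name:

import json
from redis import Redis

redis = Redis()
raw = redis.get("extractor.state")  # hypothetical key
last_modified, alert_id = json.loads(raw) if raw else (0, 0)
# ... push only rows newer than (last_modified, alert_id) to the destination,
# then advance the mark after each successful chunk:
redis.set("extractor.state", json.dumps([last_modified, alert_id]).encode("utf8"))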
Example #26
File: app.py Project: rv404674/TUID
    try:
        config = startup.read_settings(
            filename=os.environ.get('TUID_CONFIG')
        )
        constants.set(config.constants)
        Log.start(config.debug)

        service = TUIDService(config.tuid)

        # Log memory info while running
        initial_growth = {}
        objgraph.growth(peak_stats={})
        objgraph.growth(peak_stats=initial_growth)
        service.statsdaemon.initial_growth = initial_growth

        Log.note("Started TUID Service")
        Log.note("Current free memory: {{mem}} Mb", mem=service.statsdaemon.get_free_memory())
    except BaseException as e:  # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY, AND gunicorn WILL NOT REPORT
        try:
            Log.error("Serious problem with TUID service construction!  Shutdown!", cause=e)
        finally:
            Log.stop()

    if config.flask:
        if config.flask.port and config.args.process_num:
            config.flask.port += config.args.process_num
        Log.note("Running Flask...")
        flask_app.run(**config.flask)

Example #27
    def run(self, force=False, restart=False, merge=False):
        # SETUP LOGGING
        settings = startup.read_settings(filename=CONFIG_FILE)
        constants.set(settings.constants)
        Log.start(settings.debug)

        if not settings.extractor.app_name:
            Log.error("Expecting an extractor.app_name in config file")

        # SETUP DESTINATION
        destination = bigquery.Dataset(
            dataset=settings.extractor.app_name, kwargs=settings.destination
        ).get_or_create_table(settings.destination)

        try:
            if merge:
                with Timer("merge shards"):
                    destination.merge_shards()

            # RECOVER LAST SQL STATE
            redis = Redis()
            state = redis.get(settings.extractor.key)

            if restart or not state:
                state = (0, 0)
                redis.set(settings.extractor.key, value2json(state).encode("utf8"))
            else:
                state = json2value(state.decode("utf8"))

            last_modified, alert_id = state
            last_modified = parse(last_modified)

            # SCAN SCHEMA, GENERATE EXTRACTION SQL
            extractor = MySqlSnowflakeExtractor(settings.source)
            canonical_sql = extractor.get_sql(SQL("SELECT 0"))

            # ENSURE SCHEMA HAS NOT CHANGED SINCE LAST RUN
            old_sql = redis.get(settings.extractor.sql)
            if old_sql and old_sql.decode("utf8") != canonical_sql.sql:
                if force:
                    Log.warning("Schema has changed")
                else:
                    Log.error("Schema has changed")
            redis.set(settings.extractor.sql, canonical_sql.sql.encode("utf8"))

            # SETUP SOURCE
            source = MySQL(settings.source.database)

            while True:
                Log.note(
                    "Extracting alerts for last_modified={{last_modified|datetime|quote}}, alert.id={{alert_id}}",
                    last_modified=last_modified,
                    alert_id=alert_id,
                )
                last_year = (
                    Date.today() - YEAR + DAY
                )  # ONLY YOUNG RECORDS CAN GO INTO BIGQUERY

                # SELECT
                #     s.id
                # FROM
                #     treeherder.performance_alert_summary s
                # LEFT JOIN
                #     treeherder.performance_alert a ON s.id=a.summary_id
                # WHERE
                #     s.created>{last_year} AND (s.last_updated>{last_modified} OR a.last_updated>{last_modified})
                # GROUP BY
                #     s.id
                # ORDER BY
                #     s.id
                # LIMIT
                #     {settings.extractor.chunk_size}
                get_ids = SQL(
                    str(
                        (
                            PerformanceAlertSummary.objects.filter(
                                Q(created__gt=last_year.datetime)
                                & (
                                    Q(last_updated__gt=last_modified.datetime)
                                    | Q(alerts__last_updated__gt=last_modified.datetime)
                                )
                            )
                            .annotate()
                            .values("id")
                            .order_by("id")[: settings.extractor.chunk_size]
                        ).query
                    )
                )

                sql = extractor.get_sql(get_ids)

                # PULL FROM source, AND PUSH TO destination
                acc = []
                with source.transaction():
                    cursor = source.query(sql, stream=True, row_tuples=True)
                    extractor.construct_docs(cursor, acc.append, False)
                if not acc:
                    break
                destination.extend(acc)

                # RECORD THE STATE
                last_doc = acc[-1]
                last_modified, alert_id = last_doc.created, last_doc.id
                redis.set(
                    settings.extractor.key,
                    value2json((last_modified, alert_id)).encode("utf8"),
                )

                if len(acc) < settings.extractor.chunk_size:
                    break

        except Exception as e:
            Log.warning("problem with extraction", cause=e)

        Log.note("done alert extraction")

        try:
            with Timer("merge shards"):
                destination.merge_shards()
        except Exception as e:
            Log.warning("problem with merge", cause=e)

        Log.note("done alert merge")
        Log.stop()
Example #28
 def tearDownClass(cls):
     broker.close()
     Log.stop()
Example #29
 def tearDown(self):
     Log.stop()