Example #1
    def _build_list_sql(self, db, first, batch_size):
        # TODO: ENSURE THE LAST COLUMN IS THE id
        if first:
            dim = len(self._extract.field)
            where = SQL_OR.join(
                sql_iso(
                    sql_and(
                        quote_column(f) + ineq(i, e, dim) +
                        db.quote_value(Date(v) if t == "time" else v)
                        for e, (f, v, t) in enumerate(
                            zip(self._extract.field[0:i + 1], first,
                                self._extract.type[0:i + 1]))))
                for i in range(dim))
        else:
            where = SQL_TRUE

        selects = []
        for t, f in zip(self._extract.type, self._extract.field):
            if t == "time":
                selects.append(
                    "CAST" +
                    sql_iso(sql_alias(quote_column(f), SQL("DATETIME(6)"))))
            else:
                selects.append(quote_column(f))
        sql = (SQL_SELECT + sql_list(selects) + SQL_FROM +
               self.settings.snowflake.fact_table + SQL_WHERE + where +
               SQL_ORDERBY +
               sql_list(quote_column(f) for f in self._extract.field) +
               SQL_LIMIT + db.quote_value(batch_size))
        return sql
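The WHERE clause built above is a keyset-pagination predicate over several sort columns: one OR-term per dimension, with equality on the leading columns and (assuming ineq picks it for the final position) a strict inequality on the last, so each query resumes exactly after the previous batch. A minimal standalone sketch, with hypothetical column names and naive value quoting:

def keyset_where(fields, last_row):
    # ONE OR-TERM PER DIMENSION: EQUALITY ON THE PREFIX, ">" ON THE LAST COLUMN
    terms = []
    for i in range(len(fields)):
        parts = [f"{f} = {v!r}" for f, v in zip(fields[:i], last_row[:i])]
        parts.append(f"{fields[i]} > {last_row[i]!r}")
        terms.append("(" + " AND ".join(parts) + ")")
    return " OR ".join(terms)

print(keyset_where(["created", "id"], ["2020-01-07", 42]))
# (created > '2020-01-07') OR (created = '2020-01-07' AND id > 42)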
Example #2
    def __init__(self, message="ping", every="second", start=None, until=None):
        if is_text(message):
            self.message = show_message(message)
        else:
            self.message = message

        self.every = Duration(every)

        if isinstance(until, Signal):
            self.please_stop = until
        elif until == None:
            self.please_stop = Signal()
        else:
            self.please_stop = Till(Duration(until).seconds)

        self.thread = None
        if start:
            self.thread = Thread.run(
                "repeat",
                _repeat,
                self.message,
                self.every,
                Date(start),
                parent_thread=MAIN_THREAD,
                please_stop=self.please_stop,
            ).release()
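The until argument is normalized into a stop signal: an existing Signal is reused, None creates a fresh one, and anything else is treated as a duration. A standard-library analogue, assuming Till behaves like an event that sets itself after the given delay:

import threading

def make_stop_signal(until=None):
    # REUSE AN EXISTING EVENT; OTHERWISE A NUMBER OF SECONDS BECOMES A TIMEOUT
    if isinstance(until, threading.Event):
        return until
    stop = threading.Event()
    if until is not None:
        threading.Timer(float(until), stop.set).start()
    return stop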
Example #3
 def to_es_script(self, schema):
     return EsScript(
         type=NUMBER,
         expr=text_type(Date(self.value).unix),
         frum=self,
         schema=schema
     )
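Date(self.value).unix is seconds since the epoch, so the compiled expression is just a numeric literal rendered as text. The same conversion with the standard library:

from datetime import datetime, timezone

# 2018-04-01 00:00 UTC AS UNIX SECONDS, RENDERED AS TEXT
expr = str(datetime(2018, 4, 1, tzinfo=timezone.utc).timestamp())
print(expr)  # 1522540800.0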
Example #4
    def __init__(
        self,
        interval,  # TIME INTERVAL BETWEEN RUNS
        starting,  # THE TIME TO START THE INTERVAL COUNT
        max_runtime=MAX_RUNTIME,  # LIMIT HOW LONG THE PROCESS IS ALIVE
        wait_for_shutdown=WAIT_FOR_SHUTDOWN,  # LIMIT PATIENCE WHEN ASKING FOR SHUTDOWN, THEN SEND KILL
        process=None,
    ):
        self.duration = Duration(interval)
        self.starting = coalesce(Date(starting), Date.now())
        self.max_runtime = Duration(max_runtime)
        self.wait_for_shutdown = Duration(wait_for_shutdown)
        # Process parameters
        self.process = process

        # STATE
        self.last_started = None
        self.last_finished = None
        self.run_count = 0
        self.fail_count = 0
        self.current = None
        self.terminator = None  # SIGNAL TO KILL THE PROCESS
        self.next_run = self._next_run()
        self.next = Till(till=self.next_run)
        self.next_run.then(self.run)
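_next_run is not shown; a plausible sketch (an assumption, not the project's code) returns the first grid point starting + k * interval that still lies in the future:

import math
from datetime import datetime, timedelta, timezone

def next_run(starting: datetime, interval: timedelta) -> datetime:
    # FIRST MULTIPLE OF interval AFTER starting THAT IS IN THE FUTURE (SKETCH)
    elapsed = datetime.now(timezone.utc) - starting
    steps = max(math.floor(elapsed / interval) + 1, 0)
    return starting + steps * interval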
Example #5
    def setUpClass(self):
        while True:
            try:
                es = test_jx.global_settings.backend_es
                http.get_json(URL(es.host, port=es.port))
                break
            except Exception as e:
                e = Except.wrap(e)
                if "No connection could be made because the target machine actively refused it" in e or "Connection refused" in e:
                    Log.alert("Problem connecting")
                else:
                    Log.error("Server raised exception", e)

        # REMOVE OLD INDEXES
        cluster = elasticsearch.Cluster(test_jx.global_settings.backend_es)
        aliases = cluster.get_aliases()
        for a in aliases:
            try:
                if a.index.startswith("testing_"):
                    create_time = Date(
                        a.index[-15:], "%Y%m%d_%H%M%S"
                    )  # EXAMPLE testing_0ef53e45b320160118_180420
                    if create_time < Date.now() - 10 * MINUTE:
                        cluster.delete_index(a.index)
            except Exception as e:
                Log.warning("Problem removing {{index|quote}}",
                            index=a.index,
                            cause=e)
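The last 15 characters of a testing index name encode its creation time. The same parse with the standard library, using the name from the comment:

from datetime import datetime, timedelta

index = "testing_0ef53e45b320160118_180420"
create_time = datetime.strptime(index[-15:], "%Y%m%d_%H%M%S")
stale = create_time < datetime.now() - timedelta(minutes=10)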
Example #6
 def test_groupby_expression_and_sort(self):
     test = {
         "data": [
             {"a": Date("2018-04-01 12:34:00").unix, "value": 1},
             {"a": Date("2018-04-01 13:34:00").unix, "value": 3},
             {"a": Date("2018-04-01 15:34:00").unix, "value": 4},
             {"a": Date("2018-04-01 08:34:00").unix, "value": 6},
             {"a": Date("2018-04-02 00:34:00").unix, "value": 7},
             {"value": 99},
             {"a": Date("2018-04-02 01:34:00").unix, "value": 8},
             {"a": Date("2018-04-02 02:44:00").unix, "value": 9},
             {"a": Date("2018-04-02 04:54:00").unix, "value": 10},
             {"a": Date("2018-04-02 14:04:00").unix, "value": 11}
         ],
         "query": {
             "from": TEST_TABLE,
             "groupby": {
                 "name": "date",
                 "value": {"floor": [{"div": ["a", 86400]}]}
             },
             "sort": {"value": {"floor": [{"div": ["a", 86400]}]}}
         },
         "expecting_list": {
             "meta": {"format": "list"},
             "data": [
                 {"date": 17622, "count": 4},
                 {"date": 17623, "count": 5},
                 {"count": 1}
             ]
         },
         "expecting_table": {
             "meta": {"format": "table"},
             "header": ["date", "count"],
             "data": [
                 [17622, 4],
                 [17623, 5],
                 [NULL, 1]
             ]
         },
         "expecting_cube": {
             "meta": {"format": "cube"},
             "edges": [{"name": "date", "domain": {"type": "set", "partitions": [
                 {"value": 17622},
                 {"value": 17623}
             ]}}],
             "data": {
                 "count": [4, 5, 1]
             }
         }
     }
     self.utils.execute_tests(test)
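The expected group keys can be checked by hand: floor(a / 86400) counts whole days since the unix epoch, so the April 1 rows share one key and the April 2 rows the next, while the row without a is grouped separately. Assuming the test dates are GMT:

from datetime import datetime, timezone

a = datetime(2018, 4, 1, 12, 34, tzinfo=timezone.utc).timestamp()
print(int(a // 86400))  # 17622, THE FIRST "date" GROUP ABOVE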
Example #7
    def __init__(self, config):
        self.config = config = wrap(config)
        config.range.min = Date(config.range.min)
        config.range.max = Date(config.range.max)
        config.start = Date(config.start)
        config.interval = Duration(config.interval)
        config.branches = listwrap(config.branches)
        self.destination = bigquery.Dataset(config.destination).get_or_create_table(
            config.destination
        )

        # CALCULATE THE PREVIOUS RUN
        mozci_version = self.version("mozci")
        self.etl_config_table = jx_sqlite.Container(
            config.config_db
        ).get_or_create_facts("etl-range")
        done_result = wrap(self.etl_config_table.query()).data
        prev_done = done_result[0]
        if len(done_result) and prev_done.mozci_version == mozci_version:
            self.done = Data(
                mozci_version=mozci_version,
                min=Date(coalesce(prev_done.min, config.start, "today-2day")),
                max=Date(coalesce(prev_done.max, config.start, "today-2day")),
            )
        else:
            self.done = Data(
                mozci_version=mozci_version,
                min=Date(coalesce(config.start, "today-2day")),
                max=Date(coalesce(config.start, "today-2day")),
            )
            self.etl_config_table.add(self.done)
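coalesce returns the first argument that is not missing, so min and max fall back from the saved state to config.start to the literal default. A simplified sketch (the project's version also treats its own Null as missing):

def coalesce(*args):
    # FIRST ARGUMENT THAT IS NOT None, ELSE None (SIMPLIFIED)
    return next((a for a in args if a is not None), None)

print(coalesce(None, None, "today-2day"))  # today-2day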
Example #8
File: main.py Project: mozilla/cia-tasks
    def __init__(self, config):
        self.config = config = wrap(config)
        config.range.min = Date(config.range.min)
        config.range.max = Date(config.range.max)
        config.start = Date(config.start)
        config.interval = Duration(config.interval)
        config.branches = listwrap(config.branches)
        self.destination = bigquery.Dataset(
            config.destination).get_or_create_table(config.destination)

        # CALCULATE THE PREVIOUS RUN
        mozci_version = self.version("mozci")
        prev_done = self.get_state()
        if prev_done and prev_done.mozci_version == mozci_version:
            self.done = Data(
                mozci_version=mozci_version,
                min=Date(coalesce(prev_done.min, config.start, "today-2day")),
                max=Date(coalesce(prev_done.max, config.start, "today-2day")),
            )
        else:
            self.done = Data(
                mozci_version=mozci_version,
                min=Date(coalesce(config.start, "today-2day")),
                max=Date(coalesce(config.start, "today-2day")),
            )
            self.set_state()
Example #9
    def next(self, value):
        v = Date(value[0])
        if self.last_value.floor(self.duration) > v:
            Log.error("Expecting strictly increasing")
        self.last_value = v

        key = Math.round((v.floor(self.duration) - self.start) / self.duration,
                         decimal=0)
        if key != self.batch:
            self.child.reset()
            self.batch = key

        c = self.child.next(value[1:])
        return [self.batch] + c
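The batch key counts whole durations between the fixed start and the value's floor, so values in the same period share a key and a key change resets the child counter. Hand-checked with a one-day duration:

DAY = 86400
start = 17622 * DAY
value = start + 3 * DAY + 4567            # SOMEWHERE IN THE FOURTH DAY
key = round((value // DAY * DAY - start) / DAY)
print(key)  # 3: THE VALUE BELONGS TO BATCH 3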
Example #10
    def __init__(self, instance_manager, disable_prices=False, kwargs=None):
        self.settings = kwargs
        self.instance_manager = instance_manager
        aws_args = dict(region_name=kwargs.aws.region,
                        aws_access_key_id=unwrap(kwargs.aws.aws_access_key_id),
                        aws_secret_access_key=unwrap(
                            kwargs.aws.aws_secret_access_key))
        self.ec2_conn = boto.ec2.connect_to_region(**aws_args)
        self.vpc_conn = boto.vpc.connect_to_region(**aws_args)
        self.price_locker = Lock()
        self.prices = None
        self.price_lookup = None
        self.no_capacity = {}
        self.no_capacity_file = File(
            kwargs.price_file).parent / "no capacity.json"
        self.done_making_new_spot_requests = Signal()
        self.net_new_locker = Lock()
        self.net_new_spot_requests = UniqueIndex(
            ("id", ))  # SPOT REQUESTS FOR THIS SESSION
        self.watcher = None
        self.active = None

        self.settings.uptime.bid_percentile = coalesce(
            self.settings.uptime.bid_percentile, self.settings.bid_percentile)
        self.settings.uptime.history = coalesce(
            Date(self.settings.uptime.history), DAY)
        self.settings.uptime.duration = coalesce(
            Duration(self.settings.uptime.duration), Date("5minute"))
        self.settings.max_percent_per_type = coalesce(
            self.settings.max_percent_per_type, 1)

        if ENABLE_SIDE_EFFECTS and instance_manager and instance_manager.setup_required():
            self._start_life_cycle_watcher()
        if not disable_prices:
            self.pricing()
Example #11
def test_django_cannot_encode_datetime(extract_job_settings):
    """
    DJANGO DOES NOT ENCODE THE DATETIME PROPERLY
    """
    epoch = Date(Date.EPOCH).datetime
    get_ids = SQL(
        str((Job.objects.filter(
            Q(last_modified__gt=epoch)
            | (Q(last_modified=epoch)
               & Q(id__gt=0))).annotate().values("id").order_by(
                   "last_modified", "id")[:2000]).query))
    source = MySQL(extract_job_settings.source.database)

    with pytest.raises(Exception):
        with source.transaction():
            list(source.query(get_ids, stream=True, row_tuples=True))
Example #12
    def not_monitor(self, please_stop):
        Log.alert("metadata scan has been disabled")
        please_stop.on_go(lambda: self.todo.add(THREAD_STOP))
        while not please_stop:
            column = self.todo.pop()
            if column == THREAD_STOP:
                break
            # if untype_path(column.name) in ["build.type", "run.type"]:
            #     Log.note("found")

            if column.jx_type in STRUCT or split_field(
                    column.es_column)[-1] == EXISTS_TYPE:
                DEBUG and Log.note("{{column.es_column}} is a struct",
                                   column=column)
                column.last_updated = Date.now()
                continue
            elif column.last_updated > Date.now() - TOO_OLD and column.cardinality is not None:
                # DO NOT UPDATE FRESH COLUMN METADATA
                DEBUG and Log.note(
                    "{{column.es_column}} is still fresh ({{ago}} ago)",
                    column=column,
                    ago=(Date.now() - Date(column.last_updated)).seconds)
                continue

            with Timer("Update {{col.es_index}}.{{col.es_column}}",
                       param={"col": column},
                       silent=not DEBUG,
                       too_long=0.05):
                if untype_path(column.name) in ["build.type", "run.type"]:
                    try:
                        self._update_cardinality(column)
                    except Exception as e:
                        Log.warning(
                            "problem getting cardinality for {{column.name}}",
                            column=column,
                            cause=e)
                else:
                    column.last_updated = Date.now()
Example #13
def average_weekly(y, year):
    # RETURN AVERAGE OVER YEAR ENDING JULY 1
    min = Date(year).floor(YEAR) - 6 * MONTH
    max = min + YEAR
    max_seen = min
    acc = 0
    for value, (start, stop) in zip(deaths[y], _death_dates):
        if is_nan(value):
            continue
        if min < stop < max:
            max_seen = stop
            if min < start < max:
                acc += value
            else:
                ratio = (stop - min) / WEEK
                acc += value * ratio
        elif min < start < max:
            max_seen = max
            ratio = (max - start) / WEEK
            acc += value * ratio
        if is_nan(acc):
            Log.error("not expected")
    return acc * WEEK / (max_seen - min)
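A week that straddles the window boundary contributes only its prorated share, which keeps the weekly average honest at the edges. Worked with hypothetical numbers:

WEEK = 7.0
window_min = 100.0                        # WINDOW START, IN DAYS
start, stop, value = 97.0, 104.0, 70.0    # WEEK STRADDLES THE BOUNDARY
ratio = (stop - window_min) / WEEK        # 4/7 OF THE WEEK FALLS INSIDE
print(value * ratio)  # 40.0 OF THE 70 IS COUNTED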
Example #14
    def monitor(self, please_stop):
        please_stop.on_go(lambda: self.todo.add(THREAD_STOP))
        while not please_stop:
            try:
                if not self.todo:
                    with self.meta.columns.locker:
                        old_columns = [
                            c for c in self.meta.columns
                            if (c.last_updated == None or c.last_updated <
                                Date.now() - TOO_OLD) and c.type not in STRUCT
                        ]
                        if old_columns:
                            if DEBUG:
                                Log.note(
                                    "Old columns {{names|json}} last updated {{dates|json}}",
                                    names=wrap(old_columns).es_column,
                                    dates=[
                                        Date(t).format()
                                        for t in wrap(old_columns).last_updated
                                    ])
                            self.todo.extend(old_columns)
                            # TEST CONSISTENCY
                            for c, d in product(list(self.todo.queue),
                                                list(self.todo.queue)):
                                if c.es_column == d.es_column and c.es_index == d.es_index and c != d:
                                    Log.error("")
                        else:
                            if DEBUG:
                                Log.note("no more metatdata to update")

                column = self.todo.pop(Till(seconds=(10 * MINUTE).seconds))
                if DEBUG:
                    Log.note("update {{table}}.{{column}}",
                             table=column.es_index,
                             column=column.es_column)
                if column:
                    if column.es_index in self.index_does_not_exist:
                        with self.meta.columns.locker:
                            self.meta.columns.update({
                                "clear": ".",
                                "where": {
                                    "eq": {
                                        "es_index": column.es_index
                                    }
                                }
                            })
                        continue
                    if column.type in STRUCT or column.es_column.endswith(
                            "." + EXISTS_TYPE):
                        with self.meta.columns.locker:
                            column.last_updated = Date.now()
                        continue
                    elif column.last_updated >= Date.now() - TOO_OLD:
                        continue
                    try:
                        self._update_cardinality(column)
                        if DEBUG and not column.es_index.startswith(
                                TEST_TABLE_PREFIX):
                            Log.note("updated {{column.name}}", column=column)
                    except Exception as e:
                        Log.warning(
                            "problem getting cardinality for {{column.name}}",
                            column=column,
                            cause=e)
            except Exception as e:
                Log.warning("problem in cardinality monitor", cause=e)
Example #15
population_yaxis = {"range": [0, populations_max_y]}

fig = go.Figure(data=[
    go.Bar(name="0-44", x=populations[DATE_COLUMN], y=populations["00"]),
    go.Bar(name="45-64", x=populations[DATE_COLUMN], y=populations["45"]),
    go.Bar(name="65-84", x=populations[DATE_COLUMN], y=populations["65"]),
    go.Bar(name="85+", x=populations[DATE_COLUMN], y=populations["85"]),
])
fig.update_layout(
    title="Population, " + PROVINCE_NAME, barmode="stack", yaxis=population_yaxis
)
fig.show()

recent_year_index = populations.shape[0] - 1  # INDEX OF LAST POPULATION COUNT
recent_year_name = populations.refPer[recent_year_index][:4]
_population_dates = [Date(d) for i, d in enumerate(populations[DATE_COLUMN])]


def get_population(y, date):
    """
    RETURN POPULATION AT GIVEN DATE
    :param date:
    :param y: WHICH POPULATION
    """
    for i, next in enumerate(_population_dates):
        if date < next:
            prev = _population_dates[i - 1]
            y1, y2 = populations[y][i - 1 : i + 1]
            ratio = (date - prev) / (next - prev)
            return (y2 - y1) * ratio + y1
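Between two census dates the population is linearly interpolated. A standalone version of the same arithmetic with hypothetical numbers:

def interpolate(date, prev, nxt, y1, y2):
    # LINEAR INTERPOLATION BETWEEN (prev, y1) AND (nxt, y2)
    ratio = (date - prev) / (nxt - prev)
    return (y2 - y1) * ratio + y1

print(interpolate(2019.5, 2019.0, 2020.0, 100.0, 110.0))  # 105.0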
Example #16
    def not_monitor(self, please_stop):
        Log.alert("metadata scan has been disabled")
        please_stop.then(lambda: self.todo.add(THREAD_STOP))
        while not please_stop:
            pair = self.todo.pop()
            if pair is THREAD_STOP:
                break
            column, after = pair

            with Timer("Update {{col.es_index}}.{{col.es_column}}", param={"col": column}, silent=not DEBUG, too_long=0.05):
                if column.jx_type in STRUCT or split_field(column.es_column)[-1] == EXISTS_TYPE:
                    # DEBUG and Log.note("{{column.es_column}} is a struct", column=column)
                    continue
                elif after and column.last_updated > after:
                    continue  # COLUMN IS STILL YOUNG
                elif column.last_updated > Date.now() - TOO_OLD and column.cardinality > 0:
                    # DO NOT UPDATE FRESH COLUMN METADATA
                    DEBUG and Log.note("{{column.es_column}} is still fresh ({{ago}} ago)", column=column, ago=(Date.now()-Date(column.last_updated)).seconds)
                    continue

                if untype_path(column.name) in KNOWN_MULTITYPES:
                    try:
                        self._update_cardinality(column)
                    except Exception as e:
                        Log.warning("problem getting cardinality for {{column.name}}", column=column, cause=e)
                    continue

                self.meta.columns.update({
                    "set": {
                        "last_updated": Date.now()
                    },
                    "clear": [
                        "count",
                        "cardinality",
                        "multi",
                        "partitions",
                    ],
                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
                })
Example #17
    def monitor(self, please_stop):
        please_stop.then(lambda: self.todo.add(THREAD_STOP))
        while not please_stop:
            try:
                if not self.todo:
                    # LOOK FOR OLD COLUMNS WE CAN RE-SCAN
                    now = Date.now()
                    last_good_update = now - MAX_COLUMN_METADATA_AGE
                    old_columns = [
                        c
                        for c in self.meta.columns
                        if (c.last_updated < last_good_update) and c.jx_type not in STRUCT and c.es_index != META_COLUMNS_NAME
                    ]
                    if old_columns:
                        DEBUG and Log.note(
                            "Old columns {{names|json}} last updated {{dates|json}}",
                            names=wrap(old_columns).es_column,
                            dates=[Date(t).format() for t in wrap(old_columns).last_updated]
                        )
                        self.todo.extend((c, max(last_good_update, c.last_updated)) for c in old_columns)
                    else:
                        DEBUG and Log.note("no more metatdata to update")

                    META_COLUMNS_DESC.last_updated = now

                pair = self.todo.pop(Till(seconds=(10*MINUTE).seconds))
                if pair:
                    if pair is THREAD_STOP:
                        continue
                    column, after = pair

                    now = Date.now()
                    with Timer("review {{table}}.{{column}}", param={"table": column.es_index, "column": column.es_column}, silent=not DEBUG):
                        if column.es_index in self.index_does_not_exist:
                            DEBUG and Log.note("{{column.es_column}} of {{column.es_index}} does not exist", column=column)
                            self.meta.columns.update({
                                "clear": ".",
                                "where": {"eq": {"es_index": column.es_index}}
                            })
                            continue
                        if column.jx_type in STRUCT or split_field(column.es_column)[-1] == EXISTS_TYPE:
                            # DEBUG and Log.note("{{column.es_column}} is a struct, not scanned", column=column)
                            column.last_updated = now
                            continue
                        elif column.cardinality is None:
                            pass  # NO CARDINALITY MEANS WE MUST UPDATE IT
                        elif after and column.last_updated < after:
                            pass  # COLUMN IS TOO OLD
                        elif column.last_updated < now - TOO_OLD:
                            pass  # COLUMN IS WAY TOO OLD
                        else:
                            # DO NOT UPDATE FRESH COLUMN METADATA
                            DEBUG and Log.note("{{column.es_column}} is still fresh ({{ago}} ago)", column=column, ago=(now-Date(column.last_updated)))
                            continue

                        try:
                            self._update_cardinality(column)
                            (DEBUG and not column.es_index.startswith(TEST_TABLE_PREFIX)) and Log.note("updated {{column.name}}", column=column)
                        except Exception as e:
                            if '"status":404' in e:
                                self.meta.columns.update({
                                    "clear": ".",
                                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
                                })
                            else:
                                Log.warning("problem getting cardinality for {{column.name}}", column=column, cause=e)
                    META_COLUMNS_DESC.last_updated = now
            except Exception as e:
                Log.warning("problem in cardinality monitor", cause=e)
Example #18
 def __init__(self, start, duration, child):
     self.duration = Duration(duration)
     self.start = self.last_value = Date(start).floor(self.duration)
     self.batch = 0
     self.child = child
Example #19
        def life_cycle_watcher(please_stop):
            bad_requests = Data()
            setup_threads = []
            last_get = Date.now()
            setup_in_progress = set()

            while not please_stop:
                spot_requests = self._get_managed_spot_requests()
                instances = wrap({
                    i.id: i
                    for r in self.ec2_conn.get_all_instances()
                    for i in r.instances
                })
                # INSTANCES THAT REQUIRE SETUP
                time_to_stop_trying = {}
                please_setup = [
                    (i, r) for i, r in [(instances[r.instance_id], r)
                                        for r in spot_requests]
                    if i.id and (not i.tags.get("Name") or i.tags.get(
                        "Name") == self.settings.ec2.instance.name +
                                 " (setup)") and i.id not in setup_in_progress
                    and i._state.name == "running"
                    and Date.now() > Date(i.launch_time) + DELAY_BEFORE_SETUP
                ]

                for i, r in please_setup:
                    if not time_to_stop_trying.get(i.id):
                        time_to_stop_trying[i.id] = Date.now() + TIME_FROM_RUNNING_TO_LOGIN
                    if Date.now() > time_to_stop_trying[i.id]:
                        # FAIL TO SETUP AFTER x MINUTES, THEN TERMINATE INSTANCE
                        self.ec2_conn.terminate_instances(instance_ids=[i.id])
                        with self.net_new_locker:
                            self.net_new_spot_requests.remove(r.id)
                        Log.warning(
                            "Problem with setup of {{instance_id}}.  Time is up.  Instance TERMINATED!",
                            instance_id=i.id)
                        continue

                    try:
                        p = self.settings.utility[i.instance_type]
                        if p == None:
                            try:
                                self.ec2_conn.terminate_instances(
                                    instance_ids=[i.id])
                                with self.net_new_locker:
                                    self.net_new_spot_requests.remove(r.id)
                            finally:
                                Log.error(
                                    "Can not setup unknown {{instance_id}} of type {{type}}",
                                    instance_id=i.id,
                                    type=i.instance_type)

                        i.markup = p
                        i.add_tag("Name",
                                  self.settings.ec2.instance.name + " (setup)")
                        setup_in_progress.add(i.id)
                        t = Thread.run("setup for " + text(i.id), track_setup,
                                       self.instance_manager.setup, r, i, p)
                        if SINGLE_THREAD_SETUP:
                            t.join()
                        setup_threads.append(t)
                    except Exception as e:
                        i.add_tag("Name", "")
                        Log.warning("Unexpected failure on startup",
                                    instance_id=i.id,
                                    cause=e)

                if Date.now() - last_get > 5 * SECOND:
                    # REFRESH STALE
                    spot_requests = self._get_managed_spot_requests()
                    last_get = Date.now()

                pending = wrap([
                    r for r in spot_requests
                    if r.status.code in PENDING_STATUS_CODES
                ])
                give_up = wrap([
                    r for r in spot_requests
                    if (r.status.code in PROBABLY_NOT_FOR_A_WHILE
                        | TERMINATED_STATUS_CODES) and r.id not in bad_requests
                ])
                ignore = wrap([
                    r for r in spot_requests if r.status.code in MIGHT_HAPPEN
                ])  # MIGHT HAPPEN, BUT NO NEED TO WAIT FOR IT

                if self.done_making_new_spot_requests:
                    with self.net_new_locker:
                        expired = Date.now() - self.settings.run_interval + 2 * MINUTE
                        for ii in list(self.net_new_spot_requests):
                            if Date(ii.create_time) < expired:
                                # SOMETIMES REQUESTS NEVER GET INTO THE MAIN LIST OF REQUESTS
                                self.net_new_spot_requests.remove(ii)

                        for g in ignore:
                            self.net_new_spot_requests.remove(g.id)
                        pending = UniqueIndex(("id", ), data=pending)
                        pending = pending | self.net_new_spot_requests

                    if give_up:
                        self.ec2_conn.cancel_spot_instance_requests(
                            request_ids=give_up.id)
                        Log.note(
                            "Cancelled spot requests {{spots}}, {{reasons}}",
                            spots=give_up.id,
                            reasons=give_up.status.code)

                        for g in give_up:
                            bad_requests[g.id] += 1
                            if g.id in self.net_new_spot_requests:
                                self.net_new_spot_requests.remove(g.id)
                                if g.status.code == "capacity-not-available":
                                    self.no_capacity[g.launch_specification.instance_type] = Date.now()
                                if g.status.code == "bad-parameters":
                                    self.no_capacity[g.launch_specification.instance_type] = Date.now()
                                    Log.warning(
                                        "bad parameters while requesting type {{type}}",
                                        type=g.launch_specification.instance_type)

                if not pending and self.done_making_new_spot_requests:
                    Log.note("No more pending spot requests")
                    break
                elif pending:
                    Log.note("waiting for spot requests: {{pending}}",
                             pending=[p.id for p in pending])

                (Till(seconds=10) | please_stop).wait()

            with Timer("Save no capacity to file"):
                table = [{
                    "instance_type": k,
                    "last_failure": v
                } for k, v in self.no_capacity.items()]
                self.no_capacity_file.write(value2json(table, pretty=True))

            # WAIT FOR SETUP TO COMPLETE
            for t in setup_threads:
                t.join()

            Log.note("life cycle watcher has stopped")
Example #20
    def monitor(self, please_stop):
        please_stop.on_go(lambda: self.todo.add(THREAD_STOP))
        while not please_stop:
            try:
                if not self.todo:
                    old_columns = [
                        c for c in self.meta.columns
                        if ((c.last_updated < Date.now() -
                             MAX_COLUMN_METADATA_AGE) or c.cardinality == None)
                        and c.jx_type not in STRUCT
                    ]
                    if old_columns:
                        DEBUG and Log.note(
                            "Old columns {{names|json}} last updated {{dates|json}}",
                            names=wrap(old_columns).es_column,
                            dates=[
                                Date(t).format()
                                for t in wrap(old_columns).last_updated
                            ])
                        self.todo.extend(old_columns)
                    else:
                        DEBUG and Log.note("no more metatdata to update")

                column = self.todo.pop(Till(seconds=(10 * MINUTE).seconds))
                if column:
                    if column is THREAD_STOP:
                        continue

                    with Timer("update {{table}}.{{column}}",
                               param={
                                   "table": column.es_index,
                                   "column": column.es_column
                               },
                               silent=not DEBUG):
                        if column.es_index in self.index_does_not_exist:
                            DEBUG and Log.note(
                                "{{column.es_column}} does not exist",
                                column=column)
                            self.meta.columns.update({
                                "clear": ".",
                                "where": {
                                    "eq": {
                                        "es_index": column.es_index
                                    }
                                }
                            })
                            continue
                        if column.jx_type in STRUCT or split_field(
                                column.es_column)[-1] == EXISTS_TYPE:
                            DEBUG and Log.note(
                                "{{column.es_column}} is a struct",
                                column=column)
                            column.last_updated = Date.now()
                            continue
                        elif column.last_updated > Date.now() - TOO_OLD and column.cardinality is not None:
                            # DO NOT UPDATE FRESH COLUMN METADATA
                            DEBUG and Log.note(
                                "{{column.es_column}} is still fresh ({{ago}} ago)",
                                column=column,
                                ago=(Date.now() -
                                     Date(column.last_updated)).seconds)
                            continue
                        try:
                            self._update_cardinality(column)
                            (DEBUG and
                             not column.es_index.startswith(TEST_TABLE_PREFIX)
                             ) and Log.note("updated {{column.name}}",
                                            column=column)
                        except Exception as e:
                            if '"status":404' in e:
                                self.meta.columns.update({
                                    "clear": ".",
                                    "where": {
                                        "eq": {
                                            "es_index": column.es_index,
                                            "es_column": column.es_column
                                        }
                                    }
                                })
                            else:
                                Log.warning(
                                    "problem getting cardinality for {{column.name}}",
                                    column=column,
                                    cause=e)
            except Exception as e:
                Log.warning("problem in cardinality monitor", cause=e)
Example #21
    def _get_spot_prices_from_aws(self):
        with Timer("Read no capacity file"):
            try:
                # FILE IS LIST OF {instance_type, last_failure} OBJECTS
                content = self.no_capacity_file.read()
                self.no_capacity = dict(
                    (r.instance_type, r.last_failure)
                    for r in convert.json2value(
                        content, flexible=False, leaves=False))
            except Exception as e:
                self.no_capacity = {}

        with Timer("Read pricing file"):
            try:
                content = File(self.settings.price_file).read()
                cache = convert.json2value(content,
                                           flexible=False,
                                           leaves=False)
            except Exception as e:
                cache = FlatList()

        cache = ListContainer(name=None, data=cache)
        most_recents = jx.run({
            "from": cache,
            "edges": ["instance_type", "availability_zone"],
            "select": {
                "value": "timestamp",
                "aggregate": "max"
            }
        })

        zones = self._get_valid_availability_zones()
        prices = set(cache)
        with Timer("Get pricing from AWS"):
            for instance_type in self.settings.utility.keys():
                for zone in zones:
                    if cache:
                        most_recent = most_recents[{
                            "instance_type": instance_type,
                            "availability_zone": zone
                        }].timestamp
                        start_at = MAX(
                            [Date(most_recent),
                             Date.today() - WEEK])
                    else:
                        start_at = Date.today() - WEEK

                    if DEBUG_PRICING:
                        Log.note(
                            "get pricing for {{instance_type}} starting at {{start_at}}",
                            instance_type=instance_type,
                            start_at=start_at)

                    next_token = None
                    while True:
                        resultset = self.ec2_conn.get_spot_price_history(
                            product_description=coalesce(
                                self.settings.product,
                                "Linux/UNIX (Amazon VPC)"),
                            instance_type=instance_type,
                            availability_zone=zone,
                            start_time=start_at.format(ISO8601),
                            next_token=next_token)
                        next_token = resultset.next_token

                        for p in resultset:
                            prices.add(
                                wrap({
                                    "availability_zone": p.availability_zone,
                                    "instance_type": p.instance_type,
                                    "price": p.price,
                                    "product_description":
                                    p.product_description,
                                    "region": p.region.name,
                                    "timestamp": Date(p.timestamp).unix
                                }))

                        if not next_token:
                            break

        with Timer("Save prices to file"):
            new_prices = jx.filter(
                prices, {"gte": {
                    "timestamp": {
                        "date": "today-2day"
                    }
                }})

            def stream():  # IT'S A LOT OF PRICES, STREAM THEM TO FILE
                prefix = "[\n"
                for p in new_prices:
                    yield prefix
                    yield convert.value2json(p)
                    prefix = ",\n"
                yield "]"

            File(self.settings.price_file).write(stream())

        return ListContainer(name="prices", data=prices)
Example #22
    def pull_all_remaining(self, please_stop):
        try:
            try:
                content = File(self.settings.extract.last).read_json()
                if len(content) == 1:
                    Log.note("Got a manually generated file {{filename}}",
                             filename=self.settings.extract.last)
                    start_point = tuple(content[0])
                    first_value = [
                        self._extract.start[0] + (start_point[0] * DAY),
                        start_point[1]
                    ]
                else:
                    Log.note("Got a machine generated file {{filename}}",
                             filename=self.settings.extract.last)
                    start_point, first_value = content
                    start_point = tuple(start_point)
                Log.note("First value is {{start1|date}}, {{start2}}",
                         start1=first_value[0],
                         start2=first_value[1])
            except Exception as _:
                Log.error(
                    "Expecting a file {{filename}} with the last good S3 bucket etl id in array form eg: [[954, 0]]",
                    filename=self.settings.extract.last)
                start_point = tuple(self._extract.start)
                first_value = Null

            counter = Counter(start=0)
            for t, s, b, f, i in reversed(
                    zip(self._extract.type, self._extract.start,
                        self._extract.batch,
                        listwrap(first_value) + DUMMY_LIST,
                        range(len(self._extract.start)))):
                if t == "time":
                    counter = DurationCounter(start=s,
                                              duration=b,
                                              child=counter)
                    first_value[i] = Date(f)
                else:
                    counter = BatchCounter(start=s, size=b, child=counter)

            batch_size = self._extract.batch.last() * 2 * self.settings.extract.threads
            with MySQL(**self.settings.snowflake.database) as db:
                while not please_stop:
                    sql = self._build_list_sql(db, first_value, batch_size + 1)
                    pending = []
                    counter.reset(start_point)
                    with Timer("Grab a block of ids for processing"):
                        with closing(db.db.cursor()) as cursor:
                            acc = []
                            cursor.execute(sql)
                            count = 0
                            for row in cursor:
                                detail_key = counter.next(row)
                                key = tuple(detail_key[:-1])
                                count += 1
                                if key != start_point:
                                    if first_value:
                                        if not acc:
                                            Log.error(
                                                "not expected, {{filename}} is probably set too far in the past",
                                                filename=self.settings.extract.last)
                                        pending.append({
                                            "start_point": start_point,
                                            "first_value": first_value,
                                            "data": acc
                                        })
                                    acc = []
                                    start_point = key
                                    first_value = row
                                acc.append(row[-1])  # ASSUME LAST COLUMN IS THE FACT TABLE id
                    Log.note("adding {{num}} for processing", num=len(pending))
                    self.queue.extend(pending)

                    if count < batch_size:
                        self.queue.add(THREAD_STOP)
                        break
        except Exception as e:
            Log.warning("Problem pulling data", cause=e)
        finally:
            self.done_pulling.go()
            Log.note("pulling new data is done")
Example #23
def sql_time(time):
    return sql_call("TIMESTAMP_MICROS",
                    quote_value(int(Date(time).unix * 1000000)))
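BigQuery's TIMESTAMP_MICROS expects microseconds since the epoch; the equivalent conversion with the standard library:

from datetime import datetime, timezone

t = datetime(2020, 1, 17, 12, 0, tzinfo=timezone.utc)
print(f"TIMESTAMP_MICROS({int(t.timestamp() * 1_000_000)})")
# TIMESTAMP_MICROS(1579262400000000)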
Example #24
def _path(timestamp):
    return Date(timestamp).format("%Y/%m/%d")
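The same "%Y/%m/%d" layout with the standard library, a common key prefix for date-partitioned storage:

from datetime import datetime, timezone

print(datetime.fromtimestamp(1578427105, tz=timezone.utc).strftime("%Y/%m/%d"))
# 2020/01/07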
Example #25
def complex_job(
    transactional_db, generic_reference_data, test_repository, extract_job_settings, now
):
    fc = FailureClassification.objects.create(id=1, name="not classified")
    repository_group = RepositoryGroup.objects.create(name="common")
    repo = Repository.objects.create(name="autoland", repository_group=repository_group)

    push = Push.objects.create(
        **{
            "author": "*****@*****.**",
            "repository": repo,
            "revision": "ae6bb3a1066959a8c43d003a3caab0af769455bf",
            "time": unix2datetime(1578427105).replace(tzinfo=None),
        }
    )

    Commit.objects.create(
        push=push,
        revision="ae6bb3a1066959a8c43d003a3caab0af769455bf",
        author="*****@*****.**",
        comments="no comment",
    )
    Commit.objects.create(
        push=push,
        revision="0123456789012345678901234567890123456789",
        author="*****@*****.**",
        comments="no comment2",
    )

    debug = Option.objects.create(name="debug")
    oc = OptionCollection.objects.create(option_collection_hash=Random.base64(5), option=debug)

    job = Job.objects.create(
        autoclassify_status=1,
        guid=Random.base64(20),
        repository=test_repository,
        push_id=push.id,
        signature=generic_reference_data.signature,
        build_platform=generic_reference_data.build_platform,
        machine_platform=generic_reference_data.machine_platform,
        machine=generic_reference_data.machine,
        option_collection_hash=oc.option_collection_hash,
        job_type=generic_reference_data.job_type,
        job_group=generic_reference_data.job_group,
        product=generic_reference_data.product,
        failure_classification_id=fc.id,
        who="*****@*****.**",
        reason="scheduled",
        result="success",
        state="completed",
        submit_time=unix2datetime(1578427253).replace(tzinfo=None),
        start_time=unix2datetime(1578430841).replace(tzinfo=None),
        last_modified=unix2datetime(1578432686.364459).replace(tzinfo=None),
        end_time=unix2datetime(1578432680).replace(tzinfo=None),
        tier=1,
    )

    text_log_step = TextLogStep.objects.create(
        job=job,
        **{
            "finished_line_number": 88739,
            "name": "Unnamed step",
            "result": 7,
            "started_line_number": 0,
        },
    )

    TextLogError.objects.create(
        step=text_log_step, line="line contents here", line_number=619845839
    )
    TextLogError.objects.create(step=text_log_step, line="ERROR! more line contents", line_number=6)

    TaskclusterMetadata.objects.create(job=job, retry_id=0, task_id="WWb9ExAvQUa78ku0DIxdSQ")

    JobLog.objects.create(
        **{
            "job_id": job.id,
            "name": "builds-4h",
            "status": 1,
            "url": "https://example.com/api/queue/v1/task/WWb9ExAvQUa78ku0DIxdSQ/runs/0/artifacts/public/logs/live_backing.log",
        }
    )
    job_logs1 = JobLog.objects.create(
        **{
            "job_id": job.id,
            "name": "errorsummary_json",
            "status": 1,
            "url": "https://example.com/api/queue/v1/task/WWb9ExAvQUa78ku0DIxdSQ/runs/0/artifacts/public/test_info/wpt_errorsummary.log",
        }
    )

    bcf = ClassifiedFailure.objects.create(**{"bug_number": 1234567,})
    bcf.created = Date("2020-01-17 12:00:00").datetime
    bcf.save()

    FailureLine.objects.create(
        job_log=job_logs1,
        **{
            "action": "test_groups",
            "best_classification": bcf,
            "best_is_verified": True,
            "repository": repo,
            "job_guid": job.guid,
            "line": 15,
            "modified": 0,
            "stackwalk_stderr": 1578432686,
            "stackwalk_stdout": 1578432686,
        },
    )
    FailureLine.objects.create(
        job_log=job_logs1,
        **{
            "action": "crash",
            "best_classification": bcf,
            "best_is_verified": False,
            "repository": repo,
            "job_guid": job.guid,
            "line": 24031,
            "modified": 0,
            "signature": "@ mozilla::dom::CustomElementData::SetCustomElementDefinition(mozilla::dom::CustomElementDefinition*)",
            "stackwalk_stderr": 1578432686,
            "stackwalk_stdout": 1578432686,
            "test": "/custom-elements/upgrading.html",
        },
    )

    return job
Example #26
    def __init__(self, kwargs=None):
        self.settings = kwargs
        self.schema = SnowflakeSchema(self.settings.snowflake)
        self._extract = extract = kwargs.extract

        # SOME PREP
        get_git_revision()

        # VERIFY WE DO NOT HAVE TOO MANY OTHER PROCESSES WORKING ON STUFF
        with MySQL(**kwargs.snowflake.database) as db:
            processes = None
            try:
                processes = jx.filter(
                    db.query("show processlist"), {
                        "and": [{
                            "neq": {
                                "Command": "Sleep"
                            }
                        }, {
                            "neq": {
                                "Info": "show processlist"
                            }
                        }]
                    })
            except Exception as e:
                Log.warning("no database", cause=e)

            if processes:
                if DEBUG:
                    Log.warning("Processes are running\n{{list|json}}",
                                list=processes)
                else:
                    Log.error("Processes are running\n{{list|json}}",
                              list=processes)

        extract.type = listwrap(extract.type)
        extract.start = listwrap(extract.start)
        extract.batch = listwrap(extract.batch)
        extract.field = listwrap(extract.field)
        if any(
                len(extract.type) != len(other)
                for other in [extract.start, extract.batch, extract.field]):
            Log.error(
                "Expecting same number of dimensions for `type`, `start`, `batch`, and `field` in the `extract` inner object"
            )
        for i, t in enumerate(extract.type):
            if t == "time":
                extract.start[i] = Date(extract.start[i])
                extract.batch[i] = Duration(extract.batch[i])
            elif t == "number":
                pass
            else:
                Log.error('Expecting `extract.type` to be "number" or "time"')

        extract.threads = coalesce(extract.threads, 1)
        self.done_pulling = Signal()
        self.queue = Queue("all batches",
                           max=2 * coalesce(extract.threads, 1),
                           silent=True)

        self.bucket = s3.Bucket(self.settings.destination)
        self.notify = aws.Queue(self.settings.notify)
        Thread.run("get records", self.pull_all_remaining)