Example 1
def core():
    measurement_cnt = 0

    # There are 3 main data sources, in order of age:
    # - cans on S3
    # - older report files on collectors (max 1 day of age)
    # - report files on collectors fetched in "real-time"
    # Load json/yaml files and apply filters like canning

    t00 = time.time()
    scores = None

    # Spawn worker processes
    # 'queue' is a singleton from the portable_queue module
    workers = [
        mp.Process(target=msm_processor, args=(queue,)) for n in range(NUM_WORKERS)
    ]
    try:
        for w in workers:
            w.start()

        for measurement_tup in fetch_measurements(conf.start_day, conf.end_day):
            assert len(measurement_tup) == 2
            msm_jstr, msm = measurement_tup
            assert msm_jstr is None or isinstance(msm_jstr, (str, bytes)), type(
                msm_jstr
            )
            assert msm is None or isinstance(msm, dict)

            measurement_cnt += 1
            while queue.qsize() >= 500:
                time.sleep(0.1)
            queue.put(measurement_tup)
            metrics.gauge("queue_size", queue.qsize())

            if conf.stop_after is not None and measurement_cnt >= conf.stop_after:
                log.info(
                    "Exiting with stop_after. Total runtime: %f", time.time() - t00
                )
                break

            # Interact from CLI
            if conf.devel and conf.interact:
                import bpython  # debdeps: bpython3

                bpython.embed(locals_=locals())
                break

    except Exception as e:
        log.exception(e)

    finally:
        shut_down(queue)
        clean_caches()
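
The busy-wait on queue.qsize() in core() is a simple backpressure mechanism: the producer pauses whenever roughly 500 items are already queued, so slow workers are not flooded. Below is a minimal sketch of the same idea factored into a reusable helper; the name put_with_backpressure and its parameters are illustrative and not part of fastpath:

import time

def put_with_backpressure(queue, item, high_watermark=500, poll_interval=0.1):
    """Block the producer while the queue is at or above the watermark."""
    # Note: qsize() is approximate for multiprocessing queues and is not
    # implemented on every platform, which is presumably part of what the
    # portable_queue wrapper mentioned above accounts for.
    while queue.qsize() >= high_watermark:
        time.sleep(poll_interval)
    queue.put(item)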
Example 2
def process_measurements_from_s3(queue):
    """Pull measurements from S3 and place them in the queue"""
    for measurement_tup in s3feeder.stream_cans(conf, conf.start_day,
                                                conf.end_day):
        assert len(measurement_tup) == 3
        msm_jstr, msm, msm_uid = measurement_tup
        assert msm_jstr is None or isinstance(msm_jstr,
                                              (str, bytes)), type(msm_jstr)
        assert msm is None or isinstance(msm, dict)

        while queue.qsize() >= 500:
            time.sleep(0.1)
        assert measurement_tup is not None
        queue.put(measurement_tup)
        metrics.gauge("queue_size", queue.qsize())
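
Note that the S3 path enqueues three-element tuples (msm_jstr, msm, msm_uid), while core() in Example 1 enqueues two-element tuples. A consumer draining the queue may therefore need to cope with both shapes; the helper below is a hypothetical illustration of that difference, not part of fastpath:

def normalize_measurement_tup(tup):
    # Hypothetical helper: pad real-time (msm_jstr, msm) tuples with a None
    # UID so they match the (msm_jstr, msm, msm_uid) shape of the S3 path.
    if len(tup) == 2:
        msm_jstr, msm = tup
        return msm_jstr, msm, None
    return tup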
Example 3
def test_score_web_connectivity_with_workers(cans, tmp_path):
    # Run worker processes on a big can.
    # Mock out database interactions but write output JSON files.
    can = cans["big2858"]
    expected_cnt = 2858
    outdir = tmp_path

    assert tuple(tmp_path.glob("*")) == ()
    import fastpath.portable_queue as queue
    import multiprocessing as mp

    fp.db.setup = MagicMock()
    fp.db.trim_old_measurements = MagicMock()
    fp.db._autocommit_conn = MagicMock()

    m1 = MagicMock(name="mycursor")
    mctx = MagicMock(name="mock_ctx")

    # By mocking SQL execute(), each worker logs its queries to a dedicated
    # file. We then collect the files to verify that all inputs were processed.
    def mock_execute(query, *a, **kw):
        try:
            pid = os.getpid()
            wl = outdir / f"{pid}.wlog"
            if wl.is_file():
                log.debug("Loading %s", wl)
                d = ujson.load(wl.open())
            else:
                d = dict(inserted_tids=[], other_queries=[])
            if "INSERT INTO fastpath" in query:
                query_args = a[0]
                assert len(query_args) == 11
                tid = query_args[0]
                d["inserted_tids"].append(tid)
            elif "SELECT pg_notify('fastpath" in query:
                pass
            else:
                d["other_queries"].append(query)
            ujson.dump(d, wl.open("w"))
        except Exception as e:
            log.exception(e)

    mctx.execute = mock_execute
    m1.__enter__ = MagicMock(name="myenter", return_value=mctx)
    fp.db._autocommit_conn.cursor = MagicMock(name="curgen", return_value=m1)

    workers = [
        mp.Process(target=fp.msm_processor, args=(queue,)) for n in range(4)
    ]
    for w in workers:
        w.start()
    for w in workers:
        wl = outdir / f"{w.pid}.wlog"
        if wl.is_file():
            wl.unlink()
        assert w.is_alive()

    for msm_n, msm in load_can(can):
        queue.put((None, msm))
    assert msm_n == expected_cnt - 1

    for w in workers:
        # each worker will receive one terminator message and quit
        queue.put(None)

    while any(w.is_alive() for w in workers):
        log.debug("waiting...")
        time.sleep(0.1)

    assert len(tuple(tmp_path.glob("*"))) == expected_cnt, tmp_path

    all_inserted_tids = set()
    for w in workers:
        wl = outdir / f"{w.pid}.wlog"
        assert wl.is_file(), "The worker did not create a logfile"
        d = ujson.load(wl.open())
        wl.unlink()
        s = set(d["inserted_tids"])
        assert len(s) == len(d["inserted_tids"]), "Duplicate INSERT INTO"
        dup = all_inserted_tids & s
        assert len(dup) == 0, f"{dup} inserted by different workers"
        all_inserted_tids = all_inserted_tids | s

    assert len(all_inserted_tids) == expected_cnt
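
The MagicMock wiring in this test mirrors the way the production code presumably uses the database connection: a cursor obtained from _autocommit_conn is entered as a context manager and its execute() runs the query, which is why m1.__enter__ must return mctx. A hypothetical sketch of that assumed call pattern (insert_measurement and its arguments are illustrative, not fastpath code):

def insert_measurement(conn, row):
    # conn.cursor() returns m1 in the test; entering it yields mctx, whose
    # execute() is replaced by mock_execute and records the query per worker.
    with conn.cursor() as cur:
        cur.execute("INSERT INTO fastpath ...", row)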
Example 4
def shut_down(queue):
    log.info("Shutting down workers")
    # Send one terminator per worker; each exits after consuming a single None
    for _ in range(NUM_WORKERS):
        queue.put(None)
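
shut_down() pushes one None terminator per worker, matching the comment in Example 3 that each worker receives one terminator message and quits. A minimal sketch of a consumer loop compatible with that protocol; the real msm_processor is not shown in these examples, so this is an assumption:

def example_worker(queue):
    # Hypothetical consumer loop: keep pulling measurement tuples until the
    # None terminator arrives, then return so the parent sees is_alive()
    # become False.
    while True:
        item = queue.get()
        if item is None:
            return
        # ... unpack the measurement tuple and score/store the measurement ...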