Ejemplo n.º 1
0
def test_row_counts():
    """Run a single app under monitoring and verify the row counts in
    each monitoring database table match one workflow / one task / one try,
    plus the expected node and block status records.
    """
    # this is imported here rather than at module level because
    # it isn't available in a plain parsl install, so this module
    # would otherwise fail to import and break even a basic test
    # run.
    import sqlalchemy
    from parsl.tests.configs.htex_local_alternate import fresh_config

    # Start from a clean database so the counts asserted below are absolute,
    # not relative to leftovers from a previous run.
    if os.path.exists("monitoring.db"):
        logger.info("Monitoring database already exists - deleting")
        os.remove("monitoring.db")

    logger.info("loading parsl")
    parsl.load(fresh_config())

    logger.info("invoking and waiting for result")
    assert this_app().result() == 5

    logger.info("cleaning up parsl")
    parsl.dfk().cleanup()
    parsl.clear()

    # at this point, we should find one row in the monitoring database.

    logger.info("checking database content")
    engine = sqlalchemy.create_engine("sqlite:///monitoring.db")
    with engine.begin() as connection:

        # Textual SQL is wrapped in sqlalchemy.text(): executing a plain
        # string was deprecated in SQLAlchemy 1.4 and removed in 2.0.
        result = connection.execute(sqlalchemy.text("SELECT COUNT(*) FROM workflow"))
        (c, ) = result.first()
        assert c == 1

        result = connection.execute(sqlalchemy.text("SELECT COUNT(*) FROM task"))
        (c, ) = result.first()
        assert c == 1

        result = connection.execute(sqlalchemy.text("SELECT COUNT(*) FROM try"))
        (c, ) = result.first()
        assert c == 1

        # No task should reach exec_done without a recorded running time.
        result = connection.execute(sqlalchemy.text(
            "SELECT COUNT(*) FROM status, try "
            "WHERE status.task_id = try.task_id "
            "AND status.task_status_name='exec_done' "
            "AND task_try_time_running is NULL"))
        (c, ) = result.first()
        assert c == 0

        # Two entries: one showing manager active, one inactive
        result = connection.execute(sqlalchemy.text("SELECT COUNT(*) FROM node"))
        (c, ) = result.first()
        assert c == 2

        # There should be at least two block status rows for the one block:
        # the local provider has a status_polling_interval of 5s, so the
        # exact number of polls depends on run duration.
        result = connection.execute(sqlalchemy.text("SELECT COUNT(*) FROM block"))
        (c, ) = result.first()
        assert c >= 2

    logger.info("all done")
Ejemplo n.º 2
0
def test_row_counts():
    """Run a single app under monitoring and verify that exactly one
    workflow, one task and one try row end up in the monitoring database.
    """
    # this is imported here rather than at module level because
    # it isn't available in a plain parsl install, so this module
    # would otherwise fail to import and break even a basic test
    # run.
    import sqlalchemy
    from parsl.tests.configs.htex_local_alternate import fresh_config

    # Start from a clean database so the counts asserted below are absolute,
    # not relative to leftovers from a previous run.
    if os.path.exists("monitoring.db"):
        logger.info("Monitoring database already exists - deleting")
        os.remove("monitoring.db")

    logger.info("loading parsl")
    parsl.load(fresh_config())

    logger.info("invoking and waiting for result")
    assert this_app().result() == 5

    logger.info("cleaning up parsl")
    parsl.dfk().cleanup()
    parsl.clear()

    # at this point, we should find one row in the monitoring database.

    logger.info("checking database content")
    engine = sqlalchemy.create_engine("sqlite:///monitoring.db")
    with engine.begin() as connection:

        # Textual SQL is wrapped in sqlalchemy.text(): executing a plain
        # string was deprecated in SQLAlchemy 1.4 and removed in 2.0.
        result = connection.execute(sqlalchemy.text("SELECT COUNT(*) FROM workflow"))
        (c, ) = result.first()
        assert c == 1

        result = connection.execute(sqlalchemy.text("SELECT COUNT(*) FROM task"))
        (c, ) = result.first()
        assert c == 1

        result = connection.execute(sqlalchemy.text("SELECT COUNT(*) FROM try"))
        (c, ) = result.first()
        assert c == 1

    logger.info("all done")
Ejemplo n.º 3
0
def test_hashsum():
    """Run the same app several times (three with identical arguments, one
    with a different argument) and verify both the in-memory hashsums and
    the monitoring database's record of exec_done vs memo_done statuses.
    """
    # Imported here rather than at module level because these are not
    # available in a plain parsl install.
    import sqlalchemy
    from parsl.tests.configs.htex_local_alternate import fresh_config

    # Start from a clean database so the counts asserted below are absolute.
    if os.path.exists("monitoring.db"):
        logger.info("Monitoring database already exists - deleting")
        os.remove("monitoring.db")

    logger.info("loading parsl")
    parsl.load(fresh_config())

    logger.info("invoking and waiting for result (1/4)")
    f1 = this_app(4)
    assert f1.result() == 5

    logger.info("invoking and waiting for result (2/4)")
    f2 = this_app(17)
    assert f2.result() == 18

    logger.info("invoking and waiting for result (3/4)")
    f3 = this_app(4)
    assert f3.result() == 5

    logger.info("invoking and waiting for result (4/4)")
    f4 = this_app(4)
    assert f4.result() == 5

    # Identical arguments must hash identically; a different argument
    # must hash differently.
    assert f1.task_def['hashsum'] == f3.task_def['hashsum']
    assert f1.task_def['hashsum'] == f4.task_def['hashsum']
    assert f1.task_def['hashsum'] != f2.task_def['hashsum']

    logger.info("cleaning up parsl")
    parsl.dfk().cleanup()
    parsl.clear()

    logger.info("checking database content")
    engine = sqlalchemy.create_engine("sqlite:///monitoring.db")
    with engine.begin() as connection:

        # All four invocations get a task row, even the two memoized ones.
        # Textual SQL is wrapped in sqlalchemy.text(): executing a plain
        # string was deprecated in SQLAlchemy 1.4 and removed in 2.0.
        result = connection.execute(sqlalchemy.text("SELECT COUNT(*) FROM task"))
        (task_count, ) = result.first()
        assert task_count == 4

        # The number of task rows for each hashsum matches the above app
        # invocations. Hashsums are passed as bound parameters rather than
        # interpolated into the SQL string.
        result = connection.execute(
            sqlalchemy.text("SELECT COUNT(task_hashsum) FROM task WHERE task_hashsum = :hashsum"),
            {"hashsum": f1.task_def['hashsum']})
        (hashsum_count, ) = result.first()
        assert hashsum_count == 3

        result = connection.execute(
            sqlalchemy.text("SELECT COUNT(task_hashsum) FROM task WHERE task_hashsum = :hashsum"),
            {"hashsum": f2.task_def['hashsum']})
        (hashsum_count, ) = result.first()
        assert hashsum_count == 1

        # Two tasks actually executed (f1 and f2)...
        result = connection.execute(sqlalchemy.text(
            "SELECT COUNT(*) FROM status WHERE task_status_name='exec_done'"))
        (memo_count, ) = result.first()
        assert memo_count == 2

        # ...and two were satisfied from the memo table (f3 and f4).
        result = connection.execute(sqlalchemy.text(
            "SELECT COUNT(*) FROM status WHERE task_status_name='memo_done'"))
        (memo_count, ) = result.first()
        assert memo_count == 2

    logger.info("all done")
Ejemplo n.º 4
0
def test_row_counts():
    """Run apps, inject fuzz into the monitoring network channels, then run
    more apps and verify the monitoring database still recorded every task —
    i.e. the monitoring router survives malformed input.
    """
    # Imported here rather than at module level because these are not
    # available in a plain parsl install.
    from parsl.tests.configs.htex_local_alternate import fresh_config
    import sqlalchemy

    # Start from a clean database so the counts asserted below are absolute.
    if os.path.exists("monitoring.db"):
        logger.info("Monitoring database already exists - deleting")
        os.remove("monitoring.db")

    logger.info("loading parsl")
    parsl.load(fresh_config())

    logger.info("invoking apps and waiting for result")

    assert this_app().result() == 5
    assert this_app().result() == 5

    # now we've run some apps, send fuzz into the monitoring ZMQ
    # socket, before trying to run some more tests.

    # there are different kinds of fuzz:
    # could send ZMQ messages that are weird
    # could send random bytes to the TCP socket
    #   the latter is what i'm most suspicious of in my present investigation

    # dig out the interchange port...
    hub_address = parsl.dfk().hub_address
    hub_interchange_port = parsl.dfk().hub_interchange_port

    # this will send a string to a new socket connection
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.connect((hub_address, hub_interchange_port))
        s.sendall(b'fuzzing\r')

    # this will send a non-object down the DFK's existing ZMQ connection
    parsl.dfk().monitoring._dfk_channel.send(b'FuzzyByte\rSTREAM')

    # This following attack is commented out, because monitoring is not resilient
    # to this.
    # In practice, it works some of the time but in some circumstances,
    # it would still abandon writing multiple unrelated records to the database,
    # causing ongoing monitoring data loss.

    # This will send an unusual python object down the
    # DFK's existing ZMQ connection. this doesn't break the router,
    # but breaks the db_manager in a way that isn't reported until
    # the very end of the run, and database writing is abandoned
    # rather than completing, in this case.
    # I'm unclear if this is a case we should be trying to handle.
    # parsl.dfk().monitoring._dfk_channel.send_pyobj("FUZZ3")

    # hopefully long enough for any breakage to happen
    # before attempting to run more tasks
    time.sleep(5)

    assert this_app().result() == 5
    assert this_app().result() == 5

    logger.info("cleaning up parsl")
    parsl.dfk().cleanup()
    parsl.clear()

    # All four invocations (two before and two after the fuzzing) should
    # have been recorded despite the injected garbage.

    logger.info("checking database content")
    engine = sqlalchemy.create_engine("sqlite:///monitoring.db")
    with engine.begin() as connection:

        # Textual SQL is wrapped in sqlalchemy.text(): executing a plain
        # string was deprecated in SQLAlchemy 1.4 and removed in 2.0.
        result = connection.execute(sqlalchemy.text("SELECT COUNT(*) FROM workflow"))
        (c, ) = result.first()
        assert c == 1

        result = connection.execute(sqlalchemy.text("SELECT COUNT(*) FROM task"))
        (c, ) = result.first()
        assert c == 4

        result = connection.execute(sqlalchemy.text("SELECT COUNT(*) FROM try"))
        (c, ) = result.first()
        assert c == 4

    logger.info("all done")
Ejemplo n.º 5
0
def test_row_counts():
    """Hold an sqlite read lock on the monitoring database while a task
    runs, then verify the database still ends up with the expected one
    workflow / one task / one try — i.e. monitoring retries writes that
    were blocked by the lock rather than losing them.
    """
    # Imported here rather than at module level because these are not
    # available in a plain parsl install.
    from parsl.tests.configs.htex_local_alternate import fresh_config
    import sqlalchemy

    # Start from a clean database so the counts asserted below are absolute.
    if os.path.exists("monitoring.db"):
        logger.info("Monitoring database already exists - deleting")
        os.remove("monitoring.db")

    engine = sqlalchemy.create_engine("sqlite:///monitoring.db")

    logger.info("loading parsl")
    parsl.load(fresh_config())

    # parsl.load() returns before all initialisation of monitoring
    # is complete, which means it isn't safe to take a read lock on
    # the database yet. This delay tries to work around that - some
    # better async behaviour might be nice, but what?
    #
    # Taking a read lock before monitoring is initialized will cause
    # a failure in the part of monitoring which creates tables, and
    # which is not protected against read locks at the time this test
    # was written.
    time.sleep(10)

    # to get an sqlite3 read lock that is held over a controllable
    # long time, create a transaction and perform a SELECT in it.
    # The lock will be held until the end of the transaction.
    # (see bottom of https://sqlite.org/lockingv3.html)

    logger.info("Getting a read lock on the monitoring database")
    with engine.begin() as readlock_connection:
        # Textual SQL is wrapped in sqlalchemy.text(): executing a plain
        # string was deprecated in SQLAlchemy 1.4 and removed in 2.0.
        readlock_connection.execute(sqlalchemy.text("BEGIN TRANSACTION"))
        result = readlock_connection.execute(sqlalchemy.text("SELECT COUNT(*) FROM workflow"))
        (c, ) = result.first()
        assert c == 1
        # now readlock_connection should have a read lock that will
        # stay locked until the transaction is ended, or the with
        # block ends.

        logger.info("invoking and waiting for result")
        assert this_app().result() == 5

        # there is going to be some raciness here making sure that
        # the database manager actually tries to write while the
        # read lock is held. I'm not sure if there is a better way
        # to detect this other than a hopefully long-enough sleep.
        time.sleep(10)

    logger.info("cleaning up parsl")
    parsl.dfk().cleanup()
    parsl.clear()

    # at this point, we should find data consistent with executing one
    # task in the database.

    logger.info("checking database content")
    with engine.begin() as connection:

        result = connection.execute(sqlalchemy.text("SELECT COUNT(*) FROM workflow"))
        (c, ) = result.first()
        assert c == 1

        result = connection.execute(sqlalchemy.text("SELECT COUNT(*) FROM task"))
        (c, ) = result.first()
        assert c == 1

        result = connection.execute(sqlalchemy.text("SELECT COUNT(*) FROM try"))
        (c, ) = result.first()
        assert c == 1

    logger.info("all done")