Example #1
def test_downloadDataDictionary():

    ctx = reload.context()
    reload.downloadDataDictionary(ctx)
    assert filecmp.cmp(ctx["dataDictionaryInputFilePath"],
                       "redcap/metadata.json")
    os.remove(ctx["dataDictionaryInputFilePath"])
Example #2
def test_back_up_data_dictionary():
    
    ctx = reload.context()
    with copy_file("redcap/metadata.json", ctx["dataDictionaryInputFilePath"]):
        assert reload.backUpDataDictionary(ctx)
        directory = reload.dataDictionaryBackUpDirectory(ctx)
        shutil.rmtree(directory)
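The copy_file helper used here (and in later examples) is not defined in these excerpts. A plausible sketch, assuming it is a context manager that copies a fixture into place and deletes the copy afterwards:

import os
import shutil
from contextlib import contextmanager

@contextmanager
def copy_file(src, dst):
    # Copy src to dst for the duration of the block, then remove dst
    # (guarded, in case the code under test moved or deleted it).
    shutil.copy(src, dst)
    try:
        yield
    finally:
        if os.path.exists(dst):
            os.remove(dst)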
Example #3
def test_back_up_data_dictionary_not_exists():
    
    ctx = reload.context()
    assert reload.backUpDataDictionary(ctx)
    directory = reload.dataDictionaryBackUpDirectory(ctx)
    assert not os.path.exists(ctx["dataDictionaryInputFilePath"])
    assert not os.path.exists(directory)
Example #4
def test_update_table_column():
    ctx = reload.context()
    fn = "/tmp/ssd1.csv"
    fn2 = "/tmp/ssd2.csv"
    csv1 = [[i, i] for i in range(10)]
    csv2 = [[i, i + 1] for i in range(1, 11)]
    n = len(csv1)
    n2 = len(csv2)
    write_csv(fn, ["ProposalID", "siteNumber"], csv1)
    write_csv(fn2, ["ProposalID", "siteNumber"], csv2)

    try:
        reload._updateDataIntoTableColumn(ctx, "SiteInformation", "ProposalID",
                                          fn, {})
        rows = reload.readDataFromTable(ctx, "SiteInformation")
        assert (bag_contains(rows, [{
            "siteNumber": str(row[1]),
            "ProposalID": str(row[0])
        } for row in csv1]))
        reload._updateDataIntoTableColumn(ctx, "SiteInformation", "ProposalID",
                                          fn2, {})
        rows = reload.readDataFromTable(ctx, "SiteInformation")
        updated = {r[0] for r in csv2}
        assert (bag_contains(rows, [{
            "siteNumber": str(row[1]),
            "ProposalID": str(row[0])
        } for row in csv1 if row[0] not in updated] + [{
            "siteNumber": str(row[1]),
            "ProposalID": str(row[0])
        } for row in csv2]))
    finally:
        reload.clearDatabase(ctx)
        reload.createTables(ctx)
        os.unlink(fn)
        os.unlink(fn2)
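write_csv and bag_contains are likewise not shown. A minimal sketch of both, assuming bag_contains performs a multiset comparison (order ignored, duplicates respected, extra columns in the actual rows tolerated):

import csv

def write_csv(path, headers, rows):
    # Write a header row followed by the data rows.
    with open(path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(headers)
        writer.writerows(rows)

def bag_contains(rows, expected):
    # True if every expected dict matches a distinct actual row.
    remaining = list(rows)
    for exp in expected:
        match = next((r for r in remaining
                      if all(r.get(k) == v for k, v in exp.items())), None)
        if match is None:
            return False
        remaining.remove(match)
    return True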
Example #5
def do_test_post_table(verb1,
                       verb2,
                       src,
                       cnttype,
                       tablename,
                       kvp1,
                       kvp2,
                       content1,
                       content2,
                       has_comments=False):
    print("cwd =", os.getcwd())
    ctx = reload.context()
    pServer = Process(target=server.server, args=[ctx], kwargs={})
    pServer.start()
    time.sleep(WAIT_PERIOD)
    pWorker = Process(target=reload.startWorker)
    pWorker.start()
    time.sleep(WAIT_PERIOD)
    try:
        print("get " + tablename)
        resp = requests.get("http://localhost:5000/table/" + tablename)
        assert (resp.json() == [])
        print("post " + tablename)
        resp = do_request_table(verb1,
                                tablename,
                                kvp1,
                                src,
                                cnttype,
                                has_comments=has_comments)
        print(resp.text)
        assert resp.status_code == 200
        taskid = resp.json()
        assert isinstance(taskid, str)
        wait_for_task_to_finish(taskid)
        print("get " + tablename)
        resp = requests.get("http://localhost:5000/table/" + tablename)
        respjson = resp.json()
        assert (bag_contains(respjson, content1))
        print("post " + tablename)
        resp = do_request_table(verb2,
                                tablename,
                                kvp2,
                                src,
                                cnttype,
                                has_comments=has_comments)
        assert resp.status_code == 200
        taskid = resp.json()
        assert isinstance(taskid, str)
        wait_for_task_to_finish(taskid)
        print("get " + tablename)
        resp = requests.get("http://localhost:5000/table/" + tablename)
        respjson = resp.json()
        assert (bag_contains(respjson, content2))
    finally:
        pWorker.terminate()
        pServer.terminate()
        reload.clearTasks()
        reload.clearDatabase(ctx)
        reload.createTables(ctx)
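do_request_table is not included in these excerpts either. A hedged sketch, assuming the server accepts the file body on /table/<name>, with the key-value pairs and the comment flag passed as query parameters:

def do_request_table(verb, tablename, kvp, src, cnttype, has_comments=False):
    # verb is requests.post or requests.put.
    with open(src, "rb") as f:
        return verb("http://localhost:5000/table/" + tablename,
                    params=dict(kvp, has_comments=int(has_comments)),
                    data=f.read(),
                    headers={"Content-Type": cnttype})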
Example #6
def test_get_column_data_type_twice():
    ctx = reload.context()

    dt = reload.getColumnDataType(ctx, "SiteInformation", "ProposalID")
    assert dt == "bigint"

    dt = reload.getColumnDataType(ctx, "SiteInformation", "ProposalID")
    assert dt == "bigint"
Example #7
def wait_for_task_to_start(taskid):
    resp = requests.get("http://localhost:5000/task/" + taskid)
    print(resp.json())
    while resp.json()["status"] in ["queued"]:
        time.sleep(1)
        resp = requests.get("http://localhost:5000/task/" + taskid)
        print(resp.json())
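Several examples also call wait_for_task_to_finish, which is not shown. A sketch following the same polling pattern, assuming queued, started, and deferred are the non-terminal rq statuses here:

def wait_for_task_to_finish(taskid):
    resp = requests.get("http://localhost:5000/task/" + taskid)
    print(resp.json())
    # Keep polling until the task reaches a terminal state
    # (finished or failed).
    while resp.json()["status"] in ["queued", "started", "deferred"]:
        time.sleep(1)
        resp = requests.get("http://localhost:5000/task/" + taskid)
        print(resp.json())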
Example #8
def test_restore_database_with_lock():
    print("test_restore_database")

    ctx = reload.context()
    with database(ctx, cleanup=True):
        ts = test_back_up_database(False)
    
    with database(ctx, cleanup=True):
        assert reload.restoreDatabase(ctx, ts)
        os.remove(ctx["backupDir"] + "/" + ts)
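The database context manager appears throughout these tests. A plausible sketch consistent with its use: it ensures the application tables exist on entry and, when cleanup is requested, resets the database on exit:

from contextlib import contextmanager

@contextmanager
def database(ctx, cleanup=False):
    # Provide a database populated with the application tables.
    reload.createTables(ctx)
    try:
        yield
    finally:
        if cleanup:
            reload.clearDatabase(ctx)
            reload.createTables(ctx)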
Example #9
def test_etl():
    
    ctx = reload.context()
    with copy_file("redcap/record.json", ctx["dataInputFilePath"]):
        with copy_file("redcap/metadata.json", ctx["dataDictionaryInputFilePath"]):
            with datatables(lambda: reload.etl(ctx)) as ret:
                assert ret
                assert os.path.isfile("/data/tables/Proposal")
                with open("/data/tables/Proposal") as f:
                    assert sum(1 for _ in f) == 2
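datatables wraps an ETL run and cleans up its output. A sketch consistent with its use above, where the callable's return value is bound by the with statement:

import shutil
from contextlib import contextmanager

@contextmanager
def datatables(action):
    # Run the ETL action, expose its result, then drop the output tree.
    try:
        yield action()
    finally:
        shutil.rmtree("/data/tables", ignore_errors=True)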
Example #10
def test_delete_back_up_database():
    print("test_back_up_database")
    test_sync(False)
    
    ctx = reload.context()
    with database(ctx, cleanup=True):
        ts = str(datetime.datetime.now())
        assert reload._backUpDatabase(ctx, ts)
        assert reload._deleteBackup(ctx, ts)
        assert ts not in os.listdir(ctx["backupDir"])
Example #11
def test_clear_database():
    
    ctx = reload.context()
    reload.clearDatabase(ctx)
    engine = create_engine(
        "postgresql+psycopg2://{dbuser}:{dbpass}@{dbhost}:{dbport}/{dbname}"
        .format(**ctx))
    conn = engine.connect()

    rs = conn.execute(
        "SELECT table_schema, table_name FROM information_schema.tables "
        "WHERE table_schema = 'public' "
        "ORDER BY table_schema, table_name").fetchall()
    assert len(rs) == 0
    conn.close()
    reload.createTables(ctx)
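Note that passing a raw SQL string to conn.execute only works on SQLAlchemy 1.x; under SQLAlchemy 2.x the string must be wrapped in text():

from sqlalchemy import text

rs = conn.execute(text(
    "SELECT table_schema, table_name FROM information_schema.tables "
    "WHERE table_schema = 'public' "
    "ORDER BY table_schema, table_name")).fetchall()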
Example #12
def test_start_worker():
    
    ctx = reload.context()
    p = Process(target=reload.startWorker)
    workers = Worker.all(connection=reload.redisQueue())
    assert len(list(workers)) == 0
    p.start()
    time.sleep(WAIT_PERIOD)
    workers = Worker.all(connection=reload.redisQueue())
    assert len(list(workers)) == 1
    p.terminate()
Example #13
def test_get_all_tasks():
    
    ctx = reload.context()
    pServer = Process(target=server.server, args=[ctx], kwargs={})
    print("starting server ctx = " + str(ctx))
    pServer.start()
    print("server started, waiting for " + str(WAIT_PERIOD))
    time.sleep(WAIT_PERIOD)
    print("clearing tasks")
    reload.clearTasks()
    print("clearing database")
    reload.clearDatabase(ctx)
    print("creating tables")
    reload.createTables(ctx)
    print("starting worker")
    pWorker = Process(target=reload.startWorker)
    pWorker.start()
    print("worker started, waiting for " + str(WAIT_PERIOD))
    time.sleep(WAIT_PERIOD)
    print("set up")
    try:
        resp0 = requests.get("http://localhost:5000/task")
        assert len(resp0.json()["queued"]) == 0
        resp1 = requests.post("http://localhost:5000/sync")
        task_id = resp1.json()
        wait_for_task_to_start(task_id)
        resp2 = requests.get("http://localhost:5000/task")
        assert resp2.json() == {
            "queued": [],
            "started": {
                "job_ids": [task_id],
                "expired_job_ids": []
            },
            "finished": {
                "job_ids": [],
                "expired_job_ids": []
            },
            "failed": {
                "job_ids": [],
                "expired_job_ids": []
            },
            "deferred": {
                "job_ids": [],
                "expired_job_ids": []
            }
        }
    finally:
        pWorker.terminate() 
        pServer.terminate()
        reload.clearTasks()
        reload.clearDatabase(ctx)
        reload.createTables(ctx)
Example #14
def test_back_up_database_with_lock(cleanup=True):
    print("test_back_up_database")
    test_sync(False)
    
    ctx = reload.context()
    with database(ctx, cleanup=cleanup):
        ts = str(datetime.datetime.now())
        assert reload.backUpDatabase(ctx, ts)
        assert ts in os.listdir(ctx["backupDir"])
        if cleanup:
            os.remove(ctx["backupDir"] + "/" + ts)
        else:
            return ts
Example #15
def test_back_up_endpoint():
    
    ctx = reload.context()
    p = Process(target=server.server, args=[ctx], kwargs={})
    p.start()
    time.sleep(WAIT_PERIOD)
    try:
        resp = requests.get("http://localhost:5000/backup")
        assert resp.status_code == 200
        print(resp.json())
        assert isinstance(resp.json(), list)
    finally:
        p.terminate()
        reload.clearTasks()
Example #16
def do_test_table(table_name, columns):
    ctx = reload.context()
    conn = connect(user=ctx["dbuser"],
                   password=ctx["dbpass"],
                   host=ctx["dbhost"],
                   port=ctx["dbport"],
                   dbname=ctx["dbname"])
    conn.autocommit = True
    cur = conn.cursor()
    cur.execute('''SELECT * FROM "{0}"'''.format(table_name))
    rs = cur.fetchall()
    colnames = [desc[0] for desc in cur.description]
    for column in columns:
        assert column in colnames
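Interpolating table_name with str.format is fine for trusted test input, but psycopg2's sql module composes identifiers safely if the name can come from elsewhere:

from psycopg2 import sql

cur.execute(sql.SQL("SELECT * FROM {}").format(sql.Identifier(table_name)))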
Example #17
def test_downloadData():
    ctx = reload.context()
    reload.downloadData(ctx)
    diff = None
    try:
        with open(ctx["dataInputFilePath"]) as f:
            obj = json.load(f)
        with open("redcap/record.json") as f2:
            obj2 = json.load(f2)
        diff = DeepDiff(obj, obj2)
        assert len(diff) == 0
    except Exception:
        sys.stderr.write(str(diff) + "\n")
        raise
    os.remove(ctx["dataInputFilePath"])
Example #18
def do_test_auxiliary(aux1, exp):
    
    aux0 = os.environ.get("AUXILIARY_PATH")
    os.environ["AUXILIARY_PATH"] = aux1
    ctx = reload.context()
    shutil.copy("redcap/record.json", ctx["dataInputFilePath"])
    shutil.copy("redcap/metadata.json", ctx["dataDictionaryInputFilePath"])
    assert reload.etl(ctx)
    with open("/data/tables/ProposalFunding") as f:
        i = f.readline().split(",").index("totalBudgetInt")
        assert f.readline().split(",")[i] == exp
    os.remove(ctx["dataInputFilePath"])
    os.remove(ctx["dataDictionaryInputFilePath"])
    shutil.rmtree("/data/tables")
    if aux0 is None:
        del os.environ["AUXILIARY_PATH"]
    else:
        os.environ["AUXILIARY_PATH"] = aux0
Example #19
def test_get_column_data_type_twice2():
    ctx = reload.context()
    fn = "/tmp/ssd1.csv"
    csv1 = [[i, i] for i in range(10)]
    n = len(csv1)
    write_csv(fn, ["ProposalID", "siteNumber"], csv1)

    try:
        dt = reload.getColumnDataType(ctx, "SiteInformation", "ProposalID")
        assert dt == "bigint"

        reload._updateDataIntoTable(ctx, "SiteInformation", fn, {})

        dt = reload.getColumnDataType(ctx, "SiteInformation", "ProposalID")
        assert dt == "bigint"
    finally:
        reload.clearDatabase(ctx)
        reload.createTables(ctx)
        os.unlink(fn)
Example #20
def do_test_blocklist2(blocklist1, exp):
    
    blocklist0 = os.environ.get("BLOCK_PATH")
    os.environ["BLOCK_PATH"] = blocklist1
    ctx = reload.context()
    shutil.copy("redcap/record2.json", ctx["dataInputFilePath"])
    shutil.copy("redcap/metadata.json", ctx["dataDictionaryInputFilePath"])
    assert reload.etl(ctx)
    with open("/data/tables/Proposal", newline="") as f:
        reader = csv.reader(f)
        headers = next(reader)
        i = sum(1 for row in reader)
        assert i == exp
    os.remove(ctx["dataInputFilePath"])
    os.remove(ctx["dataDictionaryInputFilePath"])
    shutil.rmtree("/data/tables")
    if blocklist0 is None:
        del os.environ["BLOCK_PATH"]
    else:
        os.environ["BLOCK_PATH"] = blocklist0
Example #21
def do_test_insert_table(src, kvp, has_comments=False):

    ctx = reload.context()
    n = countrows(src, "text/csv") - (1 if has_comments else 0)
    try:
        reload.insertDataIntoTable(ctx, "SiteInformation", src, kvp)
        rows = reload.readDataFromTable(ctx, "SiteInformation")
        assert (bag_contains(rows, [{
            "siteNumber": str(i),
            **kvp
        } for i in range(1, n + 1)]))
        reload.insertDataIntoTable(ctx, "SiteInformation", src, kvp)
        rows = reload.readDataFromTable(ctx, "SiteInformation")
        assert (bag_contains(rows, [{
            "siteNumber": str(i),
            **kvp
        } for i in range(1, n + 1)] * 2))
    finally:
        reload.clearDatabase(ctx)
        reload.createTables(ctx)
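countrows is not defined in these excerpts. A minimal sketch, assuming it counts the data rows of a CSV file excluding the header:

import csv

def countrows(path, cnttype):
    # Only the CSV case is exercised by these tests.
    assert cnttype == "text/csv"
    with open(path, newline="") as f:
        return sum(1 for _ in csv.reader(f)) - 1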
Example #22
def test_sync(cleanup=True):
    
    ctx = reload.context()
    with database(ctx, cleanup=cleanup):
        with connection(ctx, autocommit=True) as conn:
            cur = conn.cursor()
            cur.execute('''SELECT COUNT(*) FROM "Proposal"''')
            rs = cur.fetchall()
            assert len(rs) == 1
            for row in rs:
                assert row[0] == 0
            
            with copytree("/etlout", "/data/tables"):
                print("sync database")
                assert reload.syncDatabase(ctx)
                cur.execute('''SELECT COUNT(*) FROM "Proposal"''')
                rs = cur.fetchall()
                assert len(rs) == 1
                for row in rs:
                    assert row[0] == 1
                    print("database synced")
Example #23
def test_entrypoint():
    
    ctx = reload.context()
    with database(ctx):
        with connection(ctx, autocommit=True) as conn:
            cur = conn.cursor()
            cur.execute('''SELECT COUNT(*) FROM "Proposal"''')
            rs = cur.fetchall()
            assert len(rs) == 1
            for row in rs:
                assert row[0] == 0

            ctx["reloaddb"]=False
            with copy_file("redcap/record.json", ctx["dataInputFilePath"]):
                with copy_file("redcap/metadata.json", ctx["dataDictionaryInputFilePath"]):
                    with datatables(lambda: reload.entrypoint(ctx, one_off=True)):
                        cur.execute('''SELECT COUNT(*) FROM "Proposal"''')
                        rs = cur.fetchall()
                        assert len(rs) == 1
                        for row in rs:
                            assert row[0] == 1
Example #24
def do_test_post_error(verb1, src, cnttype, tablename, kvp1, status_code,
                       resp_text):

    ctx = reload.context()
    pServer = Process(target=server.server, args=[ctx], kwargs={})
    pServer.start()
    time.sleep(WAIT_PERIOD)
    pWorker = Process(target=reload.startWorker)
    pWorker.start()
    time.sleep(WAIT_PERIOD)
    try:
        resp = do_request_table(verb1, tablename, kvp1, src, cnttype)
        assert resp.status_code == status_code
        taskid = resp.text
        assert re.match(resp_text, taskid)
    finally:
        pWorker.terminate()
        pServer.terminate()
        reload.clearTasks()
        reload.clearDatabase(ctx)
        reload.createTables(ctx)
Example #25
def test_task():
    
    ctx = reload.context()
    p = Process(target=server.server, args=[ctx], kwargs={})
    p.start()
    time.sleep(WAIT_PERIOD)
    try:
        resp0 = requests.get("http://localhost:5000/task")
        assert len(resp0.json()["queued"]) == 0
        resp = requests.post("http://localhost:5000/backup")
        resp2 = requests.get("http://localhost:5000/task")
        assert "queued" in resp2.json()
        assert len(resp2.json()["queued"]) == 1
        for status in ["started", "finished", "failed", "deferred"]:
            assert status in resp2.json()
            for category in ["job_ids", "expired_job_ids"]:
                assert category in resp2.json()[status]
                assert len(resp2.json()[status][category]) == 0
    finally:
        p.terminate()
        reload.clearTasks()
Example #26
def test_get_task():
    
    ctx = reload.context()
    p = Process(target=server.server, args=[ctx], kwargs={})
    p.start()
    time.sleep(WAIT_PERIOD)
    try:
        resp = requests.post("http://localhost:5000/backup")
        resp2 = requests.get("http://localhost:5000/task/" + resp.json())
        assert "name" in resp2.json()
        assert "created_at" in resp2.json()
        assert "ended_at" in resp2.json()
        assert "started_at" in resp2.json()
        assert "enqueued_at" in resp2.json()
        assert "description" in resp2.json()
        assert "status" in resp2.json()
        assert "result" in resp2.json()

    finally:
        p.terminate()
        reload.clearTasks()
Example #27
def test_delete_task():
    
    ctx = reload.context()
    p = Process(target=server.server, args=[ctx], kwargs={})
    p.start()
    time.sleep(WAIT_PERIOD)
    try:
        resp0 = requests.get("http://localhost:5000/task")
        assert len(resp0.json()["queued"]) == 0
        resp = requests.post("http://localhost:5000/sync")
        resp1 = requests.post("http://localhost:5000/sync")
        resp2 = requests.get("http://localhost:5000/task")
        assert len(resp2.json()["queued"]) == 2
        assert resp.json() in resp2.json()["queued"]
        assert resp1.json() in resp2.json()["queued"]
        requests.delete("http://localhost:5000/task/" + resp1.json())
        resp3 = requests.get("http://localhost:5000/task")
        assert len(resp3.json()["queued"]) == 1
        assert resp.json() in resp3.json()["queued"]
        assert resp1.json() not in resp3.json()["queued"]
    finally:
        p.terminate()
        reload.clearTasks()
Example #28
def test_downloadData():
    ctx = reload.context()
    reload.downloadData(ctx)
    assert filecmp.cmp(ctx["dataInputFilePath"], "redcap/record.json")
    os.remove(ctx["dataInputFilePath"])
Example #29
import os
from multiprocessing import Process
import reload
import server

if __name__ == "__main__":
    ctx = reload.context()
    s = os.environ["RELOAD_SCHEDULE"] == "1"
    o = os.environ["RELOAD_ONE_OFF"] == "1"
    cdb = os.environ["CREATE_TABLES"] == "1"
    idb = os.environ["INSERT_DATA"] == "1"
    scheduleRunTime = os.environ["SCHEDULE_RUN_TIME"]
    runServer = os.environ["SERVER"] == "1"

    p2 = Process(target=reload.startWorker)
    p2.start()

    p = Process(target=reload.entrypoint,
                args=[ctx],
                kwargs={
                    "create_tables": cdb,
                    "insert_data": idb,
                    "reload": s,
                    "one_off": o,
                    "schedule_run_time": scheduleRunTime
                })
    p.start()
    if runServer:
        server.server(ctx)
    p.join()
    p2.join()
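The entrypoint is configured entirely through environment variables, all of which are required here. A hypothetical one-off configuration (values illustrative, set before the script runs):

import os

os.environ.update({
    "RELOAD_SCHEDULE": "0",      # no recurring schedule
    "RELOAD_ONE_OFF": "1",       # run the pipeline once at startup
    "CREATE_TABLES": "1",
    "INSERT_DATA": "1",
    "SCHEDULE_RUN_TIME": "00:00",
    "SERVER": "1",               # also serve the HTTP API
})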
Example #30
def test_post_table_column():
    ctx = reload.context()
    fn = "/tmp/ssd1.csv"
    fn2 = "/tmp/ssd2.csv"
    csv1 = [[i, i] for i in range(10)]
    csv2 = [[i, i + 1] for i in range(1, 11)]
    n = len(csv1)
    n2 = len(csv2)
    write_csv(fn, ["ProposalID", "siteNumber"], csv1)
    write_csv(fn2, ["ProposalID", "siteNumber"], csv2)
    tablename = "SiteInformation"
    column = "ProposalID"
    kvp1 = kvp2 = {}
    cnttype = "text/csv"
    verb1 = verb2 = requests.post
    content1 = [{
        "siteNumber": str(row[1]),
        "ProposalID": str(row[0])
    } for row in csv1]
    updated = {r[0] for r in csv2}
    content2 = [{
        "siteNumber": str(row[1]),
        "ProposalID": str(row[0])
    } for row in csv1 if row[0] not in updated] + [{
        "siteNumber": str(row[1]),
        "ProposalID": str(row[0])
    } for row in csv2]

    pServer = Process(target=server.server, args=[ctx], kwargs={})
    pServer.start()
    time.sleep(WAIT_PERIOD)
    pWorker = Process(target=reload.startWorker)
    pWorker.start()
    time.sleep(WAIT_PERIOD)

    try:
        resp = do_request_table_column(verb1, tablename, column, kvp1, fn,
                                       cnttype)
        assert resp.status_code == 200
        taskid = resp.json()
        assert isinstance(taskid, str)
        wait_for_task_to_finish(taskid)
        print("get " + tablename)
        resp = requests.get("http://localhost:5000/table/" + tablename)
        respjson = resp.json()
        assert (bag_contains(respjson, content1))
        print("post " + tablename)
        resp = do_request_table_column(verb2, tablename, column, kvp2, fn2,
                                       cnttype)
        assert resp.status_code == 200
        taskid = resp.json()
        assert isinstance(taskid, str)
        wait_for_task_to_finish(taskid)
        print("get " + tablename)
        resp = requests.get("http://localhost:5000/table/" + tablename)
        respjson = resp.json()
        assert (bag_contains(respjson, content2))
    finally:
        pWorker.terminate()
        pServer.terminate()
        reload.clearTasks()
        reload.clearDatabase(ctx)
        reload.createTables(ctx)