Example #1
def test_missing_num_shards(params_from_base_test_setup, sg_conf):
    """
    1. Launch sg_accels with the following property missing from the config:
        "num_shards": 16
    2. Verify there are 16 shards
    3. Verify they are distributed evenly across the nodes
    """

    cluster_conf = params_from_base_test_setup["cluster_config"]

    log_info("Running 'test_missing_num_shards'")
    log_info("cluster_conf: {}".format(cluster_conf))
    log_info("sg_conf: {}".format(sg_conf))

    cluster = Cluster(config=cluster_conf)
    cluster.reset(sg_config_path=sg_conf)

    # CBGT REST Admin API endpoint
    admin_api = Admin(cluster.sg_accels[1])
    cbgt_cfg = admin_api.get_cbgt_config()

    # Verify that default number of pindex shards is 16.
    # This may change in the future in which case this test will need to be updated.
    assert cbgt_cfg.num_shards == 16

    # Verify sharding is correct
    assert cluster.validate_cbgt_pindex_distribution_retry(
        num_running_sg_accels=3)
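Note: the even-distribution check above can also be reproduced by hand against the CBGT cfg. The sketch below is a minimal illustration, assuming the stock CBGT REST endpoint /api/cfg is reachable (the base URL is a hypothetical placeholder, and the real Admin/Cluster helpers used in the test may work differently).

import requests


def get_pindex_counts(cbgt_admin_url):
    # Fetch the current CBGT cfg; /api/cfg is the stock CBGT REST endpoint.
    resp = requests.get("{}/api/cfg".format(cbgt_admin_url))
    resp.raise_for_status()
    cfg = resp.json()

    # planPIndexes maps each pindex to the node UUIDs it is assigned to
    counts = {}
    plan = cfg.get("planPIndexes", {}).get("planPIndexes", {})
    for pindex in plan.values():
        for node_uuid in pindex.get("nodes", {}):
            counts[node_uuid] = counts.get(node_uuid, 0) + 1
    return counts


# With the default of 16 shards spread over 3 running accels, each node
# should end up with 5 or 6 pindexes (hypothetical URL shown):
# counts = get_pindex_counts("http://192.168.33.20:4985/_cbgt")
# assert sum(counts.values()) == 16
# assert max(counts.values()) - min(counts.values()) <= 1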
Example #2
def test_take_all_sgaccels_down(params_from_base_test_setup, sg_conf):
    """
    Scenario that takes all sync_gateway accel nodes offline during a doc load.
    After bringing the nodes back online during the load, the reshard of the DCP feed is verified.
    The changes feed is then checked to confirm that all docs show up.

    1. Start doc load (1000 docs)
    2. Take all sg_accel nodes down in parallel
    3. Verify the nodes are down
    4. Wait for the doc adds to complete, store "doc_push_result_1"
    5. Verify "doc_push_result_1" docs were added
    6. Start another doc load (1000 docs)
    7. Wait for 6. to complete, store "doc_push_result_2"
    8. Verify "doc_push_result_2" docs were added
    9. Start another doc load (1000 docs)
    10. Bring the nodes back up in parallel
    11. Poll on p-index reshard
    12. Wait for 9. to complete, store "doc_push_result_3"
    13. Verify "doc_push_result_3" docs were added
    14. Verify "doc_push_result_1" + "doc_push_result_2" + "doc_push_result_3" all show up in the _changes feed
    """

    cluster_conf = params_from_base_test_setup["cluster_config"]

    log_info("Running 'test_dcp_reshard_single_sg_accel_goes_down_and_up'")
    log_info("cluster_conf: {}".format(cluster_conf))

    log_info("sg_conf: {}".format(sg_conf))

    cluster = Cluster(config=cluster_conf)
    cluster.reset(sg_config_path=sg_conf)

    cluster_util = ClusterKeywords()
    topology = cluster_util.get_cluster_topology(cluster_conf)

    sg_url = topology["sync_gateways"][0]["public"]
    sg_admin_url = topology["sync_gateways"][0]["admin"]
    sg_db = "db"
    num_docs = 1000

    client = MobileRestClient()

    doc_pusher_user_info = userinfo.UserInfo("doc_pusher",
                                             "pass",
                                             channels=["A"],
                                             roles=[])
    doc_pusher_auth = client.create_user(
        url=sg_admin_url,
        db=sg_db,
        name=doc_pusher_user_info.name,
        password=doc_pusher_user_info.password,
        channels=doc_pusher_user_info.channels)

    a_user_info = userinfo.UserInfo("a_user", "pass", channels=["A"], roles=[])
    client.create_user(url=sg_admin_url,
                       db=sg_db,
                       name=a_user_info.name,
                       password=a_user_info.password,
                       channels=a_user_info.channels)
    a_user_session = client.create_session(url=sg_admin_url,
                                           db=sg_db,
                                           name=a_user_info.name,
                                           password=a_user_info.password)

    # Shutdown all accel nodes in parallel
    with concurrent.futures.ThreadPoolExecutor(max_workers=3) as ex:

        # Start adding docs
        docs_1 = document.create_docs(None,
                                      num_docs,
                                      channels=doc_pusher_user_info.channels)
        docs_1_task = ex.submit(client.add_bulk_docs,
                                url=sg_url,
                                db=sg_db,
                                docs=docs_1,
                                auth=doc_pusher_auth)

        # Take down all sg_accel nodes
        log_info("Shutting down sg_accels: [{}, {}, {}] ...".format(
            cluster.sg_accels[0], cluster.sg_accels[1], cluster.sg_accels[2]))
        sg_accel_down_task_1 = ex.submit(cluster.sg_accels[0].stop)
        sg_accel_down_task_2 = ex.submit(cluster.sg_accels[1].stop)
        sg_accel_down_task_3 = ex.submit(cluster.sg_accels[2].stop)
        assert sg_accel_down_task_1.result() == 0
        assert sg_accel_down_task_2.result() == 0
        assert sg_accel_down_task_3.result() == 0

        # Block until bulk_docs is complete
        doc_push_result_1 = docs_1_task.result()
        assert len(doc_push_result_1) == num_docs
        client.verify_docs_present(url=sg_url,
                                   db=sg_db,
                                   expected_docs=doc_push_result_1,
                                   auth=doc_pusher_auth)

        # Load sync_gateway with another batch of docs while the sg_accel nodes are offline
        docs_2_bodies = document.create_docs(
            None, num_docs, channels=doc_pusher_user_info.channels)
        docs_push_result_2 = client.add_bulk_docs(url=sg_url,
                                                  db=sg_db,
                                                  docs=docs_2_bodies,
                                                  auth=doc_pusher_auth)
        assert len(docs_push_result_2) == num_docs
        client.verify_docs_present(url=sg_url,
                                   db=sg_db,
                                   expected_docs=docs_push_result_2,
                                   auth=doc_pusher_auth)

        # Start loading Sync Gateway with another set of docs while bringing the sg_accel nodes online
        docs_3 = document.create_docs(None,
                                      num_docs,
                                      channels=doc_pusher_user_info.channels)
        docs_3_task = ex.submit(client.add_bulk_docs,
                                url=sg_url,
                                db=sg_db,
                                docs=docs_3,
                                auth=doc_pusher_auth)

        # Bring all the sg_accel nodes back up in parallel
        log_info("Starting sg_accels: [{}, {}, {}] ...".format(
            cluster.sg_accels[0], cluster.sg_accels[1], cluster.sg_accels[2]))
        sg_accel_up_task_1 = ex.submit(cluster.sg_accels[0].start, sg_conf)
        sg_accel_up_task_2 = ex.submit(cluster.sg_accels[1].start, sg_conf)
        sg_accel_up_task_3 = ex.submit(cluster.sg_accels[2].start, sg_conf)
        assert sg_accel_up_task_1.result() == 0
        assert sg_accel_up_task_2.result() == 0
        assert sg_accel_up_task_3.result() == 0

        # Wait for pindex to reshard correctly
        assert cluster.validate_cbgt_pindex_distribution_retry(3)

        # Block until the third bulk_docs batch is complete
        doc_push_result_3 = docs_3_task.result()
        assert len(doc_push_result_3) == num_docs
        client.verify_docs_present(url=sg_url,
                                   db=sg_db,
                                   expected_docs=doc_push_result_3,
                                   auth=doc_pusher_auth)

    # Combine the 3 push results and make sure the changes propagate to a_user,
    # who has access to the docs' channel.
    log_info("Verifying all the changes show up for 'a_user' ...")
    all_docs = doc_push_result_1 + docs_push_result_2 + doc_push_result_3
    client.verify_docs_in_changes(url=sg_url,
                                  db=sg_db,
                                  expected_docs=all_docs,
                                  auth=a_user_session,
                                  polling_interval=2)
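Note: validate_cbgt_pindex_distribution_retry(3) above hides the polling loop that waits for CBGT to finish resharding. If you need the same behaviour around your own check, a generic retry helper along these lines is enough; the names and timings below are illustrative, not the framework's actual implementation.

import time


def retry_until_true(check, attempts=10, delay_sec=5):
    # Re-run 'check' until it returns True or the attempts are exhausted.
    # Useful while CBGT is still resharding pindexes after a topology change.
    for _ in range(attempts):
        if check():
            return True
        time.sleep(delay_sec)
    return False


# Example usage (hypothetical), equivalent to the retry call in the test above:
# assert retry_until_true(
#     lambda: cluster.validate_cbgt_pindex_distribution(num_running_sg_accels=3))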
Example #4
def test_take_down_bring_up_sg_accel_validate_cbgt(params_from_base_test_setup,
                                                   sg_conf):
    """
    Scenario 1

    Start with 3 sg_accels
    Take down 2 sg_accels (block until down -- poll port if needed)
    Doc adds with uuids (~30 sec for cbgt to reshard)
    Polling loop: wait for all docs to come back over the changes feed
    Call validate pindex with correct number of accels

    Scenario 2 (Continuation)

    When bringing the nodes back up, you'd have to poll the cbgt_cfg until you get the expected number of nodes,
    then you could validate the pindex distribution with 2 accels
    """

    cluster_conf = params_from_base_test_setup["cluster_config"]

    log_info("Running 'test_dcp_reshard_single_sg_accel_goes_down_and_up'")
    log_info("cluster_conf: {}".format(cluster_conf))

    log_info("sg_conf: {}".format(sg_conf))

    cluster = Cluster(config=cluster_conf)
    cluster.reset(sg_config_path=sg_conf)

    cluster_util = ClusterKeywords()
    topology = cluster_util.get_cluster_topology(cluster_conf)

    sg_url = topology["sync_gateways"][0]["public"]
    sg_admin_url = topology["sync_gateways"][0]["admin"]
    sg_db = "db"

    client = MobileRestClient()

    doc_pusher_user_info = userinfo.UserInfo("doc_pusher",
                                             "pass",
                                             channels=["A"],
                                             roles=[])
    doc_pusher_auth = client.create_user(
        url=sg_admin_url,
        db=sg_db,
        name=doc_pusher_user_info.name,
        password=doc_pusher_user_info.password,
        channels=doc_pusher_user_info.channels)

    log_info("Shutting down sg_accels: [{}, {}]".format(
        cluster.sg_accels[1], cluster.sg_accels[2]))
    # Shutdown two accel nodes in parallel
    with concurrent.futures.ThreadPoolExecutor(max_workers=3) as ex:
        sg_accel_down_task_1 = ex.submit(cluster.sg_accels[1].stop)
        sg_accel_down_task_2 = ex.submit(cluster.sg_accels[2].stop)
        assert sg_accel_down_task_1.result() == 0
        assert sg_accel_down_task_2.result() == 0

    log_info("Finished taking nodes down!")

    # It should take some time (~30 sec) for cbgt to pick up the failed nodes and reshard the pindexes. During
    # this window, add 1000 docs and start a longpoll changes loop to see if those docs make it to the changes feed.
    # If the reshard is successful they will show up at some point afterwards. If not, the docs will fail to show up.
    doc_pusher_docs = client.add_docs(url=sg_url,
                                      db=sg_db,
                                      number=1000,
                                      id_prefix=None,
                                      auth=doc_pusher_auth,
                                      channels=doc_pusher_user_info.channels)
    assert len(doc_pusher_docs) == 1000
    client.verify_docs_in_changes(url=sg_url,
                                  db=sg_db,
                                  expected_docs=doc_pusher_docs,
                                  auth=doc_pusher_auth,
                                  polling_interval=5)

    # The pindexes should be resharded at this point since all of the changes have shown up
    assert cluster.validate_cbgt_pindex_distribution(num_running_sg_accels=1)

    log_info("Start sg_accels: [{}, {}]".format(cluster.sg_accels[1],
                                                cluster.sg_accels[2]))

    # Start the two accel nodes one at a time, validating the pindex distribution after each comes up
    status = cluster.sg_accels[1].start(sg_conf)
    assert status == 0

    # Poll on pindex reshard after bringing the second accel node back
    assert cluster.validate_cbgt_pindex_distribution_retry(
        num_running_sg_accels=2)

    status = cluster.sg_accels[2].start(sg_conf)
    assert status == 0

    # Poll on pindex reshard after bringing the third accel node back
    assert cluster.validate_cbgt_pindex_distribution_retry(
        num_running_sg_accels=3)
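Note: Scenario 2 in the docstring mentions polling the cbgt_cfg until the expected number of nodes shows up before validating the pindex distribution. A minimal sketch of that poll is shown below, assuming the same /api/cfg endpoint as earlier and that nodeDefsKnown reflects the currently registered accel nodes (both are assumptions; the base URL is a hypothetical placeholder).

import time

import requests


def wait_for_cbgt_nodes(cbgt_admin_url, expected_nodes, attempts=30, delay_sec=2):
    # Poll the CBGT cfg until the known node defs report the expected node count.
    for _ in range(attempts):
        cfg = requests.get("{}/api/cfg".format(cbgt_admin_url)).json()
        node_defs = cfg.get("nodeDefsKnown", {}).get("nodeDefs", {})
        if len(node_defs) == expected_nodes:
            return True
        time.sleep(delay_sec)
    return False


# e.g. after restarting the second accel node (hypothetical URL):
# assert wait_for_cbgt_nodes("http://192.168.33.20:4985/_cbgt", expected_nodes=2)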