def calculate_elbow(path_to_data, path_to_answer, kmin, kmax, ccore, **kwargs):
        """Check that the elbow method finds the amount of clusters stored in the answer.

        The sample is loaded with ``read_sample`` and the reference answer with
        ``answer_reader``. The elbow search is run up to ten times; every run must
        satisfy the sanity assertions below, and the check passes as soon as one run
        returns an amount equal to the number of clusters in the answer.

        :param path_to_data: Argument handed to ``read_sample`` to load the sample.
        :param path_to_answer: Argument handed to ``answer_reader`` to load the answer.
        :param kmin: Lower bound passed to ``elbow``; the found amount must be greater.
        :param kmax: Upper bound passed to ``elbow``; the found amount must be smaller.
        :param ccore: Forwarded unchanged to ``elbow`` as the ``ccore`` keyword.
        :param kwargs: Only ``initializer`` is read (default: ``kmeans_plusplus_initializer``);
                       it is forwarded to ``elbow``.
        """
        # Elbow method randomly chooses initial centers, so a failed attempt is retried.
        max_attempts = 10
        wce_tolerance = 0.0000001

        center_initializer = kwargs.get('initializer', kmeans_plusplus_initializer)

        data = read_sample(path_to_data)
        reference = answer_reader(path_to_answer)

        rejected_amounts = []   # elbow values of the attempts that missed the expected amount
        matched = False
        attempt = 0

        while attempt < max_attempts and not matched:
            attempt += 1

            searcher = elbow(data, kmin, kmax, ccore=ccore, initializer=center_initializer)
            searcher.process()

            found_amount = searcher.get_amount()
            wce_values = searcher.get_wce()

            # Sanity checks that every attempt has to satisfy, matched or not.
            assertion.gt(found_amount, kmin)
            assertion.lt(found_amount, kmax)
            assertion.eq(len(wce_values), kmax - kmin)
            assertion.lt(wce_values[-1], wce_values[0] + wce_tolerance)

            if found_amount != len(reference.get_clusters()):
                rejected_amounts.append(found_amount)
            else:
                matched = True

        # Failure report: expected amount followed by every rejected elbow value.
        report = str(len(reference.get_clusters())) + ": " + str(rejected_amounts)
        assertion.true(matched, message=report)
    def calculate_elbow(path_to_data, path_to_answer, kmin, kmax, ccore, **kwargs):
        """Verify the elbow method against a reference answer, retrying on a mismatch.

        Loads the sample through ``read_sample`` and the answer through
        ``answer_reader``, then runs ``elbow`` at most ten times. Each run is
        sanity-checked; the verification succeeds once a run reports an amount equal
        to the number of clusters in the answer, otherwise the final assertion fails
        with a message listing the expected amount and the rejected elbow values.

        :param path_to_data: Argument handed to ``read_sample`` to load the sample.
        :param path_to_answer: Argument handed to ``answer_reader`` to load the answer.
        :param kmin: Lower bound passed to ``elbow``; the found amount must be greater.
        :param kmax: Upper bound passed to ``elbow``; the found amount must be smaller.
        :param ccore: Forwarded unchanged to ``elbow`` as the ``ccore`` keyword.
        :param kwargs: Only ``initializer`` is read (default: ``kmeans_plusplus_initializer``);
                       it is forwarded to ``elbow``.
        """
        # Elbow method randomly chooses initial centers, hence the retries.
        attempts_allowed = 10

        chosen_initializer = kwargs.get('initializer', kmeans_plusplus_initializer)

        points = read_sample(path_to_data)
        expected_answer = answer_reader(path_to_answer)

        wrong_elbows = []
        is_passed = False

        for _attempt in range(attempts_allowed):
            finder = elbow(points, kmin, kmax, ccore=ccore, initializer=chosen_initializer)
            finder.process()

            elbow_amount = finder.get_amount()
            wce = finder.get_wce()

            # The elbow must lie strictly inside (kmin, kmax).
            assertion.gt(elbow_amount, kmin)
            assertion.lt(elbow_amount, kmax)
            # One WCE value is expected per examined K.
            assertion.eq(len(wce), kmax - kmin)
            # The last WCE must be below the first one (with a small tolerance).
            assertion.lt(wce[-1], wce[0] + 0.0000001)

            if elbow_amount != len(expected_answer.get_clusters()):
                wrong_elbows.append(elbow_amount)
            else:
                is_passed = True
                break

        failure_text = str(len(expected_answer.get_clusters())) + ": " + str(wrong_elbows)
        assertion.true(is_passed, message=failure_text)
    def calculate_elbow(path_to_data, path_to_answer, kmin, kmax, ccore, **kwargs):
        """Verify that the elbow method reproduces the cluster amount of the answer.

        The sample comes from ``read_sample`` and the answer from ``answer_reader``.
        ``elbow`` is run up to five times; each run is sanity-checked and the
        verification succeeds as soon as one run reports an amount equal to the number
        of clusters in the answer.

        :param path_to_data: Argument handed to ``read_sample`` to load the sample.
        :param path_to_answer: Argument handed to ``answer_reader`` to load the answer.
        :param kmin: Lower bound passed to ``elbow``; the found amount must be greater.
        :param kmax: Upper bound passed to ``elbow``; the found amount must be smaller.
        :param ccore: Forwarded unchanged to ``elbow`` as the ``ccore`` keyword.
        :param kwargs: Only ``initializer`` is read (default: ``kmeans_plusplus_initializer``);
                       it is forwarded to ``elbow``.
        """
        # Elbow method randomly chooses initial centers, so a miss is retried.
        attempts_allowed = 5

        center_initializer = kwargs.get('initializer', kmeans_plusplus_initializer)

        data = read_sample(path_to_data)
        reference = answer_reader(path_to_answer)

        succeeded = False
        attempt = 0

        while attempt < attempts_allowed and not succeeded:
            attempt += 1

            searcher = elbow(data, kmin, kmax, ccore=ccore, initializer=center_initializer)
            searcher.process()

            found_amount = searcher.get_amount()
            wce_values = searcher.get_wce()

            # Sanity checks that every attempt has to satisfy.
            assertion.gt(found_amount, kmin)
            assertion.lt(found_amount, kmax)
            assertion.eq(len(wce_values), kmax - kmin)
            # Strict comparison: the last WCE must be smaller than the first one.
            assertion.lt(wce_values[-1], wce_values[0])

            if found_amount != len(reference.get_clusters()):
                continue

            succeeded = True

        assertion.true(succeeded)
# Example #4
# 0
    def calculate_elbow(path_to_data, path_to_answer, kmin, kmax, ccore,
                        **kwargs):
        """Run the elbow method on a sample and verify its result.

        The sample is loaded with ``read_sample``. The expected amount of clusters
        is taken from ``path_to_answer``: ``None`` disables the comparison, an
        ``int`` is used directly, and anything else is handed to ``answer_reader``
        and the number of clusters it returns is used.

        ``elbow`` is run up to fifteen times. Every run is sanity-checked; the
        verification succeeds on the first run whose amount matches the expected
        one (or on the first run at all when nothing is expected).

        :param path_to_data: Argument handed to ``read_sample`` to load the sample.
        :param path_to_answer: ``None``, an ``int`` amount, or an argument for
                               ``answer_reader``.
        :param kmin: Lower bound passed to ``elbow``; the found amount must be greater.
        :param kmax: Upper bound passed to ``elbow``; the found amount must be smaller.
        :param ccore: Forwarded unchanged to ``elbow`` as the ``ccore`` keyword.
        :param kwargs: Forwarded as-is to ``elbow``; ``kstep`` (default 1) is also
                       read here to compute the expected number of WCE values.
        """
        # Elbow method randomly chooses initial centers, so a miss is retried.
        attempts_allowed = 15
        step = kwargs.get('kstep', 1)

        data = read_sample(path_to_data)

        if path_to_answer is None:
            expected_amount = None
        elif isinstance(path_to_answer, int):
            expected_amount = path_to_answer
        else:
            reference = answer_reader(path_to_answer)
            expected_amount = len(reference.get_clusters())

        rejected_amounts = []
        succeeded = False

        for _attempt in range(attempts_allowed):
            searcher = elbow(data, kmin, kmax, ccore=ccore, **kwargs)
            searcher.process()

            found_amount = searcher.get_amount()
            wce_values = searcher.get_wce()

            # Sanity checks that every attempt has to satisfy.
            assertion.gt(found_amount, kmin)
            assertion.lt(found_amount, kmax)
            expected_wce_length = math.floor((kmax - kmin) / step + 1)
            assertion.eq(len(wce_values), expected_wce_length)
            assertion.lt(wce_values[-1], wce_values[0] + 0.0000001)

            is_mismatch = (expected_amount is not None) and (
                found_amount != expected_amount)
            if is_mismatch:
                rejected_amounts.append(found_amount)
            else:
                succeeded = True
                break

        # A failure report exists only when there was something to compare against.
        if expected_amount is None:
            report = None
        else:
            report = str(expected_amount) + ": " + str(rejected_amounts)

        assertion.true(succeeded, message=report)