Esempio n. 1
0
    def test_edge_cases(self):
        """Check that get_correlation returns 0 for degenerate inputs.

        Covered inputs: two empty lists, one empty list paired with a
        single-element list (in either order), and two single-element lists.
        """
        degenerate_inputs = (
            ([], []),
            ([42], []),
            ([], [42]),
            ([42], [42]),
        )

        for first, second in degenerate_inputs:
            self.assertEqual(get_correlation(first, second), 0)
Esempio n. 2
0
    def test_pearson(self):
        """Check get_correlation against hand-computed correlation values.

        Identical sequences and sequences shifted by a constant must both
        give 1; [1, 2, 3] against [0, 1, 0.5] must give 0.5.
        """
        # The results come out of floating-point arithmetic, so compare with
        # a tolerance: exact equality would fail on a harmless last-digit
        # rounding difference. assertAlmostEqual still passes when the values
        # are exactly equal.
        self.assertAlmostEqual(
            get_correlation([1, 2, 3, 4, 5, 6, 7], [1, 2, 3, 4, 5, 6, 7]), 1)

        self.assertAlmostEqual(
            get_correlation([1, 2, 3, 4, 5, 6, 7], [2, 3, 4, 5, 6, 7, 8]), 1)

        self.assertAlmostEqual(get_correlation([1, 2, 3], [0, 1, 0.5]), 0.5)
Esempio n. 3
0
    def calculate_on_neighbourhoods(self, word_matrix, k):
        """
        Append a swadeshness score to every cell using the neighbourhood
        method.

        For each cell the k + 1 nearest languages are requested from the map.
        The first language returned is used as the origin, and the score is
        the correlation between the two distance components that the word
        matrix reports for the origin paired with each remaining language.
        Languages for which the word matrix has no distances are left out.

        A cell scores 0 when the map lookup raises MapError or returns fewer
        than 6 languages.

        Returns self.cells, each cell extended in place with its score.
        """
        atlas = Map()

        for cell in self.cells:
            # cell[0] and cell[1] are presumably the cell's coordinates;
            # verify against the code that builds self.cells.
            try:
                nearest = atlas.get_nearest(cell[0], cell[1], k + 1)
            except MapError:
                # Treat a failed lookup as an empty neighbourhood so it is
                # handled by the too-few-languages case just below.
                nearest = []

            if len(nearest) < 6:
                cell.append(0)
                continue

            origin, neighbours = nearest[0], nearest[1:]

            # Keep only the neighbours the word matrix knows distances for.
            pairs = [
                pair
                for pair in (
                    word_matrix.get_distances(origin, neighbour)
                    for neighbour in neighbours
                )
                if pair is not None
            ]

            cell.append(get_correlation(
                [pair[0] for pair in pairs],
                [pair[1] for pair in pairs],
            ))

        return self.cells
Esempio n. 4
0
    def calculate_on_circles(self, word_matrix, radius):
        """
        Append a swadeshness score to every cell using the circles method.

        For each cell the languages within the given radius are requested
        from the map. Every unordered pair of those languages that has an
        entry in word_matrix.d contributes its two distance components, and
        the score is the correlation between the two resulting series.

        A cell scores 0 when the map lookup raises MapError or returns fewer
        than 6 languages.

        Returns self.cells, each cell extended in place with its score.
        """
        atlas = Map()

        for cell in self.cells:
            try:
                nearby = atlas.get_in_radius(cell[0], cell[1], radius)
            except MapError:
                cell.append(0)
                continue

            if len(nearby) < 6:
                cell.append(0)
                continue

            # Visit each unordered pair once (smaller language first) and
            # keep the entries the word matrix actually has.
            entries = [
                word_matrix.d[(first, second)]
                for first in nearby
                for second in nearby
                if first < second and (first, second) in word_matrix.d
            ]

            cell.append(get_correlation(
                [entry[0] for entry in entries],
                [entry[1] for entry in entries],
            ))

        return self.cells
Esempio n. 5
0
    def get_swadeshness_by_nearest(self, word_matrix, k):
        """
        Calculate the swadeshness around the language nearest to this point.

        The k + 1 languages nearest to (self.latitude, self.longitude) are
        requested from the map. The first one returned is the origin; only
        the distances between the origin and each of the other languages are
        taken into account. The parameter k must be a positive integer.

        Returns a tuple (origin, d, p):
        - origin: the first language returned, or None if none was returned;
        - d: dict mapping each other language to the distance pair the word
          matrix reports for it (languages without distances are omitted);
        - p: correlation between the first and second components of those
          pairs, or 0 when fewer than two languages were returned.
        """
        nearest = Map().get_nearest(self.latitude, self.longitude, k + 1)

        found = len(nearest)
        if found == 0:
            return None, {}, 0

        origin = nearest[0]
        if found == 1:
            # Only the origin itself: nothing to correlate against.
            return origin, {}, 0

        # Collect (language, distance pair) for every neighbour the word
        # matrix has distances for, preserving the order returned by the map.
        kept = []
        for neighbour in nearest[1:]:
            pair = word_matrix.get_distances(origin, neighbour)
            if pair is None:
                continue
            kept.append((neighbour, pair))

        distances = dict(kept)
        correlation = get_correlation(
            [pair[0] for _, pair in kept],
            [pair[1] for _, pair in kept],
        )

        return origin, distances, correlation
Esempio n. 6
0
    def get_swadeshness_in_radius(self, word_matrix, radius):
        """
        Calculate the swadeshness of the languages within the given radius
        of (self.latitude, self.longitude), with respect to the word matrix.

        Every unordered pair of languages found that has an entry in
        word_matrix.d is taken into account.

        Returns a tuple (d, p):
        - d: dict keyed by "lang_a,lang_b" (smaller language first) holding
          the word matrix entry for that pair;
        - p: correlation between the first and second components of those
          entries.
        """
        nearby = Map().get_in_radius(self.latitude, self.longitude, radius)

        distances = {}
        first_parts, second_parts = [], []

        for lang_a in nearby:
            for lang_b in nearby:
                pair = (lang_a, lang_b)

                # Skip self-pairs and mirrored pairs, as well as pairs the
                # word matrix knows nothing about.
                if lang_a >= lang_b or pair not in word_matrix.d:
                    continue

                entry = word_matrix.d[pair]
                distances[','.join(pair)] = entry

                first_parts.append(entry[0])
                second_parts.append(entry[1])

        return distances, get_correlation(first_parts, second_parts)
Esempio n. 7
0
    def test_hypothetically(self, data):
        """Check that get_correlation yields a plain number within [-1, 1].

        data is a sequence of items whose first and second elements form the
        two series handed to get_correlation.
        """
        first_series = [item[0] for item in data]
        second_series = [item[1] for item in data]

        result = get_correlation(first_series, second_series)

        # Exact type match: only int and float themselves are accepted.
        self.assertIn(type(result), (int, float))
        self.assertGreaterEqual(result, -1)
        self.assertLessEqual(result, 1)