Python StringGrouper.get_matches 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: string_grouper.string_grouper

클래스/타입: StringGrouper

메소드/함수: get_matches

hotexamples.com에서의 예제들: 8

Python StringGrouper.get_matches - 8개의 예제를 찾았습니다. 이 예제들은 오픈소스 프로젝트에서 추출한 것으로, Python string_grouper.string_grouper.StringGrouper.get_matches의 실제 사용 예제 가운데 가장 높은 평가를 받은 것들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

StringGrouper(18)

fit(15)

get_groups(11)

get_matches(8)

add_match(4)

n_grams(4)

_get_tf_idf_matrices(3)

_build_matches(1)

_clean_groups(1)

_get_non_matches_list(1)

remove_match(1)

예제 #1

파일 보기

파일: test_string_grouper.py 프로젝트: qinyufm/string_grouper

 def test_add_match_single_occurence(self):
     """A manually added match is reported exactly once when there are no exact duplicates."""
     master = pd.Series(['foooo', 'no match', 'baz', 'foooo'])
     duplicates = pd.Series(['foooo', 'bar', 'baz', 'foooob'])

     # Case 1: grouper built from the master series only.
     master_only = StringGrouper(master).fit()
     master_only.add_match('no match', 'baz')
     found = master_only.get_matches()
     is_added_pair = (found.left_side == 'no match') & (found.right_side == 'baz')
     self.assertEqual(1, found[is_added_pair].shape[0])

     # Case 2: grouper built from a master and a duplicates series.
     two_sided = StringGrouper(master, duplicates).fit()
     two_sided.add_match('no match', 'bar')
     found = two_sided.get_matches()
     is_added_pair = (found.left_side == 'no match') & (found.right_side == 'bar')
     self.assertEqual(1, found[is_added_pair].shape[0])

예제 #2

파일 보기

파일: test_string_grouper.py 프로젝트: justasojourner/string_grouper

 def test_get_matches_2_series_2_id_series(self):
     """get_matches() on two series with ID series returns index, string and ID columns.

     The expected frame holds the two strings present in both series
     ('foo' and 'bar'), each with similarity 1.0, together with their
     positional indices and the IDs taken from the respective ID series.
     """
     test_series_1 = pd.Series(['foo', 'bar', 'baz'])
     test_series_id_1 = pd.Series(['A0', 'A1', 'A2'])
     test_series_2 = pd.Series(['foo', 'bar', 'bop'])
     test_series_id_2 = pd.Series(['B0', 'B1', 'B2'])
     sg = StringGrouper(test_series_1,
                        test_series_2,
                        duplicates_id=test_series_id_2,
                        master_id=test_series_id_1).fit()
     # Key order defines the column order that assert_frame_equal checks.
     expected_df = pd.DataFrame({
         'left_index': [0, 1],
         'left_side': ['foo', 'bar'],
         'left_id': ['A0', 'A1'],
         'similarity': [1.0, 1.0],
         'right_id': ['B0', 'B1'],
         'right_side': ['foo', 'bar'],
         'right_index': [0, 1],
     })
     # Replace the column outright rather than assigning through
     # ``.loc[:, 'similarity']``: on pandas >= 2.0 a ``.loc`` assignment writes
     # into the existing float64 column in place, which would silently discard
     # the cast to the configured matrix dtype.
     expected_df['similarity'] = expected_df['similarity'].astype(
         sg._config.tfidf_matrix_dtype)
     pd.testing.assert_frame_equal(expected_df, sg.get_matches())

예제 #3

파일 보기

파일: test_string_grouper.py 프로젝트: qinyufm/string_grouper

 def test_get_matches_two_dataframes(self):
     """Two series sharing 'foo' and 'bar' are expected to yield those two matches at similarity 1.0."""
     master = pd.Series(['foo', 'bar', 'baz'])
     duplicates = pd.Series(['foo', 'bar', 'bop'])
     grouper = StringGrouper(master, duplicates).fit()

     # Key order defines the column order compared by assert_frame_equal.
     expected = pd.DataFrame({
         'left_side': ['foo', 'bar'],
         'right_side': ['foo', 'bar'],
         'similarity': [1.0, 1.0],
     })
     pd.testing.assert_frame_equal(expected, grouper.get_matches())

예제 #4

파일 보기

파일: test_string_grouper.py 프로젝트: qinyufm/string_grouper

    def test_remove_match(self):
        """A removed match must no longer appear in the reported matches."""
        master = pd.Series(['foooo', 'no match', 'baz', 'foooob'])
        duplicates = pd.Series(['foooo', 'bar', 'baz', 'foooob'])

        def pair_count(frame, left, right):
            """Return how many rows of *frame* pair *left* with *right*."""
            selected = frame[(frame.left_side == left) & (frame.right_side == right)]
            return selected.shape[0]

        master_only = StringGrouper(master).fit()
        master_only.remove_match('foooo', 'foooob')
        remaining = master_only.get_matches()
        forward = pair_count(remaining, 'foooo', 'foooob')
        # With only a master series the matches go both ways, so the
        # reversed pair has to be gone as well.
        backward = pair_count(remaining, 'foooob', 'foooo')
        self.assertEqual(0, forward)
        self.assertEqual(0, backward)

        two_sided = StringGrouper(master, duplicates).fit()
        two_sided.remove_match('foooo', 'foooob')
        remaining = two_sided.get_matches()
        self.assertEqual(0, pair_count(remaining, 'foooo', 'foooob'))

예제 #5

파일 보기

파일: test_string_grouper.py 프로젝트: qinyufm/string_grouper

 def test_add_match_single_group_matches_symmetric(self):
     """A match added to a master-only grouper should be present in both directions."""
     master = pd.Series(['foooo', 'no match', 'baz', 'foooo'])
     grouper = StringGrouper(master).fit()
     grouper.add_match('no match', 'baz')
     found = grouper.get_matches()

     # Forward direction: 'no match' -> 'baz'.
     forward_mask = (found.left_side == 'no match') & (found.right_side == 'baz')
     self.assertEqual(1, found[forward_mask].shape[0])

     # Reverse direction: 'baz' -> 'no match'.
     reverse_mask = (found.left_side == 'baz') & (found.right_side == 'no match')
     self.assertEqual(1, found[reverse_mask].shape[0])

예제 #6

파일 보기

파일: test_string_grouper.py 프로젝트: mbotezatu/string_grouper

 def test_get_matches_1_series_1_id_series(self):
     """get_matches() on one series with an ID series is expected to report IDs next to each side."""
     strings = pd.Series(['foo', 'bar', 'baz', 'foo'])
     ids = pd.Series(['A0', 'A1', 'A2', 'A3'])
     grouper = StringGrouper(strings, master_id=ids).fit()

     # 'foo' occurs twice (A0 and A3), so it contributes four of the six rows.
     # Key order defines the column order compared by assert_frame_equal.
     expected = pd.DataFrame({
         'left_side_id': ['A0', 'A0', 'A1', 'A2', 'A3', 'A3'],
         'left_side': ['foo', 'foo', 'bar', 'baz', 'foo', 'foo'],
         'right_side_id': ['A3', 'A0', 'A1', 'A2', 'A3', 'A0'],
         'right_side': ['foo', 'foo', 'bar', 'baz', 'foo', 'foo'],
         'similarity': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
     })
     pd.testing.assert_frame_equal(expected, grouper.get_matches())

예제 #7

파일 보기

 def test_get_matches_single(self):
     """get_matches() on a single series is expected to list every matching pair with indices."""
     strings = pd.Series(['foo', 'bar', 'baz', 'foo'])
     grouper = StringGrouper(strings).fit()

     # 'foo' sits at positions 0 and 3, giving the pairs (0,0), (0,3), (3,0), (3,3).
     # Key order defines the column order compared by assert_frame_equal.
     expected = pd.DataFrame({
         'left_index': [0, 0, 1, 2, 3, 3],
         'left_side': ['foo', 'foo', 'bar', 'baz', 'foo', 'foo'],
         'similarity': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
         'right_side': ['foo', 'foo', 'bar', 'baz', 'foo', 'foo'],
         'right_index': [0, 3, 1, 2, 0, 3],
     })
     pd.testing.assert_frame_equal(expected, grouper.get_matches())

예제 #8

파일 보기

파일: test_string_grouper.py 프로젝트: justasojourner/string_grouper

 def test_get_matches_two_dataframes(self):
     """get_matches() on two series returns the shared strings with their indices.

     The expected frame holds the two strings present in both series
     ('foo' and 'bar'), each with similarity 1.0 and positional indices
     0 and 1 on both sides.
     """
     test_series_1 = pd.Series(['foo', 'bar', 'baz'])
     test_series_2 = pd.Series(['foo', 'bar', 'bop'])
     sg = StringGrouper(test_series_1, test_series_2).fit()
     # Key order defines the column order that assert_frame_equal checks.
     expected_df = pd.DataFrame({
         'left_index': [0, 1],
         'left_side': ['foo', 'bar'],
         'similarity': [1.0, 1.0],
         'right_side': ['foo', 'bar'],
         'right_index': [0, 1],
     })
     # Replace the column outright rather than assigning through
     # ``.loc[:, 'similarity']``: on pandas >= 2.0 a ``.loc`` assignment writes
     # into the existing float64 column in place, which would silently discard
     # the cast to the configured matrix dtype.
     expected_df['similarity'] = expected_df['similarity'].astype(
         sg._config.tfidf_matrix_dtype)
     pd.testing.assert_frame_equal(expected_df, sg.get_matches())