Python FuzzyMatcher 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: policytool.refparse.utils

클래스/타입: FuzzyMatcher

hotexamples.com에서의 예제들: 8

Python FuzzyMatcher - 8개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python policytool.refparse.utils.FuzzyMatcher의 평가 점수가 가장 높은 실제 사용 예제들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

FuzzyMatcher(8)

match(2)

자주 사용되는 메소드들

FuzzyMatcher (8)

match (2)

예제 #1

파일 보기

파일: investigate_match_thresholds.py 프로젝트: UriCW/policytool

def get_matches(evaluation_references, sample_N):
    """Fuzzy-match a fixed random sample of references, labelled by group.

    Draws ``2 * sample_N`` rows from ``evaluation_references`` using
    ``random_state=0``. The first ``sample_N`` sampled rows form the
    "Positive" group and the remaining rows form the "Negative" group.
    Every publication whose ``uber_id`` appears in the negative group is
    removed from the set of publications handed to the matcher.

    Args:
        evaluation_references: DataFrame providing at least the ``title``
            and ``uber_id`` columns.
        sample_N: Number of rows in each of the two groups.

    Returns:
        A DataFrame built from one ``FuzzyMatcher.match`` result per sampled
        row, with the added columns ``"Title Length"`` (length of each
        ``"Extracted title"`` value) and ``"Match Type"`` (``"Positive"``
        for the first ``sample_N`` rows, ``"Negative"`` for the rest).
    """
    sampled = evaluation_references.sample(
        n=2 * sample_N, random_state=0
    ).reset_index()
    # The matcher is given rows with a capitalised 'Title' key.
    sampled['Title'] = sampled['title']

    positives = sampled.iloc[:sample_N]
    negatives = sampled.iloc[sample_N:]

    # Hide the negative rows' publications from the matcher.
    is_negative = evaluation_references['uber_id'].isin(negatives['uber_id'])
    publications = evaluation_references.loc[~is_negative]

    # NOTE(review): a threshold of -1 presumably makes the matcher return its
    # best candidate regardless of similarity -- confirm against FuzzyMatcher.
    fuzzy_matcher = FuzzyMatcher(publications, -1)

    labelled_rows = pd.concat([positives, negatives])
    eval_references = pd.DataFrame(
        [fuzzy_matcher.match(row) for _, row in labelled_rows.iterrows()]
    )
    eval_references["Title Length"] = list(
        map(len, eval_references["Extracted title"])
    )
    eval_references["Match Type"] = [
        label for label in ("Positive", "Negative") for _ in range(sample_N)
    ]

    return eval_references

예제 #2

파일 보기

	def test_close_match(self):
		"""Check that a partial title is not reported as a match.

		The only known publication is 'Malaria is caused by mosquitoes' and
		the reference title is just 'Malaria'. With the matcher built from
		``settings.FUZZYMATCH_SIMILARITY_THRESHOLD``, ``match`` is expected
		to return ``None``.
		"""
		real_publications = pd.DataFrame({
			'title': ['Malaria is caused by mosquitoes'],
			'uber_id': [1],
		})
		threshold = settings.FUZZYMATCH_SIMILARITY_THRESHOLD
		fuzzy_matcher = FuzzyMatcher(real_publications, threshold)
		reference = {
			'Document id': 10,
			'Reference id': 11,
			'Title': 'Malaria',
		}
		matched_publication = fuzzy_matcher.match(reference)
		# Identity check instead of assertEqual(..., None), which compares
		# through ``==`` and depends on the returned object's __eq__.
		self.assertIsNone(matched_publication)

예제 #3

파일 보기

	def test_no_string_titles(self):
		"""Building a matcher from integer titles must raise AttributeError."""
		numeric_titles = pd.DataFrame({'title': [1, 2]})
		similarity_threshold = 75
		with self.assertRaises(AttributeError):
			FuzzyMatcher(numeric_titles, similarity_threshold)

예제 #4

파일 보기

	def test_empty_title(self):
		"""Building a matcher from an empty 'title' column must raise ValueError."""
		no_rows = pd.DataFrame({'title': []})
		similarity_threshold = 75
		with self.assertRaises(ValueError):
			FuzzyMatcher(no_rows, similarity_threshold)

예제 #5

파일 보기

def evaluate_match_references(evaluation_references, match_threshold,
                              length_threshold, sample_N):
    """Score the fuzzy matcher on a fixed random sample of references.

    Draws ``2 * sample_N`` rows from ``evaluation_references`` using
    ``random_state=0``. The first ``sample_N`` rows are positives, whose
    expected result is their own ``'Reference id'``. The remaining rows are
    negatives: their publications are removed (by ``uber_id``) from what the
    matcher sees, and their expected result is ``None``.

    Args:
        evaluation_references: DataFrame providing at least the ``title``,
            ``uber_id`` and ``Reference id`` columns.
        match_threshold: Passed to ``FuzzyMatcher`` as its second argument.
        length_threshold: Passed to ``FuzzyMatcher`` as its third argument.
        sample_N: Number of rows in each of the two groups.

    Returns:
        Whatever ``evaluate_metric`` returns for the expected values and the
        predictions produced by ``predict_match_data``.
    """
    sampled = evaluation_references.sample(
        n=2 * sample_N, random_state=0
    ).reset_index()
    # The matcher is given records with a capitalised 'Title' key.
    sampled['Title'] = sampled['title']

    positives = sampled.iloc[:sample_N]
    negatives = sampled.iloc[sample_N:]

    # Hide the negative rows' publications from the matcher.
    held_out = evaluation_references['uber_id'].isin(negatives['uber_id'])
    publications = evaluation_references.loc[~held_out]

    fuzzy_matcher = FuzzyMatcher(publications, match_threshold,
                                 length_threshold)

    # Positives first, then negatives, so predictions line up with `expected`.
    records = positives.to_dict('records') + negatives.to_dict('records')
    predictions = predict_match_data(match_data=records,
                                     matcher=fuzzy_matcher)

    expected = positives['Reference id'].to_list()
    expected.extend([None] * sample_N)

    return evaluate_metric(expected, predictions)

예제 #6

파일 보기

	def init_fuzzy_matcher(self):
		"""Return a FuzzyMatcher over two publications ('Malaria', 'Zika').

		The matcher uses ``settings.FUZZYMATCH_SIMILARITY_THRESHOLD`` as its
		similarity threshold.
		"""
		publications = pd.DataFrame({
			'title': ['Malaria', 'Zika'],
			'uber_id': [1, 2],
		})
		return FuzzyMatcher(
			publications, settings.FUZZYMATCH_SIMILARITY_THRESHOLD
		)

예제 #7

파일 보기

	def test_init_variables(self):
		"""Check the attributes a freshly built FuzzyMatcher exposes.

		``publications`` must equal the input frame, ``similarity_threshold``
		must equal the threshold passed in, and ``tfidf_matrix`` must have a
		non-zero ``size``.
		"""
		publications = pd.DataFrame({'title': ['Malaria', 'Zika']})
		similarity_threshold = 0
		matcher = FuzzyMatcher(publications, similarity_threshold)

		assert_frame_equal(matcher.publications, publications)
		self.assertEqual(matcher.similarity_threshold, similarity_threshold)
		self.assertTrue(matcher.tfidf_matrix.size != 0)

예제 #8

파일 보기

	def test_no_title(self):
		"""Building a matcher from a frame with no columns must raise KeyError."""
		no_columns = pd.DataFrame({})
		similarity_threshold = 75
		with self.assertRaises(KeyError):
			FuzzyMatcher(no_columns, similarity_threshold)