예제 #1
0
 def save(self, *args, **kwargs):
     """Refresh the derived name fields, then delegate to the parent save.

     Before saving, this:

     * replaces every right single quotation mark (U+2019) in
       ``last_name`` with a plain apostrophe;
     * regenerates ``slug`` from ``self.name()``;
     * recomputes the metaphone codes of ``first_name`` and
       ``last_name`` with ``dm``, storing the first code and the
       second (alternate) code separately.

     Any positional and keyword arguments are forwarded unchanged to the
     parent class's ``save``, so callers may use either calling style.
     """
     # I know, but store it like this anyway.
     self.last_name = self.last_name.replace(u'\u2019', "'")
     self.slug = slugify(self.name())

     # Compute both results before touching any metaphone field.
     first_codes = dm(self.first_name)
     last_codes = dm(self.last_name)

     self.first_name_metaphone = first_codes[0]
     # A falsy alternate code is stored as an empty string.
     self.first_name_metaphone_alt = first_codes[1] or ''
     self.last_name_metaphone = last_codes[0]
     self.last_name_metaphone_alt = last_codes[1] or ''

     super(Person, self).save(*args, **kwargs)
예제 #2
0
 def save(self, *args, **kwargs):
     """Refresh the derived name fields, then delegate to the parent save.

     Before saving, this:

     * replaces every right single quotation mark (U+2019) in
       ``last_name`` with a plain apostrophe;
     * regenerates ``slug`` from ``self.name()``;
     * recomputes the metaphone codes of ``first_name`` and
       ``last_name`` with ``dm``, storing the first code and the
       second (alternate) code separately.

     Any positional and keyword arguments are forwarded unchanged to the
     parent class's ``save``, so callers may use either calling style.
     """
     # I know, but store it like this anyway.
     self.last_name = self.last_name.replace(u'\u2019', "'")
     self.slug = slugify(self.name())

     # Compute both results before touching any metaphone field.
     first_codes = dm(self.first_name)
     last_codes = dm(self.last_name)

     self.first_name_metaphone = first_codes[0]
     # A falsy alternate code is stored as an empty string.
     self.first_name_metaphone_alt = first_codes[1] or ''
     self.last_name_metaphone = last_codes[0]
     self.last_name_metaphone_alt = last_codes[1] or ''

     super(Person, self).save(*args, **kwargs)
예제 #3
0
파일: test.py 프로젝트: dracos/Theatricalia
def _dm_codes_match(codes1, codes2):
    """Return whether two ``dm`` results agree.

    The results agree when they are equal, when their first codes are
    equal, or when the first code of one equals the second code of the
    other.
    """
    return (codes1 == codes2
            or codes1[0] == codes2[0]
            or codes1[0] == codes2[1]
            or codes1[1] == codes2[0])


def compare(s1f, s1l, s2f, s2l):
    """Print how the name-matching algorithms rate two names.

    Args:
        s1f: First name that was wanted.
        s1l: Last name that was wanted.
        s2f: First name that was entered.
        s2l: Last name that was entered.

    All four are lower-cased before comparison. For the full name, the
    first name and the last name, the function prints the metaphone
    codes with a match flag, followed by the ``jarow`` and ``damerau``
    scores. Nothing is returned.
    """
    s1f = unicode(s1f.lower())
    s1l = unicode(s1l.lower())
    s2f = unicode(s2f.lower())
    s2l = unicode(s2l.lower())
    s1 = "%s %s" % (s1f, s1l)
    s2 = "%s %s" % (s2f, s2l)

    # Soundex output is currently disabled (see the commented-out line
    # below); the values are still computed so it can be re-enabled.
    soundex1 = soundex(s1)
    soundex2 = soundex(s2)
    soundex1f = soundex(s1f)
    soundex2f = soundex(s2f)
    soundex1l = soundex(s1l)
    soundex2l = soundex(s2l)
    soundexMatch = (soundex1 == soundex2)
    soundexFMatch = (soundex1f == soundex2f)
    soundexLMatch = (soundex1l == soundex2l)

    dm1 = dm(s1)
    dm2 = dm(s2)
    dm1f = dm(s1f)
    dm2f = dm(s2f)
    dm1l = dm(s1l)
    dm2l = dm(s2l)
    # Each flag is derived from its own pair of codes: full name, first
    # name and last name respectively.
    dmMatch = _dm_codes_match(dm1, dm2)
    dmFMatch = _dm_codes_match(dm1f, dm2f)
    dmLMatch = _dm_codes_match(dm1l, dm2l)

    jarowN = jarow(s1, s2)
    jarowNf = jarow(s1f, s2f)
    jarowNl = jarow(s1l, s2l)
    damerauN = damerau(s1, s2)
    damerauNf = damerau(s1f, s2f)
    damerauNl = damerau(s1l, s2l)
    # The qnum scores are computed but not printed.
    qnumN = qnum(s1, s2)
    qnumNf = qnum(s1f, s2f)
    qnumNl = qnum(s1l, s2l)

    # A parenthesised single argument prints the same text whether
    # ``print`` is a statement or a function.
    print("\n%s entered, %s wanted" % (s2, s1))
    #print("Soundex\t\tFull:%s/%s %s\tFirst:%s/%s %s\tLast:%s/%s %s" % (soundex1, soundex2, soundexMatch, soundex1f, soundex2f, soundexFMatch, soundex1l, soundex2l, soundexLMatch))
    print("Metaphone\tFull:%s/%s %s\tFirst:%s/%s %s\tLast:%s/%s %s" % (
        dm1, dm2, dmMatch, dm1f, dm2f, dmFMatch, dm1l, dm2l, dmLMatch))
    print("Algorithm\tFull name\tFirst name\tLast name")
    print("Jarow\t\t%.4f\t\t%.4f\t\t%.4f\nDamerau\t\t%.4f\t\t%.4f\t\t%.4f" % (
        jarowN, jarowNf, jarowNl, damerauN, damerauNf, damerauNl))
예제 #4
0
def search_people(search, force_similar=False, use_distance=True):
    """Find people whose names contain, sound like, or resemble ``search``.

    ``search`` is split on whitespace into at most four parts, and the
    number of parts selects the strategy:

    * one part: substring match on first or last name; if that finds
      nothing and the term is only letters, spaces, apostrophes and
      hyphens, a metaphone match; if that still finds nothing, an
      optional similarity scan;
    * two parts: treated as first name then last name, with the same
      three stages;
    * three or four parts: substring match only, trying both
      "all but the last word / last word" and
      "first word / remaining words" as first name / last name.

    Args:
        search: The raw search string.
        force_similar: Go straight to sound-alike matching. For a
            one-part search, people whose first or last name contains
            the term are excluded from the result.
        use_distance: When nothing else matched a one- or two-part
            search, scan every ``Person`` and keep those whose
            ``distance`` score reaches the module-level ``threshold``.

    Returns:
        A ``(people, sounds_people)`` pair. ``people`` is either the
        result of a ``Person.objects`` query or, after a similarity
        scan, a list of ``Person`` objects ordered best match first.
        ``sounds_people`` is 0 when no metaphone lookup was attempted,
        1 when one was, and 2 for a forced one-part sound-alike search.
    """
    people = []
    sounds_people = 0
    names = search.split(None, 3)

    if len(names) == 1:
        names[0] = names[0].replace(u'\u2019', "'")
        if force_similar:
            # Sound-alikes only: drop anyone whose name literally
            # contains the term, then match on the first metaphone code.
            sounds_people = 2
            dm_ = dm(names[0])[0]
            people = Person.objects.exclude(
                first_name__icontains=names[0]).exclude(
                    last_name__icontains=names[0]).filter(
                        Q(first_name_metaphone=dm_)
                        | Q(last_name_metaphone=dm_))
        else:
            people = Person.objects.filter(
                Q(first_name__icontains=names[0])
                | Q(last_name__icontains=names[0]))
            if not people and re.match(r'(?i)[a-z\s\'-]+$', names[0]):
                # NOTE: only the first metaphone code is consulted here;
                # the alternate codes are not used for one-part searches.
                sounds_people = 1
                dm_ = dm(names[0])[0]
                people = Person.objects.filter(
                    Q(first_name_metaphone=dm_)
                    | Q(last_name_metaphone=dm_))

        if not people and use_distance:
            term = names[0].lower()
            scored = []
            for p in Person.objects.all():
                sim = distance(term, p.first_name.lower())
                sim2 = distance(term, p.last_name.lower())
                if sim >= threshold or sim2 >= threshold:
                    scored.append((1 - max(sim, sim2), p))
            # Sort on the score alone so tied scores never fall through
            # to comparing Person instances.
            scored.sort(key=lambda pair: pair[0])
            people = [person for _, person in scored]

    elif len(names) == 2:
        names[1] = names[1].replace(u'\u2019', "'")
        people = Person.objects.filter(first_name__icontains=names[0],
                                       last_name__icontains=names[1])
        if (not people
                and re.match(r'(?i)[a-z\s\'-]+$', search)) or force_similar:
            sounds_people = 1
            dm_first, dm_first_alt = dm(names[0])
            dm_last, dm_last_alt = dm(names[1])
            qs = Q()
            if dm_first:
                # Both names homophones
                if dm_last:
                    qs |= Q(first_name_metaphone=dm_first, last_name_metaphone=dm_last) \
                        | Q(first_name_metaphone=dm_first, last_name_metaphone_alt=dm_last) \
                        | Q(first_name_metaphone_alt=dm_first, last_name_metaphone=dm_last) \
                        | Q(first_name_metaphone_alt=dm_first, last_name_metaphone_alt=dm_last)
                if dm_last_alt:
                    qs |= Q(first_name_metaphone=dm_first, last_name_metaphone=dm_last_alt) \
                        | Q(first_name_metaphone_alt=dm_first, last_name_metaphone=dm_last_alt)
                # First name homophone, last name match
                qs |= Q(first_name_metaphone=dm_first, last_name__icontains=names[1]) \
                    | Q(first_name_metaphone_alt=dm_first, last_name__icontains=names[1])
            if dm_first_alt:
                qs |= Q(first_name_metaphone=dm_first_alt,
                        last_name__icontains=names[1])
                if dm_last:
                    qs |= Q(first_name_metaphone=dm_first_alt, last_name_metaphone=dm_last) \
                        | Q(first_name_metaphone=dm_first_alt, last_name_metaphone_alt=dm_last)
                if dm_last_alt:
                    qs |= Q(first_name_metaphone=dm_first_alt,
                            last_name_metaphone=dm_last_alt)
            if dm_last:
                # First name match, last name homophone
                qs |= Q(first_name__icontains=names[0], last_name_metaphone=dm_last) \
                    | Q(first_name__icontains=names[0], last_name_metaphone_alt=dm_last)
            if dm_last_alt:
                qs |= Q(first_name__icontains=names[0],
                        last_name_metaphone=dm_last_alt)
            people = Person.objects.filter(qs)

        if not people and use_distance:
            first = names[0].lower()
            last = names[1].lower()
            full = ' '.join(names).lower()
            # The search term is user text, not a pattern: escape it so
            # regex metacharacters are matched literally.
            first_re = re.compile(re.escape(names[0]), re.I)

            exact_last = []    # last name equal, first name similar
            first_inside = []  # first name contained, last name similar
            the_rest = []      # full name similar, or both parts similar
            for p in Person.objects.all():
                sim = distance(first, p.first_name.lower())
                sim2 = distance(last, p.last_name.lower())
                simB = distance(
                    full, ('%s %s' % (p.first_name, p.last_name)).lower())
                if last == p.last_name.lower() and sim >= threshold:
                    exact_last.append((1 - sim, p))
                elif first_re.search(p.first_name) and sim2 >= threshold:
                    first_inside.append((1 - sim2, p))
                elif simB >= threshold:
                    the_rest.append((1 - simB, p))
                elif sim >= threshold and sim2 >= threshold:
                    the_rest.append((1 - max(sim, sim2), p))
            # Sort each bucket on the score alone so tied scores never
            # fall through to comparing Person instances.
            for bucket in (exact_last, first_inside, the_rest):
                bucket.sort(key=lambda pair: pair[0])
            people = [person
                      for _, person in exact_last + first_inside + the_rest]

    elif len(names) == 3:
        names[1] = names[1].replace(u'\u2019', "'")
        names[2] = names[2].replace(u'\u2019', "'")
        people = Person.objects.filter(
            Q(first_name__icontains=' '.join(names[0:2]),
              last_name__icontains=names[2])
            | Q(first_name__icontains=names[0],
                last_name__icontains=' '.join(names[1:3])))

    elif len(names) == 4:
        names[3] = names[3].replace(u'\u2019', "'")
        people = Person.objects.filter(
            Q(first_name__icontains=' '.join(names[0:3]),
              last_name__icontains=names[3])
            | Q(first_name__icontains=names[0],
                last_name__icontains=' '.join(names[1:4])))

    return people, sounds_people
예제 #5
0
def search_people(search, force_similar=False, use_distance=True):
    """Find people whose names contain, sound like, or resemble ``search``.

    ``search`` is split on whitespace into at most four parts, and the
    number of parts selects the strategy:

    * one part: substring match on first or last name; if that finds
      nothing and the term is only letters, spaces, apostrophes and
      hyphens, a metaphone match; if that still finds nothing, an
      optional similarity scan;
    * two parts: treated as first name then last name, with the same
      three stages;
    * three or four parts: substring match only, trying both
      "all but the last word / last word" and
      "first word / remaining words" as first name / last name.

    Args:
        search: The raw search string.
        force_similar: Go straight to sound-alike matching. For a
            one-part search, people whose first or last name contains
            the term are excluded from the result.
        use_distance: When nothing else matched a one- or two-part
            search, scan every ``Person`` and keep those whose
            ``distance`` score reaches the module-level ``threshold``.

    Returns:
        A ``(people, sounds_people)`` pair. ``people`` is either the
        result of a ``Person.objects`` query or, after a similarity
        scan, a list of ``Person`` objects ordered best match first.
        ``sounds_people`` is 0 when no metaphone lookup was attempted,
        1 when one was, and 2 for a forced one-part sound-alike search.
    """
    people = []
    sounds_people = 0
    names = search.split(None, 3)

    if len(names) == 1:
        names[0] = names[0].replace(u'\u2019', "'")
        if force_similar:
            # Sound-alikes only: drop anyone whose name literally
            # contains the term, then match on the first metaphone code.
            sounds_people = 2
            dm_ = dm(names[0])[0]
            people = Person.objects.exclude(
                first_name__icontains=names[0]).exclude(
                    last_name__icontains=names[0]).filter(
                        Q(first_name_metaphone=dm_)
                        | Q(last_name_metaphone=dm_))
        else:
            people = Person.objects.filter(
                Q(first_name__icontains=names[0])
                | Q(last_name__icontains=names[0]))
            # The inline flag must lead the pattern: a trailing "(?i)"
            # is rejected by newer versions of the re module.
            if not people and re.match(r'(?i)[a-z\s\'-]+$', names[0]):
                # NOTE: only the first metaphone code is consulted here;
                # the alternate codes are not used for one-part searches.
                sounds_people = 1
                dm_ = dm(names[0])[0]
                people = Person.objects.filter(
                    Q(first_name_metaphone=dm_)
                    | Q(last_name_metaphone=dm_))

        if not people and use_distance:
            term = names[0].lower()
            scored = []
            for p in Person.objects.all():
                sim = distance(term, p.first_name.lower())
                sim2 = distance(term, p.last_name.lower())
                if sim >= threshold or sim2 >= threshold:
                    scored.append((1 - max(sim, sim2), p))
            # Sort on the score alone so tied scores never fall through
            # to comparing Person instances.
            scored.sort(key=lambda pair: pair[0])
            people = [person for _, person in scored]

    elif len(names) == 2:
        names[1] = names[1].replace(u'\u2019', "'")
        people = Person.objects.filter(first_name__icontains=names[0],
                                       last_name__icontains=names[1])
        # The inline flag must lead the pattern: a trailing "(?i)" is
        # rejected by newer versions of the re module.
        if (not people
                and re.match(r'(?i)[a-z\s\'-]+$', search)) or force_similar:
            sounds_people = 1
            dm_first, dm_first_alt = dm(names[0])
            dm_last, dm_last_alt = dm(names[1])
            qs = Q()
            if dm_first:
                # Both names homophones
                if dm_last:
                    qs |= Q(first_name_metaphone=dm_first, last_name_metaphone=dm_last) \
                        | Q(first_name_metaphone=dm_first, last_name_metaphone_alt=dm_last) \
                        | Q(first_name_metaphone_alt=dm_first, last_name_metaphone=dm_last) \
                        | Q(first_name_metaphone_alt=dm_first, last_name_metaphone_alt=dm_last)
                if dm_last_alt:
                    qs |= Q(first_name_metaphone=dm_first, last_name_metaphone=dm_last_alt) \
                        | Q(first_name_metaphone_alt=dm_first, last_name_metaphone=dm_last_alt)
                # First name homophone, last name match
                qs |= Q(first_name_metaphone=dm_first, last_name__icontains=names[1]) \
                    | Q(first_name_metaphone_alt=dm_first, last_name__icontains=names[1])
            if dm_first_alt:
                qs |= Q(first_name_metaphone=dm_first_alt,
                        last_name__icontains=names[1])
                if dm_last:
                    qs |= Q(first_name_metaphone=dm_first_alt, last_name_metaphone=dm_last) \
                        | Q(first_name_metaphone=dm_first_alt, last_name_metaphone_alt=dm_last)
                if dm_last_alt:
                    qs |= Q(first_name_metaphone=dm_first_alt,
                            last_name_metaphone=dm_last_alt)
            if dm_last:
                # First name match, last name homophone
                qs |= Q(first_name__icontains=names[0], last_name_metaphone=dm_last) \
                    | Q(first_name__icontains=names[0], last_name_metaphone_alt=dm_last)
            if dm_last_alt:
                qs |= Q(first_name__icontains=names[0],
                        last_name_metaphone=dm_last_alt)
            people = Person.objects.filter(qs)

        if not people and use_distance:
            first = names[0].lower()
            last = names[1].lower()
            full = ' '.join(names).lower()
            # The search term is user text, not a pattern: escape it so
            # regex metacharacters are matched literally.
            first_re = re.compile(re.escape(names[0]), re.I)

            exact_last = []    # last name equal, first name similar
            first_inside = []  # first name contained, last name similar
            the_rest = []      # full name similar, or both parts similar
            for p in Person.objects.all():
                sim = distance(first, p.first_name.lower())
                sim2 = distance(last, p.last_name.lower())
                simB = distance(
                    full, ('%s %s' % (p.first_name, p.last_name)).lower())
                if last == p.last_name.lower() and sim >= threshold:
                    exact_last.append((1 - sim, p))
                elif first_re.search(p.first_name) and sim2 >= threshold:
                    first_inside.append((1 - sim2, p))
                elif simB >= threshold:
                    the_rest.append((1 - simB, p))
                elif sim >= threshold and sim2 >= threshold:
                    the_rest.append((1 - max(sim, sim2), p))
            # Sort each bucket on the score alone so tied scores never
            # fall through to comparing Person instances.
            for bucket in (exact_last, first_inside, the_rest):
                bucket.sort(key=lambda pair: pair[0])
            people = [person
                      for _, person in exact_last + first_inside + the_rest]

    elif len(names) == 3:
        names[1] = names[1].replace(u'\u2019', "'")
        names[2] = names[2].replace(u'\u2019', "'")
        people = Person.objects.filter(
            Q(first_name__icontains=' '.join(names[0:2]),
              last_name__icontains=names[2])
            | Q(first_name__icontains=names[0],
                last_name__icontains=' '.join(names[1:3])))

    elif len(names) == 4:
        names[3] = names[3].replace(u'\u2019', "'")
        people = Person.objects.filter(
            Q(first_name__icontains=' '.join(names[0:3]),
              last_name__icontains=names[3])
            | Q(first_name__icontains=names[0],
                last_name__icontains=' '.join(names[1:4])))

    return people, sounds_people