Beispiel #1
0
 def test_mismatch(self):
     """Incompatible names must get a similarity of (almost) zero."""
     incompatible = (
         # same first name, conflicting middle initials
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         # same first name, different last names
         (('Claire', 'Mathieu'), ('Claire', 'Kenyon-Mathieu')),
         # different first names, same last name
         (('Amanda P.', 'Brown'), ('Patrick', 'Brown')),
     )
     for left, right in incompatible:
         self.assertAlmostEqual(name_similarity(left, right), 0)
Beispiel #2
0
 def test_mismatch(self):
     """Check that three pairs of incompatible names score (almost) zero."""
     # Each entry is a pair of (first, last) names expected not to match.
     mismatched_pairs = [
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Claire', 'Mathieu'), ('Claire', 'Kenyon-Mathieu')),
         (('Amanda P.', 'Brown'), ('Patrick', 'Brown')),
     ]
     for name_a, name_b in mismatched_pairs:
         similarity = name_similarity(name_a, name_b)
         self.assertAlmostEqual(similarity, 0)
Beispiel #3
0
 def test_symmetric(self):
     """Swapping the two arguments of name_similarity must not change the score."""
     samples = (
         (('Robin', 'Ryder'), ('Robin', 'Ryder')),
         (('Robin', 'Ryder'), ('R.', 'Ryder')),
         (('R.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder')),
         (('R. J.', 'Ryder'), ('J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder')),
         (('W. Timothy', 'Gowers'), ('Timothy', 'Gowers')),
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Claire', 'Mathieu'), ('Claire', 'Kenyon-Mathieu')),
     )
     for left, right in samples:
         forward = name_similarity(left, right)
         backward = name_similarity(right, left)
         self.assertAlmostEqual(forward, backward)
Beispiel #4
0
 def test_symmetric(self):
     """name_similarity(a, b) and name_similarity(b, a) must (almost) agree."""
     # Pairs of (first, last) names: exact matches, initials, extra or
     # conflicting middle initials, and a differing last name.
     name_pairs = (
         (('Robin', 'Ryder'), ('Robin', 'Ryder')),
         (('Robin', 'Ryder'), ('R.', 'Ryder')),
         (('R.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder')),
         (('R. J.', 'Ryder'), ('J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder')),
         (('W. Timothy', 'Gowers'), ('Timothy', 'Gowers')),
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Claire', 'Mathieu'), ('Claire', 'Kenyon-Mathieu')),
     )
     for first_name, second_name in name_pairs:
         one_way = name_similarity(first_name, second_name)
         other_way = name_similarity(second_name, first_name)
         self.assertAlmostEqual(one_way, other_way)
Beispiel #5
0
 def score(self, dataA, dataB):
     """
     Score a pair of names by delegating to name_similarity.

     Both arguments are first unpacked into exactly two items, so an
     argument that is not a 2-element iterable raises before
     name_similarity is called.
     """
     # TODO: this score function is far from optimal: refine it so that
     # 'Claire Mathieu' and 'Claire Mathieu-Kenyon' get a decent score.

     # The unpacked parts are not used below; only the shape check matters.
     _first_a, _last_a = dataA
     _first_b, _last_b = dataB
     return name_similarity(dataA, dataB)
Beispiel #6
0
    def get_or_create_by_orcid(cls, orcid, profile=None, user=None):
        """
        Return the researcher holding this ORCID id, creating it if needed.

        :param orcid: the ORCID id to look up
        :param profile: data handed to ``OrcidProfile(json=...)``; when None,
                        the profile is built from the ORCID id alone
        :param user: forwarded to ``Researcher.create_by_name``
        :returns: the existing or newly created researcher
        """
        try:
            return Researcher.objects.get(orcid=orcid)
        except Researcher.DoesNotExist:
            orcid_profile = (OrcidProfile(id=orcid) if profile is None
                             else OrcidProfile(json=profile))
            full_name = orcid_profile.name
            homepage = orcid_profile.homepage
            email = orcid_profile.email
            created = Researcher.create_by_name(
                full_name[0], full_name[1],
                orcid=orcid, user=user, homepage=homepage, email=email)

            # Fill in any of these attributes that is still empty on the
            # researcher we got back, and save only if something changed.
            dirty = False
            extra_info = (('homepage', homepage),
                          ('orcid', orcid),
                          ('email', email))
            for field, value in extra_info:
                if not created.__dict__[field] and value:
                    created.__dict__[field] = value
                    dirty = True
            if dirty:
                created.save()

            for variant in orcid_profile.other_names:
                # NOTE(review): the variant is compared with itself, not with
                # the profile's main name -- confirm this is intended.
                confidence = name_similarity(variant, variant)
                variant_name = Name.lookup_name(variant)
                created.add_name_variant(variant_name, confidence)

            return created
Beispiel #7
0
    def update_variants(self, reset=False):
        """
        Sets the variants of this name to the candidates returned by variants_queryset
        and which have a positive name similarity with the reference name.

        :param reset: when True, every existing name variant is deleted first
                      (calling ``update_best_confidence`` on its name), and
                      the candidates are added with ``force_update=True``.

        .. todo::
            This should rather rely on the name variants with confidence 1.0
        """
        existing_variants = self.namevariant_set.all()
        if reset:
            for variant in existing_variants:
                name = variant.name
                variant.delete()
                # Called after the variant is deleted, as before.
                name.update_best_confidence()

            known_name_ids = set()
        else:
            known_name_ids = set(
                variant.name_id for variant in existing_variants)

        # The reference name is the same for every candidate: build it once.
        reference = (self.name.first, self.name.last)
        for candidate in self.variants_queryset():
            sim = name_similarity((candidate.first, candidate.last), reference)
            # Only positive matches that are not already variants are added.
            if sim > 0 and candidate.id not in known_name_ids:
                self.add_name_variant(candidate, sim, force_update=reset)
Beispiel #8
0
    def get_or_create_by_orcid(cls, orcid, profile=None, user=None):
        """
        Fetch the researcher registered under ``orcid``, or create one.

        :param orcid: the ORCID id used for the lookup
        :param profile: passed as ``json=`` to ``OrcidProfile``; when None,
                        ``OrcidProfile`` is built from the id instead
        :param user: passed through to ``Researcher.create_by_name``
        :returns: the researcher that was found or created
        """
        try:
            return Researcher.objects.get(orcid=orcid)
        except Researcher.DoesNotExist:
            if profile is None:
                fetched = OrcidProfile(id=orcid)
            else:
                fetched = OrcidProfile(json=profile)
            names = fetched.name
            homepage = fetched.homepage
            email = fetched.email
            new_researcher = Researcher.create_by_name(
                names[0],
                names[1],
                orcid=orcid,
                user=user,
                homepage=homepage,
                email=email,
            )

            # Set whichever of these attributes is still empty; the
            # researcher is saved once, and only when something changed.
            needs_save = False
            for attr, value in (('homepage', homepage),
                                ('orcid', orcid),
                                ('email', email)):
                if not new_researcher.__dict__[attr] and value:
                    new_researcher.__dict__[attr] = value
                    needs_save = True
            if needs_save:
                new_researcher.save()

            for other_name in fetched.other_names:
                # NOTE(review): similarity of the variant with itself --
                # confirm this is the intended confidence.
                confidence = name_similarity(other_name, other_name)
                looked_up = Name.lookup_name(other_name)
                new_researcher.add_name_variant(looked_up, confidence)

            return new_researcher
Beispiel #9
0
    def update_variants(self, reset=False):
        """
        Sets the variants of this name to the candidates returned by variants_queryset
        and which have a positive name similarity with the reference name.

        :param reset: if True, all current name variants are deleted (and
                      ``update_best_confidence`` is called on each of their
                      names) before the candidates are added again with
                      ``force_update=True``.

        .. todo::
            This should rather rely on the name variants with confidence 1.0
        """
        current = self.namevariant_set.all()
        if reset:
            for nv in current:
                name = nv.name
                nv.delete()
                # Called once the variant is gone, as before.
                name.update_best_confidence()

            already_linked = set()
        else:
            already_linked = set(nv.name_id for nv in current)

        # Built once: the reference name does not depend on the candidate.
        own_name = (self.name.first, self.name.last)
        for candidate in self.variants_queryset():
            sim = name_similarity((candidate.first, candidate.last), own_name)
            # Skip non-matching candidates and names that are already variants.
            if sim > 0 and candidate.id not in already_linked:
                self.add_name_variant(candidate, sim, force_update=reset)
Beispiel #10
0
 def update_variants(self):
     """
     Sets the variants of this name to the candidates returned by variants_queryset

     For every candidate researcher whose name has a positive similarity
     with this name, a NameVariant linking the two is fetched or created
     (the similarity is used as the confidence of a newly created one).

     ``best_confidence`` is only ever raised: a candidate with a lower
     similarity no longer overwrites a higher value. The name is saved when
     the value was raised or when it has no primary key yet.
     """
     own_name = (self.first, self.last)
     for researcher in self.variants_queryset():
         sim = name_similarity(
             (researcher.name.first, researcher.name.last), own_name)
         if sim > 0:
             best = self.best_confidence
             # A missing best confidence is beaten by any positive score.
             improved = best is None or best < sim
             if improved:
                 self.best_confidence = sim
             if improved or self.pk is None:
                 self.save()
             NameVariant.objects.get_or_create(
                 name=self,
                 researcher=researcher,
                 defaults={'confidence': sim})
Beispiel #11
0
 def score(self, dataA, dataB):
     """
     Sum name_similarity over every pair (a, b) with a in dataA, b in dataB.

     Returns 0. when either argument is None. Each element is unpacked
     into exactly two items, so an element that is not a 2-element iterable
     raises.
     """
     if dataA is None or dataB is None:
         return 0.
     # Earlier revisions scored each pair with name_tools.match on the
     # joined "first last" strings, or added 1. when match_names(a, b) held.
     total = 0.
     for left in dataA:
         for right in dataB:
             # The parts are unused; the unpacking only checks the shape.
             _first_left, _last_left = left
             _first_right, _last_right = right
             total += name_similarity(left, right)
     return total
Beispiel #12
0
 def update_variants(self):
     """
     Sets the variants of this name to the candidates returned by variants_queryset

     Each candidate researcher whose name has a positive similarity with
     this name gets a NameVariant (fetched or created, with the similarity
     as the confidence of a new one).

     ``best_confidence`` is kept at the highest similarity seen: a weaker
     candidate no longer overwrites a stronger earlier value. The name is
     saved when the value went up or when it has no primary key yet.
     """
     this_name = (self.first, self.last)
     for researcher in self.variants_queryset():
         candidate_name = (researcher.name.first, researcher.name.last)
         sim = name_similarity(candidate_name, this_name)
         if sim > 0:
             previous_best = self.best_confidence
             # No best confidence yet counts as an improvement.
             raised = previous_best is None or previous_best < sim
             if raised:
                 self.best_confidence = sim
             if raised or self.pk is None:
                 self.save()
             NameVariant.objects.get_or_create(name=self,
                                               researcher=researcher,
                                               defaults={'confidence': sim})
Beispiel #13
0
 def test_matching(self):
     """Check the expected similarity of several compatible name pairs."""
     # (first name pair, second name pair, expected similarity)
     expectations = (
         (('Robin', 'Ryder'), ('Robin', 'Ryder'), 0.8),
         (('Robin', 'Ryder'), ('R.', 'Ryder'), 0.4),
         (('R.', 'Ryder'), ('R.', 'Ryder'), 0.4),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder'), 0.3),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder'), 0.8),
         (('R. J.', 'Ryder'), ('J.', 'Ryder'), 0.3),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder'), 0.7),
     )
     for left, right, expected in expectations:
         self.assertAlmostEqual(name_similarity(left, right), expected)
Beispiel #14
0
 def test_matching(self):
     """Compatible name pairs must get their expected similarity score."""
     cases = [
         # identical full names
         (0.8, ('Robin', 'Ryder'), ('Robin', 'Ryder')),
         # full first name against its initial, and initial against initial
         (0.4, ('Robin', 'Ryder'), ('R.', 'Ryder')),
         (0.4, ('R.', 'Ryder'), ('R.', 'Ryder')),
         # names where one side has a middle initial the other may lack
         (0.3, ('Robin J.', 'Ryder'), ('R.', 'Ryder')),
         (0.8, ('Robin J.', 'Ryder'), ('R. J.', 'Ryder')),
         (0.3, ('R. J.', 'Ryder'), ('J.', 'Ryder')),
         (0.7, ('Robin', 'Ryder'), ('Robin J.', 'Ryder')),
     ]
     for expected, name_a, name_b in cases:
         similarity = name_similarity(name_a, name_b)
         self.assertAlmostEqual(similarity, expected)
Beispiel #15
0
    def get_or_create_by_orcid(cls, orcid, profile=None, user=None):
        """
        Creates (or returns an existing) researcher from its ORCID id.

        :param orcid: the ORCID id of the researcher
        :param profile: already-fetched profile data, passed as ``json=`` to
                        :class:`OrcidProfile` (otherwise the profile is built
                        from the ORCID id)
        :param user: a user to associate with the profile.
        :returns: a :class:`Researcher`
        :raises MetadataSourceException: if ``orcid`` is None
        """
        if orcid is None:
            raise MetadataSourceException('Invalid ORCID id')
        try:
            return Researcher.objects.get(orcid=orcid)
        except Researcher.DoesNotExist:
            orcid_profile = (OrcidProfile(id=orcid) if profile is None
                             else OrcidProfile(json=profile))
            full_name = orcid_profile.name
            homepage = orcid_profile.homepage
            email = orcid_profile.email
            created = Researcher.create_by_name(
                full_name[0], full_name[1],
                orcid=orcid, user=user, homepage=homepage, email=email)

            # Fill in any of these attributes that is still empty on the
            # researcher we got back, and save only if something changed.
            dirty = False
            extra_info = (('homepage', homepage),
                          ('orcid', orcid),
                          ('email', email))
            for field, value in extra_info:
                if not created.__dict__[field] and value:
                    created.__dict__[field] = value
                    dirty = True
            if dirty:
                created.save()

            for variant in orcid_profile.other_names:
                # NOTE(review): the variant is compared with itself, not with
                # the profile's main name -- confirm this is intended.
                confidence = name_similarity(variant, variant)
                variant_name = Name.lookup_name(variant)
                created.add_name_variant(variant_name, confidence)

            return created
Beispiel #16
0
    def get_or_create_by_orcid(cls, orcid, profile=None, user=None):
        """
        Creates (or returns an existing) researcher from its ORCID id.

        :param orcid: the ORCID id to look up
        :param profile: profile data that has already been fetched; it is
                        passed as ``json=`` to :class:`OrcidProfile`. When
                        None, the profile is built from the ORCID id.
        :param user: a user to associate with the profile.
        :returns: a :class:`Researcher`
        :raises MetadataSourceException: if ``orcid`` is None
        """
        if orcid is None:
            raise MetadataSourceException('Invalid ORCID id')
        try:
            return Researcher.objects.get(orcid=orcid)
        except Researcher.DoesNotExist:
            if profile is None:
                fetched = OrcidProfile(id=orcid)
            else:
                fetched = OrcidProfile(json=profile)
            names = fetched.name
            homepage = fetched.homepage
            email = fetched.email
            new_researcher = Researcher.create_by_name(
                names[0],
                names[1],
                orcid=orcid,
                user=user,
                homepage=homepage,
                email=email,
            )

            # Set whichever of these attributes is still empty; the
            # researcher is saved once, and only when something changed.
            needs_save = False
            for attr, value in (('homepage', homepage),
                                ('orcid', orcid),
                                ('email', email)):
                if not new_researcher.__dict__[attr] and value:
                    new_researcher.__dict__[attr] = value
                    needs_save = True
            if needs_save:
                new_researcher.save()

            for other_name in fetched.other_names:
                # NOTE(review): similarity of the variant with itself --
                # confirm this is the intended confidence.
                confidence = name_similarity(other_name, other_name)
                looked_up = Name.lookup_name(other_name)
                new_researcher.add_name_variant(looked_up, confidence)

            return new_researcher
Beispiel #17
0
 def test_multiple(self):
     """'Juan Pablo' against 'J. Pablo' (same last name) must score 1.0."""
     full = ('Juan Pablo', 'Corella')
     abbreviated = ('J. Pablo', 'Corella')
     self.assertAlmostEqual(name_similarity(full, abbreviated), 1.0)
Beispiel #18
0
 def test_multiple(self):
     """A two-part first name must score 1.0 against its half-abbreviated form."""
     spelled_out = ('Juan Pablo', 'Corella')
     with_initial = ('J. Pablo', 'Corella')
     similarity = name_similarity(spelled_out, with_initial)
     self.assertAlmostEqual(similarity, 1.0)
Beispiel #19
0
 def test_reverse(self):
     """'W. Timothy' against 'Timothy' (same last name) must score 0.7."""
     with_leading_initial = ('W. Timothy', 'Gowers')
     bare = ('Timothy', 'Gowers')
     self.assertAlmostEqual(
         name_similarity(with_leading_initial, bare), 0.7)
Beispiel #20
0
 def test_reverse(self):
     """A first name with an extra leading initial must score 0.7 against the bare one."""
     longer = ('W. Timothy', 'Gowers')
     shorter = ('Timothy', 'Gowers')
     similarity = name_similarity(longer, shorter)
     self.assertAlmostEqual(similarity, 0.7)