コード例 #1
0
ファイル: test_names.py プロジェクト: Phyks/dissemin
 def test_mismatch(self):
     # Each pair below differs in a middle initial, a surname or a first
     # name, and is expected to get a similarity of 0.
     conflicting = (
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Claire', 'Mathieu'), ('Claire', 'Kenyon-Mathieu')),
         (('Amanda P.', 'Brown'), ('Patrick', 'Brown')),
     )
     for left, right in conflicting:
         self.assertAlmostEqual(name_similarity(left, right), 0)
コード例 #2
0
 def test_mismatch(self):
     # Pairs of names that must not be matched: the expected score is 0.
     no_match = 0
     mismatches = [
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Claire', 'Mathieu'), ('Claire', 'Kenyon-Mathieu')),
         (('Amanda P.', 'Brown'), ('Patrick', 'Brown')),
     ]
     for name_a, name_b in mismatches:
         self.assertAlmostEqual(name_similarity(name_a, name_b), no_match)
コード例 #3
0
ファイル: test_names.py プロジェクト: Phyks/dissemin
 def test_symmetric(self):
     # Swapping the two arguments must not change the similarity score.
     cases = (
         (('Robin', 'Ryder'), ('Robin', 'Ryder')),
         (('Robin', 'Ryder'), ('R.', 'Ryder')),
         (('R.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder')),
         (('R. J.', 'Ryder'), ('J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder')),
         (('W. Timothy', 'Gowers'), ('Timothy', 'Gowers')),
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Claire', 'Mathieu'), ('Claire', 'Kenyon-Mathieu')),
     )
     for left, right in cases:
         forward = name_similarity(left, right)
         backward = name_similarity(right, left)
         self.assertAlmostEqual(forward, backward)
コード例 #4
0
 def test_symmetric(self):
     # The score of (a, b) has to equal the score of (b, a) for every
     # sample pair, whether the two names match or not.
     samples = (
         (('Robin', 'Ryder'), ('Robin', 'Ryder')),
         (('Robin', 'Ryder'), ('R.', 'Ryder')),
         (('R.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder')),
         (('R. J.', 'Ryder'), ('J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder')),
         (('W. Timothy', 'Gowers'), ('Timothy', 'Gowers')),
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Claire', 'Mathieu'), ('Claire', 'Kenyon-Mathieu')),
     )
     for name_a, name_b in samples:
         one_way = name_similarity(name_a, name_b)
         other_way = name_similarity(name_b, name_a)
         self.assertAlmostEqual(one_way, other_way)
コード例 #5
0
ファイル: similarity.py プロジェクト: jilljenn/dissemin
 def score(self, dataA, dataB):
     """
     Scores two names by delegating to name_similarity.

     :param dataA: a (first, last) pair
     :param dataB: a (first, last) pair
     :returns: the value of ``name_similarity(dataA, dataB)``
     """
     # TODO: this score function is far from optimal;
     # refine it so that 'Claire Mathieu' and 'Claire Mathieu-Kenyon' get
     # a decent score
     #
     # The unpacked parts are not used afterwards: the unpacking only
     # fails early when an argument does not hold exactly two items.
     _first_a, _last_a = dataA
     _first_b, _last_b = dataB
     similarity = name_similarity(dataA, dataB)
     return similarity
コード例 #6
0
ファイル: models.py プロジェクト: jilljenn/dissemin
    def get_or_create_by_orcid(cls, orcid, profile=None, user=None):
        """
        Returns the researcher registered with this ORCID id, creating it
        from the ORCID profile when no such researcher exists yet.

        :param orcid: the ORCID id to look up
        :param profile: profile data handed to :class:`OrcidProfile` as
                        ``json``; when None the profile is built from the
                        id alone
        :param user: passed on to ``Researcher.create_by_name``
        :returns: the existing or newly created :class:`Researcher`
        """
        researcher = None
        try:
            researcher = Researcher.objects.get(orcid=orcid)
        except Researcher.DoesNotExist:
            profile = (OrcidProfile(id=orcid) if profile is None
                       else OrcidProfile(json=profile))
            full_name = profile.name
            homepage = profile.homepage
            email = profile.email
            researcher = Researcher.create_by_name(
                full_name[0], full_name[1],
                orcid=orcid, user=user, homepage=homepage, email=email)

            # Ensure that extra info is added: fill in whichever of these
            # attributes is still empty on the researcher we got back.
            attributes = vars(researcher)
            dirty = False
            for field, value in (('homepage', homepage),
                                 ('orcid', orcid),
                                 ('email', email)):
                if not attributes[field] and value:
                    attributes[field] = value
                    dirty = True
            if dirty:
                researcher.save()

            # NOTE(review): each variant is scored against itself, not
            # against the profile's main name - confirm this is intended.
            for variant in profile.other_names:
                confidence = name_similarity(variant, variant)
                variant_name = Name.lookup_name(variant)
                researcher.add_name_variant(variant_name, confidence)

        return researcher
コード例 #7
0
    def update_variants(self, reset=False):
        """
        Sets the variants of this name to the candidates returned by variants_queryset
        and which have a positive name similarity with the reference name.

        :param reset: if True, every existing name variant is deleted first
                      (and ``update_best_confidence`` is called on its name),
                      then each matching candidate is added again with
                      ``force_update=True``

        .. todo::
            This should rather rely on the name variants with confidence 1.0
        """
        nvqs = self.namevariant_set.all()
        if reset:
            for nv in nvqs:
                # Keep a handle on the name: it is refreshed after the
                # variant pointing to it has been deleted.
                name = nv.name
                nv.delete()
                name.update_best_confidence()

            current_name_variants = set()
        else:
            current_name_variants = set(nv.name_id for nv in nvqs)

        # Built once up front instead of on every iteration (assumes that
        # add_name_variant leaves self.name untouched).
        reference = (self.name.first, self.name.last)
        for name in self.variants_queryset():
            sim = name_similarity((name.first, name.last), reference)
            if sim > 0 and name.id not in current_name_variants:
                self.add_name_variant(name, sim, force_update=reset)
コード例 #8
0
ファイル: models.py プロジェクト: jilljenn/dissemin
    def get_or_create_by_orcid(cls, orcid, profile=None, user=None):
        """
        Looks up the researcher holding this ORCID id and, when there is
        none, creates one from the corresponding ORCID profile.

        :param orcid: the ORCID id to look up
        :param profile: profile data given to :class:`OrcidProfile` as
                        ``json``; if None, the profile is built from the id
        :param user: forwarded to ``Researcher.create_by_name``
        :returns: the :class:`Researcher` found or created
        """
        researcher = None
        try:
            researcher = Researcher.objects.get(orcid=orcid)
        except Researcher.DoesNotExist:
            profile = (OrcidProfile(id=orcid) if profile is None
                       else OrcidProfile(json=profile))
            main_name = profile.name
            homepage = profile.homepage
            email = profile.email
            researcher = Researcher.create_by_name(
                main_name[0],
                main_name[1],
                orcid=orcid,
                user=user,
                homepage=homepage,
                email=email)

            # Ensure that extra info is added: only attributes that are
            # still empty get the value coming from the profile.
            state = vars(researcher)
            needs_save = False
            extras = (
                ('homepage', homepage),
                ('orcid', orcid),
                ('email', email),
            )
            for key, value in extras:
                if not state[key] and value:
                    state[key] = value
                    needs_save = True
            if needs_save:
                researcher.save()

            # NOTE(review): each variant is scored against itself, not
            # against the profile's main name - confirm this is intended.
            for variant in profile.other_names:
                confidence = name_similarity(variant, variant)
                variant_name = Name.lookup_name(variant)
                researcher.add_name_variant(variant_name, confidence)

        return researcher
コード例 #9
0
ファイル: models.py プロジェクト: Lysxia/dissemin
    def update_variants(self, reset=False):
        """
        Sets the variants of this name to the candidates returned by variants_queryset
        and which have a positive name similarity with the reference name.

        :param reset: if True, every existing name variant is deleted first
                      (and ``update_best_confidence`` is called on its name),
                      then each matching candidate is added again with
                      ``force_update=True``

        .. todo::
            This should rather rely on the name variants with confidence 1.0
        """
        nvqs = self.namevariant_set.all()
        if reset:
            for nv in nvqs:
                # The name is fetched before the deletion so that it can
                # be refreshed afterwards.
                name = nv.name
                nv.delete()
                name.update_best_confidence()

            current_name_variants = set()
        else:
            current_name_variants = set(nv.name_id for nv in nvqs)

        # The reference pair is computed a single time rather than once
        # per candidate (assumes add_name_variant does not alter self.name).
        reference = (self.name.first, self.name.last)
        for name in self.variants_queryset():
            sim = name_similarity((name.first, name.last), reference)
            if sim > 0 and name.id not in current_name_variants:
                self.add_name_variant(name, sim, force_update=reset)
コード例 #10
0
ファイル: models.py プロジェクト: jilljenn/dissemin
 def update_variants(self):
     """
     Sets the variants of this name to the candidates returned by variants_queryset

     Every candidate researcher whose name has a positive similarity with
     this name gets a NameVariant, created if missing with the similarity
     as its default confidence. ``best_confidence`` is only ever raised:
     a weaker match never overwrites a stronger one found earlier.
     """
     own_name = (self.first, self.last)
     for researcher in self.variants_queryset():
         sim = name_similarity(
             (researcher.name.first, researcher.name.last), own_name)
         if sim > 0:
             # Only keep the similarity when it beats the current best;
             # assigning it unconditionally would let a weak match lower
             # the value and a later, mediocre match be saved over it.
             improved = self.best_confidence < sim
             if improved:
                 self.best_confidence = sim
             # An unsaved name is stored first, presumably so that the
             # NameVariant created below can reference it.
             if self.pk is None or improved:
                 self.save()
             NameVariant.objects.get_or_create(
                 name=self, researcher=researcher,
                 defaults={'confidence': sim})
コード例 #11
0
ファイル: similarity.py プロジェクト: jilljenn/dissemin
 def score(self, dataA, dataB):
     """
     Sums name_similarity over every pair made of one name from each
     collection.

     :param dataA: a collection of (first, last) pairs, or None
     :param dataB: a collection of (first, last) pairs, or None
     :returns: the accumulated similarity, or 0. when either argument
               is None
     """
     if dataA is None or dataB is None:
         return 0.
     total = 0.
     for name_a in dataA:
         for name_b in dataB:
             # The unpacked parts are unused: the unpacking only fails
             # early when a name does not hold exactly two items.
             _first_a, _last_a = name_a
             _first_b, _last_b = name_b
             # Earlier versions scored the pair with name_tools.match on
             # the joined names, or added 1. when match_names accepted it.
             total += name_similarity(name_a, name_b)
     return total
コード例 #12
0
 def update_variants(self):
     """
     Sets the variants of this name to the candidates returned by variants_queryset

     Each candidate researcher whose name has a positive similarity with
     this name gets a NameVariant, created if missing with the similarity
     as its default confidence. ``best_confidence`` only ever increases:
     a weaker match does not replace a stronger one seen before.
     """
     own_name = (self.first, self.last)
     for researcher in self.variants_queryset():
         sim = name_similarity(
             (researcher.name.first, researcher.name.last),
             own_name)
         if sim > 0:
             # Record the similarity only when it exceeds the current
             # best; overwriting it every time would let a weak match
             # lower the value and corrupt the next comparison.
             improved = self.best_confidence < sim
             if improved:
                 self.best_confidence = sim
             # An unsaved name is stored first, presumably so that the
             # NameVariant created below can reference it.
             if self.pk is None or improved:
                 self.save()
             NameVariant.objects.get_or_create(name=self,
                                               researcher=researcher,
                                               defaults={'confidence': sim})
コード例 #13
0
ファイル: test_names.py プロジェクト: Phyks/dissemin
 def test_matching(self):
     # (first name, second name, expected similarity) for names that are
     # expected to match with a positive score.
     expectations = (
         (('Robin', 'Ryder'), ('Robin', 'Ryder'), 0.8),
         (('Robin', 'Ryder'), ('R.', 'Ryder'), 0.4),
         (('R.', 'Ryder'), ('R.', 'Ryder'), 0.4),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder'), 0.3),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder'), 0.8),
         (('R. J.', 'Ryder'), ('J.', 'Ryder'), 0.3),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder'), 0.7),
     )
     for left, right, expected in expectations:
         self.assertAlmostEqual(name_similarity(left, right), expected)
コード例 #14
0
 def test_matching(self):
     # Each row holds two compatible names and the score they should get.
     scored_pairs = [
         (('Robin', 'Ryder'), ('Robin', 'Ryder'), 0.8),
         (('Robin', 'Ryder'), ('R.', 'Ryder'), 0.4),
         (('R.', 'Ryder'), ('R.', 'Ryder'), 0.4),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder'), 0.3),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder'), 0.8),
         (('R. J.', 'Ryder'), ('J.', 'Ryder'), 0.3),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder'), 0.7),
     ]
     for name_a, name_b, wanted in scored_pairs:
         self.assertAlmostEqual(name_similarity(name_a, name_b), wanted)
コード例 #15
0
    def get_or_create_by_orcid(cls, orcid, profile=None, user=None):
        """
        Returns the researcher holding this ORCID id, creating it from the
        ORCID profile when it does not exist yet.

        :param orcid: the ORCID id to look up; must not be None
        :param profile: previously fetched profile data, given to
                        :class:`OrcidProfile` as ``json`` (when None, the
                        profile is built from the id alone)
        :param user: an user to associate with the profile.
        :returns: a :class:`Researcher` if everything went well
        :raises MetadataSourceException: if ``orcid`` is None
        """
        researcher = None
        if orcid is None:
            raise MetadataSourceException('Invalid ORCID id')
        try:
            researcher = Researcher.objects.get(orcid=orcid)
        except Researcher.DoesNotExist:
            profile = (OrcidProfile(id=orcid) if profile is None
                       else OrcidProfile(json=profile))
            full_name = profile.name
            homepage = profile.homepage
            email = profile.email
            researcher = Researcher.create_by_name(
                full_name[0], full_name[1],
                orcid=orcid, user=user, homepage=homepage, email=email)

            # Ensure that extra info is added: fill in whichever of these
            # attributes is still empty on the researcher we got back.
            attributes = vars(researcher)
            dirty = False
            for field, value in (('homepage', homepage),
                                 ('orcid', orcid),
                                 ('email', email)):
                if not attributes[field] and value:
                    attributes[field] = value
                    dirty = True
            if dirty:
                researcher.save()

            # NOTE(review): each variant is scored against itself, not
            # against the profile's main name - confirm this is intended.
            for variant in profile.other_names:
                confidence = name_similarity(variant, variant)
                variant_name = Name.lookup_name(variant)
                researcher.add_name_variant(variant_name, confidence)

        return researcher
コード例 #16
0
ファイル: models.py プロジェクト: Lysxia/dissemin
    def get_or_create_by_orcid(cls, orcid, profile=None, user=None):
        """
        Looks up the researcher with this ORCID id and creates it from the
        ORCID profile when there is none.

        :param orcid: the ORCID id to look up; must not be None
        :param profile: previously fetched profile data, given to
                        :class:`OrcidProfile` as ``json`` (if None, the
                        profile is built from the id alone)
        :param user: an user to associate with the profile.
        :returns: a :class:`Researcher` if everything went well
        :raises MetadataSourceException: if ``orcid`` is None
        """
        researcher = None
        if orcid is None:
            raise MetadataSourceException('Invalid ORCID id')
        try:
            researcher = Researcher.objects.get(orcid=orcid)
        except Researcher.DoesNotExist:
            profile = (OrcidProfile(id=orcid) if profile is None
                       else OrcidProfile(json=profile))
            main_name = profile.name
            homepage = profile.homepage
            email = profile.email
            researcher = Researcher.create_by_name(
                main_name[0],
                main_name[1],
                orcid=orcid,
                user=user,
                homepage=homepage,
                email=email)

            # Ensure that extra info is added: only attributes that are
            # still empty get the value coming from the profile.
            state = vars(researcher)
            needs_save = False
            extras = (
                ('homepage', homepage),
                ('orcid', orcid),
                ('email', email),
            )
            for key, value in extras:
                if not state[key] and value:
                    state[key] = value
                    needs_save = True
            if needs_save:
                researcher.save()

            # NOTE(review): each variant is scored against itself, not
            # against the profile's main name - confirm this is intended.
            for variant in profile.other_names:
                confidence = name_similarity(variant, variant)
                variant_name = Name.lookup_name(variant)
                researcher.add_name_variant(variant_name, confidence)

        return researcher
コード例 #17
0
 def test_multiple(self):
     # Two given names, the first one reduced to an initial on one side:
     # the expected similarity is 1.0.
     spelled_out = ('Juan Pablo', 'Corella')
     abbreviated = ('J. Pablo', 'Corella')
     self.assertAlmostEqual(name_similarity(spelled_out, abbreviated), 1.0)
コード例 #18
0
ファイル: test_names.py プロジェクト: Phyks/dissemin
 def test_multiple(self):
     # A name with two given names against the same name whose first
     # given name is only an initial: the expected score is 1.0.
     expected = 1.0
     full_form = ('Juan Pablo', 'Corella')
     short_form = ('J. Pablo', 'Corella')
     self.assertAlmostEqual(name_similarity(full_form, short_form), expected)
コード例 #19
0
ファイル: test_names.py プロジェクト: Phyks/dissemin
 def test_reverse(self):
     # The extra initial comes before the shared given name here; the
     # expected similarity is 0.7.
     with_initial = ('W. Timothy', 'Gowers')
     without_initial = ('Timothy', 'Gowers')
     self.assertAlmostEqual(
         name_similarity(with_initial, without_initial), 0.7)
コード例 #20
0
 def test_reverse(self):
     # One side carries a leading initial in front of the given name
     # both sides share; the pair should score 0.7.
     expected = 0.7
     longer = ('W. Timothy', 'Gowers')
     shorter = ('Timothy', 'Gowers')
     self.assertAlmostEqual(name_similarity(longer, shorter), expected)