Example #1
0
 def test_mismatch(self):
     # Pairs that must be treated as different people: conflicting
     # middle initials (K. vs J.) and differently spelled last names.
     incompatible_pairs = (
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin', 'Rider')),
     )
     for left, right in incompatible_pairs:
         score = shallower_name_similarity(left, right)
         self.assertAlmostEqual(score, 0)
Example #2
0
 def test_symmetric(self):
     # The similarity score must not depend on the order of its arguments.
     name_pairs = (
         (('Robin', 'Ryder'), ('Robin', 'Ryder')),
         (('Robin', 'Ryder'), ('R.', 'Ryder')),
         (('R.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder')),
         (('R. J.', 'Ryder'), ('J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder')),
         (('W. Timothy', 'Gowers'), ('Timothy', 'Gowers')),
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Claire', 'Mathieu'), ('Claire', 'Kenyon-Mathieu')),
     )
     for left, right in name_pairs:
         forward = shallower_name_similarity(left, right)
         backward = shallower_name_similarity(right, left)
         self.assertAlmostEqual(forward, backward)
Example #3
0
 def test_symmetric(self):
     # Each entry is (one name, another name); comparing them in either
     # direction has to give the same score.
     samples = (
         (('Robin', 'Ryder'), ('Robin', 'Ryder')),
         (('Robin', 'Ryder'), ('R.', 'Ryder')),
         (('R.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder')),
         (('R. J.', 'Ryder'), ('J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder')),
         (('W. Timothy', 'Gowers'), ('Timothy', 'Gowers')),
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Claire', 'Mathieu'), ('Claire', 'Kenyon-Mathieu')),
     )
     for sample in samples:
         one, other = sample
         score_one_other = shallower_name_similarity(one, other)
         score_other_one = shallower_name_similarity(other, one)
         self.assertAlmostEqual(score_one_other, score_other_one)
Example #4
0
def affiliate_author_with_orcid(ref_name,
                                orcid,
                                authors,
                                initial_affiliations=None):
    """
    Given a reference name and an ORCiD for a researcher, find out which
    author in the list is the most likely to be that author. This function
    is run on author lists of papers listed in the ORCiD record so we expect
    that one of the authors should be the same person as the ORCiD holder.
    This just finds the most similar name and returns the appropriate
    affiliations list.

    :param ref_name: name of the ORCiD holder; it is compared to each author
        name with shallower_name_similarity
    :param orcid: value stored at the position of the best matching author
    :param authors: list of author names
    :param initial_affiliations: optional starting affiliations; only used
        when it is non-empty and has the same length as `authors`, otherwise
        the result starts as None everywhere. The list passed in is never
        modified.
    :returns: a new list with one entry per author, where the entry of the
        most similar author (if any has a similarity above 0) is `orcid`
    """
    max_sim_idx = None
    max_sim = 0.
    for idx, name in enumerate(authors):
        cur_similarity = shallower_name_similarity(name, ref_name)
        # Strict comparison: the first of several equally good candidates
        # wins, and an author with a similarity of 0 is never selected.
        if cur_similarity > max_sim:
            max_sim_idx = idx
            max_sim = cur_similarity

    if initial_affiliations and len(initial_affiliations) == len(authors):
        # Work on a copy so that the caller's list is not changed in place.
        affiliations = list(initial_affiliations)
    else:
        affiliations = [None] * len(authors)

    if max_sim_idx is not None:
        affiliations[max_sim_idx] = orcid
    return affiliations
Example #5
0
 def test_malformed(self):
     # Blank, empty or missing name parts: the result has to compare
     # equal to False instead of raising.
     malformed_cases = (
         (('  ', '  '), ('John', 'Doe')),
         (('Alfred', 'Kastler'), ('    ', '    ')),
         ('', (None, '')),
     )
     for left, right in malformed_cases:
         outcome = shallower_name_similarity(left, right)
         self.assertEqual(outcome, False)
Example #6
0
 def test_malformed(self):
     # Inputs that are not proper (first, last) names, on either side of
     # the comparison, must yield a result equal to False.
     bad_inputs = (
         (('  ', '  '), ('John', 'Doe')),
         (('Alfred', 'Kastler'), ('    ', '    ')),
         ('', (None, '')),
     )
     for case in bad_inputs:
         one, other = case
         self.assertEqual(shallower_name_similarity(one, other), False)
Example #7
0
 def test_matching(self):
     # Two identical names get the maximal score.
     same_name = ('Robin', 'Ryder')
     exact_score = shallower_name_similarity(same_name, ('Robin', 'Ryder'))
     self.assertAlmostEqual(exact_score, 1.0)

     # Abbreviated, extended or missing first names with the same last
     # name must still be considered compatible (score above zero).
     compatible_pairs = (
         (('Robin', 'Ryder'), ('R.', 'Ryder')),
         (('R.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Robin', 'Ryder'), ('', 'Ryder')),
     )
     for left, right in compatible_pairs:
         score = shallower_name_similarity(left, right)
         self.assertGreater(score, 0)
Example #8
0
 def test_matching(self):
     # An exact match scores 1.0 ...
     self.assertAlmostEqual(
         shallower_name_similarity(('Robin', 'Ryder'), ('Robin', 'Ryder')),
         1.0)

     # ... and every variant below (initials only, extra middle initial,
     # empty first name) must score strictly more than zero.
     ryder_variants = [
         (('Robin', 'Ryder'), ('R.', 'Ryder')),
         (('R.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Robin', 'Ryder'), ('', 'Ryder')),
     ]
     for variant in ryder_variants:
         one, other = variant
         self.assertGreater(shallower_name_similarity(one, other), 0)
Example #9
0
def affiliate_author_with_orcid(ref_name, orcid, authors, initial_affiliations=None):
    """
    Given a reference name and an ORCiD for a researcher, find out which
    author in the list is the most likely to be that author. This function
    is run on author lists of papers listed in the ORCiD record so we expect
    that one of the authors should be the same person as the ORCiD holder.
    This just finds the most similar name and returns the appropriate
    affiliations list.

    :param ref_name: name of the ORCiD holder; it is compared to each author
        name with shallower_name_similarity
    :param orcid: value stored at the position of the best matching author
    :param authors: list of author names
    :param initial_affiliations: optional starting affiliations; only used
        when it is non-empty and has the same length as `authors`, otherwise
        the result starts as None everywhere. The list passed in is never
        modified.
    :returns: a new list with one entry per author, where the entry of the
        most similar author (if any has a similarity above 0) is `orcid`
    """
    max_sim_idx = None
    max_sim = 0.
    for idx, name in enumerate(authors):
        cur_similarity = shallower_name_similarity(name, ref_name)
        # Strict comparison: the first of several equally good candidates
        # wins, and an author with a similarity of 0 is never selected.
        if cur_similarity > max_sim:
            max_sim_idx = idx
            max_sim = cur_similarity

    if initial_affiliations and len(initial_affiliations) == len(authors):
        # Work on a copy so that the caller's list is not changed in place.
        affiliations = list(initial_affiliations)
    else:
        affiliations = [None] * len(authors)

    if max_sim_idx is not None:
        affiliations[max_sim_idx] = orcid
    return affiliations
Example #10
0
 def test_multiple(self):
     # A compound first name and the same name with its first part
     # abbreviated are expected to match fully.
     spelled_out = ('Juan Pablo', 'Corella')
     abbreviated = ('J. Pablo', 'Corella')
     score = shallower_name_similarity(spelled_out, abbreviated)
     self.assertAlmostEqual(score, 1.0)
Example #11
0
 def test_unicode(self):
     # The first spelling carries the sequences \u0102\u0160 where the
     # second one has an accented letter (presumably a mis-decoded
     # version of the same name); both must still be compatible.
     garbled = ('Cl\u0102\u0160ment', 'Pit-Claudel')
     accented = ('Clément', 'Pit-Claudel')
     score = shallower_name_similarity(garbled, accented)
     self.assertGreater(score, 0)
Example #12
0
 def test_reverse(self):
     # An additional leading initial in front of the first name must not
     # prevent the two names from being compatible.
     with_initial = ('W. Timothy', 'Gowers')
     without_initial = ('Timothy', 'Gowers')
     score = shallower_name_similarity(with_initial, without_initial)
     self.assertGreater(score, 0)
Example #13
0
 def test_unicode(self):
     # Same person, two encodings of the first name: one written with
     # the sequences \u0102\u0160, the other with an accented letter
     # (presumably the former is a mis-decoded form of the latter).
     name_pair = (
         ('Cl\u0102\u0160ment', 'Pit-Claudel'),
         ('Clément', 'Pit-Claudel'),
     )
     one, other = name_pair
     self.assertGreater(shallower_name_similarity(one, other), 0)
Example #14
0
 def test_multiple(self):
     # Abbreviating only the first of two given names must keep the
     # score at its maximum.
     name_pair = (
         ('Juan Pablo', 'Corella'),
         ('J. Pablo', 'Corella'),
     )
     one, other = name_pair
     self.assertAlmostEqual(shallower_name_similarity(one, other), 1.0)
Example #15
0
 def test_hyphen(self):
     # A last name written with a space instead of a hyphen (and a first
     # name without its accent, plus an extra initial) must still be
     # compatible with the hyphenated, accented spelling.
     spaced = ('Clement F.', 'Pit Claudel')
     hyphenated = ('Clément', 'Pit-Claudel')
     score = shallower_name_similarity(spaced, hyphenated)
     self.assertGreater(score, 0)
Example #16
0
 def test_hyphen(self):
     # 'Pit Claudel' and 'Pit-Claudel' differ only by the separator, and
     # the first names only by an accent and an initial: the score has
     # to stay above zero.
     name_pair = (
         ('Clement F.', 'Pit Claudel'),
         ('Clément', 'Pit-Claudel'),
     )
     one, other = name_pair
     self.assertGreater(shallower_name_similarity(one, other), 0)
Example #17
0
 def test_reverse(self):
     # 'W. Timothy' versus 'Timothy': the extra initial comes before the
     # shared first name, and the names must still be compatible.
     name_pair = (
         ('W. Timothy', 'Gowers'),
         ('Timothy', 'Gowers'),
     )
     one, other = name_pair
     self.assertGreater(shallower_name_similarity(one, other), 0)
Example #18
0
 def test_mismatch(self):
     # Neither a conflicting middle initial nor a one-letter change in
     # the last name may produce a non-zero score.
     mismatches = [
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin', 'Rider')),
     ]
     for mismatch in mismatches:
         one, other = mismatch
         self.assertAlmostEqual(shallower_name_similarity(one, other), 0)
Example #19
0
    def search_by_name(first, last, instance=settings.ORCID_BASE_DOMAIN):
        """
        Searches for ORCID profiles matching this (first,last) name.

        This is a generator: it yields one dict per compatible profile, with
        the keys 'first', 'last', 'orcid', 'homepage' and 'keywords'.
        Nothing is yielded when `last` is empty. XML parsing errors and
        request errors (including timeouts) are printed and end the search.

        :param first: first name to search for
        :param last: last name to search for
        :param instance: domain of the ORCID instance; the public API is
            queried at "https://pub." + instance
        """
        # Validate arguments
        if not last:
            return
        # Perform query
        base_base_pub = "https://pub." + instance + "/"
        baseurl = base_base_pub + 'v1.2/search/orcid-bio/'
        dct = {
            'rows': 10,
            'start': 0,
            'q': 'family-name:%s given-names:%s' % (last, first),
        }
        url = baseurl + '?' + urlencode(dct)
        try:
            # Without a timeout the request could block forever on an
            # unresponsive server; a timeout raises a RequestException
            # subclass, which is handled below.
            r = requests.get(url, timeout=30)
            # the namespace is the same for both the production and the
            # sandbox versions.
            ns = {'ns': 'http://www.orcid.org/ns/orcid'}
            xml = etree.fromstring(r.text.encode('utf-8'))
            for elem in xml.xpath('//ns:orcid-search-result', namespaces=ns):
                candidateFirst = None
                candidateLast = None
                # Get name
                pers_details = elem.xpath('.//ns:personal-details',
                                          namespaces=ns)
                if not pers_details:
                    continue
                for item in pers_details[0]:
                    if item.tag.endswith('given-names'):
                        candidateFirst = item.text
                    elif item.tag.endswith('family-name'):
                        candidateLast = item.text
                if not candidateFirst or not candidateLast:
                    continue
                # Check that the names are compatible
                if shallower_name_similarity(
                        (first, last), (candidateFirst, candidateLast)) == 0:
                    continue

                # Get ORCID iD
                orcid_elem = elem.xpath(
                    './ns:orcid-profile/ns:orcid-identifier/ns:path',
                    namespaces=ns)
                if not orcid_elem:
                    continue
                orcid = orcid_elem[0].text

                # Add other things
                researcher_urls = elem.xpath(
                    './ns:orcid-profile/ns:orcid-bio/ns:researcher-urls/ns:researcher-url/ns:url/text()',
                    namespaces=ns)
                # Only the first researcher URL (if any) is kept as homepage
                homepage = urlize(researcher_urls[0]) if researcher_urls else None

                keywords = elem.xpath(
                    './ns:orcid-profile/ns:orcid-bio/ns:keywords/ns:keyword/text()',
                    namespaces=ns)

                yield {
                    'first': candidateFirst,
                    'last': candidateLast,
                    'orcid': orcid,
                    'homepage': homepage,
                    'keywords': keywords,
                }

        except etree.XMLSyntaxError as e:
            print(e)
        except requests.exceptions.RequestException as e:
            print(e)
Example #20
0
    def search_by_name(first, last):
        """
        Searches for ORCID profiles matching this (first,last) name.

        This is a generator: it yields one dict per compatible profile, with
        the keys 'first', 'last', 'orcid', 'homepage' and 'keywords'.
        Nothing is yielded when `last` is empty. XML parsing errors and
        request errors (including timeouts) are printed and end the search.

        :param first: first name to search for
        :param last: last name to search for
        """
        # Validate arguments
        if not last:
            return
        # Perform query
        baseurl = 'http://pub.orcid.org/v1.2/search/orcid-bio/'
        dct = {
            'rows': 10,
            'start': 0,
            'q': 'family-name:%s given-names:%s' % (last, first),
        }
        url = baseurl + '?' + urlencode(dct)
        try:
            # Without a timeout the request could block forever on an
            # unresponsive server; a timeout raises a RequestException
            # subclass, which is handled below.
            r = requests.get(url, timeout=30)
            ns = {'ns': 'http://www.orcid.org/ns/orcid'}
            xml = etree.fromstring(r.text.encode('utf-8'))
            for elem in xml.xpath('//ns:orcid-search-result', namespaces=ns):
                candidateFirst = None
                candidateLast = None
                # Get name
                pers_details = elem.xpath('.//ns:personal-details',
                                          namespaces=ns)
                if not pers_details:
                    continue
                for item in pers_details[0]:
                    if item.tag.endswith('given-names'):
                        candidateFirst = item.text
                    elif item.tag.endswith('family-name'):
                        candidateLast = item.text
                if not candidateFirst or not candidateLast:
                    continue
                # Check that the names are compatible
                if shallower_name_similarity(
                        (first, last), (candidateFirst, candidateLast)) == 0:
                    continue

                # Get ORCID iD
                orcid_elem = elem.xpath(
                    './ns:orcid-profile/ns:orcid-identifier/ns:path',
                    namespaces=ns)
                if not orcid_elem:
                    continue
                orcid = orcid_elem[0].text

                # Add other things
                researcher_urls = elem.xpath(
                    './ns:orcid-profile/ns:orcid-bio/ns:researcher-urls/ns:researcher-url/ns:url/text()',
                    namespaces=ns)
                # Only the first researcher URL (if any) is kept as homepage
                homepage = urlize(researcher_urls[0]) if researcher_urls else None

                keywords = elem.xpath(
                    './ns:orcid-profile/ns:orcid-bio/ns:keywords/ns:keyword/text()',
                    namespaces=ns)

                yield {
                    'first': candidateFirst,
                    'last': candidateLast,
                    'orcid': orcid,
                    'homepage': homepage,
                    'keywords': keywords,
                }

        except etree.XMLSyntaxError as e:
            print(e)
        except requests.exceptions.RequestException as e:
            print(e)