Python shallower_name_similarity Examples, papers.name.shallower_name_similarity Python Examples

Example #1

0

Show file

 def test_mismatch(self):
     # A conflicting middle initial, or a last name spelled differently,
     # must yield a similarity of (almost) zero.
     incompatible = [
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin', 'Rider')),
     ]
     for left, right in incompatible:
         self.assertAlmostEqual(shallower_name_similarity(left, right), 0)

Example #2

0

Show file

File: test_names.py Project: Phyks/dissemin

 def test_symmetric(self):
     # Swapping the two arguments must not change the similarity score.
     cases = [
         (('Robin', 'Ryder'), ('Robin', 'Ryder')),
         (('Robin', 'Ryder'), ('R.', 'Ryder')),
         (('R.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder')),
         (('R. J.', 'Ryder'), ('J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder')),
         (('W. Timothy', 'Gowers'), ('Timothy', 'Gowers')),
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Claire', 'Mathieu'), ('Claire', 'Kenyon-Mathieu')),
     ]
     for left, right in cases:
         forward = shallower_name_similarity(left, right)
         backward = shallower_name_similarity(right, left)
         self.assertAlmostEqual(forward, backward)

Example #3

0

Show file

 def test_symmetric(self):
     # The score of (x, y) has to agree with the score of (y, x) for every
     # pair of names listed below.
     name_pairs = (
         (('Robin', 'Ryder'), ('Robin', 'Ryder')),
         (('Robin', 'Ryder'), ('R.', 'Ryder')),
         (('R.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder')),
         (('R. J.', 'Ryder'), ('J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder')),
         (('W. Timothy', 'Gowers'), ('Timothy', 'Gowers')),
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Claire', 'Mathieu'), ('Claire', 'Kenyon-Mathieu')),
     )
     for one_name, other_name in name_pairs:
         score = shallower_name_similarity(one_name, other_name)
         swapped_score = shallower_name_similarity(other_name, one_name)
         self.assertAlmostEqual(score, swapped_score)

Example #4

0

Show file

def affiliate_author_with_orcid(ref_name,
                                orcid,
                                authors,
                                initial_affiliations=None):
    """
    Given a reference name and an ORCiD for a researcher, find out which
    author in the list is the most likely to be that author. This function
    is run on author lists of papers listed in the ORCiD record so we expect
    that one of the authors should be the same person as the ORCiD holder.

    Each author name is compared to the reference name with
    shallower_name_similarity; the first author reaching the highest
    strictly positive score receives the ORCiD.

    :param ref_name: the reference name of the ORCiD holder
    :param orcid: the value stored at the position of the best match
    :param authors: the list of author names to search
    :param initial_affiliations: optional starting affiliations; they are
        only used when there is exactly one entry per author, otherwise
        the result starts as None everywhere
    :returns: a new list with one entry per author, where the entry of the
        most similar author (if any) is replaced by the ORCiD. The list
        passed as initial_affiliations is never modified.
    """
    best_idx = None
    best_similarity = 0.
    for idx, name in enumerate(authors):
        similarity = shallower_name_similarity(name, ref_name)
        # Strict comparison: a null score never matches, and in case of a
        # tie the earliest author is kept.
        if similarity > best_similarity:
            best_idx = idx
            best_similarity = similarity

    if initial_affiliations and len(initial_affiliations) == len(authors):
        # Work on a copy so that the caller's list is not altered in place.
        affiliations = list(initial_affiliations)
    else:
        affiliations = [None] * len(authors)

    if best_idx is not None:
        affiliations[best_idx] = orcid
    return affiliations

Example #5

0

Show file

 def test_malformed(self):
     # Blank names and arguments that are not proper name pairs are
     # expected to produce a result equal to False.
     malformed_cases = (
         (('  ', '  '), ('John', 'Doe')),
         (('Alfred', 'Kastler'), ('    ', '    ')),
         ('', (None, '')),
     )
     for left, right in malformed_cases:
         outcome = shallower_name_similarity(left, right)
         self.assertEqual(outcome, False)

Example #6

0

Show file

File: test_names.py Project: Phyks/dissemin

 def test_malformed(self):
     # Every pair below contains at least one unusable name; the comparison
     # result must then be equal to False.
     bad_first = (('  ', '  '), ('John', 'Doe'))
     bad_second = (('Alfred', 'Kastler'), ('    ', '    '))
     bad_both = ('', (None, ''))
     for first_arg, second_arg in (bad_first, bad_second, bad_both):
         self.assertEqual(
             shallower_name_similarity(first_arg, second_arg), False)

Example #7

0

Show file

File: test_names.py Project: Phyks/dissemin

 def test_matching(self):
     # Two identical names score (almost) exactly 1.
     same = ('Robin', 'Ryder')
     self.assertAlmostEqual(shallower_name_similarity(same, ('Robin', 'Ryder')), 1.0)

     # Compatible variants (initials, extra or missing given names) score
     # strictly above zero.
     compatible = [
         (('Robin', 'Ryder'), ('R.', 'Ryder')),
         (('R.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Robin', 'Ryder'), ('', 'Ryder')),
     ]
     for left, right in compatible:
         self.assertGreater(shallower_name_similarity(left, right), 0)

Example #8

0

Show file

 def test_matching(self):
     # Exact match: the score is expected to be (almost) 1.
     exact_score = shallower_name_similarity(('Robin', 'Ryder'),
                                             ('Robin', 'Ryder'))
     self.assertAlmostEqual(exact_score, 1.0)

     # Partial matches: each of these pairs must get a positive score.
     positive_pairs = (
         (('Robin', 'Ryder'), ('R.', 'Ryder')),
         (('R.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Robin', 'Ryder'), ('', 'Ryder')),
     )
     for name_a, name_b in positive_pairs:
         partial_score = shallower_name_similarity(name_a, name_b)
         self.assertGreater(partial_score, 0)

Example #9

0

Show file

File: orcid.py Project: Lysxia/dissemin

def affiliate_author_with_orcid(ref_name, orcid, authors, initial_affiliations=None):
    """
    Given a reference name and an ORCiD for a researcher, find out which
    author in the list is the most likely to be that author. This function
    is run on author lists of papers listed in the ORCiD record so we expect
    that one of the authors should be the same person as the ORCiD holder.

    The most similar name (according to shallower_name_similarity) is
    selected, provided its score is strictly positive; ties are resolved in
    favour of the earliest author.

    :param ref_name: the reference name of the ORCiD holder
    :param orcid: the value stored at the position of the best match
    :param authors: the list of author names to search
    :param initial_affiliations: optional starting affiliations, ignored
        unless their number equals the number of authors
    :returns: a fresh affiliations list (one entry per author) where the
        best match, if any, is set to the ORCiD. The initial_affiliations
        argument itself is left untouched.
    """
    if initial_affiliations and len(initial_affiliations) == len(authors):
        # Copy: assigning into the caller's own list would be a hidden
        # side effect of this function.
        affiliations = list(initial_affiliations)
    else:
        affiliations = [None]*len(authors)

    top_idx = None
    top_sim = 0.
    for idx, name in enumerate(authors):
        sim = shallower_name_similarity(name, ref_name)
        if sim > top_sim:
            top_idx, top_sim = idx, sim

    if top_idx is not None:
        affiliations[top_idx] = orcid
    return affiliations

Example #10

0

Show file

File: test_names.py Project: Phyks/dissemin

 def test_multiple(self):
     # Abbreviating only the first of two given names still scores 1.
     spelled_out = ('Juan Pablo', 'Corella')
     abbreviated = ('J. Pablo', 'Corella')
     score = shallower_name_similarity(spelled_out, abbreviated)
     self.assertAlmostEqual(score, 1.0)

Example #11

0

Show file

File: test_names.py Project: Phyks/dissemin

 def test_unicode(self):
     # NOTE(review): the escaped given name looks like a mis-decoded
     # spelling of the accented one -- confirm the intent of the escapes.
     escaped = ('Cl\u0102\u0160ment', 'Pit-Claudel')
     accented = ('Clément', 'Pit-Claudel')
     score = shallower_name_similarity(escaped, accented)
     self.assertGreater(score, 0)

Example #12

0

Show file

 def test_reverse(self):
     # An extra leading initial on one side must not prevent a match.
     with_initial = ('W. Timothy', 'Gowers')
     without_initial = ('Timothy', 'Gowers')
     score = shallower_name_similarity(with_initial, without_initial)
     self.assertGreater(score, 0)

Example #13

0

Show file

 def test_unicode(self):
     # The two given names differ only in how the accented letter is
     # written; the pair must still get a positive score.
     # NOTE(review): the escapes presumably stand for a garbled accent --
     # confirm.
     names = [
         ('Cl\u0102\u0160ment', 'Pit-Claudel'),
         ('Clément', 'Pit-Claudel'),
     ]
     self.assertGreater(shallower_name_similarity(names[0], names[1]), 0)

Example #14

0

Show file

 def test_multiple(self):
     # 'Juan Pablo' against 'J. Pablo': the score is expected to be 1.
     last = 'Corella'
     similarity = shallower_name_similarity(('Juan Pablo', last),
                                            ('J. Pablo', last))
     self.assertAlmostEqual(similarity, 1.0)

Example #15

0

Show file

 def test_hyphen(self):
     # A last name written with a space must still match its hyphenated
     # form, here combined with an unaccented first name and an extra
     # initial.
     spaced = ('Clement F.', 'Pit Claudel')
     hyphenated = ('Clément', 'Pit-Claudel')
     score = shallower_name_similarity(spaced, hyphenated)
     self.assertGreater(score, 0)

Example #16

0

Show file

File: test_names.py Project: Phyks/dissemin

 def test_hyphen(self):
     # 'Pit Claudel' and 'Pit-Claudel' have to be recognised as compatible
     # last names (positive score).
     candidates = (
         ('Clement F.', 'Pit Claudel'),
         ('Clément', 'Pit-Claudel'),
     )
     self.assertGreater(shallower_name_similarity(*candidates), 0)

Example #17

0

Show file

File: test_names.py Project: Phyks/dissemin

 def test_reverse(self):
     # 'W. Timothy' against plain 'Timothy': the score must be positive.
     last = 'Gowers'
     similarity = shallower_name_similarity(('W. Timothy', last),
                                            ('Timothy', last))
     self.assertGreater(similarity, 0)

Example #18

0

Show file

File: test_names.py Project: Phyks/dissemin

 def test_mismatch(self):
     # Different middle initials: the names are incompatible.
     initials_score = shallower_name_similarity(('Robin K.', 'Ryder'),
                                                ('Robin J.', 'Ryder'))
     self.assertAlmostEqual(initials_score, 0)

     # Different spelling of the last name: incompatible as well.
     surname_score = shallower_name_similarity(('Robin', 'Ryder'),
                                               ('Robin', 'Rider'))
     self.assertAlmostEqual(surname_score, 0)

Example #19

0

Show file

File: orcid.py Project: tarsbase/dissemin

    def search_by_name(first, last, instance=settings.ORCID_BASE_DOMAIN):
        """
        Searches for ORCID profiles matching this (first,last) name.

        This is a generator: it yields one dict per compatible profile,
        with the keys 'first', 'last', 'orcid', 'homepage' and 'keywords'.
        Nothing is yielded when `last` is empty.

        Search results are skipped when they lack a given name, a family
        name or an ORCID iD, or when shallower_name_similarity rates the
        candidate name as incompatible (score 0) with the requested one.

        XML syntax errors and `requests` exceptions are printed and end the
        iteration; they are not propagated to the caller.

        :param first: given names to search for
        :param last: family name to search for
        :param instance: domain appended to "https://pub." to build the
            address of the public API
        """
        # Validate arguments
        if not last:
            return
        # Perform query
        base_base_pub = "https://pub." + instance + "/"
        baseurl = base_base_pub + 'v1.2/search/orcid-bio/'
        dct = {
            'rows': 10,
            'start': 0,
            'q': 'family-name:%s given-names:%s' % (last, first),
        }
        url = baseurl + '?' + urlencode(dct)
        try:
            r = requests.get(url)
            # the namespace is the same for both the production and the
            # sandbox versions.
            ns = {'ns': 'http://www.orcid.org/ns/orcid'}
            xml = etree.fromstring(r.text.encode('utf-8'))
            for elem in xml.xpath('//ns:orcid-search-result', namespaces=ns):
                candidateFirst = None
                candidateLast = None
                # Get name
                pers_details = elem.xpath('.//ns:personal-details',
                                          namespaces=ns)
                if not pers_details:
                    continue
                for item in pers_details[0]:
                    if item.tag.endswith('given-names'):
                        candidateFirst = item.text
                    elif item.tag.endswith('family-name'):
                        candidateLast = item.text
                if not candidateFirst or not candidateLast:
                    continue
                # Check that the names are compatible
                if shallower_name_similarity(
                        (first, last), (candidateFirst, candidateLast)) == 0:
                    continue

                # Get ORCID iD
                orcid_elem = elem.xpath(
                    './ns:orcid-profile/ns:orcid-identifier/ns:path',
                    namespaces=ns)
                if not orcid_elem:
                    continue
                orcid = orcid_elem[0].text

                # Add other things: only the first researcher URL is kept
                # as the homepage.
                researcher_urls = elem.xpath(
                    './ns:orcid-profile/ns:orcid-bio/ns:researcher-urls/ns:researcher-url/ns:url/text()',
                    namespaces=ns)
                homepage = urlize(researcher_urls[0]) if researcher_urls else None

                keywords = elem.xpath(
                    './ns:orcid-profile/ns:orcid-bio/ns:keywords/ns:keyword/text()',
                    namespaces=ns)

                yield {
                    'first': candidateFirst,
                    'last': candidateLast,
                    'orcid': orcid,
                    'homepage': homepage,
                    'keywords': keywords,
                }

        # print(e) behaves the same as the Python 2 statement `print e` and
        # is also valid Python 3.
        except etree.XMLSyntaxError as e:
            print(e)
        except requests.exceptions.RequestException as e:
            print(e)

Example #20

0

Show file

File: orcid.py Project: Lysxia/dissemin

    def search_by_name(first, last):
        """
        Searches for ORCID profiles matching this (first,last) name.

        This is a generator: it yields one dict per compatible profile,
        with the keys 'first', 'last', 'orcid', 'homepage' and 'keywords'.
        Nothing is yielded when `last` is empty.

        Search results are skipped when they lack a given name, a family
        name or an ORCID iD, or when shallower_name_similarity rates the
        candidate name as incompatible (score 0) with the requested one.

        XML syntax errors and `requests` exceptions are printed and end the
        iteration; they are not propagated to the caller.

        :param first: given names to search for
        :param last: family name to search for
        """
        # Validate arguments
        if not last:
            return
        # Perform query
        # NOTE(review): this endpoint uses plain HTTP -- consider HTTPS if
        # the API supports it (to be confirmed).
        baseurl = 'http://pub.orcid.org/v1.2/search/orcid-bio/'
        dct = {
            'rows':10,
            'start':0,
            'q':'family-name:%s given-names:%s' % (last,first),
            }
        url = baseurl+'?'+urlencode(dct)
        try:
            r = requests.get(url)
            ns = {'ns':'http://www.orcid.org/ns/orcid' }
            xml = etree.fromstring(r.text.encode('utf-8'))
            for elem in xml.xpath('//ns:orcid-search-result', namespaces=ns):
                candidateFirst = None
                candidateLast = None
                # Get name
                pers_details = elem.xpath('.//ns:personal-details', namespaces=ns)
                if not pers_details:
                    continue
                for item in pers_details[0]:
                    if item.tag.endswith('given-names'):
                        candidateFirst = item.text
                    elif item.tag.endswith('family-name'):
                        candidateLast = item.text
                if not candidateFirst or not candidateLast:
                    continue
                # Check that the names are compatible
                if shallower_name_similarity((first,last),(candidateFirst,candidateLast)) == 0:
                    continue

                # Get ORCID iD
                orcid_elem = elem.xpath('./ns:orcid-profile/ns:orcid-identifier/ns:path', namespaces=ns)
                if not orcid_elem:
                    continue
                orcid = orcid_elem[0].text

                # Add other things: only the first researcher URL is kept
                # as the homepage.
                researcher_urls = elem.xpath('./ns:orcid-profile/ns:orcid-bio/ns:researcher-urls/ns:researcher-url/ns:url/text()', namespaces=ns)
                homepage = urlize(researcher_urls[0]) if researcher_urls else None

                keywords = elem.xpath('./ns:orcid-profile/ns:orcid-bio/ns:keywords/ns:keyword/text()', namespaces=ns)

                yield {
                        'first':candidateFirst,
                        'last':candidateLast,
                        'orcid':orcid,
                        'homepage':homepage,
                        'keywords':keywords,
                      }

        # print(e) behaves the same as the Python 2 statement `print e` and
        # is also valid Python 3.
        except etree.XMLSyntaxError as e:
            print(e)
        except requests.exceptions.RequestException as e:
            print(e)