コード例 #1
0
 def test_mismatch(self):
     # Name pairs that must be judged incompatible: a conflicting second
     # initial, and a last name that differs by one letter.
     incompatible_pairs = (
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin', 'Rider')),
     )
     for left, right in incompatible_pairs:
         score = shallower_name_similarity(left, right)
         self.assertAlmostEqual(score, 0)
コード例 #2
0
ファイル: test_names.py プロジェクト: Phyks/dissemin
 def test_symmetric(self):
     # The similarity must not depend on argument order: every pair is
     # scored in both directions and the two scores are compared.
     name_pairs = (
         (('Robin', 'Ryder'), ('Robin', 'Ryder')),
         (('Robin', 'Ryder'), ('R.', 'Ryder')),
         (('R.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder')),
         (('R. J.', 'Ryder'), ('J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder')),
         (('W. Timothy', 'Gowers'), ('Timothy', 'Gowers')),
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Claire', 'Mathieu'), ('Claire', 'Kenyon-Mathieu')),
     )
     for left, right in name_pairs:
         forward = shallower_name_similarity(left, right)
         backward = shallower_name_similarity(right, left)
         self.assertAlmostEqual(forward, backward)
コード例 #3
0
 def test_symmetric(self):
     # Swapping the two names must leave the similarity score unchanged.
     cases = (
         (('Robin', 'Ryder'), ('Robin', 'Ryder')),
         (('Robin', 'Ryder'), ('R.', 'Ryder')),
         (('R.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder')),
         (('R. J.', 'Ryder'), ('J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder')),
         (('W. Timothy', 'Gowers'), ('Timothy', 'Gowers')),
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Claire', 'Mathieu'), ('Claire', 'Kenyon-Mathieu')),
     )
     for name_a, name_b in cases:
         score_ab = shallower_name_similarity(name_a, name_b)
         score_ba = shallower_name_similarity(name_b, name_a)
         self.assertAlmostEqual(score_ab, score_ba)
コード例 #4
0
def affiliate_author_with_orcid(ref_name,
                                orcid,
                                authors,
                                initial_affiliations=None):
    """
    Attach an ORCiD to the author whose name best matches a reference name.

    Every name in `authors` is scored against `ref_name` with
    `shallower_name_similarity`. The first author reaching the highest
    strictly positive score receives `orcid` in the returned list. This is
    meant to be run on author lists of papers listed in an ORCiD record,
    where one of the authors is expected to be the ORCiD holder.

    :param ref_name: name of the ORCiD holder, compared to each author
    :param orcid: value stored at the position of the best-matching author
    :param authors: list of author names of the paper
    :param initial_affiliations: optional list of existing affiliations.
        It is only used as a starting point when it has exactly one entry
        per author, and it is never modified in place.
    :returns: a new list with one entry per author, holding the initial
        affiliation (or None) everywhere except at the best-matching
        position, where it holds `orcid`. When no author has a positive
        similarity, no entry is replaced.
    """
    best_idx = None
    best_sim = 0.
    for idx, name in enumerate(authors):
        sim = shallower_name_similarity(name, ref_name)
        # Strict comparison: ties keep the earliest author, and a zero
        # score never selects anyone.
        if sim > best_sim:
            best_idx, best_sim = idx, sim

    if initial_affiliations and len(initial_affiliations) == len(authors):
        # Work on a copy so that the caller's list is left untouched.
        affiliations = list(initial_affiliations)
    else:
        affiliations = [None] * len(authors)

    if best_idx is not None:
        affiliations[best_idx] = orcid
    return affiliations
コード例 #5
0
 def test_malformed(self):
     # Blank names and inputs that are not proper (first, last) pairs
     # must compare equal to False.
     malformed_cases = (
         (('  ', '  '), ('John', 'Doe')),
         (('Alfred', 'Kastler'), ('    ', '    ')),
         ('', (None, '')),
     )
     for left, right in malformed_cases:
         outcome = shallower_name_similarity(left, right)
         self.assertEqual(outcome, False)
コード例 #6
0
ファイル: test_names.py プロジェクト: Phyks/dissemin
 def test_malformed(self):
     # Whitespace-only names, an empty string and a None first name are
     # all expected to yield a result equal to False.
     bad_inputs = (
         (('  ', '  '), ('John', 'Doe')),
         (('Alfred', 'Kastler'), ('    ', '    ')),
         ('', (None, '')),
     )
     for name_a, name_b in bad_inputs:
         result = shallower_name_similarity(name_a, name_b)
         self.assertEqual(result, False)
コード例 #7
0
ファイル: test_names.py プロジェクト: Phyks/dissemin
 def test_matching(self):
     # Two identical names must get the maximal score.
     exact_score = shallower_name_similarity(('Robin', 'Ryder'),
                                             ('Robin', 'Ryder'))
     self.assertAlmostEqual(exact_score, 1.0)

     # Compatible variants (initials, extra or missing given names) only
     # need a strictly positive score.
     compatible_pairs = (
         (('Robin', 'Ryder'), ('R.', 'Ryder')),
         (('R.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Robin', 'Ryder'), ('', 'Ryder')),
     )
     for left, right in compatible_pairs:
         score = shallower_name_similarity(left, right)
         self.assertGreater(score, 0)
コード例 #8
0
 def test_matching(self):
     # Identical names: the score is expected to be 1.0.
     same_name_score = shallower_name_similarity(('Robin', 'Ryder'),
                                                 ('Robin', 'Ryder'))
     self.assertAlmostEqual(same_name_score, 1.0)

     # Abbreviated or partially missing first names: the score only has
     # to be greater than zero.
     loosely_matching = (
         (('Robin', 'Ryder'), ('R.', 'Ryder')),
         (('R.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R.', 'Ryder')),
         (('Robin J.', 'Ryder'), ('R. J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Robin', 'Ryder'), ('', 'Ryder')),
     )
     for name_a, name_b in loosely_matching:
         self.assertGreater(shallower_name_similarity(name_a, name_b), 0)
コード例 #9
0
ファイル: orcid.py プロジェクト: Lysxia/dissemin
def affiliate_author_with_orcid(ref_name, orcid, authors, initial_affiliations=None):
    """
    Find the author most likely to be the ORCiD holder and tag it.

    Each entry of `authors` is compared to `ref_name` using
    `shallower_name_similarity`; the earliest author with the highest
    strictly positive similarity gets `orcid` as its affiliation. The
    function is run on author lists of papers listed in the ORCiD record,
    so one of the authors is expected to be the ORCiD holder.

    :param ref_name: reference name of the researcher owning the ORCiD
    :param orcid: the ORCiD to store for the most similar author
    :param authors: list of author names
    :param initial_affiliations: optional affiliations to start from; they
        are ignored unless there is exactly one per author. The list passed
        in is copied, never modified.
    :returns: a new list of affiliations, one per author (None where
        nothing is known), with `orcid` at the most similar author's index
        if any author had a positive similarity.
    """
    max_sim_idx = None
    max_sim = 0.
    for idx, name in enumerate(authors):
        cur_similarity = shallower_name_similarity(name, ref_name)
        # Strictly greater: the first of several equally good matches wins.
        if cur_similarity > max_sim:
            max_sim_idx = idx
            max_sim = cur_similarity

    if initial_affiliations and len(initial_affiliations) == len(authors):
        # Copy to avoid writing the ORCiD into the caller's own list.
        affiliations = list(initial_affiliations)
    else:
        affiliations = [None] * len(authors)

    if max_sim_idx is not None:
        affiliations[max_sim_idx] = orcid
    return affiliations
コード例 #10
0
ファイル: test_names.py プロジェクト: Phyks/dissemin
 def test_multiple(self):
     # Abbreviating the first of two given names must still give the
     # maximal score.
     full_form = ('Juan Pablo', 'Corella')
     abbreviated = ('J. Pablo', 'Corella')
     score = shallower_name_similarity(full_form, abbreviated)
     self.assertAlmostEqual(score, 1.0)
コード例 #11
0
ファイル: test_names.py プロジェクト: Phyks/dissemin
 def test_unicode(self):
     # A first name written with escaped code points must still be
     # compatible with its accented spelling.
     escaped_name = ('Cl\u0102\u0160ment', 'Pit-Claudel')
     accented_name = ('Clément', 'Pit-Claudel')
     score = shallower_name_similarity(escaped_name, accented_name)
     self.assertGreater(score, 0)
コード例 #12
0
 def test_reverse(self):
     # A leading initial present on one side only must not prevent a
     # match on the remaining given name.
     with_initial = ('W. Timothy', 'Gowers')
     without_initial = ('Timothy', 'Gowers')
     score = shallower_name_similarity(with_initial, without_initial)
     self.assertGreater(score, 0)
コード例 #13
0
 def test_unicode(self):
     # The escaped and the accented spellings of the first name are
     # expected to get a strictly positive similarity.
     name_a = ('Cl\u0102\u0160ment', 'Pit-Claudel')
     name_b = ('Clément', 'Pit-Claudel')
     self.assertGreater(shallower_name_similarity(name_a, name_b), 0)
コード例 #14
0
 def test_multiple(self):
     # Two given names, the first one reduced to an initial on one side:
     # the score is expected to be 1.0.
     name_a = ('Juan Pablo', 'Corella')
     name_b = ('J. Pablo', 'Corella')
     self.assertAlmostEqual(shallower_name_similarity(name_a, name_b), 1.0)
コード例 #15
0
 def test_hyphen(self):
     # A last name written with a space must be compatible with the same
     # last name written with a hyphen.
     spaced = ('Clement F.', 'Pit Claudel')
     hyphenated = ('Clément', 'Pit-Claudel')
     score = shallower_name_similarity(spaced, hyphenated)
     self.assertGreater(score, 0)
コード例 #16
0
ファイル: test_names.py プロジェクト: Phyks/dissemin
 def test_hyphen(self):
     # 'Pit Claudel' and 'Pit-Claudel' are expected to get a strictly
     # positive similarity.
     name_a = ('Clement F.', 'Pit Claudel')
     name_b = ('Clément', 'Pit-Claudel')
     self.assertGreater(shallower_name_similarity(name_a, name_b), 0)
コード例 #17
0
ファイル: test_names.py プロジェクト: Phyks/dissemin
 def test_reverse(self):
     # An extra leading initial on the first name must keep the score
     # strictly positive.
     name_a = ('W. Timothy', 'Gowers')
     name_b = ('Timothy', 'Gowers')
     self.assertGreater(shallower_name_similarity(name_a, name_b), 0)
コード例 #18
0
ファイル: test_names.py プロジェクト: Phyks/dissemin
 def test_mismatch(self):
     # Different second initials, or last names differing by one letter,
     # are expected to score zero.
     mismatches = (
         (('Robin K.', 'Ryder'), ('Robin J.', 'Ryder')),
         (('Robin', 'Ryder'), ('Robin', 'Rider')),
     )
     for name_a, name_b in mismatches:
         self.assertAlmostEqual(shallower_name_similarity(name_a, name_b), 0)
コード例 #19
0
ファイル: orcid.py プロジェクト: tarsbase/dissemin
    def search_by_name(first, last, instance=settings.ORCID_BASE_DOMAIN):
        """
        Search the public ORCID API for profiles matching a (first, last) name.

        This is a generator: it yields one dict per compatible profile, with
        the keys 'first', 'last', 'orcid', 'homepage' and 'keywords'.
        Results are skipped when they lack a given or family name, when
        `shallower_name_similarity` rates the names as incompatible (0), or
        when no ORCID identifier is found. Nothing is yielded when `last`
        is empty.

        XML syntax errors and request errors are printed and end the
        iteration; they are not propagated to the caller.

        :param first: given name(s) to search for
        :param last: family name to search for (required)
        :param instance: ORCID domain, appended to "https://pub." to build
            the URL of the search endpoint
        """
        # Validate arguments
        if not last:
            return
        # Perform query
        search_endpoint = "https://pub." + instance + "/" + 'v1.2/search/orcid-bio/'
        dct = {
            'rows': 10,
            'start': 0,
            'q': 'family-name:%s given-names:%s' % (last, first),
        }
        query_url = search_endpoint + '?' + urlencode(dct)
        try:
            # NOTE(review): no timeout is passed to requests.get here.
            r = requests.get(query_url)
            # the namespace is the same for both the production and the
            # sandbox versions.
            ns = {'ns': 'http://www.orcid.org/ns/orcid'}
            xml = etree.fromstring(r.text.encode('utf-8'))
            for elem in xml.xpath('//ns:orcid-search-result', namespaces=ns):
                candidateFirst = None
                candidateLast = None
                # Get name
                pers_details = elem.xpath('.//ns:personal-details',
                                          namespaces=ns)
                if not pers_details:
                    continue
                for item in pers_details[0]:
                    if item.tag.endswith('given-names'):
                        candidateFirst = item.text
                    elif item.tag.endswith('family-name'):
                        candidateLast = item.text
                if not candidateFirst or not candidateLast:
                    continue
                # Check that the names are compatible
                if shallower_name_similarity(
                        (first, last), (candidateFirst, candidateLast)) == 0:
                    continue

                # Get ORCID iD
                orcid_elem = elem.xpath(
                    './ns:orcid-profile/ns:orcid-identifier/ns:path',
                    namespaces=ns)
                if not orcid_elem:
                    continue
                orcid = orcid_elem[0].text

                # Add other things: only the first researcher URL is kept
                # as the homepage.
                researcher_urls = elem.xpath(
                    './ns:orcid-profile/ns:orcid-bio/ns:researcher-urls/ns:researcher-url/ns:url/text()',
                    namespaces=ns)
                homepage = urlize(researcher_urls[0]) if researcher_urls else None

                keywords = elem.xpath(
                    './ns:orcid-profile/ns:orcid-bio/ns:keywords/ns:keyword/text()',
                    namespaces=ns)

                yield {
                    'first': candidateFirst,
                    'last': candidateLast,
                    'orcid': orcid,
                    'homepage': homepage,
                    'keywords': keywords,
                }

        except etree.XMLSyntaxError as e:
            # Parenthesised form is valid as a Python 2 print statement and
            # as a Python 3 function call.
            print(e)
        except requests.exceptions.RequestException as e:
            print(e)
コード例 #20
0
ファイル: orcid.py プロジェクト: Lysxia/dissemin
    def search_by_name(first, last):
        """
        Search the public ORCID API for profiles matching a (first, last) name.

        This is a generator: it yields one dict per compatible profile, with
        the keys 'first', 'last', 'orcid', 'homepage' and 'keywords'.
        Results are skipped when they lack a given or family name, when
        `shallower_name_similarity` rates the names as incompatible (0), or
        when no ORCID identifier is found. Nothing is yielded when `last`
        is empty.

        XML syntax errors and request errors are printed and end the
        iteration; they are not propagated to the caller.

        :param first: given name(s) to search for
        :param last: family name to search for (required)
        """
        # Validate arguments
        if not last:
            return
        # Perform query
        baseurl = 'http://pub.orcid.org/v1.2/search/orcid-bio/'
        dct = {
            'rows': 10,
            'start': 0,
            'q': 'family-name:%s given-names:%s' % (last, first),
        }
        query_url = baseurl + '?' + urlencode(dct)
        try:
            # NOTE(review): no timeout is passed to requests.get here.
            r = requests.get(query_url)
            ns = {'ns': 'http://www.orcid.org/ns/orcid'}
            xml = etree.fromstring(r.text.encode('utf-8'))
            for elem in xml.xpath('//ns:orcid-search-result', namespaces=ns):
                candidateFirst = None
                candidateLast = None
                # Get name
                pers_details = elem.xpath('.//ns:personal-details',
                                          namespaces=ns)
                if not pers_details:
                    continue
                for item in pers_details[0]:
                    if item.tag.endswith('given-names'):
                        candidateFirst = item.text
                    elif item.tag.endswith('family-name'):
                        candidateLast = item.text
                if not candidateFirst or not candidateLast:
                    continue
                # Check that the names are compatible
                if shallower_name_similarity(
                        (first, last), (candidateFirst, candidateLast)) == 0:
                    continue

                # Get ORCID iD
                orcid_elem = elem.xpath(
                    './ns:orcid-profile/ns:orcid-identifier/ns:path',
                    namespaces=ns)
                if not orcid_elem:
                    continue
                orcid = orcid_elem[0].text

                # Add other things: only the first researcher URL is kept
                # as the homepage.
                researcher_urls = elem.xpath(
                    './ns:orcid-profile/ns:orcid-bio/ns:researcher-urls/ns:researcher-url/ns:url/text()',
                    namespaces=ns)
                homepage = urlize(researcher_urls[0]) if researcher_urls else None

                keywords = elem.xpath(
                    './ns:orcid-profile/ns:orcid-bio/ns:keywords/ns:keyword/text()',
                    namespaces=ns)

                yield {
                    'first': candidateFirst,
                    'last': candidateLast,
                    'orcid': orcid,
                    'homepage': homepage,
                    'keywords': keywords,
                }

        except etree.XMLSyntaxError as e:
            # Parenthesised form is valid as a Python 2 print statement and
            # as a Python 3 function call.
            print(e)
        except requests.exceptions.RequestException as e:
            print(e)