def match_first_names(pair):
    """
    Returns true when the given pair of first names is compatible.

    A missing name (None) is compatible with anything; a one-letter name
    is treated as an initial and matched against the first letter of the
    other name; otherwise the two names are compared ignoring case and
    diacritics.

    >>> match_first_names(('A','Amanda'))
    True
    >>> match_first_names(('Amanda','Amanda'))
    True
    >>> match_first_names(('Alfred','Amanda'))
    False
    >>> match_first_names(('patrick','P'))
    True
    >>> match_first_names((None,'Iryna'))
    True
    >>> match_first_names(('Clément','Clement'))
    True
    """
    first, second = pair
    # An absent name does not contradict anything.
    if first is None or second is None:
        return True
    # Single letter: treat as an initial, compare to the other's first letter.
    if len(first) == 1 and len(second) > 0:
        return first.lower() == second[0].lower()
    if len(second) == 1 and len(first) > 0:
        return second.lower() == first[0].lower()
    # Full names: accent- and case-insensitive comparison.
    return remove_diacritics(first).lower() == remove_diacritics(second).lower()
def search(self):
    """
    Build and return the Haystack queryset over Paper from the form's
    cleaned data: free-text query, visibility, availability/status/date
    filters, author name filters, and ordering.
    """
    self.queryset = self.searchqueryset.models(Paper)
    # Free-text query, sent accent-free to the search backend.
    q = remove_diacritics(self.cleaned_data['q'])
    if q:
        self.queryset = self.queryset.auto_query(q)
    # Visibility: empty value means "visible only".
    visible = self.cleaned_data['visible']
    if visible == '':
        self.filter(visible=True)
    elif visible == 'invisible':
        self.filter(visible=False)
    # Simple field filters: index field name <- form field name.
    self.form_filter('availability', 'availability')
    self.form_filter('oa_status__in', 'oa_status')
    self.form_filter('pubdate__gte', 'pub_after')
    self.form_filter('pubdate__lte', 'pub_before')
    self.form_filter('doctype__in', 'doctypes')
    # Filter by authors.
    # authors field: a comma separated list of full/last names.
    # Items with no whitespace of prefixed with 'last:' are considered as
    # last names; others are full names.
    for name in self.cleaned_data['authors'].split(','):
        name = remove_diacritics(name.strip())
        if name.startswith('last:'):
            is_lastname = True
            name = name[5:].strip()
        else:
            is_lastname = ' ' not in name
        if not name:
            continue
        if is_lastname:
            self.filter(authors_last=name)
        else:
            # Sloppy phrase match tolerates one intervening token.
            self.filter(authors_full=Sloppy(name, slop=1))
    self.queryset = aggregate_combined_status(self.queryset)
    status = self.cleaned_data['status']
    if status:
        # post_filter: applied after aggregation, not before.
        self.queryset = self.queryset.post_filter(
            combined_status__in=status)
    # Default ordering by decreasing publication date
    order = self.cleaned_data['sort_by'] or 'pubdate'
    # When the user did NOT tick reverse_order, we prefix '-' so the
    # default is descending; ticking it yields ascending order.
    reverse_order = not self.cleaned_data['reverse_order']
    if reverse_order:
        order = '-' + order
    self.queryset = self.queryset.order_by(order).load_all()
    return self.queryset
def fetch_publisher(self, publisher_name):
    """
    Retrieve a publisher from the RoMEO API.

    Queries with the loose 'all' matching first; when that is
    ambiguous, retries with 'exact' matching. Returns None when no
    unambiguous match is found.
    """
    if publisher_name is None:
        return
    # Build the query (names are sent accent-free).
    terms = {'pub': remove_diacritics(publisher_name), 'qtype': 'all'}
    root = self.perform_romeo_query(terms)
    # Collect candidate publishers from the response.
    candidates = root.findall('./publishers/publisher')
    if not candidates:
        return
    if len(candidates) > 1:
        # Ambiguous: ask RoMEO for an exact match instead.
        terms['qtype'] = 'exact'
        root = self.perform_romeo_query(terms)
        candidates = root.findall('./publishers/publisher')
        if len(candidates) != 1:
            return
    return self.get_or_create_publisher(candidates[0])
def fetch_journal(search_terms, matching_mode='exact'):
    """
    Fetch the journal data from RoMEO. Returns a Journal object.

    :param search_terms: dictionary containing at least one of the
        allowed fields ('issn', 'jtitle')
    :param matching_mode: RoMEO matching type; when an 'exact' query
        yields nothing, the search is retried with 'contains'
    :raises ValueError: if search_terms contains an unexpected key
    :raises MetadataSourceException: if RoMEO returns a titleless journal
    """
    allowed_fields = ['issn', 'jtitle']
    # Make the title HTML-safe before searching for it in the database or in the API
    if 'title' in search_terms:
        search_terms['title'] = kill_html(search_terms['title'])
    original_search_terms = search_terms.copy()
    # Check the arguments
    if not all(key in allowed_fields for key in search_terms):
        raise ValueError('The search terms have to belong to ' +
                         str(allowed_fields) +
                         'but the dictionary I got is ' +
                         str(search_terms))
    # Remove diacritics (because it has to be sent in ASCII to ROMEO)
    for key in search_terms:
        search_terms[key] = remove_diacritics(search_terms[key])
    # First check we don't have it already
    journal = find_journal_in_model(search_terms)
    if journal:
        return journal
    # Perform the query
    root = perform_romeo_query(search_terms)
    # Find the matching journals (if any)
    journals = list(root.findall('./journals/journal'))
    if not journals:
        # Retry with a less restrictive matching type
        if matching_mode == 'exact':
            return fetch_journal(original_search_terms, 'contains')
        return None
    if len(journals) > 1:
        print("Warning, " + str(len(journals)) +
              " journals match the RoMEO request, " +
              "defaulting to the first one")
        # TODO different behaviour: get the ISSN and try again.
    journal = journals[0]
    names = list(journal.findall('./jtitle'))
    if not names:
        # BUG FIX: the original raised with "+ request", but no name
        # 'request' exists in this scope (NameError); report the search
        # terms instead.
        raise MetadataSourceException(
            'RoMEO returned a journal without title.\n' +
            'Search terms were: ' + str(search_terms))
    if len(names) > 1:
        print("Warning, " + str(len(names)) +
              " names provided for one journal, " +
              "defaulting to the first one")
    name = kill_html(names[0].text)
    issn = None
    try:
        issn = nstrip(journal.findall('./issn')[0].text)
    # BUG FIX: 'except KeyError, IndexError:' caught only KeyError and
    # bound it to the name IndexError; a tuple catches both types.
    except (KeyError, IndexError):
        pass
def name_normalization(ident):
    """
    Produce a canonical form of an identifier: accent-free, lowercased,
    trimmed, with separators and non-text characters rewritten by the
    module-level nn_* patterns.
    """
    normalized = remove_diacritics(ident).lower().strip()
    normalized = nn_separator_re.sub('_', normalized)
    normalized = nn_escaping_chars_re.sub('', normalized)
    normalized = nn_final_nontext_re.sub('', normalized)
    return nn_nontext_re.sub('-', normalized)
def name_normalization(ident):
    """
    Canonicalize an identifier: strip accents, lowercase, trim, then
    apply the module-level nn_* substitutions in order.
    """
    result = remove_diacritics(ident).lower().strip()
    # Each (pattern, replacement) pair is applied in this fixed order.
    substitutions = (
        (nn_separator_re, '_'),
        (nn_escaping_chars_re, ''),
        (nn_final_nontext_re, ''),
        (nn_nontext_re, '-'),
    )
    for pattern, replacement in substitutions:
        result = pattern.sub(replacement, result)
    return result
def fetch_journal(search_terms, matching_mode = 'exact'):
    """
    Fetch the journal data from RoMEO. Returns a Journal object.

    :param search_terms: dictionary containing at least one of the
        allowed fields ('issn', 'jtitle')
    :param matching_mode: RoMEO matching type; when an 'exact' query
        yields nothing, the search is retried with 'contains'
    :raises ValueError: if search_terms contains an unexpected key
    :raises MetadataSourceException: if RoMEO returns a titleless journal
    """
    allowed_fields = ['issn', 'jtitle']
    # Make the title HTML-safe before searching for it in the database or in the API
    if 'title' in search_terms:
        search_terms['title'] = kill_html(search_terms['title'])
    original_search_terms = search_terms.copy()
    # Check the arguments
    if not all(key in allowed_fields for key in search_terms):
        raise ValueError('The search terms have to belong to '+str(allowed_fields)+
                         'but the dictionary I got is '+str(search_terms))
    # Remove diacritics (because it has to be sent in ASCII to ROMEO)
    for key in search_terms:
        search_terms[key] = remove_diacritics(search_terms[key])
    # First check we don't have it already
    journal = find_journal_in_model(search_terms)
    if journal:
        return journal
    # Perform the query
    root = perform_romeo_query(search_terms)
    # Find the matching journals (if any)
    journals = list(root.findall('./journals/journal'))
    if not journals:
        # Retry with a less restrictive matching type
        if matching_mode == 'exact':
            return fetch_journal(original_search_terms, 'contains')
        return None
    if len(journals) > 1:
        print("Warning, "+str(len(journals))+" journals match the RoMEO request, "+
              "defaulting to the first one")
        # TODO different behaviour: get the ISSN and try again.
    journal = journals[0]
    names = list(journal.findall('./jtitle'))
    if not names:
        # BUG FIX: the original raised with "+ request", but no name
        # 'request' exists in this scope (NameError); report the search
        # terms instead.
        raise MetadataSourceException('RoMEO returned a journal without title.\n'+
                                      'Search terms were: '+str(search_terms))
    if len(names) > 1:
        print("Warning, "+str(len(names))+" names provided for one journal, "+
              "defaulting to the first one")
    name = kill_html(names[0].text)
    issn = None
    try:
        issn = nstrip(journal.findall('./issn')[0].text)
    # BUG FIX: 'except KeyError, IndexError:' caught only KeyError and
    # bound it to the name IndexError; a tuple catches both types.
    except (KeyError, IndexError):
        pass
def fetch_publisher(publisher_name):
    """
    Resolve a publisher name to a Publisher object.

    Resolution order: exact name match in the local database, then the
    AliasPublisher association counts, and finally the RoMEO API.
    Returns None when the name is None or no unambiguous match exists.
    """
    if publisher_name is None:
        return
    # First, let's see if we have a publisher with that name
    matching_publishers = Publisher.objects.filter(name=publisher_name)
    if len(matching_publishers) == 1:
        return matching_publishers[0]
    # Second, let's see if the publisher name has often been associated
    # to a known publisher (top two aliases by count).
    aliases = list(
        AliasPublisher.objects.filter(
            name=publisher_name).order_by('-count')[:2])
    if len(aliases) == 1:
        # Only one publisher found. If it has been seen often enough
        # under that name, keep it!
        if aliases[0].count > PUBLISHER_NAME_ASSOCIATION_THRESHOLD:
            AliasPublisher.increment(publisher_name, aliases[0].publisher)
            return aliases[0].publisher
    elif len(aliases) == 2:
        # More than one publisher found (two aliases returned as we limited
        # to the two first results). Then we need to make sure the first one
        # appears a lot more often than the second.
        if (aliases[0].count > PUBLISHER_NAME_ASSOCIATION_THRESHOLD and
                aliases[0].count >
                PUBLISHER_NAME_ASSOCIATION_FACTOR * aliases[1].count):
            AliasPublisher.increment(publisher_name, aliases[0].publisher)
            return aliases[0].publisher
    # Otherwise, let's try to fetch the publisher from RoMEO!
    # Prepare the query (names are sent accent-free).
    search_terms = dict()
    search_terms['pub'] = remove_diacritics(publisher_name)
    search_terms['qtype'] = 'all'
    root = perform_romeo_query(search_terms)
    # Find the publisher
    publishers = root.findall('./publishers/publisher')
    if len(publishers) == 0:
        return
    elif len(publishers) > 1:
        # NOTE: Python 2 print statement.
        print str(len(publishers)) + " results"
        # Ambiguous: retry asking RoMEO for an exact match.
        search_terms['qtype'] = 'exact'
        root = perform_romeo_query(search_terms)
        publishers = root.findall('./publishers/publisher')
        if len(publishers) != 1:
            return
    publisher = get_or_create_publisher(publishers[0])
    # Remember this name -> publisher association for future lookups.
    AliasPublisher.increment(publisher_name, publisher)
    return publisher
def fetch_publisher(publisher_name):
    """
    Resolve a publisher name to a Publisher object.

    Resolution order: exact name match in the local database, then the
    AliasPublisher association counts, and finally the RoMEO API.
    Returns None when the name is None or no unambiguous match exists.
    """
    if publisher_name is None:
        return
    # First, let's see if we have a publisher with that name
    matching_publishers = Publisher.objects.filter(name=publisher_name)
    if len(matching_publishers) == 1:
        return matching_publishers[0]
    # Second, let's see if the publisher name has often been associated
    # to a known publisher (top two aliases by count).
    aliases = list(AliasPublisher.objects.filter(
        name=publisher_name).order_by('-count')[:2])
    if len(aliases) == 1:
        # Only one publisher found. If it has been seen often enough
        # under that name, keep it!
        if aliases[0].count > PUBLISHER_NAME_ASSOCIATION_THRESHOLD:
            AliasPublisher.increment(publisher_name, aliases[0].publisher)
            return aliases[0].publisher
    elif len(aliases) == 2:
        # More than one publisher found (two aliases returned as we limited
        # to the two first results). Then we need to make sure the first one
        # appears a lot more often than the second.
        if (aliases[0].count > PUBLISHER_NAME_ASSOCIATION_THRESHOLD and
                aliases[0].count >
                PUBLISHER_NAME_ASSOCIATION_FACTOR*aliases[1].count):
            AliasPublisher.increment(publisher_name, aliases[0].publisher)
            return aliases[0].publisher
    # Otherwise, let's try to fetch the publisher from RoMEO!
    # Prepare the query (names are sent accent-free).
    search_terms = dict()
    search_terms['pub'] = remove_diacritics(publisher_name)
    search_terms['qtype'] = 'all'
    root = perform_romeo_query(search_terms)
    # Find the publisher
    publishers = root.findall('./publishers/publisher')
    if len(publishers) == 0:
        return
    elif len(publishers) > 1:
        # NOTE: Python 2 print statement.
        print str(len(publishers)) + " results"
        # Ambiguous: retry asking RoMEO for an exact match.
        search_terms['qtype'] = 'exact'
        root = perform_romeo_query(search_terms)
        publishers = root.findall('./publishers/publisher')
        if len(publishers) != 1:
            return
    publisher = get_or_create_publisher(publishers[0])
    # Remember this name -> publisher association for future lookups.
    AliasPublisher.increment(publisher_name, publisher)
    return publisher
def normalize_last_name(last):
    """
    Canonical form of a last name for comparison: hyphens become
    spaces, accents are stripped, and the result is lowercased.
    """
    hyphen_free = last.replace('-', ' ')
    return remove_diacritics(hyphen_free).lower()
def prepare_authors_last(self, obj):
    """
    Index-preparation helper: accent-free last names of the object's
    authors, in their original order.
    """
    result = []
    for author in obj.authors_list:
        result.append(remove_diacritics(author['name']['last']))
    return result
def create_paper_plain_fingerprint(title, authors, year):
    """
    Creates a robust summary of a bibliographic reference.
    This plain fingerprint should then be converted to an
    actual fingerprint by hashing it (so that the length
    remains constant).

    :param title: the title of the paper
    :param authors: the list of author names, represented
        as (first_name, last_name) pairs
    :param year: the year of publication of the paper

    >>> create_paper_plain_fingerprint(' It  cleans whitespace And Case\\n',[('John','Doe')], 2015)
    u'it-cleans-whitespace-and-case/doe'
    >>> create_paper_plain_fingerprint('HTML tags are <emph>removed</emph>',[('John','Doe')], 2015)
    u'html-tags-are-removed/doe'
    >>> create_paper_plain_fingerprint('Les accents sont supprimés', [('John','Doe')],2015)
    u'les-accents-sont-supprimes/doe'
    >>> create_paper_plain_fingerprint('Long titles are unambiguous enough to be unique by themselves, no need for authors', [('John','Doe')], 2015)
    u'long-titles-are-unambiguous-enough-to-be-unique-by-themselves-no-need-for-authors'
    >>> create_paper_plain_fingerprint('Ambiguity', [('John','Doe')], 2014)
    u'ambiguity-2014/doe'
    """
    # Normalize the title: strip HTML, accents, case, punctuation,
    # and collapse spaces/hyphens into single hyphens.
    title = kill_html(title)
    title = remove_diacritics(title).lower()
    title = stripped_chars.sub('', title)
    title = title.strip()
    title = re.sub('[ -]+', '-', title)
    buf = title
    # If the title is long enough, we return the fingerprint as is
    if len(buf) > 50:
        return buf
    # If the title is very short, we add the year (for "Preface",
    # "Introduction", "New members" cases)
    # if len(title) <= 16:
    if not '-' in title:
        buf += '-' + str(year)
    author_names_list = []
    for author in authors:
        if not author:
            continue
        author = (remove_diacritics(author[0]), remove_diacritics(author[1]))
        # Last name, without the small words such as "van", "der", "de"…
        last_name_words, last_name_separators = split_name_words(author[1])
        last_words = []
        for i, w in enumerate(last_name_words):
            # Keep capitalized words, plus words attached by a hyphen.
            if (w[0].isupper() or (i > 0 and
                                   last_name_separators[i - 1] == '-')):
                last_words.append(w)
        # If no word was uppercased, fall back on all the words
        if not last_words:
            last_words = last_name_words
        # Lowercase
        last_words = map(ulower, last_words)
        fp = '-'.join(last_words)
        author_names_list.append(fp)
    # Sort the authors' fingerprints so author order does not matter.
    author_names_list.sort()
    for fp in author_names_list:
        buf += '/' + fp
    return buf
def is_fully_capitalized(s):
    """
    Return True when the accent-stripped string contains no lowercase
    letter (as matched by the module-level lowercase_re pattern).
    """
    # Idiom fix: identity comparison against None uses 'is', not '=='
    # (PEP 8); re.search returns None on no match.
    return lowercase_re.search(remove_diacritics(s)) is None
def prepare_text(self, obj):
    """
    Index-preparation helper: the title followed by all author full
    names, joined with spaces and stripped of accents.
    """
    author_blob = ' '.join(self.prepare_authors_full(obj))
    return remove_diacritics(obj.title + ' ' + author_blob)
def prepare_text(self, obj):
    """
    Build the accent-free free-text blob for this object: its title
    plus the space-joined author full names.
    """
    names = self.prepare_authors_full(obj)
    combined = obj.title + ' ' + ' '.join(names)
    return remove_diacritics(combined)
def search(self):
    """
    Build and return the Haystack queryset over Paper from the form's
    cleaned data: free-text query, visibility, availability/status/date
    filters, author name or ORCID filters, and ordering.
    """
    self.queryset = self.searchqueryset.models(Paper)
    # Free-text query, sent accent-free to the search backend.
    q = remove_diacritics(self.cleaned_data['q'])
    if q:
        self.queryset = self.queryset.auto_query(q)
    # Visibility: empty value means "visible only".
    visible = self.cleaned_data['visible']
    if visible == '':
        self.filter(visible=True)
    elif visible == 'invisible':
        self.filter(visible=False)
    # Simple field filters: index field name <- form field name.
    self.form_filter('availability', 'availability')
    self.form_filter('oa_status__in', 'oa_status')
    self.form_filter('pubdate__gte', 'pub_after')
    self.form_filter('pubdate__lte', 'pub_before')
    self.form_filter('doctype__in', 'doctypes')
    # Filter by authors.
    # authors field: a comma separated list of full/last names.
    # Items with no whitespace of prefixed with 'last:' are considered as
    # last names; others are full names.
    for name in self.cleaned_data['authors'].split(','):
        name = name.strip()
        # If part of this author name matches ORCID identifiers, consider
        # these as orcid ids and do the filtering
        orcid_ids = [x for x in name.split(' ') if validate_orcid(x)]
        for orcid_id in orcid_ids:
            try:
                researcher = Researcher.objects.get(orcid=orcid_id)
                self.filter(researchers=researcher.id)
            except Researcher.DoesNotExist:
                # Unknown ORCID: silently skip it.
                pass
            # NOTE(review): this 'continue' is the last statement of the
            # loop body and therefore has no effect — confirm intended
            # placement against the original (unambiguous) layout.
            continue
        # Rebuild a full name excluding the ORCID id terms
        name = ' '.join([x for x in name.split(' ') if x not in orcid_ids])
        name = remove_diacritics(name.strip())
        if name.startswith('last:'):
            is_lastname = True
            name = name[5:].strip()
        else:
            is_lastname = ' ' not in name
        if not name:
            continue
        if is_lastname:
            self.filter(authors_last=name)
        else:
            # Match the name in both orders ("First Last" / "Last First"),
            # tolerating one intervening token (slop=1).
            reversed_name = ' '.join(reversed(name.split(' ')))
            sq = SQ()
            sq.add(SQ(authors_full=Sloppy(name, slop=1)), SQ.OR)
            sq.add(SQ(authors_full=Sloppy(reversed_name, slop=1)), SQ.OR)
            self.queryset = self.queryset.filter(sq)
    self.queryset = aggregate_combined_status(self.queryset)
    status = self.cleaned_data['status']
    if status:
        # post_filter: applied after aggregation, not before.
        self.queryset = self.queryset.post_filter(
            combined_status__in=status)
    # Default ordering by decreasing publication date
    order = self.cleaned_data['sort_by'] or '-pubdate'
    self.queryset = self.queryset.order_by(order).load_all()
    return self.queryset
def fetch_journal(search_terms, matching_mode='exact'):
    """
    Fetch the journal data from RoMEO. Returns a Journal object, or
    None when nothing matches.

    :param search_terms: dictionary containing at least one of the
        allowed fields ('issn', 'jtitle')
    :param matching_mode: RoMEO matching type; anything other than
        'exact' is forwarded as the query's 'qtype'
    :raises ValueError: if search_terms contains an unexpected key
    :raises MetadataSourceException: if RoMEO returns a titleless journal
    """
    allowed_fields = ['issn', 'jtitle']
    # Work on a copy so the caller's dict is not mutated.
    terms = search_terms.copy()
    # Make the title HTML-safe before searching for it in the database or in
    # the API
    if 'title' in terms:
        terms['title'] = kill_html(terms['title'])
    # Check the arguments
    if not all(key in allowed_fields for key in terms):
        raise ValueError('The search terms have to belong to ' +
                         str(allowed_fields) +
                         'but the dictionary I got is ' +
                         str(terms))
    # Remove diacritics (because it has to be sent in ASCII to ROMEO)
    for key in terms:
        terms[key] = remove_diacritics(terms[key])
        # Overlong terms cannot be valid queries: bail out.
        if len(terms[key]) > 256:
            return None
    # First check we don't have it already
    journal = find_journal_in_model(terms)
    if journal:
        return journal
    # Perform the query
    if matching_mode != 'exact':
        terms['qtype'] = matching_mode
    root = perform_romeo_query(terms)
    # Find the matching journals (if any)
    journals = list(root.findall('./journals/journal'))
    if not journals:
        return None
    elif len(journals) > 1:
        print("Warning, " + str(len(journals)) +
              " journals match the RoMEO request, " +
              "defaulting to the first one")
        # TODO different behaviour: get the ISSN and try again.
    journal = journals[0]
    names = list(journal.findall('./jtitle'))
    if not names:
        # NOTE: unicode() is Python 2 only.
        raise MetadataSourceException(
            'RoMEO returned a journal without title.\n' +
            'Terms were: ' + unicode(terms))
    if len(names) > 1:
        print("Warning, " + str(len(names)) +
              " names provided for one journal, " +
              "defaulting to the first one")
    name = kill_html(names[0].text)
    issn = None
    try:
        issn = nstrip(journal.findall('./issn')[0].text)
    except (KeyError, IndexError):
        # No ISSN in the response: proceed with title only.
        pass
    # Now we may have additional info, so it's worth trying again in the model
    model_journal = find_journal_in_model({'issn': issn, 'jtitle': name})
    if model_journal:
        return model_journal
    # Otherwise we need to find the publisher
    publishers = root.findall('./publishers/publisher')
    if not publishers:
        return None
    # TODO here we shouldn't default to the first one but look it up using the
    # <romeopub>
    publisher_desc = publishers[0]
    publisher = get_or_create_publisher(publisher_desc)
    result = Journal(title=name, issn=issn, publisher=publisher)
    result.save()
    return result
def test_remove_diacritics(self):
    """Accented unicode text is transliterated; bytes pass through unchanged."""
    accented_text = 'aéèï'
    self.assertEqual(remove_diacritics(accented_text), 'aeei')
    raw_bytes = 'aéè'.encode('utf-8')
    self.assertEqual(remove_diacritics(raw_bytes), b'a\xc3\xa9\xc3\xa8')
def google_scholar_link(self):
    """
    Link to search for the paper in Google Scholar
    """
    # The title is sent accent-free as the 'q' query parameter.
    query_string = urlencode({'q': remove_diacritics(self.title)})
    return 'http://scholar.google.com/scholar?' + query_string
def create_paper_plain_fingerprint(title, authors, year):
    """
    Creates a robust summary of a bibliographic reference.
    This plain fingerprint should then be converted to an
    actual fingerprint by hashing it (so that the length
    remains constant).

    :param title: the title of the paper
    :param authors: the list of author names, represented
        as (first_name, last_name) pairs
    :param year: the year of publication of the paper

    >>> create_paper_plain_fingerprint(' It  cleans whitespace And Case\\n',[('John','Doe')], 2015)
    'it-cleans-whitespace-and-case/doe'
    >>> create_paper_plain_fingerprint('HTML tags are <emph>removed</emph>',[('John','Doe')], 2015)
    'html-tags-are-removed/doe'
    >>> create_paper_plain_fingerprint('Les accents sont supprimés', [('John','Doe')],2015)
    'les-accents-sont-supprimes/doe'
    >>> create_paper_plain_fingerprint('Long titles are unambiguous enough to be unique by themselves, no need for authors', [('John','Doe')], 2015)
    'long-titles-are-unambiguous-enough-to-be-unique-by-themselves-no-need-for-authors'
    >>> create_paper_plain_fingerprint('Ambiguity', [('John','Doe')], 2014)
    'ambiguity-2014/doe'
    """
    # Normalize the title: strip HTML, accents, case, punctuation,
    # and collapse spaces/hyphens into single hyphens.
    title = kill_html(title)
    title = remove_diacritics(title).lower()
    title = stripped_chars.sub('', title)
    title = title.strip()
    title = re.sub('[ -]+', '-', title)
    buf = title
    # If the title is long enough, we return the fingerprint as is
    if len(buf) > 50:
        return buf
    # If the title is very short, we add the year (for "Preface",
    # "Introduction", "New members" cases)
    # if len(title) <= 16:
    if not '-' in title:
        buf += '-'+str(year)
    author_names_list = []
    for author in authors:
        if not author:
            continue
        author = (remove_diacritics(author[0]), remove_diacritics(author[1]))
        # Last name, without the small words such as "van", "der", "de"…
        last_name_words, last_name_separators = split_name_words(author[1])
        last_words = []
        for i, w in enumerate(last_name_words):
            # Keep capitalized words, plus words attached by a hyphen.
            if (w[0].isupper() or (i > 0 and
                                   last_name_separators[i-1] == '-')):
                last_words.append(w)
        # If no word was uppercased, fall back on all the words
        if not last_words:
            last_words = last_name_words
        # Lowercase
        last_words = list(map(ulower, last_words))
        fp = '-'.join(last_words)
        author_names_list.append(fp)
    # Sort the authors' fingerprints so author order does not matter.
    author_names_list.sort()
    for fp in author_names_list:
        buf += '/'+fp
    return buf
def core_link(self):
    """
    Link to search for the paper in CORE
    """
    # Percent-encode the accent-free title for use in the URL path.
    encoded_title = quote(remove_diacritics(self.title))
    return 'http://core.ac.uk/search/' + encoded_title
def normalize_last_name(last):
    """
    Accent- and hyphen-insensitive canonical form of a last name,
    suitable for comparisons.
    """
    spaced = last.replace('-', ' ')
    ascii_form = remove_diacritics(spaced)
    return ascii_form.lower()
def search(self):
    """
    Build and return the Haystack queryset over Paper from the form's
    cleaned data: free-text query, visibility, availability/status/date
    filters, author name or ORCID filters, and ordering.
    """
    self.queryset = self.searchqueryset.models(Paper)
    # Free-text query, sent accent-free to the search backend.
    q = remove_diacritics(self.cleaned_data['q'])
    if q:
        self.queryset = self.queryset.auto_query(q)
    # Visibility: empty value means "visible only".
    visible = self.cleaned_data['visible']
    if visible == '':
        self.filter(visible=True)
    elif visible == 'invisible':
        self.filter(visible=False)
    # Simple field filters: index field name <- form field name.
    self.form_filter('availability', 'availability')
    self.form_filter('oa_status__in', 'oa_status')
    self.form_filter('pubdate__gte', 'pub_after')
    self.form_filter('pubdate__lte', 'pub_before')
    self.form_filter('doctype__in', 'doctypes')
    # Filter by authors.
    # authors field: a comma separated list of full/last names.
    # Items with no whitespace of prefixed with 'last:' are considered as
    # last names; others are full names.
    for name in self.cleaned_data['authors'].split(','):
        name = name.strip()
        # If part of this author name matches ORCID identifiers, consider
        # these as orcid ids and do the filtering
        orcid_ids = [x for x in name.split(' ') if validate_orcid(x)]
        for orcid_id in orcid_ids:
            self.filter(orcids=orcid_id)
        # Rebuild a full name excluding the ORCID id terms
        name = ' '.join([x for x in name.split(' ') if x not in orcid_ids])
        name = remove_diacritics(name.strip())
        if name.startswith('last:'):
            is_lastname = True
            name = name[5:].strip()
        else:
            is_lastname = ' ' not in name
        if not name:
            continue
        if is_lastname:
            self.filter(authors_last=name)
        else:
            # Match the name in both orders ("First Last" / "Last First"),
            # tolerating one intervening token (slop=1).
            reversed_name = ' '.join(reversed(name.split(' ')))
            sq = SQ()
            sq.add(SQ(authors_full=Sloppy(name, slop=1)), SQ.OR)
            sq.add(SQ(authors_full=Sloppy(reversed_name, slop=1)), SQ.OR)
            self.queryset = self.queryset.filter(sq)
    self.queryset = aggregate_combined_status(self.queryset)
    status = self.cleaned_data['status']
    if status:
        # post_filter: applied after aggregation, not before.
        self.queryset = self.queryset.post_filter(
            combined_status__in=status)
    # Default ordering by decreasing publication date
    order = self.cleaned_data['sort_by'] or '-pubdate'
    self.queryset = self.queryset.order_by(order).load_all()
    return self.queryset
def is_fully_capitalized(s):
    """
    Is this word fully capitalized?

    Returns True when the accent-stripped string contains no lowercase
    letter (as matched by the module-level lowercase_re pattern).
    """
    # Idiom fix: identity comparison against None uses 'is', not '=='
    # (PEP 8); re.search returns None on no match.
    return lowercase_re.search(remove_diacritics(s)) is None