def get_articles(ath):
    """Download all Scopus documents of an author and save their collaborations.

    Pages through the Scopus search endpoint with cursor pagination
    (100 documents per request, date filter ``2007-2019``), passes every page
    to ``save_colls`` and finally marks the author row as fetched.

    ``Request``, ``ElsSearch``, ``client``, ``save_colls`` and the ``Author``
    model are expected to be available from the surrounding scope.

    Args:
        ath: Author record; only its ``id`` attribute is read.
    """
    fields = ('author-count,author,dc:identifier,prism:coverDate,'
              'citedby-count,authkeywords,message')

    def fetch_page(cursor):
        # Build and execute one search request for the given cursor value.
        payload = {
            'query': 'AU-ID({0})'.format(ath.id),
            'count': '100',
            'cursor': cursor,
            'field': fields,
            'date': '2007-2019'
        }
        p = Request('GET',
                    'https://api.elsevier.com/content/search/scopus',
                    params=payload).prepare()
        page = ElsSearch(p.url)
        page.execute(client)
        return page

    print('Getting documents for: "' + str(ath.id) + '"')

    # '*' asks the API for the first page of a cursor-based result set.
    search = fetch_page('*')

    print('Author has written: ' + str(search.tot_num_res) + ' documents')
    # Save collaborations
    if search.tot_num_res > 0:
        save_colls(search.results)
    print(str(search.num_res) + " Documents saved")

    total_saved = search.num_res

    while total_saved < search.tot_num_res:
        search = fetch_page(search.cursor['@next'])

        # An empty page would never advance total_saved; stop instead of
        # requesting the API forever.
        if search.num_res <= 0:
            print("No further documents returned, stopping early")
            break

        # Save collaborations
        save_colls(search.results)

        # Count the page first so the checkpoint reflects what was saved.
        total_saved += search.num_res
        Author.update(last_page=total_saved).where(
            Author.id == ath.id).execute()
        print(str(total_saved) + " Documents saved")

    Author.update(
        docs_fetched=True,
        last_page=total_saved).where(Author.id == ath.id).execute()
    print(str(ath.id) + " - DONE")
Example #2
0
    for subject in cats:
        db.connect()

        payload = {
            'query': 'AFFIL({0}) AND SUBJAREA({1})'.format(country, subject),
            'count': 200
        }

        print('Getting authors in: "{0} {1}"   '.format(country,
                                                        subject).rjust(10),
              end="")

        p = Request('GET',
                    'https://api.elsevier.com/content/search/author',
                    params=payload).prepare()
        search = ElsSearch(p.url)
        search.execute(client)

        print(Colour.BOLD + str(search.num_res) + " Authors saved" +
              Colour.END)

        def save_authors(authors):
            # Save each author in db
            for author in authors:
                a_id = str(author['dc:identifier']).split('AUTHOR_ID:')
                a_id = int(a_id[-1])
                db_author = None
                # Check if author was already saved in db
                try:
                    db_author = Author.get(Author.id == a_id)
                    db_author.country = list(db_author.country)
Example #3
0
def get_coauthors(ath):
    """Fetch the coauthors of an author from the Scopus author search.

    Every coauthor returned by the API is passed to ``save_author``; the ids
    of the resulting records are collected and returned. The number of
    results fetched so far is written to ``Coauthors.last_page`` before each
    follow-up request.

    ``Request``, ``ElsSearch``, ``client``, ``save_author``, ``Coauthors``,
    ``Style`` and ``Colour`` are expected to come from the surrounding scope.

    Args:
        ath: Author record; only its ``id`` attribute is read.

    Returns:
        list: ids of the coauthor records saved during this call (may
        contain duplicates).
    """
    def save_coauthors(coauthors):
        # Persist each coauthor and keep the id of the stored record.
        return [save_author(coauthor).id for coauthor in coauthors]

    def fetch_page(start, count):
        # Build and execute one coauthor search request.
        payload = {
            'co-author': '%s' % ath.id,
            'count': count,
            'start': start,
        }
        p = Request('GET',
                    'https://api.elsevier.com/content/search/author',
                    params=payload).prepare()
        page = ElsSearch(p.url)
        page.execute(client)
        return page

    # Check for author row in coauthors table
    db_coauthor, _ = Coauthors.get_or_create(id=ath.id)

    print(Style.BRIGHT + 'Getting coauthors of: "' + str(ath.id) + '"' +
          Style.RESET_ALL)

    # The first request asks for 50 results, follow-up requests for 25.
    search = fetch_page(db_coauthor.last_page or 0, 50)
    print(Colour.OKGREEN + 'Author has ' + Colour.BOLD +
          str(search.tot_num_res) + ' coauthors' + Colour.END)

    coauthors = save_coauthors(search.results)
    # NOTE(review): total_saved counts only results fetched in this call, so
    # after resuming from a non-zero last_page the follow-up offsets restart
    # from the size of the first page - confirm this is intended.
    total_saved = search.num_res

    while total_saved < search.tot_num_res:
        # Store latest fetched page index
        Coauthors.update(last_page=total_saved).where(
            Coauthors.id == ath.id).execute()

        search = fetch_page(total_saved, 25)

        # An empty page would never advance total_saved; stop instead of
        # looping forever.
        if search.num_res <= 0:
            break

        # Save authors
        coauthors.extend(save_coauthors(search.results))

        # Increment saved
        total_saved += search.num_res

    print(
        Colour.BOLD + str(total_saved) + " Coauthors saved for '" +
        str(ath.id) + "'" + Colour.END, )

    return coauthors
        def get_coauthors(ath):
            """Fetch the coauthors of an author and store unknown ones.

            Coauthors that have no ``Author`` row yet are inserted with their
            name, subject areas, document count and current affiliation. The
            number of results fetched so far is written to
            ``Coauthors.last_page`` before each follow-up request.

            ``Request``, ``ElsSearch``, ``client``, ``Author``, ``Coauthors``,
            ``Style`` and ``Colour`` are expected to come from the surrounding
            scope.

            Args:
                ath: Author record; its ``id`` and ``last_page`` are read.

            Returns:
                list: integer author ids parsed from ``dc:identifier`` of
                every coauthor returned by the API (may contain duplicates).

            Raises:
                Coauthors.DoesNotExist: if no coauthors row exists for ``ath``.
            """

            def save_coauthors(coauthors):
                # Collect the ids of a result page, inserting unknown authors.
                colist = []
                for coauthor in coauthors:
                    raw_id = str(coauthor['dc:identifier']).split('AUTHOR_ID:')
                    a_id = int(raw_id[-1])
                    colist.append(a_id)

                    # Check if author was already saved in db
                    try:
                        Author.get(Author.id == a_id)
                    except Author.DoesNotExist:
                        raw_areas = coauthor.get('subject-area')

                        db_author = Author(id=a_id)
                        db_author.full_name = coauthor['preferred-name']

                        if raw_areas is not None:
                            # The API value may be a single entry, not a list.
                            areas = raw_areas if isinstance(raw_areas, list) else [raw_areas]
                            db_author.subject_areas = [dict(frequency=s['@frequency'], name=s['$']) for s in areas]
                        db_author.document_count = coauthor.get('document-count')
                        db_author.affiliation_current = coauthor.get('affiliation-current')

                        # Write into db
                        db_author.save(force_insert=True)

                return colist

            def fetch_page(start):
                # Build and execute one coauthor search request (25 per page).
                payload = {
                    'co-author':  '%s' % ath.id,
                    'count':    25,
                    'start':    start,
                }
                p = Request('GET', 'https://api.elsevier.com/content/search/author', params=payload).prepare()
                page = ElsSearch(p.url)
                page.execute(client)
                return page

            # Existence check only: look the row up by the function's own
            # argument rather than by a name from the enclosing scope.
            Coauthors.get(Coauthors.id == ath.id)

            print(Style.BRIGHT + 'Getting coauthors of: "' + str(ath.id) + '"' + Style.RESET_ALL)

            # NOTE(review): the start offset is read from ath.last_page while
            # checkpoints below are written to the Coauthors table - confirm.
            search = fetch_page(ath.last_page)

            print(Colour.OKGREEN + 'Author has ' + Colour.BOLD + str(search.tot_num_res) + ' coauthors' + Colour.END)

            coauthors = save_coauthors(search.results)
            total_saved = search.num_res

            while total_saved < search.tot_num_res:

                # Store latest fetched page index
                Coauthors.update(last_page=total_saved).where(Coauthors.id == ath.id).execute()

                search = fetch_page(total_saved)

                # An empty page would never advance total_saved; stop instead
                # of looping forever.
                if search.num_res <= 0:
                    break

                # Save authors
                coauthors.extend(save_coauthors(search.results))

                # Increment saved
                total_saved += search.num_res

            return coauthors
        def get_articles(ath):
            """Fetch the Scopus documents of an author and save collaborations.

            Requests documents 25 at a time (date filter ``2007-2019``),
            starting at ``ath.last_page``. Every article with more than one
            author is passed to ``save_collaboration`` together with the id
            extracted from its ``dc:identifier`` by ``re_abs_id``. When paging
            ends the author row is marked with ``docs_fetched=True``.

            ``Request``, ``ElsSearch``, ``client``, ``re_abs_id``,
            ``save_collaboration``, ``Author``, ``Style`` and ``Colour`` are
            expected to come from the surrounding scope.

            Args:
                ath: Author record; its ``id`` and ``last_page`` are read.
            """
            fields = 'author-count,author,dc:identifier,prism:coverDate,citedby-count,authkeywords,message,affiliation'

            def save_colls(articles):
                # Only articles with at least two authors are collaborations.
                for article in articles:
                    authors = article.get('author')
                    if authors is not None and len(authors) > 1:
                        matches = re_abs_id.findall(article.get('dc:identifier'))
                        save_collaboration(article, matches[0])

            def fetch_page(start):
                # Build and execute one document search request.
                payload = {
                    'query':    'AU-ID({0})'.format(ath.id),
                    'count':    25,
                    'start':    start,
                    'field':   fields,
                    'date':     '2007-2019'
                }
                p = Request('GET', 'https://api.elsevier.com/content/search/scopus', params=payload).prepare()
                page = ElsSearch(p.url)
                page.execute(client)
                return page

            print(Style.BRIGHT + 'Getting documents for: "' + str(ath.id) + '"' + Style.RESET_ALL)

            search = fetch_page(ath.last_page)

            print(Colour.OKGREEN + 'Author has written: ' + Colour.BOLD + str(search.tot_num_res) + ' documents' + Colour.END)

            print(Colour.UNDERLINE + str(search.num_res) + " Documents gathered" + Colour.END)
            # Save collaborations
            save_colls(search.results)

            # NOTE(review): total_saved counts only results fetched in this
            # call, so after starting from a non-zero ath.last_page the
            # follow-up offsets restart from the first page size - confirm.
            total_saved = search.num_res

            while total_saved < search.tot_num_res:

                # Store latest fetched page index
                Author.update(last_page=total_saved).where(Author.id == ath.id).execute()

                search = fetch_page(total_saved)

                # An empty page would never advance total_saved; stop instead
                # of looping forever.
                if search.num_res <= 0:
                    break

                # Save collaborations
                save_colls(search.results)

                # Increment saved
                total_saved += search.num_res

                print(Colour.UNDERLINE + str(total_saved) + " Abstracts gathered" + Colour.END)

            Author.update(docs_fetched=True, last_page=total_saved).where(Author.id == ath.id).execute()
            print(Colour.OKGREEN + "DONE" + Colour.END)
def main():
    """Crawl authors per country and store them together with their coauthors.

    For every country in ``countries`` that is not marked as done in
    ``config``, the Scopus author search is queried for subject area ``cat``
    (50 authors per page, ``-document-count`` sort). Each author is saved with
    profile metrics, tagged with ``cat`` and linked to the ids of their
    coauthors. Progress is written to ``config.json`` after every page so an
    interrupted run can continue from the latest offset.

    ``countries``, ``config``, ``cat``, ``limit``, ``client``, ``json``,
    ``Request``, ``ElsSearch``, ``ElsAuthor``, the ``Author`` / ``Coauthors``
    models and the ``Style`` / ``Colour`` helpers are expected to be defined
    at module level.
    """

    def save_author(author, save_metrics=False):
        """Return the Author record for an API author entry, creating it if new.

        Args:
            author: author entry (dict) from a search result.
            save_metrics: when True, also request the author's metrics and
                mark the record as a sample author. Only applied to newly
                created records.

        Returns:
            The peewee ``Author`` model instance.
        """
        a_id = int(str(author['dc:identifier']).split('AUTHOR_ID:')[-1])

        # The API value may be a single entry (or missing) instead of a list.
        # Normalise into a local so the caller's dict is left untouched.
        subject_areas = author.get('subject-area')
        if not isinstance(subject_areas, list):
            subject_areas = [subject_areas]

        # Check if author was already saved in db
        db_author, created = Author.get_or_create(id=a_id)

        if created:
            # No trailing commas here: they would wrap the values in tuples.
            db_author.full_name = author.get('preferred-name')
            db_author.subject_areas = [
                dict(frequency=s['@frequency'], name=s['$'])
                for s in subject_areas if s is not None
            ]
            db_author.affiliation_current = author.get('affiliation-current')

            if save_metrics:
                # Request metrics for author profile
                metrics = ElsAuthor(author_id=a_id)
                metrics.read_metrics(client)
                coredata = metrics.data['coredata']
                db_author.document_count = coredata.get('document-count')
                db_author.cited_by_count = coredata.get('cited-by-count')
                db_author.h_index = metrics.data.get('h_index')
                db_author.coauthors_count = metrics.data.get('coauthors_count')
                db_author.is_sample = True

            db_author.save()

            print("+1 ", end="", flush=True)
        else:
            print(".", end="", flush=True)

        return db_author

    def get_coauthors(ath):
        """Return the coauthor ids of an author, fetching them if necessary.

        A non-empty ``co_list`` on an existing ``Coauthors`` row is returned
        as is. Otherwise the coauthors are requested 25 at a time, saved via
        ``save_author`` and their record ids collected.
        """
        def save_coauthors(coauthors):
            # Persist each coauthor and keep the id of the stored record.
            return [save_author(coauthor).id for coauthor in coauthors]

        def fetch_page(start):
            # Build and execute one coauthor search request.
            payload = {
                'co-author': '%s' % ath.id,
                'count': 25,
                'start': start,
            }
            p = Request('GET',
                        'https://api.elsevier.com/content/search/author',
                        params=payload).prepare()
            page = ElsSearch(p.url)
            page.execute(client)
            return page

        # Check for author row in coauthors table
        db_coauthor, created = Coauthors.get_or_create(id=ath.id)
        # Check if coauthors were already saved
        if created is False and db_coauthor.co_list:
            return db_coauthor.co_list

        print(Style.BRIGHT + 'Getting coauthors of: "' + str(ath.id) + '"' +
              Style.RESET_ALL)

        search = fetch_page(db_coauthor.last_page or 0)
        print(Colour.OKGREEN + 'Author has ' + Colour.BOLD +
              str(search.tot_num_res) + ' coauthors' + Colour.END)

        coauthors = save_coauthors(search.results)
        # NOTE(review): total_saved counts only results fetched in this call,
        # so after resuming from a non-zero last_page the follow-up offsets
        # restart from the first page size - confirm this is intended.
        total_saved = search.num_res

        while total_saved < search.tot_num_res:
            # Store latest fetched page index
            Coauthors.update(last_page=total_saved).where(
                Coauthors.id == ath.id).execute()

            search = fetch_page(total_saved)

            # An empty page would never advance total_saved; stop instead of
            # looping forever.
            if search.num_res <= 0:
                break

            # Save authors
            coauthors.extend(save_coauthors(search.results))

            # Increment saved
            total_saved += search.num_res

        print(
            Colour.BOLD + str(total_saved) + " Coauthors saved for '" +
            str(ath.id) + "'" + Colour.END, )

        return coauthors

    def save_authors(authors):
        """Save each author with metrics, tag it with ``cat``, store coauthors."""
        for author in authors:
            db_author = save_author(author, save_metrics=True)

            # Save categories only for authors and not for coauthors.
            # The stored value may come back as a tuple wrapping the list.
            known_cats = db_author.cat
            if isinstance(known_cats, tuple):
                known_cats = known_cats[0]
            else:
                known_cats = list(db_author.cat or [])
            # Drop nested lists left behind by earlier writes.
            known_cats = [c for c in known_cats if not isinstance(c, list)]

            db_author.cat = list(set(known_cats + [cat]))
            db_author.save()

            # Store only unique coauthors ids
            coauthors = list(set(get_coauthors(db_author)))
            Coauthors.update(co_list=coauthors).where(
                Coauthors.id == db_author.id).execute()

    def save_latest(new_country, offset=0, total=0, is_final=False):
        """Record crawl progress in ``config`` and write it to config.json.

        With ``is_final`` the country is marked as done (value ``1``) and the
        offset and total are reset to 0; otherwise they are set to the given
        values.
        """
        # config is updated in place on purpose: the country loop reads the
        # offsets back from it.
        new_config = config
        if is_final:
            new_config[new_country] = 1
            new_config['latest_offset'] = 0
            new_config['total'] = 0
        else:
            new_config['latest_offset'] = offset
            new_config['total'] = total

        # Open the file only once the new state is ready, so the window in
        # which config.json is truncated stays as short as possible.
        with open("config.json", "w") as io:
            json.dump(new_config, io)

    for country in countries:
        saved = config.get(country)

        if saved != 1 and config['latest_offset'] <= config['total']:
            payload = {
                'query': 'SUBJAREA({0}) AND AFFIL({1})'.format(cat, country),
                'count': 50,
                'start': config['latest_offset'],
                'sort': '-document-count',
            }
            print('Getting most cited authors from ' + country +
                  ' starting from latest ' + str(config['latest_offset']))

            p = Request('GET',
                        'https://api.elsevier.com/content/search/author',
                        params=payload).prepare()
            search = ElsSearch(p.url)
            search.execute(client)

            print(Colour.BOLD + "Going to save " + str(search.num_res) +
                  " authors " + Colour.END)

            # Save
            save_authors(search.results)

            total_saved = config['latest_offset'] + search.num_res
            save_latest(country, total_saved, limit)

            while total_saved < limit:
                # Look for the link to the next page of the current response.
                next_url = None
                for link in search.links:
                    if link['@ref'] == 'next':
                        next_url = link['@href']

                # Without a next link there is nothing left to fetch; reusing
                # an old URL would only download the same page again.
                if next_url is None:
                    break

                search = ElsSearch(next_url)
                search.execute(client)

                # An empty page would never advance total_saved.
                if search.num_res <= 0:
                    break

                # Save
                save_authors(search.results)

                # Increment saved
                total_saved += search.num_res
                save_latest(country, total_saved, limit)

                print(Colour.BOLD + str(total_saved) + " Authors saved" +
                      Colour.END,
                      end="")

            print(Colour.OKGREEN + "DONE" + Colour.END)
            save_latest(country, 0, 0, is_final=True)

        elif config['latest_offset'] == config['total']:
            save_latest(country, 0, 0, is_final=True)