def main(author_name):
    """ Print all publications as JSON to STDOUT """
    data = {}
    data['publications'] = []

    author = scholarly.fill(next(scholarly.search_author(author_name)))
    for pub in author['publications']:
        pub_details = scholarly.fill(pub)['bib']
        data['publications'].append({
            'authors': reformat_coauthors(pub_details['author'].split(' and ')),
            'year': pub_details.get('pub_year', ''),
            'title': pub_details.get('title', ''),
            'journal': pub_details.get('journal', ''),
            'volume': pub_details.get('volume', ''),
            'issue': pub_details.get('issue', ''),
            'pages': pub_details.get('pages', ''),
            'citations': pub.get('num_citations', 0),
            'pub_url': pub.get('pub_url', ''),
            # 'eprint_url': pub.get('pub_url', '')  # seems to be same as pub_url
        })

    output_json(data)
def get_all_coauthors(author_name, min_year, max_year, max_coauthors,
                      include_no_year):
    """ Get a set of all coauthors """
    author = scholarly.fill(next(scholarly.search_author(author_name)))
    all_coauthors = set()
    for pub in author['publications']:
        # Evaluate if publication year is indicated (if not, ignore depending
        # on presence of --include_no_year flag)
        if 'pub_year' in pub['bib']:
            pub_year = int(pub['bib']['pub_year'])
        elif include_no_year:
            pub_year = max_year
        else:
            pub_year = min_year - 1

        # Evaluate whether publication falls within indicated timerange
        if min_year <= pub_year <= max_year:
            coauthors = scholarly.fill(pub)['bib']['author'].split(' and ')
            # Evaluate if number of coauthors meets optional threshold
            if len(coauthors) <= max_coauthors:
                for coauthor in coauthors:
                    all_coauthors.add(reformat_name(coauthor))
    return all_coauthors
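
# The helpers referenced above (reformat_coauthors, reformat_name, output_json)
# are not part of this snippet. A minimal sketch, assuming "Last, First Middle"
# name normalisation and JSON output to STDOUT (hypothetical implementations):
import json
import sys


def reformat_name(name):
    # turn 'First Middle Last' into 'Last, First Middle'
    parts = name.strip().split()
    return parts[-1] + ', ' + ' '.join(parts[:-1]) if len(parts) > 1 else name.strip()


def reformat_coauthors(coauthors):
    # apply reformat_name to every coauthor in the list
    return [reformat_name(coauthor) for coauthor in coauthors]


def output_json(data):
    # dump the collected data as JSON to STDOUT
    json.dump(data, sys.stdout, indent=2)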
Example #3
def main(args):
    gs_author = {}
    gs_pubs = []

    if not args.force:
        gs_author = load('google_scholar_author.pkl') or {}
        gs_pubs = load('google_scholar_publications.pkl') or []

    gs_author = scholarly.search_author_id('SZR6mXsAAAAJ')
    gs_author = scholarly.fill(gs_author)
    save(gs_author, 'google_scholar_author.pkl')

    current_pubs_ids = {p['author_pub_id'] for p in gs_pubs}
    author_pubs_ids = {p['author_pub_id'] for p in gs_author['publications']}
    new_pubs_ids = author_pubs_ids - current_pubs_ids
    new_pubs = [
        p for p in gs_author['publications']
        if p['author_pub_id'] in new_pubs_ids
    ]

    # TODO update based on info available on gs_author

    for p in tqdm(new_pubs):
        p = scholarly.fill(p)
        gs_pubs.append(p)
        save(gs_pubs, 'google_scholar_publications.pkl')
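
# load() and save() are not shown here; a minimal sketch using pickle files on
# disk (assumed helpers, not the original implementations):
import os
import pickle


def load(path):
    # return the pickled object at path, or None if the file does not exist
    if not os.path.isfile(path):
        return None
    with open(path, 'rb') as f:
        return pickle.load(f)


def save(obj, path):
    # pickle obj to path
    with open(path, 'wb') as f:
        pickle.dump(obj, f)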
Example #4
def find_bbe_coauthors(
        name, institution="Caltech", start=2015, verbose=True):
    search_query = scholarly.search_author(name + ", " + institution)
    author = scholarly.fill(next(search_query))

    coauthors = set()
    for i, pub in enumerate(author['publications']):
        # Make sure this is within the date range that we care about
        try:
            if int(pub['bib']['pub_year']) < start:
                # Skip this entry
                continue
            elif verbose:
                print(i, end=" ", flush=True)
        except KeyError:
            continue
            
        # Get the full data
        pub = scholarly.fill(pub)
        
        # split on ' and ' (with spaces) so names containing "and" stay intact,
        # and avoid shadowing the outer `author` variable
        for coauthor in pub['bib']['author'].split(" and "):
            coauthors.add(coauthor.strip())

    print("")
    return coauthors
Example #5
 def test_search_author_single_author(self):
     query = 'Steven A. Cholewiak'
     authors = [a for a in scholarly.search_author(query)]
     self.assertGreaterEqual(len(authors), 1)
     author = scholarly.fill(authors[0])
     self.assertEqual(author['name'], u'Steven A. Cholewiak, PhD')
     self.assertEqual(author['scholar_id'], u'4bahYMkAAAAJ')
     pub = scholarly.fill(author['publications'][2])
     self.assertEqual(pub['author_pub_id'], u'4bahYMkAAAAJ:LI9QrySNdTsC')
Example #6
def scholarly_request(search_string: str) -> Dict:
    '''This function takes a search keyword string and requests information
    about the corresponding article via scholarly.'''
    # Get all available information
    search_query = scholarly.search_pubs(search_string)
    article_info = next(search_query)
    scholarly.fill(article_info)
    article_dict = article_info['bib']
    article_dict = normalize_scholarly_dict(article_dict)
    article_dict = add_retrieval_information(article_dict, 'Scholarly',
                                             'unstructured_ID', search_string)
    return article_dict
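
# normalize_scholarly_dict() and add_retrieval_information() are defined
# elsewhere in the source project. A rough sketch of what they might do
# (assumptions, not the original implementations):
from typing import Dict


def normalize_scholarly_dict(article_dict: Dict) -> Dict:
    # map scholarly's bib keys onto a common schema
    return {
        'title': article_dict.get('title', ''),
        'authors': article_dict.get('author', ''),
        'year': article_dict.get('pub_year', ''),
        'journal': article_dict.get('journal', ''),
    }


def add_retrieval_information(article_dict: Dict, source: str,
                              query_type: str, query: str) -> Dict:
    # record where and how the entry was retrieved
    article_dict['retrieved_from'] = source
    article_dict['query_type'] = query_type
    article_dict['query'] = query
    return article_dict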
def fetch_citations(author,
                    filesave="citations.json",
                    proxy="",
                    proxy_list=""):
    """ Fetch citations from google scholar using scholarly """
    if proxy != "":
        print("Setting up proxy ", proxy)
        scholarly.use_proxy(scholarly.SingleProxy(http=proxy, https=proxy))
    if proxy_list != "":
        lproxies = open(proxy_list, 'r').readlines()

        def proxy_gen():
            if proxy_gen.counter >= len(lproxies):
                raise IndexError("We ran out of proxies...")
            proxy = lproxies[proxy_gen.counter]
            if not proxy.startswith("http"):
                proxy = "http://" + proxy
            proxy_gen.counter += 1
            return proxy

        proxy_gen.counter = 0
        scholarly.use_proxy(proxy_gen)

    print("Looking up " + author)
    search = scholarly.search_author(author)
    author = scholarly.fill(next(search))
    publications = []

    for i, pub in enumerate(author['publications']):
        cites = pub['num_citations']  # often this gets messed up upon .fill()
        if "pub_year" in pub['bib']:
            pubyear = pub['bib'][
                "pub_year"]  # also this gets messed up upon .fill()
            pub = scholarly.fill(pub)
            pub['bib']["pub_year"] = pubyear
        else:
            pub = scholarly.fill(pub)
            if not "pub_year" in pub.bib:
                # skip publications that really don't have a year,
                # they probably are crap that was picked up by the search robot
                continue

        pub['num_citations'] = cites
        print("Fetching: " + str(i) + "/" + str(len(author['publications'])) +
              ": " + pub['bib']["title"] + " (" + str(pub['bib']["pub_year"]) +
              ")")
        pub['bib'].pop("abstract", None)
        pub.pop("source", None)
        publications.append(pub)
    with open(filesave, "w") as f:
        f.write(json.dumps(publications))
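
# A possible invocation; the author name is borrowed from the other examples on
# this page and proxies.txt is a placeholder file with one proxy per line:
fetch_citations("Steven A. Cholewiak",
                filesave="citations.json",
                proxy_list="proxies.txt")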
Example #8
def quick_fetch_author(name):
    search_query = scholarly.search_author(name)
    author = scholarly.fill(next(search_query),
                            sections=['publications', 'coauthors'])
    iterator = 0

    data = {}
    publications = []
    coauthors = []

    for auth in author['coauthors']:
        coauthors.append(auth['name'])

    for pub in author['publications']:
        pub_info = {}
        make_attribute(pub_info, 'title', pub, 'bib')
        make_attribute(pub_info, 'num_citations', pub, 'plain')
        make_attribute(pub_info, 'pub_year', pub, 'bib')
        pub_info['_id'] = iterator
        iterator += 1
        publications.append(pub_info)

    make_attribute(data, 'name', author, 'plain')
    make_attribute(data, 'coauthors', coauthors, 'obj')
    make_attribute(data, 'affiliation', author, 'plain')
    make_attribute(data, 'email_domain', author, 'plain')
    make_attribute(data, 'interests', author, 'plain')
    make_attribute(data, 'citedby', author, 'plain')
    make_attribute(data, 'number_of_publications', len(publications), 'obj')
    make_attribute(data, 'publications', publications, 'obj')
    return data
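
# make_attribute() is referenced but not shown. A plausible sketch (assumption):
# 'bib' copies a key from source['bib'], 'plain' copies a top-level key, and
# 'obj' stores the given object itself under the key.
def make_attribute(target, key, source, mode):
    if mode == 'bib':
        if key in source.get('bib', {}):
            target[key] = source['bib'][key]
    elif mode == 'plain':
        if key in source:
            target[key] = source[key]
    else:  # 'obj'
        target[key] = source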
Example #9
def extract_coauthors_by_id(author_id):
    """
        extracts the co-authors of the currently existing authors in the dataset
    """
    # create the output file

    author = scholarly.search_author_id(author_id)
    filled_coauthors = scholarly.fill(author, ['coauthors'])

    coauthors_list = filled_coauthors['coauthors']
    for author in coauthors_list:
        filled_author = scholarly.fill(author, ['indices'])
        register_coauthering(author_id, filled_author['scholar_id'])
        print(filled_author)
        mydict = filled_author_to_dict(filled_author)
        write_author(mydict, AUTHORS_CSV_FILE_OUTPUT_COAUTHORS)
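
# filled_author_to_dict(), write_author() and register_coauthering() come from
# the surrounding project. A compact sketch of the CSV plumbing the first two
# could use (assumed, not the original code):
import csv


def filled_author_to_dict(author):
    return {
        'scholar_id': author.get('scholar_id', ''),
        'name': author.get('name', ''),
        'affiliation': author.get('affiliation', ''),
        'hindex': author.get('hindex', ''),
    }


def write_author(row, csv_path):
    with open(csv_path, 'a', newline='') as f:
        csv.DictWriter(f, fieldnames=list(row.keys())).writerow(row)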
Example #10
    def add_publications(self, request):
        owner_id = self.request.user.id
        owner = get_object_or_404(User, pk=owner_id)
        Publication.objects.filter(
            owner=owner
        ).delete()
        author_id = request.GET.get('author_id', None)
        author_basic = scholarly.search_author_id(author_id)
        author = scholarly.fill(author_basic)
        for publication in author['publications'][:25]:
            # build a fresh dict per publication so fields from a previous
            # iteration do not leak into the next one
            data = {}
            publication_info = publication['bib']
            if 'title' in publication_info:
                data['title'] = publication_info['title']
            if 'pub_year' in publication_info:
                data['publication_year'] = publication_info['pub_year']
            if 'author_pub_id' in publication:
                elem = publication['author_pub_id'].split(':')
                base_link = "https://scholar.google.com/citations?user={}" + \
                    "#d=gs_md_cita-d&u=%2Fcitations%3Fview_op%3D" +\
                    "view_citation%26user%3D{}%26citation_for_view%3D{}%3A{}"
                data['link'] = base_link.format(
                    elem[0], elem[0], elem[0], elem[1])
            if 'num_citations' in publication:
                data['citation_number'] = publication['num_citations']
            data['owner'] = owner_id

            serializer = PublicationSerializer(data=data)
            serializer.is_valid(raise_exception=True)
            Publication.objects.create(
                **serializer.validated_data)

        return Response(status=status.HTTP_201_CREATED)
def get_schoolar_data(author_name,
                      cache_folder="scholarly",
                      affiliation='UBC'):
    output_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 "..", "resources", cache_folder)
    cached = os.path.join(output_folder, format_author(author_name))
    from_cache = False
    final_data = []
    if not os.path.isfile(cached):

        try:
            # Retrieve the author's data, fill-in, and print
            search_query = scholarly.search_author(
                f'{author_name} {affiliation}')
            author = scholarly.fill(next(search_query))

            # Print the titles of the author's publications
            titles = [pub['bib']['title'] for pub in author['publications']]

            final_data = []
            for title in titles:
                logger.info("Processing " + Fore.YELLOW + title +
                            Style.RESET_ALL)
                ret = get_publication(title)
                retries = 0
                while not ret['success'] and retries < MAX_RETRIES_ON_ERROR:
                    retries += 1
                    msg = "Error while querying CrossRef API ({}), retrying ({})...".format(
                        ret["exception"], retries)
                    logger.info(Fore.RED + msg + Style.RESET_ALL)
                    ret = get_publication(title)
                    sleep(3)

                if ret['success']:
                    ret['original_title'] = title
                    final_data.append(ret)
                else:
                    logger.info(Fore.RED + '> Failed' + Style.RESET_ALL)

            final_data = list(
                filter(lambda k: k['result']['similarity'] >= 0.7, final_data))
            final_data = sorted(final_data,
                                key=lambda k: k['result']['similarity'],
                                reverse=True)

            with open(cached, 'w') as fo:
                json.dump(final_data, fo, indent=4, sort_keys=True)
        except StopIteration:
            logger.info(Fore.RED + 'no more scholar data available' +
                        Style.RESET_ALL)
            with open(cached, 'w') as fo:
                json.dump(final_data, fo, indent=4, sort_keys=True)
        except Exception as ex:
            logger.exception(str(ex))
    else:
        with open(cached, 'r') as fo:
            final_data = json.load(fo)
            from_cache = True

    return final_data, from_cache
Example #12
 def test_search_pubs_filling_publication_contents(self):
     '''
     This test checks the process of filling a publication that is derived
     from the search publication snippets.
     '''
     query = 'Creating correct blur and its effect on accommodation'
     results = scholarly.search_pubs(query)
     pubs = [p for p in results]
     self.assertGreaterEqual(len(pubs), 1)
     f = scholarly.fill(pubs[0])
     self.assertTrue(
         f['bib']['author'] ==
         u'Cholewiak, Steven A and Love, Gordon D and Banks, Martin S')
     self.assertTrue(
         f['author_id'] == ['4bahYMkAAAAJ', '3xJXtlwAAAAJ', 'Smr99uEAAAAJ'])
     self.assertTrue(f['bib']['journal'] == u'Journal of vision')
     self.assertTrue(f['bib']['number'] == '9')
     self.assertTrue(f['bib']['pages'] == u'1--1')
     self.assertTrue(
         f['bib']['publisher'] ==
         u'The Association for Research in Vision and Ophthalmology')
     self.assertTrue(
         f['bib']['title'] ==
         u'Creating correct blur and its effect on accommodation')
     self.assertTrue(
         f['pub_url'] ==
         u'https://jov.arvojournals.org/article.aspx?articleid=2701817')
     self.assertTrue(f['bib']['volume'] == '18')
     self.assertTrue(f['bib']['pub_year'] == u'2018')
Example #13
def _add_or_update_academic(google_scholar_id, user_id):
    user = User.query.get(user_id)

    current_app.logger.info(f'Adding or Updating Academic: {google_scholar_id} as user {user}')

    Entrez.email = user.email

    resp = scholarly.fill(scholarly.search_author_id(google_scholar_id), sections=['indices'])

    if resp:
        a = Academic.query.filter(Academic.google_scholar_id == google_scholar_id).one_or_none()

        if a is None:
            a = Academic(google_scholar_id=google_scholar_id)
        
        a.name = resp['name']
        a.affiliation = resp['affiliation']
        a.cited_by = resp['citedby']
        a.h_index = resp['hindex']
        a.i10_index = resp['i10index']
        a.last_update_date = datetime.utcnow()
        a.is_updating = True

        db.session.add(a)
        db.session.commit()

        _update_publications(a)

        a = Academic.query.filter(Academic.google_scholar_id == google_scholar_id).one()
        a.is_updating = False
        db.session.add(a)
        db.session.commit()

    current_app.logger.info(f'Adding or Updating Academic Completed: {google_scholar_id}')
Example #14
def fetch_publication_from_id(name, id):
    search_query = scholarly.search_author(name)
    author = scholarly.fill(next(search_query), sections=['publications'])
    scholarly.fill(author['publications'][id])
    result = author['publications'][id]
    data = {}
    make_attribute(data, 'title', result, 'bib')
    make_attribute(data, 'author', result, 'bib')
    make_attribute(data, 'pub_year', result, 'bib')
    make_attribute(data, 'abstract', result, 'bib')
    make_attribute(data, 'journal', result, 'bib')
    make_attribute(data, 'number', result, 'bib')
    make_attribute(data, 'pages', result, 'bib')
    make_attribute(data, 'publisher', result, 'bib')
    make_attribute(data, 'volume', result, 'bib')
    make_attribute(data, 'num_citations', result, 'plain')
    return data
Example #15
def fetch_scholar_author(gsID, fill=True):
    '''Queries Google Scholar for a given author.
    Also fills all stats if fill is True (a bit slower).'''
    author = scholarly.search_author_id(gsID)
    if fill:
        author = scholarly.fill(
            author, sections=['publications', 'basics', 'indices', 'counts'])
    return author
def get_papers_for_author(author_id):
    '''
        Gets and registers the papers of an author
    '''
    print("getting paper for author " + author_id)
    author = scholarly.search_author_id(author_id)
    filled_publications = scholarly.fill(author, ['publications'])
    publications_list = filled_publications['publications']
    nbpubs_counter = 0
    for publication in publications_list:
        filled_publication = scholarly.fill(publication)
        mydict = tiny_publication_to_dict(filled_publication)
        write_publication(mydict, PUBLICATIONS_CSV_FILE_OUTPUT)
        nbpubs_counter += 1
        print("nbpubs_counter =====>")
        print(nbpubs_counter)
        if nbpubs_counter > NB_MAX_PAPERS_PER_AUTHOR:
            break
Example #17
def main(args):
    print('Querying Google Scholar ...')
    author = scholarly.search_author_id('SZR6mXsAAAAJ')
    author = scholarly.fill(author, sections=['publications'])
    pubs = map(_parse, author['publications'])
    cites = {x[0]: x[1] for x in pubs if x[1] > 0}
    # jsonp = 'cites = {};'.format()
    jsonp = json.dumps(cites)
    with open(args.output, 'w') as out:
        out.write(jsonp)
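
# _parse() is not shown. Given the dict comprehension above, it presumably maps
# a publication to an (id, citation_count) pair; a sketch of that assumption:
def _parse(pub):
    return pub.get('author_pub_id', ''), pub.get('num_citations', 0)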
Example #18
def register_authors_from_generator(author_generator):
    """
        This method goes throught the author generator and gets all
        the authors and registre them in the authors dataset
    """
    # create the file
    open(AUTHORS_CSV_FILE_OUTPUT, 'w')
    while True:
        author = next(author_generator)
        filled_author = scholarly.fill(author, ['indices'])
        mydict = filled_author_to_dict(filled_author)
        write_author(mydict, AUTHORS_CSV_FILE_OUTPUT)
Example #19
def save_csv():
    search_query = scholarly.search_author("Mayken Espinoza-Andaluz")
    author = scholarly.fill(next(search_query))
    with open("output.csv", "w+") as _file:
        _file.write("title|authors|year|abstract\n")
        for pub in author["publications"]:
            # fill the publication so that author and abstract are available
            pub = scholarly.fill(pub)
            title = pub["bib"].get("title", "")
            authors = pub["bib"].get("author", "")
            year = pub["bib"].get("pub_year", "")
            abstract = pub["bib"].get("abstract", "")
            _file.write(f"{title}|{authors}|{year}|{abstract}\n")
def get_author(author, university=""):
    url_part = "https://scholar.google.co.in/citations?user="
    authorSearch = scholarly.search_author(author + (", " + university if university != '' else ''))
    try:
        authorResult = next(authorSearch)
    except StopIteration:
        return "Not Found"
    authorRaw = scholarly.fill(authorResult, sections=['basics', 'indices', 'publications'])
    authorDetails = {
        'name': authorRaw['name'],
        'affiliation': authorRaw['affiliation'],
        'email_domain': authorRaw['email_domain'],
        'interests': authorRaw['interests'],
        'publications': len(authorRaw['publications']),
        'citedby': authorRaw['citedby'],
        'hindex': authorRaw['hindex'],
        'i10index': authorRaw['i10index'],
        'gscholar_url': url_part + authorRaw['scholar_id'],
    }
    return authorDetails
Example #21
def composeTweet(authorName, authorHandles_dic, pubTitle, pub):
    pub = scholarly.fill(pub)
    pubURL = pub['pub_url']
    if 'pub_year' in pub['bib'].keys():
        if pub['bib']['pub_year'] == str(currentYear):
            if authorName in authorHandles_dic.keys():
                tweet = '[' + authorName + ']' + ' just published a new paper: "' + pubTitle + '"\n' + pubURL + '\n' + authorHandles_dic[authorName]
                return pubURL, tweet
            else:
                tweet = '[' + authorName + ']' + ' just published a new paper: "' + pubTitle + '"\n' + pubURL
                return pubURL, tweet
    return None, None
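
# A possible call site; the author name, handle dictionary and publications are
# placeholders for illustration, and currentYear is the module-level value
# composeTweet compares against:
import datetime

currentYear = datetime.date.today().year
author = scholarly.fill(next(scholarly.search_author('Steven A. Cholewiak')))
authorHandles_dic = {'Steven A. Cholewiak': '@example_handle'}
for pub in author['publications']:
    url, tweet = composeTweet('Steven A. Cholewiak', authorHandles_dic,
                              pub['bib']['title'], pub)
    if tweet:
        print(tweet)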
Example #22
def get_publications_sch(author_name):
    # This block is useful for debugging and development
    # reset=False
    # if reset:
    #     search_query = scholarly.search_author('Nathan Pemberton')
    #     author = scholarly.fill(next(search_query))
    #     with open('author.pickle', 'wb') as f:
    #         pickle.dump(author, f)
    #
    #     pubs = [ scholarly.fill(p) for p in author['publications'] ]
    #     with open('pubs.pickle', 'wb') as f:
    #         pickle.dump(pubs, f)
    # else:
    #     with open('author.pickle', 'rb') as f:
    #         author = pickle.load(f)
    #     with open('pubs.pickle', 'rb') as f:
    #         pubs = pickle.load(f)
    search_query = scholarly.search_author(author_name)
    author = scholarly.fill(next(search_query))
    pubs = [scholarly.fill(p) for p in author['publications']]

    for pub in pubs:
        bib = pub['bib']
        bib['ENTRYTYPE'] = 'article'
        bib['ID'] = pub['author_pub_id']
        bib['url'] = pub.get('eprint_url', pub.get('pub_url', ''))

        if 'pub_year' in bib:
            bib['year'] = bib['pub_year']

        special_cases(pub)
        normalize_journal(pub)

    pubs.sort(key=sort_by_year, reverse=True)
    with open(BIBPATH, 'w') as f:
        f.write('@preamble{"{"name" : "' + author_name + '"}"}\n')
        for p in pubs:
            f.write(scholarly.bibtex(p))
            f.write('\n')
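
# sort_by_year(), special_cases(), normalize_journal() and BIBPATH belong to
# the rest of this module. The sort key, at least, is easy to sketch (assumed):
def sort_by_year(pub):
    # pubs without a year sort last when reverse=True
    return int(pub['bib'].get('year', 0))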
Example #23
def download_citations():
    # Retrieve the author's data, fill-in, and print
    # search_query = scholarly.search_author(NAME)
    search_query = scholarly.search_author_id(AUTHOR_ID)
    # author = scholarly.fill(next(search_query))
    author = scholarly.fill(search_query)
    print(author)

    # Print the titles of the author's publications
    print([pub['bib']['title'] for pub in author['publications']])

    # Take a closer look at the first publication
    # pub = scholarly.fill(author['publications'][1])
    # print(pub)
    independent_citations = []
    for pub in author['publications'][:]:
        res_dict = {}
        time.sleep(random.randint(WAIT, WAIT * 2))
        pub = scholarly.fill(pub)
        res_dict["title"] = pub['bib']["title"]
        res_dict["year"] = pub['bib']["pub_year"]
        print(pub['bib']["title"])
        res_dict["author"] = [name.strip() for name in pub['bib']["author"].split("and")]
        time.sleep(random.randint(WAIT, WAIT * 2))
        # materialise the generator so the truthiness check below is meaningful
        cited_this = list(scholarly.citedby(pub))
        if cited_this:
            res_dict['cited_this'] = [{"author": citation['bib']["author"], "title": citation['bib']["title"]} for
                                      citation
                                      in
                                      cited_this]
            indep_citations = print_citations(res_dict)
            res_dict['independent_citations'] = indep_citations
            independent_citations.append(
                {"title": res_dict["title"], "author": res_dict["author"], 'independent_citations': indep_citations})
            save_json(res_dict['title'], res_dict)
        else:
            break

    save_json("independent_citations.json", independent_citations)
Example #24
def author_to_affiliations(NAME):
    try:
        with open('affilations.p', 'rb') as f:
            affiliations = pickle.load(f)
        for k, v in affiliations.items():
            if type(v) is type(list()):
                affiliations[k] = v[0]['name']
    except (FileNotFoundError, EOFError, pickle.UnpicklingError):
        pass

    response = requests.get("https://dissem.in/api/search/?authors=" +
                            str(NAME))
    author_papers = response.json()
    visit_urls = []
    coauthors = []
    titles = []
    affiliations = {}
    orcids = {}
    for p in author_papers["papers"]:
        coauthors_ = p["authors"]
        records = p["records"][0]
        if "doi" in records.keys():
            visit_urls.append(records["doi"])
            doi_to_author_affil_list = crossref_commons.retrieval.get_publication_as_json(
                records["doi"])
            for al in doi_to_author_affil_list["author"]:
                key = al['given'] + str(" ") + al['family']
                #if key not in affiliations.keys():
                if len(al['affiliation']):
                    affiliations[key] = al['affiliation'][0]['name']
                if "ORCID" in al.keys():
                    orcids[key] = al["ORCID"]
                #if not len(al['affiliation']):
                search_query = list(scholarly.search_author(key))
                #sq = search_query[0]
                if len(search_query):
                    sq = search_query[0]
                    res_author_search = scholarly.fill(sq)
                    afil = res_author_search['affiliation']
                    #if "university" in afil or "state" in afil or "universidad" in afil or "college" in afil or "school" in afil:
                    if len(al['affiliation']):
                        #if al['affiliation'] in res_author_search['affiliation']:
                        print(al['affiliation'],
                              res_author_search['affiliation'])
                    affiliations[key] = res_author_search['affiliation']
                    #print(affiliations[key],key)
                    #print(affiliations)

    with open('affilations.p', 'wb') as f:
        pickle.dump(affiliations, f)
    return affiliations
Example #25
def busca_publicaciones(lista):
    listapub = []
    for row in lista:
        print(row)
        search_query = scholarly.search_author(row)

        author = scholarly.fill(next(search_query))
        for index in range(len(author['publications'])):

            pub = scholarly.fill(author['publications'][index])
            print(pub['bib'])
            try:
                listapub.append({
                    'investigador': row,
                    'year': pub['bib']['pub_year'],
                    'title': pub['bib']['title'],
                    'author': pub['bib']['author'],
                    'journal': pub['bib']['journal']
                })
            except KeyError:
                pass
    print(listapub)
    return listapub
Example #26
 def test_search_pubs_citedby(self):
     """
     Testing that when we retrieve the list of publications that cite
     a publication, the number of citing publication is the same as
     the number of papers that are returned. We use a publication
     with a small number of citations, so that the test runs quickly.
     The 'Machine-learned epidemiology' paper had 11 citations as of
     June 1, 2020.
     """
     query = 'Machine-learned epidemiology: real-time detection of foodborne illness at scale'
     pubs = [p for p in scholarly.search_pubs(query)]
     self.assertGreaterEqual(len(pubs), 1)
     filled = scholarly.fill(pubs[0])
     cites = [c for c in scholarly.citedby(filled)]
     self.assertEqual(len(cites), filled['num_citations'])
Example #27
def get_author_statistics(name):
    OUT = np.zeros((1, ), dtype=my_dtype)
    search_query = scholarly.search_author(name)  #
    author = scholarly.fill(next(search_query))
    for s in statistics[:6]:
        OUT[s] = author[s]

    for year in YEARS:
        yearstr = "cit%s" % (year)
        try:
            OUT[yearstr] = author['cites_per_year'][year]
        except KeyError:
            pass

    return OUT
Example #28
from scholarly import scholarly

author = scholarly.search_author_id('V4ycRTQAAAAJ')

author = scholarly.fill(author)
title = author['publications'][0]['bib']['title']

query = next(scholarly.search_pubs(title))  # after a few calls, Google starts blocking requests...
bib = scholarly.bibtex(query)
print(bib)

#query = scholarly.search_pubs("A density-based algorithm for discovering clusters in large spatial databases with noise")
#pub = next(query)
#print(pub)
#print(scholarly.bibtex(pub))
Example #29
# Add new author and populate previous pubs into pubfile
from scholarly import scholarly

newAuthors = ['Jerome Buhl', 'Steven Strogatz']  # Replace Me
with open('authors.csv', 'a') as authorFile:
    for newAuthor in newAuthors:
        authorFile.write(newAuthor + '\n')
        search_query = scholarly.search_author(newAuthor)
        author = scholarly.fill(next(search_query))
        pubIDs = []
        for pub in author['publications']:
            authPubID = pub['author_pub_id']
            iColon = authPubID.find(':')
            pubID = authPubID[iColon + 1:]
            pubIDs.append(pubID)

        with open('pubs.csv', 'a') as pubFile:
            for pubID in pubIDs:
                pubFile.write(pubID + '\n')
Example #30
from scholarly import scholarly as schl
import pandas as pd
import requests

labull = schl.search_author_id('Lszt1B4AAAAJ')
schl.pprint(labull)

# pubs associated w labull
auth = schl.fill(labull, sections=['publications', 'indices'], sortby='year')
# fill pub info
pub_info = [
    schl.fill(pub, sections=['bib', 'pub_url']) for pub in auth['publications']
]

# co-authors
pub_auth = [pub['bib']['author'] for pub in pub_info]
pub_surnames = []
for auth_list in pub_auth:
    sn = [auth.split()[-1] for auth in auth_list.split(' and ')]
    sn[sn.index('Bull')] = '**Bull**'
    pub_surnames.append(sn)

# journal
# pub_jnl = [pub['bib'].get('journal') for pub in pub_info]

# pub info string
publ_entry = [
    ', '.join(pub_surnames[pp]) + ' ' +  # author surnames
    '[' + pub['bib']['title'] + ']' + '(' + pub.get('pub_url') + ')' +
    ' ' +  # title w links
    '(' + str(pub['bib']['pub_year']) + ')'  # yr