def get_gs_citations_web(title):
    """
    Use the google scholar web URL and requests API to obtain the citations
    for a given title of a scholarly article

    Parameters
    ----------
    arg1 | title: str
        The title of a scholarly article

    Returns
    -------
    Dictionary
        dict

    Raises
    ------
    RuntimeError
        If the Google Scholar query could not be made.
    """
    try:
        # route the request through the luminati proxy
        scholarly.use_lum_proxy()

        # make the query
        query = scholarly.search_pubs(title)
    except Exception as e:
        # bug fix: the original swallowed the error and then crashed with a
        # NameError on `next(query)` below; fail loudly with context instead
        raise RuntimeError(f"Google Scholar query failed for {title!r}") from e

    # return the first result dict from the generator
    return next(query)
Ejemplo n.º 2
0
def query_generator(search_term, venue, year, num_results, pg, grace_period=0):
    """Run a Google Scholar publication query, retrying with new proxies.

    Returns a tuple ``(results, url)`` where ``results`` holds at most
    ``num_results`` publications and ``url`` is the scholarly search URL.
    Retries forever on failure, asking ``pg`` for the next proxy each time.
    """
    # CoRL proceedings are indexed a year late on Scholar, so shift the year
    corl_year_shift = {2019: 2020, 2020: 2021}
    if venue == "CoRL":
        query_year = corl_year_shift.get(year, year)
    else:
        query_year = year

    while True:
        try:
            query = scholarly.search_pubs(
                search_term,
                year_low=query_year,
                year_high=query_year + grace_period,
                patents=False,
            )

            hits = list(itertools.islice(query, num_results))

            print("Search URL: ", query._url)

            return hits, query._url
        except Exception:
            # rotate to the next proxy and run the whole query again
            print("Trying different proxy!")
            pg.get_next_proxy()
Ejemplo n.º 3
0
def publicationDetails(p):
    """Look up publication *p* on Google Scholar and print its BibTeX entry."""
    # p is the publication title / query string
    first_hit = next(scholarly.search_pubs(p)).fill()

    print(first_hit.bibtex)
    # throttle between requests to avoid being blocked
    # NOTE(review): `timer` is presumably the `time` module or an alias
    # defined elsewhere in this file — confirm
    timer.sleep(5)
Ejemplo n.º 4
0
    def get(self, queries):
        """Fetch up to one publication's bib metadata per query string,
        refreshing the Tor exit node whenever Google Scholar blocks us.

        Parameters
        ----------
        queries : iterable of str
            Search strings passed to ``scholarly.search_pubs``.

        Returns
        -------
        list
            The ``bib`` metadata dicts of the retrieved publications.
        """
        publications = []
        with Controller.from_port(port=self.port) as controller:
            controller.authenticate('scholarly_password')
            # route every socket through the local Tor SOCKS proxy
            socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9050)
            socket.socket = socks.socksocket

            for query in queries:
                found = False
                limit = 1
                while not found:
                    try:
                        response = scholarly.search_pubs(query)
                        found = True
                    except Exception:
                        # blocked: wait for a fresh Tor circuit, then retry
                        while True:
                            if controller.is_newnym_available():
                                print("Refreshing Tor Node...")
                                controller.signal(Signal.NEWNYM)
                                break

                count = 0
                while count < limit:
                    elem = next(response, None)
                    # bug fix: the original dereferenced ``elem.bib`` even when
                    # the iterator was exhausted (elem is None) -> AttributeError
                    if elem is None:
                        break
                    info = elem.bib
                    # pub = Publication(info)
                    print(type(info))
                    publications.append(info)

                    count += 1

        return publications
Ejemplo n.º 5
0
def _get_pdf(k, title):
    """Download the PDF for *title* via Sci-Hub into ``out_dir(k)``.

    Returns the path of a file already present in ``out_dir(k)`` (cache hit),
    the path of the freshly downloaded PDF, or None when the DOI cannot be
    resolved, the entry is not an article, or the download fails.
    """
    # Skip if exists: reuse whatever file is already in the output directory
    if os.path.isdir(out_dir(k)) and os.listdir(out_dir(k)):
        return os.path.join(out_dir(k), os.listdir(out_dir(k))[0])
    # Normalize title: replace it with Scholar's canonical title + author list
    # so the DOI lookup below has a better chance of matching
    google_search = scholarly.search_pubs(title)
    google_result = next(google_search)
    print(title)
    title = google_result['bib']['title'] + ' ' + (' '.join(
        google_result['bib']['author']))
    print(title)
    # Get DOI from a bibtex lookup of the normalized title
    try:
        found, bib_string = get_bib_from_title(title)
    except Exception as e:
        print("Error while getting DOI", e)
        return None
    # Download only when the bibtex entry is an article and carries a DOI
    if found:
        bib = bibtexparser.loads(bib_string).entries
        if bib and ("doi" in bib[0]) and (bib[0]['ENTRYTYPE'] == 'article'):
            doi = bib[0]["doi"]
            try:
                SciHub(doi, out_dir(k)).download(choose_scihub_url_index=3)
            except Exception as e:
                print("Error while downloading", e)
                return None
            # pick up whatever file Sci-Hub wrote, if any
            pdf = os.path.join(out_dir(k),
                               os.listdir(out_dir(k))[0]) if os.listdir(
                                   out_dir(k)) else None
            return pdf
        else:
            print(bib)
            print("\tAbsent DOI")
    return None
Ejemplo n.º 6
0
 def search(self,
            query: str,
            generic_cover: str = "",
            locale: str = "en") -> Optional[List[MetaRecord]]:
     """Search Google Scholar for *query* and return up to 10 parsed records.

     Returns None when the scholarly request fails, and an empty list when
     this metadata provider is inactive.
     """
     val = list()
     if self.active:
         # URL-quote the significant title tokens before querying
         title_tokens = list(
             self.get_title_tokens(query, strip_joiners=False))
         if title_tokens:
             tokens = [quote(t.encode("utf-8")) for t in title_tokens]
             query = " ".join(tokens)
         try:
             scholarly.set_timeout(20)
             scholarly.set_retries(2)
             scholar_gen = itertools.islice(scholarly.search_pubs(query),
                                            10)
         except Exception as e:
             log.warning(e)
             return None
         for result in scholar_gen:
             # bug fix: forward the caller-supplied generic_cover instead of
             # always passing an empty string
             match = self._parse_search_result(result=result,
                                               generic_cover=generic_cover,
                                               locale=locale)
             val.append(match)
     return val
Ejemplo n.º 7
0
def scholarlyBookAuthor():
    """Look up an author (name taken from the request form), then for each of
    the author's publications fetch the matching Google Scholar entry and
    print its 'blb' field.

    Returns the accumulated string (currently always just "[" — the
    commented-out code below was meant to append results to it).
    """
    # try:
    query = request.form['query']
    search_query = scholarly.search_author(query)
    string = '['
    # first matching author, with all fields populated (legacy .fill() API)
    author = next(search_query).fill()
    for pub in author.publications:
        search_book = scholarly.search_pubs(pub.bib['title'])
        book = next(search_book)
        # print (book)
        url = ''
        try:
            # NOTE(review): 'blb' looks like a typo for 'bib' — confirm
            url = book['blb']
        except:
            url = ''

        print(url)
        # print (basename(url))
    # for i in range(5):
    #     try:
    #         author = next(search_query)
    #         string += str(author) + ","
    #     except:
    #         print('')
    #
    # if (len(string) > 0):
    #     string = string[:-1]
    #     string=string+"]"

    return (str(string))
Ejemplo n.º 8
0
 def search(self, query, generic_cover=""):
     """Search Google Scholar and return up to 10 results as metadata dicts.

     Parameters
     ----------
     query : str
         Search terms, '+'-separated (as produced by the web frontend).
     generic_cover : str
         Unused fallback cover URL (kept for interface compatibility).
     """
     val = list()
     if self.active:
         scholar_gen = scholarly.search_pubs(' '.join(query.split('+')))
         i = 0
         for publication in scholar_gen:
             v = dict()
             v['id'] = publication['url_scholarbib'].split(':')[1]
             v['title'] = publication['bib'].get('title')
             v['authors'] = publication['bib'].get('author', [])
             v['description'] = publication['bib'].get('abstract', "")
             v['publisher'] = publication['bib'].get('venue', "")
             if publication['bib'].get('pub_year'):
                 v['publishedDate'] = publication['bib'].get(
                     'pub_year') + "-01-01"
             else:
                 v['publishedDate'] = ""
             v['tags'] = []
             v['rating'] = 0
             v['series'] = ""
             v['cover'] = ""
             # bug fix: the original line ended with a stray trailing comma,
             # which made v['url'] a 1-tuple instead of a string
             v['url'] = publication.get('pub_url') or publication.get(
                 'eprint_url') or ""
             v['source'] = {
                 "id": self.__id__,
                 "description": "Google Scholar",
                 "link": "https://scholar.google.com/"
             }
             val.append(v)
             i += 1
             if (i >= 10):
                 break
     return val
Ejemplo n.º 9
0
def make_query(topics):
    """Queries Google scholar and returns the first new research paper found according to topics"""
    queryString = generate_query_string(topics)
    # append every topic, space-terminated, to the generated query string
    queryString += ''.join(topic + ' ' for topic in topics)

    queryListPath = os.path.join(get_log_path(), 'query-list.pickle')

    # Load the list of previously-made queries if one exists, else start fresh
    try:
        with open(queryListPath, 'rb') as f:
            prevQueries = pickle.load(f)
    except FileNotFoundError:
        prevQueries = []

    searchQuery = scholarly.search_pubs(queryString)
    pub = first_unique_query(prevQueries, searchQuery)

    # Remember this paper so it is never returned again
    prevQueries.append(pub.bib['title'])
    with open(queryListPath, 'wb+') as f:
        pickle.dump(prevQueries, f)

    return pub
Ejemplo n.º 10
0
def get_citations_from_title(title: str) -> int:
    """
    Args:
        title (str): Title of paper to be searched on Scholar.

    Raises:
        TypeError: If sth else than str is passed.

    Returns:
        int: Number of citations of paper.
    """
    if not isinstance(title, str):
        raise TypeError(f"Pass str not {type(title)}")

    # Quote the title so Scholar performs an exact-phrase match
    title = '"' + title.strip() + '"'

    # citation count of every match, in result order
    counts = [int(match.bib["cites"])
              for match in scholarly.search_pubs(title)]
    if not counts:
        logger.warning(f"Found no match for {title}.")
        return 0
    if len(counts) > 1:
        logger.warning(f"Found {len(counts)} matches for {title}.")
    return counts[0]
Ejemplo n.º 11
0
def get_papers_from_paper_citations(paper_title: str):
    """
    Gets the papers that cited the paper given as a parameter.
    It registers the found papers in the articles folder and registers the
    citation relationship in the citations folder.
    """
    # search by title as a keyword
    target_paper_generator = scholarly.search_pubs(paper_title)

    print("=======> getting the target paper")
    target_paper = next(target_paper_generator)  # get the first result

    print('##########################')
    publications_generator = scholarly.citedby(target_paper)
    citations_count = 0
    # bug fix: use < instead of <= so at most NB_MAX_CITATIONS_PER_PAPERS
    # citations are registered (the original recorded one extra)
    while citations_count < NB_MAX_CITATIONS_PER_PAPERS:
        try:
            publication = next(publications_generator)
        except StopIteration:
            # fewer citing papers than the cap: stop cleanly instead of
            # propagating StopIteration as the original did
            break
        # filled_publication = scholarly.fill(publication)
        mydict = publication_to_dict(publication)
        write_publication(mydict, PUBLICATIONS_CSV_FILE_OUTPUT)
        register_citation(
            target_paper['citedby_url'], mydict['citedby_url'])
        citations_count += 1
Ejemplo n.º 12
0
 def test_search_pubs_filling_publication_contents(self):
     '''
     Check that a publication snippet returned by search_pubs can be
     filled, and that the filled bib fields match the known paper.
     '''
     query = 'Creating correct blur and its effect on accommodation'
     results = scholarly.search_pubs(query)
     pubs = [p for p in results]
     self.assertGreaterEqual(len(pubs), 1)
     # fill() fetches the complete bib record for the first snippet
     # NOTE(review): .fill() is the legacy scholarly API; newer versions use
     # scholarly.fill(pub) — confirm the pinned scholarly version
     f = pubs[0].fill()
     self.assertTrue(
         f.bib['author'] ==
         u'Cholewiak, Steven A and Love, Gordon D and Banks, Martin S')
     self.assertTrue(f.bib['journal'] == u'Journal of vision')
     self.assertTrue(f.bib['number'] == u'9')
     self.assertTrue(f.bib['pages'] == u'1--1')
     self.assertTrue(
         f.bib['publisher'] ==
         u'The Association for Research in Vision and Ophthalmology')
     self.assertTrue(
         f.bib['title'] ==
         u'Creating correct blur and its effect on accommodation')
     self.assertTrue(
         f.bib['url'] ==
         u'https://jov.arvojournals.org/article.aspx?articleid=2701817')
     self.assertTrue(f.bib['volume'] == u'18')
     self.assertTrue(f.bib['year'] == u'2018')
Ejemplo n.º 13
0
def get_bibtex_for_pubs(pubs: str) -> str:
    """Return the BibTeX of the first Scholar hit whose bib entry passes
    ``query_bib_title``; raise NotFoundError when nothing matches."""
    for hit in scholarly.search_pubs(pubs):
        # first hit satisfying the title filter wins
        if query_bib_title(hit["bib"]):
            return scholarly.bibtex(hit)

    raise NotFoundError(f"Can't find {pubs}")
Ejemplo n.º 14
0
def search_paper(title: str, feel_lucky: bool = True):
    """
    Search paper through google scholar, return scholarly publication container.

    Only the "feeling lucky" mode (first hit) is implemented.

    Raises:
        NotImplementedError: when ``feel_lucky`` is False.
    """
    # bug fix: fail fast for the unsupported mode instead of performing the
    # (slow) arxiv + Scholar lookups first and then discarding the result
    if not feel_lucky:
        raise NotImplementedError
    title = get_accurate_name_from_arxiv(title)
    return next(scholarly.search_pubs(title))
Ejemplo n.º 15
0
    def test_search_pubs_total_results(self):
        """
        As of February 4, 2021 there are 32 pubs that fit the search term:
        ["naive physics" stability "3d shape"].

        Check that the total results for that search term is at least 32
        (the count can only grow over time, so >= rather than ==).
        """
        pubs = scholarly.search_pubs('"naive physics" stability "3d shape"')
        self.assertGreaterEqual(pubs.total_results, 32)
Ejemplo n.º 16
0
def search_paper():
    """Web endpoint: look up the POSTed paper name on Google Scholar and
    return the first hit's url, venue and abstract as JSON."""
    from scholarly import scholarly
    paper_name = request.POST.get('paper_name')
    top_hit = next(scholarly.search_pubs(paper_name))
    return jsonify({
        'url': top_hit.bib['url'],
        'venue': top_hit.bib['venue'],
        'abstract': top_hit.bib['abstract'],
    })
Ejemplo n.º 17
0
def search_paper(request):
    """Django view: look up the POSTed paper name on Google Scholar and
    return the first hit's url, venue and abstract as JSON."""
    paper_name = request.POST['paper_name']  # paper title from the POST form
    #paper_name = 'Li Buyu'
    top_hit = next(scholarly.search_pubs(paper_name))
    payload = {
        'url': top_hit.bib['url'],
        'venue': top_hit.bib['venue'],
        'abstract': top_hit.bib['abstract'],
    }
    return HttpResponse(json.dumps(payload), content_type="application/json")
Ejemplo n.º 18
0
def proxied_search_query(query):
    """Run a scholarly publication search, rotating proxies until one works."""
    while True:
        try:
            result = scholarly.search_pubs(query)
        except Exception as e:
            # proxy was blocked or failed: report and switch to a new one
            print(e)
            print("Trying new proxy")
            set_new_proxy()
        else:
            print("Got the results of the query")
            return result
Ejemplo n.º 19
0
def do_search(search_string):
    """Run a Google Scholar search and dump every retrieved hit to a CSV file.

    Side effects: rebinds the module-level globals ``publications_found``,
    ``current_pub`` and ``search_query``; writes
    ``raw-<lib>-<start>-<end>.csv`` via ``write_result``; exits the process
    when Scholar cannot be reached.

    Parameters
    ----------
    search_string : str
        The full query string sent to ``scholarly.search_pubs``.
    """
    global publications_found, current_pub, search_query
    publications_found = []

    # route requests through the configured proxies, if any were set
    if http_proxy or https_proxy:
        print("\n--Using HTTP proxy: " + http_proxy)
        print("--Using HTTPS proxy: " + https_proxy)
        set_proxy()

    print("\nStarting Google Scholar search.")
    print("--Using search string: \n" + search_string)

    try:
        search_query = scholarly.search_pubs(search_string)
    except Exception:
        print("\nCannot fetch the page from Google Scholar.")
        print(
            "You may have been blocked by Google Scholar, please check your internet connection."
        )
        sys.exit()

    # Iterate through retrieved publications until the generator is exhausted
    end = False
    order = 1
    while not end:
        pub = next(search_query, None)
        current_pub = {}
        if pub:
            current_pub['ORDER'] = order
            current_pub['LIBRARY'] = current_lib
            current_pub['YEAR'] = pub.bib['year']
            current_pub['CITATIONS'] = pub.bib['cites']
            current_pub['URL'] = pub.bib['url']
            current_pub['TITLE'] = pub.bib['title']
            if 'abstract' in pub.bib:
                current_pub['ABSTRACT'] = pub.bib['abstract']
            else:
                current_pub['ABSTRACT'] = 'NA'

            publications_found.append(current_pub)
            order += 1
        else:
            end = True

    print('\n{} publications found'.format(len(publications_found)))
    header = [
        'ORDER', 'LIBRARY', 'YEAR', 'CITATIONS', 'URL', 'TITLE', 'ABSTRACT'
    ]
    # the file name encodes the library name and the configured year range
    csv_filename = 'raw-' + current_lib + '-' + str(
        filters.get_start_year()) + '-' + str(
            filters.get_final_year()) + '.csv'
    write_result(csv_filename, publications_found, header)

    logging.shutdown()  # stop scholar.log logging
Ejemplo n.º 20
0
    def proxy(self):
        """Configure scholarly to use the Crawlera HTTP proxy, then run a
        fixed test query to verify the proxy works.

        NOTE(review): the Crawlera API key is hard-coded below — it should be
        moved to configuration/secret storage and rotated.
        """
        proxy_works = scholarly.use_proxy(
            http=
            "http://29ea0d9d66134811b51ead72601a1181:@proxy.crawlera.com:8010/"
        )
        # True/False depending on whether scholarly accepted the proxy
        print(proxy_works)

        test_query = scholarly.search_pubs(
            'Perception of physical stability and center of mass of 3D objects'
        )
        print(test_query)
Ejemplo n.º 21
0
def scholarly_request(search_string: str) -> Dict:
    '''Take a search keyword string and request information about the
    corresponding article via scholarly.'''
    # fetch the first search hit and populate all of its fields
    article_info = next(scholarly.search_pubs(search_string))
    scholarly.fill(article_info)
    # normalise the raw bib dict, then tag it with retrieval metadata
    article_dict = normalize_scholarly_dict(article_info['bib'])
    article_dict = add_retrieval_information(article_dict, 'Scholarly',
                                             'unstructured_ID', search_string)
    return article_dict
Ejemplo n.º 22
0
def results(request):
    """Django view: search Google Scholar for the POSTed 'search' term and
    render the first hit's title, author and url into homepage.html.

    NOTE(review): if the request is not a POST, ``search_word`` is never
    assigned and the search call below raises UnboundLocalError — the view
    should guard the method or fail explicitly.
    """
    if request.method == "POST":
        search_word = request.POST['search']

    searchquery = scholarly.search_pubs(search_word)
    # first search hit only
    data = next(searchquery)
    # print(data.bib['url'])
    title = data.bib['title']
    author = data.bib['author']
    url = data.bib['url']

    return render(request, "homepage.html", {'title': title, 'url': url, 'author': author})
Ejemplo n.º 23
0
    def test_multiple_publications(self):
        """
        As of May 12, 2020 there are at least 29 pubs that fit the search term:
        ["naive physics" stability "3d shape"].

        Check that the paper "Visual perception of the physical stability of asymmetric three-dimensional objects"
        is among them
        """
        # collect the title of every matching publication
        pubs = [p.bib['title'] for p in scholarly.search_pubs('"naive physics" stability "3d shape"')]
        self.assertGreaterEqual(len(pubs), 29)

        self.assertIn(u'Visual perception of the physical stability of asymmetric three-dimensional objects', pubs)
Ejemplo n.º 24
0
def search_GoogleScholar(query_string, n=20):
    """Search Google Scholar and return at most *n* publications.

    Prints a warning when the result stream ran out (i.e. Scholar had no
    more hits to give) before an (n+1)-th result was seen.
    """
    print(f"Search on Google Scholar: [{query_string}]\n")
    ranks = scholarly.search_pubs(query_string)

    collected = []
    for publication in ranks:
        # stop as soon as we already hold n results
        if len(collected) >= n:
            return collected
        collected.append(publication)

    print(f"Warning: {len(collected)} matched publications in total.\n")
    return collected
Ejemplo n.º 25
0
def pub_query_by_author(author):
    """Search publications by author name and render up to 20 of them.

    Returns the rendered 'pub_results.html' template with the collected
    publications; stops early when the result stream ends or errors.
    """
    search_query = scholarly.search_pubs(author)
    pubs = []
    for _ in range(20):
        try:
            pub = next(search_query)
        except StopIteration:
            # iterator exhausted: fewer than 20 results available
            break
        except Exception:
            # bug fix: narrowed from a bare `except` (which also swallowed
            # KeyboardInterrupt/SystemExit); still best-effort on errors
            break
        print(pub)
        pubs.append(pub)
    return render_template('pub_results.html', title='文献查询结果', pubs=pubs, au=author)
    def get_research_articles(self, max_num):
        """Return an HTML string of <li> items for up to *max_num* research
        articles found on Google Scholar for this movie's title + director.

        Routes scholarly through the proxy named by the PROXY_IP environment
        variable. On any failure the output collected so far is returned
        (the except below deliberately swallows errors).
        """
        # Search string for Google Scholar to look for.
        # e.g. "{self.title} {self.director.name}" would equate to "Concussion Peter Landesman" for the movie Concussion.
        search_str = f'{self.title} {self.director.name}'
        output = f""
        try:
            pg = ProxyGenerator()
            ip = os.environ['PROXY_IP']
            pg.SingleProxy(http=ip, https=ip)
            o = scholarly.use_proxy(pg)
            search_query = scholarly.search_pubs(search_str)
            for i in range(0, max_num):
                curr = next(search_query)

                # For debugging purposes, this is how you pretty print the search query's contents.
                #scholarly.pprint(curr)

                # Grab the title of the article.
                title = curr['bib']['title']

                # Begin our formatted html output for each found research article.
                output += f"""
                    <li>
                """

                # See if a publication url (i.e. curr['pub_url']) exists. If so, add an external link to it.
                if 'pub_url' in curr:
                    output += f"""
                        <a target='_blank' href=\"{curr['pub_url']}\">{title}</a>
                    """
                else:
                    output += f"""
                        {title}
                    """

                output += f"""
                    <br>
                """

                # Writes the abstract (i.e.curr['bib']['abstract']) if it exists.
                if 'bib' in curr and 'abstract' in curr['bib']:
                    output += f"""
                        <p>{curr['bib']['abstract']}</p>
                    """

                output += f"""
                </li>
                """
        except Exception as e:
            pass
            # Useful for seeing errors in your terminal. Replace pass with the print statement below.
            #print(sys.stderr, e)
        return output
Ejemplo n.º 27
0
def search_articles(query, n=5):
    '''Search Google Scholar and return up to *n* result documents.

    Stops early (returning the partial list) when the result stream is
    exhausted or a retrieval error occurs.
    '''
    search_query = scholarly.search_pubs(query)
    tab_doc = []
    for _ in range(n):
        try:
            tab_doc.append(next(search_query))
        except StopIteration:
            # fewer than n results available
            break
        except Exception:
            # bug fix: narrowed from a bare `except` (which also swallowed
            # KeyboardInterrupt/SystemExit); also dropped the dead
            # `doc = None` assignment of the original
            break
    return tab_doc
Ejemplo n.º 28
0
 def test_get_cited_by(self):
     """
     Testing that when we retrieve the list of publications that cite
     a publication, the number of citing publication is the same as
     the number of papers that are returned
     """
     query = 'frequency-domain analysis of haptic gratings cholewiak'
     pubs = [p for p in scholarly.search_pubs(query)]
     self.assertGreaterEqual(len(pubs), 1)
     # fill() fetches the complete record, including the citedby iterator
     # NOTE(review): .fill()/.citedby attribute access is the legacy
     # scholarly API — confirm the pinned scholarly version
     filled = pubs[0].fill()
     cites = [c for c in filled.citedby]
     # bib['cites'] is stored as a string, hence the str() cast on the count
     self.assertEqual(str(len(cites)), filled.bib['cites'])
Ejemplo n.º 29
0
 def search(self, query, n=5):
     """Search for articles and return up to ``n`` of them wrapped as
     Publication objects; stops early on any retrieval error."""
     results = scholarly.search_pubs(query)
     collected = []
     for _ in range(n):
         try:
             collected.append(Publication.from_scholar(next(results)))
         except Exception as e:
             # exhausted iterator or scraping error: keep what we have
             print(f"Stopped because of {e}")
             return collected
     return collected
def get_articleInfo(title):
    """Fetch the first Google Scholar result for *title*, rotating proxies
    until the query goes through."""
    search_query = None
    while search_query is None:
        try:
            search_query = scholarly.search_pubs(title)
            print("Got the results of the query")
        except Exception:
            # blocked or failed: switch proxy and retry
            print("Trying new proxy")
            set_new_proxy()

    return next(search_query)