コード例 #1
0
	def query_scholarly(self, author = None, keyword = None, pub = None):
		"""Run a Google Scholar search through the ``scholarly`` package.

		When all three arguments are given, every search is started and the
		results are returned in a dict keyed by 'author', 'keyword' and 'pub'.
		Otherwise only the first non-None argument (checked in the order
		author, keyword, pub) is searched and its result returned directly.
		Returns None when no argument is given.
		"""
		import scholarly

		# All three supplied: bundle the three searches together.
		if author is not None and keyword is not None and pub is not None:
			return {
				'author': scholarly.search_author(author),
				'keyword': scholarly.search_keyword(keyword),
				'pub': scholarly.search_pubs_query(pub),
			}

		# Otherwise the first supplied argument wins.
		if author is not None:
			return scholarly.search_author(author)
		if keyword is not None:
			return scholarly.search_keyword(keyword)
		if pub is not None:
			return scholarly.search_pubs_query(pub)
コード例 #2
0
def publication_information(pub):
    """Fetch the first Google Scholar hit for ``pub`` and report its metadata.

    The first search result is filled in. If its bibtex entry type contains
    "article", the year, journal, citation count and publisher are printed
    (in that order) and returned.

    Args:
        pub: Search string passed to ``scholarly.search_pubs_query``.

    Returns:
        Tuple ``(journal, citedby, year, publisher)``. For a non-article hit
        the defaults ``("", 0, "", "")`` are returned. For an article, a
        missing journal/year is "", a missing citation count is 0 and a
        missing publisher is 0.

    Raises:
        StopIteration: if the search yields no result.
    """
    journal = ""
    cited_by = 0
    year = ""
    publisher = ""
    search_query = scholarly.search_pubs_query(pub)
    publication = next(search_query).fill()

    # re.search takes (pattern, string): look for "article" inside the entry
    # type, not the other way round.
    entry_type = publication.bib.get("ENTRYTYPE", "")
    if re.search("article", entry_type):
        year = publication.bib.get("year", "")
        print(year)
        journal = publication.bib.get("journal", "")
        print(journal)
        cited_by = getattr(publication, "citedby", 0)
        print(cited_by)
        # The fallback stays 0 (not "") so existing callers see the same value.
        publisher = publication.bib.get("publisher", 0)
        print(publisher)

    return journal, cited_by, year, publisher
コード例 #3
0
ファイル: providers.py プロジェクト: jdumas/autobib
def scholarly_query(authors, title):
    """
    Look up an article on Google Scholar and return its bibtex record.

    Args:
        authors (list): last names of the leading authors, as strings.
        title (str): the title of the article.

    Returns:
        The bibtex fields (dict) of the first search hit with any 'abstract'
        entry removed and trailing periods stripped from the title, or None
        when the search yields nothing.
    """
    search_terms = ' '.join(authors) + ' ' + title
    hit = next(scholarly.search_pubs_query(search_terms), None)
    if hit is None:
        return None

    hit.fill()
    record = hit.bib
    record.pop('abstract', None)

    # Drop any run of periods at the end of the title.
    record['title'] = re.sub('\\.*$', '', record['title'])

    print('S: ' + nomenclature.gen_filename(record))
    return record
コード例 #4
0
    def query_scholar(self):
        """
        Query articles from Google Scholar.

        For every object returned by ``self.get_db_object()`` that has an
        'ngc' key, search Google Scholar for that value and insert up to 500
        of the resulting articles into ``articles``. Each inserted document
        is the publication's attribute dict with 'bib' renamed to 'biblio',
        plus the object's 'ngc' value and its '_id' (as '_object_id').

        Parameters
        ------
        None

        Return
        ------
        None
        """
        from itertools import islice

        # Visit every object exactly once; a manual index that was bumped
        # twice per match used to skip the object after each match.
        for db_object in self.get_db_object():
            if 'ngc' not in db_object:
                continue

            search_query = scholarly.search_pubs_query(db_object['ngc'])
            # islice stops quietly when fewer than 500 results exist instead
            # of letting StopIteration escape from next().
            for publication in islice(search_query, 500):
                current_article = publication.__dict__
                current_article["biblio"] = current_article.pop('bib')
                current_article["ngc"] = db_object["ngc"]
                current_article["_object_id"] = db_object["_id"]
                print(current_article)
                # NOTE(review): ``articles`` is not defined in this method;
                # presumably a module-level MongoDB collection - confirm.
                articles.insert(current_article)
コード例 #5
0
def get_all_studies(keywords: List[str]) -> List[Study]:
    """Search Google Scholar for the keywords in random order.

    The keywords are shuffled, joined with spaces, printed, and used as a
    single query. Every result is converted to a ``Study`` built from its
    title, url and author; a result without a url gets an empty string.

    Args:
        keywords: Search keywords. The list itself is left unmodified.

    Returns:
        One ``Study`` per search result, in result order.

    Raises:
        KeyError: if a result has no 'title' or 'author' entry.
    """
    # Shuffle a copy so the caller's list keeps its order.
    shuffled_keywords = list(keywords)
    shuffle(shuffled_keywords)
    shuffled = ' '.join(shuffled_keywords)
    print(shuffled)

    retval = []
    for res in scholarly.search_pubs_query(shuffled):
        # Only the url is optional; title and author are required.
        retval.append(
            Study(res.bib['title'], res.bib.get('url', ""), res.bib['author']))
    return retval
def rename_bib_file(citeDir, filename):
    """Register the first entry of a stored bibtex file and name its new file.

    The bibtex file ``filename`` is read from the bucket returned by
    ``aws_bucket_info()`` and parsed. If the ID of its first entry is not yet
    in the citation dataframe, Google Scholar is searched for the entry's
    title, the entry is stored together with the url and abstract of the
    first hit (empty strings when unavailable), and the new file path is
    returned.

    Args:
        citeDir: Prefix prepended to the new bib file name.
        filename: Key of the bibtex object inside the bucket.

    Returns:
        ``citeDir + <entry ID> + "_slrm.bib"`` when the entry was newly
        stored, otherwise "".
    """
    my_bucket, _ = aws_bucket_info()
    bibtex_file = my_bucket.Object(filename).get()['Body'].read().decode(
        'utf-8')
    parser = BibTexParser()
    parser.customization = convert_to_unicode
    bibtex_database = bibtexparser.loads(bibtex_file, parser=parser)
    entry = bibtex_database.entries[0]

    # NOTE(review): ``citation_pickle_file`` is not defined in this function;
    # presumably a module-level setting - confirm.
    df = read_bib_df(citation_pickle_file=citation_pickle_file)
    if entry['ID'] in df['ID'].values:
        return ""

    # A search without hits, or a hit without 'url'/'abstract', falls back to
    # empty strings instead of relying on a bare except.
    first_hit = next(scholarly.search_pubs_query(entry['title']), None)
    scholar_bib = getattr(first_hit, 'bib', {})

    dict_to_store = {
        'ID': entry['ID'],
        'numLikes': 0,
        'Title': entry['title'],
        'Authors': entry['author'],
        'url': scholar_bib.get('url', ""),
        'Abstract': scholar_bib.get('abstract', ""),
    }
    store_bib_in_df(dict_to_store,
                    citation_pickle_file=citation_pickle_file)
    return citeDir + entry['ID'] + "_slrm.bib"
コード例 #7
0
def get_cite_count_by_doi(doi):
    """Return the Google Scholar citation count for ``doi``.

    Args:
        doi: Search string (a DOI) passed to ``scholarly.search_pubs_query``.

    Returns:
        The ``citedby`` value of the first, filled-in search hit, or -1 when
        the lookup fails for any reason (no hit, no citation count, ...).
    """
    query = scholarly.search_pubs_query(doi)
    try:
        result = next(query).fill()
        return result.citedby
    except Exception:
        # Best-effort lookup: any failure means "not found", but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return -1
コード例 #8
0
    def search_scholar(self, query, n_results):
        """ Returns a list containing up to n_results of the first
        Publications found for the query.

        The list is also stored on ``self.results_scholar``.

        Parameters
        ----------
        query: string
            Query to search on Google Scholar.
        n_results: int
            Maximum number of articles to return.

        Returns
        -------
        results: list(Publications)
            List containing Publication objects; shorter than n_results
            when the search yields fewer hits.
        """
        from itertools import islice

        generator_results = scholarly.search_pubs_query(query)
        # islice ends quietly when the results run out; calling next() a fixed
        # number of times raised StopIteration on short result sets.
        self.results_scholar = list(
            islice(generator_results, max(n_results, 0)))
        return self.results_scholar
コード例 #9
0
ファイル: providers.py プロジェクト: zhaochaocs/autobib
def scholarly_query(authors, title):
    """
    Look up an article on Google Scholar and return its bibtex record.

    Args:
        authors (list): last names of the leading authors, as strings.
        title (str): the title of the article.

    Returns:
        The bibtex fields (dict) of the first search hit with any 'abstract'
        entry removed and trailing periods stripped from the title, or None
        when the search yields nothing.
    """
    search_terms = ' '.join(authors) + ' ' + title
    hit = next(scholarly.search_pubs_query(search_terms), None)
    if hit is None:
        return None

    hit.fill()
    record = hit.bib
    record.pop('abstract', None)

    # Drop any run of periods at the end of the title.
    record['title'] = re.sub('\\.*$', '', record['title'])

    print('S: ' + nomenclature.gen_filename(record))
    return record
コード例 #10
0
ファイル: extract.py プロジェクト: frederikschubert/papers
def fetch_bibtex_by_fulltext_scholar(txt, assess_results=True):
    """Search Google Scholar for ``txt`` and return the bibtex of the best hit.

    With ``assess_results`` true and more than one hit, the hit with the
    highest ``_scholar_score`` above zero is chosen; otherwise the first hit
    is used. The bibtex text is downloaded from the hit's ``url_scholarbib``.

    Raises:
        IndexError: if the search returns no results.
        NotImplementedError: if the chosen hit has no bibtex link.
    """
    import scholarly
    scholarly._get_page = _get_page_fast  # remove waiting time
    logger.debug(txt)

    candidates = list(scholarly.search_pubs_query(txt))

    # Start from the first hit and only replace it with a strictly
    # better-scoring one.
    best = candidates[0]
    if len(candidates) > 1 and assess_results:
        best_score = 0
        for candidate in candidates:
            candidate_score = _scholar_score(txt, candidate.bib)
            if candidate_score > best_score:
                best_score = candidate_score
                best = candidate

    # The bibtex itself lives behind the hit's url_scholarbib link.
    if not getattr(best, 'url_scholarbib', ''):
        raise NotImplementedError(
            'no bibtex import linke. Make crossref request using title?')
    return scholarly._get_page(best.url_scholarbib).strip()
コード例 #11
0
ファイル: test.py プロジェクト: epkanol/scholarly
 def test_get_cited_by(self):
     """The number of citing publications matches the reported count."""
     search = scholarly.search_pubs_query(
         'frequency-domain analysis of haptic gratings cholewiak')
     pub = next(search).fill()
     citing = list(pub.get_citedby())
     self.assertEqual(len(citing), pub.citedby)
コード例 #12
0
def information(pub):
    """Print and return the 'author' field of the first Scholar hit for ``pub``.

    Raises StopIteration when the search has no result and KeyError when the
    hit has no 'author' entry.
    """
    first_hit = next(scholarly.search_pubs_query(pub))
    authors = first_hit.bib["author"]
    print(authors)
    return authors
#information("TOWARDS SEAMLESS TRACKING-FREE WEB: IMPROVED DETECTION OF TRACKERS VIA ONE-CLASS LEARNING")
コード例 #13
0
ファイル: views.py プロジェクト: RonanMDONeill/FYP
def scholar_view(request, nodeType, nodeLabel):
    """Render Google Scholar information for an author or a publication.

    Args:
        request: Request object, passed through to ``render``.
        nodeType: "Author" or "Publication". Any other value performs no
            lookup and renders with ``result`` set to None.
        nodeLabel: Search string for the author or publication.

    Returns:
        The response produced by ``render`` for the template
        "external/google_search_results.html" with the context keys
        'result', 'nodeType' and 'mostCited'. When a lookup fails, 'result'
        is an apology message and 'nodeType' is "Bad query".
    """
    # Defaults keep the template context fully defined for every nodeType;
    # previously ``result`` was unbound for unrecognised node types.
    result = None
    mostCited = None

    # If request is for an Author
    if nodeType == "Author":
        try:
            query = scholarly.search_author(nodeLabel)
            result = next(query).fill()
            mostCited = result.publications[0].fill()

        # If Author cannot be found
        except Exception:
            result = "Sorry, we could not find this author's profile."
            nodeType = "Bad query"
            mostCited = None

    # If request is for a Publication
    elif nodeType == "Publication":
        try:
            query = scholarly.search_pubs_query(nodeLabel)
            result = next(query)
            print(result)

        # If Publication cannot be found
        except Exception:
            result = "Sorry, we could not find this paper's profile."
            nodeType = "Bad query"

    return render(request, "external/google_search_results.html", {
        "result": result,
        "nodeType": nodeType,
        "mostCited": mostCited
    })
コード例 #14
0
def build_super_dict(query, path, amount):
    """Search Google Scholar and dump the cleaned results to a CSV file.

    Each publication's bibtex dict is passed through ``process_row``; rows
    for which it returns None are skipped. Every cleaned row is printed.

    Args:
        query: Search string sent to Google Scholar (previously ignored in
            favour of a hard-coded query).
        path: Currently unused; see the review note below.
        amount: Maximum number of publications to process; anything that
            ``int()`` accepts (previously ignored in favour of a fixed limit).

    Returns:
        None.
    """
    from itertools import islice

    amount = int(amount)
    publications = scholarly.search_pubs_query(query)

    field_names = ["title", "eprint", "author", "abstract", "url"]

    # NOTE(review): ``path`` is still unused. Whether it is meant to be a
    # file or a directory cannot be told from here, so the output location
    # stays hard-coded - confirm and wire it in.
    # newline='' is what the csv module expects for files it writes to.
    with open(
            '/home/fabian/Documents/repos/scholar_web_scrapper/results/scrapping_results.csv',
            'w',
            newline='') as file:
        csv.register_dialect("toMYSQL",
                             delimiter=";",
                             quoting=csv.QUOTE_ALL,
                             doublequote=True)
        writer = csv.DictWriter(file,
                                fieldnames=field_names,
                                dialect="toMYSQL")
        writer.writeheader()

        # Process at most ``amount`` publications.
        for publication in islice(publications, max(amount, 0)):
            clean_row = process_row(publication.bib)

            print(clean_row)
            if clean_row is not None:
                writer.writerow(clean_row)
コード例 #15
0
 def test_multiple_publications(self):
     ''' As of November 18, 2016 there are 12 pubs that fit the search term'''
     titles = []
     for hit in scholarly.search_pubs_query('cholewiak campbell robson'):
         titles.append(hit.bib['title'])
     self.assertEqual(len(titles), 12)
     self.assertIn(u'A frequency-domain analysis of haptic gratings', titles)
コード例 #16
0
def getCitation(titleLine, authorLine):
    """Print the citation count of the first Scholar hit for ``titleLine``.

    The printed text is the title immediately followed by the count, then a
    newline and ``authorLine``. Returns None.
    """
    global citationDict

    first_hit = next(scholarly.search_pubs_query(titleLine))
    summary = titleLine + str(first_hit.citedby) + "\n" + authorLine
    print(summary)
コード例 #17
0
def captcha_test():
    """Probe Google Scholar with a fixed query.

    Returns:
        True when the search yields no result at all (presumably because the
        request was blocked, e.g. by a captcha - confirm), False when a first
        result with a ``citedby`` attribute could be read.

    Raises:
        AttributeError: if the first result has no ``citedby`` attribute.
    """
    try:
        results = scholarly.search_pubs_query('Einstien')
        # The builtin next() works on Python 2 and 3; the generator method
        # .next() exists only on Python 2.
        first_result = next(results)
        first_result.citedby
    except StopIteration:
        return True
    return False
コード例 #18
0
def doQuery():
    """Print the first Google Scholar publication hit for 'wearable'."""
    hits = scholarly.search_pubs_query('wearable')
    first_hit = next(hits)
    print(first_hit)
コード例 #19
0
ファイル: search_pubs.py プロジェクト: larocs/attention_dl
def search_pub(query):
    """Return the serialized first publication hit for ``query``.

    An empty dict is returned when StopIteration is raised, i.e. when the
    search has no result.
    """
    hits = scholar.search_pubs_query(query)
    try:
        first_hit = next(iter(hits))
        return serialize_result(first_hit)
    except StopIteration:
        return {}
コード例 #20
0
def publication_information_2(pub):
    """Print the filled-in first Google Scholar hit for ``pub``.

    When the lookup fails for any reason, an empty dict is printed instead.

    Args:
        pub: Search string passed to ``scholarly.search_pubs_query``.

    Returns:
        None.
    """
    publication = {}
    try:
        search_query_1 = scholarly.search_pubs_query(pub)
        publication = next(search_query_1).fill()
    except Exception:
        # Best-effort lookup: keep the empty placeholder, but no longer
        # swallow KeyboardInterrupt/SystemExit as the bare except did.
        publication = {}
    print(publication)
コード例 #21
0
def crawl_scholar_paper(title):
    """Return the citation count of the first Scholar hit for ``title``.

    Returns -1 when the search has no result or the hit carries no
    ``citedby`` attribute.
    """
    first_hit = next(scholarly.search_pubs_query(title), None)
    if first_hit is None:
        return -1
    return getattr(first_hit, 'citedby', -1)
コード例 #22
0
def test_pop():
    """Populate a directed graph from one publication and display it.

    The first Google Scholar hit for a fixed DOI is filled in, handed to
    ``populate_graph`` together with an empty ``nx.DiGraph``, and the graph
    is drawn with a spectral layout in a matplotlib window.
    """
    import matplotlib.pyplot as plt
    G = nx.DiGraph()
    search_query = sch.search_pubs_query('10.1109/THS.2013.6698999')
    # The builtin next() works on Python 2 and 3; the generator method
    # .next() exists only on Python 2.
    P = next(search_query)
    P = P.fill()
    populate_graph(P, G)
    nx.draw_spectral(G)
    plt.show()
コード例 #23
0
ファイル: scholar_crawler.py プロジェクト: ruyen/python_test
def gscholar_craw(file_name):
    """Save the bibtex data of up to 200 Scholar hits as JSON lines.

    ``file_name`` is used both as the search query and as the base name of
    the output file ``./gs_data/<file_name>.txt``. Each hit's ``bib`` dict is
    written as one JSON document per line.

    Args:
        file_name: Search string and output file base name.

    Returns:
        None.
    """
    from itertools import islice

    search_query = scholarly.search_pubs_query(file_name)
    # The with-block closes the file; no explicit close() is needed.
    with open('./gs_data/' + file_name + '.txt', 'w') as f:
        # islice ends quietly when fewer than 200 results exist instead of
        # letting StopIteration escape from next().
        for publication in islice(search_query, 200):
            json.dump(publication.bib, f)
            f.write('\n')
コード例 #24
0
ファイル: test.py プロジェクト: epkanol/scholarly
 def test_multiple_publications(self):
     ''' As of October 11, 2017 there are 23 pubs that fit the search term'''
     results = scholarly.search_pubs_query(
         '"naive physics" stability "3d shape"')
     titles = [hit.bib['title'] for hit in results]
     self.assertEqual(len(titles), 23)
     expected_title = u'Visual perception of the physical stability of asymmetric three-dimensional objects'
     self.assertIn(expected_title, titles)
コード例 #25
0
def trx_searchciters(m):
    """Add the publications citing an article to a Maltego transform.

    The article is looked up on Google Scholar by its "DOI" property when
    present, otherwise by its "title.article" property. For each citing
    publication (at most ``m.slider`` of them) a "me.Article" entity is added
    with the cleaned title as value and the cleaned, '; '-joined author list
    as its "author" property. Cleaning removes every substring listed in
    ``bastardi``.

    Args:
        m: Maltego message object providing ``getProperty``, ``slider`` and
            ``Maltegoxml``.

    Returns:
        The value of ``TRX.returnOutput()``.
    """
    TRX = MaltegoTransform()
    title = m.getProperty("title.article")
    title = unidecode(title)
    DOI = m.getProperty("DOI")
    # Prefer the DOI when available; fall back to the title.
    query = DOI if DOI else title
    search_query = scholarly.search_pubs_query(query)

    try:
        result = next(search_query)
    except StopIteration:
        TRX.addUIMessage("""The DOI could not be found on Google Scholar, 
which very likely means Google Scholar has never heard of this article before"""
                         )
        return TRX.returnOutput()

    titlemaybe = result.bib['title']

    TRX.addUIMessage(
        """Title found: %s. 
If this is not what you were looking for, add the article's DOI and search again"""
        % make_unicode(clean_obsession(titlemaybe)), UIM_INFORM)

    limit = m.slider
    count = 0

    for citation in result.get_citedby():

        if count == limit:
            break

        # Strip the unwanted substrings cumulatively. Restarting from the raw
        # title on every pass kept only the last replacement, and reused the
        # searched article's title when ``bastardi`` was empty.
        citing_title = citation.bib['title']
        for unwanted in bastardi:
            citing_title = citing_title.replace(unwanted, '')
        new = TRX.addEntity("me.Article", citing_title.encode('utf-8'))

        # Bibtex separates authors with ' and '; present them '; '-separated.
        authors = '; '.join(citation.bib['author'].split(' and '))
        for unwanted in bastardi:
            authors = authors.replace(unwanted, '')
        new.addProperty("author", "Author", "loose", authors.encode('utf-8'))

        count += 1

    logging(TRX.returnOutput(), m.Maltegoxml)

    return TRX.returnOutput()
コード例 #26
0
ファイル: test.py プロジェクト: kanghj/scholarly
 def test_publication_contents(self):
     """The filled-in first hit carries the expected bibtex fields."""
     pub = next(scholarly.search_pubs_query('A frequency-domain analysis of haptic gratings')).fill()
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(pub.bib['author'], u'Cholewiak, Steven A and Kim, Kwangtaek and Tan, Hong Z and Adelstein, Bernard D')
     self.assertEqual(pub.bib['journal'], u'Haptics, IEEE Transactions on')
     self.assertEqual(pub.bib['number'], u'1')
     self.assertEqual(pub.bib['pages'], u'3--14')
     self.assertEqual(pub.bib['publisher'], u'IEEE')
     self.assertEqual(pub.bib['title'], u'A frequency-domain analysis of haptic gratings')
     self.assertEqual(pub.bib['url'], u'http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=5210096')
     self.assertEqual(pub.bib['volume'], u'3')
     self.assertEqual(pub.bib['year'], u'2010')
コード例 #27
0
 def test_publication_contents(self):
     """The filled-in first hit carries the expected bibtex fields."""
     pub = next(scholarly.search_pubs_query('Creating correct blur and its effect on accommodation')).fill()
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(pub.bib['author'], u'Cholewiak, Steven A and Love, Gordon D and Banks, Martin S')
     self.assertEqual(pub.bib['journal'], u'Journal of vision')
     self.assertEqual(pub.bib['number'], u'9')
     self.assertEqual(pub.bib['pages'], u'1--1')
     self.assertEqual(pub.bib['publisher'], u'The Association for Research in Vision and Ophthalmology')
     self.assertEqual(pub.bib['title'], u'Creating correct blur and its effect on accommodation')
     self.assertEqual(pub.bib['url'], u'https://jov.arvojournals.org/article.aspx?articleid=2701817')
     self.assertEqual(pub.bib['volume'], u'18')
     self.assertEqual(pub.bib['year'], u'2018')
コード例 #28
0
def GetCitation(title: str, formatStr: str, bibFolder: str) -> str:
    '''
    Given the title of an article, use the package Scholarly to query Google
    Scholar for that article and return the reference file in the format
    specified by the format string (currently only "rm", written as .ris, is
    implemented). Write the reference file to the bibFolder location.

    Parameters
    ----------
    title : str
        Title of the article; also used as the output file name.
    formatStr : str
        Identifier of the reference format (only "rm" is implemented).
    bibFolder : str
        Folder to write the reference file to.

    Returns
    -------
    str
        The text of the reference file with non-ASCII characters replaced by
        spaces, or "" when the search yields no result.

    Raises
    ------
    KeyError
        If formatStr is not a supported format and the search found a result.

    '''
    # Currently only supports reference manager format
    formats = {"rm": ".ris"}

    query = scholarly.search_pubs_query("\"" + title + "\"")

    # Gets the link to the citation file
    try:
        citationLink = next(query).url_scholarbib
    except StopIteration:
        return ""

    # Resolve the extension before any download so an unsupported format
    # fails without a wasted HTTP request.
    extension = formats[formatStr]

    # Adjusts the format to the requested one by changing the last query param
    linkParts = citationLink.split('=')
    linkParts[-1] = formatStr
    citationLink = "=".join(linkParts)

    # Makes a get request to download the reference
    citationText = requests.get(citationLink).text
    # Filter out any problematic (non-ASCII) characters
    citationText = re.sub(r'[^\x00-\x7f]', r' ', citationText)

    # Writes the citation file to the bibliography folder using the title as
    # the file name
    with open(bibFolder + "/" + title + extension,
              "w+",
              encoding='utf-8') as citationFile:
        citationFile.write(citationText)

    return citationText
コード例 #29
0
def print_title(query):
    '''Print the titles of up to five publications found for a query.

    A three-line banner naming the query is printed first. Fewer than five
    titles are printed when the search yields fewer results.
    '''
    from itertools import islice

    print(
        "===================================================================")
    print("=               Titles from ", query)
    print(
        "===================================================================")
    search_query = sc.search_pubs_query(query)
    # islice ends quietly on short result sets instead of letting
    # StopIteration escape from next().
    for paper in islice(search_query, 5):
        print(paper.bib['title'])
コード例 #30
0
ファイル: zhoubangjun.py プロジェクト: jianghengle/scholarly
def get_publication(bib):
    """Return the Google Scholar bibtex data for ``bib``, cached by title.

    On the first request for a title, the first search hit is filled in and
    its ``bib`` dict is stored in ``publications``. When the lookup fails, a
    message is printed and ``bib`` itself is cached instead.

    Args:
        bib: Dict with at least a 'title' entry.

    Returns:
        The cached dict for the title.
    """
    title = bib['title']
    if title not in publications:
        try:
            search_query = scholarly.search_pubs_query(title)
            publication = next(search_query).fill()
            publications[title] = publication.bib
        except Exception:
            # Best-effort lookup: fall back to the caller's data, but do not
            # swallow KeyboardInterrupt/SystemExit as the bare except did.
            print('Cannot find publication: ' + title)
            publications[title] = bib

    return publications[title]
コード例 #31
0
ファイル: test.py プロジェクト: IanEisenberg/Academic_Lineage
def main():
    """Fetch the first Scholar hit for "bert nlp", extract and save its data.

    The extracted data is written to ``PAPER_DATA_PATH`` as JSON, after which
    ``embed()`` is called.
    """
    terms = "bert nlp"
    print("Fetching paper matching: %s" % terms)

    first_hit = next(scholarly.search_pubs_query(terms))

    extracted = paper_extraction.extract(first_hit, verbose=True)
    file_utility.save_json(extracted, PAPER_DATA_PATH)

    embed()
コード例 #32
0
ファイル: test.py プロジェクト: Suparno1998/Notemaker
def generate_metadata(input_file):
    # Looks up Google Scholar metadata for a file whose name contains "paper".
    # The name with ".pdf" removed is used as the search query, and the
    # ``bib`` of the first hit is returned.
    res = None
    if "paper" in input_file:
        print(input_file)
        try:
            res = next(scholarly.search_pubs_query(input_file.replace(".pdf","")))
        except StopIteration:
            # No search result: prints an empty string, leaving res as None.
            print("",end="",sep="")
        finally:
            # NOTE(review): when the search had no result, res is still None
            # here and ``res.bib`` raises AttributeError.
            res = res.bib
        return res
    else:
        # NOTE(review): the body of this branch is not visible in this excerpt.
コード例 #33
0
ファイル: test.py プロジェクト: Spferical/scholarly
 def test_publication_contents(self):
     """Every expected bibtex field appears unchanged in the filled-in hit."""
     pub = next(scholarly.search_pubs_query('A frequency-domain analysis of haptic gratings')).fill()
     actual = pub.bib
     expected = {
         u'author': u'Cholewiak, Steven and Kim, Kwangtaek and Tan, Hong Z and Adelstein, Bernard D and others',
         u'journal': u'Haptics, IEEE Transactions on',
         u'number': u'1',
         u'pages': u'3--14',
         u'publisher': u'IEEE',
         u'title': u'A frequency-domain analysis of haptic gratings',
         u'url': u'http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=5210096',
         u'volume': u'3',
         u'year': u'2010',
     }
     # The expected fields must be a subset of the actual ones.
     is_subset = all(pair in actual.items() for pair in expected.items())
     self.assertTrue(is_subset)
コード例 #34
0
 def search_scholar(self):
     """Store and return the filled-in first Scholar hit for ``self.term``."""
     first_hit = next(scholarly.search_pubs_query(self.term))
     self.result = first_hit.fill()
     return self.result
コード例 #35
0
ファイル: test.py プロジェクト: Spferical/scholarly
 def test_multiple_publications(self):
     ''' As of October 21, 2015 there are 7 pubs that fit the search term'''
     titles = []
     for hit in scholarly.search_pubs_query('cholewiak campbell robson'):
         titles.append(hit.bib['title'])
     self.assertEqual(len(titles), 7)
     self.assertIn(u'A frequency-domain analysis of haptic gratings', titles)
コード例 #36
0
ファイル: test.py プロジェクト: Spferical/scholarly
 def test_get_cited_by(self):
     """The number of citing publications matches the reported count."""
     search = scholarly.search_pubs_query('frequency-domain analysis of haptic gratings cholewiak')
     pub = next(search).fill()
     citing = list(pub.get_citedby())
     self.assertEqual(len(citing), pub.citedby)
コード例 #37
0
ファイル: test.py プロジェクト: Spferical/scholarly
 def test_empty_publication(self):
     """An empty query yields no publications."""
     pubs = list(scholarly.search_pubs_query(''))
     # Compare by value: assertIs on integers only passes thanks to an
     # interpreter-specific caching of small ints.
     self.assertEqual(len(pubs), 0)
コード例 #38
0
__author__ = 'fccoelho'


import scholarly
import pymongo
import time
import json

# MongoDB client created with no arguments; used by continuous_fetch() to
# read conn.scholar.articles.
conn = pymongo.MongoClient()


# Module-level Google Scholar search, started once at import time and
# consumed with next() by continuous_fetch().
search_query = scholarly.search_pubs_query('zika zikv -author:zika')

def continuous_fetch():
    downloaded = [a['url_scholarbib'] for a in conn.scholar.articles.find({}, {'url_scholarbib': 1})]
    while True:
        doc = {}
        # try:
        art = next(search_query)
        if art.url_scholarbib in downloaded:
            continue
        if not art._filled:
            art.fill()
        doc['bib'] = art.bib
        try:
            doc['citedby'] = art.citedby
        except AttributeError:
            doc['citedby'] = 0
        try:
            doc['id_scholarcitedby'] = art.id_scholarcitedby
        except AttributeError: