Python WatLibSeleniumParser Examples

Programming Language: Python

Examples at hotexamples.com: 5

Python WatLibSeleniumParser - 5 examples found. These are the top-rated real-world Python examples of WatLibSeleniumParser, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

WatLibParser(1)

downloadFromWatLib(1)

reset(1)

Example #1

Show file

File: academicThings.py Project: AnkaiJie/Google-Scholar-Citation-Fraud-Data-Collector

 def getWatPDF(self, url, title=None):
     """Download the PDF at `url` through WatLib and wrap it in a PdfObj.

     The URL is printed, execution pauses for 15 seconds, and the download
     is written to the fixed file name 'paper.pdf'.

     Args:
         url: Address handed to WatLibSeleniumParser.downloadFromWatLib.
         title: Accepted but not used by this method.

     Returns:
         A PdfObj built from the local 'paper.pdf', or None when the
         download call returns None.
     """
     print(url)
     # NOTE(review): fixed delay, presumably to throttle requests - confirm.
     time.sleep(15)
     if WatLibSeleniumParser.downloadFromWatLib(url, 'paper.pdf') is None:
         return None
     return PdfObj('local', 'paper.pdf')

Example #2

Show file

 def getWatPDF(self, url, title=None):
     """Fetch a paper's PDF via WatLib and return it as a local PdfObj.

     Prints `url`, waits 15 seconds, then asks WatLibSeleniumParser to
     download the document into 'paper.pdf'.

     Args:
         url: Location passed to WatLibSeleniumParser.downloadFromWatLib.
         title: Unused here; kept for interface compatibility.

     Returns:
         PdfObj('local', 'paper.pdf') on success, otherwise None when the
         downloader reports None.
     """
     print(url)
     time.sleep(15)
     download_status = WatLibSeleniumParser.downloadFromWatLib(url, 'paper.pdf')
     # A None status means no file was produced, so there is nothing to wrap.
     return None if download_status is None else PdfObj('local', 'paper.pdf')

Example #3

Show file

def count_overcites_paper(paper, author, cite_num_to_load=40):
    """Report how many times each paper citing `paper` cites `author`.

    For every PDF object returned by ``paper.getCitingPdfs`` the reference
    content is extracted and the citations to the author's last name are
    counted. One dict per citing paper is collected with the keys
    'Citing Paper Number', 'Title' and 'Over-cite Count'.

    Args:
        paper: Object exposing ``getCitingPdfs(n)``.
        author: Object exposing ``getLastName()``.
        cite_num_to_load: Number forwarded to ``paper.getCitingPdfs``.

    Returns:
        The list of result dicts. Citing papers with no reference content
        are recorded with an 'Over-cite Count' of "No PDF Found" when they
        have a title and are skipped entirely when they have none. If the
        user interrupts with Ctrl-C, the results gathered so far are
        returned after ``WatLibSeleniumParser.reset()`` is called.
    """
    records = []
    try:
        citing_pdfs = paper.getCitingPdfs(cite_num_to_load)
        extractor = PaperReferenceExtractor()

        for number, citing_pdf in enumerate(citing_pdfs, start=1):
            references = extractor.getReferencesContent(citing_pdf)
            title = citing_pdf.getTitle()

            if references is None:
                # Without reference content there is nothing to count; only
                # titled papers leave a placeholder record behind.
                if title is not None:
                    print("Citing paper number " + str(number) + ": " + title + " had no PDF content found.")
                    records.append({
                        'Citing Paper Number': number,
                        'Title': title,
                        'Over-cite Count': "No PDF Found",
                    })
                continue

            last_name = author.getLastName().title()
            cite_count = extractor.getCitesToAuthor(last_name, references)
            if title is None:
                title = 'Unknown Title'
            print("Citing paper number " + str(number) + ": " + title + " cites " + last_name + " " + str(cite_count) + " times.")
            records.append({
                'Citing Paper Number': number,
                'Title': title,
                'Over-cite Count': cite_count,
            })

    except KeyboardInterrupt:
        # Keep whatever was collected so far and reset the WatLib parser.
        print('User ended program. Returning existing Data')
        WatLibSeleniumParser.reset()

    return records

Example #4

Show file

File: ScopusParse.py Project: AnkaiJie/Google-Scholar-Citation-Fraud-Data-Collector

 def getWatPDF(self, url, title=None, pdfName='paper.pdf'):
     """Download the PDF at `url` through WatLib and wrap it in a PdfObj.

     Args:
         url: Address handed to WatLibSeleniumParser.downloadFromWatLib.
         title: Accepted but not used by this method.
         pdfName: Local file name the PDF is downloaded to and then
             opened from. Defaults to 'paper.pdf'.

     Returns:
         A PdfObj built from the local file `pdfName`, or None when the
         download call returns None.
     """
     print('Getting pdf from WatLib')
     print(url)
     # Download to the same file name that is opened below. Previously the
     # download always went to 'paper.pdf', so a custom pdfName pointed the
     # PdfObj at a file this call never wrote.
     status = WatLibSeleniumParser.downloadFromWatLib(url, pdfName)
     if status is None:
         print('None status')
         return None
     return PdfObj('local', pdfName)

Example #5

Show file

File: ScopusParse.py Project: AnkaiJie/Academic-Fraud-Detector

'''
Created on Jan 05, 2016

@author: Ankai
'''
from bs4 import BeautifulSoup
import time
from ReferenceParser import IeeeReferenceParser, SpringerReferenceParser, PaperReferenceExtractor, PdfObj
import SessionInitializer
import WatLibSeleniumParser

# Module-level session obtained once at import time from SessionInitializer;
# Paper.loadFromScopus uses it to fetch pages (SESSION.get).
SESSION = SessionInitializer.getSesh()
# Module-level WatLibParser instance, created once when this module is imported.
WATPARSER = WatLibSeleniumParser.WatLibParser()


class Paper:
    def __init__(self, link, loadPaperPDFs=True):
        self.url = link
        self.pdfObj = None
        self.pap_info = {}
        #self.__pap_info['Publisher'] = ''
        self.citedByUrl = None
        self.citedByNum = 0

        #Internet Session Setup
        self.loadFromScopus(loadPaperPDFs=loadPaperPDFs)

    def loadFromScopus(self, loadPaperPDFs=True):
        response = SESSION.get(self.url)
        soup = BeautifulSoup(response.content, 'lxml')