def getWatPDF(self, url, title=None):
    """Download the document at ``url`` through WatLib and wrap the result.

    Prints ``url``, waits 15 seconds, then calls
    ``WatLibSeleniumParser.downloadFromWatLib`` with the file name
    ``paper.pdf``.

    :param url: address handed to ``downloadFromWatLib``.
    :param title: accepted but not used by this method.
    :return: ``PdfObj('local', 'paper.pdf')``, or ``None`` when the
        download call returned ``None``.
    """
    print(url)
    # NOTE(review): fixed pause, presumably to throttle requests -- confirm.
    time.sleep(15)

    pdf_file = 'paper.pdf'
    if WatLibSeleniumParser.downloadFromWatLib(url, pdf_file) is None:
        return None
    return PdfObj('local', pdf_file)
Exemple #2
0
 def getWatPDF(self, url, title=None):
     """Fetch ``url`` via WatLib into ``paper.pdf`` and return it as a PdfObj.

     The URL is printed and the call blocks for 15 seconds before the
     download is attempted.

     :param url: address passed to ``WatLibSeleniumParser.downloadFromWatLib``.
     :param title: unused here.
     :return: ``None`` if the download call returned ``None``, otherwise
         a ``PdfObj`` built as ``PdfObj('local', 'paper.pdf')``.
     """
     print(url)
     time.sleep(15)
     outcome = WatLibSeleniumParser.downloadFromWatLib(url, 'paper.pdf')
     # A None outcome is the only failure signal checked here.
     return None if outcome is None else PdfObj('local', 'paper.pdf')
Exemple #3
0
def count_overcites_paper(paper, author, cite_num_to_load=40):
    """Report, per citing paper, how many times it cites ``author``.

    The citing PDFs come from ``paper.getCitingPdfs(cite_num_to_load)`` and
    are examined one by one with a ``PaperReferenceExtractor``. Progress is
    printed for every paper that yields a row.

    :param paper: object providing ``getCitingPdfs``.
    :param author: object providing ``getLastName``; the last name is
        title-cased before being matched.
    :param cite_num_to_load: forwarded to ``getCitingPdfs`` (presumably the
        number of citing papers to load -- confirm against that method).
    :return: list of dicts with the keys ``'Citing Paper Number'``,
        ``'Title'`` and ``'Over-cite Count'``. The count is the string
        ``"No PDF Found"`` when no reference content was extracted. Papers
        with neither content nor title produce no row. If the user
        interrupts with Ctrl-C, ``WatLibSeleniumParser.reset()`` is called
        and the rows gathered so far are returned.
    """
    rows = []

    def add_row(number, paper_title, count):
        # Keys are inserted in the same order for every row.
        rows.append({
            'Citing Paper Number': number,
            'Title': paper_title,
            'Over-cite Count': count,
        })

    try:
        citing_pdfs = paper.getCitingPdfs(cite_num_to_load)
        extractor = PaperReferenceExtractor()

        for number, pdf in enumerate(citing_pdfs, start=1):
            references = extractor.getReferencesContent(pdf)
            title = pdf.getTitle()

            if references is None:
                # Nothing to count; only report the paper when it can at
                # least be identified by its title.
                if title is not None:
                    print("Citing paper number " + str(number) + ": " + title + " had no PDF content found.")
                    add_row(number, title, "No PDF Found")
                continue

            lname = author.getLastName().title()
            cite_count = extractor.getCitesToAuthor(lname, references)
            shown_title = 'Unknown Title' if title is None else title
            print("Citing paper number " + str(number) + ": " + shown_title + " cites " + lname + " " + str(cite_count) + " times.")
            add_row(number, shown_title, cite_count)

    except KeyboardInterrupt:
        # Partial results are still useful, so fall through to the return.
        print('User ended program. Returning existing Data')
        WatLibSeleniumParser.reset()

    return rows
 def getWatPDF(self, url, title=None, pdfName='paper.pdf'):
     """Download a paper through WatLib and wrap the saved file in a PdfObj.

     :param url: address passed to ``WatLibSeleniumParser.downloadFromWatLib``.
     :param title: accepted but not used by this method.
     :param pdfName: file name given both to the download call and to the
         ``PdfObj`` built afterwards; defaults to ``'paper.pdf'``.
     :return: ``PdfObj('local', pdfName)``, or ``None`` when the download
         call returned ``None``.
     """
     print('Getting pdf from WatLib')
     print(url)
     # Pass the caller's pdfName to the download step too, so the file that
     # is requested is the same one PdfObj is pointed at. The previous
     # hard-coded 'paper.pdf' only agreed with PdfObj for the default name.
     # NOTE(review): assumes the second argument of downloadFromWatLib is
     # the output file name -- confirm.
     status = WatLibSeleniumParser.downloadFromWatLib(url, pdfName)
     if status is None:
         print('None status')
         return None
     return PdfObj('local', pdfName)
'''
Created on Jan 05, 2016

@author: Ankai
'''
from bs4 import BeautifulSoup
import time
from ReferenceParser import IeeeReferenceParser, SpringerReferenceParser, PaperReferenceExtractor, PdfObj
import SessionInitializer
import WatLibSeleniumParser

# Session object created once at import time; Paper.loadFromScopus sends the
# GET request for a paper's URL through it.
SESSION = SessionInitializer.getSesh()
# WatLibParser instance, also created at import time.
# NOTE(review): presumably shared by the WatLib download code -- confirm
# where it is used.
WATPARSER = WatLibSeleniumParser.WatLibParser()


class Paper:
    def __init__(self, link, loadPaperPDFs=True):
        self.url = link
        self.pdfObj = None
        self.pap_info = {}
        #self.__pap_info['Publisher'] = ''
        self.citedByUrl = None
        self.citedByNum = 0

        #Internet Session Setup
        self.loadFromScopus(loadPaperPDFs=loadPaperPDFs)

    def loadFromScopus(self, loadPaperPDFs=True):
        response = SESSION.get(self.url)
        soup = BeautifulSoup(response.content, 'lxml')