Example #1
0
    def setUp(self):
        """Select the proxy used by scholarly for the test run.

        The connection method is read from the ``CONNECTION_METHOD``
        environment variable when that key is present in ``scholarly.env``;
        otherwise it falls back to ``"none"``. The value is stored on
        ``self.connection_method`` and determines which ``ProxyGenerator``
        backend is configured:

        * ``"tor"``          -- ``Tor_External`` on platform-dependent ports
        * ``"tor_internal"`` -- ``Tor_Internal`` with a platform-dependent
          executable name
        * ``"luminati"``     -- ``Luminati`` with credentials taken from the
          ``USERNAME``, ``PASSWORD`` and ``PORT`` environment variables
        * ``"freeproxy"``    -- ``FreeProxies``
        * anything else      -- no proxy (``scholarly.use_proxy(None)``)
        """
        proxy_generator = ProxyGenerator()
        if "CONNECTION_METHOD" in scholarly.env:
            self.connection_method = os.getenv("CONNECTION_METHOD")
        else:
            self.connection_method = "none"

        on_windows = sys.platform.startswith("win")

        if self.connection_method == "tor":
            tor_password = "******"
            # Tor uses 9050 (socks) / 9051 (control) by default; on Windows
            # the ports are 9150 / 9151. Windows is treated as the special
            # case so that platforms other than linux/darwin no longer end up
            # passing None for both ports.
            if on_windows:
                tor_sock_port = 9150
                tor_control_port = 9151
            else:
                tor_sock_port = 9050
                tor_control_port = 9051
            proxy_generator.Tor_External(tor_sock_port, tor_control_port, tor_password)
            scholarly.use_proxy(proxy_generator)

        elif self.connection_method == "tor_internal":
            # Previously tor_cmd was only bound on linux and win, so macOS
            # (and any other platform) crashed with UnboundLocalError here.
            tor_cmd = 'tor.exe' if on_windows else 'tor'
            proxy_generator.Tor_Internal(tor_cmd=tor_cmd)
            scholarly.use_proxy(proxy_generator)

        elif self.connection_method == "luminati":
            # Retries are raised before the proxy is configured, as before.
            scholarly.set_retries(10)
            proxy_generator.Luminati(usr=os.getenv("USERNAME"),
                                     passwd=os.getenv("PASSWORD"),
                                     proxy_port=os.getenv("PORT"))
            scholarly.use_proxy(proxy_generator)

        elif self.connection_method == "freeproxy":
            proxy_generator.FreeProxies()
            scholarly.use_proxy(proxy_generator)

        else:
            scholarly.use_proxy(None)
from futurewater.util import format_author

# Retry budget on errors -- judging by the name; the code that consumes this
# constant is not visible in this part of the file.
MAX_RETRIES_ON_ERROR = 3

# Reference material:
# https://scholarly.readthedocs.io/en/latest/quickstart.html#installation
# https://github.com/scholarly-python-package/scholarly
# https://github.com/OpenAPC/openapc-de/blob/master/python/import_dois.py

# Raise the threshold of the HTTP client loggers so that only warnings and
# above are emitted by them.
for _noisy_logger in ("requests", "urllib3"):
    logging.getLogger(_noisy_logger).setLevel(logging.WARNING)
del _noisy_logger  # keep the module namespace unchanged

# logging.getLogger() without a name returns the root logger.
logger = logging.getLogger()

# Module-level side effect: every scholarly request issued after import goes
# through the Tor proxy configured here (socks port 9050, control port 9051).
pg = ProxyGenerator()
pg.Tor_External(
    tor_sock_port=9050,
    tor_control_port=9051,
    tor_password="******",
)
scholarly.use_proxy(pg)


def get_schoolar_data(author_name,
                      cache_folder="scholarly",
                      affiliation='UBC'):
    output_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 "..", "resources", cache_folder)
    cached = os.path.join(output_folder, format_author(author_name))
    from_cache = False
    final_data = []
    if not os.path.isfile(cached):

        try:
Example #3
0
#!env python

from scholarly import scholarly
from scholarly import ProxyGenerator
import fileinput
import sys

# Route every scholarly request through Tor (socks port 9050, control port
# 9051). NOTE(review): the control password is hard-coded here; consider
# reading it from the environment instead.
pg = ProxyGenerator()
pg.Tor_External(9050, 9051, 'password')
scholarly.use_proxy(pg)

# Each input line (from the files named on the command line, or stdin) is one
# publication query. For every query the first search hit is filled and a
# "query,gsrank,cites" row is printed.
for line in fileinput.input():
    # fileinput yields lines with their trailing newline, so comparing the
    # raw line to "" never matched; strip first, then skip blank lines.
    query = line.rstrip()
    if not query:
        continue

    try:
        search_query = scholarly.search_pubs(query)
        pub = next(search_query).fill()
        bib = pub.bib
        # Build the whole row before printing so that a missing key cannot
        # leave a half-written line on the output.
        row = query + "," + str(bib['gsrank']) + "," + str(bib['cites'])
    except Exception:
        # Best effort: report the failure and carry on with the next query.
        # Exception (rather than a bare except) lets Ctrl-C and SystemExit
        # stop the script.
        print(" --- Unexpected error (" + query + "): ", sys.exc_info()[0])
        continue

    print(row)