def setUp(self):
    """Configure the scholarly proxy used by the tests.

    The connection method is read from the ``CONNECTION_METHOD``
    environment variable when that key is present in ``scholarly.env``;
    otherwise it defaults to ``"none"``. The selected value is stored on
    ``self.connection_method``.

    Handled values are ``"tor"``, ``"tor_internal"``, ``"luminati"`` and
    ``"freeproxy"``. Any other value results in
    ``scholarly.use_proxy(None)``.
    """
    proxy_generator = ProxyGenerator()

    if "CONNECTION_METHOD" in scholarly.env:
        self.connection_method = os.getenv("CONNECTION_METHOD")
    else:
        self.connection_method = "none"

    if self.connection_method == "tor":
        tor_sock_port = None
        tor_control_port = None
        tor_password = "******"
        # Tor uses the 9050 port as the default socks port
        # on windows 9150 for socks and 9151 for control
        if sys.platform.startswith(("linux", "darwin")):
            tor_sock_port = 9050
            tor_control_port = 9051
        elif sys.platform.startswith("win"):
            tor_sock_port = 9150
            tor_control_port = 9151
        # On any other platform both ports stay None and are passed as-is.
        proxy_generator.Tor_External(tor_sock_port, tor_control_port,
                                     tor_password)
        scholarly.use_proxy(proxy_generator)

    elif self.connection_method == "tor_internal":
        # Only Windows gets the ".exe" name; every other platform
        # (Linux, macOS, ...) is assumed to expose the binary as "tor".
        # Always binding tor_cmd avoids an UnboundLocalError on platforms
        # that are neither Linux nor Windows.
        if sys.platform.startswith("win"):
            tor_cmd = 'tor.exe'
        else:
            tor_cmd = 'tor'
        proxy_generator.Tor_Internal(tor_cmd=tor_cmd)
        scholarly.use_proxy(proxy_generator)

    elif self.connection_method == "luminati":
        scholarly.set_retries(10)
        # Credentials and port come from the environment.
        proxy_generator.Luminati(usr=os.getenv("USERNAME"),
                                 passwd=os.getenv("PASSWORD"),
                                 proxy_port=os.getenv("PORT"))
        scholarly.use_proxy(proxy_generator)

    elif self.connection_method == "freeproxy":
        proxy_generator.FreeProxies()
        scholarly.use_proxy(proxy_generator)

    else:
        scholarly.use_proxy(None)
from scholarly import scholarly, ProxyGenerator
import json
from dotenv import load_dotenv
from pathlib import Path
from fp.fp import FreeProxy

# Send scholarly's requests through a free proxy.
proxy_generator = ProxyGenerator()
proxy_generator.FreeProxies()
scholarly.use_proxy(proxy_generator)

# Take the first search hit for the author name and fill in that record.
search_query = scholarly.search_author('Maël Montévil')
author = scholarly.fill(next(search_query))

# Fill only the publications that have at least one citation.
pubs = list(
    map(
        scholarly.fill,
        filter(lambda pub: pub['num_citations'] > 0, author['publications']),
    )
)

# Pair every publication that has a 'citedby_url' with the list of works
# citing it.
pubs2 = []
for pub in pubs:
    if 'citedby_url' not in pub:
        continue
    pubs2.append([pub, list(scholarly.citedby(pub))])

# Dump the result as JSON; values json cannot encode are replaced by a
# placeholder string.
print(
    json.dumps(
        pubs2,
        indent=2,
        default=lambda o: '<not serializable>',
    )
)
from collections import defaultdict

from scholarly import scholarly, ProxyGenerator
from tqdm import tqdm
from yattag import Doc, indent

# Settings
PEOPLE = [
    "James O'Shea",
    "Alex Saywell",
    "Philip Moriarty",
    "Peter Beton",
    "James Sharp",
]
OUTPUT_DIR = "D:/Nano Group Page/all_pubs"
MIN_YEAR = 1990

# Setup proxy to avoid ignored requests.
# use_proxy() expects the ProxyGenerator itself, so the generator is
# configured with FreeProxies() first and then passed in, rather than
# passing FreeProxies()'s return value.
pg = ProxyGenerator()
pg.FreeProxies()
scholarly.use_proxy(pg)

# Preallocate
# A defaultdict creates missing entries on first access, so values can be
# appended without checking whether the key already exists.
pubs_by_year = defaultdict(list)
pubs = []

# Get all publications in an unordered list
for p in PEOPLE:
    search_query = scholarly.search_author(f'{p}, Nottingham')
    author = next(search_query)  # first search hit for this person
    info = scholarly.fill(author, sections=['publications'])
    # Extend directly so pubs stays a single flat list across all people.
    pubs.extend(info["publications"])

# For every publication