예제 #1
0
    def setUp(self):
        """Configure the scholarly proxy according to ``CONNECTION_METHOD``.

        The method name is read from the environment when ``scholarly.env``
        contains a ``CONNECTION_METHOD`` entry; otherwise it is ``"none"``.
        The result is stored on ``self.connection_method``.

        Handled values are ``"tor"``, ``"tor_internal"``, ``"luminati"`` and
        ``"freeproxy"``; any other value calls ``scholarly.use_proxy(None)``.

        Raises:
            RuntimeError: if ``"tor_internal"`` is requested on a platform
                for which no Tor executable name is known.
        """
        proxy_generator = ProxyGenerator()
        if "CONNECTION_METHOD" in scholarly.env:
            self.connection_method = os.getenv("CONNECTION_METHOD")
        else:
            self.connection_method = "none"

        if self.connection_method == "tor":
            # Ports stay None on platforms not listed below.
            tor_sock_port = None
            tor_control_port = None
            tor_password = "******"
            # Tor uses 9050 as the default socks port (9051 for control);
            # on Windows it is 9150 for socks and 9151 for control.
            if sys.platform.startswith(("linux", "darwin")):
                tor_sock_port = 9050
                tor_control_port = 9051
            elif sys.platform.startswith("win"):
                tor_sock_port = 9150
                tor_control_port = 9151
            proxy_generator.Tor_External(tor_sock_port, tor_control_port,
                                         tor_password)
            scholarly.use_proxy(proxy_generator)

        elif self.connection_method == "tor_internal":
            # darwin is handled like linux, mirroring the "tor" branch above.
            # Previously tor_cmd was left unassigned on any platform other
            # than linux/win, which surfaced as an UnboundLocalError.
            if sys.platform.startswith(("linux", "darwin")):
                tor_cmd = 'tor'
            elif sys.platform.startswith("win"):
                tor_cmd = 'tor.exe'
            else:
                raise RuntimeError(
                    "tor_internal: no known Tor executable for platform "
                    f"{sys.platform!r}")
            proxy_generator.Tor_Internal(tor_cmd=tor_cmd)
            scholarly.use_proxy(proxy_generator)

        elif self.connection_method == "luminati":
            scholarly.set_retries(10)
            # Credentials and port are taken from the environment as-is.
            proxy_generator.Luminati(usr=os.getenv("USERNAME"),
                                     passwd=os.getenv("PASSWORD"),
                                     proxy_port=os.getenv("PORT"))
            scholarly.use_proxy(proxy_generator)

        elif self.connection_method == "freeproxy":
            proxy_generator.FreeProxies()
            scholarly.use_proxy(proxy_generator)

        else:
            scholarly.use_proxy(None)
예제 #2
0
from scholarly import scholarly, ProxyGenerator
import json
from dotenv import load_dotenv
from pathlib import Path
from fp.fp import FreeProxy

# Send scholarly's requests through the FreeProxies proxy setup.
proxy_generator = ProxyGenerator()
proxy_generator.FreeProxies()
scholarly.use_proxy(proxy_generator)

# Take the first hit for the author query and fill in its details.
search_query = scholarly.search_author('Maël Montévil')
author = scholarly.fill(next(search_query))

# Fill each publication that has at least one citation.  filter() and map()
# are lazy, so every entry is checked immediately before it is filled.
pubs = list(
    map(
        scholarly.fill,
        filter(lambda entry: entry['num_citations'] > 0,
               author['publications']),
    )
)

# Pair each publication that has a 'citedby_url' with the list of works
# returned by scholarly.citedby() for it.
pubs2 = [
    [entry, list(scholarly.citedby(entry))]
    for entry in pubs
    if 'citedby_url' in entry
]

# Dump as indented JSON; values json cannot encode become a placeholder.
print(json.dumps(pubs2, default=lambda _: '<not serializable>', indent=2))
예제 #3
0
import functools
import operator
from collections import defaultdict

from scholarly import scholarly, ProxyGenerator
from tqdm import tqdm
from yattag import Doc, indent

# Settings
PEOPLE = [
    "James O'Shea", "Alex Saywell", "Philip Moriarty", "Peter Beton",
    "James Sharp"
]
OUTPUT_DIR = "D:/Nano Group Page/all_pubs"
MIN_YEAR = 1990

# Set up a proxy to avoid ignored requests.
# use_proxy() must receive the ProxyGenerator itself: FreeProxies() sets up
# ``pg`` in place, and its return value is not the generator.
pg = ProxyGenerator()
pg.FreeProxies()
scholarly.use_proxy(pg)

# Preallocate
# defaultdict(list) creates a missing entry on first access, so values can be
# appended without first checking whether the key exists.
pubs_by_year = defaultdict(list)
pubs = []

# Collect everyone's publications into one flat, unordered list.
for p in PEOPLE:
    search_query = scholarly.search_author(f'{p}, Nottingham')
    author = next(search_query)  # first search hit only
    info = scholarly.fill(author, sections=['publications'])
    # Extend directly instead of appending sub-lists and flattening later.
    pubs.extend(info["publications"])

# For every publication