コード例 #1
0
def download_epmc(download_path, year=2020):
    """Download one year of search results into per-month JSON Lines files.

    For each calendar month of ``year`` a query on ``FIRST_PDATE`` is issued
    (via the ``get_hit_count`` / ``yield_results`` helpers) and every result
    is written as one JSON document per line to
    ``<download_path>/<year>/<MM>.jsonl``.

    Months whose output file already exists are skipped, so an interrupted
    run can be resumed. Each month is first written to ``<MM>.jsonl.tmp``
    and only renamed to its final name once all results were written, so an
    existing ``.jsonl`` file always corresponds to a completed month.

    NOTE(review): "epmc" presumably refers to Europe PMC -- confirm against
    the helpers that build the request URL.

    :param download_path: directory under which the ``<year>`` folder is
        created (if missing).
    :param year: publication year to download; must be convertible to
        ``int`` so the length of each month can be computed.
    """
    # Local import: the file-level import block is not visible from here.
    import calendar

    retries = Retry(**RETRY_PARAMETERS)
    retries.BACKOFF_MAX = RETRY_BACKOFF_MAX

    year_path = os.path.join(download_path, str(year))
    os.makedirs(year_path, exist_ok=True)

    # Use the session as a context manager so its pooled connections are
    # released when the download finishes or fails.
    with requests.Session() as session:
        session.mount("https://", HTTPAdapter(max_retries=retries))

        for month_number in range(1, 13):
            month = f"{month_number:02}"
            month_path = os.path.join(year_path, f"{month}.jsonl")
            if os.path.exists(month_path):
                print(
                    f"Skipping because {month_path} exists. Delete if you want to redownload"
                )
                continue

            # Query up to the real last day of the month rather than a
            # hard-coded 31, which is not a valid date for shorter months.
            last_day = calendar.monthrange(int(year), month_number)[1]
            params = {
                "query":
                f"(FIRST_PDATE:[{year}-{month}-01 TO {year}-{month}-{last_day:02}])",
                "format": "json",
                "resultType": "core",
                "pageSize": 100,
            }

            tmp_month_path = f"{month_path}.tmp"
            with open(tmp_month_path, "w", encoding="utf-8") as f:
                hit_count = get_hit_count(session, params)
                for result in tqdm(
                        yield_results(session, params),
                        total=hit_count,
                        desc=f"Year {year} Month {month}",
                ):
                    f.write(json.dumps(result))
                    f.write("\n")
            # Publish the file under its final name only after it is complete.
            os.rename(tmp_month_path, month_path)
コード例 #2
0
 def requests_session(self, adapter_kwargs=None):
     """
     Build a requests session whose HTTPS adapter retries failed requests.

     The retry policy is created from RETRY_PARAMETERS and its BACKOFF_MAX
     attribute is set to RETRY_BACKOFF_MAX. Only the 'https://' prefix gets
     this adapter; other schemes keep the session's default adapters.

     No connection-pool sizing is set here: any such tuning has to be
     supplied through ``adapter_kwargs``, which is forwarded verbatim to
     HTTPAdapter (``None`` means no extra keyword arguments).
     """
     retry_policy = Retry(**RETRY_PARAMETERS)
     retry_policy.BACKOFF_MAX = RETRY_BACKOFF_MAX

     extra_kwargs = {} if adapter_kwargs is None else adapter_kwargs
     https_adapter = HTTPAdapter(max_retries=retry_policy, **extra_kwargs)

     http_session = requests.Session()
     http_session.mount('https://', https_adapter)
     return http_session
コード例 #3
0
def get_requests_session(auth=False):
    """
    Build a requests session that retries failed requests.

    A single adapter is shared by the ``http://`` and ``https://`` prefixes.
    Its retry policy allows up to 3 retries in total (and up to 3 each for
    read and connect errors), uses a backoff factor of 0.5, forces a retry
    on 500/502/503/504 responses, and has ``BACKOFF_MAX`` set to 2.

    :param auth: accepted but not used anywhere in this function
    :return: the configured requests session
    :rtype: requests.Session
    """
    retry_policy = Retry(
        total=3,
        read=3,
        connect=3,
        backoff_factor=0.5,
        status_forcelist=(500, 502, 503, 504),
    )
    retry_policy.BACKOFF_MAX = 2
    shared_adapter = requests.adapters.HTTPAdapter(max_retries=retry_policy)

    configured = requests.Session()
    # Same adapter instance for both schemes, mounted http first, then https.
    for prefix in ("http://", "https://"):
        configured.mount(prefix, shared_adapter)
    return configured