def try_compute_data(s3, webhook, old_data):
    """
    Try to run the scraper and return course data. If something goes
    wrong, raise `ScrapeError`. Otherwise, invoke the provided
    `Webhook`.

    `old_data` is the previous course data or `util.Unset`.
    """
    scraper_timeout = util.get_env("scraper_timeout")
    try:
        scraper_timeout = int(scraper_timeout)
        if scraper_timeout <= 0:
            raise ValueError
    except ValueError:
        util.warn("Illegal scraper timeout: {}".format(repr(scraper_timeout)))
        util.log("Resetting timeout to 60 seconds")
        os.environ["HYPERSCHEDULE_SCRAPER_TIMEOUT"] = "60"
        scraper_timeout = 60
    if old_data is util.Unset:
        # For JSON.
        old_data = None
    try:
        util.log("Running scraper")
        process = subprocess.Popen(
            ["python", "-m", "hyperschedule.scrapers.claremont"],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
        )
        output, _ = process.communicate(
            input=json.dumps(old_data).encode(), timeout=scraper_timeout
        )
        if process.returncode != 0:
            raise ScrapeError("scraper failed")
        try:
            output = output.decode()
        except UnicodeDecodeError as e:
            raise ScrapeError(
                "scraper emitted malformed output: {}".format(e)
            ) from None
        if "$delete" in output:
            raise ScrapeError("scraper output contains '$delete'")
        data = json.loads(output)
        if util.get_env_boolean("snitch"):
            webhook.get()
        if util.get_env_boolean("cache"):
            cache_file_write(data)
        if util.get_env_boolean("s3_write"):
            s3_write(s3, data)
    except OSError as e:
        raise ScrapeError(
            "unexpected error while running scraper: {}".format(e)
        ) from None
    except subprocess.TimeoutExpired:
        process.kill()
        process.communicate()
        raise ScrapeError(
            "scraper timed out after {} seconds".format(scraper_timeout)
        ) from None
    except json.decoder.JSONDecodeError:
        raise ScrapeError("scraper did not return valid JSON") from None
    except requests.exceptions.RequestException as e:
        util.warn("failed to reach success webhook: {}".format(e))
    return data
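

# `try_compute_data` talks to the scraper process over stdin/stdout: the
# previous course data is sent as JSON on stdin (or `null` if there was
# none), and the scraper is expected to print the new course data as JSON
# on stdout. A minimal sketch of that contract, kept in a comment because
# the real entry point lives in `hyperschedule.scrapers.claremont`, not
# here (the `scrape_courses` helper is hypothetical):
#
#     import json
#     import sys
#
#     def main():
#         old_data = json.load(sys.stdin)      # None on the first run
#         new_data = scrape_courses(old_data)  # hypothetical scraper logic
#         json.dump(new_data, sys.stdout)
#
#     if __name__ == "__main__":
#         main()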


def s3_write(s3, data):
    """
    Write provided `data` to S3 bucket. If this fails, log the error.

    `s3` is a boto3 S3 resource.
    """
    try:
        obj = s3.Object(S3_BUCKET, S3_KEY)
        obj.put(Body=json.dumps(data).encode())
    except (
        botocore.exceptions.BotoCoreError,
        botocore.exceptions.ClientError,
    ) as e:
        util.warn("Failed to write S3: {}".format(e))


def cache_file_read():
    """
    Read and return data from the scraper result cache file. If this
    fails, log the error and return `util.Unset`.
    """
    try:
        with open(CACHE_FILE) as f:
            return json.load(f)
    except OSError as e:
        # A missing cache file is expected (e.g. on first run); only
        # warn if the file exists but could not be read.
        if CACHE_FILE.is_file():
            util.warn("Failed to read cache file: {}".format(e))
    except json.decoder.JSONDecodeError:
        util.warn("Cache file contained invalid JSON")
    return util.Unset


def s3_read(s3):
    """
    Read and return data from the scraper result S3 bucket. If this
    fails, log the error and return `util.Unset`.

    `s3` is a boto3 S3 resource.
    """
    try:
        obj = s3.Object(S3_BUCKET, S3_KEY)
        return json.load(obj.get()["Body"])
    except (
        botocore.exceptions.BotoCoreError,
        botocore.exceptions.ClientError,
        json.JSONDecodeError,
    ) as e:
        util.warn("Failed to read S3: {}".format(e))
    return util.Unset
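

# `s3_read` and `s3_write` expect a boto3 S3 *resource* (not a low-level
# client). A hedged sketch of constructing one is below; credential and
# region handling is whatever boto3 picks up from the environment, which
# is an assumption here rather than something this module dictates:
#
#     import boto3
#
#     s3 = boto3.resource("s3")
#     data = s3_read(s3)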


def cache_file_write(data):
    """
    Write provided `data` to cache file, atomically. If this fails,
    log the error.
    """
    f = None
    try:
        with atomicwrites.atomic_write(CACHE_FILE, overwrite=True) as f:
            json.dump(data, f, indent=2)
            f.write("\n")
    except OSError as e:
        util.warn("Failed to write cache file: {}".format(e))
    finally:
        if f:
            try:
                # Clean up in case of error, since we passed
                # delete=False.
                pathlib.Path(f.name).unlink()
            except OSError:
                pass
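

# A hedged sketch of how these helpers might be tied together by a
# periodic worker: read the previous data from the cache file, fall back
# to S3, then repeatedly hand the latest data to `try_compute_data`. The
# loop structure, the `run_worker` name, the unconditional S3 fallback,
# and the 5-minute interval are assumptions for illustration, not part of
# the original module.
def run_worker(s3, webhook):
    import time  # local import so this sketch stays self-contained

    old_data = cache_file_read()
    if old_data is util.Unset:
        old_data = s3_read(s3)  # assumes falling back to S3 is desired
    while True:
        try:
            old_data = try_compute_data(s3, webhook, old_data)
            util.log("Scraper run succeeded")
        except ScrapeError as e:
            util.warn("Scraper run failed: {}".format(e))
        time.sleep(300)  # assumed 5-minute interval between runs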