Example #1
0
def try_compute_data(s3, webhook, old_data):
    """
    Try to run the scraper and return course data. If something goes
    wrong, raise `ScrapeError`. Otherwise, invoke the provided
    `Webhook`. `old_data` is the previous course data or `util.Unset`.
    """
    # Presumably maps to the HYPERSCHEDULE_SCRAPER_TIMEOUT env var
    # (see the reset below) — TODO confirm util.get_env's prefixing.
    scraper_timeout = util.get_env("scraper_timeout")
    try:
        scraper_timeout = int(scraper_timeout)
        if scraper_timeout <= 0:
            # Non-positive timeouts are treated the same as unparseable
            # ones: fall through to the ValueError handler.
            raise ValueError
    except ValueError:
        util.warn("Illegal scraper timeout: {}".format(repr(scraper_timeout)))
        util.log("Resetting timeout to 60 seconds")
        # Persist the fallback in the environment so subsequent runs in
        # this process see the corrected value.
        os.environ["HYPERSCHEDULE_SCRAPER_TIMEOUT"] = "60"
        scraper_timeout = 60
    if old_data is util.Unset:
        # For JSON.
        old_data = None
    try:
        util.log("Running scraper")
        # Run the scraper as a subprocess, feeding it the previous data
        # on stdin and reading the new data from stdout.
        process = subprocess.Popen(
            ["python", "-m", "hyperschedule.scrapers.claremont"],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
        )
        output, _ = process.communicate(input=json.dumps(old_data).encode(),
                                        timeout=scraper_timeout)
        if process.returncode != 0:
            raise ScrapeError("scraper failed")
        try:
            output = output.decode()
        except UnicodeDecodeError as e:
            raise ScrapeError(
                "scraper emitted malformed output: {}".format(e)) from None
        # "$delete" appears to be a reserved marker in the diff/update
        # protocol — NOTE(review): confirm why its presence anywhere in
        # the raw output (even inside course text) is fatal.
        if "$delete" in output:
            raise ScrapeError("scraper output contains '$delete'")
        data = json.loads(output)
        # The webhook call happens after `data` is bound, so the
        # RequestException handler below can still `return data`.
        if util.get_env_boolean("snitch"):
            webhook.get()
        if util.get_env_boolean("cache"):
            cache_file_write(data)
        if util.get_env_boolean("s3_write"):
            s3_write(s3, data)
    except OSError as e:
        # e.g. Popen failing to spawn the interpreter.
        raise ScrapeError(
            "unexpected error while running scraper: {}".format(e)) from None
    except subprocess.TimeoutExpired:
        # Kill the scraper and reap it before reporting the timeout.
        process.kill()
        process.communicate()
        raise ScrapeError("scraper timed out after {} seconds".format(
            scraper_timeout)) from None
    except json.decoder.JSONDecodeError:
        raise ScrapeError("scraper did not return valid JSON") from None
    except requests.exceptions.RequestException as e:
        # Webhook failure is non-fatal: warn and fall through to return
        # the freshly scraped data.
        util.warn("failed to reach success webhook: {}".format(e))
    return data
Example #2
0
def s3_write(s3, data):
    """
    Serialize `data` as JSON and upload it to the scraper result S3
    bucket. Errors are logged, not raised. `s3` is a boto3 S3 resource.
    """
    body = json.dumps(data).encode()
    try:
        s3.Object(S3_BUCKET, S3_KEY).put(Body=body)
    except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:
        util.warn("Failed to write S3: {}".format(e))
Example #3
0
def cache_file_read():
    """
    Read and return data from the scraper result cache file. If this
    fails, log the error and return `util.Unset`.
    """
    try:
        with open(CACHE_FILE) as f:
            return json.load(f)
    except OSError as e:
        # A missing cache file is expected on a fresh run; only warn
        # when the file exists but could not be read.
        if CACHE_FILE.is_file():
            util.warn("Failed to read cache file: {}".format(e))
    except json.decoder.JSONDecodeError:
        util.warn("Cache file contained invalid JSON")
    # Fix: was a bare `Unset`, inconsistent with the docstring and the
    # rest of the module (cf. `old_data is util.Unset`).
    return util.Unset
Example #4
0
def s3_read(s3):
    """
    Read and return data from the scraper result S3 bucket. If this
    fails, log the error and return `util.Unset`. `s3` is a boto3 S3
    resource.
    """
    try:
        obj = s3.Object(S3_BUCKET, S3_KEY)
        # The object body is a streaming file-like object; json.load
        # consumes it directly.
        return json.load(obj.get()["Body"])
    except (
        botocore.exceptions.BotoCoreError,
        botocore.exceptions.ClientError,
        json.JSONDecodeError,
    ) as e:
        util.warn("Failed to read S3: {}".format(e))
        # Fix: was a bare `Unset`, inconsistent with the docstring and
        # the rest of the module (cf. `old_data is util.Unset`).
        return util.Unset
Example #5
0
def cache_file_write(data):
    """
    Atomically write `data` (pretty-printed JSON plus a trailing
    newline) to the cache file. Failures are logged rather than raised.
    """
    temp_file = None
    try:
        with atomicwrites.atomic_write(CACHE_FILE, overwrite=True) as temp_file:
            json.dump(data, temp_file, indent=2)
            temp_file.write("\n")
    except OSError as e:
        util.warn("Failed to write cache file: {}".format(e))
    finally:
        # Best-effort removal of the temporary file that may be left
        # behind when the atomic rename did not happen. After a
        # successful rename the unlink simply raises (and swallows)
        # FileNotFoundError.
        if temp_file is not None:
            try:
                pathlib.Path(temp_file.name).unlink()
            except OSError:
                pass