def monolith():
    """
    Generates document-download work and pushes it onto the Redis queue.

    Reads the regulations.gov API key from config, asks the API how many
    documents exist, builds one URL per result page (1000 documents per
    page via ``rpp=1000`` and the ``po`` offset parameter), and queues the
    URLs in jobs of up to 1000 URLs each.

    :return: 0 if the record-count API request fails, otherwise None.
    """

    url_base = "https://api.data.gov/regulations/v3/documents.json?rpp=1000"

    r = redis_manager.RedisManager(redis.Redis())

    regulations_key = config.read_value('key')

    # Guard clause: without a key there is nothing to do.
    if regulations_key == "":
        print("No API Key!")
        return

    # Gets number of documents available to download
    try:
        record_count = requests.get(
            "https://api.data.gov/regulations/v3/documents.json?api_key=" +
            regulations_key + "&countsOnly=1").json()["totalNumRecords"]
    # Narrowed from a bare except: network failures, non-JSON bodies, and
    # a missing "totalNumRecords" field are the failure modes here.
    except (requests.RequestException, ValueError, KeyError):
        logger.error('Error occurred with API request')
        print("Error occurred with docs_work_gen regulations API request.")
        return 0

    # Total result pages at 1000 documents per page.  Ceiling division so a
    # trailing partial page is still fetched; the previous floor division
    # silently dropped up to 999 documents.
    max_page_hit = -(-record_count // 1000)

    # This loop generates lists of URLs, sending out a job and writing them
    # to the work server every 1000 URLs.  It stops and sends whatever is
    # left once the max page limit is reached.
    current_page = 0
    while current_page < max_page_hit:
        url_list = []
        for _ in range(1000):
            # Offset of the first document on this page.  Offsets start at
            # 0; the original incremented before computing the offset and
            # therefore never requested the first 1000 documents.
            url_list.append(url_base + "&po=" + str(current_page * 1000))
            current_page += 1

            if current_page == max_page_hit:
                break

        # Job format: [random 16-char job id, job type, list of URLs].
        docs_work = [
            ''.join(
                random.choices(string.ascii_letters + string.digits,
                               k=16)), "docs", url_list
        ]
        r.add_to_queue(endpoints.generate_json(docs_work))
# ----- Example #2 -----
import requests
import mirrulations_core.config as config
from mirrulations.mirrulations_logging import logger

# regulations.gov API key, read from the shared mirrulations config file.
key = config.read_value('key')


def call(url):
    """
    Sends an API call to regulations.gov
    Raises exceptions if it is not a valid API call
    When a 300 status code is given, return a temporary exception so the user can retry the API call
    When a 429 status code is given, the user is out of API calls and must wait an hour to make more
    When 400 or 500 status codes are given there is a problem with the API connection
    :param url: the url that will be used to make the API call
    :return: returns the json format information of the documents
    """
    logger.warning('Making API call...')
    response = requests.get(url)
    code = response.status_code

    # Map the failing status-code ranges onto their exception classes.
    # 429 is tested first so it is not swallowed by the 4xx/5xx range.
    failure = None
    if code == 429:
        failure = ApiCountZeroException
    elif 300 <= code < 400:
        failure = TemporaryException
    elif 400 <= code < 600:
        failure = PermanentException

    if failure is not None:
        logger.warning('API call failed')
        raise failure

    logger.warning('API call successfully made')
    return response
from mirrulations.api_call_management import *
import json
import logging
import mirrulations_core.config as config

# Accumulator for per-document work; documents_processor declares it global
# and resets it to [] on every run.
workfiles = []
# API version string embedded in outgoing data.
version = "v1.3"

# regulations.gov API key and this client's identifier, from shared config.
key = config.read_value('key')
client_id = config.read_value('client_id')

# Log record format; clientip/user are injected via the `extra=d` argument.
FORMAT = '%(asctime)-15s %(clientip)s %(user)-8s %(message)s'
logging.basicConfig(filename='documents_processor.log', format=FORMAT)
# NOTE(review): clientip looks like a hard-coded placeholder — confirm.
d = {'clientip': '192.168.0.1', 'user': client_id}
logger = logging.getLogger('tcpserver')


def documents_processor(urls, job_id, client_id):
    """
    Call each url in the list, process the results of the calls and then form a json file to send back the results
    :param urls: list of urls that have to be called
    :param job_id: the id of the job that is being worked on currently
    :param client_id: id of the client calling this function
    :return result: the json to be returned to the server after each call is processed
    """
    global workfiles
    workfiles = []
    logger.debug('Call Successful: %s',
                 'documents_processor: Processing documents',
                 extra=d)
    logger.info('Processing documents into JSON...')
# ----- Example #4 -----
import mirrulations.document_processor as doc
import mirrulations.documents_processor as docs
import mirrulations.api_call_management as man
import requests
import json
import time
import logging
import shutil
import tempfile
from pathlib import Path
import mirrulations_core.config as config

# These variables are specific to the current implementation
# API version string.
version = "v1.3"

# Work-server address, assembled from values in the shared config file.
ip = config.read_value('ip')
port = config.read_value('port')

serverurl = "http://" + ip + ":" + port

# regulations.gov API key and this client's identifier.
key = config.read_value('key')
client_id = config.read_value('client_id')

# Log record format; clientip/user are injected via the `extra=d` argument.
FORMAT = '%(asctime)-15s %(clientip)s %(user)-8s %(message)s'
logging.basicConfig(filename='client.log', format=FORMAT)
# NOTE(review): clientip looks like a hard-coded placeholder — confirm.
d = {'clientip': '192.168.0.1', 'user': client_id}
logger = logging.getLogger('tcpserver')

# hc-ping.com URL pinged to report client liveness.
client_health_url = "https://hc-ping.com/457a1034-83d4-4a62-8b69-c71060db3a08"