Beispiel #1
0
def return_doc(json_result, client_id):
    """
    Handles the document processing necessary for a job
    Calls the /return_doc endpoint of the server to return data for the job it completed
    :param json_result: the json received from the /get_work endpoint
    :param client_id: the id of the client that is processing the documents job
    :return: result from calling /return_doc
    """

    job_id, doc_dicts = get_json_info(json_result)
    doc_ids = [dic['id'] for dic in doc_dicts]
    path = doc.document_processor(doc_ids)
    add_client_log_files(path.name, ".")
    shutil.make_archive("result", "zip", path.name)
    # Context manager guarantees the archive handle is closed even if the
    # POST raises (the original leaked the open file object).
    with open('result.zip', 'rb') as fileobj:
        r = requests.post(serverurl + "/return_doc",
                          files={'file': ('result.zip', fileobj)},
                          data={
                              'json':
                              json.dumps({
                                  "job_id": job_id,
                                  "type": "doc",
                                  "user": client_id,
                                  "version": version
                              })
                          })
    r.raise_for_status()
    logger.warning('Returned Docs')
    logger.handlers[0].doRollover()
    return r
def api_call_manager(url):
    """
    Repeatedly attempt the API call at *url* until it succeeds or fails for good.
    A temporary error sleeps 5 minutes and consumes one retry (51 attempts max);
    an exhausted API count sleeps one hour without consuming a retry;
    a permanent error aborts immediately.
    Raises CallFailException when no attempt succeeds.
    :param url: the url that will be used to make the API call
    :return: returns the resulting information of the documents
    """

    attempts = 0
    while attempts < 51:
        try:
            return call(url)
        except TemporaryException:
            logger.error('API call Error, waiting 5 minutes')
            time.sleep(300)
            attempts += 1
        except PermanentException:
            logger.error('API call Error')
            break
        except ApiCountZeroException:
            # Does not count against the retry budget; the hourly quota
            # simply needs time to refresh.
            logger.warning('API calls exhausted')
            time.sleep(3600)
    logger.error('API call failed')
    raise CallFailException
Beispiel #3
0
def get_work(server_url, client_id):
    """
    Ask the server for work to process via its /get_work endpoint.
    :param server_url: base url of the server to contact
    :param client_id: the id of the client calling /get_work
    :return: the result of making a call to get work
    """
    work = man.api_call_manager(
        '{}/get_work?client_id={}'.format(server_url, client_id))
    logger.warning('Obtained work from server.')
    return work
Beispiel #4
0
def document_id_beginning_is_letter(document_id):
    """
    Check whether a document id begins with an alphabetic character.
    :param document_id: the document id string to validate
    :return: True if the first character is a letter, False otherwise
             (including when the id is empty)
    """
    # Guard against an empty id: the original indexed document_id[0]
    # unconditionally and raised IndexError on "".
    if not document_id:
        logger.warning('Document ID does not begin with a letter')
        return False

    if document_id[0].isalpha():
        return True
    logger.warning('Document ID does not begin with a letter')
    return False
Beispiel #5
0
    def get_keys_from_progress_no_lock(self, job_id):
        """
        Search the "progress" hash for the entry whose stored json carries
        the given job id.
        :param job_id: The id of the job you want to get the key for
        :return: the matching key decoded as utf-8, or -1 if no entry matches
        """
        progress_keys = self.r.hgetall('progress')
        logger.warning(
            'Variable Success: %s',
            'get_keys_from_progress_no_lock: list of keys successfully received'
        )
        logger.warning('CLIENT_JOB_ID: %s',
                       'get_keys_from_progress_no_lock: ' + str(job_id))
        for current_key in progress_keys:
            logger.warning('CURRENT_KEY: %s', current_key)
            logger.warning(
                'Assign Variable: %s',
                'get_keys_from_progress_no_lock: attempt to get the json using the key'
            )

            job_json = self.get_specific_job_from_progress_no_lock(current_key)
            job_data = literal_eval(job_json)
            if job_data['job_id'] == job_id:
                return current_key.decode('utf-8')
        return -1
Beispiel #6
0
def add_document_job_to_queue(redis_server, json_data):
    """
    Build a job for every work file in the given json and push each one
    onto the "queue".
    :param redis_server: redis wrapper exposing add_to_queue
    :param json_data: the json data containing all the work files
    :return:
    """
    logger.warning('Adding document job to the queue...')

    id_alphabet = string.ascii_letters + string.digits
    for work_file in json_data["data"]:
        job_id = ''.join(random.choices(id_alphabet, k=16))
        redis_server.add_to_queue(create_document_job(work_file, job_id))
        logger.warning('Document job successfully added to queue')
Beispiel #7
0
def copy_file_safely(directory, filepath):
    """
    Copy *filepath* into *directory* only when both exist; otherwise log
    a warning and do nothing.
    :param directory: Directory to copy to
    :param filepath: File to copy
    """

    # Guard clauses replace the original nested if/else.
    if not Path(filepath).exists():
        logger.warning('File not copied, file does not exist')
        return
    if not Path(directory).exists():
        logger.warning('File not copied, directory does not exist')
        return
    shutil.copy(filepath, directory)
Beispiel #8
0
def save_client_log(client_id, compressed_file):
    """
    Extract a client's compressed log archive and save every .log file it
    contains into that client's directory under the server's client-logs path.
    :param client_id: id of the client whose logs are being saved
    :param compressed_file: the zip archive uploaded by the client
    :return: None
    """
    logger.warning('ms/docs_filter/save_client_log: function called')
    client_path = config.server_read_value(
        'client path') + 'client-logs/' + client_id + '/'

    temp_directory_path = str(tempfile.mkdtemp() + '/')
    # Close the archive deterministically; the original ZipFile handle
    # was never closed.
    with zipfile.ZipFile(compressed_file, 'r') as files:
        files.extractall(temp_directory_path)

    # Create a list of all the files in the directory
    for file in os.listdir(temp_directory_path):
        if file.endswith('.log'):
            logger.warning('ms/docs_filter/save_client_log: found file, ' +
                           str(file) + ', that ends with log')
            # exist_ok collapses the original duplicated if/else branches
            # (both of which copied and logged identically).
            os.makedirs(client_path, exist_ok=True)
            shutil.copy(temp_directory_path + file, client_path)
            logger.warning('ms/docs_filter/save_client_log: '
                           'saving log to client-logs directory')
Beispiel #9
0
def create_document_job(work_file, job_id):
    """
    Build the json payload for a document job the server hands to clients.
    :param work_file: The list of ids for the clients to retrieve
    :param job_id: The id for the job
    :return: A json in the form of a dictionary
    """
    logger.warning('Creating document job...')
    return json.dumps({
        'job_id': job_id,
        'type': 'doc',
        'data': work_file,
        'version': VERSION,
    })
Beispiel #10
0
def get_work():
    """
    Endpoint the user will use to get work from the queue.
    Expects exactly one query parameter, client_id (used for logging).
    :return: json containing the job_id, the type of work to be done, the
             work itself and the version number, or a 400 response when the
             parameters are bad
    """
    logger.warning("Successful API Call: %s", 'get_work: get_work')
    params = request.args
    if len(params) != 1:
        logger.error('Error - number of parameters incorrect')
        return 'Parameter Missing', 400
    client_id = params.get('client_id')
    if client_id is None:
        logger.warning("Exception: %s", 'get_work: BadParameterException, client id was none')
        logger.error('Error - no client ID')
        return 'Bad Parameter', 400
    return json.dumps(redis_server().get_work())
Beispiel #11
0
def check_workfile_length(json_data):
    """
    Checks the file count and attachment count of each work file
    :param json_data: the json containing the work files
    :return: True if every work file has 1000 or less document ids and 1000
             or less attachments; False if either count exceeds 1000 for
             any work file
    """
    # The original returned inside the outer loop, so only the FIRST work
    # file was ever validated; it also never reset the counters despite
    # documenting per-work-file limits. Both are fixed here.
    for work_file in json_data['data']:
        file_count = len(work_file)
        attachment_count = sum(line["count"] for line in work_file)
        if file_count > 1000 or attachment_count > 1000:
            return False

    logger.warning('Workfile length check completed')
    return True
Beispiel #12
0
def return_docs(json_result, client_id):
    """
    Handles the documents processing necessary for a job
    Calls the /return_docs endpoint of the server to return data for the job it completed
    :param json_result: the json received from the /get_work endpoint
    :param client_id: the id of the client that is processing the documents job
    :return: result from calling /return_docs
    """

    job_id, urls = get_json_info(json_result)
    json_info = docs.documents_processor(urls, job_id, client_id)
    path = tempfile.TemporaryDirectory()
    add_client_log_files(path.name, ".")
    shutil.make_archive("result", "zip", path.name)
    # Context manager guarantees the archive handle is closed even if the
    # POST raises (the original leaked the open file object).
    with open('result.zip', 'rb') as fileobj:
        r = requests.post(serverurl + "/return_docs",
                          files={'file': fileobj},
                          data={'json': json.dumps(json_info)})
    r.raise_for_status()
    logger.warning('Returned Docs')
    logger.handlers[0].doRollover()
    return r
Beispiel #13
0
def call(url):
    """
    Send an API call to regulations.gov and validate the response status.
    300-level statuses raise TemporaryException so the caller can retry;
    429 raises ApiCountZeroException (quota exhausted, wait an hour);
    400/500-level statuses raise PermanentException (broken connection).
    :param url: the url that will be used to make the API call
    :return: returns the json format information of the documents
    """
    logger.warning('Making API call...')
    response = requests.get(url)
    status = response.status_code
    # Same check order as before: 429 is classified before the generic
    # 400-599 range swallows it.
    if 300 <= status < 400:
        failure = TemporaryException
    elif status == 429:
        failure = ApiCountZeroException
    elif 400 <= status < 600:
        failure = PermanentException
    else:
        logger.warning('API call successfully made')
        return response
    logger.warning('API call failed')
    raise failure
Beispiel #14
0
def add_client_log(directory_to_send_to, filepath='./mirrulations.log'):
    """
    Copy the client log file into a directory when both the file and the
    directory exist; otherwise log a warning and do nothing.
    :param directory_to_send_to: Directory to copy to
    :param filepath: File to copy
    """

    # Guard clauses replace the original nested if/else.
    if not Path(filepath).exists():
        logger.warning('File not copied, file does not exist')
        return
    if not Path(directory_to_send_to).exists():
        logger.warning('File not copied, '
                       'directory_to_send_to does not exist')
        return
    shutil.copy(filepath, directory_to_send_to)
    logger.warning('mcl/client/add_client_log: '
                   'mirrulations.log successfully copied')