def return_doc(json_result, client_id):
    """
    Handles the document processing necessary for a job and returns the
    results to the server.

    The documents named in the job are processed, the client log files are
    added to the processor's output directory, that directory is zipped into
    ``result.zip`` (in the current working directory) and the archive is
    posted to the /return_doc endpoint together with the job metadata.

    :param json_result: the json received from the /get_work endpoint
    :param client_id: the id of the client that is processing the
                      documents job
    :return: result from calling /return_doc
    :raises: whatever ``r.raise_for_status()`` raises when the server
             answers with an error status
    """
    job_id, doc_dicts = get_json_info(json_result)
    doc_ids = [dic['id'] for dic in doc_dicts]

    # NOTE(review): document_processor is expected to return an object
    # exposing a ``name`` directory path (looks like a TemporaryDirectory)
    # -- confirm against doc.document_processor.
    path = doc.document_processor(doc_ids)
    add_client_log_files(path.name, ".")
    shutil.make_archive("result", "zip", path.name)

    # The archive is opened in a context manager so the handle is always
    # released, even when the upload raises.
    with open('result.zip', 'rb') as fileobj:
        r = requests.post(serverurl + "/return_doc",
                          files={'file': ('result.zip', fileobj)},
                          data={
                              'json': json.dumps({
                                  "job_id": job_id,
                                  "type": "doc",
                                  "user": client_id,
                                  "version": version
                              })
                          })
    r.raise_for_status()
    logger.warning('Returned Docs')
    # Start a fresh log file now that the current one has been shipped.
    logger.handlers[0].doRollover()
    return r
def api_call_manager(url):
    """
    Make an API call, retrying according to the kind of failure.

    - Success: the result of ``call(url)`` is returned immediately.
    - TemporaryException: wait 5 minutes and retry; these failures are
      counted and the loop gives up once 51 of them have occurred.
    - ApiCountZeroException: wait one hour for the API quota to refresh
      and retry; these waits are not counted against the retry budget.
    - PermanentException: stop retrying straight away.

    :param url: the url that will be used to make the API call
    :return: returns the resulting information of the documents
    :raises CallFailException: if the call permanently failed or the
                               retry budget was exhausted
    """
    temporary_failures = 0
    while temporary_failures <= 50:
        try:
            return call(url)
        except TemporaryException:
            logger.error('API call Error, waiting 5 minutes')
            time.sleep(300)
            temporary_failures = temporary_failures + 1
        except PermanentException:
            logger.error('API call Error')
            break
        except ApiCountZeroException:
            logger.warning('API calls exhausted')
            time.sleep(3600)

    logger.error('API call failed')
    raise CallFailException
def get_work(server_url, client_id):
    """
    Fetch work to process from the server's /get_work endpoint.

    :param server_url: base url of the server; the endpoint path and the
                       client_id query parameter are appended to it
    :param client_id: the id of the client calling /get_work
    :return: the result of making a call to get work
    """
    endpoint = '/get_work?client_id=' + client_id
    work = man.api_call_manager(server_url + endpoint)
    logger.warning('Obtained work from server.')
    return work
def document_id_beginning_is_letter(document_id):
    """
    Check whether a document id starts with an alphabetic character.

    An empty id has no first character and is therefore reported as not
    starting with a letter (instead of raising IndexError).

    :param document_id: the document id string to inspect
    :return: True if the first character is a letter, False otherwise
             (a warning is logged in the False case)
    """
    # Slicing instead of indexing yields '' for an empty id, and
    # ''.isalpha() is False, so the empty case falls through to the warning.
    if document_id[:1].isalpha():
        return True
    logger.warning('Document ID does not begin with a letter')
    return False
def get_keys_from_progress_no_lock(self, job_id):
    """
    Find the key under which a job is stored in the "progress" hash.

    Every key of the "progress" hash is visited; the stored job is fetched
    with get_specific_job_from_progress_no_lock, parsed with literal_eval
    and its "job_id" compared with the requested one.

    :param job_id: The id of the job you want to get the key for
    :return: the matching key decoded as utf-8 if the job exists,
             or -1 if no job in "progress" has that id
    """
    progress_keys = self.r.hgetall('progress')
    logger.warning(
        'Variable Success: %s',
        'get_keys_from_progress_no_lock: list of keys successfully received'
    )
    logger.warning('CLIENT_JOB_ID: %s',
                   'get_keys_from_progress_no_lock: ' + str(job_id))

    for key in progress_keys:
        logger.warning('CURRENT_KEY: %s', key)
        logger.warning(
            'Assign Variable: %s',
            'get_keys_from_progress_no_lock: attempt to get the json using the key'
        )
        stored_job = self.get_specific_job_from_progress_no_lock(key)
        job = literal_eval(stored_job)
        if job["job_id"] != job_id:
            continue
        # Keys are decoded before being handed back
        # (presumably bytes coming from redis -- verify against caller).
        return key.decode("utf-8")

    return -1
def add_document_job_to_queue(redis_server, json_data):
    """
    Turn every work file into a document job and push it onto the "queue".

    Each job gets a random 16 character id built from ASCII letters and
    digits.

    :param redis_server: object whose add_to_queue method receives each job
    :param json_data: the json data containing all the work files
                      (read from its "data" entry)
    :return: None
    """
    logger.warning('Adding document job to the queue...')
    id_alphabet = string.ascii_letters + string.digits

    for work_file in json_data["data"]:
        id_characters = random.choices(id_alphabet, k=16)
        new_job = create_document_job(work_file, ''.join(id_characters))
        redis_server.add_to_queue(new_job)
        logger.warning('Document job successfully added to queue')
def copy_file_safely(directory, filepath):
    """
    Copy a file into a directory, but only when both of them exist.

    When the file or the directory is missing nothing is copied and a
    warning is logged instead.

    :param directory: Directory to copy to
    :param filepath: File to copy
    """
    # The file is checked first, so a missing file is reported even when
    # the directory is missing as well.
    if not Path(filepath).exists():
        logger.warning('File not copied, file does not exist')
        return

    if not Path(directory).exists():
        logger.warning('File not copied, directory does not exist')
        return

    shutil.copy(filepath, directory)
def save_client_log(client_id, compressed_file):
    """
    Extract a client's zip archive and store its log files on the server.

    The archive is unpacked into a temporary directory; every top-level
    entry whose name ends with '.log' is copied into
    ``<client path>client-logs/<client_id>/`` (created on demand). The
    archive handle and the temporary directory are both released when the
    function finishes, even on error.

    :param client_id: id of the client; used as the name of the
                      sub-directory the logs are saved in
    :param compressed_file: the zip archive (path or file object accepted
                            by zipfile.ZipFile) sent by the client
    :return: None
    """
    logger.warning('ms/docs_filter/save_client_log: function called')
    client_path = config.server_read_value(
        'client path') + 'client-logs/' + client_id + '/'

    with zipfile.ZipFile(compressed_file, 'r') as files, \
            tempfile.TemporaryDirectory() as temp_directory:
        files.extractall(temp_directory)

        # Only the top level of the extracted archive is inspected.
        for file_name in os.listdir(temp_directory):
            if not file_name.endswith('.log'):
                continue
            logger.warning('ms/docs_filter/save_client_log: found file, ' +
                           str(file_name) + ', that ends with log')
            # exist_ok makes the "create if missing" step safe to repeat
            # for every log file.
            os.makedirs(client_path, exist_ok=True)
            shutil.copy(os.path.join(temp_directory, file_name), client_path)
            logger.warning('ms/docs_filter/save_client_log: '
                           'saving log to client-logs directory')
def create_document_job(work_file, job_id):
    """
    Build the json description of a document job for the server to hand
    out to clients.

    :param work_file: The list of ids for the clients to retrieve
    :param job_id: The id for the job
    :return: A json string with the keys job_id, type ('doc'), data and
             version
    """
    logger.warning('Creating document job...')
    job = dict(job_id=job_id, type='doc', data=work_file, version=VERSION)
    return json.dumps(job)
def get_work():
    """
    Endpoint the user will use to get work from the queue.

    The request must carry exactly one query parameter, client_id.

    :return: the json containing the work fetched from the redis server;
             ('Parameter Missing', 400) when the number of query
             parameters is not exactly one;
             ('Bad Parameter', 400) when client_id is absent
    """
    logger.warning("Successful API Call: %s", 'get_work: get_work')

    query_args = request.args
    if len(query_args) != 1:
        logger.error('Error - number of parameters incorrect')
        return 'Parameter Missing', 400

    client_id = query_args.get('client_id')
    if client_id is None:
        logger.warning("Exception: %s",
                       'get_work: BadParameterException, client id was none')
        logger.error('Error - no client ID')
        return 'Bad Parameter', 400

    work = redis_server().get_work()
    return json.dumps(work)
def check_workfile_length(json_data):
    """
    Checks the file count and attachment count of each work file.

    The counters are reset for every work file, so the limits apply to
    each work file on its own rather than to the running total over all
    work files. An empty list of work files passes the check.

    :param json_data: the json containing the work files under "data";
                      each work file is an iterable of entries carrying
                      a "count" value (the attachment count)
    :return: True if every work file has 1000 or less document ids and
             1000 or less attachments
             False as soon as one work file exceeds either limit
    """
    for work_file in json_data['data']:
        file_count = 0
        attachment_count = 0
        for line in work_file:
            file_count += 1
            attachment_count += line["count"]

        if file_count > 1000 or attachment_count > 1000:
            return False

    logger.warning('Workfile length check completed')
    return True
def return_docs(json_result, client_id):
    """
    Handles the documents processing necessary for a job and returns the
    results to the server.

    The job's urls are processed, the client log files are gathered in a
    temporary directory, that directory is zipped into ``result.zip`` (in
    the current working directory) and the archive is posted to the
    /return_docs endpoint together with the processing result.

    :param json_result: the json received from the /get_work endpoint
    :param client_id: the id of the client that is processing the
                      documents job
    :return: result from calling /return_docs
    :raises: whatever ``r.raise_for_status()`` raises when the server
             answers with an error status
    """
    job_id, urls = get_json_info(json_result)
    json_info = docs.documents_processor(urls, job_id, client_id)

    # The staging directory is only needed until the archive is built, so
    # it is removed as soon as make_archive has finished.
    with tempfile.TemporaryDirectory() as staging_dir:
        add_client_log_files(staging_dir, ".")
        shutil.make_archive("result", "zip", staging_dir)

    # The archive is opened in a context manager so the handle is always
    # released, even when the upload raises.
    with open('result.zip', 'rb') as fileobj:
        r = requests.post(serverurl + "/return_docs",
                          files={'file': fileobj},
                          data={'json': json.dumps(json_info)})
    r.raise_for_status()
    logger.warning('Returned Docs')
    # Start a fresh log file now that the current one has been shipped.
    logger.handlers[0].doRollover()
    return r
def call(url):
    """
    Send a GET request to the given url and classify the response.

    The status code decides the outcome, checked in this order:
    - 300-399: TemporaryException, so the caller can retry the call
    - 429: ApiCountZeroException, the user is out of API calls
    - any other 400-599 code: PermanentException, there is a problem with
      the API connection
    Every failure is logged before the exception is raised.

    :param url: the url that will be used to make the API call
    :return: the response object of a successful call
    """
    logger.warning('Making API call...')
    response = requests.get(url)
    status = response.status_code

    if 300 <= status < 400:
        failure = TemporaryException
    elif status == 429:
        # Must be tested before the generic 4xx/5xx range below.
        failure = ApiCountZeroException
    elif 400 <= status < 600:
        failure = PermanentException
    else:
        failure = None

    if failure is not None:
        logger.warning('API call failed')
        raise failure

    logger.warning('API call successfully made')
    return response
def add_client_log(directory_to_send_to, filepath='./mirrulations.log'):
    """
    Copy the client log file into a directory, but only when both the
    file and the directory exist.

    A warning is logged in every case: on success, when the file is
    missing, and when the target directory is missing.

    :param directory_to_send_to: Directory to copy to
    :param filepath: File to copy (defaults to './mirrulations.log')
    """
    # The file is checked first, so a missing file is reported even when
    # the directory is missing as well.
    if not Path(filepath).exists():
        logger.warning('File not copied, file does not exist')
        return

    if not Path(directory_to_send_to).exists():
        logger.warning('File not copied, '
                       'directory_to_send_to does not exist')
        return

    shutil.copy(filepath, directory_to_send_to)
    logger.warning('mcl/client/add_client_log: '
                   'mirrulations.log successfully copied')