def query_data(file_types, query_logical_expressions, output_file_names,
               cache_timestamp=None):
    """
    Query claim data for each requested file type and write the results to
    the corresponding output files.

    @param file_types the file types, a list of non-None/empty str, each one
        of: 'beneficiary', 'carrier', 'inpatient', 'outpatient',
        'prescription'. Required.
    @param query_logical_expressions the query logical expressions, a list
        of non-None/empty str, one per file type. Required.
    @param output_file_names the output file names (including full paths);
        this function assumes the files exist. A non-None/empty dict mapping
        file type (must be one of the strings in file_types) to output file
        name (non-None/empty str). Required.
    @param cache_timestamp the cache timestamp, None or a datetime.
        Optional, defaults to None.
    @return True if the cache is used, False otherwise
    @throws TypeError if any argument is of the wrong type
    @throws ValueError if any argument is invalid (see parameter docs)
    @throws DataQueryError if any other error occurs during the operation
    """
    signature = 'hfppnetwork.partner.httpservices.dataappliance.query_data'
    method_enter(signature, {
        "file_types": file_types,
        "query_logical_expressions": query_logical_expressions,
        "output_file_names": output_file_names,
        "cache_timestamp": cache_timestamp
    })
    # Parameters checking
    acceptable_types = ('beneficiary', 'carrier', 'inpatient', 'outpatient',
                        'prescription')
    check_str_list('file_types', file_types)
    for one_type in file_types:
        if one_type not in acceptable_types:
            raise ValueError('File type ' + one_type + ' not acceptable.')
    check_str_list('query_logical_expressions', query_logical_expressions)
    if len(query_logical_expressions) != len(file_types):
        raise ValueError(
            'query_logical_expressions and file_types length not match.')
    check_dict('output_file_names', output_file_names)
    if len(output_file_names) != len(file_types):
        raise ValueError('output_file_names and file_types length not match.')
    if cache_timestamp is not None:
        check_datetime('cache_timestamp', cache_timestamp)
    # Check if cache needs to be updated
    try:
        use_cache = check_use_cache(file_types, cache_timestamp)
    except Exception as e:
        # BUGFIX: was a bare "except:", which also converted
        # KeyboardInterrupt/SystemExit into DataQueryError and discarded the
        # original cause; narrow to Exception and chain the cause.
        raise DataQueryError('Error occurs during checking cache data.') from e
    # Loading data from database if not use cache
    if not use_cache:
        try:
            # Idiom: iterate the two parallel lists with zip() instead of
            # maintaining a manual type_index counter.
            for one_type, expression in zip(file_types,
                                            query_logical_expressions):
                # Parse and generate query string
                parser = Parser()
                generator = SQLGenerator()
                parse_tree = parser.parse(expression.strip())
                sql = generator.visit(parse_tree)
                logging.debug("sql %s ", sql)
                # Instantiate the persistence implementation matching the
                # configured database type
                if config.dbconfig["type"] == 'redis':
                    claimPersistence = RedisClaimPersistence()
                elif config.dbconfig["type"] == "mysql":
                    claimPersistence = MySQLClaimPersistence()
                else:
                    raise ValueError("Invalid db type: "
                                     + config.dbconfig["type"])
                claimPersistence.connectionConfig = config.dbconfig
                claimFile = CSVClaimFile()
                processor = ClaimFileProcessor()
                processor.claimPersistence = claimPersistence
                processor.claimFile = claimFile
                # Fall back to the always-true filter when no SQL was
                # generated for this expression
                processor.queryData(one_type,
                                    sql if sql else '1=1',
                                    0, 0, output_file_names[one_type])
        except ParserError as e:
            raise DataQueryError(
                'Error occurs during parsing query string.') from e
        except Exception as e:
            # BUGFIX: narrowed from a bare "except:" (see above).
            raise DataQueryError('Error occurs during querying data.') from e
    method_exit(signature, use_cache)
    return use_cache
def handle_data_request(self, request_id, study_id, query, expiration_time,
                        cache_available=False, cache_timestamp=None,
                        force_fullfil=False):
    """
    Handle a data request end-to-end: validate arguments, query and convert
    the requested data, build the DataResponse XML, and POST it to the
    Network Node /data_response service.

    This method does not throw exceptions; any error is caught and logged.

    @param self the DataRequestHandler itself, it should be
        DataRequestHandler
    @param request_id the request ID, a non-None/empty str. Required.
    @param study_id the study ID, a non-None/empty str. Required.
    @param query the query string (JSON), a non-None/empty str. Required.
    @param expiration_time the request expiration time, a non-None datetime.
        Required.
    @param cache_available whether cache is available, a bool. Optional,
        defaults to False.
    @param cache_timestamp the cache timestamp, a datetime. Optional,
        defaults to None.
    @param force_fullfil set to True when this method is called by the
        decision module.
    """
    signature = 'hfppnetwork.partner.httpservices.DataRequestHandler.handle_data_request'
    method_enter(signature, {
        "self": self,
        "request_id": request_id,
        "study_id": study_id,
        "query": query,
        "expiration_time": expiration_time,
        "cache_available": cache_available,
        "cache_timestamp": cache_timestamp
    })
    # Dictionary to hold data query result file names
    query_result_file_names = {}
    # BUGFIX: the names below are referenced in the finally block but were
    # previously first assigned inside the try block.  An early failure
    # (e.g. argument validation raising before the assignments) made the
    # finally block itself raise NameError, violating this method's
    # no-throw contract.  Initialize them up front.
    query_dict = None
    # Dictionary to hold data conversion result file names
    conversion_result_file_names = {}
    # Data Response XML file name
    response_xml_file_name = None
    # Compressed file name
    compressed_file_name = None
    try:
        # check input arguments
        check_string("request_id", request_id)
        check_string("study_id", study_id)
        check_string("query", query)
        check_datetime("expiration_time", expiration_time)
        check_bool("cache_available", cache_available)
        if cache_timestamp is not None:
            check_datetime("cache_timestamp", cache_timestamp)
        # Parse the query string; a malformed query is logged and treated
        # as "cannot fulfill" rather than raising.
        try:
            query_dict = json.loads(query)
        except ValueError as e:
            query_dict = None
            method_error(signature, e)
        # Check if we can fulfill the data request
        can_fulfill_request = can_fulfill_data_request(
            request_id, study_id, query, expiration_time,
            cache_available, cache_timestamp, force_fullfil)
        logging.debug('%s:%s', 'can_fulfill_request', can_fulfill_request)
        if query_dict is not None and 'file_types' in query_dict \
                and 'logical_expressions' in query_dict \
                and can_fulfill_request:
            # Can fulfill the request, create temporary files
            for file_type in query_dict['file_types']:
                query_result_file_names[file_type] = \
                    tempfile.NamedTemporaryFile(delete=False).name
                conversion_result_file_names[file_type] = \
                    tempfile.NamedTemporaryFile(delete=False).name
            response_xml_file_name = \
                tempfile.NamedTemporaryFile(delete=False).name
            compressed_file_name = \
                tempfile.NamedTemporaryFile(delete=False).name
            # Query data
            use_cache = query_data(
                query_dict['file_types'],
                query_dict['logical_expressions'],
                query_result_file_names,
                cache_timestamp if cache_available else None)
            with open(response_xml_file_name, 'ab') as response_xml_file:
                # Write XML header of the successful response
                xml = '<?xml version="1.0" encoding="utf-8"?>' \
                      '<DataResponse>' \
                      '<RequestID>{request_id}</RequestID>' \
                      '<RequestDenied>false</RequestDenied>' \
                      '<ErrorMessage></ErrorMessage>' \
                      '<Data useCache="{use_cache}"><![CDATA['.format(
                          request_id=request_id,
                          use_cache='true' if use_cache else 'false')
                response_xml_file.write(xml.encode('utf-8'))
                if not use_cache:
                    logging.debug(
                        'not use cache will use result from converted data')
                    # Convert data
                    for file_type in query_dict['file_types']:
                        convert_data(file_type,
                                     query_result_file_names[file_type],
                                     conversion_result_file_names[file_type])
                    # Aggregate and compress data
                    compressor = zlib.compressobj(level=9)
                    with open(compressed_file_name, 'wb') as out_file:
                        for file_type in query_dict['file_types']:
                            with open(conversion_result_file_names[file_type],
                                      'rb') as in_file:
                                out_file.write(
                                    compressor.compress(in_file.read()))
                        out_file.write(compressor.flush())
                # Encode in Base64 (the compressed file is empty when the
                # cache is used, so nothing is emitted in that case)
                with open(compressed_file_name, 'rb') as in_file:
                    base64.encode(in_file, response_xml_file)
                # Close the CDATA section and the XML document
                response_xml_file.write(
                    ']]></Data></DataResponse>'.encode('utf-8'))
        # POST XML to Network Node /data_response service
        if datetime.now(timezone.utc) < expiration_time:
            logging.debug('post to data response url %s%s',
                          HFPP_NODE_HTTP_SERVICE_BASE_URL, '/data_response')
            # Only POST the XML if the request has not been expired
            request = urllib.request.Request(
                HFPP_NODE_HTTP_SERVICE_BASE_URL + '/data_response')
            request.add_header('Content-Type',
                               'application/xml;charset=utf-8')
            request.add_header('x-hfpp-username', HFPP_PARTNER_USERNAME)
            request.add_header('x-hfpp-password', HFPP_PARTNER_PASSWORD)
            if response_xml_file_name is not None and can_fulfill_request:
                # Stream the response XML from disk via mmap to avoid
                # loading the whole payload into memory
                with open(response_xml_file_name, 'rb') as in_file, \
                        mmap.mmap(in_file.fileno(), 0,
                                  access=mmap.ACCESS_READ) \
                        as data_response_xml:
                    try:
                        resp = urllib.request.urlopen(
                            request, data_response_xml,
                            cafile=CA_CERTIFICATE_FILE, cadefault=True)
                        # Parse response XML
                        resp_content = resp.read().decode('utf-8')
                        logging.debug('response code:%s', resp.getcode())
                        logging.debug('response:%s', resp_content)
                    except urllib.error.HTTPError as e:
                        method_error(signature, e)
                        self._handle_error_response(e)
            else:
                # Request denied (or still waiting for approval)
                data_response_xml = \
                    '<?xml version="1.0" encoding="utf-8"?>' \
                    '<DataResponse>' \
                    '<RequestID>{request_id}</RequestID>' \
                    '<RequestDenied>true</RequestDenied>' \
                    '<ErrorMessage>{waitApproval}</ErrorMessage>' \
                    '<Data></Data>' \
                    '</DataResponse>'.format(
                        request_id=request_id,
                        waitApproval=('' if PARTNER_IMMEDIATE_FULLFIL
                                      else 'Waiting Approval'))
                logging.debug('post data response xml %s', data_response_xml)
                try:
                    resp = urllib.request.urlopen(
                        request, data_response_xml.encode('utf-8'),
                        cafile=CA_CERTIFICATE_FILE, cadefault=True)
                    # Parse response XML
                    resp_content = resp.read().decode('utf-8')
                    logging.debug('response code:%s', resp.getcode())
                    logging.debug('response:%s', resp_content)
                except urllib.error.HTTPError as e:
                    method_error(signature, e)
                    self._handle_error_response(e)
        else:
            # Request expired, log error
            logging.error('Request expired')
        method_exit(signature)
    except Exception as e:
        # log error -- this method must not propagate exceptions
        method_error(signature, e)
    finally:
        if query_dict is not None and 'file_types' in query_dict:
            # Remove temporary files
            for file_type in query_dict['file_types']:
                if file_type in query_result_file_names:
                    self._remove_file(query_result_file_names[file_type])
                if file_type in conversion_result_file_names:
                    self._remove_file(
                        conversion_result_file_names[file_type])
            self._remove_file(compressed_file_name)
            self._remove_file(response_xml_file_name)
def query_data(file_types, query_logical_expressions, output_file_names,
               cache_timestamp=None):
    """
    Query claim data for each requested file type and write the results to
    the corresponding output files.

    NOTE(review): this definition duplicates the query_data defined earlier
    in this file; at import time this later definition shadows the earlier
    one.  Consider removing one of the two copies.

    @param file_types the file types, a list of non-None/empty str, each one
        of: 'beneficiary', 'carrier', 'inpatient', 'outpatient',
        'prescription'. Required.
    @param query_logical_expressions the query logical expressions, a list
        of non-None/empty str, one per file type. Required.
    @param output_file_names the output file names (including full paths);
        this function assumes the files exist. A non-None/empty dict mapping
        file type (must be one of the strings in file_types) to output file
        name (non-None/empty str). Required.
    @param cache_timestamp the cache timestamp, None or a datetime.
        Optional, defaults to None.
    @return True if the cache is used, False otherwise
    @throws TypeError if any argument is of the wrong type
    @throws ValueError if any argument is invalid (see parameter docs)
    @throws DataQueryError if any other error occurs during the operation
    """
    signature = 'hfppnetwork.partner.httpservices.dataappliance.query_data'
    method_enter(signature, {
        "file_types": file_types,
        "query_logical_expressions": query_logical_expressions,
        "output_file_names": output_file_names,
        "cache_timestamp": cache_timestamp
    })
    # Parameters checking
    acceptable_types = ('beneficiary', 'carrier', 'inpatient', 'outpatient',
                        'prescription')
    check_str_list('file_types', file_types)
    for one_type in file_types:
        if one_type not in acceptable_types:
            raise ValueError('File type ' + one_type + ' not acceptable.')
    check_str_list('query_logical_expressions', query_logical_expressions)
    if len(query_logical_expressions) != len(file_types):
        raise ValueError(
            'query_logical_expressions and file_types length not match.')
    check_dict('output_file_names', output_file_names)
    if len(output_file_names) != len(file_types):
        raise ValueError('output_file_names and file_types length not match.')
    if cache_timestamp is not None:
        check_datetime('cache_timestamp', cache_timestamp)
    # Check if cache needs to be updated
    try:
        use_cache = check_use_cache(file_types, cache_timestamp)
    except Exception as e:
        # BUGFIX: was a bare "except:", which also converted
        # KeyboardInterrupt/SystemExit into DataQueryError and discarded
        # the original cause; narrow to Exception and chain the cause.
        raise DataQueryError('Error occurs during checking cache data.') from e
    # Loading data from database if not use cache
    if not use_cache:
        try:
            # Idiom: zip() the parallel lists instead of keeping a manual
            # type_index counter.
            for one_type, expression in zip(file_types,
                                            query_logical_expressions):
                # Parse and generate query string
                parser = Parser()
                generator = SQLGenerator()
                parse_tree = parser.parse(expression.strip())
                sql = generator.visit(parse_tree)
                logging.debug("sql %s ", sql)
                # Instantiate the persistence implementation matching the
                # configured database type
                if config.dbconfig["type"] == 'redis':
                    claimPersistence = RedisClaimPersistence()
                elif config.dbconfig["type"] == "mysql":
                    claimPersistence = MySQLClaimPersistence()
                else:
                    raise ValueError("Invalid db type: "
                                     + config.dbconfig["type"])
                claimPersistence.connectionConfig = config.dbconfig
                claimFile = CSVClaimFile()
                processor = ClaimFileProcessor()
                processor.claimPersistence = claimPersistence
                processor.claimFile = claimFile
                # Fall back to the always-true filter when no SQL was
                # generated for this expression
                processor.queryData(one_type,
                                    sql if sql else '1=1',
                                    0, 0, output_file_names[one_type])
        except ParserError as e:
            raise DataQueryError(
                'Error occurs during parsing query string.') from e
        except Exception as e:
            # BUGFIX: narrowed from a bare "except:" (see above).
            raise DataQueryError('Error occurs during querying data.') from e
    method_exit(signature, use_cache)
    return use_cache