def process_XML_grant_content(args_array):
    """Parse a USPTO patent grant bulk file and store each grant it contains.

    The XML extracted from the zip file holds many grant documents back to
    back.  Lines are grouped into one XML string per grant; each string is
    passed to ``USPTOProcessLinks.extract_data_router`` and the result to
    ``USPTOStoreGrantData.store_grant_data``.  Only the ``gXML4`` and ``gXML2``
    formats are handled; any other ``uspto_xml_format`` stores nothing.

    Args:
        args_array (dict): Run configuration.  Keys read here are
            'command_args', 'database_connection', 'database_insert_mode',
            'document_type', 'file_name', 'csv_directory',
            'uspto_xml_format' and 'url_link'.  'csv_file_array' is written
            when CSV files are opened.

    Returns:
        False if no XML could be extracted from the zip file; the load status
        (True when no bulk load is performed) on success; None when the bulk
        database load reports failure.
    """
    logger = USPTOLogger.logging.getLogger("USPTO_Database_Construction")

    command_args = args_array['command_args']

    # Capture the database connection up front, as it is only needed for bulk loading
    database_connection = None
    if "database" in command_args:
        database_connection = args_array['database_connection']

    bulk_insert = "database" in command_args and args_array['database_insert_mode'] == "bulk"

    # CSV files are required both for csv output and as staging for a bulk load
    csv_files_opened = "csv" in command_args or bulk_insert
    if csv_files_opened:
        args_array['csv_file_array'] = USPTOCSVHandler.open_csv_files(
            args_array['document_type'],
            args_array['file_name'],
            args_array['csv_directory'])

    # Set the start time of operation
    start_time = time.time()

    # Extract the XML file from the ZIP file
    xml_file_contents = USPTOProcessZipFile.extract_xml_file_from_zip(args_array)

    # Nothing to parse: release the CSV handles opened above before bailing out
    if xml_file_contents is None or xml_file_contents is False:
        if csv_files_opened:
            USPTOCSVHandler.close_csv_files(args_array)
        return False

    # Select the document delimiters and the entity sanitizer for the format
    xml_format = args_array['uspto_xml_format']
    if xml_format == "gXML4":
        start_marker = "<us-patent-grant"
        end_marker = "</us-patent-grant"
        sanitize_line = USPTOSanitizer.replace_new_html_characters
    elif xml_format == "gXML2":
        start_marker = "<PATDOC"
        end_marker = "</PATDOC"
        sanitize_line = USPTOSanitizer.replace_old_html_characters
    else:
        start_marker = None

    if start_marker is not None:
        # The opening line is replaced by a bare root tag (attributes dropped)
        bare_root_tag = start_marker + ">"
        xml_fragments = []
        patent_xml_started = False

        for line in xml_file_contents:
            # Decode the line from byte-object
            line = USPTOSanitizer.decode_line(line)

            if start_marker in line:
                # Start of a single patent grant document
                patent_xml_started = True
                xml_fragments.append(bare_root_tag)

            elif end_marker in line:
                # End of the document: the closing line is appended unsanitized
                patent_xml_started = False
                xml_fragments.append(line)
                xml_string = "".join(xml_fragments)
                # Extract the data, then write it to csv or database
                processed_data_array = USPTOProcessLinks.extract_data_router(xml_string, args_array)
                USPTOStoreGrantData.store_grant_data(processed_data_array, args_array)
                # Start collecting the next document
                xml_fragments = []

            elif patent_xml_started:
                # Inside a document: fix character entities before collecting
                xml_fragments.append(sanitize_line(line))

    # Close all the open .csv files being written to
    USPTOCSVHandler.close_csv_files(args_array)

    # Stays True when the database insertion method is not bulk
    file_processed = True

    if bulk_insert:
        # Check for previous attempt to process the file and clean database if required
        database_connection.remove_previous_file_records(args_array['document_type'], args_array['file_name'])
        # NOTE(review): process_XML_application_content calls this method as
        # load_csv_bulk_data(args_array, key, csv_file) -- confirm which
        # signature the database class exposes.
        file_processed = database_connection.load_csv_bulk_data(args_array)

    elapsed_time = time.time() - start_time
    finished_time = time.strftime("%c")

    if file_processed:
        # Have the process log rewritten to "Processed"
        USPTOLogger.write_process_log(args_array)
        if "csv" not in command_args:
            # CSV files were only staging for the database, so delete them
            USPTOCSVHandler.delete_csv_files(args_array)

        print('[Loaded {0} data for {1} into database. Time:{2} Finished Time: {3} ]'.format(
            args_array['document_type'], args_array['url_link'], elapsed_time, finished_time))
        logger.info('Loaded {0} data for {1} into database. Time:{2} Finished Time: {3}'.format(
            args_array['document_type'], args_array['url_link'], elapsed_time, finished_time))
        # Return file_processed as success status
        return file_processed

    print('[Failed to bulk load {0} data for {1} into database. Time:{2} Finished Time: {3} ]'.format(
        args_array['document_type'], args_array['url_link'], elapsed_time, finished_time))
    logger.error('Failed to bulk load {0} data for {1} into database. Time:{2} Finished Time: {3} ]'.format(
        args_array['document_type'], args_array['url_link'], elapsed_time, finished_time))
    # Return None as failed status during database insertion
    return None
def extract_XML4_application_tag_counts(args_array):
    """Count, per database table, the lines of an XML4 application file that hold a matching tag.

    Each decoded line is checked against the marker strings of every table.
    A line adds at most one to a table's count, however many of that table's
    markers it contains.

    Args:
        args_array (dict): Run configuration.  'file_name' and 'stdout_level'
            are read here; the whole dict is handed to the zip extractor.

    Returns:
        False if no XML could be extracted from the zip file, otherwise a
        dict holding 'file_name' plus one integer count per table name.
    """
    logger = USPTOLogger.logging.getLogger("USPTO_Database_Construction")

    # Pull the XML out of the zip file and give up early when there is none
    xml_file_contents = USPTOProcessZipFile.extract_xml_file_from_zip(args_array)
    if xml_file_contents in (None, False):
        return False

    # Marker strings that identify a record of each table
    table_markers = {
        "APPLICATION": ("<us-patent-application",),
        "INTCLASS_A": ("<classification-ipcr",),
        "USCLASS_A": ("<main-classification", "<further-classification"),
        "CPCCLASS_A": ("<classification-cpc",),
        "FOREIGNPRIORITY_A": ("<priority-claim>", "<priority-claim "),
        "AGENT_A": ("<agent>", "<agent "),
        "ASSIGNEE_A": ("<assignee>", "<assignee "),
        "INVENTOR_A": ("<inventor>", "<inventor "),
        "APPLICANT_A": ("<us-applicant>", "<applicant>", "<us-applicant ", "<applicant "),
    }

    # Result dictionary: the file name followed by a zeroed counter per table
    tag_counts = {"file_name": args_array['file_name']}
    for table_name in (
            "APPLICATION", "INTCLASS_A", "USCLASS_A", "CPCCLASS_A",
            "FOREIGNPRIORITY_A", "AGENT_A", "ASSIGNEE_A", "INVENTOR_A",
            "APPLICANT_A"):
        tag_counts[table_name] = 0

    # Announce the start on stdout and in the log
    print("-- Starting the XML4 application tag counting process for contents of: " + args_array['file_name'] + ". Time Started: " + time.strftime("%c"))
    logger.info("Starting the XML4 application tag counting process for contents of: " + args_array['file_name'] + ". Time Started: " + time.strftime("%c"))

    for raw_line in xml_file_contents:
        # Lines arrive as byte-objects and are decoded before matching
        decoded_line = USPTOSanitizer.decode_line(raw_line)
        for table_name, markers in table_markers.items():
            if any(marker in decoded_line for marker in markers):
                tag_counts[table_name] += 1

    # Announce the end on stdout and in the log
    print("-- Finished the XML4 application tag counting process for contents of: " + args_array['file_name'] + ". Time Finished: " + time.strftime("%c"))
    logger.info("Finished the XML4 application tag counting process for contents of: " + args_array['file_name'] + ". Time Finished: " + time.strftime("%c"))

    # Show the counts when stdout output is set to level 1
    if args_array['stdout_level'] == 1:
        pprint(tag_counts)

    return tag_counts
def extract_XML1_application_tag_counts(args_array):
    """Count, per database table, the records found in an XML1 application file.

    Most tables are counted by scanning each decoded line for marker strings
    (a line adds at most one to a table's count).  In addition, every
    ``<patent-application-publication>`` document is parsed with ElementTree
    and the ``assignee`` and ``inventor`` children of its
    ``subdoc-bibliographic-information`` element are added to 'ASSIGNEE_A'
    and 'INVENTOR_A'.  A document that cannot be parsed, or that lacks the
    bibliographic element, is logged and skipped instead of aborting the
    whole count.

    Args:
        args_array (dict): Run configuration.  'file_name' and 'stdout_level'
            are read here; the whole dict is handed to the zip extractor.

    Returns:
        False if no XML could be extracted from the zip file, otherwise a
        dict holding 'file_name' plus one integer count per table name.
    """
    logger = USPTOLogger.logging.getLogger("USPTO_Database_Construction")

    # Extract the XML file from the ZIP file
    xml_file_contents = USPTOProcessZipFile.extract_xml_file_from_zip(args_array)

    # If xml_file_contents is None or False, then return immediately
    if xml_file_contents is None or xml_file_contents is False:
        return False

    # Marker strings used to count tags line by line.
    # CPCCLASS_A and APPLICANT_A are not included in XML1 applications;
    # ASSIGNEE_A is counted from the parsed XML below.
    tags_dict = {
        "APPLICATION" : ["<patent-application-publication"],
        "INTCLASS_A" : ["<classification-ipc-primary>", "<classification-ipc-secondary>"],
        "USCLASS_A" : ["<classification-us-primary>", "<classification-us-secondary>"],
        "FOREIGNPRIORITY_A" : ["<priority-application-number"],
        "AGENT_A" : ["<correspondence-address>"],
        "INVENTOR_A" : ["<first-named-inventor", "<inventor>"],
    }

    # Declare a dictionary to hold counts by table
    counts_dict = {
        "file_name" : args_array['file_name'],
        "APPLICATION" : 0,
        "INTCLASS_A" : 0,
        "USCLASS_A" : 0,
        "CPCCLASS_A" : 0,
        "FOREIGNPRIORITY_A" : 0,
        "AGENT_A" : 0,
        "ASSIGNEE_A" : 0,
        "INVENTOR_A" : 0,
        "APPLICANT_A" : 0
    }

    # Print to stdout and log
    print("-- Starting the XML1 application tag counting process for contents of: " + args_array['file_name'] + ". Time Started: " + time.strftime("%c"))
    logger.info("Starting the XML1 application tag counting process for contents of: " + args_array['file_name'] + ". Time Started: " + time.strftime("%c"))

    # Fragments of the document currently being collected for XML parsing
    xml_fragments = []
    patent_xml_started = False

    # A single pass both counts marker strings and collects each document,
    # so every line is decoded once and a one-shot iterable also works.
    for line in xml_file_contents:
        # Decode the line from byte-object
        line = USPTOSanitizer.decode_line(line)

        # Count the tables whose marker strings appear on this line
        for table, tags in tags_dict.items():
            if any(tag in line for tag in tags):
                counts_dict[table] += 1

        if "<patent-application-publication" in line:
            # Start of a single application document (attributes are dropped)
            patent_xml_started = True
            xml_fragments.append("<patent-application-publication>")

        elif "</patent-application-publication" in line:
            # End of the document: parse what has been collected
            patent_xml_started = False
            xml_fragments.append("</patent-application-publication>")
            xml_string = "".join(xml_fragments)
            # Reset before parsing so a bad document cannot leak into the next one
            xml_fragments = []

            try:
                document_root = ET.fromstring(xml_string)
            except ET.ParseError as e:
                logger.error("ET could not parse an XML1 application document in file: " + args_array['file_name'] + " Error: " + str(e))
                continue

            r = document_root.find('subdoc-bibliographic-information')
            if r is None:
                logger.warning("No subdoc-bibliographic-information element found in a document of file: " + args_array['file_name'])
                continue

            # Count the number of assignee tags
            counts_dict['ASSIGNEE_A'] += len(r.findall('assignee'))
            # NOTE(review): INVENTOR_A is also incremented by the line scan
            # above; this only adds direct 'inventor' children of the
            # bibliographic element -- confirm the two cannot double count.
            counts_dict['INVENTOR_A'] += len(r.findall('inventor'))

        elif patent_xml_started:
            # Inside a document: fix character entities before collecting
            xml_fragments.append(USPTOSanitizer.replace_old_html_characters(line))

    # Print to stdout and log
    print("-- Finished the XML1 appication tag counting process for contents of: " + args_array['file_name'] + ". Time Finished: " + time.strftime("%c"))
    logger.info("Finished the XML1 application tag counting process for contents of: " + args_array['file_name'] + ". Time Finished: " + time.strftime("%c"))

    # Show the counts when stdout output is set to level 1
    if args_array['stdout_level'] == 1:
        pprint(counts_dict)

    # Return the dictionary of counts for found tags
    return counts_dict
def _count_XML4_grant_parsed_tags(bibliographic_data, counts_dict):
    """Add the classification-search and citation counts of one parsed XML4 grant to counts_dict."""
    # CPC and US classes listed under the newer field-of-classification element
    foc = bibliographic_data.find('us-field-of-classification-search')
    if foc is not None:
        counts_dict["CPCCLASS_G"] += len(foc.findall('classification-cpc-text'))
        counts_dict["USCLASS_G"] += len(foc.findall('classification-national'))

    # US classes when the file format uses field-of-search instead
    foc = bibliographic_data.find('field-of-search')
    if foc is not None:
        counts_dict["USCLASS_G"] += len(foc.findall('classification-national'))

    # The references container has different names across file versions
    rf = None
    for container_tag in ('us-references-cited', 'references-cited', 'references'):
        rf = bibliographic_data.find(container_tag)
        if rf is not None:
            break
    if rf is None:
        return

    # Citations are tagged 'citation' or 'us-citation' depending on the version
    citation_tag = 'citation' if rf.find('citation') is not None else 'us-citation'

    for rfc in rf.findall(citation_tag):
        # Only citations with a patcit child are patent citations
        patcit = rfc.find('patcit')
        if patcit is None:
            continue
        try:
            citation_country = patcit.find('document-id').findtext('country').strip()
        except AttributeError:
            # document-id or country is missing
            citation_country = None
        # Anything not explicitly 'US' (including unknown) counts as foreign
        if citation_country == 'US':
            counts_dict["GRACIT_G"] += 1
        else:
            counts_dict["FORPATCIT_G"] += 1


def extract_XML4_grant_tag_counts(args_array):
    """Count, per database table, the records found in an XML4 grant file.

    Most tables are counted by scanning each decoded line for marker strings
    (a line adds at most one to a table's count).  'CPCCLASS_G', 'USCLASS_G',
    'GRACIT_G' and 'FORPATCIT_G' are counted by parsing every
    ``<us-patent-grant>`` document with ElementTree.  A document that cannot
    be parsed, or that lacks ``us-bibliographic-data-grant``, is logged and
    skipped instead of aborting the whole count.

    Args:
        args_array (dict): Run configuration.  'file_name' and 'stdout_level'
            are read here; the whole dict is handed to the zip extractor.

    Returns:
        False if no XML could be extracted from the zip file, otherwise a
        dict holding 'file_name' plus one integer count per table name.
    """
    logger = USPTOLogger.logging.getLogger("USPTO_Database_Construction")

    # Extract the XML file from the ZIP file
    xml_file_contents = USPTOProcessZipFile.extract_xml_file_from_zip(args_array)

    # If xml_file_contents is None or False, then return immediately
    if xml_file_contents is None or xml_file_contents is False:
        return False

    # Marker strings used to count tags line by line.
    # NOTE(review): "<applicant" also matches lines holding "<applicants" --
    # confirm that counting the wrapper line is intended.
    tags_dict = {
        "GRANT" : ["<us-patent-grant"],
        "INTCLASS_G" : ["<classification-ipcr"],
        "AGENT_G" : ["<agent>", "<agent "],
        "ASSIGNEE_G" : ["<assignee>", "<assignee "],
        "APPLICANT_G" : ["<us-applicant>", "<us-applicant ", "<applicant", "<applicant>"],
        "INVENTOR_G" : ["<inventor>", "<inventor ", "applicant-inventor"],
        "NONPATCIT_G" : ["<nplcit"],
        "EXAMINER_G" : ["<primary-examiner", "<assistant-examiner"],
        "FOREIGNPRIORITY_G" : ["<priority-claim>", "<priority-claim "]
    }

    # Declare a dictionary to hold counts by table
    counts_dict = {
        "file_name" : args_array['file_name'],
        "GRANT" : 0,
        "INTCLASS_G" : 0,
        "CPCCLASS_G" : 0,
        "USCLASS_G" : 0,
        "INVENTOR_G" : 0,
        "AGENT_G" : 0,
        "ASSIGNEE_G" : 0,
        "APPLICANT_G" : 0,
        "NONPATCIT_G" : 0,
        "EXAMINER_G" : 0,
        "GRACIT_G" : 0,
        "FORPATCIT_G" : 0,
        "FOREIGNPRIORITY_G" : 0
    }

    # Print to stdout and log
    print("-- Starting the XML4 grant tag counting process for contents of: " + args_array['file_name'] + ". Time Started: " + time.strftime("%c"))
    logger.info("Starting the XML4 grant tag counting process for contents of: " + args_array['file_name'] + ". Time Started: " + time.strftime("%c"))

    # Fragments of the document currently being collected for XML parsing
    xml_fragments = []
    patent_xml_started = False

    # A single pass both counts marker strings and collects each document,
    # so every line is decoded once and a one-shot iterable also works.
    for line in xml_file_contents:
        # Decode the line from byte-object
        line = USPTOSanitizer.decode_line(line)

        # Count the tables whose marker strings appear on this line
        for table, tags in tags_dict.items():
            if any(tag in line for tag in tags):
                counts_dict[table] += 1

        if "<us-patent-grant" in line:
            # Start of a single grant document (attributes are dropped)
            patent_xml_started = True
            xml_fragments.append("<us-patent-grant>")

        elif "</us-patent-grant" in line:
            # End of the document: parse what has been collected
            patent_xml_started = False
            xml_fragments.append("</us-patent-grant>")
            xml_string = "".join(xml_fragments)
            # Reset before parsing so a bad document cannot leak into the next one
            xml_fragments = []

            try:
                document_root = ET.fromstring(xml_string)
            except ET.ParseError as e:
                logger.error("ET could not parse an XML4 grant document in file: " + args_array['file_name'] + " Error: " + str(e))
                continue

            r = document_root.find('us-bibliographic-data-grant')
            if r is None:
                logger.warning("No us-bibliographic-data-grant element found in a document of file: " + args_array['file_name'])
                continue

            _count_XML4_grant_parsed_tags(r, counts_dict)

        elif patent_xml_started:
            # NOTE(review): lines are collected without the character-entity
            # replacement that process_XML_grant_content applies to gXML4
            # lines -- confirm this is intended.
            xml_fragments.append(line)

    # Print to stdout and log
    print("-- Finished the XML4 grant tag counting process for contents of: " + args_array['file_name'] + ". Time Finished: " + time.strftime("%c"))
    logger.info("Finished the XML4 grant tag counting process for contents of: " + args_array['file_name'] + ". Time Finished: " + time.strftime("%c"))

    # Show the counts when stdout output is set to level 1
    if args_array['stdout_level'] == 1:
        pprint(counts_dict)

    # Return the dictionary of counts for found tags
    return counts_dict
def _count_XML2_grant_citations(bibliographic_data, counts_dict):
    """Add the patent citation counts (B500/B560/B561) of one parsed XML2 grant to counts_dict."""
    B500 = bibliographic_data.find('B500')
    if B500 is None:
        return

    for B560 in B500.findall('B560'):
        # B561 is Patent Citation
        for B561 in B560.findall('B561'):
            try:
                pcit = B561.find('PCIT').find('DOC')
            except AttributeError:
                # No PCIT element in this citation
                pcit = None
            if pcit is None:
                continue

            prt = pcit.find('PARTY-US')
            try:
                citation_state = USPTOSanitizer.return_element_text(prt.find('ADR').find('STATE')).strip()[:3]
            except Exception:
                citation_state = None
            try:
                citation_country = USPTOSanitizer.return_element_text(prt.find("ADR").find('CTRY')).strip()[:3]
            except Exception:
                # No usable country: fall back on the state to detect US citations
                try:
                    if USPTOSanitizer.is_US_state(citation_state):
                        citation_country = "US"
                    else:
                        citation_country = None
                except Exception:
                    citation_country = None

            # A citation without a known country is counted as a US citation
            if citation_country == "US" or citation_country is None:
                counts_dict['GRACIT_G'] += 1
            else:
                counts_dict['FORPATCIT_G'] += 1


def extract_XML2_grant_tag_counts(args_array):
    """Count, per database table, the records found in an XML2 grant file.

    Most tables are counted by scanning each decoded line for marker strings
    (a line adds at most one to a table's count).  'GRACIT_G' and
    'FORPATCIT_G' are counted by parsing every ``<PATDOC>`` document with
    ElementTree.  A document that cannot be parsed is logged and skipped, and
    the collection buffer is always cleared so that one bad document does not
    corrupt the documents that follow it.

    Args:
        args_array (dict): Run configuration.  'file_name' and 'stdout_level'
            are read here; the whole dict is handed to the zip extractor.

    Returns:
        False if no XML could be extracted from the zip file, otherwise a
        dict holding 'file_name' plus one integer count per table name.
    """
    logger = USPTOLogger.logging.getLogger("USPTO_Database_Construction")

    # Extract the XML file from the ZIP file
    xml_file_contents = USPTOProcessZipFile.extract_xml_file_from_zip(args_array)

    # If xml_file_contents is None or False, then return immediately
    if xml_file_contents is None or xml_file_contents is False:
        return False

    # Marker strings used to count tags line by line.
    # NOTE: CPCClASS_G, APPLICANT_G, are not available in XML2 Grant files
    tags_dict = {
        "GRANT" : ["<PATDOC"],
        "INTCLASS_G" : ["<B510"],
        "USCLASS_G" : ["<B521", "<B522"],
        "INVENTOR_G" : ["<B721"],
        "AGENT_G" : ["<B740"],
        "ASSIGNEE_G" : ["<B730"],
        "NONPATCIT_G" : ["<B562"],
        "EXAMINER_G" : ["<B746", "<B747"],
        "FOREIGNPRIORITY_G" : ["<B310"]
    }

    # Declare a dictionary to hold counts by table
    counts_dict = {
        "file_name" : args_array['file_name'],
        "GRANT" : 0,
        "INTCLASS_G" : 0,
        "CPCCLASS_G" : 0,
        "USCLASS_G" : 0,
        "INVENTOR_G" : 0,
        "AGENT_G" : 0,
        "ASSIGNEE_G" : 0,
        "APPLICANT_G" : 0,
        "NONPATCIT_G" : 0,
        "EXAMINER_G" : 0,
        "GRACIT_G" : 0,
        "FORPATCIT_G" : 0,
        "FOREIGNPRIORITY_G" : 0
    }

    # Print to stdout and log
    print("-- Starting the XML2 grant tag counting process for contents of: " + args_array['file_name'] + ". Time Started: " + time.strftime("%c"))
    logger.info("Starting the XML2 grant tag counting process for contents of: " + args_array['file_name'] + ". Time Started: " + time.strftime("%c"))

    # Fragments of the document currently being collected for XML parsing
    xml_fragments = []
    patent_xml_started = False

    # A single pass both counts marker strings and collects each document,
    # so every line is decoded once and a one-shot iterable also works.
    for line in xml_file_contents:
        # Decode the line from byte-object
        line = USPTOSanitizer.decode_line(line)

        # Count the tables whose marker strings appear on this line
        for table, tags in tags_dict.items():
            if any(tag in line for tag in tags):
                counts_dict[table] += 1

        if "<PATDOC" in line:
            # Start of a single grant document (attributes are dropped)
            patent_xml_started = True
            xml_fragments.append("<PATDOC>")

        elif "</PATDOC" in line:
            # End of the document: parse what has been collected
            patent_xml_started = False
            xml_fragments.append("</PATDOC>")
            xml_string = "".join(xml_fragments)
            # Reset before parsing: a failed parse must not leave the broken
            # document in the buffer for the next one to be appended to
            xml_fragments = []

            try:
                document_root = ET.fromstring(xml_string)
            except ET.ParseError:
                # Log the numbered document text to help locate the offending entity
                for num, xml_line in enumerate(xml_string.split("\n"), start = 1):
                    logger.error(str(num) + ' : ' + xml_line)
                logger.error("Character Entity prevented ET from parsing XML in file: " + args_array['file_name'] )
                traceback.print_exc()
                exc_type, exc_obj, exc_tb = sys.exc_info()
                fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                logger.error("Exception: " + str(exc_type) + " in Filename: " + str(fname) + " on Line: " + str(exc_tb.tb_lineno) + " Traceback: " + traceback.format_exc())
                continue

            # SDOBI is the bibliographic data
            r = document_root.find('SDOBI')
            if r is None:
                logger.warning("No SDOBI element found in a document of file: " + args_array['file_name'])
                continue

            _count_XML2_grant_citations(r, counts_dict)

        elif patent_xml_started:
            # Inside a document: fix character entities before collecting
            xml_fragments.append(USPTOSanitizer.replace_old_html_characters(line))

    # Print to stdout and log
    print("-- Finished the XML2 grant tag counting process for contents of: " + args_array['file_name'] + ". Time Finished: " + time.strftime("%c"))
    logger.info("Finished the XML2 grant tag counting process for contents of: " + args_array['file_name'] + ". Time Finished: " + time.strftime("%c"))

    # Show the counts when stdout output is set to level 1
    if args_array['stdout_level'] == 1:
        pprint(counts_dict)

    # Return the dictionary of counts for found tags
    return counts_dict
def process_XML_application_content(args_array):
    """Parse a USPTO patent application bulk file and store each application it contains.

    The XML extracted from the zip file holds many application documents back
    to back.  Lines are grouped into one XML string per application; each
    string is passed to ``USPTOProcessLinks.extract_data_router`` and the
    result to ``USPTOStoreApplicationData.store_application_data``.  Only the
    ``aXML4`` and ``aXML1`` formats are handled; any other
    ``uspto_xml_format`` stores nothing.

    Args:
        args_array (dict): Run configuration.  Keys read here are
            'command_args', 'database_connection', 'database_insert_mode',
            'document_type', 'file_name', 'csv_directory',
            'uspto_xml_format' and 'url_link'.  'csv_file_array' is written
            when CSV files are opened.

    Returns:
        False if no XML could be extracted from the zip file; the load status
        (True when no bulk load is performed) on success; None when any bulk
        database load reports failure.
    """
    # Set the start time of operation
    start_time = time.time()

    logger = USPTOLogger.logging.getLogger("USPTO_Database_Construction")

    command_args = args_array['command_args']
    bulk_insert = "database" in command_args and args_array['database_insert_mode'] == "bulk"

    # CSV files are required both for csv output and as staging for a bulk load
    csv_files_opened = "csv" in command_args or bulk_insert
    if csv_files_opened:
        args_array['csv_file_array'] = USPTOCSVHandler.open_csv_files(
            args_array['document_type'],
            args_array['file_name'],
            args_array['csv_directory'])

    # Extract the XML file from the ZIP file
    xml_file_contents = USPTOProcessZipFile.extract_xml_file_from_zip(args_array)

    # Nothing to parse: release the CSV handles opened above before bailing out
    if xml_file_contents is None or xml_file_contents is False:
        if csv_files_opened:
            USPTOCSVHandler.close_csv_files(args_array)
        return False

    # Select the document root tag and the entity sanitizer for the format
    xml_format = args_array['uspto_xml_format']
    if xml_format == "aXML4":
        root_tag = "us-patent-application"
        sanitize_line = USPTOSanitizer.replace_new_html_characters
    elif xml_format == "aXML1":
        root_tag = "patent-application-publication"
        sanitize_line = USPTOSanitizer.replace_old_html_characters
    else:
        root_tag = None

    if root_tag is not None:
        start_marker = "<" + root_tag
        end_marker = "</" + root_tag
        # Opening and closing lines are replaced by bare root tags
        bare_open_tag = "<" + root_tag + ">"
        bare_close_tag = "</" + root_tag + ">"
        xml_fragments = []
        patent_xml_started = False

        for line in xml_file_contents:
            # Decode the line from byte-object
            line = USPTOSanitizer.decode_line(line)

            if start_marker in line:
                # Start of a single patent application document
                patent_xml_started = True
                xml_fragments.append(bare_open_tag)

            elif end_marker in line:
                # End of the document: extract and store what was collected
                patent_xml_started = False
                xml_fragments.append(bare_close_tag)
                xml_string = "".join(xml_fragments)
                processed_data_array = USPTOProcessLinks.extract_data_router(xml_string, args_array)
                USPTOStoreApplicationData.store_application_data(processed_data_array, args_array)
                # Start collecting the next document
                xml_fragments = []

            elif patent_xml_started:
                # Inside a document: fix character entities before collecting
                xml_fragments.append(sanitize_line(line))

    # Close all the .csv files being written to
    USPTOCSVHandler.close_csv_files(args_array)

    # Stays True when the database insertion method is not bulk
    file_processed = True

    if bulk_insert:
        database_connection = args_array['database_connection']
        # Check for previous attempt to process the file and clean database if required
        database_connection.remove_previous_file_records(args_array['document_type'], args_array['file_name'])
        # NOTE(review): process_XML_grant_content calls this method as
        # load_csv_bulk_data(args_array) with a single argument -- confirm
        # which signature the database class exposes.
        for key, csv_file in list(args_array['csv_file_array'].items()):
            load_status = database_connection.load_csv_bulk_data(args_array, key, csv_file)
            # Every file is still attempted, but once one load has failed the
            # failure is kept so a later success cannot mask it
            if file_processed:
                file_processed = load_status

    elapsed_time = time.time() - start_time
    finished_time = time.strftime("%c")

    # If the file was successfully processed into the database
    if file_processed:
        # Have the process log rewritten to "Processed"
        USPTOLogger.write_process_log(args_array)
        if "csv" not in command_args:
            # CSV files were only staging for the database, so delete them
            USPTOCSVHandler.delete_csv_files(args_array)

        print('[Loaded {0} data for {1} into database. Time:{2} Finished Time: {3} ]'.format(
            args_array['document_type'], args_array['url_link'], elapsed_time, finished_time))
        logger.info('Loaded {0} data for {1} into database. Time:{2} Finished Time: {3} ]'.format(
            args_array['document_type'], args_array['url_link'], elapsed_time, finished_time))
        # Return the file processed status
        return file_processed

    print('[Failed to bulk load {0} data for {1} into database. Time:{2} Finished Time: {3} ]'.format(
        args_array['document_type'], args_array['url_link'], elapsed_time, finished_time))
    logger.error('Failed to bulk load {0} data for {1} into database. Time:{2} Finished Time: {3} ]'.format(
        args_array['document_type'], args_array['url_link'], elapsed_time, finished_time))
    # Return None to show database insertion failed
    return None