Code Example #1
File: USPTOVerifyLinks.py Project: ayxemma/uspto
def verify_link_file(args_array):

    logger = USPTOLogger.logging.getLogger("USPTO_Database_Construction")

    # Download the file and append temp location to args array
    args_array['temp_zip_file_name'] = USPTOProcessLinks.download_zip_file(args_array)
    # Route to the correct extraction function
    counts_dict = verification_extract_data_router(args_array)
    # Store the expected tag counts in the database
    if counts_dict:
        file_processed_success = args_array['database_connection'].storeVerificationExtraction(counts_dict, args_array)
        # Log the file as verified
        if file_processed_success:
            USPTOLogger.write_verified_log(args_array)
        else:
            # Print to stdout and log
            print("The contents of: " + args_array['file_name'] + " could not be stored into the database! Time Finished: " + time.strftime("%c"))
            logger.error("The contents of: " + args_array['file_name'] + " could not be stored into the database! Time Finished: " + time.strftime("%c"))
    else:
        # Print to stdout and log
        print("The contents of: " + args_array['file_name'] + " could not be verified. Time Finished: " + time.strftime("%c"))
        logger.error("The contents of: " + args_array['file_name'] + " could not be verified. Time Finished: " + time.strftime("%c"))

    # Print to stdout and log
    print("-- Finished the verificaction process for contents of: " + args_array['file_name'] + " Time Finished: " + time.strftime("%c"))
    logger.info("Finished the verification process for contents of: " + args_array['file_name'] + " Time Finished: " + time.strftime("%c"))
Code Example #2
def process_XML_grant_content(args_array):

    # Import logger
    logger = USPTOLogger.logging.getLogger("USPTO_Database_Construction")

    if "database" in args_array["command_args"]:
        # Pass the database connection to variable
        database_connection = args_array['database_connection']

    # If csv file insertion is required, then open all the files
    # into args_array
    if "csv" in args_array['command_args'] or ("database" in args_array['command_args'] and args_array['database_insert_mode'] == "bulk"):
        args_array['csv_file_array'] = USPTOCSVHandler.open_csv_files(args_array['document_type'], args_array['file_name'], args_array['csv_directory'])

    # Set the start time of operation
    start_time = time.time()

    # Extract the XML file from the ZIP file
    xml_file_contents = USPTOProcessZipFile.extract_xml_file_from_zip(args_array)

    # If xml_file_contents is None or False, then return immediately
    if xml_file_contents is None or xml_file_contents is False:
        return False

    # create variables needed to parse the file
    xml_string = ''
    patent_xml_started = False
    # read through the file and append into groups of string.
    # Send the finished strings to be parsed
    # Use uspto_xml_format to determine file contents and parse accordingly
    #print "The xml format is: " + args_array['uspto_xml_format']
    if args_array['uspto_xml_format'] == "gXML4":

        # Loop through all lines in the xml file
        for line in xml_file_contents:

            # Decode the line from byte-object
            line = USPTOSanitizer.decode_line(line)

            # This identifies the start of well formed XML segment for patent
            # grant bibliographic information
            if "<us-patent-grant" in line:
                patent_xml_started = True
                xml_string += "<us-patent-grant>"

            # This identifies end of well-formed XML segment for single patent
            # grant bibliographic information
            elif "</us-patent-grant" in line:

                patent_xml_started = False
                xml_string += line
                # Call the function to extract data
                processed_data_array = USPTOProcessLinks.extract_data_router(xml_string, args_array)
                # Call function to write data to csv or database
                USPTOStoreGrantData.store_grant_data(processed_data_array, args_array)

                # reset the xml string
                xml_string = ''

            # This is used to append lines of file when inside single patent grant
            elif patent_xml_started == True:
                # Check which type of encoding should be used to fix the line string
                xml_string += USPTOSanitizer.replace_new_html_characters(line)

    # Used for gXML2 files
    elif args_array['uspto_xml_format'] == "gXML2":

        # Loop through all lines in the xml file
        for line in xml_file_contents:

            # Decode the line from byte-object
            line = USPTOSanitizer.decode_line(line)

            # This identifies the start of well formed XML segment for patent
            # grant bibliographic information
            if "<PATDOC" in line:
                patent_xml_started = True
                xml_string += "<PATDOC>"

                # Print line with number
                #print str(line_number) + " : " + line
                #line_number += 1

            # This identifies end of well-formed XML segment for single patent
            # grant bibliographic information
            elif "</PATDOC" in line:
                patent_xml_started = False
                xml_string += line

                # Call the function to extract data
                processed_data_array = USPTOProcessLinks.extract_data_router(xml_string, args_array)
                # Call function to write data to csv or database
                USPTOStoreGrantData.store_grant_data(processed_data_array, args_array)

                # reset the xml string
                xml_string = ''

            # This is used to append lines of file when inside single patent grant
            elif patent_xml_started == True:
                # Check which type of encoding should be used to fix the line string
                xml_string += USPTOSanitizer.replace_old_html_characters(line)

    # Close all the open .csv files being written to
    USPTOCSVHandler.close_csv_files(args_array)

    # Set a flag file_processed to ensure that the bulk insert succeeds
    # This should be true, in case the database insertion method is not bulk
    file_processed = True

    # If data is to be inserted as bulk csv files, then call the sql function
    if "database" in args_array["command_args"] and args_array['database_insert_mode'] == 'bulk':
        # Check for previous attempt to process the file and clean database if required
        database_connection.remove_previous_file_records(args_array['document_type'], args_array['file_name'])
        # Load CSV file into database
        file_processed = database_connection.load_csv_bulk_data(args_array)

    if file_processed:
        # Send the information to USPTOLogger.write_process_log to have log file rewritten to "Processed"
        USPTOLogger.write_process_log(args_array)
        if "csv" not in args_array['command_args']:
            # Delete all the open csv files
            USPTOCSVHandler.delete_csv_files(args_array)

        # Print message to stdout and log
        print('[Loaded {0} data for {1} into database. Time:{2} Finished Time: {3} ]'.format(args_array['document_type'], args_array['url_link'], time.time() - start_time, time.strftime("%c")))
        logger.info('Loaded {0} data for {1} into database. Time:{2} Finished Time: {3}'.format(args_array['document_type'], args_array['url_link'], time.time() - start_time, time.strftime("%c")))
        # Return file_processed as success status
        return file_processed
    else:
        # Print message to stdout and log
        print('[Failed to bulk load {0} data for {1} into database. Time:{2} Finished Time: {3} ]'.format(args_array['document_type'], args_array['url_link'], time.time() - start_time, time.strftime("%c")))
        logger.error('Failed to bulk load {0} data for {1} into database. Time:{2} Finished Time: {3}'.format(args_array['document_type'], args_array['url_link'], time.time() - start_time, time.strftime("%c")))
        # Return None as failed status during database insertion
        return None
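
The loop above treats the bulk grant file as many independent XML documents concatenated together: it buffers lines between the opening and closing grant tags and hands each buffered span to the extraction router. A stripped-down, standard-library version of that segmentation pattern might look like the following; entity clean-up (what USPTOSanitizer handles in the real code) is omitted, and the tag names are taken from the snippet above.

import xml.etree.ElementTree as ET

# Minimal sketch of the per-document segmentation pattern used above: buffer
# lines between the grant tags, then parse each buffered document on its own.
def iter_grant_documents(lines):
    buffer = []
    inside = False
    for line in lines:
        if "<us-patent-grant" in line:
            inside = True
            buffer = [line]
        elif "</us-patent-grant" in line:
            buffer.append(line)
            inside = False
            yield ET.fromstring("".join(buffer))
            buffer = []
        elif inside:
            buffer.append(line)

# Example usage with an already-extracted text file (hypothetical path):
# with open("ipg210101.xml", encoding="utf-8") as f:
#     for grant in iter_grant_documents(f):
#         print(grant.tag)
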
Code Example #3
def process_PAIR_content(args_array):

    # Set the start time of operation
    start_time = time.time()

    logger = USPTOLogger.logging.getLogger("USPTO_Database_Construction")

    # Extract the .CSV file from the ZIP file
    csv_file_name = USPTOProcessZipFile.extract_csv_file_from_zip(args_array)

    # If csv_file_name is None or False, then return immediately
    if csv_file_name is None or csv_file_name is False:
        return False

    # Set a flag based on filename to call the extraction function
    args_array['extraction_type'] = set_extraction_type(csv_file_name)
    csv_output_filename = set_csv_output_filename(csv_file_name)

    # If csv file insertion is required, then open all the files
    # into args_array
    if "csv" in args_array['command_args'] or (
            "database" in args_array['command_args']
            and args_array['database_insert_mode'] == "bulk"):
        args_array['csv_file_array'] = USPTOCSVHandler.open_csv_files(
            args_array['document_type'], csv_output_filename,
            args_array['csv_directory'], args_array['extraction_type'])

    # Open file in read mode
    with open(csv_file_name, 'r') as read_obj:
        # Pass the file object to reader() to get the reader object
        csv_reader = reader(read_obj)
        # Iterate over each row in the csv using reader object
        line_cnt = 0
        for line in csv_reader:
            if line_cnt != 0:
                # Extract the line into array
                processed_data_array = extract_csv_line(args_array, line)
                # Store the array into newly formatted CSV
                USPTOStorePAIRData.store_PAIR_data(processed_data_array,
                                                   args_array)
            line_cnt += 1

    # If not sandbox mode, then delete the .zip file
    if args_array['sandbox'] == False and os.path.exists(
            args_array['temp_zip_file_name']):
        # Print message to stdout
        print('[Purging .zip file ' + args_array['temp_zip_file_name'] +
              '...]')
        logger.info('Purging .zip file ' + args_array['temp_zip_file_name'] +
                    '...')
        os.remove(args_array['temp_zip_file_name'])

    # Close all the open .csv files being written to
    USPTOCSVHandler.close_csv_files(args_array)

    # Set a flag file_processed to ensure that the bulk insert succeeds
    # This should be true, in case the database insertion method is not bulk
    file_processed = True

    # If data is to be inserted as bulk csv files, then call the sql function
    if "database" in args_array["command_args"] and args_array[
            'database_insert_mode'] == 'bulk':
        # Check for previous attempt to process the file and clean database if required
        args_array['database_connection'].remove_previous_file_records(
            args_array['document_type'], args_array['file_name'])
        # Loop through each csv file and bulk copy into database
        for key, csv_file in list(args_array['csv_file_array'].items()):
            # Only load csv file to database if its for this instance
            if key == args_array['extraction_type']:
                # Load CSV file into database
                file_processed = args_array[
                    'database_connection'].load_csv_bulk_data(
                        args_array, key, csv_file)

    if file_processed:
        # Send the information to USPTOLogger.write_process_log to have log file rewritten to "Processed"
        USPTOLogger.write_process_log(args_array)
        if "csv" not in args_array['command_args']:
            # Delete all the open csv files
            USPTOCSVHandler.delete_csv_files(args_array)

        print(
            '[Loaded {0} data for {1} into database. Time:{2} Finished Time: {3} ]'
            .format(args_array['document_type'], args_array['url_link'],
                    time.time() - start_time, time.strftime("%c")))
        logger.info(
            'Loaded {0} data for {1} into database. Time:{2} Finished Time: {3}'
            .format(args_array['document_type'], args_array['url_link'],
                    time.time() - start_time, time.strftime("%c")))
        # Return file_processed as success status
        return file_processed
    else:
        print(
            '[Failed to bulk load {0} data for {1} into database. Time:{2} Finished Time: {3} ]'
            .format(args_array['document_type'], args_array['url_link'],
                    time.time() - start_time, time.strftime("%c")))
        logger.error(
            'Failed to bulk load {0} data for {1} into database. Time:{2} Finished Time: {3}'
            .format(args_array['document_type'], args_array['url_link'],
                    time.time() - start_time, time.strftime("%c")))
        # Return None as failed status during database insertion
        return None
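
The CSV loop above skips the header row by tracking a line counter. A slightly more direct form of the same read-skip-process pattern, using only the standard csv module, is sketched below; the file path and row callback are placeholders.

import csv

# Minimal sketch: read a CSV file, skip the header row, and hand every data
# row to a callback. Path and callback are placeholders for illustration.
def for_each_data_row(csv_path, handle_row):
    with open(csv_path, "r", newline="") as read_obj:
        rows = csv.reader(read_obj)
        next(rows, None)  # skip the header row instead of counting lines
        for row in rows:
            handle_row(row)

# Example usage (hypothetical file):
# for_each_data_row("pair_data.csv", lambda row: print(row[0]))
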
Code Example #4
def process_XML_application_content(args_array):

    # Process zip file by getting .dat or .txt file and .xml filenames
    start_time = time.time()

    logger = USPTOLogger.logging.getLogger("USPTO_Database_Construction")

    # If csv file insertion is required, then open all the files
    # into args_array
    if "csv" in args_array['command_args'] or ("database" in args_array['command_args'] and args_array['database_insert_mode'] == "bulk"):
        args_array['csv_file_array'] = USPTOCSVHandler.open_csv_files(args_array['document_type'], args_array['file_name'], args_array['csv_directory'])

    # Extract the XML file from the ZIP file
    xml_file_contents = USPTOProcessZipFile.extract_xml_file_from_zip(args_array)

    # If xml_file_contents is None or False, then return immediately
    if xml_file_contents is None or xml_file_contents is False:
        return False

    # create variables needed to parse the file
    xml_string = ''
    patent_xml_started = False
    # read through the file and append into groups of string.
    # Send the finished strings to be parsed
    # Use uspto_xml_format to determine file contents and parse accordingly
    if args_array['uspto_xml_format'] == "aXML4":

        # Loop through all lines in the xml file
        for line in xml_file_contents:
            # Decode the line from byte-object
            line = USPTOSanitizer.decode_line(line)

            # This identifies the start of well formed XML segment for patent
            # application bibliographic information
            if "<us-patent-application" in line:
                patent_xml_started = True
                xml_string += "<us-patent-application>"

            # This identifies end of well-formed XML segment for single patent
            # application bibliographic information
            elif "</us-patent-application" in line:
                patent_xml_started = False
                xml_string += "</us-patent-application>"

                # Call the function to extract data
                processed_data_array = USPTOProcessLinks.extract_data_router(xml_string, args_array)
                # Call function to write data to csv or database
                USPTOStoreApplicationData.store_application_data(processed_data_array, args_array)
                # Reset the xml string
                xml_string = ''

            # This is used to append lines of file when inside single patent grant
            elif patent_xml_started == True:
                xml_string += USPTOSanitizer.replace_new_html_characters(line)

    elif args_array['uspto_xml_format'] == "aXML1":

        line_count = 1

        # Loop through all lines in the xml file
        for line in xml_file_contents:

            # Decode the line from byte-object
            line = USPTOSanitizer.decode_line(line)

            # This identifies the start of well formed XML segment for patent
            # application bibliographic information
            if "<patent-application-publication" in line:
                patent_xml_started = True
                xml_string += "<patent-application-publication>"

            # This identifies end of well-formed XML segment for single patent
            # application bibliographic information
            elif "</patent-application-publication" in line:
                patent_xml_started = False
                xml_string += "</patent-application-publication>"

                # Call the function to extract data
                processed_data_array = USPTOProcessLinks.extract_data_router(xml_string, args_array)
                # Call function to write data to csv or database
                USPTOStoreApplicationData.store_application_data(processed_data_array, args_array)
                # reset the xml string
                xml_string = ''

            # This is used to append lines of file when inside single patent grant
            elif patent_xml_started == True:
                xml_string += USPTOSanitizer.replace_old_html_characters(line)

    # Close all the .csv files being written to
    USPTOCSVHandler.close_csv_files(args_array)

    # Set a flag file_processed to ensure that the bulk insert succeeds
    # This should be true, in case the database insertion method is not bulk
    file_processed = True

    # If data is to be inserted as bulk csv files, then call the sql function
    if "database" in args_array["command_args"] and args_array['database_insert_mode'] == 'bulk':
        # Check for previous attempt to process the file and clean database if required
        args_array['database_connection'].remove_previous_file_records(args_array['document_type'], args_array['file_name'])
        # Loop through each csv file and bulk copy into database
        for key, csv_file in list(args_array['csv_file_array'].items()):
            # Load CSV file into database
            file_processed = args_array['database_connection'].load_csv_bulk_data(args_array, key, csv_file)

    # If the file was successfully processed into the database
    if file_processed:
        # Send the information to USPTOLogger.write_process_log to have log file rewritten to "Processed"
        USPTOLogger.write_process_log(args_array)
        if "csv" not in args_array['command_args']:
            # Close all the open csv files
            USPTOCSVHandler.delete_csv_files(args_array)

        print('[Loaded {0} data for {1} into database. Time:{2} Finished Time: {3} ]'.format(args_array['document_type'], args_array['url_link'], time.time() - start_time, time.strftime("%c")))
        logger.info('Loaded {0} data for {1} into database. Time:{2} Finished Time: {3}'.format(args_array['document_type'], args_array['url_link'], time.time() - start_time, time.strftime("%c")))
        # Return the file processed status
        return file_processed
    else:
        print('[Failed to bulk load {0} data for {1} into database. Time:{2} Finished Time: {3} ]'.format(args_array['document_type'], args_array['url_link'], time.time() - start_time, time.strftime("%c")))
        logger.error('Failed to bulk load {0} data for {1} into database. Time:{2} Finished Time: {3}'.format(args_array['document_type'], args_array['url_link'], time.time() - start_time, time.strftime("%c")))
        # Return None to show database insertion failed
        return None
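
USPTOSanitizer.decode_line is not included in these snippets; from the way it is used, it appears to turn the byte strings yielded by the ZIP reader into plain text before tag matching. A hedged stand-in that behaves that way, purely as an assumption about its role, could be:

# Minimal stand-in for a line decoder: accept bytes or str and always return
# str. This is an assumption about decode_line's behaviour, not project code.
def decode_line(line, encodings=("utf-8", "latin-1")):
    if isinstance(line, str):
        return line
    for encoding in encodings:
        try:
            return line.decode(encoding)
        except UnicodeDecodeError:
            continue
    return line.decode("utf-8", errors="replace")

# Example usage:
# print(decode_line(b"<us-patent-application>"))
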
Code Example #5
def process_class_content(args_array):

    # Set the start time of operation
    start_time = time.time()

    logger = USPTOLogger.logging.getLogger("USPTO_Database_Construction")

    # Set the extraction type
    args_array['extraction_type'] = set_extraction_type(args_array['uspto_xml_format'])

    # If csv file insertion is required, then open all the files
    # into args_array
    if "csv" in args_array['command_args'] or ("database" in args_array['command_args'] and args_array['database_insert_mode'] == "bulk"):
        args_array['csv_file_array'] = USPTOCSVHandler.open_csv_files(args_array['document_type'], args_array['file_name'], args_array['csv_directory'], args_array['extraction_type'])

    # Check the classification filetype code and process accordingly
    if args_array['uspto_xml_format'] == "USCLS":

        # Open file in read mode
        with open(args_array['url_link'], 'r') as read_obj:
            # Iterate over each row in the csv using reader object
            for line in read_obj:
                #print(line)
                # Extract the line into array
                processed_data_array = return_US_class_dict(line)
                processed_data_array['FileName'] = args_array['file_name']
                # Store the array into newly formatted CSV
                class_id = str(processed_data_array['Class']) + " " + str(processed_data_array['SubClass'])
                USPTOStoreClassificationData.store_classification_data(processed_data_array, args_array, class_id)

    # Titles for CPC classifications
    elif args_array['uspto_xml_format'] == "CPCCLS":

        extraction_type = "cpc"
        # Open file in read mode
        with open(args_array['url_link'], 'r') as read_obj:
            # Pass the file object to reader() to get the reader object
            csv_reader = reader(read_obj)
            # Iterate over each row in the csv using reader object
            line_cnt = 0
            for line in csv_reader:
                if line_cnt != 0:
                    # Extract the line into array
                    processed_data_array = extract_CPC_class_dict(line)
                    # Store the array into newly formatted CSV
                    processed_data_array['FileName'] = args_array['file_name']
                    class_id = str(processed_data_array['Section']) + str(processed_data_array['Class']) + str(processed_data_array['SubClass']) + " " + str(processed_data_array['MainGroup']) + "/" + str(processed_data_array['SubGroup'])
                    USPTOStoreClassificationData.store_classification_data(processed_data_array, args_array, class_id)
                line_cnt += 1

    # USPC to CPC classification concordance table
    elif args_array['uspto_xml_format'] == "USCPCCLS":

        # Open file in read mode
        with open(args_array['url_link'], 'r') as read_obj:
            # Pass the file object to reader() to get the reader object
            csv_reader = reader(read_obj)
            # Iterate over each row in the csv using reader object
            line_cnt = 0
            for line in csv_reader:
                if line_cnt != 0:
                    # Extract the line into array
                    processed_data_array = extract_USCPC_class_dict(line, args_array['file_name'])
                    if len(processed_data_array) != 0:
                        # Store the array into newly formatted CSV
                        class_id = str(processed_data_array[0]['USClass'])
                        USPTOStoreClassificationData.store_classification_data(processed_data_array, args_array, class_id)
                line_cnt += 1

    # Set a flag file_processed to ensure that the bulk insert succeeds
    # This should be true, in case the database insertion method is not bulk
    file_processed = True

    # If data is to be inserted as bulk csv files, then call the sql function
    if "database" in args_array["command_args"] and args_array['database_insert_mode'] == 'bulk':
        # Check for previous attempt to process the file and clean database if required
        args_array['database_connection'].remove_previous_file_records(args_array['document_type'], args_array['file_name'])
        # Loop through each csv file and bulk copy into database
        for key, csv_file in list(args_array['csv_file_array'].items()):
            # Load CSV file into database
            file_processed = args_array['database_connection'].load_csv_bulk_data(args_array, key, csv_file)

    if file_processed:
        # Send the information to USPTOLogger.write_process_log to have log file rewritten to "Processed"
        USPTOLogger.write_process_log(args_array)
        if "csv" not in args_array['command_args']:
            # Delete all the open csv files
            USPTOCSVHandler.delete_csv_files(args_array)

        print('[Loaded {0} data for {1} into database. Time:{2} Finished Time: {3} ]'.format(args_array['document_type'], args_array['url_link'], time.time() - start_time, time.strftime("%c")))
        logger.info('Loaded {0} data for {1} into database. Time:{2} Finished Time: {3}'.format(args_array['document_type'], args_array['url_link'], time.time() - start_time, time.strftime("%c")))
        # Return file_processed as success status
        return file_processed
    else:
        print('[Failed to bulk load {0} data for {1} into database. Time:{2} Finished Time: {3} ]'.format(args_array['document_type'], args_array['url_link'], time.time() - start_time, time.strftime("%c")))
        logger.error('Failed to bulk load {0} data for {1} into database. Time:{2} Finished Time: {3}'.format(args_array['document_type'], args_array['url_link'], time.time() - start_time, time.strftime("%c")))
        # Return None as failed status during database insertion
        return None
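
load_csv_bulk_data is defined on the project's database connection class and is not shown here. If the backend is PostgreSQL, this kind of bulk CSV load is commonly done with COPY, for example through psycopg2's copy_expert; the table name, CSV layout and connection details below are assumptions for illustration only.

import psycopg2

# Minimal sketch of a bulk CSV load via PostgreSQL COPY (assumed backend).
# Table name, CSV layout and connection parameters are placeholders.
def load_csv_bulk(connection_params, table_name, csv_path):
    with psycopg2.connect(**connection_params) as conn:
        with conn.cursor() as cur, open(csv_path, "r") as csv_file:
            cur.copy_expert(
                "COPY {0} FROM STDIN WITH CSV HEADER".format(table_name),
                csv_file)
    return True

# Example usage (hypothetical values):
# load_csv_bulk({"dbname": "uspto", "user": "uspto"}, "uspto.grant_data", "grant.csv")
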
Code Example #6
        "app_config_file": app_config_file,
        "allowed_args_array": allowed_args_array,
        "log_lock_file": log_lock_file,
        "classification_process_log_file": classification_process_log_file,
        "classification_text_filename": classification_text_filename,
        "grant_process_log_file": grant_process_log_file,
        "application_process_log_file": application_process_log_file,
        "application_pair_process_log_file": application_pair_process_log_file,
        "pair_process_log_file": pair_process_log_file,
        "temp_directory": app_temp_dirpath,
        "csv_directory": app_csv_dirpath,
        "sandbox_downloads_dirpath": sandbox_downloads_dirpath
    }

    # Setup logger
    USPTOLogger.setup_logger(args_array['log_level'], app_log_file)
    # Include logger in the main function
    logger = USPTOLogger.logging.getLogger("USPTO_Database_Construction")

    # Perform analysis of command line args and store in args_array
    args_array["command_args"] = build_command_arguments(sys.argv, args_array)

    # If the command args checked out OK, start the app
    if args_array["command_args"]:

        # Print the ASCII header
        print_ascii_header()

        # Set saved app configuration based on current command arguments
        # and collect existing config settings from file and append to args_array
        args_array = set_config_using_command_args(args_array)
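
USPTOLogger.setup_logger is called here before any of the worker functions request the named logger. Assuming it only needs a level string and a log file path, a minimal version of such a setup function could look like this (the logger name follows the snippets above; everything else is an assumption):

import logging

# Minimal sketch of a named-logger setup, assuming setup_logger takes a level
# string and a log file path. Only the logger name comes from the snippets.
def setup_logger(log_level, log_file):
    logger = logging.getLogger("USPTO_Database_Construction")
    logger.setLevel(getattr(logging, str(log_level).upper(), logging.INFO))
    handler = logging.FileHandler(log_file)
    handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
    logger.addHandler(handler)

# Example usage (hypothetical path):
# setup_logger("info", "uspto_construction.log")
# logging.getLogger("USPTO_Database_Construction").info("Logger ready")
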
Code Example #7
def process_XML_application_content(args_array):

    # Import logger
    logger = USPTOLogger.logging.getLogger("USPTO_Database_Construction")

    # If csv file insertion is required, then open all the files
    # into args_array
    if "csv" in args_array['command_args'] or (
            "database" in args_array['command_args']
            and args_array['database_insert_mode'] == "bulk"):
        args_array['csv_file_array'] = USPTOCSVHandler.open_csv_files(
            args_array['document_type'], args_array['file_name'],
            args_array['csv_directory'])

    # Process zip file by getting .dat or .txt file and .xml filenames
    start_time = time.time()

    # Extract the XML file from the ZIP file
    xml_file_contents = USPTOProcessZipFile.extract_zip_to_array(args_array)

    # create variables needed to parse the file
    xml_string = ''
    patent_xml_started = False
    # read through the file and append into groups of string.
    # Send the finished strings to be parsed
    # Use uspto_xml_format to determine file contents and parse accordingly
    if args_array['uspto_xml_format'] == "aXML4":

        # Loop through all lines in the xml file
        for line in xml_file_contents:

            # This identifies the start of well formed XML segment for patent
            # application bibliographic information
            if "<us-patent-application" in line:

                patent_xml_started = True
                xml_string += line

            # This identifies end of well-formed XML segment for single patent
            # application bibliographic information
            elif "</us-patent-application" in line:

                patent_xml_started = False
                xml_string += line

                # Call the function to extract data
                processed_data_array = USPTOProcessLinks.extract_data_router(
                    xml_string, args_array)
                # Call function to write data to csv or database
                USPTOStoreApplicationData.store_application_data(
                    processed_data_array, args_array)

                # reset the xml string
                xml_string = ''

            # This is used to append lines of file when inside single patent grant
            elif patent_xml_started == True:
                xml_string += USPTOSanitizer.replace_new_html_characters(line)

    elif args_array['uspto_xml_format'] == "aXML1":

        line_count = 1

        # Loop through all lines in the xml file
        for line in xml_file_contents:

            # This identifies the start of well formed XML segment for patent
            # application bibliographic information
            if "<patent-application-publication" in line:

                patent_xml_started = True
                xml_string += line

            # This identifies end of well-formed XML segment for single patent
            # application bibliographic information
            elif "</patent-application-publication" in line:

                patent_xml_started = False
                xml_string += line

                # Call the function to extract data
                processed_data_array = USPTOProcessLinks.extract_data_router(
                    xml_string, args_array)
                # Call function to write data to csv or database
                USPTOStoreApplicationData.store_application_data(
                    processed_data_array, args_array)

                # reset the xml string
                xml_string = ''

            # This is used to append lines of file when inside single patent grant
            elif patent_xml_started == True:
                xml_string += USPTOSanitizer.replace_old_html_characters(line)

    # Close all the .csv files being written to
    USPTOCSVHandler.close_csv_files(args_array)

    # Set a flag file_processed to ensure that the bulk insert succeeds
    file_processed = True

    # If data is to be inserted as bulk csv files, then call the sql function
    if args_array['database_insert_mode'] == 'bulk':
        file_processed = args_array['database_connection'].load_csv_bulk_data(
            args_array, logger)

    # If the file was successfully processed into the database
    if file_processed:
        # Send the information to USPTOLogger.write_process_log to have log file rewritten to "Processed"
        USPTOLogger.write_process_log(args_array)
        if "csv" not in args_array['command_args']:
            # Close all the open csv files
            USPTOCSVHandler.delete_csv_files(args_array)

        # Print message to stdout and log
        print('[Loaded {0} data for {1} into database. Time:{2} Finished Time: {3} ]'.format(
            args_array['document_type'], args_array['url_link'],
            time.time() - start_time, time.strftime("%c")))
        logger.info(
            '[Loaded {0} data for {1} into database. Time:{2} Finished Time: {3} ]'
            .format(args_array['document_type'], args_array['url_link'],
                    time.time() - start_time, time.strftime("%c")))

    else:
        # Print message to stdout and log
        print('[Failed to bulk load {0} data for {1} into database. Time:{2} Finished Time: {3} ]'.format(
            args_array['document_type'], args_array['url_link'],
            time.time() - start_time, time.strftime("%c")))
        logger.error(
            '[Failed to bulk load {0} data for {1} into database. Time:{2} Finished Time: {3} ]'
            .format(args_array['document_type'], args_array['url_link'],
                    time.time() - start_time, time.strftime("%c")))