def test_HTTPErrorCatch(tmpdir):
    '''Test the utility function HTTPErrorCatch (catch HTTP Errors)'''
    # Parameters for an Assembly esummary query used to exercise the wrapper.
    query_id = '5025191'
    query_email = '*****@*****.**'
    Entrez.email = query_email
    query_table = 'Assembly'
    max_tries = 10
    total_tries = 10
    sleep_between_tries = 0
    query_kwargs = {"db": query_table.lower(), "id": query_id}
    query_method = Entrez.esummary

    # Repeatedly fetch the record summary; every attempt must produce a
    # handle that Entrez can read without error.
    for attempt in range(1, total_tries):
        handle = NCBImetaUtilities.HTTPErrorCatch(
            query_method, max_tries, sleep_between_tries, **query_kwargs
        )
        try:
            Entrez.read(handle, validate=False)
            assert True
        except RuntimeError:
            # The handle was not successfully retrieved or the data is malformed
            assert False
def UpdateDB(table, output_dir, database, email, search_term, table_columns,
             log_path, db_dir, api_key, force_pause_seconds):
    '''
    Update the contents of a local sqlite database using records retrieved
    from NCBI as configured by the user.

    Parameters:
        table (str): Name of the NCBI database to search.
        output_dir (str): Path to the directory where output is written.
        database (str): Filename of the local sqlite database.
        email (str): User email.
        search_term (str): Entrez search query.
        table_columns (dict): Dictionary of column name and API name as value,
            ex. {AssemblyGenbankID : GbUid}.
        log_path (str): Path to the directory where the logfile is stored in.
        db_dir (str): Path to the directory where the database is stored in.
        api_key (str): NCBI user account API Key.
        force_pause_seconds (float): Number of seconds to wait in between
            fetch read_attempts.

    Raises:
        ErrorMaxReadAttemptsExceeded: If the Entrez search result cannot be
            read after Entrez.max_tries attempts.
    '''
    print("\nCreating/Updating the " + table +
          " table using the following parameters: " + "\n" +
          "\t" + "Database: " + "\t\t" + database + "\n" +
          "\t" + "Search Term:" + "\t" + "\t" + search_term + "\n" +
          "\t" + "Email: " + "\t\t\t" + email + "\n" +
          "\t" + "API Key: " + "\t\t" + api_key + "\n" +
          "\t" + "Output Directory: " + "\t" + output_dir + "\n\n",
          flush=True)

    Entrez.email = email
    Entrez.api_key = api_key
    # Allow a maximum of 3 tries for error catching before exiting program
    Entrez.max_tries = 3
    # Sleep for 1 second after an error has been generated before retrying
    Entrez.sleep_between_tries = 1

    #------------------------------------------------------------------------#
    #                               File Setup                               #
    #------------------------------------------------------------------------#
    # Name of the log file, placed in the user-supplied log directory.
    # BUGFIX: previously built from the module global LOG_PATH, silently
    # ignoring the log_path parameter documented above.
    log_file_path = os.path.join(
        log_path, os.path.splitext(database)[0] + "_" + table + ".log")
    # Mode "a" appends when the file exists and creates it otherwise,
    # so no exists-check is needed.
    log_file = open(log_file_path, "a")

    #------------------------------------------------------------------------#
    #                                SQL Setup                               #
    #------------------------------------------------------------------------#
    # Connect to database and establish cursor for commands.
    conn = sqlite3.connect(os.path.join(db_dir, database))
    cur = conn.cursor()

    # Create the table, with dynamic columns from the config file.
    # NOTE: table and column names come from the user's own config and cannot
    # be bound as SQL parameters; only record VALUES are parameterized below.
    sql_query = ("Create TABLE IF NOT EXISTS " + table +
                 " (id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE, " +
                 table + "_id TEXT")
    for column_name_dict in table_columns:
        column_name = list(column_name_dict.keys())[0]
        # By default, every user-specified column is type TEXT
        sql_query += ", " + column_name + " TEXT"
    sql_query += ")"
    cur.execute(sql_query)

    #------------------------------------------------------------------------#
    #                              Entrez Search                             #
    #------------------------------------------------------------------------#
    # Read the record, check for http, url, and runtime errors
    read_succeed = False
    read_attempts = 0

    # The esearch parameters are loop-invariant, so build them once.
    kwargs = {"db": table.lower(), "term": search_term, "retmax": "9999999"}
    entrez_method = Entrez.esearch

    # Database reading and entrez searching occur in a while loop to catch
    # errors
    while not read_succeed and read_attempts < Entrez.max_tries:
        # Possible urllib error and RuntimeErrors occurring in the next line
        handle = NCBImetaUtilities.HTTPErrorCatch(
            entrez_method, Entrez.max_tries, Entrez.sleep_between_tries,
            **kwargs)
        try:
            record = Entrez.read(handle)
            read_succeed = True
        except RuntimeError:
            read_attempts += 1
            print("Runtime Error encountered. Sleeping for " +
                  str(Entrez.sleep_between_tries) +
                  " seconds before retrying.")
            time.sleep(Entrez.sleep_between_tries)

    if read_attempts == Entrez.max_tries and not read_succeed:
        raise ErrorMaxReadAttemptsExceeded(table)

    # Count total number of entries, create counter
    num_records = int(record['Count'])
    num_processed = 0

    #------------------------------------------------------------------------#
    #                          Iterate Through ID List                       #
    #------------------------------------------------------------------------#
    for ID in record['IdList']:
        # Increment entry counter and record progress to screen
        num_processed += 1
        print("ID: " + ID, flush=True)
        print("Processing record: " + str(num_processed) +
              "/" + str(num_records), flush=True)

        # Check if the record already exists in the database
        sql_query = ("SELECT EXISTS(SELECT " + table + "_id FROM " + table +
                     " WHERE " + table + "_id=?)")
        cur.execute(sql_query, (ID,))
        # 0 if not found, 1 if found
        record_exists = cur.fetchone()[0]
        # IMPORTANT: the ID is only inserted once the record is fully parsed,
        # so a present ID implies a complete entry and can be skipped.
        if record_exists:
            continue

        # User-configured pause before fetching, to throttle API usage
        time.sleep(force_pause_seconds)

        # The Assembly table cannot be retrieved using efetch, only the
        # docsum esummary; every other table is fetched as XML with efetch.
        kwargs = {"db": table.lower(), "id": ID, "retmode": "xml"}
        if table.lower() == "assembly":
            entrez_method = Entrez.esummary
        else:
            if table.lower() == "nucleotide":
                kwargs["rettype"] = "gb"
            entrez_method = Entrez.efetch

        # ID_handle is a text handle whose content was originally utf-8
        ID_handle = NCBImetaUtilities.HTTPErrorCatch(
            entrez_method, Entrez.max_tries, Entrez.sleep_between_tries,
            **kwargs)

        # Write the fetched XML to a temporary file as raw bytes so the XML
        # parser reads undecoded data (avoids encoding issues).
        # delete=False is required so the file can be reopened by name
        # (notably on Windows); BUGFIX: the temp file was previously leaked,
        # it is now removed after parsing.
        with tempfile.NamedTemporaryFile(delete=False) as temp_b:
            for line in ID_handle:
                temp_b.write(str.encode(line))
        try:
            with open(temp_b.name, 'rb') as xml_source:
                ID_root = etree.parse(xml_source, parser=LXML_CDATA_PARSER)
        finally:
            os.remove(temp_b.name)

        #--------------------------------------------------------------------#
        #                         NCBI Record Parsing                        #
        #--------------------------------------------------------------------#
        column_dict = {}
        # Add ID to the dictionary
        column_dict[table + "_id"] = [ID]
        # A special dictionary for gbseq annotations
        gbseq_dict = {}

        # Iterate through each column to search for metadata
        for column in table_columns:
            column_name = list(column.keys())[0]
            column_payload = list(column.values())[0].split(", ")
            # Initialize with empty values
            column_dict[column_name] = []

            # Queries containing special characters must not be used for
            # xpath searches.
            bool_special_char = any(
                char in xquery
                for char in XPATH_SPECIAL_CHAR
                for xquery in column_payload)
            # If no special characters, run the xpath search functions
            if not bool_special_char:
                NCBImetaUtilities.xml_search(
                    ID_root, column_payload, column_payload[0], column_name,
                    column_dict)

            # Special parsing for GBSeq_comment: only when we're on the
            # GBSeq_comment element and a comment was found
            if ("GBSeq_comment" in column_payload
                    and len(column_dict[column_name]) > 0):
                comment = column_dict[column_name][0]
                # Fix the CDS vs CDSs ambiguity
                comment = comment.replace("CDSs", "CDS")
                # The comment is subdivided by semi-colons
                for item in comment.split(";"):
                    # Items of interest are "key :: value" pairs; skip others
                    split_item = item.split("::")
                    if len(split_item) < 2:
                        continue
                    # Left side is the column name, right side is the metadata
                    split_key = split_item[0].lstrip(" ").rstrip(" ")
                    split_value = split_item[1].lstrip(" ").rstrip(" ")
                    gbseq_dict[split_key] = split_value

            # If the value was still empty, check for a gbseq comment match
            if column_payload[0] in gbseq_dict:
                column_dict[column_name].append(gbseq_dict[column_payload[0]])

        # Flatten each column's value list to a single string for insertion.
        for key in column_dict:
            # Drop empty-string elements, then strip double quotes (retained
            # for output compatibility with the previous string-built SQL,
            # which removed them).
            cleaned = [value.replace("\"", "")
                       for value in column_dict[key] if value != ""]
            column_dict[key] = DB_VALUE_SEP.join(cleaned)

        # Write the column values to the db with a parameterized INSERT.
        # BUGFIX: values were previously interpolated directly into the SQL
        # string (a qmark variant was built but never executed), which was
        # injection-prone and broke on quoting edge cases.
        column_names = list(column_dict.keys())
        sql_query = ("INSERT INTO " + table + " (" + ",".join(column_names) +
                     ") VALUES (" + ",".join("?" * len(column_names)) + ")")
        cur.execute(sql_query,
                    tuple(column_dict[name] for name in column_names))

        # Write to logfile
        now = datetime.datetime.now()
        log_file.write("[" + str(now) + "]" + "\t" +
                       "New entry added with ID:" + "\t" + ID + "." + "\n")
        conn.commit()

    # CLEANUP
    conn.commit()
    cur.close()
    # BUGFIX: close the connection and log file handles (conn was never
    # closed before).
    conn.close()
    log_file.close()