Example 1
def database_update_cycle(database_filename):
    """Perform a single cycle of the database update loop."""

    with start_timer() as timer:

        highest_oeis_id = find_highest_oeis_id()  # Check OEIS server for highest entry ID.

        with close_when_done(sqlite3.connect(database_filename)) as dbconn:
            ensure_database_schema_created(dbconn)
            make_database_complete(dbconn, highest_oeis_id)                      # Make sure we have all entries (full fetch on first run).
            update_database_entries_randomly(dbconn, highest_oeis_id // 1000)    # Refresh 0.1 % of entries randomly.
            update_database_entries_by_priority(dbconn, highest_oeis_id // 200)  # Refresh 0.5 % of entries by priority.
            update_database_entries_for_nonzero_time_window(dbconn)              # Make sure we have t1 != t2 for all entries (full fetch on first run).

        consolidate_database_monthly(database_filename,
                                     remove_stale_files_flag=False)

        logger.info("Full database update cycle took {}.".format(
            timer.duration_string()))
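This cycle leans on two small helpers used throughout the listing, start_timer and close_when_done, that are not part of these examples. A minimal sketch of what they might look like, purely as an assumption inferred from how they are used here:

import contextlib
import time

class _Timer:
    """Simple elapsed-time tracker yielded by start_timer()."""
    def __init__(self):
        self._t0 = time.monotonic()
    def duration_string(self):
        # Report the elapsed time since the timer was created.
        return "{:.2f} seconds".format(time.monotonic() - self._t0)

@contextlib.contextmanager
def start_timer():
    # Yield a timer whose duration_string() reports the elapsed time.
    yield _Timer()

@contextlib.contextmanager
def close_when_done(resource):
    # Yield a resource (connection, cursor, ...) and close it when the block exits.
    try:
        yield resource
    finally:
        resource.close()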
Example 2
def update_database_entries_by_priority(dbconn, howmany):
    """Re-fetch entries that are old, relative to their stability.

    For each entry, a priority is determined, as follows:

    The _age_ is defined as the number of seconds ago that the entry was last fetched in its current state.
    The _stability_ is defined as the number of seconds between the first and last fetches in the current state.
    The _priority_ is the _age_ divided by the _stability_.

    A high priority indicates that the entry is old and/or unstable.
    Such entries are fetched in preference to entries that are recent and/or stable (and have a lower priority).
    """

    t_current = time.time()

    with close_when_done(dbconn.cursor()) as dbcursor:
        query = "SELECT oeis_id FROM oeis_entries ORDER BY (? - t2) / max(t2 - t1, 1e-6) DESC LIMIT ?;"
        dbcursor.execute(query, (t_current, howmany))
        highest_priority_entries = dbcursor.fetchall()

    highest_priority_entries = [oeis_id for (oeis_id, ) in highest_priority_entries]

    logger.info("Highest-priority entries in local database selected for refresh: {}.".format(len(highest_priority_entries)))

    fetch_entries_into_database(dbconn, highest_priority_entries)
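The ORDER BY clause above is the SQL form of the priority described in the docstring. As a purely illustrative, hypothetical equivalent in plain Python (not part of the tool):

def priority(t1, t2, t_current):
    # age: seconds since the last fetch in the current state.
    age = t_current - t2
    # stability: seconds between the first and last fetch in the current state.
    stability = t2 - t1
    # Guard against division by zero for entries with t1 == t2.
    return age / max(stability, 1e-6)

# An entry last fetched 10 days ago that was stable for only 1 day gets
# priority 10.0; one last fetched 1 day ago but stable for 30 days gets
# priority of roughly 0.033, so the first is refreshed well before the second.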
Example 3
def update_database_entries_by_priority(dbconn, howmany):
    """Re-fetch entries that are old, relative to their stability.

    For each entry, a priority is determined, as follows:

    The _age_ is defined as the number of seconds ago that the entry was last fetched in its current state.
    The _stability_ is defined as the number of seconds between the first and last fetches in the current state.
    The _priority_ is the _age_ divided by the _stability_.

    A high priority indicates that the entry is old and/or unstable.
    Such entries are fetched in preference to entries that are recent and/or stable (and have a lower priority).
    """

    t_current = time.time()

    with close_when_done(dbconn.cursor()) as dbcursor:
        query = "SELECT oeis_id FROM oeis_entries ORDER BY (? - t2) / max(t2 - t1, 1e-6) DESC LIMIT ?;"
        dbcursor.execute(query, (t_current, howmany))
        highest_priority_entries = dbcursor.fetchall()

    highest_priority_entries = [
        oeis_id for (oeis_id, ) in highest_priority_entries
    ]

    logger.info(
        "Highest-priority entries in local database selected for refresh: {}.".
        format(len(highest_priority_entries)))

    fetch_entries_into_database(dbconn, highest_priority_entries)
Example 4
def process_responses(dbconn, responses):
    """Process a batch of responses by updating the local SQLite database.

    A logging message is produced that summarizes how the batch of responses was processed.
    This function returns a set of OEIS IDs that have been successfully processed.
    """

    countFailures         = 0
    countNewEntries       = 0
    countIdenticalEntries = 0
    countUpdatedEntries   = 0

    processed_entries = set()

    with close_when_done(dbconn.cursor()) as dbcursor:

        for response in responses:

            if response is None:
                # Skip entries that are not okay.
                # Do not record the failures in the processed_entries set.
                countFailures += 1
                continue

            query = "SELECT main_content, bfile_content FROM oeis_entries WHERE oeis_id = ?;"
            dbcursor.execute(query, (response.oeis_id, ))

            previous_content = dbcursor.fetchall()

            assert len(previous_content) <= 1
            previous_content = None if len(previous_content) == 0 else previous_content[0]

            if previous_content is None:
                # The oeis_id does not occur in the database yet.
                # We will insert it as a new entry.
                query = "INSERT INTO oeis_entries(oeis_id, t1, t2, main_content, bfile_content) VALUES (?, ?, ?, ?, ?);"
                dbcursor.execute(query, (response.oeis_id, response.timestamp, response.timestamp, response.main_content, response.bfile_content))
                countNewEntries += 1
            elif previous_content != (response.main_content, response.bfile_content):
                # The database content is stale.
                # Update t1, t2, and content.
                query = "UPDATE oeis_entries SET t1 = ?, t2 = ?, main_content = ?, bfile_content = ? WHERE oeis_id = ?;"
                dbcursor.execute(query, (response.timestamp, response.timestamp, response.main_content, response.bfile_content, response.oeis_id))
                countUpdatedEntries += 1
            else:
                # The database content is identical to the freshly fetched content.
                # We will just update the t2 field, indicating the fresh fetch.
                query = "UPDATE oeis_entries SET t2 = ? WHERE oeis_id = ?;"
                dbcursor.execute(query, (response.timestamp, response.oeis_id))
                countIdenticalEntries += 1

            processed_entries.add(response.oeis_id)

    dbconn.commit()

    logger.info("Processed {} responses (failures: {}, new: {}, identical: {}, updated: {}).".format(len(responses), countFailures, countNewEntries, countIdenticalEntries, countUpdatedEntries))

    return processed_entries
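The queries above imply the layout of the oeis_entries table: one row per entry, keyed by its numeric ID, with first/last fetch timestamps t1 and t2 plus the raw contents. A plausible sketch of the schema that ensure_database_schema_created (Example 1) could set up; the exact DDL is an assumption, only the column names and their use come from the queries in these examples:

def ensure_database_schema_created(dbconn):
    # t1/t2 are POSIX timestamps of the first and most recent fetch in the
    # entry's current state; the content columns hold the raw fetched text.
    schema = """
        CREATE TABLE IF NOT EXISTS oeis_entries (
            oeis_id       INTEGER PRIMARY KEY,
            t1            REAL NOT NULL,
            t2            REAL NOT NULL,
            main_content  TEXT NOT NULL,
            bfile_content TEXT NOT NULL
        );
    """
    dbconn.execute(schema)
    dbconn.commit()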
Example 5
def process_database_entries(database_filename_in, dirname_out):

    if not os.path.exists(database_filename_in):
        logger.critical("Database file '{}' not found! Unable to continue.".format(database_filename_in))
        return

    (root, ext) = os.path.splitext(database_filename_in)

    if os.path.exists(dirname_out):
        logger.info("Removing stale directory '{}' ...".format(dirname_out))
        shutil.rmtree(dirname_out)

    # ========== fetch and process database entries, ordered by oeis_id.

    BATCH_SIZE = 1000

    with start_timer() as timer:
        with close_when_done(sqlite3.connect(database_filename_in)) as dbconn_in, close_when_done(dbconn_in.cursor()) as dbcursor_in:

            dbcursor_in.execute("SELECT oeis_id, t1, t2, main_content, bfile_content FROM oeis_entries ORDER BY oeis_id;")

            while True:

                oeis_entries = dbcursor_in.fetchmany(BATCH_SIZE)
                if len(oeis_entries) == 0:
                    break

                logger.log(logging.PROGRESS, "Processing OEIS entries A{:06} to A{:06} ...".format(oeis_entries[0][0], oeis_entries[-1][0]))

                for (oeis_id, t1, t2, main_content, bfile_content) in oeis_entries:

                    directory = os.path.join(dirname_out, "A{:03d}xxx".format(oeis_id // 1000), "A{:06d}".format(oeis_id))
                    os.makedirs(directory)

                    with open(os.path.join(directory, "metadata.json"), "w") as f:
                        metadata = [oeis_id, t1, t2]
                        json.dump(metadata, f)

                    with open(os.path.join(directory, "main_content.txt"), "w") as f:
                        f.write(main_content)

                    with open(os.path.join(directory, "bfile_content.txt"), "w") as f:
                        f.write(bfile_content)

        logger.info("Processed all database entries in {}.".format(timer.duration_string()))
Example 6
def process_database_entries(database_filename_in):

    if not os.path.exists(database_filename_in):
        logger.critical("Database file '{}' not found! Unable to continue.".format(database_filename_in))
        return

    (root, ext) = os.path.splitext(database_filename_in)

    database_filename_out = root + "_parsed" + ext

    if os.path.exists(database_filename_out):
        logger.info("Removing stale file '{}' ...".format(database_filename_out))
        os.remove(database_filename_out)

    # ========== fetch and process database entries, ordered by oeis_id.

    BATCH_SIZE = 1000

    with start_timer() as timer:
        with close_when_done(sqlite3.connect(database_filename_in)) as dbconn_in, close_when_done(dbconn_in.cursor()) as dbcursor_in:
            with close_when_done(sqlite3.connect(database_filename_out)) as dbconn_out, close_when_done(dbconn_out.cursor()) as dbcursor_out:

                create_database_schema(dbconn_out)

                with concurrent.futures.ProcessPoolExecutor() as pool:

                    dbcursor_in.execute("SELECT oeis_id, main_content, bfile_content FROM oeis_entries ORDER BY oeis_id;")

                    while True:

                        oeis_entries = dbcursor_in.fetchmany(BATCH_SIZE)
                        if len(oeis_entries) == 0:
                            break

                        logger.log(logging.PROGRESS, "Processing OEIS entries A{:06} to A{:06} ...".format(oeis_entries[0][0], oeis_entries[-1][0]))

                        query = "INSERT INTO oeis_entries(oeis_id, identification, value_list, name, comments, detailed_references, links, formulas, examples, maple_programs, mathematica_programs, other_programs, cross_references, keywords, offset_a, offset_b, author, extensions_and_errors) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);"

                        dbcursor_out.executemany(query, pool.map(process_oeis_entry, oeis_entries))

                        dbconn_out.commit()

        logger.info("Processed all database entries in {}.".format(timer.duration_string()))
Example 7
def solve_linear_recurrences(database_filename_in, terms, exclude_entries = None):

    if not os.path.exists(database_filename_in):
        logger.critical("Database file '{}' not found! Unable to continue.".format(database_filename_in))
        return

    if exclude_entries is None:
        exclude_entries = frozenset()

    # ========== fetch and process database entries, ordered by oeis_id.

    BATCH_SIZE = 1000

    with start_timer() as timer:

        with close_when_done(sqlite3.connect(database_filename_in)) as dbconn_in, close_when_done(dbconn_in.cursor()) as dbcursor_in:

            with concurrent.futures.ProcessPoolExecutor() as pool:

                dbcursor_in.execute("SELECT oeis_id, main_content, bfile_content FROM oeis_entries ORDER BY oeis_id;")

                while True:

                    oeis_entries = dbcursor_in.fetchmany(BATCH_SIZE)
                    if len(oeis_entries) == 0:
                        break

                    logger.log(logging.PROGRESS, "Processing OEIS entries A{:06} to A{:06} ...".format(oeis_entries[0][0], oeis_entries[-1][0]))

                    work = [(oeis_id, main_content, bfile_content, terms) for (oeis_id, main_content, bfile_content) in oeis_entries if "A{:06d}".format(oeis_id) not in exclude_entries]

                    for (oeis_entry, solution) in pool.map(process_oeis_entry, work):
                        if solution is not None:
                            yield (str(oeis_entry), solution)

        logger.info("Processed all database entries in {}.".format(timer.duration_string()))
Example 8
def update_database_entries_randomly(dbconn, howmany):
    """Re-fetch (update) a random subset of entries that are already present in the local SQLite database."""

    with close_when_done(dbconn.cursor()) as dbcursor:
        dbcursor.execute("SELECT oeis_id FROM oeis_entries;")
        present_entries = dbcursor.fetchall()

    present_entries = [oeis_id for (oeis_id, ) in present_entries]

    random_entries_count = min(howmany, len(present_entries))

    random_entries = random.sample(present_entries, random_entries_count)

    logger.info("Random entries in local database selected for refresh: {}.".format(len(random_entries)))

    fetch_entries_into_database(dbconn, random_entries)
Example 9
def make_database_complete(dbconn, highest_oeis_id):
    """Fetch all entries from the remote OEIS database that are not yet present in the local SQLite database."""

    with close_when_done(dbconn.cursor()) as dbcursor:
        dbcursor.execute("SELECT oeis_id FROM oeis_entries;")
        present_entries = dbcursor.fetchall()

    present_entries = [oeis_id for (oeis_id, ) in present_entries]
    logger.info("Entries present in local database: {}.".format(len(present_entries)))

    all_entries = range(1, highest_oeis_id + 1)

    missing_entries = set(all_entries) - set(present_entries)
    logger.info("Missing entries to be fetched: {}.".format(len(missing_entries)))

    fetch_entries_into_database(dbconn, missing_entries)
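fetch_entries_into_database itself is not shown in this listing. A rough, hypothetical sketch of its role, assuming a fetch_remote_oeis_entries helper that downloads a batch of entries from the OEIS server and returns response objects (or None on failure) in the shape consumed by process_responses in Example 4:

FETCH_BATCH_SIZE = 250  # illustrative batch size, not taken from the source

def fetch_entries_into_database(dbconn, entries):
    # Download the requested OEIS IDs in batches and hand each batch of
    # responses to process_responses(), which updates the local database.
    entries = sorted(entries)
    for i in range(0, len(entries), FETCH_BATCH_SIZE):
        batch = entries[i:i + FETCH_BATCH_SIZE]
        responses = fetch_remote_oeis_entries(batch)  # hypothetical network helper
        process_responses(dbconn, responses)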
Example 10
def consolidate_database_monthly(database_filename, remove_stale_files_flag):
    """Make a consolidated version of the database, once per day.

    The consolidated version will have a standardized filename 'oeis_vYYYYMMDD.sqlite3.xz'.

    If this filename already exists, we return immediately.

    If not, we vacuum the database, and compress its file. This process takes ~ 2 hours on a fast desktop PC.

    When the compressed database has been written and remove_stale_files_flag is set, we remove all
    'stale' consolidated files, i.e., all files named 'oeis_vYYYYMMDD.sqlite3.xz' except the one we just wrote.
    """

    now = datetime.datetime.now()

    if now.day != 1:
        return

    xz_filename = now.strftime("oeis_v%Y%m%d.sqlite3.xz")
    if os.path.exists(xz_filename):
        return  # file already exists.

    with start_timer() as timer:

        logger.info("Consolidating database to '{}' ...".format(xz_filename))

        # Vacuum the database
        with close_when_done(sqlite3.connect(database_filename)) as dbconn:
            vacuum_database(dbconn)

        # Create the xz file.
        compress_file(database_filename, xz_filename)

        # Remove stale files.
        if remove_stale_files_flag:
            stale_files = [
                filename for filename in glob.glob("oeis_v????????.sqlite3.xz")
                if filename != xz_filename
            ]
            for filename in stale_files:
                logger.info("Removing stale consolidated file '{}' ...".format(
                    filename))
                os.remove(filename)

        logger.info("Consolidating data took {}.".format(
            timer.duration_string()))
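The vacuum_database and compress_file steps referred to in the docstring are not included in this listing either. A minimal sketch of what they could look like, assuming SQLite's VACUUM statement and the standard-library lzma module; only the vacuum-then-compress order is taken from the code above:

import lzma
import shutil

def vacuum_database(dbconn):
    # VACUUM rewrites the database file, reclaiming free pages before compression.
    dbconn.execute("VACUUM;")

def compress_file(filename_in, filename_out):
    # Stream-compress the database file into an .xz archive.
    with open(filename_in, "rb") as f_in, lzma.open(filename_out, "wb") as f_out:
        shutil.copyfileobj(f_in, f_out)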
Example 11
def database_update_cycle(database_filename):
    """Perform a single cycle of the database update loop."""

    with start_timer() as timer:

        highest_oeis_id = find_highest_oeis_id() # Check OEIS server for highest entry ID.

        with close_when_done(sqlite3.connect(database_filename)) as dbconn:
            ensure_database_schema_created(dbconn)
            make_database_complete(dbconn, highest_oeis_id)                      # Make sure we have all entries (full fetch on first run).
            update_database_entries_randomly(dbconn, highest_oeis_id // 1000)    # Refresh 0.1 % of entries randomly.
            update_database_entries_by_priority(dbconn, highest_oeis_id //  200) # Refresh 0.5 % of entries by priority.
            update_database_entries_for_nonzero_time_window(dbconn)              # Make sure we have t1 != t2 for all entries (full fetch on first run).

        consolidate_database_monthly(database_filename, remove_stale_files_flag = False)

        logger.info("Full database update cycle took {}.".format(timer.duration_string()))
Example 12
def update_database_entries_for_nonzero_time_window(dbconn):
    """ Re-fetch entries in the database that have a 0-second time window. These are entries that have been fetched only once."""

    while True:

        with close_when_done(dbconn.cursor()) as dbcursor:
            dbcursor.execute("SELECT oeis_id FROM oeis_entries WHERE t1 = t2;")
            zero_timewindow_entries = dbcursor.fetchall()

        if len(zero_timewindow_entries) == 0:
            break # no zero-timewindow entries.

        zero_timewindow_entries = [oeis_id for (oeis_id, ) in zero_timewindow_entries]

        logger.info("Entries with zero time window in local database selected for refresh: {}.".format(len(zero_timewindow_entries)))

        fetch_entries_into_database(dbconn, zero_timewindow_entries)
Example 13
def update_database_entries_randomly(dbconn, howmany):
    """Re-fetch (update) a random subset of entries that are already present in the local SQLite database."""

    with close_when_done(dbconn.cursor()) as dbcursor:
        dbcursor.execute("SELECT oeis_id FROM oeis_entries;")
        present_entries = dbcursor.fetchall()

    present_entries = [oeis_id for (oeis_id, ) in present_entries]

    random_entries_count = min(howmany, len(present_entries))

    random_entries = random.sample(present_entries, random_entries_count)

    logger.info(
        "Random entries in local database selected for refresh: {}.".format(
            len(random_entries)))

    fetch_entries_into_database(dbconn, random_entries)
Example 14
def make_database_complete(dbconn, highest_oeis_id):
    """Fetch all entries from the remote OEIS database that are not yet present in the local SQLite database."""

    with close_when_done(dbconn.cursor()) as dbcursor:
        dbcursor.execute("SELECT oeis_id FROM oeis_entries;")
        present_entries = dbcursor.fetchall()

    present_entries = [oeis_id for (oeis_id, ) in present_entries]
    logger.info("Entries present in local database: {}.".format(
        len(present_entries)))

    all_entries = range(1, highest_oeis_id + 1)

    missing_entries = set(all_entries) - set(present_entries)
    logger.info("Missing entries to be fetched: {}.".format(
        len(missing_entries)))

    fetch_entries_into_database(dbconn, missing_entries)
Example 15
def consolidate_database_monthly(database_filename, remove_stale_files_flag):
    """Make a consolidated version of the database, once per day.

    The consolidated version will have a standardized filename 'oeis_vYYYYMMDD.sqlite3.xz'.

    If this filename already exists, we return immediately.

    If not, we vacuum the database, and compress its file. This process takes ~ 2 hours on a fast desktop PC.

    When the compressed database has been written and remove_stale_files_flag is set, we remove all
    'stale' consolidated files, i.e., all files named 'oeis_vYYYYMMDD.sqlite3.xz' except the one we just wrote.
    """

    now = datetime.datetime.now()

    if now.day != 1:
        return

    xz_filename = now.strftime("oeis_v%Y%m%d.sqlite3.xz")
    if os.path.exists(xz_filename):
        return # file already exists.

    with start_timer() as timer:

        logger.info("Consolidating database to '{}' ...".format(xz_filename))

        # Vacuum the database
        with close_when_done(sqlite3.connect(database_filename)) as dbconn:
            vacuum_database(dbconn)

        # Create the xz file.
        compress_file(database_filename, xz_filename)

        # Remove stale files.
        if remove_stale_files_flag:
            stale_files = [filename for filename in glob.glob("oeis_v????????.sqlite3.xz") if filename != xz_filename]
            for filename in stale_files:
                logger.info("Removing stale consolidated file '{}' ...".format(filename))
                os.remove(filename)

        logger.info("Consolidating data took {}.".format(timer.duration_string()))
Example 16
def update_database_entries_for_nonzero_time_window(dbconn):
    """ Re-fetch entries in the database that have a 0-second time window. These are entries that have been fetched only once."""

    while True:

        with close_when_done(dbconn.cursor()) as dbcursor:
            dbcursor.execute("SELECT oeis_id FROM oeis_entries WHERE t1 = t2;")
            zero_timewindow_entries = dbcursor.fetchall()

        if len(zero_timewindow_entries) == 0:
            break  # no zero-timewindow entries.

        zero_timewindow_entries = [
            oeis_id for (oeis_id, ) in zero_timewindow_entries
        ]

        logger.info(
            "Entries with zero time window in local database selected for refresh: {}."
            .format(len(zero_timewindow_entries)))

        fetch_entries_into_database(dbconn, zero_timewindow_entries)
Example 17
def process_responses(dbconn, responses):
    """Process a batch of responses by updating the local SQLite database.

    A logging message is produced that summarizes how the batch of responses was processed.
    This function returns a set of OEIS IDs that have been successfully processed.
    """

    countFailures = 0
    countNewEntries = 0
    countIdenticalEntries = 0
    countUpdatedEntries = 0

    processed_entries = set()

    with close_when_done(dbconn.cursor()) as dbcursor:

        for response in responses:

            if response is None:
                # Skip entries that are not okay.
                # Do not record the failures in the processed_entries set.
                countFailures += 1
                continue

            query = "SELECT main_content, bfile_content FROM oeis_entries WHERE oeis_id = ?;"
            dbcursor.execute(query, (response.oeis_id, ))

            previous_content = dbcursor.fetchall()

            assert len(previous_content) <= 1
            previous_content = None if len(
                previous_content) == 0 else previous_content[0]

            if previous_content is None:
                # The oeis_id does not occur in the database yet.
                # We will insert it as a new entry.
                query = "INSERT INTO oeis_entries(oeis_id, t1, t2, main_content, bfile_content) VALUES (?, ?, ?, ?, ?);"
                dbcursor.execute(
                    query,
                    (response.oeis_id, response.timestamp, response.timestamp,
                     response.main_content, response.bfile_content))
                countNewEntries += 1
            elif previous_content != (response.main_content,
                                      response.bfile_content):
                # The database content is stale.
                # Update t1, t2, and content.
                query = "UPDATE oeis_entries SET t1 = ?, t2 = ?, main_content = ?, bfile_content = ? WHERE oeis_id = ?;"
                dbcursor.execute(query,
                                 (response.timestamp, response.timestamp,
                                  response.main_content,
                                  response.bfile_content, response.oeis_id))
                countUpdatedEntries += 1
            else:
                # The database content is identical to the freshly fetched content.
                # We will just update the t2 field, indicating the fresh fetch.
                query = "UPDATE oeis_entries SET t2 = ? WHERE oeis_id = ?;"
                dbcursor.execute(query, (response.timestamp, response.oeis_id))
                countIdenticalEntries += 1

            processed_entries.add(response.oeis_id)

    dbconn.commit()

    logger.info(
        "Processed {} responses (failures: {}, new: {}, identical: {}, updated: {})."
        .format(len(responses), countFailures, countNewEntries,
                countIdenticalEntries, countUpdatedEntries))

    return processed_entries
Example 18
def show_entries(database_filename):

    if not os.path.exists(database_filename):
        logger.critical("Database file '{}' not found! Unable to continue.".format(database_filename))
        return

    t_current = time.time()

    with close_when_done(sqlite3.connect(database_filename)) as dbconn, close_when_done(dbconn.cursor()) as dbcursor:
        query = "SELECT oeis_id, t1, t2 FROM oeis_entries;"
        dbcursor.execute(query)
        data = dbcursor.fetchall()

    data_dtype = np.dtype([
            ("oeis_id", np.int),
            ("t1"     , np.float64),
            ("t2"     , np.float64)
        ]
    )

    data = np.array(data, dtype = data_dtype)

    t1      = data["t1"]
    t2      = data["t2"]
    oeis_id = data["oeis_id"]

    age       = t_current - t2
    stability = t2 - t1

    priority = age / np.maximum(stability, 1e-6)

    plt.subplot(331)
    plt.xlabel("oeis id")
    plt.ylabel("t1 [h]")
    plt.plot(oeis_id, (t1 - t_current) / 3600.0, '.', markersize = 0.5)

    plt.subplot(332)
    plt.xlabel("oeis id")
    plt.ylabel("t2 [h]")
    plt.plot(oeis_id, (t2 - t_current) / 3600.0, '.', markersize = 0.5)

    plt.subplot(334)
    plt.hist(age / 3600.0, bins = 200, log = True)
    plt.xlabel("age [h]")

    plt.subplot(335)
    plt.xlabel("stability [h]")
    plt.ylabel("age [h]")
    plt.plot(stability / 3600.0, age / 3600.0, '.', markersize = 0.5)

    plt.subplot(336)
    #bins = np.logspace(-10.0, +20.0, 200)
    #plt.hist(np.log10(priority), bins = bins, log = True)

    RANGE_MAX = 0.15

    plt.hist(priority, range = (0, RANGE_MAX), bins = 200, log = True)

    plt.xlabel("priority ({} values > {:.3f})".format(np.sum(priority > RANGE_MAX), RANGE_MAX))

    plt.subplot(338)
    plt.hist(stability / 3600.0, bins = 200, log = True)
    plt.xlabel("stability [h]")

    plt.show()