def get_wiki_list(start_idx, end_idx, user_db_port=None, user=None, password=None):
    """
    Fetches urls of all wikis and chooses the ones in the given indexes (both start and end indexes are included).

    :param start_idx: starting index of the wikis, which should be processed.
    :param end_idx: ending index of the wikis, which should be processed.
    :param user_db_port: port for connecting to local Sources table through ssh tunneling, if used.
    :param user: Toolforge username of the tool.
    :param password: Toolforge password of the tool.
    :return: list of wikis' urls within given indexes
    """
    try:
        conn = db_acc.connect_to_user_database(
            constants.DATABASE_NAME, user_db_port, user, password
        )
        # Close the connection even if the query raises.
        try:
            with conn.cursor() as cur:
                cur.execute(
                    "select url from Sources where url is not NULL"
                )  # all, except 'meta'
                ret = [wiki[0] for wiki in cur][start_idx : end_idx + 1]
        finally:
            conn.close()
        return ret
    except Exception as err:
        print("Something went wrong.\n", err)
        exit(1)
# Esempio n. 2  (scraper artifact; commented out so the file parses)
def get_only_db_pages(user_db_port=None, user=None, password=None):
    """
    Get list of pages that were not loaded from API but exist in database.

    :param user_db_port: port for connecting to local Sources table through ssh tunneling, if used.
    :param user: Toolforge username of the tool.
    :param password: Toolforge password of the tool.
    :return: dataframe with columns page_id, dbname, wiki
    """

    try:
        conn = db_acc.connect_to_user_database(
            DATABASE_NAME, user_db_port, user, password
        )
        # pd.read_sql_query uses the connection directly; the cursor the
        # original wrapped this in was never used.
        sql_result = pd.read_sql_query(
            "SELECT page_id, dbname FROM Scripts WHERE in_api = 0 AND in_database = 1",
            conn,
        )
        df = pd.DataFrame(sql_result).applymap(encode_if_necessary)
        # Map each dbname to its wiki url via the Sources table.
        df["wiki"] = df["dbname"].map(get_db_map(dbs=list(df["dbname"].values))[0])
        conn.close()
        return df
    except Exception as err:
        print("Something went wrong.\n", err)
        exit(1)
def save_missed_content(wiki, missed, user_db_port=None, user=None, password=None):
    """
    Mark missed pages as is_missed=True in Scripts table.

    :param wiki: The wiki project corresponding to the data provided.
    :param missed: List of pages missed.
    :param user_db_port: port for connecting to local Sources table through ssh tunneling, if used.
    :param user: Toolforge username of the tool.
    :param password: Toolforge password of the tool.
    :return: None
    """
    missed_df = pd.DataFrame(missed, columns=["id"])

    # Upsert: mark the page as seen via API and missed.
    upsert = (
        "INSERT INTO Scripts(dbname, page_id, in_api, is_missed) "
        "values(%s, %s, %s, %s) "
        "ON DUPLICATE KEY UPDATE in_api = %s, is_missed = %s"
    )
    try:
        conn = db_acc.connect_to_user_database(
            constants.DATABASE_NAME, user_db_port, user, password
        )
        with conn.cursor() as cur:
            # Resolve the wiki url to its dbname first.
            cur.execute("SELECT dbname FROM Sources WHERE url = %s", wiki)
            dbname = cur.fetchone()[0]
            for _, row in missed_df.iterrows():
                cur.execute(upsert, [dbname, row["id"], 1, 1, 1, 1])
        conn.commit()
        conn.close()
    except Exception as err:
        print("Something went wrong.\n", err)
        exit(1)
# Esempio n. 4  (scraper artifact; commented out so the file parses)
def get_data(
    with_data,
    user_db_port,
    user,
    password,
    maxlen=MAXLEN,
):
    """
    Gets all sourcecodes of Lua functions from user's database.

    :param with_data: whether to grab all modules or only those that are not data modules (when false).
    :param user_db_port: port for connecting to local Sources table through ssh tunneling, if used.
    :param user: Toolforge username of the tool.
    :param password: Toolforge password of the tool.
    :param maxlen: The length to which the sourcecode should be truncated.
    :return: DataFrame with columns page_id, dbname, sourcecode
    """
    # Bind maxlen as a query parameter instead of %-interpolating it into the
    # SQL text.
    query = "SELECT page_id, dbname, LEFT(sourcecode, %s) FROM Scripts"
    if not with_data:
        query += " WHERE is_data=0"

    cols = ["page_id", "dbname", "sourcecode"]
    conn = db_acc.connect_to_user_database(
        DATABASE_NAME, user_db_port, user, password)
    with conn.cursor() as cur:
        cur.execute(query, [maxlen])
        df = pd.DataFrame(cur.fetchall(), columns=cols).applymap(
            encode_if_necessary)
    close_conn(conn)

    return df
# Esempio n. 5  (scraper artifact; commented out so the file parses)
def get_cluster(
    with_data, user_db_port, user, password, page_id=None, dbname=None, cluster_number=0, is_cluster=False
):
    """
    Get similar modules with (page_id, dbname) or with cluster_number.

    :param with_data: whether to get cluster from 'cluster' column. If false, will get data from 'cluster_wo_data' columns.
    :param user_db_port: port for connecting to local Sources table through ssh tunneling, if used.
    :param user: Toolforge username of the tool.
    :param password: Toolforge password of the tool.
    :param page_id: Has to be provided with dbname, used when is_cluster=False. The page whose similar modules is sought.
    :param dbname: Has to be provided with page_id, used when is_cluster=False. The database of page whose similar modules is sought.
    :param cluster_number: Used when is_cluster=True. The cluster number/id to fetch all pages of that cluster.
    :param is_cluster: True to look pages up by cluster_number directly; False to first resolve the cluster of (page_id, dbname).
    :return: DataFrame with columns page_id, dbname of the cluster's pages
    """
    col = "cluster" if with_data else "cluster_wo_data"

    if not is_cluster:
        # col is an internal constant; page_id and dbname are bound as query
        # parameters. (The old %-interpolation left the dbname string
        # unquoted inside the SQL, which produced an invalid query.)
        query = "SELECT " + col + " FROM Scripts WHERE page_id=%s AND dbname=%s"
        conn = db_acc.connect_to_user_database(
            DATABASE_NAME, user_db_port, user, password)
        with conn.cursor() as cur:
            cur.execute(query, [page_id, dbname])
            cluster_number = cur.fetchone()[0]
        close_conn(conn)

    query = "SELECT page_id, dbname FROM Scripts WHERE " + col + "=%s"
    cols = ["page_id", "dbname"]
    conn = db_acc.connect_to_user_database(
        DATABASE_NAME, user_db_port, user, password)
    with conn.cursor() as cur:
        cur.execute(query, [cluster_number])
        df = pd.DataFrame(cur.fetchall(), columns=cols).applymap(
            encode_if_necessary)
    close_conn(conn)

    return df
def get_data(feature_names, user_db_port, user, password):
    """
    Collects data from Scripts table, columns include column names in feature names
    and additionally 'edits per editor' and 'edits per day' are calculated.
    Adds 'is_data' column for usage specific to webservice.

    :param feature_names: List of features whose scores are to be calculated
    :param user_db_port: Port for connecting to local Sources table through ssh tunneling, if used.
    :param user: Toolforge username of the tool.
    :param password: Toolforge password of the tool.
    :return: DataFrame
    """
    cols = copy.deepcopy(feature_names)

    # 'edits' and 'editors' are always needed for the derived metrics.
    for required in ("edits", "editors"):
        if required not in cols:
            cols += [required]
    # major_edits is derived later from edits - minor_edits.
    if "major_edits" in cols:
        cols.remove("major_edits")
        cols += ["minor_edits"]

    cols += ["page_id", "dbname", "is_data", "first_edit", "last_edit"]

    query = "SELECT " + ",".join(cols) + " FROM Scripts"

    conn = db_acc.connect_to_user_database(DATABASE_NAME, user_db_port, user,
                                           password)
    with conn.cursor() as cur:
        cur.execute(query)
        df = pd.DataFrame(cur.fetchall(), columns=cols).applymap(
            encode_if_necessary)
    close_conn(conn)

    # Derived metrics; division by zero yields inf, which is zeroed out.
    df["edits_per_editor"] = (df["edits"] / df["editors"]).replace(np.inf, 0)
    lifetime = df["last_edit"] - df["first_edit"]
    time_range = lifetime.apply(lambda d: d.days + d.seconds / (24 * 60 * 60))
    df["edits_per_day"] = (df["edits"] / time_range).replace(np.inf, 0)

    if "major_edits" in feature_names:
        df["major_edits"] = df["edits"] - df["minor_edits"]

    base_cols = ["page_id", "dbname", "is_data", "edits_per_editor", "edits_per_day"]
    return df[base_cols + feature_names]
# Esempio n. 7  (scraper artifact; commented out so the file parses)
def store_data(df, col, user_db_port, user, password):
    """
    Stores the labels of clusters in user database field named by 'col'.

    :param df: The dataframe containing labels of clusters along with page_id and dbname.
    :param col: The field name in user database to which the labels are to be stored.
        NOTE(review): col is interpolated into the SQL text, so it must come
        from trusted code, never user input.
    :param user_db_port: port for connecting to local Sources table through ssh tunneling, if used.
    :param user: Toolforge username of the tool.
    :param password: Toolforge password of the tool.
    :return: None
    """
    query1 = "UPDATE Scripts SET " + col + "=NULL"
    query2 = "UPDATE Scripts SET " + col + "=%s WHERE page_id=%s AND dbname=%s"
    max_tries = 3
    retry_counter = 1
    conn = None  # so the except-branch cleanup is safe if connect() itself fails

    while True:
        try:
            # Need to keep query1 and query2 in the same transaction
            conn = db_acc.connect_to_user_database(
                DATABASE_NAME, user_db_port, user, password
            )
            with conn.cursor() as cur:
                cur.execute(query1)
                for _, elem in df.iterrows():
                    cur.execute(
                        query2, [elem["group"],
                                 elem["page_id"], elem["dbname"]]
                    )
            conn.commit()
            close_conn(conn)
            break
        except (pymysql.err.DatabaseError, pymysql.err.OperationalError) as err:
            if conn is not None:
                close_conn(conn)
            if retry_counter == max_tries:
                raise Exception(err)
            print("Retrying saving clusters in 1 minute...")
            retry_counter += 1
            time.sleep(60)
        except Exception as err:
            print("Something went wrong. Error saving clusters \n", repr(err))
            break
def save_to_db(entries, db, user_db_port=None, user=None, password=None):
    """
    Saves dataframe into Scripts table.
    Note that title from page-table does not have namespace prefix, title from API does.
    We retain the value from API.

    :param entries: A dataframe with columns: page_id, page_is_redirect, page_is_new.
    :param db: The dbname recorded for every row of `entries`.
    :param user_db_port: Port for connecting to local Sources table through ssh tunneling, if used.
    :param user: Toolforge username of the tool.
    :param password: Toolforge password of the tool.
    :return: None
    """

    query = (
        "INSERT INTO Scripts(dbname, page_id, in_database, page_is_redirect, page_is_new) "
        "VALUES(%s, %s, %s, %s, %s) "
        "ON DUPLICATE KEY UPDATE in_database = %s, page_is_redirect = %s, page_is_new = %s"
    )
    try:
        conn = db_acc.connect_to_user_database(constants.DATABASE_NAME,
                                               user_db_port, user, password)
        with conn.cursor() as cur:
            for index, elem in entries.iterrows():
                is_redirect = elem["page_is_redirect"]
                is_new = elem["page_is_new"]
                # in_database is hard-coded to 1: every row saved here came
                # from the wiki database.
                cur.execute(
                    query,
                    [db, elem["page_id"], 1, is_redirect, is_new,
                     1, is_redirect, is_new],
                )
        conn.commit()
        conn.close()
    except Exception as err:
        print("Something went wrong.\n", err)
        exit(1)
def get_mapping(user_db_port, user, password):
    """
    Fetch and return database name to URL mapping.

    :param user_db_port: port for connecting to local Sources table through ssh tunneling, if used.
    :param user: Toolforge username of the tool.
    :param password: Toolforge password of the tool.
    :return: dictionary of fetched info in form {dbname1: url1, dbname2:url2,...}
    """
    try:
        conn = db_acc.connect_to_user_database(
            DATABASE_NAME, user_db_port, user, password
        )
        with conn.cursor() as cur:
            cur.execute("SELECT dbname, url FROM Sources WHERE url IS NOT NULL")
            # The cursor yields (dbname, url) pairs, so dict() builds the map.
            db_map = dict(cur)
        conn.close()
        return db_map
    except Exception as err:
        print("Something went wrong getting dbname-url mapping.\n", err)
        exit(1)
def get_dbs(user_db_port=None, user=None, password=None):
    """
    Returns a list of all the dbnames from Sources table.

    :param user_db_port: Port for connecting to local Sources table through ssh tunneling, if used.
    :param user: Toolforge username of the tool.
    :param password: Toolforge password of the tool.
    :return: list
    """
    try:
        conn = db_acc.connect_to_user_database(DATABASE_NAME, user_db_port,
                                               user, password)
        with conn.cursor() as cur:
            # all, except 'meta'
            cur.execute("SELECT dbname FROM Sources WHERE url IS NOT NULL")
            dbnames = [row[0] for row in cur]
        conn.close()
        return dbnames
    except Exception as err:
        print("Something went wrong.\n", err)
        exit(1)
def get_db_map(wikis=None, dbs=None, user_db_port=None, user=None, password=None):
    """
    Fetches info from the users database about the wikis with given dbnames or urls.
    Chooses search by urls by default, if none are given (wikis is empty), searches by dbnames from dbs.

    :param wikis: list of wikis' urls, whose info needed.
    :param dbs:  list of wikis' dbnames, whose info needed.
    :param user_db_port: port for connecting to local Sources table through ssh tunneling, if used.
    :param user: Toolforge username of the tool.
    :param password: Toolforge password of the tool.
    :return: dictionary of fetched info in form {dbname1: url1, dbname2:url2,...},
    a comma separated placeholder(%s) string for the the number of db or wikis.
    """
    # None sentinels instead of mutable default arguments ([]).
    wikis = [] if wikis is None else wikis
    dbs = [] if dbs is None else dbs

    query_input = []

    if len(wikis) > 0:
        placeholders = ",".join("%s" for _ in wikis)
        query_input = wikis
        query = "SELECT dbname, url FROM Sources WHERE url IN (%s)" % placeholders
    else:
        placeholders = ",".join("%s" for _ in dbs)
        query_input = dbs
        query = "SELECT dbname, url FROM Sources WHERE dbname IN (%s)" % placeholders

    db_map = {}

    try:
        conn = db_acc.connect_to_user_database(
            constants.DATABASE_NAME, user_db_port, user, password
        )
        with conn.cursor() as cur:
            cur.execute(query, query_input)
            db_map = {data[0]: data[1] for data in cur}
        conn.close()
    except Exception as err:
        print("Something went wrong.\n", err)
        exit(1)

    return db_map, placeholders
def remove_missed_contents(user_db_port, user, password):
    """
    Removes pages with missing content or data.

    :param user_db_port: port for connecting to local Sources table through ssh tunneling, if used.
    :param user: Toolforge username of the tool.
    :param password: Toolforge password of the tool.
    :return: None
    """

    try:
        conn = db_acc.connect_to_user_database(DATABASE_NAME, user_db_port,
                                               user, password)
        with conn.cursor() as cur:
            cur.execute("DELETE FROM Scripts WHERE is_missed=1")
            cur.execute("DELETE FROM Scripts WHERE in_api=0 OR in_database=0")
        conn.commit()
        conn.close()
    except Exception as err:
        print("Something went wrong. Could not delete rows. \n", err)
    else:
        # Only report success when the deletes actually committed (the old
        # code printed this even after a failure).
        print("Removed redundant rows...")
# Esempio n. 13  (scraper artifact; commented out so the file parses)
def get_last_update_local_db(user_db_port=None, user=None, password=None):
    """
    Fetches the last update time recorded for the meta table from the local
    Sources table (the row with dbname = 'meta').

    :param user_db_port: Port for connecting to local Sources table through ssh tunneling, if used.
    :param user: Toolforge username of the tool.
    :param password: Toolforge password of the tool.
    :return: Datetime.datetime of last update or None
    """
    query = "SELECT update_time FROM Sources WHERE dbname = 'meta'"
    update_time = None

    try:
        conn = db_acc.connect_to_user_database(constants.DATABASE_NAME,
                                               user_db_port, user, password)
        # Close the connection on both the success and the error path (the
        # old code never closed it).
        try:
            with conn.cursor() as cur:
                cur.execute(query)
                update_time = cur.fetchone()[0]
            return update_time
        finally:
            conn.close()
    except Exception as err:
        print("Something went wrong.\n", err)
        exit(1)
# Esempio n. 14  (scraper artifact; commented out so the file parses)
def update_local_db(update_time, user_db_port=None, user=None, password=None):
    """
    Saves new update time for meta table, creating corresponding row if needed.

    :param update_time: Datetime.datetime, time of last update for meta table
    :param user_db_port: Port for connecting to local Sources table through ssh tunneling, if used.
    :param user: Toolforge username of the tool.
    :param password: Toolforge password of the tool.
    :return: None
    """
    query = ("INSERT INTO Sources(dbname, update_time) VALUES('meta', %s) "
             "ON DUPLICATE KEY UPDATE update_time = %s")
    try:
        conn = db_acc.connect_to_user_database(constants.DATABASE_NAME,
                                               user_db_port, user, password)
        with conn.cursor() as cur:
            # Named `timestamp` (not `time`) to avoid shadowing the stdlib
            # time module used elsewhere in this file.
            timestamp = update_time.strftime("%Y-%m-%d %H:%M:%S")
            cur.execute(query, [timestamp, timestamp])
        conn.commit()
        conn.close()
    except Exception as err:
        print("Something went wrong.\n", err)
        exit(1)
# Esempio n. 15  (scraper artifact; commented out so the file parses)
def save_links_to_db(entries, user_db_port=None, user=None, password=None):
    """
    Saves links and dbnames to the local project database.

    :param entries: List(tuple) of lists(tuples), with pairs 'dbname - url'
    :param user_db_port: Port for connecting to local Sources table through ssh tunneling, if used.
    :param user: Toolforge username of the tool.
    :param password: Toolforge password of the tool.
    :return: None
    """
    query = ("INSERT INTO Sources(dbname, url) VALUES(%s, %s) "
             "ON DUPLICATE KEY UPDATE url = %s")
    try:
        conn = db_acc.connect_to_user_database(constants.DATABASE_NAME,
                                               user_db_port, user, password)
        with conn.cursor() as cur:
            # Upsert each pair; the url is passed twice (INSERT value and
            # ON DUPLICATE KEY UPDATE value).
            for pair in entries:
                params = [pair[0], pair[1], pair[1]]
                cur.execute(query, params)
        conn.commit()
        conn.close()
    except Exception as err:
        print("Something went wrong.\n", err)
        exit(1)
def save_pageview(page_id, dbname, pageviews, add, user_db_port, user, password):
    """
    Save pageviews data into Scripts table.

    :param page_id: The Id of the page whose pageviews is to be stored.
    :param dbname: Which database the module corresponds to.
    :param pageviews: The value to be stored in table.
    :param add: Whether to add to the existing pageviews(when collecting monthly data) or not.
    :param user_db_port: port for connecting to local Sources table through ssh tunneling, if used.
    :param user: Toolforge username of the tool.
    :param password: Toolforge password of the tool.
    :return: None
    """

    # Either accumulate into the existing counter or overwrite it.
    query = (
        "UPDATE Scripts SET pageviews=pageviews+%s WHERE dbname=%s AND page_id=%s "
        if add
        else "UPDATE Scripts SET pageviews=%s WHERE dbname=%s AND page_id=%s "
    )

    try:
        conn = db_acc.connect_to_user_database(
            DATABASE_NAME, user_db_port, user, password
        )
        with conn.cursor() as cur:
            cur.execute(query, [pageviews, dbname, page_id])
        conn.commit()
        conn.close()
    except Exception as err:
        print(
            "Something went wrong saving to db (%d, %s, %d).\n"
            % (page_id, dbname, pageviews),
            err,
        )
def get_modules(user_db_port, user, password):
    """
    A generator to fetch and return all pages in the Scripts table.

    :param user_db_port: port for connecting to local Sources table through ssh tunneling, if used.
    :param user: Toolforge username of the tool.
    :param password: Toolforge password of the tool.
    :return: Tuples of (dbname, page_id)
    """

    try:
        conn = db_acc.connect_to_user_database(
            DATABASE_NAME, user_db_port, user, password
        )
        with conn.cursor() as cur:
            cur.execute("SELECT dbname, page_id FROM Scripts")
            # Stream rows straight from the cursor.
            yield from cur
        conn.close()
    except Exception as err:
        print("Something went wrong fetching module list.\n", err)
        exit(1)
def get_missed_contents(wikis, user_db_port=None, user=None, password=None):
    """
    Retry fetching data for missed pages.

    :param wikis: List of wiki projects whose missed pages should be re-fetched.
    :param user_db_port: port for connecting to local Sources table through ssh tunneling, if used.
    :param user: Toolforge username of the tool.
    :param password: Toolforge password of the tool.
    :return: None
    """

    print("Started loading missed contents...")

    db_map, placeholders = get_db_map(
        wikis=wikis, user_db_port=user_db_port, user=user, password=password
    )
    # placeholders is a ready-made "%s,%s,..." string sized to the wiki list;
    # the dbnames themselves are bound as parameters below.
    query = (
        "SELECT page_id, dbname FROM Scripts WHERE dbname IN (%s) AND in_api=1 AND is_missed=1"
        % placeholders
    )

    try:
        conn = db_acc.connect_to_user_database(
            constants.DATABASE_NAME, user_db_port, user, password
        )
        with conn.cursor() as cur:
            cur.execute(query, list(db_map.keys()))
            df = pd.DataFrame(cur, columns=["page_id", "dbname"])
        conn.close()
    except Exception as err:
        print("Something went wrong.\n", err)
        exit(1)

    df["wiki"] = df["dbname"].map(db_map)
    # Re-fetch the missed pages; presumably the literals mean in_api=1,
    # in_database=0 — TODO confirm against get_pages' signature.
    get_pages(df, 1, 0)
    print("Done loading missed pages!")
def query_data_generator(query,
                         function_name,
                         cols,
                         db=None,
                         replicas_port=None,
                         user_db_port=None,
                         user=None,
                         password=None,
                         replicas=True,
                         row_count=500,
                         no_offset=False):
    """
    Query database (db) and yield outputs in chunks.

    :param query: The SQL query to run (LIMIT/OFFSET are appended here).
    :param function_name: The function that was used to collect this data, useful for saving when data is missed due to errors.
    :param cols: The name of the columns to be used in dataframe for the data collected with SQL.
    :param db: The database from which the data was collected.
    :param replicas_port: port for connecting to meta table through ssh tunneling, if used.
    :param user_db_port: port for connecting to local Sources table through ssh tunneling, if used.
    :param user: Toolforge username of the tool.
    :param password: Toolforge password of the tool.
    :param replicas: False if collecting data from toolsdb user database, True if collecting from other wikimedia databases.
    :param row_count: Number of rows to get in one query from the database.
    :param no_offset: Disables offset for requests, which contents change while iterating.
    :return: generator of dataframes; stops when a chunk comes back empty
    """

    offset = 0
    max_tries = 3
    conn = None  # so the retry cleanup is safe if connect() itself raises

    while True:
        retry_counter = 0
        try:
            # Inner loop: retry transient DB failures up to max_tries.
            while True:
                try:
                    conn = (db_acc.connect_to_replicas_database(
                        db, replicas_port, user, password)
                            if replicas else db_acc.connect_to_user_database(
                                DATABASE_NAME, user_db_port, user, password))
                    with conn.cursor() as cur:
                        # row_count/offset are internal ints, safe to inline.
                        cur.execute(query + " LIMIT %d OFFSET %d" %
                                    (row_count, offset))
                        df = pd.DataFrame(
                            cur.fetchall(),
                            columns=cols).applymap(encode_if_necessary)
                    close_conn(conn)
                    break
                except (
                        pymysql.err.DatabaseError,
                        pymysql.err.OperationalError,
                ) as err:
                    # Guarded: conn may still be None if connect() failed.
                    if conn is not None:
                        close_conn(conn)
                    if retry_counter == max_tries:
                        raise Exception(err)
                    print("Retrying query of '%s' from %s in 1 minute..." %
                          (function_name, db))
                    retry_counter += 1
                    time.sleep(60)
            if not no_offset:
                offset += row_count
            if len(df) == 0:
                return
            yield df
        except Exception as err:
            print("Something went wrong. Could not query from %s \n" % db,
                  repr(err))
            # Record the miss so the data can be re-collected later.
            with open("missed_db_info.txt", "a") as file:
                file.write(function_name + " " + db + "\n")
            break
def save_data(
    df,
    dbname,
    function_name,
    user_db_port=None,
    user=None,
    password=None,
    query=None,
    cols=None,
    custom=False,
):
    """
    Save data from df into Scripts table.

    :param df: The data to save into Scripts table; for custom=False df column names should match db column names.
    :param dbname: The database from which the data was collected.
    :param function_name: The function that was used to collect this data, useful for saving when data is missed due to errors.
    :param user_db_port: port for connecting to local Sources table through ssh tunneling, if used.
    :param user: Toolforge username of the tool.
    :param password: Toolforge password of the tool.
    :param query: Only used when custom=True. The query to use to save into table.
    :param cols: Only used when custom=True. The column list in order of params in the query.
    :param custom: True if providing custom query and column list to use to save into table.
    :return: None
    """

    if not custom:
        cols = df.columns.values[1:]  # skip page_id (assumed first df column)
        updates = ",".join([col + "=%s" for col in cols])

        query = "UPDATE Scripts SET %s WHERE dbname='%s' AND page_id=%s " % (
            updates,
            dbname,
            "%s",
        )

    max_tries = 3
    conn = None  # so the retry cleanup is safe if connect() itself raises

    try:
        retry_counter = 0
        while True:
            try:
                conn = db_acc.connect_to_user_database(DATABASE_NAME,
                                                       user_db_port, user,
                                                       password)
                with conn.cursor() as cur:
                    for index, elem in df.iterrows():
                        if not custom:
                            # Reorder so page_id (first df column) becomes
                            # the trailing WHERE parameter.
                            params = list(
                                np.concatenate(
                                    (elem.values[1:], elem.values[0:1])))
                        else:
                            params = [elem[col] for col in cols]
                        cur.execute(query, params)
                conn.commit()
                close_conn(conn)
                break
            except (pymysql.err.DatabaseError,
                    pymysql.err.OperationalError) as err:
                # Guarded: conn may still be None if connect() failed.
                if conn is not None:
                    close_conn(conn)
                if retry_counter == max_tries:
                    raise Exception(err)
                print("Retrying saving of '%s' from %s in 1 minute..." %
                      (function_name, dbname))
                retry_counter += 1
                time.sleep(60)

    except Exception as err:
        print("Something went wrong. Error saving pages from %s \n" % dbname,
              repr(err))
        # Record the miss so the data can be re-collected later.
        with open("missed_db_info.txt", "a") as file:
            file.write(function_name + " " + dbname + "\n")
def save_content(
    wiki, data_list, in_api, in_database, user_db_port=None, user=None, password=None
):
    """
    Saves data into Scripts table.

    :param wiki: The wiki project corresponding to the data provided.
    :param data_list: The data to be saved in Scripts table.
    :param in_api: Whether data was collected from API.
    :param in_database: Whether data was collected from databases.
    :param user_db_port: port for connecting to local Sources table through ssh tunneling, if used.
    :param user: Toolforge username of the tool.
    :param password: Toolforge password of the tool.
    :return: None
    """
    data_df = pd.DataFrame(
        data_list,
        columns=[
            "id",
            "title",
            "url",
            "length",
            "content",
            "content_model",
            "touched",
            "lastrevid",
        ],
    )

    query = (
        "INSERT INTO Scripts(dbname, page_id, title, sourcecode, touched, "
        "in_api, in_database, length, content_model, lastrevid, url) "
        "VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) "
        "ON DUPLICATE KEY UPDATE title = %s, sourcecode = %s, touched = %s, in_api = %s, in_database = %s, "
        "length = %s, content_model = %s, lastrevid = %s, url = %s, is_missed=%s"
    )
    try:
        conn = db_acc.connect_to_user_database(
            constants.DATABASE_NAME, user_db_port, user, password
        )
        with conn.cursor() as cur:
            # Resolve the wiki url to its dbname first.
            cur.execute("SELECT dbname FROM Sources WHERE url = %s", wiki)
            dbname = cur.fetchone()[0]
            for index, elem in data_df.iterrows():
                # Normalize the API timestamp ("...T...Z") to MySQL format.
                # Named `touched` (not `time`) to avoid shadowing the stdlib
                # time module used elsewhere in this file.
                touched = elem["touched"].replace("T", " ").replace("Z", " ")
                # The same field values feed both the INSERT branch and the
                # ON DUPLICATE KEY UPDATE branch of the upsert.
                fields = [
                    elem["title"],
                    elem["content"],
                    touched,
                    in_api,
                    in_database,
                    elem["length"],
                    elem["content_model"],
                    elem["lastrevid"],
                    elem["url"],
                ]
                # Trailing 0 resets is_missed on update.
                cur.execute(query, [dbname, elem["id"]] + fields + fields + [0])
        conn.commit()
        conn.close()
    except Exception as err:
        print("Error saving pages from", wiki)
        print(err)