def create_uncertain_pairs_file():
    """
    This function is called if the user wants to create a training file on the
    client side of the application. It assumes all the neccessary files were
    previously uploaded by the user. If neccessary, it creates the 2nd input 
    dataset and then executes the first 20 jupyter notebook cells. It executes
    the first 20 cells, because those cells are needed to create the uncertain
    pairs file. This file will contain pairs of examples (from the two input datasets)
    that Dedupe is unsure about.

    In the first 20 cells things like module imports, reading the configuration file, 
    reading the input datasets and creating the uncertain pairs file (if it is
    specified in the configuration file that the user wants to create the training file)
    are done.
    """

    backbone = Backbone()

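    # if the user did not provide a 2nd input dataset, extract it from the database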
    if backbone.is_tmp_file_used():
        backbone.extract_data_from_db_and_create_second_input_dataset()

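    # execute only the first 20 notebook cells; they are the ones needed to create
    # the uncertain pairs file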
    backbone.execute_jupyter_notebook_cells(idx_first_cell=0, idx_last_cell=20)

    return "Uncertain pairs file created successfully"


def run_algorithm():
    """
    This function represents the main algorithm of the service. It assumes all the
    neccessary files were uploaded by the user.
    The execution flow is the next one:
    1) Create Backbone object, which will create the configuration file for Dedupe
    2) If the user has not provided the 2nd input dataset, then a temporary file,
       that will contain rows extracted by 'jurisdiction' from the database, 
       will be created using the Backbone object
    3) Execute all the cells in the jupyter notebook
    4) If a temporary file was created update the given cluster_ids (read more about
       this in a comment below)
    5) Insert the new cluster_ids into the 'backbone_index' table
    6) Create table(s) in the database and insert the dataset(s) resulted from the
       Dedupe algorithm.
    7) Remove all the files that were used in the process, except for the configuration
       file provided by the user. We do not remove this file, because if the user
       would like to see some results, that are stored in the database, it will need
       to provide again the configuration file (since the system needs the database
       configuration data). So, we leave it there for convenience
    """

    # Backbone object that will do all the work
    backbone = Backbone()

    # if the user did not provide a 2nd input dataset, extract it from the database
    if backbone.is_tmp_file_used():
        backbone.extract_data_from_db_and_create_second_input_dataset()

    # execute all the cells in the Jupyter notebook
    backbone.execute_jupyter_notebook_cells(idx_first_cell=0)

    # if the 2nd dataset contained rows from the database, its examples already had a
    # cluster_id (backbone index) assigned. However, when Dedupe created a new cluster
    # made of one example from the 1st dataset and one from the 2nd dataset, it gave
    # that cluster a fresh cluster_id (one that did not exist in the backbone_index table).
    # So we have to update the cluster_ids of those clusters with the cluster_ids that
    # the examples from the 2nd dataset originally had.
    if backbone.is_tmp_file_used():
        utilities.update_cluster_ids_of_output_file_1(
            backbone.output_file_1,
            backbone.output_file_2,
            backbone.input_file_2
        )

    # insert the new cluster_ids created by Dedupe into the backbone_index table
    if backbone.is_tmp_file_used():
        utilities.insert_new_cluster_ids_into_backbone_index_table(
            backbone.data_from_config_file['database_config'],
            backbone.output_file_1,
            output_file_2=None,
            last_cluster_id=backbone.last_cluster_id_in_db)
    else:
        utilities.insert_new_cluster_ids_into_backbone_index_table(
            backbone.data_from_config_file['database_config'],
            backbone.output_file_1,
            output_file_2=backbone.output_file_2,
            last_cluster_id=backbone.last_cluster_id_in_db)

    # create a new table having a FK on cluster_id (referencing the PK 'idx' of the backbone_index table)
    # and insert the dataset resulting from Dedupe into the table;
    # the resulting dataset is the input dataset plus 2 new columns: 'cluster_id' and 'link_score'
    utilities.create_table_and_insert_dataset_resulted_from_dedupe(
        backbone.data_from_config_file['database_config'],
        backbone.data_from_config_file['provider_1_name'],
        backbone.output_file_1)

    # if the user provided a 2nd input dataset, insert it into the DB as well
    if not backbone.is_tmp_file_used():
        utilities.create_table_and_insert_dataset_resulted_from_dedupe(
            backbone.data_from_config_file['database_config'],
            backbone.data_from_config_file['provider_2_name'],
            backbone.output_file_2)

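    # remove all the files used in the process, except the configuration file
    # provided by the user (see step 7 in the docstring above)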
    os.remove(backbone.input_file_1)
    os.remove(backbone.input_file_2)
    os.remove(backbone.training_file_name)
    os.remove(backbone.output_file_1)
    os.remove(backbone.output_file_2)
    os.remove(backbone.configuration_file_name_for_dedupe)

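    # remove the Dedupe settings file, falling back to the default name
    # "settings_file" when backbone.settings_file_name is not set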
    if backbone.settings_file_name:
        os.remove(backbone.settings_file_name)
    else:
        os.remove("settings_file")

    return "Algorithm ran successfully"