def accept_collection(db, col, col_temp, **kwargs):
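    """Archive `col` under a date-suffixed name, then rename `col_temp` into its place."""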
    auth = authenticate.prediction_login(**kwargs)
    execution_date = parse_datetime(str(kwargs["execution_date"]))
    str_date = execution_date.strftime("%Y_%m_%d")
    archived_col = "{}_{}".format(col, str_date)
    rename_collection(auth, db, col, archived_col)
    rename_collection(auth, db, col_temp, col)
def pull_offer_matrix(**kwargs):
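    """Drop the temporary offer matrix collection and re-pull it from the remote source."""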
    auth = authenticate.prediction_login(**kwargs)
    delete_table(auth, config.offer_matrix_connection, config.offer_matrix_db,
                 config.offer_matrix_col_temp)
    pull_table(auth, config.offer_matrix_connection, config.offer_matrix_db,
               config.offer_matrix_col_temp,
               config.offer_matrix_connection_remote,
               config.offer_matrix_db_remote, config.offer_matrix_col_remote)
def add_date_enrich(**kwargs):
    p_auth = authenticate.prediction_login(**kwargs)
    database = "medscheme"
    collection = "claims_2019_raw"
    attribute = "TREATMENT_DATE=yyyy-mm-dd hh:mm:ss.SSS"
    find = "{}"
    output = data_munging_engine.enrich_date2(
        p_auth, database, collection, attribute, find)
    print(output)
def get_db_list(**kwargs):
    # url = kwargs["dag_run"].conf.get("url")
    # username = kwargs["dag_run"].conf.get("username")
    # password = kwargs["dag_run"].conf.get("password")
    # p_auth = jwt_access.Authenticate(url, username, password)
    p_auth = authenticate.prediction_login(**kwargs)
    output = data_management_engine.get_document_db_list(p_auth)
    print(output)
def get_prediction_project(**kwargs):
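    """Fetch a prediction project, override the deployment_id of its first
    deployment step, and save it."""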
    auth = authenticate.prediction_login(**kwargs)
    project_id = kwargs["dag_run"].conf.get("project_id")
    prediction_project = prediction_engine.get_prediction_project(
        auth, project_id)
    print(prediction_project)

    prediction_project["deployment_step"][0][
        "deployment_id"] = "mood_recommender_dynamic_changed_automated"
    prediction_engine.save_prediction_project(auth, prediction_project)
def pull_lookup_feature_store(**kwargs):
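    """Drop the temporary lookup feature store collection and re-pull it from
    the remote source."""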
    auth = authenticate.prediction_login(**kwargs)
    delete_table(auth, config.lookup_feature_store_connection,
                 config.lookup_feature_store_db,
                 config.lookup_feature_store_col_temp)
    pull_table(auth, config.lookup_feature_store_connection,
               config.lookup_feature_store_db,
               config.lookup_feature_store_col_temp,
               config.lookup_feature_store_connection_remote,
               config.lookup_feature_store_db_remote,
               config.lookup_feature_store_col_remote)
def read_data(**kwargs):
    p_auth = authenticate.prediction_login(**kwargs)
    database = "nlp_examples"
    collection = "nlp_example_text"
    field = "{}"
    limit = 0
    projections = "{}"
    skip = 0
    output = data_management_engine.get_data(p_auth, database, collection,
                                             field, limit, projections, skip)
    print(output)
def process_ecogenetic_network(**kwargs):
    """Run the eco-genetic network algorithm with parameters from the DAG run conf."""
    p_auth = authenticate.prediction_login(**kwargs)
    data = kwargs["dag_run"].conf.get("enrichment")["__var"]["en"]["__var"]
    collection = data["collection"]
    collectionOut = data["collectionOut"]
    database = data["database"]
    find = data["find"]
    graphMeta = data["graphMeta"]
    graphParam = data["graphParam"]
    output = algorithm_client_pulse.process_ecogenetic_network(
        p_auth, collection, collectionOut, database, find, graphMeta,
        graphParam)
    print(output)
def personality_enrich(**kwargs):
    """Run the personality enrichment with parameters from the DAG run conf."""
    p_auth = authenticate.prediction_login(**kwargs)
    data = kwargs["dag_run"].conf.get("enrichment")["__var"]["enrich"]["__var"]
    category = data["category"]
    collection = data["collection"]
    collectionOut = data["collectionOut"]
    database = data["database"]
    find = data["find"]
    groupby = data["groupby"]
    output = data_munging_engine.personality_enrich(
        p_auth, category, collection, collectionOut, database, find, groupby)
    print(output)
def process_client_pulse_reliability(**kwargs):
    """Run the client pulse reliability enrichment with parameters from the DAG run conf."""
    p_auth = authenticate.prediction_login(**kwargs)
    data = kwargs["dag_run"].conf.get("enrichment")["__var"]["cpr"]["__var"]
    collection = data["collection"]
    collectionOut = data["collectionOut"]
    database = data["database"]
    find = data["find"]
    groupby = data["groupby"]
    mongoAttribute = data["mongoAttribute"]
    rtype = data["type"]
    output = data_munging_engine.process_client_pulse_reliability(
        p_auth, collection, collectionOut, database, find, groupby,
        mongoAttribute, rtype)
    print(output)
def generate_time_series_features(**kwargs):
    """Generate time series features with parameters from the DAG run conf."""
    p_auth = authenticate.prediction_login(**kwargs)
    data = kwargs["dag_run"].conf.get("enrichment")["__var"]["tsf"]["__var"]
    categoryfield = data["categoryfield"]
    collection = data["collection"]
    database = data["database"]
    datefield = data["datefield"]
    featureset = data["featureset"]
    find = data["find"]
    groupby = data["groupby"]
    numfield = data["numfield"]
    output = data_munging_engine.generate_time_series_features(
        p_auth, categoryfield, collection, database, datefield, featureset,
        find, groupby, numfield)
    print(output)
def generate_forecast(**kwargs):
    """Generate a forecast with parameters from the DAG run conf."""
    p_auth = authenticate.prediction_login(**kwargs)
    data = kwargs["dag_run"].conf.get("enrichment")["__var"]["fore"]["__var"]
    attribute = data["attribute"]
    collection = data["collection"]
    collectionOut = data["collectionOut"]
    database = data["database"]
    dateattribute = data["dateattribute"]
    find = data["find"]
    historicsteps = data["historicsteps"]
    steps = data["steps"]
    output = algorithm_client_pulse.generate_forecast(
        p_auth, attribute, collection, collectionOut, database, dateattribute,
        find, historicsteps, steps)
    print(output)
def pull_training_feature_store(**kwargs):
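    """Drop the temporary training feature store collection and re-pull it
    from the remote source."""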
    execution_date = parse_datetime(str(kwargs["execution_date"]))
    print(execution_date)
    auth = authenticate.prediction_login(**kwargs)
    delete_table(auth, config.training_feature_store_connection,
                 config.training_feature_store_db,
                 config.training_feature_store_col_temp)
    pull_table(auth, config.training_feature_store_connection,
               config.training_feature_store_db,
               config.training_feature_store_col_temp,
               config.training_feature_store_connection_remote,
               config.training_feature_store_db_remote,
               config.training_feature_store_col_remote)
def prediction_enrich(**kwargs):
    """Score documents with a predictor using parameters from the DAG run conf."""
    p_auth = authenticate.prediction_login(**kwargs)
    data = kwargs["dag_run"].conf.get("enrichment")["__var"]["pred"]["__var"]
    attributes = data["attributes"]
    collection = data["collection"]
    limit = data["limit"]
    mongodb = data["mongodb"]
    predictor = data["predictor"]
    predictor_label = data["predictor_label"]
    search = data["search"]
    skip = data["skip"]
    sort = data["sort"]
    output = data_munging_engine.prediction_enrich_fast(
        p_auth, mongodb, collection, search, sort, predictor, predictor_label,
        attributes, skip, limit)
    print(output)
def get_multi_offer_csv(**kwargs):
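    """Export the training feature store collection to a CSV file on the server."""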
    auth = authenticate.prediction_login(**kwargs)
    filename = "{}.csv".format(config.training_feature_store_col)
    filetype = "csv"
    database = config.training_feature_store_db
    collection = config.training_feature_store_col
    field = "{}"
    sort = ""
    projection = "{}"
    limit = 0
    data_management_engine.export_documents(auth, filename, filetype, database,
                                            collection, field, sort,
                                            projection, limit)
    time.sleep(5)
def process_apriori(**kwargs):
    """Run the apriori algorithm with parameters from the DAG run conf."""
    p_auth = authenticate.prediction_login(**kwargs)
    data = kwargs["dag_run"].conf.get("enrichment")["__var"]["apr"]["__var"]
    colItem = data["colItem"]
    collection = data["collection"]
    collectionOut = data["collectionOut"]
    custField = data["custField"]
    database = data["database"]
    dbItem = data["dbItem"]
    find = data["find"]
    itemField = data["itemField"]
    supportCount = data["supportCount"]
    output = algorithm_client_pulse.process_apriori(
        p_auth, colItem, collection, collectionOut, custField, database,
        dbItem, find, itemField, supportCount)
    print(output)
def get_distinct_offers(**kwargs):
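    """Return the distinct offer ids found in the remote training feature store table."""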
    auth = authenticate.prediction_login(**kwargs)
    connection = "{}/master?user=admin".format(
        config.training_feature_store_connection_remote)
    sql = "select distinct {} from {}.{}.{}".format(
        config.training_feature_store_key_id,
        config.training_feature_store_db_remote,
        config.training_feature_store_col_remote)
    print(sql)
    result = data_management_engine.get_presto_sql(auth, connection, sql)

    # The distinct key is read from the "offer_id" field of each returned row.
    offer_ids = [entry["offer_id"] for entry in result["data"]]
    print(offer_ids)
    return offer_ids
def list_collections(**kwargs):
    p_auth = authenticate.prediction_login(**kwargs)
    database = "nlp_examples"
    output = data_management_engine.get_document_db_collections(
        p_auth, database)
    print(output)
def preprocess_generate_time_series_features(**kwargs):
    p_auth = authenticate.prediction_login(**kwargs)
    value = kwargs["dag_run"].conf.get("prepare")["__var"]["tsf"]
    print(value)
def preprocess_personality_enrich(**kwargs):
    p_auth = authenticate.prediction_login(**kwargs)
    value = kwargs["dag_run"].conf.get("prepare")["__var"]["enrich"]
    print(value)
def push_offer_matrix(**kwargs):
    auth = authenticate.prediction_login(**kwargs)
    pull_table(auth, config.offer_matrix_connection, config.offer_matrix_db,
               config.offer_matrix_col, config.offer_matrix_connection,
               config.offer_matrix_db, config.offer_matrix_col)
def train_multi_models(**kwargs):
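    """Train an AutoML model per distinct offer id, then copy the best model
    for each offer into the deployed models directory."""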
    auth = authenticate.prediction_login(**kwargs)
    data_type_changes = config.training_feature_store_data_type_changes

    offer_ids = get_distinct_offers(**kwargs)
    offer_to_models = {}
    for offer in offer_ids:
        offer_to_models[offer] = None

    # Ingest the exported CSV as a user frame. The uploading username is
    # assumed to be supplied via the DAG run conf.
    username = kwargs["dag_run"].conf.get("username")
    file_name = "{}.csv".format(config.training_feature_store_col)
    featurestore_name = "fs_{}".format(config.training_feature_store_col)
    hexframename, imp = multimodel_functions.save_file_as_userframe(
        auth, file_name, featurestore_name, username, data_type_changes)

    # Split the frame into training and validation frames.
    split_ratio = config.training_split_ratio
    inv_split_ratio = 1 - split_ratio
    str_sr = "_".join(str(split_ratio).split("."))
    inv_str_sr = "_".join(str(inv_split_ratio).split("."))

    worker_h2o.split_frame(auth, hexframename, split_ratio)
    hexframename_train = "{}{}.hex".format(hexframename[:-4], inv_str_sr)
    hexframename_score = "{}{}.hex".format(hexframename[:-4], str_sr)

    for offer in offer_ids:
        # Train a model for this offer.
        version = config.model_version
        model_id = featurestore_name + version
        offer_to_models[str(offer)] = model_id
        model_purpose = config.model_purpose
        description = config.model_description
        model_params = {
            "predict_id": featurestore_name,
            "description": description,
            "model_id": model_id,
            "model_type": "AUTOML",
            "frame_name": hexframename,
            "frame_name_desc": description,
            "model_purpose": model_purpose,
            "version": version,
            "model_parms": {
                "algo": "H2O-AUTOML",
                "training_frame": hexframename_train,
                "validation_frame": hexframename_score,
                "response_column": config.response_column_template.format(
                    str(offer)),
                "max_models": config.max_models,
                "stopping_tolerance": config.stopping_tolerance,
                "note_stop": ("stopping_tolerance of 0.001 for 1m rows and "
                              "0.004 for 100k rows"),
                "max_runtime_secs": config.max_runtime_secs,
                "stopping_rounds": config.stopping_rounds,
                "stopping_rounds_desc": (
                    "Early stopping based on convergence of stopping_metric. "
                    "Stop if simple moving average of length k of the "
                    "stopping_metric does not improve for k:=stopping_rounds "
                    "scoring events (0 to disable)."),
                "sort_metric": config.sort_metric,
                "stopping_metric": config.stopping_metric,
                "balance_classes": config.balance_classes,
                "balance_classes_desc": (
                    "Balance training data class counts via "
                    "over/under-sampling (for imbalanced data)."),
                "keep_cross_validation_predictions":
                    config.keep_cross_validation_predictions,
                "keep_cross_validation_predictions_desc": (
                    "Whether to keep the predictions of the cross-validation "
                    "predictions. This needs to be set to TRUE if running the "
                    "same AutoML object for repeated runs because CV "
                    "predictions are required to build additional Stacked "
                    "Ensemble models in AutoML."),
                "keep_cross_validation_models":
                    config.keep_cross_validation_models,
                "keep_cross_validation_models_desc": (
                    "Whether to keep the cross-validated models. Keeping "
                    "cross-validation models may consume significantly more "
                    "memory in the H2O cluster."),
                "keep_cross_validation_fold_assignment":
                    config.keep_cross_validation_fold_assignment,
                "nfolds": config.nfolds,
                "note_folds": ("nfolds=0 will disable the stacked ensemble "
                               "creation process"),
                "ignored_columns": config.ignored_columns,
                "exclude_algos": config.exlude_algos,
                "hidden": ["1"]
            }
        }
        prediction_engine.save_prediction(auth, model_params)
        worker_h2o.train_model(auth, model_id, "automl",
                               json.dumps(model_params["model_parms"]))

    for offer in offer_ids:
        model_data = worker_h2o.get_train_model(auth,
                                                offer_to_models[str(offer)],
                                                "AUTOML")
        df = multimodel_functions.list_automl_models(model_data)
        h2o_name = multimodel_functions.save_best_model(
            auth, df, rename="offer_{}".format(offer))
        from_path = "/data/models/{}.zip".format(h2o_name)
        to_path = "/data/deployed/{}.zip".format(offer)
        result = copy_file(auth, from_path, to_path, user="")
        print(result)
def preprocess_client_pulse_reliability(**kwargs):
    p_auth = authenticate.prediction_login(**kwargs)
    value = kwargs["dag_run"].conf.get("prepare")["__var"]["cpr"]
    print(value)
def verify_offer_matrix(**kwargs):
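    """Check that the pulled offer matrix collection contains the required columns."""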
    auth = authenticate.prediction_login(**kwargs)
    verify_table(auth, config.offer_matrix_connection, config.offer_matrix_db,
                 config.offer_matrix_col_temp,
                 config.offer_matrix_must_exist_columns)
def preprocess_ecogenetic_network(**kwargs):
    p_auth = authenticate.prediction_login(**kwargs)
    value = kwargs["dag_run"].conf.get("prepare")["__var"]["en"]
    print(value)
def combine_provider_data(**kwargs):
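    """For each speciality, aggregate claims into a per-provider collection,
    join the normalised rollups and PCA factors onto it, flag providers found
    in the fraud_working collection, and export the result."""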
    mongo_connect = kwargs["dag_run"].conf.get("mongo_connect")
    database = kwargs["dag_run"].conf.get("database")
    from_date = kwargs["dag_run"].conf.get("from_date")
    to_date = kwargs["dag_run"].conf.get("to_date")
    speciality_list = kwargs["dag_run"].conf.get("speciality_list")
    field_to_check_one = kwargs["dag_run"].conf.get("field_check_one")
    field_to_check_two = kwargs["dag_run"].conf.get("field_check_two")
    p_auth = authenticate.prediction_login(**kwargs)
    client = pymongo.MongoClient(mongo_connect)
    db = client[database]
    for speciality in speciality_list:
        speciality_name = speciality[0].replace("-", "").replace(
            " ", "").replace(",", "").replace("&", "").lower()
        # Generate the names of the filtered claims and provider collections.
        claims_data_to_process = "claims_{}_{}{}".format(
            speciality_name, from_date, to_date).replace("-", "").replace(
                " ", "").replace(",", "").replace("&", "").lower()
        provider_collection = "provider_{}_{}{}".format(
            speciality_name, from_date, to_date).replace("-", "").replace(
                " ", "").replace(",", "").replace("&", "").lower()
        
        provider_details = [
            {
            "$group":{
                    "_id":"$PRACTICE_NUMBER"
                    ,"speciality_array":{"$addToSet":"$SPECIALITY_DESCRIPTION"}
                    ,"PROVINCE_STATE":{"$addToSet":"$PROVINCE_STATE"}
                    ,"city_array":{"$addToSet":"$peer_location"}
                    ,"min_time":{"$sum":"$min_time"}
                    ,"max_time":{"$sum":"$max_time"}
                    }
            }
            ,{
            "$addFields":{
                        "PRACTICE_NUMBER":"$_id"
                        ,"count_speciality":{"$size":"$speciality_array"}
                        ,"SPECIALITY":{"$arrayElemAt":["$speciality_array",0]}
                        ,"count_city":{"$size":"$city_array"}
                        ,"peer_location":{"$arrayElemAt":["$city_array",0]}
                        ,"PROVINCE_STATE":{"$arrayElemAt":["$PROVINCE_STATE",0]}
                        }
            }
            ,{"$unset":"speciality_array"}
            ,{"$unset":"city_array"}
            ,{"$out":provider_collection}
        ]
        db[claims_data_to_process].aggregate(provider_details)
        
        # Strip dashes from the dates for the rollup collection names
        # (idempotent across iterations).
        from_date = from_date.replace("-", "")
        to_date = to_date.replace("-", "")
        claims_rollup_all_norm = ("rollup_all_claims_" + speciality_name +
                                  "_" + from_date + to_date + "_fill_norm")
        print(claims_rollup_all_norm)
        demographic_pipeline = [
                {
                "$lookup":{
                       "from":claims_rollup_all_norm
                       ,"localField":"PRACTICE_NUMBER"
                       ,"foreignField":"_id"
                       ,"as":"subs"
                       }
                }
                ,{
                "$replaceWith":{
                        "$mergeObjects":[{"$arrayElemAt":["$subs",0]},"$$ROOT"]
                        }
                }
                ,{"$unset":"subs"}
                ,{"$out":provider_collection}  
        ]
        db[provider_collection].aggregate(demographic_pipeline)
        
        # Add PCA factors for codeDiagnosis (the field_to_check_one rollups)
        labels = ['li','pa','pe','cl']
        tabs = [
                "rollup_lines_claims_"+speciality_name+"_"+from_date+to_date+"_"+field_to_check_one+"_fill_norm"
                ,"rollup_pay_claims_"+speciality_name+"_"+from_date+to_date+"_"+field_to_check_one+"_fill_norm"
                ,"rollup_patients_claims_"+speciality_name+"_"+from_date+to_date+"_"+field_to_check_one+"_fill_norm"
                ,"rollup_claims_claims_"+speciality_name+"_"+from_date+to_date+"_"+field_to_check_one+"_fill_norm"
               ]
        print(tabs)
        for i in range(len(labels)):
            pca_join_pipeline = [
                    {
                    "$lookup":{
                           "from":tabs[i]+"_PCA"
                           ,"localField":"PRACTICE_NUMBER"
                           ,"foreignField":"_id"
                           ,"as":"subs"
                           }
                    }
                    ]
            addDict = {"$addFields":{}}
            for j in range(60):
                addDict["$addFields"][labels[i]+"po"+str(j)+"T"] = {"$arrayElemAt":["$subs.PC"+str(j) ,0]}
            pca_join_pipeline.append(addDict)
            pca_join_pipeline.append({"$unset":"subs"})    
            pca_join_pipeline.append({"$out":provider_collection})
            db[provider_collection].aggregate(pca_join_pipeline)

        # Add PCA factors for ageDiagnosis (the field_to_check_two rollups)
        labels = ['li','pa','pe','cl']
        tabs = [
                "rollup_lines_claims_"+speciality_name+"_"+from_date+to_date+"_"+field_to_check_two+"_fill_norm"
                ,"rollup_pay_claims_"+speciality_name+"_"+from_date+to_date+"_"+field_to_check_two+"_fill_norm"
                ,"rollup_patients_claims_"+speciality_name+"_"+from_date+to_date+"_"+field_to_check_two+"_fill_norm"
                ,"rollup_claims_claims_"+speciality_name+"_"+from_date+to_date+"_"+field_to_check_two+"_fill_norm"
               ]
        print(tabs)
        for i in range(len(labels)):
            pca_join_pipeline = [
                    {
                    "$lookup":{
                           "from":tabs[i]+"_PCA"
                           ,"localField":"PRACTICE_NUMBER"
                           ,"foreignField":"_id"
                           ,"as":"subs"
                           }
                    }
                    ]
            addDict = {"$addFields":{}}
            for j in range(60):
                addDict["$addFields"][labels[i]+"pt"+str(j)+"T"] = {"$arrayElemAt":["$subs.PC"+str(j) ,0]}
            pca_join_pipeline.append(addDict)
            pca_join_pipeline.append({"$unset":"subs"})    
            pca_join_pipeline.append({"$out":provider_collection})    
            db[provider_collection].aggregate(pca_join_pipeline)

        # Add fraud indicators
        fraud_pipeline = [
                {
                "$lookup":{
                        "from":"fraud_working"
                        ,"localField":"PRACTICE_NUMBER"
                        ,"foreignField":"PRACTICE_NUMBER"
                        ,"as":"subs"
                        }
                }
                ,{
                "$addFields":{
                        "provider_fraudulent":{"$cond":[{"$eq":["$subs",[]]},0,1]}
                        }
                }
                ,{
                "$unset":"subs"
                }
                ,{"$out":provider_collection}
        ]
        db[provider_collection].aggregate(fraud_pipeline)
        
        ru.export_collection(provider_collection,db,p_auth)
def preprocess_apriori(**kwargs):
    p_auth = authenticate.prediction_login(**kwargs)
    value = kwargs["dag_run"].conf.get("prepare")["__var"]["apr"]
    print(value)
def verify_lookup_feature_store(**kwargs):
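    """Check that the pulled lookup feature store collection contains the required columns."""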
    auth = authenticate.prediction_login(**kwargs)
    verify_table(auth, config.lookup_feature_store_connection,
                 config.lookup_feature_store_db,
                 config.lookup_feature_store_col_temp,
                 config.lookup_feature_store_must_exist_columns)
def preprocess_generate_forecast(**kwargs):
    p_auth = authenticate.prediction_login(**kwargs)
    value = kwargs["dag_run"].conf.get("prepare")["__var"]["fore"]
    print(value)
def rollup_by_fc_and_pca(**kwargs):
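    """For each check field and speciality, roll up claims by lines, payment,
    patients and claims, fill and normalise the rollups, and run PCA on each
    normalised rollup."""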
    mongo_connect = kwargs["dag_run"].conf.get("mongo_connect")
    database = kwargs["dag_run"].conf.get("database")
    from_date = kwargs["dag_run"].conf.get("from_date")
    to_date = kwargs["dag_run"].conf.get("to_date")
    speciality_list = kwargs["dag_run"].conf.get("speciality_list")
    rollUpFieldsAbbreviation = kwargs["dag_run"].conf.get("rollUpFieldsAbbreviation")
    field_check_one = kwargs["dag_run"].conf.get("field_check_one")
    field_check_two = kwargs["dag_run"].conf.get("field_check_two")
    p_auth = authenticate.prediction_login(**kwargs)
    client = pymongo.MongoClient(mongo_connect)
    db = client[database]
    for field_to_check in [field_check_one, field_check_two]:
        for speciality in speciality_list:
            speciality_name = speciality[0].replace("-", "").replace(
                " ", "").replace(",", "").replace("&", "").lower()
            # Generate the name of the filtered claims collection.
            claims_data_to_process = "claims_{}_{}{}".format(
                speciality_name, from_date, to_date).replace("-", "").replace(
                    " ", "").replace(",", "").replace("&", "").lower()
            
            claims_data_to_process_common_one = claims_data_to_process+"_common_"+field_to_check
            rollUpFields = [field_to_check]
            for k in rollUpFields:
                db[claims_data_to_process_common_one].create_index(k)

            claims_data_rollup_lines_one = "rollup_lines_"+claims_data_to_process+"_"+field_to_check
            claims_data_rollup_pay_one = "rollup_pay_"+claims_data_to_process+"_"+field_to_check
            claims_data_rollup_patients_one = "rollup_patients_"+claims_data_to_process+"_"+field_to_check
            claims_data_rollup_claims_one = "rollup_claims_"+claims_data_to_process+"_"+field_to_check
            
            ru.rollup_by_lines(claims_data_to_process_common_one,claims_data_rollup_lines_one,db,rollUpFields,rollUpFieldsAbbreviation)
            ru.rollup_by_payment(claims_data_to_process_common_one,claims_data_rollup_pay_one,db,rollUpFields,rollUpFieldsAbbreviation)
            ru.rollup_by_patients(claims_data_to_process_common_one,claims_data_rollup_patients_one,db,rollUpFields,rollUpFieldsAbbreviation)
            ru.rollup_by_claims(claims_data_to_process_common_one,claims_data_rollup_claims_one,db,rollUpFields,rollUpFieldsAbbreviation)
            
            claims_data_rollup_lines_one = ru.fill_collection(claims_data_rollup_lines_one,db)
            claims_data_rollup_pay_one = ru.fill_collection(claims_data_rollup_pay_one,db)
            claims_data_rollup_patients_one = ru.fill_collection(claims_data_rollup_patients_one,db)
            claims_data_rollup_claims_one = ru.fill_collection(claims_data_rollup_claims_one,db)
            
            claims_data_rollup_lines_norm_one = ru.norm_collection(claims_data_rollup_lines_one,"liT",db)
            claims_data_rollup_pay_norm_one = ru.norm_collection(claims_data_rollup_pay_one,"paT",db)
            claims_data_rollup_patients_norm_one = ru.norm_collection(claims_data_rollup_patients_one,"peT",db)
            claims_data_rollup_claims_norm_one = ru.norm_collection(claims_data_rollup_claims_one,"clT",db)
            
            pca_table = claims_data_rollup_lines_norm_one+"_PCA"
            db[pca_table].drop()
            pca_table = claims_data_rollup_pay_norm_one+"_PCA"
            db[pca_table].drop()
            pca_table = claims_data_rollup_patients_norm_one+"_PCA"
            db[pca_table].drop()
            pca_table = claims_data_rollup_claims_norm_one+"_PCA"
            db[pca_table].drop()
            contrib_table = "pca_norm_"+speciality_name.lower()+field_to_check+"_contrib_oot"
            db[contrib_table].drop()

            ru.run_pca(claims_data_rollup_lines_norm_one,"lines",["_id","liT","countProviders"],field_to_check,db,p_auth,speciality_name)
            ru.run_pca(claims_data_rollup_pay_norm_one,"pay",["_id","paT","countProviders"],field_to_check,db,p_auth,speciality_name)
            ru.run_pca(claims_data_rollup_patients_norm_one,"patients",["_id","peT","countProviders"],field_to_check,db,p_auth,speciality_name)
            ru.run_pca(claims_data_rollup_claims_norm_one,"claims",["_id","clT","countProviders"],field_to_check,db,p_auth,speciality_name)