def run_mixed_models(X_train_MM, X_test_MM, y_train_MM, y_test_MM):
    """Run the two mixed-model learners side by side in separate processes.

    Every argument is indexed with the keys ``'MM_LR'`` and ``'MM_NN'``.
    ``MM_LR`` receives the ``'MM_LR'`` train features, test features and
    train targets; ``NN`` receives the ``'MM_NN'`` train features, test
    features, train targets and test targets.

    Returns:
        A 2-tuple ``(lr_output, nn_output)`` where ``lr_output`` is the raw
        result of the ``MM_LR`` job and ``nn_output`` is the result of the
        ``NN`` job flattened with ``reshape(-1)``.
    """
    started = time.time()
    runner = ProcessLoom(max_runner_cap=2)

    # Queue both learners; jobs are reported back in registration order.
    lr_args = [X_train_MM['MM_LR'], X_test_MM['MM_LR'], y_train_MM['MM_LR']]
    runner.add_function(MM_LR, lr_args, {})
    nn_args = [
        X_train_MM['MM_NN'],
        X_test_MM['MM_NN'],
        y_train_MM['MM_NN'],
        y_test_MM['MM_NN'],
    ]
    runner.add_function(NN, nn_args, {})

    # Blocks until both processes have finished.
    results = runner.execute()
    finished = time.time()
    print('total time - run mixed models: ', finished - started)

    lr_output = results[0]['output']
    nn_output = results[1]['output']
    return lr_output, nn_output.reshape(-1)
def main():
    """Process the stock list with two parallel workers and e-mail the run time.

    The list returned by ``Stocks.get_list()`` is cut at index 800; each half
    is handed to ``slave`` together with a label (``"Slave1"`` / ``"Slave2"``)
    and both calls are executed through a ``ProcessLoom``.  The elapsed
    wall-clock time is passed to ``connect_email`` once the client has been
    closed.

    The client is closed in a ``finally`` clause so that a failure while
    fetching the list or running the workers does not leak the connection.
    Any such exception still propagates, and no e-mail is sent in that case.
    """
    stock = Stocks()
    try:
        start_time = datetime.datetime.now()
        Nselist = stock.get_list()
        slave1list = Nselist[:800]
        slave2list = Nselist[800:]

        loom = ProcessLoom(max_runner_cap=10)
        loom.add_function(slave, [slave1list, "Slave1"], {})
        loom.add_function(slave, [slave2list, "Slave2"], {})
        loom.execute()

        end_time = datetime.datetime.now()
        totaltime = end_time - start_time
    finally:
        # Always release the client, even when the run above failed.
        stock.client.close()
    connect_email(totaltime)
def preprocess(main_data, validationFlag):
    """Split ``main_data`` into train / (validation) / test parts in parallel.

    The ``'Target'`` column is separated from the covariates, then one
    ``splitData`` job per part is executed through a ``ProcessLoom``.  Each
    job is given ``numberOfSelectedCounties``, the covariates, the target and
    two integers derived from the module-level ``r`` and ``numberOfDays``
    (presumably the part's length in days and its starting offset -- confirm
    against ``splitData``).

    Args:
        main_data: DataFrame that contains a ``'Target'`` column.
        validationFlag: truthy to produce a separate validation part; falsy
            to fold the validation span into the training part.

    Returns:
        With validation:
        ``(X_train_train, X_train_val, X_test, y_train_train, y_train_val, y_test)``.
        Without validation: ``(X_train, X_test, y_train, y_test)``.
        Feature frames have their index reset; targets are flat numpy arrays.
    """
    target = pd.DataFrame(main_data['Target'])
    main_data = main_data.drop(['Target'], axis=1)

    # specify the size of train, validation and test sets
    test_offset = r
    train_offset = floor(0.75 * (numberOfDays - test_offset))
    val_offset = numberOfDays - (train_offset + test_offset)

    t1 = time.time()  # retained for the (disabled) timing report below
    loom = ProcessLoom(max_runner_cap=4)

    # One (fourth argument, fifth argument) pair per part, in return order.
    if validationFlag:
        parts = [
            (train_offset, 0),
            (val_offset, train_offset),
            (test_offset, train_offset + val_offset),
        ]
    else:
        parts = [
            (train_offset + val_offset, 0),
            (test_offset, train_offset + val_offset),
        ]

    for fourth, fifth in parts:
        loom.add_function(
            splitData,
            [numberOfSelectedCounties, main_data, target, fourth, fifth],
            {})

    # run the processes in parallel
    output = loom.execute()
    t2 = time.time()
    # print('total time of data splitting: ', t2 - t1)

    split_results = [output[position]['output'] for position in range(len(parts))]
    feature_frames = tuple(
        split[0].reset_index(drop=True) for split in split_results)
    target_arrays = tuple(
        np.array(split[1]).reshape(-1) for split in split_results)
    return feature_frames + target_arrays
def main(maxHistory):
    """Pick the best history length and covariate count per method, then test.

    Validation phase -- for every history length ``h`` in ``1..maxHistory``:

    * build and clean the historical data, then split it with
      ``preprocess(data, 1)``;
    * for each growing prefix of the covariates, queue ``parallel_run`` for
      every non-mixed method and store the predictions under ``(h, c)``;
    * stack those predictions as features, split them 75/25 with
      ``train_test_split`` and queue ``mixed_prallel_run`` for every mixed
      method;
    * score every method with ``get_errors`` and remember, per method, the
      ``(h, c)`` pair with the lowest RMSE together with the matching data.

    Test phase: the non-mixed methods are re-run on the data remembered for
    their best ``(h, c)``; the mixed methods are fitted on predictions that
    the non-mixed methods produce for the mixed method's best ``(h, c)``.
    Result tables are plotted and per-method CSV files are written under
    ``test_address``.

    The function reads the module-level names ``r``, ``argv``,
    ``numberOfSelectedCounties``, ``env_address`` and ``test_address`` and
    repeatedly snapshots the session with ``dill.dump_session``.

    Args:
        maxHistory: largest history length to try (inclusive).

    Returns:
        None.
    """
    history = [i for i in range(1, maxHistory + 1)]
    methods = ['GBM', 'GLM', 'KNN', 'NN', 'MM_LR', 'MM_NN']
    none_mixed_methods = ['GBM', 'GLM', 'KNN', 'NN']
    mixed_methods = ['MM_LR', 'MM_NN']
    target_name = 'confirmed'

    # NOTE(review): the second argument is ``r`` here but the literal 14 in
    # the loop below -- confirm that the two are meant to agree.
    base_data = makeHistoricalData(0, r, target_name, str(argv[1]))
    base_data = clean_data(base_data, numberOfSelectedCounties)
    covariates_names = list(base_data.columns)
    covariates_names.remove('Target')
    numberOfCovariates = len(covariates_names)
    print('number of covariates: ', numberOfCovariates)

    y_prediction = {
        'GBM': {}, 'GLM': {}, 'KNN': {}, 'NN': {}, 'MM_LR': {}, 'MM_NN': {}
    }
    y_test_MM = {'MM_LR': {}, 'MM_NN': {}}
    best_h = {}
    best_c = {}
    minError = {
        'GBM': int(1e10), 'GLM': int(1e10), 'KNN': int(1e10),
        'NN': int(1e10), 'MM_LR': int(1e10), 'MM_NN': int(1e10)
    }
    percentage_errors = {
        'GBM': {}, 'GLM': {}, 'KNN': {}, 'NN': {}, 'MM_LR': {}, 'MM_NN': {}
    }  # percentage of absolute errors
    mae_errors = {
        'GBM': {}, 'GLM': {}, 'KNN': {}, 'NN': {}, 'MM_LR': {}, 'MM_NN': {}
    }  # mean absolute errors
    rmse_errors = {
        'GBM': {}, 'GLM': {}, 'KNN': {}, 'NN': {}, 'MM_LR': {}, 'MM_NN': {}
    }  # root mean squared errors
    adjR2_errors = {
        'GBM': {}, 'GLM': {}, 'KNN': {}, 'NN': {}, 'MM_LR': {}, 'MM_NN': {}
    }  # adjusted R squared errors
    historical_X_train = {}  # X_train for best h and c
    historical_X_test = {}  # X_test for best h and c
    historical_y_train = {}  # y_train for best h and c
    historical_y_test = {}  # y_test for best h and c
    parallel_outputs = {}

    for h in history:
        data = makeHistoricalData(h, 14, target_name, str(argv[1]))
        data = clean_data(data, numberOfSelectedCounties)
        X_train_train, X_train_val, X_test, y_train_train, y_train_val, y_test = preprocess(
            data, 1)
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        y_train = np.array(
            pd.concat([pd.DataFrame(y_train_train),
                       pd.DataFrame(y_train_val)]).reset_index(drop=True)
        ).reshape(-1)

        covariates_list = []
        # Covariates are sorted by their correlation with Target.  We start
        # from the most important covariate and add the next one in each
        # iteration, so the c-th job set sees the first c covariate groups.
        # initiate loom for parallel processing
        loom = ProcessLoom(
            max_runner_cap=len(base_data.columns) * len(none_mixed_methods) + 5)
        indx_c = 0
        for c in covariates_names:  # iterate through sorted covariates
            indx_c += 1
            for covariate in data.columns:
                # add all historical covariates of this covariate and create a feature
                if c.split(' ')[0] in covariate:
                    covariates_list.append(covariate)
            X_train_train_temp = X_train_train[covariates_list]
            X_train_val_temp = X_train_val[covariates_list]
            for method in none_mixed_methods:
                loom.add_function(parallel_run, [
                    method, X_train_train_temp, X_train_val_temp,
                    y_train_train, y_train_val
                ])
        # run the processes in parallel
        parallel_outputs['non_mixed'] = loom.execute()

        # Outputs come back in registration order: covariate count first,
        # method second.
        ind = 0
        for c in range(1, numberOfCovariates + 1):
            for method in none_mixed_methods:
                y_prediction[method][(h, c)] = parallel_outputs['non_mixed'][ind]['output']
                ind += 1

        # save the entire session for each h and c
        filename = env_address + 'validation.pkl'
        dill.dump_session(filename)

        # initiate loom for parallel processing
        loom = ProcessLoom(
            max_runner_cap=len(base_data.columns) * len(mixed_methods) + 5)
        for c in range(1, numberOfCovariates + 1):
            for mixed_method in mixed_methods:
                y_predictions = []
                # Construct the outputs for the training dataset of the 'MM' methods
                y_prediction['NN'][(h, c)] = np.array(
                    y_prediction['NN'][(h, c)]).ravel()
                y_predictions.extend([
                    y_prediction['GBM'][(h, c)],
                    y_prediction['GLM'][(h, c)],
                    y_prediction['KNN'][(h, c)],
                    y_prediction['NN'][(h, c)]
                ])
                y_prediction_np = np.array(y_predictions).reshape(
                    len(y_predictions), -1)
                # one column per base method
                X_mixedModel = pd.DataFrame(y_prediction_np.transpose())
                X_train_MM, X_test_MM, y_train_MM, y_test_MM[mixed_method][(h, c)] = train_test_split(
                    X_mixedModel, y_train_val, test_size=0.25)
                loom.add_function(mixed_prallel_run, [
                    mixed_method, X_train_MM, X_test_MM, y_train_MM,
                    y_test_MM[mixed_method][(h, c)]
                ])
        # run the processes in parallel
        parallel_outputs['mixed'] = loom.execute()

        ind = 0
        for c in range(1, numberOfCovariates + 1):
            for mixed_method in mixed_methods:
                y_prediction[mixed_method][(h, c)] = np.array(
                    parallel_outputs['mixed'][ind]['output']).ravel()
                ind += 1

        # save the entire session for each h and c
        filename = env_address + 'validation.pkl'
        dill.dump_session(filename)

        # Rebuild the covariate prefix from scratch.  Without this reset the
        # list would still hold every column from the training loop above,
        # so the frames remembered for the best c would contain all
        # covariates (plus duplicates) instead of the first indx_c groups.
        covariates_list = []
        indx_c = 0
        for c in covariates_names:  # iterate through sorted covariates
            indx_c += 1
            for covariate in data.columns:
                # add all historical covariates of this covariate and create a feature
                if c.split(' ')[0] in covariate:
                    covariates_list.append(covariate)
            X_train_train_temp = X_train_train[covariates_list]
            X_train_val_temp = X_train_val[covariates_list]
            X_test_temp = X_test[covariates_list]
            y_val = y_train_val
            for method in methods:
                if method == 'MM_LR' or method == 'MM_NN':
                    # mixed methods are scored on their own held-out split
                    y_val = y_test_MM[method][(h, indx_c)]
                (mae_errors[method][(h, indx_c)],
                 rmse_errors[method][(h, indx_c)],
                 percentage_errors[method][(h, indx_c)],
                 adjR2_errors[method][(h, indx_c)]) = get_errors(
                    h, indx_c, method, y_prediction[method][(h, indx_c)],
                    y_val)
                if rmse_errors[method][(h, indx_c)] < minError[method]:
                    minError[method] = rmse_errors[method][(h, indx_c)]
                    best_h[method] = h
                    best_c[method] = indx_c
                    if method != 'MM_LR' and method != 'MM_NN':
                        historical_X_train[method] = pd.concat(
                            [X_train_train_temp, X_train_val_temp]
                        ).reset_index(drop=True)
                        historical_X_test[method] = X_test_temp
                        historical_y_train[method] = y_train
                        historical_y_test[method] = y_test
            # save the entire session for each h and c
            filename = env_address + 'validation.pkl'
            dill.dump_session(filename)
        # save the entire session for each h
        filename = env_address + 'validation.pkl'
        dill.dump_session(filename)

    # plot the results of methods on validation set
    plot_results(3, 2, numberOfCovariates, methods, history,
                 percentage_errors, 'Percentage Of Absolute Error')
    plot_results(3, 2, numberOfCovariates, methods, history, mae_errors,
                 'Mean Absolute Error')
    plot_results(3, 2, numberOfCovariates, methods, history, rmse_errors,
                 'Root Mean Squared Error')
    plot_results(3, 2, numberOfCovariates, methods, history, adjR2_errors,
                 'Adjusted R Squared Error')
    push()

    #################################################################################################################
    columns_table = [
        'method', 'best_h', 'best_c', 'root mean squared error',
        'mean absolute error', 'percentage of absolute error',
        'adjusted R squared error'
    ]  # table columns names
    y_prediction = {}

    # run non-mixed methods on the whole training set with their best h and c
    X_train_dict, X_test_dict, y_train_dict, y_test_dict = {}, {}, {}, {}
    y_prediction['GBM'], y_prediction['GLM'], y_prediction[
        'KNN'], y_prediction['NN'] = run_algorithms(historical_X_train,
                                                    historical_X_test,
                                                    historical_y_train,
                                                    historical_y_test)
    table_data = []
    for method in none_mixed_methods:
        meanAbsoluteError, rootMeanSquaredError, percentageOfAbsoluteError, adj_r_squared = get_errors(
            best_h[method], best_c[method], method, y_prediction[method],
            historical_y_test[method])
        table_data.append([
            method, best_h[method], best_c[method],
            round(rootMeanSquaredError, 2),
            round(meanAbsoluteError, 2),
            round(percentageOfAbsoluteError, 2),
            round(adj_r_squared, 2)
        ])
        result = pd.DataFrame(historical_y_test[method], columns=['y_test'])
        result['y_prediction'] = y_prediction[method]
        result['absolute_error'] = abs(historical_y_test[method] -
                                       y_prediction[method])
        result.to_csv(test_address + method + '.csv')
    table_name = 'non-mixed methods best results'
    plot_table(table_data, columns_table, table_name)

    # generate data for non-mixed methods with the best h and c of mixed models and fit mixed models on them
    # (with the whole training set)
    y_predictions = {'MM_LR': [], 'MM_NN': []}
    y_prediction = {}
    table_data = []
    X_train_MM_dict, X_test_MM_dict, y_train_MM_dict, y_test_MM_dict = {}, {}, {}, {}
    for mixed_method in mixed_methods:
        y_test = None
        for method in none_mixed_methods:
            X_train, X_test, y_train, y_test = generate_data(
                best_h[mixed_method], best_c[mixed_method], covariates_names)
            X_train_dict[method] = X_train
            X_test_dict[method] = X_test
            y_train_dict[method] = y_train
            y_test_dict[method] = y_test
        y_prediction['GBM'], y_prediction['GLM'], y_prediction[
            'KNN'], y_prediction['NN'] = run_algorithms(
                X_train_dict, X_test_dict, y_train_dict, y_test_dict)
        y_predictions[mixed_method].extend([
            y_prediction['GBM'], y_prediction['GLM'], y_prediction['KNN'],
            y_prediction['NN']
        ])
        y_prediction_np = np.array(y_predictions[mixed_method]).reshape(
            len(y_predictions[mixed_method]), -1)
        X_mixedModel = pd.DataFrame(y_prediction_np.transpose())
        # From here on ``y_test_MM`` is rebound from the validation dict to
        # the current split's targets.
        X_train_MM, X_test_MM, y_train_MM, y_test_MM = train_test_split(
            X_mixedModel, y_test, test_size=0.25)
        X_train_MM_dict[mixed_method] = X_train_MM
        X_test_MM_dict[mixed_method] = X_test_MM
        y_train_MM_dict[mixed_method] = y_train_MM
        y_test_MM_dict[mixed_method] = y_test_MM
        # save the entire session
        filename = env_address + 'test.pkl'
        dill.dump_session(filename)

    # mixed model with linear regression and neural network
    y_prediction['MM_LR'], y_prediction['MM_NN'] = run_mixed_models(
        X_train_MM_dict, X_test_MM_dict, y_train_MM_dict, y_test_MM_dict)
    for mixed_method in mixed_methods:
        meanAbsoluteError, rootMeanSquaredError, percentageOfAbsoluteError, adj_r_squared = get_errors(
            best_h[mixed_method], best_c[mixed_method], mixed_method,
            y_prediction[mixed_method], y_test_MM_dict[mixed_method])
        table_data.append([
            mixed_method, best_h[mixed_method], best_c[mixed_method],
            round(rootMeanSquaredError, 2),
            round(meanAbsoluteError, 2),
            round(percentageOfAbsoluteError, 2),
            round(adj_r_squared, 2)
        ])
        result = pd.DataFrame(y_test_MM_dict[mixed_method],
                              columns=['y_test'])
        result['y_prediction'] = y_prediction[mixed_method]
        result['absolute_error'] = abs(y_test_MM_dict[mixed_method] -
                                       y_prediction[mixed_method])
        result.to_csv(test_address + mixed_method + '.csv')

    # save the entire session
    filename = env_address + 'test.pkl'
    dill.dump_session(filename)
    table_name = 'mixed methods best results'
    plot_table(table_data, columns_table, table_name)
    push()
def run_algorithms(X_train_dict, X_val_dict, y_train_dict, y_val_dict):
    """Run the four base learners in parallel processes.

    Every argument is indexed with the keys ``'GBM'``, ``'GLM'``, ``'KNN'``
    and ``'NN'``.  ``GBM``, ``GLM`` and ``KNN`` each receive their own train
    features, validation features and train targets; ``NN`` additionally
    receives its validation targets.

    Returns:
        A 4-tuple with the job outputs in the order GBM, GLM, KNN, NN; the
        ``NN`` output is flattened with ``reshape(-1)``.
    """
    started = time.time()
    pool = ProcessLoom(max_runner_cap=4)

    # The first three learners share one call shape, so register them in a
    # loop; the order here fixes the order of the results.
    for key, learner in (('GBM', GBM), ('GLM', GLM), ('KNN', KNN)):
        pool.add_function(
            learner,
            [X_train_dict[key], X_val_dict[key], y_train_dict[key]],
            {})
    pool.add_function(
        NN,
        [X_train_dict['NN'], X_val_dict['NN'], y_train_dict['NN'],
         y_val_dict['NN']],
        {})

    # Blocks until all four processes have finished.
    results = pool.execute()
    finished = time.time()
    print('total time - run algorithms: ', finished - started)

    gbm_out = results[0]['output']
    glm_out = results[1]['output']
    knn_out = results[2]['output']
    nn_out = results[3]['output']
    return gbm_out, glm_out, knn_out, nn_out.reshape(-1)
verify = True if selfSignedCertificate != "" : certfile = open(certFileName,'w') os.write(certfile,selfSignedCertificate) verify = certFileName elif selfSignedCertificateS3Bucket != "" : s3 = boto3.client('s3') verify = certFileName with open(certFileName, 'w') as f: s3.download_fileobj(selfSignedCertificateS3Bucket, selfSignedCertificateS3Key, f) certfile = open(certFileName,'r') print(certfile.read()) elif _allowInValidCerts == True: verify = False sapcred=json.loads(_get_secret()) sapUser = sapcred["username"] sapPassword = sapcred["password"] return requests.get( url, headers=headers, auth=HTTPBasicAuth(sapUser,sapPassword), verify=verify) # ------------------------------------ # Execute Data Extraction from SAP in parallel # ------------------------------------ from pexecute.process import ProcessLoom loom = ProcessLoom(max_runner_cap=9) x = 0 while (x < totalEntities ): loom.add_function(_extract, [x], {}) x += 5000 output = loom.execute()
from sys import argv
import sys
import os
import subprocess
from pexecute.process import ProcessLoom


def main():
    """Run ``prediction.py`` once per run index, one after another.

    Each run prints its index and then invokes
    ``python ./prediction.py <index>`` through the shell, waiting for it to
    finish before starting the next one.
    """
    # NOTE(review): this file contained an unresolved git merge conflict
    # (a syntax error).  It is resolved in favour of HEAD, i.e. 4 runs.  The
    # incoming side used ``range(7)`` and created a
    # ``ProcessLoom(max_runner_cap=8)`` that was never used -- confirm which
    # run count is intended for this copy of the script.
    for i in range(4):
        print(i)
        subprocess.call("python ./prediction.py "+str(i), shell=True)


if __name__ == "__main__":
    main()
# NOTE(review): this fragment was reconstructed from a single collapsed line;
# the nesting below is inferred from the syntax -- confirm against the
# original layout.  ``resp_data2``, ``bucket``, ``status_code``, ``url`` and
# the two ``test_*_browser`` callables are defined outside this fragment.

# Read and echo everything the previously opened command handle produced.
output2 = resp_data2.read()
print(output2)
print(
    "Presigned URL to download the image is given below, link will expire in 30 mins"
)
# 1800 seconds matches the 30 minutes announced above.
resp3 = f"aws s3 presign {bucket}image_firefox.png --expires-in 1800 --profile default"
# Run the command through the shell and capture its standard output.
resp_data3 = os.popen(resp3)
output3 = resp_data3.read()
print(output3)

# Finally, run the browser tests -- only when the status code is 200.
if status_code == 200:
    # Execute both browser tests in parallel processes.
    from pexecute.process import ProcessLoom
    loom = ProcessLoom(max_runner_cap=4)
    # Each work item is a (callable, positional-args) pair.
    work = [(test_chrome_browser, [url]), (test_firefox_browser, [url])]
    loom.add_work(work)
    output = loom.execute()
else:
    # NOTE(review): the message ends with "status of url " but never prints
    # the status, and ``print`` is not expected to raise ValueError, so the
    # handler looks unreachable -- confirm the intent.
    try:
        print("Please enter correct URL, status of url ")
    except ValueError as e:
        print("Please enter correct URL, status of url ")
        print(e)