import os
import sys
import time
import uuid
import pickle

import pandas as pd

# Project helpers such as print_error_message, createPaths, pullData,
# transformData, create_prod_base_tables, enqueue, pushData,
# OptimalIncentives and cwt_production_class are defined elsewhere in
# this package.


def extract_data_ipc_file(file_path):
    """Extracts data from a pickle file at file_path. Returns the data."""
    num_tries = 10
    file_path = file_path.decode('utf-8')
    while True:
        try:
            if os.path.isfile(file_path):
                with open(file_path, "rb") as pickle_file:
                    data, tp20_bid_shpr, file_uuid = pickle.load(pickle_file)
                return data, tp20_bid_shpr, file_uuid
            else:
                # Keep the arity consistent with the success path.
                return pd.DataFrame(), pd.DataFrame(), None
        except EOFError:
            # The producer may still be writing the file; retry a few times.
            if num_tries == 0:
                raise
            num_tries -= 1
            continue
        except (OSError, IOError) as e:
            print_error_message(e, file_path)
            raise
def set_error_flag(bid_number, log_path, flag, logger):
    try:
        with open(log_path + 'tp2_' + bid_number + '.log', 'w') as error_log:
            error_log.write(flag + "\n")
    except (IOError, OSError) as e:
        print_error_message(e, log_path, logger)
        raise
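# Usage sketch (the bid number and flag value below are illustrative):
#   set_error_flag('1234567', paths_dict['error_log_path'], str(0), logger)
# writes "0" followed by a newline to <error_log_path>/tp2_1234567.log.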
def extract_data_ipc_file(file_path, logger):
    """Extracts data from a pickle file at file_path. Returns the data."""
    num_tries = 10
    file_path = file_path.decode('utf-8')
    while True:
        try:
            time.sleep(0.1)
            if os.path.isfile(file_path):
                with open(file_path, "rb") as pickle_file:
                    data = pickle.load(pickle_file)
                return data
            else:
                return pd.DataFrame()
        except EOFError:
            if num_tries == 0:
                raise
            logger.info("EOFError on extracting: " + str(file_path) +
                        " trying again...")
            num_tries -= 1
            continue
        except (OSError, IOError) as e:
            print_error_message(
                e, "Error 2.2b: Data exchange issues related to file: " +
                file_path, logger)
            raise  # without this the loop would retry OS errors forever
def send_request(data, tp20_bid_shpr, folder_path, logger):
    """ Writes a request to a uniquely named file in folder_path """
    file_name = str(uuid.uuid4())
    try:
        with open(folder_path + file_name, 'wb') as pickle_file:
            pickle.dump((data, tp20_bid_shpr), pickle_file,
                        pickle.HIGHEST_PROTOCOL)
    except Exception as e:
        print_error_message(e, "Error 2.2b: Data exchange issues writing " +
                            folder_path + file_name, logger)
        raise
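# A minimal sketch of the producer/consumer round trip, assuming a shared
# exchange folder (the real paths come from createPaths) and the two-value
# variant of extract_data_ipc_file defined below; every name here is
# illustrative, not part of the production flow.
def _demo_ipc_round_trip(folder_path, logger):
    demo_data = pd.DataFrame({'NVP_BID_NR': ['12345']})
    demo_shpr = pd.DataFrame()

    # Producer side: serialize the request under a fresh UUID file name.
    send_request(demo_data, demo_shpr, folder_path, logger)

    # Consumer side: pick up the first request file and deserialize it.
    request_file = os.listdir(folder_path)[0]
    return extract_data_ipc_file(folder_path + request_file)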
def main(config, logger, test=False):
    """
    This function loads all the models into memory and sets up the
    environment for receiving requests.

    Note: all files in model_path need to be pickle files and all of them
    will be loaded into the model. If the model names are not in the INI
    file then they will not be used in prediction.
    """
    # Load paths from config
    home = os.environ[config["PATHS"]["HOME"]]
    paths_dict = createPaths(home, config)

    ceilinglookup_filename = paths_dict['input_path'] + config["DATA"]["IWA_CEILING_PROD"]
    svc_to_prod_filename = paths_dict['input_path'] + config["DATA"]["SVC_MATCHING"]
    strategic_overlay_filename = paths_dict['input_path'] + config["DATA"]["STRATEGIC_OVERLAY"]
    sic_to_industry_filename = paths_dict['input_path'] + config["DATA"]["SIC_TO_INDUSTRY"]
    eligible_accessorials_filename = paths_dict['input_path'] + config["DATA"]["ELIGIBLE_ACCESSORIALS"]
    accessorial_ceiling_filename = paths_dict['input_path'] + config["DATA"]["ACCESSORIAL_CEILING"]
    datatypes_filename = paths_dict['input_path'] + config["DATA"]["DATA_TYPE"]
    cwt_filename = paths_dict['model_path'] + config["MODELS"]["CWT"] + ".p"

    # Load models from the model_path directory
    model_objs = {}
    logger.info("Model path loaded from: " + paths_dict['model_path'])
    for model in os.listdir(paths_dict['model_path']):
        try:
            # Skip the README, non-pickle files and the CWT calibration
            # file, which is loaded separately.
            if (model == "README.MD") or (model[-2:] != ".p") or \
                    (model == "cwt_production.p"):
                continue
            with open(paths_dict['model_path'] + model, "rb") as model_pickle:
                logger.info("Loading " + model + " model...")
                model_objs[model[:-2]] = pickle.load(model_pickle)
        except Exception as e:
            print_error_message(e, "Error 3.2a: Model cannot be loaded: " +
                                model, logger)
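# A sketch of the INI sections this consumer reads; the section and key
# names come from the lookups above, while every value shown is a
# placeholder assumption.
#
#   [PATHS]
#   HOME = TP20_HOME
#
#   [DATA]
#   IWA_CEILING_PROD = iwa_ceiling_prod.csv
#   SVC_MATCHING = svc_matching.csv
#   STRATEGIC_OVERLAY = strategic_overlay.csv
#   SIC_TO_INDUSTRY = sic_to_industry.csv
#   ELIGIBLE_ACCESSORIALS = eligible_accessorials.csv
#   ACCESSORIAL_CEILING = accessorial_ceiling.csv
#   DATA_TYPE = datatypes.csv
#
#   [MODELS]
#   CWT = cwt_production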
def run(home, c2p_path, p2c_path, model_objs, settings, init_path,
        ceilinglookup_filename, svc_to_prod_filename,
        strategic_overlay_filename, sic_to_industry_filename,
        datatypes_filename, cwt_filename, accessorial_filename,
        accessorial_map, test, logger):
    """
    This function runs as a continuous loop and receives and processes
    requests using the models brought into memory by the setup() function.
    """
    modified_start = max([os.path.getctime(p2c_path + f)
                          for f in os.listdir(p2c_path)])
    logger.info("Consumer up and running")
    try:
        # Read in the datatypes table and create a dtypes dict
        datatypes_table = pd.read_csv(datatypes_filename, index_col='Feature')
        data_type_dict = datatypes_table.T.to_dict(orient='records')[0]
        ceilinglookup_table = pd.read_csv(ceilinglookup_filename,
                                          dtype={
                                              'Product': 'str',
                                              'Min_List_Rev_Wkly': 'float64',
                                              'Max_List_Rev_Wkly': 'float64',
                                              'Off_Inc_Cap': 'float64'
                                          })
        svc_to_prod_table = pd.read_csv(svc_to_prod_filename, dtype=str)
        strategic_overlay_table = pd.read_csv(strategic_overlay_filename)
        sic_to_industry_table = pd.read_csv(sic_to_industry_filename, dtype=str)
        accessorial_table = pd.read_csv(accessorial_filename)
        accessorial_map = pd.read_csv(accessorial_map)

        # CWT calibration tables
        pd.options.mode.chained_assignment = None
        model = OptimalIncentives(settings=settings,
                                  model_objects=model_objs,
                                  ceilinglookup_file=ceilinglookup_table,
                                  svc_to_prod_file=svc_to_prod_table,
                                  industry_name_lookup=sic_to_industry_table,
                                  strategicOverlay=strategic_overlay_table,
                                  accessorial=accessorial_table,
                                  accessorial_map=accessorial_map,
                                  isProduction=True)
        model_cwt = cwt_production_class(cwt_filename, svc_to_prod_table,
                                         settings)
    except Exception as e:
        print_error_message(e, "Error 3.2b: Model creation error", logger)
        raise
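# For reference, the Feature-indexed datatypes CSV above transposes into a
# single record keyed by feature name. A minimal sketch, assuming a
# hypothetical two-column file (Feature, DataType):
def _demo_dtype_dict(datatypes_filename):
    # datatypes.csv:
    #   Feature,DataType
    #   Bid_List_Rev_Wkly,float64
    #   NVP_BID_NR,str
    datatypes_table = pd.read_csv(datatypes_filename, index_col='Feature')
    # Transposing leaves one row whose columns are the features, so the
    # first (and only) record maps each feature to its dtype string, e.g.
    # {'Bid_List_Rev_Wkly': 'float64', 'NVP_BID_NR': 'str'}
    return datatypes_table.T.to_dict(orient='records')[0]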
def extract_data_ipc_file(file_path, logger):
    """Extracts data from a pickle file at file_path. Returns the data."""
    num_tries = 10
    while True:
        try:
            time.sleep(0.1)
            with open(file_path, "rb") as pickle_file:
                data = pickle.load(pickle_file)
            break
        except EOFError:
            if num_tries == 0:
                raise
            logger.info("EOFError on extracting: " + str(file_path) +
                        " trying again...")
            num_tries -= 1
            continue
        except (IOError, OSError) as e:
            print_error_message(
                e, "Error 2.2b: Data exchange issues related to file: " +
                file_path, logger)
            raise  # without this, `data` would be unbound below
    return data
def extract_data_ipc_file(file_path):
    """Extracts data from a pickle file at file_path. Returns the data."""
    num_tries = 10
    while True:
        try:
            with open(file_path, "rb") as pickle_file:
                data, tp20_bid_shpr = pickle.load(pickle_file)
            break
        except EOFError:
            # The file may still be mid-write; retry a few times.
            if num_tries == 0:
                raise
            num_tries -= 1
            continue
        except (IOError, OSError) as e:
            print_error_message(e, file_path)
            raise
    return data, tp20_bid_shpr
def get_data(home, bid_number, config, test, logger):
    """ Gets the ETL'd data from the controller """
    try:
        tp20_bid, tp20_bid_shpr, tp20_svc_grp, tp20_ceiling_svc, \
            tp20_shpr_svc, ttpsvgp, zone_weight, tncvcel, tp_accessorial = \
            pullData(bid_number, config["DB"]["db_host"], home, test)
    except ValueError as e:
        print_error_message(e, "Error 2.2b: Data transformation issue for bid number " +
                            bid_number + " in step pullData", logger)
        raise
    except Exception as e:
        print_error_message(e, "Error 2.2a: Data transformation issue for bid number " +
                            bid_number + " in step pullData", logger)
        raise

    try:
        tp_bid_table, tp_bid_svc_table = transformData(tp20_bid, tp20_bid_shpr,
                                                       tp20_svc_grp,
                                                       tp20_shpr_svc, ttpsvgp)
    except Exception as e:
        print_error_message(e, "Error 2.2a: Data transformation issue for bid number " +
                            bid_number + " in step transformData", logger)
        raise

    try:
        # Add data check
        if zone_weight is None:
            prod_table = create_prod_base_tables(home, tp_bid_table,
                                                 tp_bid_svc_table)
        else:
def main(bid_number, config, logger, test=False, test_bids=5, timeout=30,
         validations=False):
    """
    Main function: starts the program, sets up the paths and submits the
    bid number to process.
    """
    pd.options.mode.chained_assignment = None

    # Load paths from config
    home = os.environ[config["PATHS"]["HOME"]]
    paths_dict = createPaths(home, config)

    # Write initial error flag
    set_error_flag(bid_number, paths_dict['error_log_path'], str(1), logger)

    # To do bulk scoring: check the tp_bid sample and get all unique bids
    bid_numbers = pd.DataFrame()
    if test:
        try:
            tp20_bid = pd.read_csv(home + '/data/tp_bid.csv', dtype=str)
            bid_numbers = tp20_bid['NVP_BID_NR'].unique()
            print "Bid numbers found: "
        except RuntimeError as e:
            print_error_message(e, "Error 3.0: General producer error due to test run",
                                logger, False)
            sys.exit(1)

    master = pd.DataFrame()
    master_result = pd.DataFrame()
    sql = ''

    if test:
        bids_to_score = test_bids
        if bids_to_score == -1:
            bids_to_score = len(bid_numbers)
    else:
        bids_to_score = 1

    for i in range(0, bids_to_score):
        if test:
            bid_number = str(bid_numbers[i])
            logger.info("Processing bid # " + str(i + 1) + " of " +
                        str(len(bid_numbers)))
            print "Processing bid # " + str(i + 1) + " of " + str(len(bid_numbers))
        logger.info("Processing bid: " + bid_number)

        # Get data
        try:
            response, tp20_bid_shpr, tp20_ceiling_svc, tncvcel, tp_accessorial = \
                get_data(home, bid_number, config, test, logger)
        except Exception:
            if test:
                continue
            else:
                sys.exit(1)

        try:
            # Test the CWT threshold if CWT exists; this prevents CWT over
            # threshold from going to the consumer
            cwt = response[response.Product_Mode.isin(['AIR_CWT', 'GND_CWT'])]
            if not cwt.empty:
                cwt_filename = paths_dict['model_path'] + config["MODELS"]["CWT"] + ".p"
                with open(cwt_filename, "rb") as pickle_file:
                    air_bt_threshold, air_density_threshold, air_size_threshold, \
                        air_cohort_map, air_incentive_map, gnd_bt_threshold, \
                        gnd_density_threshold, gnd_size_threshold, \
                        gnd_cohort_map, gnd_incentive_map = pickle.load(pickle_file)

                # Air CWT check
                air_cwt = cwt[cwt.Product == 'Air_CWT']
                if not air_cwt.empty:
                    air_max = air_size_threshold['MAX VALUE'].max()
                    air_cwt_value = air_cwt['Bid_List_Rev_Wkly'].max()
                    if air_cwt_value > air_max:
                        raise RuntimeError("Error 2.2a: Data transformation issues: "
                                           "CWT threshold reached")

                # Ground CWT check
                gnd_cwt = cwt[cwt.Product == 'Gnd_CWT']
                if not gnd_cwt.empty:
                    gnd_max = gnd_size_threshold['MAX VALUE'].max()
                    gnd_cwt_value = gnd_cwt['Bid_List_Rev_Wkly'].max()
                    if gnd_cwt_value > gnd_max:
                        raise RuntimeError("Error 2.2a: Data transformation issues: "
                                           "CWT threshold reached")

            # Enqueue the data
            master = master.append(response)
            result, result_file, p2c_file = enqueue(response, tp20_bid_shpr,
                                                    timeout, bid_number,
                                                    paths_dict['c2p_path'],
                                                    paths_dict['p2c_path'],
                                                    tncvcel,
                                                    paths_dict['log_path'])
            master_result = master_result.append(result)
        except RuntimeError as e:
            print_error_message(e, "", logger, False)
            if test:
                continue
            else:
                sys.exit(1)
        except (IOError, OSError) as e:
            print_error_message(e, "Error 3.2a: Model cannot be loaded: " +
                                config["MODELS"]["CWT"], logger)

        try:
            # Store data
            if validations:
                test = True
            sql_result = put_data(home, bid_number, config, test, result,
                                  tp20_ceiling_svc, tp_accessorial, logger)
            if test:
                sql = sql + sql_result
        except Exception as e:
            print_error_message(e, "", logger, False)
            logger.warning("Bid " + bid_number + " scoring failed.")
def put_data(home, bid_number, config, test, response, tp20_ceiling_svc,
             tp_accessorial, logger):
    """ Places data into the Oracle DB """
    try:
        acy_table = response[response['Product_Mode'] == 'ACY']
        if not acy_table.empty:
            # Remove SVC_GRP_NR because blanks cause merge issues
            acy_table = acy_table.drop('SVC_GRP_NR', axis=1)
            acy_table = acy_table.merge(tp_accessorial, how='inner',
                                        on=['MVM_DRC_CD', 'SVM_TYP_CD',
                                            'ASY_SVC_TYP_CD'])
            acy_table = acy_table.merge(tp20_ceiling_svc, how='inner',
                                        left_on=['BidNumber', 'SVC_GRP_NR'],
                                        right_on=['NVP_BID_NR', 'SVC_GRP_NR'])

        prod_table = response[response['Product_Mode'] != 'ACY']
        prod_table = prod_table.merge(tp20_ceiling_svc, how='inner',
                                      left_on=['BidNumber', 'SVC_GRP_NR'],
                                      right_on=['NVP_BID_NR', 'SVC_GRP_NR'])
        response = prod_table.append(acy_table)

        if test:
            prod_table = response[response['Product_Mode'] != 'ACY']
            prod_table = prod_table[["BidNumber", "Product", "Incentive_Freight",
                                     "Target_Low", "Target_High"]].drop_duplicates()
            for index, row in prod_table.iterrows():
                logger.info("{0}: Inc - {1}, Low - {2}, High - {3}".format(
                    row["Product"], row["Incentive_Freight"],
                    row["Target_Low"], row["Target_High"]))

        logger.debug('******************Exiting function put_data')
        return pushData(home, bid_number, config["DB"]["db_host"], config,
                        response, tp_accessorial, logger, test)
    except (IOError, OSError) as e:
        print_error_message(e, home + "/" + bid_number + "_results.csv", logger)
        raise
    except RuntimeError as e:
        print_error_message(e, "", logger)
        raise