def main(args):
    """Run the global part of CART iteration step 1.

    Loads the previous global state and the local step results, calls
    ``cart_step_1_global``, stores the updated state (``stepsNo`` incremented
    by one) and transfers the global tree together with the active paths.

    :param args: argument vector; it is assigned to ``sys.argv`` before parsing.
    """
    sys.argv = args

    # Command line options.
    parser = ArgumentParser()
    parser.add_argument('-no_split_points', required=True, type=int,
                        help='Number of split points')
    for flag, description in (
            ('-cur_state_pkl',
             'Path to the pickle file holding the current state.'),
            ('-prev_state_pkl',
             'Path to the pickle file holding the previous state.'),
            ('-local_step_dbs',
             'Path to db holding local step results.')):
        parser.add_argument(flag, required=True, help=description)
    opts, _ = parser.parse_known_args()

    cur_state_file = path.abspath(opts.cur_state_pkl)
    prev_state_file = path.abspath(opts.prev_state_pkl)
    local_results_db = path.abspath(opts.local_step_dbs)

    # Previous global state and the output of the local nodes.
    previous = StateData.load(prev_state_file).data
    active_paths = CartIter1_Loc2Glob_TD.load(local_results_db).get_data()

    # Global iteration step of the algorithm.
    active_paths = cart_step_1_global(previous['args_X'],
                                      previous['args_Y'],
                                      previous['CategoricalVariables'],
                                      active_paths,
                                      opts.no_split_points)
    global_out = Cart_Glob2Loc_TD(previous['globalTree'], active_paths)

    # Persist the new global state.
    updated = StateData(stepsNo=previous['stepsNo'] + 1,
                        args_X=previous['args_X'],
                        args_Y=previous['args_Y'],
                        CategoricalVariables=previous['CategoricalVariables'],
                        globalTree=previous['globalTree'],
                        activePaths=active_paths,
                        t1=previous['t1'])
    updated.save(fname=cur_state_file)

    # Hand the result over to the local nodes.
    global_out.transfer()
def main(args):
    """Run the global part of the CART initialisation step.

    Reads the merged local output, calls ``cart_init_1_global`` to obtain the
    initial tree and active paths, stores them as the first global state
    (``stepsNo`` 0) and transfers them.

    :param args: argument vector; it is assigned to ``sys.argv`` before parsing.
    """
    sys.argv = args

    # Command line options.
    parser = ArgumentParser()
    for flag, description in (
            ('-cur_state_pkl',
             'Path to the pickle file holding the current state.'),
            ('-local_step_dbs',
             'Path to db holding local step results.')):
        parser.add_argument(flag, required=True, help=description)
    opts, _ = parser.parse_known_args()

    cur_state_file = path.abspath(opts.cur_state_pkl)
    local_results_db = path.abspath(opts.local_step_dbs)

    # Merged output of the local nodes.
    merged = CartInit_Loc2Glob_TD.load(local_results_db).get_data()
    args_X, args_Y, CategoricalVariables, t1 = merged

    # Global step of the algorithm.
    globalTree, activePaths = cart_init_1_global()

    # Persist the initial global state.
    StateData(stepsNo=0,
              args_X=args_X,
              args_Y=args_Y,
              CategoricalVariables=CategoricalVariables,
              globalTree=globalTree,
              activePaths=activePaths,
              t1=t1).save(fname=cur_state_file)

    # Hand the result over to the local nodes.
    Cart_Glob2Loc_TD(globalTree, activePaths).transfer()
def main(args):
    """Run the local part of CART iteration step 3.

    Loads the previous local state and the global output, calls
    ``cart_step_3_local``, stores the refreshed local state and transfers the
    resulting active paths.

    :param args: argument vector; it is assigned to ``sys.argv`` before parsing.
    """
    sys.argv = args

    # Command line options.
    parser = ArgumentParser()
    for flag, description in (
            ('-cur_state_pkl',
             'Path to the pickle file holding the current state.'),
            ('-prev_state_pkl',
             'Path to the pickle file holding the previous state.'),
            ('-global_step_db',
             'Path to db holding global step results.')):
        parser.add_argument(flag, required=True, help=description)
    opts, _ = parser.parse_known_args()

    cur_state_file = path.abspath(opts.cur_state_pkl)
    prev_state_file = path.abspath(opts.prev_state_pkl)
    global_results_db = path.abspath(opts.global_step_db)

    # Previous local state and the output of the global node.
    previous = StateData.load(prev_state_file).data
    globalTree, activePaths = Cart_Glob2Loc_TD.load(global_results_db).get_data()

    # Local iteration step of the algorithm.
    activePaths = cart_step_3_local(previous['dataFrame'],
                                    previous['args_X'],
                                    previous['args_Y'],
                                    previous['CategoricalVariables'],
                                    activePaths)

    updated = StateData(args_X=previous['args_X'],
                        args_Y=previous['args_Y'],
                        CategoricalVariables=previous['CategoricalVariables'],
                        dataFrame=previous['dataFrame'],
                        globalTree=globalTree,
                        activePaths=activePaths)
    local_out = CartIter3_Loc2Glob_TD(activePaths)

    # Persist the local state, then return the local output.
    updated.save(fname=cur_state_file)
    local_out.transfer()
def main():
    """Run one local iteration of logistic regression.

    Loads the previous local state and the global output, delegates to
    ``logregr_local_iter``, stores the state it returns and transfers its
    output.
    """
    # Command line options.
    parser = ArgumentParser()
    for flag, description in (
            ('-cur_state_pkl',
             'Path to the pickle file holding the current state.'),
            ('-prev_state_pkl',
             'Path to the pickle file holding the previous state.'),
            ('-global_step_db',
             'Path to db holding global step results.')):
        parser.add_argument(flag, required=True, help=description)
    opts, _ = parser.parse_known_args()

    cur_state_file = path.abspath(opts.cur_state_pkl)
    prev_state_file = path.abspath(opts.prev_state_pkl)
    global_results_db = path.abspath(opts.global_step_db)

    # Previous local state and the output of the global node.
    previous = StateData.load(prev_state_file).data
    global_out = LogRegrIter_Glob2Loc_TD.load(global_results_db)

    # Local iteration step of the algorithm.
    new_state, local_out = logregr_local_iter(local_state=previous,
                                              local_in=global_out)

    # Persist the local state, then return the local output.
    new_state.save(fname=cur_state_file)
    local_out.transfer()
def main():
    """Run the second local step of the histograms algorithm.

    Loads the previous local state and the ``'global_in'`` entry of the global
    output, calls ``run_local_step`` and transfers the resulting histograms.
    No new state is saved by this step.
    """
    # Command line options.
    parser = ArgumentParser()
    for flag, description in (
            ('-prev_state_pkl',
             'Path to the pickle file holding the previous state.'),
            ('-global_step_db',
             'Path to db holding global step results.')):
        parser.add_argument(flag, required=True, help=description)
    opts, _ = parser.parse_known_args()

    prev_state_file = path.abspath(opts.prev_state_pkl)
    global_results_db = path.abspath(opts.global_step_db)

    # Previous local state and the statistics sent by the global node.
    state = StateData.load(prev_state_file).get_data()
    global_payload = Global2Local_TD.load(global_results_db).get_data()
    globalStatistics = global_payload['global_in']

    args_X = state['args_X']
    args_Y = state['args_Y']
    categorical = state['CategoricalVariablesWithDistinctValues']

    # Local step of the algorithm.
    Hist = run_local_step(args_X,
                          args_Y,
                          state['args_bins'],
                          state['dataSchema'],
                          categorical,
                          state['dataFrame'],
                          globalStatistics)

    # Pack the results and return them.
    multipleHist2_Loc2Glob_TD(args_X, args_Y, categorical, Hist).transfer()
def main():
    """Run one global iteration of logistic regression.

    Loads the previous global state and the local results, delegates to
    ``logregr_global_iter``, stores the state it returns and transfers its
    output.
    """
    # Command line options.
    parser = ArgumentParser()
    for flag, description in (
            ('-cur_state_pkl',
             'Path to the pickle file holding the current state.'),
            ('-prev_state_pkl',
             'Path to the pickle file holding the previous state.'),
            ('-local_step_dbs',
             'Path to db holding local step results.')):
        parser.add_argument(flag, required=True, help=description)
    opts, _ = parser.parse_known_args()

    cur_state_file = path.abspath(opts.cur_state_pkl)
    prev_state_file = path.abspath(opts.prev_state_pkl)
    local_results_db = path.abspath(opts.local_step_dbs)

    # Previous global state and the output of the local nodes.
    previous = StateData.load(prev_state_file).data
    local_out = LogRegrIter_Loc2Glob_TD.load(local_results_db)

    # Global step of the algorithm.
    new_state, global_out = logregr_global_iter(global_state=previous,
                                                global_in=local_out)

    # Persist the global state, then return the algorithm's output.
    new_state.save(fname=cur_state_file)
    global_out.transfer()
def logregr_global_iter(global_state, global_in):
    """Perform one global update of the logistic regression coefficients.

    The new coefficients are the solution of ``hess @ coeff = grad``, where
    ``grad`` and ``hess`` come from ``global_in.get_data()``.

    :param global_state: mapping holding ``n_obs``, ``n_cols``, ``ll``,
        ``iter``, ``y_val_dict``, ``schema_X`` and ``schema_Y`` from the
        previous step.
    :param global_in: object whose ``get_data()`` returns the tuple
        ``(ll_new, grad, hess)``.
    :return: ``(global_state, global_out)`` - a new ``StateData`` carrying the
        updated log-likelihood, coefficients, ``delta`` and iteration counter,
        and a ``LogRegrIter_Glob2Loc_TD`` wrapping the new coefficients.
    :raises numpy.linalg.LinAlgError: if ``hess`` is singular.
    """
    # Unpack global input
    ll_new, grad, hess = global_in.get_data()

    # Compute new coefficients. Solving the linear system directly is cheaper
    # and numerically more accurate than forming the explicit inverse of the
    # Hessian and multiplying by it.
    coeff = np.linalg.solve(hess, grad)

    # Update termination quantities
    delta = abs(ll_new - global_state['ll'])
    n_iter = global_state['iter'] + 1

    # Pack state and results
    new_state = StateData(n_obs=global_state['n_obs'],
                          n_cols=global_state['n_cols'],
                          ll=ll_new,
                          coeff=coeff,
                          delta=delta,
                          iter=n_iter,
                          y_val_dict=global_state['y_val_dict'],
                          schema_X=global_state['schema_X'],
                          schema_Y=global_state['schema_Y'])
    global_out = LogRegrIter_Glob2Loc_TD(coeff)
    return new_state, global_out
def main(args):
    """Finalise the CART run on the global node.

    Loads the previous global state, converts the stored tree to JSON with
    ``tree_to_json`` and publishes it through ``set_algorithms_output_data``.
    It also logs, field by field, the stored start time ``t1`` next to the
    current local time.

    :param args: argument vector; it is assigned to ``sys.argv`` before parsing.
    """
    sys.argv = args
    init_logger()

    # Command line options.
    parser = ArgumentParser()
    for flag, description in (
            ('-cur_state_pkl',
             'Path to the pickle file holding the current state.'),
            ('-prev_state_pkl',
             'Path to the pickle file holding the previous state.'),
            ('-local_step_dbs',
             'Path to db holding local step results.')):
        parser.add_argument(flag, required=True, help=description)
    opts, _ = parser.parse_known_args()

    # Only the previous state is read below; the other two paths are resolved
    # but not used by this step.
    fname_cur_state = path.abspath(opts.cur_state_pkl)
    prev_state_file = path.abspath(opts.prev_state_pkl)
    local_dbs = path.abspath(opts.local_step_dbs)

    # Global state and the JSON form of the final tree.
    state = StateData.load(prev_state_file).data
    tree_json = state['globalTree'].tree_to_json()
    myresult = {"result": [{"type": "application/json", "data": tree_json}]}

    # Log start time, current time and their difference for every time field.
    started = state['t1']
    now = time.localtime(time.time())
    field_names = ['yy', 'mm', 'dd', 'hh', 'min', 'sec', 'wday', 'yday',
                   'isdst']
    logging.info(" Time: ")
    for idx, start_value in enumerate(started):
        logging.info([field_names[idx], now[idx], start_value,
                      now[idx] - start_value])

    set_algorithms_output_data(myresult)
def main(args):
    """Run the local part of the CART initialisation step.

    Queries the local database for the data and the metadata of the requested
    variables, runs ``cart_init_1_local``, enforces the privacy threshold,
    stores the local state and transfers the variable lists, the categorical
    variables and the start time.

    :param args: argument vector; it is assigned to ``sys.argv`` before parsing.
    :raises PrivacyError: if the data frame returned by ``cart_init_1_local``
        has fewer than ``PRIVACY_MAGIC_NUMBER`` rows.
    """
    # Start time; it is transferred to the global node together with the output.
    t1 = time.localtime(time.time())

    # Command line options.
    sys.argv = args
    parser = ArgumentParser()
    for flag, description in (
            ('-x', 'Independent variable names, comma separated.'),
            ('-y', 'Dependent variable name'),
            ('-cur_state_pkl',
             'Path to the pickle file holding the current state.'),
            ('-input_local_DB', 'Path to local db.'),
            ('-db_query', 'Query to be executed on local db.')):
        parser.add_argument(flag, required=True, help=description)
    opts, _ = parser.parse_known_args()

    cur_state_file = path.abspath(opts.cur_state_pkl)
    local_db_file = path.abspath(opts.input_local_DB)
    # Turn escaped double quotes in the query back into plain double quotes.
    data_query = opts.db_query.replace("\\\"", "\"")

    # Variable names with all blanks removed.
    args_X = opts.x.replace(' ', '').split(',')
    args_Y = [opts.y.replace(' ', '')]

    # 1. Query database and metadata.
    # NOTE(review): the variable names are spliced into the SQL text as-is;
    # if they can come from an untrusted caller this is an injection risk.
    quoted_codes = "'" + "','".join(args_X + args_Y) + "'"
    metadata_query = ("select * from metadata where code in ("
                      + quoted_codes + ");")
    dataSchema, metadataSchema, metadata, dataFrame = query_database(
        fname_db=local_db_file,
        queryData=data_query,
        queryMetadata=metadata_query)
    CategoricalVariables = variable_categorical_getDistinctValues(metadata)

    # 2. Run algorithm.
    dataFrame, CategoricalVariables = cart_init_1_local(
        dataFrame, dataSchema, CategoricalVariables)
    if len(dataFrame) < PRIVACY_MAGIC_NUMBER:
        raise PrivacyError('The Experiment could not run with the input provided because there are insufficient data.')

    # 3. Save local state.
    StateData(dataFrame=dataFrame,
              args_X=args_X,
              args_Y=args_Y,
              CategoricalVariables=CategoricalVariables).save(
                  fname=cur_state_file)

    # Transfer local output.
    CartInit_Loc2Glob_TD(args_X, args_Y, CategoricalVariables, t1).transfer()
def main():
    """Evaluate the termination condition of the iterative algorithm.

    Loads the previous global state and passes it, together with the maximum
    number of iterations, to ``termination_condition``.
    """
    # Command line options.
    parser = ArgumentParser()
    parser.add_argument(
        '-prev_state_pkl',
        required=True,
        help='Path to the pickle file holding the previous state.')
    parser.add_argument(
        '-max_iter',
        type=int,
        required=True,
        help='Maximum number of iterations.')
    opts, _ = parser.parse_known_args()

    prev_state_file = path.abspath(opts.prev_state_pkl)
    iteration_limit = opts.max_iter

    state = StateData.load(prev_state_file).data
    termination_condition(state, iteration_limit)
def logregr_global_init(global_in):
    """Create the initial global state of the logistic regression.

    :param global_in: object whose ``get_data()`` returns the tuple
        ``(n_obs, n_cols, y_val_dict, schema_X, schema_Y)``.
    :return: ``(global_state, global_out)`` - a ``StateData`` with zeroed
        coefficients and iteration counter 0, and a
        ``LogRegrIter_Glob2Loc_TD`` wrapping the same coefficients.
    :raises ExaremeError: if ``n_obs`` is 0.
    """
    n_obs, n_cols, y_val_dict, schema_X, schema_Y = global_in.get_data()
    if n_obs == 0:
        raise ExaremeError('The selected variables contain 0 datapoints.')

    # Starting values.
    # NOTE(review): the seed log-likelihood is -2 * n_obs * log(2), i.e. twice
    # the log-likelihood of an all-zero coefficient vector - presumably so the
    # first computed delta is non-zero; confirm before changing.
    initial_ll = -2 * n_obs * np.log(2)
    initial_coeff = np.zeros(n_cols)

    # Pack state and results.
    state = StateData(n_obs=n_obs,
                      n_cols=n_cols,
                      ll=initial_ll,
                      coeff=initial_coeff,
                      iter=0,
                      y_val_dict=y_val_dict,
                      schema_X=schema_X,
                      schema_Y=schema_Y)
    return state, LogRegrIter_Glob2Loc_TD(initial_coeff)
def main(args):
    """Decide whether the CART iterations should stop.

    Publishes ``'STOP'`` through ``set_algorithms_output_data`` when the
    stored ``activePaths`` is empty or ``stepsNo`` exceeds ``-max_depth``;
    publishes ``'CONTINUE'`` otherwise.

    :param args: argument vector; it is assigned to ``sys.argv`` before parsing.
    """
    sys.argv = args

    # Command line options.
    parser = ArgumentParser()
    parser.add_argument(
        '-prev_state_pkl',
        required=True,
        help='Path to the pickle file holding the previous state.')
    parser.add_argument(
        '-max_depth',
        type=int,
        required=True,
        help='Maximum depth of tree')
    opts, _ = parser.parse_known_args()

    prev_state_file = path.abspath(opts.prev_state_pkl)
    state = StateData.load(prev_state_file).data

    finished = (not state['activePaths']
                or state['stepsNo'] > opts.max_depth)
    set_algorithms_output_data('STOP' if finished else 'CONTINUE')
def logregr_local_init(local_in):
    """Prepare the local data for logistic regression.

    Encodes the two smallest distinct values of ``Y`` (in sorted order) as 0
    and 1, prepends a column of ones to ``X`` for the intercept and inserts
    ``'(Intercept)'`` at the front of ``schema_X`` (the list is modified in
    place).

    :param local_in: the sequence ``(X, Y, schema_X, schema_Y)``.
    :return: ``(local_state, local_out)`` - a ``StateData`` holding the
        prepared ``X`` and ``Y``, and a ``LogRegrInit_Loc2Glob_TD`` built from
        the observation count, column count, label mapping and both schemas.
    """
    features, labels, schema_X, schema_Y = local_in

    n_obs = len(labels)
    n_cols = len(features[0]) + 1  # one extra column for the intercept
    schema_X.insert(0, '(Intercept)')

    # Map the categories of Y to 0 and 1.
    categories = sorted(set(labels))
    y_val_dict = {categories[0]: 0, categories[1]: 1}
    encoded = np.array([y_val_dict[label] for label in labels],
                       dtype=np.uint8)

    # Leading column of ones accounts for the intercept term.
    design = np.insert(features, obj=0, values=np.ones(n_obs), axis=1)

    # Pack state and results.
    state = StateData(X=design, Y=encoded)
    out = LogRegrInit_Loc2Glob_TD(n_obs, n_cols, y_val_dict, schema_X,
                                  schema_Y)
    return state, out
def logregr_local_iter(local_state, local_in):
    """Compute the local quantities of one logistic regression iteration.

    For the coefficients delivered by ``local_in.get_data()`` this evaluates,
    on the local data only, the log-likelihood, the vector
    ``X^T (d * z + (Y - s))`` (called ``grad`` here) and the Hessian
    ``X^T diag(d) X``, where ``z = X @ coeff``, ``s = expit(z)`` and
    ``d = s * (1 - s)``.

    :param local_state: mapping with the keys ``'X'`` and ``'Y'``.
    :param local_in: object whose ``get_data()`` returns the coefficients.
    :return: ``(local_state, local_out)`` - a ``StateData`` holding the
        unchanged ``X`` and ``Y``, and a ``LogRegrIter_Loc2Glob_TD`` built
        from ``(ll, grad, hess)``.
    """
    # Unpack local state
    X, Y = local_state['X'], local_state['Y']
    # Unpack local input
    coeff = local_in.get_data()

    # Auxiliary quantities
    z = np.dot(X, coeff)
    s = expit(z)
    d = s * (1 - s)
    Xt = np.transpose(X)

    # Hessian: X^T diag(d) X. Scaling the rows of X by d avoids building the
    # dense n_obs x n_obs diagonal matrix.
    hess = np.dot(Xt, d[:, np.newaxis] * X)

    # "Gradient": X^T diag(d) (z + (Y - s) / d), expanded to d * z + (Y - s)
    # so that nothing is divided by d, which is 0 once expit saturates.
    grad = np.dot(Xt, d * z + (Y - s))

    # Log-likelihood. log(s) = -logaddexp(0, -z) and
    # log(1 - s) = -logaddexp(0, z); this form stays finite for large |z|,
    # where log(s) or log(1 - s) would evaluate to -inf.
    ll = -(np.dot(Y, np.logaddexp(0, -z)) + np.dot(1 - Y, np.logaddexp(0, z)))

    # Pack state and results
    local_state = StateData(X=X, Y=Y)
    local_out = LogRegrIter_Loc2Glob_TD(ll, grad, hess)
    return local_state, local_out
def main(args):
    """Run the local part of CART iteration step 2.

    Loads the previous local state and the global output, calls
    ``cart_step_2_local``, stores the refreshed local state and transfers the
    resulting active paths.

    :param args: argument vector; it is assigned to ``sys.argv`` before parsing.
    """
    sys.argv = args

    # Command line options.
    parser = ArgumentParser()
    for flag, description in (
            ('-cur_state_pkl',
             'Path to the pickle file holding the current state.'),
            ('-prev_state_pkl',
             'Path to the pickle file holding the previous state.'),
            ('-global_step_db',
             'Path to db holding global step results.')):
        parser.add_argument(flag, required=True, help=description)
    opts, _ = parser.parse_known_args()

    cur_state_file = path.abspath(opts.cur_state_pkl)
    prev_state_file = path.abspath(opts.prev_state_pkl)
    global_results_db = path.abspath(opts.global_step_db)

    # Previous local state and the output of the global node.
    previous = StateData.load(prev_state_file).data
    globalTree, activePaths = Cart_Glob2Loc_TD.load(global_results_db).get_data()

    # Local iteration step of the algorithm.
    # NOTE(review): an older inline version of this step (per-path data frame
    # filtering followed by node_computations) used to sit here as
    # commented-out code; presumably superseded by cart_step_2_local.
    activePaths = cart_step_2_local(previous['dataFrame'],
                                    previous['CategoricalVariables'],
                                    previous['args_X'],
                                    previous['args_Y'],
                                    activePaths)

    updated = StateData(args_X=previous['args_X'],
                        args_Y=previous['args_Y'],
                        CategoricalVariables=previous['CategoricalVariables'],
                        dataFrame=previous['dataFrame'],
                        globalTree=globalTree,
                        activePaths=activePaths)
    local_out = CartIter2_Loc2Glob_TD(activePaths)

    # Persist the local state, then return the local output.
    updated.save(fname=cur_state_file)
    local_out.transfer()
def main():
    """Run the first local step of the histograms algorithm.

    Parses the variable lists and the bins dictionary, queries the local
    database, checks that every non-categorical ``x`` variable has a bin
    count, runs ``run_local_step``, stores the local state and transfers the
    local statistics.

    :raises ExaremeError: if ``-x`` is empty, or if an ``x`` variable is
        neither categorical nor present in the bins dictionary.
    """
    # Command line options.
    parser = ArgumentParser()
    for flag, description in (
            ('-x', 'Variable names, comma seperated '),
            ('-y', 'Categorical variables names, comma seperated.'),
            ('-bins',
             'Dictionary of variables names (key) and number of bins (value)'),
            ('-input_local_DB', 'Path to local db.'),
            ('-db_query', 'Query to be executed on local db.'),
            ('-cur_state_pkl',
             'Path to the pickle file holding the current state.')):
        parser.add_argument(flag, required=True, help=description)
    opts, _ = parser.parse_known_args()

    data_query = opts.db_query
    cur_state_file = path.abspath(opts.cur_state_pkl)
    local_db_file = path.abspath(opts.input_local_DB)

    if opts.x == '':
        raise ExaremeError('Field x must be non empty.')

    # Variable names with all blanks removed; y may be empty.
    args_X = opts.x.replace(' ', '').split(',')
    args_Y = opts.y.replace(' ', '').split(',') if opts.y != '' else []
    varNames = "'" + "','".join(args_X + args_Y) + "'"

    # An empty -bins argument means no bins were given.
    args_bins = json.loads(opts.bins) if opts.bins != '' else {}

    metadata_query = "select * from metadata where code in (" + varNames + ");"
    dataSchema, metadataSchema, metadata, dataFrame = query_database(
        fname_db=local_db_file,
        queryData=data_query,
        queryMetadata=metadata_query)
    categorical = variable_categorical_getDistinctValues(metadata)

    # Every non-categorical x variable needs a bin count.
    for varx in args_X:
        if varx not in categorical and varx not in args_bins:
            raise ExaremeError(
                'Bin value is not defined for one at least non-categorical variable. i.e. ' + varx)

    # Local step of the algorithm.
    localStatistics = run_local_step(args_X, args_Y, args_bins, dataSchema,
                                     categorical, dataFrame)

    # Save local state.
    StateData(args_X=args_X,
              args_Y=args_Y,
              args_bins=args_bins,
              dataSchema=dataSchema,
              CategoricalVariablesWithDistinctValues=categorical,
              dataFrame=dataFrame).save(fname=cur_state_file)

    # Transfer local output.
    multipleHist1_Loc2Glob_TD(localStatistics).transfer()