def main(args):
    """Run the global initialisation step of the CART algorithm.

    ``args`` is installed as ``sys.argv`` and parsed for two required
    options:

    * ``-cur_state_pkl``  -- path the global state pickle is written to.
    * ``-local_step_dbs`` -- path to the db holding the local step results.

    Arguments the parser does not know are collected and ignored.
    The function loads the local nodes' output, calls
    ``cart_init_1_global``, saves a ``StateData`` with ``stepsNo=0`` and
    transfers a ``Cart_Glob2Loc_TD`` built from the new tree and paths.
    """
    # The parser reads sys.argv, so the caller's vector is installed first.
    sys.argv = args
    parser = ArgumentParser()
    for flag, description in (
            ('-cur_state_pkl',
             'Path to the pickle file holding the current state.'),
            ('-local_step_dbs',
             'Path to db holding local step results.')):
        parser.add_argument(flag, required=True, help=description)
    options, _ = parser.parse_known_args()
    state_path = path.abspath(options.cur_state_pkl)
    local_db_path = path.abspath(options.local_step_dbs)

    # Merged output of the local nodes.
    local_output = CartInit_Loc2Glob_TD.load(local_db_path).get_data()
    args_X, args_Y, CategoricalVariables, t1 = local_output

    # Global part of the algorithm.
    globalTree, activePaths = cart_init_1_global()

    # Persist the global state for the next step.
    StateData(stepsNo=0,
              args_X=args_X,
              args_Y=args_Y,
              CategoricalVariables=CategoricalVariables,
              globalTree=globalTree,
              activePaths=activePaths,
              t1=t1).save(fname=state_path)

    # Hand the result back to the local nodes.
    Cart_Glob2Loc_TD(globalTree, activePaths).transfer()
def main(args):
    """Run global iteration step 1 of the CART algorithm.

    ``args`` is installed as ``sys.argv`` and parsed for four required
    options:

    * ``-no_split_points`` -- number of split points (parsed as ``int``).
    * ``-cur_state_pkl``   -- path the updated global state is written to.
    * ``-prev_state_pkl``  -- path of the previous global state pickle.
    * ``-local_step_dbs``  -- path to the db holding the local step results.

    Arguments the parser does not know are collected and ignored.
    The saved state carries over every field of the previous state except
    ``activePaths`` (replaced by the result of ``cart_step_1_global``) and
    ``stepsNo`` (incremented by one).
    """
    # The parser reads sys.argv, so the caller's vector is installed first.
    sys.argv = args
    parser = ArgumentParser()
    parser.add_argument('-no_split_points', required=True, type=int,
                        help='Number of split points')
    for flag, description in (
            ('-cur_state_pkl',
             'Path to the pickle file holding the current state.'),
            ('-prev_state_pkl',
             'Path to the pickle file holding the previous state.'),
            ('-local_step_dbs',
             'Path to db holding local step results.')):
        parser.add_argument(flag, required=True, help=description)
    options, _ = parser.parse_known_args()
    cur_state_path = path.abspath(options.cur_state_pkl)
    prev_state_path = path.abspath(options.prev_state_pkl)
    local_db_path = path.abspath(options.local_step_dbs)

    # Previous global state and the output of the local nodes.
    previous = StateData.load(prev_state_path).data
    activePaths = CartIter1_Loc2Glob_TD.load(local_db_path).get_data()

    features = previous['args_X']
    target = previous['args_Y']
    categorical = previous['CategoricalVariables']
    tree = previous['globalTree']

    # Global part of the iteration.
    activePaths = cart_step_1_global(features, target, categorical,
                                     activePaths, options.no_split_points)
    outgoing = Cart_Glob2Loc_TD(tree, activePaths)

    # Persist the updated global state.
    updated = StateData(stepsNo=previous['stepsNo'] + 1,
                        args_X=features,
                        args_Y=target,
                        CategoricalVariables=categorical,
                        globalTree=tree,
                        activePaths=activePaths,
                        t1=previous['t1'])
    updated.save(fname=cur_state_path)

    # Hand the result back to the local nodes.
    outgoing.transfer()
def main(args):
    """Run local iteration step 3 of the CART algorithm.

    ``args`` is installed as ``sys.argv`` and parsed for three required
    options:

    * ``-cur_state_pkl``  -- path the updated local state is written to.
    * ``-prev_state_pkl`` -- path of the previous local state pickle.
    * ``-global_step_db`` -- path to the db holding the global step results.

    Arguments the parser does not know are collected and ignored.
    The function feeds the stored data frame and the active paths received
    from the global node to ``cart_step_3_local``, saves the resulting local
    state and transfers a ``CartIter3_Loc2Glob_TD`` built from the new
    active paths.
    """
    # The parser reads sys.argv, so the caller's vector is installed first.
    sys.argv = args
    parser = ArgumentParser()
    for flag, description in (
            ('-cur_state_pkl',
             'Path to the pickle file holding the current state.'),
            ('-prev_state_pkl',
             'Path to the pickle file holding the previous state.'),
            ('-global_step_db',
             'Path to db holding global step results.')):
        parser.add_argument(flag, required=True, help=description)
    options, _ = parser.parse_known_args()
    cur_state_path = path.abspath(options.cur_state_pkl)
    prev_state_path = path.abspath(options.prev_state_pkl)
    global_db_path = path.abspath(options.global_step_db)

    # Previous local state and the output of the global node.
    previous = StateData.load(prev_state_path).data
    globalTree, activePaths = Cart_Glob2Loc_TD.load(global_db_path).get_data()

    frame = previous['dataFrame']
    features = previous['args_X']
    target = previous['args_Y']
    categorical = previous['CategoricalVariables']

    # Local part of the iteration.
    activePaths = cart_step_3_local(frame, features, target, categorical,
                                    activePaths)

    updated = StateData(args_X=features,
                        args_Y=target,
                        CategoricalVariables=categorical,
                        dataFrame=frame,
                        globalTree=globalTree,
                        activePaths=activePaths)
    outgoing = CartIter3_Loc2Glob_TD(activePaths)

    # Persist the local state, then send the output to the global node.
    updated.save(fname=cur_state_path)
    outgoing.transfer()
def main(args):
    """Run the local initialisation step of the CART algorithm.

    ``args`` is installed as ``sys.argv`` and parsed for five required
    options:

    * ``-x``              -- independent variable names, comma separated.
    * ``-y``              -- dependent variable name.
    * ``-cur_state_pkl``  -- path the local state pickle is written to.
    * ``-input_local_DB`` -- path to the local db.
    * ``-db_query``       -- query to be executed on the local db.

    Arguments the parser does not know are collected and ignored.
    A timestamp taken on entry is forwarded to the global node together with
    the variable lists and the categorical-variable information.

    Raises:
        PrivacyError: if the data frame returned by ``cart_init_1_local``
            has fewer than ``PRIVACY_MAGIC_NUMBER`` rows.
    """
    # Taken before anything else; it travels with the local output as ``t1``.
    t1 = time.localtime(time.time())

    # The parser reads sys.argv, so the caller's vector is installed first.
    sys.argv = args
    parser = ArgumentParser()
    for flag, description in (
            ('-x', 'Independent variable names, comma separated.'),
            ('-y', 'Dependent variable name'),
            ('-cur_state_pkl',
             'Path to the pickle file holding the current state.'),
            ('-input_local_DB', 'Path to local db.'),
            ('-db_query', 'Query to be executed on local db.')):
        parser.add_argument(flag, required=True, help=description)
    options, _ = parser.parse_known_args()
    state_path = path.abspath(options.cur_state_pkl)
    local_db_path = path.abspath(options.input_local_DB)

    # Turn backslash-escaped double quotes in the query into plain ones.
    data_query = options.db_query.replace('\\"', '"')

    # Variable names, with all spaces stripped.
    args_X = options.x.replace(' ', '').split(',')
    args_Y = [options.y.replace(' ', '')]

    # 1. Query data and metadata.
    # NOTE(review): the variable names are spliced into the SQL text as-is;
    # query_database only accepts a finished query string here.
    quoted_codes = "','".join(args_X + args_Y)
    queryMetadata = ("select * from metadata where code in ('"
                     + quoted_codes + "');")
    dataSchema, _metadata_schema, metadata, dataFrame = query_database(
        fname_db=local_db_path,
        queryData=data_query,
        queryMetadata=queryMetadata)
    CategoricalVariables = variable_categorical_getDistinctValues(metadata)

    # 2. Local part of the algorithm, followed by the privacy threshold check.
    dataFrame, CategoricalVariables = cart_init_1_local(
        dataFrame, dataSchema, CategoricalVariables)
    if len(dataFrame) < PRIVACY_MAGIC_NUMBER:
        raise PrivacyError('The Experiment could not run with the input provided because there are insufficient data.')

    # 3. Persist the local state.
    StateData(dataFrame=dataFrame,
              args_X=args_X,
              args_Y=args_Y,
              CategoricalVariables=CategoricalVariables).save(
                  fname=state_path)

    # Send the local output to the global node.
    CartInit_Loc2Glob_TD(args_X, args_Y, CategoricalVariables, t1).transfer()
def main(args):
    """Run local iteration step 2 of the CART algorithm.

    ``args`` is installed as ``sys.argv`` and parsed for three required
    options:

    * ``-cur_state_pkl``  -- path the updated local state is written to.
    * ``-prev_state_pkl`` -- path of the previous local state pickle.
    * ``-global_step_db`` -- path to the db holding the global step results.

    Arguments the parser does not know are collected and ignored.
    The function feeds the stored data frame and the active paths received
    from the global node to ``cart_step_2_local``, saves the resulting local
    state and transfers a ``CartIter2_Loc2Glob_TD`` built from the new
    active paths.
    """
    # The parser reads sys.argv, so the caller's vector is installed first.
    sys.argv = args
    parser = ArgumentParser()
    for flag, description in (
            ('-cur_state_pkl',
             'Path to the pickle file holding the current state.'),
            ('-prev_state_pkl',
             'Path to the pickle file holding the previous state.'),
            ('-global_step_db',
             'Path to db holding global step results.')):
        parser.add_argument(flag, required=True, help=description)
    options, _ = parser.parse_known_args()
    cur_state_path = path.abspath(options.cur_state_pkl)
    prev_state_path = path.abspath(options.prev_state_pkl)
    global_db_path = path.abspath(options.global_step_db)

    # Previous local state and the output of the global node.
    previous = StateData.load(prev_state_path).data
    globalTree, activePaths = Cart_Glob2Loc_TD.load(global_db_path).get_data()

    frame = previous['dataFrame']
    categorical = previous['CategoricalVariables']
    features = previous['args_X']
    target = previous['args_Y']

    # Local part of the iteration. The categorical variables are passed
    # before the variable lists in this call.
    activePaths = cart_step_2_local(frame, categorical, features, target,
                                    activePaths)

    updated = StateData(args_X=features,
                        args_Y=target,
                        CategoricalVariables=categorical,
                        dataFrame=frame,
                        globalTree=globalTree,
                        activePaths=activePaths)
    outgoing = CartIter2_Loc2Glob_TD(activePaths)

    # Persist the local state, then send the output to the global node.
    updated.save(fname=cur_state_path)
    outgoing.transfer()
def main():
    """Run the local step of the multiple-histograms algorithm.

    Takes no parameters; the parser reads the process command line. Six
    required options are expected:

    * ``-x``              -- variable names, comma separated.
    * ``-y``              -- categorical variable names, comma separated
                             (may be the empty string).
    * ``-bins``           -- JSON dictionary mapping variable name to number
                             of bins (may be the empty string).
    * ``-input_local_DB`` -- path to the local db.
    * ``-db_query``       -- query to be executed on the local db.
    * ``-cur_state_pkl``  -- path the local state pickle is written to.

    Arguments the parser does not know are collected and ignored.

    Raises:
        ExaremeError: if ``-x`` is empty, or if a variable in ``-x`` is
            neither among the categorical variables found in the metadata
            nor given a bin count in ``-bins``.
    """
    parser = ArgumentParser()
    for flag, description in (
            ('-x', 'Variable names, comma seperated '),
            ('-y', 'Categorical variables names, comma seperated.'),
            ('-bins',
             'Dictionary of variables names (key) and number of bins (value)'),
            ('-input_local_DB', 'Path to local db.'),
            ('-db_query', 'Query to be executed on local db.'),
            ('-cur_state_pkl',
             'Path to the pickle file holding the current state.')):
        parser.add_argument(flag, required=True, help=description)
    options, _ = parser.parse_known_args()
    data_query = options.db_query
    state_path = path.abspath(options.cur_state_pkl)
    local_db_path = path.abspath(options.input_local_DB)

    if options.x == '':
        raise ExaremeError('Field x must be non empty.')

    # Variable names, with all spaces stripped; -y is optional in practice.
    args_X = options.x.replace(' ', '').split(',')
    if options.y != '':
        args_Y = options.y.replace(' ', '').split(',')
    else:
        args_Y = []
    varNames = "'" + "','".join(args_X + args_Y) + "'"

    # An empty -bins means no bin counts were supplied.
    if options.bins != '':
        args_bins = json.loads(options.bins)
    else:
        args_bins = {}

    # Query data and metadata.
    queryMetadata = "select * from metadata where code in (" + varNames + ");"
    dataSchema, _metadata_schema, metadata, dataFrame = query_database(
        fname_db=local_db_path,
        queryData=data_query,
        queryMetadata=queryMetadata)
    CategoricalVariablesWithDistinctValues = (
        variable_categorical_getDistinctValues(metadata))

    # Every non-categorical x variable needs a bin count.
    for varx in args_X:
        if (varx not in CategoricalVariablesWithDistinctValues
                and varx not in args_bins):
            raise ExaremeError(
                'Bin value is not defined for one at least non-categorical variable. i.e. ' + varx)

    # Local part of the algorithm.
    localStatistics = run_local_step(args_X, args_Y, args_bins, dataSchema,
                                     CategoricalVariablesWithDistinctValues,
                                     dataFrame)

    # Persist the local state.
    StateData(
        args_X=args_X,
        args_Y=args_Y,
        args_bins=args_bins,
        dataSchema=dataSchema,
        CategoricalVariablesWithDistinctValues=CategoricalVariablesWithDistinctValues,
        dataFrame=dataFrame).save(fname=state_path)

    # Send the local output to the global node.
    multipleHist1_Loc2Glob_TD(localStatistics).transfer()