Beispiel #1
0
def main(args):
    """Drive the global part of CART iteration step 1.

    Reads the previous global state and the local step results, calls
    ``cart_step_1_global`` to obtain the updated active paths, stores the
    new global state and finally calls ``transfer()`` on the
    ``Cart_Glob2Loc_TD`` output.

    :param args: argument vector; it is assigned to ``sys.argv`` before the
        command line is parsed.
    """
    sys.argv = args

    # Command-line interface
    cli = ArgumentParser()
    cli.add_argument('-no_split_points', required=True, type=int, help='Number of split points')
    for flag, description in (
            ('-cur_state_pkl', 'Path to the pickle file holding the current state.'),
            ('-prev_state_pkl', 'Path to the pickle file holding the previous state.'),
            ('-local_step_dbs', 'Path to db holding local step results.')):
        cli.add_argument(flag, required=True, help=description)
    opts, _ = cli.parse_known_args()

    cur_state_file = path.abspath(opts.cur_state_pkl)
    prev_state_file = path.abspath(opts.prev_state_pkl)
    local_results_db = path.abspath(opts.local_step_dbs)

    # Inputs: state of the previous global step and the local nodes' output
    previous = StateData.load(prev_state_file).data
    active_paths = CartIter1_Loc2Glob_TD.load(local_results_db).get_data()

    # Global iteration step
    active_paths = cart_step_1_global(
        previous['args_X'],
        previous['args_Y'],
        previous['CategoricalVariables'],
        active_paths,
        opts.no_split_points)

    outgoing = Cart_Glob2Loc_TD(previous['globalTree'], active_paths)

    # Persist the new global state; the step counter advances by one
    updated = StateData(stepsNo=previous['stepsNo'] + 1,
                        args_X=previous['args_X'],
                        args_Y=previous['args_Y'],
                        CategoricalVariables=previous['CategoricalVariables'],
                        globalTree=previous['globalTree'],
                        activePaths=active_paths,
                        t1=previous['t1'])
    updated.save(fname=cur_state_file)

    # Hand over the algorithm's output
    outgoing.transfer()
Beispiel #2
0
def main(args):
    """Drive the global part of the CART initialisation step.

    Reads the local nodes' initialisation output, calls
    ``cart_init_1_global`` to get the initial tree and active paths, stores
    the first global state (``stepsNo=0``) and calls ``transfer()`` on the
    ``Cart_Glob2Loc_TD`` output.

    :param args: argument vector; it is assigned to ``sys.argv`` before the
        command line is parsed.
    """
    sys.argv = args

    # Command-line interface
    cli = ArgumentParser()
    for flag, description in (
            ('-cur_state_pkl', 'Path to the pickle file holding the current state.'),
            ('-local_step_dbs', 'Path to db holding local step results.')):
        cli.add_argument(flag, required=True, help=description)
    opts, _ = cli.parse_known_args()

    state_file = path.abspath(opts.cur_state_pkl)
    local_results_db = path.abspath(opts.local_step_dbs)

    # Local nodes' output
    init_data = CartInit_Loc2Glob_TD.load(local_results_db).get_data()
    args_X, args_Y, CategoricalVariables, t1 = init_data

    # Global initialisation step
    globalTree, activePaths = cart_init_1_global()

    # Persist the initial global state
    StateData(stepsNo=0,
              args_X=args_X,
              args_Y=args_Y,
              CategoricalVariables=CategoricalVariables,
              globalTree=globalTree,
              activePaths=activePaths,
              t1=t1).save(fname=state_file)

    # Hand over the tree and the active paths
    Cart_Glob2Loc_TD(globalTree, activePaths).transfer()
Beispiel #3
0
def main(args):
    """Drive the local part of CART iteration step 3.

    Reads the previous local state and the global step output, calls
    ``cart_step_3_local`` to update the active paths, stores the new local
    state and calls ``transfer()`` on the ``CartIter3_Loc2Glob_TD`` output.

    :param args: argument vector; it is assigned to ``sys.argv`` before the
        command line is parsed.
    """
    sys.argv = args

    # Command-line interface
    cli = ArgumentParser()
    for flag, description in (
            ('-cur_state_pkl', 'Path to the pickle file holding the current state.'),
            ('-prev_state_pkl', 'Path to the pickle file holding the previous state.'),
            ('-global_step_db', 'Path to db holding global step results.')):
        cli.add_argument(flag, required=True, help=description)
    opts, _ = cli.parse_known_args()

    cur_state_file = path.abspath(opts.cur_state_pkl)
    prev_state_file = path.abspath(opts.prev_state_pkl)
    global_results_db = path.abspath(opts.global_step_db)

    # Inputs: previous local state and the global node's output
    previous = StateData.load(prev_state_file).data
    global_tree, active_paths = Cart_Glob2Loc_TD.load(global_results_db).get_data()

    # Local iteration step
    active_paths = cart_step_3_local(
        previous['dataFrame'],
        previous['args_X'],
        previous['args_Y'],
        previous['CategoricalVariables'],
        active_paths)

    # New local state: same data and variables, refreshed tree and paths
    updated = StateData(
        args_X=previous['args_X'],
        args_Y=previous['args_Y'],
        CategoricalVariables=previous['CategoricalVariables'],
        dataFrame=previous['dataFrame'],
        globalTree=global_tree,
        activePaths=active_paths)
    outgoing = CartIter3_Loc2Glob_TD(active_paths)

    # Persist first, then hand over the result
    updated.save(fname=cur_state_file)
    outgoing.transfer()
Beispiel #4
0
def main():
    """Drive the local part of one logistic regression iteration.

    Reads the previous local state and the global step output, runs
    ``logregr_local_iter``, stores the returned local state and calls
    ``transfer()`` on the returned local output.
    """
    # Command-line interface
    cli = ArgumentParser()
    for flag, description in (
            ('-cur_state_pkl', 'Path to the pickle file holding the current state.'),
            ('-prev_state_pkl', 'Path to the pickle file holding the previous state.'),
            ('-global_step_db', 'Path to db holding global step results.')):
        cli.add_argument(flag, required=True, help=description)
    opts, _ = cli.parse_known_args()

    cur_state_file = path.abspath(opts.cur_state_pkl)
    prev_state_file = path.abspath(opts.prev_state_pkl)
    global_results_db = path.abspath(opts.global_step_db)

    # Inputs: previous local state and the global node's output
    previous_state = StateData.load(prev_state_file).data
    incoming = LogRegrIter_Glob2Loc_TD.load(global_results_db)

    # Local iteration step
    new_state, outgoing = logregr_local_iter(local_state=previous_state,
                                             local_in=incoming)

    # Persist first, then hand over the result
    new_state.save(fname=cur_state_file)
    outgoing.transfer()
Beispiel #5
0
def main():
    """Drive the second local step of the multiple-histograms algorithm.

    Reads the previous local state and the ``'global_in'`` entry of the
    global step output, runs ``run_local_step`` with them and calls
    ``transfer()`` on a ``multipleHist2_Loc2Glob_TD`` built from the result.
    No new state is saved by this step.
    """
    # Command-line interface
    cli = ArgumentParser()
    for flag, description in (
            ('-prev_state_pkl', 'Path to the pickle file holding the previous state.'),
            ('-global_step_db', 'Path to db holding global step results.')):
        cli.add_argument(flag, required=True, help=description)
    opts, _ = cli.parse_known_args()

    prev_state_file = path.abspath(opts.prev_state_pkl)
    global_results_db = path.abspath(opts.global_step_db)

    # Inputs: previous local state and the statistics sent by the global node
    state = StateData.load(prev_state_file).get_data()
    globalStatistics = Global2Local_TD.load(global_results_db).get_data()['global_in']

    args_X = state['args_X']
    args_Y = state['args_Y']
    categorical = state['CategoricalVariablesWithDistinctValues']

    # Local step
    Hist = run_local_step(args_X, args_Y, state['args_bins'],
                          state['dataSchema'], categorical,
                          state['dataFrame'], globalStatistics)

    # Pack the result and hand it over
    multipleHist2_Loc2Glob_TD(args_X, args_Y, categorical, Hist).transfer()
Beispiel #6
0
def main():
    """Drive the global part of one logistic regression iteration.

    Reads the previous global state and the local step results, runs
    ``logregr_global_iter``, stores the returned global state and calls
    ``transfer()`` on the returned global output.
    """
    # Command-line interface
    cli = ArgumentParser()
    for flag, description in (
            ('-cur_state_pkl', 'Path to the pickle file holding the current state.'),
            ('-prev_state_pkl', 'Path to the pickle file holding the previous state.'),
            ('-local_step_dbs', 'Path to db holding local step results.')):
        cli.add_argument(flag, required=True, help=description)
    opts, _ = cli.parse_known_args()

    cur_state_file = path.abspath(opts.cur_state_pkl)
    prev_state_file = path.abspath(opts.prev_state_pkl)
    local_results_db = path.abspath(opts.local_step_dbs)

    # Inputs: previous global state and the local nodes' output
    previous_state = StateData.load(prev_state_file).data
    incoming = LogRegrIter_Loc2Glob_TD.load(local_results_db)

    # Global iteration step
    new_state, outgoing = logregr_global_iter(global_state=previous_state,
                                              global_in=incoming)

    # Persist first, then hand over the algorithm's output
    new_state.save(fname=cur_state_file)
    outgoing.transfer()
Beispiel #7
0
def logregr_global_iter(global_state, global_in):
    """Perform the global update of one logistic regression iteration.

    :param global_state: mapping with the keys ``'n_obs'``, ``'n_cols'``,
        ``'ll'``, ``'iter'``, ``'y_val_dict'``, ``'schema_X'`` and
        ``'schema_Y'`` from the previous global step.
    :param global_in: object whose ``get_data()`` returns the tuple
        ``(ll_new, grad, hess)``.
    :return: ``(global_state, global_out)`` -- a new ``StateData`` holding
        the updated quantities and a ``LogRegrIter_Glob2Loc_TD`` carrying
        the new coefficients.
    :raises numpy.linalg.LinAlgError: if ``hess`` is singular or not square.
    """
    # Unpack global state (the previous coefficients are not needed: the new
    # ones are computed from grad and hess alone)
    n_obs = global_state['n_obs']
    n_cols = global_state['n_cols']
    ll_old = global_state['ll']
    n_iter = global_state['iter']
    y_val_dict = global_state['y_val_dict']
    schema_X = global_state['schema_X']
    schema_Y = global_state['schema_Y']
    # Unpack global input
    ll_new, grad, hess = global_in.get_data()

    # Compute new coefficients by solving hess * coeff = grad directly;
    # this avoids forming the explicit inverse of the Hessian.
    coeff = np.linalg.solve(hess, grad)
    # Update termination quantities
    delta = abs(ll_new - ll_old)
    n_iter += 1

    # Pack state and results
    global_state = StateData(n_obs=n_obs,
                             n_cols=n_cols,
                             ll=ll_new,
                             coeff=coeff,
                             delta=delta,
                             iter=n_iter,
                             y_val_dict=y_val_dict,
                             schema_X=schema_X,
                             schema_Y=schema_Y)
    global_out = LogRegrIter_Glob2Loc_TD(coeff)
    return global_state, global_out
Beispiel #8
0
def main(args):
    """Emit the final CART result and log timing information.

    Loads the previous global state, converts its ``'globalTree'`` entry with
    ``tree_to_json()``, logs the stored start time ``t1`` next to the current
    local time field by field, and passes the JSON result to
    ``set_algorithms_output_data``.

    :param args: argument vector; it is assigned to ``sys.argv`` before the
        command line is parsed.
    """
    sys.argv = args
    init_logger()

    # Command-line interface
    cli = ArgumentParser()
    for flag, description in (
            ('-cur_state_pkl', 'Path to the pickle file holding the current state.'),
            ('-prev_state_pkl', 'Path to the pickle file holding the previous state.'),
            ('-local_step_dbs', 'Path to db holding local step results.')):
        cli.add_argument(flag, required=True, help=description)
    opts, _ = cli.parse_known_args()

    # The current-state and local-db paths are resolved but not used below
    fname_cur_state = path.abspath(opts.cur_state_pkl)
    prev_state_file = path.abspath(opts.prev_state_pkl)
    local_dbs = path.abspath(opts.local_step_dbs)

    # Final global state
    final_state = StateData.load(prev_state_file).data
    tree_json = final_state['globalTree'].tree_to_json()
    output = {"result": [{"type": "application/json", "data": tree_json}]}

    # Log start time, current time and their difference for every field
    started = final_state['t1']
    now = time.localtime(time.time())
    field_names = ['yy', 'mm', 'dd', 'hh', 'min', 'sec', 'wday', 'yday', 'isdst']
    logging.info(" Time: ")
    for idx, start_value in enumerate(started):
        logging.info([field_names[idx], now[idx], start_value, now[idx] - start_value])

    set_algorithms_output_data(output)
Beispiel #9
0
def main(args):
    """Drive the local part of the CART initialisation step.

    Queries the local database for data and metadata, runs
    ``cart_init_1_local``, enforces the minimum-rows privacy check, stores
    the local state and calls ``transfer()`` on the ``CartInit_Loc2Glob_TD``
    output, which also carries the start time taken at entry.

    :param args: argument vector; it is assigned to ``sys.argv`` before the
        command line is parsed.
    :raises PrivacyError: if the prepared data frame has fewer rows than
        ``PRIVACY_MAGIC_NUMBER``.
    """
    # Start time, forwarded with the local output
    t1 = time.localtime(time.time())

    # Command-line interface
    sys.argv = args
    cli = ArgumentParser()
    for flag, description in (
            ('-x', 'Independent variable names, comma separated.'),
            ('-y', 'Dependent variable name'),
            ('-cur_state_pkl', 'Path to the pickle file holding the current state.'),
            ('-input_local_DB', 'Path to local db.'),
            ('-db_query', 'Query to be executed on local db.')):
        cli.add_argument(flag, required=True, help=description)
    opts, _ = cli.parse_known_args()

    state_file = path.abspath(opts.cur_state_pkl)
    local_db = path.abspath(opts.input_local_DB)
    # Turn escaped double quotes in the query back into plain ones
    data_query = opts.db_query.replace("\\\"", "\"")

    # Variable names, with all spaces stripped
    args_X = opts.x.replace(' ', '').split(',')
    args_Y = [opts.y.replace(' ', '')]

    # 1. Query database and metadata
    # NOTE(review): the variable names are concatenated into the SQL text
    # as-is; confirm they are validated upstream.
    quoted_codes = "','".join(args_X + args_Y)
    queryMetadata = "select * from metadata where code in (" + "'" + quoted_codes + "'" + ");"
    dataSchema, metadataSchema, metadata, dataFrame = query_database(
        fname_db=local_db, queryData=data_query, queryMetadata=queryMetadata)
    CategoricalVariables = variable_categorical_getDistinctValues(metadata)

    # 2. Run algorithm
    dataFrame, CategoricalVariables = cart_init_1_local(
        dataFrame, dataSchema, CategoricalVariables)
    if len(dataFrame) < PRIVACY_MAGIC_NUMBER:
        raise PrivacyError('The Experiment could not run with the input provided because there are insufficient data.')

    # 3. Save local state
    StateData(dataFrame=dataFrame,
              args_X=args_X,
              args_Y=args_Y,
              CategoricalVariables=CategoricalVariables).save(fname=state_file)

    # Hand over the local output
    CartInit_Loc2Glob_TD(args_X, args_Y, CategoricalVariables, t1).transfer()
Beispiel #10
0
def main():
    """Evaluate the termination condition on the previous global state.

    Loads the state stored under ``-prev_state_pkl`` and passes it, together
    with the ``-max_iter`` limit, to ``termination_condition``.
    """
    # Command-line interface
    cli = ArgumentParser()
    cli.add_argument('-prev_state_pkl',
                     required=True,
                     help='Path to the pickle file holding the previous state.')
    cli.add_argument('-max_iter',
                     type=int,
                     required=True,
                     help='Maximum number of iterations.')
    opts, _ = cli.parse_known_args()

    prev_state_file = path.abspath(opts.prev_state_pkl)
    iteration_limit = opts.max_iter

    termination_condition(StateData.load(prev_state_file).data, iteration_limit)
Beispiel #11
0
def logregr_global_init(global_in):
    """Build the initial global state for logistic regression.

    :param global_in: object whose ``get_data()`` returns the tuple
        ``(n_obs, n_cols, y_val_dict, schema_X, schema_Y)``.
    :return: ``(global_state, global_out)`` -- a ``StateData`` holding the
        starting quantities and a ``LogRegrIter_Glob2Loc_TD`` carrying the
        all-zero coefficient vector.
    :raises ExaremeError: if ``n_obs`` is 0.
    """
    n_obs, n_cols, y_val_dict, schema_X, schema_Y = global_in.get_data()

    if n_obs == 0:
        raise ExaremeError('The selected variables contain 0 datapoints.')

    # Starting values: zero coefficients, iteration counter at 0
    start_coeff = np.zeros(n_cols)
    start_ll = -2 * n_obs * np.log(2)
    start_iter = 0

    # Pack state and results
    global_state = StateData(
        n_obs=n_obs,
        n_cols=n_cols,
        ll=start_ll,
        coeff=start_coeff,
        iter=start_iter,
        y_val_dict=y_val_dict,
        schema_X=schema_X,
        schema_Y=schema_Y)
    return global_state, LogRegrIter_Glob2Loc_TD(start_coeff)
Beispiel #12
0
def main(args):
    """Decide whether the CART iteration should stop or continue.

    Outputs ``'STOP'`` when the stored ``'activePaths'`` entry is empty
    (falsy) or the stored ``'stepsNo'`` exceeds ``-max_depth``; otherwise
    outputs ``'CONTINUE'``.

    :param args: argument vector; it is assigned to ``sys.argv`` before the
        command line is parsed.
    """
    sys.argv = args

    # Command-line interface
    cli = ArgumentParser()
    cli.add_argument('-prev_state_pkl', required=True,
                     help='Path to the pickle file holding the previous state.')
    cli.add_argument('-max_depth', type=int, required=True, help='Maximum depth of tree')
    opts, _ = cli.parse_known_args()

    state = StateData.load(path.abspath(opts.prev_state_pkl)).data

    finished = not state['activePaths'] or state['stepsNo'] > opts.max_depth
    set_algorithms_output_data('STOP' if finished else 'CONTINUE')
Beispiel #13
0
def logregr_local_init(local_in):
    """Prepare the local data for logistic regression.

    The two smallest distinct values of ``Y`` (in sorted order) are encoded
    as 0 and 1, a leading column of ones is inserted into ``X`` for the
    intercept, and ``'(Intercept)'`` is inserted at the front of ``schema_X``
    (the list is modified in place).

    :param local_in: tuple ``(X, Y, schema_X, schema_Y)``.
    :return: ``(local_state, local_out)`` -- a ``StateData`` holding the
        prepared ``X`` and ``Y`` and a ``LogRegrInit_Loc2Glob_TD`` carrying
        the counts, the label encoding and both schemas.
    """
    X, Y, schema_X, schema_Y = local_in

    n_obs = len(Y)
    n_cols = 1 + len(X[0])  # one extra column for the intercept
    schema_X.insert(0, '(Intercept)')

    # Encode the categories of Y as 0/1
    categories = sorted(set(Y))
    y_val_dict = {categories[0]: 0, categories[1]: 1}
    encoded_Y = np.array([y_val_dict[label] for label in Y], dtype=np.uint8)

    # Prepend the intercept column of ones to X
    design = np.insert(X, obj=0, values=np.ones(n_obs), axis=1)

    # Pack state and results
    local_state = StateData(X=design, Y=encoded_Y)
    local_out = LogRegrInit_Loc2Glob_TD(
        n_obs, n_cols, y_val_dict, schema_X, schema_Y)
    return local_state, local_out
Beispiel #14
0
def logregr_local_iter(local_state, local_in):
    """Compute the local log-likelihood, gradient and Hessian terms.

    :param local_state: mapping with the keys ``'X'`` and ``'Y'``.
        Assumes ``X`` is a 2-D array with one row per observation and ``Y``
        a 1-D 0/1 array of the same length -- TODO confirm against the
        init step.
    :param local_in: object whose ``get_data()`` returns the current
        coefficient vector.
    :return: ``(local_state, local_out)`` -- a ``StateData`` holding the
        unchanged ``X`` and ``Y`` and a ``LogRegrIter_Loc2Glob_TD`` carrying
        ``(ll, grad, hess)``.
    """
    # Unpack local state
    X, Y = local_state['X'], local_state['Y']
    # Unpack local input
    coeff = local_in.get_data()

    # Auxiliary quantities
    z = np.dot(X, coeff)
    s = expit(z)
    d = np.multiply(s, (1 - s))
    X_t = np.transpose(X)
    # Multiplying by diag(d) only scales row i by d[i]; do that by
    # broadcasting so no n_obs x n_obs matrix is ever allocated.
    # Hessian
    hess = np.dot(X_t, d[:, np.newaxis] * X)
    # Gradient
    grad = np.dot(X_t, d * (z + np.divide(Y - s, d)))
    # Log-likelihood
    ls1, ls2 = np.log(s), np.log(1 - s)
    ll = np.dot(Y, ls1) + np.dot(1 - Y, ls2)

    # Pack state and results
    local_state = StateData(X=X, Y=Y)
    local_out = LogRegrIter_Loc2Glob_TD(ll, grad, hess)
    return local_state, local_out
Beispiel #15
0
def main(args):
    """Drive the local part of CART iteration step 2.

    Reads the previous local state and the global step output, calls
    ``cart_step_2_local`` to update the active paths, stores the new local
    state and calls ``transfer()`` on the ``CartIter2_Loc2Glob_TD`` output.

    :param args: argument vector; it is assigned to ``sys.argv`` before the
        command line is parsed.
    """
    sys.argv = args

    # Command-line interface
    cli = ArgumentParser()
    for flag, description in (
            ('-cur_state_pkl', 'Path to the pickle file holding the current state.'),
            ('-prev_state_pkl', 'Path to the pickle file holding the previous state.'),
            ('-global_step_db', 'Path to db holding global step results.')):
        cli.add_argument(flag, required=True, help=description)
    opts, _ = cli.parse_known_args()

    cur_state_file = path.abspath(opts.cur_state_pkl)
    prev_state_file = path.abspath(opts.prev_state_pkl)
    global_results_db = path.abspath(opts.global_step_db)

    # Inputs: previous local state and the global node's output
    previous = StateData.load(prev_state_file).data
    global_tree, active_paths = Cart_Glob2Loc_TD.load(global_results_db).get_data()

    # Local iteration step
    active_paths = cart_step_2_local(
        previous['dataFrame'],
        previous['CategoricalVariables'],
        previous['args_X'],
        previous['args_Y'],
        active_paths)

    # New local state: same data and variables, refreshed tree and paths
    updated = StateData(
        args_X=previous['args_X'],
        args_Y=previous['args_Y'],
        CategoricalVariables=previous['CategoricalVariables'],
        dataFrame=previous['dataFrame'],
        globalTree=global_tree,
        activePaths=active_paths)
    outgoing = CartIter2_Loc2Glob_TD(active_paths)

    # Persist first, then hand over the result
    updated.save(fname=cur_state_file)
    outgoing.transfer()
Beispiel #16
0
def main():
    """Drive the first local step of the multiple-histograms algorithm.

    Parses the variable lists and the bins dictionary, queries the local
    database for data and metadata, checks that every non-categorical ``-x``
    variable has a bin count, runs ``run_local_step``, stores the local state
    and calls ``transfer()`` on the ``multipleHist1_Loc2Glob_TD`` output.

    :raises ExaremeError: if ``-x`` is empty, or if a non-categorical ``-x``
        variable has no entry in ``-bins``.
    """
    # Command-line interface
    cli = ArgumentParser()
    for flag, description in (
            ('-x', 'Variable names, comma seperated '),
            ('-y', 'Categorical variables names, comma seperated.'),
            ('-bins', 'Dictionary of variables names (key) and number of bins (value)'),
            ('-input_local_DB', 'Path to local db.'),
            ('-db_query', 'Query to be executed on local db.'),
            ('-cur_state_pkl', 'Path to the pickle file holding the current state.')):
        cli.add_argument(flag, required=True, help=description)
    opts, _ = cli.parse_known_args()

    data_query = opts.db_query
    state_file = path.abspath(opts.cur_state_pkl)
    local_db = path.abspath(opts.input_local_DB)

    if opts.x == '':
        raise ExaremeError('Field x must be non empty.')

    # Variable names with all spaces stripped; an empty -y means no y variables
    args_X = opts.x.replace(' ', '').split(',')
    args_Y = opts.y.replace(' ', '').split(',') if opts.y != '' else []
    # Quoted, comma-separated list of all variable codes for the metadata query
    varNames = "'" + "','".join(args_X + args_Y) + "'"

    # An empty -bins means no bins were given; otherwise it is parsed as JSON
    args_bins = json.loads(opts.bins) if opts.bins != '' else {}

    queryMetadata = "select * from metadata where code in (" + varNames + ");"
    dataSchema, metadataSchema, metadata, dataFrame = query_database(
        fname_db=local_db, queryData=data_query, queryMetadata=queryMetadata)
    categorical = variable_categorical_getDistinctValues(metadata)

    # Every non-categorical x variable needs a bin count
    for varx in args_X:
        if varx not in categorical and varx not in args_bins:
            raise ExaremeError(
                'Bin value is not defined for one at least non-categorical variable. i.e. '
                + varx)

    # Local step
    localStatistics = run_local_step(args_X, args_Y, args_bins, dataSchema,
                                     categorical, dataFrame)

    # Persist the local state
    StateData(args_X=args_X,
              args_Y=args_Y,
              args_bins=args_bins,
              dataSchema=dataSchema,
              CategoricalVariablesWithDistinctValues=categorical,
              dataFrame=dataFrame).save(fname=state_file)

    # Hand over the local output
    multipleHist1_Loc2Glob_TD(localStatistics).transfer()