Exemple #1
0
def main(args):
    """Global-node entry point for the initialisation step.

    Loads the merged output of the local nodes, runs
    ``cart_init_1_global``, stores the resulting global state on disk and
    transfers the global output.

    Args:
        args: Command-line style argument list. It replaces ``sys.argv``
            so that the parser reads it (the first element is therefore
            skipped as the program name). ``-cur_state_pkl`` and
            ``-local_step_dbs`` are mandatory; unrecognised options are
            ignored.
    """
    # The parser below reads its input from sys.argv.
    sys.argv = args

    parser = ArgumentParser()
    option_specs = (
        ('-cur_state_pkl',
         'Path to the pickle file holding the current state.'),
        ('-local_step_dbs',
         'Path to db holding local step results.'),
    )
    for flag, description in option_specs:
        parser.add_argument(flag, required=True, help=description)
    options = parser.parse_known_args()[0]

    state_file = path.abspath(options.cur_state_pkl)
    local_results_db = path.abspath(options.local_step_dbs)

    # Merged output of the local nodes.
    local_output = CartInit_Loc2Glob_TD.load(local_results_db)
    args_X, args_Y, CategoricalVariables, t1 = local_output.get_data()

    # Global part of the algorithm's initialisation.
    globalTree, activePaths = cart_init_1_global()

    # Persist the global state under the requested file name.
    state_fields = dict(stepsNo=0,
                        args_X=args_X,
                        args_Y=args_Y,
                        CategoricalVariables=CategoricalVariables,
                        globalTree=globalTree,
                        activePaths=activePaths,
                        t1=t1)
    StateData(**state_fields).save(fname=state_file)

    # Hand the tree and the active paths over for transfer.
    Cart_Glob2Loc_TD(globalTree, activePaths).transfer()
Exemple #2
0
def main(args):
    """Global-node entry point for iteration step 1.

    Restores the previous global state, loads the local nodes' output,
    runs ``cart_step_1_global``, saves the updated state (with
    ``stepsNo`` incremented by one) and transfers the global output.

    Args:
        args: Command-line style argument list. It replaces ``sys.argv``
            so that the parser reads it (the first element is therefore
            skipped as the program name). ``-no_split_points``,
            ``-cur_state_pkl``, ``-prev_state_pkl`` and
            ``-local_step_dbs`` are mandatory; unrecognised options are
            ignored.
    """
    # The parser below reads its input from sys.argv.
    sys.argv = args

    parser = ArgumentParser()
    parser.add_argument(
        '-no_split_points',
        required=True,
        type=int,
        help='Number of split points')
    path_options = (
        ('-cur_state_pkl',
         'Path to the pickle file holding the current state.'),
        ('-prev_state_pkl',
         'Path to the pickle file holding the previous state.'),
        ('-local_step_dbs',
         'Path to db holding local step results.'),
    )
    for flag, description in path_options:
        parser.add_argument(flag, required=True, help=description)
    options = parser.parse_known_args()[0]

    state_file = path.abspath(options.cur_state_pkl)
    previous_state_file = path.abspath(options.prev_state_pkl)
    local_results_db = path.abspath(options.local_step_dbs)

    # State saved by the previous global step.
    previous = StateData.load(previous_state_file).data
    # Output produced by the local nodes.
    local_paths = CartIter1_Loc2Glob_TD.load(local_results_db).get_data()

    # Global part of this iteration.
    updated_paths = cart_step_1_global(
        previous['args_X'],
        previous['args_Y'],
        previous['CategoricalVariables'],
        local_paths,
        options.no_split_points)

    # The transfer object is built before the state is saved and sent
    # only afterwards.
    outgoing = Cart_Glob2Loc_TD(previous['globalTree'], updated_paths)

    # Persist the updated global state.
    next_state = StateData(
        stepsNo=previous['stepsNo'] + 1,
        args_X=previous['args_X'],
        args_Y=previous['args_Y'],
        CategoricalVariables=previous['CategoricalVariables'],
        globalTree=previous['globalTree'],
        activePaths=updated_paths,
        t1=previous['t1'])
    next_state.save(fname=state_file)

    # Return the algorithm's output.
    outgoing.transfer()
Exemple #3
0
def main(args):
    """Local-node entry point for iteration step 3.

    Restores the previous local state, loads the global node's output,
    runs ``cart_step_3_local``, saves the new local state and transfers
    the updated active paths.

    Args:
        args: Command-line style argument list. It replaces ``sys.argv``
            so that the parser reads it (the first element is therefore
            skipped as the program name). ``-cur_state_pkl``,
            ``-prev_state_pkl`` and ``-global_step_db`` are mandatory;
            unrecognised options are ignored.
    """
    # The parser below reads its input from sys.argv.
    sys.argv = args

    parser = ArgumentParser()
    option_specs = (
        ('-cur_state_pkl',
         'Path to the pickle file holding the current state.'),
        ('-prev_state_pkl',
         'Path to the pickle file holding the previous state.'),
        ('-global_step_db',
         'Path to db holding global step results.'),
    )
    for flag, description in option_specs:
        parser.add_argument(flag, required=True, help=description)
    options = parser.parse_known_args()[0]

    state_file = path.abspath(options.cur_state_pkl)
    previous_state_file = path.abspath(options.prev_state_pkl)
    global_results_db = path.abspath(options.global_step_db)

    # State saved by the previous local step.
    previous = StateData.load(previous_state_file).data
    # Output produced by the global node.
    globalTree, incoming_paths = Cart_Glob2Loc_TD.load(
        global_results_db).get_data()

    # Local part of this iteration.
    activePaths = cart_step_3_local(
        previous['dataFrame'],
        previous['args_X'],
        previous['args_Y'],
        previous['CategoricalVariables'],
        incoming_paths)

    # Assemble the new local state and the outgoing message.
    current = StateData(
        args_X=previous['args_X'],
        args_Y=previous['args_Y'],
        CategoricalVariables=previous['CategoricalVariables'],
        dataFrame=previous['dataFrame'],
        globalTree=globalTree,
        activePaths=activePaths)
    outgoing = CartIter3_Loc2Glob_TD(activePaths)

    # Save first, then transfer.
    current.save(fname=state_file)
    outgoing.transfer()
Exemple #4
0
def main(args):
    """Local-node entry point for the initialisation step.

    Queries the local database for the data and the metadata of the
    requested variables, runs ``cart_init_1_local``, saves the local
    state and transfers the local output.

    Args:
        args: Command-line style argument list. It replaces ``sys.argv``
            so that the parser reads it (the first element is therefore
            skipped as the program name). ``-x``, ``-y``,
            ``-cur_state_pkl``, ``-input_local_DB`` and ``-db_query``
            are mandatory; unrecognised options are ignored.

    Raises:
        PrivacyError: If the data frame returned by
            ``cart_init_1_local`` has fewer than
            ``PRIVACY_MAGIC_NUMBER`` rows.
    """
    # Timestamp taken at start-up; it is shipped with the local output.
    started_at = time.time()
    t1 = time.localtime(started_at)

    # The parser below reads its input from sys.argv.
    sys.argv = args
    parser = ArgumentParser()
    option_specs = (
        ('-x', 'Independent variable names, comma separated.'),
        ('-y', 'Dependent variable name'),
        ('-cur_state_pkl',
         'Path to the pickle file holding the current state.'),
        ('-input_local_DB', 'Path to local db.'),
        ('-db_query', 'Query to be executed on local db.'),
    )
    for flag, description in option_specs:
        parser.add_argument(flag, required=True, help=description)
    options = parser.parse_known_args()[0]

    state_file = path.abspath(options.cur_state_pkl)
    local_db_file = path.abspath(options.input_local_DB)
    # The query arrives with backslash-escaped double quotes; unescape.
    data_query = options.db_query.replace("\\\"", "\"")

    # Variable names, with all blanks stripped.
    args_X = options.x.replace(' ', '').split(',')
    args_Y = [options.y.replace(' ', '')]

    # 1. Query database and metadata.
    # NOTE(review): the variable names are interpolated into the SQL text
    # unescaped -- confirm they come from a trusted source.
    quoted_codes = "','".join(args_X + args_Y)
    metadata_query = (
        "select * from metadata where code in ('{}');".format(quoted_codes))
    dataSchema, _metadata_schema, metadata, dataFrame = query_database(
        fname_db=local_db_file,
        queryData=data_query,
        queryMetadata=metadata_query)
    CategoricalVariables = variable_categorical_getDistinctValues(metadata)

    # 2. Run algorithm.
    dataFrame, CategoricalVariables = cart_init_1_local(
        dataFrame, dataSchema, CategoricalVariables)
    if len(dataFrame) < PRIVACY_MAGIC_NUMBER:
        raise PrivacyError('The Experiment could not run with the input provided because there are insufficient data.')

    # 3. Save local state.
    StateData(dataFrame=dataFrame,
              args_X=args_X,
              args_Y=args_Y,
              CategoricalVariables=CategoricalVariables).save(
                  fname=state_file)

    # Transfer local output.
    CartInit_Loc2Glob_TD(args_X, args_Y, CategoricalVariables, t1).transfer()
Exemple #5
0
def main(args):
    """Local-node entry point for iteration step 2.

    Restores the previous local state, loads the global node's output,
    runs ``cart_step_2_local``, saves the new local state and transfers
    the updated active paths.

    Args:
        args: Command-line style argument list. It replaces ``sys.argv``
            so that the parser reads it (the first element is therefore
            skipped as the program name). ``-cur_state_pkl``,
            ``-prev_state_pkl`` and ``-global_step_db`` are mandatory;
            unrecognised options are ignored.
    """
    # The parser below reads its input from sys.argv.
    sys.argv = args

    parser = ArgumentParser()
    option_specs = (
        ('-cur_state_pkl',
         'Path to the pickle file holding the current state.'),
        ('-prev_state_pkl',
         'Path to the pickle file holding the previous state.'),
        ('-global_step_db',
         'Path to db holding global step results.'),
    )
    for flag, description in option_specs:
        parser.add_argument(flag, required=True, help=description)
    options = parser.parse_known_args()[0]

    state_file = path.abspath(options.cur_state_pkl)
    previous_state_file = path.abspath(options.prev_state_pkl)
    global_results_db = path.abspath(options.global_step_db)

    # State saved by the previous local step.
    previous = StateData.load(previous_state_file).data
    # Output produced by the global node.
    globalTree, incoming_paths = Cart_Glob2Loc_TD.load(
        global_results_db).get_data()

    # Local part of this iteration.
    # NOTE(review): an inline version of this step (filter the data frame
    # per active path, then compute class counts or statistics) used to
    # sit here as commented-out code; presumably cart_step_2_local now
    # covers it -- confirm before relying on that.
    activePaths = cart_step_2_local(
        previous['dataFrame'],
        previous['CategoricalVariables'],
        previous['args_X'],
        previous['args_Y'],
        incoming_paths)

    # Assemble the new local state and the outgoing message.
    current = StateData(
        args_X=previous['args_X'],
        args_Y=previous['args_Y'],
        CategoricalVariables=previous['CategoricalVariables'],
        dataFrame=previous['dataFrame'],
        globalTree=globalTree,
        activePaths=activePaths)
    outgoing = CartIter2_Loc2Glob_TD(activePaths)

    # Save first, then transfer.
    current.save(fname=state_file)
    outgoing.transfer()
Exemple #6
0
def main():
    """Local-node entry point for the histogram local step.

    Reads its options from the process command line, queries the local
    database for the data and the metadata of the requested variables,
    checks that every non-categorical ``x`` variable has a bin count,
    runs ``run_local_step``, saves the local state and transfers the
    local statistics.

    Raises:
        ExaremeError: If ``-x`` is empty, or if an ``x`` variable is
            neither categorical nor listed in ``-bins``.
    """
    parser = ArgumentParser()
    option_specs = (
        ('-x', 'Variable names, comma seperated '),
        ('-y', 'Categorical variables names, comma seperated.'),
        ('-bins',
         'Dictionary of variables names (key) and number of bins (value)'),
        ('-input_local_DB', 'Path to local db.'),
        ('-db_query', 'Query to be executed on local db.'),
        ('-cur_state_pkl',
         'Path to the pickle file holding the current state.'),
    )
    for flag, description in option_specs:
        parser.add_argument(flag, required=True, help=description)
    options = parser.parse_known_args()[0]

    data_query = options.db_query
    state_file = path.abspath(options.cur_state_pkl)
    local_db_file = path.abspath(options.input_local_DB)

    if options.x == '':
        raise ExaremeError('Field x must be non empty.')

    # Variable names, with all blanks stripped; y may be left empty.
    args_X = options.x.replace(' ', '').split(',')
    args_Y = [] if options.y == '' else options.y.replace(' ', '').split(',')
    # Quoted, comma separated list of every requested variable.
    varNames = "'" + "','".join(args_X + args_Y) + "'"

    # An empty -bins value means no bins; otherwise it is parsed as JSON.
    args_bins = {} if options.bins == '' else json.loads(options.bins)

    metadata_query = "select * from metadata where code in (" + varNames + ");"
    dataSchema, _metadata_schema, metadata, dataFrame = query_database(
        fname_db=local_db_file,
        queryData=data_query,
        queryMetadata=metadata_query)
    categorical = variable_categorical_getDistinctValues(metadata)

    # Every x variable must be categorical or have a bin count.
    for name in args_X:
        if name in categorical or name in args_bins:
            continue
        raise ExaremeError(
            'Bin value is not defined for one at least non-categorical variable. i.e. '
            + name)

    # Run algorithm local step.
    localStatistics = run_local_step(
        args_X, args_Y, args_bins, dataSchema, categorical, dataFrame)

    # Save local state.
    state_fields = dict(
        args_X=args_X,
        args_Y=args_Y,
        args_bins=args_bins,
        dataSchema=dataSchema,
        CategoricalVariablesWithDistinctValues=categorical,
        dataFrame=dataFrame)
    StateData(**state_fields).save(fname=state_file)

    # Transfer local output.
    multipleHist1_Loc2Glob_TD(localStatistics).transfer()