Esempio n. 1
0
def pickle_static_income(index):
    """Return the value statistics for one column of the demographics CSV.

    The result is cached in ``data/informs_income_Static_value.pickle``.
    If that file can be unpickled, its content is returned directly.
    Otherwise every row of ``data/demographics.csv`` (first line skipped as
    the header) is split on commas, the occurrences of the value at column
    ``index`` are counted, the distinct values are sorted with ``cmp_str``,
    and the resulting ``NumRange`` is pickled to the cache file.

    :param index: zero-based position of the column within each row.
    :return: the cached object, or the freshly built ``NumRange``.
    """
    cache_path = 'data/informs_income_Static_value.pickle'
    try:
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only acceptable while the cache file is produced by this program.
        with open(cache_path, 'rb') as static_file:
            return pickle.load(static_file)
    except Exception:
        # Cache missing or unreadable: fall through and rebuild it.
        # (Deliberately broad, like the original best-effort behaviour, but
        # no longer swallowing KeyboardInterrupt / SystemExit.)
        pass

    print("Pickle Data...")
    support = {}
    with open('data/demographics.csv', 'rU') as userfile:
        for i, line in enumerate(userfile):
            if i == 0:
                # ignore first line of csv
                continue
            row = line.strip().split(',')
            value = row[index]
            support[value] = support.get(value, 0) + 1

    sort_value = list(support.keys())
    sort_value.sort(cmp=cmp_str)
    result = NumRange(sort_value, support)
    # Write the cache only once the result exists, so a failure above does
    # not leave an empty pickle file behind.
    with open(cache_path, 'wb') as static_file:
        pickle.dump(result, static_file)
    return result
Esempio n. 2
0
def read_pickle_file(att_name, name):
    """
    Load the pickled statistics of a numeric attribute from the tmp/ folder.

    The pickle holds a (dict, sorted value list) pair, which is handed to
    NumRange as (sorted value list, dict). ``name`` is accepted but unused.
    Returns the NumRange object.
    """
    pickle_path = 'tmp/' + att_name + '_static.pickle'
    with open(pickle_path, 'rb') as handle:
        payload = pickle.load(handle)
    counts, ordered_values = payload
    return NumRange(ordered_values, counts)
def read_pickle_file(att_name):
    """
    Load the pickled statistics of a numeric attribute.

    Reads ``data/adult_<att_name>_static.pickle``, which holds a
    (dict, sorted value list) pair, and returns ``NumRange(sorted value
    list, dict)``.

    If the file cannot be opened, the message "Pickle file not exists!!" is
    printed and the IOError is re-raised, instead of failing later on
    unbound local variables.
    """
    pickle_path = 'data/adult_' + att_name + '_static.pickle'
    try:
        static_file = open(pickle_path, 'rb')
    except IOError:
        print("Pickle file not exists!!")
        raise
    # The context manager closes the handle even when unpickling fails.
    with static_file:
        (numeric_dict, sort_value) = pickle.load(static_file)
    return NumRange(sort_value, numeric_dict)
def read_numeric_identifier(att_name):
    """
    Build a NumRange from the distinct values of one column of adult.data.

    The CSV ``data/adult.data`` is loaded with the column names in
    ``ATT_NAMES``, sorted by ``att_name``, and rows where that column is
    missing are dropped. The column's values are converted to strings and
    de-duplicated in sorted order.

    :param att_name: name of the column (must be one of ``ATT_NAMES``).
    :return: ``NumRange`` built from the value list and an empty dict.
    """
    # NOTE(review): header=0 together with names= makes pandas discard the
    # first line of the file as a header -- confirm adult.data has one.
    csvdata = pd.read_csv('data/adult.data',
                          sep=',',
                          header=0,
                          names=ATT_NAMES)
    csvdata = csvdata.sort_values(by=att_name)
    csvdata = csvdata.dropna(subset=[att_name])
    # Use the requested column; the previous code always read ``age``.
    sort_value = csvdata[att_name].astype('str').unique()
    return NumRange(sort_value.tolist(), dict())
Esempio n. 5
0
def init_tree():
    """
    Reset the module globals ATT_TREE and NUM_RANGE to a small fixture.

    ATT_TREE maps node names to GenTree nodes: root '*', its children 'A'
    and 'B', and under them 'a1', 'a2' and 'b1', 'b2' (created with a third
    argument of True). NUM_RANGE is a NumRange over the strings '1'..'10'
    with an empty dict.
    """
    global ATT_TREE, NUM_RANGE
    ATT_TREE = {}
    top = GenTree('*')
    ATT_TREE['*'] = top
    layout = (('A', ('a1', 'a2')), ('B', ('b1', 'b2')))
    for group_name, member_names in layout:
        group = GenTree(group_name, top)
        ATT_TREE[group_name] = group
        for member_name in member_names:
            ATT_TREE[member_name] = GenTree(member_name, group, True)
    labels = [str(number) for number in range(1, 11)]
    NUM_RANGE = NumRange(labels, dict())
def init():
    """
    Reset the module global ATT_TREE to a two-element list fixture.

    Element 0 is a dict of GenTree nodes: root '*', children '1,5' and
    '6,10', and nodes '1'..'10' attached to '1,5' (values up to 5) or
    '6,10' (the rest), each created with a third argument of True.
    Element 1 is a NumRange over the strings '1'..'10' with an empty dict.
    """
    global ATT_TREE
    ATT_TREE = []
    root = GenTree('*')
    low_half = GenTree('1,5', root)
    high_half = GenTree('6,10', root)
    nodes = {'*': root, '1,5': low_half, '6,10': high_half}
    for value in range(1, 11):
        label = str(value)
        parent = low_half if value <= 5 else high_half
        nodes[label] = GenTree(label, parent, True)
    labels = [str(value) for value in range(1, 11)]
    ATT_TREE.extend([nodes, NumRange(labels, dict())])
Esempio n. 7
0
# Shared fixture: one GenTree hierarchy ('*' -> '1,5' / '6,10' -> '1'..'10')
# reused for the first four attributes, plus a NumRange over '1'..'10' for
# the fifth. IS_CAT flags which positions use the tree (True) and which use
# the NumRange (False).
tree = GenTree('*')
TREE_TEMP = {'*': tree}
lt = GenTree('1,5', tree)
TREE_TEMP['1,5'] = lt
rt = GenTree('6,10', tree)
TREE_TEMP['6,10'] = rt
for i in range(1, 11):
    # Values 1..5 hang under '1,5', values 6..10 under '6,10'.
    t = GenTree(str(i), lt if i <= 5 else rt, True)
    TREE_TEMP[str(i)] = t
# The same TREE_TEMP object is referenced four times, not copied.
ATT_TREES = [TREE_TEMP] * 4 + [
    NumRange([str(t) for t in range(1, 11)], dict())
]
IS_CAT = [True] * 4 + [False]


def NCP(record):
    """
    compute Certainlty Penalty of records
    """
    record_ncp = 0.0
    for i in range(QI_LEN):
        if IS_CAT[i] is False:
            temp = 0
            try:
                float(record[i])
            except ValueError: