Beispiel #1
0
def DtModuleData1(data_set_csv, m_file_name):
    """Load a data-set CSV and strip the symmetry sign from header and rows.

    Args:
        data_set_csv: Path handed to ``csvOperation.readCsv``; per the
            original comments this is the reachable-set CSV.
        m_file_name: File handed to ``search_sign`` to look up the
            symmetry sign.

    Returns:
        A ``(title, data_set)`` pair: the results of ``titleChange`` and
        ``dataSetChange`` applied to the CSV header and rows.
    """
    # Header row and data rows of the reachable set.
    raw_title, raw_rows = csvOperation.readCsv(data_set_csv)

    # Symmetry sign read from the model file; echoed for debugging.
    symmetry_sign = search_sign(m_file_name)
    print(symmetry_sign)

    # Per the original comments: titleChange removes the symmetry sign from
    # the header (and lowercases it to unify names); dataSetChange removes
    # the sign from the data rows.
    cleaned_title = titleChange(raw_title, symmetry_sign)
    cleaned_rows = dataSetChange(raw_rows, symmetry_sign)
    return cleaned_title, cleaned_rows
Beispiel #2
0
def priority_test(csvfile):
    """Assign a fixed test priority to every column named in a CSV header.

    Four hard-coded column names map to ``0``; every other column maps to
    ``0.1``. A ``1`` is printed each time one of the four names is found.

    Args:
        csvfile: Path handed to ``csvOperation.readCsv``.

    Returns:
        dict mapping each header entry to ``0`` or ``0.1``.
    """
    zero_priority_columns = (
        'Chan2[NODE_1].Cmd',
        'ExGntd',
        'ShrSet[NODE_1]',
        'CurCmd',
    )
    # Only the header is needed; the data rows are ignored.
    title, _rows = csvOperation.readCsv(csvfile)

    priorities = {}
    for column in title:
        if column not in zero_priority_columns:
            priorities[column] = 0.1
            continue
        priorities[column] = 0
        print('1')
    return priorities
Beispiel #3
0
def createTree(atom_csv, undefined_percentage):
    """Build a decision tree from a processed data-set CSV.

    The last column of every row is treated as the class label. When all
    rows share a single label no tree is built and a string of the form
    ``(<last header> = <label>)`` is returned instead, so callers must be
    prepared for either a string or whatever ``id3.createTree`` returns.

    Args:
        atom_csv: Path handed to ``csvOperation.readCsv``.
        undefined_percentage: Passed through unchanged to
            ``id3.createTree``; per the original comment it serves as the
            priority criterion there.

    Returns:
        The single-class string described above, or the result of
        ``id3.createTree``.
    """
    title, rows = csvOperation.readCsv(atom_csv)
    labels = [row[-1] for row in rows]

    if len(set(labels)) != 1:
        # Zero or several distinct classes: delegate to ID3, passing every
        # header entry except the label column as the feature names.
        return id3.createTree(rows, title[:-1], undefined_percentage)

    # Exactly one class: short-circuit with a plain string.
    return '(' + title[-1] + ' = ' + labels[0] + ')'
Beispiel #4
0
def percentage(csvfile):
    """Compute, per column, the fraction of rows whose value is 'undefined'.

    The comparison is case-insensitive (each cell is lowercased first).

    The division is done in floating point explicitly. Under Python 2 the
    plain ``/`` on two ints truncates, which would make every fraction
    0 or 1. A file with a header but no data rows yields ``0.0`` for every
    column instead of raising ``ZeroDivisionError``.

    Args:
        csvfile: Path handed to ``csvOperation.readCsv``.

    Returns:
        dict mapping each header entry to a float in ``[0.0, 1.0]``.
    """
    title, dataSet = csvOperation.readCsv(csvfile)
    number_of_data = len(dataSet)

    undefined_percentage_dict = {}
    for column_index, column_name in enumerate(title):
        undefined_counter = sum(
            1 for row in dataSet
            if row[column_index].lower() == 'undefined'
        )
        if number_of_data:
            fraction = float(undefined_counter) / number_of_data
        else:
            # No rows at all: nothing is undefined.
            fraction = 0.0
        undefined_percentage_dict[column_name] = fraction
    return undefined_percentage_dict
Beispiel #5
0
def convert(origin_csv, atom_txt, atom_csv):
    """Convert an original data-set CSV into an atom CSV.

    Runs the ``csvOperation`` helper pipeline on the atom list read from
    ``atom_txt`` and the table read from ``origin_csv``, then writes the
    resulting header and rows to ``atom_csv``. Returns nothing.

    Args:
        origin_csv: Path of the source CSV.
        atom_txt: Path of the text file read by ``csvOperation.txtToList``.
        atom_csv: Path of the CSV written by ``csvOperation.creatCsv``.
    """
    atoms = csvOperation.txtToList(atom_txt)
    source_title, source_rows = csvOperation.readCsv(origin_csv)

    # Each helper receives its own copy of the atom list, as in the
    # original code (presumably because the helpers may mutate their
    # argument; confirm against csvOperation).
    lhs = csvOperation.getLeft(atoms[:])
    conversions = csvOperation.getConvertList(source_title, lhs)
    rhs = csvOperation.getRight(atoms[:])

    atom_rows = csvOperation.dataSetToAtomDataSet(
        source_rows, conversions, rhs, source_title)
    out_title, out_rows = csvOperation.creatAtomCsv(
        atoms[:], atom_rows[:], source_rows)

    csvOperation.creatCsv(out_title, out_rows, atom_csv)
Beispiel #6
0
def chooseClassifyAttribute(origin_csv, attribute_list):
    """Pick the first attribute whose column holds more than one distinct value.

    Column positions are looked up in the header read from ``title.txt``
    (with every ``'NODE_'`` removed and the name lowercased), not in the
    header of ``origin_csv``; only the data rows of the CSV are used.

    Attributes missing from that header are reported on stdout and skipped.
    If no attribute qualifies, or there is only one attribute, the first
    entry of ``attribute_list`` is returned.

    Args:
        origin_csv: Path handed to ``csvOperation.readCsv``.
        attribute_list: Non-empty sequence of normalized attribute names.

    Returns:
        The chosen attribute name.

    Raises:
        IndexError: If ``attribute_list`` is empty.
    """
    # The CSV's own header is not used; positions come from title.txt.
    _, dataSet = csvOperation.readCsv(origin_csv)
    raw_title = read_txt('title.txt')

    # Default to the first attribute in case every attribute has only a
    # single distinct value.
    result = attribute_list[0]
    if len(attribute_list) == 1:
        return result

    # Normalize into a new list rather than mutating read_txt's result.
    title = [name.replace('NODE_', '').lower() for name in raw_title]

    for member in attribute_list:
        if member not in title:
            print('error treemain 153 member not in title!!!!!!!!')
            print('memeber :' + member)
            # title is a list; str + list would raise TypeError here.
            print('title :' + str(title))
            continue
        position = title.index(member)
        distinct_values = set(example[position] for example in dataSet)
        if len(distinct_values) != 1:
            # More than one state in this column: use it to classify.
            result = member
            break
    return result