def calculateMIC(dataFileArray, data_mark=None, neadNorm=False):
    """Print the features whose MIC against the final score is above average.

    For every file in ``dataFileArray`` each feature column (every column
    after the first) is scored against the students' final scores with a
    ``MINE`` instance, and the resulting ``mic()`` value is stored under the
    column's header name. Headers whose value is strictly greater than the
    mean of all collected values are then printed, highest value first.

    :param dataFileArray: iterable of data file names; each is joined onto
        ``data_mark`` to build the path that is loaded.
    :param data_mark: directory prefix for the data files; ``DATA_MARK`` is
        used when this is None.
    :param neadNorm: when true, the score array and every feature column are
        passed through ``normizeDataSet`` before scoring.
    :return: None; the selected ``header value`` pairs are written to stdout.
    """
    # Resolve the default once instead of re-checking it for every file.
    if data_mark is None:
        data_mark = DATA_MARK

    mic_map = {}
    for dataFileName in dataFileArray:
        file_path = os.path.join(data_mark, dataFileName)
        student_data, headerArray = load_data_from_file(file_path)

        # The first field of every record is the key into the score map.
        score_map = get_final_score_map()
        score_array = [score_map[record[0]] for record in student_data]
        if neadNorm:
            score_array = normizeDataSet(score_array)

        # Compute the MIC of every feature column against the scores.
        m = MINE()
        for index in range(1, len(headerArray)):
            dataArray = getOneColumn(student_data, index)
            if neadNorm:
                dataArray = normizeDataSet(dataArray)
            m.compute_score(dataArray, score_array)
            # A header that appears in several files keeps the value from
            # the last file processed.
            mic_map[headerArray[index]] = m.mic()

    if not mic_map:
        # Nothing was scored: np.mean([]) would only emit a RuntimeWarning
        # and produce nan, so there is nothing to report.
        return

    threshold = np.mean(list(mic_map.values()))
    ranked = sorted(mic_map.items(), key=lambda item: item[1], reverse=True)
    for header, value in ranked:
        if value > threshold:
            print(header, value)
# Example #2
0
def drawDataWithScorePic(dataFileName, needNorminize=False, out_mark=None):
    """Save one scatter plot of final score versus each feature column.

    The data file is loaded from under ``DATA_MARK``. For every column after
    the first, the scores (x axis) are plotted against that column's values
    (y axis) and the figure is saved under
    ``OUT_ROOT_PATH/DATA_MARK/scatterWithScore/``.

    :param dataFileName: data file name, joined onto ``DATA_MARK``.
    :param needNorminize: when true, the scores and each feature column are
        passed through ``normizeDataSet`` and the plot title gets a
        ``-nominized`` suffix.
    :param out_mark: optional suffix appended to the saved file name only
        (the title drawn on the figure does not include it).
    :return: None
    """
    records, headers = load_data_from_file(os.path.join(DATA_MARK, dataFileName))

    # The first field of every record is the key into the score map.
    score_by_key = get_final_score_map(None)
    scores = [score_by_key[record[0]] for record in records]
    if needNorminize:
        scores = normizeDataSet(scores)

    # Walk every feature column (column 0 holds the record key).
    for column in range(1, len(headers)):
        values = getOneColumn(records, column)
        if needNorminize:
            values = normizeDataSet(dataSetA=values)

        plot.scatter(scores, values, s=2)

        feature_name = headers[column]
        caption = feature_name + "-score"
        if needNorminize:
            caption = caption + "-nominized"
        plot.title(caption)
        plot.xlabel("score")
        plot.ylabel(feature_name)

        out_dir = OUT_ROOT_PATH + "/" + DATA_MARK + "/scatterWithScore/"
        checkThenMkdirs(out_dir)

        # out_mark only affects the file name, not the title already drawn.
        file_stem = caption if out_mark is None else caption + "-" + out_mark
        plot.savefig(out_dir + file_stem)
        # Clear the figure so the next column starts from an empty canvas.
        plot.clf()
# Example #3
0
def calculateSpearman(dataFileName, data_mark=None, neadNorm=False):
    """Print the ``spearmanr`` result of each feature column versus the score.

    For every column after the first, one line is printed containing the
    column header followed by the two values returned by ``spearmanr`` for
    that column and the score array.

    :param dataFileName: data file name, joined onto ``data_mark``.
    :param data_mark: directory prefix for the data file; ``DATA_MARK`` is
        used when this is None.
    :param neadNorm: when true, the score array and every feature column are
        passed through ``normizeDataSet`` first.
    :return: None; results are written to stdout.
    """
    mark = DATA_MARK if data_mark is None else data_mark
    records, headers = load_data_from_file(os.path.join(mark, dataFileName))

    # The first field of every record is the key into the score map.
    final_scores = get_final_score_map()
    scores = [final_scores[record[0]] for record in records]

    column_total = len(headers)
    if neadNorm:
        scores = normizeDataSet(scores)

    # Column 0 holds the record key, so features start at column 1.
    for column in range(1, column_total):
        values = getOneColumn(records, column)
        if neadNorm:
            values = normizeDataSet(values)
        # spearmanr yields the correlation coefficient first, then the p-value.
        rho, p_value = spearmanr(values, scores)
        print(headers[column], rho, p_value)