コード例 #1
0
def main():
    """Analyze 'example.train.csv' and report distinct-value statistics.

    While the analysis runs, stdout is redirected to 'analyszedata.out', so
    everything printed here (and by the helpers called from here) lands in
    that file.  The function then shows two matplotlib plots:

    * the number of features for each distinct-value count, and
    * the running (accumulated) total of those feature counts.

    stdout is restored and the output file closed even when a step fails.
    """
    ### redirecting stdout
    orig_stdout = sys.stdout
    # open() instead of the Python-2-only file() builtin; same behavior.
    f = open('analyszedata.out', 'w')
    sys.stdout = f
    try:
        ### Load file
        dataFile = 'example.train.csv'
        data = util.loadCsv(dataFile)
        # Drop the last column of every row; only the remaining columns are
        # analyzed as features (presumably the last column is the target --
        # TODO confirm against the data file).
        data = np.asarray(data)[:, :-1]

        ### Get and output feature types
        analayzeFeatureType(data)

        ### Get and output value counts for each feature
        # NOTE(review): distinctValCntFeatureMap is a module-level name that
        # is presumably filled in by getValueCountsAll -- confirm.
        getValueCountsAll(data)

        print("\n%s The number of distinct values and the corresponding feature IDs." % util.RESULT)
        print(distinctValCntFeatureMap)

        # Collapse each collection of feature IDs to its size.
        distinctValCntFeatureCntMap = {
            key: len(values)
            for key, values in distinctValCntFeatureMap.items()
        }

        print("\n%s The number of distinct values and # of features." % util.RESULT)
        print(distinctValCntFeatureCntMap)

        # plot
        # Build the series from scratch (the lists used to be pre-filled with
        # zeros and then appended to, which doubled their length and added
        # spurious (0, 0) points).  Keys are visited in ascending order so the
        # line plot runs left to right and the running total is meaningful;
        # this assumes the keys are mutually comparable (e.g. all ints).
        x = sorted(distinctValCntFeatureCntMap)
        singley = [distinctValCntFeatureCntMap[key] for key in x]
        accumy = []
        accumValue = 0
        for count in singley:
            accumValue += count
            accumy.append(accumValue)

        plt.plot(x, singley)
        plt.xlabel('# of distinct values in a feature')
        plt.ylabel('# of features')
        plt.show()

        plt.plot(x, accumy)
        plt.xlabel('# of distinct values in a feature')
        plt.ylabel('accumalated # of features')
        plt.show()
    finally:
        ### redirecting stdout
        # Always undo the redirect, otherwise a failure above would leave
        # the process printing into a dangling file handle.
        sys.stdout = orig_stdout
        f.close()
コード例 #2
0
def countTargetValues(dataFile):
    """Print how many rows of a CSV file have a non-zero last column.

    The file is loaded with ``util.loadCsv``; the last column of every row
    is converted to ``int`` and the number of non-zero entries is printed.

    Args:
        dataFile: path of the CSV file, passed straight to ``util.loadCsv``.

    Raises:
        ValueError: if a last-column value cannot be converted by ``int``.
    """
    # Convert to an ndarray before 2-D slicing: a plain list of rows does not
    # support ``[:, -1]``.  np.asarray is a no-op if it already is an array.
    data = np.asarray(util.loadCsv(dataFile))
    # Materialize the ints eagerly; a lazy map object (Python 3) would be
    # wrapped by np.asarray as a single 0-d object instead of a 1-D array.
    targets = [int(value) for value in data[:, -1]]
    print(np.count_nonzero(np.asarray(targets)))