'AND (gunning_fog_index >= %s AND gunning_fog_index <= %s) '
        'AND (smog_index >= %s AND smog_index <= %s) '
        'AND (coleman_liau_index >= %s AND coleman_liau_index <= %s) '
        'AND (lix >= %s AND lix <= %s) '
        'AND (rix >= %s AND rix <= %s) '
        'ORDER BY RAND() LIMIT 1000',
        (limits['ari'][0], limits['ari'][1], limits['flesch_reading_ease'][0],
         limits['flesch_reading_ease'][1],
         limits['flesch_kincaid_grade_level'][0],
         limits['flesch_kincaid_grade_level'][1],
         limits['gunning_fog_index'][0], limits['gunning_fog_index'][1],
         limits['smog_index'][0], limits['smog_index'][1],
         limits['coleman_liau_index'][0], limits['coleman_liau_index'][1],
         limits['lix'][0], limits['lix'][1], limits['rix'][0],
         limits['rix'][1]))

    seen = []
    for i in indices:
        for j in indices:
            if i == j:
                continue
            key = tuple(sorted([i, j]))
            if key in seen:
                continue
            print(key)
            seen.append(key)
            x = [float(v[i]) for v in result]
            y = [float(v[j]) for v in result]
            graph.scatter('data/%s-%s' % (i, j), x, y, True, limits[i],
                          limits[j], i, j)
Beispiel #2
0
def scatter_ab():
	"""Scatter the index-1 point of the A and B curves.

	Reads the module-level ``fpr_a``/``tpr_a``, ``fpr_b``/``tpr_b`` and
	``colors`` and forwards them to ``ga.scatter``: A is labelled 'A' and
	uses ``colors[0]``, B is labelled 'B' and uses ``colors[1]``; both use
	the 'o' marker. Whatever ``ga.scatter`` returns is discarded.
	"""
	ga.scatter(fpr_a[1], tpr_a[1], 'A', 'o', colors[0])
	ga.scatter(fpr_b[1], tpr_b[1], 'B', 'o', colors[1])
    # Metrics constrained by the query, listed in the same order as the
    # ``%s`` placeholder pairs in the SQL text below.
    bounded_metrics = (
        'ari', 'flesch_reading_ease', 'flesch_kincaid_grade_level',
        'gunning_fog_index', 'smog_index', 'coleman_liau_index',
        'lix', 'rix',
    )
    query = (
        'SELECT * FROM comment_feature_read '
        'LEFT JOIN comment ON (comment.id=comment_feature_read.id) '
        'LEFT JOIN submission ON (submission.id=comment.submission_id) '
        'LEFT JOIN reddit ON (reddit.id=submission.reddit_id) '
        'WHERE reddit.name = \'netsec\' '
        'AND (ari >= %s AND ari <= %s) '
        'AND (flesch_reading_ease >= %s AND flesch_reading_ease <= %s) '
        'AND (flesch_kincaid_grade_level >= %s AND flesch_kincaid_grade_level <= %s) '
        'AND (gunning_fog_index >= %s AND gunning_fog_index <= %s) '
        'AND (smog_index >= %s AND smog_index <= %s) '
        'AND (coleman_liau_index >= %s AND coleman_liau_index <= %s) '
        'AND (lix >= %s AND lix <= %s) '
        'AND (rix >= %s AND rix <= %s) '
        'ORDER BY RAND() LIMIT 1000'
    )
    # One (element 0, element 1) pair from ``limits`` per metric, flattened
    # to line up with the placeholders.
    bounds = tuple(limits[metric][edge]
                   for metric in bounded_metrics
                   for edge in (0, 1))
    result = corpus.run_sql(query, bounds)

    def _column(name):
        # Values of one column of the result rows, converted to float.
        return [float(row[name]) for row in result]

    # Emit one scatter plot per unordered pair of entries in ``indices``.
    # Pairs are remembered in sorted form so (a, b) and (b, a) count as one.
    plotted = set()
    for first in indices:
        for second in indices:
            if first == second:
                continue
            pair = tuple(sorted([first, second]))
            if pair in plotted:
                continue
            print(pair)
            plotted.add(pair)
            xs = _column(first)
            ys = _column(second)
            graph.scatter('data/%s-%s' % (first, second), xs, ys, True,
                          limits[first], limits[second], first, second)
Beispiel #4
0
# Curves for the two predictors A and B; each ca.roc call yields four values
# that are unpacked as (fpr, tpr, auc, thresholds).
fpr_a, tpr_a, auc_a, thresholds_a = ca.roc(a_predict, label)
fpr_b, tpr_b, auc_b, thresholds_b = ca.roc(b_predict, label)
for curve in ((fpr_a, tpr_a, auc_a), (fpr_b, tpr_b, auc_b)):
	renew(*curve)

# Get c_infos
fpr_c, tpr_c, auc_c, thresholds_c = ca.roc(c_value, label)
# One text label per threshold of C.
c_label = ['C ' + str(value) for value in thresholds_c]

# Scatter series of c: one '>' marker per (fpr, tpr, threshold) triple.
# zip() stops at the shortest input, so no more points are drawn than there
# are entries in `colors`.
for point_fpr, point_tpr, threshold, color in zip(fpr_c, tpr_c, thresholds_c, colors):
	ga.scatter(point_fpr, point_tpr, 'C: ' + str(threshold), '>', color)
# Scatter A B
def scatter_ab():
	"""Scatter the index-1 point of the A and B curves.

	Reads the module-level ``fpr_a``/``tpr_a``, ``fpr_b``/``tpr_b`` and
	``colors`` and forwards them to ``ga.scatter``: A is labelled 'A' and
	uses ``colors[0]``, B is labelled 'B' and uses ``colors[1]``; both use
	the 'o' marker. Whatever ``ga.scatter`` returns is discarded.
	"""
	ga.scatter(fpr_a[1], tpr_a[1], 'A', 'o', colors[0])
	ga.scatter(fpr_b[1], tpr_b[1], 'B', 'o', colors[1])

# Convex hull for A, B and C.
# Stack every candidate point as one (x, y) row: all C points, the index-1
# point of A and of B, and the two corners (0, 0) and (1, 1).
points = np.vstack((
	np.array([fpr_c, tpr_c]).T,
	[fpr_a[1], tpr_a[1]],
	[fpr_b[1], tpr_b[1]],
	[0, 0],
	[1, 1],
))
hull = ConvexHull(points)
# Each entry of hull.simplices holds row indices into `points`; draw the
# segment through those rows as a solid black line.
for edge in hull.simplices:
	plt.plot(points[edge, 0], points[edge, 1], 'k-')
    # NOTE(review): this is the interior of a function whose header is not
    # visible here; `result` and `graph` come from the enclosing scope.

    # Histogram of the 'smog_index' column of every row in `result`.
    # The three trailing strings look like x label, y label and title --
    # confirm against graph.hist.
    values = [ float(v['smog_index']) for v in result ]
    graph.hist('data/smog_index_hist', values, 'Smog Index', 'Frequency',
               'Frequency of Smog Index values')

    # Same for the 'coleman_liau_index' column.
    values = [ float(v['coleman_liau_index']) for v in result ]
    graph.hist('data/coleman_liau_index_hist', values, 'Coleman Liau Index', 'Frequency',
               'Frequency of Coleman Liau Index values')

    # Same for the 'lix' column.
    values = [ float(v['lix']) for v in result ]
    graph.hist('data/lix_hist', values, 'LIX', 'Frequency',
               'Frequency of LIX values')

    # Same for the 'rix' column.
    values = [ float(v['rix']) for v in result ]
    graph.hist('data/rix_hist', values, 'RIX', 'Frequency',
               'Frequency of RIX values')

    # Column names to plot against each other pairwise.
    indices = ['ari', 'flesch_reading_ease', 'flesch_kincaid_grade_level', 'gunning_fog_index', 'smog_index',
               'coleman_liau_index', 'lix', 'rix']
    # Sorted (name, name) pairs already plotted, so that (a, b) and (b, a)
    # produce a single scatter plot.
    seen = []
    for i in indices:
        for j in indices:
            # Never plot a column against itself.
            if i == j:
                continue
            # Order-independent identity of the pair.
            key = tuple(sorted([i, j]))
            if key in seen:
                continue
            seen.append(key)
            # Both columns are re-extracted from `result` for every pair.
            x = [ float(v[i]) for v in result ]
            y = [ float(v[j]) for v in result ]
            # The output name keeps iteration order (i first, then j).
            # NOTE(review): the meaning of the positional True is not visible
            # here -- confirm against graph.scatter.
            graph.scatter('data/%s-%s' % (i, j), x, y, True, x_title=i, y_title=j)