Example #1
0
def plot_models(x, y, models, fname, mx=None, ymax=None, xmin=None):
    """Scatter the traffic samples and overlay the fitted model curves.

    Parameters
    ----------
    x, y : sample coordinates; ``x[-1]`` is used as the right end of the
        default evaluation grid.
    models : sequence of callables with an ``order`` attribute, or a falsy
        value to draw the samples only.
    fname : not used by the visible body.
        NOTE(review): presumably the target file name -- confirm whether a
        ``savefig`` call was lost.
    mx : x values at which the models are evaluated; defaults to 1000
        evenly spaced points between 0 and ``x[-1]``.
    ymax, xmin : optional axis limits, applied only when truthy.
    """
    week_ids = range(10)

    plt.clf()
    plt.scatter(x, y, s=10)
    plt.title("Web traffic over the last month")
    plt.xlabel("Time")
    plt.ylabel("Hits/hour")
    # One tick per week, expressed in hours.
    plt.xticks([week * 7 * 24 for week in week_ids],
               ["week %i" % week for week in week_ids])

    if models:
        curve_x = sp.linspace(0, x[-1], 1000) if mx is None else mx
        # linestyles / colors are module-level sequences paired with models.
        for fitted, dash, shade in zip(models, linestyles, colors):
            plt.plot(curve_x, fitted(curve_x),
                     linestyle=dash, linewidth=2, c=shade)

        plt.legend(["d=%i" % fitted.order for fitted in models],
                   loc="upper left")

    plt.autoscale(tight=True)
    plt.ylim(ymin=0)
    if ymax:
        plt.ylim(ymax=ymax)
    if xmin:
        plt.xlim(xmin=xmin)
    plt.grid(True, linestyle="-", color="0.75")
Example #2
0
def show_plot(X, y, n_neighbors=10, h=0.2):
    """Draw k-nearest-neighbour decision regions for both weighting schemes.

    A separate figure is produced for ``'uniform'`` and for ``'distance'``
    weights; ``plt.show()`` is called once after both are drawn.

    Parameters
    ----------
    X : 2-D array; columns 0 and 1 are the plotted features.
    y : class labels used to fit the classifier and colour the points.
    n_neighbors : number of neighbours handed to the classifier.
    h : step size of the evaluation mesh.
    """
    # Ten-entry colour maps: the light one for regions, the bold one for points.
    light_hex = ['#FFAAAA', '#AAFFAA', '#AAAAFF'] * 3 + ['#AAAAFF']
    bold_hex = ['#FF0000', '#00FF00', '#0000FF'] + ['#FF0000'] * 7
    cmap_light = ListedColormap(light_hex)
    cmap_bold = ListedColormap(bold_hex)

    # The mesh covers the data range padded by 1 on each side; it does not
    # depend on the weighting scheme, so it is built once.
    first, second = X[:, 0], X[:, 1]
    xx, yy = np.meshgrid(np.arange(first.min() - 1, first.max() + 1, h),
                         np.arange(second.min() - 1, second.max() + 1, h))
    mesh_points = np.c_[xx.ravel(), yy.ravel()]

    for weights in ['uniform', 'distance']:
        clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
        clf.fit(X, y)
        clf.n_neighbors = n_neighbors

        # Predict a class for every mesh point and shape it like the mesh.
        Z = clf.predict(mesh_points).reshape(xx.shape)

        plt.figure()
        plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

        # Training points on top of the coloured regions.
        plt.scatter(first, second, c=y, cmap=cmap_bold)
        plt.xlim(xx.min(), xx.max())
        plt.ylim(yy.min(), yy.max())
        plt.title("3-Class classification (k = %i, weights = '%s')"
                  % (n_neighbors, weights))

    plt.show()
Example #3
0
    def work(self, **kwargs):
        """Compare an equal-count histogram of LGMM1 draws with its pdf.

        Keyword arguments are copied onto the instance before anything else.
        The sorted samples are cut every ``samples_per_bin`` draws, the
        empirical density of each bin is compared with
        ``exp(LGMM1_lpdf(bin centre))``, and the max / mean / median squared
        error are printed.  When ``self.show`` is false the errors are
        asserted to stay below .1 / .01 / .01; otherwise the curves are
        plotted instead.
        """
        self.__dict__.update(kwargs)
        self.worked = True

        draws = np.sort(LGMM1(rng=self.rng,
                              size=(self.n_samples,),
                              **self.LGMM1_kwargs))
        edges = draws[::self.samples_per_bin]
        centers = .5 * edges[:-1] + .5 * edges[1:]
        print(edges)

        pdf = np.exp(LGMM1_lpdf(centers, **self.LGMM1_kwargs))
        # Every bin holds the same number of draws, so its density is
        # 1 / (width * number_of_bins).
        dx = np.diff(edges)
        y = 1 / dx / len(dx)

        if self.show:
            plt.scatter(centers, y)
            plt.plot(centers, pdf)
            plt.show()

        err = (pdf - y) ** 2
        worst, average, middle = np.max(err), np.mean(err), np.median(err)
        print(worst)
        print(average)
        print(middle)
        if not self.show:
            assert worst < .1
            assert average < .01
            assert middle < .01
Example #4
0
def altitude():
    """Plot the antenna altitude of every filled-in GGA sentence in nmea.txt.

    ``nmea.txt`` is copied to ``temp.txt`` and the copy is parsed, so the
    original file is never opened by the parser.  Each line longer than 50
    characters whose fifth character is ``'G'`` is parsed as a GPGGA
    sentence; its altitude is stored in the global ``alt``, the global
    counter ``i`` is incremented, both are printed and one red point is
    added to the plot.  ``i`` is reset to 0 before the figure is shown.
    """
    global alt, i

    # copyfile creates/overwrites temp.txt itself; no need to open it first.
    shutil.copyfile('nmea.txt', 'temp.txt')

    with open('temp.txt', 'r') as nmea_copy:
        for line in nmea_copy:
            # Length is checked first so blank or short lines cannot raise
            # IndexError on line[4].  Long lines are those filled with data
            # once the receiver has a lock.
            if len(line) > 50 and line[4] == 'G':  # fifth character in $GPGGA
                gpgga = nmea.GPGGA()
                gpgga.parse(line)
                alt = gpgga.antenna_altitude
                i += 1  # running count of plotted sentences
                print(i)
                print(alt)
                plt.scatter(x=[i], y=[float(alt)], s=1, c='r')
    i = 0

    # Axes are autoscaled.
    plt.ylabel('meters')
    plt.xlabel('counts')
    plt.title('ALTITUDE')
    plt.show()
Example #5
0
def plot_2d_simple(data, y=None):
    """Scatter the first two columns of ``data``, optionally coloured by class.

    Parameters
    ----------
    data : 2-D array; column 0 is plotted against column 1.
    y : optional array of 1-based class labels.  Label ``k`` is drawn with
        ``collist[(k - 1) % len(collist)]``, so labels beyond the palette
        wrap around instead of raising IndexError.
    """
    # `is None` rather than `== None`: comparing an array with == is
    # element-wise and cannot be used as a condition.
    if y is None:
        plt.scatter(data[:, 0], data[:, 1], s=50)
        return

    # Parenthesised on purpose: `label - 1 % n` would parse as
    # `label - (1 % n)` and never wrap.
    palette_size = len(collist)
    Ycol = [collist[(label - 1) % palette_size] for label in y.astype(int)]
    plt.scatter(data[:, 0], data[:, 1], c=Ycol, s=40)
def scatter_time_vs_s(time, norm, point_labels, title):
    """Scatter running time against norm discrepancy for each method.

    Parameters
    ----------
    time, norm : dicts keyed by method name; each value is a sequence of
        running times / norm discrepancies.
    point_labels : labels annotated next to each point of every method.
    title : figure super-title.

    The ``"fbpca"`` entry is drawn with red triangles and labels above the
    points; every other method uses blue circles with labels below.
    """
    plt.figure()
    size = 100
    for l in sorted(norm.keys()):
        # String equality, not identity: `is not "fbpca"` only worked when
        # the interpreter happened to intern both strings.
        if l != "fbpca":
            marker, color, text_offset = 'o', 'b', (0, -80)
        else:
            marker, color, text_offset = '^', 'red', (0, 30)

        plt.scatter(time[l], norm[l], label=l, marker=marker, c=color, s=size)
        for label, x, y in zip(point_labels, list(time[l]), list(norm[l])):
            plt.annotate(label, xy=(x, y), xytext=text_offset,
                         textcoords='offset points', ha='right',
                         arrowprops=dict(arrowstyle="->",
                                         connectionstyle="arc3"),
                         va='bottom', size=11, rotation=90)

    plt.legend(loc="best")
    plt.suptitle(title)
    plt.ylabel("norm discrepancy")
    plt.xlabel("running time [s]")
Example #7
0
    def work(self):
        """Compare an equal-count histogram of GMM1 draws with its pdf.

        Samples are drawn with the instance's mixture parameters, sorted and
        cut every ``samples_per_bin`` draws.  The empirical density of each
        bin is compared with ``exp(GMM1_lpdf(left bin edge))`` and the max /
        mean / median squared error are printed.  When ``self.show`` is
        false the errors are asserted to stay below .1 / .01 / .01;
        otherwise the curves are plotted instead.
        """
        self.worked = True
        mixture = dict(
                weights=self.weights,
                mus=self.mus,
                sigmas=self.sigmas,
                low=self.low,
                high=self.high,
                q=self.q,
                )
        draws = np.sort(GMM1(rng=self.rng,
                             size=(self.n_samples,),
                             **mixture))
        edges = draws[::self.samples_per_bin]
        left_edges = edges[:-1]

        pdf = np.exp(GMM1_lpdf(left_edges, **mixture))
        # Every bin holds the same number of draws, so its density is
        # 1 / (width * number_of_bins).
        dx = np.diff(edges)
        y = 1 / dx / len(dx)

        if self.show:
            plt.scatter(left_edges, y)
            plt.plot(left_edges, pdf)
            plt.show()

        err = (pdf - y) ** 2
        worst, average, middle = np.max(err), np.mean(err), np.median(err)
        print(worst)
        print(average)
        print(middle)
        if not self.show:
            assert worst < .1
            assert average < .01
            assert middle < .01
Example #8
0
def scatter(x, y, equal=False, xlabel=None, ylabel=None, xinvert=False, yinvert=False):
    """
    Plot a scatter with simple formatting options

    Parameters
    ----------
    x, y : arrays providing ``.min()`` / ``.max()``.
    equal : when true, use an equal aspect ratio, identical limits on both
        axes and a grey diagonal from 0 to the largest value.
    xlabel, ylabel : optional axis labels (skipped when falsy).
    xinvert, yinvert : flip the corresponding axis.

    Limits are padded by 5% of the data range on each side.
    """
    plt.scatter(x, y, 200, color=[0.3, 0.3, 0.3], edgecolors="white", linewidth=1, zorder=2)
    sns.despine()
    if xlabel:
        plt.xlabel(xlabel)
    if ylabel:
        plt.ylabel(ylabel)
    if equal:
        # gca() targets the axes that were just drawn on; plt.axes() may
        # create a brand-new Axes and leave the scatter's aspect untouched.
        plt.gca().set_aspect("equal")
        bmin = min([x.min(), y.min()])
        bmax = max([x.max(), y.max()])
        plt.plot([0, bmax], [0, bmax], color=[0.6, 0.6, 0.6], zorder=1)
        rng = abs(bmax - bmin)
        shared_limits = [bmin - rng * 0.05, bmax + rng * 0.05]
        plt.xlim(shared_limits)
        plt.ylim(shared_limits)
    else:
        xrng = abs(x.max() - x.min())
        yrng = abs(y.max() - y.min())
        plt.xlim([x.min() - xrng * 0.05, x.max() + xrng * 0.05])
        plt.ylim([y.min() - yrng * 0.05, y.max() + yrng * 0.05])
    if xinvert:
        plt.gca().invert_xaxis()
    if yinvert:
        plt.gca().invert_yaxis()
Example #9
0
File: LVQ.py Project: jayshonzs/ESL
def draw(data, classes, model, resolution=100):
    """Show the decision regions, the samples and the prototypes of a model.

    Parameters
    ----------
    data : 2-D array; columns 0 and 1 are the plotted features.
    classes : per-sample values used to colour the data points.
    model : 2-D array of prototypes; columns 0 and 1 are plotted.  The
        prototypes are coloured as three groups of five, so 15 rows are
        expected.
    resolution : number of mesh steps along each axis.
    """
    mycm = mpl.cm.get_cmap('Paired')

    # Mesh over the data range, padded by 0.1 on each side.
    first, second = data[:, 0], data[:, 1]
    one_min, one_max = first.min() - 0.1, first.max() + 0.1
    two_min, two_max = second.min() - 0.1, second.max() + 0.1
    one_axis = np.arange(one_min, one_max, (one_max - one_min) / resolution)
    two_axis = np.arange(two_min, two_max, (two_max - two_min) / resolution)
    xx1, xx2 = np.meshgrid(one_axis, two_axis)

    # Classify every mesh point; only element 0 of each prediction is used.
    inputs = np.c_[xx1.ravel(), xx2.ravel()]
    z = [predict(model, point)[0] for point in inputs]
    result = np.array(z).reshape(xx1.shape)

    plt.contourf(xx1, xx2, result, cmap=mycm)
    plt.scatter(first, second, s=50, c=classes, cmap=mycm)

    # Colour index per prototype: five 0s, five 1s, five 2s.
    t = np.repeat([0., 1., 2.], 5)
    plt.scatter(model[:, 0], model[:, 1], s=150, c=t, cmap=mycm)

    plt.xlim([0, 10])
    plt.ylim([0, 10])

    plt.show()
Example #10
0
def influence_plot(X, y_true, y_pred, **kwargs):
    """Draw an influence plot for a fitted binary model.

    Each observation is placed at (predicted probability, case delta) and
    sized by 800 times its dbeta value.

    Parameters
    ----------
    X : array
        Design matrix.
    y_true : array_like
        Observed labels, either 0 or 1.
    y_pred : array_like
        Predicted probabilities, floats on [0, 1].
    **kwargs
        Forwarded unchanged to ``plt.scatter``.

    Notes
    -----
    .. plot:: pyplots/influence_plot.py
    """
    residuals = pearson_residuals(y_true, y_pred)
    leverages = pregibon_leverages(X, y_pred)

    delta_X2 = case_deltas(residuals, leverages)
    marker_sizes = pregibon_dbetas(residuals, leverages) * 800

    plt.scatter(y_pred, delta_X2, s=marker_sizes, **kwargs)

    # Pin the x range to [0, 1] and keep the automatic y range.
    current_limits = plt.axis()
    plt.axis((0, 1, current_limits[2], current_limits[3]))

    plt.xlabel('Predicted Probability')
    plt.ylabel(r'$\Delta \chi^2$')

    plt.tight_layout()
Example #11
0
def tuning(x, y, err=None, smooth=None, ylabel=None, pal=None):
    """
    Plot a tuning curve

    Parameters
    ----------
    x, y : point coordinates.
    err : optional per-point error for the error bars; NaN entries count as
        0 when the y range is computed.  The caller's array is not modified.
    smooth : when given, passed to ``smoothfit`` and the fitted curve is
        drawn behind the points.
    ylabel : y axis label.
    pal : list of point colours; defaults to a reversed slice of the
        seaborn "husl" palette.

    Returns
    -------
    The upper end of the y range (largest of the smoothed curve and
    ``y + err``).
    """
    if smooth is not None:
        xs, ys = smoothfit(x, y, smooth)
        plt.plot(xs, ys, linewidth=4, color="black", zorder=1)
    else:
        ys = asarray([0])
    if pal is None:
        pal = sns.color_palette("husl", n_colors=len(x) + 6)
        pal = pal[2 : 2 + len(x)][::-1]
    plt.scatter(x, y, s=300, linewidth=0, color=pal, zorder=2)
    if err is not None:
        plt.errorbar(x, y, yerr=err, linestyle="None", ecolor="black", zorder=1)
    plt.xlabel("Wall distance (mm)")
    plt.ylabel(ylabel)
    plt.xlim([-2.5, 32.5])
    if err is None:
        # No error bars: the points themselves bound the range.
        upper = asarray(y)
    else:
        # Work on a copy so zeroing the NaNs does not leak to the caller.
        errTmp = asarray(err, dtype=float).copy()
        errTmp[isnan(errTmp)] = 0
        upper = asarray(y) + errTmp
    rng = max([nanmax(ys), nanmax(upper)])
    plt.ylim([0 - rng * 0.1, rng + rng * 0.1])
    plt.yticks(linspace(0, rng, 3))
    plt.xticks(range(0, 40, 10))
    sns.despine()
    return rng
Example #12
0
def regress_show4(yEv, yEv_calc, disp=True, graph=True, plt_title=None, ms_sz=None):
    """Report regression accuracy and optionally plot prediction vs. experiment.

    Parameters
    ----------
    yEv, yEv_calc : experimental and predicted values (``.tolist()`` and
        ``.shape`` are used).
    disp : forwarded to ``estimate_accuracy4``.
    graph : when true, open a new figure with the scatter and a pink
        identity line spanning both axes.
    plt_title : ``None`` for an automatic title listing the four metrics,
        ``""`` for no title, anything else is used verbatim.
    ms_sz : marker size; by default ``6000 / n_samples`` clamped to [3, 8].

    Returns
    -------
    The tuple ``(r_sqr, RMSE, MAE, DAE)`` from ``estimate_accuracy4``.
    """
    r_sqr, RMSE, MAE, DAE = estimate_accuracy4(yEv, yEv_calc, disp=disp)
    scores = (r_sqr, RMSE, MAE, DAE)

    if not graph:
        return scores

    plt.figure()
    if ms_sz is None:
        ms_sz = max(min(6000 / yEv.shape[0], 8), 3)
    plt.scatter(yEv.tolist(), yEv_calc.tolist(), s=ms_sz)

    # Identity line from the smallest to the largest limit of either axis.
    ax = plt.gca()
    axis_bounds = [ax.get_xlim(), ax.get_ylim()]
    lims = [np.min(axis_bounds), np.max(axis_bounds)]
    ax.plot(lims, lims, '-', color='pink')

    plt.xlabel('Experiment')
    plt.ylabel('Prediction')
    if plt_title is None:
        plt.title('$r^2$={0:.1e}, RMSE={1:.1e}, MAE={2:.1e}, MedAE={3:.1e}'.format(r_sqr, RMSE, MAE, DAE))
    elif plt_title != "":
        plt.title(plt_title)

    return scores
def plot(i, pcanc, lr, pp, labelFlag, Y):
    """Scatter a 2-D embedding and optionally annotate every point.

    Parameters
    ----------
    i : figure number when it prints as a single character, otherwise a
        subplot specification handed to ``plt.subplot``.
    pcanc : number of PCA components; 0 means plain t-SNE.
    lr, pp : learning rate and perplexity, shown in the title.
    labelFlag : 1 to annotate the points with the module-level labels ``y``.
    Y : 2-D array; columns 0 and 1 are plotted.

    NOTE(review): ``ax``, ``colors``, ``font`` and ``y`` are not defined in
    this function -- presumably module-level globals; confirm.
    """
    if len(str(i)) == 1:
        plt.figure(i)
    else:
        plt.subplot(i)

    tsne_settings = ' learning_rate: ' + str(lr) + ' perplexity: ' + str(pp)
    if pcanc == 0:
        plt.title(tsne_settings)
        print("Plotting tSNE")
    else:
        plt.title('PCA-n_components: ' + str(pcanc) + tsne_settings)
        print("Plotting PCA-tSNE")

    xs, ys = Y[:, 0], Y[:, 1]
    plt.scatter(xs, ys, c=colors)
    if labelFlag == 1:
        label_box = dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.9)
        for label, cx, cy in zip(y, xs, ys):
            plt.annotate(
                label.decode('utf-8'),
                xy=(cx, cy),
                xytext=(-10, 10),
                fontproperties=font,
                textcoords='offset points', ha='right', va='bottom',
                bbox=label_box)

    # Hide the tick labels on both axes.
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_major_formatter(NullFormatter())
    plt.axis('tight')
    print("Done.")
def create_plots(iterations, data, M, step_size):
    """Train the network and save three diagnostic figures.

    Figures written below ``images/`` (format taken from the module-level
    ``img_format``):

    1. network output against ``sin(x) / x`` on [-10, 10),
    2. network output against the training data,
    3. the training error on a log scale.

    Parameters
    ----------
    iterations, M, step_size : forwarded to ``steepest_decent_training``;
        ``iterations`` and ``M`` also appear in the file names and legends.
    data : 2-D array; column 0 holds the inputs, column 1 the targets.
    """
    NN_b, NN_a, E = steepest_decent_training(iterations, data, M, step_size)

    X = data[:, 0]
    X_cont = np.arange(-10, 10, 0.1)

    t = data[:, 1]
    # The network takes a column vector [x, 1]; element 0 of its result is
    # plotted.
    Y = [NN_a(np.array([[x], [1]]))[0] for x in X_cont]
    # `lambda x:` instead of `lambda(x):` -- the parenthesised parameter
    # form is a syntax error on Python 3 and means the same on Python 2.
    f = np.vectorize(lambda x: sin(x) / x)

    plt.figure(1)
    plt.plot(X_cont, Y, label='Neural network \n (M = %d)' % M)
    plt.plot(X_cont, f(X_cont), label='sinc(x)')
    plt.legend()
    plt.savefig('images/nn_vs_real_%d_%d.%s' % (iterations, M, img_format), format=img_format)

    plt.figure(2)
    plt.plot(X_cont, Y, label='Neural network \n (M = %d)' % M)
    plt.scatter(X, t, color='red', label='Training data')
    plt.legend()
    plt.savefig('images/nn_vs_training_%d_%d.%s' % (iterations, M, img_format), format=img_format)

    plt.figure(3)
    plt.plot(E, label='Error (M = %d)' % M)
    plt.yscale('log')
    plt.legend()
    plt.savefig('images/error_%d_%d.%s' % (iterations, M, img_format), format=img_format)

    plt.show()
def plot_dpi_dpr_distribution(args, dpis, dprs, diagnoses):
    print log.INFO, 'Plotting estimate distributions...'
    diagnoses = np.array(diagnoses)
    diagnoses[(0.25 <= diagnoses) & (diagnoses <= 0.75)] = 0.5

    # Setup plot
    fig, ax = plt.subplots()
    pt.setup_axes(plt, ax)

    biomarkers_str = args.method if args.biomarkers is None else ', '.join(args.biomarkers)
    ax.set_title('DP estimation using {0} at {1}'.format(biomarkers_str, ', '.join(args.visits)))
    ax.set_xlabel('DP')
    ax.set_ylabel('DPR')

    plt.scatter(dpis, dprs, c=diagnoses, edgecolor='none', s=25.0,
                vmin=0.0, vmax=1.0, cmap=pt.progression_cmap,
                alpha=0.5)

    # Plot legend
    # noinspection PyUnresolvedReferences
    rects = [mpl.patches.Rectangle((0, 0), 1, 1, fc=pt.color_cn + (0.5,), linewidth=0),
             mpl.patches.Rectangle((0, 0), 1, 1, fc=pt.color_mci + (0.5,), linewidth=0),
             mpl.patches.Rectangle((0, 0), 1, 1, fc=pt.color_ad + (0.5,), linewidth=0)]
    labels = ['CN', 'MCI', 'AD']
    legend = ax.legend(rects, labels, fontsize=10, ncol=len(rects), loc='upper center', framealpha=0.9)
    legend.get_frame().set_edgecolor((0.6, 0.6, 0.6))

    # Draw or save the plot
    plt.tight_layout()
    if args.plot_file is not None:
        plt.savefig(args.plot_file, transparent=True)
    else:
        plt.show()
    plt.close(fig)
Example #16
0
def fig(data, target):
    """Show a black scatter of ``target`` against ``data`` without ticks."""
    #FIXME
    plt.scatter(data, target, color='black')
    # An empty tuple removes every tick from the axis.
    for set_ticks in (plt.xticks, plt.yticks):
        set_ticks(())

    plt.show()
Example #17
0
def plot_words(V, labels=None, color='b', mark='o', fa='bottom'):
    """Scatter a 2-D t-SNE projection of ``V`` and annotate the points.

    Parameters
    ----------
    V : input handed to ``tsne(V, 2)``; columns 0 and 1 of the result are
        plotted.
    labels : optional sequence of point labels.  Byte strings are decoded
        as UTF-8; text labels are used as they are.  When None, only the
        points are drawn.
    color, mark : colour and marker of the points.
    fa : vertical alignment of the annotations.
    """
    W = tsne(V, 2)
    plt.scatter(W[:, 0], W[:, 1], c=color, marker=mark, s=50.0)

    # The default labels=None used to crash in zip(); draw points only.
    if labels is None:
        return

    for label, x, y in zip(labels, W[:, 0], W[:, 1]):
        # Only raw bytes need decoding; already-decoded text has no usable
        # .decode() on Python 3.
        if isinstance(label, bytes):
            label = label.decode('utf8')
        plt.annotate(label, xy=(x, y), xytext=(-1, 1),
                     textcoords='offset points', ha='center', va=fa,
                     bbox=dict(boxstyle='round,pad=0.1', fc='white', alpha=0))
Example #18
0
def plot_obs_expc_new(obs, expc, expc_upper, expc_lower, analysis, log, ax = None):
    """Modified version of the obs-expc plot suggested by R2.

    The points are separated by whether their CIs are below, overlapping,
    or above the empirical value; within each group they are sorted by the
    standardized expected value.

    Input:
    obs - list of observed values
    expc - list of mean simulated values for the corresponding observed values
    expc_upper - list of the 97.5% quantile of the simulated values
    expc_lower - list of the 2.5% quantile of the simulated values
    analysis - 'partition' selects the green colour; any other value the
        orchid one
    log - whether the y axis is to be transformed. If True, expc/obs is
        plotted on a log axis. If False, expc - obs is plotted.
    ax - axes to draw on; when not given, a new 3.5 x 3.5 inch figure with
        a single subplot is created

    Returns the axes that were drawn on.
    """
    obs, expc, expc_upper, expc_lower = list(obs), list(expc), list(expc_upper), list(expc_lower)
    if not ax:
        plt.figure(figsize = (3.5, 3.5))
        ax = plt.subplot(111)

    n_obs = len(obs)
    ind_above = [i for i in range(n_obs) if expc_lower[i] > obs[i]]
    ind_below = [i for i in range(n_obs) if expc_upper[i] < obs[i]]
    ind_overlap = [i for i in range(n_obs) if expc_lower[i] <= obs[i] <= expc_upper[i]]

    if log:
        expc_standardize = [expc[i] / obs[i] for i in range(n_obs)]
        expc_upper_standardize = [expc_upper[i] / obs[i] for i in range(n_obs)]
        expc_lower_standardize = [expc_lower[i] / obs[i] for i in range(n_obs)]
        # Zeros cannot be shown on a log axis, so they are ignored when
        # choosing the lower limit.
        axis_min = 0.9 * min(v for v in expc_lower_standardize if v != 0)
        axis_max = 1.5 * max(expc_upper_standardize)
    else:
        expc_standardize = [expc[i] - obs[i] for i in range(n_obs)]
        expc_upper_standardize = [expc_upper[i] - obs[i] for i in range(n_obs)]
        expc_lower_standardize = [expc_lower[i] - obs[i] for i in range(n_obs)]
        axis_min = 1.1 * min(expc_lower_standardize)
        axis_max = 1.1 * max(expc_upper_standardize)

    if analysis == 'partition':
        col = '#228B22'
    else:
        col = '#CD69C9'

    # Plot order: below, overlapping, above -- each sorted by value.
    ind_full = []
    for index in [ind_below, ind_overlap, ind_above]:
        ind_full.extend(sorted(index, key = lambda i: expc_standardize[i]))

    # Everything is drawn through `ax`, so a caller-supplied axes is
    # honoured even when it is not the current pyplot axes.
    xaxis_max = len(ind_full)
    for pos, ind in enumerate(ind_full):
        ax.plot([pos, pos], [expc_lower_standardize[ind], expc_upper_standardize[ind]], '-', c = col, linewidth = 0.4)
    ax.scatter(list(range(xaxis_max)), [expc_standardize[i] for i in ind_full], c = col, edgecolors='none', s = 8)

    # Reference line at "expected == observed".
    if log:
        ax.plot([0, xaxis_max + 1], [1, 1], 'k-', linewidth = 1.5)
        ax.set_yscale('log')
    else:
        ax.plot([0, xaxis_max + 1], [0, 0], 'k-', linewidth = 1.5)

    # Dashed separators between the three groups.
    first_split = len(ind_below) - 0.5
    second_split = len(ind_below) + len(ind_overlap) - 0.5
    ax.plot([first_split, first_split], [axis_min, axis_max], 'k--')
    ax.plot([second_split, second_split], [axis_min, axis_max], 'k--')

    ax.set_xlim(0, xaxis_max)
    ax.set_ylim(axis_min, axis_max)
    # Booleans instead of 'on'/'off': any non-empty string is truthy, so
    # 'off' is not reliably understood as "hide".
    ax.tick_params(axis = 'y', which = 'major', labelsize = 8, labelleft = True)
    ax.tick_params(axis = 'x', which = 'major', top = False, bottom = False, labelbottom = False)
    return ax
Example #19
0
def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    """Plot a classifier's decision surface together with the samples.

    Parameters
    ----------
    X : 2-D array; columns 0 and 1 are the plotted features.
    y : class labels; at most five distinct values are supported by the
        marker and colour tables.
    classifier : fitted object with a ``predict`` method.
    test_idx : optional indices (list, range or array) of the test samples,
        which are circled with hollow markers.
    resolution : step size of the evaluation mesh.
    """
    # setup marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    classes = np.unique(y)
    cmap = ListedColormap(colors[:len(classes)])

    # plot the decision surface over the data range padded by 1
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # plot class samples
    for idx, cl in enumerate(classes):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.8, c=cmap(idx),
                    marker=markers[idx], label=cl)

    # Highlight test samples.  `is not None` + len() instead of plain
    # truthiness, which is ambiguous for numpy index arrays.
    if test_idx is not None and len(test_idx) > 0:
        X_test = X[test_idx, :]
        # Hollow circles: '' is not a valid colour, so the empty face is
        # requested explicitly.
        plt.scatter(X_test[:, 0],
                    X_test[:, 1],
                    facecolors='none',
                    edgecolors='black',
                    alpha=1.0,
                    linewidths=1,
                    marker='o',
                    s=55, label='test set')
Example #20
0
def base(Point):  # draw the base
    """Draw a base at ``Point``: a dotted bar with three slanted strokes.

    The bar is 100 tiny black dots spanning 0.3 on either side of
    ``Point[0]`` at height ``Point[1]``.  Three strokes start on the bar
    within 0.2 of ``Point[0]`` and each runs 0.15 to the left and 0.25
    down, drawn with the module-level ``line`` helper.
    """
    center_x, level = Point[0], Point[1]

    for x in np.linspace(center_x - 0.3, center_x + 0.3, 100):
        plt.scatter(x, level, color='k', s=0.1, marker='o', label=str)

    for t in np.linspace(center_x - 0.2, center_x + 0.2, 3):
        stroke_start = [t, level]
        stroke_end = [t - 0.15, level - 0.25]
        line(stroke_start, stroke_end, width=0.1)
Example #21
0
def disp_external(init_r=None,extfile=None,rad_scale=1000.,col_scale=1.,cut_zero=True,control=None):
    '''Display the external data stored in ``anal_data`` as a polar scatter plot.

    Parameters
    ----------
    init_r : optional offset; when truthy the radii become ``abs(r) + init_r``.
    extfile : optional text file loaded with ``numpy.loadtxt``; its first
        half of rows is stored as ``anal_data['ext_peri']`` and the second
        half as ``anal_data['ext_disp']``.
    rad_scale : factor applied to ``ext_disp`` to get the marker sizes.
    col_scale : factor applied to ``ext_peri`` to get the marker colours.
    cut_zero : drop points whose colour value is not positive.
    control : optional dict; when it has an ``'xstep'`` key, positions are
        built from ``xstep`` / ``ystep``, otherwise ``anal_data['gpos']`` is
        used.  Without either source the function returns without plotting.

    Angles are taken in degrees and converted to radians.
    '''
    if extfile:
        from numpy import loadtxt
        anal_data['ext_disp']=loadtxt(extfile)
        nrow=len(anal_data['ext_disp'])//2
        anal_data['ext_peri']=anal_data['ext_disp'][:nrow]
        anal_data['ext_disp']=anal_data['ext_disp'][nrow:]
    # gpos not correct

    rad=anal_data['ext_disp'].ravel()*rad_scale
    col=anal_data['ext_peri'].ravel()*col_scale

    if control and 'xstep' in control:
        from numpy import arange
        # NOTE(review): ncol is not defined in this function and nrow only
        # exists when extfile was given -- presumably module-level globals;
        # confirm before relying on this branch.
        ir=arange(0.,ncol)*control['xstep']
        # Was misspelled `ithet`, which left itheta undefined below.
        itheta=arange(0.,nrow)*control['ystep']
    elif 'gpos' in anal_data:
        ir,itheta=array(anal_data['gpos']).astype('float').transpose()
    else: return
    itheta*=pi/180.
    # abs(ir), not abs('ir'): abs() of a string literal raises TypeError.
    if init_r: ir=abs(ir)+init_r
    ix,iy=ir*cos(itheta),ir*sin(itheta)
    from matplotlib.pyplot import scatter

    if cut_zero:
        sel=col>0
        rad=rad[sel]
        col=col[sel]
        ix=ix[sel]
        iy=iy[sel]
    # NOTE(review): the `hold` keyword is rejected by recent matplotlib
    # releases; kept because dropping it changes behaviour on old ones.
    scatter(ix,iy,rad,col,hold=0)
Example #22
0
def plt_data():
    """Fit logistic regression to four fixed points and plot its regions.

    The training set is the four corners of the unit square; only (0, 0)
    carries label 0.  The predicted class is drawn on a fine mesh with the
    training points on top, and the figure is shown.
    """
    X = np.array([[0, 1], [1, 0], [1, 1], [0, 0]])
    Y = np.array([1, 1, 1, 0])

    h = .02  # step size in the mesh

    # Fit the logistic regression model to the four points.
    logreg = linear_model.LogisticRegression(C=1e5)
    logreg.fit(X, Y)

    # Evaluate the model on a mesh covering the data padded by 0.5.
    first, second = X[:, 0], X[:, 1]
    xx, yy = np.meshgrid(np.arange(first.min() - .5, first.max() + .5, h),
                         np.arange(second.min() - .5, second.max() + .5, h))
    Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    # Predicted class as a colour plot.
    plt.figure(1, figsize=(4, 3))
    plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)

    # Training points on top.
    plt.scatter(first, second, c=Y, edgecolors='k', cmap=plt.cm.Paired)
    plt.xlabel('Sepal length')
    plt.ylabel('Sepal width')

    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())
    plt.yticks(())

    plt.show()
Example #23
0
def plot_data(models, dataframe, flag=0):
    """Show one repair-count scatter per (module, component) key.

    Parameters
    ----------
    models : pair of dicts keyed by ``(module_category,
        component_category)``; ``models[0]`` is used when ``flag == 0`` and
        ``models[1]`` when ``flag == 1``.
    dataframe : frame with ``module_category``, ``component_category``,
        ``time`` and ``number_repair`` columns.
    flag : 0 plots every key of ``models[0]``; 1 plots every key of
        ``models[1]`` with a title that says whether the entry equals
        ``[1, 1, 1]`` ("too little data") or not ("no curve fit").
    """
    def scatter_group(key):
        # Rows of this module/component pair, drawn in a random colour.
        in_module = dataframe['module_category'] == key[0]
        in_component = dataframe['component_category'] == key[1]
        g = dataframe[in_module & in_component]
        plt.scatter(g['time'], g['number_repair'], c=np.random.rand(3, 1))
        plt.xlabel("Time")
        plt.ylabel("number of repairs")

    if flag == 0:
        for key in models[0]:
            scatter_group(key)
            plt.title("%s, and %s" % (key[0], key[1]))
            plt.show()

    if flag == 1:
        for key in models[1]:
            scatter_group(key)
            if models[1][key] == [1, 1, 1]:
                plt.title("too little data: %s, and %s" % (key[0], key[1]))
            else:
                plt.title("no curve fit: %s, and %s" % (key[0], key[1]))
            plt.show()
Example #24
0
  def export(self, query, n_topics, n_words, title="PCA Export", fname="PCAExport"):
    """Save a 2-D PCA scatter of the topics, labelled with a query match.

    Parameters
    ----------
    query : container of words; each topic is labelled with the first of
        its words found in ``query``.
    n_topics, n_words : forwarded to ``topics_to_vectorspace``; ``n_topics``
        is also the number of topics scanned for a query match.
    title : figure title.
    fname : file name handed to ``pyplot.savefig``.

    A topic without any query word gets an empty match, so every label
    stays aligned with its own topic.
    """
    vec = DictVectorizer()

    rows = topics_to_vectorspace(self.model, n_topics, n_words)
    # Densify once and reuse it for both fit and transform.
    dense = vec.fit_transform(rows).toarray()
    pca = skPCA(n_components=2)
    X_pca = pca.fit(dense).transform(dense)

    vocabulary_size = len(self.dictionary.keys())
    match = []
    for i in range(n_topics):
      topic = [t[1] for t in self.model.show_topic(i, vocabulary_size)]
      # Exactly one entry per topic; '' when nothing matches.  Appending
      # only on a hit used to shift the labels of all later topics.
      match.append(next((word for word in topic if word in query), ''))

    pyplot.figure()
    for i in range(X_pca.shape[0]):
      # Guard in case there are more projected rows than scanned topics.
      matched = match[i] if i < len(match) else ''
      pyplot.scatter(X_pca[i, 0], X_pca[i, 1], alpha=.5)
      pyplot.text(X_pca[i, 0], X_pca[i, 1], s=' '.join([str(i), matched]))

    pyplot.title(title)
    pyplot.savefig(fname)

    pyplot.close()
Example #25
0
def scatter(frame, var1, var2, var3=None, reg=False, **args):
    """Scatter ``var2`` against ``var1``, optionally grouped and with a fit.

    Parameters
    ----------
    frame : frame indexed by column name, or a ``copper.Dataset`` whose
        ``.frame`` attribute is used.
    var1, var2 : column names for the x and y axes (also the axis labels).
    var3 : optional column name; each distinct value becomes its own colour
        from the jet colormap and its own legend entry.
    reg : when true, overlay a red least-squares regression line computed
        over all rows.
    **args : forwarded to ``plt.scatter``.
    """
    import matplotlib.cm as cm

    if isinstance(frame, copper.Dataset):
        frame = frame.frame
    x = frame[var1]
    y = frame[var2]

    if var3 is None:
        plt.scatter(x.values, y.values, **args)
    else:
        options = list(set(frame[var3]))
        for i, option in enumerate(options):
            # Separate names: x and y must keep the full columns for the
            # regression below instead of ending up as the last group.
            subset = frame[frame[var3] == option]
            # float() forces true division; on Python 2 `i / len(options)`
            # is always 0 and every group got the same colour.
            group_color = cm.jet(float(i) / len(options), 1)
            plt.scatter(subset[var1], subset[var2], c=group_color,
                        label=option, **args)
        if options:
            plt.legend()  # once, after every group has been drawn

    if reg:
        slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
        line = slope * x + intercept  # regression line
        plt.plot(x, line, c='r')

    plt.xlabel(var1)
    plt.ylabel(var2)
Example #26
0
def kmeans(points, k):
    """Cluster ``points`` into ``k`` groups and scatter the result.

    ``k`` random points seed the centroids.  Labels and centroids are
    recomputed with ``getLabels`` / ``getCentroids`` until ``shouldStop``
    says so.  The final centroids are drawn as red stars and every point in
    the colour of its cluster; the iteration count is printed.

    Parameters
    ----------
    points : sequence of 2-D points.
    k : number of clusters.

    Returns
    -------
    The final centroids.
    """
    centroids = random.sample(points, k)

    allColors = list(colors.cnames.keys())

    iterations = 0
    oldCentroids = None

    while not shouldStop(oldCentroids, centroids, iterations):
        oldCentroids = centroids
        iterations += 1

        # numpy arrays are needed for the linear algebra in the helpers
        points = np.array(points)
        centroids = np.array(centroids)
        labels = getLabels(points, centroids)

        centroids = getCentroids(points, labels, k)

    # centroids as red stars
    x, y = zip(*centroids)
    plt.scatter(x, y, marker='*', color='r', s=80)

    # One colour per cluster.  The palette index steps by 6 (chosen to avoid
    # white) and wraps around, so it cannot run past the end of the colour
    # list when there are many clusters.
    counter = 0
    for centroid in labels.keys():
        cluster_color = allColors[counter % len(allColors)]
        for point in labels[centroid]:
            plt.scatter(point[0], point[1], color=cluster_color)
        counter += 6

    print (iterations)
    return centroids
    def plot_contour_with_labels(contour, frame_index=0):
        """
        Draw one frame of a contour with every point numbered.

        Parameters:
        contour -- array indexed as [point, coordinate, frame]; coordinate 0
                   is x and coordinate 1 is y
        frame_index -- which frame to draw

        The first half of the points is labelled towards the lower right,
        the second half towards the upper left.
        """
        contour_x = contour[:, 0, frame_index]
        contour_y = contour[:, 1, frame_index]
        plt.plot(contour_x, contour_y, 'r', lw=3)
        plt.scatter(contour_x, contour_y, s=35)

        half = len(contour_x) // 2
        label_box = dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5)
        label_arrow = dict(arrowstyle='->', connectionstyle='arc3,rad=0')
        for point_number, (x, y) in enumerate(zip(contour_x, contour_y)):
            # Indices are 0-based, so "< half" selects the first half.
            if point_number < half:
                xytext = (20, -20)
            else:
                xytext = (-20, 20)
            plt.annotate(
                str(point_number), xy=(x, y), xytext=xytext,
                textcoords='offset points', ha='right', va='bottom',
                bbox=label_box, arrowprops=label_arrow)
Example #28
0
def visualizeEigenvalues(eVal, verboseLevel):
    """Scatter eigenvalues in the complex plane, clipped to [-10, 10]^2.

    Values whose real or imaginary part is +/- infinity are dropped.

    Parameters
    ----------
    eVal : iterable of numbers with ``.real`` and ``.imag``.
    verboseLevel : ``>= 1`` prints the count and the min/max of the kept
        parts; exactly ``2`` additionally prints every kept value.
    """
    real = []
    imag = []
    for z in eVal:
        rp, im = z.real, z.imag
        if np.isinf(rp) or np.isinf(im):
            continue
        real.append(rp)
        imag.append(im)

    if verboseLevel >= 1:
        print("length of regular real values=" + str(len(real)))
        print("length of regular imag values=" + str(len(imag)))
        print("minimal real part=" + str(min(real)), "& maximal real part=" + str(max(real)))
        print("minimal imag part=" + str(min(imag)), "& maximal imag part=" + str(max(imag)))
    if verboseLevel == 2:
        print("all real values:", str(real))
        print("all imag values:", str(imag))

    plt.scatter(real, imag)
    plt.grid(True)
    plt.xlabel("realpart")
    plt.ylabel("imagpart")
    plt.xlim(-10, 10)
    plt.ylim(-10, 10)
    plt.show()
def plotscatterdate(x, y):
    """Show a scatter of price (``y``) against number of railways (``x``).

    The x axis starts at 0; its upper limit is left to matplotlib.
    """
    plt.scatter(x, y)
    plt.xlim(0)
    plt.title('Scatter of Price against Number of Railways')
    plt.xlabel('Number of Railways')
    plt.ylabel('Price in Pounds')
    plt.show()
Example #30
0
def plot_convergence():
    """Plot the error in ``smooth-error.out`` against resolution, log-log.

    Column 0 of the file is the number of zones and column 1 the absolute
    error.  A dashed second-order reference line is anchored at the first
    data point.  The figure is written to ``smooth_converge.eps``.
    """
    table = np.loadtxt("smooth-error.out")
    nx, aerr = table[:, 0], table[:, 1]

    ax = plt.subplot(111)
    ax.set_xscale('log')
    ax.set_yscale('log')

    plt.scatter(nx, aerr, marker="x", color="r")
    # Reference: the error falling as 1 / nx**2 from the first point.
    second_order = aerr[0] * (nx[0] / nx) ** 2
    plt.plot(nx, second_order, "--", color="k")

    plt.xlabel("number of zones")
    plt.ylabel("L2 norm of abs error")

    plt.title(r"convergence for smooth advection problem", fontsize=11)

    plt.gcf().set_size_inches(5.0, 5.0)

    plt.xlim(8, 256)

    plt.savefig("smooth_converge.eps", bbox_inches="tight")
# scar_loc = np.append(scar_loc,find_nearest_state(4.2,-4.0,e,overlap))
# scar_loc = np.append(scar_loc,find_nearest_state(5.2,-5.0,e,overlap))

# scar_loc = np.append(scar_loc,find_nearest_state(-1,-0.7,e,overlap))
# scar_loc = np.append(scar_loc,find_nearest_state(-1,-1.2,e,overlap))
# scar_loc = np.append(scar_loc,find_nearest_state(-1.5,-1.6,e,overlap))
# scar_loc = np.append(scar_loc,find_nearest_state(-1.5,-1.9,e,overlap))
# scar_loc = np.append(scar_loc,find_nearest_state(-2.4,-2.0,e,overlap))
# scar_loc = np.append(scar_loc,find_nearest_state(-2.4,-2.3,e,overlap))
# scar_loc = np.append(scar_loc,find_nearest_state(-3.2,-2.5,e,overlap))
# scar_loc = np.append(scar_loc,find_nearest_state(-4.2,-3.5,e,overlap))
# scar_loc = np.append(scar_loc,find_nearest_state(-4.2,-4.0,e,overlap))
# scar_loc = np.append(scar_loc,find_nearest_state(-5.2,-5.0,e,overlap))
# scar_loc = np.sort(scar_loc.astype(int))

# All states first, then the selected ones highlighted with large
# translucent markers: scar states in red, subband states in cyan.
plt.scatter(e, overlap)

for n, scar_state in enumerate(scar_loc):
    plt.scatter(e[scar_state], overlap[scar_state],
                s=200, color="red", alpha=0.6)

for n, subband_state in enumerate(subband_loc):
    plt.scatter(e[subband_state], overlap[subband_state],
                s=200, color="cyan", alpha=0.6)

plt.show()
Example #32
0
# Single-column frames taken from df by position: columns 0, 1, 2 and 4.
k1 = pd.DataFrame(df.iloc[:, 0:1])
k2 = pd.DataFrame(df.iloc[:, 1:2])
k3 = pd.DataFrame(df.iloc[:, 2:3])
diff = pd.DataFrame(df.iloc[:, 4:5])

# Least-squares coefficients through the pseudo-inverse of x; the inverse
# is indexed by x's columns with x's index as its columns.
xinv = pd.DataFrame(np.linalg.pinv(x.values), index=x.columns, columns=x.index)
theta = pd.DataFrame(np.dot(xinv, y))
print(theta)

# Fitted values and their root-mean-squared error against y.
output = pd.DataFrame(np.dot(x, theta))
rmse = np.sqrt(metrics.mean_squared_error(y, output))
print(rmse)

# Observed marks, then fitted marks, both against difficulty.
plt.scatter(diff, y)
plt.xlabel('difficulty')
plt.ylabel('average marks')

#plt.plot(diff,output,'r')
plt.scatter(diff, output)

plt.show()

#getting the python output to an html page

sum = 0
while (1):
    print('enter the mark distribution')
    ip = []
    for i in range(3):
Example #33
0
# Rows not selected by `mask` form the test set.
test = data[~mask]

# Fit a linear regression of CO2 on ENGINE_SIZE using the training rows.
from sklearn import linear_model
regr = linear_model.LinearRegression()
# Double brackets select a one-column frame, which becomes a 2-D array
# (assumes `train` is a pandas DataFrame -- TODO confirm).
train_x = np.asanyarray(train[["ENGINE_SIZE"]])  # feature column as an array
train_y = np.asanyarray(train[["CO2"]])
# After fitting, the model exposes regr.coef_ and regr.intercept_.
regr.fit(train_x, train_y)
# The fitted line is y = coef * x + intercept.
print('Coefficients: ', regr.coef_)
print('Intercept: ', regr.intercept_)

# Training points in blue with the fitted line in red on top.
plt.scatter(train.ENGINE_SIZE, train.CO2, color='blue')
plt.plot(train_x, regr.coef_[0][0] * train_x + regr.intercept_[0],
         '-r')  # coef_ is indexed as 2-D and intercept_ as 1-D here
plt.xlabel("ENGINE_SIZE")
plt.ylabel("Emission")
plt.show()

# ---- Evaluation on the held-out rows ----
# NOTE(review): r2_score is imported but not used in the visible part.
from sklearn.metrics import r2_score

test_x = np.asanyarray(test[['ENGINE_SIZE']])
test_y = np.asanyarray(test[['CO2']])
test_y_predict = regr.predict(test_x)

# Mean absolute difference between predictions and observed values.
print("Mean absolute error: {:.2f}".format(
    np.mean(np.absolute(test_y_predict - test_y))))
Example #34
0
# Standardise the feature and the target with separate scalers so that the
# target scaling can be undone on predictions later.
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()
X = sc_X.fit_transform(X)
y = sc_y.fit_transform(y)

# Fitting SVR to the dataset
from sklearn.svm import SVR
regressor = SVR(kernel='rbf')
# y is a column after scaling; pass it flattened, which is the shape the
# regressor's predictions come back in as well.
regressor.fit(X, y.ravel())

# Predicting a new result
# The prediction is 1-D, while the scaler works on 2-D columns: reshape for
# the inverse transform, then flatten again so y_pred keeps its 1-D shape.
y_pred = sc_y.inverse_transform(
    regressor.predict(sc_X.transform(np.array([[6.5]]))).reshape(-1, 1)).ravel()

# Visualising the Regression results
plt.scatter(X, y, color='red')
plt.plot(X, regressor.predict(X), color='blue')
plt.title('Truth or Bluff SVR Model')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()

# Visualising the Regression results (for higher resolution and smoother curve)
# X.min()/X.max() give scalars; the builtin min/max on a 2-D array would hand
# one-element arrays to np.arange.
X_grid = np.arange(X.min(), X.max(), 0.1)
X_grid = X_grid.reshape((len(X_grid), 1))
plt.scatter(X, y, color='red')
plt.plot(X_grid, regressor.predict(X_grid), color='blue')
plt.title('Truth or Bluff (Regression Model)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()
# Elbow method: fit KMeans for 1..10 clusters and record each inertia in wcss
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=42)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)
plt.plot(range(1, 11), wcss)
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()

# Fitting K-Means to the dataset
kmeans = KMeans(n_clusters=5, init='k-means++', random_state=42)
y_kmeans = kmeans.fit_predict(X)

# Visualising the clusters: one scatter call per cluster label, in label order
cluster_styles = [
    ('red', 'Cluster 1'),
    ('blue', 'Cluster 2'),
    ('green', 'Cluster 3'),
    ('cyan', 'Cluster 4'),
    ('magenta', 'Cluster 5'),
]
for cluster_id, (colour, name) in enumerate(cluster_styles):
    members = X[y_kmeans == cluster_id]
    plt.scatter(members[:, 0], members[:, 1], s=100, c=colour, label=name)
centroids = kmeans.cluster_centers_
plt.scatter(centroids[:, 0], centroids[:, 1], s=300, c='yellow', label='Centroids')
plt.title('data_scientist_v2')
plt.xlabel('Applications')
plt.ylabel('Selected applicants')
plt.legend()
plt.show()




    # Flatten the (x1, x2) grid into a list of 2-D points to classify
    x_show = np.stack((x1.flat, x2.flat), axis=1)  # test points
    print(x_show.shape)
    cm_light = mpl.colors.ListedColormap(['#A0FFA0', '#FFA0A0', '#A0A0FF'])
    cm_dark = mpl.colors.ListedColormap(['g', 'r', 'b'])
    y_show_hat = model.predict(x_show)  # predicted values
    # print( y_show_hat.shape)
    # print(y_show_hat)
    y_show_hat = y_show_hat.reshape(x1.shape)  # reshape to match the input grid
    print(y_show_hat)
    plt.figure(1, figsize=(10, 4), facecolor='w')
    plt.subplot(1, 2, 1)  # first subplot of a 1-row, 2-column layout
    plt.pcolormesh(x1, x2, y_show_hat, cmap=cm_light)  # show the predictions
    plt.scatter(x_test[:, 0],
                x_test[:, 1],
                c=y_test.ravel(),
                edgecolors='k',
                s=150,
                zorder=10,
                cmap=cm_dark,
                marker='*')  # test data
    plt.scatter(x[:, 0],
                x[:, 1],
                c=y.ravel(),
                edgecolors='k',
                s=40,
                cmap=cm_dark)  # all data
    plt.xlabel('sepal length', fontsize=15)
    plt.ylabel('sepal width', fontsize=15)
    plt.xlim(x1_min, x1_max)
    plt.ylim(x2_min, x2_max)
    plt.grid(True)
Example #37
0
    plt.plot(errors[0], label=kernel_name)
plt.legend(loc='best')
plt.xlabel('Days to predict')
plt.ylabel('Relative absolute error')
plt.title('Errors as a function of time')

# %%
# Our conclusion from the above is that the shape of the error does not
# depend much on the kernel

# %%
# We now plot the error after 4 days as a function of kernel params

plt.figure()
# Each value of errors_by_kernel is unpacked as (error, start, middle).
error, start, middle = zip(*errors_by_kernel.values())
# NOTE(review): the third positional argument of scatter is the marker size,
# and the same points are drawn again by the next call — confirm this first
# call is still wanted.
plt.scatter(start, middle, np.array(error)[:, 1])
# Marker size is taken from error column 1, marker colour from error column 3.
plt.scatter(start,
            middle,
            s=300 * np.array(error)[:, 1],
            c=np.array(error)[:, 3],
            marker='o')
plt.colorbar()
plt.xlabel('start parameter')
plt.ylabel('middle parameter')
plt.title('Errors as a function of ramp kernel parameter')

# %%
# These results tell us that we want a ramp with a length of 10 and
# ramping all the way

# %%
Example #38
0
    sin_default.append(float(splitted[1]))
    sin_quirez.append(float(splitted[2]))
    sin_ff.append(float(splitted[3]))
    sin_quirez_ff.append(float(splitted[4]))
    sin_float.append(float(splitted[5]))
    sin_mpfr.append(float(splitted[6]))

    sin_default_abs.append(abs(float(splitted[1]) - float(splitted[6])))
    sin_quirez_abs.append(abs(float(splitted[2]) - float(splitted[6])))
    sin_ff_abs.append(abs(float(splitted[3]) - float(splitted[6])))
    sin_quirez_ff_abs.append(abs(float(splitted[4]) - float(splitted[6])))
    sin_float_abs.append(abs(float(splitted[5]) - float(splitted[6])))

plt.rcParams.update({'font.size': 9})

# Compare three sine series over theta[beginIndex:endIndex].
plt.scatter(theta[beginIndex:endIndex], sin_mpfr[beginIndex:endIndex], color='xkcd:grey', marker = "o", label="Real", s = 70)
plt.scatter(theta[beginIndex:endIndex], sin_quirez_ff[beginIndex:endIndex], color='k', marker = "*", label="Our CORDIC (posit)")
plt.scatter(theta[beginIndex:endIndex], sin_float[beginIndex:endIndex], color='b', marker = "x", label="Naive CORDIC (float)")
# Fixed axis window and x ticks for this figure
plt.ylim(2.55 * pow(10, -6), 4 * pow(10, -6))
plt.xlim(1.57079245, 1.57079365)
plt.xticks([1.5707925, 1.5707930, 1.5707935])

# Tick labels rendered in math-text scientific notation.
# NOTE(review): _formatSciNotation is a private ScalarFormatter method and may
# not exist in every matplotlib version — confirm against the pinned version.
f = mtick.ScalarFormatter(useOffset=False, useMathText=True)
g = lambda x,pos : "${}$".format(f._formatSciNotation('%1.10e' % x))
#h = lambda x,pos : "$2^{{-{}}}$".format('%.f' % x)
# h currently has the same body as g
h = lambda x,pos : "${}$".format(f._formatSciNotation('%1.10e' % x))
plt.gca().yaxis.set_major_formatter(mtick.FuncFormatter(g))
plt.gca().xaxis.set_major_formatter(mtick.FuncFormatter(h))

plt.legend()
plt.xlabel(r"$\theta$")
Example #39
0
              stop=X_set[:, 0].max() + 1,
              step=0.01),
    np.arange(start=X_set[:, 1].min() - 1,
              stop=X_set[:, 1].max() + 1,
              step=0.01))
# Fill the (X1, X2) grid with the classifier's prediction at every grid point
plt.contourf(X1,
             X2,
             classifier.predict(np.array([X1.ravel(),
                                          X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75,
             cmap=ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
# One scatter call per distinct label so that each class gets its own colour
# and legend entry (i = position of the label, j = label value)
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0],
                X_set[y_set == j, 1],
                c=ListedColormap(('red', 'green'))(i),
                label=j)
plt.title('Logistic Regression (Training set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()

# Visualising the Test set results
from matplotlib.colors import ListedColormap
X_set, y_set = X_test, y_test
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1,
              stop=X_set[:, 0].max() + 1,
              step=0.01),
    np.arange(start=X_set[:, 1].min() - 1,
def plotting(points):
    """Print ``points`` and show a scatter plot of its first two columns.

    ``points`` must support two-dimensional slicing (``points[:, k]``).
    """
    print(points)
    xs, ys = points[:, 0], points[:, 1]
    plt.scatter(xs, ys)
    plt.show()
Example #41
0
x_test = x[split:]

# Target column reshaped to one column, then split at the same index as x
y = dataset[labels[1]].reshape(len(dataset[labels[1]]), 1)
y_train = y[:split]
y_test = y[split:]

#Creating the linear regression model object
reg = linear_model.LinearRegression()

#Fitting the model on the training data
reg.fit(x_train, y_train)

# The prints below use a single %-formatted string so that the output is the
# same whether the file is run under Python 2 or Python 3.

#Getting the coefficients that were produced for y = mx+b
print("Coefficients: %s" % (reg.coef_,))

#Calculating the mse (Mean Squared Error)
print("MSE: %s" % (np.mean((reg.predict(x_test) - y_test)**2),))

#Calculating the score of the model on the test split (printed as "Variance")
print("Variance: %s" % (reg.score(x_test, y_test),))

#Plotting the output
# Test points in black, fitted line in blue
plt.scatter(x_test, y_test, color='black')
plt.plot(x_test, reg.predict(x_test), color='blue', linewidth=3)

# Hide the tick marks on both axes
plt.xticks(())
plt.yticks(())

plt.show()
from matplotlib import pyplot as plt                   # First canonical import
import pandas as pd                                    # Second canonical import, for processing the dataset

plt.style.use('fivethirtyeight')                       # Set the plot style

data_set = pd.read_csv('Rap.csv')                      # Read the dataset from the CSV file
bpm = data_set['bpm']                                  # BPM value of each row
year = data_set['year']                                # Release year of each row

plt.scatter(                                           # Build and configure the scatter plot
	bpm, year,
	c=bpm,
	s=bpm*1.5,
	cmap='gist_heat',
	edgecolor='black',
	linewidth=.7
)

bar = plt.colorbar(                                    # Build the BPM colour bar
			orientation='horizontal',
			shrink=0.8,
			extend='both',
			extendfrac=.1
)

bar.set_label('Шкала ударов в минуту', fontsize=18)    # Colour bar label

plt.title('Популярность скорости '                     # Plot title
		  'исполнения в Rap\'е ', fontsize=25)

plt.xlabel('BPM', fontsize=18)                         # X-axis label
Example #43
0
    def optimize(self, x, y):
        """Run one optimisation step on the batch ``(x, y)`` and return its loss.

        The loss is the second item returned by ``self.forward_with_loss``.
        """
        optimizer = self.optimizer
        optimizer.zero_grad()
        _unused, batch_loss = self.forward_with_loss(x, y)
        batch_loss.backward()
        optimizer.step()

        return batch_loss


# train
model = Model()
for epoch in range(100):
    for x, y in data_loader:
        loss = model.optimize(x, y)
    # Report the loss of the last batch seen in this epoch
    print("Epoch {:4d} | loss : {:f}".format(epoch, loss))


# feed
# Evaluate the model over x_space; the call returns a (mu, sigma) pair
mu, sigma = model(torch.tensor(x_space))
# Detach before converting to NumPy arrays for plotting
mu = mu.detach().numpy()
sigma = sigma.detach().numpy()

# visualize
plt.scatter(train_x, train_y, marker='x', c='black', alpha=0.5, label='train data')
plt.plot(x_space, mu, label='prediction mean')
# Shade the band between mu - sigma and mu + sigma
plt.fill_between(x_space, mu + sigma, mu - sigma, alpha=0.5, label='+-')
plt.plot(x_space, true_y, label='true function')
plt.legend()
plt.ylim(-0.5, 1.3)
plt.show()
Example #44
0

# Combine each rule's antecedents and consequents (presumably to_list turns
# each item set into a list — verify), then sort the items of every rule.
ma_X = rules.antecedents.apply(to_list) + rules.consequents.apply(to_list)
ma_X = ma_X.apply(sorted)
rules_sets = list(ma_X)
# Distinct item lists; the order of `unique` follows set iteration order
unique = [list(m) for m in set(tuple(i) for i in rules_sets)]
# Position of the first rule carrying each distinct item list
index_rules = []
for i in unique:
    index_rules.append(rules_sets.index(i))

rules_r = rules.iloc[
    index_rules, :]  # getting rules without any redundancy, 18 rules
# NOTE(review): the sorted top-10 is neither assigned nor printed, so this
# line only shows something in an interactive session.
rules_r.sort_values('lift', ascending=False).head(10)

# visualizing results
# NOTE(review): no new figure is created between the plots below, so they are
# drawn onto the same axes and later labels/titles replace earlier ones.
plt.scatter(rules_r['support'], rules_r['confidence'], alpha=0.5)
plt.xlabel('support')
plt.ylabel('confidence')
plt.title('Support vs Confidence')

plt.scatter(rules_r['support'], rules_r['lift'], alpha=0.5)
plt.xlabel('support')
plt.ylabel('lift')
plt.title('Support vs lift')

# Straight-line fit of confidence against lift
fit = np.polyfit(rules_r['lift'], rules_r['confidence'],
                 1)  # 1 denotes the degree of the polynomial
fit_fn = np.poly1d(fit)
# Red squares: the data; line: the fitted polynomial evaluated at each lift
plt.plot(rules_r['lift'], rules_r['confidence'], 'rs', rules_r['lift'],
         fit_fn(rules_r['lift']))
plt.xlabel('lift')
stride = 1
# Lagged design matrix from the real data: the first column is the earliest
# window of `cases`; each further lag i adds one shifted window of `cases`
# and one of `mobility`.
XX = cases[0:cases.size - q - lag * dd:stride]
for i in range(1, lag):
    X = cases[i*dd:cases.size - q - (lag - i) * dd:stride]
    M = mobility[i*dd:mobility.size - q - (lag - i) * dd:stride]
    XX = np.column_stack((XX, X, M))
# Targets and their time stamps start lag*dd + q samples in
yy = cases[lag*dd+q::stride]
tt = t[lag*dd+q::stride]
model = Ridge(alpha=a, fit_intercept=False).fit(XX, yy)
print(XX)
print(model.intercept_, model.coef_)

# Same construction with mobility replaced by a constant -0.4 column. The
# column is sized from `cases` because its windows are stacked next to the
# `cases` windows and must have the same length.
fakeMobility = np.full((cases.size, 1), -.4)
ZZ = cases[0:cases.size - q - lag * dd:stride]
for i in range(1, lag):
    X = cases[i*dd:cases.size - q - (lag - i) * dd:stride]
    M = fakeMobility[i*dd:fakeMobility.size - q - (lag - i) * dd:stride]
    ZZ = np.column_stack((ZZ, X, M))
yy = cases[lag*dd+q::stride]
tt = t[lag*dd+q::stride]
model = Ridge(alpha=a, fit_intercept=False).fit(ZZ, yy)

fakePred = model.predict(ZZ)
# Each artist carries its own label, so the legend no longer depends on the
# order in which matplotlib collects lines and scatter collections.
plt.scatter(t, cases, color='black', label="training data")
plt.scatter(tt, fakePred, color='blue', label="predictions")
plt.plot(t, fakeMobility, color='r', label="mobility")
plt.title('Mobility Set To -40%')
plt.xlabel("time (days)")
plt.ylabel("#cases")
plt.legend(loc='upper right')
plt.show()
Example #46
0
def draw_Battery_Use(consumption, total_power, solar_power, wind_power, dic,
                     configuration, max_power):
    """Plot daily power curves and a simulated battery charge level.

    The top subplot shows the per-day average of ``total_power`` together with
    ``solar_power``, ``wind_power`` and ``consumption``, annotated with a
    summary of the configuration. The bottom subplot shows the battery charge
    obtained by accumulating the power surplus step by step, clamped to
    ``[0, total_storage]``; steps at which the battery was empty are marked in
    red on the zero line.

    Args:
        consumption: series plotted as the energy demand.
        total_power: NumPy array of production values; its first 8760 entries
            are reshaped to (365, 24) and averaged per row for the top plot,
            and the whole array drives the battery simulation.
        solar_power: series plotted as solar energy.
        wind_power: series plotted as wind energy.
        dic: mapping providing 'total_storage', 'cable_area' and 'cost'.
        configuration: sequence whose entries 0..11 are read as four
            (area, angle, orientation) triples and whose entry -2 is shown as
            the number of wind turbines.
        max_power: value shown as the maximum power output.
    """
    # Creating multiple text variables to display in the graph
    power_generated = total_power.sum()
    power = total_power
    total_power = np.mean(np.reshape(total_power[:8760], (365, 24)), axis=1)
    t1 = "Storage capacity: \nAmount of windturbines: \nCable area: \nMaximum Power Output: \nTotal Power Generated: \nTotal costs: "
    t2 = str(int(dic['total_storage'])) + " kWh\n" + \
        str(int(configuration[-2])) + "\n" + \
        str(int(dic['cable_area'])) + " mm²\n" + \
        str(int(max_power)) + " kW\n" + \
        str(int(power_generated)) + " kWh\n" +\
        '€' + str(int(dic['cost']))
    # Creating the solar stats text variables to display in the graph:
    # one line per panel whose area entry is positive
    t3 = ""
    for I in range(4):
        if configuration[0 + I * 3] > 0:
            t3 = t3 + "SP" + str(I + 1) + " - Area: " + str(int(configuration[0 + I*3])) +\
                "m² - Angle: " + str(int(configuration[1 + I*3])) +\
                "° - Orientation: " + str(int(configuration[2 + I*3])) + "°\n"

    plt.subplot(2, 1, 1)
    plt.plot(total_power,
             color='green',
             alpha=0.5,
             label='Total energy production')
    plt.plot(solar_power, color='yellow', alpha=0.5, label='Solar energy')
    plt.plot(wind_power, color='blue', alpha=0.5, label='Wind energy')
    plt.plot(consumption, color='red', label='Energy demand')
    # t1 (labels, right-aligned) and t2 (values, left-aligned) share one
    # anchor so that they read as two columns
    plt.text(330,
             total_power.max() * 1.04,
             t2,
             ha='left',
             va='top',
             style='italic',
             wrap=False)
    plt.text(330,
             total_power.max() * 1.04,
             t1,
             ha='right',
             va='top',
             wrap=False)
    plt.text(362,
             total_power.max() * 0.725,
             t3,
             ha='right',
             va='top',
             wrap=False)
    plt.legend(loc='upper center')
    plt.title("Power Average per Day")
    plt.xlabel('Days')
    plt.ylabel('kW')
    plt.xlim(0, 365)

    plt.subplot(2, 1, 2)
    capacity = int(dic['total_storage'])
    # NOTE(review): 6000 is presumably the constant demand subtracted from
    # production at every step — confirm the value and its unit.
    power = power - 6000
    # The simulation runs twice: the first pass starts from a full battery,
    # the second from the level the first pass ended on. Only the second
    # pass is plotted.
    for x in range(2):
        if x == 0:
            batterycharge = [capacity]
        else:
            batterycharge = [batterycharge[-1]]
        Powershortage = []
        for surplus in power:
            level = batterycharge[-1] + surplus
            if level > capacity:
                level = capacity
            elif level < 0:
                level = 0
                # index this step will have in batterycharge
                Powershortage.append(len(batterycharge))
            batterycharge.append(level)
    plt.plot(batterycharge, color='green', alpha=0.5)
    # Mark the steps at which the battery ran empty: x is the step index and
    # y is 0, matching the axes of the charge curve.
    if Powershortage:
        plt.scatter(Powershortage, np.zeros(len(Powershortage)), color='red')
    plt.title("Power supply level over a Year")
    plt.xlabel('Hour')
    plt.ylabel('kWh')
    plt.xlim(0, 8760)
    plt.show()
Example #47
0
    def DataPlot(self, data, name_of_bed, fig=False, save_data=False):
        '''
        Plot the cumulative mass retained against the phi scale for one bed.

        The curve is always drawn; ``fig`` and ``save_data`` independently
        control whether the figure and the data are also written to disk.

        args::
            data: table with 'Phi_scale' and 'Cummulative_Mass_Retained'
                columns; must provide ``to_csv`` when save_data is True
            name_of_bed: name of the bed (str); used in the title and as the
                stem of the output file names
            fig: False (default) only draws the chart; True also saves it as
                '<name_of_bed>.png'
            save_data: False (default) writes no data; True also saves
                ``data`` as '<name_of_bed>.csv'
        '''
        self.data = data
        self.name_of_bed = name_of_bed
        self.fig = fig
        self.save_data = save_data

        assert isinstance(
            self.name_of_bed, str
        ), f"Name of Bed must be a String, but {type(self.name_of_bed)} was passed."

        # Plot the cumulative mass retained vs the phi scale for easy picking
        # of the percentiles: a blue line with the data points in red.
        figure, ax = plt.subplots(figsize=(10, 8))
        plt.plot(self.data['Phi_scale'],
                 self.data['Cummulative_Mass_Retained'],
                 color='b')
        plt.scatter(self.data['Phi_scale'],
                    self.data['Cummulative_Mass_Retained'],
                    color='r')

        # Five minor intervals per major tick, with labelled minor ticks
        ax.xaxis.set_minor_locator(AutoMinorLocator(n=5))
        ax.yaxis.set_minor_locator(AutoMinorLocator(n=5))
        ax.xaxis.set_minor_formatter(FormatStrFormatter("%.1f"))
        ax.yaxis.set_minor_formatter(FormatStrFormatter("%.0f"))

        plt.rc('grid', linestyle="-", color='black')
        # NOTE(review): '1.5' is passed as a grey-level string; confirm the
        # intended spine colour, since grey levels are normally within 0-1.
        for side in ('bottom', 'top', 'right', 'left'):
            ax.spines[side].set_color('1.5')
        plt.grid(True, which='minor', color='k', linestyle='-')
        plt.grid(True, which='major', color='r', linestyle='-')

        plt.xlabel('Grain Size (Phi)', fontsize=16)
        plt.ylabel('Cumulative Mass Retained (%)', fontsize=16)
        plt.title(f'Cummulative Frequncy Curve {self.name_of_bed}',
                  fontsize=20,
                  fontweight='bold')

        # The two outputs are independent, so each flag is checked on its own.
        if self.fig:
            figure.savefig(f'{self.name_of_bed}.png', dpi=figure.dpi)
        if self.save_data:
            data.to_csv(f'{self.name_of_bed}.csv', index=False)
# Training targets: the first num_training entries of y
y_train = np.array(y[:num_training])

# Test data: the remaining entries, with X reshaped to a single column
X_test = np.array(X[num_training:]).reshape((num_test,1))
y_test = np.array(y[num_training:])

# Create linear regression object
linear_regressor = linear_model.LinearRegression()

# Train the model using the training sets
linear_regressor.fit(X_train, y_train)

# Predict the train data output and plot it over the training points
y_train_pred = linear_regressor.predict(X_train)
plt.figure()
plt.scatter(X_train, y_train, color='green')
plt.plot(X_train, y_train_pred, color='black', linewidth=4)
plt.title('Training data')
plt.show()

# Predict the test data output and plot it over the test points
y_test_pred = linear_regressor.predict(X_test)
plt.figure()
plt.scatter(X_test, y_test, color='green')
plt.plot(X_test, y_test_pred, color='black', linewidth=4)
plt.title('Test data')
# Hide the tick marks on both axes
plt.xticks(())
plt.yticks(())
plt.show()

# Measure performance
Example #49
0
# Quick inspection; the bare expressions only display a value when run
# interactively (data.info() prints by itself)
data.shape
data.dtypes
data.describe()
data.info()

#find rows where the number of returns is 0 and drop these rows
data[data['N1']==0].count()
data.drop(data[data.N1 == 0].index, inplace=True)
# Check the new shape and that no N1 == 0 rows remain
data.shape
data[data.N1 == 0]

#create refund column: A11902 divided by N11902, row by row
data['refund'] = data['A11902']/data['N11902']

#graphs: refund against A85300, then against N11901
plt.scatter(data['A85300'],data['refund'])
plt.show()
plt.scatter(data['N11901'],data['refund'])
plt.show()

# The string literal below holds disabled code and has no effect at runtime.
'''#categorical variables without zipcode
cat_var = df[['STATE','agi_stub']]
cat_var = cat_var.astype(str)
dummies = pd.get_dummies(cat_var)
dummies.columns
dummies.shape'''

#create percentage columns on a copy, as fractions of N1
df = data.copy()
df['single'] = df['mars1']/df['N1']
df['joint'] = df['MARS2']/df['N1']
Example #50
0
# Combine
# Concatenate the three point sets; the second and third are shifted by the
# *_eur and *_us offsets respectively
lat = np.hstack((np.array(y), np.array(y1)+lat_eur, np.array(y2)+lat_us))
lon = np.hstack((np.array(x), np.array(x1)+lon_eur, np.array(x2)+lon_us))


plt.figure(figsize=(10, 3.5))
labels = ["a", "b", "c"]
nbins = 12
# One panel per bin count: nbins, 2*nbins and 3*nbins
for _i in range(3):
    ax = plt.subplot(131 + _i, projection=AzimuthalEquidistant(0, 0))
    ax.set_global()
    lwsspy.maps.plot_map()

    # Weights computed relative to the point (0, 0)
    weights = lwsspy.geo.azi_weights(0, 0, lat, lon, nbins=nbins*(_i+1))
    # Points coloured by weight on a logarithmic colour scale
    plt.scatter(lon, lat, c=weights, cmap='rainbow',
                norm=LogNorm(vmin=min(weights), vmax=max(weights)),
                transform=PlateCarree(), edgecolors='k',
                linewidth=0.25)
    # Compact tick labels for the colour bar
    formatter = ticker.FuncFormatter(lambda y, _: '{:g}'.format(y))
    cb = lwsspy.plot.nice_colorbar(orientation='horizontal', ticks=[0.3, 0.4, 0.6, 1.0, 1.5, 2.0, 3.0],  # np.arange(0.3, 3.0, 0.3),
                                   format=formatter, aspect=40, pad=0.05)
    # Panel letter
    lwsspy.plot.plot_label(
        ax, f"{labels[_i]})", location=6, box=False, dist=0.0)
    cb.set_label("Weights")
    plt.title(f"$N_b = {nbins*(_i+1)}$", fontdict=dict(fontsize='small'))
    # Summary statistics of the weights
    lwsspy.plot.plot_label(
        ax,
        f"min: {np.min(weights):3.2f}\n"
        f"max: {np.max(weights):3.2f}\n"
        f"median: {np.median(weights):3.2f}\n",
        location=3, box=False, dist=-0.1, fontdict=dict(fontsize='small'))
Example #51
0
# Draw each cluster from the first three coordinates of its points. `col` is
# a running counter: it selects the colour, and the cluster reached when
# col == len(clusters) is drawn as small, faint white markers instead.
for cluster in clusters:
    x = [point[0] for point in cluster]
    y = [point[1] for point in cluster]
    z = [point[2] for point in cluster]
    if col != len(clusters):
        plt.plot(x, y, z, marker = "o", c = "{}".format(colors[col]), ms = 3)
    else:
        plt.plot(x, y, z, marker = "o", c = "w", alpha = 0.3, ms = 2)
    col += 1
    
# Elapsed time since `start`
# NOTE(review): time.clock() was removed in Python 3.8; switch together with
# the statement that sets `start` (not visible in this snippet).
print(time.clock() - start)
        

# time.perf_counter() replaces time.clock(), which no longer exists in
# Python 3.8+; only the difference between the two readings is used.
start = time.perf_counter()
fig = plt.figure()
ax = plt.gca()
# Black plot background (set_axis_bgcolor was replaced by set_facecolor)
ax.set_facecolor((0, 0, 0))
col = 1

# Draw each cluster from the first two coordinates of its points. `col` is a
# running counter: it selects the colour, and the cluster reached when
# col == len(clusters) is drawn as small, faint white markers instead.
for cluster in clusters:
    x = [point[0] for point in cluster]
    y = [point[1] for point in cluster]
    if col != len(clusters):
        plt.scatter(x, y, s = 10, c = "{}".format(colors[col]))
    else:
        plt.scatter(x, y, s = 5, c = "w", alpha = 0.15)
    col += 1

print(time.perf_counter() - start)
            


# --------------
import matplotlib.pyplot as plt

# code starts here        


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=6)
cols = X_train.columns
# 3x3 grid: one panel per feature, taken from the first nine columns
fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(15, 10))
for i in range(3):
    for j in range(3):
        col = cols[i * 3 + j]
        # Draw on the panel at (i, j); plt.scatter would put every feature
        # on the last-created axes.
        axes[i, j].scatter(X_train[col], y_train)
        axes[i, j].set_xlabel(col)
# code ends here



# --------------
# Code starts here
# Pairwise correlation of the training features (the bare `corr` only
# displays the table when run interactively)
corr = X_train.corr()
corr
# Remove the two star-rating columns from both splits
del X_train['play_star_rating']
del X_train['val_star_rating']
del X_test['play_star_rating']
del X_test['val_star_rating']
# Code ends here
Example #53
0
import matplotlib.pyplot as plt
from sklearn import datasets

def pca(data, base_num = 1):
	"""Return the leading principal axes of ``data`` as rows.

	``data`` is an (n, d) array of n samples with d dimensions (the original
	author notes that n > d is required). The covariance of the mean-centred
	data is eigen-decomposed and the ``min(base_num, d)`` eigenvectors with
	the largest eigenvalues are returned, largest first, one per row.
	"""
	n_samples, n_dims = data.shape

	centered = data - data.mean(0)
	cov = np.dot(centered.T, centered) / float(n_samples)

	eigvals, eigvecs = spla.eig(cov)
	n_keep = min(base_num, n_dims)
	# argsort is ascending: take the tail, then reverse to get largest first
	top = eigvals.argsort()[-n_keep :][:: -1]
	return eigvecs[:, top].T

if __name__ == "__main__":
	# Demo: first principal axis of the first two iris features.
	iris = datasets.load_iris()
	print(iris.data[:, :4])
	data = iris.data[:, :2]
	base = pca(data)
	#data = np.dot(data,base)

	# Visualisation starts here
	plt.scatter(data[:, 0], data[:, 1])
	leng = (data.max()-data.min())/2
	# pca() works on mean-centred data, so the principal axis passes through
	# the data mean; draw the line around that point rather than the origin.
	center = data.mean(0)
	offsets = np.array([-leng, leng])
	slope = base[0][1] / base[0][0]
	plt.plot(center[0] + offsets, center[1] + offsets * slope, "r")
	#plt.show()

Example #54
0
def visualize_points(x, y, alpha=1., colors=('red', 'green')):
    """Scatter the first two columns of ``x``, coloured by label.

    Each entry of ``y`` is used as an index into ``colors`` to pick the colour
    of the corresponding point; ``alpha`` is passed on to the scatter call.
    A grid is switched on afterwards.
    """
    point_colors = [colors[label] for label in y]
    plt.scatter(x[:, 0], x[:, 1], color=point_colors, alpha=alpha)
    plt.grid(True)
Example #55
0
    def finalize(self, population, engine):
        """Log the best individual's solution together with ``engine.ori_fmax``."""
        winner = population.best_indv(engine.fitness)
        summary = 'Optimal solution: ({}, {})'.format(winner.solution,
                                                      engine.ori_fmax)
        self.logger.info(summary)


if '__main__' == __name__:
    # Run the GA engine and print every generation
    engine.run(ng=500)
    best_indv = engine.population.best_indv(engine.fitness)
    print('Max({0},{1})'.format(best_indv.solution[0],
                                engine.fitness(best_indv)))
    # Sample y = -3 * (x - 30)^2 * sin(x) on [0, 15] for the plotted curve
    x = np.linspace(0, 15, 10000)
    y = [-3 * (i - 30)**2 * math.sin(i) for i in x]
    plt.plot(x, y)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title('function')
    plt.axis([-1, 16, -3000, 3000])
    # Mark the best individual found by the engine in red
    plt.scatter(best_indv.solution[0], engine.fitness(best_indv), color='r')
    # Rounded coordinates for the annotation text
    a = round(best_indv.solution[0], 4)
    b = round(engine.fitness(best_indv), 4)
    plt.annotate('Max(' + str(a) + ',' + str(b) + ')',
                 xy=(best_indv.solution[0], engine.fitness(best_indv)),
                 xytext=(7, 2500),
                 arrowprops=dict(facecolor='black', shrink=0.1, width=2))
    plt.show()
def show():
    """Cluster the data of ./one_hot.xlsx in PCA space and plot the result.

    Reads sheet '123', standardises it, projects it onto two principal
    components and clusters the projection into 8 groups with
    ``k_means_cluster``. The assignments are printed and written to sheet
    '234' of ./one_hot1.xlsx. The figure shows which centre is nearest at
    every grid point, the members of each cluster with their own marker, and
    the centres as black crosses; it is saved to ./whfypca.png and displayed.
    """
    d = pd.read_excel("./one_hot.xlsx", sheet_name='123')
    d = np.array(d)
    scaler = preprocessing.StandardScaler().fit(d)
    d = scaler.transform(d)
    # Visualise the clustering result: PCA(n_components=2) reduces the
    # standardised features to two dimensions so they can be drawn on a plane.
    pca_model = PCA(n_components=2)
    reduced_data = pca_model.fit_transform(d)
    iters, centers, assignments = k_means_cluster(reduced_data, 8)
    print(centers, assignments)

    assignments1 = pd.DataFrame(assignments)
    print(assignments1)
    assignments1.to_excel("./one_hot1.xlsx",
                          sheet_name="234",
                          index=False,
                          header=True)

    # h is the grid spacing
    h = .2
    # x_min/x_max and y_min/y_max fix the extent of the axes
    x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
    y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # Index of the nearest centre for every grid point
    xy_pts = np.column_stack((xx.ravel(), yy.ravel()))
    mytree = cKDTree(centers)
    dist, indexes = mytree.query(xy_pts)
    indexes = indexes.reshape(xx.shape)

    # Draw the figure with matplotlib
    plt.clf()
    plt.imshow(indexes,
               interpolation='nearest',
               extent=(xx.min(), xx.max(), yy.min(), yy.max()),
               cmap=plt.cm.Paired,
               aspect='auto',
               origin='lower')
    symbols = ['o', '^', 'D', 's', '.', ',', '<', '*']
    # One marker style per cluster. Both coordinates come from the same
    # member rows (the y values used to be read from row i, the cluster
    # number, instead of row j, the member).
    labels = np.asarray(assignments).ravel()
    for i in range(8):
        members = reduced_data[labels == i]
        plt.plot(members[:, 0], members[:, 1], symbols[i], markersize=10)

    plt.scatter(centers[:, 0],
                centers[:, 1],
                marker='x',
                color='black',
                s=169,
                linewidths=3,
                zorder=10)
    plt.title('K-means clustering')
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.savefig('./whfypca.png')
    plt.show()
Example #57
0
# Unsupervised projection of X onto two principal components
pca = PCA(n_components=2)
X_r = pca.fit(X).transform(X)

# Supervised projection of X onto two discriminant axes, fitted with y
lda = LinearDiscriminantAnalysis(n_components=2)
X_r2 = lda.fit(X, y).transform(X)

# Percentage of variance explained for each components
print('explained variance ratio (first two components): %s' % str(pca.explained_variance_ratio_))

plt.figure()
colors = ['navy', 'turquoise', 'darkorange']
lw = 2

# One scatter call per class (labels 0, 1, 2) so each gets a legend entry
for color, i, target_name in zip(colors, [0, 1, 2], target_names):
    plt.scatter(X_r[y == i, 0], X_r[y == i, 1], color=color, alpha=.8, lw=lw, label=target_name)
plt.legend(loc='best', shadow=False, scatterpoints=1)
plt.title('PCA of IRIS dataset')

# Same plot for the LDA projection, in a second figure
plt.figure()
for color, i, target_name in zip(colors, [0, 1, 2], target_names):
    plt.scatter(X_r2[y == i, 0], X_r2[y == i, 1], alpha=.8, color=color, label=target_name)
plt.legend(loc='best', shadow=False, scatterpoints=1)
plt.title('LDA of IRIS dataset')

# plt.show()

import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA, KernelPCA
Example #58
0
#!/usr/bin/env python
# -*- noplot -*-
"""
This example demonstrates how to set hyperlinks on various kinds of elements.

This currently only works with the SVG backend.
"""

import numpy as np
import matplotlib.cm as cm
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt

def _bivariate_normal(X, Y, sigmax=1.0, sigmay=1.0, mux=0.0, muy=0.0):
    """Return the zero-covariance bivariate Gaussian density at (X, Y).

    Local stand-in for ``matplotlib.mlab.bivariate_normal``, which was
    removed from Matplotlib in 3.1.  Only the uncorrelated case is
    implemented because that is all this script uses.

    X, Y are coordinate arrays (broadcast together); sigmax/sigmay are the
    standard deviations and mux/muy the means along each axis.
    """
    Xmu = X - mux
    Ymu = Y - muy
    z = Xmu ** 2 / sigmax ** 2 + Ymu ** 2 / sigmay ** 2
    return np.exp(-z / 2.0) / (2.0 * np.pi * sigmax * sigmay)


# --- Scatter plot: attach one URL per marker (None leaves a marker unlinked)
# and write the figure out as SVG.
f = plt.figure()
s = plt.scatter([1, 2, 3], [4, 5, 6])
s.set_urls(['http://www.bbc.co.uk/news', 'http://www.google.com', None])
f.canvas.print_figure('scatter.svg')

# --- Image plot: build a difference-of-Gaussians surface on a regular grid
# covering [-3, 3) in both directions with step `delta`.
f = plt.figure()
delta = 0.025
x = y = np.arange(-3.0, 3.0, delta)
X, Y = np.meshgrid(x, y)
Z1 = _bivariate_normal(X, Y, 1.0, 1.0, 0.0, 0.0)
Z2 = _bivariate_normal(X, Y, 1.5, 0.5, 1, 1)
Z = Z2 - Z1  # difference of Gaussians

im = plt.imshow(Z,
                interpolation='bilinear',
                cmap=cm.gray,
                origin='lower',
                extent=[-3, 3, -3, 3])
Example #59
0
#!/usr/bin/python3
import numpy as np
from matplotlib.pyplot import scatter
from matplotlib.pyplot import show
# Load the first ten comma-separated columns of the data file.
a = np.loadtxt('magic04.txt', delimiter=',', usecols=tuple(range(10)))

# Attribute one and attribute two (columns 0 and 1).
b, c = a[:, 0], a[:, 1]

# Covariance matrix of the two attributes (computed but not used below).
covariance = np.cov(b, c)

# Correlation-coefficient matrix of the two attributes.
corrc = np.corrcoef(b, c)
print(corrc)

# Scatter plot of attribute two against attribute one: marker size 20,
# points coloured by the value of attribute one, '.' marker.
scatter(b, c, s=20, c=b, marker=".")
show()
Example #60
0
# Fit a linear regression on the training split, score it on the test split,
# and plot both splits together with the fitted lines.
# feature_train/feature_test/target_train/target_test and
# train_color/test_color are expected to be defined earlier in the script.
from sklearn import linear_model
reg = linear_model.LinearRegression()
reg.fit(feature_train, target_train)
# print("Slope:", reg.coef_)
# print("Intercept:", reg.intercept_)

pred = reg.predict(feature_test)

# r2_score on the held-out targets: 1 is perfect prediction
from sklearn.metrics import r2_score
print('Variance score: %.3f' % r2_score(target_test, pred))

### draw the scatterplot, with color-coded training and testing points
import matplotlib.pyplot as plt
for feature, target in zip(feature_test, target_test):
    plt.scatter(feature, target, color=test_color)
for feature, target in zip(feature_train, target_train):
    plt.scatter(feature, target, color=train_color)

### labels for the legend
# Each labelled proxy is drawn on a point of its own split so that no point
# gets overpainted in the other split's colour (the "train" proxy used to be
# drawn on the first *test* point).
plt.scatter(feature_test[0], target_test[0], color=test_color, label="test")
plt.scatter(feature_train[0], target_train[0], color=train_color, label="train")

### draw the regression line, once it's coded
# Deliberately tolerant: a NameError here (some name not defined yet) just
# skips the line instead of aborting the script.
try:
    plt.plot(feature_test, reg.predict(feature_test))
except NameError:
    pass

# NOTE: this refits the same estimator on the *test* split and draws its
# predictions over the training features in green; after this line `reg` no
# longer holds the training fit.
reg.fit(feature_test, target_test)
plt.plot(feature_train, reg.predict(feature_train), color="g")