def main(self):
    """Render isotonic and linear regression fits of the 'x'/'y' fields as PNG.

    The first field returned by ``fields_by_key('x')`` and the first one
    returned by ``fields_by_key('y')`` are read through ``slice_data``
    (called with ``int``), fitted with an ``IsotonicRegression`` and a
    ``LinearRegression`` model, and plotted together with the vertical
    residual segments joining each observation to its isotonic fit.

    Returns:
        The ``StringIO`` buffer the PNG image was written to.
    """
    x_field = self.fields_by_key('x')[0]
    y_field = self.fields_by_key('y')[0]
    x = np.array(self.slice_data(x_field, int))
    y = np.array(self.slice_data(y_field, int))
    n = len(x)
    render = StringIO.StringIO()

    # Fit IsotonicRegression and LinearRegression models
    ir = IsotonicRegression()
    y_ = ir.fit_transform(x, y)

    lr = LinearRegression()
    lr.fit(x[:, np.newaxis], y)  # x needs to be 2d for LinearRegression

    # Residual segments must be anchored at the observation's x value (not
    # at its index) so that they line up with the points plotted below.
    segments = [[[x[i], y[i]], [x[i], y_[i]]] for i in range(n)]
    lc = LineCollection(segments, zorder=0)
    lc.set_array(np.ones(len(y)))
    lc.set_linewidths(0.5 * np.ones(n))

    fig = plt.figure()
    try:
        plt.plot(x, y, 'r.', markersize=12)
        plt.plot(x, y_, 'g.-', markersize=12)
        plt.plot(x, lr.predict(x[:, np.newaxis]), 'b-')
        plt.gca().add_collection(lc)
        plt.legend(('Data', 'Isotonic Fit', 'Linear Fit'), loc='lower right')
        plt.title('Isotonic regression')
        plt.savefig(render, format='png')
    finally:
        # The image lives in `render`; release the figure so repeated calls
        # do not accumulate open pyplot figures.
        plt.close(fig)
    return render
def plot_MDS():
    """Plot the difference matrix as a 2-D scatter with similarity-coloured edges.

    The matrix returned by ``fast_generate_diff_matrix()`` is projected to
    two PCA components and scattered.  Every ordered pair of points is
    joined by a line coloured by ``max_distance / distance * 100`` (pairs at
    distance zero get 0).  A 1-component MDS embedding is also computed and
    rescaled, but only its length is used for the plot.
    """
    diff_matrix = fast_generate_diff_matrix()
    X_true = diff_matrix
    similarities = euclidean_distances(diff_matrix)
    seed = 1

    mds = manifold.MDS(n_components=1, max_iter=3000, eps=1e-9,
                       random_state=seed, dissimilarity="precomputed",
                       n_jobs=1)
    pos = mds.fit(similarities).embedding_

    # Rescale the data
    pos *= np.sqrt((X_true ** 2).sum()) / np.sqrt((pos ** 2).sum())

    # Rotate the data
    clf = PCA(n_components=2)
    X_true = clf.fit_transform(X_true)

    # NOTE(review): `pos` has a single column (n_components=1 above) while
    # the PCA asks for two components -- confirm this is accepted by the
    # scikit-learn version in use.
    pos = clf.fit_transform(pos)

    plt.figure(1)
    ax = plt.axes([0., 0., 1., 1.])

    plt.scatter(X_true[:, 0], X_true[:, 1], c='r', s=20)
    # The labels must be a sequence of strings; a bare string would be
    # iterated character by character and the entry would read "T".
    plt.legend(('True position',), loc='best')

    # Invert the distances; the zero diagonal yields inf, which is reset to 0.
    with np.errstate(divide='ignore'):
        similarities = similarities.max() / similarities * 100
    similarities[np.isinf(similarities)] = 0

    # Plot the edges: one segment per ordered pair (i, j), in the same
    # row-major order as similarities.flatten().
    n_points = len(pos)
    segments = [[X_true[i, :], X_true[j, :]]
                for i in range(n_points) for j in range(n_points)]
    values = np.abs(similarities)
    lc = LineCollection(segments,
                        zorder=0, cmap=plt.cm.hot_r,
                        norm=plt.Normalize(0, values.max()))
    lc.set_array(similarities.flatten())
    lc.set_linewidths(0.5 * np.ones(len(segments)))
    ax.add_collection(lc)

    plt.show()
Esempio n. 3
0
def visualize(reader, visualization_method, value_column, segment_column):
    """Show the organised data as a dendrogram ('hc') or a 2-D map ('mds').

    Args:
        reader, value_column, segment_column: forwarded unchanged to
            ``organize_data`` together with ``visualization_method``.
        visualization_method: ``'hc'`` draws a hierarchical-clustering
            dendrogram; ``'mds'`` draws a PCA projection whose points are
            joined by similarity-coloured lines and annotated with their
            labels.  Any other value draws nothing.
    """
    labels, data = organize_data(reader, visualization_method, value_column, segment_column)

    if visualization_method == 'hc':
        link = linkage(data)
        dendrogram(link, leaf_label_func=lambda i: labels[i])
        plt.show()

    if visualization_method == 'mds':
        n = len(labels)
        data -= data.mean()
        clf = PCA(n_components=2)
        data = clf.fit_transform(data)

        similarities = euclidean_distances(data)

        # Add symmetric noise (zero on the diagonal) to the similarities
        noise = np.random.rand(n, n)
        noise = noise + noise.T
        noise[np.arange(noise.shape[0]), np.arange(noise.shape[0])] = 0
        similarities += noise

        plt.figure(1)
        ax = plt.axes([0., 0., 1., 1.])

        # Invert the distances; the zero diagonal yields inf, reset to 0.
        with np.errstate(divide='ignore'):
            similarities = similarities.max() / similarities * 100
        similarities[np.isinf(similarities)] = 0

        plt.scatter(data[:, 0], data[:, 1], c='r', s=20)
        # Labels must be a sequence of strings; passing the bare string
        # would be iterated per character and the entry would read "P".
        plt.legend(('Position',), loc='best')

        # One segment per ordered pair (i, j), in the same row-major order
        # as similarities.flatten().
        n_points = len(data)
        segments = [[data[i, :], data[j, :]]
                    for i in range(n_points) for j in range(n_points)]
        values = np.abs(similarities)
        lc = LineCollection(segments,
                            zorder=0, cmap=plt.cm.hot_r,
                            norm=plt.Normalize(0, values.max()))
        lc.set_array(similarities.flatten())
        lc.set_linewidths(0.5 * np.ones(len(segments)))
        ax.add_collection(lc)

        for label, x, y in zip(labels, data[:, 0], data[:, 1]):
            plt.annotate(
                label,
                xy=(x, y), xytext=(-20, 20),
                textcoords='offset points', ha='right', va='bottom',
                bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
                arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))

        plt.show()
Esempio n. 4
0
def plotRegression(x, y, y_, lr):
    """Plot data, its isotonic fit and a linear fit, then show the figure.

    Args:
        x: 1-D array of sample positions (indexed with ``x[:, np.newaxis]``).
        y: observed values, same length as ``x``.
        y_: isotonic fit of ``y``, same length as ``x``.
        lr: fitted model whose ``predict`` accepts a 2-D column of ``x``.
    """
    # `n` was never defined inside this function; derive it from the data.
    n = len(y)

    # Vertical residual segments from each observation to its isotonic fit,
    # anchored at the observation's x value so they meet the plotted points.
    segments = [[[x[i], y[i]], [x[i], y_[i]]] for i in range(n)]
    lc = LineCollection(segments, zorder=0)
    lc.set_array(np.ones(n))
    lc.set_linewidths(0.5 * np.ones(n))

    plt.figure()
    plt.plot(x, y, 'r.', markersize=12)
    plt.plot(x, y_, 'g.-', markersize=12)
    plt.plot(x, lr.predict(x[:, np.newaxis]), 'b-')
    plt.gca().add_collection(lc)
    plt.legend(('Data', 'Isotonic Fit', 'Linear Fit'), loc='lower right')
    plt.title('Isotonic regression')
    plt.show()
Esempio n. 5
0
class Tracks(object):
    """Draw storm tracks on an axes through a single LineCollection.

    ``tails`` optionally limits each track to the frames within that many
    frame indexes of the frame being displayed.
    """

    def __init__(self, ax, tails=None):
        """Create an empty LineCollection and attach it to ``ax``."""
        self.tracks = None
        self.tails = tails
        self.initialize_lines(ax)

    @staticmethod
    def create_trackmap(stormdata):
        """Return, for every track id, the row indexes of that track.

        Args:
            stormdata: mapping/record array with ``'track_id'`` and
                ``'frame_index'`` arrays.

        Returns:
            A list indexed by track id (0 .. max id); each entry is an
            array of row indexes sorted by ``frame_index``.  Empty input
            yields an empty list.
        """
        track_ids = stormdata['track_id']
        # np.max raises on an empty array; no rows simply means no tracks.
        if np.size(track_ids) == 0:
            return []

        trackmap = []
        for trackid in range(np.max(track_ids) + 1):
            indexes = np.where(track_ids == trackid)[0]
            # Makes sure the track segments are in chronological order
            indexes = indexes[np.argsort(stormdata['frame_index'][indexes])]
            trackmap.append(indexes)
        return trackmap

    def remove_lines(self):
        """Detach the current LineCollection from its axes, if any."""
        if self.tracks is not None:
            self.tracks.remove()
            self.tracks = None

    def initialize_lines(self, ax):
        """Replace any existing collection with a fresh, empty one on ``ax``."""
        self.remove_lines()
        self.tracks = LineCollection([])
        ax.add_collection(self.tracks)

    def update_lines(self, frame_index, stormdata):
        """Rebuild the track segments as they look at ``frame_index``.

        Only rows with ``frame_index`` at or before the requested frame are
        drawn; when ``tails`` is set, rows older than
        ``frame_index - tails`` are dropped as well.
        """
        segments = []
        for indexes in self.create_trackmap(stormdata):
            trackdata = stormdata[indexes]
            trackdata = trackdata[trackdata['frame_index'] <= frame_index]
            if self.tails:
                mask = trackdata['frame_index'] >= (frame_index - self.tails)
                trackdata = trackdata[mask]
            # There must always be something in a track, even if it is NaNs.
            # zip() is materialised: on Python 3 a zip object is always
            # truthy, which would defeat the NaN fallback below.
            points = list(zip(trackdata['xcent'], trackdata['ycent']))
            segments.append(points or [(np.nan, np.nan)])
        self.tracks.set_segments(segments)

    def lolite_line(self, indx):
        """Reset the line at ``indx`` to a width of 1."""
        self.hilite_line(indx, 1)

    def hilite_line(self, indx, lw=4):
        """Set the width of the line at ``indx`` to ``lw`` (no-op for None)."""
        if indx is not None:
            lws = self.tracks.get_linewidths()
            lws[indx] = lw
            self.tracks.set_linewidths(lws)
Esempio n. 6
0
def multidimensional_scaling(rdm, labels):
    """Embed a precomputed dissimilarity matrix in 2-D and display it.

    Args:
        rdm: square array of dissimilarities, handed to MDS as
            ``dissimilarity='precomputed'`` and used to colour the edges.
        labels: one annotation per embedded point, in row order.
    """
    # Fit the embedding and scale it by its largest coordinate.
    embedder = MDS(n_components=2, max_iter=3000, dissimilarity='precomputed')
    positions = embedder.fit(rdm).embedding_
    positions /= positions.max()

    # Full-figure axes holding the scatter of embedded points.
    plt.figure(1)
    axes = plt.axes([0., 0., 1., 1.])
    xs = positions[:, 0]
    ys = positions[:, 1]
    plt.scatter(xs, ys)

    # One edge per ordered pair of points, row-major like rdm.flatten().
    count = len(positions)
    edges = []
    for src in range(count):
        for dst in range(count):
            edges.append([positions[src, :], positions[dst, :]])

    colour_scale = plt.Normalize(0, np.abs(rdm).max())
    edge_lines = LineCollection(edges, zorder=0, cmap=plt.cm.YlGnBu,
                                norm=colour_scale)
    edge_lines.set_array(rdm.flatten())
    edge_lines.set_linewidths(2 * np.ones(len(edges)))
    axes.add_collection(edge_lines)

    # Label every point at its embedded position.
    for index, label in enumerate(labels):
        plt.annotate(label, (xs[index], ys[index]))

    plt.show()
Esempio n. 7
0
class HoughDemo(ImageProcessDemo):
    """Interactive demo of probabilistic Hough line and Hough circle detection.

    The grayscale image is Gaussian-blurred and run through Canny; lines are
    detected on the edge image and circles on the blurred image.  Detected
    shapes are drawn as matplotlib collections over the displayed image.
    """
    TITLE = u"Hough Demo"
    DEFAULT_IMAGE = "stuff.jpg"
    SETTINGS = ["th2", "show_canny", "rho", "theta", "hough_th",
                "minlen", "maxgap", "dp", "mindist", "param2",
                "min_radius", "max_radius", "blur_sigma",
                "linewidth", "alpha", "check_line", "check_circle"]

    check_line = Bool(True)
    check_circle = Bool(True)

    # Gaussian blur parameters
    blur_sigma = Range(0.1, 5.0, 2.0)
    show_blur = Bool(False)

    # Canny parameters (th2 is the high threshold; the low one is th2 / 2)
    th2 = Range(0.0, 255.0, 200.0)
    show_canny = Bool(False)

    # HoughLinesP parameters (theta is entered in degrees)
    rho = Range(1.0, 10.0, 1.0)
    theta = Range(0.1, 5.0, 1.0)
    hough_th = Range(1, 100, 40)
    minlen = Range(0, 100, 10)
    maxgap = Range(0, 20, 10)

    # HoughCircles parameters
    dp = Range(1.0, 5.0, 1.9)
    mindist = Range(1.0, 100.0, 50.0)
    param2 = Range(5, 100, 50)
    min_radius = Range(5, 100, 20)
    max_radius = Range(10, 100, 70)

    # draw parameters
    linewidth = Range(1.0, 3.0, 1.0)
    alpha = Range(0.0, 1.0, 0.6)

    def control_panel(self):
        """Return the grouped editor layout for all tunable parameters."""
        return VGroup(
            Group(
                Item("blur_sigma", label=u"标准方差"),
                Item("show_blur", label=u"显示结果"),
                label=u"高斯模糊参数"
            ),
            Group(
                Item("th2", label=u"阈值2"),
                Item("show_canny", label=u"显示结果"),
                label=u"边缘检测参数"
            ),
            Group(
                Item("rho", label=u"偏移分辨率(像素)"),
                Item("theta", label=u"角度分辨率(角度)"),
                Item("hough_th", label=u"阈值"),
                Item("minlen", label=u"最小长度"),
                Item("maxgap", label=u"最大空隙"),
                label=u"直线检测"
            ),
            Group(
                Item("dp", label=u"分辨率(像素)"),
                Item("mindist", label=u"圆心最小距离(像素)"),
                Item("param2", label=u"圆心检查阈值"),
                Item("min_radius", label=u"最小半径"),
                Item("max_radius", label=u"最大半径"),
                label=u"圆检测"
            ),
            Group(
                Item("linewidth", label=u"线宽"),
                Item("alpha", label=u"alpha"),
                HGroup(
                    Item("check_line", label=u"直线"),
                    Item("check_circle", label=u"圆"),
                ),
                label=u"绘图参数"
            )
        )

    def __init__(self, **kwargs):
        """Register redraw triggers and create the empty overlay collections."""
        super(HoughDemo, self).__init__(**kwargs)
        self.connect_dirty("th2, show_canny, show_blur, rho, theta, hough_th,"
                            "min_radius, max_radius, blur_sigma,"
                           "minlen, maxgap, dp, mindist, param2, "
                           "linewidth, alpha, check_line, check_circle")
        self.lines = LineCollection([], linewidths=2, alpha=0.6)
        self.axe.add_collection(self.lines)

        self.circles = EllipseCollection(
            [], [], [],
            units="xy",
            facecolors="none",
            edgecolors="red",
            linewidths=2,
            alpha=0.6,
            transOffset=self.axe.transData)

        self.axe.add_collection(self.circles)

    def _img_changed(self):
        """Cache a grayscale copy of the (BGR) image whenever it changes."""
        self.img_gray = cv2.cvtColor(self.img, cv2.COLOR_BGR2GRAY)

    def draw(self):
        """Recompute blur, edges, lines and circles and refresh the display."""
        img_smooth = cv2.GaussianBlur(self.img_gray, (0, 0), self.blur_sigma, self.blur_sigma)
        img_edge = cv2.Canny(img_smooth, self.th2 * 0.5, self.th2)

        # The intermediate images are single-channel grayscale, so they are
        # expanded with GRAY2BGR.  (A Bayer conversion would demosaic them
        # as if they were raw sensor data.)
        if self.show_blur and self.show_canny:
            show_img = cv2.cvtColor(np.maximum(img_smooth, img_edge), cv2.COLOR_GRAY2BGR)
        elif self.show_blur:
            show_img = cv2.cvtColor(img_smooth, cv2.COLOR_GRAY2BGR)
        elif self.show_canny:
            show_img = cv2.cvtColor(img_edge, cv2.COLOR_GRAY2BGR)
        else:
            show_img = self.img

        if self.check_line:
            theta = self.theta / 180.0 * np.pi  # degrees -> radians
            lines = cv2.HoughLinesP(img_edge,
                                    self.rho, theta, self.hough_th,
                                    minLineLength=self.minlen,
                                    maxLineGap=self.maxgap)

            if lines is not None:
                # Depending on the OpenCV version the result is shaped
                # (1, N, 4) or (N, 1, 4); reshaping handles both, whereas
                # taking lines[0] keeps a single line in the latter case.
                self.lines.set_segments(lines.reshape(-1, 2, 2))
                self.lines.set_visible(True)
            else:
                self.lines.set_visible(False)
        else:
            self.lines.set_visible(False)

        if self.check_circle:
            # 3 is the numeric value of the Hough-gradient method constant.
            circles = cv2.HoughCircles(img_smooth, 3,
                                       self.dp, self.mindist,
                                       param1=self.th2,
                                       param2=self.param2,
                                       minRadius=self.min_radius,
                                       maxRadius=self.max_radius)

            if circles is not None:
                circles = circles[0]
                # The collection exposes no public setters for these, so the
                # private attributes are written directly; both are set from
                # the detected radius column, as in the original code.
                self.circles._heights = self.circles._widths = circles[:, 2]
                self.circles.set_offsets(circles[:, :2])
                self.circles._angles = np.zeros(len(circles))
                self.circles._transOffset = self.axe.transData
                self.circles.set_visible(True)
            else:
                self.circles.set_visible(False)
        else:
            self.circles.set_visible(False)

        self.lines.set_linewidths(self.linewidth)
        self.circles.set_linewidths(self.linewidth)
        self.lines.set_alpha(self.alpha)
        self.circles.set_alpha(self.alpha)

        self.draw_image(show_img)
Esempio n. 8
0
def getStockMarketStructure(symbol_dict):
    """Plot the learned structure of a set of stocks and save it as SVG.

    Daily open/close variations are downloaded for every symbol, a sparse
    covariance model is fitted, the stocks are clustered with affinity
    propagation and laid out in 2-D with locally linear embedding.  Nodes
    are coloured by cluster and joined by edges weighted by partial
    correlation.

    Args:
        symbol_dict: mapping of ticker symbol -> display name.

    Returns:
        The file name (``id_generator() + '.svg'``) the figure was saved to.
    """
    # Fixed two-year window of historical quotes.
    d1 = datetime.datetime(2009, 1, 1)
    d2 = datetime.datetime(2011, 1, 1)

    # kraft symbol has now changed from KFT to MDLZ in yahoo
    symbols, names = np.array(list(symbol_dict.items())).T

    quotes = [finance.quotes_historical_yahoo(symbol, d1, d2, asobject=True)
              for symbol in symbols]

    # Builtin `float` replaces the removed `np.float` alias; the local names
    # avoid shadowing the builtin `open`.
    open_prices = np.array([q.open for q in quotes]).astype(float)
    close_prices = np.array([q.close for q in quotes]).astype(float)

    # The daily variations of the quotes are what carry most information
    variation = close_prices - open_prices

    # Learn a graphical structure from the correlations
    edge_model = covariance.GraphLassoCV()

    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X = variation.copy().T
    X /= X.std(axis=0)
    edge_model.fit(X)

    # Cluster using affinity propagation
    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()

    for i in range(n_labels + 1):
        print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))

    # Find a low-dimension embedding for visualization: find the best
    # position of the nodes (the stocks) on a 2D plane.
    # We use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control).
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)

    embedding = node_position_model.fit_transform(X.T).T

    # Visualization
    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Display a graph of the partial correlations
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding
    plt.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,
                cmap=plt.cm.spectral)

    # Plot the edges: one segment per pair above the threshold
    start_idx, end_idx = np.where(non_zero)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    lc = LineCollection(segments,
                        zorder=0, cmap=plt.cm.hot_r,
                        norm=plt.Normalize(0, .7 * values.max()))
    lc.set_array(values)
    lc.set_linewidths(15 * values)
    ax.add_collection(lc)

    # With a single cluster n_labels is 0; avoid dividing by zero when
    # mapping a label to a colour.
    color_scale = float(n_labels) if n_labels > 0 else 1.0

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    for index, (name, label, (x, y)) in enumerate(
            zip(names, labels, embedding.T)):

        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002

        plt.text(x, y, name, size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.spectral(label / color_scale),
                           alpha=.6))

    # np.ptp() is used instead of the ndarray.ptp method, which newer NumPy
    # releases no longer provide.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(embedding[0].min() - .15 * x_span,
             embedding[0].max() + .10 * x_span)
    plt.ylim(embedding[1].min() - .03 * y_span,
             embedding[1].max() + .03 * y_span)

    filename_1 = id_generator() + '.svg'
    plt.savefig(filename_1)
    return filename_1
    def dendrogram(self,
                   w=12,
                   h=17,
                   colors=10,
                   color_labels=True,
                   weight_nodes=True,
                   annotate=True):
        """
        Draw the topic dendrogram from ``self.Z``, save it and show it.

        :w: Figure width passed to ``figsize``.
        :h: Figure height passed to ``figsize``.
        :colors: Approx. no of color clusters in figure (stored on
            ``self.colors``).
        :color_labels: Colour each y tick label with the colour of the link
            its leaf belongs to.
        :weight_nodes: Redraw the links as segments whose widths come from
            ``self.segment_path``.
        :annotate: Add the hard-coded cluster captions.
        :returns: ``(fig, self.ax, R)`` where ``R`` is the dict returned by
            ``hierarchymod.dendrogram``.
        """
        self.labels = self.parse_topic_labels('labels')
        self.colors = colors
        fig = plt.figure(figsize=(w, h))
        plt.xlabel("Distance")

        R = hierarchymod.dendrogram(
            self.Z,
            orientation='right',
            distance_sort='descending',
            show_leaf_counts=False,
            no_plot=False,
            leaf_label_func=self._labelpicker,
            link_color_func=self._colorpicker)

        self.ax = plt.gca()

        if weight_nodes:
            self.get_node_weights()

            # Assumes orientation left or right.  Each entry of self.lines
            # ends up as: four (x, y) tuples, then the same coordinates as
            # a float array (index 4), then the link's entry from
            # R['i_list'] (index 5).
            self.lines = []
            for (xline, yline) in zip(R['dcoord'], R['icoord']):
                self.lines.append(list(zip(xline, yline)))
            for i, line in enumerate(self.lines):
                line.append(np.array(line, dtype=float))
                line.append(R['i_list'][i])

            old_colls = list(self.ax.collections)
            new_colls = []
            for c in old_colls:
                segments = []
                widths = []
                color = c.get_color()
                for p in c.get_paths():
                    # Find the dendrogram link drawn by this path; the last
                    # match wins, as before.
                    matched = None
                    for line in self.lines:
                        if np.equal(line[4], p.vertices).all():
                            matched = self.segment_path(p.vertices, line[5])
                    if matched is None:
                        # No link corresponds to this path: skip it instead
                        # of re-adding the previous path's segments.
                        continue
                    s, w = matched
                    segments.extend(s)
                    widths.extend(w)
                coll = LineCollection(segments)
                coll.set_color(color)
                coll.set_linewidths(widths)
                new_colls.append(coll)

            # Replace old line collections.  Artists are removed one by one
            # because the axes' collection list cannot be reassigned on
            # current matplotlib.
            for c in old_colls:
                c.remove()
            for c in new_colls:
                self.ax.add_collection(c)

        if color_labels:
            # Map leaf position -> link colour; leaves sit at 5, 15, 25, ...
            self.cluster_idxs = {}
            for c, pi in zip(R['color_list'], R['icoord']):
                for leg in pi[1:3]:
                    i = (leg - 5.0) / 10.0
                    if abs(i - int(i)) < 1e-5:
                        self.cluster_idxs[int(i)] = c

            ylbls = self.ax.get_ymajorticklabels()
            for c, y in enumerate(ylbls):
                y.set_color(self.cluster_idxs[c])

            # tempfix: hard-coded recolouring of one label; guarded so that
            # smaller dendrograms do not fail on the fixed indexes.
            if len(ylbls) > 11 and 12 in self.cluster_idxs:
                ylbls[11].set_color(self.cluster_idxs[12])

        self.ax.set_xlim(left=0.6)
        if annotate:
            # Captions positioned by hand in data coordinates.
            self.ax.annotate("Fiscal policy\nand corporate finance",
                             (1.12, 43))
            self.ax.annotate("Financial markets", (1.13, 133))
            self.ax.annotate("Entertainment", (1.14, 225))
            self.ax.annotate("Labor market, career \nand organization",
                             (1.03, 295))
            self.ax.annotate("Politics", (1.085, 383))
            self.ax.annotate("Sports", (1.05, 460))
            self.ax.annotate("US & UK", (1.0, 600))
            self.ax.annotate("Industry and trade", (1.08, 770))

        plt.tight_layout()
        fig.savefig(os.path.join(params().paths['lda'],
                                 'dendrogram' + str(self.num_topics) + '.pdf'),
                    dpi=300)
        fig.savefig(os.path.join(params().paths['lda'],
                                 'dendrogram' + str(self.num_topics) + '.png'),
                    dpi=300)

        plt.show()
        return fig, self.ax, R
Esempio n. 10
0
class SunPlotPy(wx.Frame, Spatial, Grid ):
    """ 
    The main frame of the application
    """
    title = 'sunplot(py)'

    # Plotting options
    autoclim=True
    showedges=False
    bgcolor='k'
    textcolor='w'
    cmap='RdBu'
    particlesize = 1.8
    particlecolor = 'm'

    # other flags
    collectiontype='cells'
    oldcollectiontype='cells'

    # 
    tindex=0 
    depthlevs = [0., 10., 100., 200., 300., 400., 500.,\
        1000.,2000.,3000.,4000.,5000]

    _FillValue=999999
    
    def __init__(self):
        """Initialise the frame, then build menu, status bar and main panel."""
        wx.Frame.__init__(self, None, -1, self.title)

        # Build the UI pieces in order; the figure itself is drawn later.
        builders = (
            self.create_menu,
            self.create_status_bar,
            self.create_main_panel,
        )
        for build in builders:
            build()

    def create_menu(self):
        """Build the File, Tools and Help menus and attach the menu bar."""
        self.menubar = wx.MenuBar()

        def populate(entries):
            # Create one menu; each entry is (label, help text, handler),
            # and None stands for a separator.
            menu = wx.Menu()
            for entry in entries:
                if entry is None:
                    menu.AppendSeparator()
                    continue
                text, help_text, handler = entry
                item = menu.Append(-1, text, help_text)
                self.Bind(wx.EVT_MENU, handler, item)
            return menu

        # File menu: data loading, export and exit
        menu_file = populate([
            ("&Open file\tCtrl-O", "Open netcdf file", self.on_open_file),
            ("&Load grid\tCtrl-G", "Load SUNTANS grid from folder",
             self.on_load_grid),
            ("&Load PTM file\tCtrl-Shift-P", "Load a PTM file",
             self.on_load_ptm),
            ("&Save animation of current scene\tCtrl-S", "Save animation",
             self.on_save_anim),
            ("&Print current scene\tCtrl-P", "Save figure", self.on_save_fig),
            None,
            ("E&xit\tCtrl-X", "Exit", self.on_exit),
        ])

        # Tools menu: grid diagnostics and overlays
        menu_tools = populate([
            ("&Plot grid size statistics", "SUNTANS grid size",
             self.on_plot_gridstat),
            ("&Count # grid cells", "Grid cell count", self.on_count_cells),
            ("&Overlay depth contours", "Depth overlay",
             self.on_overlay_bathy),
        ])

        # Help menu
        menu_help = populate([
            ("&About\tF1", "About the demo", self.on_about),
        ])

        # Add all of the menus to the bar
        for menu, caption in ((menu_file, "&File"),
                              (menu_tools, "&Tools"),
                              (menu_help, "&Help")):
            self.menubar.Append(menu, caption)
        self.SetMenuBar(self.menubar)

    def create_main_panel(self):
        """ Creates the main panel with all the controls on it:
             * mpl canvas
             * mpl navigation toolbar
             * Control panel for interaction
        """
        self.panel = wx.Panel(self)

        # Create the mpl Figure and FigCanvas objects.
        # 7x6 inches, 100 dots-per-inch
        self.dpi = 100
        self.fig = Figure((7.0, 6.0), dpi=self.dpi)
        self.canvas = FigCanvas(self.panel, -1, self.fig)

        # Since we have only one plot, we can use add_axes
        # instead of add_subplot, but then the subplot
        # configuration tool in the navigation toolbar wouldn't
        # work.
        self.axes = self.fig.add_subplot(111)

        ########
        # Create widgets
        ########
        self.variable_list = wx.ComboBox(
            self.panel,
            size=(200, -1),
            choices=['Select a variable...'],
            style=wx.CB_READONLY)
        self.variable_list.Bind(wx.EVT_COMBOBOX, self.on_select_variable)

        self.time_list = wx.ComboBox(
            self.panel,
            size=(200, -1),
            choices=['Select a time step...'],
            style=wx.CB_READONLY)
        self.time_list.Bind(wx.EVT_COMBOBOX, self.on_select_time)

        self.depthlayer_list = wx.ComboBox(
            self.panel,
            size=(200, -1),
            choices=['Select a vertical layer...'],
            style=wx.CB_READONLY)
        self.depthlayer_list.Bind(wx.EVT_COMBOBOX, self.on_select_depth)

        self.show_edge_check = wx.CheckBox(
            self.panel, -1,
            "Show Edges",
            style=wx.ALIGN_RIGHT)
        self.show_edge_check.Bind(wx.EVT_CHECKBOX, self.on_show_edges)

        if USECMOCEAN:
            cmaps = []
            for cmap in cm.cmapnames:
                cmaps.append(cmap)
                cmaps.append(cmap + '_r')  # Add all reverse map options
        else:
            # Use matplotlib standard
            cmaps = matplotlib.cm.datad.keys()

        # sorted() always yields a list; calling .sort() on the keys view
        # fails on Python 3, where keys() is not a list.
        cmaps = sorted(cmaps)
        self.colormap_list = wx.ComboBox(
            self.panel,
            size=(100, -1),
            choices=cmaps,
            style=wx.CB_READONLY)
        self.colormap_list.Bind(wx.EVT_COMBOBOX, self.on_select_cmap)
        self.colormap_label = wx.StaticText(self.panel, -1, "Colormap:")

        self.clim_check = wx.CheckBox(
            self.panel, -1,
            "Manual color limits ",
            style=wx.ALIGN_RIGHT)
        self.clim_check.Bind(wx.EVT_CHECKBOX, self.on_clim_check)

        self.climlow = wx.TextCtrl(
            self.panel,
            size=(100, -1),
            style=wx.TE_PROCESS_ENTER)
        self.climlow.Bind(wx.EVT_TEXT_ENTER, self.on_climlow)

        self.climhigh = wx.TextCtrl(
            self.panel,
            size=(100, -1),
            style=wx.TE_PROCESS_ENTER)
        self.climhigh.Bind(wx.EVT_TEXT_ENTER, self.on_climhigh)

        # Labels
        self.variable_label = wx.StaticText(self.panel, -1, "Variable:", size=(200, -1))
        self.time_label = wx.StaticText(self.panel, -1, "Time step:", size=(200, -1))
        self.depth_label = wx.StaticText(self.panel, -1, "Vertical level:", size=(200, -1))

        # Create the navigation toolbar, tied to the canvas
        self.toolbar = NavigationToolbar(self.canvas)

        #########
        # Layout with box sizers
        #########
        self.vbox = wx.BoxSizer(wx.VERTICAL)
        self.vbox.Add(self.canvas, 1, wx.LEFT | wx.TOP | wx.GROW)
        self.vbox.Add(self.toolbar, 0, wx.EXPAND)

        self.vbox.AddSpacer(10)

        flags = wx.ALIGN_LEFT | wx.ALL | wx.ALIGN_CENTER_VERTICAL

        # Row of display options (added to the sizer last, see below)
        self.hbox0 = wx.BoxSizer(wx.HORIZONTAL)
        self.hbox0.Add(self.show_edge_check, 0, border=10, flag=flags)
        self.hbox0.Add(self.colormap_label, 0, border=10, flag=flags)
        self.hbox0.Add(self.colormap_list, 0, border=10, flag=flags)
        self.hbox0.Add(self.clim_check, 0, border=10, flag=flags)
        self.hbox0.Add(self.climlow, 0, border=10, flag=flags)
        self.hbox0.Add(self.climhigh, 0, border=10, flag=flags)

        # Row of captions for the selection boxes
        self.vbox.AddSpacer(5)
        self.hbox1 = wx.BoxSizer(wx.HORIZONTAL)
        self.hbox1.Add(self.variable_label, 0, border=10, flag=flags)
        self.hbox1.Add(self.time_label, 0, border=10, flag=flags)
        self.hbox1.Add(self.depth_label, 0, border=10, flag=flags)

        # Row of selection boxes
        self.vbox.AddSpacer(5)
        self.hbox2 = wx.BoxSizer(wx.HORIZONTAL)
        self.hbox2.Add(self.variable_list, 0, border=10, flag=flags)
        self.hbox2.Add(self.time_list, 0, border=10, flag=flags)
        self.hbox2.Add(self.depthlayer_list, 0, border=10, flag=flags)

        self.vbox.Add(self.hbox1, 0, flag=wx.ALIGN_LEFT | wx.TOP)
        self.vbox.Add(self.hbox2, 0, flag=wx.ALIGN_LEFT | wx.TOP)
        self.vbox.Add(self.hbox0, 0, flag=wx.ALIGN_LEFT | wx.TOP)

        self.panel.SetSizer(self.vbox)
        self.vbox.Fit(self)
    
    ##########
    # Event functions
    ##########

    def create_figure(self):
        """
        Creates the figure

        Builds a new collection from ``self.data`` (filled cells or edge
        lines depending on ``self.collectiontype``), swaps it in for the
        previous one, applies the colour limits, updates title/labels and
        colorbar, and redraws the canvas.
        """
        # Find the colorbar limits if unspecified
        if self.autoclim:
            self.clim = [self.data.min(), self.data.max()]
            self.climlow.SetValue('%3.1f' % self.clim[0])
            self.climhigh.SetValue('%3.1f' % self.clim[1])

        # `in` replaces dict.has_key, which does not exist on Python 3.
        if 'collection' in self.__dict__:
            self.axes.collections.remove(self.collection)
        else:
            # First call - set the axes limits
            self.axes.set_aspect('equal')
            self.axes.set_xlim(self.xlims)
            self.axes.set_ylim(self.ylims)

        if self.collectiontype == 'cells':
            self.collection = PolyCollection(self.xy, cmap=self.cmap)
            self.collection.set_array(np.array(self.data[:]))
            if not self.showedges:
                # Colour the edges like the faces so they are not visible.
                self.collection.set_edgecolors(
                    self.collection.to_rgba(np.array((self.data[:]))))
        elif self.collectiontype == 'edges':
            xylines = [self.xp[self.edges], self.yp[self.edges]]
            # Materialise each zip: on Python 3 it is a one-shot iterator,
            # not a sequence of (x, y) points.
            linesc = [list(zip(xylines[0][ii, :], xylines[1][ii, :]))
                      for ii in range(self.Ne)]
            self.collection = LineCollection(linesc, array=np.array(self.data[:]), cmap=self.cmap)

        self.collection.set_clim(vmin=self.clim[0], vmax=self.clim[1])

        self.axes.add_collection(self.collection)
        # Note: this replaces the instance's `title` with the returned
        # title object.
        self.title = self.axes.set_title(self.genTitle(), color=self.textcolor)
        self.axes.set_xlabel('Easting [m]')
        self.axes.set_ylabel('Northing [m]')

        # create a colorbar
        if 'cbar' not in self.__dict__:
            self.cbar = self.fig.colorbar(self.collection)
        else:
            # Parenthesised form prints the same text on Python 2 and 3.
            print('Updating colorbar...')
            self.cbar.on_mappable_changed(self.collection)

        self.canvas.draw()
   
    def update_figure(self):
        """
        Refresh the existing plot with the data currently loaded.

        Recomputes the colour limits (automatic, or read back from the two
        limit text boxes), switches between the 'cells' and 'edges' collection
        types when the selected variable requires it, then updates the
        collection values, title and colorbar and redraws the canvas.
        """
        if not self.autoclim:
            # Manual limits: take whatever is typed in the text boxes
            self.clim = [float(self.climlow.GetValue()),
                         float(self.climhigh.GetValue())]
        else:
            # Automatic limits: span the data and echo them in the text boxes
            self.clim = [self.data.min(), self.data.max()]
            self.climlow.SetValue('%3.1f' % self.clim[0])
            self.climhigh.SetValue('%3.1f' % self.clim[1])

        # Decide whether the variable lives on edges or on cells
        if self.hasDim(self.variable, self.griddims['Ne']):
            self.collectiontype = 'edges'
        elif self.hasDim(self.variable, self.griddims['Nc']):
            self.collectiontype = 'cells'

        # A change of collection type needs a brand new collection
        if self.collectiontype != self.oldcollectiontype:
            self.create_figure()
            self.oldcollectiontype = self.collectiontype

        artist = self.collection
        artist.set_array(np.array(self.data[:]))
        artist.set_clim(vmin=self.clim[0], vmax=self.clim[1])

        # Edge styling only applies to cell polygons
        if self.collectiontype == 'cells':
            if self.showedges:
                artist.set_edgecolors('k')
                artist.set_linewidths(0.2)
            else:
                # Paint the outlines in the face colour so they disappear
                artist.set_edgecolors(artist.to_rgba(np.array(self.data[:])))

        # Title, colorbar, then redraw
        self.title = self.axes.set_title(self.genTitle(), color=self.textcolor)
        self.cbar.update_normal(self.collection)
        self.canvas.draw()
    
    def on_pick(self, event):
        """
        Report the bounding box of a picked artist in a modal dialog.

        ``event`` is a matplotlib.backend_bases.PickEvent; only the bounding
        box of ``event.artist`` is used.
        """
        corners = event.artist.get_bbox().get_points()

        dialog = wx.MessageDialog(
            self,
            "You've clicked on a bar with coords:\n %s" % corners,
            "Click!",
            wx.OK | wx.ICON_INFORMATION)
        dialog.ShowModal()
        dialog.Destroy()
    
    def on_select_variable(self, event):
        """
        Handle a new choice in the variable drop-down list.

        Loads the chosen variable, rebuilds the depth-layer list to match its
        vertical dimension (a single empty entry when it has none) and
        refreshes the plot.
        """
        chosen = event.GetString()
        self.flash_status_message("Selecting variable: %s"%chosen)

        # Remember the selection and load its data
        self.variable = chosen
        self.loadData(variable=chosen)

        # Build the labels for the vertical layer box
        if self.hasDim(chosen, self.griddims['Nk']):
            # Layer-centred variable: one label per layer plus two shortcuts
            labels = ['%3.1f'%self.z_r[k] for k in range(self.Nkmax)]
            labels.extend(['surface', 'seabed'])
        elif self.hasDim(chosen, 'Nkw'):
            # Variable on the 'Nkw' dimension: Nkmax+1 levels taken from z_w
            labels = ['%3.1f'%self.z_w[k] for k in range(self.Nkmax+1)]
        else:
            # No depth coordinate
            labels = ['']

        self.depthlayer_list.SetItems(labels)

        # Update the plot
        self.update_figure()



    def on_select_time(self, event):
        """
        Handle a new choice in the time drop-down list.

        For 'hydro' plots the data are reloaded and the figure refreshed, but
        only when the selected index differs from the current time step.  For
        'particles' plots the particle positions at the selected index are
        drawn, keeping the current axes limits.
        """
        self.tindex = event.GetSelection()
        # Update the object time index and reload the data
        if self.plot_type == 'hydro':
            if self.tstep != self.tindex:
                self.tstep = self.tindex
                self.loadData()
                # The message used to read "Selecting variable", a copy-paste
                # slip from on_select_variable
                self.flash_status_message(
                    "Selecting time step: %s..." % event.GetString())

                # Update the plot
                self.update_figure()
        elif self.plot_type == 'particles':
            self.PTM.plot(self.tindex, ax=self.axes,
                          xlims=self.axes.get_xlim(),
                          ylims=self.axes.get_ylim())

            self.canvas.draw()


    def on_select_depth(self, event):
        """
        Handle a new choice in the depth-layer drop-down list.

        Does nothing when the selected index equals the current layer.
        Indices at or beyond ``self.Nkmax`` are stored as the entry's text
        (the 'surface' / 'seabed' entries) instead of the numeric index.
        """
        selection = event.GetSelection()
        if self.klayer[0] == selection:
            return

        # Entries past the last numeric layer are passed on by name
        layer = event.GetString() if selection >= self.Nkmax else selection
        self.klayer = [layer]
        self.loadData()
        self.flash_status_message("Selecting depth: %s..."%event.GetString())

        # Update the plot
        self.update_figure()

    def on_open_file(self, event):
        """
        Prompt for one or more NetCDF files, load them and draw the depth.

        The file-type filter chosen in the dialog selects the reader: the
        UnTRIM filter uses ``UNTRIMSpatial``, every other filter (including
        "All Files") uses the SUNTANS ``Spatial`` reader.  The variable and
        time drop-down lists are then repopulated and, if the reader's default
        depth variable is available, it is loaded and plotted.
        """
        file_choices = "SUNTANS NetCDF (*.nc)|*.nc*|UnTRIM NetCDF (*.nc)|*.nc*|All Files (*.*)|*.*"

        dlg = wx.FileDialog(
            self,
            message="Open SUNTANS file...",
            defaultDir=os.getcwd(),
            defaultFile="",
            wildcard=file_choices,
            style=wx.FD_MULTIPLE)

        # Read everything needed from the dialog, then always release it
        try:
            if dlg.ShowModal() != wx.ID_OK:
                return
            path = dlg.GetPaths()
            filter_index = dlg.GetFilterIndex()
        finally:
            dlg.Destroy()

        self.plot_type = 'hydro'

        # Initialise the class
        if filter_index == 1:  # UnTRIM
            self.flash_status_message("Opening UnTRIMS file: %s" % path)
            UNTRIMSpatial.__init__(self, path)
            startvar = 'Mesh2_face_depth'
        else:  # SUNTANS (also used for the "All Files" filter)
            self.flash_status_message("Opening SUNTANS file: %s" % path)
            try:
                Spatial.__init__(self, path, _FillValue=self._FillValue)
            except Exception:
                # Best-effort retry with a default fill value, e.g. when
                # self._FillValue is not set yet.  Exception rather than a
                # bare except so Ctrl-C / SystemExit are not swallowed.
                Spatial.__init__(self, path, _FillValue=-999999)
            startvar = 'dv'

        # Populate the drop down menus
        vnames = self.listCoordVars()
        self.variable_list.SetItems(vnames)

        # Update the time drop down list
        if 'time' in self.__dict__:
            self.timestr = [datetime.strftime(tt, '%d-%b-%Y %H:%M:%S')
                            for tt in self.time]
        else:
            # Assume that it is a harmonic-type file
            self.timestr = self.nc.Constituent_Names.split()

        self.time_list.SetItems(self.timestr)

        # Draw the depth
        if startvar in vnames:
            self.variable = startvar
            self.loadData()
            self.create_figure()

    def on_load_grid(self, event):
        """
        Prompt for a folder containing a SUNTANS grid and plot its mesh.

        The grid is loaded with ``Grid.__init__`` and drawn with
        ``self.plotmesh``; a collection that is already on the axes is removed
        first.
        """
        dlg = wx.DirDialog(
            self,
            message="Open SUNTANS grid from folder...",
            defaultPath=os.getcwd(),
            style=wx.DD_DEFAULT_STYLE)

        # Read the selection, then always release the dialog
        try:
            if dlg.ShowModal() != wx.ID_OK:
                return
            path = dlg.GetPath()
        finally:
            dlg.Destroy()

        # Initialise the class
        self.flash_status_message("Opening SUNTANS grid from folder: %s" % path)
        Grid.__init__(self, path)

        # Plot the Grid, replacing whatever collection was drawn before
        if 'collection' in self.__dict__:
            self.axes.collections.remove(self.collection)

        self.axes, self.collection = self.plotmesh(ax=self.axes, edgecolors='y')

        # redraw the figure
        self.canvas.draw()

    def on_load_ptm(self, event):
        """
        Prompt for a particle-tracking (PTM) output file and plot it.

        The NetCDF filter loads the file with ``PtmNC`` and the binary filter
        with ``PtmBin``.  With the "All Files" filter the reader is chosen
        from the file name: names ending in ``_bin.out`` are read as binary,
        anything else as NetCDF.  The time list is repopulated and the last
        time step is plotted.
        """
        file_choices = "PTM NetCDF (*.nc)|*.nc|PTM Binary (*_bin.out)|*_bin.out|All Files (*.*)|*.*"

        dlg = wx.FileDialog(
            self,
            message="Open PTM file...",
            defaultDir=os.getcwd(),
            defaultFile="",
            wildcard=file_choices,
            style=wx.FD_MULTIPLE)

        # Read everything needed from the dialog, then always release it
        try:
            if dlg.ShowModal() != wx.ID_OK:
                return
            path = dlg.GetPath()
            filter_index = dlg.GetFilterIndex()
        finally:
            dlg.Destroy()

        self.plot_type = 'particles'

        # Pick the reader.  The "All Files" filter previously left self.PTM
        # unset (or pointing at an earlier file), so fall back on the name.
        if filter_index == 0:
            is_binary = False
        elif filter_index == 1:
            is_binary = True
        else:
            is_binary = path.endswith('_bin.out')

        # Initialise the class
        if is_binary:
            self.flash_status_message("Opening PTM binary file: %s" % path)
            self.PTM = PtmBin(path)
        else:
            self.flash_status_message("Opening PTM netcdf file: %s" % path)
            self.PTM = PtmNC(path)

        self.Nt = self.PTM.nt

        # Update the time drop down list
        self.timestr = [datetime.strftime(tt, '%d-%b-%Y %H:%M:%S')
                        for tt in self.PTM.time]
        self.time_list.SetItems(self.timestr)

        # Plot the last time step, reusing the current limits when known
        if 'xlims' in self.__dict__:
            self.PTM.plot(self.PTM.nt-1, ax=self.axes, xlims=self.xlims,
                          ylims=self.ylims, color=self.particlecolor,
                          fontcolor='w', markersize=self.particlesize)
        else:
            self.PTM.plot(self.PTM.nt-1, ax=self.axes, fontcolor='w',
                          color=self.particlecolor,
                          markersize=self.particlesize)
        # redraw the figure
        self.canvas.draw()

        
    def on_show_edges(self,event):
        """Store the state of the control that fired the event in
        ``self.showedges`` and redraw the figure."""
        self.showedges = event.GetEventObject().GetValue()

        # Redraw with the new setting
        self.update_figure()

    def on_clim_check(self,event):
        """
        Switch between manual and automatic colour limits.

        When the control that fired the event is checked, automatic limits are
        turned off and the figure is redrawn; otherwise automatic limits are
        turned back on (without a redraw).
        """
        manual = event.GetEventObject().GetValue() == True
        if not manual:
            self.autoclim = True
            return

        self.autoclim = False
        self.update_figure()
       

    def on_climlow(self,event):
        """
        Store the lower colour limit typed by the user in ``self.clim[0]``.

        The text is converted to a float so that ``self.clim`` stays numeric
        (it is formatted with '%3.1f' and passed to ``set_clim`` elsewhere).
        Text that is not a number yet, such as an empty box or a lone "-",
        leaves the previous limit untouched.
        """
        try:
            self.clim[0] = float(event.GetString())
        except ValueError:
            # Partially typed or empty input: keep the last valid limit
            pass
        #self.update_figure()

    def on_climhigh(self,event):
        """
        Store the upper colour limit typed by the user in ``self.clim[1]``.

        The text is converted to a float so that ``self.clim`` stays numeric
        (it is formatted with '%3.1f' and passed to ``set_clim`` elsewhere).
        Text that is not a number yet, such as an empty box or a lone "-",
        leaves the previous limit untouched.
        """
        try:
            self.clim[1] = float(event.GetString())
        except ValueError:
            # Partially typed or empty input: keep the last valid limit
            pass
        #self.update_figure()

    def on_select_cmap(self,event):
        """
        Apply the colormap chosen in the colormap list to the collection.

        When ``USECMOCEAN`` is set the name is looked up as an attribute of
        ``cm``; otherwise the name itself is handed to ``set_cmap``.
        """
        chosen = event.GetString()
        self.cmap = chosen

        colormap = getattr(cm, chosen) if USECMOCEAN else chosen
        self.collection.set_cmap(colormap)

        # Update the figure
        self.update_figure()

    def on_save_fig(self,event):
        """
        Save a figure of the current scene to a file

        The format is taken from the file-type filter chosen in the dialog;
        the matching extension is appended when the typed name does not
        already end with it.
        """
        # One "description|pattern" pair per format, in the same order as
        # ``filters`` (the jpg and eps entries used to map to '.png' and the
        # patterns contained stray spaces / a missing dot)
        file_choices = "(*.png)|*.png|(*.pdf)|*.pdf|(*.jpg)|*.jpg|(*.eps)|*.eps"
        filters = ['.png', '.pdf', '.jpg', '.eps']

        dlg = wx.FileDialog(
            self,
            message="Save figure to file...",
            defaultDir=os.getcwd(),
            defaultFile="",
            wildcard=file_choices,
            style=wx.FD_SAVE | wx.FD_OVERWRITE_PROMPT)

        # Read everything needed from the dialog, then always release it
        try:
            if dlg.ShowModal() != wx.ID_OK:
                return
            path = dlg.GetPath()
            ext = filters[dlg.GetFilterIndex()]
        finally:
            dlg.Destroy()

        # Only a trailing extension counts, not the text appearing anywhere
        # in the path
        if path.lower().endswith(ext):
            outfile = path
        else:
            outfile = path + ext

        self.fig.savefig(outfile)

            


    def on_save_anim(self,event):
        """
        Save an animation of the current scene to a file

        One frame is rendered per time step (``self.Nt`` frames).  For scalar
        plots each frame reloads the data for that step and refreshes the
        figure; for particle plots each frame redraws the particles.  When the
        file has been written the view is restored to the time index that was
        selected before, and a confirmation dialog is shown.
        """
        # "description|pattern" pairs in the same order as ``filters``
        # (the patterns used to contain stray spaces)
        file_choices = "Quicktime (*.mov)|*.mov|(*.gif)|*.gif|(*.avi)|*.avi|(*.mp4)|*.mp4"
        filters = ['.mov', '.gif', '.avi', '.mp4']

        dlg = wx.FileDialog(
            self,
            message="Output animation file...",
            defaultDir=os.getcwd(),
            defaultFile="",
            wildcard=file_choices,
            style=wx.FD_SAVE | wx.FD_OVERWRITE_PROMPT)

        # Read everything needed from the dialog, then always release it
        try:
            if dlg.ShowModal() != wx.ID_OK:
                return
            path = dlg.GetPath()
            ext = filters[dlg.GetFilterIndex()]
        finally:
            dlg.Destroy()

        # Only a trailing extension counts, not the text appearing anywhere
        # in the path
        outfile = path if path.lower().endswith(ext) else path + ext
        self.flash_status_message("Saving animation to file: %s" % outfile)

        particles = self.plot_type == 'particles'

        def initanim():
            # Artists that are redrawn on every frame
            if particles:
                return (self.PTM.title, self.PTM.p_handle)
            return (self.title, self.collection)

        def updateScalar(i):
            # Render frame ``i`` and return the artists that changed
            if particles:
                self.PTM.plot(i, ax=self.axes,
                              xlims=self.axes.get_xlim(),
                              ylims=self.axes.get_ylim())
                return (self.PTM.title, self.PTM.p_handle)
            self.tstep = [i]
            self.loadData()
            self.update_figure()
            return (self.title, self.collection)

        self.anim = animation.FuncAnimation(
            self.fig, updateScalar, init_func=initanim,
            frames=self.Nt, interval=50, blit=True)

        # NOTE(review): the 'mencoder' writer may not be available in recent
        # Matplotlib releases - confirm against the installed version.
        if ext == '.gif':
            self.anim.save(outfile, writer='imagemagick', fps=6)
        elif ext == '.mp4':
            print('Saving html5 video...')
            # Ensures html5 compatibility
            self.anim.save(outfile, writer='mencoder', fps=6,
                           bitrate=3600,
                           extra_args=['-ovc', 'x264'])  # mencoder options
        else:
            self.anim.save(outfile, writer='mencoder', fps=6, bitrate=3600)

        # Return the figure back to its status
        del self.anim
        self.tstep = self.tindex
        if not particles:
            self.loadData()
            self.update_figure()

        # Bring up a dialog box
        dlg2 = wx.MessageDialog(self, 'Animation complete.', "Done", wx.OK)
        dlg2.ShowModal()
        dlg2.Destroy()

    def on_exit(self, event):
        """Close the application window by destroying this frame."""
        self.Destroy()
        
    def on_about(self, event):
        """Show a modal "About" dialog with the tool's author details."""
        about_text = """ SUNTANS NetCDF visualization tool
        
            *Author: Matt Rayson
            *Institution: Stanford University
            *Created: October 2013
        """
        dialog = wx.MessageDialog(self, about_text, "About", wx.OK)
        dialog.ShowModal()
        dialog.Destroy()

    def on_count_cells(self,eveny):
        """Show the result of ``self.count_cells()`` in a modal dialog."""
        total = self.count_cells()
        dialog = wx.MessageDialog(
            self, "Total 3-D grid cells = %d"%(total), "No. cells", wx.OK)
        dialog.ShowModal()
        dialog.Destroy()

    def on_overlay_bathy(self,event):
        """
        Overlay unfilled grey contours of ``self.dv`` on the current axes.

        Contour levels come from ``self.depthlevs``.  Progress is printed
        using the call form of ``print``, which runs on both Python 2 and 3.
        """
        # Plot depth contours
        print('Plotting contours...')
        self.contourf(z=self.dv, clevs=self.depthlevs,
                      ax=self.axes,
                      filled=False, colors='0.5', linewidths=0.5, zorder=1e6)
        print('Done')
   
    def on_plot_gridstat(self, event):
        """
        Open a new pyplot figure, draw ``self.plothist()`` in it and show it.
        """
        pyplot = matplotlib.pyplot
        pyplot.figure()
        self.plothist()
        pyplot.show()


    def create_status_bar(self):
        """Create the frame's status bar and keep it in ``self.statusbar``."""
        bar = self.CreateStatusBar()
        self.statusbar = bar

    def flash_status_message(self, msg, flash_len_ms=1500):
        """
        Show ``msg`` in the status bar and clear it after ``flash_len_ms`` ms.

        A single one-shot timer is created on first use and restarted on every
        later call.  Creating and binding a new timer per message let timers
        pile up, and an earlier timer could wipe a newer message before its
        own delay had elapsed.
        """
        self.statusbar.SetStatusText(msg)
        if getattr(self, 'timeroff', None) is None:
            self.timeroff = wx.Timer(self)
            self.Bind(
                wx.EVT_TIMER,
                self.on_flash_status_off,
                self.timeroff)
        # Start() restarts the countdown if the timer is still running
        self.timeroff.Start(flash_len_ms, oneShot=True)
    
    def on_flash_status_off(self, event):
        """Timer callback: blank the status bar text."""
        blank = ''
        self.statusbar.SetStatusText(blank)
Esempio n. 11
0
def plot_graph(settings=None, macro_data_z=None, negate_fields=None):
    """Cluster the selected data series and plot them as a graph.

    The series are summed over a 3-row rolling window, standardised, fed to
    a cross-validated graphical lasso, clustered with affinity propagation on
    the estimated covariance, and placed on a 2D plane with a spectral
    embedding.  Edges are drawn for partial correlations above 0.02 in
    absolute value.  The figure is written to 'figures/macro_graph.png' and
    then shown.

    Parameters
    ----------
    settings : mapping
        Read with the keys 'data_fieldnames' (node labels), 'data_fields'
        (columns of ``macro_data_z`` to use) and 'common_start_date' (rows
        with a later index are kept).  Required despite the ``None`` default.
    macro_data_z : pandas.DataFrame
        Source data; required despite the ``None`` default.
    negate_fields : optional
        Column selection whose sign is flipped before processing.
    """
    symbols = np.array(settings['data_fieldnames']).T
    graph_data = macro_data_z[macro_data_z.index > settings['common_start_date']
                             ][settings['data_fields']].iloc[2:]
    if negate_fields is not None:
        graph_data[negate_fields] = -graph_data[negate_fields]

    # NOTE(review): a 3-row rolling sum leaves the first two rows as NaN -
    # confirm the estimators below cope with that.
    graph_data = graph_data.rolling(window=3, center=False).sum()
    variation = graph_data.values.T

    ###############################################################################
    # Learn a graphical structure from the correlations
    # NOTE(review): newer scikit-learn releases name this GraphicalLassoCV -
    # confirm against the installed version.
    edge_model = covariance.GraphLassoCV()

    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X = variation.copy().T
    X /= X.std(axis=0)
    edge_model.fit(X)

    ###############################################################################
    # Cluster using affinity propagation

    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()

    for i in range(n_labels + 1):
        print('Cluster %i: %s' % ((i + 1), ', '.join(symbols[labels == i])))

    ###############################################################################
    # Find a low-dimension embedding for visualization: find the best position of
    # the nodes on a 2D plane
    node_position_model = manifold.SpectralEmbedding(n_components=2,
                                                     n_neighbors=6)

    embedding = node_position_model.fit_transform(X.T).T
    # Rows of ``embedding`` used as the horizontal / vertical plot axes
    f1 = 0
    f2 = 1

    ###############################################################################
    # Visualization
    plt.figure(1, facecolor='w', figsize=(12, 6))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])

    # Display a graph of the partial correlations
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding
    plt.scatter(embedding[f1],
                embedding[f2],
                s=100 * d ** 2,
                c=labels,
                cmap=plt.cm.coolwarm)

    # Plot the edges
    start_idx, end_idx = np.where(non_zero)
    segments = [[embedding[[f1, f2], start], embedding[[f1, f2], stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    lc = LineCollection(segments,
                        zorder=0,
                        cmap=plt.cm.coolwarm,
                        norm=plt.Normalize(0, .7 * np.sqrt(values.max())))
    lc.set_array(np.sqrt(values))
    lc.set_linewidths(15 * np.sqrt(values))
    ax.add_collection(lc)

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    label_offset = 0.002

    for index, (name, label, (f_1, f_2)) in enumerate(
            zip(symbols, labels, embedding.T)):

        # Pick the coordinates that match the plotted axes
        coords = (f_1, f_2)
        x = coords[f1]
        y = coords[f2]

        # Signed distance to the other nodes; the node's own entry is set to
        # 1 so that it is not picked as its own nearest neighbour
        dx = x - embedding[f1]
        dx[index] = 1
        dy = y - embedding[f2]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x += label_offset
        else:
            horizontalalignment = 'right'
            x -= label_offset
        if this_dy > 0:
            verticalalignment = 'bottom'
            y += label_offset
        else:
            verticalalignment = 'top'
            y -= label_offset
        # NOTE(review): plt.cm.spectral may be missing from recent Matplotlib
        # releases (renamed nipy_spectral) - confirm.
        plt.text(x, y, name, size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.spectral(label / float(n_labels)),
                           alpha=.6))

    # np.ptp() instead of the ndarray.ptp() method, which NumPy 2 removed
    x_span = np.ptp(embedding[f1])
    y_span = np.ptp(embedding[f2])
    plt.xlim(embedding[f1].min() - .15 * x_span,
             embedding[f1].max() + .10 * x_span,)
    plt.ylim(embedding[f2].min() - .03 * y_span,
             embedding[f2].max() + .03 * y_span)

    # Save before show(): once an interactive window has been closed the
    # figure is gone and savefig() would write an empty image
    plt.savefig('figures/macro_graph.png',
                facecolor='w',
                edgecolor='w',
                transparent=True)
    plt.show()
Esempio n. 12
0
           c=labels,
           cmap=pl.cm.spectral)

# Plot the edges
start_idx, end_idx = np.where(non_zero)
#a sequence of (*line0*, *line1*, *line2*), where::
#            linen = (x0, y0), (x1, y1), ... (xm, ym)
segments = [[embedding[:, start], embedding[:, stop]]
            for start, stop in zip(start_idx, end_idx)]
values = np.abs(partial_correlations[non_zero])
lc = LineCollection(segments,
                    zorder=0,
                    cmap=pl.cm.hot_r,
                    norm=pl.Normalize(0, .7 * values.max()))
lc.set_array(values)
lc.set_linewidths(15 * values)
ax.add_collection(lc)

# Add a label to each node. The challenge here is that we want to
# position the labels to avoid overlap with other labels
for index, (name, label, (x, y)) in enumerate(zip(names, labels, embedding.T)):

    dx = x - embedding[0]
    dx[index] = 1
    dy = y - embedding[1]
    dy[index] = 1
    this_dx = dx[np.argmin(np.abs(dy))]
    this_dy = dy[np.argmin(np.abs(dx))]
    if this_dx > 0:
        horizontalalignment = 'left'
        x = x + .002
def StockMarketOLD():
    """Cluster a fixed set of stocks by daily price variation and plot them.

    Quotes for every symbol are downloaded for 2005-01-01 to 2009-12-31, the
    per-day difference of two quote fields is standardised, a graphical lasso
    (regularisation 0.3) estimates covariance and precision, affinity
    propagation clusters the stocks, and a locally linear embedding positions
    them on a 2D plane.  A LaTeX covariance table is written to
    'covariancetable.tex' and the figure to 'Graphs/StockCluster.pdf' and
    'Graphs/StockCluster.svg' before being shown.

    Each symbol is downloaded once and the result reused; progress and
    diagnostics are printed with the call form of ``print``.
    """
    ###############################################################################
    # Retrieve the data from Internet

    # Choose a time period reasonnably calm (not too long ago so that we get
    # high-tech firms, and before the 2008 crash)
    d1 = datetime.datetime(2005, 1, 1)
    d2 = datetime.datetime(2009, 12, 31)

    # kraft symbol has now changed from KFT to MDLZ in yahoo
    symbol_dict = {
        'TOT': 'Total',
        'XOM': 'Exxon',
        'CVX': 'Chevron',
        'COP': 'ConocoPhillips',
        'VLO': 'Valero Energy',
        'MSFT': 'Microsoft',
        'IBM': 'IBM',
        'TWX': 'Time Warner',
        'CMCSA': 'Comcast',
        #'CVC': 'Cablevision',
        #'YHOO': 'Yahoo',
        #'DELL': 'Dell',
        'HPQ': 'HP',
        'AMZN': 'Amazon',
        'TM': 'Toyota',
        'CAJ': 'Canon',
        'MTU': 'Mitsubishi',
        'SNE': 'Sony',
        #'F': 'Ford',
        'HMC': 'Honda',
        #'NAV': 'Navistar',
        'NOC': 'Northrop Grumman',
        'BA': 'Boeing',
        'KO': 'Coca Cola',
        'MMM': '3M',
        'MCD': 'Mc Donalds',
        #'PEP': 'Pepsi',
        'MDLZ': 'Kraft Foods',
        'K': 'Kellogg',
        'UN': 'Unilever',
        'MAR': 'Marriott',
        'PG': 'Procter Gamble',
        'CL': 'Colgate-Palmolive',
        'GE': 'General Electrics',
        'WFC': 'Wells Fargo',
        'JPM': 'JPMorgan Chase',
        #'AIG': 'AIG',
        'AXP': 'American Express',
        'BAC': 'Bank of America',
        'GS': 'Goldman Sachs',
        'AAPL': 'Apple',
        'SAP': 'SAP',
        'CSCO': 'Cisco',
        'TXN': 'Texas Instruments',
        'XRX': 'Xerox',
        #'LMT': 'Lookheed Martin',
        'WMT': 'Wal-Mart',
        'WBA': 'Walgreen',
        'HD': 'Home Depot',
        'GSK': 'GlaxoSmithKline',
        'PFE': 'Pfizer',
        'SNY': 'Sanofi-Aventis',
        'NVS': 'Novartis',
        'KMB': 'Kimberly-Clark',
        'R': 'Ryder',
        'GD': 'General Dynamics',
        'RTN': 'Raytheon',
        'CVS': 'CVS',
        'CAT': 'Caterpillar',
        'DD': 'DuPont de Nemours',

        #'GM': 'General Motors',
        #'GOOG' : 'Google',
        'ORCL' : 'Oracle',
        'NVO':'Novo Nordisk',
        'LLY':'Eli Lilly and Company',
        #'FB':'Facebook',
        'MRK':'Merck Co',
        }
    '''
    symbol_dict = {'Danske.CO':'Danske Bank',
                   'Maersk-B.CO':'Maersk',
                   'DSV.CO':'DSV',
                   'FLS.CO':'FLS',
                   'Gen.CO':'Genmab',
                   'TDC.CO':'TDC',
                   'CARL-B.CO':'Carlsberg',
                   'CHR.CO':'Chr Hansen',
                   'COLO-B.CO':'Coloplast',
                   'GN.CO':'GN Store Nord',
                   'NDA-DKK.co':'Nordea',
                   'Novo-B.co':'Novo Nordisk',
                   'NZYM-B.CO':'Novozymes',
                   'PNDORA.CO':'Pandora',
                   'Tryg.co':'Tryg',
                   'VWS.CO':'Vestas',
                   'WDH.CO':'William Demant',
                   'G4s.co':'G4S',
                   'JYSK.CO':'Jyske Bank',
                   'KBHL.CO':'Kobenhavns Lufthavne',
                   'RBREW.CO':'Royal Unibrew',
                   'ROCK-B.CO':'Rockwool',
                   'SYDB.CO':'Sydbank',
                   'TOP.CO':'Topdanmark',
                   #'ALMB.CO':'Alm Brand',
                   'AURI-B.CO':'Auriga',
                   'Bava.CO':'Bavarian Nordic',
                   'BO.CO':'Bang Olufsen',
                   'DFDS.CO':'DFDS',
                   'DNORD.CO':'DS Norden',
                   'GES.CO':'Greentech',
                   'IC.CO':'IC Group',
                   'JDAN.CO':'Jeudan',
                   #'JUTBK.CO':'Jutlander Bank',
                   #'MATAS.CO':'Matas',
                   'NKT.CO':'NKT',
                   #'NNIT.CO':'NNIT',
                   'NORDJB.CO':'Nordjyske Bank',
                   #'ONXEO.CO':'Onxeo',
                   #'OSSR.CO':'Ossur',
                   'PAAL-B.CO':'Per Aarslef',
                   'RILBA.CO':'Ringkobing Landbobank',
                   'SAS-DKK.CO':'SAS',
                   'SCHO.CO':'Schouw Co.',
                   'SIM.CO':'SimCorp',
                   'Solar-B.co':'Solar B',
                   'SPNO.CO':'Spar Nord',
                   'TIV.CO':'Tivoli',
                   'UIE.CO':'UIE',
                   'VELO.CO':'Veloxis',
                   'ZEAL.CO':'Zealand Pharma'
                   }
    '''
    symbols, names = np.array(list(symbol_dict.items())).T

    # Download every symbol exactly once (the quotes used to be fetched again
    # for each check and each price matrix) and report symbols whose history
    # has an unexpected length.
    # NOTE(review): 1259 is presumably the number of trading days in the
    # period - confirm.
    expected_rows = 1259
    quotes = []
    for symbol in symbols:
        print(symbol)
        series = quotes_historical_yahoo(symbol, d1, d2, True, False)
        if len(series) != expected_rows:
            print('%s %d' % (symbol, len(series)))
        quotes.append(series)

    # One column per symbol, one row per quote.  Local names avoid shadowing
    # the builtin open().
    # NOTE(review): fields 5 and 6 are assumed to be the open and close
    # prices - confirm against quotes_historical_yahoo.
    open_prices = pd.DataFrame(
        np.array([[q[5] for q in series] for series in quotes]).T)
    close_prices = pd.DataFrame(
        np.array([[q[6] for q in series] for series in quotes]).T)

    # The daily variations of the quotes are what carry most information
    variation = np.array(close_prices - open_prices)

    ###############################################################################
    # Learn a graphical structure from the correlations
    #edge_model = covariance.GraphLassoCV()

    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X = variation.copy()

    pd.DataFrame(np.round(np.cov(X.T),3),columns=symbols,index=symbols).to_latex('covariancetable.tex')

    print(np.max(np.round(np.cov(X.T),3)))

    X /= X.std(axis=0)

    covariance_,precision_ = graphical_lasso(X,0.3)

    print(pd.DataFrame(precision_))

    #edge_model.fit(X)

    ###############################################################################
    # Cluster using affinity propagation

    _, labels = cluster.affinity_propagation(covariance_)

    n_labels = labels.max()

    for i in range(n_labels + 1):
        print('Cluster %i: %s' % ((i + 1), ', '.join(symbols[labels == i])))

    ###############################################################################
    # Find a low-dimension embedding for visualization: find the best position of
    # the nodes (the stocks) on a 2D plane

    # We use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control).
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)

    embedding = node_position_model.fit_transform(X.T).T

    ###############################################################################
    # Visualization
    plt.figure(1, facecolor='w', figsize=(20, 16))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    plt.annotate('From %s to %s' % (d1.strftime('%Y-%m-%d'),d2.strftime('%Y-%m-%d')),xy=(0.11,-0.37),size=25)

    print(X.shape)

    # List the members of each cluster on the figure
    for i in range(n_labels + 1):
        plt.annotate('Cluster %i: %s' % ((i + 1), ', '.join(symbols[labels == i])),xy=(-0.43,0.02-i*0.02),size=18)

    # Display a graph of the partial correlations
    #partial_correlations = edge_model.precision_.copy()
    partial_correlations = precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding
    # NOTE(review): plt.cm.spectral may be missing from recent Matplotlib
    # releases (renamed nipy_spectral) - confirm.
    plt.scatter(embedding[0], embedding[1], s=200 * d ** 2, c=labels,
                cmap=plt.cm.spectral)

    # Plot the edges
    start_idx, end_idx = np.where(non_zero)
    #a sequence of (*line0*, *line1*, *line2*), where::
    #            linen = (x0, y0), (x1, y1), ... (xm, ym)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    lc = LineCollection(segments,
                        zorder=0, cmap=plt.get_cmap('Greys'),
                        norm=plt.Normalize(0, .7 * values.max()))
    lc.set_array(values)
    lc.set_linewidths(15 * values)
    ax.add_collection(lc)

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    for index, (name, label, (x, y)) in enumerate(
            zip(names, labels, embedding.T)):

        # Signed distance to the other nodes; the node's own entry is set to
        # 1 so that it is not picked as its own nearest neighbour
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x, y, name, size=22,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.spectral(label / float(n_labels)),
                           alpha=.6))

    plt.xlim(embedding[0].min() - .25 * embedding[0].ptp(),
             embedding[0].max() + .20 * embedding[0].ptp(),)
    plt.ylim(embedding[1].min() - .20 * embedding[1].ptp(),
             embedding[1].max() + .20 * embedding[1].ptp())

    plt.savefig('Graphs/StockCluster.pdf',bbox_inches='tight')
    plt.savefig('Graphs/StockCluster.svg',bbox_inches='tight')
    plt.show()
Esempio n. 14
0
def clusterSymbol(dbdf):
    """Cluster the stocks named in ``dbdf['title']`` and plot the result.

    Steps performed by the visible code:

    1. Each title is normalised (spaces and ``-`` removed, ``&`` replaced by
       ``and``) and looked up in column 1 of the ``kospi`` and ``kosdaq``
       sheets; the matching row's column 0 becomes a zero-padded 6-digit code.
    2. Price bars are fetched through ``cluster_fetchData`` (defined outside
       this block) and the shortest series length is tracked in the global
       ``dflength``.
    3. ``close - open`` per stock is standardised, fed to
       ``covariance.GraphLassoCV`` and clustered with affinity propagation.
    4. Each cluster is printed, a few names are drawn at random from every
       cluster with more than one member, and the graph is drawn with a
       2-D locally linear embedding.

    :param dbdf: object indexable with ``'title'`` yielding title strings
        (presumably a pandas DataFrame -- confirm against the caller).
    :return: DataFrame with a single ``title`` column holding the randomly
        drawn names.

    NOTE(review): this is Python 2 code (``print`` statements,
    ``except Exception,e``), and the outer ``try:`` below has no matching
    ``except``/``finally`` in the visible source.
    """
    global dflength
    # NOTE(review): saveType is never read in the visible code.
    saveType = False
    try:

        book_kosdaq = xlrd.open_workbook("../../Kosdaq_symbols.xls")
        sheet_kosdaq = book_kosdaq.sheet_by_name('kosdaq')

        book_kospi = xlrd.open_workbook('../../Kospi_Symbols.xls')
        sheet_kospi = book_kospi.sheet_by_name('kospi')

        quotes2 = []
        nametitles = []
        codearrs = []
        titlefound = False
        for title in dbdf['title']:
            # Normalise the title before comparing it with the sheet names.
            if ' ' in title:
                title  = title.replace(' ','')
            if '&' in title:
                title  = title.replace('&','and')
            if '-' in title:
                title  = title.replace('-','')
            print 'title',title
            # Look the title up in the kospi sheet (market type 1) ...
            for cnt in range(sheet_kospi.nrows):

                if sheet_kospi.row_values(cnt)[1] == title:

                    code = '{0:06d}'.format(int(sheet_kospi.row_values(cnt)[0]))
                    name = sheet_kospi.row_values(cnt)[1]
                    print code,name
                    markettype = 1
                    titlefound = True
                    break

            # ... and then in the kosdaq sheet (market type 2). This loop
            # always runs, so a kosdaq match overrides a kospi match.
            for cnt in range(sheet_kosdaq.nrows):

                if sheet_kosdaq.row_values(cnt)[1] == title:

                    code = '{0:06d}'.format(int(sheet_kosdaq.row_values(cnt)[0]))
                    name = sheet_kosdaq.row_values(cnt)[1]
                    print code,name
                    markettype = 2
                    titlefound = True
                    break

            # Skip titles found in neither sheet; reset the flag otherwise.
            if titlefound == False:
                continue
            titlefound = False
            try:
                # Fixed option values forwarded to cluster_fetchData; their
                # meaning is defined by that function, not by this block.
                startdatemode = 2
                dbtradinghist = 'none'
                histmode = 'none'
                plotly = 'plotly'
                stdmode = 'stddb'
                tangentmode = 'tangentdb'
                daych  =0
                runcount = 0
                srcsite = 1#google
                # srcsite = 2#yahoo
                writedblog = 'none'
                updbpattern = 'none'
                appenddb = 'none'

                print 'found code',code, name
                bars = cluster_fetchData(str(code),markettype,name,'realtime','dbpattern',histmode,runcount,srcsite,writedblog,updbpattern\
                                        ,appenddb,startdatemode,\
                                         dbtradinghist,plotly,stdmode,'none',daych,tangentmode)

                # bars = bars[1:]

                # Track the shortest series seen so far so that all series
                # can be cut to a common length below. dflength is a module
                # global, so a non-zero value left by an earlier call also
                # participates in this minimum.
                if dflength == 0:
                    dflength = len(bars)
                else:
                    if dflength > len(bars):
                        dflength = len(bars)

                quotes2.append(bars)
                nametitles.append(name)
                codearrs.append(code)
                clear_output()
            except Exception,e:
                # Any failure for this title is silently ignored and the
                # title is left out of the clustering.
                # print 'error title',name
                pass

        npquotesOpen = []
        npquotesClose = []
        count = 0
        for q in quotes2:
            # print q.tail()
            # print pd.isnull(q).any().any()
            # if pd.isnull(q).any().any() == True:
            #     print 'NaN'
            #     continue
            q = q.fillna(0)

            # Keep only the first dflength rows of longer series; both
            # branches append the 'Open' and 'Close' columns.
            if dflength < len(q):
                q = q[:dflength]
                npquotesOpen.append(q['Open'].values)
                npquotesClose.append(q['Close'].values)
                # print q['Close'].values,'count',count,len(q)
            else:
                npquotesOpen.append(q['Open'].values)
                npquotesClose.append(q['Close'].values)
                # print q['Close'].values,'count',count,len(q)
            count += 1
            # print len(q.values),'dflength',dflength
        open2 = np.array(npquotesOpen).astype(np.float)
        close2 = np.array(npquotesClose).astype(np.float)
        # npquotesClose = []
        # for q in quotes2:
        #     npquotesClose.append(q['Close'].values)
        # npquotesOpen = np.array([q['Open'].values for q in quotes2])
        # open2 =  npquotesOpen
        # npquotesClose = np.array([q['Close'].values for q in quotes2])
        # close2 =  npquotesClose
        # print npquotesOpen
        # print npquotesClose

        # One row per stock: close minus open for each bar.
        variation = (close2 - open2)

        # NOTE(review): names are taken from a dict built from the code/name
        # lists. Python 2 dict iteration order is arbitrary and duplicate
        # codes collapse, so `names` may not line up with the rows of
        # `variation` -- verify.
        symbol_dict = dict(zip(codearrs,nametitles))

        symbols, names = np.array(symbol_dict.items()).T

        edge_model = covariance.GraphLassoCV()

        # standardize the time series: using correlations rather than covariance
        # is more efficient for structure recovery
        # NOTE(review): tempX is not read again in the visible code.
        tempX = variation.T
        # print tempX,'tempX len',len(tempX)
        X = variation.copy().T
        # print 'open len',len(open2),'close len',len(close2),'variation len',len(variation),'X len',len(X)
        print 'type open',type(open2),'type close',type(close2),'type variation',type(variation),'type X',type(X)
        print 'shape open',open2.shape,'shape close',close2.shape,'shape variation',variation.shape,'shape X',X.shape


        X /= X.std(axis=0)
        edge_model.fit(X)

        # ###############################################################################
        # # Cluster using affinity propagation

        _, labels = cluster.affinity_propagation(edge_model.covariance_)
        n_labels = labels.max()

        # print names
        # print 'type symbols',type(symbols),'type names',type(names)
        # for name in names:
        #     print 'name',name
        # print names[0],names[1],names[2],names[3]
        # print 'lables',labels,'n_labels',n_labels,'type labels',type(labels)

        # Draw 1, 2, 4 or 5 names from clusters of size 2-3, 4-5, 6-7 and
        # 8+ respectively; single-member clusters contribute nothing.
        # np.random.choice samples with replacement by default, so the same
        # name can be drawn more than once.
        randomtitles = pd.DataFrame()
        for i in range(n_labels+1):
            # print labels == i
            print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))
            if 1 < len(names[labels==i]) <= 3:
                # print 'random cluster ',np.random.choice(names[labels==i],3)
                tmpdf = pd.DataFrame({'title':np.random.choice(names[labels==i],1)})
                randomtitles = pd.concat([tmpdf, randomtitles])
            elif 3 < len(names[labels==i]) <= 5:
                tmpdf = pd.DataFrame({'title':np.random.choice(names[labels==i],2)})
                randomtitles = pd.concat([tmpdf, randomtitles])
            elif 5 < len(names[labels==i]) <= 7:
                tmpdf = pd.DataFrame({'title':np.random.choice(names[labels==i],4)})
                randomtitles = pd.concat([tmpdf, randomtitles])
            elif 7 < len(names[labels==i]) :
                tmpdf = pd.DataFrame({'title':np.random.choice(names[labels==i],5)})
                randomtitles = pd.concat([tmpdf, randomtitles])
                # print randomtitles

        # for i in range(n_labels + 1):
        #     print 'Cluster '+str(i + 1)+', '+ names[labels == i]

        # ###############################################################################
        # Find a low-dimension embedding for visualization: find the best position of
        # the nodes (the stocks) on a 2D plane

        # We use a dense eigen_solver to achieve reproducibility (arpack is
        # initiated with random vectors that we don't control). In addition, we
        # use a large number of neighbors to capture the large-scale structure.
        node_position_model = manifold.LocallyLinearEmbedding(
            n_components=2, eigen_solver='dense', n_neighbors=6)

        embedding = node_position_model.fit_transform(X.T).T

        # ###############################################################################
        # Visualization
        pl.figure(1, facecolor='w', figsize=(15, 15))
        pl.clf()
        ax = pl.axes([0., 0., 1., 1.])
        pl.axis('off')

        # Display a graph of the partial correlations
        # The precision matrix is rescaled by 1/sqrt(diag) on both sides;
        # only upper-triangle entries with magnitude above 0.02 become edges.
        partial_correlations = edge_model.precision_.copy()
        d = 1 / np.sqrt(np.diag(partial_correlations))
        partial_correlations *= d
        partial_correlations *= d[:, np.newaxis]
        non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

        # Plot the nodes using the coordinates of our embedding
        pl.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,
                   cmap=pl.cm.spectral)

        # Plot the edges
        start_idx, end_idx = np.where(non_zero)
        #a sequence of (*line0*, *line1*, *line2*), where::
        #            linen = (x0, y0), (x1, y1), ... (xm, ym)
        segments = [[embedding[:, start], embedding[:, stop]]
                    for start, stop in zip(start_idx, end_idx)]
        values = np.abs(partial_correlations[non_zero])
        lc = LineCollection(segments,
                            zorder=0, cmap=pl.cm.hot_r,
                            norm=pl.Normalize(0, .7 * values.max()))
        lc.set_array(values)
        lc.set_linewidths(15 * values)
        ax.add_collection(lc)

        # Add a label to each node. The challenge here is that we want to
        # position the labels to avoid overlap with other labels
        for index, (name, label, (x, y)) in enumerate(
                zip(names, labels, embedding.T)):

            # Offsets from this node to every node; the node's own entry is
            # set to 1 so that it is not selected by the argmin below.
            dx = x - embedding[0]
            dx[index] = 1
            dy = y - embedding[1]
            dy[index] = 1
            # Horizontal offset to the node closest in y, and vertical
            # offset to the node closest in x; their signs pick the
            # alignment of the text.
            this_dx = dx[np.argmin(np.abs(dy))]
            this_dy = dy[np.argmin(np.abs(dx))]
            if this_dx > 0:
                horizontalalignment = 'left'
                x = x + .002
            else:
                horizontalalignment = 'right'
                x = x - .002
            if this_dy > 0:
                verticalalignment = 'bottom'
                y = y + .002
            else:
                verticalalignment = 'top'
                y = y - .002
            pl.text(x, y, name, size=10,
                    horizontalalignment=horizontalalignment,
                    verticalalignment=verticalalignment,
                    bbox=dict(facecolor='w',
                              edgecolor=pl.cm.spectral(label / float(n_labels)),
                              alpha=.6))

        # Pad the axis limits by a fraction of the coordinate range.
        pl.xlim(embedding[0].min() - .15 * embedding[0].ptp(),
                embedding[0].max() + .10 * embedding[0].ptp(),)
        pl.ylim(embedding[1].min() - .03 * embedding[1].ptp(),
                embedding[1].max() + .03 * embedding[1].ptp())

        pl.show()

        return randomtitles
Esempio n. 15
0
def cluster_data(data):
    """Cluster the series in ``data`` and draw them as a labelled graph.

    A sparse inverse-covariance model (``covariance.GraphLassoCV``) is
    fitted, the variables are grouped with affinity propagation on the
    estimated covariance, every cluster is printed, and the nodes are drawn
    on a 2-D locally linear embedding with edges for partial correlations
    whose magnitude exceeds 0.02.

    :param data: object exposing ``.columns`` (used as node names) and
        convertible to a numeric array -- presumably a pandas DataFrame.
    :return: None. Clusters are printed and the figure is shown.

    NOTE(review): the array is transposed before fitting, so the model
    treats the *rows* of ``data`` as the variables while the node names come
    from its *columns*. The two only line up for particular input layouts;
    confirm the orientation the callers pass in.
    """
    names = data.columns
    edge_model = covariance.GraphLassoCV()

    # Work on a float copy: the in-place division below raises on integer
    # arrays, and the caller's data must not be modified.
    X = np.array(data, dtype=float).T
    X /= X.std(axis=0)

    edge_model.fit(X)
    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()

    for i in range(n_labels + 1):
        print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))

    # Visualization
    node_position_model = manifold.LocallyLinearEmbedding(n_components=2,
                                                          eigen_solver='dense',
                                                          n_neighbors=6)
    embedding = node_position_model.fit_transform(X.T).T
    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Display a graph of the partial correlations: rescale the precision
    # matrix by 1/sqrt(diag) on both sides and keep upper-triangle entries
    # whose magnitude exceeds 0.02.
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding
    plt.scatter(embedding[0],
                embedding[1],
                s=100 * d**2,
                c=labels,
                cmap=plt.cm.spectral)

    # Plot the edges. Each segment is a pair of (x, y) end points.
    start_idx, end_idx = np.where(non_zero)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    # Without any edge above the threshold ``values.max()`` would raise on
    # the empty array, so the collection is only built when edges exist.
    if values.size:
        lc = LineCollection(segments,
                            zorder=0,
                            cmap=plt.cm.hot_r,
                            norm=plt.Normalize(0, .7 * values.max()))
        lc.set_array(values)
        lc.set_linewidths(15 * values)
        ax.add_collection(lc)

    # With a single cluster n_labels is 0; divide by 1 instead so the edge
    # colour lookup does not evaluate 0/0.
    color_scale = float(n_labels) if n_labels != 0 else 1.0

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    for index, (name, label,
                (x, y)) in enumerate(zip(names, labels, embedding.T)):
        # Produce a text label on both Python 2 and 3: byte strings are
        # decoded as UTF-8, other non-text objects are stringified.
        if isinstance(name, bytes):
            name = name.decode('utf-8')
        elif not isinstance(name, type(u'')):
            name = str(name)

        # Offsets to every node; the node's own entry is set to 1 so it is
        # not picked by the argmin below.
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x,
                 y,
                 name,
                 size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.spectral(label / color_scale),
                           alpha=.6))

    # Pad the axis limits by a fraction of the coordinate range. np.ptp is
    # used because the ndarray.ptp method no longer exists in NumPy 2.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(
        embedding[0].min() - .15 * x_span,
        embedding[0].max() + .10 * x_span,
    )
    plt.ylim(embedding[1].min() - .03 * y_span,
             embedding[1].max() + .03 * y_span)
    plt.show()
Esempio n. 16
0
    def plotManifoldDistances(self,
                              segments: List[Union[MessageSegment,
                                                   TypedSegment, RawMessage,
                                                   Any]],
                              distances: numpy.ndarray,
                              labels: numpy.ndarray,
                              templates: List = None,
                              plotEdges=False,
                              countMarkers=False):
        """
        Plot distances of segments according to (presumably multidimensional) features.
        This function abstracts from the actual feature by directly taking a precomputed similarity matrix and
        arranging the segments relative to each other according to their distances using Multidimensional Scaling (MDS).
        See module `manifold` from package `sklearn`.

        If segments is a list of `TypedSegment` or `MessageSegment`, this function plots the feature values of each
        given segment overlaying each other besides the distances; they are colored according to the given labels.

        If more than 2000 segments are given, they are subsampled per label (every n-th segment of each label is
        kept) before the embedding is computed.

        >>> from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage
        >>> from utils.loader import BaseLoader
        >>> from inference.analyzers import Value
        >>>
        >>> bytedata = [
        ...     bytes([1, 2, 3, 4]),
        ...     bytes([   2, 3, 4]),
        ...     bytes([   1, 3, 4]),
        ...     bytes([   2, 4   ]),
        ...     bytes([   2, 3   ]),
        ...     bytes([20, 30, 37, 50, 69, 2, 30]),
        ...     bytes([        37,  5, 69       ]),
        ...     bytes([70, 2, 3, 4]),
        ...     bytes([3, 2, 3, 4])
        ...     ]
        >>> messages  = [RawMessage(bd) for bd in bytedata]
        >>> specimens = BaseLoader(messages)
        >>> analyzers = [Value(message) for message in messages]
        >>> segments  = [TypedSegment(analyzer, 0, len(analyzer.message.data)) for analyzer in analyzers]
        >>> for seg in segments[:4]:
        ...     seg.fieldtype = "ft1"
        >>> for seg in segments[4:6]:
        ...     seg.fieldtype = "ft2"
        >>> for seg in segments[6:]:
        ...     seg.fieldtype = "ft3"
        >>> DistanceCalculator.debug = False
        >>> dc = DistanceCalculator(segments, thresholdFunction=DistanceCalculator.neutralThreshold, thresholdArgs=None)
        Calculated distances for 37 segment pairs in ... seconds.
        >>> dp = DistancesPlotter(specimens, "test", False)
        >>> dp.plotManifoldDistances(segments, dc.distanceMatrix, numpy.array([1,2,3,1,1,0,1,0,2]))
        >>> # comment out writing of file for doctest
        >>> # dp.writeOrShowFigure()

        :param segments: If `segments` is a list of `TypedSegment`s, field types are marked as small markers
            within the label marker. labels containing "Noise" then are not explicitly marked like the other labeled
            segments
        :param distances: The precomputed similarity matrix:
            symmetric matrix, rows/columns in the order of `segments`
        :param labels: Labels of strings (or ints or any other printable type) identifying the cluster for each segment
        :param templates: Templates of clusters to be printed alongside with the feature values.
            CURRENTLY UNTESTED
        :param plotEdges: Plot of edges between each pair of segment markers.
            Caution: Adds n^2 lines which takes very long compared to the scatterplot and
            quickly becomes a huge load especially when rendering the plot as PDF.
        :param countMarkers: add text labels with information at positions with multiple markers
        """
        from sklearn import manifold
        from sklearn.decomposition import PCA

        # plot configuration
        labsize = 150  # label markers: size factor
        typsize = 30  # type markers: size factor
        # self._cm          # label color map
        fcm = cm.cubehelix  # type color map

        # identify unique labels; numeric labels are sorted by value with
        # "Noise" placed first
        allabels = set(labels)
        if all(
                isinstance(l, numpy.integer) or l.isdigit() for l in allabels
                if l != "Noise"):
            ulab = sorted(allabels,
                          key=lambda l: -1 if l == "Noise" else int(l))
        else:
            ulab = sorted(allabels)

        # subsample if segment count is larger than maxSamples
        maxSamples = 1000
        originalSegmentCount = len(segments)
        if originalSegmentCount > 2 * maxSamples:
            import math
            ratiorev = originalSegmentCount / maxSamples
            step2keep = math.floor(ratiorev)
            # group the segment indices by their label
            lab2idx = dict()
            for idx, lab in enumerate(labels):
                lab2idx.setdefault(lab, list()).append(idx)
            # copy list to remove elements without side-effects
            segments = segments.copy()
            # to save the indices to be removed
            idx2rem = list()
            # determines a subset evenly distributed over all clusters while honoring the ratio to reduce to.
            for ics in lab2idx.values():
                keep = set(ics[::step2keep])
                idx2rem.extend(set(ics) - keep)
            # delete from the back so that earlier indices stay valid
            idx2rem = sorted(idx2rem, reverse=True)
            for idx in idx2rem:
                del segments[idx]
            labels = numpy.delete(labels, idx2rem, 0)
            distances = numpy.delete(numpy.delete(distances, idx2rem, 0),
                                     idx2rem, 1)
        else:
            idx2rem = None

        # prepare MDS
        seed = numpy.random.RandomState(seed=3)
        mds = manifold.MDS(n_components=2,
                           max_iter=3000,
                           eps=1e-9,
                           random_state=seed,
                           dissimilarity="precomputed",
                           n_jobs=1)
        pos = mds.fit(distances).embedding_
        # print(distances)

        # Rotate the data
        clf = PCA(n_components=2)

        pos = clf.fit_transform(pos)

        fig = self._fig
        axMDS, axSeg = self._axes  # type: plt.Axes, plt.Axes

        if idx2rem is not None:
            axSeg.text(
                0, -5,
                'Subsampled: {} of {} segments'.format(len(segments),
                                                       originalSegmentCount))

        # omit noise in cluster labels if types are plotted anyway.
        # The list is rebuilt rather than mutated while iterating over it,
        # which would skip the element following each removed one.
        if isinstance(segments[0], TypedSegment):
            ulab = [
                l for l in ulab if not (isinstance(l, str) and "Noise" in l)
            ]
        elif isinstance(segments[0],
                        RawMessage) and segments[0].messageType != "Raw":

            def isNoiseLabel(l):
                """True if the label converts to the integer -1."""
                try:
                    return int(l) == -1
                except ValueError:
                    # not a problem, just keep the cluster, since its not noise.
                    return False

            ulab = [l for l in ulab if not isNoiseLabel(l)]

        # prepare color space
        cIdx = [
            int(round(each))
            for each in numpy.linspace(2, self._cm.N - 2, len(ulab))
        ]
        if templates is None:
            templates = ulab
        # iterate unique labels and scatter plot each of these clusters
        for c, (l,
                t) in enumerate(zip(ulab,
                                    templates)):  # type: int, (Any, Template)
            # test with:
            # color = [list(numpy.random.randint(0, 10, 4) / 10)]
            # plt.scatter(numpy.random.randint(0,10,4), numpy.random.randint(0,10,4), c=color)
            lColor = self._cm(cIdx[c])
            class_member_mask = (labels == l)
            try:
                x = list(compress(pos[:, 0].tolist(), class_member_mask))
                y = list(compress(pos[:, 1].tolist(), class_member_mask))
                # "If you want to specify the same RGB or RGBA value for all points, use a 2-D array with a single row."
                # see https://matplotlib.org/api/_as_gen/matplotlib.pyplot.scatter.html:
                axMDS.scatter(
                    x,
                    y,
                    c=colors.to_rgba_array(lColor),
                    alpha=.6,
                    s=labsize,
                    # s=s-(c*s/len(ulab)),  #
                    lw=0,
                    label=str(l))
            except IndexError:
                # dump the inputs for debugging, then re-raise unchanged
                print(pos)
                print(distances)
                print(segments)
                raise

            if isinstance(t, Template):
                axSeg.plot(t.values, c=lColor, linewidth=4)

        # include field type labels for TypedSegments input
        if isinstance(segments[0], (TypedSegment, RawMessage)):
            if isinstance(segments[0], TypedSegment):
                ftypes = numpy.array([seg.fieldtype for seg in segments])  # PP
            elif isinstance(segments[0],
                            RawMessage) and segments[0].messageType != 'Raw':
                ftypes = numpy.array([msg.messageType
                                      for msg in segments])  # PP
            else:
                # no types available: utyp becomes empty and nothing is drawn
                ftypes = set()
            # identify unique types
            utyp = sorted(set(ftypes))
            # prepare color space
            # noinspection PyUnresolvedReferences
            cIdx = [
                int(round(each))
                for each in numpy.linspace(30, fcm.N - 30, len(utyp))
            ]
            # iterate unique types and scatter plot each of these groups
            for n, ft in enumerate(utyp):  # PP
                fColor = fcm(cIdx[n])
                type_member_mask = (ftypes == ft)
                x = list(compress(pos[:, 0].tolist(), type_member_mask))
                y = list(compress(pos[:, 1].tolist(), type_member_mask))
                # "If you want to specify the same RGB or RGBA value for all points, use a 2-D array with a single row."
                # see https://matplotlib.org/api/_as_gen/matplotlib.pyplot.scatter.html:
                axMDS.scatter(x,
                              y,
                              c=colors.to_rgba_array(fColor),
                              alpha=1,
                              s=typsize,
                              lw=0,
                              label=str(ft))

                if isinstance(segments[0], TypedSegment):
                    for seg in compress(segments, type_member_mask):
                        axSeg.plot(seg.values, c=fColor, alpha=0.05)
        elif isinstance(segments[0], MessageSegment):
            # untyped segments: overlay the values colored by cluster label
            for c, l in enumerate(ulab):
                lColor = self._cm(cIdx[c])
                class_member_mask = (labels == l)
                for seg in compress(segments, class_member_mask):
                    axSeg.plot(seg.values, c=lColor, alpha=0.1)
        else:
            axSeg.text(.5,
                       .5,
                       'nothing to plot\n(message alignment)',
                       horizontalalignment='center')

        # place the label/type legend at the best position
        if isinstance(segments[0], RawMessage):
            axMDS.legend(bbox_to_anchor=(1.04, 1),
                         scatterpoints=1,
                         shadow=False)
            axSeg.patch.set_alpha(0.0)
            axSeg.axis('off')
        else:
            axMDS.legend(scatterpoints=1, loc='best', shadow=False)

        if plotEdges:
            # plotting of edges takes a long time compared to the scatterplot (and especially when rendering the PDF)
            from matplotlib.collections import LineCollection
            # Plot the edges: one line per ordered pair of positions, in the
            # same row-major order as distances.flatten()
            lines = [[pos[i, :], pos[j, :]] for i in range(len(pos))
                     for j in range(len(pos))]
            values = numpy.abs(distances)
            # noinspection PyUnresolvedReferences
            lc = LineCollection(lines,
                                zorder=0,
                                cmap=plt.cm.Blues,
                                norm=plt.Normalize(0, values.max()))
            # lc.set_alpha(.1)
            lc.set_array(distances.flatten())
            lc.set_linewidths(0.5 * numpy.ones(len(segments)))
            axMDS.add_collection(lc)

        if countMarkers:
            # Count markers at identical positions and plot text with information about the markers at this position
            from collections import Counter
            import math
            if isinstance(segments[0], TypedSegment):
                # for typed segments the counted key is the field type
                coordCounter = Counter([
                    (posX, posY, seg.fieldtype)
                    for seg, posX, posY in zip(
                        segments, pos[:, 0].tolist(), pos[:, 1].tolist())
                ])
            else:
                coordCounter = Counter([
                    (posX, posY, lab) for lab, posX, posY in zip(
                        labels, pos[:, 0].tolist(), pos[:, 1].tolist())
                ])
            for (posX, posY, lab), cnt in coordCounter.items():
                if cnt > 1:
                    # offset the text by one unit in a direction derived
                    # from the label's hash
                    theta = hash(str(lab)) % 360
                    r = 1
                    posXr = posX + r * math.cos(theta)
                    posYr = posY + r * math.sin(theta)
                    # NOTE(review): the `withdash` keyword was removed from
                    # newer Matplotlib releases -- confirm the pinned version.
                    axMDS.text(posXr,
                               posYr,
                               "{}: {}".format(lab, cnt),
                               withdash=True)

        # A canvas without a toolbar (e.g. a non-interactive backend) has
        # nothing to refresh.
        toolbar = getattr(fig.canvas, "toolbar", None)
        if toolbar is not None:
            toolbar.update()
def plot_cluster(X, labels, model):
    """Draw the clustered variables of ``X`` as a labelled graph.

    Nodes are positioned with a 2-D locally linear embedding of ``X.T``,
    coloured by ``labels`` and connected by edges for partial correlations
    (derived from ``model.precision_``) whose magnitude exceeds 0.02.

    :param X: 2-D array; ``X.T`` is embedded, giving one node per column.
    :param labels: cluster label per node.
    :param model: fitted estimator exposing ``precision_``.
    :return: None. The figure is shown.

    The node names are read from the module-level ``names``, which is not a
    parameter of this function.
    """
    # We use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control). In addition, we
    # use a large number of neighbors to capture the large-scale structure.
    node_position_model = manifold.LocallyLinearEmbedding(n_components=2,
                                                          eigen_solver='dense',
                                                          n_neighbors=6)
    embedding = node_position_model.fit_transform(X.T).T

    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Display a graph of the partial correlations: rescale the precision
    # matrix by 1/sqrt(diag) on both sides and keep upper-triangle entries
    # whose magnitude exceeds 0.02.
    partial_correlations = model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding
    plt.scatter(embedding[0],
                embedding[1],
                s=100 * d**2,
                c=labels,
                cmap=plt.cm.spectral)

    # Plot the edges. Each segment is a pair of (x, y) end points.
    start_idx, end_idx = np.where(non_zero)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    # Without any edge above the threshold ``values.max()`` would raise on
    # the empty array, so the collection is only built when edges exist.
    if values.size:
        lc = LineCollection(segments,
                            zorder=0,
                            cmap=plt.cm.hot_r,
                            norm=plt.Normalize(0, .7 * values.max()))
        lc.set_array(values)
        lc.set_linewidths(15 * values)
        ax.add_collection(lc)

    # The highest label scales the edge colour of the text boxes. It is
    # derived from ``labels`` here instead of relying on a global; with a
    # single cluster it is 0, so divide by 1 to avoid evaluating 0/0.
    n_labels = np.max(labels)
    color_scale = float(n_labels) if n_labels != 0 else 1.0

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    for index, (name, label,
                (x, y)) in enumerate(zip(names, labels, embedding.T)):

        # Offsets to every node; the node's own entry is set to 1 so it is
        # not picked by the argmin below.
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x,
                 y,
                 name,
                 size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.spectral(label / color_scale),
                           alpha=.6))

    # Pad the axis limits by a fraction of the coordinate range. np.ptp is
    # used because the ndarray.ptp method no longer exists in NumPy 2.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(
        embedding[0].min() - .15 * x_span,
        embedding[0].max() + .10 * x_span,
    )
    plt.ylim(embedding[1].min() - .03 * y_span,
             embedding[1].max() + .03 * y_span)

    plt.show()
Esempio n. 18
0
# The first element holds the axis-0 (vertical axis) indices of the non-zero
# entries; the second element holds their axis-1 (horizontal axis) indices.
# (Presumably describes the start_idx/end_idx pair assigned above this
# fragment -- confirm.)

# a sequence of (*line0*, *line1*, *line2*), where::linen = (x0, y0), (x1, y1), ... (xm, ym)
segments = [[embedding[:, start], embedding[:, stop]]
            for start, stop in zip(start_idx, end_idx)]
# embedding is a two-dimensional array

values = np.abs(partial_correlations[non_zero])  # array left after masking with non_zero (15 elements in the original author's run)
lc = LineCollection(segments,
                    zorder=0,
                    cmap=plt.cm.hot_r,
                    norm=plt.Normalize(
                        0, .7 * values.max()))  # zorder: adjusts the drawing layer; cmap: colormap

lc.set_array(values)
lc.set_linewidths(6 * values)
ax.add_collection(lc)

# Add a label to each node. The challenge here is that we want to
# position the labels to avoid overlap with other labels
for index, (name, label, (x, y)) in enumerate(zip(names, labels, embedding.T)):
    dx = x - embedding[0]
    dx[index] = 1
    dy = y - embedding[1]
    dy[index] = 1
    this_dx = dx[np.argmin(np.abs(dy))]
    this_dy = dy[np.argmin(np.abs(dx))]
    if this_dx > 0:
        horizontalalignment = 'left'
        x = x + .002
    else:
Esempio n. 19
0
def plots_topography(dpa, ax_dendrogram, ax_project):
    """Draw a dendrogram and a 2D MDS projection of a clustering topography.

    Parameters
    ----------
    dpa : object
        Fitted estimator exposing ``labels_`` (cluster index per sample),
        ``densities_`` and ``topography_``. Each row of ``topography_`` is
        read as ``(cluster_i, cluster_j, border_density)``.
    ax_dendrogram : matplotlib axes
        Axes receiving the single-linkage dendrogram.
    ax_project : matplotlib axes
        Axes receiving the 2D projection of the clusters.

    Returns
    -------
    tuple
        ``(ax_dendrogram, ax_project)``, the same axes that were passed in.
    """
    Nclus_m = np.max(dpa.labels_) + 1
    cmap = plt.get_cmap('tab10', Nclus_m)

    # Convert from border densities to distances
    nd = Nclus_m * (Nclus_m - 1) // 2
    Dis = np.empty(nd, dtype=float)
    Fmax = max(dpa.densities_)
    Rho_bord = np.zeros((Nclus_m, Nclus_m), dtype=float)
    # NOTE(review): Dis is handed to scipy as a condensed distance matrix, so
    # this assumes topography_ has exactly nd rows in condensed order --
    # confirm against the estimator.
    for nl, row in enumerate(dpa.topography_):
        Rho_bord[row[0]][row[1]] = row[2]
        Rho_bord[row[1]][row[0]] = row[2]
        Dis[nl] = Fmax - row[2]

    # dendrogram representation
    DD = sp.cluster.hierarchy.single(Dis)
    sp.cluster.hierarchy.dendrogram(DD,
                                    color_threshold=0,
                                    above_threshold_color='k',
                                    ax=ax_dendrogram)
    # Colour every leaf label with the colour of the cluster it names.
    for lbl in ax_dendrogram.get_xmajorticklabels():
        lbl.set_color(cmap(int(lbl.get_text())))
        lbl.set_weight('bold')

    # 2D projection representation of the topography
    pop = np.zeros((Nclus_m), dtype=int)  # number of samples per cluster
    for label in dpa.labels_:
        pop[label] += 1
    model = manifold.MDS(n_components=2,
                         n_jobs=10,
                         dissimilarity='precomputed')
    # Dissimilarity between clusters: Fmax minus border density, zero on the
    # diagonal.
    d_dis = Fmax - Rho_bord
    np.fill_diagonal(d_dis, 0.)
    out = model.fit_transform(d_dis)
    ax_project.yaxis.set_major_locator(plt.NullLocator())
    ax_project.xaxis.set_major_locator(plt.NullLocator())
    # Marker area grows with the square root of the cluster population.
    s = [20. * sqrt(count) for count in pop]
    col = list(range(Nclus_m))
    ax_project.scatter(out[:, 0], out[:, 1], s=s, c=col, cmap=cmap)
    for i in range(Nclus_m):
        ax_project.annotate(i, (out[i, 0], out[i, 1]))

    # Edges: one segment per ordered cluster pair, with the border densities
    # rescaled to 0..100 for the colour array.
    d_dis = Rho_bord.copy()
    rr = np.amax(d_dis)
    if rr > 0.:
        d_dis = d_dis / rr * 100.
    n_points = len(out)
    segments = [[out[i, :], out[j, :]] for i in range(n_points)
                for j in range(n_points)]
    lc = LineCollection(segments,
                        zorder=0,
                        norm=plt.Normalize(0, np.abs(d_dis).max()))
    lc.set_array(d_dis.flatten())
    lc.set_edgecolor(np.full(len(segments), 'black'))
    lc.set_facecolor(np.full(len(segments), 'black'))
    # Line width is proportional to the raw (unscaled) border density.
    lc.set_linewidths(0.2 * Rho_bord.flatten())
    ax_project.add_collection(lc)
    return ax_dendrogram, ax_project
Esempio n. 20
0
def affinity_propagation_network(X, names=None):
    """
    Cluster (affinity propagation based on the correlation of) rows of X,
        printing out cluster contents and
        drawing a labeled network of the results, with darker edges for more
        correlated pairs.

    X can be an array or a pandas DataFrame.
    names are labels for the rows, which will be taken to be the indices of the dataframe, or the "names" column, or
    0..n-1 otherwise

    Very lightly adapted from
        http://scikit-learn.org/stable/auto_examples/applications/plot_stock_market.html#example-applications-plot-stock-market-py

    Author: Gael Varoquaux [email protected]
    License: BSD 3 clause

    The output of the 3 models are combined in a 2D graph where nodes represents the columns and edges the:
    * cluster labels are used to define the color of the nodes
    * the sparse covariance model is used to display the strength of the edges
    * the 2D embedding is used to position the nodes in the plan

    This example has a fair amount of visualization-related code, as visualization is crucial here to display the graph.
    One of the challenge is to position the labels minimizing overlap. For this we use an heuristic based on the
    direction of the nearest neighbor along each axis
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    X = X.copy()

    if isinstance(X, pd.DataFrame):
        if isinstance(names, string_types):
            names = X.pop(names)
        elif names is None:
            names = X.index.values
        # ``.values`` replaces ``as_matrix()``, which newer pandas removed.
        X = X.values.T
    elif names is None:
        # NOTE(review): array input is not transposed here, unlike the
        # DataFrame branch, yet the default names count rows -- confirm which
        # orientation callers pass.
        names = range(X.shape[0])
    # Boolean-mask indexing below needs an array, not a list or range.
    names = np.asarray(names)

    ###############################################################################
    # Learn a graphical structure from the correlations
    # scikit-learn renamed GraphLassoCV to GraphicalLassoCV; accept either.
    lasso_cv = (getattr(covariance, 'GraphicalLassoCV', None)
                or covariance.GraphLassoCV)
    edge_model = lasso_cv()

    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X /= X.std(axis=0)
    edge_model.fit(X)

    ###############################################################################
    # Cluster using affinity propagation

    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()
    # Guard the colour scaling below against a single cluster (n_labels == 0).
    color_scale = float(n_labels) or 1.0

    for i in range(n_labels + 1):
        members = ', '.join('%s' % (name,) for name in names[labels == i])
        print('Cluster %i: %s' % ((i + 1), members))

    ###############################################################################
    # Find a low-dimension embedding for visualization: find the best position of
    # the nodes (the stocks) on a 2D plane

    # We use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control). In addition, we
    # use a large number of neighbors to capture the large-scale structure.
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)

    embedding = node_position_model.fit_transform(X.T).T

    ###############################################################################
    # Visualization
    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Display a graph of the partial correlations
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding
    plt.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,
                cmap=plt.cm.nipy_spectral)

    # Plot the edges
    start_idx, end_idx = np.where(non_zero)
    # a sequence of (*line0*, *line1*, *line2*), where::
    #            linen = (x0, y0), (x1, y1), ... (xm, ym)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    # With no edge above the threshold, values.max() would raise; skip edges.
    if values.size:
        lc = LineCollection(segments,
                            zorder=0, cmap=plt.cm.hot_r,
                            norm=plt.Normalize(0, .7 * values.max()))
        lc.set_array(values)
        lc.set_linewidths(15 * values)
        ax.add_collection(lc)

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    for index, (name, label, (x, y)) in enumerate(
            zip(names, labels, embedding.T)):

        # Offsets to every node; the self-offset is forced to 1 so a node is
        # never selected as its own nearest neighbour.
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x, y, name, size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.nipy_spectral(label / color_scale),
                           alpha=.6))

    # np.ptp() is used because the ndarray.ptp() method is gone in NumPy 2.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(embedding[0].min() - .15 * x_span,
             embedding[0].max() + .10 * x_span)
    plt.ylim(embedding[1].min() - .03 * y_span,
             embedding[1].max() + .03 * y_span)

    plt.show()
Esempio n. 21
0
def plot_market_structure(names, labels, embedding, partial_correlations):
    """Draw a labelled network of nodes joined by partial-correlation edges.

    :param names: one label string per node.
    :param labels: array of integer cluster labels, one per node; used for
        node and label-box colours.
    :param embedding: array indexed as ``embedding[0]`` (x coordinates) and
        ``embedding[1]`` (y coordinates), one column per node.
    :param partial_correlations: square matrix (a precision matrix in the
        sibling examples); it is normalised by its diagonal on a private
        copy, so the caller's array is left untouched.
    :return: None. The figure is shown and then closed.
    """
    import matplotlib.pyplot as plt
    from matplotlib.collections import LineCollection
    # Visualization
    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Display a graph of the partial correlations.
    # Work on a copy: the in-place scaling below used to modify the caller's
    # matrix, so a second call would normalise it twice.
    partial_correlations = np.array(partial_correlations, dtype=float)
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]

    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding
    plt.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,
                cmap=plt.cm.nipy_spectral)

    # Plot the edges
    start_idx, end_idx = np.where(non_zero)
    # a sequence of (*line0*, *line1*, *line2*), where::
    #            linen = (x0, y0), (x1, y1), ... (xm, ym)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    try:
        lc = LineCollection(segments,
                            zorder=0, cmap=plt.cm.hot_r,
                            norm=plt.Normalize(0, .7 * values.max()))
        lc.set_array(values)
        lc.set_linewidths(8 * values)
    except ValueError:
        # Raised e.g. by values.max() when no edge passes the threshold.
        print("Warning: skip line normalization")
        lc = LineCollection(segments,
                            zorder=0, cmap=plt.cm.hot_r)
        lc.set_linewidths(1)
    ax.add_collection(lc)

    # Guard the colour scaling against a single cluster (max label == 0).
    color_scale = float(labels.max()) or 1.0

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    for index, (name, label, (x, y)) in enumerate(
            zip(names, labels, embedding.T)):

        # Offsets to every node; the self-offset is forced to 1 so a node is
        # never selected as its own nearest neighbour.
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x, y, name, size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 color='black',
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.nipy_spectral(label / color_scale),
                           alpha=.6))

    # np.ptp() is used because the ndarray.ptp() method is gone in NumPy 2.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(embedding[0].min() - .15 * x_span,
             embedding[0].max() + .10 * x_span)
    plt.ylim(embedding[1].min() - .03 * y_span,
             embedding[1].max() + .03 * y_span)

    plt.show()
    plt.close()
Esempio n. 22
0
def graphicalAnalysis_plot(d, partial_correlations, my_colors, names, labels,
                           embedding, val_max, title):
    """Plot a correlation network with a colour bar for edge strength.

    :param d: per-node array; node marker area is ``500 * d**2``.
    :param partial_correlations: square matrix; upper-triangle entries whose
        absolute value exceeds 0.02 become edges.
    :param my_colors: colours handed to ``plt.scatter`` for the nodes.
    :param names: one label string per node.
    :param labels: array of integer cluster labels, one per node.
    :param embedding: array indexed as ``embedding[0]`` (x) and
        ``embedding[1]`` (y), one column per node.
    :param val_max: the edge colour scale spans ``0 .. 0.7 * val_max``.
    :param title: figure title.
    :return: None. The figure is shown.
    """
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)
    n_labels = labels.max()
    # Guard the colour scaling against a single cluster (n_labels == 0).
    color_scale = float(n_labels) or 1.0

    # For correlation network graph
    fig = plt.figure(1, facecolor='w', figsize=(12, 5))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Plot the nodes using the coordinates of our embedding
    plt.scatter(embedding[0], embedding[1], s=500 * d**2, c=my_colors)

    # Plot the edges
    start_idx, end_idx = np.where(non_zero)
    # a sequence of (*line0*, *line1*, *line2*), where::
    #            linen = (x0, y0), (x1, y1), ... (xm, ym)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    lc = LineCollection(segments,
                        zorder=0,
                        cmap=plt.cm.hot_r,
                        norm=plt.Normalize(0, .7 * val_max))
    lc.set_array(values)
    # Width grows with edge strength but is capped at 5.
    lc.set_linewidths(np.minimum(15 * values, 5))
    ax.add_collection(lc)
    axcb = fig.colorbar(lc)
    axcb.set_label('Strength')

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    for index, (name, label,
                (x, y)) in enumerate(zip(names, labels, embedding.T)):

        # Offsets to every node; the self-offset is forced to 1 so a node is
        # never selected as its own nearest neighbour.
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x,
                 y,
                 name,
                 size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.nipy_spectral(label / color_scale),
                           alpha=.6))

    # np.ptp() is used because the ndarray.ptp() method is gone in NumPy 2.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(embedding[0].min() - .15 * x_span,
             embedding[0].max() + .10 * x_span)
    plt.ylim(embedding[1].min() - .03 * y_span,
             embedding[1].max() + .03 * y_span)
    plt.title(title)
    plt.show()
Esempio n. 23
0
def visual_stock_relationship(dataset, edge_model, labels, stock_names):
    """
    Visualize the clustering result as a labelled network.

    :param dataset: data set; its transpose is embedded in 2D to place nodes
    :param edge_model: fitted model exposing ``precision_``
    :param labels: cluster label of each node
    :param stock_names: display name of each node
    :return: None
    """

    # Reduce to two dimensions with LocallyLinearEmbedding (LLE):
    #   n_components  target dimensionality
    #   n_neighbors   number of neighbours searched for each sample
    #   eigen_solver  eigen-decomposition method ('arpack' or 'dense')
    node_position_model = manifold.LocallyLinearEmbedding(n_components=2,
                                                          n_neighbors=6,
                                                          eigen_solver='dense')
    # Fit the model and get the 2D coordinates, one column per node.
    embedding = node_position_model.fit_transform(dataset.T).T

    # Set up the figure; clf() clears it while keeping the window open so
    # that it can be reused.
    pyplot.figure(1, facecolor='w', figsize=(10, 8))
    pyplot.clf()
    # One axes filling the whole figure, with the axis decorations hidden.
    axe = pyplot.axes([0., 0., 1., 1.])
    pyplot.axis('off')

    # Partial correlations: the precision matrix scaled by
    # 1 / sqrt(diagonal) along both rows and columns.
    partial_correlations = edge_model.precision_.copy()
    d = 1 / numpy.sqrt(numpy.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, numpy.newaxis]
    # Keep only upper-triangle entries whose magnitude exceeds 0.02.
    non_zero = (numpy.abs(numpy.triu(partial_correlations, k=1)) > 0.02)

    # Draw the nodes at the embedded coordinates.
    pyplot.scatter(embedding[0],
                   embedding[1],
                   s=100 * d**2,
                   c=labels,
                   cmap=pyplot.cm.nipy_spectral)

    # Draw the edges; numpy.where returns the row/column indices of the
    # entries that passed the threshold.
    start_idx, end_idx = numpy.where(non_zero)
    # a sequence of (*line0*, *line1*, *line2*), where::
    #            linen = (x0, y0), (x1, y1), ... (xm, ym)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = numpy.abs(partial_correlations[non_zero])
    # A LineCollection draws all edges as a single artist.
    lc = LineCollection(segments,
                        zorder=0,
                        cmap=pyplot.cm.hot_r,
                        norm=pyplot.Normalize(0, .7 * values.max()))
    lc.set_array(values)
    lc.set_linewidths(15 * values)
    axe.add_collection(lc)

    # Add a label to every node, placed so as to avoid overlapping the
    # other labels.
    n_labels = max(labels)
    # Guard the colour scaling against a single cluster (n_labels == 0).
    color_scale = float(n_labels) or 1.0
    for index, (name, label,
                (x, y)) in enumerate(zip(stock_names, labels, embedding.T)):

        # Offsets to every node; the self-offset is forced to 1 so a node is
        # never selected as its own nearest neighbour.
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[numpy.argmin(numpy.abs(dy))]
        this_dy = dy[numpy.argmin(numpy.abs(dx))]

        # Pick the text alignment from the direction of those neighbours.
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .001
        else:
            horizontalalignment = 'right'
            x = x - .001
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .001
        else:
            verticalalignment = 'top'
            y = y - .001

        # Add the text label with a box coloured by cluster.
        pyplot.text(x,
                    y,
                    name,
                    size=10,
                    fontproperties='SimHei',
                    horizontalalignment=horizontalalignment,
                    verticalalignment=verticalalignment,
                    bbox=dict(facecolor='w',
                              edgecolor=pyplot.cm.nipy_spectral(
                                  label / color_scale),
                              alpha=.6))

    # Set the axis limits with a margin around the embedded points.
    # numpy.ptp() is used because ndarray.ptp() is gone in NumPy 2.
    x_span = numpy.ptp(embedding[0])
    y_span = numpy.ptp(embedding[1])
    pyplot.xlim(embedding[0].min() - .15 * x_span,
                embedding[0].max() + .10 * x_span)
    pyplot.ylim(embedding[1].min() - .03 * y_span,
                embedding[1].max() + .03 * y_span)
    pyplot.show()
# Synthetic data: uniform integer noise in [-50, 50) on top of a
# logarithmic trend, drawn from a fixed seed.
x = np.arange(n)
rs = check_random_state(0)
y = rs.randint(-50, 50, size=(n,)) + 50. * np.log(1 + x)

###############################################################################
# Fit the isotonic and the linear model on the same data

ir = IsotonicRegression()
y_ = ir.fit_transform(x, y)

lr = LinearRegression()
lr.fit(x.reshape(-1, 1), y)  # LinearRegression expects a 2-D feature array

###############################################################################
# Plot the data, both fits, and a vertical segment from each observation to
# its isotonic fit

segments = [[[i, observed], [i, fitted]]
            for i, (observed, fitted) in enumerate(zip(y, y_))]
lc = LineCollection(segments, zorder=0)
lc.set_array(np.ones(n))
lc.set_linewidths(np.full(n, 0.5))

fig = plt.figure()
plt.plot(x, y, 'r.', markersize=12)
plt.plot(x, y_, 'g.-', markersize=12)
plt.plot(x, lr.predict(x.reshape(-1, 1)), 'b-')
plt.gca().add_collection(lc)
plt.legend(('Data', 'Isotonic Fit', 'Linear Fit'), loc='lower right')
plt.title('Isotonic regression')
plt.show()
def showCovariances(names, variation):
    """Cluster the rows of ``variation`` and draw them as a labelled network.

    Fits a cross-validated graphical lasso on the standardised, transposed
    ``variation``, clusters its covariance with affinity propagation, prints
    the members of each cluster, and shows a 2D locally-linear embedding with
    edges taken from the partial correlations.

    :param names: one name per row of ``variation``.
    :param variation: 2-D float array; each row becomes one node.
    :return: None. Cluster contents are printed and the figure is shown.
    """
    # Boolean-mask indexing below needs an array, not a plain list.
    names = np.asarray(names)

    ###############################################################################
    # Learn a graphical structure from the correlations
    # scikit-learn renamed GraphLassoCV to GraphicalLassoCV; accept either.
    lasso_cv = (getattr(covariance, 'GraphicalLassoCV', None)
                or covariance.GraphLassoCV)
    edge_model = lasso_cv()

    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X = variation.copy().T
    X /= X.std(axis=0)
    edge_model.fit(X)

    ###############################################################################
    # Cluster using affinity propagation

    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()
    # Guard the colour scaling below against a single cluster (n_labels == 0).
    color_scale = float(n_labels) or 1.0

    for i in range(n_labels + 1):
        members = ', '.join('%s' % (name,) for name in names[labels == i])
        print('Cluster %i: %s' % ((i + 1), members))

    ###############################################################################
    # Find a low-dimension embedding for visualization: find the best position of
    # the nodes (the stocks) on a 2D plane

    # We use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control). In addition, we
    # use a large number of neighbors to capture the large-scale structure.
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)

    embedding = node_position_model.fit_transform(X.T).T

    ###############################################################################
    # Visualization
    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Display a graph of the partial correlations
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding
    plt.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,
                cmap=plt.cm.nipy_spectral)

    # Plot the edges
    start_idx, end_idx = np.where(non_zero)
    # a sequence of (*line0*, *line1*, *line2*), where::
    #            linen = (x0, y0), (x1, y1), ... (xm, ym)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    # With no edge above the threshold, values.max() would raise; skip edges.
    if values.size:
        lc = LineCollection(segments,
                            zorder=0, cmap=plt.cm.hot_r,
                            norm=plt.Normalize(0, .7 * values.max()))
        lc.set_array(values)
        lc.set_linewidths(15 * values)
        ax.add_collection(lc)

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    for index, (name, label, (x, y)) in enumerate(
            zip(names, labels, embedding.T)):

        # Offsets to every node; the self-offset is forced to 1 so a node is
        # never selected as its own nearest neighbour.
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x, y, name, size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.nipy_spectral(label / color_scale),
                           alpha=.6))

    # np.ptp() is used because the ndarray.ptp() method is gone in NumPy 2.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(embedding[0].min() - .15 * x_span,
             embedding[0].max() + .10 * x_span)
    plt.ylim(embedding[1].min() - .03 * y_span,
             embedding[1].max() + .03 * y_span)

    plt.show()
Esempio n. 26
0
    def visualize(self, cluster=False, savefile=None, doshow=True, seed=None, node_labels=None, label_idx=None,
                  mark_nodes=False):
        """
        Visualize the graph structure. The nodes positions are derived from the normalized PMI using the t-distributed
        stochastic neighbors embedding, while the graph edges are derived from the normalized PMI values. To reduce
        clutter, only those edges within top 5% of positive PMI values are drawn. The sizes of the nodes represent the
        marginal frequencies of the features represented by each node.

        :param cluster: If true, also cluster the nodes using affinity propagation and color them according to cluster
            label.
        :param savefile: The name of a file to save the figure to.
        :param doshow: If true, then display the figure.
        :param seed: The seed for the random number generator used for initialization of the t-distributed stochastic
            neighbors embedding.
        :param node_labels: A list of strings containing the labels for a set of nodes.
        :param label_idx: The indices of the nodes to be labeled.
        :param mark_nodes: If true, also mark the labeled nodes using a large green circle.
        :return: A tuple ``(ax, node_positions)``: the matplotlib axes and the 2-row array of node coordinates.
        :raises ValueError: If ``node_labels`` and ``label_idx`` differ in length.
        """
        if node_labels is None:
            node_labels = []
        if label_idx is None:
            label_idx = []
        if len(label_idx) != len(node_labels):
            raise ValueError("Length of node_labels must be the same as label_idx.")

        # use normalized PMI for similarity metric
        similarity = self.pmi / -np.log(self.joint_probs)
        similarity[np.diag_indices_from(similarity)] = 1.0

        # compute the 2-d manifold and the projection of the data onto it. this defines the node positions
        distance = -(similarity - 1.0)  # convert to [-2.0, 0.0] and then make positive
        node_position_model = manifold.TSNE(verbose=self.verbose, metric='precomputed', learning_rate=100,
                                            random_state=seed)
        node_positions = node_position_model.fit_transform(distance).T

        plt.figure(1, facecolor='k', figsize=(10, 8))
        plt.clf()
        ax = plt.axes([0., 0., 1., 1.])
        plt.axis('off')

        # Plot the nodes using the coordinates of our embedding
        base_symbol_size = self.train_marginal / float(self.train_marginal.max()) + 0.05
        if cluster:
            # color the nodes by cluster label
            clusters = self.cluster(normalize=True)
            plt.scatter(node_positions[0], node_positions[1], s=300 * base_symbol_size, c=clusters,
                        cmap=plt.cm.nipy_spectral_r)
        else:
            # a fixed colour needs no colormap
            plt.scatter(node_positions[0], node_positions[1], s=300 * base_symbol_size, c='DodgerBlue')

        # Display a graph of the features commonly found together based on pointwise mutual information (PMI)
        non_zero = np.triu(similarity, k=1) > np.percentile(similarity[similarity > 0], 95.0)

        start_idx, end_idx = np.where(non_zero)
        # a sequence of (*line0*, *line1*, *line2*), where::
        #            linen = (x0, y0), (x1, y1), ... (xm, ym)
        segments = [[node_positions[:, start], node_positions[:, stop]]
                    for start, stop in zip(start_idx, end_idx)]
        values = similarity[non_zero]
        # With no edge above the threshold, values.min() would raise; skip the edges in that case.
        if values.size:
            lc = LineCollection(segments,
                                zorder=0, cmap=plt.cm.hot,
                                norm=plt.Normalize(values.min(), np.percentile(values, 95.0)))
            lc.set_array(values)
            lc.set_linewidths(2 * values)
            ax.add_collection(lc)

        # np.ptp() is used because the ndarray.ptp() method is gone in NumPy 2.
        x_span = np.ptp(node_positions[0])
        y_span = np.ptp(node_positions[1])

        for label, node_idx in zip(node_labels, label_idx):
            if mark_nodes:
                plt.scatter(node_positions[0, node_idx], node_positions[1, node_idx], s=500, c='Green')
            # offset the text by 2% of the plot extent so it does not sit on the marker
            plt.text(node_positions[0, node_idx] + 0.02 * x_span,
                     node_positions[1, node_idx] + 0.02 * y_span,
                     label, size=20, color='White')

        plt.xlim(node_positions[0].min() - .15 * x_span,
                 node_positions[0].max() + .10 * x_span)
        plt.ylim(node_positions[1].min() - .03 * y_span,
                 node_positions[1].max() + .03 * y_span)

        if savefile is not None:
            plt.savefig(savefile, facecolor='k', edgecolor='Yellow')
        if doshow:
            plt.show()

        return ax, node_positions
Esempio n. 27
0
class HoughDemo(ImageProcessDemo):
    """Interactive demo of probabilistic Hough line and Hough circle detection.

    The image is Gaussian-blurred and Canny edge-detected; detected line
    segments and circles are drawn over it as matplotlib collections. All
    parameters are traits exposed through ``control_panel``.
    """
    TITLE = u"Hough Demo"
    DEFAULT_IMAGE = "stuff.jpg"
    # NOTE(review): "show_blur" is watched in __init__ but not listed here --
    # confirm whether that omission is intentional.
    SETTINGS = ["th2", "show_canny", "rho", "theta", "hough_th",
                "minlen", "maxgap", "dp", "mindist", "param2",
                "min_radius", "max_radius", "blur_sigma",
                "linewidth", "alpha", "check_line", "check_circle"]

    # which detections to draw
    check_line = Bool(True)
    check_circle = Bool(True)

    # Gaussian blur parameters
    blur_sigma = Range(0.1, 5.0, 2.0)
    show_blur = Bool(False)

    # Canny parameters (the lower threshold is th2 * 0.5, see draw())
    th2 = Range(0.0, 255.0, 200.0)
    show_canny = Bool(False)

    # HoughLinesP parameters (theta is given in degrees, see draw())
    rho = Range(1.0, 10.0, 1.0)
    theta = Range(0.1, 5.0, 1.0)
    hough_th = Range(1, 100, 40)
    minlen = Range(0, 100, 10)
    maxgap = Range(0, 20, 10)

    # HoughCircles parameters
    dp = Range(1.0, 5.0, 1.9)
    mindist = Range(1.0, 100.0, 50.0)
    param2 = Range(5, 100, 50)
    min_radius = Range(5, 100, 20)
    max_radius = Range(10, 100, 70)

    # draw parameters
    linewidth = Range(1.0, 3.0, 1.0)
    alpha = Range(0.0, 1.0, 0.6)

    def control_panel(self):
        """Return the traits UI layout: one labelled group per parameter set."""
        return VGroup(
            Group(
                Item("blur_sigma", label=u"标准方差"),
                Item("show_blur", label=u"显示结果"),
                label=u"高斯模糊参数"
            ),
            Group(
                Item("th2", label=u"阈值2"),
                Item("show_canny", label=u"显示结果"),
                label=u"边缘检测参数"
            ),
            Group(
                Item("rho", label=u"偏移分辨率(像素)"),
                Item("theta", label=u"角度分辨率(角度)"),
                Item("hough_th", label=u"阈值"),
                Item("minlen", label=u"最小长度"),
                Item("maxgap", label=u"最大空隙"),
                label=u"直线检测"
            ),
            Group(
                Item("dp", label=u"分辨率(像素)"),
                Item("mindist", label=u"圆心最小距离(像素)"),
                Item("param2", label=u"圆心检查阈值"),
                Item("min_radius", label=u"最小半径"),
                Item("max_radius", label=u"最大半径"),
                label=u"圆检测"
            ),
            Group(
                Item("linewidth", label=u"线宽"),
                Item("alpha", label=u"alpha"),
                HGroup(
                    Item("check_line", label=u"直线"),
                    Item("check_circle", label=u"圆"),
                ),
                label=u"绘图参数"
            )
        )

    def __init__(self, **kwargs):
        """Register the watched traits and create the overlay collections."""
        super(HoughDemo, self).__init__(**kwargs)
        self.connect_dirty("th2, show_canny, show_blur, rho, theta, hough_th,"
                           "min_radius, max_radius, blur_sigma,"
                           "minlen, maxgap, dp, mindist, param2, "
                           "linewidth, alpha, check_line, check_circle")
        # Empty collections; draw() fills them with the detected shapes.
        self.lines = LineCollection([], linewidths=2, alpha=0.6)
        self.axe.add_collection(self.lines)

        self.circles = EllipseCollection(
            [], [], [],
            units="xy",
            facecolors="none",
            edgecolors="red",
            linewidths=2,
            alpha=0.6,
            transOffset=self.axe.transData)

        self.axe.add_collection(self.circles)

    def _img_changed(self):
        """Cache a grayscale copy of the newly loaded BGR image."""
        self.img_gray = cv2.cvtColor(self.img, cv2.COLOR_BGR2GRAY)

    def draw(self):
        """Run blur, Canny and Hough detection, then refresh the overlays."""
        img_smooth = cv2.GaussianBlur(self.img_gray, (0, 0), self.blur_sigma, self.blur_sigma)
        img_edge = cv2.Canny(img_smooth, self.th2 * 0.5, self.th2)

        # Choose the background image.
        # NOTE(review): the blurred image is converted with COLOR_BAYER_BG2BGR
        # (a demosaicing code) whereas the edge image uses COLOR_GRAY2BGR --
        # confirm that this is intended.
        if self.show_blur and self.show_canny:
            show_img = cv2.cvtColor(np.maximum(img_smooth, img_edge), cv2.COLOR_BAYER_BG2BGR)
        elif self.show_blur:
            show_img = cv2.cvtColor(img_smooth, cv2.COLOR_BAYER_BG2BGR)
        elif self.show_canny:
            show_img = cv2.cvtColor(img_edge, cv2.COLOR_GRAY2BGR)
        else:
            show_img = self.img

        if self.check_line:
            theta = self.theta / 180.0 * np.pi  # degrees -> radians
            lines = cv2.HoughLinesP(img_edge,
                                    self.rho, theta, self.hough_th,
                                    minLineLength=self.minlen,
                                    maxLineGap=self.maxgap)

            if lines is not None:
                # OpenCV 2.x returns shape (1, N, 4), OpenCV >= 3 returns
                # (N, 1, 4). Reshaping the whole array keeps every segment in
                # both layouts; indexing [0] would keep only the first
                # segment on newer versions.
                self.lines.set_segments(lines.reshape(-1, 2, 2))
                self.lines.set_visible(True)
            else:
                self.lines.set_visible(False)
        else:
            self.lines.set_visible(False)

        if self.check_circle:
            # NOTE(review): the literal 3 is presumably the Hough gradient
            # method constant (cv2.HOUGH_GRADIENT) -- confirm.
            circles = cv2.HoughCircles(img_smooth, 3,
                                       self.dp, self.mindist,
                                       param1=self.th2,
                                       param2=self.param2,
                                       minRadius=self.min_radius,
                                       maxRadius=self.max_radius)

            if circles is not None:
                circles = circles[0]  # rows indexed below as (x, y, radius)
                # The collection's private size/angle arrays are overwritten
                # in place rather than rebuilding the collection.
                self.circles._heights = self.circles._widths = circles[:, 2]
                self.circles.set_offsets(circles[:, :2])
                self.circles._angles = np.zeros(len(circles))
                self.circles._transOffset = self.axe.transData
                self.circles.set_visible(True)
            else:
                self.circles.set_visible(False)
        else:
            self.circles.set_visible(False)

        self.lines.set_linewidths(self.linewidth)
        self.circles.set_linewidths(self.linewidth)
        self.lines.set_alpha(self.alpha)
        self.circles.set_alpha(self.alpha)

        self.draw_image(show_img)
Esempio n. 28
0
# Load the test split: column 3 is appended to the features and column 4 to
# the targets. The ``with`` block closes the file even if a row fails to parse.
with open('Datatest.csv') as f:
    for row in csv.reader(f):
        diabetes_X_test.append(float(row[3]))
        diabetes_y_test.append(float(row[4]))


# Fit an isotonic regression on the training split.
ir = IsotonicRegression()
y_ = ir.fit_transform(diabetes_X_train, diabetes_y_train)
#lr = LinearRegression()
#lr.fit(diabetes_X_train, diabetes_y_train)  # x needs to be 2d for LinearRegression

# Vertical segments from each training target to its fitted value (only drawn
# if the add_collection call below is re-enabled).
segments = [[[i, diabetes_y_train[i]], [i, y_[i]]] for i in range(len(diabetes_X_train))]
lc = LineCollection(segments, zorder=0)
lc.set_array(np.ones(len(diabetes_y_train)))
lc.set_linewidths(0.5 * np.ones(len(diabetes_X_train)))

fig = plt.figure()
#plt.plot(diabetes_X_train, diabetes_y_train, 'r.', markersize=12,color='green')
# The colour comes from the keyword only; the previous format strings named a
# different colour, which the keyword overrode.
plt.plot(diabetes_X_test, diabetes_y_test, '.', markersize=12, color='black')
#plt.plot(diabetes_X_train, y_, 'g.-', markersize=12,color='yellow')
plt.plot(diabetes_X_test, ir.predict(diabetes_X_test), '-', color='red')
#plt.gca().add_collection(lc)
print("a=",diabetes_X_test)
print("a=",ir.predict(diabetes_X_test))
# R^2 of the isotonic fit on the training split.
r1=r2_score(diabetes_y_train , ir.predict(diabetes_X_train), multioutput='variance_weighted')
print("r1=",r1)
#r2=r2_score(diabetes_y_test , ir.predict(diabetes_X_test), multioutput='variance_weighted')
#print("r2=",r2)

#plt.legend(('Data', 'Isotonic Fit', 'Linear Fit'), loc='lower right')
Esempio n. 29
0
    def relation_plot(self, df, good_list):
        """Cluster stocks by co-movement and save a relation graph as a PNG.

        For every code in ``good_list`` the daily percentage change between
        open and close is taken from ``df``.  A sparse inverse covariance is
        fitted on the standardized variations, the codes are grouped with
        affinity propagation, and the result is drawn on a 2-D locally linear
        embedding and written to ``/tmp/relation.png``.

        Args:
            df: pandas-style table with ``code``, ``open`` and ``close``
                columns.  Every code must contribute the same number of rows
                because the per-code series are stacked with ``np.vstack``.
            good_list: sequence of stock codes to include.

        Raises:
            KeyError: if a code is missing from the industry lookup built
                from ``IndustryInfo.get()``.
        """
        close_prices = np.vstack(
            [df[df.code == code].close.tolist() for code in good_list])
        open_prices = np.vstack(
            [df[df.code == code].open.tolist() for code in good_list])

        # the daily variations of the quotes are what carry most information
        variation = (close_prices - open_prices) * 100 / open_prices

        logger.info("get variation succeed")
        # #############################################################################
        # learn a graphical structure from the correlations
        # NOTE(review): newer scikit-learn releases name this estimator
        # GraphicalLassoCV - confirm against the pinned version.
        edge_model = covariance.GraphLassoCV()
        # standardize the time series: using correlations rather than covariance is more efficient for structure recovery
        X = variation.copy().T
        X /= X.std(axis=0)
        edge_model.fit(X)

        logger.info("mode compute succeed")
        # #############################################################################
        # cluster using affinity propagation
        _, labels = cluster.affinity_propagation(edge_model.covariance_)
        n_labels = labels.max()
        code_list = np.array(good_list)

        # Map every stock code to the name of the industry that lists it.
        industry_dict = dict()
        industry_df_info = IndustryInfo.get()
        # Series.items() replaces iteritems(), which pandas 2 removed.
        for index, name in industry_df_info.name.items():
            content = industry_df_info.loc[index]['content']
            for code in json.loads(content):
                industry_dict[code] = name

        cluster_dict = dict()
        for i in range(n_labels + 1):
            members = code_list[labels == i]
            cluster_dict[i] = members
            name_list = [CStockInfo.get(code, 'name') for code in members]
            logger.info('cluster code %i: %s' %
                        ((i + 1), ', '.join(name_list)))

        cluster_info = dict()
        for group, _code_list in cluster_dict.items():
            # Building the set in one go also covers an empty cluster.
            cluster_info[group] = {industry_dict[code] for code in _code_list}
            # Number the line by its own group; the previous loop's counter
            # would label every line with the last cluster number.
            logger.info('cluster inustry %i: %s' %
                        ((group + 1), ', '.join(cluster_info[group])))

        # #############################################################################
        # find a low-dimension embedding for visualization: find the best position of
        # the nodes (the stocks) on a 2D plane
        # we use a dense eigen_solver to achieve reproducibility (arpack is
        # initiated with random vectors that we don't control).
        node_position_model = manifold.LocallyLinearEmbedding(
            n_components=2, eigen_solver='dense', n_neighbors=6)
        embedding = node_position_model.fit_transform(X.T).T

        # #############################################################################
        # visualization
        plt.figure(1, facecolor='w', figsize=(10, 8))
        plt.clf()
        ax = plt.axes([0., 0., 1., 1.])
        plt.axis('off')

        # display a graph of the partial correlations: scale the precision
        # matrix so that its diagonal becomes one
        partial_correlations = edge_model.precision_.copy()
        d = 1 / np.sqrt(np.diag(partial_correlations))
        partial_correlations *= d
        partial_correlations *= d[:, np.newaxis]
        non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

        # plot the nodes using the coordinates of our embedding
        plt.scatter(embedding[0],
                    embedding[1],
                    s=100 * d**2,
                    c=labels,
                    cmap=plt.cm.nipy_spectral)

        # plot the edges
        start_idx, end_idx = np.where(non_zero)
        # a sequence of (*line0*, *line1*, *line2*), where:: linen = (x0, y0), (x1, y1), ... (xm, ym)
        segments = [[embedding[:, start], embedding[:, stop]]
                    for start, stop in zip(start_idx, end_idx)]
        values = np.abs(partial_correlations[non_zero])
        lc = LineCollection(segments,
                            zorder=0,
                            cmap=plt.cm.hot_r,
                            norm=plt.Normalize(0, .7 * values.max()))
        lc.set_array(values)
        lc.set_linewidths(15 * values)
        ax.add_collection(lc)

        # With a single cluster n_labels is 0; avoid dividing by zero when
        # scaling the label into the colormap range.
        label_scale = float(n_labels) if n_labels else 1.0

        # add a label to each node. The challenge here is that we want to position the labels to avoid overlap with other labels
        for index, (name, label,
                    (x, y)) in enumerate(zip(code_list, labels, embedding.T)):
            # Offsets to every other node; the node's own entry is set to 1
            # so that it is never picked as its own nearest neighbour.
            dx = x - embedding[0]
            dx[index] = 1
            dy = y - embedding[1]
            dy[index] = 1
            this_dx = dx[np.argmin(np.abs(dy))]
            this_dy = dy[np.argmin(np.abs(dx))]
            if this_dx > 0:
                horizontalalignment = 'left'
                x = x + .002
            else:
                horizontalalignment = 'right'
                x = x - .002
            if this_dy > 0:
                verticalalignment = 'bottom'
                y = y + .002
            else:
                verticalalignment = 'top'
                y = y - .002
            plt.text(x,
                     y,
                     name,
                     size=10,
                     horizontalalignment=horizontalalignment,
                     verticalalignment=verticalalignment,
                     bbox=dict(facecolor='w',
                               edgecolor=plt.cm.nipy_spectral(label /
                                                              label_scale),
                               alpha=.6))

        # np.ptp() works on every NumPy version; the ndarray.ptp() method was
        # removed in NumPy 2.
        x_span = np.ptp(embedding[0])
        y_span = np.ptp(embedding[1])
        plt.xlim(
            embedding[0].min() - .15 * x_span,
            embedding[0].max() + .10 * x_span,
        )
        plt.ylim(embedding[1].min() - .03 * y_span,
                 embedding[1].max() + .03 * y_span)
        plt.savefig('/tmp/relation.png', dpi=1000)
Esempio n. 30
0
def visual_stock_relationship(dataset, edge_model, labels, stock_names):
    """Draw the stock relation graph on a 2-D embedding and show it.

    Args:
        dataset: 2-D array whose columns are the nodes (stocks); its
            transpose is fed to the locally linear embedding.
        edge_model: fitted estimator exposing ``precision_``; the partial
            correlations derived from it define the edges.
        labels: cluster label for each node, used for node and label colours.
        stock_names: text to print next to each node, in node order.
    """
    node_position_model = manifold.LocallyLinearEmbedding(n_components=2,
                                                          eigen_solver='dense',
                                                          n_neighbors=6)
    embedding = node_position_model.fit_transform(dataset.T).T
    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Display a graph of the partial correlations: scale the precision
    # matrix so that its diagonal becomes one.
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding
    plt.scatter(embedding[0],
                embedding[1],
                s=100 * d**2,
                c=labels,
                cmap=plt.cm.nipy_spectral)

    # Plot the edges
    start_idx, end_idx = np.where(non_zero)
    # a sequence of (*line0*, *line1*, *line2*), where::
    #            linen = (x0, y0), (x1, y1), ... (xm, ym)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    lc = LineCollection(segments,
                        zorder=0,
                        cmap=plt.cm.hot_r,
                        norm=plt.Normalize(0, .7 * values.max()))
    lc.set_array(values)
    lc.set_linewidths(15 * values)
    ax.add_collection(lc)

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    n_labels = max(labels)
    # With a single cluster n_labels is 0; avoid dividing by zero when
    # scaling the label into the colormap range.
    label_scale = float(n_labels) if n_labels else 1.0
    for index, (name, label,
                (x, y)) in enumerate(zip(stock_names, labels, embedding.T)):
        # Offsets to every other node; the node's own entry is set to 1 so
        # that it is never picked as its own nearest neighbour.
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .001
        else:
            horizontalalignment = 'right'
            x = x - .001
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .001
        else:
            verticalalignment = 'top'
            y = y - .001
        plt.text(x,
                 y,
                 name,
                 size=10,
                 fontproperties='SimHei',
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.nipy_spectral(label /
                                                          label_scale),
                           alpha=.6))

    # np.ptp() works on every NumPy version; the ndarray.ptp() method was
    # removed in NumPy 2.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(
        embedding[0].min() - .15 * x_span,
        embedding[0].max() + .10 * x_span,
    )
    plt.ylim(embedding[1].min() - .03 * y_span,
             embedding[1].max() + .03 * y_span)
    plt.show()
Esempio n. 31
0
class SunPlotPy(wx.Frame, Spatial, Grid):
    """
    The main frame of the application

    The frame is a wx.Frame that also inherits from Spatial and Grid; the
    event handlers call ``Spatial.__init__`` / ``Grid.__init__`` on the frame
    itself when a file or grid folder is opened.
    """
    # Window caption passed to wx.Frame.__init__.
    title = 'sunplot(py)'

    # Plotting options
    autoclim = True  # True: colour limits follow the data min/max
    showedges = False  # True: cell edges are drawn in black
    bgcolor = 'k'
    textcolor = 'w'  # colour of the axes title
    cmap = 'RdBu'  # colormap used for the plotted collection
    particlesize = 1.8  # marker size passed to PTM.plot
    particlecolor = 'm'  # marker colour passed to PTM.plot

    # other flags
    # 'cells' or 'edges'; the old value is kept so that a change of type can
    # be detected when the figure is updated.
    collectiontype = 'cells'
    oldcollectiontype = 'cells'

    # Index of the currently selected entry in the time drop-down.
    tindex = 0
    # NOTE(review): presumably depth-contour levels for the bathymetry
    # overlay - confirm against the overlay handler.
    depthlevs = [0., 10., 100., 200., 300., 400., 500.,\
        1000.,2000.,3000.,4000.,5000]

    # Fill value handed to Spatial.__init__ when a SUNTANS file is opened.
    _FillValue = 999999

    def __init__(self):
        """Create the frame, then build its menu, status bar and main panel."""
        wx.Frame.__init__(self, None, -1, self.title)

        # Build the window parts in a fixed order.
        for build in (self.create_menu, self.create_status_bar,
                      self.create_main_panel):
            build()

        #self.draw_figure()

    def create_menu(self):
        """Build the File, Tools and Help menus and install the menu bar."""
        self.menubar = wx.MenuBar()

        def add_item(menu, label, help_text, handler):
            # Append one entry to *menu* and route its EVT_MENU to *handler*.
            item = menu.Append(-1, label, help_text)
            self.Bind(wx.EVT_MENU, handler, item)

        # --- File menu: loading data and saving output ---
        menu_file = wx.Menu()
        add_item(menu_file, "&Open file\tCtrl-O", "Open netcdf file",
                 self.on_open_file)
        add_item(menu_file, "&Load grid\tCtrl-G",
                 "Load SUNTANS grid from folder", self.on_load_grid)
        add_item(menu_file, "&Load PTM file\tCtrl-Shift-P", "Load a PTM file",
                 self.on_load_ptm)
        add_item(menu_file, "&Save animation of current scene\tCtrl-S",
                 "Save animation", self.on_save_anim)
        add_item(menu_file, "&Print current scene\tCtrl-P", "Save figure",
                 self.on_save_fig)
        menu_file.AppendSeparator()
        add_item(menu_file, "E&xit\tCtrl-X", "Exit", self.on_exit)

        # --- Tools menu: grid diagnostics and overlays ---
        menu_tools = wx.Menu()
        add_item(menu_tools, "&Plot grid size statistics", "SUNTANS grid size",
                 self.on_plot_gridstat)
        add_item(menu_tools, "&Count # grid cells", "Grid cell count",
                 self.on_count_cells)
        add_item(menu_tools, "&Overlay depth contours", "Depth overlay",
                 self.on_overlay_bathy)

        # --- Help menu ---
        menu_help = wx.Menu()
        add_item(menu_help, "&About\tF1", "About the demo", self.on_about)

        # Attach the menus to the bar in display order.
        for menu, caption in ((menu_file, "&File"),
                              (menu_tools, "&Tools"),
                              (menu_help, "&Help")):
            self.menubar.Append(menu, caption)
        self.SetMenuBar(self.menubar)

    def create_main_panel(self):
        """Build the main panel and lay out everything on it.

        The panel holds the matplotlib canvas, its navigation toolbar and the
        control widgets (variable / time / depth drop-downs, edge toggle,
        colormap chooser and manual colour-limit boxes).
        """
        panel = self.panel = wx.Panel(self)

        # Figure of 7 x 6 inches at 100 dots-per-inch on a wx canvas.
        self.dpi = 100
        self.fig = Figure((7.0, 6.0), dpi=self.dpi)
        self.canvas = FigCanvas(panel, -1, self.fig)

        # add_subplot rather than add_axes so that the subplot configuration
        # tool in the navigation toolbar keeps working.
        self.axes = self.fig.add_subplot(111)

        # ---- small factories for the repeated widget patterns ----
        def dropdown(choices, width, handler):
            # Read-only combo box whose selection event goes to *handler*.
            box = wx.ComboBox(panel,
                              size=(width, -1),
                              choices=choices,
                              style=wx.CB_READONLY)
            box.Bind(wx.EVT_COMBOBOX, handler)
            return box

        def checkbox(label, handler):
            # Right-aligned check box whose toggle event goes to *handler*.
            box = wx.CheckBox(panel, -1, label, style=wx.ALIGN_RIGHT)
            box.Bind(wx.EVT_CHECKBOX, handler)
            return box

        def entry(handler):
            # Text box that calls *handler* when Enter is pressed.
            ctrl = wx.TextCtrl(panel,
                               size=(100, -1),
                               style=wx.TE_PROCESS_ENTER)
            ctrl.Bind(wx.EVT_TEXT_ENTER, handler)
            return ctrl

        def caption(text):
            # Fixed-width label matching the drop-down width.
            return wx.StaticText(panel, -1, text, size=(200, -1))

        # ---- widgets (created in the same order as before) ----
        self.variable_list = dropdown(['Select a variable...'], 200,
                                      self.on_select_variable)
        self.time_list = dropdown(['Select a time step...'], 200,
                                  self.on_select_time)
        self.depthlayer_list = dropdown(['Select a vertical layer...'], 200,
                                        self.on_select_depth)
        self.show_edge_check = checkbox("Show Edges", self.on_show_edges)

        # Colormap names: every cmocean map plus its reversed variant, or
        # matplotlib's standard set.
        if USECMOCEAN:
            names = [
                variant for base in cm.cmapnames
                for variant in (base, base + '_r')
            ]
        else:
            names = list(matplotlib.cm.datad.keys())
        cmaps = sorted(names)

        self.colormap_list = dropdown(cmaps, 100, self.on_select_cmap)
        self.colormap_label = wx.StaticText(panel, -1, "Colormap:")

        self.clim_check = checkbox("Manual color limits ", self.on_clim_check)
        self.climlow = entry(self.on_climlow)
        self.climhigh = entry(self.on_climhigh)

        self.variable_label = caption("Variable:")
        self.time_label = caption("Time step:")
        self.depth_label = caption("Vertical level:")

        # Navigation toolbar, tied to the canvas.
        self.toolbar = NavigationToolbar(self.canvas)

        # ---- layout with box sizers ----
        self.vbox = wx.BoxSizer(wx.VERTICAL)
        self.vbox.Add(self.canvas, 1, wx.LEFT | wx.TOP | wx.GROW)
        self.vbox.Add(self.toolbar, 0, wx.EXPAND)
        self.vbox.AddSpacer(10)

        flags = wx.ALIGN_LEFT | wx.ALL | wx.ALIGN_CENTER_VERTICAL

        def row(*widgets):
            # Horizontal sizer holding *widgets* with a uniform 10 px border.
            sizer = wx.BoxSizer(wx.HORIZONTAL)
            for widget in widgets:
                sizer.Add(widget, 0, border=10, flag=flags)
            return sizer

        # The two 5 px spacers are added to the vertical sizer between the
        # row constructions, i.e. ahead of all three rows.
        self.hbox0 = row(self.show_edge_check, self.colormap_label,
                         self.colormap_list, self.clim_check, self.climlow,
                         self.climhigh)
        self.vbox.AddSpacer(5)
        self.hbox1 = row(self.variable_label, self.time_label,
                         self.depth_label)
        self.vbox.AddSpacer(5)
        self.hbox2 = row(self.variable_list, self.time_list,
                         self.depthlayer_list)

        # Labels first, then the drop-downs, then the colour controls.
        for sizer in (self.hbox1, self.hbox2, self.hbox0):
            self.vbox.Add(sizer, 0, flag=wx.ALIGN_LEFT | wx.TOP)

        self.panel.SetSizer(self.vbox)
        self.vbox.Fit(self)

    ##########
    # Event functions
    ##########

    def create_figure(self):
        """
        Creates the figure

        Builds a new collection (polygons for cell data, line segments for
        edge data) from ``self.data``, replaces any collection drawn earlier,
        sets the title and axis labels, creates or refreshes the colorbar and
        redraws the canvas.  On the first call the axes limits and aspect are
        initialised from ``self.xlims`` / ``self.ylims``.
        """
        # Find the colorbar limits if unspecified
        if self.autoclim:
            self.clim = [self.data.min(), self.data.max()]
            self.climlow.SetValue('%3.1f' % self.clim[0])
            self.climhigh.SetValue('%3.1f' % self.clim[1])

        if 'collection' in self.__dict__:
            # Detach the previous collection through the artist itself:
            # Axes.collections can no longer be modified in place on current
            # Matplotlib releases.
            self.collection.remove()
        else:
            # First call - set the axes limits
            self.axes.set_aspect('equal')
            self.axes.set_xlim(self.xlims)
            self.axes.set_ylim(self.ylims)

        values = np.array(self.data[:])
        if self.collectiontype == 'cells':
            self.collection = PolyCollection(self.xy, cmap=self.cmap)
            self.collection.set_array(values)
            if not self.showedges:
                # Colour each edge like its face so that no outline shows.
                self.collection.set_edgecolors(
                    self.collection.to_rgba(values))
        elif self.collectiontype == 'edges':
            # One two-point segment per edge, built from the node coordinates.
            xylines = [self.xp[self.edges], self.yp[self.edges]]
            linesc = [
                list(zip(xylines[0][ii, :], xylines[1][ii, :]))
                for ii in range(self.Ne)
            ]
            self.collection = LineCollection(linesc,
                                             array=values,
                                             cmap=self.cmap)

        self.collection.set_clim(vmin=self.clim[0], vmax=self.clim[1])

        self.axes.add_collection(self.collection)
        self.title = self.axes.set_title(self.genTitle(), color=self.textcolor)
        self.axes.set_xlabel('Easting [m]')
        self.axes.set_ylabel('Northing [m]')

        # create a colorbar on the first call, afterwards point the existing
        # one at the new collection
        if 'cbar' not in self.__dict__:
            self.cbar = self.fig.colorbar(self.collection)
        else:
            print('Updating colorbar...')
            # update_normal() is the call update_figure() already uses;
            # on_mappable_changed() is gone from current Matplotlib.
            self.cbar.update_normal(self.collection)

        self.canvas.draw()

    def update_figure(self):
        """Push the current data, colour limits and edge style to the plot.

        When the variable's type has changed between cell and edge data the
        figure is rebuilt with ``create_figure`` first.  The title and the
        colorbar are refreshed and the canvas is redrawn.
        """
        if self.autoclim:
            # Limits follow the data and are echoed into the text boxes.
            low, high = self.data.min(), self.data.max()
            self.clim = [low, high]
            self.climlow.SetValue('%3.1f' % low)
            self.climhigh.SetValue('%3.1f' % high)
        else:
            # Manual limits are read back from the text boxes.
            self.clim = [
                float(self.climlow.GetValue()),
                float(self.climhigh.GetValue()),
            ]

        # Decide whether the variable lives on edges or on cells.
        if self.hasDim(self.variable, self.griddims['Ne']):
            self.collectiontype = 'edges'
        elif self.hasDim(self.variable, self.griddims['Nc']):
            self.collectiontype = 'cells'

        # A switch between cells and edges needs a brand new collection.
        if self.collectiontype != self.oldcollectiontype:
            self.create_figure()
            self.oldcollectiontype = self.collectiontype

        values = np.array(self.data[:])
        self.collection.set_array(values)
        self.collection.set_clim(vmin=self.clim[0], vmax=self.clim[1])

        # Edge styling applies to cell polygons only.
        if self.collectiontype == 'cells':
            if self.showedges:
                self.collection.set_edgecolors('k')
                self.collection.set_linewidths(0.2)
            else:
                self.collection.set_edgecolors(
                    self.collection.to_rgba(values))

        self.title = self.axes.set_title(self.genTitle(), color=self.textcolor)
        self.cbar.update_normal(self.collection)
        self.canvas.draw()

    def on_pick(self, event):
        """Show a modal dialog with the bounding box of the picked artist.

        ``event`` is the matplotlib pick event; only the bounding-box corner
        points of ``event.artist`` are used.
        """
        corners = event.artist.get_bbox().get_points()
        dialog = wx.MessageDialog(
            self,
            "You've clicked on a bar with coords:\n %s" % corners,
            "Click!",
            wx.OK | wx.ICON_INFORMATION,
        )
        dialog.ShowModal()
        dialog.Destroy()

    def on_select_variable(self, event):
        """Load the chosen variable, refill the depth drop-down and redraw."""
        chosen = event.GetString()
        self.flash_status_message("Selecting variable: %s" % chosen)

        # Point the spatial object at the new variable and load its data.
        self.variable = chosen
        self.loadData(variable=self.variable)

        # Entries for the vertical-layer box: one formatted depth per layer
        # when the variable has a depth coordinate, a single blank otherwise.
        if self.hasDim(self.variable, self.griddims['Nk']):
            layer_names = ['%3.1f' % self.z_r[k] for k in range(self.Nkmax)]
            layer_names.extend(['surface', 'seabed'])
        elif self.hasDim(self.variable, 'Nkw'):
            layer_names = [
                '%3.1f' % self.z_w[k] for k in range(self.Nkmax + 1)
            ]
        else:
            layer_names = ['']

        self.depthlayer_list.SetItems(layer_names)

        self.update_figure()

    def on_select_time(self, event):
        """Handle a selection in the time drop-down.

        For hydro output the data of the chosen time step is loaded and the
        figure updated, unless that step is already displayed.  For particle
        output the particles of the chosen step are plotted on the current
        axes extent.  Nothing is drawn when no file has been opened yet.
        """
        self.tindex = event.GetSelection()

        # The placeholder entry can be picked before any file has set
        # plot_type; treat that case as "nothing to plot".
        plot_type = getattr(self, 'plot_type', None)

        # Update the object time index and reload the data
        if plot_type == 'hydro':
            if not self.tstep == self.tindex:
                self.tstep = self.tindex
                self.loadData()
                self.flash_status_message("Selecting time step: %s..." %
                                          event.GetString())

                # Update the plot
                self.update_figure()
        elif plot_type == 'particles':
            self.PTM.plot(self.tindex,
                          ax=self.axes,
                          xlims=self.axes.get_xlim(),
                          ylims=self.axes.get_ylim())

            self.canvas.draw()

    def on_select_depth(self, event):
        """Switch to the chosen vertical layer, reload the data and redraw."""
        kindex = event.GetSelection()
        if self.klayer[0] == kindex:
            # Already showing this layer.
            return

        label = event.GetString()
        # Entries at or past Nkmax (the seabed / surface choices) are passed
        # on by their text instead of by index.
        layer = label if kindex >= self.Nkmax else kindex
        self.klayer = [layer]
        self.loadData()
        self.flash_status_message("Selecting depth: %s..." % label)

        self.update_figure()

    def on_open_file(self, event):
        """Ask for one or more NetCDF files and load them as hydro output.

        The chosen filter decides how the files are read: the UnTRIM filter
        initialises the frame through ``UNTRIMSpatial``, every other filter
        through ``Spatial``.  Afterwards the variable and time drop-downs are
        filled and, when the start variable is available, it is loaded and
        the figure is created.
        """
        file_choices = "SUNTANS NetCDF (*.nc)|*.nc*|UnTRIM NetCDF (*.nc)|*.nc*|All Files (*.*)|*.*"

        dlg = wx.FileDialog(self,
                            message="Open SUNTANS file...",
                            defaultDir=os.getcwd(),
                            defaultFile="",
                            wildcard=file_choices,
                            style=wx.FD_MULTIPLE)
        try:
            if dlg.ShowModal() != wx.ID_OK:
                return
            path = dlg.GetPaths()
            filter_index = dlg.GetFilterIndex()
        finally:
            # wx dialogs are not released automatically.
            dlg.Destroy()

        self.plot_type = 'hydro'

        # Initialise the class
        if filter_index == 1:  #UnTRIM
            self.flash_status_message("Opening UnTRIMS file: %s" % path)
            UNTRIMSpatial.__init__(self, path)
            startvar = 'Mesh2_face_depth'
        else:  #SUNTANS (also used for "All Files")
            self.flash_status_message("Opening SUNTANS file: %s" % path)
            try:
                Spatial.__init__(self, path, _FillValue=self._FillValue)
            except Exception:
                # Retry with the negative fill value.
                Spatial.__init__(self, path, _FillValue=-999999)
            startvar = 'dv'

        # Populate the drop down menus
        vnames = self.listCoordVars()
        self.variable_list.SetItems(vnames)

        # Update the time drop down list
        if 'time' in self.__dict__:
            self.timestr = [
                datetime.strftime(tt, '%d-%b-%Y %H:%M:%S')
                for tt in self.time
            ]
        else:
            # Assume that it is a harmonic-type file
            self.timestr = self.nc.Constituent_Names.split()

        self.time_list.SetItems(self.timestr)

        # Draw the depth
        if startvar in vnames:
            self.variable = startvar
            self.loadData()
            self.create_figure()

    def on_load_grid(self, event):
        """Ask for a grid folder, load the grid from it and draw the mesh.

        The frame is re-initialised as a ``Grid`` from the chosen folder, any
        collection drawn earlier is removed, and the mesh returned by
        ``plotmesh`` becomes the current collection.
        """
        dlg = wx.DirDialog(self,
                           message="Open SUNTANS grid from folder...",
                           defaultPath=os.getcwd(),
                           style=wx.DD_DEFAULT_STYLE)
        try:
            if dlg.ShowModal() != wx.ID_OK:
                return
            path = dlg.GetPath()
        finally:
            # wx dialogs are not released automatically.
            dlg.Destroy()

        # Initialise the class
        self.flash_status_message("Opening SUNTANS grid from folder: %s" %
                                  path)
        Grid.__init__(self, path)

        # Plot the Grid
        if 'collection' in self.__dict__:
            # Detach through the artist itself: Axes.collections can no
            # longer be modified in place on current Matplotlib releases.
            self.collection.remove()

        self.axes, self.collection = self.plotmesh(ax=self.axes,
                                                   edgecolors='y')

        # redraw the figure
        self.canvas.draw()

    def on_load_ptm(self, event):
        """Ask for a particle-tracking file, load it and plot its last step.

        The NetCDF filter loads the file with ``PtmNC`` and the binary filter
        with ``PtmBin``; under "All Files" the reader is chosen from the file
        name (``*_bin.out`` is read as binary, anything else as NetCDF).  The
        time drop-down is refilled from the loaded file.
        """
        file_choices = "PTM NetCDF (*.nc)|*.nc|PTM Binary (*_bin.out)|*_bin.out|All Files (*.*)|*.*"

        # Single-file dialog: GetPath() must not be used together with
        # wx.FD_MULTIPLE.
        dlg = wx.FileDialog(self,
                            message="Open PTM file...",
                            defaultDir=os.getcwd(),
                            defaultFile="",
                            wildcard=file_choices,
                            style=wx.FD_OPEN)
        try:
            if dlg.ShowModal() != wx.ID_OK:
                return
            path = dlg.GetPath()
            filter_index = dlg.GetFilterIndex()
        finally:
            # wx dialogs are not released automatically.
            dlg.Destroy()

        # Initialise the class
        is_binary = filter_index == 1 or (filter_index > 1
                                          and path.endswith('_bin.out'))
        if is_binary:
            self.flash_status_message("Opening PTM binary file: %s" % path)
            self.PTM = PtmBin(path)
        else:
            self.flash_status_message("Opening PTM netcdf file: %s" % path)
            self.PTM = PtmNC(path)

        # Only switch the plot mode once the file has actually been loaded.
        self.plot_type = 'particles'
        self.Nt = self.PTM.nt

        # Update the time drop down list
        self.timestr = [
            datetime.strftime(tt, '%d-%b-%Y %H:%M:%S')
            for tt in self.PTM.time
        ]
        self.time_list.SetItems(self.timestr)

        # Plot the last time step, inside the known axes limits when a
        # hydro file or grid has already defined them.
        plot_kwargs = dict(ax=self.axes,
                           color=self.particlecolor,
                           fontcolor='w',
                           markersize=self.particlesize)
        if 'xlims' in self.__dict__:
            plot_kwargs.update(xlims=self.xlims, ylims=self.ylims)
        self.PTM.plot(self.PTM.nt - 1, **plot_kwargs)

        # redraw the figure
        self.canvas.draw()

    def on_show_edges(self, event):
        """Copy the sending checkbox's state into ``showedges`` and redraw."""
        self.showedges = event.GetEventObject().GetValue()
        self.update_figure()

    def on_clim_check(self, event):
        """Toggle between automatic and manual colour limits.

        Ticking the box turns automatic limits off and redraws; clearing it
        turns them back on without redrawing.
        """
        manual = event.GetEventObject().GetValue()
        self.autoclim = not manual
        if manual:
            self.update_figure()

    def on_climlow(self, event):
        """Store the lower colour limit entered in the text box.

        The text is converted to a float so that ``self.clim`` never holds a
        string; text that is not a number is reported in the status bar and
        the stored limit is left alone.  The figure is not redrawn here.
        """
        text = event.GetString()
        try:
            self.clim[0] = float(text)
        except ValueError:
            self.flash_status_message("Invalid lower color limit: %s" % text)

    def on_climhigh(self, event):
        """Store the upper colour limit entered in the text box.

        The text is converted to a float so that ``self.clim`` never holds a
        string; text that is not a number is reported in the status bar and
        the stored limit is left alone.  The figure is not redrawn here.
        """
        text = event.GetString()
        try:
            self.clim[1] = float(text)
        except ValueError:
            self.flash_status_message("Invalid upper color limit: %s" % text)

    def on_select_cmap(self, event):
        """Apply the colormap picked in the drop-down and redraw."""
        self.cmap = event.GetString()
        # cmocean maps are looked up as attributes of its module; matplotlib
        # maps are passed by name.
        colormap = getattr(cm, self.cmap) if USECMOCEAN else self.cmap
        self.collection.set_cmap(colormap)

        self.update_figure()

    def on_save_fig(self, event):
        """
        Save a figure of the current scene to a file

        The extension that belongs to the chosen filter is appended unless
        the entered name already ends with it.
        """
        # One "description|pattern" pair per format; ``filters`` lists the
        # matching extensions in the same order.
        file_choices = "PNG (*.png)|*.png|PDF (*.pdf)|*.pdf|JPEG (*.jpg)|*.jpg|EPS (*.eps)|*.eps"
        filters = ['.png', '.pdf', '.jpg', '.eps']

        dlg = wx.FileDialog(self,
                            message="Save figure to file...",
                            defaultDir=os.getcwd(),
                            defaultFile="",
                            wildcard=file_choices,
                            style=wx.FD_SAVE | wx.FD_OVERWRITE_PROMPT)
        try:
            if dlg.ShowModal() != wx.ID_OK:
                return
            path = dlg.GetPath()
            ext = filters[dlg.GetFilterIndex()]
        finally:
            # wx dialogs are not released automatically.
            dlg.Destroy()

        # Compare against the end of the name only; the extension text may
        # also occur elsewhere in the path.
        if path.lower().endswith(ext):
            outfile = path
        else:
            outfile = path + ext

        self.fig.savefig(outfile)

    def on_save_anim(self, event):
        """
        Save an animation of the current scene to a file

        Opens a save dialog offering mov, gif, avi and mp4 output, renders
        ``self.Nt`` frames through ``animation.FuncAnimation`` and writes them
        with the writer chosen for the selected extension. Afterwards the
        time step is reset to ``self.tindex`` and, for non-particle plots, the
        data is reloaded and the figure redrawn. Nothing happens if the dialog
        is cancelled.
        """
        # One "description|pattern" pair per entry of ``filters``, same order
        file_choices = "Quicktime (*.mov)|*.mov|(*.gif)|*.gif|(*.avi)|*.avi|(*.mp4)|*.mp4"
        filters = ['.mov', '.gif', '.avi', '.mp4']

        dlg = wx.FileDialog(self,
                            message="Output animation file...",
                            defaultDir=os.getcwd(),
                            defaultFile="",
                            wildcard=file_choices,
                            style=wx.FD_SAVE | wx.FD_OVERWRITE_PROMPT)
        try:
            if dlg.ShowModal() != wx.ID_OK:
                return
            path = dlg.GetPath()
            ext = filters[dlg.GetFilterIndex()]
        finally:
            # Release the native dialog whether or not a file was chosen
            dlg.Destroy()

        # Only a trailing match counts as "already has the extension"
        if path.lower().endswith(ext):
            outfile = path
        else:
            outfile = path + ext
        self.flash_status_message("Saving animation to file: %s" % outfile)

        # Both callbacks return the artists that are redrawn when blitting
        def initanim():
            if self.plot_type == 'particles':
                return (self.PTM.title, self.PTM.p_handle)
            return (self.title, self.collection)

        def updateScalar(i):
            if self.plot_type == 'particles':
                self.PTM.plot(i, ax=self.axes,
                              xlims=self.axes.get_xlim(),
                              ylims=self.axes.get_ylim())
                return (self.PTM.title, self.PTM.p_handle)
            self.tstep = [i]
            self.loadData()
            self.update_figure()
            return (self.title, self.collection)

        self.anim = animation.FuncAnimation(
            self.fig, updateScalar, init_func=initanim,
            frames=self.Nt, interval=50, blit=True)

        # NOTE(review): the 'mencoder' writer is not available in every
        # matplotlib release - confirm against the version this tool targets.
        if ext == '.gif':
            self.anim.save(outfile, writer='imagemagick', fps=6)
        elif ext == '.mp4':
            print('Saving html5 video...')
            # Ensures html5 compatibility (mencoder options)
            self.anim.save(outfile, writer='mencoder', fps=6,
                           bitrate=3600, extra_args=['-ovc', 'x264'])
        else:
            self.anim.save(outfile, writer='mencoder', fps=6, bitrate=3600)

        # Return the figure back to its status
        del self.anim
        self.tstep = self.tindex
        if not self.plot_type == 'particles':
            self.loadData()
            self.update_figure()

        # Bring up a dialog box
        dlg2 = wx.MessageDialog(self, 'Animation complete.', "Done", wx.OK)
        dlg2.ShowModal()
        dlg2.Destroy()

    def on_exit(self, event):
        """Close the frame by destroying it; *event* is not used."""
        self.Destroy()

    def on_about(self, event):
        """Show the modal "About" box with the tool's credits."""
        about_text = """ SUNTANS NetCDF visualization tool

            *Author: Matt Rayson
            *Institution: Stanford University
            *Created: October 2013
        """
        box = wx.MessageDialog(self, about_text, "About", wx.OK)
        box.ShowModal()
        box.Destroy()

    def on_count_cells(self, eveny):
        """Show the result of ``self.count_cells()`` in a modal message box;
        the event argument is not used."""
        total = self.count_cells()
        box = wx.MessageDialog(self,
                               "Total 3-D grid cells = %d" % (total),
                               "No. cells",
                               wx.OK)
        box.ShowModal()
        box.Destroy()

    def on_overlay_bathy(self, event):
        """Overlay unfilled grey contours of ``self.dv`` at the levels in
        ``self.depthlevs`` on ``self.axes``; progress is printed to stdout."""
        print('Plotting contours...')
        contour_style = dict(filled=False,
                             colors='0.5',
                             linewidths=0.5,
                             zorder=1e6)
        self.contourf(z=self.dv,
                      clevs=self.depthlevs,
                      ax=self.axes,
                      **contour_style)
        print('Done')

    def on_plot_gridstat(self, event):
        """
        Open a new pyplot figure, draw the histogram produced by
        ``self.plothist()`` into it and show it
        """
        pyplot = matplotlib.pyplot
        pyplot.figure()
        self.plothist()
        pyplot.show()

    def create_status_bar(self):
        """Create the frame's status bar and keep it as ``self.statusbar``."""
        bar = self.CreateStatusBar()
        self.statusbar = bar

    def flash_status_message(self, msg, flash_len_ms=1500):
        """Show *msg* in the status bar and start a one-shot timer of
        *flash_len_ms* milliseconds bound to ``on_flash_status_off``."""
        self.statusbar.SetStatusText(msg)
        timer = wx.Timer(self)
        self.timeroff = timer
        self.Bind(wx.EVT_TIMER, self.on_flash_status_off, timer)
        timer.Start(flash_len_ms, oneShot=True)

    def on_flash_status_off(self, event):
        """Blank the status bar text; *event* is not used."""
        empty = ''
        self.statusbar.SetStatusText(empty)
Esempio n. 32
0
            s=s,
            lw=0,
            label='True Position')
# Scatter the two embeddings and add the legend
plt.scatter(pos[:, 0],
            pos[:, 1],
            color='turquoise',
            s=s,
            lw=0,
            label='MDS')
plt.scatter(npos[:, 0], npos[:, 1], color='darkorange', s=s, lw=0,
            label='NMDS')
plt.legend(scatterpoints=1, loc='best', shadow=False)

# Invert the values (EPSILON keeps the division finite) and zero the diagonal
similarities = similarities.max() / (similarities + EPSILON) * 100
np.fill_diagonal(similarities, 0)

# Plot the edges
# NOTE(review): start_idx / end_idx are not used again in the visible code
start_idx, end_idx = np.where(pos)
# One segment per ordered pair (i, j) of points, drawn between the rows of
# X_true; each segment is a two-point line (x0, y0), (x1, y1)
segments = [[X_true[i, :], X_true[j, :]]
            for i in range(len(pos)) for j in range(len(pos))]
values = np.absolute(similarities)
lc = LineCollection(segments,
                    zorder=0,
                    cmap=plt.cm.Blues,
                    norm=plt.Normalize(vmin=0, vmax=values.max()))
lc.set_array(similarities.flatten())
lc.set_linewidths(0.5 * np.ones(len(segments)))
ax.add_collection(lc)

plt.show()
Esempio n. 33
0
def plot_market_structure(names, labels, embedding, partial_correlations):
    """Draw a labelled graph: nodes at *embedding*, edges from
    *partial_correlations*.

    :param names: one display name per node (zipped with the columns of
        *embedding*)
    :param labels: numeric label per node; used as the node colour and, scaled
        by ``labels.max()``, as the edge colour of each name box
    :param embedding: array whose row 0 holds the x and row 1 the y
        coordinate of every node
    :param partial_correlations: square matrix; it is normalised by its
        diagonal on a private copy, so the caller's array is not modified.
        Entries of the upper triangle whose magnitude exceeds 0.02 become
        edges.
    """
    import matplotlib.pyplot as plt
    from matplotlib.collections import LineCollection

    # ``spectral`` was renamed to ``nipy_spectral``; prefer the new name and
    # fall back to the old one on matplotlib releases that predate it.
    node_cmap = getattr(plt.cm, 'nipy_spectral', None)
    if node_cmap is None:
        node_cmap = plt.cm.spectral

    # Visualization
    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    # Display a graph of the partial correlations.  Work on a float copy so
    # the in-place scaling below never leaks into the caller's matrix.
    partial_correlations = np.array(partial_correlations, dtype=float)
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]

    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding
    plt.scatter(embedding[0],
                embedding[1],
                s=100 * d**2,
                c=labels,
                cmap=node_cmap)

    # Plot the edges
    start_idx, end_idx = np.where(non_zero)
    # a sequence of (*line0*, *line1*, *line2*), where::
    #            linen = (x0, y0), (x1, y1), ... (xm, ym)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    try:
        lc = LineCollection(segments,
                            zorder=0,
                            cmap=plt.cm.hot_r,
                            norm=plt.Normalize(0, .7 * values.max()))
        lc.set_array(values)
        lc.set_linewidths(8 * values)
    except ValueError:
        # e.g. ``values`` is empty, so ``values.max()`` has nothing to reduce
        print("Warning: skip line normalization")
        lc = LineCollection(segments, zorder=0, cmap=plt.cm.hot_r)
        lc.set_linewidths(1)
    ax.add_collection(lc)

    # Avoid dividing by zero when every label is 0
    max_label = float(labels.max()) or 1.0

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    for index, (name, label,
                (x, y)) in enumerate(zip(names, labels, embedding.T)):

        # Offsets to every other node; the node's own entry is set to 1 so
        # that it is not picked as its own nearest neighbour
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x,
                 y,
                 name,
                 size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 color='black',
                 bbox=dict(facecolor='w',
                           edgecolor=node_cmap(label / max_label),
                           alpha=.6))

    # ``np.ptp`` instead of the ``ndarray.ptp`` method, which NumPy 2.0 removed
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(
        embedding[0].min() - .15 * x_span,
        embedding[0].max() + .10 * x_span,
    )
    plt.ylim(embedding[1].min() - .03 * y_span,
             embedding[1].max() + .03 * y_span)

    plt.show()
    plt.close()
Esempio n. 34
0
# Transform the positions with ``clf`` (configured earlier in the script)
npos = clf.fit_transform(pos)

fig = plt.figure(1)
ax = plt.axes([0., 0., 1., 1.])

# The two embeddings are drawn shifted by +0.2 / -0.2 relative to X_true
plt.scatter(X_true[:, 0], X_true[:, 1], s=20, c='r')
plt.scatter(pos[:, 0] + 0.2, pos[:, 1] + 0.2, c='g', s=20)
plt.scatter(npos[:, 0] - 0.2, npos[:, 1] - 0.2, c='b', s=20)
plt.legend(('True position', 'MDS', 'NMDS'), loc='best')

# Invert the values; divisions by zero yield inf, which is reset to 0
similarities = similarities.max() / similarities * 100
similarities[np.isinf(similarities)] = 0

# Plot the edges
# NOTE(review): start_idx / end_idx are not used again in the visible code
start_idx, end_idx = np.where(pos)
# One segment per ordered pair (i, j) of embedded points; each segment is a
# two-point line (x0, y0), (x1, y1)
segments = [[pos[i, :], pos[j, :]]
            for i in range(len(pos)) for j in range(len(pos))]
values = np.absolute(similarities)
lc = LineCollection(segments,
                    zorder=0,
                    cmap=plt.cm.hot_r,
                    norm=plt.Normalize(vmin=0, vmax=values.max()))
lc.set_array(similarities.flatten())
lc.set_linewidths(np.full(len(segments), 0.5))
ax.add_collection(lc)

plt.show()
Esempio n. 35
0
    def _visualize(self, names, close_prices, open_prices):
        """Cluster the quotes by their daily variation and plot the result.

        A sparse graphical model is fitted to the standardised variation
        (``close_prices - open_prices``), the quotes are clustered with
        affinity propagation, embedded in 2D with locally linear embedding and
        drawn as a labelled graph. The cluster membership is printed.

        :param names: array of display names; it is indexed with a boolean
            mask, so a plain list will not work
        :param close_prices: array of closing prices
        :param open_prices: array of opening prices, same shape as
            *close_prices*
        """
        # The daily variations of the quotes are what carry most information
        variation = close_prices - open_prices
        # Set NaN values to 0. The GraphicalLassoCV call below may then emit
        # divide-by-zero RuntimeWarnings, but it still runs through.
        variation[np.isnan(variation)] = 0
        # #############################################################################
        # Learn a graphical structure from the correlations
        edge_model = covariance.GraphicalLassoCV()
        # standardize the time series: using correlations rather than covariance
        # is more efficient for structure recovery
        X = variation.copy().T
        X /= X.std(axis=0)
        edge_model.fit(X)
        # #############################################################################
        # Cluster using affinity propagation
        _, labels = cluster.affinity_propagation(edge_model.covariance_,
                                                 random_state=0)
        n_labels = labels.max()
        for i in range(n_labels + 1):
            print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))
        # #############################################################################
        # Find a low-dimension embedding for visualization: find the best position of
        # the nodes (the stocks) on a 2D plane
        # We use a dense eigen_solver to achieve reproducibility (arpack is
        # initiated with random vectors that we don't control). In addition, we
        # use a large number of neighbors to capture the large-scale structure.
        node_position_model = manifold.LocallyLinearEmbedding(
            n_components=2, eigen_solver='dense', n_neighbors=6)
        embedding = node_position_model.fit_transform(X.T).T
        # #############################################################################
        # Visualization
        # Chinese text support
        plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
        plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly
        plt.figure(1, facecolor='w', figsize=(15, 12))
        plt.clf()
        ax = plt.axes([0., 0., 1., 1.])
        plt.axis('off')
        # Display a graph of the partial correlations
        partial_correlations = edge_model.precision_.copy()
        d = 1 / np.sqrt(np.diag(partial_correlations))
        partial_correlations *= d
        partial_correlations *= d[:, np.newaxis]
        non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)
        # Plot the nodes using the coordinates of our embedding
        plt.scatter(embedding[0],
                    embedding[1],
                    s=100 * d**2,
                    c=labels,
                    cmap=plt.cm.nipy_spectral)
        # Plot the edges
        start_idx, end_idx = np.where(non_zero)
        # a sequence of (*line0*, *line1*, *line2*), where::
        #            linen = (x0, y0), (x1, y1), ... (xm, ym)
        segments = [[embedding[:, start], embedding[:, stop]]
                    for start, stop in zip(start_idx, end_idx)]
        values = np.abs(partial_correlations[non_zero])
        lc = LineCollection(segments,
                            zorder=0,
                            cmap=plt.cm.hot_r,
                            norm=plt.Normalize(0, .7 * values.max()))
        lc.set_array(values)
        lc.set_linewidths(15 * values)
        ax.add_collection(lc)
        # With a single cluster n_labels is 0; avoid dividing by zero below
        label_scale = float(n_labels) if n_labels else 1.0
        # Add a label to each node. The challenge here is that we want to
        # position the labels to avoid overlap with other labels
        for index, (name, label,
                    (x, y)) in enumerate(zip(names, labels, embedding.T)):

            # Offsets to every other node; the node's own entry is set to 1 so
            # that it is not picked as its own nearest neighbour
            dx = x - embedding[0]
            dx[index] = 1
            dy = y - embedding[1]
            dy[index] = 1
            this_dx = dx[np.argmin(np.abs(dy))]
            this_dy = dy[np.argmin(np.abs(dx))]
            if this_dx > 0:
                horizontalalignment = 'left'
                x = x + .002
            else:
                horizontalalignment = 'right'
                x = x - .002
            if this_dy > 0:
                verticalalignment = 'bottom'
                y = y + .002
            else:
                verticalalignment = 'top'
                y = y - .002
            plt.text(x,
                     y,
                     name,
                     size=10,
                     horizontalalignment=horizontalalignment,
                     verticalalignment=verticalalignment,
                     bbox=dict(facecolor='w',
                               edgecolor=plt.cm.nipy_spectral(label /
                                                              label_scale),
                               alpha=.6))
        # ``np.ptp`` instead of the ``ndarray.ptp`` method, which NumPy 2.0
        # removed
        x_span = np.ptp(embedding[0])
        y_span = np.ptp(embedding[1])
        plt.xlim(
            embedding[0].min() - .15 * x_span,
            embedding[0].max() + .10 * x_span,
        )
        plt.ylim(embedding[1].min() - .03 * y_span,
                 embedding[1].max() + .03 * y_span)
        plt.show()
Esempio n. 36
0
def plot_mds(rdm, level=None):
    '''Visualize an RDM via multidimensional scaling (MDS).

    :param rdm: either the path of a .csv file holding the RDM or a pandas
        data frame. A data frame passed in directly has its index replaced by
        its column labels in place.
    :param level: when set to ``'2nd'`` the values are replaced by
        ``1 - value`` wherever ``value > -1.05`` before scaling.

    The conditions are scattered at their 2D MDS positions, annotated with
    their names, and connected pairwise by thin grey lines coloured by the
    rescaled RDM values. The figure is shown with ``plt.show()``.
    '''

    # big kudos to Jona Sassenhagen for doing an amazing job
    # adding condition names and colors to the mds plot

    # import modules and functions
    import numpy as np
    import pandas as pd
    import seaborn as sns
    import matplotlib.pyplot as plt
    from sklearn import manifold
    from sklearn.decomposition import PCA
    from matplotlib.collections import LineCollection

    ## computation/transformation section

    # read in the rdm in .csv format, creating a data frame
    if isinstance(rdm, str):
        df = pd.read_csv(rdm)
        # Drop the unnamed index column from the loaded frame (the check has
        # to look at the frame's columns, not at the path string).
        if 'Unnamed: 0' in df:
            del df['Unnamed: 0']
    else:
        df = rdm

    df.index = df.columns  # set data frame index based on columns

    if level == '2nd':
        df = df.mask(df.values > -1.05, 1 - df.values)

    # set seed for mds
    seed = 0

    # create mds object
    mds = manifold.MDS(n_components=2, max_iter=3000, eps=1e-9, random_state=seed,
                       dissimilarity="precomputed", n_jobs=1)
    # apply mds to data frame
    rdm_mds = mds.fit(df.values).embedding_

    # create new data frame from mds
    df_mds = pd.DataFrame(rdm_mds, index=df.index, columns=["dim1", "dim2"])
    df_mds["cond"] = df_mds.index  # create condition column based on index

    # create pca object
    clf = PCA(n_components=2)

    # set rdm data frame based on data frame values
    rdm = pd.DataFrame(df.values)

    # scale data; divisions by zero yield inf, which is reset to 0
    rdm = rdm.max() / rdm * 100
    rdm[np.isinf(rdm)] = 0

    # convert rdm data frame to array
    rdm = rdm.to_numpy()

    # apply pca to mds
    # NOTE(review): only the length of this result is used below
    rdm_mds_pca = clf.fit_transform(rdm_mds)

    ## plotting section

    sns.set_style("white")  # set seaborn style to white
    # create lmplot from the mds data frame; x/y are passed by keyword because
    # current seaborn releases no longer accept them positionally
    g = sns.lmplot(x="dim1", y="dim2", hue="cond", data=df_mds,
                   fit_reg=False, legend=False)
    ax = g.ax  # set axes
    sns.despine(ax=ax, trim=True, left=True, bottom=True)  # despine graphic
    ax.axes.get_xaxis().set_visible(False)  # remove x axis
    ax.axes.get_yaxis().set_visible(False)  # remove y axis
    ax.grid(False)  # remove grid

    # add condition names to plot
    for dim1, dim2, name in df_mds.values:
        ax.text(dim1 * 1.05, dim2 * 1.05, name)

    # create segments: one line per ordered pair of conditions
    segments = [[rdm_mds[i, :], rdm_mds[j, :]]
                for i in range(len(rdm_mds_pca)) for j in range(len(rdm_mds_pca))]
    values = np.abs(rdm)

    # set line collection
    lc = LineCollection(segments,
                        zorder=0, cmap=plt.cm.Greys,
                        norm=plt.Normalize(0, values.max()))
    lc.set_array(rdm.flatten())
    lc.set_linewidths(0.5 * np.ones(len(segments)))
    ax.add_collection(lc)  # add line collection to plot

    plt.tight_layout()
    plt.show()
Esempio n. 37
0
    def plotManifoldDistances(self,
                              segments: List[Union[MessageSegment,
                                                   TypedSegment, TypedTemplate,
                                                   Template, RawMessage, Any]],
                              distances: numpy.ndarray,
                              labels: numpy.ndarray,
                              templates: List = None,
                              plotEdges=False,
                              countMarkers=False):
        # noinspection PyUnresolvedReferences
        """
        Plot distances of segments according to (presumably multidimensional) features.
        This function abstracts from the actual feature by directly taking a precomputed similarity matrix and
        arranging the segments relative to each other according to their distances using Multidimensional Scaling (MDS).
        See module `manifold` from package `sklearn`.

        If segments is a list of `TypedSegment` or `MessageSegment`, this function plots the feature values of each
        given segment overlaying each other besides the distances; they are colored according to the given labels.

        >>> from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage
        >>> from nemere.utils.loader import BaseLoader
        >>> from nemere.inference.analyzers import Value
        >>>
        >>> bytedata = [
        ...     bytes([1, 2, 3, 4]),
        ...     bytes([   2, 3, 4]),
        ...     bytes([   1, 3, 4]),
        ...     bytes([   2, 4   ]),
        ...     bytes([   2, 3   ]),
        ...     bytes([20, 30, 37, 50, 69, 2, 30]),
        ...     bytes([        37,  5, 69       ]),
        ...     bytes([70, 2, 3, 4]),
        ...     bytes([3, 2, 3, 4])
        ...     ]
        >>> messages  = [RawMessage(bd) for bd in bytedata]
        >>> specimens = BaseLoader(messages)
        >>> analyzers = [Value(message) for message in messages]
        >>> segments  = [TypedSegment(analyzer, 0, len(analyzer.message.data)) for analyzer in analyzers]
        >>> for seg in segments[:4]:
        ...     seg.fieldtype = "ft1"
        >>> for seg in segments[4:6]:
        ...     seg.fieldtype = "ft2"
        >>> for seg in segments[6:]:
        ...     seg.fieldtype = "ft3"
        >>> DistanceCalculator.debug = False
        >>> dc = DistanceCalculator(segments, thresholdFunction=DistanceCalculator.neutralThreshold, thresholdArgs=None)
        Calculated distances for 37 segment pairs in ... seconds.
        >>> dp = DistancesPlotter(specimens, "test", False)
        >>> dp.plotManifoldDistances(segments, dc.distanceMatrix, numpy.array([1,2,3,1,1,0,1,0,2]))
        >>> dp.writeOrShowFigure()  # doctest: +SKIP

        :param segments: If `segments` is a list of `TypedSegment`s, field types are marked as small markers
            within the label marker. labels containing "Noise" then are not explicitly marked like the other labeled
            segments
        :param distances: The precomputed similarity matrix:
            symmetric matrix, rows/columns in the order of `segments`
        :param labels: Labels of strings (or ints or any other printable type) identifying the cluster for each segment
        :param templates: Templates of clusters to be printed alongside with the feature values.
            CURRENTLY UNTESTED
        :param plotEdges: Plot of edges between each pair of segment markers.
            Caution: Adds n^2 lines which takes very long compared to the scatterplot and
            quickly becomes a huge load especially when rendering the plot as PDF.
        :param countMarkers: add text labels with information at positions with multiple markers
        """
        # Input validation: argument types, and a square distance matrix with
        # one row/column per segment.
        assert isinstance(segments, Sequence)
        assert isinstance(distances, numpy.ndarray)
        assert isinstance(labels, numpy.ndarray)
        assert len(segments) == distances.shape[0] == distances.shape[1]

        # axMDS receives the scatter plot of positions; axSeg receives the
        # segment values, or the legend when no values are plotted.
        axMDS, axSeg = self._axes  # type: plt.Axes, plt.Axes
        axMDS.set_aspect('equal', adjustable='datalim')

        # subsample if segment count is larger than maxSamples
        # (a falsy return value leaves segments/distances/labels untouched)
        subret = self.subsample(segments, distances, labels)
        if subret:
            originalSegmentCount, segments, distances, labels = subret
            if self._plotSegmentValues:
                botlef = (0, -5)
            else:
                botlef = (0.1, 0.1)
            axSeg.text(
                *botlef,
                'Subsampled: {} of {} segments'.format(len(segments),
                                                       originalSegmentCount))
            # without subsampling, existing values need not to be overwritten

        # Positions are indexed below as pos[:, 0] (x) and pos[:, 1] (y);
        # presumably the MDS embedding described in the docstring - confirm
        # against manifoldPositions.
        pos = DistancesPlotter.manifoldPositions(distances)

        # identify unique labels
        ulab = DistancesPlotter.uniqueLabels(labels, segments)
        if templates is None:
            templates = ulab
        # prepare color space
        # (evenly spaced colormap indices between 2 and N - 2, one per label)
        cIdx = [
            int(round(each))
            for each in numpy.linspace(2, self.cm.N - 2, len(ulab))
        ]

        # CLUSTERS (large bobbles): iterate unique labels and scatter plot each of these clusters
        # NOTE(review): zip() stops at the shorter of ulab/templates, so a
        # shorter `templates` list silently drops the remaining labels.
        for c, (l,
                t) in enumerate(zip(ulab,
                                    templates)):  # type: int, (Any, Template)
            lColor = self.cm(cIdx[c])
            class_member_mask = (labels == l)
            try:
                x = list(compress(pos[:, 0].tolist(), class_member_mask))
                y = list(compress(pos[:, 1].tolist(), class_member_mask))
                # "If you want to specify the same RGB or RGBA value for all points, use a 2-D array with a single row."
                # see https://matplotlib.org/api/_as_gen/matplotlib.pyplot.scatter.html:
                axMDS.scatter(
                    x,
                    y,
                    c=colors.to_rgba_array(lColor),
                    alpha=.6,
                    s=self.labsize,
                    # s=s-(c*s/len(ulab)),  #
                    lw=0,
                    label=str(l))
            except IndexError as e:
                # Dump the inputs for debugging, then propagate the error
                print(pos)
                print(distances)
                print(segments)
                raise e

            if isinstance(t, Template) and self._plotSegmentValues:
                axSeg.plot(t.values, c=lColor, linewidth=4)

        # GROUND TRUTH (small bobbles): include field type labels for TypedSegments input
        if any(
                isinstance(seg, (TypedSegment, TypedTemplate, RawMessage))
                for seg in segments):
            if any(
                    isinstance(seg, (TypedSegment, TypedTemplate))
                    for seg in segments):
                ftypes = numpy.array([
                    seg.fieldtype if isinstance(
                        seg, (TypedSegment, TypedTemplate)) else "[unknown]"
                    for seg in segments
                ])  # PP
            elif any(
                    isinstance(seg, RawMessage) and seg.messageType != 'Raw'
                    for seg in segments):
                ftypes = numpy.array([
                    msg.messageType if isinstance(msg, RawMessage)
                    and msg.messageType != 'Raw' else "[unknown]"
                    for msg in segments
                ])  # PP
            else:
                # No type information available: utyp below becomes empty and
                # the type loop does not run.
                ftypes = set()
            # identify unique types
            utyp = sorted(set(ftypes))
            # prepare color space
            # (rebinds cIdx; indices now refer to self.fcm, one per type)
            cIdx = [
                int(round(each))
                for each in numpy.linspace(30, self.fcm.N - 30, len(utyp))
            ]
            # iterate unique types and scatter plot each of these groups
            for n, ft in enumerate(utyp):  # PP
                fColor = self.fcm(cIdx[n])
                type_member_mask = (ftypes == ft)
                x = list(compress(pos[:, 0].tolist(), type_member_mask))
                y = list(compress(pos[:, 1].tolist(), type_member_mask))
                # "If you want to specify the same RGB or RGBA value for all points, use a 2-D array with a single row."
                # see https://matplotlib.org/api/_as_gen/matplotlib.pyplot.scatter.html:
                axMDS.scatter(x,
                              y,
                              c=colors.to_rgba_array(fColor),
                              alpha=1,
                              s=self.typsize,
                              lw=0,
                              label=str(ft))

                if isinstance(
                        segments[0],
                    (TypedSegment, TypedTemplate)) and self._plotSegmentValues:
                    for seg in compress(segments, type_member_mask):
                        axSeg.plot(seg.values, c=fColor, alpha=0.05)
        elif isinstance(segments[0],
                        MessageSegment) and self._plotSegmentValues:
            # Untyped segments: overlay their values coloured by cluster label
            for c, l in enumerate(ulab):
                lColor = self.cm(cIdx[c])
                class_member_mask = (labels == l)
                for seg in compress(segments, class_member_mask):
                    axSeg.plot(seg.values, c=lColor, alpha=0.1)
        elif self._plotSegmentValues:
            axSeg.text(.5,
                       .5,
                       'nothing to plot\n(message alignment)',
                       horizontalalignment='center')

        # place the label/type legend in the (otherwise empty) axSeg subfigure
        # NOTE(review): segments[0] raises IndexError for an empty `segments`
        if isinstance(segments[0], RawMessage) or not self._plotSegmentValues:
            legendHandles, legendLabels = axMDS.get_legend_handles_labels()
            # axMDS.legend(bbox_to_anchor=(1.04,1), scatterpoints=1, shadow=False)
            axSeg.legend(handles=legendHandles,
                         labels=legendLabels,
                         loc='best',
                         scatterpoints=1,
                         shadow=False)
            axSeg.patch.set_alpha(0.0)
            axSeg.axis('off')
        else:
            # place the label/type legend at the best position
            axMDS.legend(scatterpoints=1, loc='best', shadow=False)

        if plotEdges:
            # plotting of edges takes a long time compared to the scatterplot (and especially when rendering the PDF)
            from matplotlib.collections import LineCollection
            # Plot the edges
            # (one line per ordered pair (i, j) of positions, i.e. len(pos)**2 lines)
            lines = [[pos[i, :], pos[j, :]] for i in range(len(pos))
                     for j in range(len(pos))]
            values = numpy.abs(distances)
            # noinspection PyUnresolvedReferences
            lc = LineCollection(lines,
                                zorder=0,
                                cmap=plt.cm.Blues,
                                norm=plt.Normalize(0, values.max()))
            # lc.set_alpha(.1)
            lc.set_array(distances.flatten())
            # NOTE(review): the width array has len(segments) entries while
            # `lines` has len(pos)**2; presumably matplotlib cycles the
            # widths - confirm.
            lc.set_linewidths(0.5 * numpy.ones(len(segments)))
            axMDS.add_collection(lc)

        if countMarkers:
            # Count markers at identical positions and plot text with information about the markers at this position
            from collections import Counter
            import math
            if isinstance(segments[0], (TypedSegment, TypedTemplate)):
                # TODO for TypedTemplates we rather need to count the number of base segments, so for now this is not accurate
                coordCounter = Counter([
                    (posX, posY, seg.fieldtype)
                    for seg, lab, posX, posY in zip(
                        segments, labels, pos[:, 0].tolist(), pos[:,
                                                                  1].tolist())
                ])
            else:
                # TODO for Templates we rather need to count the number of base segments, so for now this is not accurate
                coordCounter = Counter([
                    (posX, posY, lab) for lab, posX, posY in zip(
                        labels, pos[:, 0].tolist(), pos[:, 1].tolist())
                ])
            for (posX, posY, lab), cnt in coordCounter.items():
                if cnt > 1:
                    # Offset the text by distance r in a direction derived
                    # from the label.
                    # NOTE(review): theta is reduced modulo 360 but
                    # math.cos/math.sin take radians; str hashes also differ
                    # between interpreter runs unless PYTHONHASHSEED is fixed,
                    # so the placement is presumably not reproducible.
                    theta = hash(str(lab)) % 360
                    r = 1
                    posXr = posX + r * math.cos(theta)
                    posYr = posY + r * math.sin(theta)
                    # NOTE(review): the `withdash` keyword appears to have
                    # been removed from recent matplotlib releases - confirm
                    # against the pinned version.
                    axMDS.text(posXr,
                               posYr,
                               "{}: {}".format(lab, cnt),
                               withdash=True)

        # Refresh the toolbar if the canvas has one
        if self._fig.canvas.toolbar is not None:
            self._fig.canvas.toolbar.update()
Esempio n. 38
0
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')
    plt.xlim(
        embedding[0].min() - .15 * embedding[0].ptp(),
        embedding[0].max() + .10 * embedding[0].ptp(),
    )
    plt.ylim(embedding[1].min() - .03 * embedding[1].ptp(),
             embedding[1].max() + .03 * embedding[1].ptp())

    lc = LineCollection(segments,
                        zorder=0,
                        cmap=plt.cm.hot_r,
                        norm=plt.Normalize(0, .7 * values.max()))
    lc.set_array(values)
    lc.set_linewidths(0.15 / values)
    ax.add_collection(lc)

    if animation:
        sca = ax.scatter([], [], cmap=plt.cm.spectral)
        texts = []

        def init():
            sca.set_offsets([])
            return sca, tuple(texts)

        def animate(i):
            x = embedding[0][labels <= i]
            y = embedding[1][labels <= i]
            s = 100 * d[labels <= i]**2
            c = labels[labels <= i]
Esempio n. 39
0
# Transform the non-metric positions with ``clf`` (configured earlier in the
# script)
npos = clf.fit_transform(npos)

fig = plt.figure(1)
ax = plt.axes([0., 0., 1., 1.])

# Scatter the true positions and both embeddings with a shared marker size
s = 100
plt.scatter(X_true[:, 0],
            X_true[:, 1],
            color='navy',
            s=s,
            lw=0,
            label='True Position')
plt.scatter(pos[:, 0],
            pos[:, 1],
            color='turquoise',
            s=s,
            lw=0,
            label='MDS')
plt.scatter(npos[:, 0],
            npos[:, 1],
            color='darkorange',
            s=s,
            lw=0,
            label='NMDS')
plt.legend(scatterpoints=1, loc='best', shadow=False)

# Invert the values; divisions by zero yield inf, which is reset to 0
similarities = similarities.max() / similarities * 100
similarities[np.isinf(similarities)] = 0

# Plot the edges
# NOTE(review): start_idx / end_idx are not used again in the visible code
start_idx, end_idx = np.where(pos)
# One segment per ordered pair (i, j) of points, drawn between the rows of
# X_true; each segment is a two-point line (x0, y0), (x1, y1)
segments = [[X_true[i, :], X_true[j, :]] for i in range(len(pos))
            for j in range(len(pos))]
values = np.absolute(similarities)
lc = LineCollection(segments,
                    zorder=0,
                    cmap=plt.cm.Blues,
                    norm=plt.Normalize(vmin=0, vmax=values.max()))
lc.set_array(similarities.flatten())
lc.set_linewidths(0.5 * np.ones(len(segments)))
ax.add_collection(lc)

plt.show()
# Synthetic 1-D data: `n` points (n is set earlier in the script, outside this
# excerpt) following a 50 * log(1 + x) trend plus integer noise drawn from
# [-50, 50) with a fixed seed.
x = np.arange(n)
rs = check_random_state(0)
y = rs.randint(-50, 50, size=(n, )) + 50. * np.log1p(np.arange(n))

# #############################################################################
# Fit IsotonicRegression and LinearRegression models

ir = IsotonicRegression()

# Isotonic fit evaluated at the training points.
y_ = ir.fit_transform(x, y)

lr = LinearRegression()
lr.fit(x[:, np.newaxis], y)  # x needs to be 2d for LinearRegression

# #############################################################################
# Plot result

# One vertical segment per sample, joining the observed value to the isotonic
# fit. The index i doubles as the x coordinate because x is arange(n).
segments = [[[i, y[i]], [i, y_[i]]] for i in range(n)]
lc = LineCollection(segments, zorder=0)
lc.set_array(np.ones(len(y)))
lc.set_linewidths(np.full(n, 0.5))

fig = plt.figure()
plt.plot(x, y, 'r.', markersize=12)
# NOTE(review): the isotonic fit ('b.-') and the linear fit ('b-') are both
# drawn in blue here; other copies of this example use 'g.-' for the
# isotonic fit - confirm which is intended.
plt.plot(x, y_, 'b.-', markersize=12)
plt.plot(x, lr.predict(x[:, np.newaxis]), 'b-')
plt.gca().add_collection(lc)
# Legend entries follow the order of the three plt.plot calls above.
plt.legend(('Data', 'Isotonic Fit', 'Linear Fit'), loc='lower right')
plt.title('Isotonic regression')
plt.show()
Esempio n. 41
0
    def plot_environment(self):
        """Draw every link of the OTM network, coloured by its queue load.

        Node coordinates and link endpoints are read from
        ``self.otm4rl.otmwrapper.otm.scenario()``. Each link's colour is its
        ``"waiting"`` value from ``self.otm4rl.get_queues()`` divided by
        ``self.max_queues[link_id]``, looked up in the ``'hot'`` colormap.

        Returns:
            The ``matplotlib.pyplot`` module, with the figure created here
            as the current figure. When the scenario has no links the axes
            are returned empty instead of failing on infinite axis limits.
        """
        _, ax = plt.subplots()
        otm = self.otm4rl.otmwrapper.otm

        # Cache the coordinates of every node, keyed by node id.
        nodes = {}
        for node_id in otm.scenario().get_node_ids():
            node_info = otm.scenario().get_node_with_id(node_id)
            nodes[node_id] = {'x': node_info.getX(), 'y': node_info.getY()}

        lines = []
        norms = []
        minX = minY = float('inf')
        maxX = maxY = float('-inf')

        state = self.otm4rl.get_queues()

        for link_id in otm.scenario().get_link_ids():
            link_info = otm.scenario().get_link_with_id(link_id)

            start_point = nodes[link_info.getStart_node_id()]
            end_point = nodes[link_info.getEnd_node_id()]

            x0, y0 = start_point['x'], start_point['y']
            x1, y1 = end_point['x'], end_point['y']

            # Shift the link sideways according to its direction of travel
            # (presumably so opposite directions of one road do not overlap -
            # confirm). Both endpoints move together, so the y shift does not
            # change which x branch is taken afterwards.
            if x1 - x0 > 0:
                y0 -= 150
                y1 -= 150
            if x1 - x0 < 0:
                y0 += 150
                y1 += 150
            if y1 - y0 > 0:
                x0 += 100
                x1 += 100
            if y1 - y0 < 0:
                x0 -= 100
                x1 -= 100

            lines.append([(x0, y0), (x1, y1)])
            norms.append(state[link_id]["waiting"] / self.max_queues[link_id])

            # Track the bounding box of everything drawn.
            minX = min(minX, x0, x1)
            maxX = max(maxX, x0, x1)
            minY = min(minY, y0, y1)
            maxY = max(maxY, y0, y1)

        if not lines:
            # Nothing to draw: the bounds are still +/-inf, which the axis
            # limit setters below cannot accept.
            return plt

        cmap = plt.get_cmap('hot')
        all_colors = [cmap(z) for z in norms]
        lc = LineCollection(lines, colors=all_colors)
        lc.set_linewidths(15)
        ax.add_collection(lc)

        dY = maxY - minY
        dX = maxX - minX

        # Use the larger extent for both axes and centre the shorter one, so
        # the visible data window is square.
        if dY > dX:
            ax.set_ylim((minY, maxY))
            c = (maxX + minX) / 2
            ax.set_xlim((c - dY / 2, c + dY / 2))
        else:
            ax.set_xlim((minX, maxX))
            c = (maxY + minY) / 2
            ax.set_ylim((c - dX / 2, c + dX / 2))

        return plt
# Synthetic 1-D data: `n` points (n is set earlier in the script, outside this
# excerpt) following a 50 * log(1 + x) trend plus integer noise drawn from
# [-50, 50) with a fixed seed.
x = np.arange(n)
rs = check_random_state(0)
y = rs.randint(-50, 50, size=(n,)) + 50. * np.log1p(np.arange(n))

# #############################################################################
# Fit IsotonicRegression and LinearRegression models

ir = IsotonicRegression()

# Isotonic fit evaluated at the training points.
y_ = ir.fit_transform(x, y)

lr = LinearRegression()
lr.fit(x[:, np.newaxis], y)  # x needs to be 2d for LinearRegression

# #############################################################################
# Plot result

# One vertical segment per sample, joining the observed value to the isotonic
# fit. The index i doubles as the x coordinate because x is arange(n).
segments = [[[i, y[i]], [i, y_[i]]] for i in range(n)]
lc = LineCollection(segments, zorder=0)
lc.set_array(np.ones(len(y)))
lc.set_linewidths(np.full(n, 0.5))

fig = plt.figure()
# Red dots: data; green dotted line: isotonic fit; blue line: linear fit.
plt.plot(x, y, 'r.', markersize=12)
plt.plot(x, y_, 'g.-', markersize=12)
plt.plot(x, lr.predict(x[:, np.newaxis]), 'b-')
plt.gca().add_collection(lc)
# Legend entries follow the order of the three plt.plot calls above.
plt.legend(('Data', 'Isotonic Fit', 'Linear Fit'), loc='lower right')
plt.title('Isotonic regression')
plt.show()
Esempio n. 43
0
def plot():
	"""Cluster the rows of a user-chosen CSV file and draw them as a graph.

	Prompts on stdin for a CSV path until the file can be read, then:

	1. coerces columns to numeric where possible and fills missing values
	   with the column mean;
	2. vectorizes each row with ``DictVectorizer`` and replaces remaining
	   NaNs with numbers;
	3. fits ``GraphLassoCV`` on the transposed matrix, so the estimated
	   covariance is between rows of the file;
	4. clusters that covariance with affinity propagation, inserts the
	   labels as the first column ("clustering group") and prints the
	   sorted table;
	5. embeds the standardized rows in 2-D with ``SpectralEmbedding`` and
	   shows a scatter plot whose edges are the partial correlations with
	   absolute value above 0.02.

	Side effects: reads stdin, prints to stdout, changes the pandas
	``display.max_rows`` option and opens a matplotlib window.
	"""
	# Input path of the file: keep asking until it can be opened
	while True:
		try:
			path = raw_input('Enter the name (xxx.csv) of your csv file (in the same folder): ')
			dataframe = pd.read_csv(path, delimiter=",", skipinitialspace=True)
			break
		except IOError:
			print('Cannot find the file. Try again! (You must have this file in the same folder.)')

	# Preprocess the data
	dataframe = dataframe.convert_objects(convert_numeric=True)
	dataframe = dataframe.fillna(dataframe.mean())

	# Vectorize nominal features: one dict per row of the file
	records = dataframe.T.to_dict().values()
	v = DictVectorizer(sparse=False)
	X = v.fit_transform(records)

	# Eliminate nan values
	X = np.nan_to_num(X)

	# The GraphLasso estimator uses an l1 penalty to enforce sparsity on the precision matrix:
	# the higher its alpha parameter, the more sparse the precision matrix.
	# The corresponding GraphLassoCV object uses cross-validation to automatically set the alpha parameter.
	print('computing edge model... (large data may take a significant time)')
	edge_model = covariance.GraphLassoCV()
	edge_model.fit(X.T)

	# Affinity Propagation clustering
	print('computing clustering...')
	clustering = cluster.AffinityPropagation()
	clustering.fit(edge_model.covariance_)
	labels = clustering.labels_

	# Print data based on cluster
	dataframe.insert(loc=0, column="clustering group", value=labels, allow_duplicates=False)
	dataframe.sort(columns="clustering group", inplace=True)
	print('***************Data Cluster**************')
	pd.set_option('display.max_rows', len(dataframe))
	print(dataframe)
	print('*******************End*******************')

	# Make sure the same scale is used over all features.
	# Because manifold learning methods are based on a nearest-neighbor search,
	# the algorithm may perform poorly otherwise.
	scaler = StandardScaler()
	X = scaler.fit_transform(X)
	node_position_model = manifold.SpectralEmbedding(n_components=2)
	embedding = node_position_model.fit_transform(X).T

	# Visualization
	plt.figure(1, facecolor='w', figsize=(10, 8))
	plt.clf()
	ax = plt.axes([0., 0., 1., 1.])
	plt.axis('off')

	# Display a graph of the partial correlations
	# Compute partial correlation according to precision matrix:
	# scale rows and columns by 1 / sqrt(diagonal)
	partial_correlations = edge_model.precision_.copy()
	d = 1 / np.sqrt(np.diag(partial_correlations))
	partial_correlations *= d
	partial_correlations *= d[:, np.newaxis]
	non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

	# Plot the nodes using the coordinates of our embedding.
	# Normalizer works on 2-D input, so the scale vector is passed as a
	# single row (giving it unit L2 norm) and flattened back afterwards.
	d_new = Normalizer().fit_transform(d.reshape(1, -1)).ravel()
	# NOTE(review): `plt.cm.spectral` is called `nipy_spectral` in newer
	# matplotlib releases - confirm the targeted version.
	plt.scatter(embedding[0], embedding[1], s= 100 * d_new ** 0.5, c=labels, cmap=plt.cm.spectral)

	# Plot the edges
	start_idx, end_idx = np.where(non_zero)
	#a sequence of (*line0*, *line1*, *line2*), where::
	#            linen = (x0, y0), (x1, y1), ... (xm, ym)
	segments = [[embedding[:, start], embedding[:, stop]]
	            for start, stop in zip(start_idx, end_idx)]
	values = np.abs(partial_correlations[non_zero])
	lc = LineCollection(segments,
	                    zorder=0, cmap=plt.cm.hot_r, norm=plt.Normalize(0, .7 * values.max()))
	lc.set_array(values)
	lc.set_linewidths(5 * values)
	ax.add_collection(lc)

	# Show the result graph
	plt.show()
Esempio n. 44
0
from matplotlib.collections import LineCollection

from sklearn.linear_model import LinearRegression
from sklearn.isotonic import IsotonicRegression
from sklearn.utils import check_random_state

# Synthetic data: a 50 * log(1 + x) trend plus integer noise from [-50, 50)
n = 100
rs = check_random_state(0)
x = np.arange(n)
y = 50. * np.log(1 + x) + rs.randint(-50, 50, size=(n,))

# Fit the monotone (isotonic) model and the straight-line (linear) model
ir = IsotonicRegression()
lr = LinearRegression()
y_ = ir.fit_transform(x, y)
lr.fit(x.reshape(-1, 1), y)  # LinearRegression expects a 2-D feature matrix

# Vertical residual segments: observed value -> isotonic fit, one per sample
segments = [[[idx, observed], [idx, fitted]]
            for idx, (observed, fitted) in enumerate(zip(y, y_))]
lc = LineCollection(segments, zorder=0)
lc.set_array(np.ones(n))
lc.set_linewidths(np.full(n, 0.5))

# Draw data (red dots), isotonic fit (green) and linear fit (blue)
fig = plt.figure()
plt.plot(x, y, 'r.', markersize=12)
plt.plot(x, y_, 'g.-', markersize=12)
plt.plot(x, lr.predict(x.reshape(-1, 1)), 'b-')
plt.gca().add_collection(lc)
plt.legend(('Data', 'Isotonic Fit', 'Linear Fit'), loc='lower right')
plt.title('Isotonic regression')
plt.show()
# Fit isotonic and linear regressions on the training split, plot both fits
# and predict on the test split. `ir`, X_train, y_train and X_test are defined
# earlier in the script (outside this excerpt).
x = X_train
y = y_train

# fit_transform already fits `ir` on (x, y); fitting it a second time on the
# same data would only repeat the work, so the fitted estimator is reused.
y_ = ir.fit_transform(x, y)
ir_model = ir
lr = LinearRegression()
lr_model = lr.fit(x[:, np.newaxis], y)  # x needs to be 2d for LinearRegression

###############################################################################
# plot result

# One vertical segment per sample, from the observed value to the isotonic
# fit. The segment is anchored at the sample's own x value (not its position
# in the array) so it lines up with the points plotted below.
segments = [[[xi, yi], [xi, fi]] for xi, yi, fi in zip(x, y, y_)]
lc = LineCollection(segments, zorder=0)
lc.set_array(np.ones(len(y)))
lc.set_linewidths(0.5 * np.ones(len(x)))

fig = plt.figure()
plt.plot(x, y, 'r.', markersize=6)
plt.plot(x, y_, 'g.-', markersize=6)
plt.plot(x, lr.predict(x[:, np.newaxis]), 'b-')
plt.gca().add_collection(lc)
plt.legend(('Data', 'Isotonic Fit', 'Linear Fit'), loc='lower right')
plt.title('Isotonic regression')
#plt.show()

# Predictions of both models on the held-out data
lr_predict = lr.predict(X_test[:, np.newaxis])
ir_predict = ir_model.predict(X_test)

#print np.isinf(ir_predict)
#print np.isnan(ir_predict)
Esempio n. 46
0
# Re-project the metric and non-metric MDS coordinates with `clf`, which is
# set up earlier in the script (outside this excerpt). The transformer is
# refitted separately for each set of coordinates.
pos = clf.fit_transform(pos)

npos = clf.fit_transform(npos)

fig = plt.figure(1)
# The [0, 0, 1, 1] rectangle makes the axes fill the whole figure.
ax = plt.axes([0., 0., 1., 1.])

# Red: original data; green: `pos`; blue: `npos`.
plt.scatter(data[:, 0], data[:, 1], c='r', s=52)
plt.scatter(pos[:, 0], pos[:, 1], s=52, c='g')
plt.scatter(npos[:, 0], npos[:, 1], s=52, c='b')
#plt.legend(('True position', 'MDS', 'NMDS'), loc='best')

# Replace every entry by max / entry * 100, so the largest entry becomes 100
# and smaller entries become larger numbers. Zero entries divide to inf and
# are reset to 0 on the next line.
similarities = similarities.max() / similarities * 100
similarities[np.isinf(similarities)] = 0

# Plot the edges
# NOTE(review): start_idx / end_idx are not used anywhere in this excerpt.
start_idx, end_idx = np.where(pos)
#a sequence of (*line0*, *line1*, *line2*), where::
#            linen = (x0, y0), (x1, y1), ... (xm, ym)
# One segment per ordered pair (i, j) of data points, including i == j.
segments = [[data[i, :], data[j, :]]
            for i in range(len(pos)) for j in range(len(pos))]
values = np.abs(similarities)
lc = LineCollection(segments,
                    zorder=0, cmap=plt.cm.hot_r,
                    norm=plt.Normalize(0, values.max()))
# Colour each segment by the rescaled similarity.
# assumes similarities is len(pos) x len(pos) so that the flattened order
# matches the (i, j) order of `segments` - TODO confirm
lc.set_array(similarities.flatten())
lc.set_linewidths(0.5 * np.ones(len(segments)))
ax.add_collection(lc)

plt.show()
Esempio n. 47
0
def contingencyTableChi2andPOISpaceStructure(dataBunch, pred, class_mapping,
                                             dbLabel):
    '''Run a chi-square independence test and plot the POI spatial structure.

    Builds a contingency table of industry class versus cluster, runs
    ``chi2_contingency`` on it and prints the result. It then follows the
    scikit-learn "Visualizing the stock market structure" example on that
    table: sparse inverse covariance, affinity propagation, a 2-D
    locally-linear embedding and a graph plot, shown with ``plt.show()``.

    Parameters:
        dataBunch: object whose ``target`` array holds the industry class
            label of every point.
        pred: array with the cluster predicted for every point.
        class_mapping: sequence of rows; the first element of a row is the
            class label (converted with ``int``), the last element is used
            as the text label of the node.
        dbLabel: iterable of cluster labels; ``-1`` is skipped.

    Returns:
        The contingency table ``CTable`` (one row per industry class, one
        column per cluster; every count is incremented by 1).
    '''
    mergingData = np.hstack(
        (pred.reshape(-1, 1), dataBunch.target.reshape(-1,
                                                       1)))  # column-stack the cluster predictions with the industry class labels
    targetStack = []
    for i in range(len(
            np.array(class_mapping)[..., 0])):  # regroup the rows by industry class: one array per class with all of its cluster predictions
        targetStack.append(mergingData[mergingData[..., -1] == int(
            np.array(class_mapping)[..., 0][i])])
    clusterFrequency = {}
    for p in targetStack:  # per industry class, count how many of its points fall in each cluster (frequency)
        clusterFrequency[(p[...,
                            -1][0])] = [(j, np.sum(p[..., 0] == int(j)) + 1)
                                        for j in dbLabel
                                        if j != -1]  # the independence test cannot take zero counts, so 1 is added to every value


#    print(clusterFrequency)
    # NOTE(review): CTableTarget is not used in the rest of this function.
    CTableTarget = list(clusterFrequency.keys())
    CTableIdx = np.array(list(clusterFrequency.values()))
    CTable = CTableIdx[..., 1]  # contingency table for the independence analysis: columns are cluster frequencies, rows are industry classes
    totalIndependence = chi2_contingency(CTable)  # independence test on the contingency table
    g, p, dof, expctd = totalIndependence  # unpack the chi-square statistic g, the p-value p, the degrees of freedom dof and the expected frequencies (same shape as the input). In the author's experiment p=0.00120633349692 < 0.05, so industry class and cluster were related.
    print(g, p, dof)
    # Spatial structure of the POIs, following the official example
    # "Visualizing the stock market structure" (URL in the string below).
    '''poi的空间分布结构。参考官方案例Visualizing the stock market structure:http://scikit-learn.org/stable/auto_examples/applications/plot_stock_market.html#sphx-glr-auto-examples-applications-plot-stock-market-py'''
    # A - inverse covariance (precision) matrix. The matrix inverse of the covariance matrix, often called the precision matrix, is proportional to the partial correlation matrix. It gives the partial independence relationship. In other words, if two features are independent conditionally on the others, the corresponding coefficient in the precision matrix will be zero. (Quoted from the scikit-learn documentation.)
    edge_model = covariance.GraphLassoCV(
    )  # sparse inverse covariance estimator GraphLassoCV(); see http://scikit-learn.org/stable/modules/covariance.html#sparse-inverse-covariance
    X = CTable.copy().T
    print(X, X.shape)
    X = X / X.std(axis=0)  # standardise: divide every column by its standard deviation
    print(X)
    edge_model.fit(X)
    print("******************************************************************")
    print(edge_model.covariance_.shape)

    # B - affinity propagation (AP) clusters by "message passing" between data points and needs no preset number of clusters; here it clusters the covariance matrix
    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()
    print(labels)

    # C - the dimensionality-reduction methods in sklearn.manifold can handle non-linear structure in the data; see http://scikit-learn.org/stable/modules/manifold.html#locally-linear-embedding. The data is reduced to 2 dimensions, which are used as the x/y coordinates of the points in the 2-D chart.
    node_position_model = manifold.LocallyLinearEmbedding(n_components=2,
                                                          eigen_solver='dense',
                                                          n_neighbors=6)
    embedding = node_position_model.fit_transform(X.T).T
    print(embedding.shape)
    # Chart: visualise the POI spatial structure.
    '''图表可视化poi空间分布结构'''
    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    ax = plt.axes(
        [0., 0., 1., 1.]
    )  # see the official example http://matplotlib.org/examples/pylab_examples/axis_equal_demo.html
    plt.axis('off')

    # Display a graph of the partial correlations. Partial correlation analysis: in a system of several factors, the influence of one factor on another is studied while the remaining factors are held constant; the resulting value is the partial correlation coefficient. In multivariate analysis a simple correlation coefficient may not reflect the true relation between X and Y because both can be influenced by other variables, so the partial correlation coefficient is the better choice.
    partial_correlations = edge_model.precision_.copy()
    print(partial_correlations.shape)
    d = 1 / np.sqrt(
        np.diag(partial_correlations))  # numpy.diag() returns the diagonal of a matrix; take the reciprocal of the square root of each element
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02
                )  # np.triu() returns the upper triangle of the matrix

    # Plot the nodes using the coordinates of our embedding
    # NOTE(review): `plt.cm.spectral` is called `nipy_spectral` in newer
    # matplotlib releases - confirm the targeted version.
    plt.scatter(embedding[0],
                embedding[1],
                s=300 * d**2,
                c=labels,
                cmap=plt.cm.spectral)  # cluster labels set the node colours; the reduced data gives the point coordinates

    # Plot the edges
    start_idx, end_idx = np.where(
        non_zero
    )  # called with only a condition, numpy.where returns the indices of the True elements
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    cm = plt.cm.get_cmap(
        'OrRd'
    )  # other colormaps are listed at http://matplotlib.org/users/colormaps.html and can be swapped in here
    lc = LineCollection(segments,
                        zorder=0,
                        cmap=cm,
                        norm=plt.Normalize(0, .7 * values.max()))
    lc.set_array(values)
    lc.set_linewidths(15 * values)  # edge width is proportional to the absolute partial correlation
    ax.add_collection(lc)

    # Add a label (the industry class name) to each node. The challenge here is that we want to position the labels to avoid overlap with other labels.
    names = [i[-1] for i in class_mapping]
    for index, (name, label,
                (x, y)) in enumerate(zip(names, labels, embedding.T)):
        # Offsets from this node to every node; the node's own entry is set
        # to 1 so it is not picked as its own nearest neighbour.
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        # Horizontal offset to the node closest in y, and vertical offset to
        # the node closest in x; their signs choose the label alignment.
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x,
                 y,
                 name,
                 size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.spectral(label / float(n_labels)),
                           alpha=.6))
    plt.xlim(
        embedding[0].min() - .15 * embedding[0].ptp(),
        embedding[0].max() + .10 * embedding[0].ptp(),
    )  # numpy.ptp() returns the range (max - min) of the values
    plt.ylim(embedding[1].min() - .03 * embedding[1].ptp(),
             embedding[1].max() + .03 * embedding[1].ptp())
    plt.show()
    return CTable
Esempio n. 48
0
def dailyStockClusters():
    """Cluster stocks by daily price variation and save the graph as a PNG.

    Reads ``symbol;company name`` lines from ``symbols/companyNames.txt``
    under the current working directory, downloads one year of Yahoo
    quotes per symbol, drops symbols whose close prices contain NaNs and
    clusters the daily ``close - open`` variation (sparse inverse
    covariance + affinity propagation). The clusters are printed and a 2-D
    graph of the partial correlations is written to
    ``pyTAAA_web/Clustered_companyNames.png`` under the working directory.

    Returns:
        str: the HTML fragment that embeds the saved image.
    """
    import datetime
    import os
    import numpy as np
    import pandas.io.data as web
    from pandas import DataFrame
    from matplotlib import pylab as pl
    from matplotlib.collections import LineCollection

    from sklearn import cluster, covariance, manifold
    ########################################################################
    ###
    ### This example employs several unsupervised learning techniques to
    ### extract the stock market structure from variations in historical quotes.
    ### The quantity that we use is the daily variation in quote price:
    ### quotes that are linked tend to co-fluctuate during a day.
    ###
    ### stocks used are all Nasdaq 100 stocks that have one year of history
    ### from the current date.
    ###
    ### adopted from example at:
    ### http://scikit-learn.org/0.14/auto_examples/applications/plot_stock_market.html
    ###
    ########################################################################
    # Retrieve the data from Internet

    # Use the one-year window that ends today (at midnight).
    today = datetime.datetime.now()
    d2 = datetime.datetime(today.year, today.month, today.day)
    try:
        d1 = d2.replace(year=d2.year - 1)
    except ValueError:
        # 29 February has no counterpart in the previous year
        d1 = d2.replace(year=d2.year - 1, day=28)

    # input symbols and company names from text file, ignoring blank lines
    companyName_file = os.path.join( os.getcwd(), "symbols",  "companyNames.txt" )
    with open( companyName_file, "r" ) as f:
        companyNames = [line for line in f.read().split("\n") if line.strip()]

    # Single-argument print calls behave the same under Python 2 and also
    # parse under Python 3.
    print("\n\n\n")
    symbols = []
    names = []
    symbol_dict = {}
    for name in companyNames:
        name = name.replace("amp;", "")
        testsymbol, testcompanyName = name.split(";")
        symbols.append(format(testsymbol,'s'))
        names.append(format(testcompanyName,'s'))
        if testsymbol != "CASH":
            symbol_dict[ testsymbol ] = format(testcompanyName,'s')
    print(" ... symbol_dict =  %s" % (symbol_dict,))

    name_by_symbol = dict(zip(symbols, names))

    # Download the quotes; symbols that cannot be fetched are reported and
    # left out.
    all_data = {}
    for ticker in symbols:
        try:
            all_data[ticker] = web.get_data_yahoo(ticker, d1, d2)
        except Exception:
            print("Cant find  %s" % ticker)

    # Build the price tables once, after all downloads have finished.
    qclose = DataFrame({tic: data['Close']
                        for tic, data in all_data.items()})
    qopen = DataFrame({tic: data['Open']
                       for tic, data in all_data.items()})

    # Drop every downloaded symbol whose close prices contain NaNs.
    for ticker in symbols:
        if ticker not in qclose:
            # download failed, or the symbol was already removed
            continue
        if np.isnan(np.array(qclose[ticker])).any():
            print("%s  nans found, ticker removed" % ticker)
            del qclose[ticker]
            del qopen[ticker]

    # Company names in the column order of the price tables. This is the
    # order of `labels` and of the embedding below.
    names_edit = [name_by_symbol[ticker] for ticker in qclose.columns]

    # The daily variations of the quotes are what carry most information
    variation = qclose - qopen
    variation[ np.isnan(variation) ] = 0.


    ###############################################################################
    # Learn a graphical structure from the correlations
    edge_model = covariance.GraphLassoCV()

    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X = variation.copy()
    X /= X.std(axis=0)
    edge_model.fit(X)

    ###############################################################################
    # Cluster using affinity propagation

    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()

    for i in range(n_labels + 1):
        print("Cluster "+str(i)+":")
        for company, company_label in zip(names_edit, labels):
            if company_label == i:
                print(" ... "+company)

    figure7path = 'Clustered_companyNames.png'  # re-set to name without full path
    figure7_htmlText = "\n<br><h3>Daily stock clustering analyis. Based on one year performance correlations.</h3>\n"
    figure7_htmlText = figure7_htmlText + "\nClustering based on daily variation in Nasdaq 100 quotes.\n"
    figure7_htmlText = figure7_htmlText + '''<br><img src="'''+figure7path+'''" alt="PyTAAA by DonaldPG" width="850" height="500"><br>\n'''


    ###############################################################################
    # Find a low-dimension embedding for visualization: find the best position of
    # the nodes (the stocks) on a 2D plane

    # We use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control).
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)

    embedding = node_position_model.fit_transform(X.T).T

    ###############################################################################
    # Visualization
    pl.figure(1, facecolor='w', figsize=(10, 8))
    pl.clf()
    ax = pl.axes([0., 0., 1., 1.])
    pl.axis('off')

    # Display a graph of the partial correlations
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding
    # NOTE(review): `pl.cm.spectral` is called `nipy_spectral` in newer
    # matplotlib releases - confirm the targeted version.
    pl.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,
               cmap=pl.cm.spectral)

    # Plot the edges
    start_idx, end_idx = np.where(non_zero)
    #a sequence of (*line0*, *line1*, *line2*), where::
    #            linen = (x0, y0), (x1, y1), ... (xm, ym)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    lc = LineCollection(segments,
                        zorder=0, cmap=pl.cm.hot_r,
                        norm=pl.Normalize(0, .7 * values.max()))
    lc.set_array(values)
    lc.set_linewidths(15 * values)
    ax.add_collection(lc)

    # With a single cluster n_labels is 0; avoid dividing by it below.
    label_scale = float(max(n_labels, 1))

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels.
    # The names come from the filtered list so they stay aligned with
    # `labels` and `embedding` after symbols have been removed.
    for index, (name, label, (x, y)) in enumerate(
            zip(names_edit, labels, embedding.T)):

        # Offsets to all nodes; the node's own entry is set to 1 so it is
        # not selected as its own nearest neighbour.
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        pl.text(x, y, name, size=10,
                horizontalalignment=horizontalalignment,
                verticalalignment=verticalalignment,
                bbox=dict(facecolor='w',
                          edgecolor=pl.cm.spectral(label / label_scale),
                          alpha=.6))

    pl.xlim(embedding[0].min() - .15 * embedding[0].ptp(),
            embedding[0].max() + .10 * embedding[0].ptp(),)
    pl.ylim(embedding[1].min() - .03 * embedding[1].ptp(),
            embedding[1].max() + .03 * embedding[1].ptp())

    pl.savefig( os.path.join( os.getcwd(), "pyTAAA_web",  "Clustered_companyNames.png" ), format='png' )

    return figure7_htmlText
def StockMarketOLD():
    ###############################################################################
    # Retrieve the data from Internet

    # Choose a time period reasonnably calm (not too long ago so that we get
    # high-tech firms, and before the 2008 crash)
    d1 = datetime.datetime(2005, 1, 1)
    d2 = datetime.datetime(2009, 12, 31)

    # kraft symbol has now changed from KFT to MDLZ in yahoo
    symbol_dict = {
        'TOT': 'Total',
        'XOM': 'Exxon',
        'CVX': 'Chevron',
        'COP': 'ConocoPhillips',
        'VLO': 'Valero Energy',
        'MSFT': 'Microsoft',
        'IBM': 'IBM',
        'TWX': 'Time Warner',
        'CMCSA': 'Comcast',
        #'CVC': 'Cablevision',
        #'YHOO': 'Yahoo',
        #'DELL': 'Dell',
        'HPQ': 'HP',
        'AMZN': 'Amazon',
        'TM': 'Toyota',
        'CAJ': 'Canon',
        'MTU': 'Mitsubishi',
        'SNE': 'Sony',
        #'F': 'Ford',
        'HMC': 'Honda',
        #'NAV': 'Navistar',
        'NOC': 'Northrop Grumman',
        'BA': 'Boeing',
        'KO': 'Coca Cola',
        'MMM': '3M',
        'MCD': 'Mc Donalds',
        #'PEP': 'Pepsi',
        'MDLZ': 'Kraft Foods',
        'K': 'Kellogg',
        'UN': 'Unilever',
        'MAR': 'Marriott',
        'PG': 'Procter Gamble',
        'CL': 'Colgate-Palmolive',
        'GE': 'General Electrics',
        'WFC': 'Wells Fargo',
        'JPM': 'JPMorgan Chase',
        #'AIG': 'AIG',
        'AXP': 'American Express',
        'BAC': 'Bank of America',
        'GS': 'Goldman Sachs',
        'AAPL': 'Apple',
        'SAP': 'SAP',
        'CSCO': 'Cisco',
        'TXN': 'Texas Instruments',
        'XRX': 'Xerox',
        #'LMT': 'Lookheed Martin',
        'WMT': 'Wal-Mart',
        'WBA': 'Walgreen',
        'HD': 'Home Depot',
        'GSK': 'GlaxoSmithKline',
        'PFE': 'Pfizer',
        'SNY': 'Sanofi-Aventis',
        'NVS': 'Novartis',
        'KMB': 'Kimberly-Clark',
        'R': 'Ryder',
        'GD': 'General Dynamics',
        'RTN': 'Raytheon',
        'CVS': 'CVS',
        'CAT': 'Caterpillar',
        'DD': 'DuPont de Nemours',

        #'GM': 'General Motors',
        #'GOOG' : 'Google',
        'ORCL': 'Oracle',
        'NVO': 'Novo Nordisk',
        'LLY': 'Eli Lilly and Company',
        #'FB':'Facebook',
        'MRK': 'Merck Co',
    }
    '''
    symbol_dict = {'Danske.CO':'Danske Bank',
                   'Maersk-B.CO':'Maersk',
                   'DSV.CO':'DSV',
                   'FLS.CO':'FLS',
                   'Gen.CO':'Genmab',
                   'TDC.CO':'TDC',
                   'CARL-B.CO':'Carlsberg',
                   'CHR.CO':'Chr Hansen',
                   'COLO-B.CO':'Coloplast',
                   'GN.CO':'GN Store Nord',
                   'NDA-DKK.co':'Nordea',
                   'Novo-B.co':'Novo Nordisk',
                   'NZYM-B.CO':'Novozymes',
                   'PNDORA.CO':'Pandora',
                   'Tryg.co':'Tryg',
                   'VWS.CO':'Vestas',
                   'WDH.CO':'William Demant',
                   'G4s.co':'G4S',
                   'JYSK.CO':'Jyske Bank',
                   'KBHL.CO':'Kobenhavns Lufthavne',
                   'RBREW.CO':'Royal Unibrew',
                   'ROCK-B.CO':'Rockwool',
                   'SYDB.CO':'Sydbank',
                   'TOP.CO':'Topdanmark',
                   #'ALMB.CO':'Alm Brand',
                   'AURI-B.CO':'Auriga',
                   'Bava.CO':'Bavarian Nordic',
                   'BO.CO':'Bang Olufsen',
                   'DFDS.CO':'DFDS',
                   'DNORD.CO':'DS Norden',
                   'GES.CO':'Greentech',
                   'IC.CO':'IC Group',
                   'JDAN.CO':'Jeudan',
                   #'JUTBK.CO':'Jutlander Bank',
                   #'MATAS.CO':'Matas',
                   'NKT.CO':'NKT',
                   #'NNIT.CO':'NNIT',
                   'NORDJB.CO':'Nordjyske Bank',
                   #'ONXEO.CO':'Onxeo',
                   #'OSSR.CO':'Ossur',
                   'PAAL-B.CO':'Per Aarslef',
                   'RILBA.CO':'Ringkobing Landbobank',
                   'SAS-DKK.CO':'SAS',
                   'SCHO.CO':'Schouw Co.',
                   'SIM.CO':'SimCorp',
                   'Solar-B.co':'Solar B',
                   'SPNO.CO':'Spar Nord',
                   'TIV.CO':'Tivoli',
                   'UIE.CO':'UIE',
                   'VELO.CO':'Veloxis',
                   'ZEAL.CO':'Zealand Pharma'
                   }
    '''
    symbols, names = np.array(list(symbol_dict.items())).T

    for symbol in symbols:
        print symbol
        if len(
                pd.DataFrame(
                    np.array([[
                        q[5] for q in quotes_historical_yahoo(
                            symbol, d1, d2, True, False)
                    ]]).T)) != 1259:
            print symbol, len(
                pd.DataFrame(
                    np.array([[
                        q[5] for q in quotes_historical_yahoo(
                            symbol, d1, d2, True, False)
                    ]]).T))

    open = pd.DataFrame(
        np.array([[
            q[5] for q in quotes_historical_yahoo(symbol, d1, d2, True, False)
        ] for symbol in symbols]).T)
    close = pd.DataFrame(
        np.array([[
            q[6] for q in quotes_historical_yahoo(symbol, d1, d2, True, False)
        ] for symbol in symbols]).T)

    # The daily variations of the quotes are what carry most information
    variation = np.array(close - open)

    ###############################################################################
    # Learn a graphical structure from the correlations
    #edge_model = covariance.GraphLassoCV()

    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery

    df = pd.read_csv('data/TData9313_final5.csv', index_col=0)
    X = variation.copy()

    pd.DataFrame(np.round(np.cov(X.T), 3), columns=symbols,
                 index=symbols).to_latex('covariancetable.tex')

    print np.max(np.round(np.cov(X.T), 3))

    X /= X.std(axis=0)

    covariance_, precision_ = graphical_lasso(X, 0.3)

    print pd.DataFrame(precision_)

    #edge_model.fit(X)

    ###############################################################################
    # Cluster using affinity propagation

    _, labels = cluster.affinity_propagation(covariance_)

    n_labels = labels.max()

    for i in range(n_labels + 1):
        print('Cluster %i: %s' % ((i + 1), ', '.join(symbols[labels == i])))

    ###############################################################################
    # Find a low-dimension embedding for visualization: find the best position of
    # the nodes (the stocks) on a 2D plane

    # We use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control). In addition, we
    # use a large number of neighbors to capture the large-scale structure.
    node_position_model = manifold.LocallyLinearEmbedding(n_components=2,
                                                          eigen_solver='dense',
                                                          n_neighbors=6)

    embedding = node_position_model.fit_transform(X.T).T

    ###############################################################################
    # Visualization
    plt.figure(1, facecolor='w', figsize=(20, 16))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')

    plt.annotate('From %s to %s' %
                 (d1.strftime('%Y-%m-%d'), d2.strftime('%Y-%m-%d')),
                 xy=(0.11, -0.37),
                 size=25)

    print X.shape

    for i in range(n_labels + 1):
        plt.annotate('Cluster %i: %s' %
                     ((i + 1), ', '.join(symbols[labels == i])),
                     xy=(-0.43, 0.02 - i * 0.02),
                     size=18)
        pass

    # Display a graph of the partial correlations
    #partial_correlations = edge_model.precision_.copy()
    partial_correlations = precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding
    plt.scatter(embedding[0],
                embedding[1],
                s=200 * d**2,
                c=labels,
                cmap=plt.cm.spectral)

    # Plot the edges
    start_idx, end_idx = np.where(non_zero)
    #a sequence of (*line0*, *line1*, *line2*), where::
    #            linen = (x0, y0), (x1, y1), ... (xm, ym)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    lc = LineCollection(segments,
                        zorder=0,
                        cmap=plt.get_cmap('Greys'),
                        norm=plt.Normalize(0, .7 * values.max()))
    lc.set_array(values)
    lc.set_linewidths(15 * values)
    ax.add_collection(lc)

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    for index, (name, label,
                (x, y)) in enumerate(zip(names, labels, embedding.T)):

        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x,
                 y,
                 name,
                 size=22,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.spectral(label / float(n_labels)),
                           alpha=.6))

    plt.xlim(
        embedding[0].min() - .25 * embedding[0].ptp(),
        embedding[0].max() + .20 * embedding[0].ptp(),
    )
    plt.ylim(embedding[1].min() - .20 * embedding[1].ptp(),
             embedding[1].max() + .20 * embedding[1].ptp())

    plt.savefig('Graphs/StockCluster.pdf', bbox_inches='tight')
    plt.savefig('Graphs/StockCluster.svg', bbox_inches='tight')
    plt.show()
# Plot the nodes using the coordinates of our embedding
# NOTE(review): this is a module-level fragment. It reads `embedding`, `d`,
# `labels`, `non_zero`, `partial_correlations`, `ax` and `names`, none of
# which are assigned at module scope in the visible part of the file --
# confirm they exist before relying on this code.
# Row 0 / row 1 of `embedding` are used as the x / y coordinates; marker
# size is 100 * d ** 2 and marker colour is driven by `labels`.
plt.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,
            cmap=plt.cm.spectral)

# Plot the edges
# Index pairs (start, stop) of every entry where `non_zero` is true.
start_idx, end_idx = np.where(non_zero)
# a sequence of (*line0*, *line1*, *line2*), where::
#            linen = (x0, y0), (x1, y1), ... (xm, ym)
# Each segment joins column `start` of the embedding to column `stop`.
segments = [[embedding[:, start], embedding[:, stop]]
            for start, stop in zip(start_idx, end_idx)]
# Absolute values of the selected entries; used below for both the edge
# colour (via set_array) and the edge width (15 * value).
values = np.abs(partial_correlations[non_zero])
# Colour scale runs from 0 to 70% of the largest value; zorder=0 is passed
# so the lines sit at the lowest drawing order.
lc = LineCollection(segments,
                    zorder=0, cmap=plt.cm.hot_r,
                    norm=plt.Normalize(0, .7 * values.max()))
lc.set_array(values)
lc.set_linewidths(15 * values)
ax.add_collection(lc)

# Add a label to each node. The challenge here is that we want to
# position the labels to avoid overlap with other labels
# NOTE(review): the loop body stops after the first alignment assignment;
# `this_dy`, `name` and `label` are not used in the visible lines, so the
# snippet looks truncated -- confirm against the complete version.
for index, (name, label, (x, y)) in enumerate(
        zip(names, labels, embedding.T)):

    # Offsets from this node to every node along each axis. The node's own
    # entry (which would be 0) is overwritten with 1, presumably so that the
    # argmin calls below do not select the node itself.
    dx = x - embedding[0]
    dx[index] = 1
    dy = y - embedding[1]
    dy[index] = 1
    # Horizontal offset to the node with the smallest vertical distance, and
    # vertical offset to the node with the smallest horizontal distance.
    this_dx = dx[np.argmin(np.abs(dy))]
    this_dy = dy[np.argmin(np.abs(dx))]
    if this_dx > 0:
        horizontalalignment = 'left'
Esempio n. 51
0
class SpiroGraph(object):

    '''
    Spirograph drawer with matplotlib slider widgets to change parameters.
    Parameters of line are:

        R: The radius of the big circle
        r: The radius of the small circle which rolls along the inside of the
           bigger circle
        p: distance from centre of smaller circle to point in the circle where
           the pen hole is.
        tmax: the angle through which the smaller circle is rotated to draw the
              spirograph
        tstep: how often matplotlib plots a point
        a, b, c: parameters of the linewidth equation.

    Constructing an instance builds the figure and widgets, draws the
    initial curve and then calls run(), which ends in plt.show().

    The order of slider_kwargs matters: the update methods address sliders
    by position (0-4 geometry, 5 colour, 6-8 line width).
    '''

    # kwargs for each of the matplotlib sliders
    slider_kwargs = (
        {'label': 't_max', 'valmin': np.pi, 'valmax': 200 * np.pi,
         'valinit': tmax0, 'valfmt': PiString()},
        {'label': 't_step', 'valmin': 0.01,
         'valmax': 10, 'valinit': tstep0},
        {'label': 'R', 'valmin': 1, 'valmax': 200, 'valinit': R0},
        {'label': 'r', 'valmin': 1, 'valmax': 200, 'valinit': r0},
        {'label': 'p', 'valmin': 1, 'valmax': 200, 'valinit': p0},
        {'label': 'colour', 'valmin': 0, 'valmax': 1, 'valinit': 1},
        {'label': 'width_a', 'valmin': 0.5, 'valmax': 10, 'valinit': 1},
        {'label': 'width_b', 'valmin': 0, 'valmax': 10, 'valinit': 0},
        {'label': 'width_c', 'valmin': 0, 'valmax': 10, 'valinit': 0.5})

    # kwargs for the two radio button groups: background colour, then
    # solid/variable line colour
    rbutton_kwargs = (
        {'labels': ('black', 'white'), 'activecolor': 'white', 'active': 0},
        {'labels': ('solid', 'variable'), 'activecolor': 'white', 'active': 0})

    def __init__(self, colormap, figsize=(7, 10)):
        '''
        Build the figure, the widgets and the initial curve, then start the
        GUI by calling run().

        colormap: colormap handed to ScalarMappable(cmap=...)
        figsize: figure size; written to rcParams['figure.figsize'] before
                 the figure is created
        '''
        self.colormap_name = colormap
        self.variable_color = False
        # Use ScalarMappable to map full colormap to range 0 - 1
        self.colormap = ScalarMappable(cmap=colormap)
        self.colormap.set_clim(0, 1)

        # set up main axis onto which to draw spirograph
        self.figsize = figsize
        plt.rcParams['figure.figsize'] = figsize
        self.fig, self.mainax = plt.subplots()
        plt.subplots_adjust(bottom=0.3)
        title = self.mainax.set_title('Spirograph Drawer!',
                                      size=20,
                                      color='white')
        self.text = [title, ]
        # set up slider axes
        self.slider_axes = [plt.axes([0.25, x, 0.65, 0.015])
                            for x in np.arange(0.05, 0.275, 0.025)]
        # same again for radio buttons
        self.rbutton_axes = [plt.axes([0.025, x, 0.1, 0.15])
                             for x in np.arange(0.02, 0.302, 0.15)]
        # use log scale for tstep slider
        self.slider_axes[1].set_xscale('log')
        # turn off frame, ticks and tick labels for all axes
        for ax in chain(self.slider_axes, self.rbutton_axes, [self.mainax, ]):
            ax.axis('off')
        # use axes and kwargs to create list of sliders/rbuttons
        self.sliders = [Slider(ax, **kwargs)
                        for ax, kwargs in zip(self.slider_axes,
                                              self.slider_kwargs)]
        self.rbuttons = [RadioButtons(ax, **kwargs)
                         for ax, kwargs in zip(self.rbutton_axes,
                                               self.rbutton_kwargs)]
        self.update_figcolors()

        # set up initial line
        self.t = np.arange(0, tmax0, tstep0)
        x, y = spiro_linefunc(self.t, R0, r0, p0)
        self.linecollection = LineCollection(
            segments(x, y),
            linewidths=spiro_linewidths(self.t, a0, b0, c0),
            color=self.colormap.to_rgba(col0))
        self.mainax.add_collection(self.linecollection)
        # add_collection() does not move the view limits, so frame the
        # initial curve explicitly, exactly as a slider change would
        self._fit_view(x, y)

        # creates the plot and connects sliders to various update functions
        self.run()

    @staticmethod
    def _set_axes_background(ax, color):
        '''
        set the background colour of an axes. Uses set_facecolor when the
        axes provides it and falls back to the older set_axis_bgcolor
        otherwise, so both old and new matplotlib releases work.
        '''
        setter = getattr(ax, 'set_facecolor', None)
        if setter is None:
            setter = ax.set_axis_bgcolor
        setter(color)

    def _fit_view(self, x, y):
        '''
        set the main axis limits to the extent of x and y, padded by 5 on
        every side
        '''
        self.mainax.set(xlim=(np.min(x) - 5, np.max(x) + 5),
                        ylim=(np.min(y) - 5, np.max(y) + 5))

    def _apply_linewidths(self):
        '''
        recompute the line widths from the current t and the a, b and c
        sliders (no redraw)
        '''
        a, b, c = (s.val for s in self.sliders[6:])
        self.linecollection.set_linewidths(spiro_linewidths(self.t, a, b, c))

    def _apply_linecolors(self):
        '''
        recompute the line colours from the current t, the colour slider and
        the solid/variable setting (no redraw)
        '''
        # get current color value (a value between 1 and 0)
        col_val = self.sliders[5].val
        if not self.variable_color:
            # if solid color, convert color value to rgb and set the color
            self.linecollection.set_color(self.colormap.to_rgba(col_val))
            return

        # create values between 0 and 1 for each line segment
        t_peak = np.max(self.t)
        if t_peak > 0:
            phase = self.t / float(t_peak)
        else:
            # t holds only its starting sample 0: avoid dividing 0 by 0
            phase = np.zeros(len(self.t))
        colors = phase + col_val
        # use color value to roll colors
        colors[colors > 1] -= 1
        # to_rgba maps the whole array at once, one rgba row per value
        self.linecollection.set_color(self.colormap.to_rgba(colors))

    def update_figcolors(self, bgcolor='black'):
        '''
        function run by background color radiobutton. Sets all labels, text,
        and sliders to foreground color, all axes to background color
        '''
        fgcolor = 'white' if bgcolor == 'black' else 'black'
        self.fig.set_facecolor(bgcolor)
        for ax in chain([self.mainax, ], self.slider_axes, self.rbutton_axes):
            self._set_axes_background(ax, bgcolor)

        # set fgcolor elements to black or white, mostly elements of sliders
        for item in chain(map(attrgetter('label'), self.sliders),
                          map(attrgetter('valtext'), self.sliders),
                          map(attrgetter('poly'), self.sliders),
                          self.text,
                          *map(attrgetter('labels'), self.rbuttons)):
            item.set_color(fgcolor)

        self.update_radiobutton_colors()
        plt.draw()

    def update_linewidths(self, *args):
        '''
        function run by a, b and c parameter sliders. Sets width of each line
        in linecollection from spiro_linewidths and redraws. The slider
        callback arguments are ignored; values are read from the sliders.
        '''
        self._apply_linewidths()
        plt.draw()

    def update_linecolors(self, *args):
        '''
        function run by color slider and indirectly by variable/solid color
        radiobutton. Updates colors of each line in linecollection using the
        set colormap and redraws.
        '''
        self._apply_linecolors()
        plt.draw()

    def update_lineverts(self, *args):
        '''
        function run by R, r, p, tmax and tstep sliders to update line vertices
        '''
        tmax, tstep, R, r, p = (s.val for s in self.sliders[:5])
        self.t = np.arange(0, tmax, tstep)
        x, y = spiro_linefunc(self.t, R, r, p)
        self.linecollection.set_verts(segments(x, y))
        # widths and variable colours are computed per sample of t, so they
        # have to follow the new t or they no longer line up with the curve
        self._apply_linewidths()
        self._apply_linecolors()
        # change axis limits to pad new line nicely
        self._fit_view(x, y)
        plt.draw()

    def update_linecolor_setting(self, val):
        '''
        function run by solid/variable colour radiobutton, alters
        variable_color attribute then calls update_linecolors. Any value
        other than 'variable' or 'solid' leaves the attribute unchanged.
        '''
        if val == 'variable':
            self.variable_color = True
        elif val == 'solid':
            self.variable_color = False
        # need to update radiobutton colors here.
        self.update_radiobutton_colors()
        self.update_linecolors()

    def update_radiobutton_colors(self):
        '''
        makes radiobutton colors correct even on a changing axis background.
        The background colour is read from the first radiobutton group.
        '''
        # NOTE(review): relies on RadioButtons.circles, which recent
        # matplotlib releases appear to have removed -- confirm against the
        # matplotlib version this is run with.
        bgcolor = self.rbuttons[0].value_selected
        fgcolor = 'white' if bgcolor == 'black' else 'black'
        for b, kwargs in zip(self.rbuttons, self.rbutton_kwargs):
            # find out index of the active button
            active_idx = kwargs['labels'].index(b.value_selected)
            # each group has exactly two buttons, so the inactive one is
            # at index 1 - active_idx
            b.circles[1 - active_idx].set_color(bgcolor)
            b.circles[active_idx].set_color(fgcolor)

    def run(self):
        '''
        connect the sliders and radiobuttons to their update functions and
        show the figure
        '''
        verts_func = self.update_lineverts
        colors_func = self.update_linecolors
        widths_func = self.update_linewidths
        # create iterable of slider funcs to zip with sliders; the grouping
        # (5 geometry, 1 colour, 3 width) mirrors the order of slider_kwargs
        slider_update_funcs = chain(repeat(verts_func, 5),
                                    [colors_func, ],
                                    repeat(widths_func, 3))
        # set slider on_changed functions
        for s, f in zip(self.sliders, slider_update_funcs):
            s.on_changed(f)

        self.rbuttons[0].on_clicked(self.update_figcolors)
        self.rbuttons[1].on_clicked(self.update_linecolor_setting)
        plt.show()