Example #1
0
def distopcounts():
    """Plot query counts against the number of distinct physical operators.

    Two series are loaded through the ``read_csv`` helper (defined elsewhere)
    and drawn as scatter markers on a single axes with a log-scaled y axis:
    one labelled "SDSS" and one labelled "SQLShare".  The figure is shown and
    then written to ``plot_dist_physops_query.pdf`` and
    ``plot_dist_physops_query.png``.
    """
    figure, axes = plt.subplots(1, figsize=(8, 4))
    axes.set_yscale('log')

    sdss = read_csv(['distinct_ops', 'counts'], True)
    ppl.scatter(axes, sdss['distinct_ops'], sdss['counts'],
                color=pcs[0], marker="o", label="SDSS", s=100)

    # NOTE(review): only 'distinct_physical_ops' is requested here yet a
    # 'count' column is read -- presumably read_csv supplies it; confirm.
    sqlshare = read_csv(['distinct_physical_ops'], False)
    ppl.scatter(axes, sqlshare['distinct_physical_ops'], sqlshare['count'],
                color=pcs[1], marker="v", label="SQLShare", s=100)

    axes.set_xlabel('Distinct physical operators used')
    axes.set_ylabel('# of queries')

    ppl.legend(axes, loc='lower right')

    axes.set_xlim(0)
    axes.set_ylim(0)

    figure.tight_layout()
    plt.show()

    outputs = (
        ('plot_dist_physops_query.pdf', 'pdf'),
        ('plot_dist_physops_query.png', 'png'),
    )
    for target, file_format in outputs:
        figure.savefig(target, format=file_format, transparent=True)
Example #2
0
    def plot_profile(self,
                     fig,
                     ax,
                     x,
                     y,
                     xlabel='',
                     ylabel='',
                     axis_font={},
                     tick_font={},
                     scatter=False,
                     **kwargs):
        """Draw ``y`` against ``x`` on ``ax`` with the x axis labelled on top.

        ``scatter`` selects ``ppl.scatter`` instead of ``ppl.plot``; extra
        keyword arguments are forwarded to whichever is used.  Underscores in
        the axis labels are replaced by spaces.  ``fig`` is accepted but not
        used by this method.
        """
        # An empty/omitted axis_font falls back to the module-level default.
        label_font = axis_font if axis_font else axis_font_default

        draw = ppl.scatter if scatter else ppl.plot
        draw(ax, x, y, **kwargs)

        if xlabel:
            ax.set_xlabel(xlabel.replace("_", " "), labelpad=5, **label_font)
        if ylabel:
            ax.set_ylabel(ylabel.replace("_", " "), labelpad=11, **label_font)
        if tick_font:
            ax.tick_params(**tick_font)

        x_axis = ax.xaxis
        x_axis.set_label_position('top')  # label goes above the plot
        x_axis.set_ticks_position('top')
        # Pokes the locator's private bin count directly.
        x_axis.get_major_locator()._nbins = 5
        ax.grid(True)
        plt.tight_layout()
Example #3
0
File: fit.py Project: SRHerzog/ut
def xy_linear_regression(df, x_var, y_var, **kwargs):
    """Fit ``y_var`` against ``x_var`` with a linear model.

    Rows with a missing value in either column are dropped before fitting.

    Keyword arguments:
        fit_intercept: forwarded to ``LinearRegression`` (default True).
        normalize: forwarded to ``LinearRegression`` (default False).
        plot: when truthy, scatter the data, draw the fitted line from
            x=0 to the largest x, and annotate slope/intercept
            (default False).

    Returns:
        ``(intercept, slope)`` of the fitted line.
    """
    fit_intercept = kwargs.get('fit_intercept', True)
    d = df[[x_var, y_var]].dropna()

    # Column vectors of shape (n, 1); plain ndarrays instead of np.matrix.
    x = np.asarray(d[x_var]).reshape(-1, 1)
    y = np.asarray(d[y_var]).reshape(-1, 1)

    clf = linear_model.LinearRegression(
        fit_intercept=fit_intercept,
        normalize=kwargs.get('normalize', False))
    clf.fit(x, y)
    # intercept_ is a length-1 array when an intercept is fitted but a plain
    # scalar when fit_intercept is False; np.ravel handles both.
    intercept = np.ravel(clf.intercept_)[0]
    slope = clf.coef_[0][0]

    if kwargs.get('plot', False):
        ppl.scatter(x=d[x_var], y=d[y_var])
        x_line = np.array([0, d[x_var].max()])
        plt.plot(x_line, intercept + slope * x_line, 'k-')
        plt.annotate('slope={slope:.4f}\nintercept={intercept:.4f}'.format(
            slope=slope, intercept=intercept), (0.05, 0.9),
                     xycoords='axes fraction')
        plt.xlabel(x_var)
        plt.ylabel(y_var)

    return intercept, slope
Example #4
0
def new_tables_cdf():
    """Plot cumulative table usage against normalised query position.

    Two series loaded via the ``read_csv`` helper (defined elsewhere) are
    drawn as step lines with scatter markers: "SDSS" (cumulative sum of the
    normalised 'num_new_tables' column) and "SQLShare" ('tables' divided by
    its last value).  Both x series are divided by their last element.  The
    figure is shown and saved as ``num_new_tables.pdf`` / ``.png``.
    """
    figure, axes = plt.subplots(1)

    sdss = read_csv(['query_number', 'num_new_tables'], True)
    new_share = sdss['num_new_tables'].astype(float)
    new_share /= sum(new_share)
    sdss_pos = sdss['query_number'].astype(float)
    sdss_pos /= sdss_pos[-1]
    sdss_cdf = np.cumsum(new_share)
    ppl.plot(axes, sdss_pos, sdss_cdf, label="SDSS", color=cs[0],
             linewidth=2, ls='-.', drawstyle='steps-post')
    ppl.scatter(axes, sdss_pos, np.cumsum(new_share), color=cs[0],
                marker="o", s=100)

    sqlshare = read_csv(['table_coverage'], False)
    coverage = sqlshare['tables'].astype(float)
    coverage /= coverage[-1]
    sqlshare_pos = sqlshare['query_id'].astype(float)
    sqlshare_pos /= sqlshare_pos[-1]
    ppl.plot(axes, sqlshare_pos, coverage, label="SQLShare", color=cs[1],
             linewidth=2, ls='-.', drawstyle='steps-post')
    ppl.scatter(axes, sqlshare_pos, coverage, color=cs[1], marker="o", s=100)

    ppl.legend(axes, loc='lower right')

    # ``formatter`` is a module-level object defined outside this function.
    plt.gca().yaxis.set_major_formatter(formatter)

    axes.set_xlabel('Query number')
    axes.set_ylabel('% of newly used table')

    axes.set_ylim(0, 1.01)
    axes.set_xlim(0, 1)

    axes.yaxis.grid()

    plt.show()

    for target, file_format in (('num_new_tables.pdf', 'pdf'),
                                ('num_new_tables.png', 'png')):
        figure.savefig(target, format=file_format, transparent=True)
Example #5
0
def plot_clusters(clusters, candidates, bounds, vloc, hulls, shrink=0.9):
    """Plot all `clusters` among `candidates` with the `bounds` of the city
    (or at least `shrink` of them). Also plot convex `hulls` of gold areas if
    provided.

    Candidates that belong to no cluster are drawn first as a smaller, gray
    "noise" series; each cluster then gets its own colour and a
    "cluster N" legend entry.  `vloc` is indexed as ``vloc[indices, 0]`` /
    ``vloc[indices, 1]`` for the coordinates; `bounds` unpacks into x and y
    limits, each multiplied by `shrink`.
    """
    xbounds, ybounds = bounds
    unique_labels = len(clusters)
    clustered = set().union(*map(list, clusters))
    noise = list(candidates.difference(clustered))
    groups = [noise] + clusters
    # One colour per group (noise + clusters).  The fixed palette only has
    # five entries, so switch to the colormap as soon as it would run short;
    # the previous `> 5` test silently dropped the last of exactly 5 clusters.
    if unique_labels + 1 > 5:
        colors = mpl.cm.Spectral(np.linspace(0, 1, unique_labels + 1))
    else:
        colors = [gray, red, green, blue, orange]
    plt.figure(figsize=(20, 15))
    for k, (indices, col) in enumerate(zip(groups, colors), start=-1):
        is_noise = k == -1
        if is_noise:
            col = 'gray'
        ppl.scatter(vloc[indices, 0], vloc[indices, 1],
                    s=16 if is_noise else 35, color=col,
                    alpha=0.6 if is_noise else 0.8,
                    label='noise' if is_noise else 'cluster {}'.format(k + 1))
    hulls = hulls or []
    for idx, hull in enumerate(hulls):
        # Repeat the first vertex so the outline is closed.
        first_again = list(range(len(hull))) + [0]
        ppl.plot(hull[first_again, 0], hull[first_again, 1], '--',
                 c=ppl.colors.almost_black, lw=1.0, alpha=0.9,
                 label='gold region' if idx == 0 else None)
    plt.xlim(shrink*xbounds)
    plt.ylim(shrink*ybounds)
    ppl.legend()
Example #6
0
def scatter_plot(x, y, title, save, l):
    """Scatter ``x`` against ``y`` on the module-level ``ax`` and show it.

    ``ax`` and ``fig`` are not parameters; they are looked up from the
    enclosing module.  When ``save`` is truthy the figure is written with
    ``fig.savefig(l)``, so ``l`` is the output target.
    """
    ppl.scatter(ax, x, y)
    ax.set_title(title)
    ppl.show()
    if save:
        # A stray no-op ``ax.set`` expression used to sit here; removed.
        fig.savefig(l)
Example #7
0
    def plot_time_series(self, fig, is_timeseries, ax, x, y, fill=False, title='', xlabel='', ylabel='',
                         title_font={}, axis_font={}, tick_font={}, scatter=False, qaqc=[], events={}, **kwargs):
        """Draw ``y`` against ``x`` on ``ax`` as a line or scatter series.

        Args:
            fig: figure owning ``ax``; used for ``autofmt_xdate`` when
                ``is_timeseries`` is true.
            is_timeseries: when true the x axis is labelled through
                ``self.get_time_label``; otherwise ``xlabel`` is applied.
            fill: shade the area between the data and the bottom of the axes.
            title, xlabel, ylabel: text labels; underscores become spaces.
            title_font, axis_font: text keyword dicts; empty ones fall back
                to the module-level defaults.
            tick_font: keyword dict for ``ax.tick_params`` (skipped if empty).
            scatter: use ``ppl.scatter`` instead of ``ppl.plot``.
            qaqc: per-sample flags; samples whose flag is > 0 are circled
                in red.  Indexes ``x`` and ``y`` with the result of
                ``np.where``, so those must support array indexing.
            events: dict with an ``'events'`` list; each entry contributes a
                dashed vertical line at ``event['start_date']`` labelled
                ``event['class']``.
            **kwargs: forwarded to the main plot/scatter call.

        The default dict/list arguments are only read, never mutated.
        """
        if not title_font:
            title_font = title_font_default
        if not axis_font:
            axis_font = axis_font_default

        line_handles = None
        if scatter:
            ppl.scatter(ax, x, y, **kwargs)
        else:
            line_handles = ppl.plot(ax, x, y, **kwargs)

        if is_timeseries:
            self.get_time_label(ax, x)
            fig.autofmt_xdate()
        else:
            ax.set_xlabel(xlabel.replace("_", " "), **axis_font)

        if ylabel:
            ax.set_ylabel(ylabel.replace("_", " "), **axis_font)
        if title:
            ax.set_title(title.replace("_", " "), **title_font)

        ax.grid(True)
        if fill:
            miny = min(ax.get_ylim())
            # A scatter call yields no line handle, so use the default
            # axis-font colour for the shading in that case.
            if scatter:
                fill_color = axis_font_default['color']
            else:
                fill_color = line_handles[0].get_color()
            ax.fill_between(x, y, miny+1e-7, facecolor=fill_color, alpha=0.15)

        if events:
            ylim = ax.get_ylim()
            for event in events['events']:
                event_time = datestr2num(event['start_date'])
                # Use a dedicated name: rebinding ``x`` here used to corrupt
                # the QA/QC overlay drawn further down.
                event_x = np.array([event_time, event_time])
                ax.plot(event_x, ylim, '--', label=event['class'])

            legend = ax.legend()
            if legend:
                for label in legend.get_texts():
                    label.set_fontsize(10)

        if len(qaqc) > 0:
            # asarray lets plain lists of flags be compared element-wise.
            bad_data = np.where(np.asarray(qaqc) > 0)
            ppl.plot(ax, x[bad_data], y[bad_data],
                     marker='o',
                     mfc='none',
                     linestyle='None',
                     markersize=6,
                     markeredgewidth=2,
                     mec='r')

        if tick_font:
            ax.tick_params(**tick_font)
        plt.tight_layout()
Example #8
0
def test_scatter():
    """Scatter eight seeded random normal clouds, one per default colour."""
    # Fixed seed so the generated points are reproducible.
    np.random.seed(12)

    n_samples = 1000
    for center in range(8):
        xs = np.random.normal(loc=center, size=n_samples)
        ys = np.random.normal(loc=center, size=n_samples)
        ppl.scatter(xs, ys, label=str(center))
Example #9
0
def scatterFreeByMissingEdges(type_header,
                              type_table,
                              config,
                              instanceType,
                              solsPath,
                              solsExt,
                              figName=None):
    """Scatter free edges against missing edges, one series per instance size.

    For every size under ``type_table[config][instanceType]`` the solution
    file of each instance is read with ``getNEdges`` and two values are
    computed per instance:

    * missing edges = solution edges - fixed edges
    * free edges    = size*(size-1)/2 - (fixed edges + blocked edges)

    Args:
        type_header: accepted but not used by this function.
        type_table: nested mapping ``config -> instanceType -> size -> columns``.
        config, instanceType: keys selecting the data to plot.
        solsPath, solsExt: directory and file extension of the solution files.
        figName: when not None, the figure is saved to this path.

    Raises:
        Exception: if ``config`` or ``instanceType`` is not in ``type_table``.
    """
    if config not in type_table:
        raise Exception("Config \"" + config + "\" not found!")
    if instanceType not in type_table[config]:
        raise Exception("Instance type \"" + instanceType + "\" not found!")

    fig, ax = plt.subplots(1)

    by_size = type_table[config][instanceType]
    for size in sorted(by_size):
        columns = by_size[size]
        fixedEdges = np.array(
            [int(value) for value in columns["preproc.fixedEdges"]])
        blockedEdges = np.array(
            [int(value) for value in columns["preproc.blockedEdges"]])
        instanceNumbers = np.array(columns["instanceNumber"])

        # NOTE(review): the suffix is appended permanently, so every size
        # processed after 80 also reads from the suffixed directory --
        # presumably intended for sizes 80 and 90 only; confirm.
        if size == 80:
            solsPath += '_80-90'
        missingEdges = []
        freeEdges = []
        for fEdges, bEdges, instanceNumber in zip(fixedEdges, blockedEdges,
                                                  instanceNumbers):
            instanceName = "%s_%03d_%02d%s" % (instanceType, size,
                                               instanceNumber, solsExt)
            solEdges = getNEdges(os.path.join(solsPath, instanceName))

            missingEdges.append(solEdges - fEdges)

            # size*(size-1) is always even, so floor division is exact and
            # keeps the count an integer on Python 3 as well.
            totalEdges = ((size - 1) * size) // 2
            freeEdges.append(totalEdges - (fEdges + bEdges))

        ppl.scatter(ax, missingEdges, freeEdges, label=str(size))

    ppl.legend(ax, loc="lower right")
    ax.set_xlabel(u'Arestas faltantes')
    ax.set_ylabel(u'Arestas livres')

    # Anchor both axes at zero while keeping the automatic upper limits.
    ax.set_xlim((0, ax.get_xlim()[1]))
    ax.set_ylim((0, ax.get_ylim()[1]))

    if figName is not None:
        fig.savefig(figName, bbox_inches='tight')
Example #10
0
def plot_depth_ratios(depths, ratios, quals, in_file, title):
    """Scatter variant/total ratio against depth, one series per quality range.

    The data are split by ``_group_ratios_by_qual`` (defined elsewhere), which
    yields ``(depths, ratios, qualrange)`` groups.  Each group becomes one
    scatter series labelled with its quality range, and the group's label and
    size are printed.  The figure is saved next to ``in_file`` as
    ``<in_file without extension>-depthratios.png``.
    """
    out_file = "%s-depthratios.png" % os.path.splitext(in_file)[0]
    fig, ax = plt.subplots(1)
    for ds, rs, qualrange in _group_ratios_by_qual(depths, ratios, quals):
        # Same output as the old ``print a, b`` statement on both Python 2
        # and Python 3.
        print("%s %s" % (qualrange, len(ds)))
        # Plot this group's points only; plotting the full depths/ratios
        # arrays here repeated the entire data set under every label.
        ppl.scatter(ax, x=ds, y=rs, label=qualrange)
    ppl.legend(ax, title="Quality score range")
    ax.set_title(title)
    ax.set_xlabel("Depth")
    ax.set_ylabel("Variant/Total ratio")
    fig.savefig(out_file)
Example #11
0
def test_legend():
    """Scatter eight seeded random normal clouds on one axes and add a legend."""
    # Fixed seed so the generated points are reproducible.
    np.random.seed(12)

    _, axes = plt.subplots(1)

    n_samples = 1000
    for center in range(8):
        xs = np.random.normal(loc=center, size=n_samples)
        ys = np.random.normal(loc=center, size=n_samples)
        ppl.scatter(axes, xs, ys, label=str(center))
    ppl.legend(axes)
Example #12
0
    def plot_ts_diagram(self, ax, sal, temp, xlabel='Salinity', ylabel='Temperature', title='',
                        axis_font={}, title_font={}, tick_font={}, **kwargs):
        """Draw a temperature-salinity diagram with density contours on ``ax``.

        NaN samples in ``sal`` and ``temp`` are masked.  A density grid is
        evaluated with ``sw.dens`` over a range slightly wider than the
        data, drawn as dashed black contours (minus 1000), and the samples
        are scattered on top.  Extra keyword arguments go to ``ppl.scatter``.

        Raises:
            Exception: if ``sal`` and ``temp`` differ in length.
        """
        if not axis_font:
            axis_font = axis_font_default
        if not title_font:
            title_font = title_font_default

        sal = np.ma.array(sal, mask=np.isnan(sal))
        temp = np.ma.array(temp, mask=np.isnan(temp))
        if len(sal) != len(temp):
            raise Exception('Sal and Temp arrays are not the same size!')

        # Figure out boundaries (mins and maxs), padded beyond the data.
        smin = sal.min() - (0.01 * sal.min())
        smax = sal.max() + (0.01 * sal.max())
        tmin = temp.min() - (0.1 * temp.max())
        tmax = temp.max() + (0.1 * temp.max())

        # Number of grid cells in x and y.  These are used as array shapes
        # and sample counts, so they must be real integers, not floats.
        xdim = int(round((smax-smin)/0.1+1, 0))
        ydim = int(round((tmax-tmin)+1, 0))

        # Create empty grid of zeros
        dens = np.zeros((ydim, xdim))

        # Create temp and sal vectors of appropriate dimensions
        ti = np.linspace(1, ydim-1, ydim)+tmin
        si = np.linspace(1, xdim-1, xdim)*0.1+smin

        # Loop to fill in grid with densities
        for j in range(ydim):
            for i in range(xdim):
                dens[j, i] = sw.dens(si[i], ti[j], 0)

        # Subtract 1000 to convert to sigma-t
        dens = dens - 1000

        # Draw on the axes we were given rather than on pyplot's current
        # axes, so contours and samples always share one plot.
        cs = ax.contour(si, ti, dens, linestyles='dashed', colors='k')
        ax.clabel(cs, fontsize=12, inline=1, fmt='%1.0f')  # label each contour level
        ppl.scatter(ax, sal, temp, **kwargs)

        ax.set_xlabel(xlabel.replace("_", " "), labelpad=10, **axis_font)
        ax.set_ylabel(ylabel.replace("_", " "), labelpad=10, **axis_font)
        ax.set_title(title.replace("_", " "), **title_font)
        ax.set_aspect(1./ax.get_data_ratio())  # make axes square
        if tick_font:
            ax.tick_params(**tick_font)
        plt.tight_layout()
Example #13
0
def plot_closeness(graph, closenessfile=None):
    """Scatter closeness centrality against each node's 'weights' attribute.

    Works on the undirected view of ``graph``, one scatter series per
    connected component.  A node without a 'weights' attribute counts as 1.
    The closeness values of each component are printed.  The figure is saved
    to ``closenessfile`` when given, otherwise shown interactively.
    """
    fig, ax = plt.subplots(1)
    undirected = graph.to_undirected()
    for component in nx.connected_components(undirected):
        weights = [graph.node[member].get('weights', 1)
                   for member in component]
        centrality = [nx.closeness_centrality(undirected, u=member)
                      for member in component]
        prettyplotlib.scatter(ax, weights, centrality)
        print(centrality)

    ax.set_xlabel('Changes Authored')
    ax.set_ylabel('Closeness Centrality')
    ax.set_ylim(0.0, 1.0)
    ax.set_xscale('log')

    if not closenessfile:
        plt.show()
        return
    fig.savefig(closenessfile)
Example #14
0
def plot_scatter_k_means_2d(n_clusters, clusters, is_plot=False):
    """Scatter the first ``n_clusters`` clusters in 2-D, one colour each.

    Does nothing unless ``is_plot`` is true.  ``clusters[i]`` is iterated as
    a sequence of points whose first two items are the x and y coordinates.
    Cluster ``i`` is labelled ``Class_<i+1>``; colours repeat once the
    palette is exhausted.  The figure is shown and then saved to
    ``k_means_classification_2d_plot.jpg``.
    """
    if not is_plot:
        return

    colors = ["lime", "aqua", "deeppink", "orangered", "dodgerblue", "magenta",
              "darkolivegreen", "crimson", "yellow", "darkorchid", "dodgerblue",
              "mediumpurple", "hotpink", "cyan", "orangered"]
    fig, ax = plt.subplots()
    ax.set_xlabel("x axis")
    ax.set_ylabel("y axis")
    ax.set_title("Plot for K-Means class")
    for i in range(n_clusters):
        points = clusters[i]
        x_axis = tuple(point[0] for point in points)
        y_axis = tuple(point[1] for point in points)
        # Labels are generated rather than looked up: the old hand-written
        # table listed "Class_12" twice, mislabelling every later cluster.
        # The modulo keeps more clusters than colours from raising.
        ppl.scatter(ax, x_axis, y_axis,
                    color=colors[i % len(colors)],
                    label="Class_%d" % (i + 1))
    ppl.legend(ax)
    plt.show()
    fig.savefig('k_means_classification_2d_plot.jpg')
Example #15
0
File: plot.py Project: gauden/hfadb
    def _render_axes(self, ax_array, lang):
        """Draw one facet per configured country onto ``ax_array``.

        Facet ``n`` goes to ``ax_array[n // cols][n % cols]``.  Each facet
        shows the country's year/value series as a thin line with markers,
        its name in language ``lang`` as an in-axes title, and every
        comparator series from ``specs['comparators']`` as a wide translucent
        line.  Axis limits and tick steps come from ``self.plot.specs``.
        """
        specs = self.plot.specs
        data = self.plot.data

        # Comparator series are shared by all facets, so gather them once.
        comp_series = {}
        for comp in specs['comparators']:
            comp_rows = data[data.country == comp]
            comp_series[comp] = (comp_rows.year, comp_rows.value)

        for facet in range(specs['facets']):
            cols = specs['cols']
            r, c = divmod(facet, cols)
            ax = ax_array[r][c]

            country = self.plot.index.get_countries(
                names=[specs['countries'][facet]])
            country_rows = data[data.country == country['en'][0]]
            y = country_rows.value
            x = country_rows.year

            focus_color = specs.get('color', None)

            ax.text(0.5, 0.95, country[lang][0],
                    verticalalignment='bottom', horizontalalignment='center',
                    transform=ax.transAxes, color=focus_color,
                    fontsize=10, fontweight='bold')

            y_low, y_high = specs['ylim'][0], specs['ylim'][1]
            ax.set_ylim(specs['ylim'])
            if specs.get('ystep', None):
                ax.yaxis.set_ticks(np.arange(y_low, y_high, specs['ystep']))

            start, end = specs['xlim']
            if specs.get('xstep', None):
                ax.xaxis.set_ticks(np.arange(start, end, specs['xstep']))

            for axis in (ax.xaxis, ax.yaxis):
                for tick in axis.get_major_ticks():
                    tick.label.set_fontsize(10)

            # In-focus series: strong colour and opaque.
            ppl.plot(ax, x, y, alpha=1.0, linewidth=1, color=focus_color)
            ppl.scatter(ax, x, y, s=12.0, alpha=1.0, color=focus_color)

            # Comparators: light, translucent, overlapping the main series.
            for x_comp, y_comp in comp_series.values():
                ppl.plot(ax, x_comp, y_comp, alpha=0.35, linewidth=5)
Example #16
0
def plot_meshless(args):
    """Plot overlaid per-point density fields with the points on top.

    Points come from ``generate_2d_points(args.npoints, args.seed)``.  For
    each point ``compute_meshless_density`` is evaluated on a 100x100 grid
    over [0, 1) and drawn as a filled contour with alpha ``1 / npoints``.
    The points are then scattered in white and ``args.subtitle`` is used as
    the title.
    """
    npoints = args.npoints
    points = generate_2d_points(npoints, args.seed)

    # The same sample positions are used along both axes.
    grid_x = np.arange(0., 1., .01)
    grid_y = np.arange(0., 1., .01)
    xx, yy = np.meshgrid(grid_x, grid_y)

    import prettyplotlib as ppl
    fig, ax = ppl.subplots()

    for center in points:
        field = compute_meshless_density(xx, yy, points, center, args.sigma)
        plt.contourf(xx, yy, field, alpha=1. / npoints)

    ppl.scatter(ax, points[:, 0], points[:, 1], color='white')
    ax.autoscale(tight=True)
    plt.title(args.subtitle)
Example #17
0
def plot_embedding(figure, index, method, run_time, data, classes, dimension):
    """Scatter `data`, coloured by `classes`, in a new subplot of `figure`.

    `dimension` must be 2 or 3 and selects a 2-D or 3-D subplot at position
    ``1 + index``; any other value raises ``ValueError``.  The title is
    built from `method` and `run_time`, and tick labels are hidden on every
    axis.
    """
    scatter_opts = {'c': classes, 'cmap': mpl.cm.Spectral,
                    'alpha': 0.8, 's': 45}
    if dimension not in (2, 3):
        raise ValueError(dimension)

    is_3d = dimension == 3
    if is_3d:
        axe = figure.add_subplot(5, 3, 1 + index, projection="3d")
        ppl.scatter(data[:, 0], data[:, 1], data[:, 2], **scatter_opts)
    else:
        axe = figure.add_subplot(1, 1, 1 + index)
        ppl.scatter(data[:, 0], data[:, 1], **scatter_opts)

    plt.title("{} ({:.2g} sec)".format(method, run_time))

    # Hide tick labels; each axis gets its own formatter instance.
    axe.xaxis.set_major_formatter(mpl.ticker.NullFormatter())
    axe.yaxis.set_major_formatter(mpl.ticker.NullFormatter())
    if is_3d:
        axe.zaxis.set_major_formatter(mpl.ticker.NullFormatter())
    plt.axis('tight')
Example #18
0
def plot_embedding(figure, index, method, run_time, data, classes, dimension):
    """Add a scatter subplot of `data` to `figure`, coloured by `classes`.

    A 2-D subplot is used when `dimension` is 2 and a 3-D one when it is 3;
    anything else raises ``ValueError(dimension)``.  The subplot sits at
    position ``1 + index``, is titled with `method` and `run_time`, and has
    its tick labels suppressed.
    """
    style = dict(c=classes, cmap=mpl.cm.Spectral, alpha=0.8, s=45)
    if dimension == 2:
        axe = figure.add_subplot(1, 1, 1 + index)
        columns = (data[:, 0], data[:, 1])
        labelled_axes = ('xaxis', 'yaxis')
    elif dimension == 3:
        axe = figure.add_subplot(5, 3, 1 + index, projection="3d")
        columns = (data[:, 0], data[:, 1], data[:, 2])
        labelled_axes = ('xaxis', 'yaxis', 'zaxis')
    else:
        raise ValueError(dimension)
    ppl.scatter(*columns, **style)

    plt.title("{} ({:.2g} sec)".format(method, run_time))
    for axis_name in labelled_axes:
        # A fresh formatter per axis, as formatters are bound to their axis.
        getattr(axe, axis_name).set_major_formatter(mpl.ticker.NullFormatter())
    plt.axis('tight')
Example #19
0
def scatterFreeByMissingEdges(type_header, type_table, config, instanceType,
                              solsPath, solsExt, figName = None):
  """Scatter free edges against missing edges, one series per instance size.

  For every size under ``type_table[config][instanceType]`` the solution file
  of each instance is read with ``getNEdges`` and two values are computed per
  instance:

  * missing edges = solution edges - fixed edges
  * free edges    = size*(size-1)/2 - (fixed edges + blocked edges)

  ``type_header`` is accepted but not used.  When ``figName`` is not None the
  figure is saved to that path.

  Raises:
      Exception: if ``config`` or ``instanceType`` is not in ``type_table``.
  """
  if config not in type_table:
    raise Exception("Config \""+config+"\" not found!")
  if instanceType not in type_table[config]:
    raise Exception("Instance type \""+instanceType+"\" not found!")

  fig, ax = plt.subplots(1)

  by_size = type_table[config][instanceType]
  for size in sorted(by_size):
    columns = by_size[size]
    fixedEdges = np.array([int(v) for v in columns["preproc.fixedEdges"]])
    blockedEdges = np.array([int(v) for v in columns["preproc.blockedEdges"]])
    instanceNumbers = np.array(columns["instanceNumber"])

    # NOTE(review): the suffix is appended permanently, so every size
    # processed after 80 also reads from the suffixed directory --
    # presumably intended for sizes 80 and 90 only; confirm.
    if size == 80:
      solsPath += '_80-90'
    missingEdges = []
    freeEdges = []
    for fEdges, bEdges, instanceNumber in zip(fixedEdges, blockedEdges, instanceNumbers):
      instanceName = "%s_%03d_%02d%s" % (instanceType, size, instanceNumber, solsExt)
      solEdges = getNEdges(os.path.join(solsPath, instanceName))

      missingEdges.append(solEdges - fEdges)

      # size*(size-1) is always even, so floor division is exact and keeps
      # the count an integer on Python 3 as well.
      totalEdges = ((size-1)*size) // 2
      freeEdges.append(totalEdges - (fEdges + bEdges))

    ppl.scatter(ax, missingEdges, freeEdges, label=str(size))

  ppl.legend(ax, loc="lower right")
  ax.set_xlabel(u'Arestas faltantes')
  ax.set_ylabel(u'Arestas livres')

  # Anchor both axes at zero while keeping the automatic upper limits.
  ax.set_xlim((0, ax.get_xlim()[1]))
  ax.set_ylim((0, ax.get_ylim()[1]))

  if figName is not None:
    fig.savefig(figName, bbox_inches='tight')
def ppl_scatter():
    '''
    Draw a prettyplotlib scatter example in the third cell of a 1x3 grid.

    Eight seeded random normal clouds are scattered on a subplot added to
    the module-level ``fig``, followed by a legend and a two-line title.

    Note:
        This function requires "prettyplotlib" library.
    '''
    np.random.seed(12)
    axes = fig.add_subplot(1, 3, 3)

    # One cloud per index so the whole default colour cycle is shown.
    sample_count = 1000
    for center in range(8):
        xs = np.random.normal(loc=center, size=sample_count)
        ys = np.random.normal(loc=center, size=sample_count)
        ppl.scatter(axes, xs, ys, label=str(center))

    ppl.legend(axes, loc=4, fontsize=11)

    heading = ('A prettyplotlib `scatter` example\n'
               'showing default color cycle and scatter params')
    axes.set_title(heading, fontsize=12)
Example #21
0
def table_touch(dataset = True):
    """Scatter query counts against table touch for two data sources.

    The "SDSS" series is always drawn.  The second series depends on
    ``dataset``: when true the 'dataset_touch' column is plotted as
    "SQLShare (Dataset)", otherwise the 'touch' column as "SQLShare".  Data
    are loaded through the ``read_csv`` helper (defined elsewhere).  The
    figure is shown and saved as PDF and PNG under a name that also depends
    on ``dataset``.
    """
    figure, axes = plt.subplots(1)
    axes.set_yscale('log')

    sdss = read_csv(['touch', 'counts'], True)
    ppl.scatter(axes, sdss['touch'], sdss['counts'], label="SDSS", marker="o", s=100)

    if dataset:
        column, series_label, color_index = 'dataset_touch', "SQLShare (Dataset)", 0
    else:
        column, series_label, color_index = 'touch', "SQLShare", 1
    sqlshare = read_csv([column], False)
    ppl.scatter(axes, sqlshare[column], sqlshare['count'], label=series_label,
                marker="v", s=100, color=pcs[color_index])

    axes.set_xlabel('Table touch')
    axes.set_ylabel('# of queries')

    ppl.legend(axes)

    axes.set_ylim(0)

    plt.show()

    if dataset:
        outputs = (('plot_touch_dataset.pdf', 'pdf'),
                   ('plot_touch_dataset.png', 'png'))
    else:
        outputs = (('plot_touch.pdf', 'pdf'),
                   ('plot_touch.png', 'png'))
    for target, file_format in outputs:
        figure.savefig(target, format=file_format, transparent=True)
Example #22
0
    def plot_profile(self, fig, ax, x, y, xlabel='', ylabel='',
                     axis_font={}, tick_font={}, scatter=False, **kwargs):
        """Plot a profile of ``y`` against ``x`` with the x axis on top.

        Uses ``ppl.scatter`` when ``scatter`` is true and ``ppl.plot``
        otherwise, forwarding ``**kwargs``.  Non-empty labels are applied
        with underscores turned into spaces; an empty ``axis_font`` falls
        back to the module-level default.  ``fig`` is not used here.
        """
        font = axis_font or axis_font_default

        if not scatter:
            ppl.plot(ax, x, y, **kwargs)
        else:
            ppl.scatter(ax, x, y, **kwargs)

        if xlabel:
            pretty_x = xlabel.replace("_", " ")
            ax.set_xlabel(pretty_x, labelpad=5, **font)
        if ylabel:
            pretty_y = ylabel.replace("_", " ")
            ax.set_ylabel(pretty_y, labelpad=11, **font)
        if tick_font:
            ax.tick_params(**tick_font)

        # Move both the x label and the x ticks to the top edge.
        ax.xaxis.set_label_position('top')
        ax.xaxis.set_ticks_position('top')
        # Sets the locator's private bin count directly.
        locator = ax.xaxis.get_major_locator()
        locator._nbins = 5
        ax.grid(True)
        plt.tight_layout()
Example #23
0
File: scatter.py Project: yz-/ut
def df_scatter_plot(df, x=None, y=None, label=None, **kwargs):
    """Scatter ``df[x]`` against ``df[y]``, one series per value of ``df[label]``.

    Args:
        df: data frame holding the columns.
        x, y: names of the columns used for the coordinates.
        label: name of the grouping column.  When omitted, ``df`` must have
            exactly three columns and the first one that is neither ``x``
            nor ``y`` is used.
        colors: optional keyword; sequence indexed by series position and
            passed as ``facecolor``.
        label_list: optional keyword; the label values to plot, in order
            (defaults to the unique values of the label column).
        **kwargs: remaining keywords are forwarded to ``ppl.scatter``.

    Raises:
        ValueError: if ``label`` is omitted and ``df`` does not have exactly
            three columns.
    """
    if label is None:
        if len(df.columns) != 3:
            raise ValueError("I can't (or rather won't) guess the label if there's not exactly 3 columns. "
                             "You need to specify it")
        label = [t for t in df.columns if t not in [x, y]][0]
    colors = kwargs.pop('colors', None)
    label_list = kwargs.pop('label_list', np.array(df[label].unique()))
    fig, ax = mpl_plt.subplots(1)

    for i, this_label in enumerate(label_list):
        d = df[df[label] == this_label]
        xvals = np.array(d[x])
        yvals = np.array(d[y])
        # Show the actual label value in the legend; the loop index used
        # previously told the reader nothing about the series.
        legend_text = str(this_label)
        if colors:
            ppl.scatter(ax, xvals, yvals, label=legend_text, facecolor=colors[i], **kwargs)
        else:
            ppl.scatter(ax, xvals, yvals, label=legend_text, **kwargs)

    ppl.legend(ax)
def ppl_scatter():
    '''
    Render the prettyplotlib scatter demo into subplot 3 of a 1x3 layout.

    The subplot is added to the module-level ``fig``.  Eight random normal
    clouds (seeded for reproducibility) are scattered, then a legend and a
    two-line title are added.

    Note:
        This function requires "prettyplotlib" library.
    '''
    np.random.seed(12)
    panel = fig.add_subplot(1, 3, 3)

    # Eight series, so every colour of the default cycle appears.
    for idx in range(8):
        cloud_x = np.random.normal(loc=idx, size=1000)
        cloud_y = np.random.normal(loc=idx, size=1000)
        ppl.scatter(panel, cloud_x, cloud_y, label=str(idx))

    ppl.legend(panel, loc=4, fontsize=11)

    title_text = ('A prettyplotlib `scatter` example\n'
                  'showing default color cycle and scatter params')
    panel.set_title(title_text, fontsize=12)
Example #25
0
def plot_time_series(fig,
                     ax,
                     x,
                     y,
                     fill=False,
                     title='',
                     ylabel='',
                     title_font={},
                     axis_font={},
                     **kwargs):
    """Draw ``y`` over ``x`` on ``ax`` as a line with scatter markers.

    The same ``**kwargs`` are forwarded to both ``ppl.plot`` and
    ``ppl.scatter``.  The x axis is labelled by ``get_time_label`` and the
    date ticks are auto-formatted.  Empty font dicts fall back to the
    module-level defaults.  If ``ylabel`` contains 'degree' the y range is
    fixed to [0, 360].  With ``fill`` the area between the data and the
    bottom of the axes is shaded in the line's colour.
    """
    title_style = title_font or title_font_default
    axis_style = axis_font or axis_font_default

    line_handles = ppl.plot(ax, x, y, **kwargs)
    ppl.scatter(ax, x, y, **kwargs)
    get_time_label(ax, x)
    fig.autofmt_xdate()

    if ylabel:
        ax.set_ylabel(ylabel, **axis_style)
    if title:
        ax.set_title(title, **title_style)
    if 'degree' in ylabel:
        ax.set_ylim([0, 360])
    ax.grid(True)

    if fill:
        # Shade down to just above the current lower y limit.
        baseline = min(ax.get_ylim()) + 1e-7
        line_color = line_handles[0].get_color()
        ax.fill_between(x, y, baseline, facecolor=line_color, alpha=0.15)
    plt.tight_layout()
Example #26
0
def opcounts():
    """Plot weighted histograms of operator counts for two data sources.

    Each source is binned with ``np.histogram`` (weighted by its count
    column), empty bins are dropped, and the remaining bins are drawn as
    scatter markers at ``edge + w/2`` on a log-scaled y axis.  The first
    source ("SDSS") uses 10 bins; the second ("SQLShare") uses edges spaced
    over its own range with the bin count derived from the first source's
    bin width ``w``.  Data come from the ``read_csv`` helper (defined
    elsewhere).  The figure is shown and saved as
    ``plot_logops_query.pdf`` / ``.png``.
    """
    fig, ax = plt.subplots(1, figsize=(8, 4))

    ax.set_yscale('log')

    data = read_csv(['physops', 'counts'], True)
    # np.histogram returns (counts, edges) of different lengths; unpack the
    # tuple directly instead of wrapping it in np.array, which would build a
    # ragged array (an error on current NumPy).
    y, x = np.histogram(data['physops'], 10, weights=data['counts'])
    w = x[1] - x[0]
    x += w/2
    # zip stops at the shorter sequence, pairing each count with its bin.
    bins = [(cx, cy) for cx, cy in zip(x, y) if cy]
    x = [cx for cx, _ in bins]
    y = [cy for _, cy in bins]
    ppl.scatter(ax, x=x, y=y,  marker="o", color=pcs[0], s=100, label="SDSS")

    data = read_csv(['ops'], False)
    d = data['ops']
    # linspace needs an integer sample count.
    n_edges = int((max(d) - min(d)) / w)
    y, x = np.histogram(d, bins=np.linspace(min(d), max(d), n_edges), weights=data['count'])
    x += w/2
    bins = [(cx, cy) for cx, cy in zip(x, y) if cy]
    x = [cx for cx, _ in bins]
    y = [cy for _, cy in bins]
    ppl.scatter(ax, x=x, y=y, marker="v", color=pcs[1], s=100, label="SQLShare")

    ax.set_xlabel('Physical operators used')
    ax.set_ylabel('# of queries')

    ppl.legend(ax, loc='lower right')

    ax.set_xlim(0)
    ax.set_ylim(0)

    fig.tight_layout()
    plt.show()

    fig.savefig('plot_logops_query.pdf', format='pdf', transparent=True)
    fig.savefig('plot_logops_query.png', format='png', transparent=True)
Example #27
0
def plot_clusters(clusters, candidates, bounds, vloc, hulls, shrink=0.9):
    """Plot all `clusters` among `candidates` with the `bounds` of the city
    (or at least `shrink` of them). Also plot convex `hulls` of gold areas if
    provided.

    Candidates that belong to no cluster are drawn first as a smaller, gray
    "noise" series; each cluster then gets its own colour and a
    "cluster N" legend entry.  `vloc` is indexed as ``vloc[indices, 0]`` /
    ``vloc[indices, 1]`` for the coordinates; `bounds` unpacks into x and y
    limits, each multiplied by `shrink`.
    """
    xbounds, ybounds = bounds
    unique_labels = len(clusters)
    clustered = set().union(*map(list, clusters))
    noise = list(candidates.difference(clustered))
    n_groups = unique_labels + 1  # the clusters plus the noise series
    # The fixed palette holds five colours, so fall back to the colormap
    # whenever there are more groups than that; testing `unique_labels > 5`
    # left exactly 5 clusters one colour short and zip() dropped the last.
    if n_groups > 5:
        colors = mpl.cm.Spectral(np.linspace(0, 1, n_groups))
    else:
        colors = [gray, red, green, blue, orange]
    plt.figure(figsize=(20, 15))
    for k, indices, col in zip(range(-1, unique_labels), [noise] + clusters,
                               colors):
        noise_series = k == -1
        if noise_series:
            col = 'gray'
        ppl.scatter(vloc[indices, 0],
                    vloc[indices, 1],
                    s=16 if noise_series else 35,
                    color=col,
                    alpha=0.6 if noise_series else 0.8,
                    label='noise' if noise_series
                    else 'cluster {}'.format(k + 1))
    hulls = hulls or []
    for idx, hull in enumerate(hulls):
        # Append index 0 again so the hull outline is closed; list() keeps
        # this working where range() is not a list.
        first_again = list(range(len(hull))) + [0]
        ppl.plot(hull[first_again, 0],
                 hull[first_again, 1],
                 '--',
                 c=ppl.colors.almost_black,
                 lw=1.0,
                 alpha=0.9,
                 label='gold region' if idx == 0 else None)
    plt.xlim(shrink * xbounds)
    plt.ylim(shrink * ybounds)
    ppl.legend()
Example #28
0
def plot_scatter(fig,
                 ax,
                 x,
                 y,
                 title='',
                 xlabel='',
                 ylabel='',
                 title_font={},
                 axis_font={},
                 **kwargs):
    """Scatter ``y`` against ``x`` on ``ax`` with a grid and square axes.

    ``**kwargs`` are forwarded to ``ppl.scatter``.  Axis labels are only set
    when non-empty; the title is always set.  Empty font dicts fall back to
    the module-level defaults.  ``fig`` is accepted but not used.
    """
    title_style = title_font or title_font_default
    axis_style = axis_font or axis_font_default

    ppl.scatter(ax, x, y, **kwargs)

    for text, setter in ((xlabel, ax.set_xlabel), (ylabel, ax.set_ylabel)):
        if text:
            setter(text, labelpad=10, **axis_style)

    ax.set_title(title, **title_style)
    ax.grid(True)
    # Inverse of the data ratio gives a square plotting area.
    ax.set_aspect(1. / ax.get_data_ratio())
Example #29
0
File: fit.py Project: yz-/ut
def xy_linear_regression(df, x_var, y_var, **kwargs):
    """Fit ``y_var`` against ``x_var`` with a linear model.

    Rows with a missing value in either column are dropped before fitting.

    Keyword arguments:
        fit_intercept: forwarded to ``LinearRegression`` (default True).
        normalize: forwarded to ``LinearRegression`` (default False).
        plot: when truthy, scatter the data, draw the fitted line from
            x=0 to the largest x, and annotate slope/intercept
            (default False).

    Returns:
        ``(intercept, slope)`` of the fitted line.
    """
    fit_intercept = kwargs.get('fit_intercept', True)
    normalize = kwargs.get('normalize', False)
    d = df[[x_var, y_var]].dropna()

    # Column vectors of shape (n, 1); plain ndarrays instead of np.matrix.
    x = np.asarray(d[x_var]).reshape(-1, 1)
    y = np.asarray(d[y_var]).reshape(-1, 1)

    clf = linear_model.LinearRegression(fit_intercept=fit_intercept,
                                        normalize=normalize)
    clf.fit(x, y)
    # intercept_ is a length-1 array when an intercept is fitted but a plain
    # scalar when fit_intercept is False; np.ravel handles both.
    intercept = np.ravel(clf.intercept_)[0]
    slope = clf.coef_[0][0]

    if kwargs.get('plot', False):
        ppl.scatter(x=d[x_var], y=d[y_var])
        x_line = np.array([0, d[x_var].max()])
        plt.plot(x_line, intercept + slope * x_line, 'k-')
        plt.annotate('slope={slope:.4f}\nintercept={intercept:.4f}'.format(slope=slope, intercept=intercept),
                     (0.05, 0.9), xycoords='axes fraction')
        plt.xlabel(x_var)
        plt.ylabel(y_var)

    return intercept, slope
Example #30
0
def make_plots(d, correct):
    """Scatter the men's and women's series from ``d`` and save the figure.

    ``d[0]`` holds the men's records and ``d[1]`` the women's.  Dates come
    from ``convert_dates`` and times from ``convert_times`` (both defined
    elsewhere).  When ``correct`` is truthy the times are additionally passed
    through ``correct_times`` and the plot is saved as
    ``b2bwinningtimescorrected.png``; otherwise the raw times are plotted and
    saved as ``b2bwinningtimes.png``.  Drawing uses the module-level ``ax``
    and ``fig``.
    """
    men = d[0]
    women = d[1]
    if correct:
        Ym = correct_times('m', convert_times(men))
        Xm = convert_dates(men)
        # Diagnostic output kept from the original; written as function
        # calls so the module also parses on Python 3.
        print("hola")
        print(len(Xm))
        print(len(Ym))
        ppl.scatter(ax, Xm, Ym, label="Men's speeds")

        Yw = correct_times('w', convert_times(women))
        Xw = convert_dates(women)
        ppl.scatter(ax, Xw, Yw, label="Women's speeds")

        title = "Bay To Breakers Speeds (Seconds per Mile)"
        out_file = "b2bwinningtimescorrected.png"
    else:
        Ym = convert_times(men)
        Xm = convert_dates(men)
        ppl.scatter(ax, Xm, Ym, label="Men's Times")

        Yw = convert_times(women)
        Xw = convert_dates(women)
        ppl.scatter(ax, Xw, Yw, label="Women's Times")

        title = "Bay To Breakers times (seconds)"
        out_file = "b2bwinningtimes.png"

    ppl.legend(ax)
    ax.set_title(title)
    fig.savefig(out_file)
Example #31
0
def plot_pca(df, c_scale=None, x_pc=1, y_pc=2, distance='L1',
             save_as=None, save_format='png', whiten=True, num_vectors=30,
             figsize=(10, 10), colors_dict=None, markers_dict=None,
             title='PCA', show_vectors=True, show_point_labels=True,
             show_vector_labels=True, column_ids_dict=None):
    """Run a PCA on ``df`` and draw a biplot of two principal components.

    Every row of ``df`` is drawn as a point at its transformed coordinates and
    the ``num_vectors`` columns with the largest loadings on the two chosen
    components are drawn as lines from the origin.

    Parameters
    ----------
    df : DataFrame; rows are observations, columns are features.
    c_scale : factor applied to the component loadings before drawing.  When
        falsy it is derived as 0.75 * (largest point norm) / (largest loading
        norm).
    x_pc, y_pc : 1-based indices of the components on the x and y axes.
    distance : 'L1' or 'L2'; the norm used to rank the loading vectors.
    save_as, save_format : if ``save_as`` is truthy the figure is saved there
        in ``save_format`` (PNG output uses dpi=300).
    whiten : forwarded to ``dc.PCA``.
    num_vectors : maximum number of loading vectors kept.
    figsize : figure size; also sets the font/marker scale.
    colors_dict, markers_dict : optional per-row-id colour / marker lookups;
        ids that are missing fall back to 'black' / 'x'.
    title : plot title.
    show_vectors, show_point_labels, show_vector_labels : drawing toggles.
    column_ids_dict : optional mapping from column name to the label used for
        its loading vector.

    Returns
    -------
    list of ``(x, y, label, magnitude)`` tuples for the kept loading vectors,
    sorted by decreasing magnitude.

    Raises
    ------
    ValueError
        If ``distance`` is neither 'L1' nor 'L2' (previously this silently
        produced no vectors at all).
    """
    if distance not in ('L1', 'L2'):
        raise ValueError(
            "distance must be 'L1' or 'L2', got {!r}".format(distance))

    def _lookup(mapping, key, default):
        # Per-id style lookup that falls back to `default` for unknown ids.
        if not mapping:
            return default
        try:
            return mapping[key]
        except (KeyError, IndexError, TypeError):
            return default

    # gather ids and values
    row_ids = df.index
    if column_ids_dict is not None:
        column_ids = [column_ids_dict[col] for col in df.columns]
    else:
        column_ids = df.columns

    # DataFrame.as_matrix() no longer exists in current pandas; .values is the
    # equivalent that works on old and new versions alike.
    df_array = df.values

    # perform pca
    n_components = max(x_pc, y_pc, 2)
    pca = dc.PCA(whiten=whiten, n_components=n_components)
    pca.fit(df_array)
    X = pca.transform(df_array)
    comp_x = pca.components_[x_pc - 1, :]
    comp_y = pca.components_[y_pc - 1, :]

    x_list = X[:, x_pc - 1]
    y_list = X[:, y_pc - 1]

    if not c_scale:
        c_scale = .75 * max([norm(point) for point in zip(x_list, y_list)]) / \
                  max([norm(vector) for vector in zip(comp_x, comp_y)])

    size_scale = sqrt(figsize[0] * figsize[1]) / 1.5

    # rank features by the magnitude of their (scaled) loading vector
    comp_magn = []
    for (x, y, an_id) in zip(comp_x, comp_y, column_ids):
        x = x * c_scale
        y = y * c_scale
        if distance == 'L1':
            magnitude = abs(y) + abs(x)
        else:
            magnitude = math.sqrt((y ** 2) + (x ** 2))
        comp_magn.append((x, y, an_id, magnitude))

    # create figure and plot
    pca_fig, ax = plt.subplots(figsize=figsize)

    # One scatter call per point so that each can carry its own marker.
    for (x, y, an_id) in zip(x_list, y_list, row_ids):
        color = _lookup(colors_dict, an_id, 'black')
        marker = _lookup(markers_dict, an_id, 'x')

        if show_point_labels:
            ax.text(x, y, an_id, color=color, size=size_scale)

        ppl.scatter(ax, x, y, marker=marker, color=color, s=size_scale * 5)

    vectors = sorted(comp_magn, key=lambda item: item[3],
                     reverse=True)[:num_vectors]
    if show_vectors:
        for vec_x, vec_y, vec_label, _magnitude in vectors:
            ppl.plot(ax, [0, vec_x], [0, vec_y], color=ppl.almost_black,
                     linewidth=.5)
            if show_vector_labels:
                ax.text(vec_x, vec_y, vec_label, color='black',
                        size=size_scale)

    var_1 = int(pca.explained_variance_ratio_[x_pc - 1] * 100)
    var_2 = int(pca.explained_variance_ratio_[y_pc - 1] * 100)

    ax.set_xlabel('Principal Component {} (Explains {}% Of Variance)'.format(
        str(x_pc), str(var_1)),
                  size=size_scale * 2)
    ax.set_ylabel('Principal Component {} (Explains {}% Of Variance)'.format(
        str(y_pc), str(var_2)),
                  size=size_scale * 2)
    ax.set_title(title, size=size_scale * 3)

    if save_as:
        kwargs = {}
        if save_format == 'png':
            kwargs['dpi'] = 300
        pca_fig.savefig(save_as, format=save_format, **kwargs)

    return vectors
# Show the whole color range
# For every station with enough yearly records, plot its yearly "max" value
# against the year on the shared axes ``ax`` (defined outside this fragment).
for s in station_list:
    if "data" in s:
        years = s["data"].keys()
        # Only show the stations with enough data.
        if len(s["data"].keys()) >= num_years_required:
            xx = []  # years, converted to int
            yx = []  # the "max" entry recorded for each year
            for y in years:
                xx.append(int(y))
                val = s["data"][y]["max"]
                yx.append(val)

            # NOTE(review): the same points are drawn twice, once with plain
            # matplotlib and once with prettyplotlib -- confirm both are wanted.
            ax.scatter(xx, yx, marker='o')
            ppl.scatter(ax, xx, yx, alpha=0.8, edgecolor='black',
                        linewidth=0.15, label=str(s["station_num"]))
            # Legend and axis labels are re-applied for every plotted station.
            ppl.legend(ax, loc='right', ncol=1)
            ax.set_xlabel('Year')
            ax.set_ylabel('water level (m)')

ax.set_title("Stations exceeding " +
             str(num_years_required) +
             " years worth of water level data (MHHW)")
fig.set_size_inches(14, 8)

# <markdowncell>

# ### Number of stations available by number of years

# <codecell>
from numpy.random import multivariate_normal
from numpy import vstack
import numpy as np
from matplotlib import pyplot as plt
import prettyplotlib as ppl

# Draw three equally sized, isotropic 2-D Gaussian blobs and scatter them.
cluster_centers = [
    np.array([1, 1]),
    np.array([-1, -1]),
    np.array([-1, 1]),
]

var_scale = 0.01
sample_size = 1000

# Every blob shares the same covariance: var_scale times the 2x2 identity.
covariance = var_scale * np.eye(2)

# Samples are drawn centre by centre and stacked row-wise into one array.
X = vstack([
    multivariate_normal(center, covariance, sample_size)
    for center in cluster_centers
])

#np.savetxt("./test_clusters/guassian3.csv",X,delimiter=",")
ppl.scatter(X[:, 0], X[:, 1])
plt.show()
Example #34
0
import prettyplotlib as ppl
import matplotlib.pyplot as plt
import numpy as np

# Square plot with fixed limits on both axes, so the identity line drawn at
# the end runs from corner to corner.
axis_limits = (0, 0.35)

fig, ax = plt.subplots(1)
ax.set(aspect=1)
ax.set_autoscaley_on(False)
ax.set_autoscalex_on(False)
ax.set_xlim(*axis_limits)
ax.set_ylim(*axis_limits)

# Each CSV row is reduced to sqrt(mean(row)) minus a fixed per-file offset.
data1 = np.genfromtxt('error_inv.csv', dtype=float, delimiter=',')
data2 = np.genfromtxt('error_var.csv', dtype=float, delimiter=',')
data1_mod = np.sqrt(np.mean(data1, axis=1)) - 0.0165
data2_mod = np.sqrt(np.mean(data2, axis=1)) - 0.01

# Number of rows for which the first file's value does not exceed the second's.
count = sum(1 for i in range(data1_mod.shape[0])
            if data1_mod[i] <= data2_mod[i])

print(count)

ax.set_xlabel('RMSE regular')
ax.set_ylabel('RMSE rotation invariant')

ppl.scatter(ax, data2_mod, data1_mod, facecolor='#66c2a5', s=1)
ppl.plot(list(axis_limits), list(axis_limits), '#fc8d62', linewidth=1)
fig.savefig('scatter.pdf')
Example #35
0
# Project the data to a 2D space for visualization
# (``X`` and ``y`` are defined outside this fragment.)
# NOTE(review): RandomizedPCA is not available in recent scikit-learn
# releases -- confirm the installed version before running.
from sklearn.decomposition import RandomizedPCA
Xp = RandomizedPCA(n_components=2, random_state=1).fit_transform(X)

# Plotting imports
from matplotlib import pyplot as plt
import prettyplotlib as ppl

# Plot individuals: one scatter series per distinct value of ``y``
populations = np.unique(y)
# colors = plt.get_cmap("hsv")
f, ax = plt.subplots(figsize=(10, 4))

for i, p in enumerate(populations):
    # Boolean mask selecting the rows that belong to population ``p``
    mask = (y == p)
    ppl.scatter(ax, Xp[mask, 0], Xp[mask, 1], label=p)

plt.xlim([-50, 100])
ppl.legend(ax, loc=1)

plt.savefig("randomized_pca.png")

# Learn with scikit-learn
# -----------------------

from matplotlib import pyplot as plt

# NOTE(review): sklearn.cross_validation was superseded by
# sklearn.model_selection in later releases -- confirm the installed version.
from sklearn.cross_validation import train_test_split
# Fixed random_state makes the train/test split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

from sklearn.metrics import confusion_matrix
Example #36
0
    # NOTE(review): this fragment starts inside a loop over stations whose
    # header is not visible here; ``s`` is presumably one station record.
    if "data" in s:
        years = s["data"].keys()
        # Only show the stations with enough data.
        if len(s["data"].keys()) >= num_years_required:
            xx = []  # years, converted to int
            yx = []  # the "max" entry recorded for each year
            for y in years:
                xx.append(int(y))
                val = s["data"][y]["max"]
                yx.append(val)

            # NOTE(review): the same points are drawn twice, once with plain
            # matplotlib and once with prettyplotlib -- confirm both are wanted.
            ax.scatter(xx, yx, marker='o')
            ppl.scatter(ax,
                        xx,
                        yx,
                        alpha=0.8,
                        edgecolor='black',
                        linewidth=0.15,
                        label=str(s["station_num"]))
            # Legend and axis labels are re-applied for every plotted station.
            ppl.legend(ax, loc='right', ncol=1)
            ax.set_xlabel('Year')
            ax.set_ylabel('water level (m)')

ax.set_title("Stations exceeding " + str(num_years_required) +
             " years worth of water level data (MHHW)")
fig.set_size_inches(14, 8)

# <markdowncell>

# ### Number of stations available by number of years
Example #37
0
def ntt_proposal_figure2():

    """
    Plot from the NTT telescope proposal.

    Builds a model SED with ``model``, replaces it from the wavelength index
    nearest ``wavmin_bbpl`` onwards with a power law (two different slopes,
    giving two curves), and plots lambda * F_lambda against wavelength.
    Over-plotted are the median and standard deviation of lambda * F_lambda in
    the ALLWISE W1-W3 columns of the low- and high-beta tables, and one track
    per object of the red-quasar catalogue, rescaled to the first model curve
    at the track's first wavelength.

    All input paths are hard-coded.  The figure is shown interactively and
    nothing is returned.
    """

    import yaml
    from load import load
    import cPickle as pickle
    import numpy as np
    from astropy.table import Table
    from model import model
    from pl import pl
    from scipy.interpolate import interp1d
    import matplotlib
    import matplotlib.pyplot as plt
    import prettyplotlib as ppl

    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary Python objects; only use this with a trusted input.yml.
    with open('input.yml', 'r') as f:
        parfile = yaml.load(f)

    fittingobj = load(parfile)

    wavlen = fittingobj.get_wavlen()
    with open('/data/lc585/QSOSED/Results/140811/allsample_2/fluxcorr.array','rb') as f:
        flxcorr = pickle.load(f)

    # NOTE(review): the correction loaded above is immediately replaced by
    # unity, i.e. no flux correction is applied -- confirm this is intended.
    flxcorr = np.ones(len(wavlen))
    zromag = fittingobj.get_zromag()
    bp = fittingobj.get_bp()
    dlam = fittingobj.get_dlam()

    tab1 = Table.read('/data/lc585/QSOSED/Results/140912/lowbetatab_3.fits')
    tab2 = Table.read('/data/lc585/QSOSED/Results/140912/highbetatab_3.fits')

    magcolumns = ['SDSS_UMAG',
                  'SDSS_GMAG',
                  'SDSS_RMAG',
                  'SDSS_IMAG',
                  'SDSS_ZMAG',
                  'UKIDSS_YMAG',
                  'UKIDSS_JMAG',
                  'UKIDSS_HMAG',
                  'UKIDSS_KMAG',
                  'ALLWISE_W1MAG',
                  'ALLWISE_W2MAG',
                  'ALLWISE_W3MAG',
                  'ALLWISE_W4MAG']
    nbands = len(magcolumns)

    # Shift every object so that its SDSS_IMAG entry (row 3) equals 18.0.
    datmag1 = np.array([np.array(tab1[i]) for i in magcolumns])
    datmag1 = datmag1 - datmag1[3, :] + 18.0

    datmag2 = np.array([np.array(tab2[i]) for i in magcolumns])
    datmag2 = datmag2 - datmag2[3, :] + 18.0

    plslp1 = 0.46
    plslp2 = 0.03
    plbrk = 2822.50
    bbt = 1216.32
    bbflxnrm = 0.24
    galfra = 0.31
    elscal = 0.71
    scahal = 0.86
    ebv = 0.0

    magtmp, wavlentmp, fluxtmp = model(plslp1,
                                       plslp2,
                                       plbrk,
                                       bbt,
                                       bbflxnrm,
                                       elscal,
                                       scahal,
                                       galfra,
                                       ebv,
                                       18.0,
                                       2.,
                                       fittingobj,
                                       flxcorr,
                                       parfile)

    # Index from which the model is replaced by a pure power law.
    wavnumjoin = (np.abs(wavlen - parfile['runhotdustplfit']['wavmin_bbpl'])).argmin()

    def _normalised_model_flux(slope):
        # Splice a power law of the given slope onto the model from
        # `wavnumjoin` onwards, then rescale so that the magnitude synthesised
        # through bandpass 3 equals 18.0.
        plnrm = fluxtmp[wavnumjoin] / (wavlen[wavnumjoin]**(slope - 2.0))
        plmodel = pl(wavlen, slope, plnrm)

        spliced = np.zeros(len(wavlen))
        spliced[:wavnumjoin] = fluxtmp[:wavnumjoin]
        spliced[wavnumjoin:] = plmodel[wavnumjoin:]

        spc = interp1d(wavlentmp, spliced, bounds_error=False, fill_value=0.0)
        sum1 = np.sum(bp[3][1] * spc(bp[3][0]) * bp[3][0] * dlam[3])
        sum2 = np.sum(bp[3][1] * bp[3][0] * dlam[3])
        flxlam = sum1 / sum2
        flxlam = flxlam + 1e-200  # guard against log10(0)
        imag = (-2.5 * np.log10(flxlam)) - zromag[3]
        delta_m = 18.0 - imag  # what must be added to the model magnitude to match the data
        return spliced * 10**(-0.4 * delta_m)  # normalised flux in erg/cm^2/s/A

    fnew = _normalised_model_flux(1.852337650258)
    # Comparison model (described in the original as "beta = 0").
    fnew2 = _normalised_model_flux(0.9119355988796)

    # Flux zero points per bandpass.  They depend only on the bandpasses, so
    # they are computed once instead of once per catalogue object.
    f_0 = np.zeros(len(bp))
    for ftr in range(len(bp)):
        sum1 = np.sum(bp[ftr][1] * (0.10893/(bp[ftr][0]**2)) * bp[ftr][0] * dlam[ftr])
        sum2 = np.sum(bp[ftr][1] * bp[ftr][0] * dlam[ftr])
        f_0[ftr] = sum1 / sum2

    def _observed_sed(tab, datmag):
        # Per-object data fluxes (erg/cm^2/s/A) and wavelengths divided by
        # (1 + Z_1) for one table.  The arrays are sized from `tab` itself.
        nobj = len(tab)
        flux = np.zeros((nobj, nbands))
        lam = np.zeros((nobj, nbands))
        for obj in range(nobj):
            lam[obj, :] = fittingobj.get_lameff() / (1.0 + tab[obj]['Z_1'])
            flux[obj, :] = f_0 * 10.0**(-0.4 * datmag[:, obj])
        return flux, lam

    flam, lameff = _observed_sed(tab1, datmag1)
    # The second pair used to be allocated with len(tab1), which breaks (or
    # pads with zero rows) whenever the two tables differ in length.
    flam_2, lameff_2 = _observed_sed(tab2, datmag2)

    # Manda's Very Red Quasars
    redcat = np.genfromtxt('/data/lc585/QSOSED/Results/140920/Red_Quasar_photom.cat')

    flam_3, lameff_3 = np.zeros((len(redcat),3)), np.zeros((len(redcat),3))

    # NOTE(review): catalogue column 12 is used like a redshift and columns
    # 2-4 like W1-W3 magnitudes -- confirm against the catalogue header.
    for obj in range(len(redcat)):

        lameff_3[obj,0] = 33680.0 / (1.0 + redcat[obj,12])
        lameff_3[obj,1] = 46180.0 / (1.0 + redcat[obj,12])
        lameff_3[obj,2] = 120000.0 / (1.0 + redcat[obj,12])

        flam_3[obj,0] = f_0[9] * 10.0**( -0.4 * redcat[obj,2] )
        flam_3[obj,1] = f_0[10] * 10.0**( -0.4 * redcat[obj,3] )
        flam_3[obj,2] = f_0[11] * 10.0**( -0.4 * redcat[obj,4] )

    # Everything is plotted relative to lambda * F_lambda of the first model
    # curve at wavelength index 520.
    nrm = fnew[520] * wavlen[520]

    flam = flam / nrm
    flam_2 = flam_2 / nrm
    flam_3 = flam_3 / nrm

    def _band_stats(lam, flux):
        # (median wavelength, median lambda*F_lambda, std of lambda*F_lambda)
        # for the ALLWISE W1, W2 and W3 columns (indices 9, 10, 11).
        stats = []
        for band in (9, 10, 11):
            lamflam = lam[:, band] * flux[:, band]
            stats.append((np.median(lam[:, band]),
                          np.median(lamflam),
                          np.std(lamflam)))
        return stats

    stats_low = _band_stats(lameff, flam)
    stats_high = _band_stats(lameff_2, flam_2)

    fig = plt.figure(figsize=(5,3))
    ax = fig.add_subplot(1,1,1)

    fnew = fnew / nrm
    fnew2 = fnew2 / nrm
    ax.plot(wavlen,wavlen*fnew,color='black')
    ax.plot(wavlen[1900:],wavlen[1900:]*fnew2[1900:],color='black')
    for colour, stats in (('blue', stats_low), ('red', stats_high)):
        for lam_med, flx_med, flx_err in stats:
            ax.errorbar(lam_med,flx_med,yerr=flx_err,color=colour)

    # One grey track per red quasar, rescaled so that its first point lies on
    # the first model curve.
    for i in range(len(lameff_3)):
        xdat = np.array([lameff_3[i,0],lameff_3[i,1],lameff_3[i,2]])
        ydat = np.array([lameff_3[i,0] * flam_3[i,0],
                lameff_3[i,1] * flam_3[i,1],
                lameff_3[i,2] * flam_3[i,2]])
        nrmind = np.argmin( np.abs(wavlen - xdat[0]) )
        ydat = ydat * (wavlen[nrmind] * fnew[nrmind]) / ydat[0]
        ppl.scatter(xdat,ydat, color='grey', alpha=0.5)
        ppl.plot(xdat,ydat,color='grey',alpha=0.2)


    ax.set_xlim(1200,50000 )
    ax.set_ylim(0,3.5)

    ax.set_ylabel(r'Relative Flux $\lambda F_{\lambda}(\lambda)$',fontsize=10)
    ax.set_xlabel(r'Rest-frame Wavelength ($\AA$)',fontsize=10)
    plt.text(17699,0.21607,r'$\beta_{\rm NIR}=-0.09$',fontsize=8,horizontalalignment='left',verticalalignment='top')
    plt.text(17000,0.6,r'$\beta_{\rm NIR}=0.85$',fontsize=8,rotation=20.0,horizontalalignment='left',verticalalignment='bottom')
    plt.text(6604,2.23966,r'H$\alpha$',fontsize=8,rotation=90.0,horizontalalignment='center',verticalalignment='bottom')
    plt.text(4902,1.21462,r'H$\beta$ & OIII',fontsize=8,rotation=90.0,horizontalalignment='center',verticalalignment='bottom')
    plt.text(1558,2.3328,r'CIV',fontsize=8,rotation=90.0,horizontalalignment='center',verticalalignment='bottom')

    plt.tick_params(axis='both',which='major',labelsize=8)
    ax.set_xscale('log')
    ax.set_xticks([2000,5000,10000,20000,40000])
    ax.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
    plt.tight_layout()
    # plt.savefig('/home/lc585/Dropbox/IoA/NTT_Proposal_95A/esoform-95A/figure2_v2.pdf')
    plt.show()

    return None
Example #38
0
    def plot_ts_diagram(self,
                        ax,
                        sal,
                        temp,
                        xlabel='Salinity',
                        ylabel='Temperature',
                        title='',
                        axis_font={},
                        title_font={},
                        tick_font={},
                        **kwargs):
        """Draw a temperature-salinity diagram with density contours on ``ax``.

        Parameters
        ----------
        ax : axes to draw on.
        sal, temp : equal-length sequences; NaN entries are masked out.
        xlabel, ylabel, title : label texts (underscores are shown as spaces).
        axis_font, title_font : text keyword dicts; the module defaults are
            used when empty.
        tick_font : keyword dict for ``ax.tick_params``; skipped when empty.
        **kwargs : forwarded to ``ppl.scatter``.

        Raises
        ------
        ValueError
            If ``sal`` and ``temp`` differ in length.
        """
        if not axis_font:
            axis_font = axis_font_default
        if not title_font:
            title_font = title_font_default

        sal = np.ma.array(sal, mask=np.isnan(sal))
        temp = np.ma.array(temp, mask=np.isnan(temp))
        if len(sal) != len(temp):
            raise ValueError('Sal and Temp arrays are not the same size!')

        # Figure out boundaries (mins and maxs), padded beyond the data range
        smin = sal.min() - (0.01 * sal.min())
        smax = sal.max() + (0.01 * sal.max())
        tmin = temp.min() - (0.1 * temp.max())
        tmax = temp.max() + (0.1 * temp.max())

        # Number of grid cells in x and y.  These must be real integers:
        # round() may return a float, which numpy does not accept as an array
        # shape or as a sample count.
        xdim = int(round((smax - smin) / 0.1 + 1, 0))
        ydim = int(round((tmax - tmin) + 1, 0))

        # Create empty grid of zeros
        dens = np.zeros((ydim, xdim))

        # Create temp and sal vectors of appropriate dimensions
        ti = np.linspace(1, ydim - 1, ydim) + tmin
        si = np.linspace(1, xdim - 1, xdim) * 0.1 + smin

        # Fill the grid with densities (third argument of sw.dens fixed at 0)
        for j in range(ydim):
            for i in range(xdim):
                dens[j, i] = sw.dens(si[i], ti[j], 0)

        # Subtract 1000 to convert to sigma-t
        dens = dens - 1000

        # Draw on the axes that was passed in rather than on whichever axes
        # pyplot currently considers active.
        cs = ax.contour(si, ti, dens, linestyles='dashed', colors='k')
        ax.clabel(cs, fontsize=12, inline=1, fmt='%1.0f')  # label the contour levels
        ppl.scatter(ax, sal, temp, **kwargs)

        ax.set_xlabel(xlabel.replace("_", " "), labelpad=10, **axis_font)
        ax.set_ylabel(ylabel.replace("_", " "), labelpad=10, **axis_font)
        ax.set_title(title.replace("_", " "), **title_font)
        ax.set_aspect(1. / ax.get_data_ratio())  # make axes square
        if tick_font:
            ax.tick_params(**tick_font)
        plt.tight_layout()
Example #39
0
    def plot_time_series(self,
                         fig,
                         is_timeseries,
                         ax,
                         x,
                         y,
                         fill=False,
                         title='',
                         xlabel='',
                         ylabel='',
                         title_font={},
                         axis_font={},
                         tick_font={},
                         scatter=False,
                         qaqc=[],
                         events={},
                         **kwargs):
        """Plot ``y`` against ``x`` on ``ax`` as a line or a scatter.

        Parameters
        ----------
        fig : figure owning ``ax``; its date labels are auto-formatted when
            ``is_timeseries`` is true.
        is_timeseries : if true the x axis gets a time label via
            ``self.get_time_label``; otherwise ``xlabel`` is used.
        ax : axes to draw on.
        x, y : data to plot.
        fill : shade the area between the data and the lower y limit.
        title, xlabel, ylabel : label texts (underscores are shown as spaces).
        title_font, axis_font : text keyword dicts; the module defaults are
            used when empty.
        tick_font : keyword dict for ``ax.tick_params``; skipped when empty.
        scatter : draw a scatter instead of a line.
        qaqc : per-sample flags; samples whose flag is > 0 are circled in red.
        events : mapping whose ``'events'`` entry lists dicts with
            ``'start_date'`` and ``'class'``; each is drawn as a dashed
            vertical line and listed in the legend.
        **kwargs : forwarded to ``ppl.scatter`` / ``ppl.plot``.
        """
        if not title_font:
            title_font = title_font_default
        if not axis_font:
            axis_font = axis_font_default

        line_handles = None
        if scatter:
            ppl.scatter(ax, x, y, **kwargs)
        else:
            line_handles = ppl.plot(ax, x, y, **kwargs)

        if is_timeseries:
            self.get_time_label(ax, x)
            fig.autofmt_xdate()
        else:
            ax.set_xlabel(xlabel.replace("_", " "), **axis_font)

        if ylabel:
            ax.set_ylabel(ylabel.replace("_", " "), **axis_font)
        if title:
            ax.set_title(title.replace("_", " "), **title_font)

        ax.grid(True)
        if fill:
            miny = min(ax.get_ylim())
            # A line plot is shaded in its own colour; a scatter has no line
            # handle, so the default axis-font colour is used instead.
            if scatter:
                fill_color = axis_font_default['color']
            else:
                fill_color = line_handles[0].get_color()
            ax.fill_between(x,
                            y,
                            miny + 1e-7,
                            facecolor=fill_color,
                            alpha=0.15)

        if events:
            ylim = ax.get_ylim()
            for event in events['events']:
                event_time = datestr2num(event['start_date'])
                # A separate name is used here so that the data in `x` is not
                # overwritten; the QA/QC overlay below still needs it.
                event_x = np.array([event_time, event_time])
                ax.plot(event_x, ylim, '--', label=event['class'])

            legend = ax.legend()
            if legend:
                for label in legend.get_texts():
                    label.set_fontsize(10)

        if len(qaqc) > 0:
            # asarray/asanyarray make the comparison and the fancy indexing
            # work for plain sequences as well as for arrays.
            bad_data = np.where(np.asarray(qaqc) > 0)
            ppl.plot(ax,
                     np.asanyarray(x)[bad_data],
                     np.asanyarray(y)[bad_data],
                     marker='o',
                     mfc='none',
                     linestyle='None',
                     markersize=6,
                     markeredgewidth=2,
                     mec='r')

        # plt.tick_params(axis='both', which='major', labelsize=10)
        if tick_font:
            ax.tick_params(**tick_font)
        plt.tight_layout()
plt.savefig('test/u_SDSS_vs_GC')
'''



# Lower/upper axis limits, one [lo, hi] pair per entry of ``bands``
# (``bands`` and the magnitude arrays are defined outside this fragment).
limits = [[12, 18.5], [11.4, 16.5], [10, 16], [9.5, 16], [9.5, 16]]

# One square figure per band: column i of ``mags`` against column i of
# ``petroMags``, with y error bars and the line returned by makeDiagonalLine.
for i, band in enumerate(bands):
  lim_lo = limits[i][0]
  lim_hi = limits[i][1]
  x, m, b = makeDiagonalLine([lim_lo, lim_hi])

  fig = plt.figure(figsize=(8, 8))  
  ax = plt.subplot(111)

  c = ppl.scatter(ax, petroMags[:, i], mags[:, i], s=8, c='k', edgecolor='k') 
  # Same limits on both axes
  ax.axis([lim_lo, lim_hi, lim_lo, lim_hi])
  ax.errorbar(petroMags[:, i], mags[:, i], yerr=mag_err[:, i], mew=0, linestyle = "none", color="black")
  plt.plot(x,m*x + b, c='k')
  ax.xaxis.set_major_locator(majorLocator)
  ax.xaxis.set_minor_locator(minorLocator)
  ax.yaxis.set_major_locator(majorLocator)
  ax.yaxis.set_minor_locator(minorLocator)
  ax.minorticks_on()
  # Change the labels back to black
  ax.xaxis.label.set_color('black')
  ax.yaxis.label.set_color('black')
  # Change the axis title also back to black
  ax.title.set_color('black')
  # Get back the top and right axes lines ("spines")
  # NOTE(review): the code that consumes this list is cut off after this line.
  spines_to_remove = ['top', 'right']
Example #41
0
def plot_pca(df, c_scale=None, x_pc=1, y_pc=2, distance='L1',
             save_as=None, save_format='png', whiten=True, num_vectors=30,
             figsize=(10, 10), colors_dict=None, markers_dict=None,
             title='PCA', show_vectors=True, show_point_labels=True,
             show_vector_labels=True, column_ids_dict=None):
    """Run a PCA on ``df`` and draw a biplot of two principal components.

    Every row of ``df`` is drawn as a point at its transformed coordinates and
    the ``num_vectors`` columns with the largest loadings on the two chosen
    components are drawn as lines from the origin.

    Parameters
    ----------
    df : DataFrame; rows are observations, columns are features.
    c_scale : factor applied to the component loadings before drawing.  When
        falsy it is derived as 0.75 * (largest point norm) / (largest loading
        norm).
    x_pc, y_pc : 1-based indices of the components on the x and y axes.
    distance : 'L1' or 'L2'; the norm used to rank the loading vectors.
    save_as, save_format : if ``save_as`` is truthy the figure is saved there
        in ``save_format`` (PNG output uses dpi=300).
    whiten : forwarded to ``dc.PCA``.
    num_vectors : maximum number of loading vectors kept.
    figsize : figure size; also sets the font/marker scale.
    colors_dict, markers_dict : optional per-row-id colour / marker lookups;
        ids that are missing fall back to 'black' / 'x'.
    title : plot title.
    show_vectors, show_point_labels, show_vector_labels : drawing toggles.
    column_ids_dict : optional mapping from column name to the label used for
        its loading vector.

    Returns
    -------
    list of ``(x, y, label, magnitude)`` tuples for the kept loading vectors,
    sorted by decreasing magnitude.

    Raises
    ------
    ValueError
        If ``distance`` is neither 'L1' nor 'L2' (previously this silently
        produced no vectors at all).
    """
    if distance not in ('L1', 'L2'):
        raise ValueError(
            "distance must be 'L1' or 'L2', got {!r}".format(distance))

    def _lookup(mapping, key, default):
        # Per-id style lookup that falls back to `default` for unknown ids.
        if not mapping:
            return default
        try:
            return mapping[key]
        except (KeyError, IndexError, TypeError):
            return default

    # gather ids and values
    row_ids = df.index
    if column_ids_dict is not None:
        column_ids = [column_ids_dict[col] for col in df.columns]
    else:
        column_ids = df.columns

    # DataFrame.as_matrix() no longer exists in current pandas; .values is the
    # equivalent that works on old and new versions alike.
    df_array = df.values

    # perform pca
    n_components = max(x_pc, y_pc, 2)
    pca = dc.PCA(whiten=whiten, n_components=n_components)
    pca.fit(df_array)
    X = pca.transform(df_array)
    comp_x = pca.components_[x_pc - 1, :]
    comp_y = pca.components_[y_pc - 1, :]

    x_list = X[:, x_pc - 1]
    y_list = X[:, y_pc - 1]

    if not c_scale:
        c_scale = .75 * max([norm(point) for point in zip(x_list, y_list)]) / \
                  max([norm(vector) for vector in zip(comp_x, comp_y)])

    size_scale = sqrt(figsize[0] * figsize[1]) / 1.5

    # rank features by the magnitude of their (scaled) loading vector
    comp_magn = []
    for (x, y, an_id) in zip(comp_x, comp_y, column_ids):
        x = x * c_scale
        y = y * c_scale
        if distance == 'L1':
            magnitude = abs(y) + abs(x)
        else:
            magnitude = math.sqrt((y ** 2) + (x ** 2))
        comp_magn.append((x, y, an_id, magnitude))

    # create figure and plot
    pca_fig, ax = plt.subplots(figsize=figsize)

    # One scatter call per point so that each can carry its own marker.
    for (x, y, an_id) in zip(x_list, y_list, row_ids):
        color = _lookup(colors_dict, an_id, 'black')
        marker = _lookup(markers_dict, an_id, 'x')

        if show_point_labels:
            ax.text(x, y, an_id, color=color, size=size_scale)

        ppl.scatter(ax, x, y, marker=marker, color=color, s=size_scale * 5)

    vectors = sorted(comp_magn, key=lambda item: item[3],
                     reverse=True)[:num_vectors]
    if show_vectors:
        for vec_x, vec_y, vec_label, _magnitude in vectors:
            ppl.plot(ax, [0, vec_x], [0, vec_y], color=ppl.almost_black,
                     linewidth=.5)
            if show_vector_labels:
                ax.text(vec_x, vec_y, vec_label, color='black',
                        size=size_scale)

    var_1 = int(pca.explained_variance_ratio_[x_pc - 1] * 100)
    var_2 = int(pca.explained_variance_ratio_[y_pc - 1] * 100)

    ax.set_xlabel(
        'Principal Component {} (Explains {}% Of Variance)'.format(str(x_pc),
                                                                   str(var_1)),
        size=size_scale * 2)
    ax.set_ylabel(
        'Principal Component {} (Explains {}% Of Variance)'.format(str(y_pc),
                                                                   str(var_2)),
        size=size_scale * 2)
    ax.set_title(title, size=size_scale * 3)

    if save_as:
        kwargs = {}
        if save_format == 'png':
            kwargs['dpi'] = 300
        pca_fig.savefig(save_as, format=save_format, **kwargs)

    return vectors
Example #42
0
        # NOTE(review): this fragment starts inside an enclosing loop whose
        # header is not visible here; ``s``, ``years``, ``xx`` and ``yx`` are
        # set up there.
        for y in years:
            val = s["data"][y]["max"]
            if val is not None:
                try:
                    #round to 2dp
                    # NOTE(review): "%.2f" yields a string, so ``yx`` collects
                    # strings rather than numbers -- confirm this is intended
                    # for plotting.
                    val = "%.2f" % val
                    yx.append(val)
                    xx.append(int(y))
                except:
                    # NOTE(review): a bare except silently drops any year that
                    # fails to format or convert.
                    pass

        #ppl.scatter(ax, xx, yx,alpha=0.8,edgecolor='black',linewidth=0.15 ,label=str(s["station_num"])+":"+str(s["long_name"][0]))
        ppl.scatter(ax,
                    xx,
                    yx,
                    alpha=0.8,
                    edgecolor='black',
                    linewidth=0.15,
                    label=str(s["long_name"]))

ax.legend(loc=1)
ax.set_title(
    'Annual Max sea surface wave significant height (m) (Observed & Model)')
ax.set_xlabel('Year')
ax.set_ylabel('sea surface wave significant height (m)')

# Tick every second year between st_yr and ed_yr (end exclusive)
ax.set_xticks(numpy.arange(st_yr, ed_yr, 2))
fig.set_size_inches(14, 8)

# Shrink current axis by 20%
box = ax.get_position()
Example #43
0
print "Pi_hat = ", np.mean(pi_hat) # show the mean value of pi over the nn estimates
print "Pi real = 3.14159265359" # the true value of pi
# Mean squared error of the estimates with respect to the true value
print "ECM =", np.mean(np.subtract(3.14159265359,pi_hat) ** 2)

# Helper data for the plot
# we only need the random values that satisfy the circle condition
u_f = [] 
v_f = []

# Keep the (u, v) points lying inside the circle of radius 0.5 centred at (0.5, 0.5)
for i,e in enumerate(u):
    if np.sqrt((u[i]-0.5 )** 2 + (v[i]-0.5) ** 2) < 0.5:
        u_f += [u[i]]
        v_f += [v[i]]

plt.figure(figsize=(6,12))

ax1 = subplot(211) # plot of the Monte Carlo simulation
ax1.axhline(0.5, color="grey", alpha=0.5)
ax1.axvline(0.5, color="grey", alpha=0.5)
ax1.set_title(u"Simulación Montecarlo", fontsize=14)
# All points first, then the points inside the circle drawn on top
ppl.scatter(u,v)
ppl.scatter(u_f,v_f, linewidth=0.08)

ax2 = subplot(212) # plot of the nn estimates of pi
ppl.scatter(range(len(pi_hat)),pi_hat)
ax2.axhline(3.14159265359, color="grey", alpha=0.5, linewidth=2)
text(len(pi_hat)*1.01, 3.14159265359, "Pi = 3.1415...")
ax2.set_title(u"Estimación de Pi", fontsize=14)

# plt.show() # remove the "#" if you are using the shell
#plt.savefig("simulacion_pi_montercalo.png", dpi=200) # remove the "#" to save the image.
Example #44
0
def plot(x, y, **kwargs):
	"""Scatter ``x`` against ``y``, then merge and plot the per-chunk peak data.

	After the initial scatter call, the body reads one ``.npz`` file per entry
	of the module-level ``processlist``, concatenates their arrays, deletes
	the files, saves a scatter plot of the merged data and finally plots the
	differences between consecutive sorted filtered peaks.

	NOTE(review): everything after the first statement ignores ``x``, ``y``
	and ``kwargs`` and relies on module-level names (``processlist``,
	``pathfilename``, ``savefilename``); it looks like it may belong to a
	different function -- confirm against the original file.
	"""
	# `True` is passed as the third positional argument of ppl.scatter; its
	# meaning depends on the prettyplotlib version in use -- TODO confirm.
	ppl.scatter(x, y, True, **kwargs)
	# Accumulators for the arrays stored in the per-chunk .npz files.
	sumpeak = np.array([])
	suminterval = np.array([])
	sumfilterpeak = np.array([])
	sumfilterinterval = np.array([])

	for time in processlist:
		time = time.astype(int)
		# File name is built from the first and last value of this entry.
		loaddata = pathfilename+str(time[0])+'_'+str(time[-1])+'.npz'
		# NOTE(review): the archive is opened once per key (four np.load
		# calls) and never explicitly closed before being removed below.
		sumpeak = np.append(sumpeak, np.load(loaddata)['totalpeak'])
		suminterval = np.append(suminterval, np.load(loaddata)['totalinterval'])
		sumfilterpeak = np.append(sumfilterpeak, np.load(loaddata)['totalfilterpeak'])
		sumfilterinterval = np.append(sumfilterinterval, np.load(loaddata)['totalfilterinterval'])
		# The intermediate file is deleted once its contents are merged.
		os.remove(loaddata)

	# Raw and filtered (peak, interval) pairs share one axes.
	fig, ax = plt.subplots(1)
	ppl.scatter(ax, sumpeak, suminterval)
	ppl.scatter(ax, sumfilterpeak, sumfilterinterval)
	ax.set_title(savefilename + ' Heterogeneity distrubition')
	fig.savefig(pathfilename + '_Heterogeneity_distrubition' + '.png',dpi=300)
	plt.close()
	print 'Saved scatter plot:', savefilename

	# Gaps between consecutive filtered peaks, in peak order and sorted by size.
	sortsumpeak = np.sort(sumfilterpeak)
	sortsumpeakdiff = np.diff(sortsumpeak)
	diffdistrubition = np.sort(sortsumpeakdiff.copy())

	# NOTE(review): `limit` is not used anywhere in the visible body.
	limit = np.mean(diffdistrubition)

	# NOTE(review): `ax` is not passed to ppl.plot and the new figure is
	# neither saved nor closed in the visible body.
	fig, ax = plt.subplots(1)
	ppl.plot(np.arange(len(sortsumpeakdiff)), sortsumpeakdiff)
	ppl.plot(np.arange(len(diffdistrubition)), diffdistrubition)
# Pair each k-mer column name with its entry in row 1 of
# `reduced_data.components_`.
loadings = pandas.DataFrame({
    "loadings": reduced_data.components_[1, :],
    "kmer": kmer_colums
})
# NOTE(review): the sorted result is not assigned, so `loadings` itself stays
# unsorted unless this call sorts in place. `DataFrame.sort` only exists in
# old pandas releases (newer ones use `sort_values`) -- confirm the target
# pandas version.
loadings.sort('loadings')

# Scatter plot of GC rich vs AT rich

# A k-mer is selected when its set of characters is exactly {G, C}
# (respectively {A, T}); it must therefore contain both letters and no others.
gc_rich_kmers = [x for x in kmer_colums if set(x) == set(['G', 'C'])]
at_rich_kmers = [x for x in kmer_colums if set(x) == set(['A', 'T'])]

# Row-wise totals over the selected k-mer columns.
gc_rich = input_kmers_counts[gc_rich_kmers].apply(lambda x: sum(x), axis=1)
at_rich = input_kmers_counts[at_rich_kmers].apply(lambda x: sum(x), axis=1)
# The two Series are placed side by side; they are addressed below as
# columns 0 (GC total) and 1 (AT total).
kmer_gc_only = pandas.concat((gc_rich, at_rich), axis=1)

ppl.scatter(kmer_gc_only[0], kmer_gc_only[1], c=colors)
pl.show()

# We make a data frame with the PCA coordinates and all annotations

# One row per (sequence_description, sequence_length, GC) combination and one
# column per k-mer, filled with the "count" values; missing combinations
# become 0.
input_kmers_counts_output = pandas.pivot_table(
    input_kmers,
    values="count",
    index=['sequence_description', 'sequence_length', 'GC'],
    columns=["kmer"],
    fill_value=0)

# Refit the reducer on the k-mer count columns and keep the transformed
# coordinates.
reduced_data_coord = reduced_data.fit_transform(
    input_kmers_counts[kmer_colums])

output_table = pandas.DataFrame({
						totalpeak = np.append(totalpeak, oripeak)
						totalinterval = np.append(totalinterval, [interval] * len(oripeak))


						for iprint in range(len(oripeak)):
								print 'peak', oripeak[iprint], 'Popen(avg)', oripopendiffpeak[iprint], 'Popen(std)', oripopendiffstdamp[iprint]

						if count == 10:
								#plotorifit(savefilename, std, oripeak, oripopendiffstdamp, interval, precentage)
								plotpopendiff(savefilename, result, oripeak, oripopendiffstdamp)
								count = 0
						count += 1

		fig, ax = plt.subplots(1)
		ppl.scatter(ax, totalpeak, totalinterval)
		ax.set_title(savefilename + ' Heterogeneity distrubition Start:')
		fig.savefig(pathfilename + '_Heterogeneity_distrubition' + '.png',dpi=500)
		plt.close()
		print 'Saved scatter plot:' + savefilename





'''
		if os.path.exists(savefilename + '.npy'):
				print savefilename + '.npy', 'Found. Loading the data.'
				plotcolour = np.load(savefilename + '.npy')
				print savefilename + '.npy', 'Loaded.'
		else:
Example #48
0
    def make_scatter(self, use_prettyplotlib=True, hists=True, num_bins=None):
        '''
        Plot the columns of ``self.dataframe`` against each other.

        If ``self.subplot`` is enabled, all comparisons are drawn in a single
        lower-triangle grid of panels with (optionally) a histogram of each
        column on the diagonal. Otherwise every pair of columns gets its own
        figure. Figures are shown when ``self.verbose`` is set and saved under
        ``self.save_name`` / ``self.save_type`` otherwise.

        Inspiration for this form came from the package triangle.py
        (https://github.com/dfm/triangle.py). Small snippets to set the labels
        and figure size came from triangle.py.

        Parameters
        ----------
        use_prettyplotlib : bool
            Draw with prettyplotlib when it can be imported; otherwise fall
            back to plain matplotlib.
        hists : bool
            In grid mode, draw histograms on the diagonal. When false the
            diagonal panels are hidden.
        num_bins : int, optional
            Number of histogram bins. When None, each histogram uses
            ``int(sqrt(N))`` bins (at least 1), where N is the number of
            non-NaN values in that column.
        '''

        if use_prettyplotlib:
            try:
                import prettyplotlib as plt
            except ImportError:
                import matplotlib.pyplot as plt
                use_prettyplotlib = False
                # Single-argument call form prints the same text on
                # Python 2 and Python 3.
                print("prettyplotlib not installed. Using matplotlib...")
        else:
            import matplotlib.pyplot as plt

        # Setup subplots if plotting together
        if self.subplot:
            num = len(self.columns)
            factor = 2.0  # size of one side of one panel
            lbdim = 0.5 * factor  # size of left/bottom margin
            trdim = 0.3 * factor  # size of top/right margin
            whspace = 0.05  # w/hspace size
            plotdim = factor * num + factor * (num - 1.) * whspace
            dim = lbdim + plotdim + trdim
            fig, axes = plt.subplots(nrows=num, ncols=num, figsize=(dim, dim))

            lb = lbdim / dim
            tr = (lbdim + plotdim) / dim
            fig.subplots_adjust(left=lb, bottom=lb, right=tr, top=tr,
                                wspace=whspace, hspace=whspace)

        # NOTE(review): `p` and `MaxNLocator` are not defined in this method;
        # presumably `p` is matplotlib.pyplot imported at module level --
        # confirm against the file's imports.
        for i, column1 in enumerate(self.columns):
            for j, column2 in enumerate(self.columns):

                data1 = self.dataframe[column1]
                data2 = self.dataframe[column2]

                # Keep only the rows where both columns hold a number.
                nans = np.isnan(data1) | np.isnan(data2)
                data1 = data1[~nans]
                data2 = data2[~nans]

                if self.subplot:
                    ax = axes[i, j]
                    if j > i:  # Don't bother plotting duplicates
                        ax.set_visible(False)
                        ax.set_frame_on(False)
                        continue

                    if j == i:  # Diagonal: histogram of a single column
                        if hists:
                            # The default bin count is computed per column
                            # and kept in a local, so it neither leaks into
                            # later panels nor reaches hist() as a float.
                            if num_bins is None:
                                bins = max(1, int(np.sqrt(len(data1))))
                            else:
                                bins = num_bins
                            if use_prettyplotlib:
                                plt.hist(ax, data1, bins, grid="y")
                            else:
                                ax.hist(data1, bins)
                                ax.grid(True)
                        else:
                            ax.set_visible(False)
                            ax.set_frame_on(False)

                        ax.set_xticklabels([])
                        ax.set_yticklabels([])
                    else:  # Below the diagonal: scatter of the two columns
                        if use_prettyplotlib:
                            plt.scatter(ax, data2, data1)
                        else:
                            ax.scatter(data2, data1)
                        ax.grid(True)
                        ax.xaxis.set_major_locator(MaxNLocator(5))
                        ax.yaxis.set_major_locator(MaxNLocator(5))

                    # Only the bottom row carries x tick labels and a label.
                    if i < num - 1:
                        ax.set_xticklabels([])
                    else:
                        for label in ax.get_xticklabels():
                            label.set_rotation(45)
                        ax.set_xlabel(column2)
                        ax.xaxis.set_label_coords(0.5, -0.3)

                    # Only the left column carries y tick labels and a label.
                    if j > 0:
                        ax.set_yticklabels([])
                    else:
                        for label in ax.get_yticklabels():
                            label.set_rotation(45)
                        ax.set_ylabel(column1)
                        ax.yaxis.set_label_coords(-0.3, 0.5)

                elif j < i:
                    # One stand-alone figure per unordered pair of columns.
                    fig, axes = plt.subplots(1)
                    if use_prettyplotlib:
                        plt.scatter(axes, data2, data1, grid="y")
                    else:
                        axes.scatter(data2, data1)
                        axes.grid(True)
                    axes.set_xlabel(column2)  # ADD UNITS!
                    axes.set_ylabel(column1)  # ADD UNITS!

                    if self.verbose:
                        p.show()
                    else:
                        fig.savefig(self.save_name + "_" + column1 + "_" +
                                    column2 + "." + self.save_type)
                        p.close()

        if self.subplot:
            # p.tight_layout()
            if self.verbose:
                p.show()
            else:
                fig.savefig(self.save_name + "_" + "scatter" + "." +
                            self.save_type)
Example #49
0
    def make_scatter(self, use_prettyplotlib=True, hists=True, num_bins=None):
        '''
        Plot the columns of ``self.dataframe`` against each other.

        If ``self.subplot`` is enabled, all comparisons are drawn in a single
        lower-triangle grid of panels with (optionally) a histogram of each
        column on the diagonal. Otherwise every pair of columns gets its own
        figure. Figures are shown when ``self.verbose`` is set and saved under
        ``self.save_name`` / ``self.save_type`` otherwise.

        Inspiration for this form came from the package triangle.py
        (https://github.com/dfm/triangle.py). Small snippets to set the labels
        and figure size came from triangle.py.

        Parameters
        ----------
        use_prettyplotlib : bool
            Draw with prettyplotlib when it can be imported; otherwise fall
            back to plain matplotlib.
        hists : bool
            In grid mode, draw histograms on the diagonal. When false the
            diagonal panels are hidden.
        num_bins : int, optional
            Number of histogram bins. When None, each histogram uses
            ``int(sqrt(N))`` bins (at least 1), where N is the number of
            non-NaN values in that column.
        '''

        if use_prettyplotlib:
            try:
                import prettyplotlib as plt
            except ImportError:
                import matplotlib.pyplot as plt
                use_prettyplotlib = False
                # Single-argument call form prints the same text on
                # Python 2 and Python 3.
                print("prettyplotlib not installed. Using matplotlib...")
        else:
            import matplotlib.pyplot as plt

        # Setup subplots if plotting together
        if self.subplot:
            num = len(self.columns)
            factor = 2.0  # size of one side of one panel
            lbdim = 0.5 * factor  # size of left/bottom margin
            trdim = 0.3 * factor  # size of top/right margin
            whspace = 0.05  # w/hspace size
            plotdim = factor * num + factor * (num - 1.) * whspace
            dim = lbdim + plotdim + trdim
            fig, axes = plt.subplots(nrows=num, ncols=num, figsize=(dim, dim))

            lb = lbdim / dim
            tr = (lbdim + plotdim) / dim
            fig.subplots_adjust(left=lb,
                                bottom=lb,
                                right=tr,
                                top=tr,
                                wspace=whspace,
                                hspace=whspace)

        # NOTE(review): `p` and `MaxNLocator` are not defined in this method;
        # presumably `p` is matplotlib.pyplot imported at module level --
        # confirm against the file's imports.
        for i, column1 in enumerate(self.columns):
            for j, column2 in enumerate(self.columns):

                data1 = self.dataframe[column1]
                data2 = self.dataframe[column2]

                # Keep only the rows where both columns hold a number.
                nans = np.isnan(data1) | np.isnan(data2)
                data1 = data1[~nans]
                data2 = data2[~nans]

                if self.subplot:
                    ax = axes[i, j]
                    if j > i:  # Don't bother plotting duplicates
                        ax.set_visible(False)
                        ax.set_frame_on(False)
                        continue

                    if j == i:  # Diagonal: histogram of a single column
                        if hists:
                            # The default bin count is computed per column
                            # and kept in a local, so it neither leaks into
                            # later panels nor reaches hist() as a float.
                            if num_bins is None:
                                bins = max(1, int(np.sqrt(len(data1))))
                            else:
                                bins = num_bins
                            if use_prettyplotlib:
                                plt.hist(ax, data1, bins, grid="y")
                            else:
                                ax.hist(data1, bins)
                                ax.grid(True)
                        else:
                            ax.set_visible(False)
                            ax.set_frame_on(False)

                        ax.set_xticklabels([])
                        ax.set_yticklabels([])
                    else:  # Below the diagonal: scatter of the two columns
                        if use_prettyplotlib:
                            plt.scatter(ax, data2, data1)
                        else:
                            ax.scatter(data2, data1)
                        ax.grid(True)
                        ax.xaxis.set_major_locator(MaxNLocator(5))
                        ax.yaxis.set_major_locator(MaxNLocator(5))

                    # Only the bottom row carries x tick labels and a label.
                    if i < num - 1:
                        ax.set_xticklabels([])
                    else:
                        for label in ax.get_xticklabels():
                            label.set_rotation(45)
                        ax.set_xlabel(column2)
                        ax.xaxis.set_label_coords(0.5, -0.3)

                    # Only the left column carries y tick labels and a label.
                    if j > 0:
                        ax.set_yticklabels([])
                    else:
                        for label in ax.get_yticklabels():
                            label.set_rotation(45)
                        ax.set_ylabel(column1)
                        ax.yaxis.set_label_coords(-0.3, 0.5)

                elif j < i:
                    # One stand-alone figure per unordered pair of columns.
                    fig, axes = plt.subplots(1)
                    if use_prettyplotlib:
                        plt.scatter(axes, data2, data1, grid="y")
                    else:
                        axes.scatter(data2, data1)
                        axes.grid(True)
                    axes.set_xlabel(column2)  # ADD UNITS!
                    axes.set_ylabel(column1)  # ADD UNITS!

                    if self.verbose:
                        p.show()
                    else:
                        fig.savefig(self.save_name + "_" + column1 + "_" +
                                    column2 + "." + self.save_type)
                        p.close()

        if self.subplot:
            # p.tight_layout()
            if self.verbose:
                p.show()
            else:
                fig.savefig(self.save_name + "_" + "scatter" + "." +
                            self.save_type)