Esempio n. 1
0
def trellis_plot_scatter_and_polyfit():
    """Draw a trellis of tip vs. total bill with a scatter and a polynomial fit.

    The data in ``TIPS_DATA`` is arranged in a grid keyed by the ``sex`` and
    ``smoker`` attributes. Each panel gets two layers: a scatter plot and a
    degree-2 polynomial fit. The result is rendered into the current pyplot
    figure.
    """
    trellis = rplot.RPlot(TIPS_DATA, x='total_bill', y='tip')

    # Layers are added in this order: grid first, then the two geometries.
    layers = (
        rplot.TrellisGrid(['sex', 'smoker']),
        rplot.GeomScatter(),
        rplot.GeomPolyFit(degree=2),
    )
    for layer in layers:
        trellis.add(layer)

    trellis.render(plt.gcf())
Esempio n. 2
0
 def setUp(self):
     """Load the tips data set and prepare layer and trellis fixtures.

     Reads ``data/tips.csv`` relative to ``curpath()``, sequences a
     Layer / GeomPoint / GeomPolyFit stack over it, and trellises that
     stack with three different TrellisGrid attribute pairs.
     """
     csv_path = os.path.join(curpath(), 'data/tips.csv')
     self.data = read_csv(csv_path, sep=',')

     stack = [
         rplot.Layer(self.data),
         rplot.GeomPoint(x='total_bill', y='tip'),
         rplot.GeomPolyFit(2),
     ]
     self.layers = rplot.sequence_layers(stack)

     # The '.' entries are passed to TrellisGrid verbatim, as in the
     # two-attribute case.
     groupings = (['sex', 'smoker'], ['sex', '.'], ['.', 'smoker'])
     grids = [rplot.TrellisGrid(by) for by in groupings]
     self.trellis1, self.trellis2, self.trellis3 = grids

     trellised = [grid.trellis(self.layers) for grid in grids]
     self.trellised1, self.trellised2, self.trellised3 = trellised
Esempio n. 3
0
 def test_sequence_layers(self):
     """Check that sequence_layers carries data and aesthetics to the last layer.

     After sequencing Layer -> GeomPoint -> GeomPolyFit, the final layer is
     expected to expose the x/y/size aesthetics given to the GeomPoint and
     the very same data object given to the first Layer. Sequencing a
     single layer must hand back that layer itself.
     """
     base = rplot.Layer(self.data)
     points = rplot.GeomPoint(x='SepalLength', y='SepalWidth',
                              size=rplot.ScaleSize('PetalLength'))
     fit = rplot.GeomPolyFit(2)

     sequenced = rplot.sequence_layers([base, points, fit])
     self.assertEqual(len(sequenced), 3)

     tail = sequenced[-1]
     for key, column in (('x', 'SepalLength'), ('y', 'SepalWidth')):
         self.assertEqual(tail.aes[key], column)
     self.assertTrue(isinstance(tail.aes['size'], rplot.ScaleSize))
     self.assertTrue(self.data is tail.data)

     single = rplot.sequence_layers([base])
     self.assertTrue(single[0] is base)
Esempio n. 4
0
def azureml_main(frame1):
    """Save exploratory plots of ``frame1`` as PNG files and return the frame.

    The following images are written to the current working directory when
    the local ``Azure`` flag is true:

    * ``scatter1.png`` - pair-wise scatter matrix of every column.
    * ``scatter<col>.png`` - ``Heating Load`` against each numeric feature
      column, as a trellis conditioned on ``Overall Height`` and
      ``Orientation``, with a degree-2 polynomial fit.
    * ``hists_<col>.png`` - side-by-side histograms of each column for the
      rows where ``Overall Height`` is 7 and where it is 3.5.
    * ``box_<col>.png`` - box plot of each numeric column grouped by
      ``Overall Height``.

    Parameters
    ----------
    frame1 : pandas.DataFrame
        Must contain the columns indexed below: the six feature columns,
        ``Heating Load``, ``Overall Height`` and ``Orientation``.

    Returns
    -------
    pandas.DataFrame
        The data frame that was plotted.
    """
    ## import libraries
    import matplotlib
    matplotlib.use('agg')  # Set backend

    # NOTE(review): pandas.tools.plotting / pandas.tools.rplot exist only in
    # old pandas releases (the rplot docs linked below are for 0.14.1) -
    # confirm the pandas version of the target runtime.
    from pandas.tools.plotting import scatter_matrix
    import pandas.tools.rplot as rplot
    import matplotlib.pyplot as plt
    import numpy as np

    ## Create a pair-wise scatter plot
    ## ref: http://matplotlib.org/users/pyplot_tutorial.html
    Azure = True

    ## If in Azure, frame1 is passed to function
    if not Azure:
        # Local-run fallback: `eeframe` must already exist at module level.
        frame1 = eeframe

    numeric_dtypes = [np.int64, np.int32, np.float64]

    # first figure 1,
    fig1 = plt.figure(1, figsize=(10, 10))
    # returns the current axes
    ax = fig1.gca()
    scatter_matrix(frame1, alpha=0.3, diagonal='kde', ax=ax)
    plt.show()
    if Azure:
        fig1.savefig('scatter1.png')
        # Release the figure once it is on disk; otherwise every figure
        # created by this function stays alive in pyplot's registry.
        plt.close(fig1)

    ## Create conditioned scatter plots.
    feature_cols = [
        "Relative Compactness", "Surface Area", "Wall Area", "Roof Area",
        'Glazing Area', "Glazing Area Distribution"
    ]

    for col in feature_cols:
        if frame1[col].dtype not in numeric_dtypes:
            continue

        fig = plt.figure(figsize=(12, 6))
        # clear the current figure with clf() and the current axes with cla()
        fig.clf()
        ax = fig.gca()
        # http://pandas.pydata.org/pandas-docs/version/0.14.1/rplot.html
        # RPlot is a flexible API for producing Trellis plots. These plots
        # allow you to arrange data in a rectangular grid by values of
        # certain attributes.
        plot = rplot.RPlot(frame1, x=col, y='Heating Load')
        plot.add(rplot.TrellisGrid(['Overall Height', 'Orientation']))
        plot.add(rplot.GeomScatter())
        plot.add(rplot.GeomPolyFit(degree=2))
        ax.set_xlabel(col)
        ax.set_ylabel('Heating Load')
        plot.render(plt.gcf())

        if Azure:
            fig.savefig('scatter' + col + '.png')
            plt.close(fig)

    ## Histograms of features by Overall Height
    hist_cols = feature_cols + ["Heating Load"]
    for col in hist_cols:
        # http://pandas.pydata.org/pandas-docs/stable/indexing.html
        # Boolean row mask + column label: .loc is the label-based
        # equivalent of the deprecated .ix for this access pattern.
        height = frame1['Overall Height']
        temp7 = frame1.loc[height == 7, col].values
        temp35 = frame1.loc[height == 3.5, col].values
        fig = plt.figure(figsize=(12, 6))
        fig.clf()
        # http://python4mpia.github.io/plotting/advanced.html
        ax7 = fig.add_subplot(1, 2, 1)
        ax35 = fig.add_subplot(1, 2, 2)
        # http://matplotlib.org/api/pyplot_api.html#module-matplotlib.pyplot
        ax7.hist(temp7, bins=20)
        ax7.set_title('Histogram of ' + col + '\n for Overall Height of 7')
        ax35.hist(temp35, bins=20)
        ax35.set_title('Histogram of ' + col +
                       '\n for Overall Height of 3.5')
        if Azure:
            fig.savefig('hists_' + col + '.png')
            plt.close(fig)

    ## Create boxplots.
    for col in hist_cols:
        if frame1[col].dtype not in numeric_dtypes:
            continue

        fig = plt.figure(figsize=(6, 6))
        fig.clf()
        ax = fig.gca()
        frame1[[col, 'Overall Height']].boxplot(column=[col],
                                                ax=ax,
                                                by=['Overall Height'])
        ax.set_xlabel('')
        if Azure:
            fig.savefig('box_' + col + '.png')
            plt.close(fig)

    ## In Azure, the function returns the data frame
    return frame1
Esempio n. 5
0
def azureml_main(frame1):
    """Save exploratory plots of ``frame1`` as PNG files and return the frame.

    The following images are written to the current working directory:

    * ``scatter1.png`` - pair-wise scatter matrix of every column.
    * ``scatter<col>.png`` - ``Heating Load`` against each numeric feature
      column, as a trellis conditioned on ``Overall Height`` and
      ``Orientation``, with a degree-2 polynomial fit.
    * ``hists_<col>.png`` - side-by-side histograms of each column for the
      rows where ``Overall Height`` is 7 and where it is 3.5.
    * ``box_<col>.png`` - box plot of each numeric column grouped by
      ``Overall Height``.

    Parameters
    ----------
    frame1 : pandas.DataFrame
        Must contain the columns indexed below: the twelve feature columns,
        ``Heating Load``, ``Overall Height`` and ``Orientation``.

    Returns
    -------
    pandas.DataFrame
        The same ``frame1`` object that was passed in.
    """
    ## import libraries
    import matplotlib
    matplotlib.use('agg')  # Set backend

    # NOTE(review): pandas.tools.plotting / pandas.tools.rplot exist only in
    # old pandas releases - confirm the pandas version of the target runtime.
    from pandas.tools.plotting import scatter_matrix
    import pandas.tools.rplot as rplot
    import matplotlib.pyplot as plt
    import numpy as np

    numeric_dtypes = [np.int64, np.int32, np.float64]

    ## Create a pair-wise scatter plot
    fig1 = plt.figure(1, figsize=(10, 10))
    ax = fig1.gca()
    scatter_matrix(frame1, alpha=0.3, diagonal='kde', ax=ax)
    plt.show()
    fig1.savefig('scatter1.png')
    # Release the figure once it is on disk; otherwise every figure created
    # by this function stays alive in pyplot's registry.
    plt.close(fig1)

    ## Create conditioned scatter plots.
    feature_cols = [
        "Relative Compactness", "Surface Area", "Wall Area",
        "Relative Compactness Sqred", "Surface Area Sqred", "Wall Area Sqred",
        "Relative Compactness 3", "Surface Area 3", "Wall Area 3", "Roof Area",
        'Glazing Area', "Glazing Area Distribution"
    ]

    for col in feature_cols:
        if frame1[col].dtype not in numeric_dtypes:
            continue

        fig = plt.figure(figsize=(12, 6))
        fig.clf()
        ax = fig.gca()
        plot = rplot.RPlot(frame1, x=col, y='Heating Load')
        plot.add(rplot.TrellisGrid(['Overall Height', 'Orientation']))
        plot.add(rplot.GeomScatter())
        plot.add(rplot.GeomPolyFit(degree=2))
        ax.set_xlabel(col)
        ax.set_ylabel('Heating Load')
        plot.render(plt.gcf())

        fig.savefig('scatter' + col + '.png')
        plt.close(fig)

    ## Histograms of each feature (and Heating Load) by Overall Height
    hist_cols = feature_cols + ["Heating Load"]
    for col in hist_cols:
        # Boolean row mask + column label: .loc is the label-based
        # equivalent of the deprecated .ix for this access pattern.
        height = frame1['Overall Height']
        temp7 = frame1.loc[height == 7, col].values
        temp35 = frame1.loc[height == 3.5, col].values
        fig = plt.figure(figsize=(12, 6))
        fig.clf()
        ax7 = fig.add_subplot(1, 2, 1)
        ax35 = fig.add_subplot(1, 2, 2)
        ax7.hist(temp7, bins=20)
        ax7.set_title('Histogram of ' + col + '\n for Overall Height of 7')
        ax35.hist(temp35, bins=20)
        ax35.set_title('Histogram of ' + col +
                       '\n for Overall Height of 3.5')
        fig.savefig('hists_' + col + '.png')
        plt.close(fig)

    ## Create boxplots.
    for col in hist_cols:
        if frame1[col].dtype not in numeric_dtypes:
            continue

        fig = plt.figure(figsize=(6, 6))
        fig.clf()
        ax = fig.gca()
        frame1[[col, 'Overall Height']].boxplot(column=[col],
                                                ax=ax,
                                                by=['Overall Height'])
        ax.set_xlabel('')
        fig.savefig('box_' + col + '.png')
        plt.close(fig)

    ## Return the data frame
    return frame1
Esempio n. 6
0
plt.figure()
plot = rplot.RPlot(tips_data, x='total_bill', y='tip')
# Grid by sex and smoker, then a density layer in every panel.
for _layer in (rplot.TrellisGrid(['sex', 'smoker']), rplot.GeomDensity()):
    plot.add(_layer)
plot.render(plt.gcf())


# Trellis plot with a scatter layer and a degree-2 polynomial fit line

# In[8]:

plt.figure()
plot = rplot.RPlot(tips_data, x='total_bill', y='tip')
for _layer in (
        rplot.TrellisGrid(['sex', 'smoker']),
        rplot.GeomScatter(),
        rplot.GeomPolyFit(degree=2),
):
    plot.add(_layer)
plot.render(plt.gcf())


# Trellis chart with a scatter layer and a 2D kernel density plot

# In[9]:

plt.figure()
plot = rplot.RPlot(tips_data, x='total_bill', y='tip')
for _layer in (
        rplot.TrellisGrid(['sex', 'smoker']),
        rplot.GeomScatter(),
        rplot.GeomDensity2D(),
):
    plot.add(_layer)
plot.render(plt.gcf())