예제 #1
0
def present_compared_data(match_full_data_df, name, results_dir):
    """Save the expected-vs-actual frequency comparison as a PNG table.

    Rows are de-duplicated on ``ref_e``, a final ``sum`` row holding the
    column totals is appended, the three frequency columns are rendered as
    percentages and the table is written to
    ``<results_dir>/<name>_emirge_smurf_compare.png``.

    :param match_full_data_df: DataFrame with the columns ``ref_e``,
        ``Header.ref_id + '_e'``, ``count``, ``actual_prior``, ``prior_e``
        and ``actual_prior_diff``.
    :param name: prefix of the generated file name.
    :param results_dir: directory the image is saved into.
    """
    def as_percent(val):
        return "{0:.2f}%".format(val * 100)

    match_full_data_df = match_full_data_df.drop_duplicates('ref_e')
    presented_data = match_full_data_df[[Header.ref_id + '_e', 'count', 'actual_prior', 'prior_e', 'actual_prior_diff']].copy()
    presented_data.rename(columns={'count': '# of refs',
                                   'actual_prior': 'actual frequency',
                                   'prior_e': 'expected frequency',
                                   'actual_prior_diff': 'frequency difference'}, inplace=True)
    presented_data.index = range(1, len(presented_data) + 1)

    # Totals row; the id and the difference columns have no meaningful total.
    presented_data.loc[len(presented_data) + 1] = [
        None,
        sum(presented_data['# of refs']),
        sum(presented_data['actual frequency']),
        sum(presented_data['expected frequency']),
        None,
    ]

    for column in ('expected frequency', 'actual frequency', 'frequency difference'):
        presented_data[column] = presented_data[column].apply(as_percent)

    # list(...) is required: on Python 3 a range object cannot be
    # concatenated with a list (the original expression raised TypeError).
    presented_data.index = list(range(1, len(presented_data))) + ['sum']

    ax = plt.subplot(111, frame_on=False)  # no visible frame
    ax.xaxis.set_visible(False)  # hide the x axis
    ax.yaxis.set_visible(False)  # hide the y axis

    table(ax, presented_data, loc='center')

    res_name = name + '_emirge_smurf_compare.png'
    plt.tight_layout()

    im_path = os.path.join(results_dir, res_name)

    plt.savefig(im_path, bbox_inches='tight')
    plt.clf()  # leave the shared current figure empty for the next caller
    logging.info("saved results to {}".format(im_path))
예제 #2
0
def draw_tabular():
    """Draw the result of the currency query as a table, save it and show it.

    The query selects the distinct symbol/name of every currency whose
    ``currencyId`` does not appear in ``countries.currencyId1``. The result
    is read through the module-level ``db`` connection.

    If rendering raises ``ValueError`` the whole procedure is retried once
    with ``index=False`` passed to ``table`` (kept from the original code).
    """
    query = (
        "SELECT DISTINCT currencySymbol ,  currencyName FROM currencies  "
        "WHERE currencies.currencyId Not IN "
        "(SELECT DISTINCT countries.currencyId1 FROM countries) "
    )
    output_path = 'C:/Users/MoniSingh/Desktop/cervello/tabular.png'

    def render(**table_kwargs):
        df = pd.read_sql_query(query, db)
        ax = plt.subplot(111, frame_on=False)  # no visible frame
        ax.xaxis.set_visible(False)  # hide the x axis
        ax.yaxis.set_visible(False)  # hide the y axis

        table(ax, df, loc='center', **table_kwargs)
        # Save BEFORE show(): with an interactive backend the figure is
        # discarded once the window is closed, so saving afterwards can
        # write an empty image.
        plt.savefig(output_path)
        plt.show()

    try:
        render()
    except ValueError:
        render(index=False)
예제 #3
0
def data_acq(key,acq_freq,upload_freq, cnt):
    """Take ``acq_freq`` temperature samples, then hand over to ``data_upload``.

    Each sample is appended to the module-level lists ``c``, ``date`` and
    ``f``; the accumulated readings are printed and re-rendered to
    ``datatemp.png`` after every sample, followed by a pause of
    ``upload_freq`` seconds.

    :param key: passed through to ``data_upload``.
    :param acq_freq: number of samples to take before uploading.
    :param upload_freq: seconds to sleep after each sample.
    :param cnt: ignored; the counter always restarts at 0 (kept so existing
        callers keep working).
    """
    cnt = 0
    # ``<`` instead of ``!=`` so that a negative or fractional acq_freq
    # cannot turn this into an endless loop.
    while cnt < acq_freq:
        cel, fah = tempRead()
        _date = str(datetime.now().strftime('%d-%m-%Y %H:%M:%S'))
        c.append(cel)
        date.append(_date)
        f.append(fah)

        df_cel = pd.DataFrame({'celsius': c})
        df_fah = pd.DataFrame({'fahrenheit': f})
        df_date = pd.DataFrame({'Date_Time': date})
        # dataframe with datetime and temperature
        df = combineFrame(combineFrame(df_date, df_cel), df_fah)
        # print() call form works on both Python 2 and Python 3
        print(df)

        ax = plt.subplot(111, frame_on=False)
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        table(ax, df, loc='upper right')
        plt.savefig('datatemp.png', transparent=True)  # save the dataframe as a png file
        time.sleep(upload_freq)
        cnt = cnt + 1
    # upload the data recorded once all samples have been taken
    data_upload(key, acq_freq, upload_freq)
예제 #4
0
def sk_kurt(data):
    """Label the skewness of each numeric CSV column and save it as an image.

    The CSV is read with ``,`` as separator, the skewness of every numeric
    column is computed and mapped to ``'positively skewed'``,
    ``'negatively skewed'`` or ``'symmetric'``; the resulting two-column
    table is written to ``skewness.png``.

    :param data: path or buffer accepted by ``pandas.read_csv``.
    """
    # The original call was read_csv(data, sep, header) with header = None.
    # Positionally that third argument bound to ``delimiter`` (not
    # ``header``), so the header row has always been inferred; that effective
    # behaviour is kept. Keyword form is required by pandas >= 2.0.
    df = pd.read_csv(data, sep=',')

    # numeric_only=True: recent pandas raises on non-numeric columns instead
    # of silently skipping them.
    skew_values = df.skew(numeric_only=True)

    def label(value):
        if value > 0:
            return 'positively skewed'
        if value < 0:
            return 'negatively skewed'
        # exactly zero, and also NaN (both comparisons are False for NaN)
        return 'symmetric'

    # Built in one go: DataFrame.append was removed in pandas 2.0.
    empdf = pd.DataFrame({
        'column_name': list(skew_values.keys()),
        'skewness': [label(value) for value in skew_values],
    })

    ax = plt.subplot(111)
    table(ax, empdf, loc='center')
    ax.set_axis_off()
    plt.savefig('skewness.png')
예제 #5
0
파일: graph.py 프로젝트: D-Cube/SATYARTHI
    def top_sources(self, human, bot, path):
        """Save a table with the bot/human percentage of the top sources.

        ``human`` and ``bot`` are concatenated column-wise; the three largest
        entries by human count and the three largest by bot count are stacked
        (humans first) and, for each of them, the share of bots and humans is
        shown with two decimals.

        :param human: per-source values for humans (pandas object).
        :param bot: per-source values for bots (pandas object).
        :param path: destination of the saved figure.
        """
        nb_sources = 3

        counts = pd.concat([human, bot], axis=1)
        counts.columns = ['humans', 'bots']

        # top entries for each type, missing counts treated as 0
        leaders = [
            counts.sort_values(by=kind, ascending=False).head(nb_sources).fillna(0)
            for kind in ('humans', 'bots')
        ]
        mixed_sources = pd.concat(leaders)

        bots_share = mixed_sources['bots'] / (mixed_sources['bots'] + mixed_sources['humans']) * 100
        mixed_sources['% bots'] = bots_share
        mixed_sources['% humans'] = 100 - bots_share

        formatted = mixed_sources.applymap(lambda x: '%.2f' % x)
        sources = formatted.drop(["humans", "bots"], axis=1)

        pl.figure(figsize=(15,5))

        ax1 = plt.subplot(111, frame_on=False)
        ax1.set_title("Top {} sources per type.".format(nb_sources))
        for hidden in (ax1.xaxis, ax1.yaxis):
            hidden.set_visible(False)

        table(ax1, sources, loc="center")

        pl.savefig(path)
예제 #6
0
def graphical_analysis(data,dv,path='',regression=True):
    """Save one PNG per column relating that column to the dependent variable.

    Columns are split by ``numerical_categorical_division(data)``.

    * ``regression=True``: line plot of ``dv`` against each numerical column,
      box plot of ``dv`` grouped by each categorical column.
    * ``regression=False``: box plot of each numerical column grouped by
      ``dv``, and for each categorical column a table with the cross
      tabulation of ``dv`` vs. the column expressed as column percentages.

    :param data: DataFrame to analyse.
    :param dv: name of the dependent variable column (skipped itself).
    :param path: prefix prepended to ``<column>.png``.
    :param regression: selects the plot family, see above.
    """
    def save_and_close(fig, col):
        fig.savefig(path+col+'.png',dpi=1000)
        # Release the figure: one figure per column otherwise stays alive.
        plt.close(fig)

    numerical,categorical = numerical_categorical_division(data)
    if regression:
        for col in numerical:
            if col != dv:
                save_and_close(data.plot(col,dv).get_figure(), col)
        for col in categorical:
            if col != dv:
                save_and_close(data.boxplot(dv,by=col).get_figure(), col)
    else:
        for col in numerical:
            if col != dv:
                save_and_close(data.boxplot(col,by=dv).get_figure(), col)
        for col in categorical:
            if col != dv:
                f = pd.crosstab(data[dv],data[col],dropna=False)
                for cat in f.columns:
                    # float() keeps this a true division on Python 2 as well
                    f[cat] = f[cat] / float(f[cat].sum()) * 100

                # A dedicated figure per table: drawing on the implicit
                # current figure put the table on top of the last box plot
                # (and on top of the previous tables).
                fig = plt.figure()
                ax = fig.add_subplot(111, frame_on=False) # no visible frame
                ax.xaxis.set_visible(False)  # hide the x axis
                ax.yaxis.set_visible(False)  # hide the y axis

                table(ax, f)

                save_and_close(fig, col)
예제 #7
0
    def make_table(self, value, ax, metric_suppression=''):
        '''
        Draw the ``describe()`` statistics of ``value`` as a table on ``ax``.

        The statistics are rounded to two decimals, the axes decorations are
        hidden and the table uses font size 12 with rows stretched by 1.2.

        :param value: information measured (object providing ``describe()``)
        :param ax: axes used
        :param metric_suppression: label(s) dropped from the ``describe()``
            result before it is drawn; nothing is dropped when falsy
        '''
        summary = value.describe()
        if metric_suppression:
            summary = summary.drop(metric_suppression)

        the_table = table(ax, np.round(summary, 2), loc='center')

        for hidden in (ax.xaxis, ax.yaxis):
            hidden.set_visible(False)
        ax.axis('off')

        the_table.set_fontsize(12)
        the_table.scale(1, 1.2)
def tournament(mygame, nplayers, rounds, S_self, m0, p_one):
    """Play a round-robin tournament and save the score table as a PNG.

    Strategies ``0 .. nplayers-1`` plus one extra entrant (index
    ``nplayers``, playing strategy ``S_self``) meet pairwise for ``rounds``
    rounds. ``scores_pair[a][b]`` holds the total score ``a`` obtained
    against ``b``. The table (with a per-row ``average`` column, i.e. row sum
    divided by ``nplayers``) is written to a fresh, timestamp-named file in
    ``static/`` after removing the PNG files already there.

    :param mygame: object providing ``player(strategy, other_moves,
        own_moves, m0, p_one)`` and ``onegame(moveA, moveB)``.
    :param nplayers: number of built-in strategies.
    :param rounds: rounds per pairing; 0 yields an all-zero table.
    :param S_self: strategy id used by the extra entrant.
    :param m0: forwarded to ``mygame.player``.
    :param p_one: forwarded to ``mygame.player``.
    :return: ``"<plotfile>,<scores>"`` where ``<scores>`` are the extra
        entrant's totals against each strategy, separated by spaces.
    """
    scores_pair = np.zeros((nplayers + 1, nplayers + 1))
    for i in range(nplayers):
        for j in range(i + 1, nplayers + 1):
            # Index ``nplayers`` stands for the extra entrant.
            strategy_b = j if j < nplayers else S_self
            movesA = []
            movesB = []
            # Running totals instead of cumsum(...)[-1], which raised
            # IndexError when no round was played.
            totalA = 0
            totalB = 0
            for _ in range(rounds):
                moveA = mygame.player(i, movesB, movesA, m0, p_one)
                moveB = mygame.player(strategy_b, movesA, movesB, m0, p_one)
                scoreA, scoreB = mygame.onegame(moveA, moveB)
                movesA.append(moveA)
                movesB.append(moveB)
                totalA += scoreA
                totalB += scoreB

            scores_pair[i][j] = totalA
            scores_pair[j][i] = totalB

    fig = plt.figure(figsize=(6, 2))
    df = pd.DataFrame(scores_pair)
    df['average'] = df.sum(axis=1)
    df.loc[:, 'average'] *= 1.0 / nplayers

    ax = fig.add_subplot(111, frame_on=False)  # no visible frame
    ax.xaxis.set_visible(False)  # hide the x axis
    ax.yaxis.set_visible(False)  # hide the y axis
    table(ax, np.round(df, 1), loc='center')

    if not os.path.isdir('static'):
        os.mkdir('static')
    else:
        # only the most recent plot is kept
        for filename in glob.glob(os.path.join('static', '*.png')):
            os.remove(filename)
    plotfile = os.path.join('static', str(time.time()) + '.png')
    fig.savefig(plotfile)
    # Close the figure; otherwise every call leaves one more open figure.
    plt.close(fig)

    final_score = [scores_pair[nplayers][i] for i in range(nplayers)]
    return plotfile + "," + ' '.join(map(str, final_score))
예제 #9
0
    def __df_to_png(self, df, file_path):
        """Render ``df`` as a table image at ``file_path``.

        :param df: DataFrame to draw.
        :param file_path: destination passed to ``savefig``.
        :return: ``self.response`` (not modified here).
        """
        # The rc default has to be in place BEFORE the figure is created;
        # set afterwards it only affected figures of later calls.
        matplotlib.rc('figure', dpi=160)
        # Start from a fresh, cleared figure
        subplots(clear=True)

        ax = plt.subplot(111, frame_on=False)  # no visible frame
        ax.xaxis.set_visible(False)  # hide the x axis
        ax.yaxis.set_visible(False)  # hide the y axis

        table(ax, df, loc='center')  # where df is your data frame
        savefig(file_path)
        # Close the current figure (the one just saved) so repeated calls do
        # not accumulate open figures.
        plt.close()
        return self.response
예제 #10
0
def save_as_table(df):
    """Draw ``df`` as a table on a frameless axes and save it to ``mytable.png``.

    :param df: DataFrame to render.
    """
    import matplotlib.pyplot as plt
    try:
        from pandas.plotting import table
    except ImportError:  # pandas < 0.20 only had pandas.tools.plotting
        from pandas.tools.plotting import table

    ax = plt.subplot(111, frame_on=False)  # no visible frame
    ax.xaxis.set_visible(False)  # hide the x axis
    ax.yaxis.set_visible(False)  # hide the y axis

    table(ax, df)  # where df is your data frame

    plt.savefig('mytable.png')
예제 #11
0
파일: plottings.py 프로젝트: lebbai/ROL
def plottable(df, path='/Users/razzak_lebbai/junk/test.png'):
    """Draw ``df`` as a table at the top of a frameless axes, save and show it.

    :param df: DataFrame to render.
    :param path: file the figure is saved to before it is shown.
    """
    axes = plt.subplot(111, frame_on=False)  # no visible frame
    for hidden in (axes.xaxis, axes.yaxis):
        hidden.set_visible(False)  # hide both axis lines and ticks

    table(axes, df, loc='top')
    plt.tight_layout()

    plt.savefig(path)
    plt.show()
예제 #12
0
def plot_table(df, width=0.3, height=0.15, size=12):
    """Draw ``df`` as a centered table on the current axes and resize its cells.

    :param df: DataFrame to render.
    :param width: width applied to every cell.
    :param height: height applied to every cell.
    :param size: font size applied to every cell.
    :return: the table most recently added to the current axes.
    """
    axes = plt.gca()
    table(axes, df, loc='center')

    # The table just drawn is the last one registered on the axes
    the_table = axes.tables[-1]

    for cell in the_table.get_celld().values():
        cell.set_height(height)
        cell.set_width(width)
        cell.set_fontsize(size)

    return the_table
예제 #13
0
파일: plotting.py 프로젝트: ghl3/bamboo
def plot_table(df, width=0.3, height=0.15, size=12):
    """Render ``df`` as a table centered on the current axes.

    Every cell of the table gets the given ``width``, ``height`` and font
    ``size``.

    :return: the last table attached to the current axes.
    """
    current_axes = plt.gca()
    table(current_axes, df, loc='center')

    # Grab the most recent table from the current axis
    the_table = current_axes.tables[-1]

    cells = the_table.get_celld()
    for cell in cells.values():
        cell.set_height(height)
        cell.set_width(width)
        cell.set_fontsize(size)

    return the_table
def calculate_illnesses(age, gender, product_category, symptoms_list):
    """Predict outcome probabilities and save them as a table image.

    The inputs are converted with ``to_array``, fed to the pickled model's
    ``predict_proba`` and the probabilities of the first row (as
    percentages) are tabulated next to the outcome names.

    :return: path of the saved image
        (``get_graphs_filepath('prob_table_df.png')``).
    """
    figure_filepath = get_graphs_filepath('prob_table_df.png')

    prediction_array = to_array(age, gender, product_category, symptoms_list)

    # An explicit list keeps the label order fixed on every Python version
    # (the original relied on dict key order).
    # NOTE(review): assumes the model's predict_proba columns follow exactly
    # this order - confirm against the fitted model's ``classes_``.
    illnesses_list = [
        'death',
        'life_threatening',
        'serious_injuries_illness',
        'disability',
        'other_serious__important_medical_events_',
        'congenital_anomaly',
        'req_intervention_to_prvnt_perm_imprmnt',
        'hospitalization',
        'visited_an_er',
        'visited_a_health_care_provider',
    ]

    # NOTE: unpickling executes code from the file; it must only ever point
    # at a trusted, locally produced model.
    with open(get_pickle_filepath('forest_tuned_lowbias_fitted.pkl'),
              'rb') as picklefile:
        forest_tuned_lowbias_fitted = pickle.load(picklefile)

    illness_probabilities = forest_tuned_lowbias_fitted.predict_proba(
        prediction_array)
    illness_probabilities_series = pd.Series(
        list(illness_probabilities[0])) * 100
    possible_illnesses_table = pd.DataFrame({
        'Possible Outcome': illnesses_list,
        'Probability': illness_probabilities_series,
    })

    # Own figure per call: drawing on the implicit current figure stacked a
    # new table on top of the previous one each time this ran.
    fig = plt.figure()
    ax = fig.add_subplot(111, frame_on=False)  # no visible frame
    ax.xaxis.set_visible(False)  # hide the x axis
    ax.yaxis.set_visible(False)  # hide the y axis

    table(ax, possible_illnesses_table, loc='center')
    fig.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
    fig.savefig(figure_filepath)
    plt.close(fig)

    return figure_filepath
예제 #15
0
    def _make_table(self, ax, df, title, height=None):
        """Draw ``df`` (index included) as a table on ``ax``.

        Header cells (index and column label cells) are shaded grey, the
        corner cells and the automatic row-label column are hidden.

        :param ax: axes to draw on; hidden entirely when ``df`` is None.
        :param df: DataFrame to draw, or None.
        :param title: axes title.
        :param height: cell height; defaults to ``1 / (len(df) + 1)`` where
            ``df`` is the frame after ``_insert_index``.
        """
        if df is None:
            ax.set_visible(False)
            return

        try:
            import pandas.plotting as plotting
        except ImportError:  # pandas < 0.20 only had pandas.tools.plotting
            import pandas.tools.plotting as plotting

        idx_nlevels = df.index.nlevels
        col_nlevels = df.columns.nlevels
        # must be converted here to get index levels for colorization
        df = self._insert_index(df)
        tb = plotting.table(ax, df, loc=9)
        tb.set_fontsize(self.font_size)

        if height is None:
            height = 1.0 / (len(df) + 1)

        # Plain dict iteration; works on Python 2 and 3 without the
        # pandas.compat helper.
        for (r, c), cell in tb.properties()['celld'].items():
            if c == -1:
                # automatic row-label column
                cell.set_visible(False)
            elif r < col_nlevels and c < idx_nlevels:
                # top-left corner shared by index and column headers
                cell.set_visible(False)
            elif r < col_nlevels or c < idx_nlevels:
                cell.set_facecolor('#AAAAAA')
            cell.set_height(height)

        ax.set_title(title, size=self.font_size)
        ax.axis('off')
예제 #16
0
    def _make_table(self, ax, df, title, height=None):
        """Render ``df`` together with its index as a table on ``ax``.

        Index/column header cells get a grey background; the corner cells
        and the automatically generated row-label column are hidden.

        :param ax: target axes; made invisible when ``df`` is None.
        :param df: DataFrame to render, or None.
        :param title: title set on the axes.
        :param height: height of every cell; when None it is
            ``1 / (len(df) + 1)`` computed after ``_insert_index``.
        """
        if df is None:
            ax.set_visible(False)
            return

        try:
            import pandas.plotting as plotting
        except ImportError:  # pandas < 0.20 only had pandas.tools.plotting
            import pandas.tools.plotting as plotting

        idx_nlevels = df.index.nlevels
        col_nlevels = df.columns.nlevels
        # must be converted here to get index levels for colorization
        df = self._insert_index(df)
        tb = plotting.table(ax, df, loc=9)
        tb.set_fontsize(self.font_size)

        if height is None:
            height = 1.0 / (len(df) + 1)

        # dict.items() replaces pandas.compat.iteritems, which no longer
        # exists in current pandas.
        cells = tb.properties()['celld']
        for (r, c), cell in cells.items():
            if c == -1:
                # automatic row-label column
                cell.set_visible(False)
            elif r < col_nlevels and c < idx_nlevels:
                # corner between index and column headers
                cell.set_visible(False)
            elif r < col_nlevels or c < idx_nlevels:
                cell.set_facecolor('#AAAAAA')
            cell.set_height(height)

        ax.set_title(title, size=self.font_size)
        ax.axis('off')
def plot_table(title, cells, column_names, row_names, save=None):
    """Draw ``cells`` as a labelled table, then save or show the figure.

    :param title: bold figure title.
    :param cells: 2-D data accepted by ``pandas.DataFrame``.
    :param column_names: column labels.
    :param row_names: row labels.
    :param save: output file; its directory is created when missing. When
        None the figure is shown instead.
    """
    f, ax = matplotlib.pyplot.subplots(1)
    try:
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        ax.set_frame_on(False)
        f.suptitle(title, fontsize=14, fontweight='bold')

        df = pandas.DataFrame(cells)
        df.columns = column_names
        df.index = row_names
        tab = table(
            ax=ax,
            data=df,
            colLabels=column_names,
            rowLabels=row_names,
            loc='upper right',
        )

        tab.auto_set_font_size(False)
        tab.set_fontsize(12)

        print(df)

        if save is not None:
            print('Saving table into: {}'.format(save))
            target_dir = os.path.dirname(save)
            # A bare file name has no directory part to create.
            if target_dir and not os.path.isdir(target_dir):
                try:
                    os.makedirs(target_dir)
                except OSError:
                    # Tolerate only "somebody else created it meanwhile";
                    # real failures are no longer swallowed silently.
                    if not os.path.isdir(target_dir):
                        raise
            matplotlib.pyplot.savefig(save)
        else:
            matplotlib.pyplot.show()
    finally:
        # Always release the figure, also when drawing or saving failed.
        matplotlib.pyplot.close(f)
예제 #18
0
파일: flow.py 프로젝트: jgs1202/eval-GIB
def makefile():
    """Compute the flow matrices and dump each one to its own JSON file.

    Reads ``fixations.json`` and ``abst_info.json`` from
    ``../src/trajectory/``, passes them to ``calcFlow`` and writes
    ``matrixes[task][k]`` to ``../src/flows/task<task+1>/<k>.json``.
    Also (re)binds the module globals ``origin``, ``outpath`` and
    ``srcpath``.
    """
    global origin, outpath, srcpath
    origin = '../src/Analyze/'
    outpath = '../src/trajectory/'
    srcpath = '../src/data/'

    # Context managers so the input files are closed deterministically.
    with open('../src/trajectory/fixations.json') as fixations_file:
        data = json.load(fixations_file)
    with open('../src/trajectory/abst_info.json') as abst_file:
        abst_data = json.load(abst_file)
    matrixes = calcFlow(data, abst_data)
    print(len(matrixes[0]))

    for task in range(len(matrixes)):
        for matrix in range(len(matrixes[task])):
            flow = matrixes[task][matrix]
            row = ['AOI ' + str(i) for i in range(len(flow))]
            column = list(row)
            fig, ax = plt.subplots(1, 1)
            flow_table = plotting.table(ax, pd.DataFrame(flow), rowLabels=row, colLabels=column, loc='center')
            flow_table.scale(1, 1)
            ax.axis('off')
            # NOTE(review): the figure is never saved; it is only built and
            # closed again - confirm whether a savefig call is missing.
            plt.close(fig)
            out_name = '../src/flows/task' + str(task + 1) + '/' + str(matrix) + '.json'
            with open(out_name, 'w') as f:
                json.dump(flow.tolist(), f, ensure_ascii=False, indent=4, sort_keys=True, separators=(',', ': '))
예제 #19
0
def plot_good_ratio(target, feature, bins=10, accumulative=False, ascending=True, show_table=True, **kwargs):
    """Plot the P(target = 1) probability computed by good_ratio.

    :param target: Series indicating good / bad.
    :param feature: feature Series whose distribution is computed.
    :param bins: number of intervals the feature is cut into. default: 10
    :param accumulative: whether to compute the cumulative probability. If
        False only the probability inside each interval is computed, if True
        the cumulative probability is computed. default: False
    :param ascending: when accumulative == True, decides whether to
        accumulate starting from the large or from the small values. True
        accumulates from the small values, False from the large values.
    :param show_table: whether to show the data table per feature interval.
    """
    outcome = good_ratio(target, feature, bins, accumulative, ascending)
    if accumulative:
        # the accumulative result carries one extra (unused) middle element
        base, _, ratio_df = outcome
        title_str = "good ratio of %s / accumulative / ascending = %s" % (feature.name, ascending)
    else:
        base, ratio_df = outcome
        title_str = "good ratio of %s" % (feature.name)

    ax = ratio_df["ratio"].plot(**kwargs)
    ax.axhline(base)
    ax.set_title(title_str)
    ax.set_xlabel("%s_cut" % feature.name)
    ax.set_ylabel("good ratio")

    if not show_table:
        return

    # The table below the plot takes over the role of the x axis labelling
    ax.set_xlabel("")
    ax.get_xaxis().set_ticklabels([])

    ratio_df["ratio"] = ratio_df["ratio"].round(3)
    shown = ratio_df[["ratio", "good", "count"]].T
    t = table(ax, shown)
    t.scale(1.0, 2.0)
예제 #20
0
def summarize_week(week):
    """Build the three-panel weekly summary figure.

    For every entry of ``week['collected']`` the result of
    ``summarizeCOMMA`` (positive values, negative values, dates and a
    dict of summary statistics) is joined into per-category columns. The
    figure shows the positive values (top), the negative values (middle)
    and a table of the rounded statistics (bottom).

    :param week: mapping with the keys ``'collected'`` (iterable of mappings
        that each have a ``'category'``) and ``'_id'``.
    :return: the matplotlib figure.
    """
    # Aliased as mdates so it does not rebind the ``dates`` loop variable.
    import matplotlib.dates as mdates

    def with_category(frame, values, index, category):
        column = pd.DataFrame(data=values, index=index, columns=[category])
        return frame.join(column, how='outer')

    matplotlib.style.use('ggplot')
    dfPos = pd.DataFrame()
    dfNeg = pd.DataFrame()
    descf = pd.DataFrame()
    for comma in week['collected']:
        pos, neg, dates, describe = summarizeCOMMA(comma)
        category = comma['category']
        dfPos = with_category(dfPos, pos, dates, category)
        dfNeg = with_category(dfNeg, neg, dates, category)
        # list(...): on Python 3 dict views cannot be rounded by numpy or
        # used as an index directly.
        descf = with_category(descf,
                              np.round(list(describe.values()), 2),
                              list(describe.keys()),
                              category)

    fig, axes = plt.subplots(3, 1)

    titles_dict = {
        'fontsize': 14,
        'fontweight': 8,
        'verticalalignment': 'baseline',
        'horizontalalignment': 'center'
    }
    dfPos.plot.area(stacked=False, ax=axes[0])
    axes[0].set_title("Tweets week " + str(week['_id']), fontdict=titles_dict)
    axes[0].set_ylabel('Positive')
    axes[0].legend(fontsize='xx-small')
    axes[0].get_xaxis().set_visible(False)

    dfNeg.plot.area(stacked=False, ax=axes[1], figsize=(6, 9))
    axes[1].set_ylabel('Negative')
    axes[1].legend(fontsize='xx-small')
    axes[1].set_xlabel('Weekly Summary')
    axes[1].set_xticklabels(dfNeg.index.values, rotation='horizontal')
    axes[1].get_xaxis().set_major_locator(mdates.DayLocator(interval=1))
    axes[1].get_xaxis().set_minor_formatter(mdates.DateFormatter(''))
    axes[1].get_xaxis().set_major_formatter(mdates.DateFormatter('%b %d\n'))

    axes[2].axis('tight')
    axes[2].axis('off')
    t = table(axes[2],
              descf,
              gid=str(' Weekly Summary\n'),
              cellLoc='center',
              loc='center')
    t.auto_set_font_size(False)
    t.set_fontsize(7)

    fig.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0)
    return fig
예제 #21
0
    def pie_chart(self, data, title, column=""):
        """Save a pie chart of ``data['Assets']`` next to a table of ``data``.

        Note that ``data`` is modified: its index is replaced by the values
        of ``data[column]`` (used as the pie labels).

        :param data: DataFrame with an ``'Assets'`` column.
        :param title: chart title; also passed to ``self.p`` to obtain the
            output path.
        :param column: name of the column whose values become the index.
        """
        data.index = list(data[column])

        plt.figure(figsize=(16, 8))

        # left half: the pie itself
        pie_axes = plt.subplot(121, aspect='equal')
        pie_options = dict(
            kind='pie',
            ax=pie_axes,
            autopct='%1.1f%%',
            startangle=90,
            shadow=False,
            legend=False,
            fontsize=16,
            title=title,
            subplots=True,
        )
        data['Assets'].plot(**pie_options)

        # right half: the transposed data as a table on a blank axes
        table_axes = plt.subplot(122)
        plt.axis('off')

        tbl = table(table_axes, data.T, loc='center right')
        tbl.auto_set_font_size(False)
        tbl.set_fontsize(8)

        plt.savefig(self.p(title))
예제 #22
0
    def QualityofFittables(DataElement, pdf, outpath, truncate, nameStr=''):
        """Add one quality-of-fit page per grouping category to ``pdf``.

        For each of the categories ``'class'`` and ``'targetstrat'`` the
        trade data is grouped, ``reporter_impl.QualityofFitgrouptables`` is
        run per group and the rounded coefficients are drawn as a table
        together with a bar chart of ``'gamma (est)'``.

        ``DataElement.setTradedata`` is called with each group's data and is
        not reset afterwards.

        :param DataElement: model object exposing ``data.tradedata`` and
            ``setTradedata``.
        :param pdf: object whose ``savefig()`` stores the current figure.
        :param outpath: currently unused.
        :param truncate: forwarded to ``QualityofFitgrouptables``.
        :param nameStr: optional suffix appended to each group name.
        """
        # Imported once, up front. pandas.tools.plotting was removed from
        # pandas; pandas.plotting is its replacement.
        try:
            from pandas.plotting import table
        except ImportError:  # pandas < 0.20
            from pandas.tools.plotting import table

        groupCats = ['class', 'targetstrat']
        tradedata = DataElement.data.tradedata.copy()
        mod = DataElement

        for groupCat in groupCats:

            r = pd.DataFrame()
            grouped = tradedata.groupby(groupCat)
            for name, data in grouped:
                mod.setTradedata(data)
                nameStrLoc = name
                if (len(nameStr) > 0):
                    nameStrLoc = name + "_" + nameStr
                # NOTE(review): the output directory is hard-coded while the
                # ``outpath`` argument is never used - confirm which one is
                # intended.
                r = pd.concat([
                    r,
                    reporter_impl.QualityofFitgrouptables(mod,
                                                          '/home/charles/',
                                                          truncate=truncate,
                                                          nameStr=nameStrLoc)
                ])
            try:
                r = np.round(
                    r[[
                        'Group', 'const (est)', 'beta (est)', 'gamma (est)',
                        'lin r^2', 'pow_law r^2'
                    ]], 4)
            except KeyError:
                # results without a constant term
                r = np.round(
                    r[[
                        'Group', 'beta (est)', 'gamma (est)', 'lin r^2',
                        'pow_law r^2'
                    ]], 4)

            fig, ax = plt.subplots(1, 1)
            table(ax,
                  r,
                  loc='upper right',
                  colWidths=[0.1, 0.15, 0.15, 0.15, 0.15, 0.15])
            df = pd.DataFrame(r['gamma (est)'])
            df.index = r['Group']
            df.plot(kind='bar', color='y', ax=ax, ylim=(0, 2), legend=False)
            plt.ylabel('Gamma (est)')
            plt.title('Coefficient by ' + groupCat)
            pdf.savefig()
            plt.close()
예제 #23
0
def cross_tab(data):
    """Save a cross-tabulation image for every pair of non-numeric columns.

    The CSV is read with ``,`` as separator; for each pair of non-numeric
    columns ``(i, j)`` with ``i < j`` the cross tabulation is drawn as a
    table and saved to ``img<i><j>.png`` in the working directory.

    :param data: path or buffer accepted by ``pandas.read_csv``.
    """
    # The original call was read_csv(data, sep, header) with header = None.
    # Positionally that third argument bound to ``delimiter`` (not
    # ``header``), so the header row has always been inferred; that effective
    # behaviour is kept. Keyword form is required by pandas >= 2.0.
    df = pd.read_csv(data, sep=',')

    ap = df.select_dtypes(exclude=['number'])
    first_col = list(ap)
    arr = [np.array(ap.iloc[:, n]) for n in range(len(first_col))]
    print(type(arr))

    # each column against every column with a bigger index than it
    for i in range(len(first_col) - 1):
        for j in range(i + 1, len(first_col)):
            ddf = pd.crosstab(arr[i],
                              arr[j],
                              rownames=[first_col[i]],
                              colnames=[first_col[j]])
            # Create the figure first and draw on ITS axes: previously the
            # axes belonged to an older figure while savefig wrote the new,
            # still empty one.
            fig = plt.figure(figsize=(9, 11))
            ax = fig.add_subplot(111)
            table(ax, ddf, loc='center')
            ax.set_axis_off()
            # figsize is a figure property, not a savefig option
            fig.savefig('img' + str(i) + str(j) + '.png',
                        bbox_inches='tight')
            plt.close(fig)


#cross_tab(data)
예제 #24
0
def stats(data):
    """Save the ``describe()`` summary of the numeric CSV columns as an image.

    The image is written to ``stat0.png`` inside the module-level ``fpath``
    directory (joined with a backslash).

    :param data: path or buffer accepted by ``pandas.read_csv``.
    """
    # keep only the numeric columns, then summarise them
    numeric_summary = pd.read_csv(data)._get_numeric_data().describe()

    axes = plt.subplot(111)
    table(axes, numeric_summary, loc='center')
    axes.set_axis_off()

    f = 0
    target = fpath + '\\' + 'stat' + str(f) + '.png'
    plt.savefig(target)

#stats(data)
예제 #25
0
def RaRi(RecVlist, fdname):
    """Compute ``Ra`` and ``Rinput`` per recording; store them as pickle and image.

    For every recording number ``i`` the trace ``(t, v, pho)`` is read with
    ``readtraces(fdname, i)``. The baseline is the mean of ``v`` over the
    first ``0.005 / dt`` samples; ``Ra`` uses the minimum of ``v`` and
    ``Rinput`` the mean of ``v`` between ``0.019 / dt`` and ``0.020 / dt``
    samples, both as ``10 / |value - baseline| * 1000``.

    The table is pickled to ``Analysis/<fdname>/aligned/Isteps/RaRinput.ASCII``
    and drawn to ``Pics/<fdname>/Isteps/RaRinput.jpeg``.

    :param RecVlist: recording numbers.
    :param fdname: folder name used for reading and for both output paths.
    :return: DataFrame with the columns ``RecN``, ``Ra`` and ``Rinput``.
    """
    # pandas.tools.plotting was removed from pandas; pandas.plotting is its
    # replacement.
    try:
        from pandas.plotting import table
    except ImportError:  # pandas < 0.20
        from pandas.tools.plotting import table

    def ensure_dir(path):
        # create the directory tree, tolerating only "already exists"
        try:
            os.makedirs(path)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

    R = np.zeros((len(RecVlist), 3))
    for n, i in enumerate(RecVlist):
        t, v, pho = readtraces(fdname, i)
        dt = t[1] - t[0]
        baseline = np.mean(v[:int(0.005 / dt)])
        Ra = 10 / abs(min(v) - baseline) * 1000
        Rinpt = 10 / abs(
            np.mean(v[int(0.019 / dt):int(0.020 / dt)]) - baseline
        ) * 1000
        R[n][0] = i
        R[n][1] = Ra
        R[n][2] = Rinpt
    Rdf = pd.DataFrame(R)
    Rdf.columns = ['RecN', 'Ra', 'Rinput']

    # save the raw numbers
    dir_aligned = 'Analysis/%s' % fdname + '/aligned/Isteps'
    ensure_dir(dir_aligned)
    Rdf.to_pickle(dir_aligned + '/RaRinput.ASCII')

    # figure height grows with the number of recordings
    plt.figure(figsize=[10, 1 + 0.1 * len(RecVlist)])
    ax = plt.subplot(111, frame_on=False)  # no visible frame
    ax.xaxis.set_visible(False)  # hide the x axis
    ax.yaxis.set_visible(False)  # hide the y axis

    table(ax, Rdf, loc='center')

    dir_pic = 'Pics/%s' % fdname + '/Isteps'
    ensure_dir(dir_pic)

    plt.savefig(dir_pic + '/RaRinput.jpeg')
    plt.close()
    return (Rdf)
예제 #26
0
    def plot_statistic(self, error, statistic='mean'):
        """Plot one statistic of one error measure for every method.

        Reads ``statistics.csv`` from ``self.save_dir_statistics`` (three
        header rows, first column as index), draws one line per entry of
        ``self.methods`` and a table of the values below the plot, then saves
        the figure as ``plot_<error>_<statistic>.png`` in the same directory
        and shows it.

        :param error: value of the second column level to select.
        :param statistic: ``'min'`` or ``'mean'``; for ``'mean'`` a band of
            +/- one standard deviation (square root of the ``'var'`` column)
            is shaded. Any other value draws no lines.
        """
        statistics = pd.read_csv(self.save_dir_statistics+'statistics.csv', header=[0, 1, 2], index_col=[0])
        # NOTE(review): DataFrame.sortlevel no longer exists in current pandas
        # (sort_index is the replacement) - confirm the pandas version in use.
        statistics.sortlevel(axis=0, inplace=True, sort_remaining=True)
        statistics.sortlevel(axis=1, inplace=True, sort_remaining=True)
        # infinite values are treated as missing
        statistics[statistics == np.inf] = np.nan

        # index for sorting dataframe methods
        idx = np.argsort(np.argsort(self.methods))  # second argsort gives the idx for undoing the sorting

        # change color cycle: one evenly spaced 'jet' color per method
        cmap = pl.get_cmap('jet')
        colors = cmap(np.linspace(0.1, 0.9, len(self.methods)))

        fig = pl.figure(figsize=(9, 8))
        ax = fig.add_subplot(111)
        ax.set_prop_cycle(cycler('color', colors))
        for method in self.methods:
            if statistic == 'min':
                # all rows; columns restricted to (any, error, statistic)
                statistic_min = statistics.loc[(slice(None)), (slice(None), error, statistic)]
                ax.plot(statistics.index, statistic_min[method], label=method)
                # NOTE(review): the same table is drawn again on every loop
                # iteration - presumably once after the loop would suffice.
                table(ax, statistic_min[idx].transpose().apply(lambda x: x.map(lambda y: "%.6f" % y)),
                  rowLabels=self.methods, loc='bottom', bbox=[0, -0.7, 1, 0.55])
            elif statistic == 'mean':
                statistic_mean = statistics.loc[(slice(None)), (slice(None), error, statistic)]
                statistic_std = np.sqrt(statistics.loc[(slice(None)), (slice(None), error, 'var')])
                base_line, = ax.plot(statistics.index, statistic_mean[method], label=method)

                # shaded band of mean +/- std in the color of the line
                ax.fill_between(statistics.index.values,
                                (statistic_mean[method].values - statistic_std[method].values).flatten(),
                                (statistic_mean[method].values + statistic_std[method].values).flatten(),
                                facecolor=base_line.get_color(), alpha=0.1)
                # NOTE(review): redrawn on every iteration, see above.
                table(ax, statistic_mean[idx].transpose().apply(lambda x: x.map(lambda y: "%.6f" % y)),
                  rowLabels=self.methods, loc='bottom', bbox=[0, -0.7, 1, 0.55])
        # leave room on the left and below the axes for the table
        pl.tight_layout(rect=[0.15, 0.36, 1.0, 1.0])
        ax.set_xticks(statistics.index)
        ax.set_xticklabels(statistics.index)
        pl.legend(fontsize=12)
        pl.ylim([0, None])
        pl.xlabel(statistics.index.name)
        pl.ylabel(statistic + ' ' + error)
        pl.savefig(self.save_dir_statistics+'/plot_'+error+'_'+statistic+'.png')
        pl.show()
def create_key(alt_csv, file, dir):
    """Save ``alt_csv`` as a small-print, transparent "key" table image.

    The image goes to ``ana_dir/tree_dir/key_dir/<dir>/<file minus its last
    four characters>_key.png`` (the three leading directories are module
    globals).

    :param alt_csv: DataFrame to render.
    :param file: source file name; its last four characters are replaced by
        ``_key.png``.
    :param dir: sub-directory of the key directory.
    """
    figure_height = len(alt_csv)/5.2
    fig, ax = plt.subplots(figsize=(66, figure_height))
    ax.set_frame_on(False)
    for hidden in (ax.xaxis, ax.yaxis):
        hidden.set_visible(False)

    column_widths = [0.15] * len(alt_csv.columns)
    tabla = table(ax, alt_csv, loc='upper right', colWidths=column_widths)
    tabla.auto_set_font_size(False)
    tabla.set_fontsize(5)
    tabla.scale(.8, .8)

    key_name = file[:len(file)-4] + "_key.png"
    key_path = os.path.join(ana_dir, tree_dir, key_dir, dir, key_name)
    plt.savefig(key_path, transparent=True)
예제 #28
0
def tablegen(dict):
    """Draw the given values as a two-row (``MSE``, ``QL``) table.

    The table is drawn on a new 14x2 figure with hidden axes; values are
    rounded to three decimals. Nothing is saved or returned here.

    :param dict: data accepted by ``pandas.DataFrame`` for an index of two
        rows.
    """
    frame = pd.DataFrame(dict, index=['MSE', 'QL'])

    fig, ax = plt.subplots(figsize=(14, 2))  # set size frame
    for hidden in (ax.xaxis, ax.yaxis):
        hidden.set_visible(False)  # hide both axes
    ax.set_frame_on(False)  # no visible frame

    column_widths = [0.17] * len(frame.columns)
    tabla = table(ax, frame.round(3), loc='center', colWidths=column_widths)
    tabla.auto_set_font_size(False)  # allow setting the font size manually
    tabla.set_fontsize(12)  # a larger font needs larger colWidths
    tabla.scale(1, 1)
예제 #29
0
def compare_specific_reference(actual_res_path, expected_res_path, result_dir, test_name,  max_allowed_mismatch, ref_expected_id=None):
    """Save one table image per expected reference with its matched test references.

    The test and expected results are loaded, mapped onto each other with
    ``get_expected_test_map_df`` and merged per region. Every merged row gets
    a ``mismatch_score`` computed by ``calc_mismatch_score`` from the test and
    expected sequences. For each selected expected reference a PNG named
    ``emirge_smurf_<test_name>_reference_id_<id>.png`` is written to
    ``result_dir``.

    Args:
        actual_res_path: path handed to ``get_test_df``.
        expected_res_path: path handed to ``get_expected_df``.
        result_dir: directory that receives the PNG files.
        test_name: label embedded in every output file name.
        max_allowed_mismatch: forwarded to ``get_expected_test_map_df``.
        ref_expected_id: a single expected reference id or an iterable of
            ids. When falsy, every expected reference in the merged data is
            rendered.
    """
    ref_e = Header.ref_id + '_e'
    ref_t = Header.ref_id + '_t'

    test_df = get_test_df(actual_res_path)
    expected_df = get_expected_df(expected_res_path)

    match_df = get_expected_test_map_df(test_df, expected_df, max_allowed_mismatch)
    match_df = match_df[[ref_e, ref_t]].drop_duplicates()

    # Suffix the shared columns so both sides survive the merges below.
    test_df = test_df.rename(columns={Header.ref_id: ref_t,
                                      Header.sequence: Header.sequence + '_t',
                                      Header.prior: Header.prior + '_t'})
    expected_df = expected_df.rename(columns={Header.ref_id: ref_e,
                                              Header.sequence: Header.sequence + '_e',
                                              Header.prior: Header.prior + '_e'})
    scored_merge_df = pd.merge(test_df, match_df, on=[ref_t])
    scored_merge_df = pd.merge(expected_df, scored_merge_df, on=[Header.region, ref_e])
    scored_merge_df['mismatch_score'] = scored_merge_df.apply(
        lambda r: calc_mismatch_score(r[Header.sequence + '_t'], r[Header.sequence + '_e']), axis=1)

    if not ref_expected_id:
        ref_expected_ids = scored_merge_df[ref_e].drop_duplicates().tolist()
    elif isinstance(ref_expected_id, (str, bytes)) or not hasattr(ref_expected_id, '__iter__'):
        # A single id: list() would split a string into characters and
        # raise TypeError for an int, so wrap the scalar instead.
        ref_expected_ids = [ref_expected_id]
    else:
        ref_expected_ids = list(ref_expected_id)

    for current_ref_id in ref_expected_ids:
        mapped_data = scored_merge_df[scored_merge_df[ref_e] == current_ref_id]
        presented_data = mapped_data[[ref_e, ref_t, Header.region, 'prior_e', 'prior_t', 'mismatch_score']].copy()
        # DataFrame.sort() no longer exists in pandas; sort_values() is the
        # replacement with the same column/ascending semantics.
        presented_data = presented_data.sort_values(
            by=[ref_e, ref_t, 'prior_t', Header.region], ascending=False)
        presented_data.index = range(1, len(presented_data) + 1)
        # Show the priors as percentages with two decimals.
        for prior_col in ('prior_e', 'prior_t'):
            presented_data[prior_col] = presented_data[prior_col].apply(
                lambda val: "{0:.2f}%".format(val * 100))

        ax = plt.subplot(111, frame_on=False)  # no visible frame
        ax.xaxis.set_visible(False)  # hide the x axis
        ax.yaxis.set_visible(False)  # hide the y axis

        table(ax, presented_data, loc='center')

        res_name = 'emirge_smurf_' + test_name + '_reference_id_' + str(current_ref_id) + '.png'
        im_path = os.path.join(result_dir, res_name)

        plt.tight_layout()
        plt.savefig(im_path, bbox_inches='tight')
        plt.clf()  # start the next reference on an empty figure
        logging.info("saving results to: {}".format(im_path))
예제 #30
0
    def metrics(self, return_type=None, ax=None):
        """Summarise the regression fit between the feature and the target.

        Collects the target mean, the correlation, R2, MSE and RMSE of the
        fitted regression line, the minimum, median and maximum of the
        bootstrap slopes (``self.bs_slopes``), the two confidence-interval
        values and the feature units. The confidence intervals are computed
        first when ``self.fit_conf_int`` is ``False``.

        Args:
            return_type: ``'df'`` returns a one-column (``'Values'``)
                DataFrame, ``'img'`` draws that DataFrame as a table on
                ``ax`` and returns the matplotlib table; any other value
                returns the raw list of values.
            ax: axes to draw on; required when ``return_type == 'img'``.

        Returns:
            A list, a ``pandas.DataFrame`` or a matplotlib table, depending
            on ``return_type``.

        Raises:
            AssertionError: if ``return_type == 'img'`` and ``ax`` is None.
        """

        if self.fit_conf_int == False:
            self.confidence_intervals()

        data = [
            self.tgt_mean(),
            self.corr(),
            self.r2(),
            self.mse(),
            self.rmse(),
            self.bs_slopes.min(),
            np.median(self.bs_slopes),
            self.bs_slopes.max(),
            self.conf_intervals_values[0],
            self.conf_intervals_values[1],
            self.feature_units,
        ]

        df = pd.DataFrame(data,
                          columns=['Values'],
                          index=[
                              'Target Mean', 'Correlation', 'R2', 'MSE',
                              'RMSE', 'Increase Min', 'Increase Median',
                              'Increase Max', 'CI_Low', 'CI_High',
                              'Feature Units'
                          ])

        if return_type == 'df':
            return df

        elif return_type == 'img':
            assert ax is not None, "Please pass an axes object to plot img data on"

            # pandas moved ``table`` to pandas.plotting and later removed
            # pandas.tools.plotting; fall back only for very old versions.
            try:
                from pandas.plotting import table
            except ImportError:
                from pandas.tools.plotting import table

            ax.xaxis.set_visible(False)  # hide the x axis
            ax.yaxis.set_visible(False)  # hide the y axis
            ax.set_frame_on(False)  # no visible frame
            tabla = table(ax,
                          df,
                          loc='upper right',
                          colWidths=[0.17] * len(df.columns))

            return tabla

        else:
            return data
예제 #31
0
def plot_agg_table(agg_tbl,oName,meter):
    """Draw ``agg_tbl`` as a table titled ``'<meter> meter grid'`` and save it.

    Args:
        agg_tbl: tabular data handed to ``table``; seven column widths are
            supplied.
        oName: output path passed to ``plt.savefig`` (saved at 600 dpi).
        meter: text prepended to ``' meter grid'`` in the figure title.
    """
    fig = plt.figure(figsize=(6, 1.5))
    ax2 = fig.add_subplot(111)
    ax2.xaxis.set_visible(False)
    ax2.yaxis.set_visible(False)
    # Paint the spines white and push them behind the table.
    # ``.values()`` replaces the Python-2-only ``.itervalues()`` and works on
    # both interpreters and on matplotlib's Spines container.
    for sp in ax2.spines.values():
        sp.set_color('w')
        sp.set_zorder(0)
    the_table = table(ax2, agg_tbl, loc='upper center', colWidths=[0.1] * 7)
    the_table.set_fontsize(10)
    plt.suptitle(meter + ' meter grid')
    plt.tight_layout()
    plt.savefig(oName, dpi=600)
예제 #32
0
def main_compare_length():
    """Compare the four EMIRGE result variants with the expected results.

    For every mock index the static, static+weight, weight and basic result
    files are loaded, each is compared against the expected results, and the
    four comparisons are merged into one table that is saved as
    ``<index>_emirge_smurf_full_compare.png``. All paths are hard-coded.
    """
    res_dir = "/home/vered/EMIRGE/EMIRGE-data/mock_for_noam_test/results"
    expected_dir = "/home/vered/EMIRGE/EMIRGE-data/mock_for_noam_test/"
    merge_keys = ['ref_e', 'expected frequency']
    suffixes = [" s+w", " w", " s", "-"]

    # Only the 15-sequence mock is processed ('5' and '10' were listed in the
    # original code but overridden before use).
    for i in ['15']:
        results_prefix = os.path.join(res_dir, "final_results_" + i)

        static = get_test_df(results_prefix + "_static.csv")
        static_weight = get_test_df(results_prefix + "_static_weight.csv")
        weight = get_test_df(results_prefix + "_weight.csv")
        basic = get_test_df(results_prefix + ".csv")
        expected = get_expected_df(os.path.join(expected_dir, "mock_" + i + "seq/reads/expected_res.csv"))

        static_compare = get_presented_data(compare(static, expected, i + "_static"))
        static_weight_compare = get_presented_data(compare(static_weight, expected, i + "_static_weight"))
        weight_compare = get_presented_data(compare(weight, expected, i + "_weight"))
        basic_compare = get_presented_data(compare(basic, expected, i + "_basic"))

        # Pair the variants first, then join the two halves on their
        # remaining common columns.
        static_half = pd.merge(static_compare, static_weight_compare,
                               on=merge_keys, suffixes=(" s", " s+w"))
        weight_half = pd.merge(weight_compare, basic_compare,
                               on=merge_keys, suffixes=(" w", "-"))
        full_comparison = pd.merge(static_half, weight_half)

        ordered_columns = ["# of refs" + s for s in suffixes]
        ordered_columns += ['expected frequency']
        ordered_columns += ['freq diff' + s for s in suffixes]
        full_comparison = full_comparison[ordered_columns]

        fig = plt.figure(figsize=(10, 4), dpi=300)
        ax = fig.add_subplot(111, frame_on=False)  # no visible frame
        for axis in (ax.xaxis, ax.yaxis):
            axis.set_visible(False)

        the_table = table(ax, full_comparison, loc='center')
        the_table.set_fontsize(18)

        res_name = i + '_emirge_smurf_full_compare.png'
        plt.tight_layout()
        plt.savefig(os.path.join('/home/vered/EMIRGE/EMIRGE-data/', res_name), bbox_inches='tight')

        plt.clf()
        fig.clear()
예제 #33
0
def plot(df, name):
    """Draw ``df`` as a table on a fresh figure and save it as ``<name>.png``.

    Args:
        df: data handed to ``table``.
        name: output path without the ``.png`` extension.
    """
    # NOTE(review): this runs before the figure below exists, so it acts on
    # whatever figure is current at call time - confirm this is intended.
    plt.tight_layout()

    fig, ax = plt.subplots()
    # Hide both axes and the frame so only the table is visible.
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_visible(False)
    ax.set_frame_on(False)

    rendered = table(ax, df, loc='upper right')
    rendered.auto_set_font_size(False)  # needed for a manual font size
    rendered.set_fontsize(8)

    out_file = name + '.png'
    plt.savefig(out_file, dpi=199, bbox_inches="tight")
예제 #34
0
def KNN(vol_data, k=1, warmup=100, filename=None, Timedt=None, method=[3]):
    """Run ``KNNcalc`` for k = 1..20 and every method, then plot and tabulate.

    Args:
        vol_data: frame with a ``Date`` attribute/column; its second column
            is the series handed to ``KNNcalc``.
        k: not used by this function (k is swept from 1 to 20 instead).
        warmup, filename, Timedt: forwarded unchanged to ``KNNcalc``;
            ``filename`` is also used as the title of both line plots.
        method: iterable of method identifiers; each is run for every k.

    Returns:
        DataFrame with the columns ``k``, ``MSE`` and ``QL``, one row per
        (method, k) run. ``MSE`` and ``QL`` are the first and second items
        of each ``KNNcalc`` result.
    """
    series = vol_data.iloc[:, 1]
    dates = pd.Series(vol_data.Date)

    # Collect (k, KNNcalc result) for every method/k combination.
    runs = []
    for m in method:
        for ks in np.linspace(1, 20, 20):
            outcome = KNNcalc(vol_data=series,
                              dates=dates,
                              k=ks,
                              warmup=warmup,
                              filename=filename,
                              Timedt=Timedt,
                              method=m)
            runs.append((ks, outcome))

    mse = [outcome[0] for _, outcome in runs]
    ql = [outcome[1] for _, outcome in runs]
    kval = [int(ks) for ks, _ in runs]
    one_method_result = pd.DataFrame(np.transpose([kval, mse, ql]),
                                     columns=['k', 'MSE', 'QL'])

    for score in ('MSE', 'QL'):
        one_method_result.plot('k', score, figsize=[12, 7]).set_title(filename)

    # Render the same numbers as a table on a separate figure.
    fig, ax = plt.subplots()
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_visible(False)
    ax.set_frame_on(False)
    n_cols = len(one_method_result.columns)
    result_table = table(ax,
                         one_method_result.round(7),
                         loc='center',
                         colWidths=[0.2] * n_cols)
    result_table.auto_set_font_size(False)  # allow a manual font size
    result_table.set_fontsize(10)
    result_table.scale(1, 1)
    # plt.show() is intentionally not called here; the original author notes
    # that showing every figure may cause overflow errors with many figures.

    return one_method_result
예제 #35
0
def fig_creator(data_frame):
    """Save ``data_frame`` as a transparent table image for today's "losers".

    The file is written to ``images/<today's date>_losers.png`` using the
    module-level ``today``.

    Args:
        data_frame: data to render (the original comment says to pass
            ``sorthead``).
    """
    fig, ax = plt.subplots(figsize=(12, 4))
    # Hide both axes and the frame so only the table is drawn.
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_visible(False)
    ax.set_frame_on(False)

    widths = [0.12] * len(data_frame.columns)
    losers_table = table(ax, data_frame, loc='upper left', colWidths=widths)
    losers_table.auto_set_font_size(False)  # allow a manual font size
    losers_table.set_fontsize(15)  # a larger font may need wider colWidths
    losers_table.scale(1.5, 1.5)

    save_options = dict(transparent=True,
                        bbox_inches='tight',
                        dpi=300,
                        pad_inches=0)
    plt.savefig(f'images/{today.date()}_losers.png', **save_options)
예제 #36
0
def tabulate_data(fig, table_info):
    """Prepare a matplotlib table using provided table info and add it to the figure.

    Args:
        fig: figure that receives the title and a single subplot with the table.
        table_info: object with a ``title`` and a ``df`` (DataFrame with a
            string index). ``table_info.df`` is not modified.
    """
    fig.suptitle(table_info.title, fontsize=20, fontweight='bold')
    ax = fig.add_subplot(111)

    # configure table colors
    tableau20 = get_tableau_colors()
    color_1 = tableau20[0]
    color_2 = tableau20[1]
    dark_color_1 = [channel * 0.65 for channel in color_1]

    # Work on a copy: padding the index in place would change the caller's
    # DataFrame and add more spaces on every call.
    df = table_info.df.copy()
    # pad the labels, since the label column has a fixed width
    df.index = ' ' + df.index + '    '
    nrows, ncols = df.shape
    colwidth = 0.16
    rowheight = 0.1
    # bbox centres the table in the axes
    table_width = ncols * colwidth
    table_height = nrows * rowheight
    tab = table(ax, np.round(df, 2), loc='upper center',
                bbox=[.5 - table_width / 2, .5 - table_height / 2, table_width, table_height])

    for (row, col), cell in tab.get_celld().items():
        text = cell.get_text()  # public accessor for the cell's Text object
        # common cell properties
        text.set_size(14)
        cell.set_edgecolor('w')
        cell.set_linestyle('-')
        cell.set_linewidth(1)
        # alternate the colors of even and odd rows
        if row % 2 == 0:
            cell.set_facecolor(color_1)
            text.set_color('w')
        else:
            cell.set_facecolor(color_2)
            text.set_color(dark_color_1)
        # header row and index (label) column override the row colors
        if row == 0 or col == -1:
            text.set_color('w')
            text.set_weight('bold')
            cell.set_facecolor(dark_color_1)
        if row == 0:
            cell.set_height(cell.get_height() * 1.4)  # first row a bit taller

    ax.axis('off')
예제 #37
0

# Build a one-column frame from the flattened May classification array,
# drop missing cells and attach a class name to every remaining row.
may_df = pd.DataFrame(may_sed_class.flatten())
may_df.rename(columns={0:'sed5class'}, inplace=True)
may_df = may_df.dropna()
may_df['sed5name'] = may_df.apply(lambda row: assign_class(row), axis=1)

print 'Now plotting distributions...'
# Left panel: number of rows per class name in aug_df, plus a table.
ax1 = plt.subplot2grid((5,2),(4, 0))
aug_df.groupby('sed5name').size().plot(kind='bar', ax=ax1,rot=45)
ax1.set_ylabel('Frequency')
ax1.set_xlabel('Substrate Type')
table_aug = pd.pivot_table(aug_df,index=['sed5name'], values = ['sed5class'],aggfunc='count')
# NOTE(review): this is count / total count, i.e. a fraction rather than a
# percentage despite the column name - confirm the intended unit.
table_aug['Percent_Area'] = table_aug['sed5class']/aug_df.sed5name.count()
table_aug = table_aug[['Percent_Area']]
table1 = table(ax1, np.round(table_aug,3), loc='upper right',colWidths=[0.2])


# Right panel: the same plot and table for may_df, sharing the y axis.
ax = plt.subplot2grid((5,2),(4, 1),sharey=ax1)
may_df.groupby('sed5name').size().plot(kind='bar', ax=ax,rot=45)
table_may = pd.pivot_table(may_df,index=['sed5name'], values = ['sed5class'],aggfunc='count')
table_may['Percent_Area'] = table_may['sed5class']/may_df.sed5name.count()
table_may = table_may[['Percent_Area']]
table2 = table(ax, np.round(table_may,3), loc='upper right',colWidths=[0.2])
ax.set_ylabel('Frequency')
ax.set_xlabel('Substrate Type')

plt.tight_layout()
print 'Now Saving figure...'
plt.savefig(r"C:\workspace\Reach_4a\Multibeam\mb_sed_class\output\mb_aug_may_comparison_diverging_cmap.png",dpi=1000)
#plt.show()
# One row of 'dBW' summary statistics per substrate (sand, gravel, boulders),
# rendered as a table image. `tbl`, `s_df`, `g_df` and `b_df` are created
# earlier in the script (not visible here).
tbl['substrate']=['sand','gravel','boulders']
tbl = tbl.set_index('substrate')
# describe().iloc[4..6] are presumably the 25%/50%/75% rows - verify against
# the frames' describe() output.
# NOTE(review): 'CV' is computed as mean/std; the coefficient of variation is
# conventionally std/mean - confirm which is intended.
tbl.loc['sand'] = pd.Series({'mean':np.mean(s_df['dBW']),'std':np.std(s_df['dBW']) ,'CV':np.mean(s_df['dBW'])/np.std(s_df['dBW']),'25%':float(s_df.describe().iloc[4].values), '50%':float(s_df.describe().iloc[5].values),'75%':float(s_df.describe().iloc[6].values),'kurt':float(s_df.kurtosis().values),'skew':float(s_df.skew().values)})
tbl.loc['gravel'] = pd.Series({'mean':np.mean(g_df['dBW']),'std':np.std(g_df['dBW']) ,'CV':np.mean(g_df['dBW'])/np.std(g_df['dBW']),'25%':float(g_df.describe().iloc[4].values), '50%':float(g_df.describe().iloc[5].values),'75%':float(g_df.describe().iloc[6].values),'kurt':float(g_df.kurtosis().values),'skew':float(g_df.skew().values)})
tbl.loc['boulders'] = pd.Series({'mean':np.mean(b_df['dBW']),'std':np.std(b_df['dBW']) ,'CV':np.mean(b_df['dBW'])/np.std(b_df['dBW']),'25%':float(b_df.describe().iloc[4].values), '50%':float(b_df.describe().iloc[5].values),'75%':float(b_df.describe().iloc[6].values),'kurt':float(b_df.kurtosis().values),'skew':float(b_df.skew().values)})
tbl = tbl.applymap(lambda x: round(x,3))
# The per-substrate frames are no longer needed once the table is filled.
del s_df, g_df, b_df

fig = plt.figure()
ax = fig.add_subplot(111)
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
# Paint the spines white and push them behind the table.
# NOTE(review): itervalues() exists only on Python 2 dicts.
for sp in ax.spines.itervalues():
    sp.set_color('w')
    sp.set_zorder(0)
the_table = table(ax, tbl.round(3),loc='best',colWidths=[0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1])
the_table.set_fontsize(12)
plt.tight_layout()
plt.savefig(r"c:\workspace\Texture_Classification\output\substrate_stat_plots\visual_agg_distribution_table_spet_14.png")
del tbl


# Count the raster cells inside each polygon of the shapefile; raster_out=True
# is passed through to zonal_stats.
in_shp = r"C:\workspace\Merged_SS\window_analysis\shapefiles\tex_seg_800_3class.shp"
ss_raster = r"C:\workspace\Merged_SS\window_analysis\raster\ss_10_rasterclipped.tif"
z_stats_46 = zonal_stats(in_shp ,ss_raster,stats=['count'],raster_out=True)


# Let's get the substrate codes: open the shapefile and take its first layer.
ds = ogr.Open(in_shp)
lyr = ds.GetLayer(0)
a=[]
# Proxy markers (one filled circle per colour) used only as legend handles.
# circ1 is defined earlier in the script (not visible here).
circ2 = Line2D([0], [0], linestyle="none", marker="o", markersize=10, markerfacecolor=colors[1],alpha=a_val)
circ3 = Line2D([0], [0], linestyle="none", marker="o", markersize=10, markerfacecolor=colors[2],alpha=a_val)
circ4 = Line2D([0], [0], linestyle="none", marker="o", markersize=10, markerfacecolor=colors[3],alpha=a_val)
circ5 = Line2D([0], [0], linestyle="none", marker="o", markersize=10, markerfacecolor=colors[4],alpha=a_val)
circ6 = Line2D([0], [0], linestyle="none", marker="o", markersize=10, markerfacecolor=colors[5],alpha=a_val)

#ax1 = fig.add_subplot(2,2,2)
# Legend-only panel: axes hidden, spines painted white.
ax1 = plt.subplot2grid((3,2),(0,1))
ax1.xaxis.set_visible(False)
ax1.yaxis.set_visible(False)
#hide the spines
# NOTE(review): itervalues() exists only on Python 2 dicts.
for sp in ax1.spines.itervalues():
    sp.set_color('w')
    sp.set_zorder(0)

  
ax1.legend((circ1, circ2, circ3,circ4, circ5,circ6), ("sand", "sand/gravel", "gravel/sand","gravel","gravel/boulders","boulders"), 
           numpoints=1, loc='center left', borderaxespad=0.)   #bbox_to_anchor=(0.3, 0.9),

#ax2 = fig.add_subplot(2,2,3)
# Table-only panel showing the rounded pivot table.
# NOTE(review): colspan=2 starting at column 1 of a 2-column grid extends past
# the grid - confirm the intended placement.
ax2 = plt.subplot2grid((3,2),(1,1),colspan=2)
ax2.xaxis.set_visible(False)
ax2.yaxis.set_visible(False)
for sp in ax2.spines.itervalues():
    sp.set_color('w')
    sp.set_zorder(0)
the_table = table(ax2, pivot_table.round(3),loc='center left',colWidths=[0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1])
the_table.set_fontsize(10)
plt.tight_layout(w_pad=10)#w_pad = 
plt.savefig(r"C:\workspace\Merged_SS\window_analysis\10_percent_shift\output\ss_visual_seg_2014_09_R01767.png", dpi=1000)
#plt.show()
예제 #40
0
    def plot2D(self):
        """Draw the current data, relying on pandas plot functionality where possible.

        The plot options are read from ``self.mplopts.kwds``; only the keyword
        arguments listed as valid for the selected plot ``kind`` are passed on
        (temporary code, per the original author). When a ``by`` column is
        set, one subplot is drawn per group. A data table is added when the
        ``table`` option is set, then the general figure options are applied
        and the canvas is redrawn.

        Returns:
            None. Returns early, without drawing, when there is no ``data``
            attribute, no numeric data, an unknown grouping column or more
            than 25 groups.
        """

        if not hasattr(self, 'data'):
            return
        #needs cleaning up
        valid = {'line': ['alpha', 'colormap', 'grid', 'legend', 'linestyle',
                          'linewidth', 'marker', 'subplots', 'rot', 'logx', 'logy',
                          'sharey', 'kind'],
                    'scatter': ['alpha', 'grid', 'linewidth', 'marker', 'subplots', 's',
                            'legend', 'colormap','sharey', 'logx', 'logy', 'use_index','c',
                            'cscale','colorbar'],
                    'pie': ['colormap','legend'],
                    'hexbin': ['alpha', 'colormap', 'grid', 'linewidth'],
                    'bootstrap': ['grid'],
                    'bar': ['alpha', 'colormap', 'grid', 'legend', 'linewidth', 'subplots',
                            'sharey',  'logy', 'stacked', 'rot', 'kind'],
                    'barh': ['alpha', 'colormap', 'grid', 'legend', 'linewidth', 'subplots',
                            'stacked', 'rot', 'kind', 'logx'],
                    'histogram': ['alpha', 'linewidth','grid','stacked','subplots','colormap',
                             'sharey','rot','bins', 'logx', 'logy'],
                    'heatmap': ['colormap','rot'],
                    'area': ['alpha','colormap','grid','linewidth','legend','stacked',
                             'kind','rot','logx'],
                    'density': ['alpha', 'colormap', 'grid', 'legend', 'linestyle',
                                 'linewidth', 'marker', 'subplots', 'rot', 'kind'],
                    'boxplot': ['rot', 'grid', 'logy','colormap','alpha','linewidth'],
                    'scatter_matrix':['alpha', 'linewidth', 'marker', 'grid', 's'],
                    'contour': ['linewidth','colormap','alpha'],
                    'imshow': ['colormap','alpha']
                    }

        data = self.data
        if self._checkNumeric(data) == False:
            self.showWarning('no numeric data to plot')
            return
        #get all options from the mpl options object
        kwds = self.mplopts.kwds
        kind = kwds['kind']
        show_table = kwds['table']
        by = kwds['by']
        by2 = kwds['by2']
        errorbars = kwds['errorbars']
        useindex = kwds['use_index']

        #valid kwd args for this plot type
        kwargs = dict((k, kwds[k]) for k in valid[kind] if k in kwds)
        #initialise the figure
        self._initFigure()
        ax = self.ax
        #plt.style.use('dark_background')

        if by != '':
            #groupby needs to be handled per group so we can add all the axes to
            #our figure correctly
            if by not in data.columns:
                self.showWarning('the grouping column must be in selected data')
                return
            if by2 != '' and by2 in data.columns:
                by = [by,by2]
            g = data.groupby(by)
            if len(g) >25:
                self.showWarning('too many groups to plot')
                return
            self.ax.set_visible(False)
            kwargs['subplots'] = False
            size = len(g)
            #add_subplot needs integer grid dimensions; keep at least one row
            #so an empty grouping cannot divide by zero
            nrows = max(int(round(np.sqrt(size))), 1)
            ncols = int(np.ceil(size / float(nrows)))
            #defined up front so the figure legend also works with no groups
            handles, labels = [], []
            i=1
            for n,df in g:
                ax = self.fig.add_subplot(nrows,ncols,i)
                kwargs['legend'] = False #remove axis legends
                d=df.drop(by, axis=1) #remove grouping columns
                self._doplot(d, ax, kind, False,  errorbars, useindex, kwargs)
                ax.set_title(n)
                handles, labels = ax.get_legend_handles_labels()
                i+=1
            self.fig.legend(handles, labels, loc='center right')#, bbox_to_anchor=(1, 0.5))
            self.fig.subplots_adjust(left=0.1, right=0.9, top=0.9,
                                     bottom=0.1, hspace=.25)
            axs = self.fig.get_axes()
            #self.canvas.draw()
        else:
            axs = self._doplot(data, ax, kind, kwds['subplots'], errorbars,
                               useindex, kwargs)
        if show_table == True:  #keeps the original equality semantics
            #pandas moved table() to pandas.plotting and later removed
            #pandas.tools.plotting; fall back only for very old versions
            try:
                from pandas.plotting import table as draw_table
            except ImportError:
                from pandas.tools.plotting import table as draw_table
            if self.table.child is not None:
                tabledata = self.table.child.model.df
                draw_table(axs, np.round(tabledata, 2),
                           loc='upper right', colWidths=[0.1] * len(tabledata.columns))

        #set options general for all plot types
        #annotation optons are separate
        lkwds = self.labelopts.kwds.copy()
        lkwds.update(kwds)
        self.setFigureOptions(axs, lkwds)
        #12.0 forces true division on Python 2 as well, so scf cannot
        #truncate to 0 for font sizes above 12
        scf = 12.0/kwds['fontsize']
        try:
            self.fig.tight_layout()
            self.fig.subplots_adjust(top=0.9)
        except Exception:
            #tight_layout can fail for some layouts; fall back to fixed margins
            self.fig.subplots_adjust(left=0.1, right=0.9, top=0.89,
                                     bottom=0.1, hspace=.4/scf, wspace=.2/scf)
            print ('tight_layout failed')
        self.canvas.draw()
        return
예제 #41
0
# Proxy markers used only as legend handles; circ1-circ3 are defined earlier
# in the script (not visible here).
circ4 = Line2D([0], [0], linestyle="none", marker="o", markersize=10, markerfacecolor=colors[3],alpha=a_val)
circ5 = Line2D([0], [0], linestyle="none", marker="o", markersize=10, markerfacecolor=colors[4],alpha=a_val)
circ6 = Line2D([0], [0], linestyle="none", marker="o", markersize=10, markerfacecolor=colors[5],alpha=a_val)

 
ax.legend((circ1, circ2, circ3,circ4, circ5,circ6), ("sand", "sand/gravel", "gravel/sand","gravel","gravel/boulders","boulders"), 
           numpoints=1, loc='best', borderaxespad=0.)   #bbox_to_anchor=(0.3, 0.9),

#ax2 = fig.add_subplot(2,2,3)
# Table-only panel: axes hidden, spines painted white and sent to the back.
ax2 = plt.subplot2grid((5,2),(4, 0))
ax2.xaxis.set_visible(False)
ax2.yaxis.set_visible(False)
# NOTE(review): itervalues() exists only on Python 2 dicts.
for sp in ax2.spines.itervalues():
    sp.set_color('w')
    sp.set_zorder(0)
the_table = table(ax2, pivot_table.round(3),loc='upper right',colWidths=[0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1])
the_table.set_fontsize(10)

#plt.savefig(r"C:\workspace\Merged_SS\window_analysis\10_percent_shift\output\ss_visual_seg_2014_09_R01767.png", dpi=1000)

# Read band 1 of the raster and blank out every value <= 0.
ss_raster = r"C:\workspace\Merged_SS\window_analysis\raster\ss_10_rasterclipped.tif"
ds = gdal.Open(ss_raster)
data = ds.GetRasterBand(1).ReadAsArray()
data[data<=0] = np.nan
gt = ds.GetGeoTransform()
proj = ds.GetProjection()
 
# Items 1 and 5 of the geotransform are used as the x and y resolution.
xres = gt[1]
yres = gt[5]

# get the edge coordinates and add half the resolution 
# Draw each patch group in its own colour, all at zorder 10 with no edges.
ax.add_collection(PatchCollection(sg_patch, facecolor=colors[3], alpha=a_val, edgecolor="none", zorder=10))
ax.add_collection(PatchCollection(g_patch, facecolor=colors[2], alpha=a_val, edgecolor="none", zorder=10))
ax.add_collection(PatchCollection(sr_patch, facecolor=colors[1], alpha=a_val, edgecolor="none", zorder=10))
ax.add_collection(PatchCollection(r_patch, facecolor=colors[0], alpha=a_val, edgecolor="none", zorder=10))

ax.legend(
    (circ1, circ2, circ3, circ4, circ5), ("rock", "sand/rock", "gravel", "sand/gravel", "sand"), numpoints=1, loc="best"
)

print "Now plotting focal statistics..."
# Bottom-left panel: table of tbl_28 on an otherwise empty axes.
ax = plt.subplot2grid((5, 2), (4, 0))
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
# NOTE(review): itervalues() exists only on Python 2 dicts.
for sp in ax.spines.itervalues():
    sp.set_color("w")
    sp.set_zorder(0)
the_table = table(ax, tbl_28.round(3), loc="best", colWidths=[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1])
the_table.set_fontsize(12)
# Bottom-right panel: the same layout for tbl_31.
ax = plt.subplot2grid((5, 2), (4, 1))
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
for sp in ax.spines.itervalues():
    sp.set_color("w")
    sp.set_zorder(0)
the_table = table(ax, tbl_31.round(3), loc="best", colWidths=[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1])
the_table.set_fontsize(12)
plt.tight_layout()
# plt.show()
print "Now Saving figure..."
plt.savefig(r"C:\workspace\Reach_4a\Multibeam\mb_sed_class\output\mb_sed_class_ground_truth_3m_agg_dist.png", dpi=600)
예제 #43
0
# Legend of the merchant-spend axes, placed above the plot and stretched
# across its width (mode="expand").
merchant_spend_legend = merchant_spend.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
           ncol=2, mode="expand", borderaxespad=0.)


# Secondary y axis sharing the x axis: MDB spend as a red line, with tick
# labels formatted as thousands.
mdb_spend = merchant_spend.twinx()
mdb_spend.set(ylabel=("MDB Spend(£000s)"))
mdb_spend.get_yaxis().set_major_formatter(FuncFormatter(lambda x, p: format(int(x/1000), ',')))
mdb_spend_figures = test_file["MDB Spend Figures"]
mdb_spend_series = mdb_spend.plot(reporting_period, mdb_spend_figures, "r")
mdb_spend_legend = mdb_spend.legend(frameon=False, bbox_to_anchor=(0., 1.006, 1., .1), loc='center right', borderaxespad=0.)

#Second plot - data table showing the data with currency mark

data_table = fig.add_subplot(2,2,2)
data_table.axis('off')
the_table = table(data_table, test_file[["Reported Figures (£)", "MDB Spend Figures (£)"]], rowLabels=xlabels, loc='center')
the_table.auto_set_font_size(False) 
the_table.set_fontsize(10)

#Third plot - correlation scatter plot of reported spend vs mdb spend with line of best fit 

correlation_graph = fig.add_subplot(2,2,3)
correlation_graph.set(title=("Correlation"), xlabel=("MDB Spend(£000s)"), ylabel=("Reported Spend(£000s)"))
correlation_graph.get_yaxis().set_major_formatter(FuncFormatter(lambda x, p: format(int(x/1000), ',')))
correlation_graph.get_xaxis().set_major_formatter(FuncFormatter(lambda x, p: format(int(x/1000), ',')))
correlation_graph.scatter(mdb_spend_figures,reported_figures)
# calc the trendline (linear)
# Degree-1 polynomial fit, drawn as a dashed red line over the scatter.
z = np.polyfit(mdb_spend_figures, reported_figures, 1)
p = np.poly1d(z)
correlation_graph.plot(mdb_spend_figures,p(mdb_spend_figures),"r--")
r, p_value = pearsonr(mdb_spend_figures, reported_figures)
예제 #44
0
# Report how many datasets and file names were loaded (both lists are built
# outside this excerpt). %-formatting keeps the output identical on Python 2 and 3.
print("Total Datasets %s" % len(datalist))
print("Filenames %s" % len(namelist))
print("")

if len(datalist) != len(namelist):
    print("Inconsitent length of data and corresponing names")
    # Exit with a non-zero status so callers can tell the run failed.
    sys.exit(1)

# One 2x2 figure per dataset. Pairing names and data with zip() removes the
# dependency on a counter initialised somewhere before this loop.
for name, item in zip(namelist, datalist):
    title = str(name) + " " + str(len(item)) + " runs"
    print("%s %s" % (title, np.mean(item)))
    fig, axs = pl.subplots(2, 2)
    fig.suptitle(title, fontsize=14, fontweight='bold')
    item[['Decomp-SOMs', 'ticks']].plot(kind='hist', legend=True, bins=20, alpha=0.5, ax=axs[0][0])
    item[['Decomp-SOMs', 'Necromass', 'ticks']].plot(kind='box', ax=axs[1][0])
    # NOTE(review): the describe() table and the first hexbin below are both
    # drawn on axs[1][1] - confirm that the overlap is intended.
    table(
        axs[1][1],
        np.round(item[['Necromass', 'SOMs', 'ticks', 'Hotspots']].describe(), 0),
        loc='upper right',
        colWidths=[0.1, 0.1, 0.1, 0.1],
    )
    item.plot(kind='hexbin', x='SOMs', y='Necromass', C='ticks', reduce_C_function=np.max, gridsize=15, ax=axs[1][1])
    item.plot(kind='hexbin', x='ticks', y='Necromass', C='SOMs', reduce_C_function=np.max, gridsize=20, ax=axs[0][1])
    #bootstrap_plot(item['Necromass'], size=50, samples=500, color='grey')
    #pl.show()
    print("")



pl.show()

'''setting for run which made the histogram for the PhD Seminar (pore like distribution)
x = 0
y = 0
z = 4.5
예제 #45
0
# Replace non-positive values in the 160 px texture array with NaN.
tex_data_160[tex_data_160 <= 0] = np.nan
del ds

# Convert every texture array to a data frame, in window-size order.
(df_10, df_20, df_40, df_80, df_160) = map(
    convert_to_dataframe,
    (tex_data_10, tex_data_20, tex_data_40, tex_data_80, tex_data_160),
)


# Histogram bin values shared by all panels.
bin_s = [
    0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5,
    0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95, 1.0,
]

fig = plt.figure(figsize=(12, 3))


def _hist_with_stats(figure, slot, frame, bins):
    """Draw a histogram of `frame` in cell `slot` of the 1x5 grid.

    Adds the rounded describe() table in the upper right corner, labels the
    y axis and returns the new axes.
    """
    axes = figure.add_subplot(1, 5, slot)
    frame.plot(ax=axes, kind='hist', bins=bins, legend=False)
    table(axes, np.round(frame.describe(), 3), loc='upper right', colWidths=[0.2])
    axes.set_ylabel('frequency')
    return axes


ax1 = _hist_with_stats(fig, 1, df_10, bin_s)
ax1.set_xlabel('Texture Lengthscale (m)')
ax1.set_title('10 Pixel Window')

ax2 = _hist_with_stats(fig, 2, df_20, bin_s)
ax2.set_xlabel('Texture Lengthscale (m)')
ax2.set_title('20 Pixel Window')

# The third panel only gets a y label in this excerpt.
ax3 = _hist_with_stats(fig, 3, df_40, bin_s)
# One data frame per texture window size.
df_50 = convert_to_dataframe(tex_data_50)
df_55 = convert_to_dataframe(tex_data_55)
df_60 = convert_to_dataframe(tex_data_60)
df_65 = convert_to_dataframe(tex_data_65)
df_70 = convert_to_dataframe(tex_data_70)
df_80 = convert_to_dataframe(tex_data_80)
df_120 = convert_to_dataframe(tex_data_120)
df_160 = convert_to_dataframe(tex_data_160)

bin_s = list(np.arange(0, 3.25, 0.05))

fig = plt.figure(figsize=(22, 3))

# First two cells of the 1x8 grid: histogram plus a small-font describe() table.
# ax1 and table2 are rebound on every pass, so they end up referring to the
# last panel drawn, exactly as in the unrolled version.
panels = (
    (df_50, '50 Pixel Window'),
    (df_55, '55 Pixel Window'),
)
for slot, (frame, panel_title) in enumerate(panels, 1):
    ax1 = fig.add_subplot(1, 8, slot)
    frame.plot(ax=ax1, kind='hist', bins=bin_s, legend=False)
    table2 = table(ax1, np.round(frame.describe(), 3), loc='upper right', colWidths=[0.2])
    # Switch off automatic font sizing before setting the explicit size.
    table2.auto_set_font_size(False)
    table2.set_fontsize(4)
    ax1.set_ylabel('frequency')
    ax1.set_xlabel('Texture Lengthscale (m)')
    ax1.set_title(panel_title)
예제 #47
0
# Overlay the five sediment-class patch groups on the current map axes, each
# with its own entry from `colors` (all defined outside this excerpt).
for idx, patches in enumerate((s_patch, sg_patch, g_patch, sr_patch, r_patch)):
    ax.add_collection(
        PatchCollection(patches, facecolor=colors[idx], alpha=a_val, edgecolor='none', zorder=10)
    )

ax.legend(
    (circ1, circ2, circ3, circ4, circ5),
    ('rock', 'sand/rock', 'Gravel', 'Sand/Gravel', 'sand'),
    numpoints=1,
    loc='best',
)

# print(...) with a single string argument behaves the same on Python 2 and 3.
print('Now plotting focal statistics...')

# Bottom row of the 5x2 grid: one rounded pivot table per column.
for grid_col, stats in enumerate((pivot_table_28, pivot_table_31)):
    ax = plt.subplot2grid((5, 2), (4, grid_col))
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    # White frame pushed behind everything else so only the table is visible.
    # .values() works on both Python 2 and 3 (itervalues() is Python 2 only).
    for sp in ax.spines.values():
        sp.set_color('w')
        sp.set_zorder(0)
    # NOTE(review): colWidths assumes the pivot tables have 7 columns - confirm.
    the_table = table(ax, stats.round(3), loc='center left', colWidths=[0.1] * 7)


plt.tight_layout()
# Save BEFORE showing: once a blocking plt.show() returns the figure has been
# closed, and a later savefig() would write a new, empty figure.
print('Now Saving figure...')
plt.savefig(r"C:\workspace\Reach_4a\Multibeam\mb_sed_class\output\mb_aug_may_comparison_diverging_cmap.png", dpi=1000)
plt.show()
예제 #48
0
    def plot2D(self):
        """Plot the current data, relying on pandas plot functionality.

        Plot options are read from ``self.mplopts.kwds``; only the options
        listed as valid for the selected plot kind are forwarded to
        ``self._doplot``. When a grouping column ("by") is set, every group is
        drawn on its own subplot of ``self.fig``; otherwise a single call to
        ``self._doplot`` draws on ``self.ax``. Optionally a table of the child
        table's data is overlaid, then the general figure options are applied
        and the canvas is redrawn.

        Returns:
            None. The method returns early, without finishing the plot, when
            the object has no ``data`` attribute, when ``self._checkNumeric``
            reports False, when the grouping column is not in the data, or
            when the grouping yields no groups or more than 30 groups.
        """

        if not hasattr(self, "data"):
            return
        # needs cleaning up
        # Keyword arguments that may be forwarded for each plot kind.
        valid = {
            "line": [
                "alpha",
                "colormap",
                "grid",
                "legend",
                "linestyle",
                "linewidth",
                "marker",
                "subplots",
                "rot",
                "logx",
                "logy",
                "sharey",
                "kind",
            ],
            "scatter": [
                "alpha",
                "grid",
                "linewidth",
                "marker",
                "subplots",
                "s",
                "legend",
                "colormap",
                "sharey",
                "logx",
                "logy",
                "use_index",
                "c",
                "cscale",
                "colorbar",
                "bw",
            ],
            "pie": ["colormap", "legend"],
            "hexbin": ["alpha", "colormap", "grid", "linewidth"],
            "bootstrap": ["grid"],
            "bar": [
                "alpha",
                "colormap",
                "grid",
                "legend",
                "linewidth",
                "subplots",
                "sharey",
                "logy",
                "stacked",
                "rot",
                "kind",
            ],
            "barh": ["alpha", "colormap", "grid", "legend", "linewidth", "subplots", "stacked", "rot", "kind", "logx"],
            "histogram": [
                "alpha",
                "linewidth",
                "grid",
                "stacked",
                "subplots",
                "colormap",
                "sharey",
                "rot",
                "bins",
                "logx",
                "logy",
            ],
            "heatmap": ["colormap", "rot"],
            "area": ["alpha", "colormap", "grid", "linewidth", "legend", "stacked", "kind", "rot", "logx"],
            "density": [
                "alpha",
                "colormap",
                "grid",
                "legend",
                "linestyle",
                "linewidth",
                "marker",
                "subplots",
                "rot",
                "kind",
            ],
            "boxplot": ["rot", "grid", "logy", "colormap", "alpha", "linewidth"],
            "scatter_matrix": ["alpha", "linewidth", "marker", "grid", "s"],
            "contour": ["linewidth", "colormap", "alpha"],
            "imshow": ["colormap", "alpha"],
        }

        data = self.data
        # Kept as an equality test so a None result is not treated as False.
        if self._checkNumeric(data) == False:
            self.showWarning("no numeric data to plot")
            return
        # get all options from the mpl options object
        kwds = self.mplopts.kwds
        kind = kwds["kind"]
        show_table = kwds["table"]
        by = kwds["by"]
        by2 = kwds["by2"]
        errorbars = kwds["errorbars"]
        useindex = kwds["use_index"]
        bw = kwds["bw"]

        # valid kwd args for this plot type
        kwargs = {k: kwds[k] for k in valid[kind] if k in kwds}
        # initialise the figure
        self._initFigure()
        ax = self.ax
        # plt.style.use('dark_background')

        if by != "":
            # groupby needs to be handled per group so we can add all the axes to
            # our figure correctly
            if by not in data.columns:
                self.showWarning("the grouping column must be in selected data")
                return
            if by2 != "" and by2 in data.columns:
                by = [by, by2]
            g = data.groupby(by)
            size = len(g)
            if size > 30:
                self.showWarning("too many groups to plot")
                return
            if size == 0:
                # Nothing to lay out; the grid computation below would divide
                # by zero.
                self.showWarning("no groups to plot")
                return
            self.ax.set_visible(False)
            kwargs["subplots"] = False
            # Near-square grid. add_subplot needs real integers, and float()
            # keeps the division exact on Python 2 as well.
            nrows = int(round(np.sqrt(size)))
            ncols = int(np.ceil(size / float(nrows)))

            for i, (n, df) in enumerate(g, 1):
                ax = self.fig.add_subplot(nrows, ncols, i)
                kwargs["legend"] = False  # remove axis legends
                d = df.drop(by, axis=1)  # remove grouping columns
                self._doplot(d, ax, kind, False, errorbars, useindex, bw=bw, kwargs=kwargs)
                ax.set_title(n)

            # The shared figure legend is built from the last group's axes.
            handles, labels = ax.get_legend_handles_labels()
            self.fig.legend(handles, labels, loc="center right")
            self.fig.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1, hspace=0.25)
            axs = self.fig.get_axes()
            # self.ax = axs[0]
        else:
            axs = self._doplot(data, ax, kind, kwds["subplots"], errorbars, useindex, bw=bw, kwargs=kwargs)
        if show_table == True:
            # pandas.plotting is the current home of table(); the old module is
            # only tried for pandas releases that predate it.
            try:
                from pandas.plotting import table as draw_table
            except ImportError:
                from pandas.tools.plotting import table as draw_table

            if self.table.child is not None:
                tabledata = self.table.child.model.df
                # NOTE(review): in the grouped case axs is the list returned by
                # fig.get_axes(), not a single axes - confirm table() copes.
                draw_table(
                    axs,
                    np.round(tabledata, 2),
                    loc="upper right",
                    colWidths=[0.1] * len(tabledata.columns),
                )

        # set options general for all plot types
        # annotation options are separate
        lkwds = self.labelopts.kwds.copy()
        lkwds.update(kwds)
        self.setFigureOptions(axs, lkwds)
        # Float literal so the ratio is not truncated by Python 2 integer division.
        scf = 12.0 / kwds["fontsize"]
        try:
            self.fig.tight_layout()
            self.fig.subplots_adjust(top=0.9)
        except Exception:
            # Best effort: fall back to fixed margins scaled by the font size.
            self.fig.subplots_adjust(left=0.1, right=0.9, top=0.89, bottom=0.1, hspace=0.4 / scf, wspace=0.2 / scf)
            print("tight_layout failed")
        # redraw annotations
        self.labelopts.redraw()
        self.canvas.draw()
        return
# Move into the competition data folder (workspace is started outside this excerpt).
workspace = workspace + 'Data Mining & Text Mining\\kaggle competitions\\san francisco crime'
os.chdir(workspace)

# Load the training set, parsing the 'Dates' column and using it as the index.
train = './train.csv'
crimeData = pd.read_csv(train, sep=',', index_col='Dates', parse_dates=['Dates'])

# Switch to the project folder; the files saved below are written relative to it.
workspace = "C:\\Users\\Giammi\\OneDrive\\Università\\Machine Learning\\project"
os.chdir(workspace)

# First five rows, rendered as a table on a frameless axes with hidden axis lines.
head = crimeData.iloc[:5]

ax = plt.subplot(4, 1, 1, frame_on=False)
for hidden_axis in (ax.xaxis, ax.yaxis):
    hidden_axis.set_visible(False)

table(ax, head)

plt.savefig('mytable.png')


# INSPECTION ======================================================================================
pylab.rcParams.update({'figure.figsize': (14.5, 6.0)})

# Occurrences per crime category, as returned by value_counts().
crimes_rating = crimeData['Category'].value_counts()
print('San Francisco Crimes\n')
print('Category\t\tNumber of occurences')
print(crimes_rating)

# One position per category among the first `top` entries.
top = 18
y_pos = np.arange(len(crimes_rating[:top]))