Example #1
    def line_pie_chart():
        """円グラフを画像として作成するためのメソッド"""
        data = [1011, 530, 355, 200, 40, 11]
        label = [
            'hoge(' + str(1011) + ')', 'fuga(' + str(530) + ')',
            'piyo(' + str(355) + ')', 'pugya(' + str(200) + ')',
            'dododododododo(' + str(40) + ')', 'ga(' + str(11) + ')'
        ]

        ### Styling boilerplate ###
        plt.style.use('ggplot')
        plt.rcParams.update({'font.size': 15})

        ### Parameters ###
        size = (9, 5)  # keep the figure wide (9 x 5) so there is room for the legend
        col = cm.Spectral(np.arange(len(data)) /
                          float(len(data)))  # pick any colormap you like for the colors

        ###pie###
        plt.figure(figsize=size, dpi=100)
        plt.pie(data,
                colors=col,
                counterclock=False,
                startangle=90,
                autopct=lambda p: '{:.1f}%'.format(p) if p >= 5 else '')
        plt.subplots_adjust(left=0, right=0.7)
        plt.legend(label,
                   fancybox=True,
                   loc='center left',
                   bbox_to_anchor=(0.9, 0.5))
        plt.axis('equal')
        plt.savefig('../../config/img/figure.png',
                    bbox_inches='tight',
                    pad_inches=0.05)
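A minimal usage sketch, assuming the usual numpy/matplotlib aliases that the snippet above relies on but never imports (the function is written as a method yet takes no arguments, so it can be called directly once dedented):

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm

line_pie_chart()  # writes figure.png under ../../config/img/ (the directory must already exist)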
Example #2
def plot_pcoords(df, labels, tag, path_to_plot):
    columns_to_drop = [col for col in df.columns if col not in labels]
    df = df.drop(axis=1, labels=columns_to_drop)
    _, axes = plt.subplots(ncols=len(labels) - 1,
                           sharey=False,
                           figsize=(20, 8))

    for i, ax in enumerate(axes):
        for ix in df.index:
            ax.plot(
                [0, 1],
                df.loc[ix, labels[i]:labels[i + 1]].astype(float),
                c=cm.Spectral(df.loc[ix, "Value"]),
            )
            ax.set_xlim((0, 1))
            ax.set_ylim((-0.05, 1.05))
            try:
                label = labels[i].split("_")[1]
            except IndexError:
                label = labels[i]
            ax.set_xticklabels([label])

    ax = plt.twinx(axes[-1])
    ax.set_xticks([0, 1])
    ax.set_ylim((-0.05, 1.05))
    ax.set_xlim((0, 1))
    ax.set_xticklabels([labels[-2], labels[-1]])
    plt.subplots_adjust(wspace=0)
    plt.savefig(f"{path_to_plot}/pcoords_{tag}.pdf")
    plt.close()
Example #3
def Draw(silhouette_avg, sample_silhouette_values, X, y, k):
    # Create the figure with a single subplot
    fig, ax1 = plt.subplots(1)
    fig.set_size_inches(18, 7)
    # The first subplot holds the silhouette coefficient values,
    # whose range is [-1, 1]
    ax1.set_xlim([-0.1, 1])
    # the extra (k + 1) adds a little vertical padding so the clusters read more clearly
    ax1.set_ylim([0, len(X) + (k + 1)])
    y_lower = 0
    for i in range(k):  # iterate over each cluster
        ith_cluster_silhouette_values = sample_silhouette_values[y == i]
        ith_cluster_silhouette_values.sort()
        size_cluster_i = ith_cluster_silhouette_values.shape[0]
        y_upper = y_lower + size_cluster_i
        color = cm.Spectral(float(i) / k)  # pick a color for this cluster
        ax1.fill_betweenx(np.arange(y_lower, y_upper),
                          0,
                          ith_cluster_silhouette_values,
                          facecolor=color,
                          edgecolor=color,
                          alpha=0.7)  # alpha just controls the fill transparency
        # label the silhouette band with its cluster number
        ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))
        # compute y_lower for the next cluster
        y_lower = y_upper
    # draw a vertical dashed line at the average silhouette score
    ax1.axvline(x=silhouette_avg, color='red', linestyle="--")
    plt.show()
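A hedged usage sketch for Draw above; the silhouette inputs come from scikit-learn, and the make_blobs/KMeans toy data is purely illustrative, not part of the original:

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_samples, silhouette_score

k = 4
X, _ = make_blobs(n_samples=300, centers=k, random_state=0)
y = KMeans(n_clusters=k, n_init=10, random_state=0).fit_predict(X)
Draw(silhouette_score(X, y), silhouette_samples(X, y), X, y, k)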
Example #4
def silhouette(Ks, km, df, output_path):
    score_file = Path(output_path, 'sil_avg.txt')
    silfile = open(score_file,
                   'w')  # save the average silhouette score for all clusters
    # following lists all indexed by k
    kmeans = [km0.fit(df) for km0 in km]
    labels = [m.labels_ for m in kmeans]
    #print('type of labels', type(labels))
    #print('shape of labels', len(labels))
    #print('each array size', labels[0].shape)
    #print('3rd array: ', labels[3])
    sil_avg = [silhouette_score(df, label)
               for label in labels]  # average silhouette score for all samples
    for k in range(len(kmeans)):
        line = 'For n_cluster = %d, average silhouette score = %.5f\n' % (
            k + 2, sil_avg[k])
        silfile.write(line)
        print(line)
    silfile.close()
    sil_each = [silhouette_samples(df, label)
                for label in labels]  # sil score for every sample
    #print('type and shape of sil_each', type(sil_each[1]), sil_each[1].shape)
    for n_cluster in Ks:
        k = Ks.index(n_cluster)
        # for each k, plot individual figure for silhouette value distribution over all samples
        fig = plt.figure()  # plt.plot() does not create a new figure
        plt.xlim([-1, 1])
        plt.ylim([0, len(df) + (n_cluster + 1) * 10])  # (n_cluster+1) * 10 is the blank between clusters

        low_y = 0
        print('%d clusters' % n_cluster)
        for i in range(n_cluster):
            ith_cluster_sil_values = sil_each[k][labels[k] == i]
            #print('type and shape of %d th_cluster_sil_values' % i, type(ith_cluster_sil_values), ith_cluster_sil_values.shape)
            ith_cluster_sil_values.sort()
            size_cluster_i = ith_cluster_sil_values.shape[0]
            high_y = low_y + size_cluster_i
            color = cm.Spectral(float(i) /
                                n_cluster)  # spread color for each cluster
            plt.fill_betweenx(np.arange(low_y, high_y),
                              0,
                              ith_cluster_sil_values,
                              facecolor=color,
                              edgecolor=color)
            plt.text(-0.05, low_y + 0.5 * size_cluster_i, str(
                i))  # text label cluster number in the middle of each cluster
            low_y = high_y + 10
        plt.title('Silhouette scores for clusters with %d clusters' %
                  n_cluster)
        plt.xlabel('Silhouette coefficient values')
        plt.ylabel('Cluster labels')
        plt.axvline(
            x=sil_avg[k], color='red',
            linestyle='--')  # add vertical line to show average sil score
        plt.savefig(Path(output_path, ('sil%d.eps' % n_cluster)))
        plt.show()
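A usage sketch for silhouette above; the random DataFrame and the choice of Ks are assumptions (the function labels results as k + 2, so it implicitly expects Ks to start at 2 and increase by 1, and it relies on Path, plt, cm and sklearn's silhouette metrics already being in scope):

import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

Ks = [2, 3, 4]
km = [KMeans(n_clusters=k, n_init=10, random_state=0) for k in Ks]
df = pd.DataFrame(np.random.rand(200, 5))
silhouette(Ks, km, df, '.')  # writes sil_avg.txt and sil<k>.eps into the current directory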
Example #5
def out_graph(amount_map, price_jpy):

    amount = list(amount_map.values())
    c_code = list(amount_map.keys())

    total_jpy = 0.0
    values = {}
    for code in c_code:
        if price_jpy[code] != False:
            values[code] = amount_map[code] * price_jpy[code]
            total_jpy += amount_map[code] * price_jpy[code]

    label = [
        "{c}(JPY:{v:,d}) {a:,f}".format(c=c,
                                        v=int(amount_map[c] * price_jpy[c]),
                                        a=amount_map[c]) for c in c_code
    ]

    plt.style.use("ggplot")
    plt.rcParams.update({"font.size": 15})

    size = (9, 5)
    col = cm.Spectral(np.arange(len(amount)) / float(len(amount)))

    def make_autopct(values):
        def my_autopct(pct):
            total = sum(values)
            val = int(round(pct * total / 100.0))
            return "{p:.2f}%".format(p=pct) if val > 100000 else ""

        return my_autopct

    plt.figure(figsize=size, dpi=100)
    plt.pie(list(values.values()),
            colors=col,
            counterclock=False,
            startangle=90,
            autopct=make_autopct(list(values.values())))
    plt.subplots_adjust(left=0, right=0.7)
    plt.legend(label,
               fancybox=True,
               loc="center left",
               bbox_to_anchor=(0.9, 0.5))
    plt.axis("equal")
    plt.text(-1.7, 1, "{:,d}JPY".format(int(total_jpy)), fontsize=14)
    plt.savefig("pie_graph.png", bbox_inches="tight", pad_inches=0.05)
Example #6
    def createPiechart(data, label, title, figpath):

        ### Styling boilerplate ###
        plt.style.use('ggplot')
        plt.rcParams.update({'font.size':15})

        ### Parameters ###
        size=(7,3.5) # keep the figure wide to make room for the legend
        col=cm.Spectral(np.arange(len(data))/float(len(data))) # pick any colormap you like for the colors

        ###pie###
        plt.figure(figsize=size,dpi=100)
        plt.title(title)
        plt.pie(data,colors=col,counterclock=False,startangle=90,autopct=lambda p:'{:.1f}%'.format(p) if p>=1.0 else '')
        plt.subplots_adjust(left=0,right=0.7)
        plt.legend(label,fancybox=True,loc='center left',bbox_to_anchor=(0.9,0.5))
        plt.axis('equal')
        plt.savefig(figpath,bbox_inches='tight',pad_inches=0.05)
Example #7
def pie_chart(data: list, label: list, file_name: str):
    plt.style.use('ggplot')
    plt.rcParams.update({'font.size': 15})
    size = (18, 10)  # keep the figure wide to make room for the legend
    col = cm.Spectral(np.arange(len(data)) / float(len(data)))
    plt.figure(figsize=size, dpi=100)
    plt.pie(data,
            colors=col,
            counterclock=False,
            startangle=90,
            autopct=lambda p: '{:0.1f}%'.format(p) if p >= 5 else '')
    plt.subplots_adjust(left=0, right=0.7)
    plt.legend(label,
               fancybox=True,
               loc='center left',
               bbox_to_anchor=(0.9, 0.5))
    plt.axis('equal')
    plt.savefig(f"{file_name}", bbox_inches='tight', pad_inches=0.05)
    return True
Example #8

def find_data_DBSCAN(plt, sample, epsln, minSam, title):
    X = sample[['latitude', 'longitude']]
    # Run DBSCAN on the samples
    # Need to tweak eps
    db = DBSCAN(eps=epsln, min_samples=minSam).fit(X)
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_

    # Number of clusters in labels, ignoring noise if present.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    print(f'Estimated number of clusters for {title}: {n_clusters_}')

    # Black removed and is used for noise instead.
    unique_labels = set(labels)
    colors = [
        cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))
    ]
    for k, col in zip(unique_labels, colors):
        if k == -1:
            # Black used for noise.
            col = [0, 0, 0, 1]

        class_member_mask = (labels == k)

        xy = X[class_member_mask & core_samples_mask]
        plt.plot(xy.latitude,
                 xy.longitude,
                 '.',
                 markerfacecolor=tuple(col),
                 markeredgecolor='k',
                 markersize=14)

        xy = X[class_member_mask & ~core_samples_mask]
        plt.plot(xy.latitude,
                 xy.longitude,
                 '.',
                 markerfacecolor=tuple(col),
                 markeredgecolor='k',
                 markersize=6)
    plt.set_title(f'{title}:{n_clusters_}')
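In the snippet above the first parameter is named plt but is used like a matplotlib Axes (plt.set_title, plt.plot), so a usage sketch passes an Axes; the random latitude/longitude points and the eps/min_samples values are assumptions:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
from sklearn.cluster import DBSCAN

sample = pd.DataFrame({
    'latitude': np.random.uniform(35.6, 35.7, 500),
    'longitude': np.random.uniform(139.6, 139.7, 500),
})
fig, ax = plt.subplots()
find_data_DBSCAN(ax, sample, epsln=0.01, minSam=5, title='demo')
plt.show()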
Example #9

def plot_hyperboloid(current_ax=None, style='surface', title='$Hyperboloid$'):
    """ Apply hyperboloid mapping to sample grid and plot figure """
    if current_ax is None:
        fig = plt.figure(figsize=plt.figaspect(1))
        ax = fig.add_subplot(projection='3d')  # fig.gca(projection=...) was removed in newer matplotlib
    else:
        ax = current_ax
    X = np.arange(-2, 2, 0.2)
    Y = np.arange(-2, 2, 0.2)
    X, Y = np.meshgrid(X, Y)
    Z = np.sqrt(X**2 + Y**2 + 1)
    if style == 'wireframe':
        ax.plot_wireframe(X, Y, Z, rstride=2, cstride=2, linewidth=1, edgecolor='black')
    else:
        zcolors = Z - min(Z.flat)
        zcolors = zcolors/max(zcolors.flat)
        ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=cm.Spectral(zcolors), linewidth=1)        
    ax.set_title(title, size=20)
    ax.set_xlabel('$x$')
    ax.set_ylabel('$y$')
    ax.set_zlabel('$z$')
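A minimal sketch of the setup the hyperboloid snippet assumes, calling it once per style; the side-by-side layout is an assumption:

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm

fig = plt.figure(figsize=(10, 5))
ax1 = fig.add_subplot(1, 2, 1, projection='3d')
ax2 = fig.add_subplot(1, 2, 2, projection='3d')
plot_hyperboloid(current_ax=ax1, style='surface')
plot_hyperboloid(current_ax=ax2, style='wireframe', title='$Wireframe$')
plt.show()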
Example #10
def plot(preprocess):
    labels, data = dataset.get(subset="train",
                               preprocess=preprocess,
                               categories=categories,
                               verbose=True)
    labels = np.array(labels)

    print "Getting TF IDF weights"

    vec = TfidfVectorizer(max_df=0.5,
                          max_features=10000,
                          min_df=2,
                          stop_words='english',
                          use_idf=True,
                          ngram_range=(1, 1))
    X = vec.fit_transform(data)

    print(repr(X))

    print "Reducing dimensions to 50"

    X_reduced = TruncatedSVD(n_components=50, random_state=0).fit_transform(X)

    X_embedded = PCA(n_components=2).fit_transform(X_reduced)

    names = np.unique(labels)
    print(names)
    num_clusters = len(names)
    fig = plt.figure(frameon=False)

    colors = iter(cm.Spectral(np.linspace(0, 1, num_clusters)))

    for name in names:
        X = X_embedded[labels == name]
        plt.scatter(X[:, 0], X[:, 1], marker='x', label=name, color=next(colors))  # use the prepared Spectral colors

    plt.title("PCA (Preprocessed)" if preprocess else "PCA")
    plt.xticks([])
    plt.yticks([])
    plt.legend()
Example #11
def create_pie_fig(count, label, savename):

    plt.style.use('ggplot')
    plt.rcParams.update({'font.size': 18})
    # font that supports Japanese glyphs
    plt.rcParams['font.family'] = 'IPAPGothic'
    size = (8, 8)
    col = cm.Spectral(np.arange(len(count)) /
                      float(len(count)))  # pick any colormap you like for the colors
    plt.figure(figsize=size, dpi=200)
    plt.pie(count,
            colors=col,
            counterclock=False,
            startangle=90,
            autopct=lambda p: '{:.1f}%'.format(p) if p >= 5 else '')
    plt.subplots_adjust(left=0, right=0.9)
    plt.legend(label,
               fancybox=True,
               loc='center left',
               bbox_to_anchor=(0.9, 0.5))
    plt.axis('equal')
    plt.savefig(savename, bbox_inches='tight', pad_inches=0.05)
Example #12
def plot_basis_scatter(embedding, ax):
    # A hack, because we know that each embedding consists of
    # 3 clusters of 1000 points each. These clusters are also
    # ordered (1st cluster: rows 0-999, 2nd: 1000-1999, 3rd: 2000-2999)
    xx = embedding[:, 0]
    yy = embedding[:, 1]
    num_basis = 3  # Rock, paper, scissors
    colors = cm.Spectral(np.linspace(0, 1, num_basis))

    basis_borders = [0, 1000, 2000]
    xx_basis_1 = xx[:basis_borders[1]]
    yy_basis_1 = yy[:basis_borders[1]]

    xx_basis_2 = xx[basis_borders[1]:basis_borders[2]]
    yy_basis_2 = yy[basis_borders[1]:basis_borders[2]]

    xx_basis_3 = xx[basis_borders[2]:]
    yy_basis_3 = yy[basis_borders[2]:]

    ax.scatter(xx_basis_1, yy_basis_1, color=colors[0])
    ax.scatter(xx_basis_2, yy_basis_2, color=colors[1])
    ax.scatter(xx_basis_3, yy_basis_3, color=colors[2])
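A usage sketch for plot_basis_scatter; the random 3000 x 2 array simply mimics the three ordered 1000-point clusters described in the comment and is not real embedding data:

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm

embedding = np.random.randn(3000, 2)  # placeholder for a real 2-D embedding
fig, ax = plt.subplots()
plot_basis_scatter(embedding, ax)
plt.show()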
Example #13
def plot_isovist_sec_by_pt(ax, frames):
    area_isovist = []
    plotpt_x = []
    plotpt_y = []
    texts = []
    for line in tqdm(frames):
        # if the frame line has no split points
        if isinstance(line.isovist_sec, type(None)):
            pass
        else:
            isovists = [_ for _ in line.isovist_sec if not isinstance(_.positon, type(None))]
            if len(isovists) == 0:  # if no road was found on the split section
                pass
            else:
                for i in isovists:
                    area_isovist.append(i.isovist.area)
                    plotpt_x.append(i.positon.x)
                    plotpt_y.append(i.positon.y)
                    texts.append("{}:{}".format(line.id, i.id))
    for i, area in enumerate(area_isovist):
        ax.scatter(plotpt_x[i], plotpt_y[i], color=cm.Spectral(area / max(area_isovist)))
        ax.text(plotpt_x[i], plotpt_y[i], texts[i], c='b')
Example #14

def save_latent_vis(path, data, classes, encoder, epoch, num_classes):

    z = encoder.predict(data)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    colors = cm.Spectral(np.linspace(0, 1, num_classes))

    xx = z[:, 0]
    yy = z[:, 1]

    labels = range(num_classes)

    # plot the 2D data points, masking by the per-sample class array
    # (`labels == i` on a range object is never a boolean mask; this
    # assumes `classes` holds the integer class index of each sample)
    for i in range(num_classes):
        ax.scatter(xx[classes == i],
                   yy[classes == i],
                   color=colors[i],
                   label=labels[i],
                   s=5)

    plt.axis('tight')
    plt.savefig(path + '_latent_vis_%d.png' % (epoch + 1))
Example #15
    def plot_graph(self):
        X = self.X
        plot_fig = plt.figure(figsize=(5, 5))
        plt.xlim(-1, 1)
        plt.ylim(-1, 1)
        if self.cen and self.clusters:
            cen = self.cen
            clus = self.clusters
            K = self.K
            for m, clu in clus.items():
                cs = cm.Spectral(1. * m / self.K)
                plt.plot(cen[m][0], cen[m][1], 'o', marker='*', \
                         markersize=12, color=cs)
                plt.plot(list(zip(*clus[m]))[0], list(zip(*clus[m]))[1], '.', \
                         markersize=8, color=cs, alpha=0.5)
        else:
            plt.plot(list(zip(*X))[0], list(zip(*X))[1], '.', alpha=0.5)

        plot_title = 'K-means Clustering'
        plot_pars = 'N=%s, K=%s' % (str(self.N), str(self.K))
        plt.title('\n'.join([plot_pars, plot_title]), fontsize=16)
        plt.savefig('kpp_N%s_K%s.png' % (str(self.N), str(self.K)), \
                    bbox_inches='tight', dpi=200)
Example #16
def pie(label, data):
    # styling
    plt.style.use('ggplot')
    plt.rcParams.update({'font.size': 15})

    # parameters
    size = (9, 5)
    col = cm.Spectral(np.arange(len(data)) / float(len(data)))

    # pie
    plt.figure(figsize=size, dpi=100)
    plt.pie(data,
            colors=col,
            counterclock=False,
            startangle=90,
            autopct=lambda p: '{:.1f}%'.format(p) if p >= 5 else '')
    plt.subplots_adjust(left=0, right=0.7)
    plt.legend(label,
               fancybox=True,
               loc='center left',
               bbox_to_anchor=(0.9, 0.5))
    plt.axis('equal')
    plt.savefig('tt_wr.png', bbox_inches='tight', pad_inches=0.05)
    plt.show()
Example #17
fh.setLevel(logging.DEBUG)
ch = logging.StreamHandler()  # console handler
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
fh.setFormatter(formatter)
logger.addHandler(fh)
logger.addHandler(ch)
logger.info('Initializing %s', __name__)

colors = [
    cm.RdBu(0.85),
    cm.RdBu(0.7),
    cm.PiYG(0.7),
    cm.Spectral(0.38),
    cm.Spectral(0.25)
]

if __name__ == '__main__':
    # parameter setting
    gps_dir = s.GPS_DIR
    gps_counter_file = s.GPS_COUNTER_FILE
    TWEET_COUNTER_FILE = s.TWEET_COUNTER_FILE
    timestart = s.TIMESTART
    timestart_text = s.TIMESTART_TEXT
    timeend = s.TIMEEND
    timeend_text = s.TIMEEND_TEXT
    aoi = s.AOI
    unit_temporal = s.UNIT_TEMPORAL
    # unit_spatial = s.UNIT_SPATIAL
Example #18
    # Compute the silhouette scores for each sample
    sample_silhouette_values = silhouette_samples(X_norm, cluster_labels)

    y_lower = 10
    for i in range(n_clusters):
        # Aggregate the silhouette scores for samples belonging to
        # cluster i, and sort them
        ith_cluster_silhouette_values = \
            sample_silhouette_values[cluster_labels == i]

        ith_cluster_silhouette_values.sort()

        size_cluster_i = ith_cluster_silhouette_values.shape[0]
        y_upper = y_lower + size_cluster_i

        color = cm.Spectral(float(i) / n_clusters)
        ax1.fill_betweenx(np.arange(y_lower, y_upper),
                          0,
                          ith_cluster_silhouette_values,
                          facecolor=color,
                          edgecolor=color,
                          alpha=0.7)

        # Label the silhouette plots with their cluster numbers at the middle
        ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))

        # Compute the new y_lower for next plot
        y_lower = y_upper + 10  # 10 for the 0 samples

    ax1.set_title("The silhouette plot for the various clusters.")
    ax1.set_xlabel("The silhouette coefficient values")
Example #19
def plot_benchmark_simple(rmses_spherical, model_description, lead_times, input_dir, output_dir, \
                          title=True, filename=None, names=[]):
    """
    Plot rmse of different models vs Weyn et al

    :param rmses_spherical: list of xarrays or xarray
    :param model_description: string
    :param lead_times: xarray
    :param input_dir: string
    :param output_dir: string
    :param title: boolean
    :param filename: string
    :param names: if rmses_spherical is a list, names should be a list of same length with the name of each model
    :return:
    """
    
    lead_times0 = np.arange(6, lead_times[-1]+6, 6)
    colors = cm.Spectral(np.linspace(0,1,len(rmses_spherical)))
    
    xlabels = [str(t) if t%4 == 0 else '' for t in lead_times] if lead_times[0] < 12 else lead_times
    
    # RMSE baselines
    
    rmses_weyn = xr.open_dataset(input_dir + 'rmses_weyn.nc').rename({'z500':'z', 't850':'t'})
    
    f, axs = plt.subplots(1, 2, figsize=(17, 6))
    if title:
        f.suptitle('RMSE between forecast and observation as a function of forecast time', fontsize=24, y=1.07)

    axs[0].plot(lead_times0, rmses_weyn.z.values[:len(lead_times0)], label='Weyn 2020', linestyle='-')
    axs[1].plot(lead_times0, rmses_weyn.t.values[:len(lead_times0)], label='Weyn 2020', linestyle='-')

    if len(names) == 0:
        axs[0].plot(lead_times, rmses_spherical.z.values, label='Ours', color='black', marker='o')
        axs[1].plot(lead_times, rmses_spherical.t.values, label='Ours', color='black', marker='o')
    else:
        for rmse, name, c in zip(rmses_spherical, names, colors):
            axs[0].plot(lead_times, rmse.z.values, label=name, color=c, marker='o')
            axs[1].plot(lead_times, rmse.t.values, label=name, color=c, marker='o')

    axs[0].set_ylabel('RMSE [$m^2 s^{-2}$]', fontsize=18)
    axs[0].set_xlabel('Forecast time [h]', fontsize=18)
    axs[0].set_title('Z500', fontsize=22)
    axs[0].tick_params(axis='both', which='major', labelsize=16)
    axs[0].set_xticks(lead_times)
    axs[0].set_xticklabels(xlabels, fontsize=16)
    axs[0].legend(loc='upper left', fontsize=15)

    axs[1].set_ylabel('RMSE [K]', fontsize=18)
    axs[1].set_xlabel('Forecast time [h]', fontsize=18)
    axs[1].set_title('T850', fontsize=22)
    axs[1].set_xticks(lead_times)
    axs[1].set_xticklabels(xlabels, fontsize=16)
    axs[1].tick_params(axis='both', which='major', labelsize=16)
    axs[1].legend(loc='upper left', fontsize=15)


    if not filename:
        filename = model_description + '_benchmark.png'
    
    plt.tight_layout()
    plt.savefig(output_dir + filename, bbox_inches='tight')

    plt.show()
Example #20
                k,
                "the best run is",
                ds,
                "but this has an anomcorr of",
                bestano,
                "which indicates this may not be suitable for phasing.",
            )
            best_results += [(k, bestano, ds)]
    except:
        print("No data at", k, "A passed the sanity check.")

x_val = [x[0] for x in best_results]
y_val = [x[1] for x in best_results]
c_val = [x[2] for x in best_results]
fig, ax = plt.subplots(1, 1)
ax.scatter(x_val, y_val, c=cm.Spectral([i * 10 for i in c_val]))
ax.plot(x_val, y_val, "b-")
plt.axhline(y=9, color="r", linestyle="--")
ax.invert_xaxis()
plt.show()
fig.savefig("ResolutionVsAnomcorr.jpg", dpi=600)

best_run = mode(c_val)
print("\nThe best run appears to be number", best_run)

os.mkdir("best")
shutil.copy2(
    os.path.join(path, str(best_run)) + "/XSCALE.INP", os.path.join(path, "best")
)
subprocess.run(["xscale_par"], cwd=os.path.join(path, "best"))
Example #21

    def draw_tracks(self, frame_clr):
        drawable_tracks = self.get_drawable_tracks()
        for index, tr in enumerate(drawable_tracks):
            track_color = tuple(256 * x for x in cm.Spectral(index * 100 % 255)[0:3])
            cv2.polylines(frame_clr, [np.int32([tup[0:2] for tup in tr])], False, track_color)
Example #22

x0 = np.array([10, 10])  # initial condition
A = np.array([
    [0, 1],  # system dynamics matrix
    [0, 0]
])
B = np.array([[0], [1]])  # b matrix
K = np.array([[-2, -3]])  # control law

track = odeint(linear_sys, x0, t, args=(A, B, K))

fig = plt.figure(figsize=(10, 5))
plt.style.use('seaborn-deep')
# temp = cm.winter(t / 10)
# plt.plot(track[:, 0], track[:, 1], lw=3, c=cm.hot(t / 10))
plt.subplot(1, 2, 1)
plt.scatter(track[:, 0], track[:, 1], linewidths=0.5, c=cm.Spectral(t / 5))
plt.xlabel('x1')
plt.ylabel('x2')

plt.subplot(1, 2, 2)
plt.plot(t, track[:, 0], lw=3, label='x1')
plt.plot(t, track[:, 1], lw=3, label='x2')
plt.xlabel('t')
plt.legend()
# plt.savefig('./linear_fdc.png')
plt.show()

# ==========================
# plot the phase of the dynamics system
num_steps = 11
Y, X = np.mgrid[-25:25:(num_steps * 1j), -25:25:(num_steps * 1j)]
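The script above uses linear_sys and t without defining them; a plausible sketch under stated assumptions (state feedback u = Kx and a 0-5 s time grid inferred from the cm.Spectral(t / 5) coloring):

import numpy as np
from scipy.integrate import odeint  # assumed source of odeint

t = np.linspace(0, 5, 200)  # assumed time grid

def linear_sys(x, t, A, B, K):
    # assumed closed-loop dynamics dx/dt = (A + B K) x, i.e. u = K x
    return (A + B @ K) @ x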
Example #23
# print(data)

# compare the trajectory with nonlinear-RL
# phase
plt.style.use('seaborn-deep')
fig = plt.figure(figsize=(8, 6))
num_steps = 11
g = 10
res = [-0.067, 0.00078, -0.031, -15.41, -9.28]
Y, X = np.mgrid[-4 * sqrt(g):4 * sqrt(g):(num_steps * 1j),
                -pi:pi:(num_steps * 1j)]
U = Y
V = (res[0] * X * X + res[1] * Y * Y + res[2] * X * Y
     + res[3] * X + res[4] * Y + g * np.sin(X))
speed = np.sqrt(U**2 + V**2)
plt.streamplot(X, Y, U, V, color=speed, cmap='PuBu', density=2)
plt.scatter(data[0],
            data[1],
            s=30,
            c=cm.Spectral(
                np.linspace(0,
                            len(data[0]) * 0.1, len(data[0])) / 40))
plt.plot(data[0], data[1], color='C2', lw=2)
plt.xlabel('x1')
plt.ylabel('x2')
plt.xlim([-pi, pi])
plt.ylim([-4 * sqrt(g), 4 * sqrt(g)])
plt.savefig('./nonlinear_mpc.png')
plt.show()
Example #24
from mpl_toolkits.mplot3d import axes3d
from matplotlib import cm


def squares_sum(w0, w1):
    return w0**2 + w0 * w1 + w1**2


w0, w1 = np.meshgrid(np.arange(-2, 2, 0.1), np.arange(-2, 2, 0.1))

J = squares_sum(w0, w1)
J = (J - J.min()) / (J.max() - J.min())  # normalize to [0, 1] to match the colormap's input range

colors = cm.Spectral(J)
rcount, ccount, _ = colors.shape

fig = plt.figure()
ax = fig.add_subplot(projection='3d')  # fig.gca(projection=...) was removed in newer matplotlib
surf = ax.plot_surface(w0,
                       w1,
                       J,
                       rcount=rcount,
                       ccount=ccount,
                       facecolors=colors,
                       shade=False)
surf.set_facecolor((0, 0, 0, 0))

ax.set_xlabel('$w_0$')
ax.set_ylabel('$w_1$')
Example #25
def df_input(dfUniprot=DataFrame([])):

    num_phylum = len(dfUniprot.PHYLUM.drop_duplicates())
    from matplotlib import cm
    set3 = [
        matplotlib.colors.rgb2hex(tuple(i))
        for i in cm.Set3(np.arange(12) / 12.)
    ]
    set2 = [
        matplotlib.colors.rgb2hex(tuple(i))
        for i in cm.Set2(np.arange(8) / 8.)
    ]
    set1 = [
        matplotlib.colors.rgb2hex(tuple(i))
        for i in cm.Set1(np.arange(9) / 9.)
    ]
    pastel2 = [
        matplotlib.colors.rgb2hex(tuple(i))
        for i in cm.Pastel2(np.arange(8) / 8.)
    ]
    pastel1 = [
        matplotlib.colors.rgb2hex(tuple(i))
        for i in cm.Pastel1(np.arange(9) / 9.)
    ]
    dark2 = [
        matplotlib.colors.rgb2hex(tuple(i))
        for i in cm.Dark2(np.arange(8) / 8.)
    ]
    paired = [
        matplotlib.colors.rgb2hex(tuple(i))
        for i in cm.Paired(np.arange(12) / 12.)
    ]
    accent = [
        matplotlib.colors.rgb2hex(tuple(i))
        for i in cm.Accent(np.arange(8) / 8.)
    ]
    spectral = [
        matplotlib.colors.rgb2hex(tuple(i))
        for i in cm.Spectral(np.arange(11) / 11.)
    ]
    tab20 = [
        matplotlib.colors.rgb2hex(tuple(i))
        for i in cm.tab20(np.arange(20) / 20.)
    ]
    tab20b = [
        matplotlib.colors.rgb2hex(tuple(i))
        for i in cm.tab20b(np.arange(20) / 20.)
    ]
    tab20c = [
        matplotlib.colors.rgb2hex(tuple(i))
        for i in cm.tab20c(np.arange(20) / 20.)
    ]

    Colors1 = set2 + set1 + dark2 + paired + accent + spectral + tab20 + tab20b + tab20c
    Colors2 = accent + spectral + tab20 + tab20b + tab20c + set1 + set2 + dark2 + paired
    Colors3 = dark2 + paired + accent + spectral + tab20 + tab20b + tab20c + set1 + set2
    Colors4 = tab20b + tab20c + set1 + set2 + dark2 + paired + accent + spectral + tab20
    Colors5 = spectral + tab20 + tab20b + tab20c + set1 + set2 + dark2 + paired + accent

    pie_colors = {
        'Set3': cm.Set3(np.arange(12) / 12.),
        'Set2': cm.Set2(np.arange(8) / 8.),
        'Set1': cm.Set1(np.arange(9) / 9.),
        'Pastel2': cm.Pastel2(np.arange(8) / 8.),
        'Pastel1': cm.Pastel1(np.arange(9) / 9.),
        'Dark2': cm.Dark2(np.arange(8) / 8.),
        'Paired': cm.Paired(np.arange(12) / 12.),
        'Accent': cm.Accent(np.arange(8) / 8.),
        'Spectral': cm.Spectral(np.arange(11) / 11.),
        'tab20': cm.tab20(np.arange(20) / 20.),
        'tab20b': cm.tab20b(np.arange(20) / 20.),
        'tab20c': cm.tab20c(np.arange(20) / 20.)
    }
    circle_colors = {
        'Colors1': Colors1[0:num_phylum],
        'Colors2': Colors2[0:num_phylum],
        'Colors3': Colors3[0:num_phylum],
        'Colors4': Colors4[0:num_phylum],
        'Colors5': Colors5[0:num_phylum]
    }

    def tax_colors(color_list=circle_colors['Colors1'], taxx=dfUniprot):
        tax_cols = [
            'Entry', 'Tax_ID', 'KINGDOM', 'PHYLUM', 'CLASS', 'ORDER', 'FAMILY',
            'GENUS', 'SPECIES', 'Organism'
        ]
        new2 = taxx[tax_cols].drop_duplicates()
        #>>>>>>>>>>>>>>>>>>>>>>>>>>>
        phylum0 = new2.groupby(['PHYLUM'
                                ]).Entry.count().reset_index().sort_values(
                                    by='Entry',
                                    ascending=False).reset_index(drop=True)
        asign_color = {}
        for i, j in zip(phylum0.PHYLUM, color_list):
            if i == 'NA':
                asign_color[i] = 'black'
            else:
                asign_color[i] = j
        phylum0['phy_col'] = list(asign_color.values())
        # Class distribution
        phylum1 = new2.groupby(['PHYLUM', 'CLASS']).Entry.count().reset_index()
        class0 = []
        class0_colors = []
        for i in phylum0.PHYLUM:
            for j in phylum1.PHYLUM:
                if i == j:
                    class0_colors.append(asign_color[j])
                    class0.append(phylum1[phylum1.PHYLUM == i].sort_values(
                        by='Entry', ascending=False).reset_index(drop=True))
                else:
                    pass
        class1 = pd.concat(class0).drop_duplicates()
        class1['class_col'] = class0_colors
        class0_colors_corregido = []
        for index, row in class1.iterrows():
            if row.PHYLUM == 'NA':
                if row.CLASS == 'NA':
                    class0_colors_corregido.append(row.class_col)
                else:
                    class0_colors_corregido.append('grey')
            else:
                if row.CLASS == 'NA':
                    class0_colors_corregido.append('black')
                else:
                    class0_colors_corregido.append(row.class_col)
        class1['class_col'] = class0_colors_corregido
        class11 = class1.groupby(['CLASS'
                                  ]).Entry.sum().reset_index().sort_values(
                                      by='Entry',
                                      ascending=False).reset_index(drop=True)
        class11 = class11.merge(class1[['CLASS',
                                        'class_col']].drop_duplicates(),
                                on='CLASS',
                                how='left')
        # Order distribution
        phylum2 = new2.groupby(['PHYLUM', 'CLASS',
                                'ORDER']).Entry.count().reset_index()
        order0 = []
        order0_colors = []
        for i in phylum0.PHYLUM:
            for j in phylum2.PHYLUM:
                if i == j:
                    order0_colors.append(asign_color[j])
                    order0.append(phylum2[phylum2.PHYLUM == i].sort_values(
                        by='Entry', ascending=False).reset_index(drop=True))
                else:
                    pass
        order1 = pd.concat(order0).drop_duplicates()
        order1['order_col'] = order0_colors
        order0_colors_corregido = []
        for index, row in order1.iterrows():
            if row.PHYLUM == 'NA':
                if row.ORDER == 'NA':
                    order0_colors_corregido.append(row.order_col)
                else:
                    order0_colors_corregido.append('grey')
            else:
                if row.ORDER == 'NA':
                    order0_colors_corregido.append('black')
                else:
                    order0_colors_corregido.append(row.order_col)
        order1['order_col'] = order0_colors_corregido
        order11 = order1.groupby(['ORDER'
                                  ]).Entry.sum().reset_index().sort_values(
                                      by='Entry',
                                      ascending=False).reset_index(drop=True)
        order11 = order11.merge(order1[['ORDER',
                                        'order_col']].drop_duplicates(),
                                on='ORDER',
                                how='left')
        # Genus distribution
        phylum3 = new2.groupby(['PHYLUM', 'CLASS', 'ORDER',
                                'GENUS']).Entry.count().reset_index()
        genus0 = []
        genus0_colors = []
        for i in phylum0.PHYLUM:
            for j in phylum3.PHYLUM:
                if i == j:
                    genus0_colors.append(asign_color[j])
                    genus0.append(phylum3[phylum3.PHYLUM == i].sort_values(
                        by='Entry', ascending=False).reset_index(drop=True))
                else:
                    pass
        genus1 = pd.concat(genus0).drop_duplicates()
        genus1['genus_col'] = genus0_colors
        genus0_colors_corregido = []
        for index, row in genus1.iterrows():
            if row.PHYLUM == 'NA':
                if row.GENUS == 'NA':
                    genus0_colors_corregido.append(row.genus_col)
                else:
                    genus0_colors_corregido.append('grey')
            else:
                if row.GENUS == 'NA':
                    genus0_colors_corregido.append('black')
                else:
                    genus0_colors_corregido.append(row.genus_col)
        genus1['genus_col'] = genus0_colors_corregido
        genus11 = genus1.groupby(['GENUS'
                                  ]).Entry.sum().reset_index().sort_values(
                                      by='Entry',
                                      ascending=False).reset_index(drop=True)
        genus11 = genus11.merge(genus1[['GENUS',
                                        'genus_col']].drop_duplicates(),
                                on='GENUS',
                                how='left')
        # Organism distribution
        phylum4 = new2.groupby(
            ['PHYLUM', 'CLASS', 'ORDER', 'GENUS',
             'Organism']).Entry.count().reset_index()
        org0 = []
        org0_colors = []
        for i in phylum0.PHYLUM:
            for j in phylum4.PHYLUM:
                if i == j:
                    org0_colors.append(asign_color[j])
                    org0.append(phylum4[phylum4.PHYLUM == i].sort_values(
                        by='Entry', ascending=False).reset_index(drop=True))
                else:
                    pass
        org1 = pd.concat(org0).drop_duplicates()
        org1['org_col'] = org0_colors
        org0_colors_corregido = []
        for index, row in org1.iterrows():
            if row.PHYLUM == 'NA':
                if row.Organism == 'NA':
                    org0_colors_corregido.append(row.org_col)
                else:
                    org0_colors_corregido.append('grey')
            else:
                if row.Organism == 'NA':
                    org0_colors_corregido.append('black')
                else:
                    org0_colors_corregido.append(row.org_col)
        org1['org_col'] = org0_colors_corregido
        org11 = org1.groupby(['Organism'
                              ]).Entry.sum().reset_index().sort_values(
                                  by='Entry',
                                  ascending=False).reset_index(drop=True)
        org11 = org11.merge(org1[['Organism', 'org_col']].drop_duplicates(),
                            on='Organism',
                            how='left')
        os.makedirs('tax', exist_ok=True)
        return phylum0.to_csv('tax/phylum0.tsv', sep = '\t', index = None),\
            class1.to_csv('tax/class1.tsv', sep = '\t', index = None),\
            class11.to_csv('tax/class11.tsv', sep = '\t', index = None),\
            order1.to_csv('tax/order1.tsv', sep = '\t', index = None),\
            order11.to_csv('tax/order11.tsv', sep = '\t', index = None),\
            genus1.to_csv('tax/genus1.tsv', sep = '\t', index = None),\
            genus11.to_csv('tax/genus11.tsv', sep = '\t', index = None),\
            org1.to_csv('tax/org1.tsv', sep = '\t', index = None),\
            org11.to_csv('tax/org11.tsv', sep = '\t', index = None)

    alfas = {
        'Lineage*': [1, 1, 1, 1, 1],
        'Phylum': [1, 0.3, 0.3, 0.3, 0.3],
        'Class': [0.3, 1, 0.3, 0.3, 0.3],
        'Order': [0.3, 0.3, 1, 0.3, 0.3],
        'Genus': [0.3, 0.3, 0.3, 1, 0.3],
        'Species': [0.3, 0.3, 0.3, 0.3, 1],
        'Gradient1*': [1, 0.85, 0.7, 0.55, 0.4],
        'Gradient2*': [0.4, 0.55, 0.7, 0.85, 1],
        'Attenuate*': [0.3, 0.3, 0.3, 0.3, 0.3],
        'None*': [0, 0, 0, 0, 0]
    }

    def circle_lineage(alphas=alfas['Phylum']):
        #fig, ax = plt.subplots(111, facecolor= 'white')
        #fig, ax = plt.subplot(111)
        phylum0 = pd.read_csv('tax/phylum0.tsv', sep='\t').fillna('NA')
        class1 = pd.read_csv('tax/class1.tsv', sep='\t').fillna('NA')
        order1 = pd.read_csv('tax/order1.tsv', sep='\t').fillna('NA')
        genus1 = pd.read_csv('tax/genus1.tsv', sep='\t').fillna('NA')
        org1 = pd.read_csv('tax/org1.tsv', sep='\t').fillna('NA')

        radio = 0.5

        linaje = [phylum0, class1, order1, genus1, org1]
        #colores = [list(asign_color.values()), class0_colors, order0_colors, genus0_colors, org0_colors]
        colores = ['phy_col', 'class_col', 'order_col', 'genus_col', 'org_col']
        pat = []
        size = -.205
        for i, j, k in zip(linaje, colores, alphas):
            size += .205
            patches, texts = plt.pie(
                i.Entry,
                radius=radio + size,
                labels=None,
                labeldistance=0.8,
                rotatelabels=True,
                colors=
                i[j],  # new_colors(valor = len(i.Entry), col = 'nipy_spectral'),
                wedgeprops=dict(width=0.2, edgecolor='white', alpha=k),
                textprops=dict(size=10))
            pat.append(patches)

        #plt.legend(pat[0], df_phylum.PHYLUM, loc=2,fontsize=13,labelspacing = 0.4,
        #          bbox_to_anchor=(1.05, 1),frameon=False)

        plt.gca().set(aspect='equal')
        plt.title('Root', fontsize=10, x=0.5, y=0.465)
        plt.text(-1.8,
                 1.35,
                 'Lineage',
                 fontsize=15,
                 ha='left',
                 va='center',
                 color='black')
        #plt.title('Lineage',fontsize=20, fontweight='bold', x = -0.17, y = 1)
        #plt.text(1.1, 1.35, linaje_seleccionado, fontsize = 15, ha='left', va='center',
        #                    color='black')
        #>>>>>>>>>>>>>>>>>>>>>>>
        #### insetplot
        #ax2 = plt.axes([0.1, 0.66, 0.13, 0.14])
        ax2 = plt.axes([-0.07, 1.71, 0.17, 0.18])
        logo = [20, 20, 20, 20, 20, 20, 20, 20]
        logo_col = [
            'white', 'white', 'black', 'white', 'white', 'white', 'white',
            'white'
        ]
        logo_col1 = [
            'white', 'white', 'black', 'black', 'black', 'black', 'black',
            'black'
        ]
        radio = 0.5
        linaje = [logo, logo, logo, logo, logo]
        colores = [logo_col1, logo_col, logo_col, logo_col, logo_col]
        name_linaje = ['Phylum', 'Class', 'Order', 'Genus', 'Species']

        pat = []
        size = -.44
        pos = -.18
        for i, j, k, l in zip(linaje, colores, name_linaje, alphas):
            pos += .47
            size += .44
            ax2.pie(i,
                    radius=radio + size,
                    labels=None,
                    colors=j,
                    wedgeprops=dict(width=0.35, edgecolor='white', alpha=l),
                    textprops=dict(size=10))
            ax2.text(0.1,
                     pos,
                     k,
                     fontsize=9,
                     ha='left',
                     va='center',
                     fontweight='bold',
                     alpha=l)  #color='black'

    def barras_tax(df=DataFrame([]),
                   column=0,
                   dim=111,
                   title='',
                   row_num=10,
                   color=['#ff7f0e'],
                   size_x=8,
                   size_y=10,
                   ylabel_text='',
                   xlabel=10,
                   ylabel=10,
                   size_title=15,
                   size_bartxt=10,
                   sep=1.2):
        if len(df) == 0:
            print('Data frame has no data')
        else:
            #plt.subplot(dim, facecolor= 'white')
            barWidth = 0.9
            if row_num == len(df):
                ejey = list(df.iloc[0:len(df), 1])
                val = max(ejey)
                ejex = list(df.iloc[0:len(df), column])
                colores = list(df.iloc[0:len(df), 2])
                borde = list(
                    np.repeat('white', len(df.iloc[0:row_num, column])))
                linea = list(np.repeat(0, len(df.iloc[0:row_num, column])))
            if row_num < len(df):
                ejey = list(df.iloc[0:row_num,
                                    1]) + [df.iloc[row_num:len(df), 1].sum()]
                val = max(ejey)
                ejex = list(df.iloc[0:row_num, column]) + ['Others']
                borde = list(
                    np.repeat('white', len(df.iloc[0:row_num,
                                                   column]))) + ['black']
                colores = list(df.iloc[0:row_num, 2]) + ['linen']
                linea = list(np.repeat(0, len(df.iloc[0:row_num,
                                                      column]))) + [1]
            if row_num > len(df):
                ejey = list(df.iloc[0:len(df), 1])
                val = max(ejey)
                ejex = list(df.iloc[0:len(df), column])
                borde = list(
                    np.repeat('white', len(df.iloc[0:row_num, column])))
                colores = list(df.iloc[0:len(df), 2])
                linea = list(np.repeat(0, len(df.iloc[0:row_num, column])))

            for i, j, k, l, m in zip(ejex, ejey, borde, colores, linea):
                plt.barh(i,
                         j,
                         color=l,
                         align='center',
                         height=0.7,
                         linewidth=m,
                         alpha=1,
                         edgecolor=k)
            plt.gca().spines['right'].set_visible(False)
            plt.gca().spines['top'].set_visible(False)
            plt.gca().spines['bottom'].set_position(('data', -0.6))
            plt.gca().spines['left'].set_visible(False)
            plt.title(title, size=size_title, loc='left')
            plt.tick_params(axis="y", color="gray")
            plt.yticks(size=size_y)

            v1 = -50
            v2 = 0
            v3 = 0
            for i in range(10000):
                v1 += 50
                v2 += 50
                v3 += 10
                if v1 <= max(list(ejey)) < v2:
                    #print(v3, v1, val, v2)
                    escala = v3

            plt.xticks(range(0, val, escala), size=size_x)  #fontweight='bold'
            plt.ylabel(ylabel_text, size=ylabel)
            plt.xlabel("Number of Proteins", size=xlabel)
            #plt.tick_params(top = 'on', bottom = 'on', right = 'on', left = 'on')
            #plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)

            for j, k in zip(ejey, range(0, len(ejey))):
                plt.text(j + sep,
                         k - 0.2,
                         j,
                         size=size_bartxt,
                         ha='left',
                         color='black')

    import ipywidgets as widgets
    from ipywidgets import interact, interactive, fixed, interact_manual, Button, HBox, VBox, IntSlider, Label, IntRangeSlider
    from ipywidgets import Checkbox, RadioButtons
    from ipywidgets import Button, Layout
    alfas = {
        'Lineage*': [1, 1, 1, 1, 1],
        'Phylum': [1, 0.3, 0.3, 0.3, 0.3],
        'Class': [0.3, 1, 0.3, 0.3, 0.3],
        'Order': [0.3, 0.3, 1, 0.3, 0.3],
        'Genus': [0.3, 0.3, 0.3, 1, 0.3],
        'Species': [0.3, 0.3, 0.3, 0.3, 1],
        'Gradient1*': [1, 0.85, 0.7, 0.55, 0.4],
        'Gradient2*': [0.4, 0.55, 0.7, 0.85, 1],
        'Attenuate*': [0.3, 0.3, 0.3, 0.3, 0.3],
        'None*': [0, 0, 0, 0, 0]
    }
    plotss = ['Phylum', 'Class', 'Order', 'Genus', 'Species']
    posicion_subplots = []
    n = 0.9
    while n < 2:
        n += 0.1
        posicion_subplots.append(np.around(n, 1))

    color_a6 = widgets.Dropdown(options=list(circle_colors.keys()),
                                value='Colors1',
                                description='Colors:',
                                disabled=False,
                                button_style='',
                                layout=Layout(width='20%', height='25px'))
    a6 = widgets.Dropdown(options=list(alfas.keys()),
                          description='Chart 1:',
                          value='Phylum',
                          disabled=False,
                          button_style='',
                          layout=Layout(width='20%', height='25px'))
    a61 = widgets.Dropdown(options=plotss,
                           description='Chart 2:',
                           disabled=False,
                           button_style='',
                           layout=Layout(width='20%', height='25px'))
    pos_sub1 = widgets.Dropdown(options=posicion_subplots,
                                value=1.3,
                                description='xloc1:',
                                disabled=False,
                                layout=Layout(width='15%', height='25px'))
    pos_sub2 = widgets.Dropdown(options=posicion_subplots,
                                value=1.3,
                                description='xloc2:',
                                disabled=False,
                                layout=Layout(width='15%', height='25px'))
    b6 = widgets.Dropdown(options=list(range(0, 101)),
                          value=10,
                          description='rows1:',
                          disabled=False,
                          layout=Layout(width='15%', height='25px'))
    c6 = widgets.Dropdown(options=list(range(0, 101)),
                          value=10,
                          description='rows2:',
                          disabled=False,
                          layout=Layout(width='15%', height='25px'))
    z6 = widgets.ToggleButton(value=False,
                              description='Save Chart',
                              disabled=False,
                              button_style='',
                              tooltip='Description')
    o6 = widgets.Dropdown(options=[0, 0.25, 0.5, 0.75] + list(range(0, 201)),
                          value=3,
                          description='sep1:',
                          disabled=False,
                          layout=Layout(width='15%', height='25px'))
    o61 = widgets.Dropdown(options=[0, 0.25, 0.5, 0.75] + list(range(0, 201)),
                           value=3,
                           description='sep2:',
                           disabled=False,
                           layout=Layout(width='15%', height='25px'))

    d6 = widgets.Dropdown(options=list(range(0, 51)),
                          value=8,
                          description='size_y1:',
                          disabled=False,
                          layout=Layout(width='15%', height='25px'))
    d61 = widgets.Dropdown(options=list(range(0, 51)),
                           value=8,
                           description='size_y2:',
                           disabled=False,
                           layout=Layout(width='15%', height='25px'))
    g6 = widgets.Dropdown(options=list(range(0, 51)),
                          value=8,
                          description='bartxt1:',
                          disabled=False,
                          layout=Layout(width='15%', height='25px'))
    g61 = widgets.Dropdown(options=list(range(0, 51)),
                           value=8,
                           description='bartxt2:',
                           disabled=False,
                           layout=Layout(width='15%', height='25px'))

    xxx = Button(layout=Layout(width='5%', height='25px'), disabled=True)
    xxx.style.button_color = 'white'
    yyy = Button(layout=Layout(width='94%', height='5px'), disabled=True)
    yyy.style.button_color = 'red'

    ww = widgets.HBox([color_a6, xxx, z6])
    w6 = widgets.HBox([
        a6,
        b6,
        o6,
        d6,
        g6,
        pos_sub1,
    ])
    w7 = widgets.HBox([
        a61,
        c6,
        o61,
        d61,
        g61,
        pos_sub2,
    ])
    w8 = widgets.VBox([w6, w7, yyy])

    ######

    def col(color_a6):
        tax_colors(color_list=circle_colors[color_a6], taxx=dfUniprot)

    out7 = widgets.interactive_output(col, {'color_a6': color_a6})

    def box1(a6, a61, pos_sub1, pos_sub2, b6, c6, z6, o6, o61, d6, d61, g6,
             g61):
        yetiquetas_plot1 = {
            'Lineage*': 'Phylum',
            'Phylum': 'Phylum',
            'Class': 'Class',
            'Order': 'Order',
            'Genus': 'Genus',
            'Species': 'Species',
            'Gradient1*': 'Phylum',
            'Gradient2*': 'Phylum',
            'Attenuate*': 'Phylum',
            'None*': 'Phylum'
        }
        plots1 = {
            'Lineage*': pd.read_csv('tax/phylum0.tsv', sep='\t').fillna('NA'),
            'Phylum': pd.read_csv('tax/phylum0.tsv', sep='\t').fillna('NA'),
            'Class': pd.read_csv('tax/class11.tsv', sep='\t').fillna('NA'),
            'Order': pd.read_csv('tax/order11.tsv', sep='\t').fillna('NA'),
            'Genus': pd.read_csv('tax/genus11.tsv', sep='\t').fillna('NA'),
            'Species': pd.read_csv('tax/org11.tsv', sep='\t').fillna('NA'),
            'Gradient1*': pd.read_csv('tax/phylum0.tsv',
                                      sep='\t').fillna('NA'),
            'Gradient2*': pd.read_csv('tax/phylum0.tsv',
                                      sep='\t').fillna('NA'),
            'Attenuate*': pd.read_csv('tax/phylum0.tsv',
                                      sep='\t').fillna('NA'),
            'None*': pd.read_csv('tax/phylum0.tsv', sep='\t').fillna('NA')
        }
        plots2 = {
            'Phylum': pd.read_csv('tax/phylum0.tsv', sep='\t').fillna('NA'),
            'Class': pd.read_csv('tax/class11.tsv', sep='\t').fillna('NA'),
            'Order': pd.read_csv('tax/order11.tsv', sep='\t').fillna('NA'),
            'Genus': pd.read_csv('tax/genus11.tsv', sep='\t').fillna('NA'),
            'Species': pd.read_csv('tax/org11.tsv', sep='\t').fillna('NA')
        }
        ax3 = plt.axes([pos_sub2, .97, .3, 0.55])
        ##>>>>>>>>>>> grafico circular
        ax = plt.axes([0, 1, 0.9, 1])
        circle_lineage(alphas=alfas[a6])
        ##>>>>>>>>>>> grafico 1
        #ax2 = plt.axes([pos_sub1, 1.51, .3, 0.55])
        ax2 = plt.axes([pos_sub1, 1.63, .3, 0.4])  #>>>>>>>>>>

        barras_tax(
            plots1[a6],
            #barras_tax(tax_colors(color_list = circle_colors['Spectral'])[0],
            row_num=b6,
            color=plots1[a6].iloc[0:b6, 2],
            sep=o6,
            size_y=d6,
            size_bartxt=g6,
            ylabel_text=yetiquetas_plot1[a6],
            ylabel=10)

        ##>>>>>>>>>>> grafico 2
        ax3 = plt.axes([pos_sub2, .97, .3, 0.55])

        barras_tax(
            plots2[a61],
            #barras_tax(tax_colors(color_list = circle_colors['Spectral'])[0],
            row_num=c6,
            color=plots2[a61].iloc[0:b6, 2],
            sep=o61,
            size_y=d61,
            size_bartxt=g61,
            ylabel_text=yetiquetas_plot1[a61],
            ylabel=10)

        ##>>>>>>>>>>>> save
        if z6 == True:
            import datetime
            plt.savefig('img/Lineage' +
                        datetime.datetime.now().strftime('%d.%B.%Y_%I-%M%p') +
                        '.png',
                        dpi=900,
                        bbox_inches='tight')
        else:
            pass

    out6 = widgets.interactive_output(
        box1, {
            'a6': a6,
            'a61': a61,
            'pos_sub1': pos_sub1,
            'pos_sub2': pos_sub2,
            'b6': b6,
            'c6': c6,
            'z6': z6,
            'o6': o6,
            'o61': o61,
            'd6': d6,
            'd61': d61,
            'g6': g6,
            'g61': g61
        })
    import warnings
    warnings.filterwarnings("ignore")
    return display(VBox([yyy, ww, w8, out6]))
Example #26
def plot_cluster(
    db_cluster: DBSCAN,
    data: pd.DataFrame,
    x_predict: np.ndarray,
    plot_label: str = None,
    plot_features: Tuple[int, int] = (0, 1),
    verbose: bool = False,
    cut_off: int = 3,
    xlabel: str = None,
    ylabel: str = None,
):
    """
    Plot clustered data as scatter chart.

    Parameters
    ----------
    db_cluster : DBSCAN
        DBScan Cluster (from SkLearn DBSCAN).
    data : pd.DataFrame
        Dataframe containing original data.
    x_predict : np.ndarray
        The DBSCAN predict numpy array
    plot_label : str, optional
         If set the column to use to label data points
         (the default is None)
    plot_features :  Tuple[int, int], optional
        Which two features in x_predict to plot (the default is (0, 1))
    verbose : bool, optional
        Verbose execution with some extra info
        (the default is False)
    cut_off : int, optional
        The cluster size below which items are considered outliers
        (the default is 3)
    xlabel : str, optional
        x-axis label (the default is None)
    ylabel : str, optional
        y-axis label (the default is None)

    """
    max_idx = x_predict.shape[1] - 1
    if plot_features[0] >= x_predict.shape[1]:
        raise ValueError(
            "plot_features[0] index must be a value from 0 to {}.".format(max_idx)
        )
    if plot_features[1] >= x_predict.shape[1]:
        raise ValueError(
            "plot_features[1] index must be a value from 0 to {}.".format(max_idx)
        )
    if plot_features[0] == plot_features[1]:
        mssg = "plot_features indexes must be 2 different values in range 0 to"
        raise ValueError(mssg + f" {max_idx}.")

    labels = db_cluster.labels_
    core_samples_mask = np.zeros_like(labels, dtype=bool)

    # pylint: disable=unsupported-assignment-operation
    # (assignment of numpy array is valid)
    core_samples_mask[db_cluster.core_sample_indices_] = True
    unique_labels = set(labels)

    # pylint: disable=no-member
    # Spectral color map does exist
    colors = [cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]
    # Number of clusters in labels, ignoring noise if present.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    n_noise_ = list(labels).count(-1)
    _, counts = np.unique(labels, return_counts=True)

    if verbose:
        print("Estimated number of clusters: %d" % n_clusters_)
        print("Estimated number of noise points: %d" % n_noise_)
        # print("Silhouette Coefficient: %0.3f"
        #       % metrics.silhouette_score(x_predict, labels))

    if not isinstance(data, pd.DataFrame):
        plot_label = None
    elif plot_label is not None and plot_label not in data:
        plot_label = None

    p_label = None
    for cluster_id, color in zip(unique_labels, colors):
        if cluster_id == -1:
            # Black used for noise.
            color = [0, 0, 0, 1]
        class_member_mask = labels == cluster_id

        cluster_size = counts[cluster_id]
        marker_size = cluster_size
        marker = "o"
        font_size = "small"
        alpha = 0.4

        if cluster_size < cut_off:
            marker = "+"
            marker_size = 10
            font_size = "large"
            alpha = 1.0
        first_row = data[class_member_mask].iloc[0]
        xy_pos = x_predict[class_member_mask & core_samples_mask]
        plt.plot(
            xy_pos[:, plot_features[0]],
            xy_pos[:, plot_features[1]],
            marker,
            markerfacecolor=tuple(color),
            markersize=marker_size,
        )

        if plot_label:
            if not first_row.empty and plot_label in first_row:
                p_label = first_row[plot_label]
                try:
                    # text is passed positionally; the 's=' keyword was
                    # removed in newer matplotlib releases
                    plt.annotate(
                        p_label,
                        xy=(xy_pos[0, plot_features[0]], xy_pos[0, plot_features[1]]),
                        fontsize=font_size,
                        alpha=alpha,
                    )
                except IndexError:
                    pass

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title("Estimated number of clusters: %d" % n_clusters_)
    plt.show()
    return plt
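A minimal usage sketch (added here for illustration, not part of the original example): fit a DBSCAN on a small feature matrix and hand the fitted model, the source DataFrame and the same features to plot_cluster. All column names and parameter values below are placeholders, and the matplotlib/numpy/pandas imports that plot_cluster itself relies on are assumed to be in scope.

from sklearn.cluster import DBSCAN
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame({
    "duration": rng.random(200) * 10,        # placeholder feature
    "bytes_sent": rng.random(200) * 10,      # placeholder feature
    "host": ["host-%d" % (i % 5) for i in range(200)],
})
features = df[["duration", "bytes_sent"]].to_numpy()
db = DBSCAN(eps=1.0, min_samples=4).fit(features)
plot_cluster(db, df, features, plot_label="host",
             xlabel="duration", ylabel="bytes_sent", verbose=True)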
Ejemplo n.º 27
0
#Average degree
N = G.order()
K = G.size()
avg_d = 2.0 * K / N  # average degree of an undirected graph: 2|E| / |V|
avg_degree = 'Average degree: %.4f ' % (avg_d)

# Plot: Degree_centrality
plt.figure()

ax1 = plt.subplot(211)
plt.title('Degree centrality for nodes', fontsize=12)
a_length = np.arange(len(degc_value))
plt.bar(a_length[:50],
        degc_value[:50],
        color=cm.Spectral(degc_value[:50]),
        align='center')
plt.xticks(a_length[:50], degc_key[:50], size='small', rotation=45)
plt.tick_params(axis='x', labelsize=4)
plt.tick_params(axis='y', labelsize=8)
plt.autoscale(enable=True, axis='both', tight=None)

#Top degree centrality:
topTable(degc_key, degc_value, 10)
plt.text(len(degc_value) * 0.75,
         max(degc_value) * 0.4,
         avg_degree,
         bbox={
             'facecolor': 'blue',
             'alpha': 0.25,
             'pad': 10
         })
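The snippet above relies on a NetworkX graph G, precomputed degc_key / degc_value lists and a topTable helper defined elsewhere. A hedged sketch of how the centrality inputs might be prepared (an assumption, not part of the original code):

import networkx as nx
import numpy as np

G = nx.erdos_renyi_graph(60, 0.1)              # placeholder graph
degc = nx.degree_centrality(G)                 # {node: centrality in [0, 1]}
items = sorted(degc.items(), key=lambda kv: kv[1], reverse=True)
degc_key = [k for k, _ in items]               # node labels for the x-ticks
degc_value = np.array([v for _, v in items])   # bar heights / colormap input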
Ejemplo n.º 28
0
    def extractData(self):

        import matplotlib
        import matplotlib.pyplot as plt
        import matplotlib.cm as cm
        from matplotlib.font_manager import FontProperties
        self.plt = plt

        data = {}
        data["filename_plot"] = ""
        webpage = open(self.source.getTmpPath())
        strwebpage = webpage.read()
        tree = lxml.html.parse(StringIO.StringIO(strwebpage))
        rowlist = tree.findall(".//item")

        StartDates = []
        Jobs = [[0 for t in range(len(rowlist) - 1)]
                for c in range(len(self.cat_names))]
        for i in range(1, len(rowlist)):
            ThisData = {key: 0 for key in self.tags}
            for j in range(len(rowlist[i])):
                if rowlist[i][j].tag == 's_date':
                    StartDates.append(rowlist[i][j].text_content())
                else:
                    if rowlist[i][j].tag in self.tags:
                        ThisData[rowlist[i][j].tag] = rowlist[i][j].text_content()
            for key, value in ThisData.iteritems():
                Jobs[self.tags.index(key)][i - 1] = int(value)

        # Calculate derived statistics
        minJobs = [0 for c in range(len(self.cat_names))]
        maxJobs = [0 for c in range(len(self.cat_names))]
        avgJobs = [0 for c in range(len(self.cat_names))]
        JobFractions = [[0 for t in range(len(StartDates))]
                        for c in range(len(self.cat_names))]
        AggrJobsEval = 0
        EvalFraction = 0.0
        JobsEval = [0 for c in range(len(self.cat_names))]
        JobFractionsEval = [0 for c in range(len(self.cat_names))]
        JobsEvalColor = ['' for c in range(len(self.cat_names))]
        for t in range(len(StartDates)):
            Jobs[len(self.cat_names) - 1][t] = 0
            for c in range(len(self.cat_names) - 1):
                Jobs[len(self.cat_names) - 1][t] += Jobs[c][t]
        for t in range(
                len(StartDates) - 1,
                len(StartDates) - 1 - self.eval_interval, -1):
            for c in range(len(self.cat_names)):
                JobsEval[c] += Jobs[c][t]
        for c in range(len(self.cat_names)):
            minJobs[c] = min(Jobs[c])
            maxJobs[c] = max(Jobs[c])
            for t in range(len(StartDates)):
                if Jobs[len(self.cat_names) - 1][t] > 0:
                    JobFractions[c][t] = 100.0 * Jobs[c][t] / float(
                        Jobs[len(self.cat_names) - 1][t])
                else:
                    JobFractions[c][t] = 0.0
                avgJobs[c] += Jobs[c][t]
            avgJobs[c] /= len(StartDates)
            if JobsEval[len(self.cat_names) - 1] > 0:
                JobFractionsEval[c] = 100.0 * JobsEval[c] / float(
                    JobsEval[len(self.cat_names) - 1])
            else:
                JobFractionsEval[c] = 0.0

        # Calculate evaluation statistic
        for item in self.eval_categories:
            AggrJobsEval += JobsEval[self.cat_names.index(item)]
        if JobsEval[len(self.cat_names) - 1] > 0:
            EvalPercentage = 100.0 * AggrJobsEval / float(
                JobsEval[len(self.cat_names) - 1])
        else:
            EvalPercentage = 0.0

        # Set module status according to evaluation statistic
        data['status'] = 1.0
        if JobsEval[len(self.cat_names) - 1] >= self.eval_job_threshold:
            if self.eval_warn_threshold[0] == '<':
                if EvalPercentage <= self.eval_warn_threshold[1]:
                    data['status'] = 0.5
            elif self.eval_warn_threshold[0] == '>':
                if EvalPercentage >= self.eval_warn_threshold[1]:
                    data['status'] = 0.5
            if self.eval_crit_threshold[0] == '<':
                if EvalPercentage <= self.eval_crit_threshold[1]:
                    data['status'] = 0.0
            elif self.eval_crit_threshold[0] == '>':
                if EvalPercentage >= self.eval_crit_threshold[1]:
                    data['status'] = 0.0

        # Color summary subtable cells in column evaluation according to module status
        for item in self.eval_categories:
            if data['status'] == 0.5:
                JobsEvalColor[self.cat_names.index(item)] = 'warning'
            elif data['status'] == 0.0:
                JobsEvalColor[self.cat_names.index(item)] = 'critical'

        # Save subtable data
        for c in range(len(self.cat_names)):
            self.statistics_db_value_list.append({
                'CatName': self.cat_names[c],
                'JobsEval': JobsEval[c],
                'JobFracsEval': '%.1f' % JobFractionsEval[c],
                'JobsEvalColor': JobsEvalColor[c],
                'JobsCurrentHour': Jobs[c][len(StartDates) - 1],
                'JobFracsCurrentHour': '%.1f' % JobFractions[c][len(StartDates) - 1],
                'JobsLastHour': Jobs[c][len(StartDates) - 2],
                'JobFracsLastHour': '%.1f' % JobFractions[c][len(StartDates) - 2],
                'MinJobs': minJobs[c],
                'MaxJobs': maxJobs[c],
                'AvgJobs': avgJobs[c]
            })

        # Function to convert raw time data given in UTC to local time zone
        def ChangeTimeZone(TimeStringIn, InFormatString, OutFormatString):
            Date = datetime.strptime(TimeStringIn, InFormatString).replace(
                tzinfo=pytz.utc).astimezone(pytz.timezone('Europe/Berlin'))
            return (Date.strftime(OutFormatString))

        # Change times from utc to local
        StartDatesRaw = StartDates[:]
        for t in range(len(StartDates)):
            StartDates[t] = ChangeTimeZone(StartDates[t], "%d-%b-%y %H:%M:%S",
                                           "%d-%b-%y %H:%M:%S")

        data['InstanceTitle'] = self.config['name']
        data['IntervalStart'] = ChangeTimeZone(
            tree.find(".//start").text_content().split(".")[0],
            "%Y-%m-%d %H:%M:%S", "%d-%b-%y %H:%M:%S")
        data['IntervalEnd'] = ChangeTimeZone(
            tree.find(".//end").text_content().split(".")[0],
            "%Y-%m-%d %H:%M:%S", "%d-%b-%y %H:%M:%S")
        data['CurrentHourStart'] = StartDates[len(StartDates) -
                                              1].split(' ')[1]
        data['CurrentHourEnd'] = data['IntervalEnd'].split(' ')[1]
        data['LastHourStart'] = StartDates[len(StartDates) - 2].split(' ')[1]
        data['LastHourEnd'] = StartDates[len(StartDates) - 1].split(' ')[1]
        data['EvaluationStart'] = StartDates[len(StartDates) -
                                             self.eval_interval].split(' ')[1]
        data['EvaluationEnd'] = data['IntervalEnd'].split(' ')[1]

        ######################################################################
        ### Plot data

        #Colors = ['MidnightBlue', 'SteelBlue', 'LightSkyBlue', 'SeaGreen', \
        #        'LightGreen', 'DarkKhaki', 'PaleGoldenrod', 'Khaki', 'LightSalmon', \
        #        'Crimson', 'Maroon']
        Colors = []
        for i in range(len(self.cat_names) - 1):
            # for list of colormaps see http://wiki.scipy.org/Cookbook/Matplotlib/Show_colormaps
            Colors.append(
                cm.Spectral(1.0 - i / max(float(len(self.cat_names) - 2), 1.0),
                            1))
            # Colors.append(cm.spectral((i+1)/float(len(self.cat_names)-0), 1))
            # Colors.append(cm.jet(i/float(len(self.cat_names)-2), 1))
            # Colors.append(cm.gist_earth((i+1)/float(len(self.cat_names)-1), 1))
            # Colors.append(cm.RdBu(1.0 - i/float(len(self.cat_names)-2), 1))
            # Colors.append(cm.YlGnBu(1.0 - i/float(len(self.cat_names)-2), 1))

        nbins = len(StartDates)
        if nbins == 0:
            # break image creation if there are no jobs
            data['error_string'] = ("No plot is generated because data source "
                                    "contains no jobs to be displayed.")
            data["filename_plot"] = ""
        else:
            ind = np.arange(nbins)  # the x locations for the groups
            width = 1.00  # the width of the bars: can also be len(x) sequence
            max_val = maxJobs[len(self.cat_names) - 1]
            xlabels = [0] * nbins
            for i in range(0, nbins):
                if i % 2 == 0:
                    DateLabel = StartDates[i].split(' ')[0].split('-')
                    TimeLabel = StartDates[i].split(' ')[1].split(':')
                    xlabels[i] = DateLabel[0] + '-' + DateLabel[1] + '\n' + \
                            TimeLabel[0] + ':' + TimeLabel[1]
                else:
                    xlabels[i] = ''

            # calculate bottom levels in order to enforce stacking
            cat_bottoms = [[0 for t in range(len(StartDates))]
                           for c in range(len(self.cat_names) - 1)]
            for cSet in range(1, len(self.cat_names) - 1):
                for cGet in range(0, cSet):
                    for t in range(len(StartDates)):
                        cat_bottoms[cSet][t] += Jobs[cGet][t]

            # Create figure and plot job numbers of the different categories
            fig = self.plt.figure(figsize=(10, 5.8))
            axis = fig.add_subplot(111)
            p = [
                axis.bar(ind,
                         Jobs[c],
                         width,
                         color=(getcolor(self.cat_names[c]) or Colors[c]),
                         bottom=cat_bottoms[c])
                for c in range(len(self.cat_names) - 1)
            ]

            # Prepare legend entries
            p_leg = []
            cat_leg = []
            for i in range(len(p) - 1, -1, -1):
                p_leg.append(p[i][0])
                cat_leg.append(self.cat_names[i])

            # Configure plot layout
            fontTitle = FontProperties()
            fontTitle.set_size('medium')
            axis.set_title('24 hours from ' + data['IntervalStart'] + ' to ' \
                    + data['IntervalEnd'] + ' (all times are local)',
                    fontproperties=fontTitle)
            axis.set_position([0.10, 0.12, 0.68, 0.82])
            axis.set_ylabel('Number of Jobs')
            axis.set_xticks(ind + 0.0 * width / 2.0)
            axis.set_xticklabels(xlabels, rotation='vertical')
            axis.set_autoscaley_on(False)
            axis.set_ylim([0, (max_val + 1.0) * 1.05])
            fontLegend = FontProperties()
            fontLegend.set_size('small')
            axis.legend(p_leg,
                        cat_leg,
                        bbox_to_anchor=(1.02, 0.5),
                        loc=6,
                        ncol=1,
                        borderaxespad=0.,
                        prop=fontLegend)

            fig.savefig(hf.downloadService.getArchivePath(
                self.run, self.instance_name + "_jobs_dist.png"),
                        dpi=91)
            data["filename_plot"] = self.instance_name + "_jobs_dist.png"

        return data
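The per-category bottom levels computed above are the standard way to stack bars in matplotlib. A small self-contained sketch of the same idea with made-up job counts (not taken from the module), using np.cumsum for the bottoms:

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm

jobs = np.array([[5, 8, 6, 9],    # category 0 per time bin (made-up numbers)
                 [2, 1, 3, 2],    # category 1
                 [4, 6, 5, 7]])   # category 2
bottoms = np.vstack([np.zeros(jobs.shape[1]),
                     np.cumsum(jobs, axis=0)[:-1]])
colors = [cm.Spectral(1.0 - c / max(float(jobs.shape[0] - 1), 1.0))
          for c in range(jobs.shape[0])]
ind = np.arange(jobs.shape[1])
for c in range(jobs.shape[0]):
    plt.bar(ind, jobs[c], width=1.0, bottom=bottoms[c], color=colors[c])
plt.ylabel('Number of Jobs')
plt.show()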
Ejemplo n.º 29
0
import networkx as nx
import nxviz as nxv

#####
import numpy as np
from matplotlib import cm
words = ['Biological Process', 'Molecular Function', 'Cellular Component']
pie_colors = {
    'Set3': cm.Set3(np.arange(12) / 12.),
    'Set2': cm.Set2(np.arange(8) / 8.),
    'Set1': cm.Set1(np.arange(9) / 9.),
    'Pastel2': cm.Pastel2(np.arange(8) / 8.),
    'Pastel1': cm.Pastel1(np.arange(9) / 9.),
    'Dark2': cm.Dark2(np.arange(8) / 8.),
    'Paired': cm.Paired(np.arange(12) / 12.),
    'Accent': cm.Accent(np.arange(8) / 8.),
    'Spectral': cm.Spectral(np.arange(11) / 11.)
}
colors = {
    '#8DD3C7': pie_colors['Set3'][0:1],
    '#FFFFB3': pie_colors['Set3'][1:2],
    '#BEBADA': pie_colors['Set3'][2:3],
    '#FB8072': pie_colors['Set3'][3:4],
    '#80B1D3': pie_colors['Set3'][4:5],
    '#FDB462': pie_colors['Set3'][5:6],
    '#B3DE69': pie_colors['Set3'][6:7],
    '#FCCDE5': pie_colors['Set3'][7:8],
    '#D9D9D9': pie_colors['Set3'][8:9],
    '#BC80BD': pie_colors['Set3'][9:10],
    '#CCEBC5': pie_colors['Set3'][10:11],
    '#FFED6F': pie_colors['Set3'][11:12]
}
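A hedged sketch of how the color tables above might be used (the term counts are placeholders, not from the original source): draw a pie chart of the three GO namespaces in words with the first three Set3 colors.

import matplotlib.pyplot as plt

counts = [120, 80, 45]                 # placeholder term counts per namespace
plt.figure(figsize=(6, 6))
plt.pie(counts,
        labels=words,
        colors=pie_colors['Set3'][:3],
        autopct='%.1f%%',
        startangle=90,
        counterclock=False)
plt.axis('equal')
plt.show()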
Ejemplo n.º 30
0
    def extractData(self):

        # set rack names and associated clusters
        rack_001_010 = {
            'rack_string': 'gridka_rack001-010',
            'clusters': ['%03d' % (x + 1) for x in range(0, 10)]}
        rack_011_020 = {
            'rack_string': 'gridka_rack011-020',
            'clusters': ['%03d' % (x + 1) for x in range(10, 20)]}
        rack_021_030 = {
            'rack_string': 'gridka_rack021-030',
            'clusters': ['%03d' % (x + 1) for x in range(20, 30)]}
        rack_101_110 = {
            'rack_string': 'gridka_rack101-110',
            'clusters': ['%03d' % (x + 1) for x in range(100, 110)]}
        rack_111_120 = {
            'rack_string': 'gridka_rack111-120',
            'clusters': ['%03d' % (x + 1) for x in range(110, 120)]}
        racks = [rack_001_010, rack_011_020, rack_021_030, rack_101_110, rack_111_120]

        import matplotlib.pyplot as plt
        import matplotlib.cm as cm
        from matplotlib.font_manager import FontProperties
        self.plt = plt

        data = {}
        data["filename_plot"] = ""

        with open(self.source.getTmpPath()) as webpage:
            rawdata = json.load(webpage)

        # Function to convert raw time data given in UTC to local time zone
        def ChangeTimeZone(TimeStringIn, InFormatString, OutFormatString):
            Date = datetime.strptime(TimeStringIn, InFormatString).replace(
                    tzinfo=pytz.utc).astimezone(pytz.timezone('Europe/Berlin'))
            return(Date.strftime(OutFormatString))

        data['IntervalEnd'] = ChangeTimeZone(rawdata['meta']['date2'][0], 
                "%Y-%m-%d %H:%M:%S", "%d-%b-%y %H:%M:%S")
        IntervalEnd = datetime.strptime(data['IntervalEnd'], "%d-%b-%y %H:%M:%S")
        IntervalStart = IntervalEnd - timedelta(hours = self.eval_time)
        data['IntervalStart'] = IntervalStart.strftime("%d-%b-%y %H:%M:%S")
        
        # filter rawdata for jobs finished within eval_time or still running
        if self.eval_time != 24:
            def finished_before_interval(item):
                try:
                    # convert the UTC finish time to naive local time so it can
                    # be compared with the naive local IntervalStart
                    date = datetime.strptime(item['FinishedTimeStamp'],
                            "%Y-%m-%dT%H:%M:%S").replace(tzinfo=pytz.utc).astimezone(
                            pytz.timezone('Europe/Berlin')).replace(tzinfo=None)
                    return date < IntervalStart
                except Exception:
                    return False
            # build a new list instead of removing items while iterating
            rawdata['jobs'] = [item for item in rawdata['jobs']
                               if not finished_before_interval(item)]

        # Prepare different attribute values (either use those indicated in
        # config file or loop over data and get all different categories)
        AttributeValues = []
        if self.eval_attribute_value != '':
            AttributeValues.append(self.eval_attribute_value)
        if self.attribute_values != '':
            AddAttributeValues = self.attribute_values.split('|')
            for i in range(len(AddAttributeValues)):
                if AddAttributeValues[i] not in AttributeValues:
                    AttributeValues.append(AddAttributeValues[i])
        else:
            for item in rawdata['jobs']:
                if item[self.attribute] not in AttributeValues:
                    AttributeValues.append(item[self.attribute])

        # Get all different primary and secondary keys for the selected attribute values
        PrimaryKeys = []
        SecondaryKeys = []
        for item in rawdata['jobs']:
            if item[self.attribute] in AttributeValues:
                if item[self.primary_key] not in PrimaryKeys:
                    PrimaryKeys.append(item[self.primary_key])
                    if self.use_secondary_key == True:
                        SecondaryKeys.append(item[self.secondary_key])

        # Get job numbers from raw data
        Jobs = [[0 for k in range(len(PrimaryKeys))] for a in range(len(AttributeValues))]
        for item in rawdata['jobs']:
            if item[self.attribute] in AttributeValues:
                Jobs[AttributeValues.index(item[self.attribute])][
                        PrimaryKeys.index(item[self.primary_key])] += 1

        # Get total number of jobs across all categories per node
        TotalJobsPerNode = [0 for k in range(len(PrimaryKeys))]
        for k in range(len(PrimaryKeys)):
            for a in range(len(AttributeValues)):
                TotalJobsPerNode[k] += Jobs[a][k]
        
        # Calculate module status
        # a 'node' is the class of grouping, i.e. one line in the plot (a host, a workflow, ...)
        data['status'] = 1.0
        if self.eval_threshold > -1:
            TotalEval = 0
            if self.eval_mode == 1: # total jobs evaluation (total number of jobs at different nodes are compared to all nodes)
                Statistics = [TotalJobsPerNode[k] for k in range(len(PrimaryKeys))]
                for k in range(len(PrimaryKeys)):
                    TotalEval += Statistics[k]
            elif self.eval_mode == 2: # global category evaluation (jobs in specified category is summed across all nodes)
                i = AttributeValues.index(self.eval_attribute_value)
                Statistics = 0
                for k in range(len(PrimaryKeys)):
                    Statistics += Jobs[i][k]
                    TotalEval += TotalJobsPerNode[k]
            elif self.eval_mode == 3: # local category evaluation (individual nodes are checked for specific category)
                i = AttributeValues.index(self.eval_attribute_value)
                Statistics = [Jobs[i][k] for k in range(len(PrimaryKeys))]
                for k in range(len(PrimaryKeys)):
                    TotalEval += Statistics[k]
            elif self.eval_mode == 4: # per node evaluation (individual nodes are checked for specific category and compared with all categories of the node)
                i = AttributeValues.index(self.eval_attribute_value)
                Statistics = [Jobs[i][k] for k in range(len(PrimaryKeys))]
            if TotalEval >= self.eval_threshold:
                if self.eval_mode == 1: # total jobs evaluation
                    if self.eval_threshold_warning > -1:
                        for k in range(len(PrimaryKeys)):
                            if 100.0 * Statistics[k] / float(TotalEval) >= float(
                                    self.eval_threshold_warning):
                                data['status'] = 0.5
                    if self.eval_threshold_critical > -1:
                        for k in range(len(PrimaryKeys)):
                            if 100.0 * Statistics[k] / float(TotalEval) >= float(
                                    self.eval_threshold_critical):
                                data['status'] = 0.0
                elif self.eval_mode == 2: # global category evaluation
                    if self.eval_threshold_warning > -1:
                        if 100.0 * Statistics / float(TotalEval) >= float(
                                self.eval_threshold_warning):
                            data['status'] = 0.5
                    if self.eval_threshold_critical > -1:
                        if 100.0 * Statistics / float(TotalEval) >= float(
                                self.eval_threshold_critical):
                            data['status'] = 0.0
                elif self.eval_mode == 3: # local category evaluation
                    if self.eval_threshold_warning > -1:
                        for k in range(len(PrimaryKeys)):
                            if 100.0 * Statistics[k] / float(TotalEval) >= float(
                                    self.eval_threshold_warning):
                                data['status'] = 0.5
                    if self.eval_threshold_critical > -1:
                        for k in range(len(PrimaryKeys)):
                            if 100.0 * Statistics[k] / float(TotalEval) >= float(
                                    self.eval_threshold_critical):
                                data['status'] = 0.0
                elif self.eval_mode == 4: # per node evaluation
                    count = 0
                    if self.eval_threshold_warning > -1:
                        for k in range(len(PrimaryKeys)):
                            if 100.0 * Statistics[k] / float(TotalJobsPerNode[k]) >= float(
                                    self.eval_threshold_warning):
                                data['status'] = 0.5
                    if self.eval_threshold_critical > -1:
                        for k in range(len(PrimaryKeys)):
                            if 100.0 * Statistics[k] / float(TotalJobsPerNode[k]) >= float(
                                    self.eval_threshold_critical):
                                count += 1
                                if count <= 1:
                                    data['status'] = 0.5
                                else:
                                    data['status'] = 0.0

        ################################################################
        ### Plot data

        # Get filtered subset of job numbers to plot
        PlotIndices = []
        if self.plot_filter_attribute_value in AttributeValues:
            AttributeValueIndex = AttributeValues.index(
                    self.plot_filter_attribute_value)
            CountsSet = set(Jobs[AttributeValueIndex])
            Counts = [c for c in CountsSet]
            Counts.sort(reverse=True)
            for c in range(len(Counts)):
                for k in range(len(PrimaryKeys)):
                    if Jobs[AttributeValueIndex][k] == Counts[c]:
                        PlotIndices.append(k)
        else:
            CountsSet = set(TotalJobsPerNode)
            Counts = [c for c in CountsSet]
            Counts.sort(reverse=True)
            for c in range(len(Counts)):
                for k in range(len(PrimaryKeys)):
                    if TotalJobsPerNode[k] == Counts[c]:
                        PlotIndices.append(k)
        
        nbins = min(self.plot_filter_node_number, len(PlotIndices))

        # Sort counts and get self.plot_filter_node_number highest
        FilteredJobs = [[0 for k in range(nbins)] for a in AttributeValues]
        TotalFilteredJobs = [0 for k in range(nbins)]
        for a in range(len(AttributeValues)):
            for k in range(nbins):
                FilteredJobs[a][k] = Jobs[a][PlotIndices[k]]
                TotalFilteredJobs[k] += FilteredJobs[a][k]
        
        # Write filtered data to database
        for k in range(nbins-1,-1,-1):  # same ordering as in plot
            for a in range(len(AttributeValues)-1,-1,-1): # same ordering as in plot
                SubtableEntry = {
                        'PrimaryKey': PrimaryKeys[PlotIndices[k]],
                        'PrimaryKeyURL': '',
                        'SecondaryKey': '',
                        'AttributeValue': AttributeValues[a],
                        'AttributeData': FilteredJobs[a][k]}
                if self.use_secondary_key == True:
                    SubtableEntry['SecondaryKey'] = SecondaryKeys[PlotIndices[k]]
                if self.table_link_url != '':
                    if self.primary_key == 'WNHostName':
                        cluster = PrimaryKeys[PlotIndices[k]].upper().split('-')
                        for r in range(len(racks)):
                            if len(cluster) > 1 and cluster[0]!='UNKNOWN' and cluster[1] in racks[r]['clusters']:
                                SubtableEntry['PrimaryKeyURL'] = self.table_link_url.\
                                        replace('RACK', racks[r]['rack_string']).\
                                        replace('CLUSTER', cluster[0] + '-' + cluster[1]).\
                                        replace('HOST', PrimaryKeys[PlotIndices[k]])
                    elif self.primary_key == 'TaskMonitorId':
                        SubtableEntry['PrimaryKeyURL'] = self.table_link_url.\
                                replace('TASKMONITORID', PrimaryKeys[PlotIndices[k]].replace('wmagent_', ''))
                self.statistics_db_value_list.append(SubtableEntry)
        
        # calculate bottom levels in order to enforce stacking
        Bottoms = [[0 for k in range(nbins)] for c in range(
                len(AttributeValues))]
        for cSet in range(1,len(AttributeValues)):
            for cGet in range(0,cSet):
                for k in range(nbins):
                    Bottoms[cSet][k] += FilteredJobs[cGet][k]

        Colors = []
        for i in range(len(AttributeValues)):
            # for list of colormaps see http://wiki.scipy.org/Cookbook/Matplotlib/Show_colormaps
            Colors.append(cm.Spectral(1.0 - i/max(float(len(AttributeValues)-1), 1.0), 1))

        if nbins == 0:
            # break image creation if there are no jobs
            data['error_string'] = "No plot is generated because data source contains no jobs to be displayed."
            data["filename_plot"] = ""
        else:
            max_width = max(TotalFilteredJobs)
            xlabels = [0]*nbins
            pos = np.arange(nbins)+0.5

            fig = self.plt.figure(figsize=(self.image_width,self.image_height))
            axis = fig.add_subplot(111)
            p = [axis.barh(pos, FilteredJobs[a], left=Bottoms[a], align='center', 
                    height=0.6, color= (getcolor(AttributeValues[a]) or Colors[a])) for a in range(len(AttributeValues))]
            #fontyAxis = FontProperties()
            #fontyAxis.set_size('small')
            axis.set_yticks(pos)
            #axis.set_yticklabels(xlabels, fontproperties=fontyAxis)
            axis.set_yticklabels('')
            
            fontyLabels = FontProperties()
            fontyLabels.set_size('small')
            fontyLabels.set_weight('bold')
            for i in range(nbins):
                xlabels[i] = PrimaryKeys[PlotIndices[i]]
                if self.use_secondary_key == True:
                    xlabels[i] += ' (' + SecondaryKeys[PlotIndices[i]] + ')'
                if self.plot_ylabels_ellipsis > 0 and len(xlabels[i]) > self.plot_ylabels_ellipsis + 3:
                    xlabels[i] = xlabels[i][:self.plot_ylabels_ellipsis] + '...'
                if self.plot_ylabels_linebreak > 0 and len(xlabels[i]) > self.plot_ylabels_linebreak:
                    xlabels[i] = xlabels[i][:self.plot_ylabels_linebreak] + '\n' + xlabels[i][self.plot_ylabels_linebreak:]
                plt.text(0.03*max_width, pos[i], '%s'%xlabels[i], ha='left', va='center', fontproperties = fontyLabels)

            if self.eval_threshold > -1 and TotalEval >= self.eval_threshold:
                if self.plot_line_warning == 1 and self.eval_threshold_warning >= 0:
                    axis.axvline(TotalEval * self.eval_threshold_warning / 100.0, 
                            color='Yellow',lw=2)
                if self.plot_line_critical == 1 and self.eval_threshold_critical >= 0:
                    axis.axvline(TotalEval * self.eval_threshold_critical / 100.0, 
                            color='Red',lw=3)

            # Prepare legend entries
            p_leg = []
            cat_leg = []
            for i in range(len(p)-1,-1,-1):
                p_leg.append(p[i][0])
                cat_leg.append(AttributeValues[i])

            # Configure plot layout
            fontTitle = FontProperties()
            fontTitle.set_size('medium')
            axis.set_title('%s hours from ' %self.eval_time + data['IntervalStart'] + ' to ' \
                    + data['IntervalEnd'] + ' (all times are local)',
                    fontproperties=fontTitle)
            axis.set_position([self.plot_left,0.08,self.plot_width,0.86])
            axis.set_xlabel('Number of Jobs')
            fontLegend = FontProperties()
            fontLegend.set_size('small')
            axis.legend(p_leg, cat_leg, bbox_to_anchor=(1.02, 0.5), loc=6, ncol=1,
                    borderaxespad=0., prop = fontLegend)

            fig.savefig(hf.downloadService.getArchivePath(self.run, 
                    self.instance_name + "_jobs_dist.png"), dpi=91)
            data["filename_plot"] = self.instance_name + "_jobs_dist.png"
            data['PrimaryKey'] = self.primary_key
            data['SecondaryKey'] = self.secondary_key
            data['Attribute'] = self.attribute

        return data
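Both extractData methods use the same UTC-to-local conversion helper. A standalone sketch of that logic (the input string below is illustrative only):

from datetime import datetime
import pytz

def change_time_zone(time_string, in_fmt, out_fmt):
    # parse as UTC, convert to Europe/Berlin local time, then re-format
    date = datetime.strptime(time_string, in_fmt).replace(
        tzinfo=pytz.utc).astimezone(pytz.timezone('Europe/Berlin'))
    return date.strftime(out_fmt)

print(change_time_zone('2024-03-01 12:00:00',
                       '%Y-%m-%d %H:%M:%S', '%d-%b-%y %H:%M:%S'))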