Ejemplo n.º 1
0
def plot_countries_by_occurrences_and_sentiment(data_mails, nb_country=20):
    '''
    Plot the occurrences of the most-quoted countries as a bar chart whose
    bars are coloured by sentiment (RdYlGn colormap).
    Abscissa: countries / Ordinate: occurrences / Color: sentiment

    Parameters
        - data_mails: DataFrame containing the data (emails); the code reads
          its 'Occurrences' and 'Sentiment' columns and uses its index as the
          x axis
        - nb_country: number of rows kept, largest 'Occurrences' first
          (20 by default)
    '''

    # Keep only the nb_country rows with the largest 'Occurrences'
    most_quoted_countries = data_mails.nlargest(nb_country, 'Occurrences')

    # One RdYlGn colour per country: sentiment divided by its maximum
    y = np.array(most_quoted_countries['Sentiment'])
    colors = cm.RdYlGn(y / float(max(y)))

    # The scatter is drawn only to obtain a mappable for the colorbar;
    # the figure is cleared right after so the points never show up.
    plot = plt.scatter(y, y, c=y, cmap='RdYlGn')
    plt.clf()
    clb = plt.colorbar(plot)
    clb.ax.set_title('Sentiments')

    # We create and display graph
    countries_data_plot = sns.barplot(x=most_quoted_countries.index,
                                      y='Occurrences',
                                      data=most_quoted_countries,
                                      palette=colors)
    define_plot_title_and_labels(countries_data_plot,
                                 'Occurrences',
                                 title='Hilary\'s opinion for the ' +
                                 str(nb_country) + ' most-quoted countries')
    # `sns.plt` is not available in current seaborn releases; go through
    # pyplot directly (already used above as `plt`).
    plt.show()
Ejemplo n.º 2
0
def plot_twosta(f, Cxy, id1, id2, m):
    """Plot coherence against frequency for one pair and save it as a PNG.

    The figure is written to ``cxy_{id1}_{id2}.png`` (relative to the current
    working directory) and then closed; nothing is returned.

    Parameters
    ----------
    f : sequence
        Values for the x axis; ``f[0]`` and ``f[-1]`` set the x limits.
    Cxy : sequence
        Values for the y axis (fixed 0..1 range); also passed straight to the
        RdYlGn colormap to colour the markers.
    id1, id2 :
        Identifiers used in the title and in the output file name.
    m :
        Value shown in the title followed by "km", formatted with ``0.2``.

    Relies on the module-level names ``color``, ``fontdict_axis`` and
    ``axis_tick_size``.
    """
    fig = plt.figure(figsize=(6, 5), dpi=200)
    gs = fig.add_gridspec(1, 1)
    ax = fig.add_subplot(gs[0])

    # Faint connecting line underneath, one colour-coded marker per sample.
    ax.plot(f, Cxy, linewidth=2.5, c='gray', alpha=.3)
    ax.scatter(f,
               Cxy,
               marker='o',
               s=75,
               linewidth=.1,
               c=cm.RdYlGn(Cxy),
               edgecolor='k')

    # x axis: major tick every 5, minor tick every 1.
    ax.set_xlim(f[0], f[-1])
    ax.xaxis.set_major_locator(MultipleLocator(5))
    ax.xaxis.set_major_formatter(FormatStrFormatter('%0.1f'))
    ax.xaxis.set_minor_locator(MultipleLocator(1))
    # The visibility flag is passed positionally: its keyword was renamed
    # from `b` to `visible` in newer Matplotlib releases.
    ax.xaxis.grid(True, which="minor", **color)
    ax.xaxis.grid(True, which="major", **color)
    ax.set_xlabel('Frequency (Hz)', fontdict=fontdict_axis)

    # y axis: major tick every 0.25, minor tick every 0.05.
    ax.set_ylim(0, 1)
    ax.yaxis.set_major_locator(MultipleLocator(0.25))
    ax.yaxis.set_major_formatter(FormatStrFormatter('%0.1g'))
    ax.yaxis.set_minor_locator(MultipleLocator(0.05))
    ax.yaxis.grid(True, which="minor", **color)
    ax.yaxis.grid(True, which="major", **color)
    ax.set_ylabel('Coherence', fontdict=fontdict_axis)

    # NOTE(review): `{m:0.2}` keeps two significant digits, so large values
    # are rendered in exponent notation -- confirm this is intended.
    ax.set_title(f'Coherence {id1} and {id2}, ({m:0.2} km)',
                 fontdict={'fontsize': 12},
                 loc='left')
    ax.tick_params(labelleft=True, labelsize=axis_tick_size)

    outfile = f'cxy_{id1}_{id2}.png'
    plt.savefig(outfile, bbox_inches='tight')
    plt.close()
def colour_by_scores(message, scores):
    """Wrap every item of *message* in a <span> underlined in a score colour.

    ``scores`` is shifted with ``(scores + 1) / 2`` and the entry at the same
    position as each item of *message* is looked up in the RdYlGn colormap.
    The four colormap channels are multiplied by 255, truncated to int and
    written as an ``rgba(...)`` border colour. Returns the concatenated HTML
    string (empty when *message* is empty).
    """
    scaled_score = (scores + 1) / 2

    spans = []
    for position, token in enumerate(message):
        channels = cm.RdYlGn(scaled_score[position])
        rgba = tuple(int(channel * 255) for channel in channels)
        spans.append("<span style='border-bottom: 4px solid rgba" +
                     str(rgba) + ";'>" + token + '</span>')
    return ''.join(spans)
Ejemplo n.º 4
0
    def barplot3d(data, y_names, x_names, baseline):
        """Render *data* as a 3-D bar chart and save it as PNG, PDF and EPS.

        Parameters:
            data: 2-D sequence; ``len(data)`` sets the y ticks and
                ``len(data[0])`` the x ticks. Its values are the bar heights.
            y_names: tick labels for the y axis (one bar row per entry).
            x_names: tick labels for the x axis (one bar column per entry).
            baseline: not used by the current code (it was only referenced by
                a disabled reference-plane surface).

        The files are written to ``./predictions/barplot3D.{png,pdf,eps}``;
        the figure is cleared afterwards. Nothing is returned.
        """
        fig = plt.figure(figsize=(10, 8))
        # Figure.gca() no longer accepts keyword arguments in newer
        # Matplotlib; add_subplot creates the same 3-D axes.
        ax = fig.add_subplot(111, projection='3d')

        x_len = len(x_names)
        y_len = len(y_names)

        # Anchor (lower-left corner) of every bar, flattened row by row.
        x, y = np.meshgrid(np.arange(0, x_len, 1) - 0.25,
                           np.arange(0, y_len, 1) - 0.5)
        x = x.flatten()
        y = y.flatten()
        z = np.zeros(x_len * y_len)

        # Every bar has a 0.5 x 0.5 footprint; heights come from data.
        dx = 0.5 * np.ones_like(z)
        dy = dx.copy()
        dz = np.array(data).flatten()

        # The w_xaxis / w_yaxis / w_zaxis aliases are gone from newer
        # Matplotlib; xaxis / yaxis / zaxis are the same objects.
        ax.xaxis.set_ticks(np.arange(len(data[0])))
        ax.xaxis.set_ticklabels(x_names)

        ax.yaxis.set_ticks(np.arange(len(data)))
        ax.yaxis.set_ticklabels(y_names)

        ax.set_zlabel('Predictive accuracy (%)')
        ax.zaxis.set_tick_params(labelsize=12)

        # Rescale the y direction of the projection by rows / columns.
        ax.get_proj = lambda: np.dot(
            Axes3D.get_proj(ax), np.diag([1, len(data) / len(data[0]), 1, 1]))

        # Colour by (value - 15) normalised over the fixed range 0..30.
        nrm = mpl.colors.Normalize(0, 30)
        c_range = (np.array(data) - 15).flatten()
        colors = cm.RdYlGn(nrm(c_range))
        ax.bar3d(x, y, z, dx, dy, dz, colors)
        plt.tight_layout()

        fig.savefig('./predictions/barplot3D.png', bbox_inches='tight')
        fig.savefig('./predictions/barplot3D.pdf',
                    format='pdf',
                    transparent=True,
                    bbox_inches='tight')
        fig.savefig('./predictions/barplot3D.eps',
                    format='eps',
                    transparent=True,
                    bbox_inches='tight')
        fig.clear()
        plt.clf()
Ejemplo n.º 5
0
 def value_color(cls, value):
     """Return an RGB triple for *value* taken from the RdYlGn colormap.

     ``None`` and NaN give the fixed grey ``(128, 128, 128)``. Otherwise
     *value* is scaled into the window between the ``cmap_low`` and
     ``cmap_high`` settings (defaults 38 and 218), truncated to an int and
     passed to the colormap; the first three channels are multiplied by 256.
     """
     import matplotlib.cm as cm
     if value is None or np.isnan(value):
         return (128, 128, 128)

     low = config_get('cmap_low', 38)
     high = config_get('cmap_high', 218)
     # An int argument is handed to the colormap as-is (no 0..1 scaling here).
     lut_index = int((high - low) * value + low)
     channels = cm.RdYlGn(lut_index)[:3]
     return tuple([channel * 256 for channel in channels])
Ejemplo n.º 6
0
def exec_isomap(X, Y, mmpno):
    """Embed *X* in two dimensions with Isomap and show two scatter plots.

    Both plots colour the embedded points with RdYlGn applied to
    ``1 - (Y - min(Y)) / (max(Y) - min(Y))``. The second plot additionally
    joins, with a blue line, every pair of row indices found in the first two
    columns of *mmpno*.

    Parameters:
        X: samples passed to ``Isomap.fit`` / ``transform``.
        Y: one value per sample, used only for colouring.
        mmpno: 2-D integer array; column 0 and column 1 hold the indices of
            the two points to connect.

    Side effects: sets ``plt.rcParams`` font size/family and calls
    ``plt.show()`` once per plot. Nothing is returned.
    """
    isomap = Isomap(n_neighbors=10, n_components=2, eigen_solver='dense')
    X_iso = isomap.fit(X).transform(X)

    # Scale Y to 0..1; a constant Y would otherwise give 0/0.
    Ymin = np.min(Y)
    Yspan = np.max(Y) - Ymin
    if Yspan == 0:
        Y0to1 = np.zeros(np.shape(Y))
    else:
        Y0to1 = (Y - Ymin) / Yspan
    point_colors = cm.RdYlGn(1 - Y0to1)

    plt.rcParams["font.size"] = 18
    plt.rcParams["font.family"] = "Serif"

    # First pass: points only. Second pass: points plus the pair lines.
    # (The original also opened and showed an empty 1x8 figure first; that
    # placeholder is dropped because nothing was ever drawn into it.)
    for draw_pairs in (False, True):
        plt.figure(figsize=(8, 8))
        plt.scatter(X_iso[:, 0], X_iso[:, 1], c=point_colors, s=30)
        if draw_pairs:
            for i, (no_a, no_b) in enumerate(zip(mmpno[:, 0], mmpno[:, 1])):
                # NOTE(review): 3025 looks like a dataset-specific row count
                # used as a debugging check -- confirm before relying on it.
                if no_b >= 3025:
                    print(i)
                plt.plot([X_iso[no_a, 0], X_iso[no_b, 0]],
                         [X_iso[no_a, 1], X_iso[no_b, 1]],
                         color='blue')
        plt.ylim(-20, 20)
        plt.xlim(-20, 20)
        plt.xlabel("z1")
        plt.ylabel("z2")
        plt.show()
Ejemplo n.º 7
0
def survey(fig, ax, results, category_names):
    """
    Draw a horizontal stacked bar chart of *results* on *ax*.

    Parameters
    ----------
    fig : figure object
        Returned unchanged; not otherwise used.
    ax : axes object
        The axes that receive the bars, the value labels, the legend and
        the title.
    results : dict
        A mapping from question labels to a list of answers per category.
        It is assumed all lists contain the same number of entries and that
        it matches the length of *category_names*.
    category_names : list of str
        The category labels.

    Returns
    -------
    The *fig* argument.
    """

    from matplotlib import cm
    row_labels = list(results.keys())
    values = numpy.array(list(results.values()))
    running_totals = values.cumsum(axis=1)
    n_categories = values.shape[1]
    palette = cm.RdYlGn(numpy.linspace(0.15, 0.85, n_categories))

    ax.invert_yaxis()
    ax.xaxis.set_visible(False)
    ax.set_xlim(0, values.sum(axis=1).max())

    for col, (category, rgba) in enumerate(zip(category_names, palette)):
        segment = values[:, col]
        left_edges = running_totals[:, col] - segment
        ax.barh(labels := row_labels,
                segment,
                left=left_edges,
                height=0.5,
                label=category,
                color=rgba) if False else ax.barh(row_labels,
                                                  segment,
                                                  left=left_edges,
                                                  height=0.5,
                                                  label=category,
                                                  color=rgba)
        midpoints = left_edges + segment / 2

        # Light text on dark segments, grey text on bright ones.
        red, green, blue, _ = rgba
        if red * green * blue < 0.5:
            label_color = 'white'
        else:
            label_color = 'darkgrey'

        for row, (mid, amount) in enumerate(zip(midpoints, segment)):
            ax.text(mid,
                    row,
                    str(int(amount)),
                    ha='center',
                    va='center',
                    color=label_color)

    ax.legend(ncol=2,
              bbox_to_anchor=(0, 1),
              loc='lower left',
              fontsize='small')

    ax.set_title("Normalized stacked bar chart", pad=60)

    return fig
Ejemplo n.º 8
0
def plotClusters(X, labels, params=None, method='', prefix='clusters'):
    """Save a grid of pairwise scatter plots coloured by cluster label.

    For every ordered pair of columns of *X* one subplot is drawn; points
    whose label equals ``k`` (for ``k`` in ``0 .. n_clusters - 1``) share one
    RdYlGn colour. The figure (pyplot figure number 1, cleared first) is
    saved as ``'<prefix>_<method>.png'`` at 300 dpi.

    Parameters:
        X: 2-D array-like, one row per sample.
        labels: one cluster label per sample.
        params: optional axis labels, one per column of X; column indices are
            used when it is empty or None.
        method: text used in the title and the file name.
        prefix: text used in the title and the file name.
    """
    from ductape.common.utils import slice_it
    import matplotlib.pyplot as plt
    import matplotlib.cm as cm

    X = np.array(X)
    labels = np.array(labels)
    n_clusters_ = len(np.unique(labels))
    n_dims = len(X[0])

    # The colour table depends only on the number of clusters, so it is
    # built once instead of once per subplot.
    color = {
        j: cm.RdYlGn(chunk[0])
        for j, chunk in enumerate(slice_it(range(255), cols=n_clusters_))
    }

    fig = plt.figure(1)
    fig.clf()
    cells = product(range(n_dims), repeat=2)
    for figidx, (x, y) in enumerate(cells, start=1):
        ax = fig.add_subplot(n_dims, n_dims, figidx)

        # First column of the grid carries the row label.
        if figidx % n_dims == 1:
            ax.set_ylabel(params[x] if params else x, rotation='horizontal')
        # NOTE(review): this condition looks like it was meant to select the
        # bottom row only, but it also holds for many other cells; it is
        # kept unchanged here -- confirm the intended layout.
        if abs((n_dims * n_dims) % figidx - n_dims) <= n_dims:
            ax.set_xlabel(params[y] if params else y)

        for k in range(n_clusters_):
            my_members = labels == k
            ax.plot(X[my_members, y], X[my_members, x], '.', color=color[k])

    fig.subplots_adjust(wspace=0, hspace=0)
    for ax in fig.axes:
        ax.get_xaxis().set_ticks([])
        ax.get_yaxis().set_ticks([])
    fig.suptitle('Clusters (%s, %s): %d' % (prefix, method, n_clusters_))
    fig.savefig('%s_%s.png' % (prefix, method), dpi=300)
Ejemplo n.º 9
0
    def value_color(cls, value: Union[float, 'Score']) -> tuple:
        """Get an RGB color based on the Score.

        Args:
            value (Union[float, Score]): the value to colour. ``None`` and
                NaN are accepted and map to grey.

        Returns:
            tuple: ``(128, 128, 128)`` for ``None``/NaN; otherwise the first
                three RdYlGn channels, each multiplied by 256, for the
                colormap entry ``int((cmap_high - cmap_low) * value +
                cmap_low)``. ``cmap_low`` / ``cmap_high`` are read through
                ``config_get`` with defaults 38 and 218.
        """
        import matplotlib.cm as cm
        if value is None or np.isnan(value):
            return (128, 128, 128)

        low = config_get('cmap_low', 38)
        high = config_get('cmap_high', 218)
        # An int argument is handed to the colormap as-is.
        lut_index = int((high - low) * value + low)
        channels = cm.RdYlGn(lut_index)[:3]
        return tuple([channel * 256 for channel in channels])
Ejemplo n.º 10
0
def colorbar_show(Y):
    """Draw a narrow vertical RdYlGn colour scale spanning min(Y)..max(Y).

    A 0.5 x 8 figure is created and 256 large markers are stacked at x = 0.5
    from the minimum to the maximum of *Y*; marker colours run through the
    colormap in reverse (``1 - t``). The axes are titled ``p$K_i$``.
    Nothing is returned and the figure is not shown here.
    """
    upper = np.max(Y)
    lower = np.min(Y)

    fig, ax = plt.subplots(figsize=(0.5, 8))
    ramp = np.linspace(0, 1, 256)
    centre_line = np.full(256, 0.5)

    ax.tick_params(labelbottom=False, bottom=False)
    ax.set_title('p$K_i$')
    ax.set_ylim(lower, upper)
    ax.set_xlim(0, 1)

    heights = ramp * (upper - lower) + lower
    ax.scatter(centre_line, heights, c=cm.RdYlGn(1 - ramp), s=1000)
Ejemplo n.º 11
0
def plot_coherdist(ax, ans, freq, fl=None, fu=None, domean=None):
    """Scatter coherence against distance on *ax* and add a colorbar.

    Parameters
    ----------
    ax : axes to draw on; a thin colorbar axes is added to its figure.
    ans : sequence of records. The code reads ``record[0]`` as the x value
        (labelled as distance in km), ``record[5]`` as the coherence array
        and ``ans[0][4]`` as the frequency array shared by all records.
    freq : frequency whose nearest sample (via ``find_nearest``) is plotted
        when *domean* is falsy; it is also shown in the title.
    fl, fu : band limits; only used when *domean* is truthy, in which case
        the coherence is averaged between their nearest indices (upper index
        excluded).
    domean : truthy to plot the band mean instead of a single sample.

    Returns
    -------
    The entry of the frequency array nearest to *freq*.

    Relies on the module-level ``fontdict_axis`` and on the helpers
    ``find_nearest`` and ``tick_stride``.
    """
    color = {"color": "0.9"}

    freqs = ans[0][4]
    if domean:
        _idx0 = find_nearest(freqs, fl)
        _idx1 = find_nearest(freqs, fu)
    _idx = find_nearest(freqs, freq)

    dists = [record[0] for record in ans]
    if domean:
        Cxys = [np.mean(record[5][_idx0:_idx1]) for record in ans]
    else:
        Cxys = [record[5][_idx] for record in ans]

    # Marker colours are computed directly from the coherence values.
    scat = ax.scatter(dists,
                      Cxys,
                      c=cm.RdYlGn(Cxys),
                      alpha=0.98,
                      linewidth=0.35,
                      marker='o',
                      s=70,
                      edgecolor='black')

    # xaxis stuff
    xmajor, xminor = tick_stride(np.min(dists), np.max(dists), base=1, prec=2)
    ax.set_xlim(0, np.max(dists) * 1.05)
    ax.xaxis.set_major_locator(MultipleLocator(xmajor))
    ax.xaxis.set_major_formatter(FormatStrFormatter('%0.1f'))
    ax.xaxis.set_minor_locator(MultipleLocator(xminor))
    # The visibility flag is passed positionally: its keyword was renamed
    # from `b` to `visible` in newer Matplotlib releases.
    ax.xaxis.grid(True, which="minor", **color)
    ax.xaxis.grid(True, which="major", **color)
    ax.set_xlabel('Intra-station Distance (km)', fontdict=fontdict_axis)

    # yaxis stuff
    ax.set_ylim(0, 1.05)
    ymajor, yminor = tick_stride(0, 1, base=.1, prec=2)
    ax.yaxis.set_major_locator(MultipleLocator(ymajor))
    ax.yaxis.set_minor_locator(MultipleLocator(yminor))
    ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
    ax.yaxis.grid(True, which="major", **color)
    ax.set_ylabel('Coherence', fontdict=fontdict_axis)

    # plot colorbar in a slim axes just right of the main axes
    scat.set_cmap('RdYlGn')
    pos1 = ax.get_position()
    newax = [pos1.x0 + pos1.width + .01, pos1.y0, 0.01, pos1.height]
    fig = ax.get_figure()
    ax1 = fig.add_axes(newax)
    cbar = fig.colorbar(scat, cax=ax1)
    cbar.set_label(r'Coherence', fontdict=fontdict_axis)
    ax1.yaxis.set_major_locator(MultipleLocator(.25))

    # Title
    ax.set_title(f'Coherence, center frequency: {freq:0.4f} Hz',
                 fontdict={'fontsize': 12},
                 loc='left')

    return freqs[_idx]
Ejemplo n.º 12
0
    def plotTruss(self, h1, U):
        """Draw the truss twice in figure *h1* and show it.

        Top subplot: the undeformed elements in black, the displaced node
        positions as red dots and a label per element. Bottom subplot: the
        displaced elements coloured by stress (RdYlGn), dotted when the
        element stress is at or below the stress limit or its buckle risk is
        at least 100.

        Args:
            h1: reference to a figure; selected through ``h1.number``.
            U: global displacement matrix; row ``2 * nodeNr`` is read as the
                x displacement and row ``2 * nodeNr + 1`` as the y
                displacement of a node (column 0).
        """
        sc = 10  # scale applied to the displacements so movement is visible

        def displaced(node):
            # Node position after adding the scaled displacement from U.
            return (node.xPos + sc * U[2 * node.nodeNr, 0],
                    node.yPos + sc * U[2 * node.nodeNr + 1, 0])

        def text_angle(xa, ya, xb, yb):
            # Angle of the segment in degrees (exact pi instead of 3.14).
            return np.degrees(np.arctan2(yb - ya, xb - xa))

        plt.figure(h1.number)
        plt.subplot(2, 1, 1)
        for element in self.elementList:
            # Plot original bridge
            x1 = element.firstNode.xPos
            y1 = element.firstNode.yPos
            x2 = element.secondNode.xPos
            y2 = element.secondNode.yPos
            plt.plot([x1, x2], [y1, y2], 'k')

            # Plot the nodes for the deformed bridge (shared nodes are
            # simply drawn once per element they belong to).
            for node in (element.firstNode, element.secondNode):
                xd, yd = displaced(node)
                plt.plot(xd, yd, 'r.')

            # Plot relevant node numbers above each truss.
            # Adapt font size and space before text to fit size of figure
            # you want.
            plt.text(x1,
                     y1,
                     "      ElNr: {0:}, node:  {1:}-{2:}".format(
                         element.elementNr,
                         element.firstNode.nodeNr,
                         element.secondNode.nodeNr),
                     fontsize=9,
                     rotation=text_angle(x1, y1, x2, y2),
                     rotation_mode='anchor',
                     color='grey')
        plt.axis('equal')
        plt.ylabel('metres')
        plt.xlabel('metres')
        plt.title('Unloaded Bridge with Displaced Points')

        # Second plot with colors for stress
        plt.subplot(2, 1, 2)
        stressLimit = -19 * 10**6

        # First find the range of stresses
        stresses = np.array(
            [element.elementStress for element in self.elementList],
            dtype=float)
        stress_floor = np.min(stresses) * 1.2
        stress_range = (np.max(stresses) - np.min(stresses)) * 1.2

        for element in self.elementList:
            # Plot trusses of deformed bridge, colour-coded to show stresses.
            x1, y1 = displaced(element.firstNode)
            x2, y2 = displaced(element.secondNode)

            if stress_range == 0:
                # All stresses equal: avoid 0/0 and use the colormap centre.
                color_value = 0.5
            else:
                color_value = ((element.elementStress - stress_floor) /
                               stress_range)

            if (element.elementStress <= stressLimit
                    or element.elementBuckleRisk >= 100):
                lineStyle = ':'
            else:
                lineStyle = '-'

            plt.plot([x1, x2], [y1, y2],
                     c=cm.RdYlGn(color_value),
                     lw=3,
                     ls=lineStyle)
            # Adapt font size and space before text to fit size of figure
            # you want.
            tt = plt.text(x1,
                          y1,
                          "        El: {2}: {0:1.3f}Mpa br{1:3.0f}\n ".format(
                              element.elementStress / element.stressScale,
                              element.elementBuckleRisk,
                              element.elementNr),
                          fontsize=6,
                          rotation=text_angle(x1, y1, x2, y2),
                          rotation_mode='anchor',
                          color='grey')
            tt.set_bbox(dict(facecolor='white', alpha=0.5, edgecolor='none'))

        plt.axis('equal')
        plt.ylabel('metres')
        plt.xlabel('metres')
        plt.title('Loaded Bridge with Colour-Coded Stresses')
        plt.show()
Ejemplo n.º 13
0
def plot():
    """
    Plot the timing results stored in a CSV file of ../resources/test.

    The file used is the first one returned by getfiles() (presumably the
    most recent one -- this depends on how getfiles orders its result).
    Its rows are averaged per dataset ('ds_name') and four kinds of plots
    are produced:

    - three scatterplots relating the elapsed time, the dataset's number of
      rows and the dataset's number of attributes. Each plot puts two of
      these quantities on the axes and uses the third one as the point size;
      each point corresponds to a dataset;
    - for every dataset, a line plot of the mean running time against the
      number of RFDs found.

    Nothing is returned. If the directory yields no file, or no separator
    can be determined for the file, a message is printed and the function
    returns without plotting.
    """
    dirpath = os.path.abspath("../resources/test")
    files = getfiles(dirpath)
    if len(files) == 0:
        print("No result file found in ", dirpath)
        return
    file_path = os.path.join(dirpath, files[0])
    try:
        sep, _ = check_sep_n_header(file_path)
    except TypeError:
        print("Unable to find separator in file ", files[0])
        return

    test_df = pd.read_csv(file_path, sep=sep, decimal=',')
    grouped_df = test_df.groupby(['ds_name']).mean()
    datasets = list(grouped_df.index)

    print(grouped_df)

    # Per-quantity axis label, point-size multiplier and axis limits.
    attr_param = pd.DataFrame(
        {
            "label": [
                'numero di attributi', 'numero di righe',
                'tempo impiegato in ms'
            ],
            "incr_factor": [1000, 10, 1.5],
            "limits": [(1, 7), (-500, 3000), (-5000, 40000)]
        },
        index=['ds_attr_size', 'ds_len', 'time_elapsed'])

    # One RdYlGn colour per dataset.
    ds_color = pd.DataFrame(cm.RdYlGn(np.linspace(0, 1, len(grouped_df))),
                            index=list(grouped_df.index))

    # Which quantity goes on x, on y and into the point size of each plot.
    combinations = pd.DataFrame(
        [[
            'ds_attr_size', 'ds_len', 'time_elapsed',
            'numero di righe rispetto al numero di attributi'
        ],
         [
             'ds_attr_size', 'time_elapsed', 'ds_len',
             'tempo impiegato rispetto al numero di attributi'
         ],
         [
             'ds_len', 'time_elapsed', 'ds_attr_size',
             'tempo impiegato rispetto al numero di righe'
         ]],
        columns=["x", "y", "shape", "title"])

    # Iterate over the combinations themselves rather than over the length
    # of attr_param, which only happened to have the same number of rows.
    for _row_label, comb in combinations.iterrows():
        _, ax = plt.subplots()

        ax.set_facecolor('white')
        plt.grid(color='grey')
        for side in ('bottom', 'top', 'right', 'left'):
            ax.spines[side].set_color('grey')

        plt.xlim(attr_param["limits"][comb['x']])
        plt.xlabel(attr_param["label"][comb['x']])
        plt.ylim(attr_param["limits"][comb['y']])
        plt.ylabel(attr_param["label"][comb['y']])
        plt.title(comb["title"])

        # Largest points first, so that they are drawn underneath.
        grouped_df = grouped_df.sort_values(by=[comb['shape']],
                                            ascending=False)
        for ds_name in grouped_df.index:
            xval = grouped_df[comb['x']][ds_name]
            yval = grouped_df[comb['y']][ds_name]
            sval = grouped_df[comb['shape']][ds_name] * attr_param[
                "incr_factor"][comb['shape']]
            ax.scatter(x=xval,
                       y=yval,
                       s=sval,
                       c=ds_color.loc[ds_name],
                       label="{}: time {} ms".format(
                           ds_name[:-4],
                           int(grouped_df["time_elapsed"][ds_name])))

        lgnd = plt.legend(scatterpoints=1, fontsize=10)
        # Newer Matplotlib exposes the handles as `legend_handles`; older
        # releases only have `legendHandles`.
        handles = getattr(lgnd, "legend_handles", None)
        if handles is None:
            handles = lgnd.legendHandles
        # Give every legend marker the same size regardless of point size.
        for i in range(len(grouped_df)):
            handles[i]._sizes = [75]

    for ds in datasets:
        _, ax = plt.subplots()
        grouped_rfd = test_df[test_df.ds_name == ds][[
            'rfd_count', 'time_elapsed'
        ]].groupby(by=['rfd_count']).mean()

        rfd_axes = grouped_rfd.plot(
            y="time_elapsed",
            marker='.',
            markersize=10,
            title=
            "Tempo impiegato rispetto al numero di RFD trovate nel dataset {}".
            format(ds[:-4]),
            ax=ax,
            legend=False)

        # One legend entry per RFD count, built from proxy markers.
        legend_dots = []
        for rfd_count, row in grouped_rfd.iterrows():
            legend_text = "{} RFD: tempo {} ms".format(int(rfd_count),
                                                       row['time_elapsed'])
            legend_dots.append(
                Line2D(range(1),
                       range(1),
                       linewidth=0,
                       color="white",
                       marker='o',
                       markerfacecolor="red",
                       label=legend_text))

        rfd_axes.set(xlabel="RFD trovate", ylabel='Tempo impiegato in ms')
        ax.legend(handles=legend_dots)
    plt.show()
Ejemplo n.º 14
0
# Open the hyperspectral cube described by hdrFile / bilFile, write an RGB
# preview and an NDVI false-colour image for the current fileIndex.
img = envi.open(hdrFile, bilFile)

#  Define band numbers for bands of interest
#  (an alternative set was noted in the original source:
#   nir 166, red 115, green 73, blue 30)
nirBandNo = 83
redBandNo = 58
greenBandNo = 36
blueBandNo = 15

#  Grab the bands of color from the tensor
nir = img[:, :, nirBandNo]
red = img[:, :, redBandNo]
green = img[:, :, greenBandNo]
blue = img[:, :, blueBandNo]

#  Create RGB image
rgbFile = '04-05-2016/rgb%d.jpg' % fileIndex
save_rgb(rgbFile, img, [redBandNo, greenBandNo, blueBandNo])

#  Calculate NDVI; the band slices keep a trailing axis of length 1, which
#  is dropped so the result is a plain 2-D image.
#  NOTE(review): pixels where nir + red == 0 divide by zero, and NDVI values
#  below 0 fall outside the colormap's 0..1 input range -- confirm that the
#  resulting colours are acceptable.
ndvi = (nir - red) / (nir + red)
ndvi = np.squeeze(ndvi, axis=2)

#  Use PIL to convert the colour-mapped array (8-bit RGBA) to an image
ndviImage = Image.fromarray(cm.RdYlGn(ndvi, bytes=True))
ndviFile = '04-05-2016/ndvi%d.jpg' % fileIndex
#  The original went through sm.toimage (presumably scipy.misc.toimage,
#  which newer SciPy releases no longer provide). JPEG has no alpha channel,
#  so the image is converted to RGB and saved with PIL directly.
ndviImage.convert('RGB').save(ndviFile)
Ejemplo n.º 15
0
def exec_gtm(X, Y, mmpno):
    """Fit a GTM model to *X* and show four latent-space scatter plots.

    The plots are, in order: responsibility means, responsibility means with
    pair lines, responsibility modes, responsibility modes with pair lines.
    Points are coloured with RdYlGn applied to
    ``1 - (Y - min(Y)) / (max(Y) - min(Y))``.

    Parameters:
        X: samples passed to ``model.fit`` and ``model.responsibility``.
        Y: one value per sample, used only for colouring.
        mmpno: 2-D integer array; column 0 and column 1 hold the indices of
            the two points joined by a blue line in the "pair" plots.

    Nothing is plotted (and nothing is reported) when ``model.success_flag``
    is falsy after fitting. Sets ``plt.rcParams`` font size/family.
    """
    # settings
    shape_of_map = [30, 30]
    shape_of_rbf_centers = [12, 12]
    variance_of_rbfs = 0.05
    lambda_in_em_algorithm = 0  #1.0e-5
    number_of_iterations = 100
    display_flag = 1

    model = gtm(shape_of_map, shape_of_rbf_centers, variance_of_rbfs,
                lambda_in_em_algorithm, number_of_iterations, display_flag)
    model.fit(X)

    if not model.success_flag:
        return

    # calculate of responsibilities
    responsibilities = model.responsibility(X)

    # Scale Y to 0..1; a constant Y would otherwise give 0/0.
    Ymin = np.min(Y)
    Yspan = np.max(Y) - Ymin
    if Yspan == 0:
        Y0to1 = np.zeros(np.shape(Y))
    else:
        Y0to1 = (Y - Ymin) / Yspan
    point_colors = cm.RdYlGn(1 - Y0to1)

    plt.rcParams["font.size"] = 18
    plt.rcParams["font.family"] = "Serif"

    # mean of responsibilities, without and with the pair lines
    means = responsibilities.dot(model.map_grids)
    _show_gtm_projection(means, point_colors, "mean")
    print(means.shape)
    _show_gtm_projection(means, point_colors, "mean", mmpno)

    # mode of responsibilities, without and with the pair lines
    modes = model.map_grids[responsibilities.argmax(axis=1), :]
    _show_gtm_projection(modes, point_colors, "mode")
    _show_gtm_projection(modes, point_colors, "mode", mmpno)


def _show_gtm_projection(points, colors, kind, pairs=None):
    """Show one 10x10 scatter of *points*; optionally join index pairs."""
    # Only one figure is opened per plot: the original also created an extra
    # figure right before this one and never drew into it.
    plt.figure(figsize=(10, 10))
    plt.scatter(points[:, 0], points[:, 1], c=colors, s=100)
    if pairs is not None:
        for i, (no_a, no_b) in enumerate(zip(pairs[:, 0], pairs[:, 1])):
            # NOTE(review): 3025 looks like a dataset-specific row count
            # used as a debugging check -- confirm before relying on it.
            if no_b >= 3025:
                print(i)
            plt.plot([points[no_a, 0], points[no_b, 0]],
                     [points[no_a, 1], points[no_b, 1]],
                     color='blue')
    plt.xlabel("z1 (%s)" % kind)
    plt.ylabel("z2 (%s)" % kind)
    plt.show()
Ejemplo n.º 16
0
def colour_by_score(message, scores):
    """Wrap *message* in a <span> whose background is a score colour.

    ``scores`` is shifted with ``(scores + 1) / 2`` and looked up in the
    RdYlGn colormap; the channels are multiplied by 255, truncated to int
    and written as an ``rgba(...)`` background colour. Returns the HTML
    string.
    """
    scaled_score = (scores + 1) / 2
    rgba = tuple(int(channel * 255) for channel in cm.RdYlGn(scaled_score))

    pieces = [
        "<span style='background-color:rgba",
        str(rgba),
        ";'>",
        message,
        '</span>',
    ]
    return "".join(pieces)
Ejemplo n.º 17
0
def main():
    """Command-line entry point of PyFuncover.

    The function parses the command line and then runs the whole pipeline in
    sequence:

    1. ``--update`` (or a missing ``TAXONOMY`` folder) triggers ``init()``.
    2. The requested taxids are expanded through the lineage helpers
       (``dic_Node``, ``dic_Name``, ``lineage``, ``groupedTaxId``).
    3. PFAM seeds and proteomes are fetched with ``download_pfam`` and
       ``download_genome``; each proteome is split with ``split_fasta``.
    4. Every per-protein FASTA is BLASTed against every PFAM seed, in chunks
       of ``--nbblast`` threads, writing one XML report per protein.
    5. Each XML report is turned into a per-residue score list (+1 for a
       ``+`` in the similarity line, +2 for a letter), which is saved as a
       CSV and as a bar-chart PNG.
    6. A summary table is built, enriched with the columns returned by
       ``get_biodbnet_data`` and written to the ``--out`` CSV file.

    The function takes no parameters and returns nothing; it terminates the
    process through ``sys.exit()`` when arguments are missing, after an
    update, or when nothing could be downloaded.
    """
    description = textwrap.dedent('''
    Welcome to PyFuncover !

    Python Function Uncover ( PyFuncover ) is a new bioinformatic tool
    able to search for protein with a specific function in a full proteome.
    The pipeline coded in python uses BLAST alignment and the sequences from
    a PFAM family as search seed.

    === REQUIREMENT ===

    BLAST
    Python dependancies : BioPython, Numpy, Matplotlib, Pandas

    USAGE :
        python PyFuncover.py -taxid [TAXID ...] -pfam [PFAM ...] --db [DB ...] --out [OUT]
        python PyFuncover.py --update

    TO UPDATE THE DATABASES :

        python PyFuncover.py --update

    === OBLIGATORY ARGUMENTS : ===

    -pfam : List of PFAM familly ID : PF####
            each separated by a blank space

        PyFuncover.py -pfam PF14651 PF#### ...


    -taxid: The list of TaxID for each organism you want to download a proteome
        Each separated by a space (for example Human and Yeast (S299C) taxid)

            PyFuncover.py -taxid 9606 559292

        Can be a Taxid that represent a node in the phylogenetic tree
        (Eukaryotes : 2759 ; Insecta : 50557, ...)
        He will retrieve all availlable assembly for them

    === OPTIONAL : ===

    --update :
        Download the last release of the NCBI Taxonomic Database
        Download the last RefSeq, Prokaryote and Eukaryote Genome Assembly List

        PyFuncover.py --update

    --out : Filename output
        Format are in CSV format (pandas.to_csv output)
        Default : results.csv

    --db : The list of choosen cross-ref number to retrieve data from bioDBnet database :
            default : 137 47 48 49 (UNIPROT ID, GO-TERMs Databases)

            WARNING ! : Too high number of requested cross-refs will occur a slow-mode request 1 by 1.
                        If it get an error for 1 request with 1 protein in the mode 1 by 1,
                        the program will ABORT with a too high number of choosen cross-ref choosen exception !

        1 : 'Affy ID',
        2 : 'Agilent ID',
        3 : 'Allergome Code',
        4 : 'ApiDB_CryptoDB ID',
        5 : 'Biocarta Pathway Name',
        6 : 'BioCyc ID',
        7 : 'CCDS ID',
        8 : 'Chromosomal Location',
        9 : 'CleanEx ID',
        10 : 'CodeLink ID',
        11 : 'COSMIC ID',
        12 : 'CPDB Protein Interactor',
        13 : 'CTD Disease Info',
        14 : 'CTD Disease Name',
        15 : 'CYGD ID',
        16 : 'dbSNP ID',
        17 : 'dictyBase ID',
        18 : 'DIP ID',
        19 : 'DisProt ID',
        20 : 'DrugBank Drug ID',
        21 : 'DrugBank Drug Info',
        22 : 'DrugBank Drug Name',
        23 : 'EC Number',
        24 : 'EchoBASE ID',
        25 : 'EcoGene ID',
        26 : 'Ensembl Biotype',
        27 : 'Ensembl Gene ID',
        28 : 'Ensembl Gene Info',
        29 : 'Ensembl Protein ID',
        30 : 'Ensembl Transcript ID',
        31 : 'FlyBase Gene ID',
        32 : 'FlyBase Protein ID',
        33 : 'FlyBase Transcript ID',
        34 : 'GAD Disease Info',
        35 : 'GAD Disease Name',
        36 : 'GenBank Nucleotide Accession',
        37 : 'GenBank Nucleotide GI',
        38 : 'GenBank Protein Accession',
        39 : 'GenBank Protein GI',
        40 : 'Gene ID',
        41 : 'Gene Info',
        42 : 'Gene Symbol',
        43 : 'Gene Symbol and Synonyms',
        44 : 'Gene Symbol ORF',
        45 : 'Gene Synonyms',
        46 : 'GeneFarm ID',
        47 : 'GO - Biological Process',
        48 : 'GO - Cellular Component',
        49 : 'GO - Molecular Function',
        50 : 'GO ID',
        51 : 'GSEA Standard Name',
        52 : 'H-Inv Locus ID',
        53 : 'HAMAP ID',
        54 : 'HGNC ID',
        55 : 'HMDB Metabolite',
        56 : 'Homolog - All Ens Gene ID',
        57 : 'Homolog - All Ens Protein ID',
        58 : 'Homolog - All Gene ID',
        59 : 'Homolog - Human Ens Gene ID',
        60 : 'Homolog - Human Ens Protein ID',
        61 : 'Homolog - Human Gene ID',
        62 : 'Homolog - Mouse Ens Gene ID',
        63 : 'Homolog - Mouse Ens Protein ID',
        64 : 'Homolog - Mouse Gene ID',
        65 : 'Homolog - Rat Ens Gene ID',
        66 : 'Homolog - Rat Ens Protein ID',
        67 : 'Homolog - Rat Gene ID',
        68 : 'HomoloGene ID',
        69 : 'HPA ID',
        70 : 'HPRD ID',
        71 : 'HPRD Protein Complex',
        72 : 'HPRD Protein Interactor',
        73 : 'Illumina ID',
        74 : 'IMGT/GENE-DB ID',
        75 : 'InterPro ID',
        76 : 'IPI ID',
        77 : 'KEGG Disease ID',
        78 : 'KEGG Gene ID',
        79 : 'KEGG Orthology ID',
        80 : 'KEGG Pathway ID',
        81 : 'KEGG Pathway Info',
        82 : 'KEGG Pathway Title',
        83 : 'LegioList ID',
        84 : 'Leproma ID',
        85 : 'Locus Tag',
        86 : 'MaizeGDB ID',
        87 : 'MEROPS ID',
        88 : 'MGC(ZGC/XGC) ID',
        89 : 'MGC(ZGC/XGC) Image ID',
        90 : 'MGC(ZGC/XGC) Info',
        91 : 'MGI ID',
        92 : 'MIM ID',
        93 : 'MIM Info',
        94 : 'miRBase ID',
        95 : 'NCIPID Pathway Name',
        96 : 'NCIPID Protein Complex',
        97 : 'NCIPID Protein Interactor',
        98 : 'NCIPID PTM',
        99 : 'Orphanet ID',
        100 : 'PANTHER ID',
        101 : 'Paralog - Ens Gene ID',
        102 : 'PBR ID',
        103 : 'PDB ID',
        104 : 'PeroxiBase ID',
        105 : 'Pfam ID',
        106 : 'PharmGKB Drug Info',
        107 : 'PharmGKB Gene ID',
        108 : 'PIR ID',
        109 : 'PIRSF ID',
        110 : 'PptaseDB ID',
        111 : 'PRINTS ID',
        112 : 'ProDom ID',
        113 : 'PROSITE ID',
        114 : 'PseudoCAP ID',
        115 : 'PubMed ID',
        116 : 'Reactome ID',
        117 : 'Reactome Pathway Name',
        118 : 'REBASE ID',
        119 : 'RefSeq Genomic Accession',
        120 : 'RefSeq Genomic GI',
        121 : 'RefSeq mRNA Accession',
        122 : 'RefSeq ncRNA Accession',
        123 : 'RefSeq Nucleotide GI',
        124 : 'RefSeq Protein Accession',
        125 : 'RefSeq Protein GI',
        126 : 'Rfam ID',
        127 : 'RGD ID',
        128 : 'SGD ID',
        129 : 'SMART ID',
        130 : 'STRING Protein Interactor',
        131 : 'TAIR ID',
        132 : 'Taxon ID',
        133 : 'TCDB ID',
        134 : 'TIGRFAMs ID',
        135 : 'TubercuList ID',
        136 : 'UCSC ID',
        137 : 'UniGene ID',
        138 : 'UniProt Accession',
        139 : 'UniProt Entry Name',
        140 : 'UniProt Info',
        141 : 'UniProt Protein Name',
        142 : 'UniSTS ID',
        143 : 'VectorBase Gene ID',
        144 : 'VEGA Gene ID',
        145 : 'VEGA Protein ID',
        146 : 'VEGA Transcript ID',
        147 : 'WormBase Gene ID',
        148 : 'WormPep Protein ID',
        149 : 'XenBase Gene ID',
        150 : 'ZFIN ID'

    --nb-blast : The number of parrallelized BLAST process (default : 10)
    Be carefull, high number will use lot of memory and create a stck overflow !

    --nb-prot : The number of protein per request to the bioDBnet Database:

                WARNING ! : Too high number of requested cross-refs will occur a slow-mode request 1 by 1.
                            If it throw an error for 1 request in this mode, the program will ABORT with a
                            too high number of cross ref choosen exception !

    ''')
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description)
    parser.add_argument('-pfam',
                        nargs='*',
                        help='The list of PFAM accession (PF#####)')
    parser.add_argument('-taxid',
                        nargs='*',
                        help='The list of TaxID for each organism')
    parser.add_argument('--db', nargs='*', type=int,
                        help='bioDBnet Databases Number: (default : 137 47 48 49 (GO-TERMs Databases))',
                        default=[137, 47, 48, 49])
    # The help used to advertise "result.csv" while the real default is
    # "results.csv"; the text now matches the behaviour.
    parser.add_argument('--out',
                        help='File output, default "results.csv"',
                        default="results.csv")
    parser.add_argument('--update',
                        help='Update the NCBI Taxonomic Database and the Prokaryote, Eukaryote & RefSeq genome assembly',
                        action='store_true', default=False)
    # The long description documents --nb-blast / --nb-prot, so both
    # spellings are registered; dest is pinned to the historical key.
    parser.add_argument('--nbblast', '--nb-blast', dest='nbblast',
                        help='Number of parrallelized BLAST processes',
                        type=int, default=10)
    parser.add_argument('--nbprot', '--nb-prot', dest='nbprot',
                        help='Number of protein per request to the bioDBnet Database',
                        type=int, default=250)
    args = parser.parse_args()
    dicoArgs = vars(args)
    print("Args :")
    print(dicoArgs)

    if len(sys.argv) <= 1:
        parser.print_help()
        print()
        print('''Exit, no argument provided, minimal use :
python PyFuncover.py -taxid [TAXID] -pfam [PFAM]''')
        sys.exit()

    if dicoArgs['update']:
        print('Updating Taxonomic NCBI Databases and the Prokaryote, Eukaryote & RefSeq genome assembly')
        init()
        sys.exit()

    # "not" also rejects a flag given without any value (nargs='*' -> []).
    if not dicoArgs['taxid']:
        parser.print_help()
        print()
        print('You miss -taxid argument')
        sys.exit()
    taxid = dicoArgs['taxid']

    if not dicoArgs['pfam']:
        parser.print_help()
        print()
        print('You miss -pfam argument')
        sys.exit()
    pfam = dicoArgs['pfam']

    biodbnet_db = dicoArgs['db']
    output = dicoArgs['out']
    NB_BLAST_PROCESS = dicoArgs['nbblast']
    NB_PROT_DB = dicoArgs['nbprot']

    if not os.path.isdir(os.path.join(os.getcwd(), 'TAXONOMY')):
        print("Download the reference genome list")
        init()
    else:
        print("Taxonomic Database and genome list already downloaded")
        print("Use --update to update them")

    print("get Taxonomic nodes...")
    dicNode = dic_Node()
    print("get Taxonomic Name...")
    dicName, dicNameInverse = dic_Name()
    print("create Lineage...")
    dicLineage = lineage(dicName, dicNode)
    print("group Taxid...")
    dicGroupedTaxid = groupedTaxId(dicLineage, dicNameInverse)
    print("Read the reference genome assembly")
    df_refseq = pd.read_csv('assembly_summary_refseq.txt', delimiter='\t', skiprows=1, dtype=str)
    print("Read the eukaryotes genome assembly")
    df_euk = pd.read_csv('eukaryotes.txt', delimiter='\t', dtype=str)
    print("Read the prokaryote genome assembly")
    df_prok = pd.read_csv('prokaryotes.txt', delimiter='\t', dtype=str)

    print("Get the list of available taxid")
    set_taxid = set()
    for requested_taxid in taxid:
        print('{} : {}'.format(requested_taxid, dicName[requested_taxid]))
        set_taxid.update(dicGroupedTaxid[requested_taxid])
        # NOTE(review): this listing runs once per requested taxid and shows
        # the cumulative set collected so far, as in the original code.
        print('Availlable species :')
        for available_taxid in set_taxid:
            print('\n\t' + available_taxid + ' : ' + dicName[available_taxid])

    print('Download PFAM')
    PFAMS_PATH = download_pfam(pfam)
    if not PFAMS_PATH:
        print('PyFuncover : EXIT - No PFAMs available for the user selection')
        sys.exit()
    print('Download Genome')
    PATH_LIST = download_genome(set_taxid, df_refseq, df_euk, df_prok)

    # Stop when no proteome could be retrieved.
    if not PATH_LIST:
        print('PyFuncover : EXIT - No proteome available for the user selection')
        sys.exit()

    print('Split proteome into individual fasta')
    PATH_FASTA_GENOME = [split_fasta(fasta) for fasta in PATH_LIST]

    print('BLAST')
    for PFAM in PFAMS_PATH:
        PFAM_NAME = PFAM.split(os.sep)[-2]
        for PATH_FASTA in PATH_FASTA_GENOME:
            # An empty split has no path to derive the folders from.
            if not PATH_FASTA:
                continue

            # Derive the XML folder from the location of the first FASTA.
            species_folder, fasta_assembly_folder, _ = PATH_FASTA[0].split(os.sep)[-3:]
            root = os.getcwd()

            xml_assembly_folder = fasta_assembly_folder.replace('fasta', 'xml_' + PFAM_NAME)
            xml_folder = os.path.join(root, species_folder, xml_assembly_folder)
            if not os.path.isdir(xml_folder):
                os.mkdir(xml_folder)

            n = NB_BLAST_PROCESS
            PATH_FASTA_CHUNKED = list(chunks(PATH_FASTA, n))
            print('BLAST on {} - {}'.format(species_folder, fasta_assembly_folder))
            for cpt, FASTA_LIST in enumerate(PATH_FASTA_CHUNKED, start=1):
                thr_list = list()
                print('{}/{} ({} running BLASTs) : starting'.format(cpt, len(PATH_FASTA_CHUNKED), n))

                for FASTA in FASTA_LIST:
                    xml_file = os.path.split(FASTA)[-1].replace('fasta', 'xml')
                    OUT = os.path.join(xml_folder, xml_file)
                    # Run BLAST when the report is missing, or present but
                    # empty (left behind by a memory error in a previous run).
                    if not os.path.isfile(OUT) or os.stat(OUT).st_size == 0:
                        thr = Thread(target=blast, args=[FASTA, PFAM, OUT])
                        thr.start()
                        thr_list.append(thr)

                # Wait for the whole chunk before starting the next one.
                for thr in thr_list:
                    thr.join()

    print('PLOTTING')
    RESULTS_LIST = list()
    for PFAM in PFAMS_PATH:
        PFAM_NAME = PFAM.split(os.sep)[-2]
        for PATH_FASTA in PATH_FASTA_GENOME:
            if not PATH_FASTA:
                continue

            # Folder names are derived from the path of the FASTA files.
            species_folder, fasta_assembly_folder, _ = PATH_FASTA[0].split(os.sep)[-3:]
            root = os.getcwd()

            # Graph folder
            graph_assembly_folder = fasta_assembly_folder.replace('fasta', 'graph_' + PFAM_NAME)
            graph_folder = os.path.join(root, species_folder, graph_assembly_folder)
            if not os.path.isdir(graph_folder):
                os.mkdir(graph_folder)

            # CSV folder
            csv_assembly_folder = fasta_assembly_folder.replace('fasta', 'csv_' + PFAM_NAME)
            csv_folder = os.path.join(root, species_folder, csv_assembly_folder)
            if not os.path.isdir(csv_folder):
                os.mkdir(csv_folder)

            # XML folder (created during the BLAST step)
            xml_assembly_folder = fasta_assembly_folder.replace('fasta', 'xml_' + PFAM_NAME)
            xml_folder = os.path.join(root, species_folder, xml_assembly_folder)

            for FASTA in PATH_FASTA:
                basename = os.path.basename(FASTA)
                PNG = os.path.join(graph_folder, basename.replace('fasta', 'png'))
                XML = os.path.join(xml_folder, basename.replace('fasta', 'xml'))
                CSV = os.path.join(csv_folder, basename.replace('fasta', 'csv'))

                protein = SeqIO.read(FASTA, "fasta")
                protein_seq = protein.seq
                protein_len = len(protein_seq)

                # Parse the BLAST XML report and accumulate one score per
                # residue of the query protein.
                blast_records = SearchIO.parse(XML, 'blast-xml')
                print('Parsing ' + os.path.basename(XML) + '...')
                score_list = [0] * protein_len
                for record in blast_records:
                    for hsp in record.hsps:
                        q_s = hsp.query_start
                        # Middle line of the BLAST HSP alignment.
                        sim = hsp.aln_annotation['similarity']
                        for i, residue in enumerate(hsp.query.seq):
                            if residue == '-':
                                # A gap in the query consumes an alignment
                                # column but no residue: shift the offset.
                                q_s -= 1
                                continue
                            pos = q_s + i
                            c = sim[i]
                            if c == "+":
                                score_list[pos] = score_list[pos] + 1
                            elif c in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ':
                                score_list[pos] = score_list[pos] + 2

                max_score = max(score_list)
                mean_score = np.mean(score_list)
                total_score = sum(score_list)
                normalised_score = total_score / protein_len
                a = protein.description

                # The CSV and the PNG are written independently, so a missing
                # one is regenerated even when the other already exists.
                if not os.path.isfile(CSV):
                    df = pd.DataFrame(data=score_list, index=list(protein_seq), columns=['score'])
                    df.to_csv(CSV)

                if not os.path.isfile(PNG):
                    # One colour per possible score value. A protein without
                    # any hit has max_score == 0; dividing by 1 instead keeps
                    # every bar at the low end of the colormap.
                    score_span = max_score if max_score > 0 else 1
                    hexa_colors_dict = {
                        value: colors.rgb2hex(cm.RdYlGn(1. * value / score_span)[:3])
                        for value in range(0, max_score + 1)
                    }
                    color = [hexa_colors_dict[value] for value in score_list]

                    fig, ax = plt.subplots()
                    ax.yaxis.grid()
                    plt.bar(range(1, len(score_list) + 1, 1), score_list, color=color)
                    # wrap=True lets long protein descriptions span lines.
                    plt.title(a, wrap=True)
                    plt.savefig(PNG)
                    plt.close()

                # Summary row:
                # ACCESSION | PROTEIN NAME | SPECIES | TAXID | GCA | MAX SCORE | MEAN SCORE | TOTAL SCORE
                NCBI_ACCESSION = '.'.join(basename.rsplit('_', 1)).replace('.fasta', '')
                # The species is the last [bracketed] part of the description.
                SPECIE = re.findall(r'\[(.*?)\]', protein.description)[-1]
                TAXID = dicNameInverse[SPECIE]
                GCA = "_".join(fasta_assembly_folder.split('_')[:2])
                print(NCBI_ACCESSION, a, SPECIE, TAXID, GCA, max_score, mean_score, total_score, normalised_score)
                RESULTS_LIST += [[NCBI_ACCESSION, protein.description, SPECIE, TAXID, GCA, max_score, mean_score, total_score]]

    print("Make request to BioDBnet")
    columns = ['ACCESSION', 'PROTEIN DESCRIPTION', 'SPECIES', 'TAXID', 'GCA', 'MAX SCORE', 'MEAN SCORE', 'TOTAL SCORE']
    df_result = pd.DataFrame(RESULTS_LIST, columns=columns)
    # bioDBnet is queried with the accession stripped of its version suffix.
    list_of_accession = [i.split('.')[0] for i in df_result['ACCESSION']]
    db_list = bioDBnet_database(biodbnet_db)
    data = get_biodbnet_data(list_of_accession, db_list, NB_PROT_DB)

    # Attach every returned column to the summary, matched row by row
    # through the accession.
    for column in data:
        dico = {df_result['ACCESSION'][row_index]: data[column][row_index] for row_index in range(len(data[column]))}
        df_result[column] = df_result['ACCESSION'].map(dico)

    df_result.to_csv(output, index=None)
Ejemplo n.º 18
0
def plot_result(state,
                Routes,
                pooled_exchange=None,
                logbook=None,
                foldername='',
                save_plot=False,
                suffix='',
                commsRadius=False,
                considerationRadius=0.0):
    """Draw the hex grid, the agents' routes and the pooled exchanges.

    Parameters
        - state: simulation state; the attributes read here are
          ``agentRange``, ``noAgents``, ``noTasks``, ``agents``, ``tasks``,
          ``hex_grid``, ``xlimits`` and ``ylimits``
        - Routes: per-agent list of task indices, indexed by agent
        - pooled_exchange: iterable of agent-index pairs; a dotted green line
          is drawn between the two agents of each pair (none by default)
        - logbook: optional object with ``select("gen", "avg", "min")``; the
          last generation number is forwarded to ``plot_setup``
        - foldername, suffix: forwarded to ``plot_save`` when ``save_plot``
        - save_plot: save and close the figure instead of showing it
        - commsRadius, considerationRadius: forwarded to ``plot_setup``

    Returns the ``(ax, lines)`` pair obtained from ``plot_setup``.
    """
    if pooled_exchange is None:
        pooled_exchange = []

    if logbook is None:
        ngen = 0
    else:
        gens, _avgs, _mins = logbook.select("gen", "avg", "min")
        # An empty logbook has no last generation to report.
        ngen = gens[-1] if len(gens) else 0

    # Each route is closed on the agent itself, encoded as the negative
    # index ``a - noAgents``, and stored as a list of [from, to] arcs.
    routes = []
    for a in state.agentRange:
        agent_node = a - state.noAgents
        fullroute = [agent_node] + Routes[a] + [agent_node]
        routes.append([[start, end]
                       for start, end in zip(fullroute, fullroute[1:])])

    fig, ax, lines = plot_setup(state,
                                ngen,
                                commsRadius=commsRadius,
                                considerationRadius=considerationRadius)

    # Scores are mapped onto the colormap over the fixed range 0..10.
    norm = cls.Normalize(vmin=0, vmax=10)
    total_score = 0.0
    total_alive = 0
    for hexkey, hexi in state.hex_grid.hexes.items():
        hexinfo = state.hex_grid.hexinfo[hexkey]
        corners = state.hex_grid.layout.polygon_corners(hexi)
        points = [[corner.x, corner.y] for corner in corners]
        score = hexinfo['score']
        if score > 0:
            total_alive += 1
        total_score += score

        if hexinfo['last_visited'] > 0:
            # Visited hexes are coloured by score; the byte RGBA returned by
            # the colormap is rescaled to 0..1 RGB.
            cl = cm.RdYlGn(norm(score), bytes=True)
            col = [x / 255 for x in cl[:3]]
            hexpatch = plt.Polygon(points, fill=True, color=col, alpha=0.5)
        else:
            hexpatch = plt.Polygon(points,
                                   fill=True,
                                   color='lightgrey',
                                   alpha=0.25)
        center = state.hex_grid.layout.hex_to_pixel(hexi)
        ax[0].add_patch(hexpatch)
        ax[0].annotate('%2.2f' % (score), (center.x, center.y),
                       color='w',
                       weight='bold',
                       fontsize=6,
                       ha='center',
                       va='center')

    for a in state.agentRange:
        xs = []
        ys = []
        # Only the start of every arc is plotted, so the line runs from the
        # agent through its tasks without the closing leg back to the agent.
        for arc in routes[a]:
            if arc[0] < 0:
                loc_i = state.agents[arc[0] + state.noAgents].current_location
            else:
                loc_i = state.tasks[arc[0]].location
            xs.append(loc_i[0])
            ys.append(loc_i[1])
        # A route always holds at least one arc, so setting the data once
        # after the loop is equivalent to updating it on every arc.
        lines[0][a].set_data(xs, ys)

    for exchange in pooled_exchange:
        loc1 = state.agents[exchange[0]].current_location
        loc2 = state.agents[exchange[1]].current_location
        ax[0].plot([loc1[0], loc2[0]], [loc1[1], loc2[1]], ':', color='green')

    title = 'No Agents: %i, no Tasks: %i, hexScore: %2.2f, alive: %i' % (
        state.noAgents, state.noTasks, total_score, total_alive)
    ax[0].set_title(title)

    ax[0].set_xlim(state.xlimits)
    ax[0].set_ylim(state.ylimits)

    if save_plot:
        plot_save(foldername, state, suffix=suffix)
        plot_close()
    else:
        # Non-blocking show followed by a short pause so the GUI refreshes.
        plt.show(block=False)
        plt.pause(0.015)

    return ax, lines