def _iplot_scatter(points, colors, title):
    """Render the first two columns of ``points`` as an inline marker scatter."""
    trace = go.Scatter(x=points[:, 0],
                       y=points[:, 1],
                       mode='markers',
                       marker=dict(color=colors))
    layout = go.Layout(title=title, showlegend=False)
    plotly.offline.iplot(go.Figure(data=[trace], layout=layout))


def plot_clustering_plotly(z_run, labels):
    """Plot 2-D projections of ``z_run`` coloured by label.

    Two figures are rendered with ``plotly.offline.iplot``: one built from a
    ``TruncatedSVD`` projection (titled 'PCA on z_run') and one from a
    ``TSNE`` projection (titled 'tSNE on z_run').  Every distinct label gets
    one randomly drawn hex colour, so colours differ between calls.

    Args:
        z_run: 2-D array of samples (rows) by features (columns).
        labels: Sequence of labels; only the first ``z_run.shape[0]`` entries
            are used.  Labels may be any values ``np.unique`` can sort -- they
            no longer have to be the contiguous integers ``0..k-1``.

    Returns:
        None.  The figures are displayed as a side effect.
    """
    # labels can be longer than z_run (the original note blames batch sizing),
    # so trim them to the number of samples actually present.
    labels = np.asarray(labels[:z_run.shape[0]]).ravel()

    # Map each sample to the position of its label among the unique labels;
    # indexing the palette by that position is safe for arbitrary label values,
    # whereas indexing by the label value itself breaks on gaps or negatives.
    unique_labels, label_positions = np.unique(labels, return_inverse=True)
    palette = ['#%06X' % randint(0, 0xFFFFFF) for _ in unique_labels]
    colors = [palette[position] for position in label_positions]

    # Compute both projections before drawing anything, as the original did.
    z_run_pca = TruncatedSVD(n_components=3).fit_transform(z_run)
    z_run_tsne = TSNE(perplexity=80, min_grad_norm=1E-12,
                      n_iter=3000).fit_transform(z_run)

    _iplot_scatter(z_run_pca, colors, 'PCA on z_run')
    _iplot_scatter(z_run_tsne, colors, 'tSNE on z_run')
def plot_LDA(data, features, categories=(2, 3, 13)):
    """Plot the first two linear discriminants of ``data``, one trace per category.

    The selected feature columns are standardised with ``StandardScaler`` and
    projected with ``LinearDiscriminantAnalysis`` fitted against the
    ``"categoria"`` column.  The figure is shown through ``py.iplot`` and also
    returned.

    Args:
        data: Table indexable by column name; must contain ``"categoria"`` and
            every column named in ``features``.
        features: Column names used as LDA inputs.
        categories: Values of ``"categoria"`` to draw, one scatter trace each.
            Defaults to the three values the function originally hard-coded.

    Returns:
        The ``go.Figure`` that was displayed.
    """
    X = data[features]
    y = data["categoria"]
    X_std = StandardScaler().fit_transform(X)
    projected = LinearDiscriminantAnalysis().fit_transform(X_std, y)

    # Plain dicts replace the deprecated go.Marker / go.Line wrappers.
    marker_style = dict(size=8,
                        line=dict(color="rgba(225,225,225,0.2)", width=0.5),
                        opacity=0.75)

    traces = []
    for category in categories:
        in_category = y == category
        traces.append(
            go.Scatter(x=projected[in_category, 0],
                       y=projected[in_category, 1],
                       mode="markers",
                       name=str(category),
                       marker=marker_style))

    # NOTE: the input table is no longer shadowed by the trace list.
    layout = go.Layout(xaxis=dict(title="CP1", showline=False),
                       yaxis=dict(title="CP2", showline=False))
    fig = go.Figure(data=traces, layout=layout)
    py.iplot(fig)
    return fig
def plotly_histogram2(X, columns, target):
    """Draw side-by-side overlaid histograms of two columns, split by ``target``.

    ``columns[0]`` is drawn on the left x-axis and ``columns[1]`` on the right
    one.  For each distinct value of ``X[target]`` (in sorted order) one
    histogram trace is added per column.  The figure is shown through
    ``py.iplot`` and also returned.

    Args:
        X: Table indexable by column name.
        columns: Sequence whose first two entries name the columns to plot.
        target: Name of the column used to split the rows into groups.

    Returns:
        The ``go.Figure`` that was displayed.
    """
    # Fixed colours for the known target values; any other value falls back
    # to plotly's default colour instead of raising KeyError.
    colors = {
        2: 'rgb(255,127,20)',
        3: 'rgb(31, 220, 120)',
        13: 'rgb(44, 50, 180)',
    }

    # Filter the rows once per target value rather than once per (column, value).
    groups = [(value, X[X[target] == value])
              for value in sorted(X[target].unique().tolist())]

    traces = []
    for position in range(2):
        column = columns[position]
        for value, rows in groups:
            traces.append(
                go.Histogram(x=rows[column],
                             opacity=0.7,
                             xaxis="x%s" % (position + 1),
                             marker=dict(color=colors.get(value)),
                             name=str(value),
                             # Only the first column's traces get a legend
                             # entry, so each group is listed once.
                             showlegend=position == 0))

    layout = go.Layout(barmode="overlay",
                       xaxis=dict(domain=[0, 0.48], title=columns[0]),
                       xaxis2=dict(domain=[0.52, 1], title=columns[1]),
                       yaxis=dict(title="Numero de Defectos"),
                       title="Histograma caracteristicas")
    fig = go.Figure(data=traces, layout=layout)
    py.iplot(fig)
    return fig
def update_map(year, classification):
    """Build the density-map figure for a year range and set of classifications.

    Reads the module-level ``df`` and ``mapbox_access_token``.

    Args:
        year: Two-item sequence ``(first_year, last_year)``; both ends are
            inclusive.
        classification: Iterable of values of the ``'classification'`` column
            to include.  Rows appear in the order the classes are listed, and
            a class listed twice contributes its rows twice.

    Returns:
        A ``go.Figure`` holding one ``Densitymapbox`` trace.
    """
    # Local import keeps this block self-contained; pandas is already a
    # dependency because ``df`` is a DataFrame.
    import pandas as pd

    # Restrict the data to the selected (inclusive) year range.
    dff = df[(df['year'] >= year[0]) & (df['year'] <= year[1])]

    # The seed frame holds rows whose classification is literally 'empty'
    # (presumably none, which yields an empty frame with the right columns --
    # TODO confirm).  DataFrame.append was removed in pandas 2.0, so the
    # pieces are concatenated in a single call instead.
    pieces = [dff[dff['classification'] == 'empty']]
    pieces.extend(dff[dff['classification'] == classes]
                  for classes in classification)
    dff_c = pd.concat(pieces, ignore_index=True)

    # Density trace drawn on the mapbox layer.
    mapdata = [
        go.Densitymapbox(lat=dff_c['latitude'],
                         lon=dff_c['longitude'],
                         text=dff_c['number'],
                         customdata=dff_c['number'],
                         colorscale='hot',
                         visible=True,
                         colorbar=dict(borderwidth=1,
                                       xpad=1,
                                       ypad=1,
                                       thickness=3))
    ]

    # Layout and mapbox properties.
    layout = go.Layout(
        hovermode='closest',
        mapbox=dict(
            accesstoken=mapbox_access_token,
            bearing=0,
            pitch=0,
            center=dict(lat=34.5, lon=-94.8),
            zoom=4,
            style='mapbox://styles/caldashvinng/ck5i8qzci0t8t1iphlvn9sdz7'),
        margin={'l': 0, 'b': 0, 't': 0, 'r': 0},
    )
    return go.Figure(data=mapdata, layout=layout)
user_list = tweet_analyzer.convert_tweets_to_data_frame( other_tweets, False) tweet_text_df = pd.DataFrame(data=user_list, columns=['user_name', 'id']) print(tweet_text_df['user_name'].value_counts()) temp = pd.DataFrame( {'user_name_count': tweet_text_df['user_name'].value_counts()}) df = temp[temp.user_name_count > 5] df = df.sort_values(by='user_name_count', ascending=False, na_position="last") data = go.Data( [go.Bar( x=df.index, y=df.user_name_count, orientation='v', )]) layout = go.Layout(title="Usuarios con mayor cantidad de Tweets") fig = go.Figure(data=data, layout=layout) fig.show() # ############################################################# timeline # segunda parte: por cada usuario buscar la linea de tiempo user_timeline_filename = 'user_timeline_filename_' user_timeline_filtered_filename = 'user_timeline_filtered_filename_' dictionary_user_tweets = {} for user in df.index:
# Hospital beds in Ireland by year: quick inspection, then a matplotlib bar
# chart and a horizontal plotly bar chart exported to HTML.
df3.columns = ['year', 'num']
print(df3.columns.values)
print(df3.isnull().values.any())
print(df3.head())
print(df3.describe())

# astype returns a new Series, so the result must be assigned back for the
# cast to take effect (the original discarded it).
df3['year'] = df3['year'].astype(int)
df3['num'] = df3['num'].astype(int)

plt.bar(df3.year, df3.num)
plt.xticks(rotation=90)
plt.xlabel("Year")
plt.ylabel("Hospital Bed Count")
plt.title("Hospital Beds Ireland by Year")
plt.show()

# One colour per bar, applied in order.
colors = [
    'lightslategray', 'crimson', 'darkorange', 'pink', 'lightseagreen',
    'gold', 'mediumpurple', 'yellowgreen', 'orangered', 'maroon',
    'dodgerblue', 'chocolate', 'greenyellow', 'cadetblue', 'seagreen',
    'orchid', 'tomato', 'rosybrown',
]

# A plain list of traces replaces the deprecated go.Data wrapper.
data = [go.Bar(y=df3.year, x=df3.num, orientation='h', marker_color=colors)]
layout = go.Layout(title="Hospital Beds Ireland")
fig = go.Figure(data=data, layout=layout)

# Show inline, then write two standalone HTML copies.
py.iplot(fig)
py.plot(fig, filename='hospital_beds_in_ireland.html')
pio.write_html(fig, file='index2.html', auto_open=True)