def load_dataset(name: str) -> pd.DataFrame: """Load example dataset. If seaborn is present, its datasets can be loaded. Physt also includes some datasets in CSV format. """ # Our custom datasets: try: binary_data = pkgutil.get_data('physt', 'examples/{0}.csv'.format(name)) return pd.read_csv(io.BytesIO(binary_data)) except FileNotFoundError: pass # Seaborn datasets? try: import seaborn as sns import warnings with warnings.catch_warnings(): warnings.simplefilter("ignore") if name in sns.get_dataset_names(): return sns.load_dataset(name) except ImportError: pass # Fall through raise RuntimeError("Dataset {0} not available.".format(name))
def prep_example_data():
    '''Export every seaborn sample dataset to a CSV file under ./data/.

    Creates the ``data`` directory when it does not exist yet, then writes
    one ``<name>.csv`` (without the index column) per dataset, printing
    each target path as it goes.
    '''
    target_dir = 'data'
    if not os.path.exists(target_dir):
        os.mkdir(target_dir)

    print('Getting CSV files from seaborn in ./data/ directory')
    for name in sns.get_dataset_names():
        csv_path = './data/{}.csv'.format(name)
        print(csv_path)
        sns.load_dataset(name).to_csv(csv_path, index=False)
def show_iris():
    """Print seaborn's dataset names, inspect the iris data and plot it.

    Loads the ``iris`` dataset, prints its type and its column index, then
    draws a swarm plot of ``petal_length`` per ``species`` and shows it.
    """
    available = sns.get_dataset_names()
    print(available)  # a list of names such as 'iris', 'tips', 'titanic'

    flowers = sns.load_dataset('iris')
    print(type(flowers))  # <class 'pandas.core.frame.DataFrame'>
    # Columns: sepal_length, sepal_width, petal_length, petal_width, species
    print(flowers.columns)

    sns.swarmplot(data=flowers, x='species', y='petal_length')
    plt.show()
def __init__(self, filename=""):
    """Create the widgets used to pick a standard dataset or custom files.

    Args:
        filename: Initial value of the custom-file text field.
    """
    label_width = '150px'

    self.path = os.getcwd()
    self.files = []

    # Toggle buttons for the standard datasets; "tips" is preselected.
    toggles = widgets.ToggleButtons(
        value="tips",
        options=get_dataset_names(),
        disabled=False,
        description="Load a standard dataset:",
    )
    toggles.style.button_width = '100px'
    toggles.style.description_width = label_width
    self._standard_datasets = toggles

    # Free-text field for a custom file name.
    name_box = widgets.Text(
        layout=widgets.Layout(width='95%', height='30px'),
        value=filename,
        description="or choose a custom one:",
    )
    name_box.style.description_width = label_width
    self._filenameText = name_box

    # Button wired to refreshFileList.
    refresh_button = widgets.Button(
        description="Refresh files ...",
        button_style='info',
        layout=widgets.Layout(width='95%'),
    )
    refresh_button.on_click(self.refreshFileList)
    self._refreshBtn = refresh_button

    # Multi-selection list backed by self.files.
    file_picker = widgets.SelectMultiple(
        options=self.files,
        description="Files: ",
        layout=widgets.Layout(height='400px', width='95%'),
    )
    file_picker.style.description_width = label_width
    self._multiSel = file_picker

    # Button wired to load.
    load_button = widgets.Button(
        description="Load files ...",
        button_style='danger',
        layout=widgets.Layout(width='95%'),
    )
    load_button.on_click(self.load)
    self._loadBtn = load_button

    # Starts out as an empty frame.
    self.data = pd.DataFrame()
"""List all the datasets available in the seaborn library, then load 'mpg'."""
import seaborn as sns

# Fetch the names first so the listing and the print are separate steps.
available_datasets = sns.get_dataset_names()
print("Datasets available in seaborn:\n", available_datasets)

# Load the 'mpg' dataset and show it.
mpg = sns.load_dataset('mpg')
print("*****Dataset mpg loaded:*****\n", mpg)
for x in dataset: print(x) # print(y) df = pd.DataFrame(dataset) print(df) print("index series into data frame") print(arr) print(pd.DataFrame(arr)) print("being crazy..") print(pd.DataFrame(One_D)[0][1]) print() print() print(".....Fourth Question..." * 5) print("following dataset avaliable in the seaborn:\n", snb.get_dataset_names()) for a in snb.get_dataset_names(): print(a) print() print() print("...Question number 5..." * 5) ds = snb.load_dataset('mpg') print(ds) print("country count in the\'mpg' data set") print("all the count ") print(ds['origin'].describe()) print() print("country of the car is in the dataset......") print() print() for cont in ds['origin'].unique():
def plot():
    """Print seaborn's dataset names and the first ten rows of 'tips'."""
    dataset_names = sb.get_dataset_names()
    print(dataset_names)

    tips = sb.load_dataset('tips')
    print(tips.head(10))
# Fetch the "iris" dataset and look at its version detail.
# NOTE(review): fetch_openml is not imported in this chunk — presumably
# sklearn.datasets.fetch_openml; verify against the top of the file.
iris = fetch_openml(name="iris")
iris.details['version']
#%%%%
#pip install dataset
#https://dataset.readthedocs.io/en/latest/
import dataset
# Connect to an in-memory SQLite database and pick a table by name.
db = dataset.connect('sqlite:///:memory:')
table = db['sometable']
# Insert two rows; the second one carries an extra 'gender' field.
table.insert(dict(name='John Doe', age=37))
table.insert(dict(name='Jane Doe', age=34, gender='female'))
# Look one row up by name and display it.
john = table.find_one(name='John Doe')
john
#%%% seaborn
import seaborn as sb
# Load the 'tips' dataset and peek at its first rows.
df = sb.load_dataset('tips')
df.head()
# List the dataset names, keep them, and show the first ten.
sb.get_dataset_names()
sbdatasets = sb.get_dataset_names()
sbdatasets[0:10]
# Put the names into a one-column DataFrame ('dbname') so they can be filtered.
sbdata = pd.DataFrame({'dbname':sb.get_dataset_names()})
sbdata
# Rows whose name contains "flight".
sbdata[ sbdata['dbname'].str.contains("flight") ]
sbdata[0:10]
sbdata.shape
# importing seaborn library. import seaborn as sb # getting list of all available data sets in seaborn library. a = sb.get_dataset_names() # printing list of all data sets print(a) # loading mpg dataset. b = sb.load_dataset('mpg') # printing dataset print(b)
import seaborn as sns
import matplotlib.pyplot as plt

# Names of the datasets seaborn offers, and the 'iris' dataset itself.
all_datasets = sns.get_dataset_names()
dataset = sns.load_dataset('iris')
#%%
# Quick inspection: dimensions, first/last rows, summary statistics,
# three random rows and the number of missing values per column.
dataset.shape
dataset.head()
dataset.tail()
dataset.describe()
dataset.sample(3)
dataset.isnull().sum()
#%%
# Box plot drawn through the pandas plotting interface.
dataset.plot(kind='box')
plt.show()
#%%
# Same data as a seaborn box plot, using the 'ticks' style.
sns.set_style('ticks')
sns.boxplot(data=dataset)
plt.show()
#%%
# Histograms drawn through pandas.
dataset.hist()
plt.show()
#%%
# Swarm plot of petal_length for each species.
sns.swarmplot(x='species', y='petal_length', data=dataset)
plt.show()
# python -m pip install bs4
#
# On the lab computers, you may have to add the --user flag

# There are several dependencies associated with the seaborn library such as numpy, scipy, pandas, matplotlib.

# Matplotlib tries to make easy things easy and difficult things possible
# Seaborn tries to make a well-defined set of hard things easy to do

# Seaborn is built on top of matplotlib. It is designed to complement (not replace) matplotlib
# seaborn has built in themes for styling matplotlib visuals,
# can visualize univariate and bivariate data,
# linear regression plotting/visualization,
# plotting time series data,
# and works well with both numpy and pandas data structures.

# seaborn comes with a few built-in datasets that we can use to experiment with
print(sb.get_dataset_names())

# if we want to use one of these existing datasets, we can load them directly!
my_df = sb.load_dataset('tips')
print(type(my_df))  # notice that it is a DataFrame
print(my_df.head(10))
print(my_df.shape)  # 244 rows by 7 columns
print(my_df.describe())
# DataFrame.info() prints its summary itself and returns None, so it must
# not be wrapped in print() (that would emit an extra "None" line).
my_df.info()

# Visualizing data generally involves two steps:
# 1) creating the plot/visual and 2) making the visual more aesthetically pleasing

# Visualization is an art of representing data in an easy and effective way.

# Unlike Matplotlib, seaborn comes with customized themes and a high-level interface for
# customizing the look and feel of matplotlib graphics.
""" 线性回归图 lmplot 或者 regplot """ # 导入库 import sys import seaborn as sn sn.set_style("whitegrid") import matplotlib.pyplot as plt names = sn.get_dataset_names() print(f'dataset names: {names}') #sys.exit(0) # 导入自带数据 tips = sn.load_dataset("tips") # csv 文件 # 单一分组参数 sn.lmplot(x="total_bill", y="tip", data=tips) plt.savefig("./lmplot1.png", dpi=600) # 分组的线性回归图,hue参数控制分类属性 sn.lmplot(x="total_bill", y="tip", hue="smoker", markers=["o", "*"], data=tips) plt.savefig("./lmplot2.png", dpi=600) # col+hue 双分组参数,既分组,又分子图绘制 # 同时控制axes 列数,及 size 尺寸
# -*- coding: utf-8 -*-
"""
Seaborn walkthrough: dataset listing, axes style, joint plot and pair plot.

Created on Sun Apr 12 23:30:06 2020

@author: saisr
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Names of the datasets seaborn can load.
a = sns.get_dataset_names()
print(a)

# The 'planets' dataset.
d = sns.load_dataset("planets")
d.head()
print(d)

# Current axes style parameters.
a = sns.axes_style()
print(a)

# Residual joint plot of score against subject on the 'attention' dataset.
sns.set(style="darkgrid", color_codes=True)
attention = sns.load_dataset("attention")
attention.head()
print(attention)
g = sns.jointplot(x="subject", y="score", data=attention, kind="resid")

# Pair plot of the 'iris' dataset, coloured by species.
sns.set(style="ticks")
iris = sns.load_dataset("iris")
print(iris.head())
g = sns.pairplot(
    iris,
    hue="species",
    diag_kind="hist",
    kind="scatter",
    palette="husl",
    markers=['+', 'D', '*'],
)
#============================================================================== ### Import Packages import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns sns.set_style("whitegrid") # darkgrid, whitegrid, dark, white, ticks sns.set(rc={'figure.figsize':(10,5)}) plt.style.use('ggplot') # (bmh, fivethirtyeight, seaborn-dark, ggplot) # ('ggplot', 'seaborn-bright', 'seaborn-ticks', 'seaborn-talk', 'seaborn-muted', 'dark_background', 'tableau-colorblind10', # 'fast', 'seaborn-white', 'seaborn-colorblind', 'seaborn-dark', 'seaborn-paper', 'seaborn', 'bmh', 'fivethirtyeight', # 'Solarize_Light2', 'seaborn-notebook', 'classic', 'seaborn-poster', 'seaborn-pastel', 'seaborn-dark-palette', # 'seaborn-deep', '_classic_test', 'seaborn-whitegrid', 'grayscale', 'seaborn-darkgrid'] sns.get_dataset_names() sns.load_dataset("tips") # titanic ### For Jupyter Notebook # import plotly.offline as pyo # from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot # import cufflinks as cf # %matplotlib inline # %matplotlib notebook ### For Colab # Import Files from google.colab import files uploaded = files.upload() df = pd.read_csv('01-01-2020-TO-27-08-2020RELIANCEEQN.csv')
DataFrame.plot.pie(**kwargs) # Create a scatter plot with varying marker point size and color. DataFrame.plot.scatter(x, y[, s, c]) # Make a box plot from DataFrame columns. DataFrame.boxplot([column, by, ax, …]) # Make a histogram of the DataFrame’s. DataFrame.hist([column, by, grid, …]) ######################################################################## ####################### visualization tutorial ######################### ######################################################################## import seaborn as sns tips = sns.load_dataset("tips") # tips dataset can be loaded from seaborn sns.get_dataset_names() # to get a list of other available datasets import plotly.express as px tips = px.data.tips() # tips dataset can be loaded from plotly # data_canada = px.data.gapminder().query("country == 'Canada'") import pandas as pd tips.to_csv('/Users/vivekparashar/Downloads/tips.csv') import altair as alt import statsmodels.api as sm # Dot plot shows changes between two (or more) points in time or between two (or more) conditions. t = tips.groupby(['day','sex']).mean()[['total_bill']].reset_index() px.scatter(t, x='day', y='total_bill', color='sex',
message='.*No parser was explicitly specified.*') iris = sns.load_dataset('iris') flights = sns.load_dataset('flights') multi = flights.set_index(['year', 'month']).unstack() # MultiIndex example pokemon = pd.read_csv( r'https://gist.githubusercontent.com/adamerose/' r'400cb7025fa33ff4534f0b032b26321c/raw/6013206a582db794ed89fdf5e2c7567372489025/pokemon.csv' ) tuples = [('A', 'one', 'x'), ('A', 'one', 'y'), ('A', 'two', 'x'), ('A', 'two', 'y'), ('B', 'one', 'x'), ('B', 'one', 'y'), ('B', 'two', 'x'), ('B', 'two', 'y')] index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second', 'third']) multidf = pd.DataFrame(pd.np.random.randn(8, 8), index=index[:8], columns=index[:8]) multidf_columns = pd.DataFrame(pd.np.random.randn(8, 8), columns=index[:8]) multidf_index = pd.DataFrame(pd.np.random.randn(8, 8), index=index[:8]) all_datasets = {} # All Seaborn data sets for name in sns.get_dataset_names(): all_datasets[name] = sns.load_dataset(name) all_datasets['pokemon'] = pokemon all_datasets['multidf'] = multidf all_datasets['multidf_columns'] = multidf_columns all_datasets['multidf_index'] = multidf_index
'Grade': ['A', 'A', 'C', 'B'], 'Subject': ['Physics', 'Physics', 'Physics', 'Physics']}) df = pd.DataFrame(a) df['index'] = df.index df # In[30]: # Question 4 import seaborn as sbn sbn.get_dataset_names() data =sbn.load_dataset('mpg') data # In[34]: # Question 5 data['origin'].unique() # In[38]:
def getDatasets(self):
    """Print the names of the datasets seaborn can load."""
    dataset_names = sns.get_dataset_names()
    print(dataset_names)
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import sklearn.datasets as d
import pandas as pd
import grimoire as g
import seaborn

# df = pd.array(d.make_moons())
# df

# List the seaborn dataset names (the result is discarded), then stop in the
# debugger.
# NOTE(review): breakpoint() halts every run of this script — presumably a
# leftover from interactive exploration; confirm before reusing the file.
seaborn.get_dataset_names()
breakpoint()

# Disabled voting-classifier experiment:
# log_clf = LogisticRegression()
# rnd_clf = RandomForestClassifier()
# svm_clf = SVC()
# voting_clf = VotingClassifier(estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],
# voting='hard')
# voting_clf.fit(X_train, y_train)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#!pip install seaborn
import seaborn as sns
#%matplotlib inline
#%reload_ext autoreload
#%autoreload 2


# In[5]:

# Print the available seaborn dataset names, then load 'car_crashes'
# (used here in place of reading a local CSV) and peek at its first rows.
#dfc = pd.read_csv('some.csv')
print(sns.get_dataset_names())
dfc = sns.load_dataset('car_crashes')
dfc.head()


# In[6]:

# Distribution plot of the 'not_distracted' column.
sns.displot(dfc['not_distracted'])


# ## Quickly Creating Summary Counts in Pandas
#
# Let's next count the number of samples for each species. We can do this in a few
# ways, but we'll use `groupby` combined with **a `count()` method**.
# Name : Yogi Halagunaki
# Assignment no : 4 (Que 4)

# Questions 4:
# Write the code to list all the datasets available in seaborn library.
# Load the 'mpg' dataset
# Note: mpg dataset will be read from seaborn module in the manner sir has already shown(provided in the
# materials folder)

import seaborn as sb
import pandas as pd

# Names of all datasets present in seaborn.
dataset_names = sb.get_dataset_names()
print("All dataset present in the seaborn library :", dataset_names)
print()

# The mpg dataset, loaded from the seaborn library.
mpg = sb.load_dataset('mpg')
print("mpg data set from seaborn :", mpg)
print()

# Two more seaborn datasets, each loaded and then printed.
anagrams = sb.load_dataset('anagrams')
print("'anagrams' data set from seaborn :", anagrams)
print()

car_crashes = sb.load_dataset('car_crashes')
print("car_crashes data set from seaborn :", car_crashes)
print()

# A user-provided dataset, read with pandas.
student_records = pd.read_csv("student_records.csv")
print(student_records)

# Output :
# /home/yogi/Desktop/Python_Code/venv/bin/python /home/yogi/Desktop/Python_Code/Lets_Upgrade_Assignments/Day4/Day4_Que4.py
# All dataset present in the seaborn library : ['anagrams', 'anscombe', 'attention',
# 'brain_networks', 'car_crashes', 'diamonds', 'dots', 'exercise', 'flights', 'fmri',
# 'gammas', 'geyser', 'iris', 'mpg', 'penguins', 'planets', 'tips', 'titanic']
#