Exemple #1
0
    def load_dataset(name: str) -> pd.DataFrame:
        """Load example dataset.

        If seaborn is present, its datasets can be loaded.
        Physt also includes some datasets in CSV format.
        """
        # Our custom datasets:
        try:
            binary_data = pkgutil.get_data('physt', 'examples/{0}.csv'.format(name))
            return pd.read_csv(io.BytesIO(binary_data))
        except FileNotFoundError:
            pass

        # Seaborn datasets?
        try:
            import seaborn as sns
            import warnings
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                if name in sns.get_dataset_names():
                        return sns.load_dataset(name)
        except ImportError:
            pass

        # Fall through
        raise RuntimeError("Dataset {0} not available.".format(name))
Exemple #2
0
    def load_dataset(name: str) -> pd.DataFrame:
        """Load example dataset.

        If seaborn is present, its datasets can be loaded.
        Physt also includes some datasets in CSV format.
        """
        # Our custom datasets:
        try:
            binary_data = pkgutil.get_data('physt', 'examples/{0}.csv'.format(name))
            return pd.read_csv(io.BytesIO(binary_data))
        except FileNotFoundError:
            pass

        # Seaborn datasets?
        try:
            import seaborn as sns
            import warnings
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                if name in sns.get_dataset_names():
                        return sns.load_dataset(name)
        except ImportError:
            pass

        # Fall through
        raise RuntimeError("Dataset {0} not available.".format(name))
Exemple #3
0
def prep_example_data():
    """Export every seaborn example dataset to ``./data/<name>.csv``.

    Creates the ``data`` directory in the current working directory when it
    does not exist yet, prints each destination path, and writes each dataset
    as CSV without the index column.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs('data', exist_ok=True)

    print('Getting CSV files from seaborn in ./data/ directory')

    for dataset in sns.get_dataset_names():
        file_path = './data/{}.csv'.format(dataset)
        print(file_path)
        sns.load_dataset(dataset).to_csv(file_path, index=False)
Exemple #4
0
def show_iris():
    """Print seaborn's dataset names, inspect the iris dataset, and plot it.

    Shows the type and the column index of the loaded iris data, then draws
    a swarm plot of petal length per species and displays the figure.
    """
    available = sns.get_dataset_names()
    print(available)
    # e.g. ['anscombe', 'attention', 'brain_networks', 'car_crashes', 'dots',
    #       'exercise', 'flights', 'fmri', 'gammas', 'iris', 'planets', 'tips',
    #       'titanic']

    frame = sns.load_dataset('iris')
    frame_type = type(frame)
    print(frame_type)  # <class 'pandas.core.frame.DataFrame'>
    column_index = frame.columns
    print(column_index)
    # Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
    #        'species'],
    #       dtype='object')

    sns.swarmplot(data=frame, x='species', y='petal_length')
    plt.show()
Exemple #5
0
    def __init__(self, filename=""):
        """Build the dataset-selection widgets and initialise empty state.

        Parameters
        ----------
        filename : str
            Initial content of the custom-file text box.
        """
        label_width = '150px'
        wide = '95%'

        self.path = os.getcwd()
        self.files = []

        # Toggle buttons listing the standard datasets, "tips" preselected.
        dataset_buttons = widgets.ToggleButtons(
            value="tips",
            options=get_dataset_names(),
            disabled=False,
            description="Load a standard dataset:")
        dataset_buttons.style.button_width = '100px'
        dataset_buttons.style.description_width = label_width
        self._standard_datasets = dataset_buttons

        # Free-text entry for a custom file name.
        text_layout = widgets.Layout(width=wide, height='30px')
        filename_box = widgets.Text(
            layout=text_layout,
            value=filename,
            description="or choose a custom one:")
        filename_box.style.description_width = label_width
        self._filenameText = filename_box

        # Button wired to self.refreshFileList.
        refresh_layout = widgets.Layout(width=wide)
        refresh_button = widgets.Button(
            description="Refresh files ...",
            button_style='info',
            layout=refresh_layout)
        refresh_button.on_click(self.refreshFileList)
        self._refreshBtn = refresh_button

        # Multi-select list backed by self.files (empty at construction).
        select_layout = widgets.Layout(height='400px', width=wide)
        file_select = widgets.SelectMultiple(
            options=self.files,
            description="Files: ",
            layout=select_layout)
        file_select.style.description_width = label_width
        self._multiSel = file_select

        # Button wired to self.load.
        load_layout = widgets.Layout(width=wide)
        load_button = widgets.Button(
            description="Load files ...",
            button_style='danger',
            layout=load_layout)
        load_button.on_click(self.load)
        self._loadBtn = load_button

        self.data = pd.DataFrame()
Exemple #6
0
"""List all the datasets available in the seaborn library and load 'mpg'."""
import seaborn as sns

# Fetch the names first, then report them.
dataset_names = sns.get_dataset_names()
print("Datasets available in seaborn:\n", dataset_names)

# Load the 'mpg' dataset and print it in full.
mpg = sns.load_dataset('mpg')
print("*****Dataset mpg loaded:*****\n", mpg)
Exemple #7
0
# Print every element of `dataset` (defined earlier in the script), then show
# the same data wrapped in a DataFrame.
for x in dataset:
    print(x)
    # print(y)
df = pd.DataFrame(dataset)
print(df)

# Show `arr` both raw and as a DataFrame.
print("index series into data frame")
print(arr)
print(pd.DataFrame(arr))
print("being crazy..")
# Selects label 0 first, then label 1 from the result
# (assumes default integer labels on both axes -- TODO confirm).
print(pd.DataFrame(One_D)[0][1])
print()
print()
print(".....Fourth Question..." * 5)

# Question 4: list the seaborn dataset names, first as one list, then one per line.
# `snb` is presumably the seaborn module imported earlier -- verify.
print("following dataset avaliable in the seaborn:\n", snb.get_dataset_names())
for a in snb.get_dataset_names():
    print(a)
print()
print()
print("...Question number 5..." * 5)
# Question 5: load the 'mpg' dataset and summarise its 'origin' column.
ds = snb.load_dataset('mpg')
print(ds)
print("country count in the\'mpg' data set")
print("all the count ")
print(ds['origin'].describe())
print()
print("country of the car is in the dataset......")
print()
print()
for cont in ds['origin'].unique():
Exemple #8
0
def plot():
    """Print seaborn's dataset names, then the first ten rows of 'tips'."""
    names = sb.get_dataset_names()
    print(names)

    tips = sb.load_dataset('tips')
    preview = tips.head(10)
    print(preview)
Exemple #9
0
# Fetch the iris dataset via fetch_openml (imported outside this excerpt) and
# look up the 'version' entry of its details.
iris = fetch_openml(name="iris")
iris.details['version']  


#%%%%
# The `dataset` package: a small database wrapper.
#pip install dataset
#https://dataset.readthedocs.io/en/latest/
import dataset
# In-memory SQLite database.
db = dataset.connect('sqlite:///:memory:')

# Insert two rows; the second one carries an extra 'gender' field.
table = db['sometable']
table.insert(dict(name='John Doe', age=37))
table.insert(dict(name='Jane Doe', age=34, gender='female'))

# Look up a single row by name and display it.
john = table.find_one(name='John Doe')
john


#%%% seaborn
import seaborn as sb
# Load the 'tips' dataset and peek at it.
df = sb.load_dataset('tips')
df.head()
# List the available dataset names and look at the first ten.
sb.get_dataset_names()
sbdatasets = sb.get_dataset_names()
sbdatasets[0:10]
# Put the names into a one-column DataFrame so they can be filtered by substring.
sbdata = pd.DataFrame({'dbname':sb.get_dataset_names()})
sbdata
sbdata[ sbdata['dbname'].str.contains("flight") ]
sbdata[0:10]
sbdata.shape
# Import the seaborn library.
import seaborn as sb

# Get the list of all dataset names available in seaborn.
a = sb.get_dataset_names()
# Print the list of dataset names.
print(a)

# Load the 'mpg' dataset.
b = sb.load_dataset('mpg')
# Print the loaded dataset.
print(b)
Exemple #11
0
import seaborn as sns
import matplotlib.pyplot as plt

# Names of the available seaborn datasets (not used again in this snippet).
all_datasets = sns.get_dataset_names()
# Work with the iris dataset from here on.
dataset = sns.load_dataset('iris')

#%%
# Quick inspection: dimensions, first/last rows, summary statistics,
# three random rows, and the per-column count of missing values.
dataset.shape
dataset.head()
dataset.tail()
dataset.describe()
dataset.sample(3)
dataset.isnull().sum()

#%%
# Box plot through the DataFrame's own plotting interface.
dataset.plot(kind='box')
plt.show()

#%%
# The same kind of plot through seaborn, using the 'ticks' style.
sns.set_style('ticks')
sns.boxplot(data=dataset)
plt.show()

#%%
# Histograms via DataFrame.hist().
dataset.hist()
plt.show()

#%%
# Swarm plot of petal length per species.
sns.swarmplot(x='species', y='petal_length', data=dataset)
plt.show()
Exemple #12
0
# python -m pip install bs4
# # On the lab computers, you may have to add the --user flag
# There are several dependencies associated with the seaborn library such as numpy, scipy, pandas, matplotlib.

# Matplotlib tries to make easy things easy and difficult things possible
# Seaborn tries to make a well-defined set of hard things easy to do
# Seaborn is built on top of matplotlib.  It is designed to complement (not replace) matplotlib

# seaborn has built in themes for styling matplotlib visuals,
# can visualize univariate and bivariate data,
# linear regression plotting/visualization,
# plotting time series data,
# and works well with both numpy and pandas data structures.

# seaborn comes with a few built-in datasets that we can use to experiment with
print(sb.get_dataset_names())

# if we want to use one of these existing datasets, we can load them directly!
my_df = sb.load_dataset('tips')
print(type(my_df))  # notice that it is a DataFrame
print(my_df.head(10))
print(my_df.shape)  # 244 rows by 7 columns
print(my_df.describe())
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() would emit a stray "None" line after the summary.
my_df.info()

# Visualizing data generally involves two steps:
# 1) creating the plot/visual and 2) making the visual more aesthetically pleasing
# Visualization is an art of representing data in an easy and effective way.

# Unlike Matplotlib, seaborn comes with customized themes and a high-level interface for
# customizing the look and feel of matplotlib graphics.
Exemple #13
0
"""
Linear regression plots: lmplot or regplot
"""

# Import libraries
import sys

import seaborn as sn
sn.set_style("whitegrid")

import matplotlib.pyplot as plt

names = sn.get_dataset_names()

print(f'dataset names: {names}')

#sys.exit(0)

# Load a built-in dataset
tips = sn.load_dataset("tips")  # CSV file

# Single-group plot: one regression of tip on total_bill
sn.lmplot(x="total_bill", y="tip", data=tips)
plt.savefig("./lmplot1.png", dpi=600)

# Grouped linear regression plot; the hue parameter selects the categorical attribute
sn.lmplot(x="total_bill", y="tip", hue="smoker", markers=["o", "*"], data=tips)
plt.savefig("./lmplot2.png", dpi=600)

# col + hue: two grouping parameters -- groups the data and also draws separate subplots
# while also controlling the number of axes columns and the size
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 12 23:30:06 2020

@author: saisr
"""

import seaborn as sns

# List the available seaborn dataset names.
a = sns.get_dataset_names()
print(a)

# Load the 'planets' dataset; the head() result is discarded, the full frame is printed.
d = sns.load_dataset("planets")
d.head()
print(d)

# `a` is rebound here to the value returned by axes_style().
a = sns.axes_style()
print(a)

import matplotlib.pyplot as plt
import numpy as np, pandas as pd;
sns.set(style = "darkgrid",color_codes = True)
# Load 'attention' and draw a joint plot of subject vs. score with kind="resid".
attention = sns.load_dataset("attention")
attention.head()
print(attention)
g = sns.jointplot(x = "subject",y = "score",data = attention ,kind = "resid")

# Switch to the "ticks" style and draw a pair plot of iris coloured by species.
sns.set(style = "ticks")
iris = sns.load_dataset("iris")
print(iris.head())
# `g` is rebound to the pair plot; one marker is supplied per species level.
g = sns.pairplot(iris,hue = "species", diag_kind = "hist", kind = "scatter",palette = "husl",markers = ['+','D','*'])
Exemple #15
0
#==============================================================================
### Import Packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("whitegrid") # darkgrid, whitegrid, dark, white, ticks
# NOTE(review): sns.set() and plt.style.use() below each apply their own
# styling after set_style(); the earlier "whitegrid" choice may not survive --
# confirm the intended final style.
sns.set(rc={'figure.figsize':(10,5)})
plt.style.use('ggplot')
# (bmh, fivethirtyeight, seaborn-dark, ggplot)
# ('ggplot', 'seaborn-bright', 'seaborn-ticks', 'seaborn-talk', 'seaborn-muted', 'dark_background', 'tableau-colorblind10',
#  'fast', 'seaborn-white', 'seaborn-colorblind', 'seaborn-dark', 'seaborn-paper', 'seaborn', 'bmh', 'fivethirtyeight',
#  'Solarize_Light2', 'seaborn-notebook', 'classic', 'seaborn-poster', 'seaborn-pastel', 'seaborn-dark-palette',
#  'seaborn-deep', '_classic_test', 'seaborn-whitegrid', 'grayscale', 'seaborn-darkgrid']
 
# Both results are discarded here; the calls are only useful as interactive cell output.
sns.get_dataset_names()
sns.load_dataset("tips") # titanic
 
### For Jupyter Notebook
# import plotly.offline as pyo
# from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# import cufflinks as cf
# %matplotlib inline
# %matplotlib notebook
 
 
### For Colab
# Import Files
from google.colab import files
uploaded = files.upload()
# Reads the CSV by a fixed file name; presumably the file just uploaded above -- verify.
df = pd.read_csv('01-01-2020-TO-27-08-2020RELIANCEEQN.csv')
Exemple #16
0
DataFrame.plot.pie(**kwargs)
# Create a scatter plot with varying marker point size and color.
DataFrame.plot.scatter(x, y[, s, c])
# Make a box plot from DataFrame columns.
DataFrame.boxplot([column, by, ax, …])
# Make a histogram of the DataFrame’s columns.
DataFrame.hist([column, by, grid, …])


########################################################################
####################### visualization tutorial #########################
########################################################################

import seaborn as sns
tips = sns.load_dataset("tips") # tips dataset can be loaded from seaborn
sns.get_dataset_names() # to get a list of other available datasets

import plotly.express as px
tips = px.data.tips() # tips dataset can be loaded from plotly (rebinds `tips`)
# data_canada = px.data.gapminder().query("country == 'Canada'")

import pandas as pd
# Save a copy of the plotly tips data to a fixed local path.
tips.to_csv('/Users/vivekparashar/Downloads/tips.csv')

import altair as alt

import statsmodels.api as sm

# Dot plot shows changes between two (or more) points in time or between two (or more) conditions.
# Select the numeric column BEFORE aggregating: calling mean() on the whole
# group also tries to average the string columns, which raises TypeError on
# pandas >= 2.0. The resulting frame (day, sex, total_bill) is unchanged.
t = tips.groupby(['day','sex'])[['total_bill']].mean().reset_index()
px.scatter(t, x='day', y='total_bill', color='sex', 
Exemple #17
0
                        message='.*No parser was explicitly specified.*')

# pandas no longer exposes numpy as `pd.np` (removed in pandas 2.0), so numpy
# is imported directly for the random sample data below.
import numpy as np

iris = sns.load_dataset('iris')
flights = sns.load_dataset('flights')
multi = flights.set_index(['year', 'month']).unstack()  # MultiIndex example
pokemon = pd.read_csv(
    r'https://gist.githubusercontent.com/adamerose/'
    r'400cb7025fa33ff4534f0b032b26321c/raw/6013206a582db794ed89fdf5e2c7567372489025/pokemon.csv'
)

# Three-level MultiIndex with eight entries, used for rows and/or columns of
# the random 8x8 frames below.
tuples = [('A', 'one', 'x'), ('A', 'one', 'y'), ('A', 'two', 'x'),
          ('A', 'two', 'y'), ('B', 'one', 'x'), ('B', 'one', 'y'),
          ('B', 'two', 'x'), ('B', 'two', 'y')]
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second', 'third'])
multidf = pd.DataFrame(np.random.randn(8, 8),
                       index=index[:8],
                       columns=index[:8])
multidf_columns = pd.DataFrame(np.random.randn(8, 8), columns=index[:8])
multidf_index = pd.DataFrame(np.random.randn(8, 8), index=index[:8])

all_datasets = {}

# All Seaborn data sets
for name in sns.get_dataset_names():
    all_datasets[name] = sns.load_dataset(name)

# Add the locally built example frames under their own keys.
all_datasets['pokemon'] = pokemon
all_datasets['multidf'] = multidf
all_datasets['multidf_columns'] = multidf_columns
all_datasets['multidf_index'] = multidf_index
               'Grade': ['A', 'A', 'C', 'B'], 
               'Subject': ['Physics', 'Physics', 'Physics', 'Physics']}) 

# Build a DataFrame from `a` (defined above this excerpt) and copy its index
# into an explicit 'index' column.
df = pd.DataFrame(a)
df['index'] = df.index 
df


# In[30]:


# Question 4
# List the seaborn dataset names (result only shown as cell output) and load 'mpg'.

import seaborn as sbn

sbn.get_dataset_names()
data =sbn.load_dataset('mpg')

data


# In[34]:


# Question 5
# Distinct values of the 'origin' column of the mpg data.

data['origin'].unique()


# In[38]:
Exemple #19
0
 def getDatasets(self):
     """Print the names of the datasets seaborn can load."""
     dataset_names = sns.get_dataset_names()
     print(dataset_names)
Exemple #20
0
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import sklearn.datasets as d
import pandas as pd
import grimoire as g
import seaborn

# df = pd.array(d.make_moons())

# df
# The returned list of dataset names is discarded here.
seaborn.get_dataset_names()

# NOTE(review): breakpoint() drops into the debugger on every run; this looks
# like a leftover from interactive exploration -- confirm before keeping it.
breakpoint()

# log_clf = LogisticRegression()
# rnd_clf = RandomForestClassifier()
# svm_clf = SVC()

# voting_clf = VotingClassifier(estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],
#                               voting='hard')

# voting_clf.fit(X_train, y_train)
Exemple #21
0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#!pip install seaborn
import seaborn as sns

#%matplotlib inline
#%reload_ext autoreload
#%autoreload 2


# In[5]:


#dfc = pd.read_csv('some.csv')
# Use seaborn's 'car_crashes' example dataset instead of a local CSV file.
print(sns.get_dataset_names())
dfc = sns.load_dataset('car_crashes')
dfc.head()


# In[6]:


# Distribution plot of the 'not_distracted' column.
sns.displot(dfc['not_distracted'])


# ## Quickly Creating Summary Counts in Pandas
# 
# Let's next count the number of samples for each species. We can do this in a few
# ways, but we'll use `groupby` combined with **a `count()` method**.
# Name : Yogi Halagunaki
# Assignment no : 4 (Que 4)

# Question 4:
# Write the code to list all the datasets available in the seaborn library.
# Load the 'mpg' dataset.
# Note: the mpg dataset is read from the seaborn module in the manner already
# shown (provided in the materials folder).
import seaborn as sb
import pandas as pd
print("All dataset present in the seaborn library :",
      sb.get_dataset_names())  # list of all datasets in seaborn
print()

mpg = sb.load_dataset('mpg')
print("mpg data set from seaborn :",
      mpg)  # the mpg dataset loaded from the seaborn library
print()
# Two more seaborn datasets, loaded and printed directly.
print("'anagrams' data set from seaborn :", sb.load_dataset('anagrams'))
print()
print("car_crashes data set from seaborn :", sb.load_dataset('car_crashes'))
print()
# The path is relative, so the CSV must be in the current working directory.
print(pd.read_csv(
    "student_records.csv"))  # loading a user dataset with the pandas library

# Output :
# /home/yogi/Desktop/Python_Code/venv/bin/python /home/yogi/Desktop/Python_Code/Lets_Upgrade_Assignments/Day4/Day4_Que4.py
# All dataset present in the seaborn library : ['anagrams', 'anscombe', 'attention',
# 'brain_networks', 'car_crashes', 'diamonds', 'dots', 'exercise', 'flights', 'fmri',
# 'gammas', 'geyser', 'iris', 'mpg', 'penguins', 'planets', 'tips', 'titanic']
#