def main():
    """Build the GTZAN feature and name tables and write them out as CSV.

    Steps, in order:
      1. Download the GTZAN dataset from Kaggle through ``od.download``
         (requires a Kaggle account; ``opendatasets`` may need to be
         pip-installed first).
      2. Build the audio-feature dataframe from the downloaded audio files.
      3. Build the artist/title dataframe from the index text file.
      4. Write both dataframes under ``./dataframes`` without the index
         column.
    """
    od.download(
        'https://www.kaggle.com/andradaolteanu/gtzan-dataset-music-genre-classification'
    )

    # Location of the raw audio files -- adjust this if your download ended
    # up somewhere else.
    # NOTE(review): the download above is given no target directory while
    # this path points at the parent directory ('..'); confirm the two agree.
    audio_dir = '../gtzan-dataset-music-genre-classification/Data/genres_original'

    # Text file holding the artist/title index.
    index_file = './dataframes/GTZANindex.txt'

    # Pair each generated dataframe with its output file. The tuple is
    # evaluated top to bottom, so the features are extracted before the
    # names table is built, and both exist before anything is written.
    exports = (
        (GTZAN_feature_dataframe(audio_dir), './dataframes/feature_dataframe.csv'),
        (GTZAN_name_dataframe(index_file), './dataframes/names_dataframe.csv'),
    )
    for frame, csv_path in exports:
        frame.to_csv(csv_path, index=False)
def __init__(self, data_dir):
    """Make sure the dataset is present under ``data_dir`` and index its files.

    The dataset directory is ``data_dir`` joined with ``self.dataset_name``.
    When that directory does not exist yet, the dataset is downloaded from
    ``self.dataset_url`` into ``data_dir``; otherwise a short notice is
    printed. In both cases every file found below the dataset directory is
    recorded in ``self.files``.

    Args:
        data_dir: Directory that contains (or will receive) the dataset
            folder.

    Attributes set:
        dataset_path: Path of the dataset directory.
        files: Paths of all files found by walking ``dataset_path``.

    Note:
        ``dataset_name`` and ``dataset_url`` are read from ``self`` and are
        presumably class attributes defined outside this method.
    """
    self.dataset_path = os.path.join(data_dir, self.dataset_name)

    if os.path.exists(self.dataset_path):
        print("Data set already downloaded.")
    else:
        ods.download(self.dataset_url, data_dir)
        # NOTE(review): the original followed the download with
        #   shutil.rmtree(os.path.join(self.dataset_path, shutil.rmtree("")))
        # The inner rmtree("") always raises (and would otherwise hand None
        # to os.path.join), so every first-time download crashed here. The
        # statement was removed because the sub-directory it was meant to
        # delete cannot be determined from this code; re-add a correct
        # cleanup once the intended target is known.

    # Collect every file below the dataset directory, in os.walk order.
    self.files = [
        os.path.join(root, name)
        for root, _, names in os.walk(self.dataset_path)
        for name in names
    ]
def load_gender_discrimination_data():
    """
    Load the Gender discrimination dataset and split it into features,
    target and sensitive attributes.

    The dataset looks at whether gender influences payments and promotion
    (more info: https://www.kaggle.com/hjmjerry/gender-discrimination).
    Kaggle credentials are needed for the download. If the download or the
    CSV read fails, the error is printed and the process exits with status 1.

    Returns:
        features (pandas.DataFrame): every column except 'Rank' and
            'Gender', dummy-encoded with the first level dropped
        target (pandas.Series): 1 where 'Rank' equals 3, otherwise 0
        sensitive_attributes (pandas.DataFrame): a single 'Gender' column,
            1 where the raw value equals 1, otherwise 0
    """
    try:
        dataset_url = 'https://www.kaggle.com/hjmjerry/gender-discrimination'
        od.download(dataset_url)
        lawsuit = pd.read_csv(r'./gender-discrimination/Lawsuit.csv')
    except Exception as err:
        print("Error : ", err)
        sys.exit(1)

    # Sensitive attribute: "Gender", binarised so that the raw value 1
    # (the privileged class, male, per the original author's note) maps to 1.
    gender_flag = (lawsuit['Gender'] == 1).astype(int)
    sensitive_attributes = lawsuit[['Gender']].assign(Gender=gender_flag)

    # Target: 1 when Rank is 3 (promotion to full professor, per the
    # original author's note), otherwise 0.
    target = lawsuit['Rank'].eq(3).astype(int)

    # Features: everything that is neither the target nor the sensitive
    # attribute, one-hot encoded.
    remaining = lawsuit.drop(columns=['Rank', 'Gender'])
    features = pd.get_dummies(remaining, drop_first=True)

    # Render a short size summary for each returned object.
    summaries = (
        f"features : {features.shape[0]} samples, {features.shape[1]} attributes",
        f"targets : {target.shape[0]} samples",
        f"sensitives attributes : {sensitive_attributes.shape[0]} samples, {sensitive_attributes.shape[1]} attributes",
    )
    for text in summaries:
        display(Markdown(text))

    return features, target, sensitive_attributes
# Notebook-exported setup for an image-classification experiment on the
# Kaggle "fruits" dataset: imports, dataset download, and data location.
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

# Install/upgrade opendatasets through the IPython shell hook before it is
# imported below. get_ipython is not imported in this snippet, so this line
# only works when run inside an IPython/Jupyter session.
get_ipython().system('pip install opendatasets --upgrade')

import opendatasets as od

# Download the dataset from Kaggle.
dataset_url = 'https://www.kaggle.com/moltean/fruits'
od.download(dataset_url)

import os

# In[6]:

# Absolute path of the extracted dataset.
# NOTE(review): '/content/...' looks like a Colab working directory --
# confirm or adjust when running elsewhere.
DATA_DIR = '/content/fruits/fruits-360'
print(os.listdir(DATA_DIR))

# In[7]:

plt.ion()   # interactive mode

# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
# Notebook-exported setup for a dog-breed classification experiment:
# imports, dataset download/unzip, and loading of the labels table.
# NOTE(review): `od` and `pd` are used below but not imported in this
# snippet -- presumably imported earlier in the notebook; confirm.
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import AveragePooling2D, Dense, Flatten, Dropout
from keras.applications import ResNet50
from keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report
# NOTE(review): plot_roc_curve is no longer available in recent
# scikit-learn releases -- verify against the installed version.
from sklearn.metrics import confusion_matrix, f1_score, roc_auc_score, accuracy_score, auc,roc_curve, plot_roc_curve
import seaborn as sns
from PIL import Image

"""#### Downloading the Dataset"""

# DOWNLOAD AND UNZIP THE DATASET FROM KAGGLE
DATASET_URL = 'https://www.kaggle.com/c/dog-breed-identification/data'
od.download(DATASET_URL)
# IPython shell escape: only valid when run as a notebook cell, not as
# plain Python.
!unzip -q /content/dog-breed-identification/dog-breed-identification.zip

# Image directories read from after the unzip step.
TRAIN_PATH = '/content/train'
TEST_PATH = '/content/test'

"""#### Data Preparation and Cleaning"""

# Loading the CSV file
df = pd.read_csv('/content/labels.csv')
df.head()

# Shape of our labels.csv
print(df.shape)

# Adjusting the labels.csv to feed into our Keras ImageDataGenerators
import numpy as np
import pandas as pd
import opendatasets as od
from IPython import get_ipython

# %% [markdown]
# # EDA on Stackoverflow Developer Survey

# %%
# Upgrade opendatasets quietly through the IPython shell hook.
get_ipython().system('pip install opendatasets --upgrade --quiet')

# %% [markdown]
# # Download the dataset

# %%
# The same identifier names the dataset to download and the directory the
# CSV files are read from below.
DATASET_ID = 'stackoverflow-developer-survey-2020'
SURVEY_DIR = './' + DATASET_ID
od.download(DATASET_ID)

# %%
# Import necessary libraries
get_ipython().run_line_magic('matplotlib', 'inline')

# %%
# Raw survey responses.
df_raw = pd.read_csv(SURVEY_DIR + '/survey_results_public.csv')
df_raw.head()

# %%
# Survey schema, indexed by the 'Column' field.
schema_path = SURVEY_DIR + '/survey_results_schema.csv'
schema_df = pd.read_csv(schema_path, index_col='Column')
# Using schema_raw to retrieve questions