def main():
    """
    Download the GTZAN dataset and write its feature and name tables to CSV.

    Steps, in order:
      1. Download the Kaggle dataset with opendatasets (a Kaggle account is
         required; `pip install opendatasets` may be needed first).
      2. Build the feature dataframe with GTZAN_feature_dataframe from the
         audio directory.
      3. Build the names dataframe with GTZAN_name_dataframe from the
         artist/title index file.
      4. Save both dataframes, without their index, under ./dataframes/.
    """
    dataset_url = 'https://www.kaggle.com/andradaolteanu/gtzan-dataset-music-genre-classification'

    # Location of the raw audio. NOTE(review): this is relative to the parent
    # directory and may differ depending on where the download ends up.
    audio_dir = '../gtzan-dataset-music-genre-classification/Data/genres_original'
    # Location of the artist/title index.
    index_file = './dataframes/GTZANindex.txt'

    od.download(dataset_url)

    # Both tables are built before anything is written, as in the original flow.
    feature_table = GTZAN_feature_dataframe(audio_dir)
    name_table = GTZAN_name_dataframe(index_file)

    outputs = (
        (feature_table, './dataframes/feature_dataframe.csv'),
        (name_table, './dataframes/names_dataframe.csv'),
    )
    for table, csv_path in outputs:
        table.to_csv(csv_path, index=False)
 def __init__(self, data_dir):
     """Locate the dataset under ``data_dir``, downloading it if absent, and index its files.

     Args:
         data_dir: Directory that contains, or will receive, the dataset
             folder named by ``self.dataset_name``.

     Reads ``self.dataset_name`` and ``self.dataset_url``, which are not set
     here (presumably class attributes -- confirm on the class).
     Sets ``self.dataset_path`` to the dataset folder and ``self.files`` to
     the path of every file found beneath it.
     """
     self.dataset_path = os.path.join(data_dir, self.dataset_name)
     if os.path.exists(self.dataset_path):
         print("Data set already downloaded.")
     else:
         ods.download(self.dataset_url, data_dir)
         # NOTE(review): a post-download cleanup used to follow the download:
         #     shutil.rmtree(os.path.join(self.dataset_path, shutil.rmtree("")))
         # The inner rmtree("") is evaluated first and always raises, so the
         # constructor failed on every fresh download and nothing was ever
         # removed. It is dropped so that a first run behaves like later runs
         # (which skip it anyway). If a redundant sub-directory really should
         # be deleted, reinstate the call with that directory's actual name.

     # Recursively collect the path of every file in the dataset folder.
     self.files = []
     for root, _, names in os.walk(self.dataset_path):
         self.files.extend(os.path.join(root, name) for name in names)
Exemple #3
0
def load_gender_discrimination_data():
    """
    Load the Kaggle "Gender discrimination" dataset and split it into
    features, target, and sensitive attribute.

    The data (https://www.kaggle.com/hjmjerry/gender-discrimination) is
    downloaded with opendatasets, which asks for Kaggle credentials, and
    ``Lawsuit.csv`` is then read from the downloaded folder. If the download
    or the read raises, the error is printed and the process exits with
    status 1.

    A short Markdown summary of the three shapes is displayed before
    returning.

    Returns:
        features (pandas.DataFrame): all columns except 'Rank' and 'Gender',
            passed through ``pd.get_dummies(..., drop_first=True)``
        target (pandas.Series): 1 where 'Rank' equals 3, otherwise 0
        sensitive_attributes (pandas.DataFrame): a single 'Gender' column,
            1 where the raw value equals 1, otherwise 0
    """
    dataset_url = 'https://www.kaggle.com/hjmjerry/gender-discrimination'
    try:
        od.download(dataset_url)
        input_data = pd.read_csv(r'./gender-discrimination/Lawsuit.csv')
    except Exception as e:
        print("Error : ", e)
        sys.exit(1)

    # Sensitive attribute: "Gender", binarised as (Gender == 1).
    # NOTE(review): the original author treats male as the privileged class;
    # presumably the raw value 1 encodes male -- confirm against the dataset.
    sensitive_attribs = ['Gender']
    gender_frame = input_data.loc[:, sensitive_attribs]
    sensitive_attributes = gender_frame.assign(
        Gender=(gender_frame['Gender'] == 1).astype(int))

    # Target: 1 when Rank == 3, else 0 (the original author associates the
    # positive class with promotion to full professor).
    target = (input_data['Rank'] == 3).astype(int)

    # Features: everything except the target and sensitive columns,
    # with categorical columns expanded into dummy variables.
    remaining_columns = input_data.drop(columns=['Rank', 'Gender'])
    features = pd.get_dummies(remaining_columns, drop_first=True)

    summary_lines = [
        f"features : {features.shape[0]} samples, {features.shape[1]} attributes",
        f"targets : {target.shape[0]} samples",
        f"sensitives attributes : {sensitive_attributes.shape[0]} samples, {sensitive_attributes.shape[1]} attributes",
    ]
    for line in summary_lines:
        display(Markdown(line))

    return features, target, sensitive_attributes
Exemple #4
0
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

# Notebook export: install or upgrade opendatasets through a shell call issued
# by the running IPython kernel, then import it.
get_ipython().system('pip install opendatasets --upgrade')
import opendatasets as od
# Download the Kaggle dataset at this URL with opendatasets.
dataset_url = 'https://www.kaggle.com/moltean/fruits'
od.download(dataset_url)
import os

# In[6]:

# NOTE(review): absolute /content path -- looks like a hosted-notebook working
# directory; confirm or adjust before running elsewhere.
DATA_DIR = '/content/fruits/fruits-360'
# Print the entries found directly inside the dataset directory.
print(os.listdir(DATA_DIR))

# In[7]:

plt.ion()  # interactive mode

# Data augmentation and normalization for training

# Just normalization for validation
data_transforms = {
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import AveragePooling2D, Dense, Flatten, Dropout
from keras.applications import ResNet50
from keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, f1_score, roc_auc_score, accuracy_score, auc,roc_curve, plot_roc_curve
import seaborn as sns
from PIL import Image

"""#### Downloading the Dataset"""

# DOWNLOAD AND UNZIP THE DATASET FROM KAGGLE
# The URL points at a Kaggle competition data page; it is handed to
# opendatasets for download.
# NOTE(review): `od` is not imported in the visible part of this script --
# presumably `import opendatasets as od` appears earlier; confirm.
DATASET_URL = 'https://www.kaggle.com/c/dog-breed-identification/data'
od.download(DATASET_URL)
!unzip -q /content/dog-breed-identification/dog-breed-identification.zip

# Directories holding the training and test data.
# NOTE(review): absolute /content paths -- looks like a hosted-notebook
# working directory; confirm or adjust before running elsewhere.
TRAIN_PATH = '/content/train'
TEST_PATH = '/content/test'

"""#### Data Preparation and Cleaning"""

# Load the labels CSV file into a dataframe.
# NOTE(review): `pd` is not imported in the visible part of this script --
# presumably `import pandas as pd` appears earlier; confirm.
df = pd.read_csv('/content/labels.csv')
# Preview of the first rows; the value is only rendered when this is the last
# expression of a notebook cell, and is discarded when run as a plain script.
df.head()

# Shape of labels.csv as loaded
print(df.shape)

# Adjusting the labels.csv to feed into our Keras ImageDataGenerators
Exemple #6
0
import numpy as np
import pandas as pd
import opendatasets as od
from IPython import get_ipython

# %% [markdown]
# # EDA on Stackoverflow Developer Survey

# %%
# Install or upgrade opendatasets quietly through a shell call issued by the
# running IPython kernel. Note that `od` is already imported at the top of
# this script, so this runs after that import.
get_ipython().system('pip install opendatasets --upgrade --quiet')

# %% [markdown]
# # Download the dataset

# %%
# A dataset identifier (not a URL) is passed to opendatasets here.
od.download('stackoverflow-developer-survey-2020')

# %%
# Enable the `%matplotlib inline` notebook magic
get_ipython().run_line_magic('matplotlib', 'inline')

# %%
# Load the public survey results CSV from the downloaded dataset folder.
df_raw = pd.read_csv(
    './stackoverflow-developer-survey-2020/survey_results_public.csv')
# Preview of the first rows (rendered only as a notebook cell's last expression).
df_raw.head()

# %%
# Load the survey schema CSV, using its 'Column' column as the index.
schema_df = pd.read_csv(
    './stackoverflow-developer-survey-2020/survey_results_schema.csv',
    index_col='Column')
# Using schema_raw to retrieve questions