Created on Sun Apr 22 20:32:16 2018 @author: shifuddin """ from load_data import load_zip from load_data import load_X_y from sklearn.model_selection import train_test_split from sklearn.linear_model import LogisticRegression from sklearn.metrics import confusion_matrix import pandas as pd ''' Load X, y from uri ''' uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00380/YouTube-Spam-Collection-v1.zip' df = load_zip(uri,'Youtube05-Shakira.csv', 0, 4, 4, 5, False) df_dummy = pd.get_dummies(df, drop_first=True) X, y = load_X_y(df_dummy, 1, 1392, 0, 1) ''' Split into training and test set ''' X_train, X_test, y_train, y_test =train_test_split(X, y,test_size=0.2, random_state=1) # Feature Scaling from sklearn.preprocessing import StandardScaler sc = StandardScaler() X_train = sc.fit_transform(X_train) X_test = sc.transform(X_test)
"""
Created on Thu May 3 11:51:18 2018

Support-vector classification (SVC) of YouTube comment spam.

NOTE(review): the original header said "decission tree with banknote",
which matched neither the model (SVC) nor the data (YouTube spam
collection) — corrected here.

@author: shifuddin
"""
from load_data import load_zip, load_X_y
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Download the YouTube spam collection and one-hot encode the raw frame.
uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00380/YouTube-Spam-Collection-v1.zip'
df = load_zip(uri, 'Youtube03-LMFAO.csv', 0, 4, 4, 5, False)
df_dummy = pd.get_dummies(df, drop_first=True)
X, y = load_X_y(df_dummy, 1, 1392, 0, 1)

# Hold out 20% of the rows for evaluation (seeded for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1)

# Standardize features (scaler is fitted/applied further on).
sc = StandardScaler()
"""
Created on Thu Apr 12 13:23:12 2018

@author: shifuddin
"""
from sklearn.neural_network import MLPClassifier
from load_data import load_zip, load_X_y
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Fetch the YouTube spam collection and one-hot encode the raw frame.
uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00380/YouTube-Spam-Collection-v1.zip'
df = load_zip(uri, 'Youtube01-Psy.csv', 0, 4, 4, 5, False)
df_dummy = pd.get_dummies(df, drop_first=True)
X, y = load_X_y(df_dummy, 1, 1392, 0, 1)

# Reserve 20% of the rows as a test set (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1)

# Standardize features; the scaler is fitted on the training data only.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
# -*- coding: utf-8 -*-
"""
Created on Wed May 2 21:53:10 2018

Mean-shift clustering of the YouTube spam-collection features, scored
against the known labels with the homogeneity metric.

NOTE(review): the original header said "meanshift with iris data", but
the script actually loads the YouTube spam collection — corrected here.

@author: shifuddin
"""
from load_data import load_zip, load_X_y
from sklearn.cluster import MeanShift, estimate_bandwidth
from sklearn.metrics import homogeneity_score
import pandas as pd

# Download the YouTube spam collection and one-hot encode the raw frame.
uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00380/YouTube-Spam-Collection-v1.zip'
df = load_zip(uri, 'Youtube04-Eminem.csv', 0, 4, 4, 5, False)
df_dummy = pd.get_dummies(df, drop_first=True)
X, y = load_X_y(df_dummy, 1, 1392, 0, 1)

# Estimate the kernel bandwidth (cluster radius) from a sample of the data,
# then run mean-shift seeded on a binned grid for speed.
bandwidth = estimate_bandwidth(X, quantile=.1, n_samples=350)
ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
ms.fit(X)
labels = ms.labels_
centroids = ms.cluster_centers_

# How well do the discovered clusters line up with the true classes?
homogeneity = homogeneity_score(y.ravel(), labels)
decission tree with banknote @author: shifuddin """ from load_data import load_zip from sklearn.model_selection import train_test_split from sklearn.linear_model import LinearRegression from sklearn.preprocessing import PolynomialFeatures from sklearn.metrics import mean_squared_error from math import sqrt ''' Load feature values as X and target as Y here we read day dataset ''' uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00440/sgemm_product_dataset.zip' X, y = load_zip(uri, 'sgemm_product.csv', 0, 14, 15, 19, True) ''' Split into training and test set ''' X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1) ''' Polynomial feature scaling ''' ply_ft = PolynomialFeatures(degree=2) X_train = ply_ft.fit_transform(X_train) X_test = ply_ft.transform(X_test) ''' Fit DecisionTreeRegressor with Bike Day data
Created on Thu May 3 11:51:18 2018 decission tree with banknote @author: shifuddin """ from load_data import load_zip from sklearn.model_selection import train_test_split from sklearn.neural_network import MLPRegressor from sklearn.metrics import mean_squared_error from math import sqrt ''' Load feature values as X and target as Y here we read day dataset ''' uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00304/BlogFeedback.zip' X, y = load_zip(uri, 'blogData_train.csv', 0, 280, 280, 281, True) ''' Split into training and test set ''' X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1) ''' Feature scaling ''' from sklearn.preprocessing import StandardScaler sc = StandardScaler() X_train = sc.fit_transform(X_train) X_test = sc.transform(X_test) sc_y = StandardScaler()
Created on Thu May 3 11:51:18 2018 decission tree with banknote @author: shifuddin """ from load_data import load_zip from sklearn.model_selection import train_test_split from sklearn.svm import SVR from sklearn.metrics import mean_squared_error from math import sqrt ''' Load feature values as X and target as Y here we read day dataset ''' uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip' X, y = load_zip(uri, 'day.csv', 2, 15, 15, 16, True) ''' Split into training and test set ''' X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1) ''' Feature scaling ''' from sklearn.preprocessing import StandardScaler sc = StandardScaler() X_train = sc.fit_transform(X_train) X_test = sc.transform(X_test) sc_y = StandardScaler()
Created on Thu May 3 11:51:18 2018 decission tree with banknote @author: shifuddin """ from load_data import load_zip from sklearn.model_selection import train_test_split from sklearn.svm import SVR from sklearn.metrics import mean_squared_error from math import sqrt ''' Load feature values as X and target as Y here we read day dataset ''' uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip' X, y = load_zip(uri, 'hour.csv', 2, 15, 15, 16, True) ''' Split into training and test set ''' X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1) ''' Feature scaling ''' from sklearn.preprocessing import StandardScaler sc = StandardScaler() X_train = sc.fit_transform(X_train) X_test = sc.transform(X_test) sc_y = StandardScaler()
"""
Created on Thu Apr 12 13:23:12 2018

@author: shifuddin
"""
from sklearn.neural_network import MLPClassifier
from load_data import load_zip, load_X_y
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Fetch the YouTube spam collection and one-hot encode the raw frame.
uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00380/YouTube-Spam-Collection-v1.zip'
df = load_zip(uri, 'Youtube02-KatyPerry.csv', 0, 4, 4, 5, False)
df_dummy = pd.get_dummies(df, drop_first=True)
X, y = load_X_y(df_dummy, 1, 1392, 0, 1)

# Reserve 20% of the rows as a test set (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1)

# Standardize features; the scaler is fitted on the training data only.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)