from matplotlib.pyplot import figure, boxplot, xlabel, ylabel, show
import numpy as np
from scipy.io import loadmat
from sklearn.neighbors import KNeighborsClassifier
from sklearn import model_selection
import torch
from __init__ import train_neural_net, draw_neural_net
from scipy import stats
from clean_data import clean_data, transform_data

# ----------------------- LOADING DATA ----------------------------
# Builds the module-level inputs for the classification experiment:
#   X              - numpy array with one column per entry of attributeNames
#   y              - 1.0 where trending_time <= 3, otherwise 0.0
#   attributeNames - column names, in the same order as the columns of X
#   N, M           - number of rows / columns of X

# Single source of truth for the columns that are transformed and then used
# as model inputs, so X and attributeNames cannot drift apart.
attributeNames = [
    'likes', 'dislikes', 'views', 'comment_count', 'trending_time'
]

# The '**videos.csv' path is presumably expanded as a glob inside clean_data
# -- TODO confirm against clean_data.
data = clean_data('Datasets/**videos.csv')
# Pass a copy of the list so the helper cannot alter attributeNames.
data = transform_data(data, list(attributeNames))

# Fixed seed; set after loading, exactly as before, so any later random
# draws are unchanged.
np.random.seed(180820)

# Keep at most the first 100000 rows. The explicit copy makes `data` an
# independent frame, so adding the 'class' column below does not trigger
# pandas' chained-assignment (SettingWithCopy) warning.
data = data.head(100000).copy()

X = np.array(data[attributeNames])

# Binary target computed once and reused for both the frame column and y.
# NOTE(review): y is derived from 'trending_time', which is also a column of
# X -- this looks like target leakage; confirm that downstream code drops
# that column before fitting.
# NOTE(review): the threshold 3. is applied after transform_data; if that
# helper rescales 'trending_time', confirm the cut-off is still meaningful.
y = np.where(data["trending_time"] <= 3., 1, 0.)
data['class'] = y

N, M = X.shape
from matplotlib.pylab import (figure, semilogx, loglog, xlabel, ylabel,
                              legend, title, subplot, show, grid)
import numpy as np
from scipy.io import loadmat
import sklearn.linear_model as lm
from sklearn import model_selection
from __init__ import rlr_validate
from clean_data import clean_data, transform_data
import pandas as pd
import matplotlib.pyplot as plt

# SETUP -----------------------------------------------------------------------
# Builds the module-level inputs for the regression experiment:
#   X              - numpy array with one column per entry of attributeNames
#   y              - the 'trending_time' column as a 1-D numpy array
#   attributeNames - feature column names, in the same order as X's columns

# Columns handed to transform_data (features plus the regression target).
cols = ["likes", "dislikes", "views", "comment_count", "trending_time"]

# The '**videos.csv' path is presumably expanded as a glob inside clean_data
# -- TODO confirm against clean_data.
data = clean_data('Datasets/**videos.csv')
data_norm = transform_data(data, cols)

# Fixed seed so any later random draws are reproducible.
np.random.seed(180820)

# NOTE: a random 10000-row subsample used to be sketched here inside a string
# literal; it was never executed and its indexing was invalid. To experiment
# on less data, something like `data_norm = data_norm.sample(n=10000)` or
# `data_norm = data_norm.head(100)` can be used -- the latter shows that the
# training error can easily be plotted with less data.

# Features exclude 'trending_time', which is the target. X is selected via
# attributeNames so the two can never disagree.
attributeNames = ["likes", "dislikes", "views", "comment_count"]
X = np.array(data_norm[attributeNames])
y = np.array(data_norm["trending_time"]).squeeze()