import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

from utilities import Timer, MetaData, ResultsWriter

# file properties
# -----------------------------------------------------
filePath = '../data/results.txt'

# Column dtypes for the results file are supplied by the project's MetaData helper.
metadata = MetaData()
dataType = metadata.getResultColsDataType()

timer = Timer()
startTime = timer.getTime()
print('Start Time : ', timer.getTime())  # Get the start time for tracking purposes

print('------------------------------------------------------------')
print('Reading files ... ')
print('------------------------------------------------------------')
# The first row of the file is skipped (header); fields are comma separated.
data = np.loadtxt(filePath, delimiter=',', skiprows=1, dtype=dataType)
df = pd.DataFrame(data)

# Separating the subject and activity
# The last column is reduced modulo 100 to obtain the activity code.
# NOTE(review): presumably that column encodes subject * 100 + activity -- confirm
# against whatever writes results.txt.
# `.iloc` replaces the `.ix` indexer, which was removed in pandas 1.0; `-1` picks
# the last column by position.
activity = df.iloc[:, -1] % 100
activity.name = 'predicted_activity'
import numpy as np
import pandas as pd
import time
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.multioutput import MultiOutputClassifier
from sklearn.naive_bayes import GaussianNB

from utilities import Timer, MetaData, ResultsWriter

# file properties
# -----------------------------------------------------
filePath = '../data/consolidated_clean_all.txt'

# Column dtypes for the processed file are supplied by the project's MetaData helper.
metadata = MetaData()
dataType = metadata.getProcessedColsDataType()

timer = Timer()
startTime = timer.getTime()
print('Start Time : ', timer.getTime())  # Get the start time for tracking purposes

print('------------------------------------------------------------')
print('Reading files ... ')
print('------------------------------------------------------------')
# Note that this is a numpy structured array as the data set contains both int and float
# http://docs.scipy.org/doc/numpy/user/basics.rec.html
data = np.genfromtxt(filePath, delimiter=',', skip_header=1, dtype=dataType)
df = pd.DataFrame(data)

# Mean-normalise the first 31 columns: subtract each column's mean and divide by
# its range (max - min).
# `.iloc` replaces the `.ix` indexer, which was removed in pandas 1.0.
# NOTE(review): this assumes the frame's columns carry the structured array's
# field names (strings), in which case `.ix[:, :31]` was already positional and
# selected columns 0..30 -- confirm against MetaData.getProcessedColsDataType().
# NOTE(review): a constant column has max == min and would divide by zero here.
_first31 = df.iloc[:, :31]
df.iloc[:, :31] = (_first31 - _first31.mean()) / (_first31.max() - _first31.min())
import numpy as np
import pandas as pd

from utilities import Timer, MetaData

# file properties
# -----------------------------------------------------
filePath = '../data/consolidated_all.txt'
outputFile = '../data/consolidated_clean_all.txt'

# Column dtypes for the original file are supplied by the project's MetaData helper.
metadata = MetaData()
dataType = metadata.getOriginalColsDataType()

timer = Timer()
startTime = timer.getTime()
print('Start Time : ', timer.getTime())  # Get the start time for tracking purposes

print('------------------------------------------------------------')
print('Reading files ... ')
print('------------------------------------------------------------')

# Note that this is a numpy structured array as the data set contains both int and float
# http://docs.scipy.org/doc/numpy/user/basics.rec.html
# Alternative reader, kept for reference:
#activityData = np.genfromtxt(filePath, delimiter = ',', skip_header = 1, dtype=dataType)
activityData = np.loadtxt(
    filePath,
    delimiter=',',
    skiprows=1,  # the first row is skipped (header)
    dtype=dataType,
)
print('loading Time : ', timer.getTime())

# convert to pandas data frame
df = pd.DataFrame(activityData)

# count missing values in df
print('--------------------------------------')