def prepareData(dir_labelDictionary):
    """
    Run the data-preparation pipeline on every labelled input directory.

    Each input directory is passed through ``cleanCSV`` -> ``fillnan`` ->
    ``pixel2mm`` -> ``addVelocityColumnsBothFeet`` -> ``isStepUpFrame``. The
    directory returned by the last stage is copied into
    ``<outputAbsPath()>/<label>`` and then deleted.

    :param dir_labelDictionary: sequence of dicts with the keys ``'dir'``
        (input directory) and ``'label'`` (name of the output sub-directory).
    :return: output directory of the last processed entry, or ``None`` when
        there is nothing to process.
    """
    # Guard first: with an empty input the original reached
    # ``return outputDir`` while the name was still unbound.
    if not dir_labelDictionary:
        return None

    # Second argument of fillnan; presumably a likelihood cut-off -- confirm
    # against fillnan's implementation.
    userinput_pBound = 0.9

    # One spec per tracked body part: its x/y columns and its likelihood column.
    bodyParts = ('Rightpaw', 'Leftpaw', 'Tailbase', 'Rotarodtop',
                 'Rotarodbottom')
    userinput_columns_likelihoods = [
        {
            'column': [part + ' x', part + ' y'],
            'likelihood': part + ' likelihood',
        }
        for part in bodyParts
    ]

    outputDir = None
    outputDirAbsolutePath = outputAbsPath()
    for dir_label in dir_labelDictionary:
        inputDir = dir_label['dir']
        label = dir_label['label']
        outputDir = os.path.join(outputDirAbsolutePath, label)
        # makedirs also creates missing parents and tolerates an existing dir.
        os.makedirs(outputDir, exist_ok=True)

        # TODO: Add a progress bar to each stage below, especially fillnan.
        stage = cleanCSV(inputDir)
        stage = fillnan(userinput_columns_likelihoods, userinput_pBound, stage)
        stage = pixel2mm(stage)
        stage = addVelocityColumnsBothFeet(stage)
        lastDir = isStepUpFrame(stage)

        # Keep the final stage's files under the label directory and drop the
        # intermediate directory they came from.
        copy_tree(lastDir, outputDir)
        shutil.rmtree(lastDir)
    return outputDir
def cleanCSV(inputDir):
    """
    Rewrite every CSV found under ``inputDir`` with a single merged header.

    For each non-hidden ``*.csv`` file the first row is discarded, the second
    and third rows are joined cell by cell with a space to form the new header
    row, and all remaining lines are copied unchanged. Results are written to
    ``<outputAbsPath()>/cleanCSV`` as ``'cl_'`` + the file name returned by
    ``deleteCommonWordsInFileName``. The output directory is emptied first.

    :param inputDir: directory that is walked for CSV files.
    :return: path of the output directory.
    :raises ValueError: if a CSV file has fewer than three rows.
    """
    prefix = 'cl_'
    outputDir = os.path.join(outputAbsPath(), 'cleanCSV')
    # Start from an empty output directory on every run.
    if os.path.exists(outputDir):
        shutil.rmtree(outputDir)
    os.makedirs(outputDir)

    # Strip one trailing slash. The original assigned ``inputDir[-1]`` here,
    # which replaced the whole path with '/' and walked the filesystem root.
    if len(inputDir) > 1 and inputDir.endswith('/'):
        inputDir = inputDir[:-1]

    for root, _dirs, fileNames in os.walk(inputDir):
        for fileName in fileNames:
            if fileName.startswith('.') or not fileName.endswith('.csv'):
                continue

            # Join with ``root`` so files found in sub-directories can be
            # opened; newline='' is what the csv module expects of its files.
            inputPath = os.path.join(root, fileName)
            with open(inputPath, 'r', newline='') as readFile:
                csvReader = csv.reader(readFile)
                try:
                    next(csvReader)  # first row is dropped
                    row1 = next(csvReader)
                    row2 = next(csvReader)
                except StopIteration:
                    # Previously a short file reused the header rows of the
                    # preceding file (or failed on an unbound name).
                    raise ValueError(
                        'Expected at least three rows in ' + inputPath
                    ) from None

                row0Col = [' '.join(pair) for pair in zip(row1, row2)]

                outputFileName = os.path.join(
                    outputDir, prefix + deleteCommonWordsInFileName(fileName))
                with open(outputFileName, 'w', newline='') as outputFile:
                    csv.writer(outputFile).writerow(row0Col)
                    # The reader has only advanced past the three header rows;
                    # copy whatever is left in the file as-is.
                    outputFile.writelines(readFile)
    return outputDir
def concatenateCSVs(dir_labelDictionaries):
    '''
    Concatenate the CSV files of two labelled directories, one output per
    directory.

    The ``*.csv`` files of each directory are taken in sorted order. The first
    file is copied completely; the first line of every following file is
    skipped. The outputs are written to ``<outputAbsPath()>/concat0.csv`` and
    ``<outputAbsPath()>/concat1.csv``.

    :param dir_labelDictionaries: two dicts with the keys ``'dir'`` and
        ``'label'``. The entry whose label is ``'0'`` is used for
        ``concat0.csv`` when it comes first; otherwise the order is swapped.
    :return: The first element in the list is the output for label 0 and the
        second is the output for label 1.
    '''
    prefix = 'concat'
    first, second = dir_labelDictionaries[0], dir_labelDictionaries[1]
    if first['label'] != '0':
        first, second = second, first

    # Collect both file lists before anything is written, as the original did.
    allCSVsArray = [
        sorted(glob.glob(first['dir'] + '/*.csv')),
        sorted(glob.glob(second['dir'] + '/*.csv')),
    ]

    outputCSVs = []
    # enumerate() gives the output its number. The original looked the list up
    # with ``allCSVsArray.index(...)``, which returns 0 for BOTH entries when
    # the two lists compare equal (e.g. both empty) and so wrote concat0.csv
    # twice.
    for position, allCSVs in enumerate(allCSVsArray):
        outputPath = os.path.join(outputAbsPath(),
                                  prefix + str(position) + '.csv')
        outputCSVs.append(outputPath)
        with open(outputPath, 'w') as outputFile:
            for i, inputPath in enumerate(allCSVs):
                with open(inputPath, 'r') as inputFile:
                    if i != 0:
                        # Skip the first line of every file after the first.
                        inputFile.readline()
                    shutil.copyfileobj(inputFile, outputFile)
        print('%d files have been combined.' % len(allCSVs))
    return outputCSVs
def _stepUpHeights(y):
    """Return one value per row of ``y``: the summed drop at the end of each
    run of decreasing values, 1e3 where the value did not change, else 0."""
    d = y.diff()
    m = d.lt(0)                      # rows where the value decreased
    b = (~m).cumsum()                # constant within one run of decreases
    s = d.mask(~m).abs().groupby(b).transform('sum')
    endOfRun = ~b.duplicated(keep='last') & m
    # Purely numeric choices: the original mixed floats with the strings
    # '1e3' and '' and converted back afterwards, which current NumPy rejects
    # because the choices have no common dtype.
    # NOTE(review): unchanged rows (diff == 0) are scored as 1e3 and therefore
    # enter the interval means below -- confirm this is intended.
    return np.select([endOfRun, d.eq(0)], [s, 1e3], 0.0)


def calculateStepUpHeight(dir_labelDictionaries, framesPerSecond=20,
                          secondsPerInterval=30, estimatedMaxFrames=6500):
    """
    Compute, per CSV file and per time interval, the mean non-zero
    "step up height" derived from the column ``'rel RightY mm'``.

    For every entry one table is written to
    ``<outputAbsPath()>/stepHeightCalculation/stepUpHeightRight_<name>.csv``
    where ``<name>`` is ``getLastDirectory(entry['dir'])``. Rows are interval
    numbers, columns are input file names; an interval without any non-zero
    value stays NaN.

    :param dir_labelDictionaries: iterable of dicts with the key ``'dir'``
        (directory walked for non-hidden ``*.csv`` files).
    :param framesPerSecond: frames per second of the recordings.
    :param secondsPerInterval: length of one interval in seconds.
    :param estimatedMaxFrames: used to size the number of intervals; frames
        beyond the resulting intervals are ignored.
    :return: list with the path of each written CSV.
    """
    prefix = 'stepUpHeightRight_'
    col = 'rel RightY mm'
    framesPerInterval = framesPerSecond * secondsPerInterval
    n = int(np.ceil(estimatedMaxFrames / framesPerInterval))

    csvs = []
    for dir_labelDictionary in dir_labelDictionaries:
        inputDir = dir_labelDictionary['dir']
        dfRightStepUpHeight = pd.DataFrame(index=list(range(n)))

        for root, _dirs, files in os.walk(inputDir):
            for inputFile in files:
                if inputFile.startswith('.') or not inputFile.endswith('.csv'):
                    continue
                # Every file gets a column, all-NaN until intervals are filled.
                dfRightStepUpHeight[inputFile] = np.nan
                # Join with ``root`` so files in sub-directories can be read.
                df = pd.read_csv(os.path.join(root, inputFile), index_col=0)
                heights = _stepUpHeights(df[col])

                for i in range(n):
                    start = i * framesPerInterval
                    if start >= len(heights):
                        break
                    # Slicing clips at the end, so the last partial interval
                    # is handled without a special case.
                    window = heights[start:start + framesPerInterval]
                    nSplitStepUp = np.count_nonzero(window)
                    if nSplitStepUp:
                        dfRightStepUpHeight.loc[i, inputFile] = (
                            window.sum() / nSplitStepUp)

        outputDir = os.path.join(outputAbsPath(), 'stepHeightCalculation')
        os.makedirs(outputDir, exist_ok=True)
        outputCSVPath = os.path.join(
            outputDir, prefix + getLastDirectory(inputDir) + '.csv')
        dfRightStepUpHeight.to_csv(outputCSVPath)
        csvs.append(outputCSVPath)
    return csvs
import os
import string

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import pyplot
from numpy.random import shuffle
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split

from utils.getDirAbsPath import outputAbsPath

# Directory that receives the feature-importance results.
outputDir = os.path.join(outputAbsPath(), 'featureImportance')
framesPerInterval = 200
maxFrames = 6500
os.makedirs(outputDir, exist_ok=True)

nLoops = range(100)
nFeatures = range(1, int(maxFrames / framesPerInterval))
importance_columns = ['feature' + str(i) for i in nFeatures]
index = [str(i) for i in nFeatures]

# One row per loop; an 'accuracy' column followed by one column per feature.
# The original passed ``['accuracy'].append(nFeatures)``: list.append returns
# None, so the frame was created without any columns.
# NOTE(review): the feature columns are assumed to be ``importance_columns``
# -- confirm against the code that fills acc_mp.
acc_mp = pd.DataFrame(
    index=nLoops,
    columns=['accuracy'] + importance_columns)
# Split df01 into chunks of roughly `interval` rows and keep the second one.
# The section count is made an explicit integer; np.array_split truncates a
# float count anyway.
bins = np.array_split(df01, len(df01) // interval)[1]
df01 = pd.DataFrame(bins)
footnote = ''
# fancyBoxPlot(pd.DataFrame(bins),
#              xlabel='trained up to (i)th interval',
#              ylabel='accuracy',
#              title='Training Accuracy with Different Windows of Time Series Data (step up height) %i iterations' % interval,
#              outputPath=os.path.join(outputAbsPath(), 'featureImportance', 'accuracies.png'),
#              footnote=footnote)

# Labels and output location for the feature-importance box plot.
xlabel = '(i)th interval'
ylabel = 'feature importance'
title = 'Feature importance in classifying genotypes (%i iterations)' % interval
outputPath = os.path.join(outputAbsPath(), 'featureImportance',
                          'featureImportanceDecisionTree.png')
footnote = (
    'Classified with mean step up height as the model\'s input. '
    'Classified with mean step up height as the model\'s input. '
    'Classified with mean step up height as the model\'s input. '
    'Classified with mean step up height as the model\'s input. '
    'Classified with mean step up height as the model\'s input. '
)

# Plotting
fig, ax = plt.subplots(figsize=(15, 11), tight_layout=True)
plt.subplots_adjust(hspace=1.0, wspace=0.02, bottom=0.17)

# Creating axes instance.
# notch is a boolean flag; the original passed the string 'True', which only
# worked because any non-empty string is truthy ('False' would be too).
bp = ax.boxplot(bins, patch_artist=True, notch=True)

# Changing color and linewidth of whiskers.
for whisker in bp['whiskers']:
    whisker.set(color='#8B008B', linestyle="-.", linewidth=3)
import os
import string

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from utils.getDirAbsPath import outputAbsPath

# Feature-importance table read from the 'featureImportance' output folder.
inputCSV = os.path.join(
    outputAbsPath(),
    'featureImportance',
    'featureImportance_decisionTree.csv',
)

# Rows per chunk: one per lowercase ASCII letter.
interval = len(string.ascii_lowercase)

# Load the table and leave out the accuracy column so that only the
# importance columns remain.
df = pd.read_csv(inputCSV, index_col=0)
df = df.drop(columns=['accuracy'])

# Cut the table into chunks of about `interval` rows and plot only the LAST
# chunk (presumably the runs using all 10 intervals, going by the output file
# name -- confirm against the script that writes the CSV).
nChunks = len(df) / interval
bins = np.array_split(df, nChunks)[-1]
df = pd.DataFrame(bins)

# Plot labels and output location.
xlabel = '(i)th interval'
ylabel = 'feature importance'
title = 'Feature importance in classifying genotypes (%i iterations)' % interval
outputPath = os.path.join(
    outputAbsPath(),
    'featureImportance',
    'featureImportance10intervals.png',
)
footnote = (
    'Classified with mean step up height as the model\'s input. '
    'Classified with mean step up height as the model\'s input. '
    'Classified with mean step up height as the model\'s input. '
    'Classified with mean step up height as the model\'s input. '
    'Classified with mean step up height as the model\'s input. '
)

# Plotting
fig, ax = plt.subplots(figsize=(15, 11), tight_layout=True)
plt.subplots_adjust(hspace=1.0, wspace=0.02, bottom=0.17)

# Creating axes instance
import os

# from dataAnalysis.allAnalysis import allAnalysis
from dataPrep.main_dataPrep import prepareData
from utils.getDirAbsPath import outputAbsPath

# Class ids: WT -> 0, YAC -> 1.
classWT, classYAC = 0, 1

# TODO: Do not assume the names of columns (ex. Rightpaw x, Rotarod top).
#       Ask the user for them.
# TODO: Create the `output` dir in the main project dir.

userinput_secondsPerInterval = 30  # TODO: Ask for interval length.
userinput_framesPerSecond = 20  # TODO: Ask for frames per second.
userinput_classification = ['WT', 'YAC']  # TODO: Ask which one should be 0 and 1.

# TODO: User input (both directories are hard-coded for now).
userinput_0Day3Dir = (
    '/Users/ksb7640/Documents/UBC_Academic/Raymond_Lab/448/rotarod_git/rotarod_ML/data_all/Day4_2and3monthOld_rotarodAnalysis/WT'
)
userinput_1Day3Dir = (
    '/Users/ksb7640/Documents/UBC_Academic/Raymond_Lab/448/rotarod_git/rotarod_ML/data_all/Day4_2and3monthOld_rotarodAnalysis/YAC128'
)
userinput_maxFrames = 6500

# One {'dir', 'label'} entry per input directory; the label is the name of
# the sub-directory prepareData writes to.
dir0 = {'dir': userinput_0Day3Dir, 'label': 'Day4_WT'}
dir1 = {'dir': userinput_1Day3Dir, 'label': 'Day4_YAC'}
dirs = [dir0, dir1]

# Make sure the output root exists before the pipeline runs.
outputDirAbsolutePath = outputAbsPath()
if not os.path.exists(outputDirAbsolutePath):
    os.mkdir(outputDirAbsolutePath)

# Note: `dirs` is rebound to whatever prepareData returns.
dirs = prepareData(dirs)
# allAnalysis(dirs, userinput_secondsPerInterval, userinput_framesPerSecond, userinput_maxFrames)
import os
import string

import numpy as np
import pandas as pd
from matplotlib import pyplot
from numpy.random import shuffle
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

from utils.getDirAbsPath import outputAbsPath

# Directory that receives the feature-importance results.
outputDir = os.path.join(outputAbsPath(), 'featureImportance')
os.makedirs(outputDir, exist_ok=True)

maxFrames = 6500
# One repetition per lowercase letter; the letter is part of the row label.
nLoops = list(string.ascii_lowercase)
# The original bound nFeatures to the integer int(maxFrames / 2) and then
# iterated over it, which raises TypeError. The range below matches the
# ``range(1, 11)`` used for importance_columns and for the inner loop.
# NOTE(review): confirm that 1..10 is the intended set.
nFeatures = range(1, 11)
importance_columns = ['feature' + str(i) for i in range(1, 11)]
# Row labels 'a1', 'b1', ..., 'z1', 'a2', ...
index = [s + str(i) for i in nFeatures for s in list(string.ascii_lowercase)]
# ``['accuracy'].append(...)`` evaluates to None (list.append returns None),
# which left the frame without columns; build the column list explicitly.
# NOTE(review): the feature columns are assumed to be ``importance_columns``
# -- confirm against the code that fills acc_mp.
acc_mp = pd.DataFrame(index=index, columns=['accuracy'] + importance_columns)

for s in nLoops:
    for i in range(1, 11):
        WT = '/Users/ksb7640/Documents/UBC_Academic/Raymond_Lab/448/rotarod_git/rotarod_ML/output/stepHeightCalculation/stepUpHeightRight_Day4_WT.csv'
        YAC = '/Users/ksb7640/Documents/UBC_Academic/Raymond_Lab/448/rotarod_git/rotarod_ML/output/stepHeightCalculation/stepUpHeightRight_Day4_YAC.csv'
        dfWT = pd.read_csv(WT, index_col=0)
        # Keep only the first i rows of the table.
        dfWT = dfWT.iloc[0:i]
import os
import string

import numpy as np
import pandas as pd
from matplotlib import pyplot
from numpy.random import shuffle
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

from utils.getDirAbsPath import outputAbsPath

# Directory that receives the feature-importance results.
outputDir = os.path.join(outputAbsPath(), 'featureImportance')
os.makedirs(outputDir, exist_ok=True)

# One repetition per lowercase letter; the letter is part of the row label.
nLoops = list(string.ascii_lowercase)
nFeatures = range(1, 11)
importance_columns = ['feature' + str(i) for i in range(1, 11)]
# Row labels 'a1', 'b1', ..., 'z1', 'a2', ...
index = [s + str(i) for i in nFeatures for s in list(string.ascii_lowercase)]
# ``['accuracy'].append(nFeatures)`` evaluates to None (list.append returns
# None), which left the frame without columns; build the list explicitly.
# NOTE(review): the feature columns are assumed to be ``importance_columns``
# -- confirm against the code that fills acc_mp.
acc_mp = pd.DataFrame(
    index=index,
    columns=['accuracy'] + importance_columns)

for s in nLoops:
    for i in range(1, 11):
        WT = '/Users/ksb7640/Documents/UBC_Academic/Raymond_Lab/448/rotarod_git/rotarod_ML/output/stepHeightCalculation/stepUpHeightRight_Day4_WT.csv'
        YAC = '/Users/ksb7640/Documents/UBC_Academic/Raymond_Lab/448/rotarod_git/rotarod_ML/output/stepHeightCalculation/stepUpHeightRight_Day4_YAC.csv'
        dfWT = pd.read_csv(WT, index_col=0)