def sort_dataframe(df_data, df_filenames):
    correct_order = Input.load_testdata_filenames()
    current_order = list(df_filenames.values)
    indices = [current_order.index(filename) for filename in correct_order]
    df_data = df_data.reindex(indices)
    df_data = df_data.reset_index()  # reset index --> adds new indices, old indices become column 'index'
    return df_data.drop('index', axis=1)  # remove this new column 'index'
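# Illustrative usage (the names 'test_predictions' and 'test_filenames' are hypothetical,
# assuming the predictions were produced in the same row order as 'test_filenames'):
#   test_predictions = sort_dataframe(test_predictions, test_filenames)
# This reorders the rows to the canonical test-set filename order returned by
# Input.load_testdata_filenames() before the results are written out.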
import numpy as np
import pandas as pd
import pickle
from sklearn.svm import LinearSVC
from IO import Input, Output

'''
Helper function to use with the grouping of the dataframe,
turns 3 rows of coordinates into a single row
'''
def transformXY(coords):
    return pd.Series(np.asarray(coords).ravel())

# Load the file names of the various datasets
trainset_filenames = Input.load_trainset_filenames()
validationset_filenames = Input.load_validationset_filenames()
traindata_filenames = Input.load_traindata_filenames()
testset_filenames = Input.load_testdata_filenames()

# Load the features
feat = pd.read_csv('skinTrainFeatures.csv', index_col=0)

# Select the features for each dataset
x_trainset = feat.loc[trainset_filenames]
x_validationset = feat.loc[validationset_filenames]
x_testset = feat.loc[testset_filenames]
x_traindata = feat.loc[traindata_filenames]

# Load the labels for each dataset
y_trainset = np.asarray(Input.load_trainset_labels())
y_validationset = np.asarray(Input.load_validationset_labels())
y_traindata = np.asarray(Input.load_traindata_labels())
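# The LinearSVC import above suggests a linear SVM is trained on these features.
# A minimal sketch of that step, assuming the training-set features/labels loaded
# above are used directly; the hyperparameters are illustrative assumptions, not
# values taken from the original script.
clf = LinearSVC(C=1.0, max_iter=10000)
clf.fit(x_trainset, y_trainset)

# Evaluate on the held-out validation split loaded above
validation_accuracy = clf.score(x_validationset, y_validationset)
print('Validation accuracy: {:.3f}'.format(validation_accuracy))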