def trainAnnotatedVideos(user_dir, num_training_videos=100):
    """Train several classifiers on video features and report test accuracy.

    Features are read from ``configurations.features_file`` inside
    ``configurations.output_dir/<user_dir>``; labels are read from
    ``configurations.annotations_file``. Both CSVs are read without a header
    row. The first ``num_training_videos`` rows form the training set and all
    remaining rows form the testing set.

    Six models are evaluated: SVC with the rbf, linear, sigmoid and poly
    (degree 1) kernels, logistic regression, and k-nearest neighbours.

    Args:
        user_dir: Sub-directory of ``configurations.output_dir`` holding the
            features file.
        num_training_videos: Number of leading rows used for training.
            Defaults to 100, the value that was previously hard-coded.

    Returns:
        dict mapping each model name ("SVM-rbf", "SVM-linear", "SVM-sigmoid",
        "SVM-poly", "logistic", "k-NN") to the value returned by the fitted
        classifier's ``score`` on the testing rows.
    """
    results_dir_path = join(configurations.output_dir, user_dir)
    features_file_path = join(results_dir_path, configurations.features_file)

    # Column 0 of the features file is dropped; presumably it holds a video
    # identifier rather than a feature -- TODO confirm against the writer.
    featuresFilePD = pd.read_csv(features_file_path, header=None)
    featuresPD = featuresFilePD.iloc[:, 1:]

    # Labels are taken from column 1 of the annotations file.
    labelsFilePD = pd.read_csv(configurations.annotations_file, header=None)
    labelsPD = labelsFilePD.iloc[:, 1]

    # Positional split. NOTE(review): this assumes the rows of the features
    # file and of the annotations file are in the same video order -- confirm.
    trainingFeatures = featuresPD[0:num_training_videos].values
    trainingLabels = labelsPD[0:num_training_videos].values
    testingFeatures = featuresPD[num_training_videos:].values
    testingLabels = labelsPD[num_training_videos:].values

    modelNames = ["SVM-rbf", "SVM-linear", "SVM-sigmoid", "SVM-poly",
                  "logistic", "k-NN"]

    accuraciesD = {}
    for modelName in modelNames:
        if modelName == "logistic":
            clf = LogisticRegression()
        elif modelName == "k-NN":
            clf = KNeighborsClassifier()
        elif modelName.startswith("SVM"):
            # The kernel is encoded after the dash in the model name.
            kernelType = modelName.split("-")[-1]
            if kernelType == "poly":
                # Degree-1 polynomial kernel, as in the original setup.
                clf = SVC(kernel=kernelType, degree=1)
            else:
                clf = SVC(kernel=kernelType)

        # Single-argument parenthesised print produces the same output under
        # Python 2's print statement and Python 3's print function.
        print('training now')
        clf.fit(trainingFeatures, trainingLabels)

        print('predicting now')
        # score() performs the prediction itself, so a separate predict()
        # call (whose result was never used) would only double the work.
        accuraciesD[modelName] = clf.score(testingFeatures, testingLabels)
        print("the accuracy of %s is %s\n" % (modelName, accuraciesD[modelName]))

    print(accuraciesD)
    return accuraciesD