def trainAnnotatedVideos(user_dir, numOfVideosForTraining=100):
    """Train several classifiers on a user's video features and report accuracy.

    Features are read from ``configurations.features_file`` inside
    ``configurations.output_dir/<user_dir>``; labels are read from
    ``configurations.annotations_file``. Both files are parsed as header-less
    CSV. The first column of the features file is dropped and the second
    column of the annotations file is used as the label (presumably the first
    column of each is a video identifier -- confirm against the files).

    Rows are matched between the two files purely by position: the first
    ``numOfVideosForTraining`` rows form the training set and all remaining
    rows form the testing set.

    Args:
        user_dir: Sub-directory of ``configurations.output_dir`` that holds
            the features file.
        numOfVideosForTraining: Number of leading rows used for training.
            Defaults to 100.

    Returns:
        dict mapping each classifier name ("SVM-rbf", "SVM-linear",
        "SVM-sigmoid", "SVM-poly", "logistic", "k-NN") to the value returned
        by that classifier's ``score`` on the testing set.
    """
    results_dir_path = join(configurations.output_dir, user_dir)
    features_file_path = join(results_dir_path, configurations.features_file)

    featuresFilePD = pd.read_csv(features_file_path, header=None)
    featuresPD = featuresFilePD.iloc[:, 1:]

    labelsFilePD = pd.read_csv(configurations.annotations_file, header=None)
    labelsPD = labelsFilePD.iloc[:, 1]

    # training set: the leading rows
    trainingFeatures = featuresPD[0:numOfVideosForTraining].values
    trainingLabels = labelsPD[0:numOfVideosForTraining].values

    # testing set: every row after the training rows
    testingFeatures = featuresPD[numOfVideosForTraining:].values
    testingLabels = labelsPD[numOfVideosForTraining:].values

    # Classifiers to compare, in evaluation order:
    #   SVM with rbf, linear and sigmoid kernels,
    #   SVM with a polynomial kernel of degree 1,
    #   logistic regression and k-nearest neighbours (library defaults).
    classifiers = [
        ("SVM-rbf", SVC(kernel="rbf")),
        ("SVM-linear", SVC(kernel="linear")),
        ("SVM-sigmoid", SVC(kernel="sigmoid")),
        ("SVM-poly", SVC(kernel="poly", degree=1)),
        ("logistic", LogisticRegression()),
        ("k-NN", KNeighborsClassifier()),
    ]

    accuraciesD = {}
    for name, clf in classifiers:
        # Single-argument parenthesised print behaves the same on Python 2
        # (print statement) and Python 3 (print function).
        print('training now')
        clf.fit(trainingFeatures, trainingLabels)

        print('predicting now')
        # score() predicts on the testing features itself, so a separate
        # predict() call whose result is discarded is unnecessary.
        accuraciesD[name] = clf.score(testingFeatures, testingLabels)
        print("the accuracy of %s is %s\n" % (name, accuraciesD[name]))

    print(accuraciesD)
    return accuraciesD