Ejemplo n.º 1
0
def train_folder_file(file_folder):
    """Train every classifier in ``clf_list`` on each file in *file_folder*.

    The folder path is expected to end in ``.../<model>/<f_id>``; the two
    trailing "/"-separated components select the model family and the id.
    Per-classifier results are accumulated and pickled to
    ``<save_root>/<model>/<clf>/<model>_<clf>_<f_id>_.pkl``.

    Parameters
    ----------
    file_folder : str
        Directory containing the training files, using "/" separators.

    Raises
    ------
    ValueError
        If the model name extracted from the path is not recognised
        (the original code crashed later with an unbound ``result``).
    """
    print("Running this process.....")
    extract_name = file_folder.split("/")
    model = extract_name[-2]
    f_id = extract_name[-1]
    result_report = []

    print(model)
    print(f_id)

    for clf in clf_list:
        # Hoist the directory listing: it is invariant for this folder, and
        # the original re-listed the directory on every iteration just to
        # print the total count.
        files = os.listdir(file_folder)
        total = len(files)
        for temp, file_s in enumerate(files, start=1):
            file_path = os.path.join(file_folder, file_s)
            if model == "NewDP":
                result = classifier().train_NewDP_classifier(
                    model, f_id, file_path, clf)
            elif model == "levelob":
                print("yes")
                # NOTE(review): "levelob" reuses the NewDP trainer — confirm
                # this is intentional and not a copy/paste slip.
                result = classifier().train_NewDP_classifier(
                    model, f_id, file_path, clf)
            else:
                # BUG FIX: the original fell through with ``result`` unbound
                # (NameError) or appended a stale previous result; fail fast.
                raise ValueError("unknown model: " + model)
            result_report.append(result)

            print(file_folder + " / " + clf + " : " + str(temp) + "/" +
                  str(total))
        save_path = os.path.join(save_root, model, clf)
        save_name = os.path.join(
            save_path, model + "_" + clf + "_" + f_id + "_.pkl")
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        with open(save_name, "wb") as f:
            pickle.dump(result_report, f)
    def normal_training(self, classifierName=None, featureScaling=False, test_size=0.2):
        """Train one classifier on a held-out train/test split of self.X/self.y.

        Parameters
        ----------
        classifierName : str, optional
            One of "randomforest", "svm", "gradientboosting", "decisiontree",
            "linearsvm", "logistic", "knn", "sgd".  Any other value prints
            "classifier not found" and returns with no trained model.
        featureScaling : bool
            If True, standardise features (fit scaler on train, apply to test).
        test_size : float
            Fraction of the data held out for testing.

        Returns
        -------
        dict
            Split arrays, the trained model, its predictions, and ``x_f``
            (feature importances — only set by randomforest/decisiontree).
        """
        from sklearn.model_selection import train_test_split
        X_train, X_test, y_train, y_test = train_test_split(
            self.X, self.y, test_size=test_size, random_state=0)
        trainedClassifier = None
        y_pred = None
        # BUG FIX: the original left ``x_f`` unbound for every classifier
        # except randomforest/decisiontree, so the return dict raised
        # NameError.  Default it to None.
        x_f = None
        if featureScaling:
            from sklearn.preprocessing import StandardScaler
            sc_X = StandardScaler()
            X_train = sc_X.fit_transform(X_train)
            X_test = sc_X.transform(X_test)
        clf = classifier(X_train, y_train)
        if classifierName == "randomforest":
            trainedClassifier, x_f = clf.random_forest()
        elif classifierName == "svm":
            trainedClassifier = clf.support_vector()
        elif classifierName == "gradientboosting":
            trainedClassifier = clf.gradient_boosting()
        elif classifierName == "decisiontree":
            trainedClassifier, x_f = clf.decision_tree()
        elif classifierName == "linearsvm":
            trainedClassifier = clf.linear_support_vector()
        elif classifierName == "logistic":
            trainedClassifier = clf.logistic()
        elif classifierName == "knn":
            trainedClassifier = clf.KNN()
        elif classifierName == "sgd":
            trainedClassifier = clf.sgd()
        else:
            print("classifier not found")
        if trainedClassifier is not None:
            # Predict once here instead of duplicating the call in every branch.
            y_pred = trainedClassifier.predict(X_test)
        # Don't shadow the builtin ``dict`` (original bound a local ``dict``).
        return {"X_train": X_train, "X_test": X_test, "y_train": y_train,
                "y_test": y_test, "trainedmodel": trainedClassifier,
                "y_pred": y_pred, "x_f": x_f}
Ejemplo n.º 3
0
def k_folds(k, model, corpusData, corpusLabels, vectorizer):
    """Estimate *model*'s accuracy with k-fold cross validation.

    Parameters
    ----------
    k : int
        Number of folds.
    model : estimator
        Template model; a fresh clone is fitted for each fold.
    corpusData, corpusLabels : sequence
        Documents and their labels.
    vectorizer : object
        Fitted vectorizer consumed by ``vectorizeData``.

    Returns
    -------
    float
        Mean accuracy across the ``k`` folds.
    """
    accuracySum = 0

    # BUG FIX: the partition count was hard-coded to 10 (``k=10``) while the
    # fold loop below iterates ``range(k)`` — any other ``k`` either skipped
    # folds (k < 10) or indexed past the partition lists (k > 10).
    dataPartitions, labelPartitions = partition(corpusData=corpusData,
                                                corpusLabels=corpusLabels,
                                                partType='k-cross',
                                                k=k)

    for vali_partition in range(k):
        # Clone so each fold trains a fresh, untouched model.
        model_for_k = clone(model)

        # Held-out fold, transformed into the vectorizer's count space.
        validation_data = vectorizeData(dataPartitions[vali_partition],
                                        vectorizer)
        validation_labels = labelPartitions[vali_partition]

        # All remaining partitions form the training corpus.
        training_data = []
        training_labels = []
        for train_partition in range(k):
            if train_partition == vali_partition:
                continue
            training_data += dataPartitions[train_partition]
            training_labels += labelPartitions[train_partition]

        training_data = vectorizeData(training_data, vectorizer)

        # Fit, predict, and collect this fold's accuracy.
        accuracy_fold_k = classifier(model_for_k, training_data,
                                     training_labels, validation_data,
                                     validation_labels)
        accuracySum += accuracy_fold_k['accuracy']

    # Mean accuracy across folds.
    return accuracySum / k
Ejemplo n.º 4
0
def createModelConfusionMatrix(model,
                               proc_type,
                               min_df=1,
                               partitionRatio=0.95):
    """Fit *model* on the rt-polarity corpus and print its accuracy and
    confusion matrix.

    The corpus is read from 'rt-polarity.pos' / 'rt-polarity.neg',
    split once with the given ratio, vectorized, and evaluated.
    """
    corpus, corpus_labels, vec = processFiles('rt-polarity.pos',
                                              'rt-polarity.neg',
                                              processingType=proc_type,
                                              min_df=min_df)

    # Single train/validation split.
    train_x, train_y, valid_x, valid_y = partition(
        corpus, corpus_labels, 'simple', testRatio=partitionRatio)

    train_x = vectorizeData(train_x, vec)
    valid_x = vectorizeData(valid_x, vec)

    # Fit, predict, and gather evaluation measures.
    results = classifier(model, train_x, train_y, valid_x, valid_y)

    print("\n================== Accuracy ==================")
    print(results['accuracy'])
    print("\n================== Confusion Matrix ==================")
    print(results['confusionMatrix'])
Ejemplo n.º 5
0
        t_conf[i] = float("inf")
    t_dist = 0

    while not done:
        # 1. get sensor data step (i.e. the current state.  this is different for continuous actions)
        # 2. ask if expert wants to perform the corrective demonstration
        pres = "o"
        env.render()
        while pres != "y" and pres != "n":
            pres = input(
                "Do you want to correct the last action taken? [y/n]: ")

        if pres == "n":  # 3. execute the Confident Execution step
            # 5. Put current state into classifier to get a_p, c, and db
            # 6. Get nearest neighbor for state
            a_p, c, db = classifier(svm, process_state(env.decode(state)))
            d = nearest_neighbor(nn, states, process_state(env.decode(state)))
            #states.append(process_state(env.decode(state)))

            if c > t_conf[a_p] and d < t_dist:
                state, reward, done, info = env.step(a_p)
                states.append(process_state(env.decode(state)))
                actions.append(a_p)
            else:
                pres = -1
                while pres not in [0, 1, 2, 3, 4, 5]:
                    pres = int(
                        input(
                            "Expert: enter the next action I should take [0-5]: "
                        ))
                states.append(process_state(env.decode(state)))
    def k_fold_cross_validation(self, n_split=2, classifierName=None, n=0):
        """Run k-fold cross validation over self.X/self.y with one classifier.

        Parameters
        ----------
        n_split : int
            Number of folds.
        classifierName : str
            One of "randomforest", "svm", "gradientboosting", "decisiontree",
            "linearsvm", "logistic", "knn", "sgd".
        n : int
            Extra hyper-parameter forwarded to ``decision_tree``/``KNN``
            (presumably depth / neighbour count — confirm in ``classifier``).

        Returns
        -------
        tuple
            (mean F1 score, mean feature importances, [F1 list, accuracy list]).

        Raises
        ------
        ValueError
            If ``classifierName`` is not recognised (the original code
            crashed later with NameError on the unbound prediction).
        """
        from sklearn.model_selection import KFold
        import numpy as np
        from evalMetrics import evalMetric

        kf = KFold(n_splits=n_split)
        X = np.array(self.X)
        y = np.array(self.y)
        dataset = np.array(self.dataset)
        score = 0
        feature_imp = 0
        score1 = []  # per-fold F1 scores
        score2 = []  # per-fold accuracies
        for train_indices, test_indices in kf.split(dataset):
            x_train = X[train_indices]
            x_test = X[test_indices]
            y_train = y[train_indices]
            y_test = y[test_indices]
            clf = classifier(x_train, y_train)
            if classifierName == "randomforest":
                c = clf.random_forest()
            elif classifierName == "svm":
                c = clf.support_vector()
            elif classifierName == "gradientboosting":
                c = clf.gradient_boosting()
            elif classifierName == "decisiontree":
                c = clf.decision_tree(n)
            elif classifierName == "linearsvm":
                c = clf.linear_support_vector()
            elif classifierName == "logistic":
                c = clf.logistic()
            elif classifierName == "knn":
                c = clf.KNN(n)
            elif classifierName == "sgd":
                c = clf.sgd()
            else:
                # BUG FIX: the original fell through with ``c``/``predict``
                # unbound and crashed at the metric call; fail fast instead.
                raise ValueError("unknown classifier: %s" % classifierName)
            # Predict once here instead of duplicating the call per branch.
            predict = c.predict(x_test)

            e1 = evalMetric(y_test, predict).F1_score()
            score1.append(e1)
            e = evalMetric(y_test, predict).accuracy_skore()
            score2.append(e)
            score += e1

            if classifierName == 'randomforest':
                # NOTE(review): accumulates raw feature_importances_; the
                # sibling normal_training unpacks random_forest() as a
                # 2-tuple — confirm which return shape is correct.
                feature_imp += c.feature_importances_

        print("Average Score: ", score / n_split)
        scores = [score1, score2]
        return score / n_split, feature_imp / n_split, scores