def csvConversion(old_filepath, new_filepath):
    """Feed the text file at ``old_filepath`` to ``csvFileEvaluation.createCSV``.

    The file is opened for reading and handed to ``createCSV`` together with
    both paths. Afterwards the ``rows`` and ``columns`` attributes of the
    ``csvFileEvaluation`` object are printed.

    Args:
        old_filepath: Path of the existing text file to read.
        new_filepath: Path forwarded to ``createCSV`` as the target file.

    Raises:
        IOError/OSError: If ``old_filepath`` cannot be opened for reading.
    """
    # The context manager guarantees the input file is closed even when
    # createCSV (or the reporting below) raises; the original explicit
    # close() was skipped on any exception.
    with open(old_filepath, "r") as source_file:
        evaluation = csvFileEvaluation()

        # Create csv with identified .txt file rows and columns
        evaluation.createCSV(source_file, old_filepath, new_filepath)

        # Print rows and columns reported for the processed file
        print("\nNew .csv file dimensions: ")
        print("Rows: ", evaluation.rows)
        print("Columns: ", evaluation.columns)
    # NOTE(review): the statements from here on use names that nothing
    # visible earlier in this file defines (Y, train_index, test_index,
    # X_train, X_test, start_time, fold, metrics_array, test_spec_dict, ...).
    # They look like the body of a per-fold cross-validation loop whose
    # header is missing -- confirm against the original source.
    y_train, y_test = Y[train_index], Y[test_index]

    # Naive Bayes: nbClassifier returns two elapsed-time values and the
    # predictions for X_test.
    elapsedTraining, elapsedTesting, y_pred = nbClassifier(X_train, y_train, X_test)

    # Calculate the different accuracy scores
    cm_metrics = Calculate_Metrics(y_test, y_pred)
    metrics_dict = cm_metrics.calculateScore()
    # Calculate the run time (timeit.default_timer() differences are in
    # seconds, hence the factor 1000 for milliseconds)
    elapsed = (timeit.default_timer() - start_time) * 1000

    metrics_dict.update({'Run Time(MSec)': elapsed})

    # Create the metrics report for this fold; the last argument is built
    # from the fold number, the timestamp string and the host name.
    pdf_indi = printPDF(metrics_dict)
    pdf_indi.create_pdf_indi(test_spec_dict, test_description, test_path_indi,
                            str(fold) + '_' + testTimeConvertet + '_' + hostname)

    # Collect the per-fold metrics
    metrics_array.append(metrics_dict)

# NOTE(review): these module-level statements depend on names that are not
# defined anywhere in the visible part of the file (cm_metrics,
# metrics_array, folds, test_spec_dict, path, csvfile_name, ...) -- confirm
# where they are meant to live.

# Calculate the overall score for all the folds
metrics_mean_dict = cm_metrics.calculate_mean_score(metrics_array, folds)

# Create PDF from the averaged metrics
pdf = printPDF(metrics_mean_dict)
pdf.create_pdf(test_spec_dict, test_description, test_path, '_' + testTimeConvertet + '_' + hostname)

# Create CSV from the averaged metrics and the test specification
csv = printCSV(metrics_mean_dict, test_spec_dict)
csv.createCSV(path, csvfile_name)
Example #3
0
    def runTest(self,
                path,
                treeSize,
                trainingFile,
                test_description,
                type,
                traning_size,
                create_pdf=False):
        """Run a k-fold cross-validated decision-tree test and report it.

        The data set is loaded through ``loadTestSet(trainingFile)``, split
        into six folds with ``KFold`` and every fold is classified with
        ``dtClassifier``. The per-fold metrics are averaged with
        ``calculate_mean_score`` and handed to ``printCSV``; PDF reports
        (one per fold plus one overall) are produced only on request.

        Args:
            path: Output directory prefix. Sub-directory names are appended
                by plain string concatenation, so it is expected to end
                with a path separator.
            treeSize: Passed to ``dtClassifier`` as its first argument,
                recorded as 'Depth', and used in directory and file names.
            trainingFile: Passed to ``loadTestSet`` and recorded as
                'Training Set'.
            test_description: Forwarded unchanged to the PDF writers.
            type: Recorded as 'Sample Type'. The name shadows the builtin
                but is kept because it is part of the public signature.
            traning_size: Only used as part of the CSV file name.
            create_pdf: When true, also write the per-fold and overall PDFs.

        Returns:
            The CSV file name (without directory) that was passed to
            ``printCSV.createCSV`` together with ``path``.

        Raises:
            OSError: If one of the output directories cannot be created.
        """
        ## Overall configurations
        hostname = socket.gethostname()

        ## Individual test configurations
        testTimeConvertet = time.strftime("(%Y-%m-%d)-(%H-%M-%S)")
        csvfile_name = (str(treeSize) + '_' + testTimeConvertet + '_' +
                        str(traning_size) + '.csv')
        test_path = path + str(treeSize) + '/'
        test_path_indi = test_path + 'Folds/'

        # Create the output directories. An OSError is tolerated only when
        # the directory already exists; anything else is re-raised.
        for directory in (path, test_path, test_path_indi):
            try:
                os.makedirs(directory)
            except OSError:
                if not os.path.isdir(directory):
                    raise

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("DT Test: " + str(treeSize) + "  Time: " + testTimeConvertet)

        # Load the data set
        test_set = loadTestSet(trainingFile)
        X, Y = test_set.loadTestSet()

        # Set number of folds
        folds = 6
        kf = KFold(n_splits=folds)

        test_spec_dict = {
            'Classifier': 'DT',
            'Depth': treeSize,
            'Date': testTimeConvertet,
            'Host': hostname,
            'Training Set': trainingFile,
            'Sample Type': type,
        }

        metrics_array = []
        total_training = 0
        total_testing = 0

        for fold, (train_index, test_index) in enumerate(kf.split(X), 1):
            start_time = timeit.default_timer()

            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = Y[train_index], Y[test_index]

            # Train and evaluate the decision tree on this fold
            elapsedTraining, elapsedTesting, y_pred = dtClassifier(
                treeSize, X_train, y_train, X_test)
            total_training += elapsedTraining
            total_testing += elapsedTesting

            # Calculate the different accuracy scores
            cm_metrics = Calculate_Metrics(y_test, y_pred)
            metrics_dict = cm_metrics.calculateScore()

            # Wall-clock time of the whole fold; default_timer() differences
            # are in seconds, hence the factor 1000 for milliseconds.
            elapsed = (timeit.default_timer() - start_time) * 1000
            metrics_dict.update({'Run Time(MSec)': elapsed})

            if create_pdf:
                # Create metrics report for the fold
                pdf_indi = printPDF(metrics_dict)
                pdf_indi.create_pdf_indi(
                    test_spec_dict, test_description, test_path_indi,
                    str(fold) + '_' + testTimeConvertet + '_' + hostname)

            metrics_array.append(metrics_dict)

        # Calculate the overall score for all the folds. The call goes
        # through the Calculate_Metrics instance of the last fold.
        metrics_mean_dict = cm_metrics.calculate_mean_score(
            metrics_array, folds)

        # Average over the actual fold count instead of a hard-coded 6 so
        # the report stays correct if `folds` is ever changed.
        print('Time in ms for training: ' + str(total_training / folds))
        print('Time in ms for testing: ' + str(total_testing / folds))

        if create_pdf:
            # Create PDF
            pdf = printPDF(metrics_mean_dict)
            pdf.create_pdf(
                test_spec_dict, test_description, test_path,
                str(treeSize) + '_' + testTimeConvertet + '_' + hostname)

        # Create CSV
        csv = printCSV(metrics_mean_dict, test_spec_dict)
        csv.createCSV(path, csvfile_name)

        return csvfile_name