Example #1
 def test_entropy(self):
     obj_tree = tree.DecisionTree()
     test_arr = []
     test_arr.append({
         'arr':
         np.array([1 for x in range(9)] + [0 for x in range(11)]),
         'answer':
         0.993
     })
     test_arr.append({
         'arr':
         np.array([1 for x in range(8)] + [0 for x in range(5)]),
         'answer':
         0.961
     })
     test_arr.append({
         'arr':
         np.array([1 for x in range(1)] + [0 for x in range(6)]),
         'answer':
         0.592
     })
     test_arr.append({'arr': np.array([1, 2, 3, 4, 5, 6]), 'answer': 2.585})
     for x in test_arr:
         val = obj_tree._entropy(x['arr'])
         self.assertEqual(round(val, 3), x['answer'])
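For reference, the expected values above are consistent with Shannon entropy computed over the class frequencies with a base-2 logarithm. A minimal sketch of such an _entropy helper (an assumption about the implementation, checked against the test values, not the library's actual code):

import numpy as np

def entropy(y):
    # Shannon entropy (base 2) of the label distribution in y,
    # e.g. 9 ones and 11 zeros -> about 0.993, matching the test above.
    _, counts = np.unique(y, return_counts=True)
    p = counts / counts.sum()
    return -np.sum(p * np.log2(p))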
    def train(self,X,Y,vX=None,vY=None):
            '''
            Trains a RandomForest using the provided training set..
            
            Input:
            ---------
            X: a m x d matrix of training data...
            Y: labels (m x 1) label matrix

            vX: a n x d matrix of validation data (will be used to stop growing the RF)...
            vY: labels (n x 1) label matrix

            Returns:
            -----------

            '''

            nexamples, nfeatures= X.shape

            self.findScalingParameters(X)
            if self.scalefeat:
                X=self.applyScaling(X)

            #print X.max(axis=0), X.min(axis=0)
            self.trees=[]
            
            #-----------------------TODO-----------------------#
            #--------Write Your Code Here ---------------------#
            while len(self.trees)<self.ntrees:
                dtnew=tree.DecisionTree(weaklearner=self.weaklearner, nsplits=self.nsplits, nfeattest=self.nfeattest)
                print('training new tree')
                dtnew.train(X, Y)
                self.trees.append(dtnew)
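The loop above trains every tree on the full, identical training set, so the ensemble members only differ through any randomness inside the weak learner. A minimal hedged sketch of adding bootstrap sampling, assuming numpy is imported as np and that tree.DecisionTree exposes the same constructor keywords and train(X, Y) method used above:

import numpy as np
import tree  # same module used throughout these examples

def train_bagged_forest(X, Y, ntrees, weaklearner, nsplits, nfeattest):
    # Hypothetical helper: train each tree on a bootstrap sample
    # (drawn with replacement) instead of the full training set.
    trees = []
    for _ in range(ntrees):
        idx = np.random.choice(X.shape[0], size=X.shape[0], replace=True)
        dt = tree.DecisionTree(weaklearner=weaklearner, nsplits=nsplits,
                               nfeattest=nfeattest)
        dt.train(X[idx], Y[idx])
        trees.append(dt)
    return trees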
Example #3
    def train(self,X,Y,vX=None,vY=None):
            '''
            Trains a RandomForest using the provided training set..
            
            Input:
            ---------
            X: a m x d matrix of training data...
            Y: labels (m x 1) label matrix

            vX: a n x d matrix of validation data (will be used to stop growing the RF)...
            vY: labels (n x 1) label matrix

            Returns:
            -----------

            '''

            nexamples, nfeatures= X.shape

            self.findScalingParameters(X)
            if self.scalefeat:
                X=self.applyScaling(X)

            self.trees=[]
            
            self.dT = tree.DecisionTree(weaklearner=self.weaklearner)
            
            #-----------------------TODO-----------------------#
            #--------Write Your Code Here ---------------------#
            #S_D,S_DY = split_data(X , Y , ba)
            for i in range(0, self.ntrees):
                self.trees.append(self.dT.build_tree(X, Y))
Example #4
 def test_gini(self):
     obj_tree = tree.DecisionTree()
     test_arr = []
     test_arr.append({
         'arr':
         np.array([1 for x in range(9)] + [0 for x in range(11)]),
         'answer':
         0.495
     })
     test_arr.append({
         'arr':
         np.array([1 for x in range(8)] + [0 for x in range(5)]),
         'answer':
         0.473
     })
     test_arr.append({
         'arr':
         np.array([1 for x in range(1)] + [0 for x in range(6)]),
         'answer':
         0.245
     })
     test_arr.append({'arr': np.array([1, 2, 3, 4, 5, 6]), 'answer': 0.833})
     for x in test_arr:
         val = obj_tree._gini(x['arr'])
         self.assertEqual(round(val, 3), x['answer'])
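The expected values above correspond to Gini impurity, i.e. one minus the sum of squared class proportions. A minimal sketch of such a _gini helper (an assumption about the implementation, consistent with the test values):

import numpy as np

def gini(y):
    # Gini impurity: 1 - sum(p_k ** 2) over class proportions p_k,
    # e.g. 9 ones and 11 zeros -> 1 - (0.45**2 + 0.55**2) = 0.495.
    _, counts = np.unique(y, return_counts=True)
    p = counts / counts.sum()
    return 1.0 - np.sum(p ** 2)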
Example #5
 def test_fit_and_predict(self):
     obj = tree.DecisionTree(min_samples_split=2, max_depth=1)
     X = np.array([[1, 2, 3], [0, 5, 6], [7, 8, 9], [2, 2, 4], [1, 1, 3]])
     y = np.array([1, 1, 3, 4, 5])
     obj.fit(X, y)
     X = np.array([[7, 8, 9]])
     result_y = obj.predict(X)
     self.assertEqual(result_y[0], 1)
Example #6
    def test_chek_y(self):
        obj = tree.DecisionTree()
        X = np.array([[1, 2, 3], [0, 5, 6], [7, 8, 9], [2, 2, 4], [1, 1, 3]])
        result = obj._chek_y(X)
        self.assertEqual(result, True)

        X = np.array([[1, 2, 1], [0, 5, 1], [7, 8, 1], [2, 2, 1], [1, 1, 1]])
        result = obj._chek_y(X)
        self.assertEqual(result, False)
Example #7
    def fit(self, X, y):
        self.trees = []

        for _ in range(self.n_trees):
            classifier = tree.DecisionTree(self.leaf_size, self.n_trials)
            self.trees.append(classifier)

        for i, classifier in enumerate(self.trees):
            classifier = classifier.fit(X, y)
            self.trees[i] = classifier
        return self
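The fit above only stores the trained trees; a hedged sketch of a matching predict that aggregates the ensemble by majority vote, assuming each fitted tree exposes predict(X) and that the class labels are small non-negative integers (neither is confirmed by the source):

import numpy as np

def predict(self, X):
    # Collect one prediction per tree, shape (n_trees, n_samples).
    votes = np.array([clf.predict(X) for clf in self.trees], dtype=int)
    # Majority vote per sample (ties resolved towards the smaller label).
    return np.array([np.bincount(col).argmax() for col in votes.T])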
Example #8
 def test_mad_median(self):
     obj_tree = tree.DecisionTree()
     test_arr = []
     test_arr.append({
         'arr': np.array([1, 2, 3, 4, 5, 6, 7, 8, 9]),
         'answer': 2.222
     })
     test_arr.append({
         'arr': np.array([1, 2, 7, 4, 5, 6, 7, 8, 9]),
         'answer': 2.111
     })
     for x in test_arr:
         val = obj_tree._mad_median(x['arr'])
         self.assertEqual(round(val, 3), x['answer'])
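The expected values above match the mean absolute deviation from the median, a criterion sometimes used for regression trees. A minimal sketch, assuming that is what _mad_median computes:

import numpy as np

def mad_median(y):
    # Mean absolute deviation of y from its median,
    # e.g. [1..9] -> mean(|y - 5|) = 20/9 = 2.222.
    return np.mean(np.abs(y - np.median(y)))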
Example #9
 def test_variance(self):
     obj_tree = tree.DecisionTree()
     test_arr = []
     test_arr.append({
         'arr': np.array([1, 2, 3, 4, 5, 6, 7, 8, 9]),
         'answer': 6.667
     })
     test_arr.append({
         'arr': np.array([1, 2, 7, 4, 5, 6, 7, 8, 9]),
         'answer': 6.469
     })
     for x in test_arr:
         val = obj_tree._variance(x['arr'])
         self.assertEqual(round(val, 3), x['answer'])
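Similarly, the expected values above match the population variance of the target values. A minimal sketch, assuming _variance is a plain variance criterion:

import numpy as np

def variance(y):
    # Population variance (ddof=0), e.g. var([1..9]) = 60/9 = 6.667.
    return np.var(y)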
Example #10
    def train(self, X, Y, vX=None, vY=None):
        '''
            Trains a RandomForest using the provided training set..
            
            Input:
            ---------
            X: a m x d matrix of training data...
            Y: labels (m x 1) label matrix

            vX: a n x d matrix of validation data (will be used to stop growing the RF)...
            vY: labels (n x 1) label matrix

            Returns:
            -----------

            '''
        self.classes = np.unique(Y)
        nexamples, nfeatures = X.shape

        self.findScalingParameters(X)
        if self.scalefeat:
            X = self.applyScaling(X)

        self.trees = []

        #-----------------------TODO-----------------------#
        #--------Write Your Code Here ---------------------#
        for i in range(0, self.ntrees):
            arranged = np.arange(0, nexamples)
            np.random.shuffle(arranged)  # shuffles the index array in place
            #print 'shape of Y',Y.shape
            #print 'shape of X',X.shape
            shuffledY = Y[arranged]
            shuffledY = np.squeeze(shuffledY)
            print('creating a tree')
            my_tree = tree.DecisionTree(maxdepth=self.treedepth,
                                        weaklearner=self.weaklearner,
                                        nsplits=self.nsplits,
                                        nfeattest=nfeatures)

            shuffledX = X[arranged]
            shuffledX = np.squeeze(shuffledX)
            #print 'shape of sqX',shuffledX.shape
            #print 'shape of sqY',shuffledY.shape
            print('training a tree')
            my_tree.train(shuffledX, shuffledY)
            print('appending the created tree to the list')
            self.trees.append(my_tree)
Example #11
    def train(self,X,Y,vX=None,vY=None):
        nexamples, nfeatures= X.shape

        self.findScalingParameters(X)
        if self.scalefeat:
            X=self.applyScaling(X)

        self.trees=[]

        for i in range(self.ntrees):
            ShufflingIndexes=list(range(len(X)))
            rd.shuffle(ShufflingIndexes)
            x,y=X[ShufflingIndexes],Y[ShufflingIndexes]
            Indices=int(len(X)*(1-rd.uniform(0,0.4)))
            Sample_X,Sample_Y=x[:Indices,:],y[:Indices]
            Tree=tree.DecisionTree(0.95,5,self.treedepth,self.weaklearner)
            Tree.train(Sample_X,Sample_Y)
            self.trees.append(Tree)
Example #12
    def train_tree(self, X, Y, verbose=True):
        '''
        Trains A tree based on given arguments

        return : the Decision Tree object
        '''

        dt = tree.DecisionTree(exthreshold=10,
                               maxdepth=self.treedepth,
                               weaklearner=self.weaklearner,
                               nsplits=self.nsplits)
        dt.verbose = verbose

        if self.usebagging:
            X_train, _, Y_train, _ = train_test_split(
                X, Y, train_size=self.baggingfraction)
            dt.train(X_train, Y_train)
            return dt

        dt.train(X, Y)
        return dt
    def train(self, X, Y, vX=None, vY=None):
        '''
            Trains a RandomForest using the provided training set..
        
            Input:
            ---------
            X: a m x d matrix of training data...
            Y: labels (m x 1) label matrix

            vX: a n x d matrix of validation data (will be used to stop growing the RF)...
            vY: labels (n x 1) label matrix

            Returns:
            -----------

        '''

        nexamples, nfeatures = X.shape

        self.findScalingParameters(X)
        if self.scalefeat:
            X = self.applyScaling(X)

        self.trees = []

        #-----------------------TODO-----------------------#
        #--------Write Your Code Here ---------------------#

        if vX is None:

            for t in range(self.ntrees):

                myTree = tree.DecisionTree(purity=0.95,
                                           maxdepth=self.treedepth,
                                           weaklearner=self.weaklearner,
                                           nsplits=self.nsplits)
                myTree.train(X, Y)

                self.trees.append(myTree)
    def __init__(self, ntrees=10,treedepth=5,usebagging=False,baggingfraction=0.6,
        weaklearner="Conic",
        nsplits=10,        
        nfeattest=None, posteriorprob=False,scalefeat=True ):        
        """      
            Build a random forest classifier.

            Input:
            ---------------
                ntrees: number of trees in random forest
                treedepth: depth of each tree 
                usebagging: to use bagging for training multiple trees
                baggingfraction: what fraction of training set to use for building each tree,
                weaklearner: which weaklearner to use at each internal node, e.g. "Conic, Linear, Axis-Aligned, Axis-Aligned-Random",
                nsplits: number of splits to test during each feature selection round for finding the best IG,
                nfeattest: number of features to test for the random Axis-Aligned weaklearner
                posteriorprob: whether to return the posterior class probabilities
                scalefeat: whether to scale features or not
        """

        self.ntrees=ntrees
        self.treedepth=treedepth
        self.usebagging=usebagging
        self.baggingfraction=baggingfraction

        self.weaklearner=weaklearner
        self.nsplits=nsplits
        self.nfeattest=nfeattest
        
        self.posteriorprob=posteriorprob
        
        self.scalefeat=scalefeat
        self.trees=[]
        for i in range(0,self.ntrees):
            self.trees.append(tree.DecisionTree(purity=0.95,maxdepth=5,weaklearner=self.weaklearner))
        
        pass    
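A short usage sketch of this constructor together with the train method shown in the other examples; the class name RandomForest and the predict call are assumptions, not taken from the source:

# Hypothetical usage; RandomForest is assumed to be the class defined above.
rf = RandomForest(ntrees=10, treedepth=5, usebagging=True,
                  baggingfraction=0.6, weaklearner="Conic", nsplits=10)
rf.train(X_train, Y_train)          # X_train: m x d matrix, Y_train: m x 1 labels
# predictions = rf.predict(X_test)  # assuming a predict method exists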
Example #15
    def train(self,X,Y,vX=None,vY=None):
        '''
            Trains a RandomForest using the provided training set..
        
            Input:
            ---------
            X: a m x d matrix of training data...
            Y: labels (m x 1) label matrix

            vX: a n x d matrix of validation data (will be used to stop growing the RF)...
            vY: labels (n x 1) label matrix

            Returns:
            -----------

        '''

        nexamples, nfeatures= X.shape

        self.findScalingParameters(X)
        if self.scalefeat:
            X=self.applyScaling(X)

        self.trees=[]
            
        #-----------------------TODO-----------------------#
        #--------Write Your Code Here ---------------------#
        self.classes = np.unique(Y)
        for i in range(self.ntrees):
            sort = np.arange(0,nexamples)
            np.random.shuffle(sort)  # shuffle the index array in place
            mixed_valX = np.squeeze(X[sort])
            mixed_valY = np.squeeze(Y[sort])
            
            dt = tree.DecisionTree(purity=0.9,maxdepth=self.treedepth,weaklearner=self.weaklearner,nsplits=self.nsplits,nfeattest=nfeatures)
            dt.train(mixed_valX,mixed_valY)
            self.trees.append(dt)
Example #16
import os
import numpy as np
import sklearn.metrics
from importlib import reload  # reload is not a builtin in Python 3

import load_kc_data
reload(load_kc_data)
import tree
reload(tree)
import svm
reload(svm)
import linear
reload(linear)
import ensemble
reload(ensemble)

os.chdir('My/Path/Here')

train_X, train_y, dev_X, dev_y, test_X, test_y = load_kc_data.load_kc_housing()

# Train models
tree_model = tree.DecisionTree(train_X, train_y, dev_X, dev_y, test_X, test_y)
linear_model = linear.LinearRegression(train_X, train_y, dev_X, dev_y, test_X, test_y)

# Results
def evaluate_model(clf, X, y):
    y_pred = clf.predict(X)
    mse = sklearn.metrics.mean_squared_error(y, y_pred)
    print("The model's MSE is " + str(mse) + ", which is " + str(100 * mse / np.var(y)) + "% of data variance.")

print("Decision Tree:")
evaluate_model(tree_model, test_X, test_y)
print("\nLinear Regression:")
evaluate_model(linear_model, test_X, test_y)

# Averaging the results of the two models
ensemble_rms = ensemble.Ensemble(test_X, test_y, [tree_model,linear_model])
Example #17
def nFoldValidationIrisData(n, x, y):
    # We know we have balanced and sorted data
    # so we split them up so we can equal distribute them into n-buckets

    list_of_data_x, list_of_data_y = splitDatainNBuckets(n, x, y)
    rows, _ = np.shape(list_of_data_x[0])

    depth = 30
    iterations = 100

    indexes_shuffle = np.arange(rows)
    num_of_training_data = math.ceil(rows * 4 / 5)
    avg_acc_test = [0] * depth
    avg_acc_train = [0] * depth
    for _ in range(iterations):
        # shuffle the data 100 times for each of the buckets
        # Shuffle the rows the same for each of the datasets
        np.random.shuffle(indexes_shuffle)
        for j in range(len(list_of_data_x)):
            list_of_data_x[j] = list_of_data_x[j][indexes_shuffle, :]
            list_of_data_y[j] = list_of_data_y[j][indexes_shuffle, :]

        for d in range(depth):
            # for each depth calculate the accuracy
            for j in range(len(list_of_data_x)):
                # For each of the buckets with data
                # take out random data for training and testing
                training_data_x = list_of_data_x[j][0:num_of_training_data, :]
                training_data_y = list_of_data_y[j][0:num_of_training_data, :]

                testing_data_x = list_of_data_x[j][num_of_training_data:, :]
                testing_data_y = list_of_data_y[j][num_of_training_data:, :]

                #Train
                dTree = tree.DecisionTree(phase='Training',
                                          x=training_data_x,
                                          y=training_data_y,
                                          depth=d)

                #Test
                classified_y = tree.DecisionTree(phase='Validation',
                                                 x=testing_data_x,
                                                 tree=dTree)
                accuracy = tree.calculateAccuracy(classified_y, testing_data_y)
                avg_acc_test[d] += accuracy

                classified_y_train = tree.DecisionTree(phase='Validation',
                                                       x=training_data_x,
                                                       tree=dTree)
                accuracy_train = tree.calculateAccuracy(
                    classified_y_train, training_data_y)
                avg_acc_train[d] += accuracy_train

    avg_acc_test = [
        x / (iterations * len(list_of_data_x)) for x in avg_acc_test
    ]
    avg_acc_train = [
        x / (iterations * len(list_of_data_x)) for x in avg_acc_train
    ]

    return (
        avg_acc_test,
        avg_acc_train,
    )
Example #18
 def fit(self):
     z_bound_left, z_bound_right = hyperplane.get_bounds(self.c_hyperplanes)
     phi_opt = self.born_again(z_bound_left, z_bound_right)
     return tree.DecisionTree(
         self.extract_optimal_solutions(z_bound_left, z_bound_right,
                                        phi_opt), self.columns)
Example #19
 def __init__(self):
     self.b = bayes.Bayes()
     self.t = tree.DecisionTree()
def trainGenreID(documents):
    data, labels = dataGenre(documents)
    s = tr.DecisionTree()
    s.train(data, labels, 10)
    return s
def trainYearID(documents):
    data, labels = dataYear(documents)
    s = tr.DecisionTree()
    s.train(data, labels, 10)
    return s
def trainTopID(documents):
    data, labels = dataTop(documents)
    s = tr.DecisionTree()
    s.train(data, labels, 30)
    return s
def trainBottomID(documents):
    data, labels = dataBottom(documents)
    s = tr.DecisionTree()
    s.train(data, labels, 30)
    return s
Example #24
def main(data_type, class_labels_fn, class_names_fn, ft_names_fn, max_depth,
         limit_entities, limited_label_fn, vector_names_fn, dt_dev,
         doc_topic_prior, topic_word_prior, n_topics, file_name,
         final_csv_name, high_amt, low_amt, cross_val, rewrite_files, classify,
         tf_fn):

    print("importing class all")
    tf = np.asarray(
        sp.load_npz("../data/" + data_type + "/bow/frequency/phrases/" +
                    tf_fn).todense())
    names = dt.import1dArray(ft_names_fn)
    variables_to_execute = list(
        product(doc_topic_prior, topic_word_prior, n_topics))
    print("executing", len(variables_to_execute), "variations")
    csvs = []
    csv_fns = []
    file_names = []
    for vt in variables_to_execute:
        doc_topic_prior = vt[0]
        topic_word_prior = vt[1]
        n_topics = vt[2]
        file_names.append(file_name + "DTP" + str(doc_topic_prior) + "TWP" +
                          str(topic_word_prior) + "NT" + str(n_topics))
    final_csv_fn = "../data/" + data_type + "/rules/tree_csv/" + file_name + final_csv_name + ".csv"
    for vt in range(len(variables_to_execute)):
        doc_topic_prior = variables_to_execute[vt][0]
        topic_word_prior = variables_to_execute[vt][1]
        n_topics = variables_to_execute[vt][2]
        file_name = file_names[vt]
        LDA(tf, names, n_topics, file_name, doc_topic_prior, topic_word_prior,
            data_type, rewrite_files)

        dimension_names_fn = "../data/" + data_type + "/LDA/names/" + file_name + ".txt"

        #NMFFrob(dt.import2dArray("../data/"+data_type+"/bow/ppmi/class-all-100-10-all"),  dt.import1dArray("../data/"+data_type+"/bow/names/100.txt"), 200, file_name)

        topic_model_fn = "../data/" + data_type + "/LDA/rep/" + file_name + ".txt"
        cv_fns = []
        og_fn = file_name
        for c in range(cross_val):
            file_name = og_fn + " " + str(cross_val) + "CV " + str(
                c) + classify + "Dev" + str(dt_dev)
            csv_name = "../data/" + data_type + "/rules/tree_csv/" + file_name + ".csv"
            cv_fns.append(csv_name)

            tree.DecisionTree(topic_model_fn,
                              class_labels_fn,
                              class_names_fn,
                              dimension_names_fn,
                              file_name,
                              10000,
                              max_depth=1,
                              balance="balanced",
                              criterion="entropy",
                              save_details=False,
                              cv_splits=cross_val,
                              split_to_use=c,
                              data_type=data_type,
                              csv_fn=csv_name,
                              rewrite_files=rewrite_files,
                              development=dt_dev,
                              limit_entities=limit_entities,
                              limited_label_fn=limited_label_fn,
                              vector_names_fn=vector_names_fn,
                              clusters_fn=topic_model_fn,
                              cluster_duplicates=True,
                              save_results_so_far=False)

            tree.DecisionTree(topic_model_fn,
                              class_labels_fn,
                              class_names_fn,
                              dimension_names_fn,
                              file_name,
                              10000,
                              max_depth=max_depth,
                              balance="balanced",
                              criterion="entropy",
                              save_details=False,
                              cv_splits=cross_val,
                              split_to_use=c,
                              data_type=data_type,
                              csv_fn=csv_name,
                              rewrite_files=rewrite_files,
                              development=dt_dev,
                              limit_entities=limit_entities,
                              limited_label_fn=limited_label_fn,
                              vector_names_fn=vector_names_fn,
                              clusters_fn=topic_model_fn,
                              cluster_duplicates=True,
                              save_results_so_far=False)

            tree.DecisionTree(topic_model_fn,
                              class_labels_fn,
                              class_names_fn,
                              dimension_names_fn,
                              file_name + "None",
                              10000,
                              max_depth=None,
                              balance="balanced",
                              criterion="entropy",
                              save_details=False,
                              data_type=data_type,
                              csv_fn=csv_name,
                              rewrite_files=rewrite_files,
                              cv_splits=cross_val,
                              split_to_use=c,
                              development=dt_dev,
                              limit_entities=limit_entities,
                              limited_label_fn=limited_label_fn,
                              vector_names_fn=vector_names_fn,
                              clusters_fn=topic_model_fn,
                              cluster_duplicates=True,
                              save_results_so_far=False)

        dt.averageCSVs(cv_fns)
        file_name = og_fn + " " + str(cross_val) + "CV " + str(
            0) + classify + "Dev" + str(dt_dev)
        csvs.append("../data/" + data_type + "/rules/tree_csv/" + file_name +
                    "AVG.csv")
    dt.arrangeByScore(np.unique(np.asarray(csvs)), final_csv_fn)
def main():
    hidden_layer_sizes = [100, 100, 100, 100, 100, 100]
    file_names = []

    data_type = "wines"

    for f in range(len(hidden_layer_sizes)):
        #file_names.append("filmsBOWL" + str(f + 1) + "" + str(hidden_layer_sizes[f]))
        #file_names.append("filmsPPMIDropoutL"+str(f+1)+""+str(hidden_layer_sizes[f]))
        file_names.append(data_type + "100L" + str(f + 1) + "" +
                          str(hidden_layer_sizes[f]))

    #init_vector_path= "../data/" + data_type + "/bow/binary/phrases/class-all"
    #init_vector_path = "../data/" + data_type + "/bow/ppmi/class-all"
    #init_vector_path="../data/" + data_type + "/nnet/spaces/films200L1100N0.5pavPPMIN0.5FTadagradcategorical_crossentropy100.txt"
    init_vector_path = "../data/" + data_type + "/nnet/spaces/wines100.txt"
    end_file_names = []

    # Class and vector inputs
    for i in range(len(file_names)):
        #These are the parameter values
        hidden_layer_size = hidden_layer_sizes[i]
        batch_size = 200
        reg = 0.0
        noise = 0.5
        dropout_noise = None
        file_name = file_names[i] + "N" + str(noise)
        hidden_activation = "tanh"
        output_activation = "tanh"
        optimizer_name = "sgd"
        learn_rate = 0.01
        epochs = 500
        activity_reg = 0
        loss = "mse"
        class_path = None
        print(file_name)
        #deep_size = hidden_layer_sizes[i]
        deep_size = None
        if deep_size is not None:
            file_name = file_name + "DL" + str(deep_size)
        #NN Setup
        """
        SDA = NeuralNetwork( noise=noise, optimizer_name=optimizer_name, batch_size=batch_size, epochs=epochs, dropout_noise=dropout_noise,
                         vector_path=init_vector_path,  hidden_layer_size=hidden_layer_size, class_path=class_path, reg=reg, data_type=data_type,
                               hidden_activation=hidden_activation, output_activation=output_activation, learn_rate=learn_rate,
                              file_name=file_name, network_type="da", deep_size=deep_size, activity_reg=activity_reg)
        """
        file_name = "wines100trimmed"

        vector_path = "../data/" + data_type + "/nnet/spaces/" + file_name + ".txt"
        init_vector_path = "../data/" + data_type + "/nnet/spaces/" + file_name + ".txt"
        past_model_weights_fn = [
            "../data/" + data_type + "/nnet/weights/L1" + file_name + ".txt"
        ]
        past_model_bias_fn = [
            "../data/" + data_type + "/nnet/bias/L1" + file_name + ".txt"
        ]
        hidden_space_fn = "../data/" + data_type + "/nnet/spaces/" + file_name + ".txt"

        # Get SVM scores

        lowest_count = 50
        highest_count = 0
        #vector_path = "../data/" + data_type + "/nnet/spaces/"+file_name+"L1.txt"
        class_path = "../data/" + data_type + "/bow/binary/phrases/class-all-" + str(
            lowest_count)
        property_names_fn = "../data/" + data_type + "/bow/names/" + str(
            lowest_count) + ".txt"
        svm_type = "svm"
        file_name = file_name + svm_type
        """
        svm.getSVMResults(vector_path, class_path, property_names_fn, file_name, lowest_count=lowest_count, highest_count=highest_count,
                          svm_type=svm_type, get_kappa=True, get_f1=False, single_class=True, data_type=data_type)
        """
        directions_fn = "../data/" + data_type + "/svm/directions/" + file_name + str(
            lowest_count) + ".txt"
        # Get rankings
        vector_names_fn = "../data/" + data_type + "/nnet/spaces/entitynames.txt"
        class_names_fn = "../data/" + data_type + "/bow/names/" + str(
            lowest_count) + ".txt"
        directions_fn = "../data/" + data_type + "/svm/directions/" + file_name + str(
            lowest_count) + ".txt"

        #rank.getAllPhraseRankings(directions_fn, vector_path, class_names_fn, vector_names_fn, file_name, data_type=data_type)

        #ndcg.getNDCG("../data/" + data_type + "/rank/numeric/"+file_name+"ALL.txt",file_name, data_type, lowest_count)

        scores_fn = "../data/" + data_type + "/ndcg/" + file_name + ".txt"
        file_name = file_name + "ndcg"
        kappa = False
        #scores_fn = "../data/" + data_type + "/svm/kappa/" + file_name + str(lowest_count)+".txt"
        #file_name = file_name + "kappa"
        #kappa = True

        # Get clusters
        amt_high_directions = hidden_layer_size * 2
        amt_low_directions = 13000
        amt_of_clusters = hidden_layer_size * 2
        #scores_fn = "../data/" + data_type + "/svm/kappa/"+file_name+"200.txt"
        #file_name = file_name + "similarityclustering"
        #cluster.getClusters(directions_fn, scores_fn, class_names_fn, False,  amt_high_directions, amt_low_directions, file_name, amt_of_clusters)
        clusters_fn = "../data/" + data_type + "/cluster/clusters/" + file_name + ".txt"
        property_names_fn = "../data/" + data_type + "/cluster/names/" + file_name + ".txt"
        percentage_bin = 1
        #rank.getAllRankings(clusters_fn, vector_path, property_names_fn, vector_names_fn, 0.2, 1, False, file_name, False, data_type)

        names_fn = "../data/" + data_type + "/bow/names/" + str(
            lowest_count) + ".txt"
        dissimilarity_threshold = 0.5
        similarity_threshold = 0.9
        cluster_amt = 200
        amount_to_start = 8000
        score_limit = 0.95
        print(file_name)
        add_all_terms = False
        file_name = file_name + "not all terms" + str(score_limit)
        hierarchy.initClustering(vector_path, directions_fn, scores_fn,
                                 names_fn, amount_to_start, False,
                                 dissimilarity_threshold, cluster_amt,
                                 score_limit, file_name, kappa,
                                 similarity_threshold, add_all_terms,
                                 data_type)

        # Get rankings
        clusters_fn = "../data/" + data_type + "/cluster/hierarchy_directions/" + file_name + ".txt"
        property_names_fn = "../data/" + data_type + "/cluster/hierarchy_names/" + file_name + ".txt"
        vector_names_fn = "../data/" + data_type + "/nnet/spaces/entitynames.txt"

        rank.getAllRankings(clusters_fn, vector_path, property_names_fn,
                            vector_names_fn, 0.2, 1, False, file_name, False,
                            data_type)

        #file_name = "films100previouswork"
        # Get PAV
        ranking_fn = "../data/" + data_type + "/rank/numeric/" + file_name + ".txt"

        #fto.pavPPMI(property_names_fn, ranking_fn, file_name, data_type)

        #fto.pavTermFrequency(ranking_fn, cluster_names_fn, file_name, False)
        #fto.binaryClusterTerm(cluster_names_fn, file_name)
        #fto.binaryInCluster(property_names_fn, file_name)
        discrete_labels_fn = "../data/" + data_type + "/rank/discrete/" + file_name + "P1.txt"

        # Use PAV as class vectors
        fine_tune_weights_fn = [clusters_fn]
        epochs = 2000
        batch_size = 200
        learn_rate = 0.001
        is_identity = True
        identity_swap = False
        randomize_finetune_weights = False
        corrupt_finetune_weights = False
        from_ae = True
        #from_ae = False
        finetune_size = 200
        fn = file_name

        # Running Finetune on original space
        file_name = file_name + "pavPPMI"
        class_path = "../data/" + data_type + "/finetune/" + file_name + ".txt"

        if randomize_finetune_weights:
            fine_tune_weights_fn = None
            file_name = file_name + "N" + str(noise) + "FTR"
        elif corrupt_finetune_weights:
            file_name = file_name + "N" + str(noise) + "FTC"
        else:
            file_name = file_name + "N" + str(noise) + "FT"

        if is_identity:
            file_name = file_name + "IT"
        if identity_swap:
            file_name = file_name + "ITS"
            file_name = file_name + "ITS"
        print(file_name)

        loss = "mse"
        optimizer_name = "sgd"
        hidden_activation = "tanh"
        finetune_activation = "linear"
        file_name = file_name + optimizer_name + loss + str(epochs)

        print(file_name)
        amount_of_finetune = 1
        """
        SDA = NeuralNetwork( noise=0, fine_tune_weights_fn=fine_tune_weights_fn, optimizer_name=optimizer_name,  network_type="ft",
                             past_model_bias_fn=past_model_bias_fn,  randomize_finetune_weights=randomize_finetune_weights,
                             vector_path=init_vector_path,  hidden_layer_size=hidden_layer_size, class_path=class_path,
                             amount_of_finetune=amount_of_finetune, identity_swap=identity_swap,
                               hidden_activation=hidden_activation, output_activation=output_activation, epochs=epochs,
                             learn_rate=learn_rate, is_identity=is_identity, finetune_activation=finetune_activation,
                         batch_size=batch_size, past_model_weights_fn = past_model_weights_fn, loss=loss,
                             file_name=file_name, from_ae=from_ae, finetune_size=finetune_size, data_type=data_type)
        """
        init_vector_path = "../data/" + data_type + "/nnet/spaces/" + file_name + "L1.txt"

        # Get SVM scores
        lowest_count = 200
        highest_count = 10000
        vector_path = "../data/" + data_type + "/nnet/spaces/" + file_name + "L1.txt"
        class_path = "../data/" + data_type + "/bow/binary/phrases/class-all-" + str(
            lowest_count)
        property_names_fn = "../data/" + data_type + "/bow/names/" + str(
            lowest_count) + ".txt"
        svm_type = "svm"
        file_name = file_name + svm_type
        #svm.getSVMResults(vector_path, class_path, property_names_fn, file_name, lowest_count=lowest_count, highest_count=highest_count, svm_type=svm_type, get_kappa=False, get_f1=False)
        # Get rankings
        vector_names_fn = "../data/" + data_type + "/nnet/spaces/entitynames.txt"
        class_names_fn = "../data/" + data_type + "/bow/names/" + str(
            lowest_count) + ".txt"
        directions_fn = "../data/" + data_type + "/svm/directions/" + file_name + str(
            lowest_count) + ".txt"
        #rank.getAllPhraseRankings(directions_fn, vector_path, property_names_fn, vector_names_fn, file_name)
        # file_name = file_name + "ndcg"
        #ndcg.getNDCG("../data/" + data_type + "/rank/numeric/"+file_name+"ALL.txt",file_name)

        names_fn = "../data/" + data_type + "/bow/names/" + str(
            lowest_count) + ".txt"
        similarity_threshold = 0.5
        cluster_amt = 200
        amount_to_start = 8000
        score_limit = 0.9
        print(file_name)
        #hierarchy.initClustering(vector_path, directions_fn, scores_fn, names_fn, amount_to_start, False, similarity_threshold,  cluster_amt, score_limit, file_name, kappa)
        """
        scores_fn = "../data/" + data_type + "/svm/kappa/" + file_name + "200.txt"
        file_name = file_name + "kappa"
                kappa = True
        hierarchy.initClustering(vector_path, directions_fn, scores_fn, names_fn, amount_to_start, False,
                                 similarity_threshold, cluster_amt, score_limit, file_name, kappa)
        """
        # Get rankings
        clusters_fn = "../data/" + data_type + "/cluster/hierarchy_directions/" + file_name + str(
            score_limit) + str(cluster_amt) + ".txt"
        property_names_fn = "../data/" + data_type + "/cluster/hierarchy_names/" + file_name + str(
            score_limit) + str(cluster_amt) + ".txt"
        vector_names_fn = "../data/" + data_type + "/nnet/spaces/entitynames.txt"
        #rank.getAllRankings(clusters_fn, vector_path, property_names_fn, vector_names_fn, 0.2, 1, False, file_name, False)

        cluster_to_classify = -1
        max_depth = 50
        label_names_fn = "../data/" + data_type + "/classify/keywords/names.txt"
        cluster_labels_fn = "../data/" + data_type + "/classify/keywords/class-All"
        cluster_names_fn = "../data/" + data_type + "/cluster/hierarchy_names/" + fn + str(
            score_limit) + ".txt"
        #clf = tree.DecisionTree(clusters_fn, cluster_labels_fn, label_names_fn, cluster_names_fn, file_name, 10000, max_depth)

    fn_to_place = "films100L3100N0.5"
    score_limit = 0.8
    cluster_amt = 200
    property_names_fn = "../data/" + data_type + "/cluster/hierarchy_names/" + fn_to_place + str(
        score_limit) + str(cluster_amt) + ".txt"

    ranking_fn = "../data/" + data_type + "/rank/numeric/" + fn_to_place + ".txt"

    #fto.pavPPMI(property_names_fn, ranking_fn, fn_to_place)

    end_file_names = [
        "L1films100L3100N0.5InClusterN0.5FTadagradcategorical_crossentropy100Genres100L3",
        "L2films100L3100N0.5InClusterN0.5FTadagradcategorical_crossentropy100Genres100L3",
        "L3films100L3100N0.5InClusterN0.5FTadagradcategorical_crossentropy100Genres100L3"
    ]
    init_vector_path = "../data/" + data_type + "/nnet/spaces/films100.txt"
    past_model_weights_fn = []
    past_model_bias_fn = []

    for f in end_file_names:
        past_model_weights_fn.append("../data/" + data_type +
                                     "/nnet/weights/" + f + ".txt")
        past_model_bias_fn.append("../data/" + data_type + "/nnet/bias/" + f +
                                  ".txt")

    class_path = "../data/" + data_type + "/classify/genres/class-all"
    loss = "binary_crossentropy"
    output_activation = "sigmoid"
    optimizer_name = "adagrad"
    hidden_activation = "tanh"
    learn_rate = 0.01
    fine_tune_weights_fn = None
    randomize_finetune_weights = False
    epochs = 100
    batch_size = 200
    hidden_layer_size = 400
    is_identity = False
    dropout_noise = None
    from_ae = True
    identity_swap = False
    file_name = end_file_names[len(end_file_names) - 1]
    """
    score_limit = 0.8
    cluster_amt = 400
    clusters_fn = "../data/" + data_type + "/cluster/hierarchy_directions/" + fn_to_place + str(score_limit) + str(
        cluster_amt) + ".txt"
    fine_tune_weights_fn = [clusters_fn]
    randomize_finetune_weights = False
    class_path ="../data/" + data_type + "/finetune/" + fn_to_place + "pavPPMI.txt"
    loss = "mse"
    output_activation = "linear"
    batch_size = 200
    hidden_layer_size = 100
    epochs = 250
    file_name = file_name + "Genres" + str(epochs) + "L" + str(len(end_file_names))
    """
    """
    deep_size = 400
    epochs = 299
    from_ae = False
    past_model_weights_fn = None
    past_model_bias_fn = None
    fine_tune_weights_fn = None
    is_identity = True
    amount_of_finetune = 5
    randomize_finetune_weights = True
    file_name = "films100"
    finetune_size = cluster_amt
    init_vector_path = "../data/" + data_type + "/rank/numeric/"+file_name+".txt"
    file_name = file_name + "rank" + "E" + str(epochs) + "DS" + str(deep_size) + "L" +  str(amount_of_finetune)
    SDA = NeuralNetwork(noise=0, fine_tune_weights_fn=fine_tune_weights_fn, optimizer_name=optimizer_name,
                        network_type="ft", past_model_bias_fn=past_model_bias_fn, deep_size=deep_size,
                        randomize_finetune_weights=randomize_finetune_weights, amount_of_finetune=amount_of_finetune,
                        vector_path=init_vector_path, hidden_layer_size=hidden_layer_size, class_path=class_path,
                        identity_swap=identity_swap, dropout_noise=dropout_noise,
                        hidden_activation=hidden_activation, output_activation=output_activation, epochs=epochs,
                        learn_rate=learn_rate, is_identity=is_identity, finetune_size = finetune_size,
                        batch_size=batch_size, past_model_weights_fn=past_model_weights_fn, loss=loss,
                        file_name=file_name, from_ae=from_ae)
    """
    deep_size = 400
    epochs = 299
    from_ae = True
    #past_model_weights_fn = None
    #past_model_bias_fn = None
    #file_name = "films100"
    fine_tune_weights_fn = None
    is_identity = False
    amount_of_finetune = 0
    randomize_finetune_weights = False
    #file_name = end_file_names[len(end_file_names)-1]
    #init_vector_path = "../data/" + data_type + "/nnet/spaces/films100.txt"
    score_limit = 0.9
    cluster_amt = 400
    output_size = 23
    hidden_layer_size = 100
    epochs = 200
    class_outputs = True
    optimizer_name = "adagrad"
    learn_rate = 0.01
    output_activation = "sigmoid"
    finetune_activation = "linear"
    hidden_activation = "tanh"
    finetune_size = cluster_amt
    file_name = "films100"
    original_fn = file_name
    init_vector_path = "../data/" + data_type + "/rank/numeric/" + file_name + "svmndcg0.9" + str(
        cluster_amt) + ".txt"
    clusters_fn = "../data/" + data_type + "/cluster/hierarchy_directions/" + file_name + "svmndcg0.9" + str(
        cluster_amt) + ".txt"
    deep_size = [100, 100, 100]
    fine_tune_weights_fn = [clusters_fn]
    fine_tune_weights_fn = ""
    class_path = "../data/" + data_type + "/classify/genres/class-All"
    from_ae = False
    file_name = file_name + "rank" + "E" + str(epochs) + "DS" + str(
        deep_size) + "L" + str(len(deep_size)) + str(cluster_amt)
    """
    SDA = NeuralNetwork(noise=0, fine_tune_weights_fn=fine_tune_weights_fn, optimizer_name=optimizer_name,
                        network_type="ft", past_model_bias_fn=past_model_bias_fn, deep_size=deep_size,
                        finetune_activation=finetune_activation,
                        randomize_finetune_weights=randomize_finetune_weights, amount_of_finetune=amount_of_finetune,
                        vector_path=init_vector_path, hidden_layer_size=hidden_layer_size, class_path=class_path,
                        identity_swap=identity_swap, dropout_noise=dropout_noise, class_outputs=class_outputs,
                        hidden_activation=hidden_activation, output_activation=output_activation, epochs=epochs,
                        learn_rate=learn_rate, is_identity=is_identity, output_size=output_size,
                        finetune_size=finetune_size,
                        batch_size=batch_size, past_model_weights_fn=past_model_weights_fn, loss=loss,
                        file_name=file_name, from_ae=from_ae)
    """
    data_type = "wines"
    classification_task = "types"
    file_name = "wines100trimmed"
    init_vector_path = "../data/" + data_type + "/nnet/spaces/" + file_name + ".txt"

    #file_name = "winesppmi"
    #init_vector_path = "../data/wines/bow/ppmi/class-trimmed-all-50"

    deep_size = [100, 100, 100]
    for d in range(len(deep_size)):
        print(deep_size, init_vector_path)
        loss = "binary_crossentropy"
        output_activation = "sigmoid"
        optimizer_name = "adagrad"
        hidden_activation = "tanh"
        classification_path = "../data/" + data_type + "/classify/" + classification_task + "/class-all"
        learn_rate = 0.01
        fine_tune_weights_fn = None
        epochs = 500
        batch_size = 200
        class_outputs = True
        dropout_noise = 0.3
        is_identity = False
        identity_swap = False
        randomize_finetune_weights = False
        hidden_layer_size = 100
        output_size = 10
        randomize_finetune_weights = False
        corrupt_finetune_weights = False
        fine_tune_weights_fn = []

        #init_vector_path = "../data/" + data_type + "/movies/bow/binary/phrases/class-all"
        if d == 0:
            file_name = file_name + "rank" + "E" + str(epochs) + "DS" + str(deep_size) + "L" + str(amount_of_finetune)\
                        + "DN" + str(dropout_noise) + hidden_activation + "SFT" + str(d)
        else:
            file_name = file_name + "SFT" + str(d)
        print("!!!!!!!!!!!!!!!", deep_size)

        SDA = NeuralNetwork(
            noise=0,
            fine_tune_weights_fn=fine_tune_weights_fn,
            optimizer_name=optimizer_name,
            network_type="ft",
            past_model_bias_fn=past_model_bias_fn,
            deep_size=deep_size,
            finetune_activation=finetune_activation,
            randomize_finetune_weights=randomize_finetune_weights,
            amount_of_finetune=amount_of_finetune,
            vector_path=init_vector_path,
            hidden_layer_size=hidden_layer_size,
            class_path=classification_path,
            identity_swap=identity_swap,
            dropout_noise=dropout_noise,
            class_outputs=class_outputs,
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            epochs=epochs,
            learn_rate=learn_rate,
            is_identity=is_identity,
            output_size=output_size,
            finetune_size=finetune_size,
            batch_size=batch_size,
            past_model_weights_fn=past_model_weights_fn,
            loss=loss,
            file_name=file_name,
            from_ae=from_ae,
            data_type=data_type)
        new_file_names = []
        if dropout_noise is not None and dropout_noise > 0.0:
            for j in range(0, len(deep_size) * 2 + 1, 2):
                new_fn = file_name + "L" + str(j)
                new_file_names.append(new_fn)
        else:
            for j in range(0, len(deep_size) + 1):
                new_fn = file_name + "L" + str(j)
                new_file_names.append(new_fn)

        for j in range(len(new_file_names)):
            #file_name = "wines100trimmed"
            #file_name = "films100rankE200DS[100, 100, 100]L3300L1svmndcg0.9200pavPPMIN0.5FTITsgdmse2000L1rankE100DS[100, 100]L0"
            file_name = new_file_names[j]
            past_model_weights_fn = [
                "../data/" + data_type + "/nnet/weights/" + file_name + ".txt"
            ]
            past_model_bias_fn = [
                "../data/" + data_type + "/nnet/bias/" + file_name + ".txt"
            ]
            # Get SVM scores

            if data_type is "wines" or "placetypes":
                lowest_count = 50
            else:
                lowest_count = 200
            highest_count = 10000
            vector_path = "../data/" + data_type + "/nnet/spaces/" + file_name + ".txt"
            class_path = "../data/" + data_type + "/bow/binary/phrases/class-all-" + str(
                lowest_count)
            property_names_fn = "../data/" + data_type + "/bow/names/" + str(
                lowest_count) + ".txt"
            svm_type = "svm"
            threads = 4
            file_name = file_name + svm_type
            svm.getSVMResults(vector_path,
                              class_path,
                              property_names_fn,
                              file_name,
                              lowest_count=lowest_count,
                              highest_count=highest_count,
                              svm_type=svm_type,
                              data_type=data_type,
                              get_kappa=True,
                              get_f1=False,
                              getting_directions=True,
                              threads=4)

            directions_fn = "../data/" + data_type + "/svm/directions/" + file_name + str(
                lowest_count) + ".txt"
            # Get rankings
            vector_names_fn = "../data/" + data_type + "/nnet/spaces/entitynames.txt"
            class_names_fn = "../data/" + data_type + "/bow/names/" + str(
                lowest_count) + ".txt"
            directions_fn = "../data/" + data_type + "/svm/directions/" + file_name + str(
                lowest_count) + ".txt"
            """
            scores_fn = "../data/" + data_type + "/svm/kappa/" + file_name + str(lowest_count) + ".txt"
            kappa = True
            if d == 0:
                file_name = file_name + "kappa"
            """

            rank.getAllPhraseRankings(directions_fn,
                                      vector_path,
                                      class_names_fn,
                                      vector_names_fn,
                                      file_name,
                                      data_type=data_type)
            ndcg.getNDCG("../data/" + data_type + "/rank/numeric/" +
                         file_name + "ALL.txt",
                         file_name,
                         data_type=data_type,
                         lowest_count=lowest_count)
            scores_fn = "../data/" + data_type + "/ndcg/" + file_name + ".txt"
            kappa = False
            if d == 0:
                file_name = file_name + "ndcg"

            names_fn = "../data/" + data_type + "/bow/names/" + str(
                lowest_count) + ".txt"
            similarity_threshold = 0.5
            cluster_amt = deep_size[j] * 2
            amount_to_start = 8000
            score_limit = 0.9
            dissimilarity_threshold = 0.9

            file_name = file_name + str(score_limit) + str(cluster_amt)

            hierarchy.initClustering(vector_path,
                                     directions_fn,
                                     scores_fn,
                                     names_fn,
                                     amount_to_start,
                                     False,
                                     similarity_threshold,
                                     cluster_amt,
                                     score_limit,
                                     file_name,
                                     kappa,
                                     dissimilarity_threshold,
                                     data_type=data_type)

            # Get rankings
            clusters_fn = "../data/" + data_type + "/cluster/hierarchy_directions/" + file_name + ".txt"
            property_names_fn = "../data/" + data_type + "/cluster/hierarchy_names/" + file_name + ".txt"
            vector_names_fn = "../data/" + data_type + "/nnet/spaces/entitynames.txt"

            rank.getAllRankings(clusters_fn,
                                vector_path,
                                property_names_fn,
                                vector_names_fn,
                                0.2,
                                1,
                                False,
                                file_name,
                                False,
                                data_type=data_type)

            # Get PAV
            ranking_fn = "../data/" + data_type + "/rank/numeric/" + file_name + ".txt"
            label_names_fn = "../data/" + data_type + "/classify/" + classification_task + "/names.txt"

            tree.DecisionTree(ranking_fn,
                              classification_path,
                              label_names_fn,
                              property_names_fn,
                              file_name,
                              10000,
                              3,
                              balance="balanced",
                              criterion="entropy",
                              save_details=False,
                              data_type=data_type)

            tree.DecisionTree(ranking_fn,
                              classification_path,
                              label_names_fn,
                              property_names_fn,
                              file_name,
                              10000,
                              None,
                              balance="balanced",
                              criterion="entropy",
                              save_details=False,
                              data_type=data_type)

            if d == 0:
                file_name = file_name + "pavPPMI"

            fto.pavPPMI(property_names_fn,
                        ranking_fn,
                        file_name,
                        data_type=data_type)
            discrete_labels_fn = "../data/" + data_type + "/rank/discrete/" + file_name + "P1.txt"

            # Use PAV as class vectors
            fine_tune_weights_fn = [clusters_fn]
            epochs = 1000
            batch_size = 200
            learn_rate = 0.001
            is_identity = True
            identity_swap = False
            randomize_finetune_weights = False
            # from_ae = False
            finetune_size = cluster_amt
            fn = file_name

            # Running Finetune on original space
            class_path = "../data/" + data_type + "/finetune/" + file_name + ".txt"
            if d == 0:
                file_name = file_name + "IT"
            print(file_name)

            loss = "mse"
            optimizer_name = "sgd"
            hidden_activation = "tanh"
            finetune_activation = "linear"
            hidden_layer_size = deep_size[j]
            if d == 0:
                file_name = file_name + optimizer_name + loss + str(epochs)
            from_ae = True
            past_model_weights_fn = [
                "../data/" + data_type + "/nnet/weights/L" +
                new_file_names[j] + ".txt"
            ]
            past_model_bias_fn = [
                "../data/" + data_type + "/nnet/bias/L" + new_file_names[j] +
                ".txt"
            ]

            print(file_name)
            amount_of_finetune = 1

            SDA = NeuralNetwork(
                noise=0,
                fine_tune_weights_fn=fine_tune_weights_fn,
                optimizer_name=optimizer_name,
                network_type="ft",
                past_model_bias_fn=past_model_bias_fn,
                randomize_finetune_weights=randomize_finetune_weights,
                vector_path=init_vector_path,
                hidden_layer_size=hidden_layer_size,
                class_path=class_path,
                identity_swap=identity_swap,
                amount_of_finetune=amount_of_finetune,
                hidden_activation=hidden_activation,
                output_activation=output_activation,
                epochs=epochs,
                learn_rate=learn_rate,
                is_identity=is_identity,
                finetune_activation=finetune_activation,
                batch_size=batch_size,
                past_model_weights_fn=past_model_weights_fn,
                loss=loss,
                file_name=file_name,
                from_ae=from_ae,
                finetune_size=finetune_size,
                data_type=data_type)
            new_file_names[j - 1] = file_name

            ranking_fn = "../data/" + data_type + "/nnet/clusters/" + file_name + ".txt"

            tree.DecisionTree(ranking_fn,
                              classification_path,
                              label_names_fn,
                              property_names_fn,
                              file_name,
                              10000,
                              3,
                              balance="balanced",
                              criterion="entropy",
                              save_details=False,
                              data_type=data_type)

            tree.DecisionTree(ranking_fn,
                              classification_path,
                              label_names_fn,
                              property_names_fn,
                              file_name,
                              10000,
                              None,
                              balance="balanced",
                              criterion="entropy",
                              save_details=False,
                              data_type=data_type)
        """
        file_name ="films100rankE200DS[100, 100, 100]L3300L1svmndcg0.9200pavPPMIN0.5FTITsgdmse2000L1"
        loss = "binary_crossentropy"
        output_activation = "sigmoid"
        optimizer_name = "adagrad"
        hidden_activation = "tanh"
        class_path = "../data/" + data_type + "/classify/genres/class-all"
        learn_rate = 0.01
        fine_tune_weights_fn = None
        epochs = 100
        batch_size = 200
        class_outputs = True
        dropout_noise = None
        deep_size = [100, 100]
        hidden_layer_size = 100
        output_size = 23
        randomize_finetune_weights = False
        corrupt_finetune_weights = False
        fine_tune_weights_fn = []
        init_vector_path = "../data/" + data_type + "/nnet/clusters/" + file_name + ".txt"
        file_name = file_name + "rank" + "E" + str(epochs) + "DS" + str(deep_size) + "L" + str(amount_of_finetune)

        SDA = NeuralNetwork(noise=0, fine_tune_weights_fn=fine_tune_weights_fn, optimizer_name=optimizer_name,
                            network_type="ft", past_model_bias_fn=past_model_bias_fn, deep_size=deep_size,
                            randomize_finetune_weights=randomize_finetune_weights, output_size=output_size,
                            amount_of_finetune=amount_of_finetune, class_outputs=class_outputs,
                            vector_path=init_vector_path, hidden_layer_size=hidden_layer_size, class_path=class_path,
                            identity_swap=identity_swap, dropout_noise=dropout_noise,
                            hidden_activation=hidden_activation, output_activation=output_activation, epochs=epochs,
                            learn_rate=learn_rate, is_identity=is_identity, finetune_size=finetune_size,
                            batch_size=batch_size, past_model_weights_fn=past_model_weights_fn, loss=loss,
                            file_name=file_name, from_ae=from_ae)
        """
        file_name = new_file_names[0]
        init_vector_path = "../data/" + data_type + "/nnet/spaces/" + file_name + "L0.txt"
        deep_size = deep_size[:len(deep_size) - 1]