Example #1
def get_layer_info():
    """Created 04/11/2016"""
    from nolearn.lasagne import PrintLayerInfo
    layer_info = PrintLayerInfo()
    return layer_info
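
# A minimal usage sketch (not part of the original snippet; it mirrors the
# initialize-then-print pattern used in the examples below). `build_nn` is a
# hypothetical constructor returning a nolearn NeuralNet:
#
#     net = build_nn(nr_features=100)
#     net.initialize()          # the net must be initialized before inspection
#     get_layer_info()(net)     # prints the table of layer names and shapes
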
    def make_submission(self, train_images, train_results, test_images, output_file_path, feature_extractors, model, size=64):
        # Create a vector of feature vectors and initialize the codebook of sift extractor
        feature_vectors = []
        sift_extractor = temp_extractor = next((extractor for extractor in feature_extractors if type(extractor) == SiftFeatureExtractor), None)
        if sift_extractor is not None:
            sift_extractor.set_codebook(train_images)
            feature_extractors[feature_extractors.index(temp_extractor)] = sift_extractor

        # Extract features from every image
        for image in train_images:
            print("Training ", image, "...")
            preprocessed_color_image = self.preprocess_image(image, size)
            feature_vector = []
            if feature_extractors != []:
                for feature_extractor in feature_extractors:
                    if type(feature_extractor) != SiftFeatureExtractor:
                        feature_vector = append(feature_vector,
                                                feature_extractor.extract_feature_vector(preprocessed_color_image))
                    else:
                        feature_vector = append(feature_vector, feature_extractor.extract_feature_vector(image))
            else:
                feature_vector = np.asarray(resize(cv2.imread(image), (48, 48, 3)).transpose(2,0,1).reshape(3, 48, 48))
            feature_vectors.append(feature_vector)

        # Logistic regression with an L1 penalty for feature selection; lower C = stronger regularization = more coefficients driven to zero (more features removed)
        clf2 = LogisticRegression(penalty='l1', dual=False, tol=0.0001, C=4)

        # Feature selection/reduction
        if(model != "conv"):
            print("Old feature vector shape = ", len(feature_vectors), len(feature_vectors[0]))
            new_feature_vectors = clf2.fit_transform(feature_vectors, train_results)
            print("New feature vector shape = ", len(new_feature_vectors), len(new_feature_vectors[0]))
            if(model == "neural"):
                model = self.build_nn(nr_features=len(new_feature_vectors[0]))
                new_feature_vectors = np.asarray(new_feature_vectors)
                train_results = np.asarray(train_results)
                model.initialize()
                layer_info = PrintLayerInfo()
                layer_info(model)

            # Fit our model
            model.fit(new_feature_vectors, train_results)

        else:
            model = self.build_conv()

            # Fit our model
            model.fit(np.asarray(feature_vectors), np.asarray(train_results))

        # Iterate over the test images and add their prediction to a prediction object
        prediction_object = Prediction()
        for im in test_images:
            print("Predicting ", im)
            preprocessed_color_image = self.preprocess_image(im, size)
            validation_feature_vector = []
            if feature_extractors != []:
                for feature_extractor in feature_extractors:
                    if type(feature_extractor) != SiftFeatureExtractor:
                        validation_feature_vector = append(validation_feature_vector, feature_extractor.extract_feature_vector(preprocessed_color_image))
                    else:
                        validation_feature_vector = append(validation_feature_vector, feature_extractor.extract_feature_vector(im))
                validation_feature_vector = clf2.transform(validation_feature_vector)
            else:
                validation_feature_vector = np.asarray(resize(cv2.imread(im), (48, 48, 3)).transpose(2,0,1).reshape(3, 48, 48))
            prediction_object.addPrediction(model.predict_proba(validation_feature_vector)[0])

        # Write out the prediction object
        FileParser.write_CSV(output_file_path, prediction_object)
Example #3
    # Random Forest
    rf.fit(train_features_df.values.tolist(), train_labels_df['cat'].tolist())
    predicted_labels = []
    for index, vector in enumerate(test_features_df.values):
        predicted_labels.append(str(rf.predict(vector.reshape(1, -1))[0]))
    tree_confusion_matrices["Random Forest"].append(tree.plot_confusion_matrix(test_labels_df['cat'].values.astype(str), predicted_labels))  # Bit hacky to use the tree method

    train_features_df = (train_features_df - train_features_df.mean()) / (train_features_df.max() - train_features_df.min())
    train_features_df = train_features_df.reset_index(drop=True)
    test_features_df = (test_features_df - test_features_df.mean()) / (test_features_df.max() - test_features_df.min())
    test_features_df = test_features_df.reset_index(drop=True)

    # Neural Network
    model = build_nn(nr_features=len(train_features_df.columns))
    model.initialize()
    layer_info = PrintLayerInfo()
    layer_info(model)
    y_train = np.reshape(np.asarray(train_labels_df, dtype='int32'), (-1, 1)).ravel()
    model.fit(train_features_df.values, np.add(y_train, -1))
    predicted_labels = []
    for index, vector in enumerate(test_features_df.values):
        predicted_labels.append(str(model.predict(vector.reshape(1, -1))[0]+1))
    tree_confusion_matrices["Neural Network"].append(tree.plot_confusion_matrix(test_labels_df['cat'].values.astype(str), predicted_labels))  # Bit hacky to use the tree method


    #Bayesian Network
    train_features_df, test_features_df = features_df.iloc[train_index,:].copy(), features_df.iloc[test_index,:].copy()
    train_labels_df, test_labels_df = labels_df.iloc[train_index,:].copy(), labels_df.iloc[test_index,:].copy()
    train_features_df = train_features_df.reset_index(drop=True)
    test_features_df = test_features_df.reset_index(drop=True)
    train_labels_df = train_labels_df.reset_index(drop=True)
    nn = net1.fit(X_train, y_train)

    print "# Saving weights"
    net1.save_weights_to(s.net_name)
if _test:
    print "# Loading weights"
    net1.load_weights_from(s.net_name)

# evaluate
print "# Evaluating"
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = net1.predict(X_test)

####################### OUTPUT to shell and to file for later
filename = "experiment_log.txt"
target = open(filename, 'a+')
target.write("-------------------------------------------------------------------------\n")
from nolearn.lasagne import PrintLayerInfo
pli = PrintLayerInfo()
net1.verbose = 3
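# _get_layer_info_conv is a private nolearn helper; it returns the layer table
# together with a legend string describing the table's columns.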
layer_info, legend = pli._get_layer_info_conv(net1)
target.write(layer_info)
target.write(classification_report(y_test, predictions))
#target.write(accuracy_score(y_test, predictions))
target.close()

print layer_info
print classification_report(y_test, predictions)
print accuracy_score(y_test, predictions)

    def local_test(self, images, results, feature_extractors, model, k=2, size=64):
        kf = KFold(len(images), n_folds=k, shuffle=True, random_state=1337)
        # kf = KFold(500, n_folds=k, shuffle=True, random_state=1337)
        train_errors = []
        test_errors = []

        for train, validation in kf:
            # Divide the train_images in a training and validation set (using KFold)
            train_set = [images[i % len(images)] for i in train]
            validation_set = [images[i % len(images)] for i in validation]
            train_set_results = [results[i % len(images)] for i in train]
            validation_set_results = [results[i % len(images)] for i in validation]

            # Create an empty feature_vectors array and set the codebook of the sift extractor if there is any
            feature_vectors = []
            sift_extractor = temp_extractor = next(
                (extractor for extractor in feature_extractors if type(extractor) == SiftFeatureExtractor), None)
            if sift_extractor is not None:
                sift_extractor.set_codebook(train_set)
                feature_extractors[feature_extractors.index(temp_extractor)] = sift_extractor

            # Iterate over the train_set, extract the features from each image and append them to feature_vectors
            for image in train_set:
                print("Training ", image, "...")
                preprocessed_color_image = self.preprocess_image(image, size)
                feature_vector = []
                if feature_extractors != []:
                    for feature_extractor in feature_extractors:
                        if type(feature_extractor) != SiftFeatureExtractor:
                            feature_vector = append(feature_vector,
                                                    feature_extractor.extract_feature_vector(preprocessed_color_image))
                        else:
                            feature_vector = append(feature_vector, feature_extractor.extract_feature_vector(image))
                else:
                    feature_vector = np.asarray(resize(cv2.imread(image), (48, 48, 3)).transpose(2,0,1).reshape(3, 48, 48))
                feature_vectors.append(feature_vector)

            # Logistic regression with an L1 penalty for feature selection; lower C = stronger regularization = more features removed

            clf2 = LogisticRegression(penalty='l1', dual=False, tol=0.0001, C=4)

            # Feature selection/reduction
            if(model != "conv"):
                new_feature_vectors = clf2.fit_transform(feature_vectors, train_set_results)
                if(model == "neural"):
                    model = self.build_nn(nr_features=len(new_feature_vectors[0]))
                    new_feature_vectors = np.asarray(new_feature_vectors)
                    train_set_results = np.asarray(train_set_results)
                    model.initialize()
                    layer_info = PrintLayerInfo()
                    layer_info(model)

                # Fit our model
                model.fit(new_feature_vectors, train_set_results)

            else:
                model = self.build_conv()

                # Fit our model
                model.fit(np.asarray(feature_vectors), np.asarray(train_set_results))

            train_prediction_object = Prediction()
            counter=0
            for im in train_set:
                print("predicting train image ", counter)
                counter+=1
                preprocessed_color_image = self.preprocess_image(im, size)
                validation_feature_vector = []
                if feature_extractors != []:
                    for feature_extractor in feature_extractors:
                        if type(feature_extractor) != SiftFeatureExtractor:
                            validation_feature_vector = append(validation_feature_vector, feature_extractor.extract_feature_vector(preprocessed_color_image))
                        else:
                            validation_feature_vector = append(validation_feature_vector, feature_extractor.extract_feature_vector(im))
                    validation_feature_vector = clf2.transform(validation_feature_vector)
                else:
                    validation_feature_vector = np.asarray(resize(cv2.imread(im), (48, 48, 3)).transpose(2,0,1).reshape(3, 48, 48))
                train_prediction_object.addPrediction(model.predict_proba(validation_feature_vector)[0])

            print("predicting test images")
            test_prediction_object = Prediction()
            counter=0
            for im in validation_set:
                print("predicting test image ", counter)
                counter+=1
                preprocessed_color_image = self.preprocess_image(im, size)
                validation_feature_vector = []
                if feature_extractors != []:
                    for feature_extractor in feature_extractors:
                        if type(feature_extractor) != SiftFeatureExtractor:
                            validation_feature_vector = append(validation_feature_vector, feature_extractor.extract_feature_vector(preprocessed_color_image))
                        else:
                            validation_feature_vector = append(validation_feature_vector, feature_extractor.extract_feature_vector(im))
                    validation_feature_vector = clf2.transform(validation_feature_vector)
                else:
                    validation_feature_vector = np.asarray(resize(cv2.imread(im), (48, 48, 3)).transpose(2,0,1).reshape(3, 48, 48))
                test_prediction_object.addPrediction(model.predict_proba(validation_feature_vector)[0])

            train_errors.append(train_prediction_object.evaluate(train_set_results))
            test_errors.append(test_prediction_object.evaluate(validation_set_results))

        return [train_errors, test_errors]
Example #6
    input_shape=(None, num_features),
    dense_num_units=64,
    narrow_num_units=48,
    denseReverse1_num_units=64,
    denseReverse2_num_units=128,
    output_num_units=128,

    #input_nonlinearity = None, #nonlinearities.sigmoid,
    #dense_nonlinearity = nonlinearities.tanh,
    narrow_nonlinearity=nonlinearities.softplus,
    #denseReverse1_nonlinearity = nonlinearities.tanh,
    denseReverse2_nonlinearity=nonlinearities.softplus,
    output_nonlinearity=nonlinearities.linear,  #nonlinearities.softmax,

    #dropout0_p=0.1,
    dropout1_p=0.01,
    dropout2_p=0.001,
    regression=True,
    verbose=1)

ae.initialize()
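# Print the layer table of the freshly initialized autoencoder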
PrintLayerInfo()(ae)

maybe_this_is_a_history = ae.fit(Z, Z)

#learned_parameters = ae.get_all_params_values()
#np.save("task4/learned_parameter.npy", learned_parameters)

#SaveWeights(path='task4/koebi_train_history_AE')(ae, maybe_this_is_a_history)
ae.save_params_to('task4/koebi_train_history_AE2')
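# The saved parameters can later be restored into a net with the same
# architecture via ae.load_params_from('task4/koebi_train_history_AE2').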
Example #7
def make_memnn(vocab_size,
               cont_sl,
               cont_wl,
               quest_wl,
               answ_wl,
               rnn_size,
               rnn_type='LSTM',
               pool_size=4,
               answ_n=4,
               dence_l=[100],
               dropout=0.5,
               batch_size=16,
               emb_size=50,
               grad_clip=40,
               init_std=0.1,
               num_hops=3,
               rnn_style=False,
               nonlin=LN.softmax,
               init_W=None,
               rng=None,
               art_pool=4,
               lr=0.01,
               mom=0,
               updates=LU.adagrad,
               valid_indices=0.2,
               permute_answ=False,
               permute_cont=False):
    def select_rnn(x):
        return {
            'RNN': LL.RecurrentLayer,
            'LSTM': LL.LSTMLayer,
            'GRU': LL.GRULayer,
        }.get(x, LL.LSTMLayer)

#    dence = dence + [1]

    RNN = select_rnn(rnn_type)
    #-----------------------------------------------------------------------weights
    tr_variables = {}
    tr_variables['WQ'] = theano.shared(
        init_std * np.random.randn(vocab_size, emb_size).astype('float32'))
    tr_variables['WA'] = theano.shared(
        init_std * np.random.randn(vocab_size, emb_size).astype('float32'))
    tr_variables['WC'] = theano.shared(
        init_std * np.random.randn(vocab_size, emb_size).astype('float32'))
    tr_variables['WTA'] = theano.shared(
        init_std * np.random.randn(cont_sl, emb_size).astype('float32'))
    tr_variables['WTC'] = theano.shared(
        init_std * np.random.randn(cont_sl, emb_size).astype('float32'))
    tr_variables['WAnsw'] = theano.shared(
        init_std * np.random.randn(vocab_size, emb_size).astype('float32'))

    #------------------------------------------------------------------input layers
    layers = [(LL.InputLayer, {
        'name': 'l_in_q',
        'shape': (batch_size, 1, quest_wl),
        'input_var': T.itensor3('l_in_q_')
    }),
              (LL.InputLayer, {
                  'name': 'l_in_a',
                  'shape': (batch_size, answ_n, answ_wl),
                  'input_var': T.itensor3('l_in_a_')
              }),
              (LL.InputLayer, {
                  'name': 'l_in_q_pe',
                  'shape': (batch_size, 1, quest_wl, emb_size)
              }),
              (LL.InputLayer, {
                  'name': 'l_in_a_pe',
                  'shape': (batch_size, answ_n, answ_wl, emb_size)
              }),
              (LL.InputLayer, {
                  'name': 'l_in_cont',
                  'shape': (batch_size, cont_sl, cont_wl),
                  'input_var': T.itensor3('l_in_cont_')
              }),
              (LL.InputLayer, {
                  'name': 'l_in_cont_pe',
                  'shape': (batch_size, cont_sl, cont_wl, emb_size)
              })]
    #------------------------------------------------------------------slice layers
    #    l_qs = []
    #    l_cas = []
    l_a_names = ['l_a_{}'.format(i) for i in range(answ_n)]
    l_a_pe_names = ['l_a_pe{}'.format(i) for i in range(answ_n)]
    for i in range(answ_n):
        layers.extend([(LL.SliceLayer, {
            'name': l_a_names[i],
            'incoming': 'l_in_a',
            'indices': slice(i, i + 1),
            'axis': 1
        })])
    for i in range(answ_n):
        layers.extend([(LL.SliceLayer, {
            'name': l_a_pe_names[i],
            'incoming': 'l_in_a_pe',
            'indices': slice(i, i + 1),
            'axis': 1
        })])
#------------------------------------------------------------------MEMNN layers
#question----------------------------------------------------------------------
    layers.extend([(EncodingFullLayer, {
        'name': 'l_emb_f_q',
        'incomings': ('l_in_q', 'l_in_q_pe'),
        'vocab_size': vocab_size,
        'emb_size': emb_size,
        'W': tr_variables['WQ'],
        'WT': None
    })])

    l_mem_names = ['ls_mem_n2n_{}'.format(i) for i in range(num_hops)]

    layers.extend([(MemoryLayer, {
        'name': l_mem_names[0],
        'incomings': ('l_in_cont', 'l_in_cont_pe', 'l_emb_f_q'),
        'vocab_size': vocab_size,
        'emb_size': emb_size,
        'A': tr_variables['WA'],
        'C': tr_variables['WC'],
        'AT': tr_variables['WTA'],
        'CT': tr_variables['WTC'],
        'nonlin': nonlin
    })])
    for i in range(1, num_hops):
        if i % 2:
            WC, WA = tr_variables['WA'], tr_variables['WC']
            WTC, WTA = tr_variables['WTA'], tr_variables['WTC']
        else:
            WA, WC = tr_variables['WA'], tr_variables['WC']
            WTA, WTC = tr_variables['WTA'], tr_variables['WTC']
        layers.extend([(MemoryLayer, {
            'name': l_mem_names[i],
            'incomings': ('l_in_cont', 'l_in_cont_pe', l_mem_names[i - 1]),
            'vocab_size': vocab_size,
            'emb_size': emb_size,
            'A': WA,
            'C': WC,
            'AT': WTA,
            'CT': WTC,
            'nonlin': nonlin
        })])
#answers-----------------------------------------------------------------------
    l_emb_f_a_names = ['l_emb_f_a{}'.format(i) for i in range(answ_n)]
    for i in range(answ_n):
        layers.extend([(EncodingFullLayer, {
            'name': l_emb_f_a_names[i],
            'incomings': (l_a_names[i], l_a_pe_names[i]),
            'vocab_size': vocab_size,
            'emb_size': emb_size,
            'W': tr_variables['WAnsw'],
            'WT': None
        })])
#------------------------------------------------------------concatenate layers
    layers.extend([(LL.ConcatLayer, {
        'name': 'l_qma_concat',
        'incomings': l_mem_names + l_emb_f_a_names
    })])
    #--------------------------------------------------------------------RNN layers
    layers.extend([(
        RNN,
        {
            'name': 'l_qa_rnn_f',
            'incoming': 'l_qma_concat',
            #                          'mask_input': 'l_qamask_concat',
            'num_units': rnn_size,
            'backwards': False,
            'only_return_final': False,
            'grad_clipping': grad_clip
        })])
    layers.extend([(
        RNN,
        {
            'name': 'l_qa_rnn_b',
            'incoming': 'l_qma_concat',
            #                          'mask_input': 'l_qamask_concat',
            'num_units': rnn_size,
            'backwards': True,
            'only_return_final': False,
            'grad_clipping': grad_clip
        })])

    layers.extend([(LL.SliceLayer, {
        'name': 'l_qa_rnn_f_sl',
        'incoming': 'l_qa_rnn_f',
        'indices': slice(-answ_n, None),
        'axis': 1
    })])
    layers.extend([(LL.SliceLayer, {
        'name': 'l_qa_rnn_b_sl',
        'incoming': 'l_qa_rnn_b',
        'indices': slice(-answ_n, None),
        'axis': 1
    })])

    layers.extend([(LL.ElemwiseMergeLayer, {
        'name': 'l_qa_rnn_conc',
        'incomings': ('l_qa_rnn_f_sl', 'l_qa_rnn_b_sl'),
        'merge_function': T.add
    })])
    #-----------------------------------------------------------------pooling layer
    #    layers.extend([(LL.DimshuffleLayer, {'name': 'l_qa_rnn_conc_',
    #                                         'incoming': 'l_qa_rnn_conc', 'pattern': (0, 'x', 1)})])
    layers.extend([(LL.Pool1DLayer, {
        'name': 'l_qa_pool',
        'incoming': 'l_qa_rnn_conc',
        'pool_size': pool_size,
        'mode': 'max'
    })])
    #------------------------------------------------------------------dence layers
    l_dence_names = ['l_dence_{}'.format(i) for i, _ in enumerate(dence_l)]
    if dropout:
        layers.extend([(LL.DropoutLayer, {
            'name': 'l_dence_do',
            'p': dropout
        })])
    for i, d in enumerate(dence_l):
        if i < len(dence_l) - 1:
            nonlin = LN.tanh
        else:
            nonlin = LN.softmax
        layers.extend([(LL.DenseLayer, {
            'name': l_dence_names[i],
            'num_units': d,
            'nonlinearity': nonlin
        })])
        if i < len(dence_l) - 1 and dropout:
            layers.extend([(LL.DropoutLayer, {
                'name': l_dence_names[i] + 'do',
                'p': dropout
            })])

    if isinstance(valid_indices, (np.ndarray, list)):
        train_split = TrainSplit_indices(valid_indices=valid_indices)
    else:
        train_split = TrainSplit(eval_size=valid_indices, stratify=False)

    if permute_answ or permute_cont:
        batch_iterator_train = PermIterator(batch_size, permute_answ,
                                            permute_cont)
    else:
        batch_iterator_train = BatchIterator(batch_size=batch_size)

    def loss(x, t):
        return LO.aggregate(
            LO.categorical_crossentropy(T.clip(x, 1e-6, 1. - 1e-6), t))
#        return LO.aggregate(LO.squared_error(T.clip(x, 1e-6, 1. - 1e-6), t))

    nnet = NeuralNet(
        y_tensor_type=T.ivector,
        layers=layers,
        update=updates,
        update_learning_rate=lr,
        #            update_epsilon=1e-7,
        objective_loss_function=loss,
        regression=False,
        verbose=2,
        batch_iterator_train=batch_iterator_train,
        batch_iterator_test=BatchIterator(batch_size=batch_size // 2),
        #            batch_iterator_train=BatchIterator(batch_size=batch_size),
        #            batch_iterator_test=BatchIterator(batch_size=batch_size),
        #train_split=TrainSplit(eval_size=eval_size)
        train_split=train_split,
        on_batch_finished=[zero_memnn])
    nnet.initialize()
    PrintLayerInfo()(nnet)
    return nnet
Example #8
def local_test(feature_vectors_df, labels_df, k=2):
    # labeltjes = [None] * labels_df.values.shape[0]
    labeltjes = labels_df.values
    print labeltjes.shape
    labeltjes -= 1  # shift the labels down by one
    labeltjes = labeltjes.ravel().tolist()
    kf = StratifiedKFold(labeltjes, n_folds=k, shuffle=True)
    # kf = StratifiedKFold(len(feature_vectors_df.index), n_folds=k, shuffle=True)
    # kf = KFold(500, n_folds=k, shuffle=True, random_state=1337)
    confusion_matrices_folds = []

    for train, test in kf:
        # Divide the train_images in a training and validation set (using KFold)
        X_train = feature_vectors_df.values[train, :]
        X_test = feature_vectors_df.values[test, :]

        y_train = [labeltjes[i] for i in train]
        y_test = [labeltjes[i] for i in test]

        # Build the neural network classifier for this fold
        model = build_nn(nr_features=X_train.shape[1])
        model.initialize()
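        # Print a summary of the newly built network's layers for this fold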
        layer_info = PrintLayerInfo()
        layer_info(model)

        # Fit our model

        y_train = np.reshape(np.asarray(y_train, dtype='int32'),
                             (-1, 1)).ravel()
        # print y_train
        # print "Train feature vectors shape: " + X_train.shape.__str__()
        # print "Train labels shape:" + len(y_train).__str__()
        #
        # print "X_train as array shape: " + str(X_train.shape)
        # print "y_train as array shape: " + str(np.reshape(np.asarray(y_train), (-1, 1)).shape)

        model.fit(X_train, np.reshape(np.asarray(y_train), (-1, 1)).ravel())

        preds = model.predict(X_test)
        # Collect the predictions that match the true labels
        c = [preds[i] for i in range(min(len(y_test), len(preds)))
             if preds[i] == y_test[i]]
        # checks = len([i for i, j in zip(preds, np.reshape(np.asarray(y_train), (-1, 1))) if i == j])

        del model
        # Save the confusion matrix for this fold and plot it
        confusion_matrix = sklearn.metrics.confusion_matrix(y_test, preds)
        confusion_matrices_folds.append(confusion_matrix)

        # print preds.tolist()
        # print "number of ones: " + str(sum(preds))
        # print y_test
        # print c
        print "Accuracy for fold: " + str(
            ((len(c) * 1.0) / (len(y_test) * 1.0)
             )) + "\n\n\n\n\n-----------------------------\n\n\n"
    # Plot the average of the per-fold confusion matrices
    avg_confusion_matrix = confusion_matrices_folds[0] * 1.0
    for i in range(1, len(confusion_matrices_folds)):
        avg_confusion_matrix += confusion_matrices_folds[i]
    avg_confusion_matrix /= len(confusion_matrices_folds)
    metrics.plot_confusion_matrix(avg_confusion_matrix)
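
# Hypothetical invocation of local_test above; feature_vectors_df and labels_df
# are assumed to be pandas DataFrames of features and 1-based integer labels:
#
#     local_test(feature_vectors_df, labels_df, k=5)
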
def run(LEARNING_RATE=0.04,  UPDATE_MOMENTUM=0.9,UPDATE_RHO=None, NUM_OF_EPOCH=50, OUTPUT_SIZE = 20 , input_width=300, input_height=140,
                    dataset='ISH.pkl.gz', TRAIN_VALIDATION_SPLIT=0.2, #activation=lasagne.nonlinearities.tanh, #rectify
                    NUM_UNITS_HIDDEN_LAYER=[5, 10, 20, 40], BATCH_SIZE=40, toShuffleInput = False , withZeroMeaning = False):
    
    global counter
    FILE_PREFIX =  os.path.split(dataset)[1][:-6] #os.path.split(__file__)[1][:-3]
    FOLDER_PREFIX = "results/"+FILE_PREFIX+"/run_"+str(counter)+"/"
    if not os.path.exists(FOLDER_PREFIX):
        os.makedirs(FOLDER_PREFIX)
    
    PARAMS_FILE_NAME = FOLDER_PREFIX + "parameters.txt"
    HIDDEN_LAYER_OUTPUT_FILE_NAME = FOLDER_PREFIX +"hiddenLayerOutput.pickle"
    FIG_FILE_NAME = FOLDER_PREFIX + "fig"
    PICKLES_NET_FILE_NAME = FOLDER_PREFIX + "picklesNN.pickle"
    SVM_FILE_NAME = FOLDER_PREFIX + "svmData.txt"
#     VALIDATION_FILE_NAME = "results/"+os.path.split(__file__)[1][:-3]+"_validation_"+str(counter)+".txt"
#     PREDICTION_FILE_NAME = "results/"+os.path.split(__file__)[1][:-3]+"_prediction.txt"
    counter +=1

    outputFile = open(PARAMS_FILE_NAME, "w")   
    
    def load2d(dataset='ISH.pkl.gz', toShuffleInput = False , withZeroMeaning = False):
        print 'loading data...'   
    
        datasets = load_data(dataset, toShuffleInput, withZeroMeaning)
    
        train_set_x, train_set_y = datasets[0]
    #     valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]
        
        train_set_x = train_set_x.reshape(-1, 1, input_width, input_height)
#         valid_set_x = valid_set_x.reshape(-1, 1, input_width, input_height)
        test_set_x = test_set_x.reshape(-1, 1, input_width, input_height)

        print(train_set_x.shape[0], 'train samples')
        return train_set_x, train_set_y, test_set_x, test_set_y 

    def createNNwithMomentom(input_height, input_width):
        net2 = NeuralNet(layers=[
                ('input', layers.InputLayer), 
                ('conv1', layers.Conv2DLayer), 
                ('pool1', layers.MaxPool2DLayer), 
                ('conv2', layers.Conv2DLayer), 
                ('pool2', layers.MaxPool2DLayer), 
                ('conv3', layers.Conv2DLayer), 
                ('pool3', layers.MaxPool2DLayer), 
                ('conv4', layers.Conv2DLayer), 
                ('pool4', layers.MaxPool2DLayer), 
                ('hidden5', layers.DenseLayer), 
                ('hidden6', layers.DenseLayer), 
                ('hidden7', layers.DenseLayer), 
                ('output', layers.DenseLayer)], 
            input_shape=(None, 1, input_width, input_height), 
            conv1_num_filters=NUM_UNITS_HIDDEN_LAYER[0], conv1_filter_size=(5, 5), pool1_pool_size=(2, 2), 
            conv2_num_filters=NUM_UNITS_HIDDEN_LAYER[1], conv2_filter_size=(9, 9), pool2_pool_size=(2, 2), 
            conv3_num_filters=NUM_UNITS_HIDDEN_LAYER[2], conv3_filter_size=(11, 11), pool3_pool_size=(4, 2), 
            conv4_num_filters=NUM_UNITS_HIDDEN_LAYER[3], conv4_filter_size=(8, 5), pool4_pool_size=(2, 2), 
            hidden5_num_units=500, hidden6_num_units=200, hidden7_num_units=100, 
            output_num_units=20, output_nonlinearity=None, 
            update_learning_rate=LEARNING_RATE, 
            update_momentum=UPDATE_MOMENTUM,
            update=nesterov_momentum, 
            train_split=TrainSplit(eval_size=TRAIN_VALIDATION_SPLIT), 
            batch_iterator_train=BatchIterator(batch_size=BATCH_SIZE), 
            regression=True, 
            max_epochs=NUM_OF_EPOCH, 
            verbose=1, 
            hiddenLayer_to_output=-2)
    #         on_training_finished=last_hidden_layer,
        return net2
    
    def createNNwithDecay(input_height, input_width):
        net2 = NeuralNet(layers=[
                ('input', layers.InputLayer), 
                ('conv1', layers.Conv2DLayer), 
                ('pool1', layers.MaxPool2DLayer), 
                ('conv2', layers.Conv2DLayer), 
                ('pool2', layers.MaxPool2DLayer), 
                ('conv3', layers.Conv2DLayer), 
                ('pool3', layers.MaxPool2DLayer), 
                ('conv4', layers.Conv2DLayer), 
                ('pool4', layers.MaxPool2DLayer), 
                ('hidden5', layers.DenseLayer), 
                ('hidden6', layers.DenseLayer), 
                ('hidden7', layers.DenseLayer), 
                ('output', layers.DenseLayer)], 
            input_shape=(None, 1, input_width, input_height), 
            conv1_num_filters=NUM_UNITS_HIDDEN_LAYER[0], conv1_filter_size=(5, 5), pool1_pool_size=(2, 2), 
            conv2_num_filters=NUM_UNITS_HIDDEN_LAYER[1], conv2_filter_size=(9, 9), pool2_pool_size=(2, 2), 
            conv3_num_filters=NUM_UNITS_HIDDEN_LAYER[2], conv3_filter_size=(11, 11), pool3_pool_size=(4, 2), 
            conv4_num_filters=NUM_UNITS_HIDDEN_LAYER[3], conv4_filter_size=(8, 5), pool4_pool_size=(2, 2), 
            hidden5_num_units=500, hidden6_num_units=200, hidden7_num_units=100, 
            output_num_units=20, output_nonlinearity=None, 
            update_learning_rate=LEARNING_RATE, 
            update_rho=UPDATE_RHO, 
            update=rmsprop, 
            train_split=TrainSplit(eval_size=TRAIN_VALIDATION_SPLIT), 
            batch_iterator_train=BatchIterator(batch_size=BATCH_SIZE), 
            regression=True, 
            max_epochs=NUM_OF_EPOCH, 
            verbose=1, 
            hiddenLayer_to_output=-2)
    #         on_training_finished=last_hidden_layer,
        return net2
  
    def last_hidden_layer(s, h):
        
        print s.output_last_hidden_layer_(X)
#         input_layer = s.get_all_layers()[0]
#         last_h_layer = s.get_all_layers()[-2]
#         f = theano.function(s.X_inputs, last_h_layer.get_output(last_h_layer),allow_input_downcast=True)
 
#         myFunc = theano.function(
#                     inputs=s.input_X,
#                     outputs=s.h_predict,
#                     allow_input_downcast=True,
#                     )
#         print s.output_last_hidden_layer_(X,-2)

    def writeOutputFile(outputFile,train_history,layer_info):
        # save the network's parameters
        outputFile.write("Validation set Prediction rate is: "+str((1-train_history[-1]['valid_accuracy'])*100) + "%\n")
        outputFile.write("Run time[minutes] is: "+str(run_time) + "\n\n")
        
        outputFile.write("Learning rate: " + str(LEARNING_RATE) + "\n")
        outputFile.write("Momentum: " + str(UPDATE_MOMENTUM) + "\n") if (UPDATE_RHO == None) else outputFile.write("Decay Factor: " + str(UPDATE_RHO) + "\n")
        outputFile.write("Batch size: " + str(BATCH_SIZE) + "\n")
        outputFile.write("Num epochs: " + str(NUM_OF_EPOCH) + "\n")
        outputFile.write("Num units hidden layers: " + str(NUM_UNITS_HIDDEN_LAYER) + "\n")
#         outputFile.write("activation func: " + str(activation) + "\n")
        outputFile.write("Train/validation split: " + str(TRAIN_VALIDATION_SPLIT) + "\n")
        outputFile.write("toShuffleInput: " + str(toShuffleInput) + "\n")
        outputFile.write("withZeroMeaning: " + str(withZeroMeaning) + "\n\n")
        
        outputFile.write("history: " + str(train_history) + "\n\n")
        outputFile.write("layer_info:\n" + str(layer_info) + "\n")

    start_time = time.clock()
       
    
    net2 = createNNwithMomentom(input_height, input_width) if UPDATE_RHO == None else createNNwithDecay(input_height, input_width)   
    
    
    X, y, test_x, test_y  = load2d()  # load 2-d data
    net2.fit(X, y)       
    
    run_time = (time.clock() - start_time) / 60.
    

    print "outputing last hidden layer"
    train_last_hiddenLayer = net2.output_hiddenLayer(X)
    test_last_hiddenLayer = net2.output_hiddenLayer(test_x)
#     ohlFile = open(HIDDEN_LAYER_OUTPUT_FILE_NAME+".txt", "w")
#     for line in train_last_hiddenLayer:
#         ohlFile.write(str(line) + "\n")  
    with open(HIDDEN_LAYER_OUTPUT_FILE_NAME, 'wb') as f:
        ob = (train_last_hiddenLayer, y, test_last_hiddenLayer, test_y)
        pickle.dump(ob, f, -1)

    
    writeOutputFile(outputFile, net2.train_history_, PrintLayerInfo()._get_layer_info_plain(net2))

    errorRates = runSvm(HIDDEN_LAYER_OUTPUT_FILE_NAME)
    errorRate = np.average(errorRates)

    outputFile.write("\nSVM Total Prediction rate is: "+str(100-errorRate) + "\n\n")
    outputFile.write("SVM Error rate is:\n"+str(errorRates) + "\n")

    outputFile.close()

#     write svm data
#     writeDataToFile(HIDDEN_LAYER_OUTPUT_FILE_NAME,SVM_FILE_NAME)
    
    
    ##############################################
    train_loss = np.array([i["train_loss"] for i in net2.train_history_])
    valid_loss = np.array([i["valid_loss"] for i in net2.train_history_])
    pyplot.plot(train_loss, linewidth=3, label="train")
    pyplot.plot(valid_loss, linewidth=3, label="valid")
    pyplot.grid()
    pyplot.legend()
    pyplot.xlabel("epoch")
    pyplot.ylabel("loss")
    pyplot.ylim(1e-3, 1)
    pyplot.yscale("log")
    pyplot.savefig(FIG_FILE_NAME)
    
    #################################################
    # def plot_sample(x, y, axis):
    #     img = x.reshape(96, 96)
    #     axis.imshow(img, cmap='gray')
    #     axis.scatter(y[0::2] * 48 + 48, y[1::2] * 48 + 48, marker='x', s=10)
    # 
    # X, _ = load(test=True)
    # y_pred = net1.predict(X)
    # 
    # fig = pyplot.figure(figsize=(6, 6))
    # fig.subplots_adjust(
    #     left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
    # 
    # for i in range(16):
    #     ax = fig.add_subplot(4, 4, i + 1, xticks=[], yticks=[])
    #     plot_sample(X[i], y_pred[i], ax)
    # 
    # pyplot.show()
    
    ########## pickle the network ##########
    print "pickling"    
    with open(PICKLES_NET_FILE_NAME, 'wb') as f:
        pickle.dump(net2, f, -1)
Example #10
def run(loadedData=None,FOLDER_NAME="defualt",LEARNING_RATE=0.04, UPDATE_MOMENTUM=0.9, UPDATE_RHO=None, NUM_OF_EPOCH=15, input_width=300, input_height=140,
                    dataset='withOutDataSet', TRAIN_VALIDATION_SPLIT=0.2, MULTI_POSITIVES=20, dropout_percent=0.1, USE_NUM_CAT=20,end_index=16351, #activation=lasagne.nonlinearities.tanh, #rectify
                    NUM_UNITS_HIDDEN_LAYER=[5, 10, 20, 40], BATCH_SIZE=40, toShuffleInput = False , withZeroMeaning = False):
    
    global counter
#     FILE_PREFIX =  os.path.split(dataset)[1][:-6] #os.path.split(__file__)[1][:-3]
    FOLDER_PREFIX = "results/"+FOLDER_NAME+"/run_"+str(counter)+"/"
    if not os.path.exists(FOLDER_PREFIX):
        os.makedirs(FOLDER_PREFIX)
    
    PARAMS_FILE_NAME = FOLDER_PREFIX + "parameters.txt"
    HIDDEN_LAYER_OUTPUT_FILE_NAME = FOLDER_PREFIX +"hiddenLayerOutput.pkl.gz"
    FIG_FILE_NAME = FOLDER_PREFIX + "fig"
    PICKLES_NET_FILE_NAME = FOLDER_PREFIX + "picklesNN.pkl.gz"
    SVM_FILE_NAME = FOLDER_PREFIX + "svmData.txt"
#     VALIDATION_FILE_NAME = "results/"+os.path.split(__file__)[1][:-3]+"_validation_"+str(counter)+".txt"
#     PREDICTION_FILE_NAME = "results/"+os.path.split(__file__)[1][:-3]+"_prediction.txt"
    counter +=1

    outputFile = open(PARAMS_FILE_NAME, "w")   
    
 

    def createNNwithMomentom(input_height, input_width):
        if USE_NUM_CAT==20:
            outputLayerSize=20
        else:
            outputLayerSize=15
            
        net2 = NeuralNet(layers=[
                ('input', layers.InputLayer), 
                ('conv1', layers.Conv2DLayer), 
                ('pool1', layers.MaxPool2DLayer), 
                ('conv2', layers.Conv2DLayer), 
                ('pool2', layers.MaxPool2DLayer), 
                ('conv3', layers.Conv2DLayer), 
                ('pool3', layers.MaxPool2DLayer), 
                ('conv4', layers.Conv2DLayer), 
                ('pool4', layers.MaxPool2DLayer), 
                ('hidden5', layers.DenseLayer), 
                ('hidden6', layers.DenseLayer), 
                ('hidden7', layers.DenseLayer), 
                ('output', layers.DenseLayer)], 
            input_shape=(None, 1, input_width, input_height), 
            conv1_num_filters=NUM_UNITS_HIDDEN_LAYER[0], conv1_filter_size=(5, 5), pool1_pool_size=(2, 2), 
            conv2_num_filters=NUM_UNITS_HIDDEN_LAYER[1], conv2_filter_size=(9, 9), pool2_pool_size=(2, 2), 
            conv3_num_filters=NUM_UNITS_HIDDEN_LAYER[2], conv3_filter_size=(11, 11), pool3_pool_size=(4, 2), 
            conv4_num_filters=NUM_UNITS_HIDDEN_LAYER[3], conv4_filter_size=(8, 5), pool4_pool_size=(2, 2), 
            hidden5_num_units=500, hidden6_num_units=200, hidden7_num_units=100, 
            output_num_units=outputLayerSize, output_nonlinearity=None, 
            update_learning_rate=LEARNING_RATE, 
            update_momentum=UPDATE_MOMENTUM,
            update=nesterov_momentum, 
            train_split=TrainSplit(eval_size=TRAIN_VALIDATION_SPLIT), 
            batch_iterator_train=BatchIterator(batch_size=BATCH_SIZE), 
            regression=True, 
            max_epochs=NUM_OF_EPOCH, 
            verbose=1, 
            hiddenLayer_to_output=-2)
    #         on_training_finished=last_hidden_layer,
        return net2
    
    def createNNwithDecay(input_height, input_width):
        if USE_NUM_CAT==20:
            outputLayerSize=20
        else:
            outputLayerSize=15

        net2 = NeuralNet(layers=[
                ('input', layers.InputLayer), 
                ('conv1', layers.Conv2DLayer), 
                ('pool1', layers.MaxPool2DLayer), 
                ('conv2', layers.Conv2DLayer), 
                ('pool2', layers.MaxPool2DLayer), 
                ('conv3', layers.Conv2DLayer), 
                ('pool3', layers.MaxPool2DLayer), 
                ('conv4', layers.Conv2DLayer), 
                ('pool4', layers.MaxPool2DLayer), 
                ('hidden5', layers.DenseLayer), 
                ('hidden6', layers.DenseLayer), 
                ('hidden7', layers.DenseLayer), 
                ('output', layers.DenseLayer)], 
            input_shape=(None, 1, input_width, input_height), 
            conv1_num_filters=NUM_UNITS_HIDDEN_LAYER[0], conv1_filter_size=(5, 5), pool1_pool_size=(2, 2), 
            conv2_num_filters=NUM_UNITS_HIDDEN_LAYER[1], conv2_filter_size=(9, 9), pool2_pool_size=(2, 2), 
            conv3_num_filters=NUM_UNITS_HIDDEN_LAYER[2], conv3_filter_size=(11, 11), pool3_pool_size=(4, 2), 
            conv4_num_filters=NUM_UNITS_HIDDEN_LAYER[3], conv4_filter_size=(8, 5), pool4_pool_size=(2, 2), 
            hidden5_num_units=500, hidden6_num_units=200, hidden7_num_units=100, 
            output_num_units=outputLayerSize, output_nonlinearity=None, 
            update_learning_rate=LEARNING_RATE, 
            update_rho=UPDATE_RHO, 
            update=rmsprop, 
            train_split=TrainSplit(eval_size=TRAIN_VALIDATION_SPLIT), 
            batch_iterator_train=BatchIterator(batch_size=BATCH_SIZE), 
            regression=True, 
            max_epochs=NUM_OF_EPOCH, 
            verbose=1, 
            hiddenLayer_to_output=-2)
    #         on_training_finished=last_hidden_layer,
        return net2
  
    def last_hidden_layer(s, h):
        
        print s.output_last_hidden_layer_(X)
#         input_layer = s.get_all_layers()[0]
#         last_h_layer = s.get_all_layers()[-2]
#         f = theano.function(s.X_inputs, last_h_layer.get_output(last_h_layer),allow_input_downcast=True)
 
#         myFunc = theano.function(
#                     inputs=s.input_X,
#                     outputs=s.h_predict,
#                     allow_input_downcast=True,
#                     )
#         print s.output_last_hidden_layer_(X,-2)

    def outputLastLayer_CNN(net2, X, y, test_x, test_y):
        print "outputing last hidden layer" #     train_last_hiddenLayer = net2.output_hiddenLayer(X)
        quarter_x = np.floor(X.shape[0] / 4)
        train_last_hiddenLayer1 = net2.output_hiddenLayer(X[:quarter_x])
        print "after first quarter train output"
        train_last_hiddenLayer2 = net2.output_hiddenLayer(X[quarter_x:2 * quarter_x])
        print "after seconed quarter train output"
        train_last_hiddenLayer3 = net2.output_hiddenLayer(X[2 * quarter_x:3 * quarter_x])
        print "after third quarter train output"
        train_last_hiddenLayer4 = net2.output_hiddenLayer(X[3 * quarter_x:])
        print "after all train output"
        test_last_hiddenLayer = net2.output_hiddenLayer(test_x)
        print "after test output" #     lastLayerOutputs = (train_last_hiddenLayer,y,test_last_hiddenLayer,test_y)
        lastLayerOutputs = np.concatenate((train_last_hiddenLayer1, train_last_hiddenLayer2, train_last_hiddenLayer3, train_last_hiddenLayer4), axis=0), y, test_last_hiddenLayer, test_y
        return lastLayerOutputs

    def writeOutputFile(outputFile,train_history,layer_info):
        # save the network's parameters
        outputFile.write("Validation set Prediction rate is: "+str((1-train_history[-1]['valid_accuracy'])*100) + "%\n")
        outputFile.write("Run time[minutes] is: "+str(run_time) + "\n\n")
        
        outputFile.write("Training NN on: " + ("20 Top Categorys\n" if USE_NUM_CAT==20 else "Article Categorys\n"))
        outputFile.write("Learning rate: " + str(LEARNING_RATE) + "\n")
        outputFile.write(("Momentum: " + str(UPDATE_MOMENTUM)+ "\n") if (UPDATE_RHO == None) else ("Decay Factor: " + str(UPDATE_RHO)+ "\n") )
        outputFile.write("Batch size: " + str(BATCH_SIZE) + "\n")
        outputFile.write("Num epochs: " + str(NUM_OF_EPOCH) + "\n")
        outputFile.write("Num units hidden layers: " + str(NUM_UNITS_HIDDEN_LAYER) + "\n\n")
#         outputFile.write("activation func: " + str(activation) + "\n")
        outputFile.write("Multipuly Positives by: " + str(MULTI_POSITIVES) + "\n")
        outputFile.write("New Positives Dropout rate: " + str(dropout_percent) + "\n")
        outputFile.write("Train/validation split: " + str(TRAIN_VALIDATION_SPLIT) + "\n")
        outputFile.write("toShuffleInput: " + str(toShuffleInput) + "\n")
        outputFile.write("withZeroMeaning: " + str(withZeroMeaning) + "\n\n")
        
        outputFile.write("history: " + str(train_history) + "\n\n")
        outputFile.write("layer_info:\n" + str(layer_info) + "\n")
        
        outputFile.flush()
        
        
    start_time = time.clock()
    print "Start time: " , time.ctime()
       
    
    net2 = createNNwithMomentom(input_height, input_width) if UPDATE_RHO == None else createNNwithDecay(input_height, input_width)   
    
    if loadedData is None:
    
        X, y, test_x, test_y  = load2d(USE_NUM_CAT,outputFile,input_width,input_height,end_index,MULTI_POSITIVES,dropout_percent)  # load 2-d data
    else:
        X, y, test_x, test_y = loadedData
    
    net2.fit(X, y)       

    run_time = (time.clock() - start_time) / 60.
    
    writeOutputFile(outputFile, net2.train_history_, PrintLayerInfo()._get_layer_info_plain(net2))

    lastLayerOutputs = outputLastLayer_CNN(net2, X, y, test_x, test_y)

    
    
    print "running Category Classifier"    
    errorRates, aucScores = runSvm(lastLayerOutputs,15) #HIDDEN_LAYER_OUTPUT_FILE_NAME,15)
#     errorRates, aucScores = runCrossSvm(lastLayerOutputs,15)
#     errorRates, aucScores = runNNclassifier(lastLayerOutputs,15)

    errorRate = np.average(errorRates)
    aucScore = np.average(aucScores)
    
    
    outputFile.write("\nClassifiers Total Prediction rate is: "+str(100-errorRate) + "\n\n")
    outputFile.write("Classifiers Error rates are:\n"+str(errorRates) + "\n")
    outputFile.write("\nClassifiers Total AUC Score is: "+str(aucScore) + "\n\n")
    outputFile.write("Classifiers AUC Scores are:\n"+str(aucScores) + "\n")
    outputFile.close()
    
    print "saving last layer outputs"
#     with open(HIDDEN_LAYER_OUTPUT_FILE_NAME,'wb') as f:
#         pickle.dump(lastLayerOutputs, f, -1)
#         f.close()
    f = gzip.open(HIDDEN_LAYER_OUTPUT_FILE_NAME,'wb')
    cPickle.dump(lastLayerOutputs, f, protocol=2)
    f.close() 

#     write svm data
#     writeDataToFile(HIDDEN_LAYER_OUTPUT_FILE_NAME,SVM_FILE_NAME)
    
    
    ##############################################
    train_loss = np.array([i["train_loss"] for i in net2.train_history_])
    valid_loss = np.array([i["valid_loss"] for i in net2.train_history_])
    pyplot.plot(train_loss, linewidth=3, label="train")
    pyplot.plot(valid_loss, linewidth=3, label="valid")
    pyplot.grid()
    pyplot.legend()
    pyplot.xlabel("epoch")
    pyplot.ylabel("loss")
    pyplot.ylim(1e-3, 1)
    pyplot.yscale("log")
    pyplot.savefig(FIG_FILE_NAME)
    
    #################################################
    # def plot_sample(x, y, axis):
    #     img = x.reshape(96, 96)
    #     axis.imshow(img, cmap='gray')
    #     axis.scatter(y[0::2] * 48 + 48, y[1::2] * 48 + 48, marker='x', s=10)
    # 
    # X, _ = load(test=True)
    # y_pred = net1.predict(X)
    # 
    # fig = pyplot.figure(figsize=(6, 6))
    # fig.subplots_adjust(
    #     left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
    # 
    # for i in range(16):
    #     ax = fig.add_subplot(4, 4, i + 1, xticks=[], yticks=[])
    #     plot_sample(X[i], y_pred[i], ax)
    # 
    # pyplot.show()
    
    ########## pickle the network ##########
    print "pickling"    
#     with open(PICKLES_NET_FILE_NAME,'wb') as f:
#         pickle.dump(net2, f, -1)
#         f.close()
    f = gzip.open(PICKLES_NET_FILE_NAME,'wb')
    cPickle.dump(net2, f, protocol=2)
    f.close()
Example #11
def run(LEARNING_RATE=0.04,  UPDATE_MOMENTUM=0.9, NUM_OF_EPOCH=50, OUTPUT_SIZE = 20 , input_width=300, input_height=140,
                    dataset='ISH.pkl.gz', TRAIN_VALIDATION_SPLIT=0.2, #activation=lasagne.nonlinearities.tanh, #rectify
                    NUM_UNITS_HIDDEN_LAYER=[5, 10, 20, 40], BATCH_SIZE=40, toShuffleInput = False , withZeroMeaning = False):
    
    global counter
    FILE_PREFIX =  os.path.split(dataset)[1][4:16] #os.path.split(__file__)[1][:-3]
    PARAMS_FILE_NAME = "results/"+FILE_PREFIX+"_parameters_"+str(counter)+".txt"
    FIG_FILE_NAME = "results/"+FILE_PREFIX+"_fig_"+str(counter)
    PICKLES_NET_FILE_NAME = "results/"+FILE_PREFIX+"_picklesNN_"+str(counter)+".pickle"
#     VALIDATION_FILE_NAME = "results/"+os.path.split(__file__)[1][:-3]+"_validation_"+str(counter)+".txt"
#     PREDICTION_FILE_NAME = "results/"+os.path.split(__file__)[1][:-3]+"_prediction.txt"
    counter +=1

    outputFile = open(PARAMS_FILE_NAME, "w")   
    
    def load2d(dataset='ISH.pkl.gz', toShuffleInput = False , withZeroMeaning = False):
        print 'loading data...'   
    
        datasets = load_data(dataset, toShuffleInput, withZeroMeaning)
    
        train_set_x, train_set_y = datasets[0]
    #     valid_set_x, valid_set_y = datasets[1]
    #     test_set_x, test_set_y = datasets[2]
        
        train_set_x = train_set_x.reshape(-1, 1, input_width, input_height)
        print(train_set_x.shape[0], 'train samples')
        return train_set_x, train_set_y


    def writeOutputFile(outputFile,train_history,layer_info):
        # save the network's parameters
        outputFile.write("error is: "+str(1-train_history[-1]['valid_accuracy']) + "\n")
        outputFile.write("time is: "+str(run_time) + "\n\n")
        
        outputFile.write("learning rate: " + str(LEARNING_RATE) + "\n")
        outputFile.write("momentum: " + str(UPDATE_MOMENTUM) + "\n")
        outputFile.write("batch size: " + str(BATCH_SIZE) + "\n")
        outputFile.write("num epochs: " + str(NUM_OF_EPOCH) + "\n")
        outputFile.write("num units hidden layers: " + str(NUM_UNITS_HIDDEN_LAYER) + "\n")
#         outputFile.write("activation func: " + str(activation) + "\n")
        outputFile.write("train/validation split: " + str(TRAIN_VALIDATION_SPLIT) + "\n")
        outputFile.write("toShuffleInput: " + str(toShuffleInput) + "\n")
        outputFile.write("withZeroMeaning: " + str(withZeroMeaning) + "\n\n")
        
        outputFile.write("history: " + str(train_history) + "\n\n")
        outputFile.write("layer_info:\n" + str(layer_info) + "\n")

    start_time = time.clock()
       
    
    net2 = NeuralNet(
        layers=[
            ('input', layers.InputLayer),
            ('conv1', layers.Conv2DLayer),
            ('pool1', layers.MaxPool2DLayer),
            ('conv2', layers.Conv2DLayer),
            ('pool2', layers.MaxPool2DLayer),
            ('conv3', layers.Conv2DLayer),
            ('pool3', layers.MaxPool2DLayer),
            ('conv4', layers.Conv2DLayer),
            ('pool4', layers.MaxPool2DLayer),
            ('hidden5', layers.DenseLayer),
            ('hidden6', layers.DenseLayer),
            ('hidden7', layers.DenseLayer),
            ('output', layers.DenseLayer),
            ],
        input_shape=(None, 1, input_width, input_height),
        conv1_num_filters=NUM_UNITS_HIDDEN_LAYER[0], conv1_filter_size=(5, 5), pool1_pool_size=(2, 2),
        conv2_num_filters=NUM_UNITS_HIDDEN_LAYER[1], conv2_filter_size=(9, 9), pool2_pool_size=(2, 2),
        conv3_num_filters=NUM_UNITS_HIDDEN_LAYER[2], conv3_filter_size=(11, 11), pool3_pool_size=(4, 2),
        conv4_num_filters=NUM_UNITS_HIDDEN_LAYER[3], conv4_filter_size=(8, 5), pool4_pool_size=(2, 2),
        hidden5_num_units=500, hidden6_num_units=200, hidden7_num_units=100,
        output_num_units=20, output_nonlinearity=None,
    
        update_learning_rate=LEARNING_RATE,
        update_momentum=UPDATE_MOMENTUM,
        train_split=TrainSplit(eval_size=TRAIN_VALIDATION_SPLIT),
        batch_iterator_train=BatchIterator(batch_size=BATCH_SIZE),
    
        regression=True,
        max_epochs=NUM_OF_EPOCH,
        verbose=1,
        )

    
    
    
    X, y = load2d()  # load 2-d data
    net2.fit(X, y)
       
    
    run_time = (time.clock() - start_time) / 60.
    
    writeOutputFile(outputFile, net2.train_history_, PrintLayerInfo()._get_layer_info_plain(net2))

    # import numpy as np
    # np.sqrt(0.003255) * 48
    
    ##############################################
    train_loss = np.array([i["train_loss"] for i in net2.train_history_])
    valid_loss = np.array([i["valid_loss"] for i in net2.train_history_])
    pyplot.plot(train_loss, linewidth=3, label="train")
    pyplot.plot(valid_loss, linewidth=3, label="valid")
    pyplot.grid()
    pyplot.legend()
    pyplot.xlabel("epoch")
    pyplot.ylabel("loss")
    pyplot.ylim(1e-3, 1)
    pyplot.yscale("log")
    pyplot.savefig(FIG_FILE_NAME)
    
    #################################################
    # def plot_sample(x, y, axis):
    #     img = x.reshape(96, 96)
    #     axis.imshow(img, cmap='gray')
    #     axis.scatter(y[0::2] * 48 + 48, y[1::2] * 48 + 48, marker='x', s=10)
    # 
    # X, _ = load(test=True)
    # y_pred = net1.predict(X)
    # 
    # fig = pyplot.figure(figsize=(6, 6))
    # fig.subplots_adjust(
    #     left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
    # 
    # for i in range(16):
    #     ax = fig.add_subplot(4, 4, i + 1, xticks=[], yticks=[])
    #     plot_sample(X[i], y_pred[i], ax)
    # 
    # pyplot.show()
    
    ########## pickle the network ##########
    print "pickling"    
    with open(PICKLES_NET_FILE_NAME, 'wb') as f:
        pickle.dump(net2, f, -1)
Example #12
def run(loadedData=None, learning_rate=0.04, update_momentum=0.9, update_rho=None, epochs=15,
        input_width=300, input_height=140, train_valid_split=0.2, multiple_positives=20, flip_batch=True,
        dropout_percent=0.1, end_index=16351, activation=None, last_layer_activation=None, batch_size=32,
        layers_size=[5, 10, 20, 40], shuffle_input=False, zero_meaning=False, filters_type=3,
        input_noise_rate=0.3, pre_train_epochs=1, softmax_train_epochs=2, fine_tune_epochs=2,
        categories=15, folder_name="default", dataset='withOutDataSet'):

    global counter
    folder_path = "results_dae"+FILE_SEPARATOR + folder_name + FILE_SEPARATOR + "run_" + str(counter) + FILE_SEPARATOR
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)

    PARAMS_FILE_NAME = folder_path + "parameters.txt"
    HIDDEN_LAYER_OUTPUT_FILE_NAME = folder_path + "hiddenLayerOutput.pkl.gz"
    FIG_FILE_NAME = folder_path + "fig"
    PICKLES_NET_FILE_NAME = folder_path + "picklesNN.pkl.gz"
    SVM_FILE_NAME = folder_path + "svmData.txt"
    LOG_FILE_NAME = folder_path + "message.log"

    All_Results_FIle = "results_dae"+FILE_SEPARATOR + "all_results.txt"


    #     old_stdout = sys.stdout
    #     print "less",LOG_FILE_NAME
    log_file = open(LOG_FILE_NAME, "w")
    #     sys.stdout = log_file

    counter += 1

    output_file = open(PARAMS_FILE_NAME, "w")
    results_file = open(All_Results_FIle, "a")

    if filters_type == 3:
        filter_1 = (3, 3)
        filter_2 = (3, 3)
        filter_3 = (3, 3)
        filter_4 = (3, 3)
        filter_5 = (3, 3)
        filter_6 = (3, 3)
    elif filters_type == 5:
        filter_1 = (5, 5)
        filter_2 = (5, 5)
        filter_3 = (5, 5)
        filter_4 = (5, 5)
        filter_5 = (5, 5)
        filter_6 = (5, 5)
    elif filters_type == 7:
        filter_1 = (7, 7)
        filter_2 = (7, 7)
        filter_3 = (5, 5)
        filter_4 = (7, 7)
        filter_5 = (7, 7)
        filter_6 = (5, 5)
    elif filters_type == 9:
        filter_1 = (9, 9)
        filter_2 = (7, 7)
        filter_3 = (5, 5)
        filter_4 = (7, 7)
        filter_5 = (9, 9)
        filter_6 = (5, 5)
    else:
        raise ValueError("Unsupported filters_type: %r (expected 3, 5, 7 or 9)" % filters_type)

    def createCSAE(input_height, input_width, X_train, X_out):

        # Corrupt the input with Bernoulli (dropout) noise before training the denoising autoencoder
        X_train = np.random.binomial(1, 1-dropout_percent, size=X_train.shape) * X_train

        cnn = NeuralNet(layers=[
            ('input', layers.InputLayer),
            ('conv1', layers.Conv2DLayer),
            ('conv11', layers.Conv2DLayer),
            # ('conv12', layers.Conv2DLayer),
            ('pool1', layers.MaxPool2DLayer),
            ('conv2', layers.Conv2DLayer),
            ('conv21', layers.Conv2DLayer),
            # ('conv22', layers.Conv2DLayer),
            ('pool2', layers.MaxPool2DLayer),
            ('conv3', layers.Conv2DLayer),
            # ('conv31', layers.Conv2DLayer),
            ('conv32', layers.Conv2DLayer),
            ('unpool1', Unpool2DLayer),
            ('conv4', layers.Conv2DLayer),
            ('conv41', layers.Conv2DLayer),
            # ('conv42', layers.Conv2DLayer),
            ('unpool2', Unpool2DLayer),
            ('conv5', layers.Conv2DLayer),
            ('conv51', layers.Conv2DLayer),
            # ('conv52', layers.Conv2DLayer),
            ('conv6', layers.Conv2DLayer),
            ('output_layer', ReshapeLayer),
        ],

            input_shape=(None, 1, input_width, input_height),
            # Layer current size - 1x300x140

            conv1_num_filters=layers_size[0], conv1_filter_size=filter_1, conv1_nonlinearity=activation,
            # conv1_border_mode="same",
            conv1_pad="same",
            conv11_num_filters=layers_size[0], conv11_filter_size=filter_1, conv11_nonlinearity=activation,
            # conv11_border_mode="same",
            conv11_pad="same",
            # conv12_num_filters=layers_size[0], conv12_filter_size=filter_1, conv12_nonlinearity=activation,
            # # conv12_border_mode="same",
            # conv12_pad="same",

            pool1_pool_size=(2, 2),

            conv2_num_filters=layers_size[1], conv2_filter_size=filter_2, conv2_nonlinearity=activation,
            # conv2_border_mode="same",
            conv2_pad="same",
            conv21_num_filters=layers_size[1], conv21_filter_size=filter_2, conv21_nonlinearity=activation,
            # conv21_border_mode="same",
            conv21_pad="same",
            # conv22_num_filters=layers_size[1], conv22_filter_size=filter_2, conv22_nonlinearity=activation,
            # # conv22_border_mode="same",
            # conv22_pad="same",

            pool2_pool_size=(2, 2),

            conv3_num_filters=layers_size[2], conv3_filter_size=filter_3, conv3_nonlinearity=activation,
            # conv3_border_mode="same",
            conv3_pad="same",
            # conv31_num_filters=layers_size[2], conv31_filter_size=filter_3, conv31_nonlinearity=activation,
            # # conv31_border_mode="same",
            # conv31_pad="same",
            conv32_num_filters=1, conv32_filter_size=filter_3, conv32_nonlinearity=activation,
            # conv32_border_mode="same",
            conv32_pad="same",

            unpool1_ds=(2, 2),

            conv4_num_filters=layers_size[3], conv4_filter_size=filter_4, conv4_nonlinearity=activation,
            # conv4_border_mode="same",
            conv4_pad="same",
            conv41_num_filters=layers_size[3], conv41_filter_size=filter_4, conv41_nonlinearity=activation,
            # conv41_border_mode="same",
            conv41_pad="same",
            # conv42_num_filters=layers_size[3], conv42_filter_size=filter_4, conv42_nonlinearity=activation,
            # # conv42_border_mode="same",
            # conv42_pad="same",

            unpool2_ds=(2, 2),

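            # Note: conv5/conv51 index layers_size[4], so layers_size needs at least five entries
            # (the default [5, 10, 20, 40] is one short for this branch).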
            conv5_num_filters=layers_size[4], conv5_filter_size=filter_5, conv5_nonlinearity=activation,
            # conv5_border_mode="same",
            conv5_pad="same",
            conv51_num_filters=layers_size[4], conv51_filter_size=filter_5, conv51_nonlinearity=activation,
            # conv51_border_mode="same",
            conv51_pad="same",
            # conv52_num_filters=layers_size[4], conv52_filter_size=filter_5, conv52_nonlinearity=activation,
            # # conv52_border_mode="same",
            # conv52_pad="same",

            conv6_num_filters=1, conv6_filter_size=filter_6, conv6_nonlinearity=last_layer_activation,
            # conv6_border_mode="same",
            conv6_pad="same",

            output_layer_shape=(([0], -1)),

            update_learning_rate=learning_rate,
            update_momentum=update_momentum,
            update=nesterov_momentum,
            train_split=TrainSplit(eval_size=train_valid_split),
            batch_iterator_train=FlipBatchIterator(batch_size=batch_size) if flip_batch else BatchIterator(batch_size=batch_size),
            regression=True,
            max_epochs=epochs,
            verbose=1,
            hiddenLayer_to_output=-9)

        cnn.fit(X_train, X_out)

        try:
            pickle.dump(cnn, open(folder_path + 'conv_ae.pkl', 'wb'))
            # cnn = pickle.load(open(folder_path + 'conv_ae.pkl', 'rb'))
            cnn.save_weights_to(folder_path + 'conv_ae.np')
        except Exception:
            print ("Could not pickle cnn")

        X_pred = cnn.predict(X_train).reshape(-1, input_height, input_width) # * sigma + mu
        # # X_pred = np.rint(X_pred).astype(int)
        # # X_pred = np.clip(X_pred, a_min=0, a_max=255)
        # # X_pred = X_pred.astype('uint8')
        #
        # try:
        #     trian_last_hiddenLayer = cnn.output_hiddenLayer(X_train)
        #     # test_last_hiddenLayer = cnn.output_hiddenLayer(test_x)
        #     pickle.dump(trian_last_hiddenLayer, open(folder_path + 'encode.pkl', 'w'))
        # except:
        #     print "Could not save encoded images"

        print ("Saving some images....")
        for i in range(10):
            index = np.random.randint(X_train.shape[0])
            print (index)

            def get_picture_array(X, index):
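                """Scale a [0, 1] float image to 0-255, clip, and upsample 4x for viewing."""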
                array = np.rint(X[index] * 256).astype(np.int).reshape(input_height, input_width)
                array = np.clip(array, a_min=0, a_max=255)
                return array.repeat(4, axis=0).repeat(4, axis=1).astype(np.uint8())

            original_image = Image.fromarray(get_picture_array(X_out, index))
            # original_image.save(folder_path + 'original' + str(index) + '.png', format="PNG")
            #
            # array = np.rint(trian_last_hiddenLayer[index] * 256).astype(np.int).reshape(input_height/2, input_width/2)
            # array = np.clip(array, a_min=0, a_max=255)
            # encode_image = Image.fromarray(array.repeat(4, axis=0).repeat(4, axis=1).astype(np.uint8()))
            # encode_image.save(folder_path + 'encode' + str(index) + '.png', format="PNG")

            new_size = (original_image.size[0] * 3, original_image.size[1])
            new_im = Image.new('L', new_size)
            new_im.paste(original_image, (0, 0))
            pred_image = Image.fromarray(get_picture_array(X_pred, index))
            # pred_image.save(folder_path + 'pred' + str(index) + '.png', format="PNG")
            new_im.paste(pred_image, (original_image.size[0], 0))

            noise_image = Image.fromarray(get_picture_array(X_train, index))
            new_im.paste(noise_image, (original_image.size[0]*2, 0))
            new_im.save(folder_path+'origin_prediction_noise-'+str(index)+'.png', format="PNG")

            # diff = ImageChops.difference(original_image, pred_image)
            # diff = diff.convert('L')
            # diff.save(folder_path + 'diff' + str(index) + '.png', format="PNG")

            # plt.imshow(new_im)
            # new_size = (original_image.size[0] * 2, original_image.size[1])
            # new_im = Image.new('L', new_size)
            # new_im.paste(original_image, (0, 0))
            # pred_image = Image.fromarray(get_picture_array(X_train, index))
            # # pred_image.save(folder_path + 'noisyInput' + str(index) + '.png', format="PNG")
            # new_im.paste(pred_image, (original_image.size[0], 0))
            # new_im.save(folder_path+'origin_VS_noise-'+str(index)+'.png', format="PNG")
            # plt.imshow(new_im)

        return cnn

    def createSAE(input_height, input_width, X_train, X_out):
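        """Greedily trained stack of dense autoencoders.

        Each net reconstructs the previous net's hidden output (10000 -> 3000 -> 1000 -> 300 units);
        the hidden layers are then chained together and the final encodings are pickled to disk.
        """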
        encode_size = 200

        cnn1 = NeuralNet(layers=[
            ('input', layers.InputLayer),
            ('hidden', layers.DenseLayer),
            ('hiddenOut', layers.DenseLayer),
            ('output_layer', ReshapeLayer),
        ],

            input_shape=(None, 1, input_width, input_height),
            hidden_num_units= 10000,
            hiddenOut_num_units= 42000,
            output_layer_shape = (([0], -1)),

            update_learning_rate=learning_rate,
            update_momentum=update_momentum,
            update=nesterov_momentum,
            train_split=TrainSplit(eval_size=train_valid_split),
            # batch_iterator_train=BatchIterator(batch_size=batch_size),
            batch_iterator_train=FlipBatchIterator(batch_size=batch_size),
            regression=True,
            max_epochs=epochs,
            verbose=1,
            hiddenLayer_to_output=-3)

        cnn1.fit(X_train, X_out)
        trian_last_hiddenLayer = cnn1.output_hiddenLayer(X_train)
        test_last_hiddenLayer = cnn1.output_hiddenLayer(test_x)

        cnn2 = NeuralNet(layers=[
            ('input', layers.InputLayer),
            ('hidden', layers.DenseLayer),
            ('output_layer', layers.DenseLayer),
        ],

            input_shape=(None,10000),
            hidden_num_units= 3000,
            output_layer_num_units = 10000,

            update_learning_rate=learning_rate,
            update_momentum=update_momentum,
            update=nesterov_momentum,
            train_split=TrainSplit(eval_size=train_valid_split),
            batch_iterator_train=BatchIterator(batch_size=batch_size),
            # batch_iterator_train=FlipBatchIterator(batch_size=batch_size),
            regression=True,
            max_epochs=epochs,
            verbose=1,
            hiddenLayer_to_output=-2)

        trian_last_hiddenLayer = trian_last_hiddenLayer.astype(np.float32)

        cnn2.fit(trian_last_hiddenLayer, trian_last_hiddenLayer)
        trian_last_hiddenLayer = cnn2.output_hiddenLayer(trian_last_hiddenLayer)
        test_last_hiddenLayer = cnn2.output_hiddenLayer(test_last_hiddenLayer)

        cnn3 = NeuralNet(layers=[
            ('input', layers.InputLayer),
            ('hidden', layers.DenseLayer),
            ('output_layer', layers.DenseLayer),
        ],

            input_shape=(None,3000),
            hidden_num_units= 1000,
            output_layer_num_units = 3000,

            update_learning_rate=learning_rate,
            update_momentum=update_momentum,
            update=nesterov_momentum,
            train_split=TrainSplit(eval_size=train_valid_split),
            batch_iterator_train=BatchIterator(batch_size=batch_size),
            # batch_iterator_train=FlipBatchIterator(batch_size=batch_size),
            regression=True,
            max_epochs=epochs,
            verbose=1,
            hiddenLayer_to_output=-2)

        trian_last_hiddenLayer = trian_last_hiddenLayer.astype(np.float32)
        cnn3.fit(trian_last_hiddenLayer, trian_last_hiddenLayer)
        trian_last_hiddenLayer = cnn3.output_hiddenLayer(trian_last_hiddenLayer)
        test_last_hiddenLayer = cnn3.output_hiddenLayer(test_last_hiddenLayer)

        cnn4 = NeuralNet(layers=[
            ('input', layers.InputLayer),
            ('hidden', layers.DenseLayer),
            ('output_layer', layers.DenseLayer),
        ],

            input_shape=(None,1000),
            hidden_num_units= 300,
            output_layer_num_units = 1000,

            update_learning_rate=learning_rate,
            update_momentum=update_momentum,
            update=nesterov_momentum,
            train_split=TrainSplit(eval_size=train_valid_split),
            batch_iterator_train=BatchIterator(batch_size=batch_size),
            # batch_iterator_train=FlipBatchIterator(batch_size=batch_size),
            regression=True,
            max_epochs=epochs,
            verbose=1,
            hiddenLayer_to_output=-2)

        trian_last_hiddenLayer = trian_last_hiddenLayer.astype(np.float32)
        cnn4.fit(trian_last_hiddenLayer, trian_last_hiddenLayer)
        trian_last_hiddenLayer = cnn4.output_hiddenLayer(trian_last_hiddenLayer)
        test_last_hiddenLayer = cnn4.output_hiddenLayer(test_last_hiddenLayer)


        input_layer = cnn1.get_all_layers()[0]
        hidden1_layer = cnn1.get_all_layers()[1]
        hidden1_layer.input_layer = input_layer
        hidden2_layer = cnn2.get_all_layers()[1]
        hidden2_layer.input_layer = hidden1_layer
        hidden3_layer = cnn3.get_all_layers()[1]
        hidden3_layer.input_layer = hidden2_layer
        final_layer = cnn4.get_all_layers()[1]
        final_layer.input_layer = hidden3_layer

        #         out_train = final_layer.get_output(x_train).eval()
        #         out_test = final_layer.get_output(test_x).eval()

        f = gzip.open(folder_path + "output.pkl.gz",'wb')
        cPickle.dump((trian_last_hiddenLayer, test_last_hiddenLayer), f, protocol=2)
        f.close()
        #         f = gzip.open("pickled_images/tmp.pkl.gz", 'rb')
        #         trian_last_hiddenLayer, test_last_hiddenLayer = cPickle.load(f)
        #         f.close()

        return cnn1

    def createCnn_AE(input_height, input_width):
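        """Convolutional autoencoder with a dense bottleneck of encode_size units between the
        conv/pool encoder and the unpool/deconv decoder. Returns the (unfitted) net."""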
        if categories==20:
            outputLayerSize=20
        else:
            outputLayerSize=15

        encode_size = 1024
        border_mode = "same"

        cnn = NeuralNet(layers=[
            ('input', layers.InputLayer),
            ('conv1', layers.Conv2DLayer),
            ('pool1', layers.MaxPool2DLayer),
            ('conv2', layers.Conv2DLayer),
            ('pool2', layers.MaxPool2DLayer),
            ('conv3', layers.Conv2DLayer),
            ('pool3', layers.MaxPool2DLayer),
            # ('conv4', layers.Conv2DLayer),
            # ('pool4', layers.MaxPool2DLayer),
            ('flatten', ReshapeLayer),  # output_dense
            ('encode_layer', layers.DenseLayer),
            ('hidden', layers.DenseLayer),  # output_dense
            ('unflatten', ReshapeLayer),
            # ('unpool4', Unpool2DLayer),
            # ('deconv4', layers.Conv2DLayer),
            ('unpool3', Unpool2DLayer),
            ('deconv3', layers.Conv2DLayer),
            ('unpool2', Unpool2DLayer),
            ('deconv2', layers.Conv2DLayer),
            ('unpool1', Unpool2DLayer),
            ('deconv1', layers.Conv2DLayer),
            ('output_layer', ReshapeLayer),

            # ('hidden5', layers.DenseLayer),
            # ('hidden6', layers.DenseLayer),
            # ('hidden7', layers.DenseLayer),
            # ('output', layers.DenseLayer)
        ],

            input_shape=(None, 1, input_width, input_height),
            # Layer current size - 1x300x140
            conv1_num_filters=layers_size[0], conv1_filter_size=(5, 5), conv1_border_mode="valid", conv1_nonlinearity=None,
            #Layer current size - NFx296x136
            pool1_pool_size=(2, 2),
            # Layer current size - NFx148x68
            conv2_num_filters=layers_size[1], conv2_filter_size=(5, 5), conv2_border_mode=border_mode, conv2_nonlinearity=None,
            # Layer current size - NFx148x68
            pool2_pool_size=(2, 2),
            # Layer current size - NFx74x34
            conv3_num_filters=layers_size[2], conv3_filter_size=(3, 3), conv3_border_mode=border_mode, conv3_nonlinearity=None,
            # Layer current size - NFx74x34
            pool3_pool_size=(2, 2),

            # conv4_num_filters=layers_size[3], conv4_filter_size=(5, 5), conv4_border_mode=border_mode, conv4_nonlinearity=None,
            # pool4_pool_size=(2, 2),

            # Layer current size - NFx37x17
            flatten_shape=(([0], -1)), # not sure if necessary?
            # Layer current size - NF*37*17
            encode_layer_num_units = encode_size,
            # Layer current size - 200
            hidden_num_units=layers_size[-1] * 37 * 17,
            # Layer current size - NF*37*17
            unflatten_shape=(([0], layers_size[-1], 37, 17)),

            # deconv4_num_filters=layers_size[3], deconv4_filter_size=(5, 5), deconv4_border_mode=border_mode, deconv4_nonlinearity=None,
            # unpool4_ds=(2, 2),

            # Layer current size - NFx37x17
            unpool3_ds=(2, 2),
            # Layer current size - NFx74x34
            deconv3_num_filters=layers_size[-2], deconv3_filter_size=(3, 3), deconv3_border_mode=border_mode, deconv3_nonlinearity=None,
            # Layer current size - NFx74x34
            unpool2_ds=(2, 2),
            # Layer current size - NFx148x68
            deconv2_num_filters=layers_size[-3], deconv2_filter_size=(5, 5), deconv2_border_mode=border_mode, deconv2_nonlinearity=None,
            # Layer current size - NFx148x68
            unpool1_ds=(2, 2),
            # Layer current size - NFx296x136
            deconv1_num_filters=1, deconv1_filter_size=(5, 5), deconv1_border_mode="full", deconv1_nonlinearity=None,
            # Layer current size - 1x300x140
            output_layer_shape = (([0], -1)),
            # Layer current size - 300*140

            # output_num_units=outputLayerSize, output_nonlinearity=None,
            update_learning_rate=learning_rate,
            update_momentum=update_momentum,
            update=nesterov_momentum,
            train_split=TrainSplit(eval_size=train_valid_split),
            # batch_iterator_train=BatchIterator(batch_size=batch_size),
            batch_iterator_train=FlipBatchIterator(batch_size=batch_size),
            regression=True,
            max_epochs=epochs,
            verbose=1,
            hiddenLayer_to_output=-10)
        # on_training_finished=last_hidden_layer,
        return cnn

    def createNNwithDecay(input_height, input_width):
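        """Plain convolutional network trained with RMSprop (decay factor update_rho) rather than
        Nesterov momentum; output size depends on the categories setting."""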
        if categories==20:
            outputLayerSize=20
        else:
            outputLayerSize=15

        cnn = NeuralNet(layers=[
            ('input', layers.InputLayer),
            ('conv1', layers.Conv2DLayer),
            ('pool1', layers.MaxPool2DLayer),
            ('conv2', layers.Conv2DLayer),
            ('pool2', layers.MaxPool2DLayer),
            ('conv3', layers.Conv2DLayer),
            ('pool3', layers.MaxPool2DLayer),
            ('conv4', layers.Conv2DLayer),
            ('pool4', layers.MaxPool2DLayer),
            ('hidden5', layers.DenseLayer),
            ('hidden6', layers.DenseLayer),
            ('hidden7', layers.DenseLayer),
            ('output', layers.DenseLayer)],
            input_shape=(None, 1, input_width, input_height),
            conv1_num_filters=layers_size[0], conv1_filter_size=(5, 5), pool1_pool_size=(2, 2),
            conv2_num_filters=layers_size[1], conv2_filter_size=(9, 9), pool2_pool_size=(2, 2),
            conv3_num_filters=layers_size[2], conv3_filter_size=(11, 11), pool3_pool_size=(4, 2),
            conv4_num_filters=layers_size[3], conv4_filter_size=(8, 5), pool4_pool_size=(2, 2),
            hidden5_num_units=500, hidden6_num_units=200, hidden7_num_units=100,
            output_num_units=outputLayerSize, output_nonlinearity=None,
            update_learning_rate=learning_rate,
            update_rho=update_rho,
            update=rmsprop,
            train_split=TrainSplit(eval_size=train_valid_split),
            batch_iterator_train=BatchIterator(batch_size=batch_size),
            regression=True,
            max_epochs=epochs,
            verbose=1,
            hiddenLayer_to_output=-2)
        #         on_training_finished=last_hidden_layer,
        return cnn

    def last_hidden_layer(s, h):

        print s.output_last_hidden_layer_(train_x)
    #         input_layer = s.get_all_layers()[0]
    #         last_h_layer = s.get_all_layers()[-2]
    #         f = theano.function(s.X_inputs, last_h_layer.get_output(last_h_layer),allow_input_downcast=True)

    #         myFunc = theano.function(
    #                     inputs=s.input_X,
    #                     outputs=s.h_predict,
    #                     allow_input_downcast=True,
    #                     )
    #         print s.output_last_hidden_layer_(train_x,-2)

    def writeOutputFile(output_file, train_history, layer_info):
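        """Log the run's hyperparameters and training history to the per-run parameters.txt and as a
        tab-separated row in the shared all_results.txt."""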
        # save the network's parameters
        output_file.write("Validation set error: " + str(train_history[-1]['valid_accuracy']) + "\n\n")
        results_file.write(str(train_history[-1]['valid_accuracy']) + "\t")

        output_file.write("Training NN on: " + ("20 Top Categories\n" if 20 == categories else "Article Categories\n"))
        output_file.write("Learning rate: " + str(learning_rate) + "\n")
        results_file.write(str(learning_rate) + "\t")
        output_file.write(("Momentum: " + str(update_momentum) + "\n") if update_rho is None else (
        "Decay Factor: " + str(update_rho) + "\n"))
        results_file.write(str(update_momentum) + "\t")
        output_file.write(("FlipBatcherIterater" if flip_batch else "BatchIterator") + " with batch: " + str(batch_size) + "\n")
        results_file.write("FlipBatcherIterater\t" + str(batch_size) + "\t")
        output_file.write("Num epochs: " + str(epochs) + "\n")
        results_file.write(str(epochs) + "\t")
        output_file.write("Layers size: " + str(layers_size) + "\n\n")
        results_file.write(str(layers_size) + "\t")
        output_file.write("Activation func: " + ("Rectify" if activation is None else str(activation)) + "\n")
        results_file.write(("Rectify" if activation is None else str(activation)) + "\t")
        output_file.write(
            "Last layer activation func: " + ("Rectify" if last_layer_activation is None else str(last_layer_activation)) + "\n")
        results_file.write(("Rectify" if activation is None else str(last_layer_activation)) + "\t")
        #         output_file.write("Multiple Positives by: " + str(multiple_positives) + "\n")
        output_file.write("Number of images: " + str(end_index) + "\n")
        results_file.write(str(end_index) + "\t")
        output_file.write("Dropout noise precent: " + str(dropout_percent * 100) + "%\n")
        results_file.write(str(dropout_percent * 100) + "%\t")
        output_file.write("Train/validation split: " + str(train_valid_split) + "\n")
        results_file.write(str(train_valid_split) + "\t")
        output_file.write("shuffle_input: " + str(shuffle_input) + "\n")
        results_file.write(str(shuffle_input) + "\t")
        output_file.write("zero_meaning: " + str(zero_meaning) + "\n\n")
        results_file.write(str(zero_meaning) + "\t")

        output_file.write("history: " + str(train_history) + "\n\n")
        results_file.write(str(train_history) + "\t")
        output_file.write("layer_info:\n" + str(layer_info) + "\n")
        results_file.write("[" + str(layer_info).replace("\n", ",") + "]\t")
        output_file.write("filters_info:\n" + str(filter_1) + "\n")
        output_file.write(str(filter_2) + "\n")
        output_file.write(str(filter_3) + "\n")
        output_file.write(str(filter_4) + "\n")
        output_file.write(str(filter_5) + "\n")
        output_file.write(str(filter_6) + "\n\n")
        results_file.write("{" + str((filter_1, filter_2, filter_3, filter_4, filter_5, filter_6)) + "]\t")
        output_file.write("Run time[minutes] is: " + str(run_time) + "\n")

        output_file.flush()
        results_file.write(str(time.ctime()) + "\t")
        results_file.write(folder_name + "\n")
        results_file.flush()

    def outputLastLayer_CNN(cnn, X, y, test_x, test_y):
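        """Extract the hidden-layer representation of the training set in four chunks (presumably to
        limit memory use) and of the test set in one pass."""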
        print "outputing last hidden layer" #     train_last_hiddenLayer = cnn.output_hiddenLayer(train_x)
        quarter_x = np.floor(X.shape[0] / 4)
        train_last_hiddenLayer1 = cnn.output_hiddenLayer(X[:quarter_x])
        print "after first quarter train output"
        train_last_hiddenLayer2 = cnn.output_hiddenLayer(X[quarter_x:2 * quarter_x])
        print "after seconed quarter train output"
        train_last_hiddenLayer3 = cnn.output_hiddenLayer(X[2 * quarter_x:3 * quarter_x])
        print "after third quarter train output"
        train_last_hiddenLayer4 = cnn.output_hiddenLayer(X[3 * quarter_x:])
        print "after all train output"
        test_last_hiddenLayer = cnn.output_hiddenLayer(test_x)
        print "after test output" #     lastLayerOutputs = (train_last_hiddenLayer,train_y,test_last_hiddenLayer,test_y)
        lastLayerOutputs = np.concatenate((train_last_hiddenLayer1, train_last_hiddenLayer2, train_last_hiddenLayer3, train_last_hiddenLayer4), axis=0), y, test_last_hiddenLayer, test_y
        return lastLayerOutputs

    def outputLastLayer_DAE(train_x, train_y, test_x, test_y):
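        """Pre-train a StackedDA on the raw inputs, report how often the argmax of the test
        representation hits a positive label, and return the train/test representations."""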

        # building the SDA
        sDA = StackedDA(layers_size)

        # pre-trainning the SDA
        sDA.pre_train(train_x, noise_rate=input_noise_rate, epochs=pre_train_epochs,LOG=log_file)

        # saving a PNG representation of the first layer
        W = sDA.Layers[0].W.T[:, 1:]
        #         import rl_dae.utils
        #         utils.saveTiles(W, img_shape= (28,28), tile_shape=(10,10), filename="results/res_dA.png")

        # adding the final layer
        #         sDA.finalLayer(train_x, train_y, epochs=softmax_train_epochs)

        # trainning the whole network
        #         sDA.fine_tune(train_x, train_x, epochs=fine_tune_epochs)

        # predicting using the SDA
        testRepresentation = sDA.predict(test_x)
        pred = testRepresentation.argmax(1)

        # let's see how the network did
        #         test_category = test_y.argmax(1)
        e = 0.0
        t = 0.0
        for i in range(test_y.shape[0]):
            if any(test_y[i]):
                e += (test_y[i,pred[i]]==1)
                t += 1

        # printing the result, this structure should result in 80% accuracy
        print "DAE accuracy: %2.2f%%"%(100*e/t)
        output_file.write("DAE predict rate:  "+str(100*e/t) + "%\n")

        lastLayerOutputs = (sDA.predict(train_x), train_y, testRepresentation, test_y)
        return lastLayerOutputs #sDA

    start_time = time.clock()
    print ("Start time: ", time.ctime())

    if loadedData is None:
        train_x, train_y, test_x, test_y = load2d(categories, output_file, input_width, input_height, end_index, multiple_positives, dropout_percent)  # load 2-d data
    else:
        train_x, train_y, test_x, test_y = loadedData

    if zero_meaning:
        train_x = train_x.astype(np.float64)
        mu, sigma = np.mean(train_x.flatten()), np.std(train_x.flatten())
        print("Mean- ", mu)
        print("Std- ", sigma)
        train_x = (train_x - mu) / sigma

    x_train = train_x[:end_index].astype(np.float32).reshape((-1, 1, input_width, input_height))
    x_out = x_train.reshape((x_train.shape[0], -1))
    # test_x = test_x.astype(np.float32).reshape((-1, 1, input_width, input_height))

    cnn = createCSAE(input_height, input_width, x_train, x_out)


    ''' Denoising Autoencoder
    dae = DenoisingAutoencoder(n_hidden=10)
    dae.fit(train_x)
    new_X = dae.transform(train_x)
    print new_X
    '''

    '''Conv Stacked AE
    train_x = np.rint(train_x * 256).astype(np.int).reshape((-1, 1, input_width, input_height ))  # convert to (0,255) int range (we'll do our own scaling)
    mu, sigma = np.mean(train_x.flatten()), np.std(train_x.flatten())

    x_train = train_x.astype(np.float64)
    x_train = (x_train - mu) / sigma
    x_train = x_train.astype(np.float32)

    # we need our target to be 1 dimensional
    x_out = x_train.reshape((x_train.shape[0], -1))

    test_x = np.rint(test_x * 256).astype(np.int).reshape((-1, 1, input_width, input_height ))  # convert to (0,255) int range (we'll do our own scaling)
    # mu, sigma = np.mean(test_x.flatten()), np.std(test_x.flatten())
    test_x = train_x.astype(np.float64)
    test_x = (x_train - mu) / sigma
    test_x = x_train.astype(np.float32)
    '''

    ''' CNN with lasagne
    cnn = createNNwithMomentom(input_height, input_width) if update_rho == None else createNNwithDecay(input_height, input_width)
    cnn.fit(train_x, train_y)
    lastLayerOutputs = outputLastLayer_CNN(cnn, train_x, train_y, test_x, test_y)
    '''

    '''  AE (not Stacked) with Convolutional layers
    cnn = createCnn_AE(input_height, input_width)
    cnn.fit(x_train, x_out)
    '''

    ''' Stacked AE with lasagne
    cnn = createSAE(input_height, input_width, x_train, x_out)
    '''

    run_time = (time.clock() - start_time) / 60.

    writeOutputFile(output_file, cnn.train_history_, PrintLayerInfo()._get_layer_info_plain(cnn))

    print ("Learning took (min)- ", run_time)
    trian_last_hiddenLayer = None
    try:
        train_x = np.random.binomial(1, 1 - dropout_percent, size=train_x.shape) * train_x
        trian_last_hiddenLayer = cnn.output_hiddenLayer(train_x)
        print ("Pickling all encoded images:")
        pickle.dump(trian_last_hiddenLayer, open(folder_path + 'encode.pkl', 'wb'))
    except Exception:
        print ("Could not save encoded images")

    if trian_last_hiddenLayer is not None:
        print ("Running SVM:")
        run_svm(trian_last_hiddenLayer)

    sys.setrecursionlimit(10000)
    # pickle.dump(cnn, open(folder_path+'conv_ae.pkl', 'w'))
    # ae = pickle.load(open('mnist/conv_ae.pkl','r'))
    # cnn.save_weights_to(folder_path+'conv_ae.np')

    # run_svm(cnn)

    return cnn.train_history_[-1]['valid_accuracy']
def make_grnn(
        batch_size,
        emb_size,
        g_hidden_size,
        word_n,
        wc_num,
        dence,
        wsm_num=1,
        rnn_type='LSTM',
        rnn_size=12,
        dropout_d=0.5,  # pooling='mean',
        quest_na=4,
        gradient_steps=-1,
        valid_indices=None,
        lr=0.05,
        grad_clip=10):
    def select_rnn(x):
        return {
            'RNN': LL.RecurrentLayer,
            'LSTM': LL.LSTMLayer,
            'GRU': LL.GRULayer,
        }.get(x, LL.LSTMLayer)

#    dence = dence + [1]

    RNN = select_rnn(rnn_type)
    #------------------------------------------------------------------input layers
    layers = [
        (LL.InputLayer, {
            'name': 'l_in_se_q',
            'shape': (None, word_n, emb_size)
        }),
        (LL.InputLayer, {
            'name': 'l_in_se_a',
            'shape': (None, quest_na, word_n, emb_size)
        }),
        (LL.InputLayer, {
            'name': 'l_in_mask_q',
            'shape': (None, word_n)
        }),
        (LL.InputLayer, {
            'name': 'l_in_mask_a',
            'shape': (None, quest_na, word_n)
        }),
        (LL.InputLayer, {
            'name': 'l_in_mask_ri_q',
            'shape': (None, word_n)
        }),
        (LL.InputLayer, {
            'name': 'l_in_mask_ri_a',
            'shape': (None, quest_na, word_n)
        }),
        (LL.InputLayer, {
            'name': 'l_in_wt_q',
            'shape': (None, word_n, word_n)
        }),
        (LL.InputLayer, {
            'name': 'l_in_wt_a',
            'shape': (None, word_n, quest_na, word_n)
        }),
        (LL.InputLayer, {
            'name': 'l_in_act_',
            'shape': (None, word_n, g_hidden_size)
        }),
        (LL.InputLayer, {
            'name': 'l_in_act__',
            'shape': (None, word_n, word_n, g_hidden_size)
        }),
    ]
    #------------------------------------------------------------------slice layers
    #    l_qs = []
    #    l_cas = []
    l_ase_names = ['l_ase_{}'.format(i) for i in range(quest_na)]
    l_amask_names = ['l_amask_{}'.format(i) for i in range(quest_na)]
    l_amask_ri_names = ['l_amask_ri_{}'.format(i) for i in range(quest_na)]
    l_awt_names = ['l_awt_{}'.format(i) for i in range(quest_na)]
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {
            'name': l_ase_names[i],
            'incoming': 'l_in_se_a',
            'indices': i,
            'axis': 1
        })])
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {
            'name': l_amask_names[i],
            'incoming': 'l_in_mask_a',
            'indices': i,
            'axis': 1
        })])
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {
            'name': l_amask_ri_names[i],
            'incoming': 'l_in_mask_ri_a',
            'indices': i,
            'axis': 1
        })])
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {
            'name': l_awt_names[i],
            'incoming': 'l_in_wt_a',
            'indices': i,
            'axis': 1
        })])
#-------------------------------------------------------------------GRNN layers
    WC = theano.shared(
        np.random.randn(wc_num, g_hidden_size,
                        g_hidden_size).astype('float32'))
    #    WC = LI.Normal(0.1)
    WSM = theano.shared(
        np.random.randn(emb_size, g_hidden_size).astype('float32'))
    b = theano.shared(np.ones(g_hidden_size).astype('float32'))
    #    b = lasagne.init.Constant(1.0)
    layers.extend([(GRNNLayer, {
        'name':
        'l_q_grnn',
        'incomings':
        ['l_in_se_q', 'l_in_mask_q', 'l_in_wt_q', 'l_in_act_', 'l_in_act__'],
        'emb_size':
        emb_size,
        'hidden_size':
        g_hidden_size,
        'word_n':
        word_n,
        'wc_num':
        wc_num,
        'wsm_num':
        wsm_num,
        'only_return_final':
        False,
        'WC':
        WC,
        'WSM':
        WSM,
        'b':
        b
    })])
    l_a_grnns_names = ['l_a_grnn_{}'.format(i) for i in range(quest_na)]
    for i, l_a_grnns_name in enumerate(l_a_grnns_names):
        layers.extend([(GRNNLayer, {
            'name':
            l_a_grnns_name,
            'incomings': [
                l_ase_names[i], l_amask_names[i], l_awt_names[i], 'l_in_act_',
                'l_in_act__'
            ],
            'emb_size':
            emb_size,
            'hidden_size':
            g_hidden_size,
            'word_n':
            word_n,
            'wc_num':
            wc_num,
            'wsm_num':
            wsm_num,
            'only_return_final':
            False,
            'WC':
            WC,
            'WSM':
            WSM,
            'b':
            b
        })])
#------------------------------------------------------------concatenate layers
    layers.extend([(LL.ConcatLayer, {
        'name': 'l_qa_concat',
        'incomings': ['l_q_grnn'] + l_a_grnns_names
    })])
    layers.extend([(LL.ConcatLayer, {
        'name': 'l_qamask_concat',
        'incomings': ['l_in_mask_ri_q'] + l_amask_ri_names
    })])
    #--------------------------------------------------------------------RNN layers
    layers.extend([(RNN, {
        'name': 'l_qa_rnn_f',
        'incoming': 'l_qa_concat',
        'mask_input': 'l_qamask_concat',
        'num_units': rnn_size,
        'backwards': False,
        'only_return_final': True,
        'grad_clipping': grad_clip
    })])
    layers.extend([(RNN, {
        'name': 'l_qa_rnn_b',
        'incoming': 'l_qa_concat',
        'mask_input': 'l_qamask_concat',
        'num_units': rnn_size,
        'backwards': True,
        'only_return_final': True,
        'grad_clipping': grad_clip
    })])
    layers.extend([(LL.ElemwiseSumLayer, {
        'name': 'l_qa_rnn_conc',
        'incomings': ['l_qa_rnn_f', 'l_qa_rnn_b']
    })])
    ##-----------------------------------------------------------------pooling layer
    ##    l_qa_pool = layers.extend([(LL.ExpressionLayer, {'name': 'l_qa_pool',
    ##                                                     'incoming': l_qa_rnn_conc,
    ##                                                     'function': lambda X: X.mean(-1),
    ##                                                     'output_shape'='auto'})])
    #------------------------------------------------------------------dence layers
    l_dence_names = ['l_dence_{}'.format(i) for i, _ in enumerate(dence)]
    if dropout_d:
        layers.extend([(LL.DropoutLayer, {
            'name': 'l_dence_do' + 'do',
            'p': dropout_d
        })])
    for i, d in enumerate(dence):
        if i < len(dence) - 1:
            nonlin = LN.tanh
        else:
            nonlin = LN.softmax
        layers.extend([(LL.DenseLayer, {
            'name': l_dence_names[i],
            'num_units': d,
            'nonlinearity': nonlin
        })])
        if i < len(dence) - 1 and dropout_d:
            layers.extend([(LL.DropoutLayer, {
                'name': l_dence_names[i] + 'do',
                'p': dropout_d
            })])

    def loss(x, t):
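        """Categorical cross-entropy with predictions clipped away from 0 and 1 to avoid log(0)."""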
        return LO.aggregate(
            LO.categorical_crossentropy(T.clip(x, 1e-6, 1. - 1e-6), t))
        # return LO.aggregate(LO.squared_error(T.clip(x, 1e-6, 1. - 1e-6), t))

    if isinstance(valid_indices, np.ndarray) or isinstance(
            valid_indices, list):
        train_split = TrainSplit_indices(valid_indices=valid_indices)
    else:
        train_split = TrainSplit(eval_size=valid_indices, stratify=False)
    nnet = NeuralNet(
        y_tensor_type=T.ivector,
        layers=layers,
        update=LU.adagrad,
        update_learning_rate=lr,
        #            update_epsilon=1e-7,
        objective_loss_function=loss,
        regression=False,
        verbose=2,
        batch_iterator_train=PermIterator(batch_size=batch_size),
        batch_iterator_test=BatchIterator(batch_size=batch_size // 2),
        #            batch_iterator_train=BatchIterator(batch_size=batch_size),
        #            batch_iterator_test=BatchIterator(batch_size=batch_size),
        #train_split=TrainSplit(eval_size=eval_size)
        train_split=train_split)
    nnet.initialize()
    PrintLayerInfo()(nnet)
    return nnet
Example #14
0
    narrow_nonlinearity=nonlinearities.softplus,
    reverse_nonlinearity=nonlinearities.sigmoid,
    coutput_nonlinearity=nonlinearities.softmax,

    #dropout0_p=0.1,
    dropout1_p=0.01,

    #regression=True,
    regression=False,
    verbose=1)

nn.initialize()

nn.load_params_from('task4/koebi_train_history_AE')

PrintLayerInfo()(nn)

nn.fit(X, Y)

test = pd.read_hdf("task4/test.h5", "test")
id_col = test.index
test_data = np.array(test)
test_data = skpre.StandardScaler().fit_transform(test_data)
test_prediction = nn.predict(test_data)

# Write each prediction and its line number into a CSV file
with open('task4/' + result_file_name + '.csv', 'wb') as csvfile:
    fieldnames = ['Id', 'y']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    #    print test_prediction
    writer.writeheader()