Esempio n. 1
0
    # Random Forest
    rf.fit(train_features_df.values.tolist(), train_labels_df['cat'].tolist())
    predicted_labels = []
    for index, vector in enumerate(test_features_df.values):
        predicted_labels.append(str(rf.predict(vector.reshape(1, -1))[0]))
    tree_confusion_matrices["Random Forest"].append(tree.plot_confusion_matrix(test_labels_df['cat'].values.astype(str), predicted_labels))  # Bit hacky to use the tree method

    train_features_df = (train_features_df - train_features_df.mean()) / (train_features_df.max() - train_features_df.min())
    train_features_df = train_features_df.reset_index(drop=True)
    test_features_df = (test_features_df - test_features_df.mean()) / (test_features_df.max() - test_features_df.min())
    test_features_df = test_features_df.reset_index(drop=True)

    # Neural Network
    model = build_nn(nr_features=len(train_features_df.columns))
    model.initialize()
    layer_info = PrintLayerInfo()
    layer_info(model)
    y_train = np.reshape(np.asarray(train_labels_df, dtype='int32'), (-1, 1)).ravel()
    model.fit(train_features_df.values, np.add(y_train, -1))
    predicted_labels = []
    for index, vector in enumerate(test_features_df.values):
        predicted_labels.append(str(model.predict(vector.reshape(1, -1))[0]+1))
    tree_confusion_matrices["Neural Network"].append(tree.plot_confusion_matrix(test_labels_df['cat'].values.astype(str), predicted_labels))  # Bit hacky to use the tree method


    #Bayesian Network
    train_features_df, test_features_df = features_df.iloc[train_index,:].copy(), features_df.iloc[test_index,:].copy()
    train_labels_df, test_labels_df = labels_df.iloc[train_index,:].copy(), labels_df.iloc[test_index,:].copy()
    train_features_df = train_features_df.reset_index(drop=True)
    test_features_df = test_features_df.reset_index(drop=True)
    train_labels_df = train_labels_df.reset_index(drop=True)
Esempio n. 2
0
def get_layer_info():
    """Return a new ``PrintLayerInfo`` instance from ``nolearn.lasagne``.

    The import is performed inside the function, so nolearn is only needed
    when this helper is actually called.  (Created 04/11/2016.)
    """
    from nolearn.lasagne import PrintLayerInfo
    return PrintLayerInfo()
    def local_test(self, images, results, feature_extractors, model, k=2, size=64):
        """Run k-fold cross-validation and return the per-fold errors.

        For every fold the codebook of the ``SiftFeatureExtractor`` (if one is
        present) is rebuilt from the training images, a feature vector is
        extracted for each training image, a model is fitted, and both the
        training and the validation images are scored.

        Parameters:
            images: sequence of images as accepted by ``cv2.imread``,
                ``self.preprocess_image`` and the extractors.
            results: labels, index-aligned with ``images``.
            feature_extractors: list of objects exposing
                ``extract_feature_vector``.  When it equals ``[]`` the raw
                image, resized to (3, 48, 48), is used as the feature vector.
            model: ``"conv"`` (a net is built with ``self.build_conv``),
                ``"neural"`` (a net is built with ``self.build_nn``) or any
                object exposing ``fit`` and ``predict_proba``.
            k: number of folds.
            size: forwarded to ``self.preprocess_image``.

        Returns:
            ``[train_errors, test_errors]`` where each list holds one
            ``Prediction.evaluate`` result per fold.
        """
        kf = KFold(len(images), n_folds=k, shuffle=True, random_state=1337)
        n_images = len(images)
        train_errors = []
        test_errors = []

        def raw_features(path):
            # Feature vector of one image *before* feature selection.
            # preprocess_image is always called, as in the original flow.
            preprocessed = self.preprocess_image(path, size)
            if feature_extractors != []:
                vector = []
                for extractor in feature_extractors:
                    # The SIFT extractor works on the untouched image, every
                    # other extractor on the preprocessed one.
                    if type(extractor) != SiftFeatureExtractor:
                        source = preprocessed
                    else:
                        source = path
                    vector = append(vector, extractor.extract_feature_vector(source))
                return vector
            return np.asarray(resize(cv2.imread(path), (48, 48, 3)).transpose(2, 0, 1).reshape(3, 48, 48))

        def predict_all(fitted_model, selector, paths, message):
            # Score every image in `paths` and collect the class probabilities.
            prediction = Prediction()
            for counter, path in enumerate(paths):
                print(message, counter)
                # FIX: the raw-image branch used to read the stale `image`
                # variable left over from the training loop instead of the
                # image currently being predicted.
                vector = raw_features(path)
                if feature_extractors != []:
                    vector = selector.transform(vector)
                prediction.addPrediction(fitted_model.predict_proba(vector)[0])
            return prediction

        for train, validation in kf:
            # Divide the images in a training and validation set (using KFold)
            train_set = [images[i % n_images] for i in train]
            validation_set = [images[i % n_images] for i in validation]
            train_set_results = [results[i % n_images] for i in train]
            validation_set_results = [results[i % n_images] for i in validation]

            # Rebuild the SIFT codebook from this fold's training images.
            # set_codebook is called on the extractor object that already
            # lives in the list, so no re-insertion is necessary.
            sift_extractor = next(
                (extractor for extractor in feature_extractors
                 if type(extractor) == SiftFeatureExtractor), None)
            if sift_extractor is not None:
                sift_extractor.set_codebook(train_set)

            # Extract the features of every training image
            feature_vectors = []
            for image in train_set:
                print("Training ", image, "...")
                feature_vectors.append(raw_features(image))

            # L1-penalised logistic regression used for feature selection.
            # C is the inverse regularisation strength: a smaller C means a
            # stronger penalty.
            clf2 = LogisticRegression(penalty='l1', dual=False, tol=0.0001, C=4)

            # FIX: keep the `model` argument untouched and build/fit a
            # per-fold model.  Overwriting `model` made every fold after the
            # first one skip the "conv"/"neural" construction path.
            if model != "conv":
                # Feature selection/reduction
                new_feature_vectors = clf2.fit_transform(feature_vectors, train_set_results)
                if model == "neural":
                    fold_model = self.build_nn(nr_features=len(new_feature_vectors[0]))
                    new_feature_vectors = np.asarray(new_feature_vectors)
                    train_set_results = np.asarray(train_set_results)
                    fold_model.initialize()
                    layer_info = PrintLayerInfo()
                    layer_info(fold_model)
                else:
                    fold_model = model
                fold_model.fit(new_feature_vectors, train_set_results)
            else:
                fold_model = self.build_conv()
                fold_model.fit(np.asarray(feature_vectors), np.asarray(train_set_results))

            train_prediction_object = predict_all(
                fold_model, clf2, train_set, "predicting train image ")

            print("predicting test images")
            test_prediction_object = predict_all(
                fold_model, clf2, validation_set, "predicting test image ")

            train_errors.append(train_prediction_object.evaluate(train_set_results))
            test_errors.append(test_prediction_object.evaluate(validation_set_results))

        return [train_errors, test_errors]
    def make_submission(self, train_images, train_results, test_images, output_file_path, feature_extractors, model, size=64):
        """Train on all training images and write test predictions to CSV.

        Parameters:
            train_images: sequence of training images as accepted by
                ``cv2.imread``, ``self.preprocess_image`` and the extractors.
            train_results: labels, index-aligned with ``train_images``.
            test_images: sequence of images to predict.
            output_file_path: forwarded to ``FileParser.write_CSV``.
            feature_extractors: list of objects exposing
                ``extract_feature_vector``.  When it equals ``[]`` the raw
                image, resized to (3, 48, 48), is used as the feature vector.
            model: ``"conv"`` (a net is built with ``self.build_conv``),
                ``"neural"`` (a net is built with ``self.build_nn``) or any
                object exposing ``fit`` and ``predict_proba``.
            size: forwarded to ``self.preprocess_image``.
        """
        # Initialise the codebook of the SIFT extractor, if there is one.
        # set_codebook is called on the extractor object that already lives
        # in the list, so no re-insertion is necessary.
        sift_extractor = next(
            (extractor for extractor in feature_extractors
             if type(extractor) == SiftFeatureExtractor), None)
        if sift_extractor is not None:
            sift_extractor.set_codebook(train_images)

        def raw_features(path):
            # Feature vector of one image *before* feature selection.
            # preprocess_image is always called, as in the original flow.
            preprocessed = self.preprocess_image(path, size)
            if feature_extractors != []:
                vector = []
                for extractor in feature_extractors:
                    # The SIFT extractor works on the untouched image, every
                    # other extractor on the preprocessed one.
                    if type(extractor) != SiftFeatureExtractor:
                        source = preprocessed
                    else:
                        source = path
                    vector = append(vector, extractor.extract_feature_vector(source))
                return vector
            return np.asarray(resize(cv2.imread(path), (48, 48, 3)).transpose(2, 0, 1).reshape(3, 48, 48))

        # Extract features from every training image
        feature_vectors = []
        for image in train_images:
            print("Training ", image, "...")
            feature_vectors.append(raw_features(image))

        # L1-penalised logistic regression used for feature selection.
        # C is the inverse regularisation strength: a smaller C means a
        # stronger penalty.
        clf2 = LogisticRegression(penalty='l1', dual=False, tol=0.0001, C=4)

        if model != "conv":
            # Feature selection/reduction
            print("Old feature vector shape = ", len(feature_vectors), len(feature_vectors[0]))
            new_feature_vectors = clf2.fit_transform(feature_vectors, train_results)
            print("New feature vector shape = ", len(new_feature_vectors), len(new_feature_vectors[0]))
            if model == "neural":
                model = self.build_nn(nr_features=len(new_feature_vectors[0]))
                new_feature_vectors = np.asarray(new_feature_vectors)
                train_results = np.asarray(train_results)
                model.initialize()
                layer_info = PrintLayerInfo()
                layer_info(model)

            # Fit our model
            model.fit(new_feature_vectors, train_results)
        else:
            model = self.build_conv()

            # Fit our model
            model.fit(np.asarray(feature_vectors), np.asarray(train_results))

        # Iterate over the test images and add their prediction to a prediction object
        prediction_object = Prediction()
        for im in test_images:
            print("Predicting ", im)
            # FIX: the raw-image branch used to read the stale `image`
            # variable (the last training image) instead of `im`.
            validation_feature_vector = raw_features(im)
            if feature_extractors != []:
                validation_feature_vector = clf2.transform(validation_feature_vector)
            prediction_object.addPrediction(model.predict_proba(validation_feature_vector)[0])

        # Write out the prediction object
        FileParser.write_CSV(output_file_path, prediction_object)
Esempio n. 5
0
def make_memnn(vocab_size,
               cont_sl,
               cont_wl,
               quest_wl,
               answ_wl,
               rnn_size,
               rnn_type='LSTM',
               pool_size=4,
               answ_n=4,
               dence_l=[100],
               dropout=0.5,
               batch_size=16,
               emb_size=50,
               grad_clip=40,
               init_std=0.1,
               num_hops=3,
               rnn_style=False,
               nonlin=LN.softmax,
               init_W=None,
               rng=None,
               art_pool=4,
               lr=0.01,
               mom=0,
               updates=LU.adagrad,
               valid_indices=0.2,
               permute_answ=False,
               permute_cont=False):
    """Assemble, initialise and return the memory-network ``NeuralNet``.

    The layer list is built in this order: input layers, per-answer slice
    layers, question encoding, ``num_hops`` memory layers, per-answer
    encodings, concatenation, a forward and a backward recurrent layer whose
    last ``answ_n`` steps are added together, 1-D max pooling, and finally
    the dense layers (the last one with a softmax nonlinearity).

    Parameters:
        vocab_size: number of rows of the embedding matrices.
        cont_sl, cont_wl: second and third dimension of the context input.
        quest_wl: last dimension of the question input.
        answ_wl: last dimension of the answer input.
        rnn_size: number of units of both recurrent layers.
        rnn_type: ``'RNN'``, ``'LSTM'`` or ``'GRU'``; anything else falls
            back to the LSTM layer.
        pool_size: pool size of the ``Pool1DLayer``.
        answ_n: number of answers (second dimension of the answer input).
        dence_l: number of units for each dense layer (never mutated here).
        dropout: dropout probability; a falsy value disables the dropout
            layers.
        batch_size: batch size for training; the test iterator uses half.
        emb_size: embedding width.
        grad_clip: ``grad_clipping`` of the recurrent layers.
        init_std: scale applied to the normally distributed initial weights.
        num_hops: number of memory layers.
        nonlin: nonlinearity handed to the memory layers.
        lr: learning rate.
        updates: update function handed to ``NeuralNet``.
        valid_indices: a list/ndarray selects ``TrainSplit_indices``;
            otherwise the value is used as ``eval_size`` of ``TrainSplit``.
        permute_answ, permute_cont: if either is truthy a ``PermIterator``
            is used for training batches instead of a ``BatchIterator``.
        rnn_style, init_W, rng, art_pool, mom: accepted but not used by
            this function.

    Returns:
        The initialised ``NeuralNet``.
    """
    RNN = {
        'RNN': LL.RecurrentLayer,
        'LSTM': LL.LSTMLayer,
        'GRU': LL.GRULayer,
    }.get(rnn_type, LL.LSTMLayer)

    #-----------------------------------------------------------------------weights
    def shared_normal(rows):
        # (rows, emb_size) float32 shared variable, N(0, 1) scaled by init_std.
        return theano.shared(
            init_std * np.random.randn(rows, emb_size).astype('float32'))

    # The creation order is kept so a seeded numpy RNG yields the same values.
    tr_variables = {}
    tr_variables['WQ'] = shared_normal(vocab_size)
    tr_variables['WA'] = shared_normal(vocab_size)
    tr_variables['WC'] = shared_normal(vocab_size)
    tr_variables['WTA'] = shared_normal(cont_sl)
    tr_variables['WTC'] = shared_normal(cont_sl)
    tr_variables['WAnsw'] = shared_normal(vocab_size)

    #------------------------------------------------------------------input layers
    layers = [
        (LL.InputLayer, {
            'name': 'l_in_q',
            'shape': (batch_size, 1, quest_wl),
            'input_var': T.itensor3('l_in_q_')
        }),
        (LL.InputLayer, {
            'name': 'l_in_a',
            'shape': (batch_size, answ_n, answ_wl),
            'input_var': T.itensor3('l_in_a_')
        }),
        (LL.InputLayer, {
            'name': 'l_in_q_pe',
            'shape': (batch_size, 1, quest_wl, emb_size)
        }),
        (LL.InputLayer, {
            'name': 'l_in_a_pe',
            'shape': (batch_size, answ_n, answ_wl, emb_size)
        }),
        (LL.InputLayer, {
            'name': 'l_in_cont',
            'shape': (batch_size, cont_sl, cont_wl),
            'input_var': T.itensor3('l_in_cont_')
        }),
        (LL.InputLayer, {
            'name': 'l_in_cont_pe',
            'shape': (batch_size, cont_sl, cont_wl, emb_size)
        }),
    ]

    #------------------------------------------------------------------slice layers
    # One length-1 slice per answer, first for the answers themselves and
    # then for the matching 'pe' input.
    l_a_names = ['l_a_{}'.format(i) for i in range(answ_n)]
    l_a_pe_names = ['l_a_pe{}'.format(i) for i in range(answ_n)]
    for names, incoming in ((l_a_names, 'l_in_a'),
                            (l_a_pe_names, 'l_in_a_pe')):
        for i in range(answ_n):
            layers.append((LL.SliceLayer, {
                'name': names[i],
                'incoming': incoming,
                'indices': slice(i, i + 1),
                'axis': 1
            }))

    #------------------------------------------------------------------MEMNN layers
    # question
    layers.append((EncodingFullLayer, {
        'name': 'l_emb_f_q',
        'incomings': ('l_in_q', 'l_in_q_pe'),
        'vocab_size': vocab_size,
        'emb_size': emb_size,
        'W': tr_variables['WQ'],
        'WT': None
    }))

    l_mem_names = ['ls_mem_n2n_{}'.format(i) for i in range(num_hops)]

    # The first hop reads the encoded question.
    layers.append((MemoryLayer, {
        'name': l_mem_names[0],
        'incomings': ('l_in_cont', 'l_in_cont_pe', 'l_emb_f_q'),
        'vocab_size': vocab_size,
        'emb_size': emb_size,
        'A': tr_variables['WA'],
        'C': tr_variables['WC'],
        'AT': tr_variables['WTA'],
        'CT': tr_variables['WTC'],
        'nonlin': nonlin
    }))
    # Every later hop reads the previous hop.  On odd hops the A/C and AT/CT
    # roles of the shared weights are swapped.
    for i in range(1, num_hops):
        if i % 2:
            hop_a, hop_c = tr_variables['WC'], tr_variables['WA']
            hop_at, hop_ct = tr_variables['WTC'], tr_variables['WTA']
        else:
            hop_a, hop_c = tr_variables['WA'], tr_variables['WC']
            hop_at, hop_ct = tr_variables['WTA'], tr_variables['WTC']
        layers.append((MemoryLayer, {
            'name': l_mem_names[i],
            'incomings': ('l_in_cont', 'l_in_cont_pe', l_mem_names[i - 1]),
            'vocab_size': vocab_size,
            'emb_size': emb_size,
            'A': hop_a,
            'C': hop_c,
            'AT': hop_at,
            'CT': hop_ct,
            'nonlin': nonlin
        }))

    # answers
    l_emb_f_a_names = ['l_emb_f_a{}'.format(i) for i in range(answ_n)]
    for i in range(answ_n):
        layers.append((EncodingFullLayer, {
            'name': l_emb_f_a_names[i],
            'incomings': (l_a_names[i], l_a_pe_names[i]),
            'vocab_size': vocab_size,
            'emb_size': emb_size,
            'W': tr_variables['WAnsw'],
            'WT': None
        }))

    #------------------------------------------------------------concatenate layers
    layers.append((LL.ConcatLayer, {
        'name': 'l_qma_concat',
        'incomings': l_mem_names + l_emb_f_a_names
    }))

    #--------------------------------------------------------------------RNN layers
    # A forward and a backward pass over the concatenation; only the last
    # answ_n steps of each are kept and the two are added element-wise.
    for name, backwards in (('l_qa_rnn_f', False), ('l_qa_rnn_b', True)):
        layers.append((RNN, {
            'name': name,
            'incoming': 'l_qma_concat',
            'num_units': rnn_size,
            'backwards': backwards,
            'only_return_final': False,
            'grad_clipping': grad_clip
        }))
    for name, incoming in (('l_qa_rnn_f_sl', 'l_qa_rnn_f'),
                           ('l_qa_rnn_b_sl', 'l_qa_rnn_b')):
        layers.append((LL.SliceLayer, {
            'name': name,
            'incoming': incoming,
            'indices': slice(-answ_n, None),
            'axis': 1
        }))
    layers.append((LL.ElemwiseMergeLayer, {
        'name': 'l_qa_rnn_conc',
        'incomings': ('l_qa_rnn_f_sl', 'l_qa_rnn_b_sl'),
        'merge_function': T.add
    }))

    #-----------------------------------------------------------------pooling layer
    layers.append((LL.Pool1DLayer, {
        'name': 'l_qa_pool',
        'incoming': 'l_qa_rnn_conc',
        'pool_size': pool_size,
        'mode': 'max'
    }))

    #------------------------------------------------------------------dence layers
    l_dence_names = ['l_dence_{}'.format(i) for i, _ in enumerate(dence_l)]
    if dropout:
        layers.append((LL.DropoutLayer, {
            'name': 'l_dence_do',
            'p': dropout
        }))
    last_dense = len(dence_l) - 1
    for i, d in enumerate(dence_l):
        # Hidden dense layers use tanh, the output layer softmax.  A separate
        # local is used so the `nonlin` parameter is no longer overwritten.
        dense_nonlin = LN.tanh if i < last_dense else LN.softmax
        layers.append((LL.DenseLayer, {
            'name': l_dence_names[i],
            'num_units': d,
            'nonlinearity': dense_nonlin
        }))
        if i < last_dense and dropout:
            layers.append((LL.DropoutLayer, {
                'name': l_dence_names[i] + 'do',
                'p': dropout
            }))

    if isinstance(valid_indices, (np.ndarray, list)):
        train_split = TrainSplit_indices(valid_indices=valid_indices)
    else:
        train_split = TrainSplit(eval_size=valid_indices, stratify=False)

    if permute_answ or permute_cont:
        batch_iterator_train = PermIterator(batch_size, permute_answ,
                                            permute_cont)
    else:
        batch_iterator_train = BatchIterator(batch_size=batch_size)

    def loss(x, t):
        # Predictions are clipped away from 0 and 1 before the cross-entropy.
        return LO.aggregate(
            LO.categorical_crossentropy(T.clip(x, 1e-6, 1. - 1e-6), t))

    nnet = NeuralNet(
        y_tensor_type=T.ivector,
        layers=layers,
        update=updates,
        update_learning_rate=lr,
        objective_loss_function=loss,
        regression=False,
        verbose=2,
        batch_iterator_train=batch_iterator_train,
        # FIX: floor division keeps the batch size an integer under true
        # division (Python 3 / `from __future__ import division`).
        batch_iterator_test=BatchIterator(batch_size=batch_size // 2),
        train_split=train_split,
        on_batch_finished=[zero_memnn])
    nnet.initialize()
    PrintLayerInfo()(nnet)
    return nnet
Esempio n. 6
0
    input_shape=(None, num_features),
    dense_num_units=64,
    narrow_num_units=48,
    denseReverse1_num_units=64,
    denseReverse2_num_units=128,
    output_num_units=128,

    #input_nonlinearity = None, #nonlinearities.sigmoid,
    #dense_nonlinearity = nonlinearities.tanh,
    narrow_nonlinearity=nonlinearities.softplus,
    #denseReverse1_nonlinearity = nonlinearities.tanh,
    denseReverse2_nonlinearity=nonlinearities.softplus,
    output_nonlinearity=nonlinearities.linear,  #nonlinearities.softmax,

    #dropout0_p=0.1,
    dropout1_p=0.01,
    dropout2_p=0.001,
    regression=True,
    verbose=1)

# Build the network and print its layer overview.
ae.initialize()
PrintLayerInfo()(ae)

# Fit with Z as both input and target, i.e. the net learns to reproduce Z.
# NOTE(review): as the name admits, it is unclear what fit() returns here;
# nolearn's NeuralNet.fit presumably returns the net itself rather than a
# training history - confirm before relying on this variable.
maybe_this_is_a_history = ae.fit(Z, Z)

#learned_parameters = ae.get_all_params_values()
#np.save("task4/learned_parameter.npy", learned_parameters)

# Persist the learned parameters to disk.
#SaveWeights(path='task4/koebi_train_history_AE')(ae, maybe_this_is_a_history)
ae.save_params_to('task4/koebi_train_history_AE2')
Esempio n. 7
0
def local_test(feature_vectors_df, labels_df, k=2):
    """Cross-validate ``build_nn`` and plot the averaged confusion matrix.

    The labels are shifted down by one, the data is split into ``k``
    stratified shuffled folds, a fresh network is built and fitted per fold,
    the fold accuracy is printed, and finally the element-wise mean of the
    per-fold confusion matrices is passed to ``metrics.plot_confusion_matrix``.

    Parameters:
        feature_vectors_df: DataFrame with one feature vector per row.
        labels_df: DataFrame holding the labels, row-aligned with
            ``feature_vectors_df``.  Presumably the labels start at 1,
            hence the shift - verify against the caller.
        k: number of folds.
    """
    # FIX: subtract on a copy.  The original `labeltjes -= 1` operated in
    # place on `labels_df.values`, which may write through to the caller's
    # DataFrame (and shift the labels again on every call).
    labeltjes = labels_df.values - 1
    print(labeltjes.shape)
    labeltjes = labeltjes.ravel().tolist()
    kf = StratifiedKFold(labeltjes, n_folds=k, shuffle=True)
    confusion_matrices_folds = []

    for train, test in kf:
        # Divide the data in a training and a validation set
        X_train = feature_vectors_df.values[train, :]
        X_test = feature_vectors_df.values[test, :]

        # The training labels are handed to the net as a flat int32 array.
        y_train = np.asarray([labeltjes[i] for i in train], dtype='int32')
        y_test = [labeltjes[i] for i in test]

        # A fresh network for every fold
        model = build_nn(nr_features=X_train.shape[1])
        model.initialize()
        layer_info = PrintLayerInfo()
        layer_info(model)

        # Fit our model
        model.fit(X_train, y_train)

        preds = model.predict(X_test)
        # Drop the reference to the fitted net before the next fold is built.
        del model

        # Number of positions where the prediction matches the true label
        n_correct = 0
        for predicted, expected in zip(preds, y_test):
            if predicted == expected:
                n_correct += 1

        # Save the confusion matrix for this fold
        confusion_matrix = sklearn.metrics.confusion_matrix(y_test, preds)
        confusion_matrices_folds.append(confusion_matrix)

        print("Accuracy for fold: " + str(
            float(n_correct) / float(len(y_test))
        ) + "\n\n\n\n\n-----------------------------\n\n\n")

    # Plot the average confusion matrix over all folds.  The accumulator is
    # no longer called `sum`, which shadowed the builtin.
    mean_confusion = confusion_matrices_folds[0] * 1.0
    for fold_matrix in confusion_matrices_folds[1:]:
        mean_confusion += fold_matrix
    mean_confusion /= len(confusion_matrices_folds)
    metrics.plot_confusion_matrix(mean_confusion)
def make_grnn(
        batch_size,
        emb_size,
        g_hidden_size,
        word_n,
        wc_num,
        dence,
        wsm_num=1,
        rnn_type='LSTM',
        rnn_size=12,
        dropout_d=0.5,  # pooling='mean',
        quest_na=4,
        gradient_steps=-1,
        valid_indices=None,
        lr=0.05,
        grad_clip=10):
    """Assemble, initialise and return the GRNN-based ``NeuralNet``.

    The layer list is built in this order: input layers, per-answer slice
    layers, one ``GRNNLayer`` for the question and one per answer (all
    sharing the same ``WC``, ``WSM`` and ``b``), concatenation of the GRNN
    outputs and of the masks, a masked forward and backward recurrent layer
    whose final outputs are summed, and finally the dense layers (the last
    one with a softmax nonlinearity).

    Parameters:
        batch_size: batch size for training; the test iterator uses half.
        emb_size: embedding width of the inputs.
        g_hidden_size: hidden size of the GRNN layers.
        word_n: number of words per question/answer.
        wc_num: first dimension of the shared ``WC`` tensor.
        dence: number of units for each dense layer.
        wsm_num: forwarded to the GRNN layers.
        rnn_type: ``'RNN'``, ``'LSTM'`` or ``'GRU'``; anything else falls
            back to the LSTM layer.
        rnn_size: number of units of both recurrent layers.
        dropout_d: dropout probability; a falsy value disables the dropout
            layers.
        quest_na: number of answers per question.
        gradient_steps: accepted but not used by this function.
        valid_indices: a list/ndarray selects ``TrainSplit_indices``;
            otherwise the value is used as ``eval_size`` of ``TrainSplit``.
        lr: learning rate.
        grad_clip: ``grad_clipping`` of the recurrent layers.

    Returns:
        The initialised ``NeuralNet``.
    """
    RNN = {
        'RNN': LL.RecurrentLayer,
        'LSTM': LL.LSTMLayer,
        'GRU': LL.GRULayer,
    }.get(rnn_type, LL.LSTMLayer)

    #------------------------------------------------------------------input layers
    input_specs = [
        ('l_in_se_q', (None, word_n, emb_size)),
        ('l_in_se_a', (None, quest_na, word_n, emb_size)),
        ('l_in_mask_q', (None, word_n)),
        ('l_in_mask_a', (None, quest_na, word_n)),
        ('l_in_mask_ri_q', (None, word_n)),
        ('l_in_mask_ri_a', (None, quest_na, word_n)),
        ('l_in_wt_q', (None, word_n, word_n)),
        ('l_in_wt_a', (None, word_n, quest_na, word_n)),
        ('l_in_act_', (None, word_n, g_hidden_size)),
        ('l_in_act__', (None, word_n, word_n, g_hidden_size)),
    ]
    layers = [(LL.InputLayer, {'name': in_name, 'shape': in_shape})
              for in_name, in_shape in input_specs]

    #------------------------------------------------------------------slice layers
    l_ase_names = ['l_ase_{}'.format(i) for i in range(quest_na)]
    l_amask_names = ['l_amask_{}'.format(i) for i in range(quest_na)]
    l_amask_ri_names = ['l_amask_ri_{}'.format(i) for i in range(quest_na)]
    l_awt_names = ['l_awt_{}'.format(i) for i in range(quest_na)]
    # One slice per answer along axis 1 of every answer-level input.
    # NOTE(review): 'l_in_wt_a' is declared as (None, word_n, quest_na,
    # word_n), so axis 1 is word_n there rather than quest_na - confirm
    # whether the shape or the slicing axis is the intended one.
    for names, incoming in ((l_ase_names, 'l_in_se_a'),
                            (l_amask_names, 'l_in_mask_a'),
                            (l_amask_ri_names, 'l_in_mask_ri_a'),
                            (l_awt_names, 'l_in_wt_a')):
        for i in range(quest_na):
            layers.append((LL.SliceLayer, {
                'name': names[i],
                'incoming': incoming,
                'indices': i,
                'axis': 1
            }))

    #-------------------------------------------------------------------GRNN layers
    # Parameters shared by the question GRNN and every answer GRNN.
    WC = theano.shared(
        np.random.randn(wc_num, g_hidden_size,
                        g_hidden_size).astype('float32'))
    WSM = theano.shared(
        np.random.randn(emb_size, g_hidden_size).astype('float32'))
    b = theano.shared(np.ones(g_hidden_size).astype('float32'))

    def grnn_layer(name, incomings):
        # Layer spec of one GRNNLayer using the shared parameters above.
        return (GRNNLayer, {
            'name': name,
            'incomings': incomings,
            'emb_size': emb_size,
            'hidden_size': g_hidden_size,
            'word_n': word_n,
            'wc_num': wc_num,
            'wsm_num': wsm_num,
            'only_return_final': False,
            'WC': WC,
            'WSM': WSM,
            'b': b
        })

    layers.append(grnn_layer(
        'l_q_grnn',
        ['l_in_se_q', 'l_in_mask_q', 'l_in_wt_q', 'l_in_act_', 'l_in_act__']))
    l_a_grnns_names = ['l_a_grnn_{}'.format(i) for i in range(quest_na)]
    for i, l_a_grnns_name in enumerate(l_a_grnns_names):
        layers.append(grnn_layer(
            l_a_grnns_name,
            [l_ase_names[i], l_amask_names[i], l_awt_names[i], 'l_in_act_',
             'l_in_act__']))

    #------------------------------------------------------------concatenate layers
    layers.append((LL.ConcatLayer, {
        'name': 'l_qa_concat',
        'incomings': ['l_q_grnn'] + l_a_grnns_names
    }))
    layers.append((LL.ConcatLayer, {
        'name': 'l_qamask_concat',
        'incomings': ['l_in_mask_ri_q'] + l_amask_ri_names
    }))

    #--------------------------------------------------------------------RNN layers
    # A masked forward and backward pass; only the final outputs are kept
    # and then summed.
    for name, backwards in (('l_qa_rnn_f', False), ('l_qa_rnn_b', True)):
        layers.append((RNN, {
            'name': name,
            'incoming': 'l_qa_concat',
            'mask_input': 'l_qamask_concat',
            'num_units': rnn_size,
            'backwards': backwards,
            'only_return_final': True,
            'grad_clipping': grad_clip
        }))
    layers.append((LL.ElemwiseSumLayer, {
        'name': 'l_qa_rnn_conc',
        'incomings': ['l_qa_rnn_f', 'l_qa_rnn_b']
    }))

    #------------------------------------------------------------------dence layers
    l_dence_names = ['l_dence_{}'.format(i) for i, _ in enumerate(dence)]
    if dropout_d:
        layers.append((LL.DropoutLayer, {
            'name': 'l_dence_do' + 'do',
            'p': dropout_d
        }))
    last_dense = len(dence) - 1
    for i, d in enumerate(dence):
        # Hidden dense layers use tanh, the output layer softmax.
        dense_nonlin = LN.tanh if i < last_dense else LN.softmax
        layers.append((LL.DenseLayer, {
            'name': l_dence_names[i],
            'num_units': d,
            'nonlinearity': dense_nonlin
        }))
        if i < last_dense and dropout_d:
            layers.append((LL.DropoutLayer, {
                'name': l_dence_names[i] + 'do',
                'p': dropout_d
            }))

    def loss(x, t):
        # Predictions are clipped away from 0 and 1 before the cross-entropy.
        return LO.aggregate(
            LO.categorical_crossentropy(T.clip(x, 1e-6, 1. - 1e-6), t))

    if isinstance(valid_indices, (np.ndarray, list)):
        train_split = TrainSplit_indices(valid_indices=valid_indices)
    else:
        train_split = TrainSplit(eval_size=valid_indices, stratify=False)

    nnet = NeuralNet(
        y_tensor_type=T.ivector,
        layers=layers,
        update=LU.adagrad,
        update_learning_rate=lr,
        objective_loss_function=loss,
        regression=False,
        verbose=2,
        batch_iterator_train=PermIterator(batch_size=batch_size),
        # FIX: floor division keeps the batch size an integer under true
        # division (Python 3 / `from __future__ import division`).
        batch_iterator_test=BatchIterator(batch_size=batch_size // 2),
        train_split=train_split)
    nnet.initialize()
    PrintLayerInfo()(nnet)
    return nnet
Esempio n. 9
0
    narrow_nonlinearity=nonlinearities.softplus,
    reverse_nonlinearity=nonlinearities.sigmoid,
    coutput_nonlinearity=nonlinearities.softmax,

    #dropout0_p=0.1,
    dropout1_p=0.01,

    #regression=True,
    regression=False,
    verbose=1)

# Build the network, then overwrite its freshly initialised parameters with
# the ones stored on disk.
nn.initialize()

nn.load_params_from('task4/koebi_train_history_AE')

PrintLayerInfo()(nn)

# Continue training on (X, Y) starting from the loaded parameters.
nn.fit(X, Y)

# Load the test set, keep its index for the output file, and predict.
test = pd.read_hdf("task4/test.h5", "test")
id_col = test.index
test_data = np.array(test)
# NOTE(review): the scaler is fitted on the test data itself.  If X was
# standardised with statistics from the training data, the test features are
# not on exactly the same scale - confirm this is intended.
test_data = skpre.StandardScaler().fit_transform(test_data)
test_prediction = nn.predict(test_data)

# Write each prediction and its line number into a csv file
with open('task4/' + result_file_name + '.csv', 'wb') as csvfile:
    fieldnames = ['Id', 'y']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    #    print test_prediction
    writer.writeheader()