def run_neural_net(X_train, Y_train, X_test, Y_test):
    # Rows are samples, columns are features

    INPUT_NODES = X_train.shape[1]
    OUTPUT_NODES = 1

    # create model
    model = Sequential()
    model.add(
        Dense(512, input_dim=INPUT_NODES, init='uniform', activation='relu'))
    model.add(Dropout(0.8))
    #model.add(Dense(X.shape[1], init='uniform', activation='relu'))
    model.add(Dense(OUTPUT_NODES, init='uniform', activation='sigmoid'))
    #model = Sequential([
    #    Dense(32, input_dim=X.shape[1], init='uniform'),
    #    Activation('relu'),
    #    #Dense(10, init='uniform'),
    #    #Activation('relu'),
    #    Dense(1, init='uniform'),
    #    Activation('sigmoid'),
    #])

    # Compile model
    #model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', recall, precision, window_diff_metric, size1, size2])
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', recall, precision])

    # Fit the model
    model.fit(X_train,
              Y_train,
              nb_epoch=200,
              batch_size=100,
              validation_data=(X_test, Y_test))

    # evaluate the model
    print 'Evaluating...\n'
    scores = model.evaluate(X_test, Y_test)
    print Y_test.shape
    for (name, score) in zip(model.metrics_names, scores):
        print " %s: %0.3f" % (name, score)

    predictions = model.predict(X_test)  # calculate predictions
    rounded = np.round(predictions)
    rounded = np.expand_dims(rounded, axis=0)
    print helper.windiff_metric_NUMPY(np.expand_dims(Y_test, axis=0), rounded)
    pdb.set_trace()
# Example #2
# 0
def train_LSTM(X, Y, model, train_split=0.8, epochs=10, batch_size=32):

    # Clinical
    SAMPLE_TYPE_cli, X_cli, Y_cli = get_input(sample_type=4,
                                              shuffle_documents=False,
                                              pad=False)

    which_model = 2
    if which_model == 2:
        custom_fit(X, Y, train_split=train_split, model=model, epochs=epochs)
        print "Clinical Data"
        custom_fit(X_cli, Y_cli, train_split=1, model=model)  # Test clinical

    elif which_modle == 1:
        # Works for TYPE2 but check for others
        # Both these lines work for which_model == 1
        X_train, Y_train, X_test, Y_test = split_data(X,
                                                      Y,
                                                      train_split=train_split)
        model.fit(X_train,
                  Y_train,
                  shuffle=False,
                  nb_epoch=epochs,
                  batch_size=batch_size,
                  validation_data=(X_test, Y_test))

        # WIkipedia
        #model.evaluate(X_test, Y_test, batch_size=batch_size)
        #pred = model.predict(X_test)
        #rounded = np.round(pred)
        #result = helper.windiff_metric_NUMPY(Y_test, rounded)
        #print result

        # Clinical
        # Temporary TRUNCATION
        TRUNCATE_LEN = X_train.shape[1]
        print "NOTE: Truncating the Test dataset(clinical) from %d sentences to %d sentences." % (
            X_cli.shape[1], TRUNCATE_LEN)
        X_cli, Y_cli = X_cli[:, :TRUNCATE_LEN, :], Y_cli[:, :TRUNCATE_LEN, :]
        model.evaluate(X_cli, Y_cli, batch_size=batch_size)
        pred = model.predict(X_cli)
        rounded = np.round(pred)
        result = helper.windiff_metric_NUMPY(Y_cli,
                                             rounded,
                                             win_size=10,
                                             rounded=True)
        print result

    pdb.set_trace()
# Example #3
# 0
def train_LSTM(X,
               Y,
               model,
               embedding_W,
               train_split=0.8,
               epochs=10,
               batch_size=32):
    global X_wiki, Y_wiki, X_cli, Y_cli, X_bio, Y_bio

    which_model = 2
    if which_model == 2:
        custom_fit(X,
                   Y,
                   model=model,
                   batch_size=batch_size,
                   train_split=train_split,
                   epochs=epochs)
        print "############## Clinical Data ###########"
        custom_fit(X_cli,
                   Y_cli,
                   model=model,
                   batch_size=batch_size,
                   train_split=0,
                   epochs=-1)  # Test clinical
        print "############## Biography Data ###########"
        custom_fit(X_bio,
                   Y_bio,
                   model=model,
                   batch_size=batch_size,
                   train_split=0,
                   epochs=-1)  # Test biography

    elif which_modle == 1:
        # Works for TYPE2 but check for others
        # Both these lines work for which_model == 1
        X_train, Y_train, X_test, Y_test = split_data(X,
                                                      Y,
                                                      train_split=train_split)
        model.fit(X_train,
                  Y_train,
                  shuffle=False,
                  nb_epoch=epochs,
                  batch_size=batch_size,
                  validation_data=(X_test, Y_test))

        # WIkipedia
        #model.evaluate(X_test, Y_test, batch_size=batch_size)
        #pred = model.predict(X_test)
        #rounded = np.round(pred)
        #result = helper.windiff_metric_NUMPY(Y_test, rounded)
        #print result

        # Clinical
        # Temporary TRUNCATION
        TRUNCATE_LEN = X_train.shape[1]
        print "NOTE: Truncating the Test dataset(clinical) from %d sentences to %d sentences." % (
            X_cli.shape[1], TRUNCATE_LEN)
        X_cli, Y_cli = X_cli[:, :TRUNCATE_LEN, :], Y_cli[:, :TRUNCATE_LEN, :]
        model.evaluate(X_cli, Y_cli, batch_size=batch_size)
        pred = model.predict(X_cli)
        rounded = np.round(pred)
        result = helper.windiff_metric_NUMPY(Y_cli,
                                             rounded,
                                             win_size=10,
                                             rounded=True)
        print result

    pdb.set_trace()
# Example #4
# 0
def custom_fit(X, Y, model, train_split=0.8, epochs=10):

    if train_split == 1:
        X_test, Y_test = X, Y
    else:
        # This is only for training! (If train_split =1 then only TEST)
        X_train, Y_train, X_test, Y_test = split_data(X,
                                                      Y,
                                                      train_split=train_split)
        print "Batch size = 1"
        print 'Train...'
        for epoch in range(epochs):
            mean_tr_acc = []
            mean_tr_loss = []
            for i in range(len(X_train)):
                #y_true = Y_train[i]
                for sequence, truth in zip(X_train[i],
                                           Y_train[i]):  # Sequence in document
                    sequence = sequence.reshape((1, sequence.shape[0]))
                    sequence = np.expand_dims(sequence, axis=0)
                    tr_loss, tr_acc = model.train_on_batch(
                        [sequence, sequence, sequence, sequence], truth)

                    mean_tr_acc.append(tr_acc)
                    mean_tr_loss.append(tr_loss)
                model.reset_states()

            print('accuracy training = {}'.format(np.mean(mean_tr_acc)))
            print('loss training = {}'.format(np.mean(mean_tr_loss)))
            print('___________________________________')

    mean_te_acc = []
    mean_te_loss = []
    predictions = []
    for i in range(len(X_test)):
        for sequence, truth in zip(X_test[i], Y_test[i]):
            sequence = sequence.reshape((1, sequence.shape[0]))
            sequence = np.expand_dims(sequence, axis=0)
            te_loss, te_acc = model.test_on_batch(
                [sequence, sequence, sequence, sequence], truth)

            mean_te_acc.append(te_acc)
            mean_te_loss.append(te_loss)
        model.reset_states()

        predictions.append([])
        for sequence, truth in zip(X_test[i], Y_test[i]):
            sequence = sequence.reshape((1, sequence.shape[0]))
            sequence = np.expand_dims(sequence, axis=0)
            y_pred = model.predict_on_batch(
                [sequence, sequence, sequence, sequence])
            predictions[i].append(y_pred)
        model.reset_states()

    print('accuracy testing = {}'.format(np.mean(mean_te_acc)))
    print('loss testing = {}'.format(np.mean(mean_te_loss)))

    print "Check windiff value"
    #rounded = np.round(predictions)
    result = helper.windiff_metric_NUMPY(Y_test,
                                         predictions,
                                         win_size=-1,
                                         rounded=False)
    print result
    print('___________________________________')
# Example #5
# 0
def custom_fit(X, Y, model, train_split=0.8, epochs=10):
        
    if train_split == 0:
        X_test, Y_test = X, Y
    else:
        # This is only for training! (If train_split =1 then only TEST)
        X_train, Y_train, X_test, Y_test = split_data(X, Y, train_split=train_split)

        print "Batch size = 1"
        print 'Train...'
        _total_docs = len(X_train)
        _total_sentences = sum([sequence.shape[0] for sequence in X_train])
        for epoch in range(epochs):
            mean_tr_acc = []
            mean_tr_loss = []
            _sentence_no = 0
            for i in range(len(X_train)):
                #y_true = Y_train[i]
                for sequence, truth in zip(X_train[i], Y_train[i]): # Sequence in document
                    sequence = sequence.reshape((1, sequence.shape[0]))
                    #sequence = np.expand_dims(sequence, axis=0)
                    tr_loss, tr_acc = model.train_on_batch([sequence], truth)

                    mean_tr_acc.append(tr_acc)
                    mean_tr_loss.append(tr_loss)
                    _sentence_no += 1
                    print ">> Epoch: %d/%d | Doc: %d/%d | Sent: %d/%d" %(epoch+1, epochs, i+1, _total_docs, _sentence_no+1, _total_sentences)
                model.reset_states()
        
            print('accuracy training = {}'.format(np.mean(mean_tr_acc)))
            print('loss training = {}'.format(np.mean(mean_tr_loss)))
            print('___________________________________')
    
    # Testing
    mean_te_acc = []
    mean_te_loss = []
    predictions = []
    _total_docs = len(X_test)
    _total_sentences = sum([sequence.shape[0] for sequence in X_test])
    _sentence_no = 0
    for i in range(len(X_test)):
        for sequence, truth in zip(X_test[i], Y_test[i]):
            sequence = sequence.reshape((1, sequence.shape[0]))
            #sequence = np.expand_dims(sequence, axis=0)
            te_loss, te_acc = model.test_on_batch([sequence], truth)

            mean_te_acc.append(te_acc)
            mean_te_loss.append(te_loss)
            _sentence_no += 1
            print ">> TEST >> Doc: %d/%d | Sent: %d/%d" %(i+1, _total_docs, _sentence_no+1, _total_sentences)
        model.reset_states()

    print('accuracy testing = {}'.format(np.mean(mean_te_acc)))
    print('loss testing = {}'.format(np.mean(mean_te_loss)))
    print("Predicting...")
    
    for i in range(len(X_test)):
        predictions.append([])
        for sequence, truth in zip(X_test[i], Y_test[i]):
            sequence = sequence.reshape((1, sequence.shape[0]))
            #sequence = np.expand_dims(sequence, axis=0)
            y_pred = model.predict_on_batch([sequence])
            predictions[i].append(y_pred)
        model.reset_states()

    print "Check windiff value"
    #rounded = np.round(predictions)
    result = helper.windiff_metric_NUMPY(Y_test, predictions, win_size=-1, rounded=False)
    print result
    print('___________________________________')