def run_lstm_conditional(config, split=True, outputpath='output/lstm_conditional', final=False):
    """Train the conditional LSTM stance model on GloVe 6B 50d features and
    evaluate it on the dev split (or on the test split when final=True)."""
    ## Get data
    config, data_dict = get_data(
        config,
        filename_embeddings='/../../glove.6B.50d.txt',
        pickle_path='/data/glove50d_h_ids_b_ids_pickle.p',
        concat=False)

    ## Pass data into the local namespace:
    y = data_dict['y']
    h = data_dict['h_np']
    b = data_dict['b_np']
    h_len = data_dict['h_seqlen']
    b_len = data_dict['b_seqlen']

    # config, data_dict1 = get_data(config,
    #                               filename_embeddings='/../../glove.6B.50d.txt',
    #                               pickle_path='/data/glove50d_h_ids_b_ids_comp_pickle.p',
    #                               concat=False)
    #
    # ## Pass data into the local namespace:
    # y1 = data_dict1['y']
    # h1 = data_dict1['h_np']
    # b1 = data_dict1['b_np']
    # h1_len = data_dict1['h_seqlen']
    # b1_len = data_dict1['b_seqlen']
    #
    # if config.b_max_len is not None:
    #     b_max_len = config.b_max_len
    #     if np.shape(b1)[1] > b_max_len:
    #         b1 = b1[:, 0:b_max_len]
    #         b1_len = np.minimum(b1_len, b_max_len)

    # Optionally shorten the dataset:
    ## affects the number of samples and max_len.
    if config.num_samples is not None:
        ## Fix the random seed so the subsample is reproducible
        np.random.seed(1)
        ind = range(np.shape(h)[0])
        random.shuffle(ind)
        indices = ind[0:config.num_samples]
        h = h[indices, :]
        b = b[indices, :]
        h_len = h_len[indices]
        b_len = b_len[indices]
        y = y[indices]

    # Truncate headlines and bodies
    if config.h_max_len is not None:
        h_max_len = config.h_max_len
        if np.shape(h)[1] > h_max_len:
            h = h[:, 0:h_max_len]
            h_len = np.minimum(h_len, h_max_len)

    if config.b_max_len is not None:
        b_max_len = config.b_max_len
        if np.shape(b)[1] > b_max_len:
            b = b[:, 0:b_max_len]
            b_len = np.minimum(b_len, b_max_len)

    if split:
        # Split data
        train_indices, dev_indices, test_indices = split_indices(np.shape(h)[0])

        # Divide data: train
        train_h = h[train_indices, :]
        train_b = b[train_indices, :]
        train_h_len = h_len[train_indices]
        train_b_len = b_len[train_indices]
        train_y = y[train_indices]

        # dev
        dev_h = h[dev_indices, :]
        dev_b = b[dev_indices, :]
        dev_h_len = h_len[dev_indices]
        dev_b_len = b_len[dev_indices]
        dev_y = y[dev_indices]

    if final:
        # Combine train and dev
        train_dev_indices = train_indices + dev_indices
        train_h = h[train_dev_indices, :]
        train_b = b[train_dev_indices, :]
        train_h_len = h_len[train_dev_indices]
        train_b_len = b_len[train_dev_indices]
        train_y = y[train_dev_indices]

        # Set dev to test
        dev_h = h[test_indices, :]
        dev_b = b[test_indices, :]
        dev_h_len = h_len[test_indices]
        dev_b_len = b_len[test_indices]
        dev_y = y[test_indices]

    print np.shape(train_h)

    # dev_h = h1
    # dev_b = b1
    # dev_h_len = h1_len
    # dev_b_len = b1_len
    # dev_y = y1

    ## Pass parameter_dict to config settings:
    ## changes to config based on the data shape.
    assert (np.shape(train_h)[0] == np.shape(train_b)[0] == np.shape(train_y)[0] ==
            np.shape(train_h_len)[0] == np.shape(train_b_len)[0])
    # assert (np.shape(dev_h)[0] == np.shape(dev_b)[0] == np.shape(dev_y)[0] ==
    #         np.shape(dev_h_len)[0] == np.shape(dev_b_len)[0])
    config.num_samples = np.shape(train_h)[0]
    config.h_max_len = np.shape(train_h)[1]
    config.b_max_len = np.shape(train_b)[1]

    data_path = '/home/neha/sem2/nlu/project/stance_detection-master/code/models/lstm_conditional/'
    # model_save_name = 'lstm-conditional-50blen-2l-60e'
    model_save_name = 'lstm-conditional-test2'

    ## Start TensorFlow!
    print('Starting TensorFlow operations')
    print 'With hidden layers: ', config.n_layers  ## hidden layer?

    with tf.Graph().as_default():
        tf.set_random_seed(1)
        model = LSTMCondModel(config)
        init = tf.global_variables_initializer()
        # saver = tf.train.Saver()

        with tf.Session() as session:
            session.run(init)
            # saver.restore(session, tf.train.latest_checkpoint(
            #     checkpoint_dir='/home/neha/sem2/nlu/project/stance_detection-master/code/models/lstm_conditional/'))
            # saver = tf.train.Saver()
            losses_ep, dev_performances_ep, dev_predicted_classes_ep, dev_predictions_ep = model.fit(
                session,
                train_h, train_b, train_h_len, train_b_len, train_y,
                dev_h, dev_b, dev_h_len, dev_b_len, dev_y)
            # saver.save(session, data_path + model_save_name + '-final')

    # Write results to csv
    convertOutputs(outputpath, config, losses_ep, dev_performances_ep)

    print('Losses ', losses_ep)
    print('Dev Performance ', dev_performances_ep)

    return losses_ep, dev_predicted_classes_ep, dev_performances_ep
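
# --- Illustrative usage sketch (assumption, not part of the training scripts) ---
# A minimal example of how run_lstm_conditional might be invoked. It assumes a
# Config object exposing the attributes read above (num_samples, h_max_len,
# b_max_len, n_layers); the class name and attribute values below are made up.
#
# class Config(object):
#     num_samples = None    # use the full dataset
#     h_max_len = 40        # truncate headlines to 40 tokens
#     b_max_len = 80        # truncate bodies to 80 tokens
#     n_layers = 2          # number of stacked LSTM layers
#
# losses, predicted_classes, performances = run_lstm_conditional(
#     Config(), split=True, outputpath='output/lstm_conditional', final=False)
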
def run_lstm_conditional(config, split=True, outputpath='../../xp', final=False):
    """Train the conditional LSTM stance model on Twitter GloVe 50d features,
    passing additional lexicon/sentiment sequence features into model.fit."""
    ## Get data
    config, data_dict = get_data(
        config,
        filename_embeddings='/../../glove/glove.twitter.27B.50d.txt',
        pickle_path='/../../glove/twitter50d_h_ids_b_ids_pickle.p',
        concat=False)

    config1, lex_data_dict = get_lexicon_data_bilstm_sentiment(
        config,
        filename_embeddings='/../../glove/glove.twitter.27B.50d.txt',
        pickle_path='lexiconp.p',
        concat=False)
    # print(data_dict_l)

    ## Pass data into the local namespace:
    y = data_dict['y']
    h = data_dict['h_np']
    b = data_dict['b_np']
    h_len = data_dict['h_seqlen']
    b_len = data_dict['b_seqlen']

    # Optionally shorten the dataset:
    ## affects the number of samples and max_len.
    if config.num_samples is not None:
        ## Fix the random seed so the subsample is reproducible
        np.random.seed(1)
        ind = range(np.shape(h)[0])
        random.shuffle(ind)
        indices = ind[0:config.num_samples]
        h = h[indices, :]
        b = b[indices, :]
        h_len = h_len[indices]
        b_len = b_len[indices]
        y = y[indices]

    # Truncate headlines and bodies
    if config.h_max_len is not None:
        h_max_len = config.h_max_len
        if np.shape(h)[1] > h_max_len:
            h = h[:, 0:h_max_len]
            h_len = np.minimum(h_len, h_max_len)

    if config.b_max_len is not None:
        b_max_len = config.b_max_len
        if np.shape(b)[1] > b_max_len:
            b = b[:, 0:b_max_len]
            b_len = np.minimum(b_len, b_max_len)

    ####################### LEX DATA #######################
    # lex_y = lex_data_dict['y']
    # lex_h = lex_data_dict['h_np']
    # lex_h_len = lex_data_dict['h_seqlen']
    lex_b = lex_data_dict['h_b_np']
    lex_b_len = lex_data_dict['seqlen']

    # lex_y = data_dict_l['y']
    # lex_h_b_np = data_dict_l['h_b_np']
    # lex_seqlen = data_dict_l['seqlen']
    # lex_data = pack_labels(lex_h_b_np, lex_y, lex_seqlen)
    # if config1.num_samples is not None:
    #     lex_num_samples = config1.num_samples
    #     lex_data = lex_data[0:num_samples - 1]
    # lex_train_data, lex_dev_data, lex_test_data, lex_train_indices, lex_dev_indices, lex_test_indices = \
    #     split_data(lex_data, prop_train=0.6, prop_dev=0.2, seed=56)
    # config1.num_samples = len(lex_train_indices)
    # config1.max_length = 75
    # print("lex_train_data", lex_train_data[0])

    if split:
        # Split data
        train_indices, dev_indices, test_indices = split_indices(np.shape(h)[0])

        # Divide data: train
        train_h = h[train_indices, :]
        train_b = b[train_indices, :]
        train_h_len = h_len[train_indices]
        train_b_len = b_len[train_indices]
        train_y = y[train_indices]

        ####################### LEX train #######################
        # lex_train_h = lex_h[train_indices, :]
        # lex_train_h_len = lex_h_len[train_indices]
        lex_train_b = lex_b[train_indices, :]
        lex_train_b_len = lex_b_len[train_indices]

        # dev
        dev_h = h[dev_indices, :]
        dev_b = b[dev_indices, :]
        dev_h_len = h_len[dev_indices]
        dev_b_len = b_len[dev_indices]
        dev_y = y[dev_indices]

        ####################### LEX dev #######################
        # lex_dev_h = lex_h[dev_indices, :]
        # lex_dev_h_len = lex_h_len[dev_indices]
        lex_dev_b = lex_b[dev_indices, :]
        lex_dev_b_len = lex_b_len[dev_indices]

    if final:
        # Combine train and dev
        train_dev_indices = train_indices + dev_indices
        train_h = h[train_dev_indices, :]
        train_b = b[train_dev_indices, :]
        train_h_len = h_len[train_dev_indices]
        train_b_len = b_len[train_dev_indices]
        train_y = y[train_dev_indices]

        ####################### LEX train #######################
        # lex_train_h = lex_h[train_dev_indices, :]
        # lex_train_h_len = lex_h_len[train_dev_indices]
        lex_train_b = lex_b[train_dev_indices, :]
        lex_train_b_len = lex_b_len[train_dev_indices]

        # Set dev to test
        dev_h = h[test_indices, :]
        dev_b = b[test_indices, :]
        dev_h_len = h_len[test_indices]
        dev_b_len = b_len[test_indices]
        dev_y = y[test_indices]

        ####################### LEX dev #######################
        # lex_dev_h = lex_h[test_indices, :]
        # lex_dev_h_len = lex_h_len[test_indices]
        lex_dev_b = lex_b[test_indices, :]
        lex_dev_b_len = lex_b_len[test_indices]

    ## Pass parameter_dict to config settings:
    ## changes to config based on the data shape.
    assert (np.shape(train_h)[0] == np.shape(train_b)[0] == np.shape(train_y)[0] ==
            np.shape(train_h_len)[0] == np.shape(train_b_len)[0] ==
            np.shape(lex_train_b_len)[0])
    config.num_samples = np.shape(train_h)[0]
    config.h_max_len = np.shape(train_h)[1]
    config.b_max_len = np.shape(train_b)[1]

    ## Start TensorFlow!
    print('Starting TensorFlow operations')
    print 'With hidden layers: ', config.n_layers  ## hidden layer?

    with tf.Graph().as_default():
        tf.set_random_seed(1)
        model = LSTMCondModel(config)
        init = tf.global_variables_initializer()

        with tf.Session() as session:
            session.run(init)
            losses_ep, dev_performances_ep, dev_predicted_classes_ep, dev_predictions_ep = model.fit(
                session,
                train_h, train_b, train_h_len, train_b_len, train_y,
                dev_h, dev_b, dev_h_len, dev_b_len, dev_y,
                lex_train_b, lex_train_b_len, lex_dev_b, lex_dev_b_len)

    # Write results to csv
    convertOutputs(outputpath, config, losses_ep, dev_performances_ep)

    print('Losses ', losses_ep)
    print('Dev Performance ', dev_performances_ep)

    return losses_ep, dev_predicted_classes_ep, dev_performances_ep

def run_lstm_conditional(config, split=True, outputpath='../../xp', final=False):
    """Train the conditional LSTM stance model on Twitter GloVe 50d features and
    save the trained weights to model/model.ckpt."""
    ## Get data
    config, data_dict = get_data(
        config,
        filename_embeddings='/../../glove/glove.twitter.27B.50d.txt',
        pickle_path='/../../glove/twitter50d_h_ids_b_ids_pickle.p',
        concat=False)

    ## Pass data into the local namespace:
    y = data_dict['y']
    h = data_dict['h_np']
    b = data_dict['b_np']
    print("===========Number of head=========", len(h))
    print("===========Number of body=========", len(b))
    h_len = data_dict['h_seqlen']
    b_len = data_dict['b_seqlen']

    # Optionally shorten the dataset:
    ## affects the number of samples and max_len.
    if config.num_samples is not None:
        ## Fix the random seed so the subsample is reproducible
        np.random.seed(1)
        ind = range(np.shape(h)[0])
        random.shuffle(ind)
        indices = ind[0:config.num_samples]
        h = h[indices, :]
        b = b[indices, :]
        h_len = h_len[indices]
        b_len = b_len[indices]
        y = y[indices]

    # Truncate headlines and bodies
    if config.h_max_len is not None:
        h_max_len = config.h_max_len
        if np.shape(h)[1] > h_max_len:
            h = h[:, 0:h_max_len]
            h_len = np.minimum(h_len, h_max_len)

    if config.b_max_len is not None:
        b_max_len = config.b_max_len
        if np.shape(b)[1] > b_max_len:
            b = b[:, 0:b_max_len]
            b_len = np.minimum(b_len, b_max_len)

    if split:
        # Split data
        train_indices, dev_indices, test_indices = split_indices(np.shape(h)[0])

        # Divide data: train
        train_h = h[train_indices, :]
        train_b = b[train_indices, :]
        train_h_len = h_len[train_indices]
        train_b_len = b_len[train_indices]
        train_y = y[train_indices]

        # dev
        dev_h = h[dev_indices, :]
        dev_b = b[dev_indices, :]
        dev_h_len = h_len[dev_indices]
        dev_b_len = b_len[dev_indices]
        dev_y = y[dev_indices]

    if final:
        # Combine train and dev
        train_dev_indices = train_indices + dev_indices
        train_h = h[train_dev_indices, :]
        train_b = b[train_dev_indices, :]
        train_h_len = h_len[train_dev_indices]
        train_b_len = b_len[train_dev_indices]
        train_y = y[train_dev_indices]

        # Set dev to test
        dev_h = h[test_indices, :]
        dev_b = b[test_indices, :]
        dev_h_len = h_len[test_indices]
        dev_b_len = b_len[test_indices]
        dev_y = y[test_indices]

    ## Pass parameter_dict to config settings:
    ## changes to config based on the data shape.
    assert (np.shape(train_h)[0] == np.shape(train_b)[0] == np.shape(train_y)[0] ==
            np.shape(train_h_len)[0] == np.shape(train_b_len)[0])
    config.num_samples = np.shape(train_h)[0]
    config.h_max_len = np.shape(train_h)[1]
    config.b_max_len = np.shape(train_b)[1]

    ## Start TensorFlow!
    print('Starting TensorFlow operations')
    print 'With hidden layers: ', config.n_layers  ## hidden layer?

    with tf.Graph().as_default():
        tf.set_random_seed(1)
        model = LSTMCondModel(config)
        # saver = tf.train.Saver()
        saver = tf.train.Saver(tf.global_variables())
        init = tf.global_variables_initializer()

        with tf.Session() as session:
            session.run(init)
            losses_ep, dev_performances_ep, dev_predicted_classes_ep, dev_predictions_ep = model.fit(
                session,
                train_h, train_b, train_h_len, train_b_len, train_y,
                dev_h, dev_b, dev_h_len, dev_b_len, dev_y)
            save_path = saver.save(session, "model/model.ckpt")
            print("Model saved in file: %s" % save_path)

    # Write results to csv
    convertOutputs(outputpath, config, losses_ep, dev_performances_ep)

    print('Losses ', losses_ep)
    print('Dev Performance ', dev_performances_ep)

    return losses_ep, dev_predicted_classes_ep, dev_performances_ep
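
# --- Illustrative restore sketch (assumption, not part of the training scripts) ---
# A minimal sketch of reloading the checkpoint written above, using the standard
# TF 1.x Saver API. It assumes the graph is rebuilt with the same config object
# before restoring; LSTMCondModel and config are the project objects used above,
# and the evaluation step is left as a placeholder.
#
# with tf.Graph().as_default():
#     tf.set_random_seed(1)
#     model = LSTMCondModel(config)
#     saver = tf.train.Saver(tf.global_variables())
#     with tf.Session() as session:
#         saver.restore(session, "model/model.ckpt")
#         # ... run prediction / evaluation with the restored weights ...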