Example #1
import numpy as np

import datareader as dr  # project-local data loader, assumed from the calls below


def get_data(train_portion=0.7):
    ''' Put the protein data into buckets and split it into train, dev, and test sets.

        Args:
            train_portion (float, default 0.7): fraction of the data used for training.

        Returns:
            train_set, dev_set, test_set: (input, target) pairs grouped into buckets.
    '''
    print("Reading data...")
    raw_data = dr.load_data()
    print(np.array(raw_data).shape)

    # Record the per-timestep feature dimension as a module-level global.
    global feature_size
    feature_size = len(raw_data[0][0])

    # Autoencoder-style pairing: each sequence is both the input and the target.
    pair_data = [(seq, seq) for seq in raw_data]

    train_data, test_data = dr.split_train_test(pair_data, train_portion)
    train_set = (train_data,)  # only one bucket
    test_set = (test_data,)
    dev_set = (test_data,)  # reuse the test split as the dev set, for simplicity

    return train_set, dev_set, test_set
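
A minimal usage sketch, assuming the single-bucket layout produced above (the loop body is a placeholder, not part of the original example):

train_sets, dev_sets, test_sets = get_data(train_portion=0.7)
for bucket in train_sets:  # only one bucket in this setup
    for inputs, targets in bucket:  # autoencoder pairs, so inputs == targets
        pass  # feed (inputs, targets) to the model here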
Example #2
import numpy as np
import tensorflow as tf  # written against the TF 1.x graph/session API

import datareader  # project-local data loader
# SmallConfig and LSTMModel are defined elsewhere in this project.


def main(unused_args):
    config = SmallConfig()
    with tf.Graph().as_default(), tf.Session() as session:
        raw_data = np.array(datareader.load_data())
        config.feature_size = len(raw_data[0, 0, :])  # feature dimension per timestep
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = LSTMModel(True, config)  # build the model graph under this initializer
        # Deprecated spelling; later TF 1.x releases renamed this to
        # tf.global_variables_initializer().
        tf.initialize_all_variables().run()
    print("done")