def get_data(train_portion=0.7):
    """Put the protein data into buckets and split into train/dev/test sets.

    Args:
        train_portion: fraction of the data used for training (default 0.7).

    Returns:
        (train_set, dev_set, test_set): each a 1-tuple holding a single
        bucket of (input, target) pairs. The dev set reuses the test data
        for simplicity.

    Side effects:
        Sets the module-level ``feature_size`` global from the loaded data.
    """
    print("Reading data...")
    raw_data = dr.load_data()
    print(np.array(raw_data).shape)

    # Record the per-timestep feature width for the rest of the module.
    global feature_size
    feature_size = len(raw_data[0][0])

    # Autoencoder-style pairing: each sequence is its own target.
    # (Comprehension replaces the Py2-only xrange index loop.)
    pair_data = [(seq, seq) for seq in raw_data]

    train_data, test_data = dr.split_train_test(pair_data, train_portion)
    train_set = (train_data,)  # only one bucket
    test_set = (test_data,)
    dev_set = (test_data,)  # dev mirrors test for simplicity
    # print(np.array(train_set).shape)
    return train_set, dev_set, test_set
def main(unused_args):
    """Load the protein data, build the graph, and initialize an LSTM model."""
    # if not FLAGS.data_path:
    #     raise ValueError("Must set --data_path to PTB data directory")
    config = SmallConfig()
    with tf.Graph().as_default(), tf.Session() as session:
        raw_data = np.array(datareader.load_data())
        # print raw_data
        # The per-timestep feature width drives the model's input size.
        config.feature_size = len(raw_data[0, 0, :])
        # print config.feature_size
        scale = config.init_scale
        initializer = tf.random_uniform_initializer(-scale, scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = LSTMModel(True, config)
        # NOTE(review): initialize_all_variables is the pre-TF-1.0 API;
        # kept as-is to match the (old) TensorFlow version this file targets.
        tf.initialize_all_variables().run()
        print("done")