def detect_serialized_datasets(self):
    """
    Look for serialized raw-data pickles.
    If none are found, create the pickles from the raw data.

    Calls
    -----
    1. prepare.get_raw_input_data
    2. prepare.get_raw_target_data
    3. prepare.prepare_dataset
    """
    prepared_data_dir = str(utils.prepared_data_folder / self.dir_str / self.period)
    os.makedirs(prepared_data_dir, exist_ok=True)
    self.prepared_data_dir = prepared_data_dir
    print(f'Looking for pickles in {self.prepared_data_dir}')

    if len(utils.find('*serialized.pkl', self.prepared_data_dir)) == 2:
        print('This domain-period combination has been serialized before, loading objects...')
        for pkl in utils.find('*.pkl', self.prepared_data_dir):
            if "input_ds" in pkl:
                self.input_ds_serialized_path = pkl
            elif "rf_ds" in pkl:
                self.rf_ds_serialized_path = pkl
    else:
        print('Proceeding to load & serialize raw data.')
        self.raw_input_dir = prepare.get_raw_input_data(self)
        self.raw_rf_dir = prepare.get_raw_target_data(self)
        print(f'Raw input datasets taken from @: \n{self.raw_input_dir}')
        print(f'Raw rainfall datasets taken from @: \n{self.raw_rf_dir}')
        self.input_ds_serialized_path, self.rf_ds_serialized_path = prepare.prepare_dataset(
            self, self.prepared_data_dir)
        print(f'Serialized raw input datasets @: \n{self.input_ds_serialized_path}')
        print(f'Serialized raw RF datasets @: \n{self.rf_ds_serialized_path}')

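# The utils.find helper used above is not shown in this snippet. A minimal
# sketch of what such a helper might look like (an assumption, not the
# project's actual code): a recursive glob over a directory tree.
import fnmatch
import os


def find(pattern, path):
    """Return all files under `path` whose basename matches `pattern`."""
    matches = []
    for root, _dirs, files in os.walk(path):
        for name in files:
            if fnmatch.fnmatch(name, pattern):
                matches.append(os.path.join(root, name))
    return matches
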
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')
    if not FLAGS.tflite_model:
        raise ValueError('You must supply the frozen pb with --tflite_model')
    if FLAGS.inference_type != 'float' and FLAGS.inference_type != 'uint8':
        raise ValueError('--inference_type must be one of float or uint8')

    tf.logging.set_verbosity(tf.logging.INFO)
    tfrecords = prepare_tfrecords(FLAGS.dataset_name, FLAGS.dataset_dir,
                                  FLAGS.dataset_split_name)

    if FLAGS.max_num_batches:
        num_batches = FLAGS.max_num_batches
    else:
        num_records = sum(
            [len(list(tf.python_io.tf_record_iterator(r))) for r in tfrecords])
        num_batches = int(math.ceil(num_records / float(FLAGS.batch_size)))

    filenames = tf.placeholder(tf.string, shape=[None])
    dataset = prepare_dataset(filenames, FLAGS.dataset_name, FLAGS.input_size,
                              batch_size=FLAGS.batch_size,
                              inference_type=FLAGS.inference_type)
    iterator = dataset.make_initializable_iterator()
    next_batch = iterator.get_next()

    tf.logging.info('Prepare run_tflite')
    eval_dir = os.path.dirname(FLAGS.tflite_model)
    eval_dir = os.path.join(eval_dir, 'eval_tflite')
    if not os.path.exists(eval_dir):
        os.makedirs(eval_dir)
    cmds = prepare_run_tflite_commands(eval_dir, FLAGS.tflite_model,
                                       FLAGS.inference_type)

    tf.logging.info('Prepare metrics')
    lbls, preds, accuracy, acc_update_op = prepare_metrics(
        FLAGS.dataset_name, inference_type=FLAGS.inference_type)

    # Initialize `iterator` with dataset.
    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(iterator.initializer, feed_dict={filenames: tfrecords})
        for step in range(num_batches):
            if (step % 1000) == 0:
                print('{}/{}'.format(step, num_batches))
                # print(' '.join(cmds))
                print('  Accuracy: [{:.4f}]'.format(sess.run(accuracy)))
            images, labels = sess.run(next_batch)
            np.save(os.path.join(eval_dir, 'batch_xs.npy'), images)
            subprocess.check_output(cmds)
            ys = np.load(os.path.join(eval_dir, 'output_ys.npy'))
            sess.run(acc_update_op, feed_dict={lbls: labels, preds: ys})

        print('Accuracy: [{:.4f}]'.format(sess.run(accuracy)))

import prepare as pp
import feature_construction as fc
import modeling as md

# start timing
from time import time
start = time()

pp.prepare_dataset()
fc.construct_feature()
md.one_hot()
md.tuning_hyper_parameters_sim()
md.predict_test_ol()

print('\nThe total time : {0:.0f} s'.format(time() - start))

"url": "https://www.dropbox.com/s/m38haw5rhz9wdm2/train_clean.tgz", "source": "train_clean.en", "target": "train_clean.zh", "data_source": "train.en", "data_target": "train.zh", } VALID = { "url": "https://www.dropbox.com/s/ft2evgnh8taeonf/valid_clean.tgz", "source": "valid_clean.en", "target": "valid_clean.zh", "data_source": "valid.en", "data_target": "valid.zh", } SAMPLE = { "url": "https://www.dropbox.com/s/11i3ccsizgq8lgt/sample_train.tgz", "source": "sample_train.en", "target": "sample_train.zh", "data_source": "train.en", "data_target": "train.zh", } DATA_DIR = "data/challenger_nmt/" TMP_DIR = "tmp/challenger_nmt/" if __name__ == '__main__': for ds in [SAMPLE, VALID]: # dataset is already tokenized prepare.prepare_dataset(DATA_DIR, TMP_DIR, ds, tokenize=False)
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')
    if not FLAGS.frozen_pb:
        raise ValueError('You must supply the frozen pb with --frozen_pb')
    if not FLAGS.output_node_name:
        raise ValueError(
            'You must supply the output node name with --output_node_name')

    tf.logging.set_verbosity(tf.logging.INFO)
    tfrecords = prepare_tfrecords(FLAGS.dataset_name, FLAGS.dataset_dir,
                                  FLAGS.dataset_split_name)

    if FLAGS.max_num_batches:
        num_batches = FLAGS.max_num_batches
    else:
        num_records = sum(
            [len(list(tf.python_io.tf_record_iterator(r))) for r in tfrecords])
        num_batches = int(math.ceil(num_records / float(FLAGS.batch_size)))

    # for example in tf.python_io.tf_record_iterator(tfrecords[0]):
    #     result = tf.train.Example.FromString(example)
    #     print(result)
    #     break

    tf.logging.info('Prepare dataset from tfrecord {}'.format(tfrecords[0]))
    filenames = tf.placeholder(tf.string, shape=[None])
    dataset = prepare_dataset(filenames, FLAGS.dataset_name, FLAGS.input_size,
                              batch_size=FLAGS.batch_size)
    iterator = dataset.make_initializable_iterator()
    next_batch = iterator.get_next()

    tf.logging.info('Load GraphDef from frozen_pb {}'.format(FLAGS.frozen_pb))
    graph_def = load_graph_def(FLAGS.frozen_pb)

    tf.logging.info('Prepare metrics')
    lbls, preds, accuracy, acc_update_op = prepare_metrics(FLAGS.dataset_name)

    if FLAGS.summary_dir:
        tf.logging.info('Prepare summary writer')
        summary_writer = tf.summary.FileWriter(FLAGS.summary_dir)
        summaries = tf.summary.merge_all()

    # Initialize `iterator` with training data.
    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(iterator.initializer, feed_dict={filenames: tfrecords})
        tf.import_graph_def(graph_def, name='')
        graph = sess.graph

        # get x and y
        x = graph.get_tensor_by_name('{}:0'.format(FLAGS.input_node_name))
        y = graph.get_tensor_by_name('{}:0'.format(FLAGS.output_node_name))

        for step in range(num_batches):
            images, labels = sess.run(next_batch)
            ys = sess.run(y, feed_dict={x: images})
            sess.run(acc_update_op, feed_dict={lbls: labels, preds: ys})
            if FLAGS.summary_dir:
                summary = sess.run(summaries)
                summary_writer.add_summary(summary, step)

        print('Accuracy: [{:.4f}]'.format(sess.run(accuracy)))

        # import ipdb
        # ipdb.set_trace()
        if FLAGS.summary_dir:
            summary_writer.add_graph(sess.graph)

interval = args.interval
Ntrain = args.Ntrain
iterations = args.iterations

image_path = "/coco/"
image_list = os.listdir(image_path)

outdir = "./output_pretrain"
if not os.path.exists(outdir):
    os.mkdir(outdir)

test_box = []
for i in range(testsize):
    rnd = np.random.randint(Ntrain + 1, Ntrain + 100)
    image_name = image_path + image_list[rnd]
    _, sr = prepare_dataset(image_name)
    test_box.append(sr)

x_test = chainer.as_variable(xp.array(test_box).astype(xp.float32))

generator = Generator()
generator.to_gpu()
gen_opt = set_optimizer(generator)

for epoch in range(epochs):
    sum_gen_loss = 0
    sum_dis_loss = 0
    for batch in range(0, iterations, batchsize):
        hr_box = []
        sr_box = []
        for index in range(batchsize):

y_path="/twin/" x_tag_path="/medium_mask/" y_tag_path="/twin_mask/" x_list=os.listdir(x_tag_path) y_list=os.listdir(y_tag_path) Nx = len(x_list) - 50 Ny = len(y_list) test_box=[] binary_box=[] for _ in range(testsize): rnd=np.random.randint(Nx,Nx+50) filename=x_tag_path+x_list[rnd] binary=prepare_mask(filename,size,cluster) filename=x_path+x_list[rnd] image=prepare_dataset(filename,size,cluster) test_box.append(image) binary_box.append(binary) test_img=chainer.as_variable(xp.array(test_box).astype(xp.float32)) test_mask=chainer.as_variable(xp.array(binary_box).astype(xp.float32)) outdir="./output" if not os.path.exists(outdir): os.mkdir(outdir) generator_xy = Generator() generator_xy.to_gpu() gen_xy_opt = set_optimizer(generator_xy) generator_yx = Generator()
VALID_ENZH = {
    "url": "http://data.statmt.org/wmt18/translation-task/dev.tgz",
    "source": "dev/newsdev2017-zhen-ref.en.sgm",
    "target": "dev/newsdev2017-zhen-src.zh.sgm",
    "data_source": "valid.en",
    "data_target": "valid.zh",
}

TEST_ZHEN = {
    "url": "http://data.statmt.org/wmt18/translation-task/test.tgz",
    "source": "test/newstest2018-zhen-ref.en.sgm",
    "target": "test/newstest2018-zhen-src.zh.sgm",
    "data_source": "test.en",
    "data_target": "test.zh",
}

DATA_DIR = "data/wmt18_en_zh/"
TMP_DIR = "tmp/wmt18_en_zh/"

if __name__ == '__main__':
    sys.stdin = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8',
                                  write_through=True, line_buffering=True)
    for ds in [TRAIN_ENZH, VALID_ENZH, TEST_ZHEN]:
        prepare.prepare_dataset(DATA_DIR, TMP_DIR, ds)

def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')
    if not FLAGS.frozen_pb:
        raise ValueError('You must supply the frozen pb with --frozen_pb')
    if not FLAGS.output_node_name:
        raise ValueError(
            'You must supply the output node name with --output_node_name')
    if not FLAGS.output_dir:
        raise ValueError(
            'You must supply the output directory with --output_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    tfrecords = prepare_tfrecords(FLAGS.dataset_name, FLAGS.dataset_dir,
                                  FLAGS.dataset_split_name)

    if FLAGS.max_num_batches:
        num_batches = FLAGS.max_num_batches
    else:
        num_records = sum(
            [len(list(tf.python_io.tf_record_iterator(r))) for r in tfrecords])
        num_batches = int(math.ceil(num_records / float(FLAGS.batch_size)))

    tf.logging.info('Load GraphDef from frozen_pb {}'.format(FLAGS.frozen_pb))
    graph_def = load_graph_def(FLAGS.frozen_pb)

    tf.logging.info('Quantize Graph')
    with tf.Session() as sess:
        tf.import_graph_def(graph_def, name='')
        quantized_graph = qg.create_training_graph(sess.graph)
        quantized_inf_graph = qg.create_eval_graph(sess.graph)

    # Initialize `iterator` with training data.
    with tf.Session(graph=quantized_graph) as sess:
        tf.logging.info('Prepare dataset')
        with tf.name_scope("dataset"):
            filenames = tf.placeholder(tf.string, shape=[None])
            dataset = prepare_dataset(filenames, FLAGS.dataset_name,
                                      FLAGS.input_size,
                                      batch_size=FLAGS.batch_size)
            iterator = dataset.make_initializable_iterator()
            next_batch = iterator.get_next()

        tf.logging.info('Prepare metrics')
        lbls, preds, accuracy, acc_update_op = prepare_metrics(
            FLAGS.dataset_name)

        tf.logging.info('Prepare Saver')
        saver = tf.train.Saver()

        if FLAGS.summary_dir:
            tf.logging.info('Prepare summary writer')
            summary_writer = tf.summary.FileWriter(FLAGS.summary_dir)

        # initialize
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sess.run(iterator.initializer, feed_dict={filenames: tfrecords})

        graph = sess.graph

        # get x and y
        x = graph.get_tensor_by_name('{}:0'.format(FLAGS.input_node_name))
        y = graph.get_tensor_by_name('{}:0'.format(FLAGS.output_node_name))

        # summary all min/max variables
        # print(graph.get_collection('variables')[3].eval())
        for var in graph.get_collection('variables'):
            tf.summary.scalar(var.name, var)
        summaries = tf.summary.merge_all()

        for step in range(num_batches):
            images, labels = sess.run(next_batch)
            ys = sess.run(y, feed_dict={x: images})
            sess.run(acc_update_op, feed_dict={lbls: labels, preds: ys})
            summary = sess.run(summaries)
            if FLAGS.summary_dir:
                summary_writer.add_summary(summary, step)

        print('Accuracy: [{:.4f}]'.format(sess.run(accuracy)))
        if FLAGS.summary_dir:
            summary_writer.add_graph(graph)

        # save graph and ckpts
        saver.save(sess, os.path.join(FLAGS.output_dir, "model.ckpt"))
        # tf.train.write_graph(graph, FLAGS.output_dir, 'quantor.pb', as_text=False)
        tf.train.write_graph(quantized_inf_graph, FLAGS.output_dir,
                             'quantor.pb', as_text=False)

predictor_y = UNet()
predictor_y.to_gpu()
pre_opt_y = set_optimizer(predictor_y)

for epoch in range(epochs):
    sum_dis_loss = 0
    sum_gen_loss = 0
    for batch in range(0, iterations, batchsize):
        x_box = []
        y_box = []
        rnd1 = np.random.randint(x_len - batchsize)
        rnd2 = np.random.randint(y_len - batchsize)
        for index in range(batchsize):
            image_name = x_path + str(rnd1 + index) + ".png"
            source = prepare_dataset(image_name)
            image_name = y_path + str(rnd2 + index) + ".png"
            target = prepare_dataset(image_name)
            x_box.append(source)
            y_box.append(target)

        x = chainer.as_variable(xp.array(x_box).astype(xp.float32))
        y = chainer.as_variable(xp.array(y_box).astype(xp.float32))

        for index in range(frames, batchsize):
            x_series = F.concat([
                x[index - 2].reshape(1, 3, size, size),
                x[index - 1].reshape(1, 3, size, size)
            ])
            x_serial = x[index - 2:index]
            y_series = F.concat([

discriminator.to_gpu()
dis_opt = set_optimizer(discriminator)

for epoch in range(epochs):
    sum_gen_loss = 0
    sum_dis_loss = 0
    for batch in range(0, iterations, framesize):
        input_box = []
        target_box = []
        opt_box = []
        rnd = np.random.randint(image_len)
        dir_path = image_path + image_list[rnd]
        ta = np.random.choice(["lefteye", "righteye"])
        for index in range(framesize):
            filename1 = dir_path + "/" + ta + "_" + str(0) + ".png"
            inp = prepare_dataset(filename1)
            input_box.append(inp)
            filename2 = dir_path + "/" + ta + "_" + str(index) + ".png"
            img = prepare_dataset(filename2)
            target_box.append(img)
            ref = optical_flow(filename1, filename2)
            opt_box.append(ref)

        x = chainer.as_variable(xp.array(input_box).astype(xp.float32))
        t = chainer.as_variable(xp.array(target_box).astype(xp.float32))
        opt = chainer.as_variable(xp.array(opt_box).astype(xp.float32))

        # y = encoder(F.concat([x, opt]))
        # _, channels, height, width = y.shape
        # y = y.reshape(1, framesize, channels, height, width).transpose(0, 2, 1, 3, 4)

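# prepare_dataset(filename) and optical_flow(...) are defined elsewhere in
# these repositories. A minimal sketch of a single-image loader like the one
# called above (an assumption, not the actual implementation): read the image,
# resize it, scale pixels to [-1, 1], and move channels first for Chainer.
import numpy as np
from PIL import Image


def prepare_dataset_sketch(filename, size=128):
    img = Image.open(filename).convert("RGB").resize((size, size))
    arr = np.asarray(img, dtype=np.float32)
    arr = (arr - 127.5) / 127.5      # scale to [-1, 1]
    return arr.transpose(2, 0, 1)    # HWC -> CHW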