def network_input(choice):
    parser = Parser(int(choice))
    trainset = dataset(parser, Config.Train_tfrecord, Config.BATCH_SIZE, shuffle=Config.SHUFFLE_SIZE)
    validset = dataset(parser, Config.Valid_tfrecord, 1, shuffle=None)
    example = tf.cond(is_training, lambda: trainset.get_next(), lambda: validset.get_next())
    images, y_true = example
    return images, y_true
def load_train_val_data(train_path, val_path):
    parser_train = Parser(ANCHORS, NUM_CLASSES, image_size=(416, 416))
    parser_test = Parser(ANCHORS, NUM_CLASSES)
    trainset = dataset(parser_train, train_path, BATCH_SIZE, shuffle=SHUFFLE_SIZE, multi_image_size=False)
    testset = dataset(parser_test, val_path, BATCH_SIZE, shuffle=None)
    return trainset, testset
def show_example(self):
    sess = tf.Session()
    classes = os.listdir(self.source_dir[:-len(".tfrecords") or None])
    train_tfrecord = self.source_dir
    anchors = utils.get_anchors(self.anchor_dir, self.img_h, self.img_w)
    parser = Parser(image_h=self.img_h, image_w=self.img_w, anchors=anchors,
                    num_classes=self.num_classes, cell_size=self.cell_size, debug=True)
    trainset = dataset(parser, train_tfrecord, 1, shuffle=1)
    is_training = tf.placeholder(tf.bool)
    example = trainset.get_next()

    image, boxes = sess.run(example)
    image, boxes = image[0], boxes[0]
    n_box = len(boxes)
    print(boxes)
    image = np.repeat(image, 3, axis=2)
    for i in range(n_box):
        image = cv2.rectangle(image,
                              (int(float(boxes[i][0])), int(float(boxes[i][1]))),
                              (int(float(boxes[i][2])), int(float(boxes[i][3]))),
                              (255, 0, 0), 1)
        label = str(int(float(boxes[i][4])))
        image = cv2.putText(image, label,
                            (int(float(boxes[i][0])), int(float(boxes[i][1]))),
                            cv2.FONT_HERSHEY_SIMPLEX, .6, (0, 255, 0), 1, 2)
    image = Image.fromarray(np.uint8(image * 255))
    image.show()
BATCH_SIZE = 8
STEPS = 25000
LR = 0.001  # if the loss turns NaN, lower this to 0.0005 or 0.0001
DECAY_STEPS = 100
DECAY_RATE = 0.9
SHUFFLE_SIZE = 200
CLASSES = utils.read_coco_names(class_name_path)
ANCHORS = utils.get_anchors('data/voc_anchors.txt', IMAGE_H, IMAGE_W)
NUM_CLASSES = len(CLASSES)
EVAL_INTERNAL = 100
SAVE_INTERNAL = 500

parser = Parser(IMAGE_H, IMAGE_W, ANCHORS, NUM_CLASSES)
trainset = dataset(parser, train_tfrecord, BATCH_SIZE, shuffle=SHUFFLE_SIZE)
testset = dataset(parser, test_tfrecord, BATCH_SIZE, shuffle=None)

is_training = tf.placeholder(tf.bool)
example = tf.cond(is_training, lambda: trainset.get_next(), lambda: testset.get_next())
images, *y_true = example

model = yolov3.yolov3(NUM_CLASSES, ANCHORS)
with tf.variable_scope('yolov3'):
    pred_feature_map = model.forward(images, is_training=is_training)
    loss = model.compute_loss(pred_feature_map, y_true)
    y_pred = model.predict(pred_feature_map)

tf.summary.scalar("loss/coord_loss", loss[1])
tf.summary.scalar("loss/sizes_loss", loss[2])
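# Hedged sketch (not from the original file): the remaining summaries, learning-rate
# schedule and training op that such a script would typically define next. It mirrors
# the train() method shown later in this section; whether global_step lives in the
# LOCAL_VARIABLES collection and which variables are optimized are assumptions here.
tf.summary.scalar("loss/confs_loss", loss[3])
tf.summary.scalar("loss/class_loss", loss[4])

global_step = tf.Variable(0, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES])
learning_rate = tf.train.exponential_decay(LR, global_step, DECAY_STEPS, DECAY_RATE, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate)

# make sure batch-norm statistics are updated before each training step
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = optimizer.minimize(loss[0], global_step=global_step)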
import cv2
import tensorflow as tf
from core import utils
from PIL import Image
from core.dataset import Parser, dataset

sess = tf.Session()

BATCH_SIZE = 1
SHUFFLE_SIZE = 1
CLASSES = utils.read_coco_names('./data/class.names')
ANCHORS = utils.get_anchors('./data/anchors.txt')
NUM_CLASSES = len(CLASSES)
TRAIN_TFRECORD = "./data/train_data/val.tfrecords"
TEST_TFRECORD = "./data/val_data/val.tfrecords"

parser_train = Parser(ANCHORS, NUM_CLASSES)
parser_test = Parser(ANCHORS, NUM_CLASSES)
trainset = dataset(parser_train, TRAIN_TFRECORD, BATCH_SIZE, shuffle=SHUFFLE_SIZE, multi_image_size=False)
testset = dataset(parser_test, TEST_TFRECORD, BATCH_SIZE, shuffle=None)

is_training = tf.placeholder(tf.bool)
example = testset.get_next()

for l in range(1):
    res = sess.run(example)
    image = res[0][0] * 255
    y_true = res[1:]
    boxes = utils.decode_gtbox(y_true)
    n_box = len(boxes)
    for i in range(n_box):
        image = cv2.rectangle(image,
                              (int(float(boxes[i][0])), int(float(boxes[i][1]))),
                              (int(float(boxes[i][2])), int(float(boxes[i][3]))),
                              (255, 0, 0), 1)
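    # Hedged sketch: display the image with the decoded ground-truth boxes, following the
    # Image.fromarray(...).show() pattern used by the other debug snippets in this section.
    # It assumes `image` is already in the 0-255 range (it was multiplied by 255 above).
    image = Image.fromarray(image.astype('uint8'))
    image.show()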
CLASSES = utils.read_coco_names('./data/raccoon.names')
NUM_CLASSES = len(CLASSES)
ANCHORS = utils.get_anchors('./data/raccoon_anchors.txt', IMAGE_H, IMAGE_W)
CKPT_FILE = "./checkpoint/yolov3.ckpt-2500"
IOU_THRESH = 0.5
SCORE_THRESH = 0.3

all_detections = []
all_annotations = []
all_aver_precs = {CLASSES[i]: 0. for i in range(NUM_CLASSES)}

test_tfrecord = "./raccoon_dataset/raccoon_*.tfrecords"
parser = Parser(IMAGE_H, IMAGE_W, ANCHORS, NUM_CLASSES)
testset = dataset(parser, test_tfrecord, batch_size=1, shuffle=None, repeat=False)
images_tensor, *y_true_tensor = testset.get_next()

model = yolov3.yolov3(NUM_CLASSES, ANCHORS)
with tf.variable_scope('yolov3'):
    pred_feature_map = model.forward(images_tensor, is_training=False)
    y_pred_tensor = model.predict(pred_feature_map)

saver = tf.train.Saver()
saver.restore(sess, CKPT_FILE)

try:
    image_idx = 0
    while True:
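        # Hedged sketch (the original snippet is cut off at this point): a typical body for
        # the evaluation loop, pulling one test image at a time until the TFRecord is
        # exhausted. How y_pred/y_true are turned into the per-class detections and
        # annotations used for the mAP computation is an assumption, not taken from the
        # original file.
        y_pred, y_true = sess.run([y_pred_tensor, y_true_tensor])
        all_detections.append(y_pred)
        all_annotations.append(y_true)
        image_idx += 1
        if image_idx % 100 == 0:
            print("=> processed %d images" % image_idx)
except tf.errors.OutOfRangeError:
    print("=> finished, %d images evaluated" % image_idx)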
LR = 0.0001  # if the loss turns NaN, lower this to 0.0005 or 0.0001
DECAY_STEPS = 100
DECAY_RATE = 0.9
SHUFFLE_SIZE = 45  # adjust this to the number of training images!
CLASSES = utils.read_coco_names('../data/object.names')
ANCHORS = utils.get_anchors('../data/object_anchors.txt', IMAGE_H, IMAGE_W)
NUM_CLASSES = len(CLASSES)
EVAL_INTERNAL = 1
SAVE_INTERNAL = 1

# Before the graph runs, the files are read into a queue; TFRecords give a uniform format for storing the data.
train_tfrecord = "../data/images_train.tfrecords"
test_tfrecord = "../data/images_test.tfrecords"

parser = Parser(IMAGE_H, IMAGE_W, ANCHORS, NUM_CLASSES)
trainset = dataset(parser, train_tfrecord, batch_size=BATCH_SIZE, shuffle=SHUFFLE_SIZE)
testset = dataset(parser, test_tfrecord, batch_size=BATCH_SIZE, shuffle=None)  # here I use the full test set, to make the effect easy to see

is_training = tf.placeholder(tf.bool)  # placeholder; a value must be fed at run time
# Depending on is_training, call get_next() on either trainset or testset.
example = tf.cond(is_training, lambda: trainset.get_next(), lambda: testset.get_next())
# y_true = [feature_map_1, feature_map_2, feature_map_3]
images, *y_true = example  # a, *c = 1, 2, 3, 4  ->  a = 1, c = [2, 3, 4]

model = YOLOv3.yolov3(NUM_CLASSES, ANCHORS)
with tf.variable_scope('yolov3'):
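    # Hedged sketch (the snippet is cut off here): the scope body typically mirrors the
    # other training scripts in this section — forward pass, loss, and prediction.
    pred_feature_map = model.forward(images, is_training=is_training)
    loss = model.compute_loss(pred_feature_map, y_true)
    y_pred = model.predict(pred_feature_map)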
NUM_CLASSES = len(CLASSES)
CLASS_WEIGHTS = np.ones(NUM_CLASSES, dtype='float32')
TRUE_BOX_BUFFER = 20
tfrecord = "./test_data/train0003.tfrecords"

sess = tf.Session()
parser = parser(IMAGE_H, IMAGE_W, GRID_H, GRID_W, ANCHORS, NUM_CLASSES, DEBUG=True)
trainset = dataset(parser, tfrecord, BATCH_SIZE, shuffle=1)
example = trainset.get_next()

for l in range(2):
    image, boxes = sess.run(example)
    image, boxes = image[0], boxes[0]
    n_box = len(boxes)
    for i in range(n_box):
        image = cv2.rectangle(image,
                              (int(float(boxes[i][0])), int(float(boxes[i][1]))),
                              (int(float(boxes[i][2])), int(float(boxes[i][3]))),
                              (255, 0, 0), 1)
    image = Image.fromarray(np.uint8(image))
    image.show()
import tensorflow as tf
from core import utils, yolov3
from core.dataset import dataset, Parser
from PIL import Image

sess = tf.Session()

IMAGE_H, IMAGE_W = 416, 416
BATCH_SIZE = 1
SHUFFLE_SIZE = 200
CLASSES = utils.read_coco_names("data/SVHN/SVHN.names")
ANCHORS = utils.get_anchors("data/SVHN/SVHN_anchors.txt", IMAGE_H, IMAGE_W)
NUM_CLASSES = len(CLASSES)
test_tfrecord = "data/SVHN/tfrecords/quick_test_data.tfrecords"

parser = Parser(IMAGE_H, IMAGE_W, ANCHORS, NUM_CLASSES)
testset = dataset(parser, test_tfrecord, BATCH_SIZE, shuffle=None)

is_training = tf.placeholder(tf.bool)
example = testset.get_next()
images, *y_true = example

model = yolov3.yolov3(NUM_CLASSES, ANCHORS)
with tf.variable_scope('yolov3'):
    pred_feature_map = model.forward(images, is_training=is_training)
    loss = model.compute_loss(pred_feature_map, y_true)
    y_pred = model.predict(pred_feature_map)

saver = tf.train.Saver()
saver.restore(sess, "data/SVHN/checkpoint5/yolov3.ckpt-4000")
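# Hedged sketch: one evaluation pass after restoring the checkpoint, with is_training fed
# as False. Using utils.evaluate on the evaluated y_pred/y_true mirrors the train() method
# elsewhere in this section; the exact reporting format is an assumption.
run_items = sess.run([y_pred, y_true] + list(loss), feed_dict={is_training: False})
rec, prec = utils.evaluate(run_items[0], run_items[1])
print("recall: %.2f \tprecision: %.2f \ttotal_loss: %.4f" % (rec, prec, run_items[2]))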
def train(self):
    ANCHORS = utils.get_anchors(self.anchors_path, self.img_h, self.img_w)
    parser = Parser(image_h=self.img_h, image_w=self.img_w, anchors=ANCHORS,
                    num_classes=self.num_classes)
    trainset = dataset(parser, self.train_records, self.batch_size, shuffle=self.shuffle_size)
    testset = dataset(parser, self.test_records, self.batch_size, shuffle=None)

    is_training = tf.placeholder(tf.bool)
    example = tf.cond(is_training, lambda: trainset.get_next(), lambda: testset.get_next())
    images, y_true = example

    model = yolov3.yolov3(self.num_classes, ANCHORS)
    with tf.variable_scope('yolov3'):
        # Give the images to the network, and receive a prediction feature map
        pred_feature_map = model.forward(images, is_training=is_training,
                                         n_filters_dn=self.n_filters_dn,
                                         n_strides_dn=self.n_strides_dn,
                                         n_ksizes_dn=self.n_ksizes_dn)
        loss = model.compute_loss(pred_feature_map, y_true, self.iou_threshold)
        y_pred = model.predict(pred_feature_map)

    tf.summary.scalar("loss/coord_loss", loss[1])
    tf.summary.scalar("loss/sizes_loss", loss[2])
    tf.summary.scalar("loss/confs_loss", loss[3])
    tf.summary.scalar("loss/class_loss", loss[4])

    global_step = tf.Variable(0, trainable=True, collections=[tf.GraphKeys.LOCAL_VARIABLES])

    write_op = tf.summary.merge_all()
    writer_train = tf.summary.FileWriter("../../data/train_summary", sess.graph)
    writer_test = tf.summary.FileWriter("../../data/test_summary")

    update_vars = tf.contrib.framework.get_variables_to_restore(include=["yolov3/yolo-v3"])
    lr = tf.train.exponential_decay(self.learning_rate, global_step,
                                    decay_steps=self.decay_steps,
                                    decay_rate=self.decay_rate, staircase=True)
    optimizer = tf.train.AdamOptimizer(lr)

    # set dependencies for BN ops
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss[0], var_list=update_vars, global_step=global_step)

    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    saver = tf.train.Saver(max_to_keep=2)

    for step in range(self.steps):
        run_items = sess.run([train_op, write_op, y_pred, y_true] + loss,
                             feed_dict={is_training: True})

        if (step + 1) % self.eval_internal == 0:
            train_rec_value, train_prec_value = utils.evaluate(run_items[2], run_items[3])

        writer_train.add_summary(run_items[1], global_step=step)
        writer_train.flush()  # Flushes the event file to disk

        if (step + 1) % self.save_internal == 0:
            saver.save(sess, save_path=self.checkpoint_path, global_step=step + 1)

        if (step + 1) % self.print_every_n == 0:
            print(f"=> STEP {step+1} [TRAIN]:\tloss_xy: " +
                  f"{run_items[5]:.4f} \tloss_wh:{run_items[6]:.4f} \t" +
                  f"loss_conf:{run_items[7]:.4f} \tloss_class:" +
                  f"{run_items[8]:.4f}")

        run_items = sess.run([write_op, y_pred, y_true] + loss,
                             feed_dict={is_training: False})

        if (step + 1) % self.eval_internal == 0:
            test_rec_value, test_prec_value = utils.evaluate(run_items[1], run_items[2])
            print(f"\n{20*'='}> evaluation result <{20*'='}\n")
            print(f"=> STEP {step+1} [TRAIN]:\trecall:" +
                  f"{train_rec_value:.2f} \tprecision:" +
                  f"{train_prec_value:.4f}")
            print(f"=> STEP {step+1} [VALID]:\trecall:" +
                  f"{test_rec_value:.2f} \tprecision:" +
                  f"{test_prec_value:.4f}")
            print(f"\n{20*'='}> evaluation result <{20*'='}\n")

        writer_test.add_summary(run_items[0], global_step=step)
        writer_test.flush()  # Flushes the event file to disk
TRUE_BOX_BUFFER = 80
# test_tfrecord = "./test_data/train0003.tfrecords"
train_tfrecord = "../voc/train*.tfrecords"
val_tfrecord = "../voc/val*.tfrecords"

sess = tf.Session()
parser = parser(IMAGE_H, IMAGE_W, GRID_H, GRID_W, ANCHORS, NUM_CLASSES, TRUE_BOX_BUFFER, DEBUG=False)
trainset = dataset(parser, train_tfrecord, BATCH_SIZE, shuffle=1)
valset = dataset(parser, val_tfrecord, BATCH_SIZE, shuffle=None)

is_training = tf.placeholder(tf.bool)
example = tf.cond(is_training, lambda: trainset.get_next(), lambda: valset.get_next())
input_image, y_true, true_boxes = example

feature_extractor = backbone.FullYoloFeature(input_image, is_training)
features = feature_extractor.feature

# ========================> method 1 <================================
# output = tf.keras.layers.Conv2D(NUM_ANCHORS * (5 + NUM_CLASSES),
#                                 (1, 1), strides=(1, 1),
#                                 padding='same',
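# Hedged sketch of the detection head the commented-out "method 1" above was building:
# a 1x1 convolution that maps the backbone features to NUM_ANCHORS * (5 + NUM_CLASSES)
# channels, reshaped to one prediction vector per anchor per grid cell. The reshape and
# the remaining Conv2D arguments are assumptions, not taken from the original file.
output = tf.keras.layers.Conv2D(NUM_ANCHORS * (5 + NUM_CLASSES),
                                (1, 1), strides=(1, 1), padding='same')(features)
y_pred = tf.reshape(output, [-1, GRID_H, GRID_W, NUM_ANCHORS, 5 + NUM_CLASSES])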
def split_dataset_for_oldScene_KCV(args):
    data_dir = args.cur_data_dir
    num_negatives = args.num_negatives
    test_candidates_nums = args.test_candidates_nums
    kcv = args.kcv
    name = 'oldScene_neg{}_testCandi{}'.format(num_negatives, test_candidates_nums)
    result_root = os.path.join(data_dir, 'split_data_oldScene', name)

    mashup_api_list = meta_data(args).mashup_api_list
    mashup_api_dict = list2dict(mashup_api_list)

    # Yields K dataset objects; each exposes the relevant attributes.
    if os.path.exists(dataset(args, result_root, name, kcv - 1).train_df_path):  # already split
        for i in range(kcv):
            print('data has been split in kcv mode before, reading it!')
            data = dataset(args, result_root, name, i)
            data.initialize()  # restore the object from files
            set_ds(data)  # register the unique instance
            yield data
    else:  # not split yet
        mashup_ids = list(mashup_api_dict.keys())
        all_apis = set(meta_data(args).api_df.index.tolist())  # ids of all apis

        # First, fix the positives, negatives, candidate apis, etc. for every mashup.
        # {mid: api_instances}
        mid2true_instances, mid2false_instances, mid2candidate_instances = {}, {}, {}
        for mashup_id, api_ids in mashup_api_dict.items():  # api_ids is a set
            unobserved_apis_list = list(all_apis - api_ids)
            random.shuffle(unobserved_apis_list)

            mid2true_instances[mashup_id] = {}
            mid2false_instances[mashup_id] = {}
            mid2candidate_instances[mashup_id] = {}

            api_ids_list = list(api_ids)  # the mashup's own apis serve as positives
            mid2true_instances[mashup_id] = api_ids_list
            all_neg_num = min(meta_data(args).api_num, num_negatives * len(api_ids_list))
            mid2false_instances[mashup_id] = unobserved_apis_list[:all_neg_num]  # negatives
            if test_candidates_nums == 'all':  # evaluate against all apis
                mid2candidate_instances[mashup_id] = list(all_apis)
            else:  # evaluate against a subset: the true component apis plus some unobserved ones
                test_candidates_nums = int(test_candidates_nums)
                mid2candidate_instances[mashup_id] = api_ids_list + unobserved_apis_list[:test_candidates_nums]

        random.shuffle(mashup_ids)
        batch = len(mashup_ids) // kcv
        for i in range(kcv):  # for each fold
            start_index = i * batch
            batch_stopindex = len(mashup_ids) if i == kcv - 1 else (i + 1) * batch
            test_mashups = mashup_ids[start_index:batch_stopindex]
            train_mashups = mashup_ids[:start_index] + mashup_ids[batch_stopindex:-1]

            train_df = pd.DataFrame(columns=['mashup', 'api', 'label'])
            for mashup_id in train_mashups:
                for true_api_id in mid2true_instances[mashup_id]:
                    train_df = train_df.append({'mashup': mashup_id, 'api': true_api_id, 'label': 1},
                                               ignore_index=True)
                for false_api_id in mid2false_instances[mashup_id]:
                    train_df = train_df.append({'mashup': mashup_id, 'api': false_api_id, 'label': 0},
                                               ignore_index=True)

            # test and train use different formats:
            # one test row (mashup plus a list of candidate apis) represents several test samples,
            # and the corresponding all_ground_api_ids / test_slt_ids row matches that row.
            test_df = pd.DataFrame(columns=['mashup', 'slt_apis', 'candidate_apis', 'all_ground_api_ids'])
            for mashup_id in test_mashups:
                test_df = test_df.append({'mashup': mashup_id,
                                          'candidate_apis': mid2candidate_instances[mashup_id],
                                          'all_ground_api_ids': mid2true_instances[mashup_id]},
                                         ignore_index=True)

            data = dataset(args, result_root, name, i)
            data.initialize(train_df, test_df)
            set_ds(data)  # register the unique instance
            print('{}/{} dataset, build done!'.format(i, kcv))
            yield data
def split_dataset_for_newScene_KCV(args):
    """Split the data for the new-scene (cold-start) scenario.

    :param data_dir: path of the data to split
    :param num_negatives: negative-sampling ratio
    :param slt_num: maximum number of already-selected services
    :param slt_combination_num: among the true component services, keep only some combinations
        as the "selected" services, to ease data imbalance, e.g. C(10,3) vs C(50,3)
    # :param train_positive_samples: for each training mashup, how many of the remaining services
    #     (besides the selected ones) to keep as training positives, so that mashups with many
    #     components do not take up too large a share
    :param test_candidates_nums: how many candidate (negative) items to evaluate per mashup;
        'all' evaluates every api
    :param kcv: number of folds
    :return: the dataset object of one fold
    """
    data_dir = args.cur_data_dir
    num_negatives = args.num_negatives
    slt_num = args.slt_item_num
    slt_combination_num = args.combination_num
    test_candidates_nums = args.test_candidates_nums
    kcv = args.kcv
    name = 'newScene_neg{}_sltNum{}_com{}_testCandi{}'.format(
        num_negatives, slt_num, slt_combination_num, test_candidates_nums)
    result_root = os.path.join(data_dir, 'split_data_newScene', name)

    # Yields K dataset objects; each exposes the relevant attributes.
    if os.path.exists(dataset(args, result_root, name, kcv - 1).train_df_path):  # already split
        for i in range(kcv):
            print('data has been split in kcv mode before, reading it!')
            data = dataset(args, result_root, name, i)
            data.initialize()  # restore the object from files
            set_ds(data)  # register the unique instance
            yield data
    else:
        mashup_api_list = meta_data(args).mashup_api_list
        mashup_api_dict = list2dict(mashup_api_list)
        mashup_ids = meta_data(args).mashup_df.index.tolist()
        mashup_ids.remove(0)  # remove the placeholder id
        all_apis = set(meta_data(args).api_df.index.tolist())  # ids of all apis
        all_apis.remove(0)

        # 1. First, assign each mashup its selected services and the corresponding
        #    positives/negatives (for training) or candidate apis (for testing).
        # {mid: {slt_aid_list: api_instances}}
        mid2true_instances, mid2false_instances, mid2candidate_instances = {}, {}, {}
        for mashup_id, api_ids in mashup_api_dict.items():  # api_ids is a set
            unobserved_apis_list = list(all_apis - api_ids)
            random.shuffle(unobserved_apis_list)

            mid2true_instances[mashup_id] = {}
            mid2false_instances[mashup_id] = {}
            mid2candidate_instances[mashup_id] = {}

            api_ids_list = list(api_ids)
            max_slt_num = min(slt_num, len(api_ids_list) - 1)  # e.g. up to 3 selected services wanted, but only 2 services exist
            for act_slt_num in range(max_slt_num):  # with 1 selected service, with 2, ...
                act_slt_num += 1
                combinations = list(itertools.combinations(api_ids_list, act_slt_num))
                if slt_combination_num != 'all':  # keep only some combinations, to ease data imbalance
                    slt_combination_num = min(len(combinations), slt_combination_num)
                    combinations = combinations[:slt_combination_num]
                for slt_api_ids in combinations:  # each random combination of the mashup's apis is treated as the already-selected apis, enlarging the data
                    train_api_ids = list(api_ids - set(slt_api_ids))  # masked observed interactions, used for training or testing
                    # if train_positive_samples != 'all':  # keep only some positives for training or testing
                    #     train_positive_samples_num = min(len(train_api_ids), train_positive_samples)  # at most 50; usually far fewer
                    #     train_api_ids = train_api_ids[:train_positive_samples_num]
                    mid2true_instances[mashup_id][slt_api_ids] = train_api_ids  # training positives; slt_api_ids is a tuple
                    num_negative_instances = min(num_negatives * len(train_api_ids), len(unobserved_apis_list))
                    mid2false_instances[mashup_id][slt_api_ids] = unobserved_apis_list[:num_negative_instances]  # randomly chosen negatives
                    if test_candidates_nums == 'all':  # candidates to predict
                        test_candidates_list = list(all_apis - set(slt_api_ids))
                    else:
                        test_candidates_nums = int(test_candidates_nums)
                        test_candidates_list = unobserved_apis_list[:test_candidates_nums] + train_api_ids
                    mid2candidate_instances[mashup_id][slt_api_ids] = test_candidates_list

        random.shuffle(mashup_ids)
        batch = len(mashup_ids) // kcv
        # 2. Then, split the results above into the K folds (train and test).
        for i in range(kcv):  # for each fold
            start_index = i * batch
            batch_stopindex = len(mashup_ids) if i == kcv - 1 else (i + 1) * batch
            test_mashups = mashup_ids[start_index:batch_stopindex]
            train_mashups = mashup_ids[:start_index] + mashup_ids[batch_stopindex:-1]
            print(train_mashups)
            print(test_mashups)

            train_df = pd.DataFrame(columns=['mashup', 'slt_apis', 'api', 'label'])
            for mashup_id in train_mashups:
                for slt_api_ids, true_api_instances in mid2true_instances[mashup_id].items():
                    for true_api_id in true_api_instances:
                        train_df = train_df.append({'mashup': mashup_id, 'slt_apis': slt_api_ids,
                                                    'api': true_api_id, 'label': 1},
                                                   ignore_index=True)
                for slt_api_ids, false_api_instances in mid2false_instances[mashup_id].items():
                    for false_api_id in false_api_instances:
                        train_df = train_df.append({'mashup': mashup_id, 'slt_apis': slt_api_ids,
                                                    'api': false_api_id, 'label': 0},
                                                   ignore_index=True)

            # test and train use different formats: train holds one sample per row; a test mashup has
            # too many candidate apis, so to save space each test row holds one mashup, several
            # candidate apis, one all_ground_api_ids and one test_slt_ids.
            test_df = pd.DataFrame(columns=['mashup', 'slt_apis', 'candidate_apis', 'all_ground_api_ids'])
            for mashup_id in test_mashups:
                for slt_api_ids, candidate_api_instances in mid2candidate_instances[mashup_id].items():
                    test_df = test_df.append({'mashup': mashup_id, 'slt_apis': slt_api_ids,
                                              'candidate_apis': candidate_api_instances,
                                              'all_ground_api_ids': mid2true_instances[mashup_id][slt_api_ids]},
                                             ignore_index=True)

            # 3. Initialize the dataset object from the train/test split.
            data = dataset(args, result_root, name, i)
            data.initialize(train_df, test_df)
            set_ds(data)  # register the unique instance
            print('{}/{} dataset, build done!'.format(i, kcv))
            yield data
        print('the data has been split and saved!')
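# Hedged sketch: how the K-fold generators above are typically consumed. `args` and
# `run_fold` are placeholders here, not names taken from the original code.
for fold_data in split_dataset_for_newScene_KCV(args):
    run_fold(fold_data)  # train and evaluate a model on this fold's train_df / test_df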