Example #1
    def confusion_matrix(self, name=None):
        predictions = self.model.predict(self.valX)
        labels = list(set(get_labels(self.valY))) 
        print(labels)
        target_names = ["N", "P"]
        print("Classification report for " + FOLDER + " ---> " +self.model.name)
        # print(precision_recall_fscore_support(np.argmax(predictions, axis=1), np.argmax(self.valY, axis=1)))
        print("F1 SCORE:")
        print(f1_score(np.argmax(self.valY, axis=1), np.argmax(predictions, axis=1)))
        print("RECALL:")
        print(recall_score(np.argmax(self.valY, axis=1), np.argmax(predictions, axis=1)))

        print("PRECISION:")
        print(precision_score(np.argmax(self.valY, axis=1), np.argmax(predictions, axis=1)))

        print("SPECIFICITY:")
        self.fpr, self.tpr, _ = roc_curve(np.argmax(self.valY, axis=1),predictions[:,1])
        self.auc = roc_auc_score(np.argmax(self.valY, axis=1), np.argmax(predictions, axis=1))
        cm = confusion_matrix(get_labels(self.valY), get_labels(predictions))
        tn, fp, fn, tp = cm.ravel()
        print("True Negative {} False Positive {} False Negative {} True Positive {}".format(tn, fp, fn, tp))
        print("TN {}".format(cm[0][0]))
        print("FP {}".format(cm[0][1]))
        print("FN {}".format(cm[1][0]))
        print("TP {}".format(cm[1][1]))
        specificity = cm[0][0] / (cm[0][0] + cm[0][1])
        print("SPECIFICITY: {}".format(specificity))
        print("Confusion Matrix {}\n".format(cm))
        plot_confusion_matrix(cm, labels, title=name if name is not None else self.model.name + FOLDER)
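
This snippet depends on a get_labels helper that is not shown. A minimal sketch, assuming it simply collapses one-hot (or probability) rows to integer class labels:

import numpy as np

def get_labels(onehot_rows):
    # assumed helper: argmax over each row gives the class index
    return list(np.argmax(onehot_rows, axis=1))
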
Example #2
    def get_results(self, models):
        for model in models:
            model.compile(loss='categorical_crossentropy', optimizer=self.optimizer(),metrics=['accuracy'])
            aug = ImageDataGenerator(
                # rotation_range=90, 
                # zoom_range=0.15,
                # width_shift_range=0.2,
                # height_shift_range=0.2,
                shear_range=0.25,
                horizontal_flip=True,
                vertical_flip=True,
                fill_mode="nearest"
            )

            if(K.image_dim_ordering() == 'th'):
                self.x = np.moveaxis(self.x, -1, 1)
                self.valX = np.moveaxis(self.valX, -1, 1)
            
            if(os.path.exists('../models/'+model.name+FOLDER+'.h5')):
                model.load_weights('../models/'+model.name+FOLDER+'.h5') 
            else:
                # self.history = self.model.fit_generator(self.trainGen,
                #     epochs=self.epochs, verbose=1, shuffle=True,
                #     validation_data=self.valGen, workers=2, use_multiprocessing=False)
                self.history = model.fit_generator(aug.flow(self.trainX,self.trainY, batch_size=self.batch_size, shuffle=True, seed=1000),
                    steps_per_epoch=len(self.trainX) // self.batch_size, epochs=self.epochs, verbose=1,
                    validation_data=(self.valX, self.valY))
            predictions = model.predict(self.valX)
            labels = list(set(get_labels(self.valY))) 
            print(labels)
            target_names = ["N", "P"]
            print("Classification report for " + FOLDER + " ---> " +model.name)
            print("\n====================================================================================================================================================")
            # print(precision_recall_fscore_support(np.argmax(predictions, axis=1), np.argmax(self.valY, axis=1)))
            print("F1 SCORE:")
            print(f1_score(np.argmax(self.valY, axis=1), np.argmax(predictions, axis=1)))
            print("RECALL:")
            print(recall_score(np.argmax(self.valY, axis=1), np.argmax(predictions, axis=1)))

            print("PRECISION:")
            print(precision_score(np.argmax(self.valY, axis=1), np.argmax(predictions, axis=1)))

            print("SPECIFICITY:")
            self.fpr, self.tpr, _ = roc_curve(np.argmax(self.valY, axis=1),predictions[:,1])
            self.auc = roc_auc_score(np.argmax(self.valY, axis=1), np.argmax(predictions, axis=1))
            cm = confusion_matrix(get_labels(self.valY), get_labels(predictions))
            tn, fp, fn, tp = cm.ravel()
            print("True Negative {} False Positive {} False Negative {} True Positive {}".format(tn, fp, fn, tp))
            print("TN {}".format(cm[0][0]))
            print("FP {}".format(cm[0][1]))
            print("FN {}".format(cm[1][0]))
            print("TP {}".format(cm[1][1]))
            specificity = cm[0][0] / (cm[0][0] + cm[0][1])
            print("SPECIFICITY: {}".format(specificity))
            print("Confusion Matrix {}\n".format(cm))
            plot_confusion_matrix(cm, labels, title=model.name+FOLDER)
    def confusion_matrix(self, title=None):
        if title is None:
            title = self.model.name + FOLDER
        predictions = self.model.predict(self.valX)
        class_counts = onehot_to_cat(self.valY)
        print("Class counts")
        print(np.unique(class_counts, return_counts=True))
        print("=================================================")

        labels = list(set(get_labels(self.valY)))
        cm = confusion_matrix(get_labels(self.valY), get_labels(predictions))
        print("Confusion Matrix {}".format(cm))
        plot_confusion_matrix(cm, labels, title=title)
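
The onehot_to_cat call above is another unshown helper; a plausible one-liner, assuming it just inverts the one-hot encoding so np.unique can count the classes:

import numpy as np

def onehot_to_cat(onehot_rows):
    # assumed helper: one-hot rows back to categorical class indices
    return np.argmax(onehot_rows, axis=1)
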
Example #4
 def srv_get_training_data(self, params):
     data_dir = os.path.join(self.data_root, self.ns)
     labels = get_labels(data_dir)
     counts = [
         len(os.listdir(os.path.join(data_dir, label))) for label in labels
     ]
     return GetPredictionsResponse(labels, counts)
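
Here get_labels(data_dir) is evidently directory-based, since each label is immediately used as a subdirectory name. A sketch under that assumption:

import os

def get_labels(data_dir):
    # assumed helper: one class label per subdirectory of data_dir
    return sorted(d for d in os.listdir(data_dir)
                  if os.path.isdir(os.path.join(data_dir, d)))
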
    def __init__(self, threshold=0.5):
        args = utils.get_arguments()
        self.weight_path = args.weight_path
        self.cfg_path = args.config_path
        self.labels = utils.get_labels(args.classes_path)
        self.threshold = threshold

        # Load the YOLOv3 Tiny model for detecting the license plate region
        self.model = cv2.dnn.readNet(model=self.weight_path,
                                     config=self.cfg_path)
    def __init__(self, threshold=0.5):
        args = utils.get_arguments()
        self.weight_path = args.weight_path
        self.cfg_path = args.config_path
        self.labels = utils.get_labels(args.classes_path)
        self.threshold = threshold

        # Load model
        self.model = cv2.dnn.readNet(model=self.weight_path,
                                     config=self.cfg_path)
    def __init__(self, threshold=0.5):
        print("------- initial detectNumberPlate")
        try:
            self.weight_path = "./weights/yolov3-tiny.weights"
            self.cfg_path = "./cfg/yolov3-tiny.cfg"
            self.labels = utils.get_labels("./cfg/coco.names")
            self.threshold = threshold

            print("------- before DetectVehicle.load_model_readNet")
            # Load model
            self.model = cv2.dnn.readNet(model=self.weight_path,
                                         config=self.cfg_path)
            print("------- after DetectVehicle.load_model_readNet")
        except Exception as ex:
            print("############## Error: {} ##############".format(str(ex)))
Example #8
def validate():
    data_dir = ".\\data\\"
    label_names = get_labels(data_dir)
    model = load_model('asr_model.h5')  # load the trained model

    features, labels = loadFromPickle()
    features, labels = shuffle(features, labels)
    features = features.reshape(features.shape[0], 20, 32, 1)
    labels = prepress_labels(labels)
    train_x, test_x, train_y, test_y = train_test_split(features, labels, random_state=0,
                                                        test_size=0.3)
    print_summary(model)

    # Evaluate the model; verbose=0 suppresses per-batch logging
    score = model.evaluate(test_x, test_y, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])  # accuracy
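
prepress_labels is not shown; most likely it one-hot encodes the integer labels so they match the categorical_crossentropy output. A sketch under that assumption:

import numpy as np
from keras.utils import np_utils

def prepress_labels(labels):
    # assumed helper: one-hot encode integer class labels
    return np_utils.to_categorical(np.asarray(labels))
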
Example #9
 def predict(self, cv_img=None, path=None):
     with self.session.as_default():
         with self.graph.as_default():
             data_dir = os.path.join(self.data_root, self.ns)
             if path is not None:
                 cv_img = cv2.imread(path)
             label_names = get_labels(data_dir)
             rs_img_f32 = cv2.resize(
                 cv_img,
                 (self.IMAGE_SIZE, self.IMAGE_SIZE)).astype('float32')
             input_data = rs_img_f32.reshape(-1, self.IMAGE_SIZE,
                                             self.IMAGE_SIZE, 3)
             if self.model is not None:
                 result = self.model.predict(input_data, steps=1)
                 print(label_names)
                 print("result:", result)
                 return label_names, result[0]
             else:
                 print('your model is not ready')
                 return None
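
A hypothetical caller of predict (names here are placeholders, not from the original): since label_names and result[0] are returned together, they can be zipped to pick the top class, assuming the two are index-aligned.

classifier = ...  # an instance of the class above
label_names, scores = classifier.predict(path="sample.jpg")
best_label, best_score = max(zip(label_names, scores), key=lambda pair: pair[1])
print(best_label, best_score)
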
Example #10
    def __init__(self, threshold=0.5):
        print("------- initial detectNumberPlate")
        try:
            # args = utils.get_arguments()
            # print("------- args: ", args)
            # self.weight_path = args.weight_path
            # self.cfg_path = args.config_path
            # self.labels = utils.get_labels(args.classes_path)
            # self.threshold = threshold

            self.weight_path = "./weights/yolov3-tiny_15000.weights"
            self.cfg_path = "./cfg/yolov3-tiny.cfg"
            self.labels = utils.get_labels("./cfg/yolo.names")
            self.threshold = threshold

            print("------- before detectNumberPlate.load_model_readNet")
            # Load model
            self.model = cv2.dnn.readNet(model=self.weight_path,
                                         config=self.cfg_path)
            print("------- after detectNumberPlate.load_model_readNet")
        except Exception as ex:
            print("############## Error: {} ##############".format(str(ex)))
def main():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument(
        "--data_dir",
        default=None,
        type=str,
        required=True,
        help="输入数据目录。应该包含CoNLL-2003 NER任务的训练文件",
    )
    parser.add_argument(
        "--model_type",
        default=None,
        type=str,
        required=True,
        help="列表中选择的模型类型: " + ", ".join(MODEL_CLASSES.keys()),
    )
    parser.add_argument(
        "--model_name_or_path",
        default=None,
        type=str,
        required=True,
        help="列表中选择的预训练模型或快捷方式名称的路径: " + ", ".join(ALL_MODELS),
    )
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help="输出目录, 将在其中写入模型预测和checkpoint",
    )

    # Other parameters
    parser.add_argument("--config_name",
                        default="",
                        type=str,
                        help="预训练的配置名称或路径(如果与model_name不同)")
    parser.add_argument(
        "--tokenizer_name",
        default="",
        type=str,
        help="预训练的tokenizer名称或路径(如果与model_name不同)",
    )
    parser.add_argument(
        "--cache_dir",
        default="",
        type=str,
        help="您想在哪里存储从s3下载的预训练模型",
    )
    parser.add_argument(
        "--max_seq_length",
        default=128,
        type=int,
        help="tokenization后的最大总输入序列长度。长度大于此长度的序列将被截断,较短的序列将被填充。",
    )
    parser.add_argument("--do_train",
                        action="store_true",
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        action="store_true",
                        help="Whether to run eval on the dev set.")
    parser.add_argument("--do_predict",
                        action="store_true",
                        help="Whether to run predictions on the test set.")
    parser.add_argument(
        "--evaluate_during_training",
        action="store_true",
        help="是否在每个日志记录step的训练期间进行评估.",
    )
    parser.add_argument("--do_lower_case",
                        action="store_true",
                        help="如果使用的是uncased的模型,请设置此标志")

    parser.add_argument("--per_gpu_train_batch_size",
                        default=8,
                        type=int,
                        help="训练时每个GPU / CPU的批次大小。")
    parser.add_argument("--per_gpu_eval_batch_size",
                        default=8,
                        type=int,
                        help="评估时每个GPU / CPU的批次大小。")
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help="在执行向后/更新过程之前要梯度累积的更新步骤数。",
    )
    parser.add_argument("--learning_rate",
                        default=5e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--weight_decay",
                        default=0.0,
                        type=float,
                        help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon",
                        default=1e-8,
                        type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--adam_beta1",
                        default=0.9,
                        type=float,
                        help="BETA1 for Adam optimizer.")
    parser.add_argument("--adam_beta2",
                        default=0.999,
                        type=float,
                        help="BETA2 for Adam optimizer.")
    parser.add_argument("--max_grad_norm",
                        default=1.0,
                        type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs",
                        default=3.0,
                        type=float,
                        help="要执行的训练epoch总数。")
    parser.add_argument(
        "--max_steps",
        default=-1,
        type=int,
        help="If > 0: 设置要执行的训练步骤总数。覆盖num_train_epochs。",
    )
    parser.add_argument("--warmup_steps",
                        default=0,
                        type=int,
                        help="Linear warmup over warmup_steps.")

    parser.add_argument("--logging_steps",
                        type=int,
                        default=50,
                        help="Log every X updates steps.")
    parser.add_argument("--save_steps",
                        type=int,
                        default=50,
                        help="Save checkpoint every X updates steps.")
    parser.add_argument(
        "--eval_all_checkpoints",
        action="store_true",
        help=
        "Evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number",
    )
    parser.add_argument("--no_cuda",
                        action="store_true",
                        help="Avoid using CUDA when available")
    parser.add_argument("--overwrite_output_dir",
                        action="store_true",
                        help="Overwrite the content of the output directory")
    parser.add_argument(
        "--overwrite_cache",
        action="store_true",
        help="Overwrite the cached training and evaluation sets")
    parser.add_argument("--seed",
                        type=int,
                        default=42,
                        help="random seed for initialization")

    parser.add_argument(
        "--fp16",
        action="store_true",
        help=
        "Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit",
    )
    parser.add_argument(
        "--fp16_opt_level",
        type=str,
        default="O1",
        help=
        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
        "See details at https://nvidia.github.io/apex/amp.html",
    )
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="For distributed training: local_rank")
    parser.add_argument("--server_ip",
                        type=str,
                        default="",
                        help="For distant debugging.")
    parser.add_argument("--server_port",
                        type=str,
                        default="",
                        help="For distant debugging.")

    # mean teacher
    parser.add_argument('--mt',
                        type=int,
                        default=0,
                        help='mean teacher. Whether to use a mean teacher.')
    parser.add_argument('--mt_updatefreq',
                        type=int,
                        default=1,
                        help='mean teacher update frequency')
    parser.add_argument(
        '--mt_class',
        type=str,
        default="kl",
        help=
        'mean teacher class, choices:[smart, prob, logit, kl(default), distill].'
    )
    parser.add_argument('--mt_lambda',
                        type=float,
                        default=1,
                        help="trade off parameter of the consistent loss.")
    parser.add_argument('--mt_rampup',
                        type=int,
                        default=300,
                        help="rampup iteration.")
    parser.add_argument(
        '--mt_alpha1',
        default=0.99,
        type=float,
        help=
        "moving average parameter of mean teacher (for the exponential moving average)."
    )
    parser.add_argument(
        '--mt_alpha2',
        default=0.995,
        type=float,
        help=
        "moving average parameter of mean teacher (for the exponential moving average)."
    )
    parser.add_argument('--mt_beta',
                        default=10,
                        type=float,
                        help="coefficient of mt_loss term.")
    parser.add_argument(
        '--mt_avg',
        default="exponential",
        type=str,
        help=
        "moving average method, choices:[exponentail(default), simple, double_ema]."
    )
    parser.add_argument(
        '--mt_loss_type',
        default="logits",
        type=str,
        help="subject to 衡量模型差异, choices:[embeds, logits(default)].")

    # virtual adversarial training
    parser.add_argument('--vat',
                        type=int,
                        default=0,
                        help='virtual adversarial training.')
    parser.add_argument(
        '--vat_eps',
        type=float,
        default=1e-3,
        help='perturbation size for virtual adversarial training.')
    parser.add_argument(
        '--vat_lambda',
        type=float,
        default=1,
        help='trade off parameter for virtual adversarial training.')
    parser.add_argument(
        '--vat_beta',
        type=float,
        default=1,
        help='coefficient of the virtual adversarial training loss term.')
    parser.add_argument(
        '--vat_loss_type',
        default="logits",
        type=str,
        help=
        "subject to measure model difference, choices = [embeds, logits(default)]."
    )

    # self-training
    parser.add_argument(
        '--self_training_reinit',
        type=int,
        default=0,
        help='Whether to re-initialize the student model once the teacher model has been updated. 0 means re-initialize, 1 means do not.')
    parser.add_argument(
        '--self_training_begin_step',
        type=int,
        default=900,
        help='Step at which self-training begins (usually after the first epoch). Corresponds to the first-stage early-stopping strategy in the paper.')
    parser.add_argument(
        '--self_training_label_mode',
        type=str,
        default="hard",
        help=
        'Pseudo-label type. choices:[hard(default), soft]. A soft label is the probability predicted by the teacher model (a float, similar to a logit); a hard label is the integer index of the highest-probability position, e.g. soft 0.82 vs. hard 1.'
    )
    parser.add_argument(
        '--self_training_period',
        type=int,
        default=878,
        help='the self-training period, i.e. how many training steps between teacher model updates')
    parser.add_argument('--self_training_hp_label',
                        type=float,
                        default=0,
                        help='whether to use high-confidence labels to re-weight the soft labels')
    parser.add_argument('--self_training_ensemble_label',
                        type=int,
                        default=0,
                        help='use ensemble label.')

    args = parser.parse_args()

    # Decide whether an existing output directory may be overwritten
    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and args.do_train and not args.overwrite_output_dir):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))

    # Create the output directory if it does not exist
    if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(args.output_dir)
    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd

        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    logging_fh = logging.FileHandler(os.path.join(args.output_dir, 'log.txt'))
    logging_fh.setLevel(logging.DEBUG)
    logger.addHandler(logging_fh)
    logger.warning(
        "处理的 rank: %s, device: %s, n_gpu: %s, 是否分布式训练: %s, 是否 16-bits 训练 : %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        args.fp16,
    )

    # Set seed
    set_seed(args)
    # Get all labels for this dataset, e.g. ['O', 'B-LOC', 'B-ORG', 'B-PER', 'B-MISC', 'I-PER', 'I-MISC', 'I-ORG', 'I-LOC', '<START>', '<STOP>']
    labels = get_labels(args.data_dir)
    num_labels = len(labels)
    # Use the cross-entropy ignore index as the padding label ID so that only real label IDs contribute to the loss later, e.g. pad_token_label_id = -100
    pad_token_label_id = CrossEntropyLoss().ignore_index

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier(
        )  # Make sure only the first process in distributed training will download model & vocab

    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    config = config_class.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        num_labels=num_labels,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    tokenizer = tokenizer_class.from_pretrained(
        args.tokenizer_name
        if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )

    if args.local_rank == 0:
        torch.distributed.barrier(
        )  # Make sure only the first process in distributed training will download model & vocab

    logger.info("训练/评估 参数 %s", args)

    # Training
    if args.do_train:
        # Load the training dataset
        train_dataset = load_and_cache_examples(args,
                                                tokenizer,
                                                labels,
                                                pad_token_label_id,
                                                mode="train")
        # Train the model
        model, global_step, tr_loss, best_dev, best_test = train(
            args, train_dataset, model_class, config, tokenizer, labels,
            pad_token_label_id)
        # Log the final step and loss
        logger.info(" global_step = %s, average loss = %s", global_step,
                    tr_loss)

    # Saving best-practice: if you use default names for the model, you can reload it using from_pretrained()
    if args.do_train and (args.local_rank == -1
                          or torch.distributed.get_rank() == 0):
        logger.info("保存模型 checkpoint to %s", args.output_dir)
        model_to_save = (model.module if hasattr(model, "module") else model
                         )  # Take care of distributed/parallel training
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)
        torch.save(args, os.path.join(args.output_dir, "training_args.bin"))
        torch.save(model.state_dict(), os.path.join(args.output_dir,
                                                    "model.pt"))

    # Evaluation
    results = {}
    if args.do_eval and args.local_rank in [-1, 0]:
        tokenizer = tokenizer_class.from_pretrained(
            args.output_dir, do_lower_case=args.do_lower_case)
        checkpoints = [args.output_dir]
        if args.eval_all_checkpoints:
            checkpoints = list(
                os.path.dirname(c) for c in sorted(
                    glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME,
                              recursive=True)))
            logging.getLogger("pytorch_transformers.modeling_utils").setLevel(
                logging.WARN)  # Reduce logging
        logger.info("评估如下 checkpoints: %s", checkpoints)

        if not best_dev:
            best_dev = [0, 0, 0]
        for checkpoint in checkpoints:
            global_step = checkpoint.split(
                "-")[-1] if len(checkpoints) > 1 else ""
            model = model_class.from_pretrained(checkpoint)
            model.to(args.device)
            result, _, best_dev, _ = evaluate(args,
                                              model,
                                              tokenizer,
                                              labels,
                                              pad_token_label_id,
                                              best=best_dev,
                                              mode="dev",
                                              prefix=global_step)
            if global_step:
                result = {
                    "{}_{}".format(global_step, k): v
                    for k, v in result.items()
                }
            results.update(result)
        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as writer:
            for key in sorted(results.keys()):
                writer.write("{} = {}\n".format(key, str(results[key])))

    if args.do_predict and args.local_rank in [-1, 0]:
        tokenizer = tokenizer_class.from_pretrained(
            args.output_dir, do_lower_case=args.do_lower_case)
        model = model_class.from_pretrained(args.output_dir)
        model.to(args.device)

        if not best_test:
            best_test = [0, 0, 0]
        result, predictions, _, _ = evaluate(args,
                                             model,
                                             tokenizer,
                                             labels,
                                             pad_token_label_id,
                                             best=best_test,
                                             mode="test")
        # Save results
        output_test_results_file = os.path.join(args.output_dir,
                                                "test_results.txt")
        with open(output_test_results_file, "w") as writer:
            for key in sorted(result.keys()):
                writer.write("{} = {}\n".format(key, str(result[key])))
        # Save predictions
        output_test_predictions_file = os.path.join(args.output_dir,
                                                    "test_predictions.txt")
        with open(output_test_predictions_file, "w") as writer:
            with open(os.path.join(args.data_dir, "test.json"), "r") as f:
                example_id = 0
                data = json.load(f)
                for item in data:
                    output_line = str(
                        item["str_words"]) + " " + predictions[example_id].pop(
                            0) + "\n"
                    writer.write(output_line)
                    example_id += 1

    return results
Example #12
    def train(self, data_dir, epochs=3, callback=None):
        if self.busy:
            return 1
        self.busy = True
        label_names = get_labels(data_dir)
        self.NUM_CLASSES = len(label_names)
        from keras.applications.vgg16 import VGG16, preprocess_input
        # Use VGG16 as the base model; include_top=False drops the built-in FC layers so a custom head can be added. The base weights are downloaded to ~/.keras/models.
        base_model = VGG16(input_shape=(self.IMAGE_SIZE, self.IMAGE_SIZE, 3),
                           include_top=False,
                           weights='imagenet')
        x_data, y_label = load_img_from_dir(data_dir,
                                            target_size=(self.IMAGE_SIZE,
                                                         self.IMAGE_SIZE),
                                            max_num=30)
        for i in range(x_data.shape[0]):
            x_data[i] = preprocess_input(x_data[i])
        print(x_data.shape)
        print(x_data[0].shape)
        x_data = x_data.reshape(x_data.shape[0], self.IMAGE_SIZE,
                                self.IMAGE_SIZE, 3)
        y_label_one_hot = prepress_labels(y_label)
        # Validation should use images the model has never seen
        train_x, test_x, train_y, test_y = train_test_split(x_data,
                                                            y_label_one_hot,
                                                            random_state=0,
                                                            test_size=0.3)
        # Custom FC head on top of the base model's last convolutional output
        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        x = Dense(self.FC_NUMS, activation='relu')(x)
        prediction = Dense(self.NUM_CLASSES, activation='softmax')(x)

        # Build the new model with the custom FC head attached
        model = Model(inputs=base_model.input, outputs=prediction)

        # Number of layers in the model
        print("layer nums:", len(model.layers))

        # Besides the FC head, the convolutional layers closest to it can also be trained.
        # The architecture defines how many layers make up a convolutional block;
        # here FREEZE_LAYERS is 17, so the last convolutional block and the FC head are trained.
        for layer in model.layers[:self.FREEZE_LAYERS]:
            layer.trainable = False
        for layer in model.layers[self.FREEZE_LAYERS:]:
            layer.trainable = True
        for layer in model.layers:
            print("layer.trainable:", layer.trainable)

        # Compile the model
        model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        model.summary()
        model.fit(
            train_x,
            train_y,
            validation_data=(test_x, test_y),
            # model.fit(x_data, y_label_one_hot,
            #         validation_split=0.4,
            callbacks=[AccuracyLogger(callback)],
            epochs=epochs,
            batch_size=4,
            # steps_per_epoch=1,validation_steps =1 ,
            verbose=1,
            shuffle=True)
        self.model = model
        model.save(os.path.join(data_dir, 'model.h5'))
        self.dump_label_name(label_names)
        self.session = K.get_session()
        self.graph = tf.get_default_graph()
        self.busy = False
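
load_img_from_dir is another unshown helper; judging by its arguments, it loads up to max_num images per class subdirectory and returns them with integer labels. A sketch under that assumption:

import os
import numpy as np
from keras.preprocessing import image

def load_img_from_dir(data_dir, target_size=(224, 224), max_num=30):
    # assumed helper: one class per subdirectory, at most max_num images each
    xs, ys = [], []
    for label_idx, label in enumerate(sorted(os.listdir(data_dir))):
        label_dir = os.path.join(data_dir, label)
        if not os.path.isdir(label_dir):
            continue
        for fname in sorted(os.listdir(label_dir))[:max_num]:
            img = image.load_img(os.path.join(label_dir, fname), target_size=target_size)
            xs.append(image.img_to_array(img))
            ys.append(label_idx)
    return np.array(xs), np.array(ys)
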
Example #13
def main():
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument("--data_dir", default=None, type=str, required=True,
                        help="The input data dir. Should contain the training files for the CoNLL-2003 NER task.")
    parser.add_argument("--model_type", default=None, type=str, required=True,
                        help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()))
    parser.add_argument("--model_name_or_path", default=None, type=str, required=True,
                        help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS))
    parser.add_argument("--output_dir", default=None, type=str, required=True,
                        help="The output directory where the model predictions and checkpoints will be written.")

    ## Other parameters
    parser.add_argument("--labels", default="", type=str,
                        help="Path to a file containing all labels. If not specified, CoNLL-2003 labels are used.")
    parser.add_argument("--config_name", default="", type=str,
                        help="Pretrained config name or path if not the same as model_name")
    parser.add_argument("--tokenizer_name", default="", type=str,
                        help="Pretrained tokenizer name or path if not the same as model_name")
    parser.add_argument("--cache_dir", default="", type=str,
                        help="Where do you want to store the pre-trained models downloaded from s3")
    parser.add_argument("--max_seq_length", default=128, type=int,
                        help="The maximum total input sequence length after tokenization. Sequences longer "
                             "than this will be truncated, sequences shorter will be padded.")
    parser.add_argument("--do_train", action="store_true",
                        help="Whether to run training.")
    parser.add_argument("--do_eval", action="store_true",
                        help="Whether to run eval on the dev set.")
    parser.add_argument("--do_predict", action="store_true",
                        help="Whether to run predictions on the test set.")
    parser.add_argument("--evaluate_during_training", action="store_true",
                        help="Whether to run evaluation during training at each logging step.")
    parser.add_argument("--do_lower_case", action="store_true",
                        help="Set this flag if you are using an uncased model.")

    parser.add_argument("--per_gpu_train_batch_size", default=16, type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--per_gpu_eval_batch_size", default=16, type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument("--gradient_accumulation_steps", type=int, default=1,
                        help="Number of updates steps to accumulate before performing a backward/update pass.")
    parser.add_argument("--learning_rate", default=5e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--weight_decay", default=0.0, type=float,
                        help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs", default=1.0, type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--max_steps", default=-1, type=int,
                        help="If > 0: set total number of training steps to perform. Override num_train_epochs.")
    parser.add_argument("--warmup_steps", default=0, type=int,
                        help="Linear warmup over warmup_steps.")

    parser.add_argument("--logging_steps", type=int, default=50,
                        help="Log every X updates steps.")
    parser.add_argument("--save_steps", type=int, default=50,
                        help="Save checkpoint every X updates steps.")
    parser.add_argument("--eval_all_checkpoints", action="store_true",
                        help="Evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number")
    parser.add_argument("--no_cuda", action="store_true",
                        help="Avoid using CUDA when available")
    parser.add_argument("--overwrite_output_dir", action="store_true",
                        help="Overwrite the content of the output directory")
    parser.add_argument("--overwrite_cache", action="store_true",
                        help="Overwrite the cached training and evaluation sets")
    parser.add_argument("--seed", type=int, default=42,
                        help="random seed for initialization")

    parser.add_argument("--fp16", action="store_true",
                        help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit")
    parser.add_argument("--fp16_opt_level", type=str, default="O1",
                        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
                             "See details at https://nvidia.github.io/apex/amp.html")
    parser.add_argument("--local_rank", type=int, default=-1,
                        help="For distributed training: local_rank")
    parser.add_argument("--server_ip", type=str, default="", help="For distant debugging.")
    parser.add_argument("--server_port", type=str, default="", help="For distant debugging.")
    
    parser.add_argument("--method", default='finetune', type=str, help="which method to use")

    # self-training
    parser.add_argument('--soft_label', type=int, default=1, help='whether soft label (0 for hard, 1 for soft)')
    parser.add_argument("--soft_label_weight", default=1.0, type=float, help="iters for pretrains")
    parser.add_argument('--self_training_eps', type=float, default=0.8, help='threshold for confidence')
    parser.add_argument('--self_training_power', type=float, default=2, help='power of pred score')
    parser.add_argument('--self_training_confreg', type=float, default=0, help='confidence smooth power')
    parser.add_argument('--self_training_contrastive_weight', type=float, default=0, help='contrastive learning weight')

    parser.add_argument('--self_training_max_step', type=int, default=10000, help='the maximum step (usually after the first epoch) for self training')
    parser.add_argument('--distmetric', type=str, default="l2", help='distance type. Choices = [cos, l2]')
    parser.add_argument('--self_training_label_mode', type=str, default="hard", help='pseudo label type. Choices = [hard, soft]')
    parser.add_argument('--self_training_update_period', type=int, default=100, help='update period')


    args = parser.parse_args()

    if os.path.exists(args.output_dir) and os.listdir(
            args.output_dir) and args.do_train and not args.overwrite_output_dir:
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(
                args.output_dir))

    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd
        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
                        datefmt="%m/%d/%Y %H:%M:%S",
                        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN)
    logger.warning("Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
                   args.local_rank, device, args.n_gpu, bool(args.local_rank != -1), args.fp16)

    # Set seed
    set_seed(args)

    # Prepare NER task
    labels = get_labels(args.labels)
    num_labels = len(labels)
    # Use cross entropy ignore index as padding label id so that only real label ids contribute to the loss later
    pad_token_label_id = CrossEntropyLoss().ignore_index

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier()  # Make sure only the first process in distributed training will download model & vocab

    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    config = config_class.from_pretrained(args.config_name if args.config_name else args.model_name_or_path,
                                          num_labels=num_labels)
    tokenizer = tokenizer_class.from_pretrained(args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
                                                do_lower_case=args.do_lower_case)
    model = model_class.from_pretrained(args.model_name_or_path, from_tf=bool(".ckpt" in args.model_name_or_path),
                                        config=config)

    if args.local_rank == 0:
        torch.distributed.barrier()  # Make sure only the first process in distributed training will download model & vocab

    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    # Training
    if args.do_train:
        if args.method == 'finetune':
            train_dataset = load_and_cache_examples(args, tokenizer, labels, pad_token_label_id, mode="train")
            global_step, tr_loss = train(args, train_dataset, model, tokenizer, labels, pad_token_label_id)
            logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)
          
        if args.method == 'selftrain':
            args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)

            logger.info("***selftrain starts here***")
            unlabeled_dataset = load_and_cache_examples(args, tokenizer, labels, pad_token_label_id, mode="unlabeled")
            train_dataset = load_and_cache_examples(args, tokenizer, labels, pad_token_label_id, mode="train")
            
            selftrain_dataset = ConcatDataset([train_dataset, unlabeled_dataset])
            ##train step
            self_train(args, selftrain_dataset, model, tokenizer, labels, pad_token_label_id)


            exit()

    # Saving best-practices: if you use default names for the model, you can reload it using from_pretrained()
    if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
        # Create output directory if needed
        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(args.output_dir)

        logger.info("Saving model checkpoint to %s", args.output_dir)
        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        model_to_save = model.module if hasattr(model, "module") else model  # Take care of distributed/parallel training
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)

        # Good practice: save your training arguments together with the trained model
        torch.save(args, os.path.join(args.output_dir, "training_args.bin"))

    # Evaluation
    results = {}
    if args.do_eval and args.local_rank in [-1, 0]:
        tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
        checkpoints = [args.output_dir]
        if args.eval_all_checkpoints:
            checkpoints = list(os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True)))
            logging.getLogger("pytorch_transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
        logger.info("Evaluate the following checkpoints: %s", checkpoints)
        for checkpoint in checkpoints:
            global_step = checkpoint.split("-")[-1] if len(checkpoints) > 1 else ""
            model = model_class.from_pretrained(checkpoint)
            model.to(args.device)
            result, _ = evaluate(args, model, tokenizer, labels, pad_token_label_id, mode="dev", prefix=global_step)
            if global_step:
                result = {"{}_{}".format(global_step, k): v for k, v in result.items()}
            results.update(result)
        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as writer:
            for key in sorted(results.keys()):
                writer.write("{} = {}\n".format(key, str(results[key])))

    if args.do_predict and args.local_rank in [-1, 0]:
        tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
        model = model_class.from_pretrained(args.output_dir)
        model.to(args.device)
        result, predictions = evaluate(args, model, tokenizer, labels, pad_token_label_id, mode="test")
        # Save results
        output_test_results_file = os.path.join(args.output_dir, "test_results.txt")
        with open(output_test_results_file, "w") as writer:
            for key in sorted(result.keys()):
                writer.write("{} = {}\n".format(key, str(result[key])))
        # Save predictions
        output_test_predictions_file = os.path.join(args.output_dir, "test_predictions.txt")
        with open(output_test_predictions_file, "w", encoding='utf-8') as writer:
            with open(os.path.join(args.data_dir, "test.txt"), "r", encoding='utf-8') as f:
                example_id = 0
                for line in f:

                    if line.startswith("-DOCSTART-") or line == "" or line == "\n":
                        writer.write(line)
                        if not predictions[example_id]:
                            example_id += 1
                    elif predictions[example_id]:
                        output_line = line.split()[0] + " " + predictions[example_id].pop(0) + "\n"
                        writer.write(output_line)
                    else:
                        logger.warning("Maximum sequence length exceeded: No prediction for '%s'.", line.split()[0])

    return results
Example #14
from PIL import Image
import data_utils as du
import tensorflow as tf
import matplotlib.pyplot as plt
from skimage.transform import resize
from collections import namedtuple
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import fileinput

coords, chips, classes = du.get_labels(
    '/Users/nanboliu/Desktop/xview/data/xView_train.geojson')
img_dict = {}
for i in range(chips.shape[0]):
    if chips[i] not in ['24.tif']:
        if chips[i] not in img_dict:
            img_dict[chips[i]] = set()
        img_dict[chips[i]].add(i)

#create a label dictionary
labels = {}
for line in fileinput.input(
        '/Users/nanboliu/Desktop/xview/data/class_labels.txt'):
    labels[int(line.split(":")[0])] = line.split(":")[1].rstrip('\n')

class_vector = [keys for keys in labels]
n_class = len(class_vector)
resized_image = (32, 32)
dataset = namedtuple('Dataset', ['X', 'y'])
path = '/Volumes/Nanbo/xview_data/train_images/'
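
The snippet stops after setting up the lookup tables. A sketch of how one chip and its boxes might be consumed (the [xmin, ymin, xmax, ymax] box layout and the crop/resize step are assumptions, not shown above):

chip_name = next(iter(img_dict))
img = np.array(Image.open(path + chip_name))
for i in img_dict[chip_name]:
    xmin, ymin, xmax, ymax = coords[i].astype(int)
    crop = resize(img[ymin:ymax, xmin:xmax], resized_image)
    class_name = labels.get(int(classes[i]), 'unknown')
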
Example #15
from keras.models import load_model
import tensorflow as tf
import base64
import numpy as np
import cv2
import os
import json

from flask import Flask, Response, request
from data_utils import get_labels

app = Flask(__name__)

graph = tf.get_default_graph()
model = load_model('QuickDraw_conv82.h5')  # load the trained model
data_dir = ".\\npy_data\\"
label_names = get_labels(data_dir)


@app.route('/labels')
def get_labels():
    # return json.dumps(label_names)
    return Response(json.dumps(label_names), mimetype='application/json')


@app.route('/', methods=['GET', 'POST'])
def index():
    if request.method == 'POST':
        # print(request.form)
        img_b64encode = request.form.get("base64img", "")
        # img_b64encode = "data:image/jpeg;base64,/9j/4AAQS......."
        img_b64decode = base64.b64decode(img_b64encode[23:])  # base64 decode (strip the data-URI prefix)
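        # Hypothetical continuation (the original example is cut off here): decode the bytes
        # into an image, run the loaded model, and return the predicted label. The 28x28
        # grayscale input shape is an assumption about the QuickDraw model.
        img_array = np.frombuffer(img_b64decode, np.uint8)
        img = cv2.imdecode(img_array, cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (28, 28)).astype('float32') / 255.0
        with graph.as_default():
            pred = model.predict(img.reshape(1, 28, 28, 1))
        return Response(json.dumps({"label": label_names[int(np.argmax(pred))]}),
                        mimetype='application/json')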
Example #16
def main():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument(
        "--data_dir",
        default=None,
        type=str,
        required=True,
        help="The input data dir. Should contain the training files for the CoNLL-2003 NER task.",
    )
    parser.add_argument(
        "--model_type",
        default=None,
        type=str,
        required=True,
        help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()),
    )
    parser.add_argument(
        "--model_name_or_path",
        default=None,
        type=str,
        required=True,
        help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS),
    )
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help="The output directory where the model predictions and checkpoints will be written.",
    )

    parser.add_argument(
        "--config_name", default="", type=str, help="Pretrained config name or path if not the same as model_name"
    )
    parser.add_argument(
        "--tokenizer_name",
        default="",
        type=str,
        help="Pretrained tokenizer name or path if not the same as model_name",
    )
    parser.add_argument(
        "--cache_dir",
        default="",
        type=str,
        help="Where do you want to store the pre-trained models downloaded from s3",
    )
    parser.add_argument(
        "--max_seq_length",
        default=128,
        type=int,
        help="The maximum total input sequence length after tokenization. Sequences longer "
        "than this will be truncated, sequences shorter will be padded.",
    )
    parser.add_argument("--do_train", action="store_true", help="Whether to run training.")
    parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev set.")
    parser.add_argument("--do_predict", action="store_true", help="Whether to run predictions on the test set.")
    parser.add_argument(
        "--evaluate_during_training",
        action="store_true",
        help="Whether to run evaluation during training at each logging step.",
    )
    parser.add_argument(
        "--do_lower_case", action="store_true", help="Set this flag if you are using an uncased model."
    )

    parser.add_argument("--per_gpu_train_batch_size", default=8, type=int, help="Batch size per GPU/CPU for training.")
    parser.add_argument(
        "--per_gpu_eval_batch_size", default=8, type=int, help="Batch size per GPU/CPU for evaluation."
    )
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help="Number of updates steps to accumulate before performing a backward/update pass.",
    )
    ## Other parameters
    parser.add_argument("--labels", default="", type=str,
                        help="Path to a file containing all labels. If not specified, CoNLL-2003 labels are used.")

    parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.")
    parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer.")
    parser.add_argument("--adam_beta1", default=0.9, type=float, help="BETA1 for Adam optimizer.")
    parser.add_argument("--adam_beta2", default=0.999, type=float, help="BETA2 for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.")
    parser.add_argument(
        "--num_train_epochs", default=3.0, type=float, help="Total number of training epochs to perform."
    )
    parser.add_argument(
        "--max_steps",
        default=-1,
        type=int,
        help="If > 0: set total number of training steps to perform. Override num_train_epochs.",
    )
    parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.")

    parser.add_argument("--logging_steps", type=int, default=50, help="Log every X updates steps.")
    parser.add_argument("--save_steps", type=int, default=50, help="Save checkpoint every X updates steps.")
    parser.add_argument(
        "--eval_all_checkpoints",
        action="store_true",
        help="Evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number",
    )
    parser.add_argument("--no_cuda", action="store_true", help="Avoid using CUDA when available")
    parser.add_argument(
        "--overwrite_output_dir", action="store_true", help="Overwrite the content of the output directory"
    )
    parser.add_argument(
        "--overwrite_cache", action="store_true", help="Overwrite the cached training and evaluation sets"
    )
    parser.add_argument("--seed", type=int, default=42, help="random seed for initialization")

    parser.add_argument(
        "--fp16",
        action="store_true",
        help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit",
    )
    parser.add_argument(
        "--fp16_opt_level",
        type=str,
        default="O1",
        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
        "See details at https://nvidia.github.io/apex/amp.html",
    )
    parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank")
    parser.add_argument("--server_ip", type=str, default="", help="For distant debugging.")
    parser.add_argument("--server_port", type=str, default="", help="For distant debugging.")

    # mean teacher
    parser.add_argument('--mt', type=int, default=0, help='mean teacher.')
    parser.add_argument('--mt_updatefreq', type=int, default=1, help='mean teacher update frequency')
    parser.add_argument('--mt_class', type=str, default="kl", help='mean teacher class, choices:[smart, prob, logit, kl(default), distill].')
    parser.add_argument('--mt_lambda', type=float, default=1, help="trade off parameter of the consistent loss.")
    parser.add_argument('--mt_rampup', type=int, default=300, help="rampup iteration.")
    parser.add_argument('--mt_alpha1', default=0.99, type=float, help="moving average parameter of mean teacher (for the exponential moving average).")
    parser.add_argument('--mt_alpha2', default=0.995, type=float, help="moving average parameter of mean teacher (for the exponential moving average).")
    parser.add_argument('--mt_beta', default=10, type=float, help="coefficient of mt_loss term.")
    parser.add_argument('--mt_avg', default="exponential", type=str, help="moving average method, choices:[exponential(default), simple, double_ema].")
    parser.add_argument('--mt_loss_type', default="logits", type=str, help="subject to measure model difference, choices:[embeds, logits(default)].")

    # virtual adversarial training
    parser.add_argument('--vat', type=int, default=0, help='virtual adversarial training.')
    parser.add_argument('--vat_eps', type=float, default=1e-3, help='perturbation size for virtual adversarial training.')
    parser.add_argument('--vat_lambda', type=float, default=1, help='trade off parameter for virtual adversarial training.')
    parser.add_argument('--vat_beta', type=float, default=1, help='coefficient of the virtual adversarial training loss term.')
    parser.add_argument('--vat_loss_type', default="logits", type=str, help="subject to measure model difference, choices = [embeds, logits(default)].")



    args = parser.parse_args()

    # Prepare NER task
    labels = get_labels(args.labels)
    num_labels = len(labels)

    if (
        os.path.exists(args.output_dir)
        and os.listdir(args.output_dir)
        and args.do_train
        and not args.overwrite_output_dir
    ):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(
                args.output_dir
            )
        )

    # Create output directory if needed
    if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(args.output_dir)
    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd

        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    logging_fh = logging.FileHandler(os.path.join(args.output_dir, 'log.txt'))
    logging_fh.setLevel(logging.DEBUG)
    logger.addHandler(logging_fh)
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        args.fp16,
    )

    # Set seed
    set_seed(args)
    labels = get_labels(args.data_dir)
    num_labels = len(labels)
    # Use cross entropy ignore index as padding label id so that only real label ids contribute to the loss later
    pad_token_label_id = CrossEntropyLoss().ignore_index
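    # (CrossEntropyLoss defaults to ignore_index=-100, so positions labelled with
    # pad_token_label_id are skipped when the loss is computed.)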

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier()  # Make sure only the first process in distributed training will download model & vocab

    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    config = config_class.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        num_labels=num_labels,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    tokenizer = tokenizer_class.from_pretrained(
        args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    model = model_class.from_pretrained(
        args.model_name_or_path,
        from_tf=bool(".ckpt" in args.model_name_or_path),
        config=config,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )

    if args.local_rank == 0:
        torch.distributed.barrier()  # Make sure only the first process in distributed training will download model & vocab

    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    # Training
    best_dev, best_test = None, None  # ensure these exist even when --do_train is not set
    if args.do_train:
        train_dataset = load_and_cache_examples(args, tokenizer, labels, pad_token_label_id, mode="train")
        global_step, tr_loss, best_dev, best_test = train(args, train_dataset, model, tokenizer, labels, pad_token_label_id)
        logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)

    # Saving best-practices: if you use default names for the model, you can reload it using from_pretrained()
    if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
        logger.info("Saving model checkpoint to %s", args.output_dir)
        model_to_save = (
            model.module if hasattr(model, "module") else model
        )  # Take care of distributed/parallel training
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)
        torch.save(args, os.path.join(args.output_dir, "training_args.bin"))
        torch.save(model.state_dict(), os.path.join(args.output_dir, "model.pt"))
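        # Also keep a raw state_dict (model.pt) alongside the save_pretrained() checkpoint.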

    # Evaluation
    results = {}
    if args.do_eval and args.local_rank in [-1, 0]:
        tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
        checkpoints = [args.output_dir]
        if args.eval_all_checkpoints:
            checkpoints = list(
                os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True))
            )
            logging.getLogger("pytorch_transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
        logger.info("Evaluate the following checkpoints: %s", checkpoints)

        if not best_dev:
            best_dev = [0, 0, 0]
        for checkpoint in checkpoints:
            global_step = checkpoint.split("-")[-1] if len(checkpoints) > 1 else ""
            model = model_class.from_pretrained(checkpoint)
            model.to(args.device)
            result, _, best_dev, _ = evaluate(args, model, tokenizer, labels, pad_token_label_id, best=best_dev, mode="dev", prefix=global_step)
            if global_step:
                result = {"{}_{}".format(global_step, k): v for k, v in result.items()}
            results.update(result)
        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as writer:
            for key in sorted(results.keys()):
                writer.write("{} = {}\n".format(key, str(results[key])))

    if args.do_predict and args.local_rank in [-1, 0]:
        tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
        model = model_class.from_pretrained(args.output_dir)
        model.to(args.device)
        
        if not best_test:
            best_test = [0, 0, 0]
        result, predictions, _, _ = evaluate(args, model, tokenizer, labels, pad_token_label_id, best=best_test, mode="test")
        # Save results
        output_test_results_file = os.path.join(args.output_dir, "test_results.txt")
        with open(output_test_results_file, "w") as writer:
            for key in sorted(result.keys()):
                writer.write("{} = {}\n".format(key, str(result[key])))
        # Save predictions
        output_test_predictions_file = os.path.join(args.output_dir, "test_predictions.txt")
        with open(output_test_predictions_file, "w") as writer:
            with open(os.path.join(args.data_dir, "test.json"), "r") as f:
                example_id = 0
                data = json.load(f)
                for item in data:
                    output_line = str(item["str_words"]) + " " + predictions[example_id].pop(0) + "\n"
                    writer.write(output_line)
                    example_id += 1

    return results
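The mean-teacher flags above (--mt, --mt_alpha1, --mt_alpha2, --mt_rampup) indicate that a teacher copy of the model is kept as an exponential moving average (EMA) of the student weights; the actual update is performed inside train(), which is defined earlier in this example. As a rough illustration only, a minimal EMA update could look like the sketch below (update_teacher is a hypothetical name, not part of the original code):

import torch

def update_teacher(student, teacher, alpha=0.99, global_step=0):
    # Hedged sketch, not the original train() logic: ramp alpha up so early steps
    # copy the student more aggressively, then blend the parameters in place.
    alpha = min(1.0 - 1.0 / (global_step + 1), alpha)
    with torch.no_grad():
        for t_param, s_param in zip(teacher.parameters(), student.parameters()):
            t_param.data.mul_(alpha).add_(s_param.data, alpha=1.0 - alpha)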
Beispiel #17
0
    def train(self, data_dir, epochs=3, callback=None, model_name='vgg16'):
        """
        train 函数, 训练模型
        Keyword arguments::
        epochs: int 训练次数
        """
        if self.busy:
            return 1
        self.busy = True
        label_names = get_labels(data_dir)
        self.NUM_CLASSES = len(label_names)
        base_model = self.get_base_model(model_name)
        x_data, y_label = load_img_from_dir(data_dir,
                                            target_size=(self.IMAGE_SIZE,
                                                         self.IMAGE_SIZE),
                                            max_num=30)
        for i in range(x_data.shape[0]):
            x_data[i] = self.preprocess_input(x_data[i])
        print(x_data.shape)
        print(x_data[0].shape)
        x_data = x_data.reshape(x_data.shape[0], self.IMAGE_SIZE,
                                self.IMAGE_SIZE, 3)
        y_label_one_hot = prepress_labels(y_label)
        # Validation should use images the model has never seen
        train_x, test_x, train_y, test_y = train_test_split(x_data,
                                                            y_label_one_hot,
                                                            random_state=0,
                                                            test_size=0.3)
        # Custom FC head: use the base model's last convolutional output as its input
        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        x = Dense(self.FC_NUMS, activation='relu')(x)
        prediction = Dense(self.NUM_CLASSES, activation='softmax')(x)

        # Build the new model with the custom FC head attached
        model = Model(inputs=base_model.input, outputs=prediction)

        # Print the number of layers in the model
        print("layer nums:", len(model.layers))

        # Besides the FC head, the convolutional layers closest to it can also be trained.
        # The model architecture defines how many layers each convolutional block contains;
        # here TRAIN_LAYERS is 3, so the last convolutional block and the FC head are trainable.
        for layer in model.layers:
            layer.trainable = False
        for layer in model.layers[-self.TRAIN_LAYERS:]:
            layer.trainable = True
        for layer in model.layers:
            print("layer.trainable:", layer.trainable)

        # Compile the model
        model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        model.summary()
        model.fit(
            train_x,
            train_y,
            validation_data=(test_x, test_y),
            # model.fit(x_data, y_label_one_hot,
            #         validation_split=0.4,
            callbacks=[AccuracyLogger(callback)],
            epochs=epochs,
            batch_size=4,
            # steps_per_epoch=1,validation_steps =1 ,
            verbose=1,
            shuffle=True)
        self.model = model
        model.save(os.path.join(data_dir, 'model.h5'))
        self.label_names = label_names
        self.dump_label_name(label_names)
        # self.convert_tflite()
        self.session = K.get_session()
        self.graph = tf.get_default_graph()
        self.busy = False
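For completeness, here is a hedged sketch of how the saved model.h5 and the dumped label names might be used for single-image inference. predict_image and the 1/255 scaling are assumptions for illustration; the original class applies self.preprocess_input, which depends on the chosen base model.

import numpy as np
from keras.models import load_model
from keras.preprocessing import image

def predict_image(model_path, img_path, label_names, image_size=224):
    # Hypothetical inference helper, not part of the original example.
    model = load_model(model_path)
    img = image.load_img(img_path, target_size=(image_size, image_size))
    x = np.expand_dims(image.img_to_array(img), axis=0) / 255.0  # assumed scaling
    probs = model.predict(x)[0]
    return label_names[int(np.argmax(probs))], float(np.max(probs))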