def save_classifier(classifier, file_path="./model/"):
    """Saves classifier in the given location

    Persists four artifacts under ``file_path``: ``params.json`` (class name and
    defences), ``model.json`` (Keras architecture), ``weights.h5`` (HDF5 weights)
    and, when present, ``comp_par.json`` (compilation parameters).

    :param classifier: Model to save
    :param str file_path: Path to file
    """
    # ensure target directory exists (strip a trailing '/' for make_directory)
    make_directory(file_path.rsplit('/', 1)[0])
    # Save classifier params
    with open(os.path.join(file_path, 'params.json'), 'w') as fp:
        params = {"class_name": type(classifier).__name__,
                  "defences": classifier.defences}
        json.dump(params, fp)

    # Serialize model to JSON
    with open(os.path.join(file_path, "model.json"), "w") as json_file:
        model_json = classifier.model.to_json()
        json_file.write(model_json)

    # Serialize weights to HDF5
    classifier.model.save_weights(os.path.join(file_path, "weights.h5"))

    # Save compilation params to json
    if classifier.comp_param:
        with open(os.path.join(file_path, 'comp_par.json'), 'w') as fp:
            try:
                json.dump(classifier.comp_param, fp)
            # json.dump raises TypeError for unserializable objects and
            # ValueError for circular references / out-of-range floats.
            # Previously a bare `except:` swallowed every exception here
            # (including KeyboardInterrupt); catch only what json.dump raises.
            except (TypeError, ValueError):
                # dump may have written a partial document before raising:
                # rewind, write the fallback defaults, and truncate the rest.
                fp.seek(0)
                json.dump({"loss": 'categorical_crossentropy', "optimizer": "sgd",
                           "metrics": ['accuracy']}, fp)
                fp.truncate()
def process(args):
    """Train a topic/keyword model (LDA or TF-IDF) on the training corpus.

    Reads train_x/train_y line files, builds the model selected by
    ``args.problem``, tokenizes train_x and fits the model on the token lists.
    Side effects: creates the model directory and, for TF-IDF, writes/loads
    keyword files under ``args.path['model']``.
    """
    utils.make_directory(args.path['model'])
    tokenizer = args.tokenizer(args.path['vocab'])
    train_x = utils.read_lines(args.path['train_x'])
    train_y = utils.read_lines(args.path['train_y'])
    # keyword extraction uses both sides of the pairs as one corpus
    dataset = train_x + train_y
    keywords = None
    if args.problem == 'lda':
        model = LDAModel(args)
    else:
        # TF-IDF path: mine keywords from the whole corpus first, then keep
        # only tokens that rank among the top args.num_keywords keywords.
        trainset = [tokenizer.encode_line_into_words(i) for i in dataset]
        train_keywords(trainset, args.path['model'])
        keywords = load_keywords(args.path['model'])
        model = TFIDFModel(args)
    list_toks = []
    for n, line in enumerate(train_x):
        # progress log every 10000 lines (skipping n == 0)
        if not n % 10000 and n:
            utils.verbose('Tokenizing {} lines for {}'.format(n, args.problem))
        if keywords is None:
            # LDA: keep every token
            list_toks.append([str(s) for s in tokenizer.encode_line_into_words(line)])
        else:
            # TF-IDF: filter tokens against the keyword shortlist
            # NOTE(review): `s in keywords[: args.num_keywords]` is a linear
            # scan per token — presumably fine for the corpus sizes used here.
            list_toks.append([str(s) for s in tokenizer.encode_line_into_words(line)
                              if s in keywords[: args.num_keywords]])
    model.fit(list_toks)
def run_ner():
    """run ner task"""
    args_opt = parse_args()
    epoch_num = args_opt.epoch_num
    assessment_method = args_opt.assessment_method.lower()
    load_pretrain_checkpoint_path = args_opt.load_pretrain_checkpoint_path
    save_finetune_checkpoint_path = args_opt.save_finetune_checkpoint_path
    load_finetune_checkpoint_path = args_opt.load_finetune_checkpoint_path
    target = args_opt.device_target
    if target == "Ascend":
        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id)
    elif target == "GPU":
        context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
        # GPU backend runs fp32 only; silently downgrade the configured dtype
        if bert_net_cfg.compute_type != mstype.float32:
            logger.warning('GPU only support fp32 temporarily, run with fp32.')
            bert_net_cfg.compute_type = mstype.float32
    else:
        raise Exception("Target error, GPU or Ascend is supported.")
    # one label per line in the label file
    label_list = []
    with open(args_opt.label_file_path) as f:
        for label in f:
            label_list.append(label.strip())
    tag_to_index = convert_labels_to_index(label_list)
    if args_opt.use_crf.lower() == "true":
        # CRF decoding needs explicit START/STOP transition tags
        max_val = max(tag_to_index.values())
        tag_to_index["<START>"] = max_val + 1
        tag_to_index["<STOP>"] = max_val + 2
        number_labels = len(tag_to_index)
    else:
        number_labels = args_opt.num_class
    if args_opt.do_train.lower() == "true":
        netwithloss = BertNER(bert_net_cfg, args_opt.train_batch_size, True, num_labels=number_labels,
                              use_crf=(args_opt.use_crf.lower() == "true"),
                              tag_to_index=tag_to_index, dropout_prob=0.1)
        ds = create_ner_dataset(batch_size=args_opt.train_batch_size, repeat_count=1,
                                assessment_method=assessment_method,
                                data_file_path=args_opt.train_data_file_path,
                                schema_file_path=args_opt.schema_file_path,
                                do_shuffle=(args_opt.train_data_shuffle.lower() == "true"))
        do_train(ds, netwithloss, load_pretrain_checkpoint_path, save_finetune_checkpoint_path, epoch_num)
        # NOTE: this lookup of the newest checkpoint is nested under the train
        # branch because it needs the training dataset size (ds); when only
        # do_eval is set, the checkpoint path from the CLI is used as-is.
        if args_opt.do_eval.lower() == "true":
            if save_finetune_checkpoint_path == "":
                load_finetune_checkpoint_dir = _cur_dir
            else:
                load_finetune_checkpoint_dir = make_directory(save_finetune_checkpoint_path)
            load_finetune_checkpoint_path = LoadNewestCkpt(load_finetune_checkpoint_dir,
                                                           ds.get_dataset_size(), epoch_num, "ner")
    if args_opt.do_eval.lower() == "true":
        ds = create_ner_dataset(batch_size=args_opt.eval_batch_size, repeat_count=1,
                                assessment_method=assessment_method,
                                data_file_path=args_opt.eval_data_file_path,
                                schema_file_path=args_opt.schema_file_path,
                                do_shuffle=(args_opt.eval_data_shuffle.lower() == "true"))
        do_eval(ds, BertNER, args_opt.use_crf, number_labels, assessment_method,
                args_opt.eval_data_file_path, load_finetune_checkpoint_path,
                args_opt.vocab_file_path, args_opt.label_file_path, tag_to_index,
                args_opt.eval_batch_size)
def run_squad():
    """run squad task

    Parses CLI options, configures the MindSpore context for the selected
    device, then optionally finetunes BertSquad (do_train) and/or evaluates
    the newest finetuned checkpoint on the SQuAD eval set (do_eval).
    """
    # Fixed: description said "run classifier" (copy-paste from the
    # classifier runner); this parser belongs to the squad task.
    parser = argparse.ArgumentParser(description="run squad")
    parser.add_argument("--device_target", type=str, default="Ascend",
                        help="Device type, default is Ascend")
    # Fixed: "Eable" -> "Enable" in the user-facing help text.
    parser.add_argument("--do_train", type=str, default="false",
                        help="Enable train, default is false")
    parser.add_argument("--do_eval", type=str, default="false",
                        help="Enable eval, default is false")
    parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
    # Fixed: defaults were the strings "1"/"2" on type=int options; argparse
    # only converts string defaults as a special case — plain ints are the
    # idiomatic form and match --device_id above.
    parser.add_argument("--epoch_num", type=int, default=1, help="Epoch number, default is 1.")
    parser.add_argument("--num_class", type=int, default=2, help="The number of class, default is 2.")
    parser.add_argument("--train_data_shuffle", type=str, default="true",
                        help="Enable train data shuffle, default is true")
    parser.add_argument("--eval_data_shuffle", type=str, default="false",
                        help="Enable eval data shuffle, default is false")
    parser.add_argument("--vocab_file_path", type=str, default="", help="Vocab file path")
    parser.add_argument("--eval_json_path", type=str, default="",
                        help="Evaluation json file path, can be eval.json")
    parser.add_argument("--save_finetune_checkpoint_path", type=str, default="",
                        help="Save checkpoint path")
    parser.add_argument("--load_pretrain_checkpoint_path", type=str, default="",
                        help="Load checkpoint file path")
    parser.add_argument("--load_finetune_checkpoint_path", type=str, default="",
                        help="Load checkpoint file path")
    parser.add_argument("--train_data_file_path", type=str, default="",
                        help="Data path, it is better to use absolute path")
    parser.add_argument("--eval_data_file_path", type=str, default="",
                        help="Data path, it is better to use absolute path")
    parser.add_argument("--schema_file_path", type=str, default="",
                        help="Schema path, it is better to use absolute path")
    args_opt = parser.parse_args()
    epoch_num = args_opt.epoch_num
    load_pretrain_checkpoint_path = args_opt.load_pretrain_checkpoint_path
    save_finetune_checkpoint_path = args_opt.save_finetune_checkpoint_path
    load_finetune_checkpoint_path = args_opt.load_finetune_checkpoint_path
    # Validate option combinations before touching the device context.
    if args_opt.do_train.lower() == "false" and args_opt.do_eval.lower() == "false":
        raise ValueError("At least one of 'do_train' or 'do_eval' must be true")
    if args_opt.do_train.lower() == "true" and args_opt.train_data_file_path == "":
        raise ValueError("'train_data_file_path' must be set when do finetune task")
    if args_opt.do_eval.lower() == "true":
        if args_opt.eval_data_file_path == "":
            raise ValueError("'eval_data_file_path' must be set when do evaluation task")
        if args_opt.vocab_file_path == "":
            raise ValueError("'vocab_file_path' must be set when do evaluation task")
        if args_opt.eval_json_path == "":
            raise ValueError("'tokenization_file_path' must be set when do evaluation task")
    target = args_opt.device_target
    if target == "Ascend":
        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id)
    elif target == "GPU":
        context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
        # GPU backend runs fp32 only; downgrade the configured compute dtype
        if bert_net_cfg.compute_type != mstype.float32:
            logger.warning('GPU only support fp32 temporarily, run with fp32.')
            bert_net_cfg.compute_type = mstype.float32
    else:
        raise Exception("Target error, GPU or Ascend is supported.")
    netwithloss = BertSquad(bert_net_cfg, True, 2, dropout_prob=0.1)
    if args_opt.do_train.lower() == "true":
        ds = create_squad_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=1,
                                  data_file_path=args_opt.train_data_file_path,
                                  schema_file_path=args_opt.schema_file_path,
                                  do_shuffle=(args_opt.train_data_shuffle.lower() == "true"))
        do_train(ds, netwithloss, load_pretrain_checkpoint_path, save_finetune_checkpoint_path, epoch_num)
        # Locate the newest checkpoint just written; nested here because it
        # needs the training dataset size (ds).
        if args_opt.do_eval.lower() == "true":
            if save_finetune_checkpoint_path == "":
                load_finetune_checkpoint_dir = _cur_dir
            else:
                load_finetune_checkpoint_dir = make_directory(save_finetune_checkpoint_path)
            load_finetune_checkpoint_path = LoadNewestCkpt(load_finetune_checkpoint_dir,
                                                           ds.get_dataset_size(), epoch_num, "squad")
    if args_opt.do_eval.lower() == "true":
        ds = create_squad_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=1,
                                  data_file_path=args_opt.eval_data_file_path,
                                  schema_file_path=args_opt.schema_file_path,
                                  is_training=False,
                                  do_shuffle=(args_opt.eval_data_shuffle.lower() == "true"))
        do_eval(ds, args_opt.vocab_file_path, args_opt.eval_json_path,
                load_finetune_checkpoint_path, bert_net_cfg.seq_length)
logits, z, _, _ = model(inputs) labels = torch.argmax(logits.data, 1) return labels.cpu().detach().numpy(), z[-1].cpu().detach().numpy() if __name__ == "__main__": data_dir = "../DATA/nc/" model_dir = "results/iresnet/best/" save_dir = "results/iresnet/best/" save_dir_labels = os.path.join(save_dir, "predicted-label-masks") make_directory(save_dir_labels) save_dir_z = os.path.join(save_dir, "predicted-z", "z") save_dir_loc = os.path.join(save_dir, "predicted-z", "locations") make_directory(save_dir_z) make_directory(save_dir_loc) m = np.load(os.path.join(model_dir, "../mean.npy")) s = np.load(os.path.join(model_dir, "../std.npy")) # dataset loader tile_extr = TileExtractor() normalizer = Normalizer(m, s) dataset = CumuloDataset(root_dir="../DATA/nc/", ext="nc",
def run_classifier():
    """run classifier task

    Parses CLI options, configures the MindSpore context for the selected
    device, then optionally finetunes BertCLS (do_train) and/or evaluates the
    newest finetuned checkpoint (do_eval) with the chosen assessment method.
    """
    parser = argparse.ArgumentParser(description="run classifier")
    parser.add_argument("--device_target", type=str, default="Ascend", choices=["Ascend", "GPU"],
                        help="Device type, default is Ascend")
    parser.add_argument("--assessment_method", type=str, default="Accuracy",
                        choices=["Mcc", "Spearman_correlation", "Accuracy", "F1"],
                        help="assessment_method including [Mcc, Spearman_correlation, Accuracy, F1], "
                             "default is Accuracy")
    parser.add_argument("--do_train", type=str, default="false", choices=["true", "false"],
                        help="Enable train, default is false")
    parser.add_argument("--do_eval", type=str, default="false", choices=["true", "false"],
                        help="Enable eval, default is false")
    parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
    # Fixed: defaults were the strings "1"/"2" on type=int options; argparse
    # only converts string defaults as a special case — plain ints are the
    # idiomatic form and match --device_id above.
    parser.add_argument("--epoch_num", type=int, default=1, help="Epoch number, default is 1.")
    parser.add_argument("--num_class", type=int, default=2, help="The number of class, default is 2.")
    parser.add_argument("--train_data_shuffle", type=str, default="true", choices=["true", "false"],
                        help="Enable train data shuffle, default is true")
    parser.add_argument("--eval_data_shuffle", type=str, default="false", choices=["true", "false"],
                        help="Enable eval data shuffle, default is false")
    parser.add_argument("--save_finetune_checkpoint_path", type=str, default="",
                        help="Save checkpoint path")
    parser.add_argument("--load_pretrain_checkpoint_path", type=str, default="",
                        help="Load checkpoint file path")
    parser.add_argument("--load_finetune_checkpoint_path", type=str, default="",
                        help="Load checkpoint file path")
    parser.add_argument("--train_data_file_path", type=str, default="",
                        help="Data path, it is better to use absolute path")
    parser.add_argument("--eval_data_file_path", type=str, default="",
                        help="Data path, it is better to use absolute path")
    parser.add_argument("--schema_file_path", type=str, default="",
                        help="Schema path, it is better to use absolute path")
    args_opt = parser.parse_args()
    epoch_num = args_opt.epoch_num
    assessment_method = args_opt.assessment_method.lower()
    load_pretrain_checkpoint_path = args_opt.load_pretrain_checkpoint_path
    save_finetune_checkpoint_path = args_opt.save_finetune_checkpoint_path
    load_finetune_checkpoint_path = args_opt.load_finetune_checkpoint_path
    # Validate option combinations before touching the device context.
    if args_opt.do_train.lower() == "false" and args_opt.do_eval.lower() == "false":
        raise ValueError("At least one of 'do_train' or 'do_eval' must be true")
    if args_opt.do_train.lower() == "true" and args_opt.train_data_file_path == "":
        raise ValueError("'train_data_file_path' must be set when do finetune task")
    if args_opt.do_eval.lower() == "true" and args_opt.eval_data_file_path == "":
        raise ValueError("'eval_data_file_path' must be set when do evaluation task")
    target = args_opt.device_target
    if target == "Ascend":
        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id)
    elif target == "GPU":
        context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
        # GPU backend runs fp32 only; downgrade the configured compute dtype
        if bert_net_cfg.compute_type != mstype.float32:
            logger.warning('GPU only support fp32 temporarily, run with fp32.')
            bert_net_cfg.compute_type = mstype.float32
    else:
        raise Exception("Target error, GPU or Ascend is supported.")
    netwithloss = BertCLS(bert_net_cfg, True, num_labels=args_opt.num_class,
                          dropout_prob=0.1, assessment_method=assessment_method)
    if args_opt.do_train.lower() == "true":
        ds = create_classification_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=1,
                                           assessment_method=assessment_method,
                                           data_file_path=args_opt.train_data_file_path,
                                           schema_file_path=args_opt.schema_file_path,
                                           do_shuffle=(args_opt.train_data_shuffle.lower() == "true"))
        do_train(ds, netwithloss, load_pretrain_checkpoint_path, save_finetune_checkpoint_path, epoch_num)
        # Locate the newest checkpoint just written; nested here because it
        # needs the training dataset size (ds).
        if args_opt.do_eval.lower() == "true":
            if save_finetune_checkpoint_path == "":
                load_finetune_checkpoint_dir = _cur_dir
            else:
                load_finetune_checkpoint_dir = make_directory(save_finetune_checkpoint_path)
            load_finetune_checkpoint_path = LoadNewestCkpt(load_finetune_checkpoint_dir,
                                                           ds.get_dataset_size(), epoch_num, "classifier")
    if args_opt.do_eval.lower() == "true":
        ds = create_classification_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=1,
                                           assessment_method=assessment_method,
                                           data_file_path=args_opt.eval_data_file_path,
                                           schema_file_path=args_opt.schema_file_path,
                                           do_shuffle=(args_opt.eval_data_shuffle.lower() == "true"))
        do_eval(ds, BertCLS, args_opt.num_class, assessment_method, load_finetune_checkpoint_path)
classifier.compile(comp_params) if args.save is not False: if args.save: MODEL_PATH = os.path.abspath(args.save) else: if args.defences: defences = "-".join(args.defences) else: defences = "" MODEL_PATH = os.path.join(os.path.abspath(DATA_PATH), "classifiers", args.dataset, args.classifier, args.act, defences) v_print("Classifier saved in", MODEL_PATH) make_directory(MODEL_PATH) # Save best classifier weights # checkpoint = ModelCheckpoint(os.path.join(FILEPATH,"best-weights.{epoch:02d}-{val_acc:.2f}.h5"), # monitor='val_acc', verbose=1, save_best_only=True, mode='max') checkpoint = ModelCheckpoint(os.path.join(MODEL_PATH, "best-weights.h5"), monitor='val_acc', verbose=1, save_best_only=True, mode='max') # Remote monitor monitor = TensorBoard(log_dir=os.path.join(MODEL_PATH, 'logs'), write_graph=False) callbacks_list = [checkpoint, monitor] else:
def run_ner():
    """run ner task

    Parses CLI options, configures the MindSpore context for the selected
    device, builds the (optionally CRF-decoded) BertNER network, then
    optionally finetunes (do_train) and/or evaluates the newest finetuned
    checkpoint (do_eval).
    """
    # Fixed: description said "run classifier" (copy-paste from the
    # classifier runner); this parser belongs to the NER task.
    parser = argparse.ArgumentParser(description="run ner")
    parser.add_argument("--device_target", type=str, default="Ascend", choices=["Ascend", "GPU"],
                        help="Device type, default is Ascend")
    parser.add_argument("--assessment_method", type=str, default="F1",
                        choices=["F1", "clue_benchmark"],
                        help="assessment_method include: [F1, clue_benchmark], default is F1")
    # Fixed: "Eable" -> "Enable" in the user-facing help text.
    parser.add_argument("--do_train", type=str, default="false", choices=["true", "false"],
                        help="Enable train, default is false")
    parser.add_argument("--do_eval", type=str, default="false", choices=["true", "false"],
                        help="Enable eval, default is false")
    parser.add_argument("--use_crf", type=str, default="false", choices=["true", "false"],
                        help="Use crf, default is false")
    parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
    # Fixed: defaults were the strings "1"/"2" on type=int options; argparse
    # only converts string defaults as a special case — plain ints are the
    # idiomatic form and match --device_id above.
    parser.add_argument("--epoch_num", type=int, default=1, help="Epoch number, default is 1.")
    parser.add_argument("--num_class", type=int, default=2, help="The number of class, default is 2.")
    parser.add_argument("--train_data_shuffle", type=str, default="true", choices=["true", "false"],
                        help="Enable train data shuffle, default is true")
    parser.add_argument("--eval_data_shuffle", type=str, default="false", choices=["true", "false"],
                        help="Enable eval data shuffle, default is false")
    parser.add_argument("--vocab_file_path", type=str, default="",
                        help="Vocab file path, used in clue benchmark")
    parser.add_argument("--label2id_file_path", type=str, default="",
                        help="label2id file path, used in clue benchmark")
    parser.add_argument("--save_finetune_checkpoint_path", type=str, default="",
                        help="Save checkpoint path")
    parser.add_argument("--load_pretrain_checkpoint_path", type=str, default="",
                        help="Load checkpoint file path")
    parser.add_argument("--load_finetune_checkpoint_path", type=str, default="",
                        help="Load checkpoint file path")
    parser.add_argument("--train_data_file_path", type=str, default="",
                        help="Data path, it is better to use absolute path")
    parser.add_argument("--eval_data_file_path", type=str, default="",
                        help="Data path, it is better to use absolute path")
    parser.add_argument("--schema_file_path", type=str, default="",
                        help="Schema path, it is better to use absolute path")
    args_opt = parser.parse_args()
    epoch_num = args_opt.epoch_num
    assessment_method = args_opt.assessment_method.lower()
    load_pretrain_checkpoint_path = args_opt.load_pretrain_checkpoint_path
    save_finetune_checkpoint_path = args_opt.save_finetune_checkpoint_path
    load_finetune_checkpoint_path = args_opt.load_finetune_checkpoint_path
    # Validate option combinations before touching the device context.
    if args_opt.do_train.lower() == "false" and args_opt.do_eval.lower() == "false":
        raise ValueError("At least one of 'do_train' or 'do_eval' must be true")
    if args_opt.do_train.lower() == "true" and args_opt.train_data_file_path == "":
        raise ValueError("'train_data_file_path' must be set when do finetune task")
    if args_opt.do_eval.lower() == "true" and args_opt.eval_data_file_path == "":
        raise ValueError("'eval_data_file_path' must be set when do evaluation task")
    if args_opt.assessment_method.lower() == "clue_benchmark" and args_opt.vocab_file_path == "":
        raise ValueError("'vocab_file_path' must be set to do clue benchmark")
    if args_opt.use_crf.lower() == "true" and args_opt.label2id_file_path == "":
        raise ValueError("'label2id_file_path' must be set to use crf")
    if args_opt.assessment_method.lower() == "clue_benchmark" and args_opt.label2id_file_path == "":
        raise ValueError("'label2id_file_path' must be set to do clue benchmark")
    target = args_opt.device_target
    if target == "Ascend":
        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id)
    elif target == "GPU":
        context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
        # GPU backend runs fp32 only; downgrade the configured compute dtype
        if bert_net_cfg.compute_type != mstype.float32:
            logger.warning('GPU only support fp32 temporarily, run with fp32.')
            bert_net_cfg.compute_type = mstype.float32
    else:
        raise Exception("Target error, GPU or Ascend is supported.")
    tag_to_index = None
    if args_opt.use_crf.lower() == "true":
        # CRF decoding needs the label->id map plus explicit START/STOP tags
        with open(args_opt.label2id_file_path) as json_file:
            tag_to_index = json.load(json_file)
        max_val = max(tag_to_index.values())
        tag_to_index["<START>"] = max_val + 1
        tag_to_index["<STOP>"] = max_val + 2
        number_labels = len(tag_to_index)
    else:
        number_labels = args_opt.num_class
    netwithloss = BertNER(bert_net_cfg, True, num_labels=number_labels,
                          use_crf=(args_opt.use_crf.lower() == "true"),
                          tag_to_index=tag_to_index, dropout_prob=0.1)
    if args_opt.do_train.lower() == "true":
        ds = create_ner_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=1,
                                assessment_method=assessment_method,
                                data_file_path=args_opt.train_data_file_path,
                                schema_file_path=args_opt.schema_file_path,
                                do_shuffle=(args_opt.train_data_shuffle.lower() == "true"))
        do_train(ds, netwithloss, load_pretrain_checkpoint_path, save_finetune_checkpoint_path, epoch_num)
        # Locate the newest checkpoint just written; nested here because it
        # needs the training dataset size (ds).
        if args_opt.do_eval.lower() == "true":
            if save_finetune_checkpoint_path == "":
                load_finetune_checkpoint_dir = _cur_dir
            else:
                load_finetune_checkpoint_dir = make_directory(save_finetune_checkpoint_path)
            load_finetune_checkpoint_path = LoadNewestCkpt(load_finetune_checkpoint_dir,
                                                           ds.get_dataset_size(), epoch_num, "ner")
    if args_opt.do_eval.lower() == "true":
        ds = create_ner_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=1,
                                assessment_method=assessment_method,
                                data_file_path=args_opt.eval_data_file_path,
                                schema_file_path=args_opt.schema_file_path,
                                do_shuffle=(args_opt.eval_data_shuffle.lower() == "true"))
        do_eval(ds, BertNER, args_opt.use_crf, number_labels, assessment_method,
                args_opt.eval_data_file_path, load_finetune_checkpoint_path,
                args_opt.vocab_file_path, args_opt.label2id_file_path, tag_to_index)
def run_ner():
    """run ner task"""
    args_opt = parse_args()
    epoch_num = args_opt.epoch_num
    # this ERNIE runner always evaluates with F1
    assessment_method = "f1"
    load_pretrain_checkpoint_path = args_opt.load_pretrain_checkpoint_path
    save_finetune_checkpoint_path = args_opt.save_finetune_checkpoint_path
    load_finetune_checkpoint_path = args_opt.load_finetune_checkpoint_path
    target = args_opt.device_target
    if target == "Ascend":
        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id)
    elif target == "GPU":
        context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
        # GPU backend runs fp32 only; silently downgrade the configured dtype
        if ernie_net_cfg.compute_type != mstype.float32:
            logger.warning('GPU only support fp32 temporarily, run with fp32.')
            ernie_net_cfg.compute_type = mstype.float32
        # graph-kernel fusion enabled only for AdamWeightDecay without CRF
        # NOTE(review): placement of this check inside the GPU branch is the
        # reading of the original source — confirm against upstream.
        if optimizer_cfg.optimizer == 'AdamWeightDecay' and args_opt.use_crf.lower() == "false":
            context.set_context(enable_graph_kernel=True)
    else:
        raise Exception("Target error, GPU or Ascend is supported.")
    # label -> id mapping comes from a JSON config for this runner
    with open(args_opt.label_map_config) as f:
        tag_to_index = json.load(f)
    number_labels = args_opt.number_labels
    if args_opt.do_train.lower() == "true":
        netwithloss = ErnieNER(ernie_net_cfg, args_opt.train_batch_size, True, num_labels=number_labels,
                               use_crf=(args_opt.use_crf.lower() == "true"),
                               tag_to_index=tag_to_index, dropout_prob=0.1)
        ds = create_finetune_dataset(batch_size=args_opt.train_batch_size, repeat_count=1,
                                     data_file_path=args_opt.train_data_file_path,
                                     schema_file_path=args_opt.schema_file_path,
                                     do_shuffle=(args_opt.train_data_shuffle.lower() == "true"))
        # training banner
        print("==============================================================")
        print("processor_name: {}".format(args_opt.device_target))
        print("test_name: ERNIE Finetune Training")
        print("model_name: {}".format("ERNIE+MLP+CRF" if args_opt.use_crf.lower() == "true" else "ERNIE + MLP"))
        print("batch_size: {}".format(args_opt.train_batch_size))
        do_train(ds, netwithloss, load_pretrain_checkpoint_path, save_finetune_checkpoint_path, epoch_num)
        # Locate the newest checkpoint just written; nested here because it
        # needs the training dataset size (ds).
        if args_opt.do_eval.lower() == "true":
            if save_finetune_checkpoint_path == "":
                load_finetune_checkpoint_dir = _cur_dir
            else:
                load_finetune_checkpoint_dir = make_directory(save_finetune_checkpoint_path)
            load_finetune_checkpoint_path = LoadNewestCkpt(load_finetune_checkpoint_dir,
                                                           ds.get_dataset_size(), epoch_num, "ner")
    if args_opt.do_eval.lower() == "true":
        ds = create_finetune_dataset(batch_size=args_opt.eval_batch_size, repeat_count=1,
                                     data_file_path=args_opt.eval_data_file_path,
                                     schema_file_path=args_opt.schema_file_path,
                                     do_shuffle=(args_opt.eval_data_shuffle.lower() == "true"))
        do_eval(ds, ErnieNER, args_opt.use_crf, number_labels, assessment_method,
                args_opt.eval_data_file_path, load_finetune_checkpoint_path,
                args_opt.vocab_file_path, args_opt.label_file_path, tag_to_index,
                args_opt.eval_batch_size)
def setUp(self):
    # unittest fixture hook: ensure the workspace directory used by the
    # tests exists before every test method runs.
    make_directory("./tests/")
def run_squad():
    """run squad task

    Parses CLI options, configures the MindSpore context for the selected
    device, then optionally finetunes BertSquad (do_train) and/or runs SQuAD
    evaluation (do_eval): the eval set is built on the fly from the eval JSON
    and predictions are post-processed into output.json metrics.
    """
    parser = argparse.ArgumentParser(description="run squad")
    parser.add_argument("--device_target", type=str, default="Ascend", choices=["Ascend", "GPU"],
                        help="Device type, default is Ascend")
    # Fixed: "Eable" -> "Enable" in the user-facing help text.
    parser.add_argument("--do_train", type=str, default="false", choices=["true", "false"],
                        help="Enable train, default is false")
    parser.add_argument("--do_eval", type=str, default="false", choices=["true", "false"],
                        help="Enable eval, default is false")
    parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
    # Fixed: the help text said "default is 1" but the actual default is 3.
    parser.add_argument("--epoch_num", type=int, default=3, help="Epoch number, default is 3.")
    parser.add_argument("--num_class", type=int, default=2, help="The number of class, default is 2.")
    parser.add_argument("--train_data_shuffle", type=str, default="true", choices=["true", "false"],
                        help="Enable train data shuffle, default is true")
    parser.add_argument("--eval_data_shuffle", type=str, default="false", choices=["true", "false"],
                        help="Enable eval data shuffle, default is false")
    parser.add_argument("--train_batch_size", type=int, default=32, help="Train batch size, default is 32")
    parser.add_argument("--eval_batch_size", type=int, default=1, help="Eval batch size, default is 1")
    parser.add_argument("--vocab_file_path", type=str, default="", help="Vocab file path")
    parser.add_argument("--eval_json_path", type=str, default="",
                        help="Evaluation json file path, can be eval.json")
    parser.add_argument("--save_finetune_checkpoint_path", type=str, default="",
                        help="Save checkpoint path")
    parser.add_argument("--load_pretrain_checkpoint_path", type=str, default="",
                        help="Load checkpoint file path")
    parser.add_argument("--load_finetune_checkpoint_path", type=str, default="",
                        help="Load checkpoint file path")
    parser.add_argument("--train_data_file_path", type=str, default="",
                        help="Data path, it is better to use absolute path")
    parser.add_argument("--schema_file_path", type=str, default="",
                        help="Schema path, it is better to use absolute path")
    args_opt = parser.parse_args()
    epoch_num = args_opt.epoch_num
    load_pretrain_checkpoint_path = args_opt.load_pretrain_checkpoint_path
    save_finetune_checkpoint_path = args_opt.save_finetune_checkpoint_path
    load_finetune_checkpoint_path = args_opt.load_finetune_checkpoint_path
    # Validate option combinations before touching the device context.
    if args_opt.do_train.lower() == "false" and args_opt.do_eval.lower() == "false":
        raise ValueError("At least one of 'do_train' or 'do_eval' must be true")
    if args_opt.do_train.lower() == "true" and args_opt.train_data_file_path == "":
        raise ValueError("'train_data_file_path' must be set when do finetune task")
    if args_opt.do_eval.lower() == "true":
        if args_opt.vocab_file_path == "":
            raise ValueError("'vocab_file_path' must be set when do evaluation task")
        if args_opt.eval_json_path == "":
            raise ValueError("'tokenization_file_path' must be set when do evaluation task")
    target = args_opt.device_target
    if target == "Ascend":
        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id)
    elif target == "GPU":
        context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
        # GPU backend runs fp32 only; downgrade the configured compute dtype
        if bert_net_cfg.compute_type != mstype.float32:
            logger.warning('GPU only support fp32 temporarily, run with fp32.')
            bert_net_cfg.compute_type = mstype.float32
    else:
        raise Exception("Target error, GPU or Ascend is supported.")
    netwithloss = BertSquad(bert_net_cfg, True, 2, dropout_prob=0.1)
    if args_opt.do_train.lower() == "true":
        ds = create_squad_dataset(batch_size=args_opt.train_batch_size, repeat_count=1,
                                  data_file_path=args_opt.train_data_file_path,
                                  schema_file_path=args_opt.schema_file_path,
                                  do_shuffle=(args_opt.train_data_shuffle.lower() == "true"))
        do_train(ds, netwithloss, load_pretrain_checkpoint_path, save_finetune_checkpoint_path, epoch_num)
        # Locate the newest checkpoint just written; nested here because it
        # needs the training dataset size (ds).
        if args_opt.do_eval.lower() == "true":
            if save_finetune_checkpoint_path == "":
                load_finetune_checkpoint_dir = _cur_dir
            else:
                load_finetune_checkpoint_dir = make_directory(save_finetune_checkpoint_path)
            load_finetune_checkpoint_path = LoadNewestCkpt(load_finetune_checkpoint_dir,
                                                           ds.get_dataset_size(), epoch_num, "squad")
    if args_opt.do_eval.lower() == "true":
        from src import tokenization
        from src.create_squad_data import read_squad_examples, convert_examples_to_features
        from src.squad_get_predictions import write_predictions
        from src.squad_postprocess import SQuad_postprocess
        tokenizer = tokenization.FullTokenizer(vocab_file=args_opt.vocab_file_path, do_lower_case=True)
        # Build eval features directly from the SQuAD json (no pre-generated
        # eval record file for this runner).
        eval_examples = read_squad_examples(args_opt.eval_json_path, False)
        eval_features = convert_examples_to_features(examples=eval_examples,
                                                     tokenizer=tokenizer,
                                                     max_seq_length=bert_net_cfg.seq_length,
                                                     doc_stride=128,
                                                     max_query_length=64,
                                                     is_training=False,
                                                     output_fn=None,
                                                     vocab_file=args_opt.vocab_file_path)
        ds = create_squad_dataset(batch_size=args_opt.eval_batch_size, repeat_count=1,
                                  data_file_path=eval_features,
                                  schema_file_path=args_opt.schema_file_path,
                                  is_training=False,
                                  do_shuffle=(args_opt.eval_data_shuffle.lower() == "true"))
        outputs = do_eval(ds, load_finetune_checkpoint_path, args_opt.eval_batch_size)
        # n_best_size=20, max_answer_length=30, do_lower_case=True
        all_predictions = write_predictions(eval_examples, eval_features, outputs, 20, 30, True)
        SQuad_postprocess(args_opt.eval_json_path, all_predictions, output_metrics="output.json")
# Select CUMULO swaths that contain enough Deep Convection pixels and copy
# them into a dedicated dataset directory.
import numpy as np
import os
from shutil import copyfile

from src.datasets import CumuloDataset
from src.utils import make_directory

load_path = "data/npz/"
save_path = "datasets/cumulo-dc/"

# select tiles that contain at least <threshold> pixels of class Deep Convection
THR = 1000
# index corresponding to coarse class Deep Convection
LABEL = 7

dataset = CumuloDataset(load_path, ext="npz")
make_directory(save_path)

for instance in dataset:
    # only the filename, mask and labels are needed; middle fields are skipped
    name, *_, mask, labels = instance
    # number of pixels that are Deep Convection AND inside the valid mask
    dc_pixels = np.sum(np.logical_and(labels == LABEL, mask))
    if dc_pixels > THR:
        print(name, dc_pixels)
        # keep the original filename in the destination directory
        copyfile(name, os.path.join(save_path, os.path.basename(name)))
def predict_and_save(save_dir, model_path, swath):
    """Predict tile labels for one swath with a saved LightGBM model and
    write the reassembled label mask to ``save_dir`` as a ``.npy`` file."""
    filename, tiles, locations, _, rois, _ = swath
    print("processing", filename)
    model = load_lgbm(model_path)
    labels = predict_tiles(model, tiles)
    # mirror the source .nc filename as a .npy file in save_dir
    save_path = os.path.join(save_dir, os.path.basename(filename)).replace(".nc", ".npy")
    save_labels(labels, locations, rois.squeeze(), save_path)
    print(save_path, "processed")


if __name__ == "__main__":

    import os

    model_path = "results/lgbm/lightgbm-model.txt"
    save_dir = os.path.join("results/lgbm/predicted-label-masks/")
    make_directory(save_dir)

    tile_extr = TileExtractor()

    # raw .nc swaths, tiled on the fly; labels left untouched (label_preproc=None)
    dataset = CumuloDataset(root_dir="../DATA/nc/", ext="nc", label_preproc=None, tiler=tile_extr)

    for swath in dataset:
        predict_and_save(save_dir, model_path, swath)
nb_iter_norm = 5 # shape of the input (channels, height, width) in_shape = (13, t_size, t_size) use_cuda = torch.cuda.is_available() print("using GPUs?", use_cuda) classification_weight = in_shape[0] * in_shape[1] * in_shape[2] save_dir = "results/iresnet" save_dir_best = os.path.join(save_dir, "best") save_dir_last = os.path.join(save_dir, "last") make_directory(save_dir_best) make_directory(save_dir_last) train_log = open(os.path.join(save_dir, "train_log.txt"), 'w') val_log = open(os.path.join(save_dir, "val_log.txt"), 'w') test_log = open(os.path.join(save_dir, "test_log.txt"), 'w') # compute class weights and normalizer try: class_weights = np.load(os.path.join(save_dir, "class-weights.npy")) m = np.load(os.path.join(save_dir, "mean.npy")) s = np.load(os.path.join(save_dir, "std.npy")) except: # load dataset characteristics print("Computing dataset mean, standard deviation and class ratios")
v_print = get_verbose_print(args.verbose) alpha = 0.05 # constant for random perturbation # get dataset (X_train, Y_train), (X_test, Y_test), min_, max_ = load_dataset(args.dataset) session = tf.Session() k.set_session(session) # Load classification model MODEL_PATH = os.path.join(os.path.abspath(args.load), "") classifier = load_classifier(MODEL_PATH, "best-weights.h5") if args.save: SAVE_ADV = os.path.join(os.path.abspath(args.save), args.adv_method) make_directory(SAVE_ADV) with open(os.path.join(SAVE_ADV, "readme.txt"), "w") as wfile: wfile.write("Model used for crafting the adversarial examples is in " + MODEL_PATH) v_print("Adversarials crafted with", args.adv_method, "on", MODEL_PATH, "will be saved in", SAVE_ADV) if args.adv_method in ['fgsm', "vat", "rnd_fgsm"]: eps_ranges = { 'fgsm': [e / 10 for e in range(1, 11)], 'rnd_fgsm': [e / 10 for e in range(1, 11)], 'vat': [1.5, 2.1, 5, 7, 10] }
def process(args):
    """Train the dual-encoder model, then encode train_x into an Annoy index.

    Phase 1: mini-batch training with periodic dev evaluation and
    checkpointing of the best model. Phase 2: run one full inference epoch
    over train_x, collect the question encodings and build/save an Annoy
    nearest-neighbour index at ``args.path['ann']``.
    """
    utils.make_directory(args.path['model'])
    tokenizer = args.tokenizer(args.path['vocab'])
    train_batch = args.batch(tokenizer, args.max_lens)
    train_batch.set_data(utils.read_lines(args.path['train_x']),
                         utils.read_lines(args.path['train_y']))
    dev_batch = args.batch(tokenizer, args.max_lens)
    dev_batch.set_data(utils.read_lines(args.path['dev_x']),
                       utils.read_lines(args.path['dev_y']))
    model = args.model(args)
    # restrict TF to the configured GPU and memory fraction
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_device
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = args.gpu_memory
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(pad_step_number=True)
        recorder = Recorder()
        starter = time.time()
        for i in range(args.max_steps):
            # batch cursor (recorder.train_idx) persists across steps
            input_x, input_y, idx, update_epoch = train_batch.next_batch(
                args.batch_size, recorder.train_idx)
            train_features = {
                'input_x_ph': input_x,
                'input_y_ph': input_y,
                'keep_prob_ph': args.keep_prob
            }
            recorder.train_idx = idx
            train_fetches, train_feed = model.train_step(train_features)
            _, train_loss, train_acc = sess.run(train_fetches, train_feed)
            recorder.train_losses.append(train_loss)
            recorder.train_accs.append(train_acc)
            # periodic dev evaluation (skipping step 0); dropout disabled
            if not i % args.show_steps and i:
                input_x, input_y, idx, update_epoch = dev_batch.next_batch(
                    args.batch_size, recorder.dev_idx)
                dev_features = {
                    'input_x_ph': input_x,
                    'input_y_ph': input_y,
                    'keep_prob_ph': 1.0
                }
                recorder.dev_idx = idx
                dev_fetches, dev_feed = model.dev_step(dev_features)
                dev_loss, dev_acc = sess.run(dev_fetches, dev_feed)
                recorder.dev_losses.append(dev_loss)
                recorder.dev_accs.append(dev_acc)
                speed = args.show_steps / (time.time() - starter)
                utils.verbose(
                    r' step {:05d} | train [{:.5f} {:.5f}] | '
                    r'dev [{:.5f} {:.5f}] | speed {:.5f} it/s'.format(
                        i, train_loss, train_acc, dev_loss, dev_acc, speed))
                starter = time.time()
            # periodic checkpointing: save only when recorder reports improvement
            if not i % args.save_steps and i:
                features = recorder.stats()
                if features['save']:
                    saver.save(sess, args.path['model'])
                    utils.verbose(
                        r'step {:05d} - {:05d} | train [{:.5f} {:.5f}] | '
                        r'dev [{:.5f} {:.5f}]'.format(
                            i - args.save_steps, i,
                            features['train_loss'], features['train_acc'],
                            features['dev_loss'], features['dev_acc']))
                    print('-+' * 55)
        utils.write_result(args, recorder.lowest_loss)
        utils.verbose('Start building vector space from dual encoder model')
        # Phase 2: encode the full training set (one epoch) for the index
        vectors = []
        infer_batch = args.batch(tokenizer, args.max_lens)
        infer_batch.set_data(utils.read_lines(args.path['train_x']),
                             utils.read_lines(args.path['train_y']))
        starter = time.time()
        idx = 0
        update_epoch = False
        i = 0
        while not update_epoch:
            input_x, input_y, idx, update_epoch = infer_batch.next_batch(
                args.batch_size, idx)
            infer_features = {'input_x_ph': input_x, 'keep_prob_ph': 1.0}
            infer_fetches, infer_feed = model.infer_step(infer_features)
            enc_questions = sess.run(infer_fetches, infer_feed)
            vectors += enc_questions
            if not i % args.show_steps and i:
                speed = args.show_steps / (time.time() - starter)
                utils.verbose('step : {:05d} | speed: {:.5f} it/s'.format(
                    i, speed))
                starter = time.time()
            i += 1
        # drop padding rows appended by the final (partial) batch
        vectors = np.reshape(np.array(vectors),
                             [-1, args.hidden])[:infer_batch.data_size]
        vec_dim = vectors.shape[-1]
        ann = AnnoyIndex(vec_dim)
        for n, ii in enumerate(vectors):
            ann.add_item(n, ii)
        ann.build(args.num_trees)
        ann.save(args.path['ann'])
        utils.verbose('Annoy has been dump in {}'.format(args.path['ann']))