def main(args):
    """Run FastBERT inference over the data file named by ``args.infer_data``.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``model_config_file``, ``save_model_path``, ``infer_data``,
            ``data_load_num_workers``, ``inference_speed`` and
            ``dump_info_file``.

    The model is moved to the module-level ``device``.
    """
    # Step 1: read the JSON model configuration.
    config = load_json_config(args.model_config_file)

    # Step 2: build the network, restore the saved weights, pick the device.
    bert_config_path = config.get("bert_config_path")
    bert_config = BertConfig.from_json_file(bert_config_path)
    model = FastBertModel(bert_config, config)
    load_saved_model(model, args.save_model_path)
    model = model.to(device)
    print('Initialize model Done'.center(60, '*'))

    # Step 3: prepare the inference dataset.
    dataset_options = {
        "vocab_file": config.get("vocab_file"),
        "max_seq_len": config.get("max_seq_len"),
        "num_class": config.get("num_class"),
        "data_file": args.infer_data,
    }
    infer_dataset = PrepareDataset(**dataset_options)
    print("Load INFER Dataset Done, Total eval line: ", len(infer_dataset))

    # Step 4: run inference.
    infer_model(
        model,
        infer_dataset,
        num_workers=args.data_load_num_workers,
        inference_speed=args.inference_speed,
        dump_info_file=args.dump_info_file,
    )
def init_model(self):
    """Build the FastBERT model, restore its saved weights and set eval mode.

    Reads ``self.config``, ``self.save_model_path``, ``self.use_cuda`` and
    ``self.gpu_id``; stores the ready model on ``self.model``.
    """

    def banner(text):
        # Log separators are centred in a 60-column line of "=".
        return text.center(60, "=")

    config_path = self.config.get("bert_config_path")
    bert_config = BertConfig.from_json_file(config_path)
    self.model = FastBertModel(bert_config, self.config)
    logging.info(self.model)
    logging.info(banner("Initialize Model Done"))

    logging.info("Load saved model from: " + self.save_model_path)
    load_saved_model(self.model, self.save_model_path)
    logging.info(banner("Load Saved Model Done"))

    if self.use_cuda:
        self.model = self.model.cuda(self.gpu_id)

    self.model.eval()
def predict(self, criterion):
    """Restore the saved meta-network and evaluate it on the test images.

    Args:
        criterion: Loss object stored on ``self.loss_criterion``.

    Returns:
        Whatever ``self.evaluate`` returns for the test images.
    """
    print("Predicting on test set...")
    if self.use_cuda:
        self.meta_net.cuda()
    self.loss_criterion = criterion

    # Restore the checkpoint; a fresh optimizer is built only so the loader
    # has something to load the optimizer state into.
    checkpoint = utils.load_saved_model(
        self.model_path,
        self.meta_net,
        self.build_optimizers(self.meta_net),
    )
    (self.prev_meta_step_count,
     self.meta_net,
     self.meta_optimizer,
     self.state) = checkpoint
    print(
        f"Model has been loaded step:{self.prev_meta_step_count}, path:{self.model_path}"
    )

    # Only a square resize is applied here; no augmentation, tensor
    # conversion or normalization is part of this pipeline.
    side = Config.data.image_size
    transform_test = transforms.Compose([transforms.Resize((side, side))])

    test_splits = read_dataset_test(Config.data.miniimagenet_path,
                                    transform_test)
    test_dataset_imgs = test_splits[0]

    evaluation = self.evaluate(test_dataset_imgs)
    print(f"Total score: {evaluation}")
    return evaluation
def test(root, binary, filename=""):
    """Score a saved model on the "test" split and log the metrics.

    Args:
        root: Forwarded to ``SSTContainer`` as ``root``.
        binary: Forwarded to ``SSTContainer`` as ``binary``.
        filename: Forwarded to ``load_saved_model`` as ``filename``.

    Logs accuracy, macro precision/recall/F1 and the confusion matrix.
    """
    model = load_saved_model(filename=filename)
    container = SSTContainer(root=root, binary=binary)
    test_X, test_Y = container.data("test")

    # Turn the per-class scores into class indices.
    scores = model.predict(test_X)
    pred_Y = np.argmax(scores, axis=1)

    macro = {"average": "macro"}
    accuracy_value = accuracy_score(test_Y, pred_Y)
    precision_value = precision_score(test_Y, pred_Y, **macro)
    recall_value = recall_score(test_Y, pred_Y, **macro)
    f1_score_value = f1_score(test_Y, pred_Y, **macro)

    # Rows/columns of the matrix follow the sorted set of true labels.
    label_order = np.sort(np.unique(np.array(test_Y)))
    cm = confusion_matrix(test_Y, pred_Y, labels=label_order)

    logger.info(
        f"accuracy: {accuracy_value}, precision: {precision_value}, recall: {recall_value}, f1-score: {f1_score_value}"
    )
    logger.info(f"confusion matrix: \n {cm}")
def main(args):
    """Run FastBERT inference with logging output.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``model_config_file``, ``save_model_path``, ``gpu_ids``,
            ``infer_data``, ``data_load_num_workers``, ``inference_speed``
            and ``dump_info_file``.
    """
    config = load_json_config(args.model_config_file)
    logging.info(json.dumps(config, indent=2, sort_keys=True))
    logging.info("Load HyperParameters Done")

    # ---- Model graph -------------------------------------------------- #
    bert_config_path = config.get("bert_config_path")
    model = FastBertModel(BertConfig.from_json_file(bert_config_path), config)
    load_saved_model(model, args.save_model_path)
    logging.info(model)
    logging.info("Initialize Model Done".center(60, "="))

    # ---- Device: inference supports at most one GPU -------------------- #
    # A gpu_ids value of '-1' selects the CPU; anything else selects CUDA.
    use_cuda = args.gpu_ids != '-1'
    device_name = 'cuda' if use_cuda else 'cpu'
    device = torch.device(device_name)
    model.to(device)
    # The master GPU id is fixed at 0 regardless of args.gpu_ids.
    master_gpu_id = 0

    # ---- Dataset -------------------------------------------------------- #
    dataset_options = {
        "vocab_file": config.get("vocab_file"),
        "max_seq_len": config.get("max_seq_len"),
        "num_class": config.get("num_class"),
        "data_file": args.infer_data,
    }
    infer_dataset = PrepareDataset(**dataset_options)
    logging.info("Load INFER Dataset Done, Total eval line: %s",
                 len(infer_dataset))

    # ---- Run: only batch size 1 is supported ---------------------------- #
    infer_model(
        master_gpu_id,
        model,
        infer_dataset,
        use_cuda=use_cuda,
        num_workers=args.data_load_num_workers,
        inference_speed=args.inference_speed,
        dump_info_file=args.dump_info_file,
    )
def init_model():
    """Load every saved ``.h5`` model and return them keyed by name.

    Returns:
        dict: Maps each of ``sgd``, ``adam``, ``adagrad``, ``adabound``,
        ``amsbound`` and ``adadelta`` to the object returned by
        ``load_saved_model`` for ``saved_models/<name>.h5`` under
        ``ROOT_DIR``.
    """
    # Loaded in this order; the name doubles as the file stem.
    model_names = ("sgd", "adam", "adagrad", "adabound", "amsbound",
                   "adadelta")
    model_dict = {
        name: load_saved_model(
            name, os.path.join(ROOT_DIR, f"saved_models/{name}.h5"))
        for name in model_names
    }
    print("Model loaded")
    return model_dict
def embeddings_run():
    """Fit an embedding model on the AST matrix and save the embeddings.

    The model is fitted for 2 epochs, written to ``tmp/my_embeddings.h5``,
    read back from that file, and then used to compute the embedding
    matrix that is handed to ``embeddings.save_embeddings``.
    """
    # Inputs and labels come from the pre-processed AST block matrix.
    asts = embeddings.load_asts_from_file(
        "processed/hoc18_ast_block_matrix.npy", raejoon=True)
    labels = embeddings.get_output_labels(asts)

    # Fit the model; the returned training history is not used.
    model = embeddings.create_model(asts)
    embeddings.fit_model(model, asts, labels, epochs=2)

    # Round-trip through disk so the embeddings come from the saved model.
    model_path = "tmp/my_embeddings.h5"
    utils.save_model(model, model_path)
    model = utils.load_saved_model(model_path)

    embed_matrix = embeddings.get_embeddings(
        model, asts, "anonymizeddata/data/hoc18/asts/")
    print("Embeddings matrix (including 1st row) size: ",
          np.shape(embed_matrix))
    # NOTE(review): embed_dict_filename is not defined in this function;
    # presumably a module-level name - confirm.
    embeddings.save_embeddings(embed_matrix, embed_dict_filename)
def main(args):
    """Train or evaluate a FastBERT model according to ``args.run_mode``.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``model_config_file``, ``run_mode`` (``'train'`` or ``'eval'``),
            ``train_stage`` (0 or 1), ``save_model_path``,
            ``save_model_path_distill``, ``train_data``, ``eval_data``,
            ``epochs``, ``batch_size`` and ``data_load_num_workers``.

    Raises:
        RuntimeError: If ``run_mode`` or ``train_stage`` is not supported,
            or if a dataset required by the selected mode was not given.

    The model is moved to the module-level ``device``.
    """
    # 1. Load the predefined configuration files.
    config = load_json_config(args.model_config_file)
    # Configuration of the BERT model itself.
    bert_config = BertConfig.from_json_file(config.get('bert_config_path'))

    # 2. Build / restore the model. Unsupported modes fail here, before any
    #    code can touch an unbound ``model``.
    if args.run_mode == 'train':
        if args.train_stage == 0:
            # Stage 0 trains the teacher classifier from the pretrained BERT.
            model = FastBertModel.load_pretrained_bert_model(
                bert_config,
                config,
                pretrained_model_path=config.get('bert_pretrained_model_path'))
            save_model_path_for_train = args.save_model_path
        elif args.train_stage == 1:
            # Stage 1 distils the student classifiers from the stage-0 model.
            model = FastBertModel(bert_config, config)
            load_saved_model(model, args.save_model_path)
            save_model_path_for_train = args.save_model_path_distill
            # Freeze everything except the branch classifiers.
            for name, p in model.named_parameters():
                if 'branch_classifier' not in name:
                    p.requires_grad = False
            print(
                'Teacher Classifier Freezed, Student Classifier will Distilling'
            )
        else:
            raise RuntimeError(
                'Train stage not legal, please choose 0 or 1: {}'.format(
                    args.train_stage))
    elif args.run_mode == 'eval':
        model = FastBertModel(bert_config, config)
        load_saved_model(model, args.save_model_path)
    else:
        raise RuntimeError('Operation mode not legal: {}'.format(
            args.run_mode))
    print("initialize model Done".center(60, '*'))
    model.to(device)

    # 3. Initialise the datasets. Both start as None so a missing file is
    #    reported explicitly below instead of as a NameError.
    train_dataset = None
    eval_dataset = None
    if args.train_data:
        train_dataset = PrepareDataset(vocab_file=config.get('vocab_file'),
                                       max_seq_len=config.get('max_seq_len'),
                                       num_class=config.get('num_class'),
                                       data_file=args.train_data)
        print('load training dataset done. total training num: {}'.format(
            len(train_dataset)))
    if args.eval_data:
        eval_dataset = PrepareDataset(vocab_file=config.get('vocab_file'),
                                      max_seq_len=config.get('max_seq_len'),
                                      num_class=config.get('num_class'),
                                      data_file=args.eval_data)
        print('load eval dataset done. total eval num: {}'.format(
            len(eval_dataset)))

    # 4. Start training / evaluation.
    if eval_dataset is None:
        raise RuntimeError('eval_data is required for run_mode {!r}'.format(
            args.run_mode))
    if args.run_mode == 'train':
        if train_dataset is None:
            raise RuntimeError("train_data is required for run_mode 'train'")
        optimizer = init_bert_adam_optimizer(
            model, len(train_dataset), args.epochs, args.batch_size,
            config.get('gradient_accumulation_steps'), config.get('init_lr'),
            config.get('warmup_proportion'))
        train_model(args.train_stage,
                    save_model_path_for_train,
                    model,
                    optimizer,
                    args.epochs,
                    train_dataset,
                    eval_dataset,
                    batch_size=args.batch_size,
                    gradient_accumulation_steps=config.get(
                        'gradient_accumulation_steps'),
                    num_workers=args.data_load_num_workers)
    else:
        # run_mode was validated above, so this is the 'eval' path.
        eval_model(args.train_stage,
                   model,
                   eval_dataset,
                   batch_size=args.batch_size,
                   num_workers=args.data_load_num_workers)
def main(args):
    """Train or evaluate a FastBERT model, with logging and GPU selection.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``model_config_file``, ``run_mode`` (``'train'`` or ``'eval'``),
            ``train_stage`` (0 or 1), ``save_model_path``,
            ``save_model_path_distill``, ``gpu_ids`` (``'-1'`` for CPU, a
            single id, or a comma-separated list), ``train_data``,
            ``eval_data``, ``epochs``, ``batch_size`` and
            ``data_load_num_workers``.

    Raises:
        RuntimeError: If ``run_mode`` or ``train_stage`` is not supported,
            or if a dataset required by the selected mode was not given.
    """
    config = load_json_config(args.model_config_file)
    logging.info(json.dumps(config, indent=2, sort_keys=True))
    logging.info("Load HyperParameters Done")

    # ---------------------MODEL GRAPH INIT--------------------------#
    bert_config = BertConfig.from_json_file(config.get("bert_config_path"))
    if args.run_mode == 'train':
        if args.train_stage == 0:
            # Initial training starts from the pretrained BERT weights.
            model = FastBertModel.load_pretrained_bert_model(
                bert_config,
                config,
                pretrained_model_path=config.get("bert_pretrained_model_path"))
            save_model_path_for_train = args.save_model_path
        elif args.train_stage == 1:
            # Distillation training starts from the stage-0 checkpoint.
            model = FastBertModel(bert_config, config)
            load_saved_model(model, args.save_model_path)
            save_model_path_for_train = args.save_model_path_distill

            # Freeze everything except the branch classifiers.
            for name, p in model.named_parameters():
                if "branch_classifier" not in name:
                    p.requires_grad = False
            logging.info(
                "Main Graph and Teacher Classifier Freezed, Student Classifier will Distilling"
            )
        else:
            raise RuntimeError('Operation Train Stage(0 or 1) not Legal')
    elif args.run_mode == 'eval':
        model = FastBertModel(bert_config, config)
        load_saved_model(model, args.save_model_path)
    else:
        raise RuntimeError('Operation Mode not Legal')

    logging.info(model)
    logging.info("Initialize Model Done".center(60, "="))

    # ---------------------GPU SETTING--------------------------#
    use_cuda = args.gpu_ids != '-1'
    if not use_cuda:
        master_gpu_id = None
    elif len(args.gpu_ids) == 1:
        # A single-character id selects exactly one GPU.
        master_gpu_id = int(args.gpu_ids)
        model = model.cuda(master_gpu_id)
    else:
        # Anything longer is parsed as a comma-separated id list; the first
        # id hosts the model and DataParallel spreads work over all of them.
        gpu_ids = [int(each) for each in args.gpu_ids.split(",")]
        master_gpu_id = gpu_ids[0]
        model = model.cuda(gpu_ids[0])
        logging.info("Start multi-gpu dataparallel training/evaluating...")
        model = torch.nn.DataParallel(model, device_ids=gpu_ids)

    # -----------------------Dataset Init-----------------------------#
    # Both start as None so a missing file is reported explicitly below
    # instead of as a NameError.
    train_dataset = None
    eval_dataset = None
    if args.train_data:
        train_dataset = PrepareDataset(vocab_file=config.get("vocab_file"),
                                       max_seq_len=config.get("max_seq_len"),
                                       num_class=config.get("num_class"),
                                       data_file=args.train_data)
        logging.info("Load Training Dataset Done, Total training line: %s",
                     len(train_dataset))
    if args.eval_data:
        eval_dataset = PrepareDataset(vocab_file=config.get("vocab_file"),
                                      max_seq_len=config.get("max_seq_len"),
                                      num_class=config.get("num_class"),
                                      data_file=args.eval_data)
        logging.info("Load Eval Dataset Done, Total eval line: %s",
                     len(eval_dataset))

    # -----------------------Running Mode Start-----------------------#
    if eval_dataset is None:
        raise RuntimeError("eval_data is required for run_mode: " +
                           str(args.run_mode))
    if args.run_mode == "train":
        if train_dataset is None:
            raise RuntimeError("train_data is required for run_mode: train")
        optimizer = init_bert_adam_optimizer(
            model, len(train_dataset), args.epochs, args.batch_size,
            config.get("gradient_accumulation_steps"), config.get("init_lr"),
            config.get("warmup_proportion"))
        train_model(args.train_stage,
                    save_model_path_for_train,
                    master_gpu_id,
                    model,
                    optimizer,
                    args.epochs,
                    train_dataset,
                    eval_dataset,
                    batch_size=args.batch_size,
                    gradient_accumulation_steps=config.get(
                        "gradient_accumulation_steps"),
                    use_cuda=use_cuda,
                    num_workers=args.data_load_num_workers)
    elif args.run_mode == "eval":
        eval_model(args.train_stage,
                   master_gpu_id,
                   model,
                   eval_dataset,
                   batch_size=args.batch_size,
                   use_cuda=use_cuda,
                   num_workers=args.data_load_num_workers)
    else:
        # The attribute is ``run_mode``; reading ``args.mode`` here would
        # raise AttributeError instead of the intended error.
        raise RuntimeError("Mode not support: " + str(args.run_mode))
def main(args):
    """Train, evaluate or predict with a BERT multi-label classifier.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``config_file``, ``saved_model``, ``gpu_ids``, ``mode``
            (``'train'``, ``'eval'`` or ``'predict'``), ``input_file`` and
            ``output_file``.

    Raises:
        RuntimeError: If ``args.mode`` is none of the supported modes.
    """

    def banner(text):
        # Log separators are centred in a 60-column line of "=".
        return text.center(60, "=")

    def dataset_from_input_file():
        # Dataset whose testing split is the user-supplied input file.
        return MultiLabelClassificationDataset(
            vocab_file=config.get("vocab_file"),
            label_file=config.get("label_file"),
            max_seq_len=config.get("max_seq_len"),
            label_weight_file=config.get("label_weight_file"),
            testing_path=args.input_file)

    # ---- Hyper-parameters ------------------------------------------------
    logging.info(banner("Loading HyperParameters"))
    config = load_json_config(args.config_file)
    logging.info(json.dumps(config, indent=2, sort_keys=True))
    logging.info(banner("Load HyperParameters Done"))

    # ---- Dataset -----------------------------------------------------------
    logging.info(banner("Loading Dataset"))
    dataset = MultiLabelClassificationDataset(
        vocab_file=config.get("vocab_file"),
        label_file=config.get("label_file"),
        label_weight_file=config.get("label_weight_file"),
        max_seq_len=config.get("max_seq_len"),
        training_path=config.get("training_path"),
        testing_path=config.get("testing_path"))
    logging.info("Total training line: " + str(dataset.training_len) +
                 ", total testing line: " + str(dataset.testing_len))
    label_size = len(dataset.label2idx)
    logging.info('label size: %d' % label_size)
    logging.info(banner("Load Dataset Done"))

    if config.get("use_cuda"):
        label_weight = dataset.label_weight.to('cuda')
    else:
        label_weight = dataset.label_weight

    # ---- Model -------------------------------------------------------------
    logging.info(banner("Initializing SequenceClassification Model"))
    if config.get("pretrained_model_path"):
        model = BertForMultiLabelClassification.load_pretrained_bert_model(
            bert_config_path=config.get("bert_config_path"),
            pretrained_model_path=config.get("pretrained_model_path"),
            num_labels=len(dataset.label2idx),
            label_weight=label_weight)
    else:
        bert_config = BertConfig.from_json_file(
            config.get("bert_config_path"))
        model = BertForMultiLabelClassification(bert_config,
                                                len(dataset.label2idx),
                                                label_weight=label_weight)

    # Optionally keep only the first ``num_tuning_layers`` encoder layers.
    if config.get("num_tuning_layers") is not None:
        kept_layers = model.bert.encoder.layer[:config.get("num_tuning_layers")]
        model.bert.encoder.layer = torch.nn.ModuleList(kept_layers)
    logging.info(model)
    logging.info(banner("Initialize SequenceClassification Model Done"))

    if args.saved_model:
        logging.info(banner("Loading Saved Model"))
        logging.info("Load saved model from: " + args.saved_model)
        load_saved_model(model, args.saved_model)
        logging.info(banner("Load Saved Model Done"))

    # ---- Device placement ----------------------------------------------
    if len(args.gpu_ids) == 1:
        # A single-character id: move to that GPU only when CUDA is enabled.
        master_gpu_id = int(args.gpu_ids)
        if config.get("use_cuda"):
            model = model.cuda(int(args.gpu_ids))
    else:
        # Otherwise parse a comma-separated list and wrap in DataParallel.
        gpu_ids = [int(each) for each in args.gpu_ids.split(",")]
        master_gpu_id = gpu_ids[0]
        model = model.cuda(gpu_ids[0])
        logging.info("Start multi-gpu dataparallel training/evaluating...")
        model = torch.nn.DataParallel(model, device_ids=gpu_ids)

    # ---- Mode dispatch ---------------------------------------------------
    mode = args.mode
    if mode == "eval":
        if args.input_file:
            dataset = dataset_from_input_file()
        eval_model(master_gpu_id, model, dataset, label_size,
                   config.get("eval_batch_size"), config.get("use_cuda"),
                   config.get("num_workers"))
        return

    if mode == "predict":
        if args.input_file:
            dataset = dataset_from_input_file()
        model_predict(master_gpu_id, model, dataset, config,
                      config.get("eval_batch_size"), config.get("use_cuda"),
                      config.get("num_workers"), args.output_file)
        return

    if mode == "train":
        optimizer = init_bert_adam_optimizer(
            model, dataset.training_len, config.get("epochs"),
            config.get("batch_size"),
            config.get("gradient_accumulation_steps"), config.get("init_lr"),
            config.get("warmup_proportion"))
        train_model(config.get("experiment_name"),
                    master_gpu_id,
                    model,
                    optimizer,
                    config.get("epochs"),
                    dataset,
                    label_size,
                    batch_size=config.get("batch_size"),
                    eval_batch_size=config.get("eval_batch_size"),
                    gradient_accumulation_steps=config.get(
                        "gradient_accumulation_steps"),
                    use_cuda=config.get("use_cuda"),
                    num_workers=config.get("num_workers"))
        return

    raise RuntimeError("Mode not support: " + args.mode)
def _render_page(**context):
    """Render ``real_estate.html`` with the fixed ``data`` list plus *context*."""
    return render_template('real_estate.html',
                           data=[{'name': 'JA'}, {'name': 'JP'}],
                           **context)


@app.route('/', methods=['GET'])
def Home():
    """Serve the empty estimation form."""
    return _render_page()


@app.route('/predict_price', methods=['GET', 'POST'])
def predict_price():
    """Estimate a price from the submitted form and render the result.

    A GET request just renders the empty form. A POST request reads
    ``area``, ``rooms``, ``suites``, ``bathrooms``, ``parkings`` and
    ``neighborhood`` from the form and passes them to
    ``utils.get_estimated_price``.
    """
    if request.method != 'POST':
        return _render_page()

    form = request.form
    area = float(form.get('area'))
    rooms = int(form.get('rooms'))
    suites = int(form.get('suites'))
    bathrooms = int(form.get('bathrooms'))
    parkings = int(form.get('parkings'))
    neighborhood = form.get('neighborhood')

    response = utils.get_estimated_price(neighborhood, area, rooms, suites,
                                         bathrooms, parkings)

    if response < 0:
        # Use the same ``prediction_text`` key as the success branch; the
        # misspelled ``prediction_texts`` is not what the other branch fills.
        return _render_page(
            prediction_text="The price is below zero",
            prediction_text_dollar="The price is below zero")

    # The dollar figure is the R$ estimate divided by a fixed 5.12 rate.
    return _render_page(
        prediction_text="{:.2f}R$".format(response),
        prediction_text_dollar="{:.2f}$".format(response / 5.12))


if __name__ == "__main__":
    print('Starting python Flask Server for Real estate Prediction')
    # Load the saved model once, before the server starts accepting requests.
    utils.load_saved_model()
    # NOTE(review): debug=True enables the interactive debugger and reloader;
    # confirm this entry point is never used for a production deployment.
    app.run(debug=True)
def main(args):
    """Run per-sample FastBERT inference over ``./data/tcl/test.tsv``.

    Every line of the test file holds an integer label followed by the text.
    Each text is tokenized and classified on its own (batch size 1); the
    accuracy is printed and one line per sample is written to ``result.txt``
    as ``<label> <prediction> <seconds> <text>``.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``model_config_file`` and ``save_model_path``.

    The model and its inputs are placed on the module-level ``device``.
    """
    # 1. Load the configuration file.
    config = load_json_config(args.model_config_file)

    # 2. Load the model.
    bert_config = BertConfig.from_json_file(config.get("bert_config_path"))
    model = FastBertModel(bert_config, config)
    load_saved_model(model, args.save_model_path)
    model = model.to(device)
    print('Initialize model Done'.center(60, '*'))

    max_seq_len = 60
    inference_speed = 0.5

    # Read "<label><whitespace><text>" pairs. Splitting once on any
    # whitespace accepts tab- and space-separated files and keeps spaces
    # that occur inside the text.
    labels = []
    texts = []
    with open('./data/tcl/test.tsv', 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # tolerate blank lines
            label, text = line.split(None, 1)
            labels.append(int(label))
            texts.append(text)

    # 3. Prepare the tokenizer once; it does not depend on the sample.
    tokenizer = tokenization.FullTokenizer(vocab_file=config.get("vocab_file"),
                                           do_lower_case=True)

    sum_num = len(labels)
    correct_num = 0
    result = []
    for label, text in zip(labels, texts):
        start_time = time.time()

        # "[CLS]" plus at most max_seq_len - 1 word pieces.
        pieces = ["[CLS]"] + tokenizer.tokenize(text)[:(max_seq_len - 1)]
        token_ids = tokenizer.convert_tokens_to_ids(pieces)

        # 4. Run inference. Inputs must live on the same device as the model.
        tokens = torch.LongTensor([token_ids]).to(device)
        segment_ids = torch.LongTensor([[0] * len(token_ids)]).to(device)
        attn_masks = torch.LongTensor([[1] * len(token_ids)]).to(device)
        with torch.no_grad():
            probs, _layer_idxes, _uncertain_infos = model(
                tokens,
                token_type_ids=segment_ids,
                attention_mask=attn_masks,
                inference=True,
                inference_speed=inference_speed)
        _, top_index = probs.topk(1)
        spend_time = time.time() - start_time

        # Compare on the CPU so the check is device independent.
        target = torch.LongTensor([label])
        predicted = top_index.view(-1).cpu()
        if predicted[0].item() == label:
            correct_num += 1

        result.append(
            str(target[0]) + ' ' + str(predicted[0]) + ' ' +
            str(spend_time) + ' ' + text)

    # An empty test file yields accuracy 0.0 instead of ZeroDivisionError.
    accuracy = correct_num / sum_num if sum_num else 0.0
    print('正确率:{}'.format(accuracy))
    with open('result.txt', 'w', encoding='utf-8') as f:
        f.write('\n'.join(result))
def train_fn(self, criterion, optimizer, resume=True):
    """Prepare optimizers, checkpoint state and logger, then return ``self._train``.

    Args:
        criterion: Loss object stored on ``self.loss_criterion``.
        optimizer: Stored on ``self.fast_optimizer``.
        resume: When true, restore the step count, meta-network, meta
            optimizer and state from ``self.model_path``.

    Returns:
        The bound ``self._train`` callable (it is not invoked here).
    """
    self.loss_criterion = criterion
    self.fast_optimizer = optimizer

    # The meta-network is always optimised with plain SGD at the
    # configured meta learning rate.
    meta_params = self.meta_net.parameters()
    self.meta_optimizer = torch.optim.SGD(meta_params,
                                          lr=Config.train.meta_lr)

    if resume:
        checkpoint = utils.load_saved_model(self.model_path, self.meta_net,
                                            self.meta_optimizer)
        (self.prev_meta_step_count,
         self.meta_net,
         self.meta_optimizer,
         self.state) = checkpoint
        print(
            f"Model has been loaded step:{self.prev_meta_step_count}, path:{self.model_path}"
        )

    log_path = os.path.join(self.c_path, 'log.txt')
    self.logger = Logger(log_path, title=self.title)
    column_names = ['step', 'Learning Rate', 'Train Acc.', 'Valid Acc.']
    self.logger.set_names(column_names)

    return self._train
old_data_dictionary = json.loads(utils.get_HDF5(hf_file, 'data_dictionary')) best_parameters = json.loads(utils.get_HDF5(hf_file, 'best_parameters')) model_weights = list() weight_ctr = 0 while True: try: d_key = "weight_" + str(weight_ctr) weights = utils.get_HDF5(hf_file, d_key) model_weights.append(weights) weight_ctr += 1 except Exception as exception: break hf_file.close() loaded_model = utils.load_saved_model(model_config, model_weights) # Extract and process workflows connections = extract_workflow_connections.ExtractWorkflowConnections() workflow_paths, compatible_next_tools = connections.read_tabular_file( sys.argv[1]) # Process the paths from workflows print("Dividing data...") data = prepare_data.PrepareData(maximum_path_length, test_share, retrain) train_data, train_labels, test_data, test_labels, data_dictionary, reverse_dictionary, inverse_class_weights = data.get_data_labels_matrices( workflow_paths, old_data_dictionary) # retrain the model on new data retrain_predict_tool = RetrainPredictTool() results = retrain_predict_tool.retrain_model(