def calc_even_results(vals, bilstm, args):
    results = {}
    even_epochs = return_even_epochs(args.dump_dir)
    for epoch in even_epochs:
        load_model(epoch, bilstm, args.load_dir, args.gpu)
        _results, _ = test.run(vals, bilstm, args)
        results[epoch] = _results
    return results, epoch
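# A minimal sketch of the return_even_epochs helper assumed above. It is not shown in
# this file, so the checkpoint naming convention (an epoch number somewhere in each
# file name under dump_dir) is a guess, not the project's actual implementation.
import os
import re

def return_even_epochs(dump_dir):
    """Return the even-numbered epochs for which a checkpoint file exists in dump_dir."""
    epochs = []
    for fname in os.listdir(dump_dir):
        match = re.search(r'(\d+)', fname)  # pull the epoch number out of the file name
        if match and int(match.group(1)) % 2 == 0:
            epochs.append(int(match.group(1)))
    return sorted(set(epochs))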
def main():
    parser = create_arg_parser()
    args = parser.parse_args()
    load_config(args)
    emb_type = 'Word2VecWiki'
    dl = DatasetLoading(emb_type, args.emb_path,
                        exo1_word='僕', exo2_word='おまえ', exoX_word='これ')
    dl.making_intra_df()
    trains_dict, _, tests_dict = dl.split_each_domain('intra')
    if args.model == 'CPS' or args.model == 'MIX':
        statistics_of_each_case_type = train.init_statistics_of_each_case_type(
            trains_dict, args.case, args.media)
    else:
        statistics_of_each_case_type = None
    bilstm = train.initialize_model(
        args.gpu,
        vocab_size=len(dl.wv.index2word),
        v_vec=dl.wv.vectors,
        dropout_ratio=0.2,
        n_layers=3,
        model=args.model,
        statistics_of_each_case_type=statistics_of_each_case_type)
    pprint(args.__dict__)

    val_results = max_f1_epochs_of_vals(args.load_dir)
    results = {}
    logs = {}
    domain = 'All'
    epoch = val_results[domain]['epoch']
    load_model(epoch, bilstm, args.load_dir, args.gpu)
    _results, _ = run(tests_dict, bilstm, 1, args)
    results[domain] = _results[domain]
    results[domain]['epoch'] = epoch
    for domain in tests_dict.keys():
        epoch = val_results[domain]['epoch']
        load_model(epoch, bilstm, args.load_dir, args.gpu)
        _results, _logs = run(tests_dict, bilstm, 1, args)
        results[domain] = _results[domain]
        results[domain]['epoch'] = epoch
        logs[domain] = _logs[domain]
    dump_dict(results, args.load_dir, 'test_logs')
    dump_predict_logs(logs, args.load_dir)
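# A hedged sketch of what max_f1_epochs_of_vals is assumed to do: read the per-epoch
# validation results dumped during training and pick, per domain, the epoch with the
# best F1. The 'val_logs.json' file name and its layout are assumptions made here for
# illustration only.
import json
import os

def max_f1_epochs_of_vals(load_dir):
    with open(os.path.join(load_dir, 'val_logs.json')) as f:
        val_logs = json.load(f)  # assumed shape: {domain: {epoch: {'F1': float}}}
    best = {}
    for domain, per_epoch in val_logs.items():
        epoch, scores = max(per_epoch.items(), key=lambda kv: kv[1]['F1'])
        best[domain] = {'epoch': int(epoch), 'F1': scores['F1']}
    return best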
def main():
    parser = create_arg_parser()
    args = parser.parse_args()
    load_config(args)
    emb_type = 'Word2VecWiki'
    dl = DatasetLoading(emb_type, args.emb_path,
                        exo1_word='僕', exo2_word='おまえ', exoX_word='これ')
    dl.making_intra_df()
    trains_dict, vals_dict, _ = dl.split_each_domain('intra')
    if args.model == 'MIX':
        statistics_of_each_case_type = train.init_statistics_of_each_case_type(
            trains_dict, args.case, args.media)
    else:
        statistics_of_each_case_type = None
    bilstm = train.initialize_model(
        args.gpu,
        vocab_size=len(dl.wv.index2word),
        v_vec=dl.wv.vectors,
        dropout_ratio=0.2,
        n_layers=3,
        model=args.model,
        statistics_of_each_case_type=statistics_of_each_case_type)
    pprint(args.__dict__)

    val_results = test.max_f1_epochs_of_vals(args.load_dir)
    for domain in ['OC', 'OY', 'OW', 'PB', 'PM', 'PN']:
        print(f'--- start {domain} fine tuning ---')
        dump_dict(args.__dict__, args.dump_dir + f'/{domain}/{args.case}', 'args')
        epoch = val_results[domain]['epoch']
        load_model(epoch, bilstm, args.load_dir, args.gpu)
        # lr = 0.0001 might also be fine
        run(trains_dict[domain], vals_dict, bilstm, args,
            ft_domain=domain, lr=0.0001, batch_size=64)
def invoke_model(model_name):
    model_module_obj = loader.load_model(model_name)
    name = model_module_obj.get_name()
    range_val = model_module_obj.get_range(2)
    resource = model_module_obj.get_resource()
    print(name, range_val, resource)
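# invoke_model only relies on the duck-typed interface of whatever loader.load_model
# returns: get_name(), get_range(n) and get_resource(). The class below is a
# hypothetical example of that contract, not the project's real model module.
class ExampleModel:
    def get_name(self):
        return "example-model"

    def get_range(self, n):
        return list(range(n))

    def get_resource(self):
        return {"memory_mb": 256}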
def run(config):
    assert os.path.isfile(config.data_path), \
        '[{}] file does not exist.'.format(config.data_path)

    logging.info("##################### Start Training")
    logging.debug(vars(config))

    logging.info("##################### Build Tokenizer")
    tokenizer = load_tokenizer(config)

    ## Tokenizer param setting
    config.vocab_size = tokenizer.vocab_size
    config.tag_size = tokenizer.tag_size
    config.pad_token_id = tokenizer.pad_token_id

    ## Load data loader
    logging.info("##################### Load DataLoader")
    loader = load_dataloader(config, tokenizer)
    config.batch_size = int(config.batch_size / config.gradient_accumulation_steps)
    logging.info("##################### adjusted batch size {}".format(config.batch_size))
    train, valid = loader.get_train_valid_dataset()
    logging.info("##################### Train Dataset size : [" + str(len(train)) + "]")
    logging.info("##################### Valid Dataset size : [" + str(len(valid)) + "]")
    train = DataLoader(train, batch_size=config.batch_size, shuffle=True)
    valid = DataLoader(valid, batch_size=config.batch_size, shuffle=False)

    logging.info("##################### Load Model")
    model = load_model(config, tokenizer)
    model = torch.nn.DataParallel(model)
    model.to(config.device)

    logging.info("##################### Load Trainer")
    trainer = load_trainer(config, model)

    ## Training
    logging.info("##################### Training..........")
    best_loss = trainer.train(train, valid)
    logging.info("##################### Best Training Loss : " + str(best_loss))

    ## Testing
    test = loader.get_test_dataset()
    logging.info("##################### Test Dataset size : [" + str(len(test)) + "]")
    test = DataLoader(test, batch_size=config.batch_size, shuffle=False)
    logging.info("##################### Testing..........")
    f1_score = trainer.test(test)
    logging.info("##################### Best Test f1_score : " + str(f1_score))

    result = [config.save_path, best_loss, f1_score]
    return result
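# The batch-size adjustment above keeps the *effective* batch size constant when
# gradients are accumulated: each DataLoader mini-batch is 1/accumulation_steps of the
# requested batch. A minimal illustration (the concrete numbers are assumptions):
requested_batch_size = 64
gradient_accumulation_steps = 4
per_step_batch_size = int(requested_batch_size / gradient_accumulation_steps)  # 16
effective_batch_size = per_step_batch_size * gradient_accumulation_steps       # 64
assert per_step_batch_size == 16 and effective_batch_size == 64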
def main():
    parser = create_arg_parser()
    args = parser.parse_args()
    load_config(args)
    dl = DatasetLoading(args.emb_type, args.emb_path, media=args.media,
                        exo1_word=args.exo1_word, exo2_word=args.exo2_word,
                        exoX_word=args.exoX_word)
    if args.dataset_type == 'intra':
        dl.making_intra_df()
    elif args.dataset_type == 'inter':
        dl.making_inter_df()
    else:
        raise ValueError()
    _, _, tests = dl.split(args.dataset_type)
    bilstm = train.initialize_model(args.gpu,
                                    vocab_size=len(dl.wv.index2word),
                                    v_vec=dl.wv.vectors,
                                    emb_requires_grad=args.emb_requires_grad,
                                    args=args)
    pprint(args.__dict__)

    val_results = max_f1_epochs_of_vals(args.load_dir)
    results = {}
    logs = {}
    domain = 'All'
    epoch = val_results[domain]['epoch']
    load_model(epoch, bilstm, args.load_dir, args.gpu)
    _results, _ = run(tests, bilstm, args)
    results[domain] = _results[domain]
    results[domain]['epoch'] = epoch
    for domain in args.media:
        epoch = val_results[domain]['epoch']
        load_model(epoch, bilstm, args.load_dir, args.gpu)
        _results, _logs = run(tests, bilstm, args)
        results[domain] = _results[domain]
        results[domain]['epoch'] = epoch
        logs[domain] = _logs[domain]
    dump_dict(results, args.load_dir, 'test_logs')
    dump_predict_logs(logs, args.load_dir)
import math
import glob
import os

from sgfmill import sgf  # assuming the sgfmill package, which provides Sgf_game.from_bytes

import global_vars_go as gvg
import loader

kifuPath = "./kifu"
file_load_split = gvg.file_load_split
num_games = gvg.num_games

print("Loading game data...")
load_batches = math.ceil(num_games / file_load_split)
hm_epochs = gvg.hm_epochs

if gvg.cont_from_save.lower() == "true":
    model = loader.load_model_from_file(gvg.nn_type)
else:
    model = loader.load_model(gvg.nn_type)

for epoch in range(hm_epochs):
    print("Beginning new epoch...")
    for lb in range(load_batches):
        if lb >= gvg.load_split_offset:
            games = []
            i = 0
            for filename in glob.glob(os.path.join(kifuPath, "*.sgf")):
                if lb * file_load_split <= i < (lb + 1) * file_load_split and i < num_games:
                    with open(filename, "rb") as f:
                        games.append(sgf.Sgf_game.from_bytes(f.read()))
                i += 1
            print("Done loading file batch of", len(games), "games")
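# Once loaded, each sgfmill Sgf_game can be walked to recover the board size and the
# move sequence; a small usage sketch (the sample SGF record is made up):
from sgfmill import sgf

sample = b"(;FF[4]GM[1]SZ[19];B[pd];W[dp])"
game = sgf.Sgf_game.from_bytes(sample)
print("board size:", game.get_size())
for node in game.get_main_sequence():
    colour, move = node.get_move()  # move is (row, col) or None
    if colour is not None:
        print(colour, move)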
for div_seed in range(5):
    _, no_weight = detection(18, model, seed=div_seed)
    print(no_weight.mean_score())
    # File names: "異常検知結果" = anomaly detection result, "混合行列" = confusion matrix, "重み" = weight
    np.savetxt("異常検知結果{}.txt".format(div_seed), [no_weight.mean_score()], fmt='%.5f')
    with open("異常検知結果{}混合行列.txt".format(div_seed), "w") as f:
        json.dump(dic_trans(no_weight.confusion_matrixs), f, indent=4)
    # train, test = no_weight
    # pprint(test.mean_score())
    # pprint(test.confusion_matrixs)

    score = np.zeros(20)
    m = {i: None for i in range(20)}
    for seed in range(0, 20):
        print("de: {}, ".format(seed), end="")
        de = load_model("./detection_model_nagato/weight_{}.pickle".format(seed))
        _, weighted = detection(18, model, weight=de.x, seed=div_seed)
        batch_score = weighted.mean_score()
        print(batch_score)
        score[seed] = batch_score
        m[seed] = dic_trans(weighted.confusion_matrixs)
    np.savetxt("異常検知結果{}_重み.txt".format(div_seed), [score.mean()], fmt='%.5f')
    with open("異常検知結果{}混合行列_重み.txt".format(div_seed), "w") as f:
        json.dump(m, f, indent=4)

    m2 = np.zeros((20, 4), int)
    index = np.arange(20)
    columns = ["TN", "FP", "FN", "P"]
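# m2, index and columns above look like scaffolding for a per-seed confusion-count
# table; a hedged sketch of how they might be combined (the pandas DataFrame layout is
# an assumption, and the exact shape of the dic_trans output is unknown here):
import numpy as np
import pandas as pd

m2 = np.zeros((20, 4), int)
index = np.arange(20)
columns = ["TN", "FP", "FN", "P"]
df_conf = pd.DataFrame(m2, index=index, columns=columns)
print(df_conf.head())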
def main(model='en_core_web_sm', output_dir=None, n_iter=100, train_size=0.8):
    """Load the model, set up the pipeline and train the entity recognizer."""
    ### Split the NER data into train and test sets
    N = len(NER_DATA)
    cutoff = int(N * train_size)
    TRAIN_DATA = NER_DATA[:cutoff]
    TEST_DATA = NER_DATA[cutoff:]

    ### Load a pre-trained model
    nlp = load_model(model_name=model)
    print('Loaded model "%s"' % model)

    ### Remove the pre-trained named entity recognizer, if present
    if 'ner' in nlp.pipe_names:
        nlp.remove_pipe('ner')

    ### Add a blank named entity recognizer
    ner = nlp.create_pipe('ner')
    nlp.add_pipe(ner, last=True)

    ### Add labels
    for _, annotations in TRAIN_DATA:
        for ent in annotations.get('entities'):
            ner.add_label(ent[2])

    ### Only train the NER by disabling other pipes
    pipes_to_disable = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
    with nlp.disable_pipes(*pipes_to_disable):
        print('\nBEGIN TRAINING \n')
        optimizer = nlp.begin_training()
        for itn in range(n_iter):
            random.shuffle(TRAIN_DATA)
            losses = {}
            ### Batch up the examples using spaCy's minibatch
            batches = minibatch(TRAIN_DATA, size=compounding(4., 32., 1.001))
            for batch in batches:
                texts, annotations = zip(*batch)
                nlp.update(
                    texts,          # batch of texts
                    annotations,    # batch of annotations
                    drop=0.5,       # dropout - make it harder to memorise data
                    sgd=optimizer,  # callable to update weights
                    losses=losses)
            print('Losses @ i={}: {}'.format(itn, losses))

    ### Print statements to make sure the new model works
    test_model(nlp, TRAIN_DATA, 'TRAINING')
    test_model(nlp, TEST_DATA, 'TESTING')

    ### Save model to output directory
    if output_dir is not None:
        output_dir = Path(output_dir)
        if not output_dir.exists():
            output_dir.mkdir()
        nlp.to_disk(output_dir)
        print('\nSaved model to', output_dir)

        ### Test the saved model to make sure it saved correctly
        try:
            nlp2 = spacy.load(output_dir)
            doc = nlp2("It's a test, not a trap!")
        except Exception as e:
            raise ValueError('Failed to load newly saved model.') from e
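# test_model is not defined in this snippet; in spaCy v2 the check it performs likely
# boils down to something like the sketch below (the function name and sample sentence
# are assumptions used only to illustrate reading predicted entities):
def inspect_entities(nlp, text="Apple is opening an office in Paris."):
    doc = nlp(text)
    for ent in doc.ents:
        print(ent.text, ent.start_char, ent.end_char, ent.label_)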
from datetime import datetime
from operator import itemgetter
import spacy
import re
import pickle
import os

from loader import load_model
from addins import regex_patterns, geo

nlp = load_model()

with open('data/dicts/names.set', 'rb') as f:
    names_set = pickle.load(f)


def scrub_ml(doc):
    """
    Performs named entity recognition. Assumes that any entities are PHI.

    Positional Args:
        doc: Object
            A sequence of Token objects in Spacy.

    Returns:
        A list of PHI entries (from ML) with location indices.
    """
    for ent in doc.ents:
        ent.merge()
    entries = map(replace_entity, doc)
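# replace_entity is referenced above but not defined in this snippet; a minimal sketch
# of what it might look like for PHI scrubbing (the '[PHI]' placeholder and the token
# attributes used are assumptions, not the project's actual implementation):
def replace_entity(token):
    """Return a placeholder for tokens that spaCy tagged as part of an entity."""
    if token.ent_type_:  # a non-empty label means the token is inside an entity
        return '[PHI]'
    return token.text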
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.externals.joblib import load, dump

import loader            # project-local module providing load_file/drop_tables/load_model
import tables_to_leave   # project-local module providing TABLE_LIST

LOAD_PATH = 'C:\\Users\\Tom\\PycharmProjects\\wiseNeuro\\data\\pred.csv'
SAVE_PATH = 'C:\\Users\\Tom\\PycharmProjects\\wiseNeuro\\data\\prediction.csv'

df = loader.load_file(LOAD_PATH, ',')
df = loader.drop_tables(df, tables_to_leave.TABLE_LIST)
df = df.fillna(0)
df = df.to_numpy(dtype=float)

scaler = load('std_scaler.bin')
# scaler = StandardScaler()
# scaler.fit(df)
df = scaler.transform(df)

model = loader.load_model()
prediction = model.predict(df)

newframe = pd.DataFrame()
newframe['result'] = prediction.flatten().astype(float)


def drop_result(df):
    df_head = list(df)
    for head in df_head:
        if head not in ('RAJ2000', 'DEJ2000'):
            del df[head]
    return df


df = loader.load_file(LOAD_PATH, ',')
df = drop_result(df)
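# The snippet above stops after rebuilding df with only the RAJ2000/DEJ2000 columns.
# A hedged guess at the intended final step, reusing df, newframe and SAVE_PATH from
# the code above (so this is a continuation sketch, not the project's actual code):
df['result'] = newframe['result'].values
df.to_csv(SAVE_PATH, index=False)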
def main():
    parser = create_arg_parser()
    args = parser.parse_args()
    load_config(args, args.load_FAdir)
    load_config(args, args.load_CPSdir)
    emb_type = 'Word2VecWiki'
    dl = DatasetLoading(emb_type, args.emb_path,
                        exo1_word='僕', exo2_word='おまえ', exoX_word='これ')
    dl.making_intra_df()
    trains_dict, _, tests_dict = dl.split_each_domain('intra')
    statistics_of_each_case_type = train.init_statistics_of_each_case_type(
        trains_dict, args.case, args.media)
    bilstm_FT = train.initialize_model(args.gpu,
                                       vocab_size=len(dl.wv.index2word),
                                       v_vec=dl.wv.vectors,
                                       dropout_ratio=0.2,
                                       n_layers=3,
                                       model='Base',
                                       statistics_of_each_case_type=None)
    bilstm_FA = train.initialize_model(args.gpu,
                                       vocab_size=len(dl.wv.index2word),
                                       v_vec=dl.wv.vectors,
                                       dropout_ratio=0.2,
                                       n_layers=3,
                                       model='FA',
                                       statistics_of_each_case_type=None)
    bilstm_CPS = train.initialize_model(
        args.gpu,
        vocab_size=len(dl.wv.index2word),
        v_vec=dl.wv.vectors,
        dropout_ratio=0.2,
        n_layers=3,
        model='CPS',
        statistics_of_each_case_type=statistics_of_each_case_type)
    results = {}
    logs = {}
    # domain = 'All'
    pprint(args.__dict__)
    for domain in tests_dict.keys():
        load_config(args, args.load_FTdir + f'/{domain}/{args.case}')
        val_results_FT = max_f1_epochs_of_vals(args.load_FTdir + f'/{domain}/{args.case}')
        epoch_FT = val_results_FT[domain]['epoch']
        val_results_FA = max_f1_epochs_of_vals(args.load_FAdir)
        epoch_FA = val_results_FA[domain]['epoch']
        val_results_CPS = max_f1_epochs_of_vals(args.load_CPSdir)
        epoch_CPS = val_results_CPS[domain]['epoch']
        load_model(epoch_FT, bilstm_FT, args.load_FTdir + f'/{domain}/{args.case}', args.gpu)
        load_model(epoch_FA, bilstm_FA, args.load_FAdir, args.gpu)
        load_model(epoch_CPS, bilstm_CPS, args.load_CPSdir, args.gpu)
        _results, _logs = run(tests_dict, bilstm_FT, bilstm_FA, bilstm_CPS, 1, args)
        results[domain] = _results[domain]
        results[domain]['epoch_FT'] = epoch_FT
        results[domain]['epoch_FA'] = epoch_FA
        results[domain]['epoch_CPS'] = epoch_CPS
        logs[domain] = _logs[domain]
    dump_dict(results, args.dump_dir + f'/{args.case}', 'test_logs')
    dump_predict_logs(logs, args.dump_dir + f'/{args.case}')