def calc_even_results(vals, bilstm, args):
    """Evaluate the model at every even-numbered checkpoint epoch."""
    results = {}
    even_epochs = return_even_epochs(args.dump_dir)
    for epoch in even_epochs:
        load_model(epoch, bilstm, args.load_dir, args.gpu)
        _results, _ = test.run(vals, bilstm, args)
        results[epoch] = _results
    # Also return the last epoch evaluated (assumes at least one even
    # checkpoint exists, otherwise `epoch` is unbound here).
    return results, epoch


def main():
    parser = create_arg_parser()
    args = parser.parse_args()
    load_config(args)

    emb_type = 'Word2VecWiki'

    dl = DatasetLoading(emb_type,
                        args.emb_path,
                        exo1_word='僕',  # exophoric first person, "I"
                        exo2_word='おまえ',  # exophoric second person, "you"
                        exoX_word='これ')  # exophoric demonstrative, "this"
    dl.making_intra_df()

    trains_dict, _, tests_dict = dl.split_each_domain('intra')

    if args.model in ('CPS', 'MIX'):
        statistics_of_each_case_type = train.init_statistics_of_each_case_type(
            trains_dict, args.case, args.media)
    else:
        statistics_of_each_case_type = None

    bilstm = train.initialize_model(
        args.gpu,
        vocab_size=len(dl.wv.index2word),
        v_vec=dl.wv.vectors,
        dropout_ratio=0.2,
        n_layers=3,
        model=args.model,
        statistics_of_each_case_type=statistics_of_each_case_type)

    pprint(args.__dict__)
    val_results = max_f1_epochs_of_vals(args.load_dir)
    results = {}
    logs = {}
    # Evaluate once with the epoch that scored the best validation F1
    # across all domains combined, then re-evaluate per domain below.
    domain = 'All'
    epoch = val_results[domain]['epoch']
    load_model(epoch, bilstm, args.load_dir, args.gpu)
    _results, _ = run(tests_dict, bilstm, 1, args)
    results[domain] = _results[domain]
    results[domain]['epoch'] = epoch
    for domain in tests_dict.keys():
        epoch = val_results[domain]['epoch']
        load_model(epoch, bilstm, args.load_dir, args.gpu)
        _results, _logs = run(tests_dict, bilstm, 1, args)
        results[domain] = _results[domain]
        results[domain]['epoch'] = epoch
        logs[domain] = _logs[domain]
    dump_dict(results, args.load_dir, 'test_logs')
    dump_predict_logs(logs, args.load_dir)
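The `calc_even_results` helper at the top of this example calls `return_even_epochs`, which is not shown. A minimal sketch of one plausible implementation, assuming checkpoints are saved under `dump_dir` with the epoch number in the filename (the `model-{epoch}.pkl` naming scheme is hypothetical, not taken from the original project):

import os
import re


def return_even_epochs(dump_dir):
    # Hypothetical helper: scan dump_dir for checkpoint files, pull the
    # epoch number out of each filename, and keep only the even epochs.
    epochs = []
    for fname in os.listdir(dump_dir):
        match = re.match(r'model-(\d+)\.pkl$', fname)
        if match:
            epochs.append(int(match.group(1)))
    return sorted(e for e in epochs if e % 2 == 0)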
Example #3
def main():
    parser = create_arg_parser()
    args = parser.parse_args()
    load_config(args)

    emb_type = 'Word2VecWiki'

    dl = DatasetLoading(emb_type,
                        args.emb_path,
                        exo1_word='僕',
                        exo2_word='おまえ',
                        exoX_word='これ')
    dl.making_intra_df()

    trains_dict, vals_dict, _ = dl.split_each_domain('intra')

    if args.model == 'MIX':
        statistics_of_each_case_type = train.init_statistics_of_each_case_type(
            trains_dict, args.case, args.media)
    else:
        statistics_of_each_case_type = None

    bilstm = train.initialize_model(
        args.gpu,
        vocab_size=len(dl.wv.index2word),
        v_vec=dl.wv.vectors,
        dropout_ratio=0.2,
        n_layers=3,
        model=args.model,
        statistics_of_each_case_type=statistics_of_each_case_type)

    pprint(args.__dict__)
    val_results = test.max_f1_epochs_of_vals(args.load_dir)

    for domain in ['OC', 'OY', 'OW', 'PB', 'PM', 'PN']:
        print(f'--- start {domain} fine tuning ---')
        dump_dict(args.__dict__, args.dump_dir + f'/{domain}/{args.case}',
                  'args')
        epoch = val_results[domain]['epoch']
        load_model(epoch, bilstm, args.load_dir, args.gpu)

        # lr = 0.0001 might also be fine
        run(trains_dict[domain],
            vals_dict,
            bilstm,
            args,
            ft_domain=domain,
            lr=0.0001,
            batch_size=64)
Example #4
def invoke_model(model_name):
    model_module_obj = loader.load_model(model_name)

    name = model_module_obj.get_name()
    range_val = model_module_obj.get_range(2)
    resource = model_module_obj.get_resource()
    print(name, range_val, resource)
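`invoke_model` treats the loaded module as a plugin exposing `get_name`, `get_range`, and `get_resource`. A hypothetical stand-in module that would satisfy this interface (only the function names come from the calls above; the semantics are assumptions):

# hypothetical_model.py -- stand-in plugin for loader.load_model
def get_name():
    return "hypothetical_model"


def get_range(n):
    # Assumed meaning: return n values; the real contract is not shown.
    return list(range(n))


def get_resource():
    return {"cpu": 1}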
Example #5
def run(config):

    assert os.path.isfile(config.data_path), 'File not found: [{}]'.format(
        config.data_path)

    logging.info("##################### Start Training")
    logging.debug(vars(config))

    logging.info("##################### Build Tokenizer")
    tokenizer = load_tokenizer(config)
    ## Set tokenizer-derived params on the config
    config.vocab_size = tokenizer.vocab_size
    config.tag_size = tokenizer.tag_size
    config.pad_token_id = tokenizer.pad_token_id

    ## Load data loader
    logging.info("##################### Load DataLoader")
    loader = load_dataloader(config, tokenizer)

    # Shrink the per-step batch so the effective batch size (after
    # gradient accumulation) stays equal to the configured batch_size.
    config.batch_size = int(config.batch_size /
                            config.gradient_accumulation_steps)
    logging.info("##################### adjusted batch size {}".format(
        config.batch_size))

    train, valid = loader.get_train_valid_dataset()
    logging.info("##################### Train Dataset size : [" +
                 str(len(train)) + "]")
    logging.info("##################### Valid Dataset size : [" +
                 str(len(valid)) + "]")
    train = DataLoader(train, batch_size=config.batch_size, shuffle=True)
    valid = DataLoader(valid, batch_size=config.batch_size, shuffle=False)

    logging.info("##################### Load Model")
    model = load_model(config, tokenizer)
    model = torch.nn.DataParallel(model)
    model.to(config.device)

    logging.info("##################### Load Trainer")
    trainer = load_trainer(config, model)

    ## Training
    logging.info("##################### Training..........")
    best_loss = trainer.train(train, valid)
    logging.info("##################### Best Training Loss : " +
                 str(best_loss))

    ## Testing
    test = loader.get_test_dataset()
    logging.info("##################### Test Dataset size : [" +
                 str(len(test)) + "]")
    test = DataLoader(test, batch_size=config.batch_size, shuffle=False)

    logging.info("##################### Testing..........")
    f1_score = trainer.test(test)
    logging.info("##################### Best Test f1_score : " + str(f1_score))

    result = [config.save_path, best_loss, f1_score]

    return result
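A hypothetical invocation of `run`: the attribute names below mirror only what `run` itself reads (`data_path`, `batch_size`, `gradient_accumulation_steps`, `device`, `save_path`), and the values are illustrative. The `load_*` helpers will almost certainly require additional fields, so treat this as a sketch rather than a complete config:

from argparse import Namespace

import torch

# Illustrative values only; none of these paths come from the original code.
config = Namespace(
    data_path='data/train.tsv',
    batch_size=32,
    gradient_accumulation_steps=2,
    device='cuda' if torch.cuda.is_available() else 'cpu',
    save_path='checkpoints/best.pt',
)
save_path, best_loss, f1 = run(config)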
Example #6
def main():
    parser = create_arg_parser()
    args = parser.parse_args()
    load_config(args)

    dl = DatasetLoading(args.emb_type,
                        args.emb_path,
                        media=args.media,
                        exo1_word=args.exo1_word,
                        exo2_word=args.exo2_word,
                        exoX_word=args.exoX_word)
    if args.dataset_type == 'intra':
        dl.making_intra_df()
    elif args.dataset_type == 'inter':
        dl.making_inter_df()
    else:
        raise ValueError(f'unknown dataset_type: {args.dataset_type}')

    _, _, tests = dl.split(args.dataset_type)

    bilstm = train.initialize_model(args.gpu,
                                    vocab_size=len(dl.wv.index2word),
                                    v_vec=dl.wv.vectors,
                                    emb_requires_grad=args.emb_requires_grad,
                                    args=args)

    pprint(args.__dict__)
    val_results = max_f1_epochs_of_vals(args.load_dir)
    results = {}
    logs = {}
    domain = 'All'
    epoch = val_results[domain]['epoch']
    load_model(epoch, bilstm, args.load_dir, args.gpu)
    _results, _ = run(tests, bilstm, args)
    results[domain] = _results[domain]
    results[domain]['epoch'] = epoch
    for domain in args.media:
        epoch = val_results[domain]['epoch']
        load_model(epoch, bilstm, args.load_dir, args.gpu)
        _results, _logs = run(tests, bilstm, args)
        results[domain] = _results[domain]
        results[domain]['epoch'] = epoch
        logs[domain] = _logs[domain]
    dump_dict(results, args.load_dir, 'test_logs')
    dump_predict_logs(logs, args.load_dir)
Example #7
import glob
import json
import math
import os

import numpy as np
from sgfmill import sgf  # assumption: sgfmill provides Sgf_game.from_bytes

import global_vars_go as gvg
import loader

kifuPath = "./kifu"

file_load_split = gvg.file_load_split
num_games = gvg.num_games

print("Loading game data...")

load_batches = math.ceil(num_games / file_load_split)
hm_epochs = gvg.hm_epochs
if gvg.cont_from_save.lower() == "true":
    model = loader.load_model_from_file(gvg.nn_type)
else:
    model = loader.load_model(gvg.nn_type)

for epoch in range(hm_epochs):
    print("Beginning new epoch...")
    for lb in range(load_batches):
        if (lb >= gvg.load_split_offset):
            games = []
            i = 0
            for filename in glob.glob(os.path.join(kifuPath, "*.sgf")):
                if lb * file_load_split <= i < (
                        lb + 1) * file_load_split and i < num_games:
                    with open(filename, "rb") as f:
                        games.append(sgf.Sgf_game.from_bytes(f.read()))
                i += 1

            print("Done loading file bach of", len(games), "games")
    for div_seed in range(5):
        _, no_weight = detection(18, model, seed=div_seed)
        print(no_weight.mean_score())
        np.savetxt("異常検知結果{}.txt".format(div_seed), [no_weight.mean_score()],
                   fmt='%.5f')
        with open("異常検知結果{}混合行列.txt".format(div_seed), "w") as f:
            json.dump(dic_trans(no_weight.confusion_matrixs), f, indent=4)
        # train, test = no_weight
        # pprint(test.mean_score())
        # pprint(test.confusion_matrixs)
        score = np.zeros(20)
        m = {i: None for i in range(20)}
        for seed in range(0, 20):
            print("de: {}, ".format(seed), end="")
            de = load_model(
                "./detection_model_nagato/weight_{}.pickle".format(seed))
            _, weighted = detection(18, model, weight=de.x, seed=div_seed)
            batch_score = weighted.mean_score()
            print(batch_score)
            score[seed] = batch_score
            m[seed] = dic_trans(weighted.confusion_matrixs)
        np.savetxt("異常検知結果{}_重み.txt".format(div_seed), [score.mean()],
                   fmt='%.5f')
        with open("異常検知結果{}混合行列_重み.txt".format(div_seed), "w") as f:
            json.dump(m, f, indent=4)

        m2 = np.zeros((20, 4), int)

        index = np.arange(20)
        columns = ["TN", "FP", "FN", "P"]
Example #9
def main(model='en_core_web_sm', output_dir=None, n_iter=100, train_size=0.8):
    """
    Load the model, set up the pipeline and train the entity recognizer.
    """

    ### Split the NER data into train and test sets
    N = len(NER_DATA)
    cutoff = int(N * train_size)
    TRAIN_DATA = NER_DATA[:cutoff]
    TEST_DATA = NER_DATA[cutoff:]

    ### Load a pre-trained model
    nlp = load_model(model_name=model)
    print('Loaded model "%s"' % model)

    ### Remove the pre-trained named entity recognizer, if present
    if 'ner' in nlp.pipe_names:
        nlp.remove_pipe('ner')

    ### Add a blank named entity recognizer
    ner = nlp.create_pipe('ner')
    nlp.add_pipe(ner, last=True)

    ### Add labels
    for _, annotations in TRAIN_DATA:
        for ent in annotations.get('entities'):
            ner.add_label(ent[2])

    ### Only train the NER by disabling other pipes
    pipes_to_disable = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
    with nlp.disable_pipes(*pipes_to_disable):
        print('\nBEGIN TRAINING \n')
        optimizer = nlp.begin_training()
        for itn in range(n_iter):
            random.shuffle(TRAIN_DATA)
            losses = {}
            ### Batch up the examples using spaCy's minibatch
            batches = minibatch(TRAIN_DATA, size=compounding(4., 32., 1.001))
            for batch in batches:
                texts, annotations = zip(*batch)
                nlp.update(
                    texts,  # batch of texts
                    annotations,  # batch of annotations
                    drop=0.5,  # dropout - make it harder to memorise data
                    sgd=optimizer,  # callable to update weights
                    losses=losses)
            print('Losses @ i={}: {}'.format(itn, losses))

    ### Print statements to make sure the new model works
    test_model(nlp, TRAIN_DATA, 'TRAINING')
    test_model(nlp, TEST_DATA, 'TESTING')

    ### Save model to output directory
    if output_dir is not None:
        output_dir = Path(output_dir)
        if not output_dir.exists():
            output_dir.mkdir()
        nlp.to_disk(output_dir)
        print('\nSaved model to', output_dir)

        ### Test the saved model to make sure it saved correctly
        try:
            nlp2 = spacy.load(output_dir)
            doc = nlp2("It's a test, not a trap!")
        except Exception as exc:
            raise ValueError('Failed to load newly saved model.') from exc
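`test_model` is called twice above but not defined in the snippet. A minimal sketch (the name and call signature come from the calls above; everything else is an assumption) that prints predicted entities next to the gold annotations:

def test_model(nlp, data, label):
    # Hypothetical evaluation helper: run the pipeline over each text
    # and show predictions alongside the gold entity spans.
    print('\n--- {} ---'.format(label))
    for text, annotations in data:
        doc = nlp(text)
        predicted = [(ent.text, ent.label_) for ent in doc.ents]
        print('Text:      {}'.format(text))
        print('Gold:      {}'.format(annotations.get('entities')))
        print('Predicted: {}'.format(predicted))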
Example #10
from datetime import datetime
from operator import itemgetter

import spacy
import re
import pickle
import os

from loader import load_model
from addins import regex_patterns, geo

nlp = load_model()

with open('data/dicts/names.set', 'rb') as f:
    names_set = pickle.load(f)


def scrub_ml(doc):
    """
    Performs named entity recognition. Assumes that any entities are PHI.

    Positional Args:
        doc: Object     A sequence of Token objects in Spacy.

    Returns:
        A list of PHI entries (from ML) with location indices.
    """

    for ent in doc.ents:
        ent.merge()
    entries = map(replace_entity, doc)
    # Materialize the lazy map into the list the docstring promises.
    return list(entries)
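`replace_entity` is not included in the snippet either. Given the docstring ("a list of PHI entries ... with location indices"), one plausible sketch returns the token text, character offset, and entity label for tokens inside recognized entities:

def replace_entity(token):
    # Hypothetical helper: report (text, char offset, label) for tokens
    # that belong to a named entity, None otherwise.
    if token.ent_type_:
        return (token.text, token.idx, token.ent_type_)
    return None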
Example #11
import pandas as pd
from sklearn.preprocessing import StandardScaler
from joblib import load, dump  # sklearn.externals.joblib is removed in modern scikit-learn

import loader  # project-local modules used below
import tables_to_leave

LOAD_PATH = 'C:\\Users\\Tom\\PycharmProjects\\wiseNeuro\\data\\pred.csv'
SAVE_PATH = 'C:\\Users\\Tom\\PycharmProjects\\wiseNeuro\\data\\prediction.csv'

df = loader.load_file(LOAD_PATH, ',')
df = loader.drop_tables(df, tables_to_leave.TABLE_LIST)
df = df.fillna(0)
df = df.to_numpy(dtype=float)
scaler = load('std_scaler.bin')  # reuse the scaler fitted during training
# scaler = StandardScaler()
# scaler.fit(df)
df = scaler.transform(df)
model = loader.load_model()
prediction = model.predict(df)
newframe = pd.DataFrame()
newframe['result'] = prediction.flatten().astype(float)


def drop_result(df):
    df_head = list(df)
    for head in df_head:
        if head not in ('RAJ2000', 'DEJ2000'):
            del df[head]
    return df


df = loader.load_file(LOAD_PATH, ',')
df = drop_result(df)
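The snippet stops here, and `SAVE_PATH` is declared but never used in the visible code. A hypothetical continuation, assuming the intent is to pair the surviving coordinate columns with the predictions and write them out:

# Hypothetical continuation (SAVE_PATH is unused in the visible snippet).
newframe['RAJ2000'] = df['RAJ2000']
newframe['DEJ2000'] = df['DEJ2000']
newframe.to_csv(SAVE_PATH, index=False)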
Example #12
def main():
    parser = create_arg_parser()
    args = parser.parse_args()
    load_config(args, args.load_FAdir)
    load_config(args, args.load_CPSdir)

    emb_type = 'Word2VecWiki'

    dl = DatasetLoading(emb_type,
                        args.emb_path,
                        exo1_word='僕',
                        exo2_word='おまえ',
                        exoX_word='これ')
    dl.making_intra_df()

    trains_dict, _, tests_dict = dl.split_each_domain('intra')

    statistics_of_each_case_type = train.init_statistics_of_each_case_type(
        trains_dict, args.case, args.media)

    bilstm_FT = train.initialize_model(args.gpu,
                                       vocab_size=len(dl.wv.index2word),
                                       v_vec=dl.wv.vectors,
                                       dropout_ratio=0.2,
                                       n_layers=3,
                                       model='Base',
                                       statistics_of_each_case_type=None)
    bilstm_FA = train.initialize_model(args.gpu,
                                       vocab_size=len(dl.wv.index2word),
                                       v_vec=dl.wv.vectors,
                                       dropout_ratio=0.2,
                                       n_layers=3,
                                       model='FA',
                                       statistics_of_each_case_type=None)
    bilstm_CPS = train.initialize_model(
        args.gpu,
        vocab_size=len(dl.wv.index2word),
        v_vec=dl.wv.vectors,
        dropout_ratio=0.2,
        n_layers=3,
        model='CPS',
        statistics_of_each_case_type=statistics_of_each_case_type)

    results = {}
    logs = {}
    # domain = 'All'

    pprint(args.__dict__)
    for domain in tests_dict.keys():
        load_config(args, args.load_FTdir + f'/{domain}/{args.case}')
        val_results_FT = max_f1_epochs_of_vals(args.load_FTdir +
                                               f'/{domain}/{args.case}')
        epoch_FT = val_results_FT[domain]['epoch']
        val_results_FA = max_f1_epochs_of_vals(args.load_FAdir)
        epoch_FA = val_results_FA[domain]['epoch']
        val_results_CPS = max_f1_epochs_of_vals(args.load_CPSdir)
        epoch_CPS = val_results_CPS[domain]['epoch']

        load_model(epoch_FT, bilstm_FT,
                   args.load_FTdir + f'/{domain}/{args.case}', args.gpu)
        load_model(epoch_FA, bilstm_FA, args.load_FAdir, args.gpu)
        load_model(epoch_CPS, bilstm_CPS, args.load_CPSdir, args.gpu)

        _results, _logs = run(tests_dict, bilstm_FT, bilstm_FA, bilstm_CPS, 1,
                              args)
        results[domain] = _results[domain]
        results[domain]['epoch_FT'] = epoch_FT
        results[domain]['epoch_FA'] = epoch_FA
        results[domain]['epoch_CPS'] = epoch_CPS
        logs[domain] = _logs[domain]
    dump_dict(results, args.dump_dir + f'/{args.case}', 'test_logs')
    dump_predict_logs(logs, args.dump_dir + f'/{args.case}')