Code Example #1
File: main.py Project: johnson7788/GLRE
def train(parameters):
    # Directory where the model is saved, e.g. './results/docred-dev/docred_basebert_full/'
    model_folder = setup_log(parameters, parameters['save_pred'] + '_train')
    set_seed(parameters['seed'])

    ###################################
    # Data Loading
    ###################################
    # if parameters['re_train']:
    #     print('\nLoading mappings ...')
    #     train_loader = load_mappings(parameters['remodelfile'])
    # else:
    print('Loading training data ...')
    train_loader = DataLoader(parameters['train_data'], parameters)
    train_loader(embeds=parameters['embeds'], parameters=parameters)
    train_data, _ = DocRelationDataset(train_loader, 'train', parameters,
                                       train_loader).__call__()

    print('\nLoading testing data ...')
    test_loader = DataLoader(parameters['test_data'], parameters, train_loader)
    test_loader(parameters=parameters)
    test_data, prune_recall = DocRelationDataset(test_loader, 'test',
                                                 parameters,
                                                 train_loader).__call__()

    # print("prune_recall-->", str(prune_recall))
    ###################################
    # Training
    ###################################
    trainer = Trainer(train_loader, parameters, {
        'train': train_data,
        'test': test_data
    }, model_folder, prune_recall)

    trainer.run()
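A minimal sketch of the parameters dict this train() reads, inferred only from the keys used above (save_pred, seed, train_data, test_data, embeds); in the GLRE code these values are normally read from the project's config file, so the paths and defaults below are illustrative assumptions rather than the actual configuration.

# Illustrative only: the keys mirror what train() accesses; the values are placeholders.
parameters = {
    'save_pred': 'docred_basebert_full',       # prefix for the log/results folder
    'seed': 0,
    'train_data': 'prepro_data/train_annotated.data',
    'test_data': 'prepro_data/dev.data',
    'embeds': None,                            # pre-trained word embeddings, if any
}
train(parameters)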
Code Example #2
    def measure_accuracy(self, nregistered=40, nunregistered=40):
        s = System()
        registered = sample(list(DataLoader.get_img_data('registered')),
                            nregistered)
        unregistered = sample(list(DataLoader.get_img_data('unregistered')),
                              nunregistered)

        self._test_identification(s, registered)
        self._test_unregistered(s, unregistered)
Code Example #3
File: eval.py Project: pnarsina/w266_final
def evaluate_model(evalparams):

    torch.manual_seed(evalparams.seed)
    random.seed(1234)
    if evalparams.cpu:
        evalparams.cuda = False
    elif evalparams.cuda:
        torch.cuda.manual_seed(evalparams.seed)

    # load opt
    print(evalparams.model_dir, evalparams.model)
    #     model_file = evalparams.model_dir + "/" + evalparams.model
    model_file = 'best_model.pt'
    print("Loading model from {}".format(model_file))
    opt = torch_utils.load_config(model_file)
    model = RelationModel(opt)
    model.load(model_file)

    # load vocab
    vocab_file = evalparams.model_dir + '/vocab.pkl'
    vocab = Vocab(vocab_file, load=True)
    assert opt[
        'vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

    # load data
    data_file = opt['data_dir'] + '/{}.json'.format(evalparams.dataset)
    print("Loading data from {} with batch size {}...".format(
        data_file, opt['batch_size']))
    batch = DataLoader(data_file,
                       opt['batch_size'],
                       opt,
                       vocab,
                       evaluation=True)

    helper.print_config(opt)
    id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])

    predictions = []
    all_probs = []
    for i, b in enumerate(batch):
        preds, probs, _ = model.predict(b)
        predictions += preds
        all_probs += probs
    predictions = [id2label[p] for p in predictions]
    p, r, f1 = scorer.score(batch.gold(), predictions, verbose=True)

    # save probability scores
    if len(evalparams.out) > 0:
        helper.ensure_dir(os.path.dirname(evalparams.out))
        with open(evalparams.out, 'wb') as outfile:
            pickle.dump(all_probs, outfile)
        print("Prediction scores saved to {}.".format(evalparams.out))

    print("Evaluation ended.")

    return (batch.gold(), predictions, model)
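A hypothetical way to invoke evaluate_model(); the attribute names below mirror exactly what the function reads (seed, cpu, cuda, model_dir, model, dataset, out) and the values are placeholders. Note that, as written above, the function loads 'best_model.pt' from the current working directory rather than from model_dir.

from argparse import Namespace

# Placeholder values; adjust paths to your own checkpoint and data layout.
evalparams = Namespace(
    seed=1234,
    cpu=False,
    cuda=True,
    model_dir='saved_models/00',
    model='best_model.pt',
    dataset='test',           # resolves to opt['data_dir'] + '/test.json'
    out='eval_probs.pkl',     # where the probability scores are pickled
)

gold_labels, predictions, model = evaluate_model(evalparams)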
Code Example #4
File: train.py Project: abdulaziz-8694/Hyperface
 def __init__(self, config):
     self.batch_size = config.batch_size
     self.checkpoint_path = config.checkpoint_path
     self.model_path = config.model_save_path
     self.epochs = config.epochs
     self.arch_type = config.arch_type
     self.sample_data_file = config.sample_data_file
     self.input_shape = config.input_shape
     self.learning_rate = config.learning_rate
     self.data_loader = DataLoader(self.sample_data_file)
Code Example #5
def train(parameters):
    model_folder = setup_log(parameters, parameters['save_pred'] + '_train')
    set_seed(parameters['seed'])

    ###################################
    # Data Loading
    ###################################
    # if parameters['re_train']:
    #     print('\nLoading mappings ...')
    #     train_loader = load_mappings(parameters['remodelfile'])
    # else:
    flag = False
    processed_dataset = parameters['remodelfile']
    if flag and os.path.exists(os.path.join(processed_dataset, 'train_loader.pkl')):
        with open(os.path.join(processed_dataset, 'train_loader.pkl'), 'rb') as f:
            train_loader = pkl.load(f)
        with open(os.path.join(processed_dataset, 'train_data.pkl'), 'rb') as f:
            train_data = pkl.load(f)
        with open(os.path.join(processed_dataset, 'test_data.pkl'), 'rb') as f:
            test_data = pkl.load(f)
        with open(os.path.join(processed_dataset, 'prune_recall.pkl'), 'rb') as f:
            prune_recall = pkl.load(f)
    # print('Loading training data ...')
    else:
        train_loader = DataLoader(parameters['train_data'], parameters)
        train_loader(embeds=parameters['embeds'], parameters=parameters)
        train_data, _ = DocRelationDataset(train_loader, 'train', parameters, train_loader).__call__()
        # operate_data(train_data, "train_data.json")
        print('\nLoading testing data ...')
        test_loader = DataLoader(parameters['test_data'], parameters, train_loader)
        test_loader(parameters=parameters)
        test_data, prune_recall = DocRelationDataset(test_loader, 'test', parameters, train_loader).__call__()
        with open(os.path.join(processed_dataset, 'train_loader.pkl'), 'wb') as f:
            pkl.dump(train_loader, f, pkl.HIGHEST_PROTOCOL)
        with open(os.path.join(processed_dataset, 'train_data.pkl'), 'wb') as f:
            pkl.dump(train_data, f, pkl.HIGHEST_PROTOCOL)
        with open(os.path.join(processed_dataset, 'test_data.pkl'), 'wb') as f:
            pkl.dump(test_data, f, pkl.HIGHEST_PROTOCOL)
        with open(os.path.join(processed_dataset, 'prune_recall.pkl'), 'wb') as f:
            pkl.dump(prune_recall, f, pkl.HIGHEST_PROTOCOL)

    #

    ###################################
    # Training
    ###################################
    trainer = Trainer(train_loader, parameters, {'train': train_data, 'test': test_data}, model_folder, prune_recall)

    trainer.run()
    write_metrics(trainer, model_folder)

    if parameters['plot']:
        plot_learning_curve(trainer, model_folder)
        plot_P_R(trainer, model_folder)
Code Example #6
File: processor.py Project: Molten-Ice/WhiteBoxAI
def data_processor(bs, url="MNIST_URL"):
    x_train, y_train, x_valid, y_valid = get_data(url)
    train_mean, train_std = x_train.mean(), x_train.std()
    x_train = normalize(x_train, train_mean, train_std)
    # NB: Use training, not validation mean for validation set
    x_valid = normalize(x_valid, train_mean, train_std)

    train_ds, valid_ds = Dataset(x_train, y_train), Dataset(x_valid, y_valid)

    train_samp = Sampler(train_ds, bs, shuffle=True)
    valid_samp = Sampler(valid_ds, bs, shuffle=False)

    train_dl = DataLoader(train_ds, sampler=train_samp)
    valid_dl = DataLoader(valid_ds, sampler=valid_samp)
    return train_dl, valid_dl
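The helpers used above (get_data, normalize, Dataset, Sampler, DataLoader) are presumably defined elsewhere in the same project; as a rough, assumed sketch of what that minimal Dataset/Sampler/DataLoader trio could look like (the real classes in the repo may differ):

import torch

class Dataset:
    """Pairs an inputs tensor with a targets tensor and supports indexing."""
    def __init__(self, x, y): self.x, self.y = x, y
    def __len__(self): return len(self.x)
    def __getitem__(self, i): return self.x[i], self.y[i]

class Sampler:
    """Yields batches of indices, optionally reshuffled on every pass."""
    def __init__(self, ds, bs, shuffle=False):
        self.n, self.bs, self.shuffle = len(ds), bs, shuffle
    def __iter__(self):
        idxs = torch.randperm(self.n) if self.shuffle else torch.arange(self.n)
        for i in range(0, self.n, self.bs):
            yield idxs[i:i + self.bs]

def collate(batch):
    xs, ys = zip(*batch)
    return torch.stack(xs), torch.stack(ys)

class DataLoader:
    """Draws index batches from the sampler and collates the examples."""
    def __init__(self, ds, sampler, collate_fn=collate):
        self.ds, self.sampler, self.collate_fn = ds, sampler, collate_fn
    def __iter__(self):
        for idxs in self.sampler:
            yield self.collate_fn([self.ds[i] for i in idxs])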
Code Example #7
def predict(sentences):
    test_data = list()
    for sent in sentences:
        tokens = jieba.lcut(sent, cut_all=False)
        test_data.append({
            'text': tokens,
            'aspects': [constant.ID_TO_ASP[0]],
            'polarities': [constant.ID_TO_LABEL[0]]
        })
    test_batch = DataLoader(test_data, opt['batch_size'], opt, vocab)
    print("Predicting on test set...")
    labels = list()
    for i, (batch, indices) in enumerate(test_batch):
        predicts = trainer.predict(batch)
        labels += [predicts[k] for k in indices]
    results = list()
    for i, label in enumerate(labels):
        aspects = [x1 for x1, x2 in label]
        polarities = [x2 for x1, x2 in label]
        results.append({
            'text': test_data[i]['text'],
            'aspects': aspects,
            'polarities': polarities
        })
    return results
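A hypothetical call to predict(); it assumes jieba, opt, vocab, trainer, and constant are already initialised as in the rest of that script, and the sample sentence is only an illustration.

if __name__ == '__main__':
    results = predict(["这家餐厅的菜很好吃，但是服务太慢了。"])
    for r in results:
        print(r['text'], r['aspects'], r['polarities'])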
Code Example #8
def _test(parameters):
    model_folder = setup_log(parameters, parameters['save_pred'] + '_test')

    print('\nLoading mappings ...')
    train_loader = load_mappings(parameters['remodelfile'])
    flag = True
    print('\nLoading testing data ...')
    processed_dataset = parameters['remodelfile']
    if flag and os.path.exists(os.path.join(processed_dataset, 'test_test_data.pkl')):
        with open(os.path.join(processed_dataset, 'test_test_data.pkl'), 'rb') as f:
            test_data = pkl.load(f)
        with open(os.path.join(processed_dataset, 'test_prune_recall.pkl'), 'rb') as f:
            prune_recall = pkl.load(f)
    else:
        test_loader = DataLoader(parameters['test_data'], parameters, train_loader)
        test_loader(parameters=parameters)
        test_data, prune_recall = DocRelationDataset(test_loader, 'test', parameters, train_loader).__call__()
        with open(os.path.join(processed_dataset, 'test_test_data.pkl'), 'wb') as f:
            pkl.dump(test_data, f, pkl.HIGHEST_PROTOCOL)
        with open(os.path.join(processed_dataset, 'test_prune_recall.pkl'), 'wb') as f:
            pkl.dump(prune_recall, f, pkl.HIGHEST_PROTOCOL)
    m = Trainer(train_loader, parameters, {'train': [], 'test': test_data}, model_folder, prune_recall)
    trainer = load_model(parameters['remodelfile'], m)
    _, _, _, p, r = trainer.eval_epoch(final=True, save_predictions=True)
    print('Saving test metrics ... ', end="")
    np.savetxt(parameters['remodelfile'] + "/p.txt", p)
    np.savetxt(parameters['remodelfile'] + "/r.txt", r)

    # b = numpy.loadtxt("filename.txt", delimiter=',')
    print('DONE')
Code Example #9
 def _create_database(self):
     self._images = dict()
     for key, img in DataLoader.get_img_data():
         if key not in self._images.keys():
             self._images[key] = img
     self._graphs = {
         k: Graph(i).get_stats()
         for k, i in self._images.items()
     }
Code Example #10
    def __init__(self):
        print("init")
        #self.clasificador = Clasificador()
        #self.autoencoder = Autoencoder()
        self.pathLib = os.path.normpath(os.getcwd() +
                                        "/lib/deepfakes/faceswap.py")
        self.pathImgGenerada = os.path.normpath(os.getcwd() + "/tmp/faces")
        self.modeloIniciado = False
        self.dataLoader = DataLoader()
        self.dataLoader.setPathClassData(
            os.path.normpath(os.getcwd() + "/bd/categoriasImg"))
        self.dataLoader.setPathTrainingData(
            os.path.normpath(os.getcwd() + "/bd/categoriasImg"))
        self.dataLoader.cargarClases()
        self.setNumClasses(self.dataLoader.getNumClasses())
        self.classes = self.dataLoader.getClasses()
        self.threshold = 5e-8
        self.inputDim = 64

        self.batchSize = 40
        self.dataLoader.setBatchSize(self.batchSize)
        self.epochs = 10
Code Example #11
def get_scores(data_file, opt, vocab, model):
    print(
        "Loading data from {} with batch size {}...".format(
            data_file, opt["batch_size"]
        )
    )
    batch = DataLoader(data_file, opt["batch_size"], opt, vocab, evaluation=True)

    predictions = []
    all_probs = []
    for i, b in enumerate(batch):
        preds, probs, attn_weights, _ = model.predict(b)
        predictions += preds
        all_probs += probs
    predictions = [id2label[p] for p in predictions]
    
    # print("predictions")
    # for a, b in zip(batch.gold(), predictions):
    # 	print(f"{a:<28} {b:<28}")

    p, r, f1 = scorer.score(batch.gold(), predictions, verbose=False)
    return p, r, f1
Code Example #12
File: attack.py Project: Neehan/robust-re
 def modelfn(inp):
     batch = DataLoader(json.dumps([inp]),
                        3,
                        opt,
                        vocab,
                        evaluation=True,
                        load_from_file=False)
     predictions = []
     all_probs = []
     for i, b in enumerate(batch):
         preds, probs, _ = model.predict(b)
         predictions += preds
         all_probs += probs
     predictions = [id2label[p] for p in predictions]
     return all_probs[0], predictions
Code Example #13
File: main.py Project: johnson7788/GLRE
def _test(parameters):
    model_folder = setup_log(parameters, parameters['save_pred'] + '_test')

    print('\nLoading mappings ...')
    train_loader = load_mappings(parameters['remodelfile'])

    print('\nLoading testing data ...')
    test_loader = DataLoader(parameters['test_data'], parameters, train_loader)
    test_loader(parameters=parameters)
    test_data, prune_recall = DocRelationDataset(test_loader, 'test',
                                                 parameters,
                                                 train_loader).__call__()

    m = Trainer(train_loader, parameters, {
        'train': [],
        'test': test_data
    }, model_folder, prune_recall)
    trainer = load_model(parameters['remodelfile'], m)
    trainer.eval_epoch(final=True, save_predictions=True)
Code Example #14
File: debiased_train.py Project: Neehan/robust-re
def get_biased_model_class_probs(args):
    # load opt
    model_file = args.model_dir + "/" + args.model
    print("Loading model from {}".format(model_file))
    opt = torch_utils.load_config(model_file)
    model = RelationModel(opt)
    model.load(model_file)

    # load vocab
    vocab_file = args.model_dir + "/vocab.pkl"
    vocab = Vocab(vocab_file, load=True)
    assert opt[
        "vocab_size"] == vocab.size, "Vocab size must match that in the saved model."
    opt["vocab_size"] = vocab.size
    emb_file = opt["vocab_dir"] + "/embedding.npy"
    emb_matrix = np.load(emb_file)
    assert emb_matrix.shape[0] == vocab.size
    assert emb_matrix.shape[1] == opt["emb_dim"]

    # load data
    data_file = args.data_dir + "/{}".format(args.data_name)
    print("Loading data from {} with batch size {}...".format(
        data_file, opt["batch_size"]))
    batch = DataLoader(data_file,
                       opt["batch_size"],
                       opt,
                       vocab,
                       evaluation=True)

    # helper.print_config(opt)
    id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])

    all_probs = []
    for i, b in enumerate(batch):
        preds, probs, _ = model.predict(b)
        all_probs.append(probs)
    return all_probs
Code Example #15
# load vocab
vocab_file = opt['vocab_dir'] + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
opt['vocab_size'] = vocab.size
emb_file = opt['vocab_dir'] + '/embedding.npy'
emb_matrix = np.load(emb_file)
assert emb_matrix.shape[0] == vocab.size
assert emb_matrix.shape[1] == opt['emb_dim']

# load data
print("Loading data from {} with batch size {}...".format(
    opt['data_dir'], opt['batch_size']))
train_batch = DataLoader(opt['data_dir'] + '/train.json',
                         opt['batch_size'],
                         opt,
                         vocab,
                         evaluation=False)
dev_batch = DataLoader(opt['data_dir'] + '/dev.json',
                       opt['batch_size'],
                       opt,
                       vocab,
                       evaluation=True)

model_id = opt['id'] if len(opt['id']) > 1 else '0' + opt['id']
model_save_dir = opt['save_dir'] + '/' + model_id
opt['model_save_dir'] = model_save_dir
helper.ensure_dir(model_save_dir, verbose=True)

# save config
helper.save_config(opt, model_save_dir + '/config.json', verbose=True)
Code Example #16
    # print("eval samples of subj:"+subj+" obj:"+obj)
    # args.model_dir = 'saved_models/02'
    # if os.path.exists(args.model_dir+'/'+subj+"_"+obj+"_"+"best_model.pt"):
    #     model_file = args.model_dir +'/'+subj+"_"+obj+"_"+"best_model.pt"
    # else:
    #     model_file = args.model_dir + '/best_model.pt'
    model_file = args.model_dir + '/best_model.pt'
    print("Loading model from {}".format(model_file))
    opt = torch_utils.load_config(model_file)
    data_file = opt['data_dir'] + '/{}.json'.format(args.dataset)
    trainer = GCNTrainer(opt, lbstokens=lbstokens)
    trainer.load(model_file)
    batch = DataLoader([data_file],
                       opt['batch_size'],
                       opt,
                       vocab,
                       evaluation=True,
                       corefresolve=True)
    batch_iter = tqdm(batch)

    all_probs = []
    samples = []
    for i, b in enumerate(batch_iter):
        preds, probs, _, sample = trainer.predict(b)
        predictions += preds
        all_probs += probs
        # effsum+=lab_eff
        # lab_nums+=lab_num
        samples = samples + sample

    key += batch.gold()
Code Example #17

# load spacy model
spacy_model = spacy.load("en_core_web_lg")


# load vocab
vocab_file = args.model_dir + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
assert opt['vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

# load data
data_file = opt['data_dir'] + '/{}.json'.format(args.dataset)
print("Loading data from {} with batch size {}...".format(data_file, opt['batch_size']))

batch = DataLoader(data_file, opt['batch_size'], opt, vocab, spacy_model, evaluation=True)

# predict
predictions = []
all_probs = []
for i, b in enumerate(batch):
    preds, probs, _ = model.predict(b)
    predictions += preds
    all_probs += probs

# class to label
class2id = dict([(v, k) for k, v in constant.ID_TO_CLASS.items()])
id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])
predictions = [class2id[p] for p in predictions]
predictions = [id2label[p] for p in predictions]
Code Example #18
vocab = Vocab(vocab_file, load=True)
opt['vocab_size'] = vocab.size
emb_file = opt['vocab_dir'] + '/embedding.npy'
emb_matrix = np.load(emb_file)
assert emb_matrix.shape[0] == vocab.size
assert emb_matrix.shape[1] == opt['emb_dim']
# Add subject/object indices
opt['subject_indices'] = vocab.subj_idxs
opt['object_indices'] = vocab.obj_idxs

# load data
print("Loading data from {} with batch size {}...".format(
    opt['data_dir'], opt['batch_size']))
train_batch = DataLoader(opt['data_dir'] + '/train.json',
                         opt['batch_size'],
                         opt,
                         vocab,
                         evaluation=False)
dev_batch = DataLoader(opt['data_dir'] + '/kg_dev_reduced.json',
                       opt['batch_size'],
                       opt,
                       vocab,
                       evaluation=True,
                       kg_graph=train_batch.kg_graph)
test_batch = DataLoader(opt['data_dir'] + '/kg_test_reduced.json',
                        opt['batch_size'],
                        opt,
                        vocab,
                        evaluation=True,
                        kg_graph=dev_batch.kg_graph)
Code Example #19
File: eval.py Project: luckmoon/neural-ner
vocab = Vocab(vocab_file, load=True)
assert opt[
    'vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

char_vocab_file = args.model_dir + '/vocab_char.pkl'
char_vocab = Vocab(char_vocab_file, load=True)
assert opt[
    'char_vocab_size'] == char_vocab.size, "Char vocab size must match that in the saved model."

# load data
data_file = opt['data_dir'] + '/{}.jsonl'.format(args.dataset)
print("Loading data from {} with batch size {}...".format(
    data_file, opt['batch_size']))
batch = DataLoader(data_file,
                   opt['batch_size'],
                   opt,
                   vocab,
                   char_vocab,
                   evaluation=True)

helper.print_config(opt)
if opt['scheme'] == 'iob':
    label2id = constant.TYPE_TO_ID_IOB
elif opt['scheme'] == 'iobes':
    label2id = constant.TYPE_TO_ID_IOBES
else:
    raise Exception("Tagging scheme not found: " + opt['scheme'])
id2label = dict([(v, k) for k, v in label2id.items()])

predictions = []
for i, b in enumerate(tqdm(batch)):
    preds, _ = trainer.predict(b)
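For reference, a small illustration of the two tagging schemes the snippet switches between; the tokens and entity types are made up.

# The same entity spans labelled under each scheme (illustrative only):
# tokens:  Barack  Obama   visited  Paris
# IOB   :  B-PER   I-PER   O        B-LOC
# IOBES :  B-PER   E-PER   O        S-LOC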
Code Example #20
File: train.py Project: Saintfe/RECENT
label2id = constant.LABEL_TO_ID[type_pair_id]
opt['num_class'] = len(label2id)

# load vocab
vocab_file = opt['vocab_dir'] + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
opt['vocab_size'] = vocab.size
emb_file = opt['vocab_dir'] + '/embedding.npy'
emb_matrix = np.load(emb_file)
assert emb_matrix.shape[0] == vocab.size
assert emb_matrix.shape[1] == opt['emb_dim']

# load data
print("Loading data from {} with batch size {}...".format(opt['data_dir'], opt['batch_size']))
train_batch = DataLoader( '%s/%s_%s' % (opt['data_dir'], constant.ID_TO_TYPE_PAIR[type_pair_id], 'train.json'),
    opt['batch_size'], opt, vocab, data_type='training')
dev_batch = DataLoader('%s/%s_%s' % (opt['data_dir'], constant.ID_TO_TYPE_PAIR[type_pair_id], 'dev.json'),  
    opt['batch_size'], opt, vocab, data_type='development')

# model_id = opt['id'] if len(opt['id']) > 1 else '0' + opt['id']
model_id = opt['id'].strip()
model_save_dir = "%s/%d-%s" % (opt['save_dir'], type_pair_id, model_id)
opt['model_save_dir'] = model_save_dir
opt['log'] = 'log.%d-%s.txt' % (type_pair_id, model_id)
helper.ensure_dir(model_save_dir, verbose=True)

# save config
helper.save_config(opt, model_save_dir + '/config.json', verbose=True)
vocab.save(model_save_dir + '/vocab.pkl')
file_logger = helper.FileLogger(model_save_dir + '/' + opt['log'], header="# epoch\ttrain_loss\tdev_loss\tdev_score\tbest_dev_score")
Code Example #21
File: train_base.py Project: liuwq168/KD4NRE
# load vocab
vocab_file = opt['vocab_dir'] + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
opt['vocab_size'] = vocab.size
emb_file = opt['vocab_dir'] + '/embedding.npy'
emb_matrix = np.load(emb_file)
assert emb_matrix.shape[0] == vocab.size
assert emb_matrix.shape[1] == opt['emb_dim']

# load data
print("Loading data from {} with batch size {}...".format(
    opt['data_dir'], opt['batch_size']))
train_batch = DataLoader(opt['data_dir'] + '/train.json',
                         opt['batch_size'],
                         opt,
                         vocab,
                         evaluation=False,
                         pattern_file=opt['pattern_file'])
dev_batch = DataLoader(opt['data_dir'] + '/dev.json',
                       opt['batch_size'],
                       opt,
                       vocab,
                       evaluation=True,
                       pattern_file=opt['pattern_file'])
test_batch = DataLoader(opt['data_dir'] + '/test.json',
                        opt['batch_size'],
                        opt,
                        vocab,
                        evaluation=True,
                        pattern_file=opt['pattern_file'])
Code Example #22
File: eval_student.py Project: liuwq168/KD4NRE
print("Loading model from {}".format(model_file))
opt = torch_utils.load_config(model_file)
student_model = StudentModel(opt)
student_model.load(model_file)

# load vocab
vocab_file = 'saved_models/' + args.model_id + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
assert opt[
    'vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

# load data
data_file = opt['data_dir'] + '/{}.json'.format(args.dataset)
print("Loading data from {} with batch size {}...".format(
    data_file, opt['batch_size']))
batch = DataLoader(data_file, opt['batch_size'], opt, vocab, evaluation=True)

helper.print_config(opt)
id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])

final_predictions, inst_predictions, aux_predictions = [], [], []
all_final_probs, all_inst_probs, all_aux_probs = [], [], []
for i, b in enumerate(batch):
    final_preds, inst_preds, aux_preds, final_probs, inst_probs, aux_probs = student_model.predict_all(
        b)
    final_predictions += final_preds
    inst_predictions += inst_preds
    aux_predictions += aux_preds
    all_final_probs += final_probs
    all_inst_probs += inst_probs
    all_aux_probs += aux_probs
Code Example #23
 helper.print_config(opt)
 # model
 id2label = dict([(v, k) for k, v in label2id.items()])
 aug_train_epoch = 5
 # for subj in SUBJ_LIST:
 #     for obj in OBJ_LIST:
 print("labeled dataset for class with subj:" + str(subj) + " and obj: " +
       str(obj))
 # model_file = "saved_models/02/" + subj + "_" + obj + "_" + "best_model.pt"
 # if not os.path.exists(model_file):
 #     model_file="saved_models/02/"+"best_model_aug.pt"
 train_batch = DataLoader([opt['data_dir'] + '/train_coref.json'],
                          opt['batch_size'],
                          opt,
                          vocab,
                          evaluation=False,
                          is_aug=False,
                          corefresolve=True,
                          subj=subj,
                          obj=obj)
 dev_batch = DataLoader([opt['data_dir'] + '/dev_rev_coref.json'],
                        opt['batch_size'],
                        opt,
                        vocab,
                        evaluation=True,
                        corefresolve=True,
                        subj=subj,
                        obj=obj)
 test_batch = DataLoader([opt['data_dir'] + '/test_rev_coref.json'],
                         opt['batch_size'],
                         opt,
Code Example #24
print("Loading model from {}".format(model_file))
opt = torch_utils.load_config(model_file)
trainer = GCNTrainer(opt)
trainer.load(model_file)

# load vocab
vocab_file = args.model_dir + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
assert opt[
    'vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

# load data
data_file = opt['data_dir'] + '/{}.json'.format(args.dataset)
print("Loading data from {} with batch size {}...".format(
    data_file, opt['batch_size']))
batch = DataLoader(data_file, opt['batch_size'], opt, vocab, evaluation=True)

helper.print_config(opt)
label2id = constant.LABEL_TO_ID
id2label = dict([(v, k) for k, v in label2id.items()])

predictions = []
all_probs = []
batch_iter = tqdm(batch)
index = 0
error = [[[] for j in range(len(id2label.keys()))]
         for i in range(len(id2label.keys()))]
for i, b in enumerate(batch_iter):
    preds, probs, label, token, sub_pos, obj_pos, _ = trainer.predict(b)
    for j in range(len(token)):
        idx = index
Code Example #25
File: train.py Project: yubowen-ph/Re-TACRED
# load vocab
vocab_file = opt['vocab_dir'] + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
opt['vocab_size'] = vocab.size
emb_file = opt['vocab_dir'] + '/embedding.npy'
emb_matrix = np.load(emb_file)
assert emb_matrix.shape[0] == vocab.size
assert emb_matrix.shape[1] == opt['emb_dim']

# load data
print("Loading data from {} with batch size {}...".format(
    opt['data_dir'], opt['batch_size']))
train_batch = DataLoader(opt['data_dir'] + '/train_full.json',
                         opt['batch_size'],
                         opt,
                         vocab,
                         evaluation=False)
dev_batch = DataLoader(opt['data_dir'] + '/dev_full.json',
                       opt['batch_size'],
                       opt,
                       vocab,
                       evaluation=True)
test_batch = DataLoader(opt['data_dir'] + '/test_full.json',
                        opt['batch_size'],
                        opt,
                        vocab,
                        evaluation=True)

model_id = opt['id'] if len(opt['id']) > 1 else '0' + opt['id']
model_save_dir = opt['save_dir'] + '/' + model_id
Code Example #26
class faceSearch:
    def __init__(self):
        print("init")
        #self.clasificador = Clasificador()
        #self.autoencoder = Autoencoder()
        self.pathLib = os.path.normpath(os.getcwd() +
                                        "/lib/deepfakes/faceswap.py")
        self.pathImgGenerada = os.path.normpath(os.getcwd() + "/tmp/faces")
        self.modeloIniciado = False
        self.dataLoader = DataLoader()
        self.dataLoader.setPathClassData(
            os.path.normpath(os.getcwd() + "/bd/categoriasImg"))
        self.dataLoader.setPathTrainingData(
            os.path.normpath(os.getcwd() + "/bd/categoriasImg"))
        self.dataLoader.cargarClases()
        self.setNumClasses(self.dataLoader.getNumClasses())
        self.classes = self.dataLoader.getClasses()
        self.threshold = 5e-8
        self.inputDim = 64

        self.batchSize = 40
        self.dataLoader.setBatchSize(self.batchSize)
        self.epochs = 10

    #def setEncoderDim(self, dim):
    #	self.clasificador.setEncoderDim(dim)
    #	self.autoencoder.setEncoderDim(dim)

    def setInputDim(self, dim):
        self.inputDim = dim

    def setNumClasses(self, num):
        self.numClasses = num

    def entrenar(self):

        self.dataLoader.cargarData()
        if (self.modeloIniciado == False):
            self.setInputDim(self.dataLoader.getInputDim())
            self.initModel()
        #exit()
        loss = 1
        lossBuscador = 1
        contador = 0
        #optimizer = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999)

        while lossBuscador > self.threshold:
            self.trainingSet, self.labelsSet = self.dataLoader.nextTrainingData(
                labels=True)
            self.testingSet, self.testLabelSet = self.dataLoader.nextTestingData(
                labels=True)

            #loss = self.buscador.fit(self.trainingSet, self.labelsSet,
            #	batch_size=self.batchSize,
            #	epochs=self.epochs,
            #	verbose=0,
            #	validation_data=(self.testingSet, self.testLabelSet))
            loss = self.buscador.train_on_batch(self.trainingSet,
                                                self.labelsSet)
            score = self.buscador.evaluate(self.testingSet,
                                           self.testLabelSet,
                                           verbose=0)
            lossBuscador = score[0]
            #print('Test loss:', score[0])
            #print('Test accuracy:', score[1])

            #
            #lossBuscador = self.buscador.train_on_batch(self.trainingSet, self.labelsSet)
            #print("% Completado " + str(score[0]) + "            ", end='\r')
            print("% Completado " + str(
                (self.threshold / lossBuscador) * 100) + "      loss: " +
                  str(score[0]) + " accurracy " + str(score[1]),
                  end='\r')
            contador += 1
            if contador % 100 == 0:
                self.guardarAvance()
            if contador > 1000:
                contador = 0

    def guardarAvance(self):
        self.buscador.save_weights(
            os.path.normpath(os.getcwd() + "/models/pesos/model.h5"), True)

    def initModel(self):
        if (self.modeloIniciado == True):
            return

        self.buscador = Sequential()
        self.buscador.add(
            Conv2D(64,
                   kernel_size=(5, 5),
                   activation='relu',
                   data_format='channels_first',
                   border_mode='same',
                   input_shape=(1, self.inputDim, self.inputDim)))
        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        self.buscador.add(Conv2D(32, (10, 10), activation='relu'))

        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        self.buscador.add(Conv2D(16, (5, 5), activation='relu'))
        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        self.buscador.add(Conv2D(8, (5, 5), activation='relu'))
        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        self.buscador.add(Conv2D(4, (5, 5), activation='relu'))
        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        self.buscador.add(Conv2D(2, (5, 5), activation='relu'))

        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        #self.buscador.add(Conv2D(12, (5, 5), activation='relu'))
        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        #self.buscador.add(Conv2D(20, (5, 5), activation='relu'))
        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        #self.buscador.add(Conv2D(12, (5, 5), activation='relu'))
        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        #self.buscador.add(Conv2D(12, (1, 1), activation='relu'))
        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        #self.buscador.add(Conv2D(64, (5, 5), activation='tanh'))
        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        #self.buscador.add(AveragePooling2D(pool_size=(2, 2)))
        #self.buscador.add(Dropout(0.25))
        self.buscador.add(Flatten())
        #self.buscador.add(Dense(128, activation='tanh'))

        #, kernel_regularizer=keras.regularizers.l2(0.01)
        #, activity_regularizer=keras.regularizers.l1(0.01)))
        #self.buscador.add(Dropout(0.25))
        #self.buscador.add(Dense(128, activation='tanh'))
        #, kernel_regularizer=keras.regularizers.l2(0.01)
        #, activity_regularizer=keras.regularizers.l1(0.01)))
        #self.buscador.add(Dropout(0.25))
        #self.buscador.add(Dense(128, activation='tanh'))
        #, kernel_regularizer=keras.regularizers.l2(0.01)
        #, activity_regularizer=keras.regularizers.l1(0.01)))
        #self.buscador.add(Dropout(0.25))
        #self.buscador.add(Dense(128, activation='tanh'))
        #, kernel_regularizer=keras.regularizers.l2(0.01)
        #, activity_regularizer=keras.regularizers.l1(0.01)))
        #self.buscador.add(Dropout(0.5))
        self.buscador.add(Dropout(0.25))

        self.buscador.add(Dense(self.numClasses, activation='softmax'))
        #self.buscador.summary()
        #exit()
        #optimizer = Adam(lr=5e-9, beta_1=0.5, beta_2=0.999)

        # this one works!
        optimizer = keras.optimizers.Adadelta()
        self.buscador.compile(loss=keras.losses.categorical_crossentropy,
                              optimizer=optimizer,
                              metrics=['accuracy'])
        try:
            self.buscador.load_weights(
                os.path.normpath(os.getcwd() + "/models/pesos/model.h5"), True)
            print("pesos cargados")
        except OSError:
            print("no se han creado los pesos")
        self.modeloIniciado = True

    def search(self, pathImage):

        command_line = "python " + self.pathLib + " extract -v -i " + pathImage + " -o " + self.pathImgGenerada

        p = subprocess.Popen(command_line,
                             shell=True,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)  # Success!
        result = []
        for line in p.stdout:
            result.append(line)
            errcode = p.returncode
            print(errcode)
        for line in result:
            print(line)

        p.kill()

        filesList = []
        for subdir, dirs, files in os.walk(self.pathImgGenerada):
            for file in files:
                filesList.append(os.path.join(subdir, file))

        imgs = []
        for file in filesList:
            im = cv2.imread(file)
            im = cv2.resize(im, (64, 64), interpolation=cv2.INTER_AREA)
            im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
            #cv2.imwrite(file,im)
            im = np.reshape(im, (1, 1, im.shape[0], im.shape[1]))

            if (self.modeloIniciado == False):
                self.setInputDim(im.shape[2])
                self.initModel()
            #imgs.append(im)

        #predicted = self.buscador.predict(imgs)
            print("prediccion " +
                  str(self.classes[self.buscador.predict(im).argmax()]))
Code Example #27
model_file = args.model_dir + '/' + args.model
print("Loading model from {}".format(model_file))
opt = torch_utils.load_config(model_file)
trainer = GCNTrainer(opt)
trainer.load(model_file)

# load vocab
vocab_file = opt['vocab_dir'] + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
assert opt['vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

# load data
data_file = opt['data_dir']  + '/test.json'
print("Loading data from {} with batch size {}...".format(data_file, opt['batch_size']))
data = read_file(data_file, vocab, opt, False)
batch = DataLoader(data, opt['batch_size'], opt, evaluation=True)

helper.print_config(opt)
label2id = constant.LABEL_TO_ID
id2label = dict([(v, k) for k, v in label2id.items()])

predictions = []
all_probs = []
cross_list = []
batch_iter = tqdm(batch)
for i, b in enumerate(batch_iter):
    cross_list += b[8]
    preds, probs, _ = trainer.predict(b)
    predictions += preds
    all_probs += probs
Code Example #28
File: train.py Project: yubowen-ph/Re-TACRED
# load vocab
vocab_file = opt['vocab_dir'] + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
opt['vocab_size'] = vocab.size
emb_file = opt['vocab_dir'] + '/embedding.npy'
emb_matrix = np.load(emb_file)
assert emb_matrix.shape[0] == vocab.size
assert emb_matrix.shape[1] == opt['emb_dim']

# load data
print("Loading data from {} with batch size {}...".format(
    opt['data_dir'], opt['batch_size']))
train_batch = DataLoader(opt['data_dir'] + '/train.json',
                         opt['batch_size'],
                         opt,
                         vocab,
                         evaluation=False)
dev_batch = DataLoader(opt['data_dir'] + '/dev.json',
                       opt['batch_size'],
                       opt,
                       vocab,
                       evaluation=True)
test_batch = DataLoader(opt['data_dir'] + '/test.json',
                        opt['batch_size'],
                        opt,
                        vocab,
                        evaluation=True)

model_id = opt['id'] if len(opt['id']) > 1 else '0' + opt['id']
model_save_dir = opt['save_dir'] + '/' + model_id
Code Example #29
if config.dataset_path != 'imdb' and config.dataset_path != 'imdb_small':
    train = pd.read_csv(config.dataset_path + '/6_train.csv')
    test = pd.read_csv(config.dataset_path + '/6_test.csv')
    val = pd.read_csv(config.dataset_path + '/6_val.csv')

    X_train, meta_train, Y_train, label_encoder_train = splitFeatures(train)
    X_test, meta_test, Y_test, label_encoder_test = splitFeatures(test)
    X_val, meta_val, Y_val, label_encoder_val = splitFeatures(val)

    n_classes = max(len(label_encoder_val.classes_),
                    len(label_encoder_test.classes_),
                    len(label_encoder_train.classes_))

elif config.dataset_path == 'imdb_small':
    dl = DataLoader()
    X_train, X_val, X_test, Y_train, Y_val, Y_test, _, _, _ = dl.load_data(
        data_path='../imdb_small/budgetandactors2.txt')
    Y_val = [1 if y == 1 else 0 for y in Y_val]
    Y_test = [1 if y == 1 else 0 for y in Y_test]
    n_classes = 2

else:
    dl = DataLoader()
    X_train, X_val, X_test, Y_train, Y_val, Y_test, _, _, _ = dl.load_data(
        data_path='./data/imdb/budgetandactors.txt')
    Y_val = [1 if y == 1 else 0 for y in Y_val]
    Y_test = [1 if y == 1 else 0 for y in Y_test]
    n_classes = 2

#  print("X_val", X_val)
Code Example #30
opt = torch_utils.load_config(args.model_dir)
helper.print_config(opt)
model = MyTrainer(opt)
model.load(args.model_dir)

print("Loading data from {} with batch size {}...".format(os.path.join(args.data_dir, args.test_filename), opt['batch_size']))

# split_test_data for multi
if opt['type'] == 'multi':
    split_test_data(opt['coarse_name'])

is_multi_eval = False
if opt['type'] == 'multi':
    is_multi_eval = True

test_batch = DataLoader(os.path.join(args.data_dir, args.test_filename), opt['batch_size'], opt, is_multi_eval)
print("Evaluating...")
if opt['type'] == 'multi':
    predictions, data_ids = [], []
    test_step = 0
    for i, batch in enumerate(test_batch):
        pred, data_id = model.predict(batch, only_pred=True)
        predictions += pred
        test_step += 1
        data_ids += data_id
else:
    predictions, labels, data_ids = [], [], []
    test_loss, test_acc, test_step = 0., 0., 0
    for i, batch in enumerate(test_batch):
        loss, acc, pred, label, data_id = model.predict(batch)
        test_loss += loss