Example #1
def main(cosim):
    dense = cosim.matrix.todense()
    affinity = 0.5 * dense + 0.5 * dense.T
    distance = np.maximum(1.0 - affinity, 0)
    for nc in NUM_COMPONENTS:
        algs = [
            ('isomap', manifold.Isomap(nc)),
            #('TSNE', manifold.TSNE(nc, metric='precomputed')),
            ('spectral', manifold.SpectralEmbedding(nc,
                                                    affinity='precomputed')),
            ('MDS', manifold.MDS(nc, dissimilarity='precomputed')),
        ]
        print()
        print()
        print('=' * 80)
        print('Results for all algorithms with %d components' % nc)
        print('=' * 80)
        print()

        for name, alg in algs:
            M = distance
            if name in ('spectral',):
                M = affinity
            # NOTE: Isomap receives the raw distance matrix here and, without a
            # precomputed metric, treats each row as a feature vector.
            embedding = alg.fit_transform(M)
            evaluator = Evaluator(cosim, embedding)

            print()
            print('results for', name, 'rank', nc, ':')
            evaluator.evaluate()
Example #2
def main(args):
    """Method to run the data processing/model building pipeline"""
    version = None

    if args.acquire or args.pipeline:
        da = DataAcquisition()
        da.acquire("ds-project-train.csv", "training")
        da.acquire("ds-project-validation.csv", "validation")

    if args.train or args.pipeline:
        trainer = ShipmentTrainer()
        version = trainer.train()

        with open("model_version.txt", "w") as version_fd:
            version_fd.write(version)

    if args.evaluate or args.pipeline:
        if not version and not args.version:
            logger.error("Must either train or specify model version to evaluate")
            return

        if not version:
            version = args.version

        evaluator = Evaluator()
        evaluator.evaluate(version)

    if args.upload:
        if not version and not args.version:
            logger.error("Must either train or specify model version to upload model")
            return

        if not version:
            version = args.version
        upload_model_files(version)
Example #3
    def __init__(self, n_class, in_ch):
        super().__init__()
        with self.init_scope():
            self.conv1 = L.Convolution2D(in_ch, 96, 7, stride=2, pad=3)
            self.fire2 = Fire(96, 16, 64, 64)
            self.fire3 = Fire(128, 16, 64, 64)
            self.fire4 = Fire(128, 16, 128, 128)
            self.fire5 = Fire(256, 32, 128, 128)
            self.fire6 = Fire(256, 48, 192, 192)
            self.fire7 = Fire(384, 48, 192, 192)
            self.fire8 = Fire(384, 64, 256, 256)
            self.fire9 = Fire(512, 64, 256, 256)

            self.score_pool1 = L.Convolution2D(96, n_class, 1, stride=1, pad=0)
            self.score_pool4 = L.Convolution2D(256, n_class, 1, stride=1, pad=0)
            self.score_pool9 = L.Convolution2D(512, n_class, 1, stride=1, pad=0)

            self.add_layer = L.Convolution2D(n_class * 3, n_class, 1, stride=1, pad=0)

            # In deconvolution, padding trims pixels from the output.
            self.upsample_pool4 = L.Deconvolution2D(n_class, n_class, ksize=4, stride=2, pad=1)
            self.upsample_pool9 = L.Deconvolution2D(n_class, n_class, ksize=8, stride=4, pad=2)
            self.upsample_final = L.Deconvolution2D(n_class, n_class, ksize=16, stride=4, pad=(6, 6))

        self.n_class = n_class
        self.active_learn = False
        self.evaluator = Evaluator(False, n_class)
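
The Fire blocks above follow the SqueezeNet design. As a rough sketch of what such a module might look like in Chainer (an assumption; the project's actual Fire class may differ), a 1x1 squeeze convolution feeds two parallel expand convolutions whose outputs are concatenated:

import chainer
import chainer.functions as F
import chainer.links as L

class Fire(chainer.Chain):
    """Hypothetical SqueezeNet Fire module matching Fire(in_ch, squeeze, e1, e3) above."""
    def __init__(self, in_ch, squeeze, expand1, expand3):
        super().__init__()
        with self.init_scope():
            self.squeeze = L.Convolution2D(in_ch, squeeze, 1)
            self.expand1 = L.Convolution2D(squeeze, expand1, 1)
            self.expand3 = L.Convolution2D(squeeze, expand3, 3, pad=1)

    def __call__(self, x):
        h = F.relu(self.squeeze(x))
        # Concatenate the 1x1 and 3x3 expand paths along the channel axis,
        # e.g. Fire(96, 16, 64, 64) yields 64 + 64 = 128 output channels.
        return F.relu(F.concat((self.expand1(h), self.expand3(h)), axis=1))

With this reading the channel counts chain correctly: fire2 emits 128 channels, which is what fire3 expects.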
Example #4
def evaluate(results):
	vocab_file = results.vocab
	labels_file = results.labels

	gold_file = results.gold
	pred_file = results.pred

	logger.debug('Started evaluation with options:\n'
	             'gold file: ' + str(results.gold) + '\n'
	             'pred file: ' + str(results.pred) + '\n'
	             'vocab file: ' + str(results.vocab) + '\n'
	             'labels file: ' + str(results.labels) + '\n')


	if not os.path.exists('model/meta_data'):
		meta_data_instance = MetaData(vocab_file, labels_file)
		meta_data = meta_data_instance.get_meta_data()
		logger.info("Writing meta data file")
		write_obj(meta_data, 'meta_data')
	else:
		logger.info("meta data file already exists ... loading")
		meta_data = read_obj('meta_data')


	evaluator = Evaluator(meta_data, pred_file, gold_file)
	evaluator.evaluate()
Example #5
def main(args):
    path = os.path.join(os.getcwd(), 'soft_label', 'soft_label_resnet50.txt')
    if not os.path.isfile(path):
        print('soft label file does not exist')

    train_loader = getTrainLoader(args, path)
    _, val_loader, num_query, num_classes, train_size = make_data_loader(args)

    #train_loader, val_loader, num_query, num_classes, train_size = make_data_loader(args)
    model = build_model(args, num_classes)
    optimizer = make_optimizer(args, model)
    scheduler = WarmupMultiStepLR(optimizer, [30, 55], 0.1, 0.01, 5, "linear")

    loss_func = make_loss(args)

    model.to(device)

    for epoch in range(args.Epochs):
        model.train()
        running_loss = 0.0
        running_klloss = 0.0
        running_softloss = 0.0
        running_corrects = 0.0
        for index, data in enumerate(tqdm(train_loader)):
            img, target, soft_target = data
            img = img.cuda()
            target = target.cuda()
            soft_target = soft_target.cuda()
            score, _ = model(img)
            preds = torch.max(score.data, 1)[1]
            loss, klloss, softloss = loss_func(score, target, soft_target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            running_klloss += klloss.item()
            running_softloss += softloss.item()
            running_corrects += float(torch.sum(preds == target.data))

        scheduler.step()
        epoch_loss = running_loss / train_size
        epoch_klloss = running_klloss / train_size
        epoch_softloss = running_softloss / train_size
        epoch_acc = running_corrects / train_size
        print(
            "Epoch {}   Loss : {:.4f} KLLoss:{:.8f}  SoftLoss:{:.4f}  Acc:{:.4f}"
            .format(epoch, epoch_loss, epoch_klloss, epoch_softloss,
                    epoch_acc))

        if (epoch + 1) % args.n_save == 0:
            evaluator = Evaluator(model, val_loader, num_query)
            cmc, mAP = evaluator.run()
            print('---------------------------')
            print("CMC Curve:")
            for r in [1, 5, 10]:
                print("Rank-{} : {:.1%}".format(r, cmc[r - 1]))
            print("mAP : {:.1%}".format(mAP))
            print('---------------------------')
            save_model(args, model, optimizer, epoch)
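
WarmupMultiStepLR is not a stock PyTorch scheduler. Given the positional arguments above (milestones=[30, 55], gamma=0.1, warmup_factor=0.01, warmup_iters=5, warmup_method="linear"), a minimal sketch in the style of the common re-ID baseline scheduler could be:

from bisect import bisect_right
from torch.optim.lr_scheduler import _LRScheduler

class WarmupMultiStepLR(_LRScheduler):
    """Sketch only: linear warmup for `warmup_iters` epochs, then step decay."""
    def __init__(self, optimizer, milestones, gamma, warmup_factor,
                 warmup_iters, warmup_method, last_epoch=-1):
        self.milestones = list(milestones)
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        warmup = 1.0
        if self.last_epoch < self.warmup_iters:
            if self.warmup_method == "constant":
                warmup = self.warmup_factor
            else:  # "linear": ramp from warmup_factor up to 1.0
                alpha = self.last_epoch / self.warmup_iters
                warmup = self.warmup_factor * (1 - alpha) + alpha
        return [base_lr * warmup *
                self.gamma ** bisect_right(self.milestones, self.last_epoch)
                for base_lr in self.base_lrs]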
Example #6
def validate(val_loader, model, criterion, args, old_model=None, cls_num=1000):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(val_loader), [batch_time, losses, top1, top5],
                             prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    if args.use_feat:
        if args.cross_eval and old_model is not None:
            old_model.eval()
            evaluator = Evaluator(model, old_model)
        else:
            evaluator = Evaluator(model)
        top1, top5 = evaluator.evaluate(val_loader)
        print(' * Acc@1 {top1:.3f} Acc@5 {top5:.3f}'.format(top1=top1,
                                                            top5=top5))
        return top1

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            if args.gpu is not None:
                images = images.cuda(args.gpu, non_blocking=True)
            if torch.cuda.is_available():
                target = target.cuda(args.gpu, non_blocking=True)

            if cls_num in target:
                print('Only have {} classes, test stop!'.format(cls_num))
                break

            # compute output
            if args.old_fc is None:
                output = model(images)
            else:
                output, _, _ = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.display(i)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1,
                                                                    top5=top5))

    return top1.avg
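
The accuracy() helper used above is not shown; a sketch in the style of the standard PyTorch ImageNet example (an assumption about this project's version, but consistent with the acc1[0] indexing above) is:

import torch

def accuracy(output, target, topk=(1,)):
    """Top-k accuracy as a list of one-element tensors, in percent."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)
        # Indices of the k highest-scoring classes per sample.
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res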
Example #7
def evaluate(predictor):
    test_file_name = 'data_valid.json'
    print('loading evaluator...')
    evaluator = Evaluator(predictor)
    print('output result...')
    evaluator.output_result(test_file_name)
    print('scoring...')
    score = evaluator.scoring(test_file_name)
    print(score)
Example #8
def test_multiple_truth():
    reference = {
        'pst': {
            'burn': {'burned', 'burnt'},
            'shrink': {'shrank', 'shrunk'}
        }
    }
    evaluator = Evaluator(reference)
    prediction = {'one': {'burn': 'burnt', 'shrink': 'shrunk'}}
    assert evaluator.score(prediction) == 1
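
The Evaluator here is opaque, but one score() consistent with this test (and with test_load further below, which expects 0.75) is the following sketch, under two assumptions: the outer keys of both dicts are ignored, and a predicted form counts as correct when the reference lists it for the same lemma under any tag:

class Evaluator:
    """Sketch only; the real class may differ."""
    def __init__(self, reference):
        # reference: {tag: {lemma: set of acceptable forms}}
        self.reference = reference

    def score(self, prediction):
        # prediction: {group: {lemma: predicted form}}; group keys are ignored.
        correct = total = 0
        for group in prediction.values():
            for lemma, form in group.items():
                acceptable = set()
                for per_tag in self.reference.values():
                    acceptable |= per_tag.get(lemma, set())
                correct += form in acceptable
                total += 1
        return correct / total if total else 0.0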
Example #9
    def test_calculate_level_performance(self):
        evaluator = Evaluator(".")
        id_to_label_map = dict({
            0: "其他",
            1: "教育--中小学",
            2: "教育--学历教育--高等教育",
            3: "教育--学历教育--其他",
            4: "教育--其他",
            5: "体育--健身",
            6: "体育--其他"
        })
        right_count_category = dict()
        predict_count_category = dict()
        standard_count_category = dict()
        for label_id, label_name in id_to_label_map.items():
            right_count_category[label_name] = label_id + 1
            predict_count_category[label_name] = label_id + 3
            standard_count_category[label_name] = label_id + 2
        predict_count_category["教育--学历教育--高等教育"] = 0
        standard_count_category["教育--学历教育--其他"] = 0
        precision_dict, recall_dict, fscore_dict = \
            evaluator.calculate_level_performance(
                id_to_label_map, right_count_category, predict_count_category,
                standard_count_category, exclude_method="start")

        self.assertAlmostEqual(precision_dict[evaluator.MACRO_AVERAGE],
                               0.56812169)
        self.assertAlmostEqual(recall_dict[evaluator.MACRO_AVERAGE],
                               0.663690476)
        self.assertAlmostEqual(fscore_dict[evaluator.MACRO_AVERAGE],
                               0.612198785)
        self.assertAlmostEqual(precision_dict[evaluator.MICRO_AVERAGE],
                               0.794117647)
        self.assertAlmostEqual(recall_dict[evaluator.MICRO_AVERAGE],
                               0.964285714)
        self.assertAlmostEqual(fscore_dict[evaluator.MICRO_AVERAGE],
                               0.870967741)

        precision_dict, recall_dict, fscore_dict = \
            evaluator.calculate_level_performance(
                id_to_label_map, right_count_category, predict_count_category,
                standard_count_category, exclude_method="contain")

        self.assertAlmostEqual(precision_dict[evaluator.MACRO_AVERAGE],
                               0.41666666)
        self.assertAlmostEqual(recall_dict[evaluator.MACRO_AVERAGE],
                               0.757936507)
        self.assertAlmostEqual(fscore_dict[evaluator.MACRO_AVERAGE],
                               0.537725225)
        self.assertAlmostEqual(precision_dict[evaluator.MICRO_AVERAGE],
                               0.916666666)
        self.assertAlmostEqual(recall_dict[evaluator.MICRO_AVERAGE],
                               0.785714285)
        self.assertAlmostEqual(fscore_dict[evaluator.MICRO_AVERAGE],
                               0.846153846)
Example #10
def main(cosim):
    for nc in NUM_COMPONENTS:
        dense = cosim.matrix.todense()
        affinity = 0.5 * dense + 0.5 * dense.T
        distance = np.maximum(1.0 - affinity, 0)
        #embedding = manifold.mds._smacof_single(distance, n_components=nc)[0]
        embedding = mds(distance, n_components=nc)

        print('results for %d components\n\n\n' % nc)
        evaluator = Evaluator(cosim, embedding)
        evaluator.evaluate()
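
The mds() helper is presumably a thin wrapper; a plausible sketch (assumed, not the project's code) runs scikit-learn's metric MDS on the precomputed dissimilarity matrix:

from sklearn import manifold

def mds(distance, n_components):
    # MDS on a precomputed (symmetric, non-negative) dissimilarity matrix.
    model = manifold.MDS(n_components=n_components, dissimilarity='precomputed')
    return model.fit_transform(distance)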
Example #11
def main(args):
    sys.stdout = Logger(
        os.path.join(args.log_path, args.log_description,
                     'log' + time.strftime(".%m_%d_%H:%M:%S") + '.txt'))

    train_loader, val_loader, num_query, num_classes, train_size = make_data_loader(
        args)
    model = build_model(args, num_classes)
    print(model)
    optimizer = make_optimizer(args, model)
    scheduler = WarmupMultiStepLR(optimizer, [30, 55], 0.1, 0.01, 5, "linear")

    loss_func = make_loss(args)

    model.to(device)

    for epoch in range(args.Epochs):
        model.train()
        running_loss = 0.0
        running_corrects = 0.0
        for index, data in enumerate(tqdm(train_loader)):
            img, target = data
            img = img.cuda()
            target = target.cuda()
            score, _ = model(img)
            preds = torch.max(score.data, 1)[1]
            loss = loss_func(score, target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            running_corrects += float(torch.sum(preds == target.data))

        scheduler.step()
        epoch_loss = running_loss / train_size
        epoch_acc = running_corrects / train_size
        print("Epoch {}   Loss : {:.6f}   Acc:{:.4f}".format(
            epoch, epoch_loss, epoch_acc))

        if (epoch + 1) % args.n_save == 0:
            evaluator = Evaluator(model, val_loader, num_query)
            cmc, mAP = evaluator.run()
            print('---------------------------')
            print("CMC Curve:")
            for r in [1, 5, 10]:
                print("Rank-{} : {:.1%}".format(r, cmc[r - 1]))
            print("mAP : {:.1%}".format(mAP))
            print('---------------------------')
            save_model(args, model, optimizer, epoch)
Example #12
def test_load(tmp_path):
    ref_file = os.path.join(tmp_path, 'test.gold')
    prd_file = os.path.join(tmp_path, 'test.pred')
    # The same test case as `test_basic`, but written to file
    with open(ref_file, 'w') as f:
        print('get', 'gets', '3sg.prs', sep='\t', file=f)
        print('get', 'got', 'pst', sep='\t', file=f)
        print('set', 'sets', '3sg.prs', sep='\t', file=f)
        print('set', 'set', 'pst', sep='\t', file=f)
    with open(prd_file, 'w') as f:
        print('get', 'gets', '3sg.prs', sep='\t', file=f)
        print('get', 'get', 'pst', sep='\t', file=f)
        print('set', 'sets', '3sg.prs', sep='\t', file=f)
        print('set', 'set', 'pst', sep='\t', file=f)
    evaluator = Evaluator(load_reference(ref_file))
    assert evaluator.score(load_prediction(prd_file)) == 0.75
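
load_reference and load_prediction are not shown; hypothetical loaders matching the lemma<TAB>form<TAB>tag lines written above would be:

from collections import defaultdict

def load_reference(path):
    # {tag: {lemma: set of acceptable forms}}
    reference = defaultdict(lambda: defaultdict(set))
    with open(path) as f:
        for line in f:
            lemma, form, tag = line.rstrip('\n').split('\t')
            reference[tag][lemma].add(form)
    return reference

def load_prediction(path):
    # {tag: {lemma: predicted form}}
    prediction = defaultdict(dict)
    with open(path) as f:
        for line in f:
            lemma, form, tag = line.rstrip('\n').split('\t')
            prediction[tag][lemma] = form
    return prediction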
Example #13
File: run.py  Project: lmaoyeet/Yeethoveen
def main():
    filename = 'main.yeet'
    file = open(filename, 'r')
    lexer = Lexer(file)
    lexer.tokenizer()
    parse = Parse(lexer.tokens)

    # print("Tokens: ")
    # print(lexer.tokens, "\n")

    parse.build_AST()
    # print("AST:")
    # print (parse.AST, "\n")

    evaluator = Evaluator(parse.AST)
    print("the f*****g output:")
    evaluator.run(parse.AST)
Example #14
def main():

    from model.voxel2mesh import Voxel2Mesh as network
    exp_id = 2

    # Initialize
    cfg = load_config(exp_id)
    trial_path, trial_id = init(cfg)

    print('Experiment ID: {}, Trial ID: {}'.format(cfg.experiment_idx,
                                                   trial_id))

    print("Create network")
    classifier = network(cfg)
    classifier.cuda()

    wandb.init(name='Experiment_{}/trial_{}'.format(cfg.experiment_idx,
                                                    trial_id),
               project="vm-net",
               dir='/cvlabdata1/cvlab/datasets_udaranga/experiments/wanb')

    print("Initialize optimizer")
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  classifier.parameters()),
                           lr=cfg.learning_rate)

    print("Load data")
    data_obj = Chaos()

    # During the first run use load_data function. It will do the necessary preprocessing and save the files to disk.
    # data = data_obj.pre_process_dataset(cfg, trial_id)
    data = data_obj.quick_load_data(cfg, trial_id)

    loader = DataLoader(data[DataModes.TRAINING],
                        batch_size=classifier.config.batch_size,
                        shuffle=True)

    print("Trainset length: {}".format(loader.__len__()))

    print("Initialize evaluator")
    evaluator = Evaluator(classifier, optimizer, data, trial_path, cfg,
                          data_obj)

    print("Initialize trainer")
    trainer = Trainer(classifier, loader, optimizer, cfg.numb_of_itrs,
                      cfg.eval_every, trial_path, evaluator)

    if cfg.trial_id is not None:
        print("Loading pretrained network")
        save_path = trial_path + '/best_performance/model.pth'
        checkpoint = torch.load(save_path)
        classifier.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        epoch = checkpoint['epoch']
    else:
        epoch = 0

    trainer.train(start_iteration=epoch)
Example #15
def main(args):
    train_loader, val_loader, num_query, num_classes, train_size = make_data_loader(
        args)

    #load the parameters
    net = Net(reid=True)
    state_dict = torch.load(
        './ckpt.t7', map_location=lambda storage, loc: storage)['net_dict']
    net.load_state_dict(state_dict)

    evaluator = Evaluator(net, val_loader, num_query)
    cmc, mAP = evaluator.run()
    print('---------------------------')
    print("CMC Curve:")
    for r in [1, 5, 10]:
        print("Rank-{} : {:.1%}".format(r, cmc[r - 1]))
    print("mAP : {:.1%}".format(mAP))
    print('---------------------------')
Example #16
def _test(config):
  if config.data_from == "20newsgroup": config.test_batch_size = 281

  word2idx_json = json.load(open("../data/{}/word2idx_{}.json".format(config.data_from, config.data_from), "r"))
  word2idx = Counter(word2idx_json["word2idx"])
  idx2word = word2idx_json["idx2word"]
  assert len(word2idx) == len(idx2word)
  for i in range(10):
    assert word2idx[idx2word[i]] == i
  vocab_size = len(word2idx)
  word2vec = Counter(json.load(open("../data/{}/word2vec_{}.json".format(config.data_from, config.pretrain_from), "r"))["word2vec"])
  # word2vec = {} if config.debug or config.load  else get_word2vec(config, word2idx)
  idx2vec = {word2idx[word]: vec for word, vec in word2vec.items() if word in word2idx}
  unk_embedding = np.random.multivariate_normal(np.zeros(config.word_embedding_size), np.eye(config.word_embedding_size))
  config.emb_mat = np.array([idx2vec[idx] if idx in idx2vec else unk_embedding for idx in range(vocab_size)])
  config.vocab_size = vocab_size
  test_dict = {}
  if os.path.exists("../data/{}/{}_{}{}.json".format(config.data_from, config.data_from, config.dev_type, config.clftype)):
    test_dict = json.load(open("../data/{}/{}_{}{}.json".format(config.data_from, config.data_from, config.dev_type, config.clftype), "r"))

  if config.data_from == "reuters":
    dev_data = DataSet(test_dict, "test") if len(test_dict)>0 else read_reuters(config, data_type="test", word2idx=word2idx)
  elif config.data_from == "20newsgroup":
    dev_data = DataSet(test_dict, "test") if len(test_dict)>0 else read_news(config, data_type="test", word2idx=word2idx)
  elif config.data_from == "ice":
    dev_data = DataSet(test_dict, config.dev_type)

  config.dev_size = dev_data.get_data_size()
  # if config.use_glove_for_unk:
  pprint(config.__flags, indent=2)
  model = get_model(config)
  graph_handler = GraphHandler(config, model)
  sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
  graph_handler.initialize(sess)
  # check
  #w_embeddings = sess.run(model.word_embeddings)
  #print("w_embeddings:", w_embeddings.shape, w_embeddings)

  dev_evaluate = Evaluator(config, model)
  num_steps = math.floor(dev_data.num_examples / config.test_batch_size)
  if 0 < config.val_num_batches < num_steps:
    num_steps = config.val_num_batches
  # print("num_steps:", num_steps)
  e_dev = dev_evaluate.get_evaluation_from_batches(
    sess, tqdm(dev_data.get_batches(config.test_batch_size, num_batches=num_steps), total=num_steps))
Example #17
def main():

    exp_id = 3

    # Initialize
    cfg = load_config(exp_id)
    trial_path, trial_id = init(cfg)

    print('Experiment ID: {}, Trial ID: {}'.format(cfg.experiment_idx,
                                                   trial_id))

    print("Create network")
    classifier = network(cfg)
    classifier.cuda()

    wandb.init(name='Experiment_{}/trial_{}'.format(cfg.experiment_idx,
                                                    trial_id),
               project="vm-net",
               dir=trial_path)

    print("Initialize optimizer")
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  classifier.parameters()),
                           lr=cfg.learning_rate)

    print("Load pre-processed data")
    data_obj = cfg.data_obj
    data = data_obj.quick_load_data(cfg, trial_id)

    loader = DataLoader(data[DataModes.TRAINING],
                        batch_size=classifier.config.batch_size,
                        shuffle=True)

    print("Trainset length: {}".format(loader.__len__()))

    print("Initialize evaluator")
    evaluator = Evaluator(classifier, optimizer, data, trial_path, cfg,
                          data_obj)

    print("Initialize trainer")
    trainer = Trainer(classifier, loader, optimizer, cfg.numb_of_itrs,
                      cfg.eval_every, trial_path, evaluator)

    if cfg.trial_id is not None:
        print("Loading pretrained network")
        save_path = trial_path + '/best_performance/model.pth'
        checkpoint = torch.load(save_path)
        classifier.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        epoch = checkpoint['epoch']
    else:
        epoch = 0

    trainer.train(start_iteration=epoch)
Example #18
def evaluator():
    reference = {
        '3sg.prs': {
            'get': {'gets'},
            'set': {'sets'}
        },
        'pst': {
            'get': {'got'},
            'set': {'set'}
        }
    }
    return Evaluator(reference)
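
This is a pytest fixture; a hypothetical test using it (consistent with the scoring behaviour seen in the other tests) might look like:

def test_exact_match(evaluator):
    # Every predicted form appears in the reference, so the score is 1.
    prediction = {
        '3sg.prs': {'get': 'gets', 'set': 'sets'},
        'pst': {'get': 'got', 'set': 'set'}
    }
    assert evaluator.score(prediction) == 1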
Example #19
def prepare_experiment(config, task, text_encoder, device, verbose):
    train_dataloader, validation_dataloader, test_dataloader, document_structure = get_dataloaders(
        task, text_encoder, config['test_split'], config['validation_split'],
        config['batch_size'], device, verbose, sequence_dim=config['sequence_dim'])
    max_position_encoding = train_dataloader.dataset.max_position_encoding
    sequence_dim = train_dataloader.dataset.sequence_dim
    vocab_size = len(text_encoder.encoder) + max_position_encoding
    num_output = task['target']['num_classes'] if document_structure != 'one_to_many' else 1
    
    target_type = task['target']['target_type']
    if target_type == 'classification':
        task_criterion = nn.CrossEntropyLoss(reduction='none')
    elif target_type == 'regression':
        task_criterion = nn.MSELoss(reduction='none')
    lm_criterion = nn.CrossEntropyLoss(reduction='none')
    train_evaluator = Evaluator(lm_criterion, task_criterion, config['lm_coef'], 1., target_type)
    test_evaluator = Evaluator(lm_criterion, task_criterion, 0., 1., target_type)

    dh_model = DoubleHeadModel(config, text_encoder.classify_token, num_output, vocab_size, sequence_dim)
    load_openai_pretrained_model(dh_model.transformer, n_ctx=sequence_dim, n_special=3, verbose=verbose)
    dh_model.to(device)

    return dh_model, (train_dataloader, test_dataloader), (train_evaluator, test_evaluator)
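
The two Evaluator instances differ only in the language-model coefficient (config['lm_coef'] during training, 0. at test time), which suggests a weighted sum of the two criteria. A minimal sketch of that combination, with method and argument names assumed rather than taken from the project:

class Evaluator:
    """Sketch only: weighted sum of an auxiliary LM loss and the task loss."""
    def __init__(self, lm_criterion, task_criterion, lm_coef, task_coef, target_type):
        self.lm_criterion = lm_criterion
        self.task_criterion = task_criterion
        self.lm_coef = lm_coef
        self.task_coef = task_coef
        self.target_type = target_type

    def compute_loss(self, lm_logits, lm_targets, task_logits, task_targets):
        # With lm_coef=0 (the test evaluator above) only the task term remains.
        lm_loss = self.lm_criterion(lm_logits, lm_targets).mean()
        task_loss = self.task_criterion(task_logits, task_targets).mean()
        return self.lm_coef * lm_loss + self.task_coef * task_loss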
Example #20
    def on_epoch_end(self, epoch, logs={}):
        self.loss.append(logs["val_loss"])
        if len(self.loss) == 1 or min(self.loss[:-1]) > self.loss[-1]:
            ev = Evaluator(self.model, SIGMA_REL_TH)
            ev.process_NED2012(self.NED2012_path)
            ev.print_res()
            sd_SD, sd_SI = ev.get_res()
            logs["NED2012 SD component error"] = np.array(sd_SD, dtype="float64")
            logs["NED2012 SI component error"] = np.array(sd_SI, dtype="float64")
Example #21
File: embedder.py  Project: pcy1302/TapEM
    def __init__(self, args):
        # Parameter settings
        # NOTE: eval() here parses list-valued CLI strings such as "[1, 5, 10]".
        self.top_K = [int(elem) for elem in eval(args.top_K)]
        self.embedder = args.embedder
        self.embed_d = args.embed_dim
        self.hidden_n = args.embed_dim
        self.c_len = args.c_len
        self.c_reg = args.c_reg
        self.margin_d = args.margin_d
        self.c_tradeoff = args.c_tradeoff
        self.batch_s = args.batch_size
        self.lr = args.learn_rate
        self.iter_max = args.train_iter_max
        self.data_path = args.data_path
        self.version = args.version
        self.year = args.year

        self.model_path = args.model_path
        self.gpu_num = gpu_dict[args.gpu_num]

        self.device = torch.device(
            "cuda:" +
            str(self.gpu_num) if torch.cuda.is_available() else "cpu")

        self.input_data = Dataset(args)
        self.word_embed = self.input_data.word_embed
        self.word_n = self.word_embed.shape[0] - 2
        self.word_dim = self.word_embed.shape[1]

        self.dnn_dims = [int(elem) for elem in eval(args.dnn_dims)]
        self.num_ctx_neg = args.num_ctx_neg
        self.metric = args.metric
        self.early_stop = args.early_stop
        self.save = args.save
        self.init_std = args.init_std
        self.reg_ml = args.reg_ml
        self.reg_semi = args.reg_semi
        self.reg_ctx = args.reg_ctx
        self.ctx_window = args.ctx_window
        self.pretrain = args.pretrain
        self.rescale_grad = args.rescale_grad
        self.grad_norm = args.grad_norm
        self.ctx_ratio = args.ctx_ratio
        self.scheduler_factor = args.scheduler_factor
        self.scheduler_patience = args.scheduler_patience
        self.scheduler_threshold = args.scheduler_threshold
        self.pretrained_f_name = "_{}_{}".format(args.version, args.year)
        self.evaluator = Evaluator(self.input_data, self.metric,
                                   self.early_stop, self.top_K, self.save,
                                   args)
Example #22
File: train.py  Project: ChenX17/bdasr
        def maybe_save_model(config):
            mp = config.model_dir + '/model_epoch_{}'.format(epoch)
            model.saver.save(sess, mp)
            logger.info('Save model in %s.' % mp)

            if config.train.eval_on_dev:
                evaluator = Evaluator(config)
                evaluator.init_from_existed(config, model, sess)
                evaluator.translate(
                    config.dev.feat_file_pattern, config.dev.output_file +
                    'decode_result_epoch_' + '{}'.format(str(epoch)))
Example #23
class Trainer:
    def __init__(self, model, optimizer):
        self.model = model
        self.optimizer = optimizer
        self.evaluator = Evaluator(model)

    def train(self, epoch_num, train_X, train_y, val_X, val_y, save_every,
              save_dir, evaluate_every):

        for epoch in tqdm(range(epoch_num)):
            loss_history = []
            print('Epoch :%d ' % (epoch + 1))

            while True:
                X, y = train_X.next_batch(), train_y.next_batch()
                if X is None:
                    # The two streams should be exhausted together; report y if not.
                    if y is not None:
                        print(y)
                    train_X.rewind()
                    train_y.rewind()
                    break
                self.model.zero_grad()
                probs = self.model(*X)
                loss = loss_function(probs, y)
                loss.backward()
                clip = 5.0
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), clip)
                self.optimizer.step()
                loss_history.append(loss.item())

            print('avg loss is :%.3f' % (np.array(loss_history).mean()))

            if (epoch + 1) % save_every == 0:
                print('save model to %s on epoch %d' % (save_dir, epoch + 1))
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)
                torch.save({'epoch': epoch,
                            'model': self.model.state_dict(),
                            'optimizer': self.optimizer.state_dict(),
                            'model_hyper': self.model.hyper_parameters()},
                           os.path.join(save_dir, '%d.pkl' % epoch))

            if (epoch + 1) % evaluate_every == 0:
                print('validation on epoch : %d' % (epoch + 1))
                loss, accu = self.evaluator.evaluate(val_X, val_y,
                                                     loss_function, True, True)
                print('(loss,accu)--> (%.3f,%.3f)' % (loss, accu))
Example #24
def main(cosim, algs):
    dense = cosim.matrix.toarray()
    for nc in NUM_COMPONENTS:
        print()
        print()
        print('=' * 80)
        print('Results for all algorithms with %d components' % nc)
        print('=' * 80)
        print()

        for name in ALGS:
            f = getattr(sklearn.decomposition, name)
            alg = f(n_components=nc)
            if name in SPARSE_ALGS:
                embedding = alg.fit_transform(cosim.matrix)
            else:
                embedding = alg.fit_transform(dense)
            evaluator = Evaluator(cosim, embedding)

            print()
            print('results for', name, 'rank', nc, ':')
            evaluator.evaluate_examples()
            evaluator.evaluate_precision()
            evaluator.evaluate_correlation()
Example #25
def inference_validation(args):
    '''Inference and calculate metrics on validation data. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      model_type: string, e.g. 'Cnn_9layers'
      iteration: int
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    model_type = args.model_type
    subtask = args.subtask  # used below to pick the evaluation mode
    holdout_fold = args.holdout_fold
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    visualize = args.visualize
    filename = args.filename
    data_length = args.data_length

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    in_domain_classes_num = len(config.labels)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_csv = 'fold1_train.csv'

    validate_csv = 'fold1_test.csv'

    feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins.h5'.format(prefix, frames_per_second,
                                                mel_bins))

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins.h5'.format(prefix, frames_per_second,
                                                mel_bins))

    checkpoint_path = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'holdout_fold={}'.format(holdout_fold), model_type,
        '{}_iterations.pth'.format(iteration))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    Model = eval(model_type)

    model = Model(in_domain_classes_num, activation='logsoftmax')
    loss_func = nll_loss

    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = DataGenerator(feature_hdf5_path=feature_hdf5_path,
                                   train_csv=train_csv,
                                   validate_csv=validate_csv,
                                   holdout_fold=holdout_fold,
                                   scalar=scalar,
                                   batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          cuda=cuda)

    if subtask in ['a', 'c']:
        evaluator.evaluate(data_type='validate', verbose=True)

    elif subtask == 'b':
        evaluator.evaluate(data_type='validate', verbose=True)
        evaluator.evaluate(data_type='validate', verbose=True)
        evaluator.evaluate(data_type='validate', verbose=True)

    # Visualize log mel spectrogram
    if visualize:
        evaluator.visualize(data_type='validate')
Example #26
def evaluate(cfg, args):
    device = select_device(args.device)
    # Initialize/load model
    if cfg.MODEL.META_ARCHITECTURE:

        # Initialize model
        model = YOLOv3(cfg).to(device)

        # Load weights
        if cfg.TEST.WEIGHTS.endswith(".pth"):
            state = torch.load(cfg.TEST.WEIGHTS, map_location=device)
            model.load_state_dict(state["state_dict"])
        else:
            load_darknet_weights(model, cfg.TEST.WEIGHTS)

        if device.type != "cpu" and torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        warnings.warn(
            "WARNING: backbone network cannot be empty! "
            f"Loading the default Darknet53 meta architecture for `{cfg.CONFIG_FILE}`!"
        )
        model = YOLOv3(cfg).to(device)

    if cfg.TEST.TASK == "visual":
        images = os.listdir(os.path.join(os.getcwd(), "data", "test"))
        for filename in images:
            path = os.path.join(os.getcwd(), "data", "test", filename)

            images = cv2.imread(path)
            assert images is not None

            bboxes_prd = Evaluator(model, cfg=cfg).get_bbox(images)
            if bboxes_prd.shape[0] != 0:
                boxes = bboxes_prd[..., :4]
                class_inds = bboxes_prd[..., 5].astype(np.int32)
                scores = bboxes_prd[..., 4]

                visualize_boxes(image=images,
                                boxes=boxes,
                                labels=class_inds,
                                probs=scores,
                                class_labels=cfg.CLASSES)
                path = os.path.join(f"./outputs/{filename}")

                cv2.imwrite(path, images)

    elif cfg.TEST.TASK == "eval":
        maps = 0.
        with torch.no_grad():
            aps = Evaluator(model, visiual=True,
                            cfg=cfg).calculate_aps(cfg.TEST.MULTI_SCALE,
                                                   cfg.TEST.FLIP)

            for i in aps:
                print(f"{i:25s} --> mAP : {aps[i]:.4f}")
                maps += aps[i]
            maps = maps / len(cfg.CLASSES)
            print(f'mAP:{maps:.6f}')

        return maps
Example #27
  parser.add_argument('--task', dest='task', action='store', required=True, type=str, help='Which task (tac08)')
  parser.add_argument('--soldir', dest='soldir', action='store', required=True, type=str, help='Path to the ILP solution')
  parser.add_argument('--outputdir', dest='outputdir', action='store', type=str, help='Path to the output summary')
  parser.add_argument('--method', dest='method', action='store', default='joint', type=str, help='Joint or extractive')

  args = parser.parse_args()
  sol_dir = args.soldir
  print(sol_dir)
  sol_files = glob.glob('%s/*.sol' % sol_dir)
  if args.outputdir:
    output_dir = args.outputdir
  else:
    output_dir = re.sub(r'(.*)/[^/]*$', r'\1/summary', args.soldir)
  if not os.path.isdir(output_dir):
    os.makedirs(output_dir)
  gold_dir = '%s/dat/%s/models' % (base_dir, args.task)
  task = args.task

  for sol_file in sol_files:
    solution = get_solution(sol_file)
    problem = re.search(r'.*/([^/]*).sol', sol_file).group(1)
    sent_file = '%s/dat/%s/sents/%s.aligned' % (base_dir, task, problem)
    summary = get_summary(sent_file, solution)
    output_file = '%s/%s' % (output_dir, problem[:5]+problem[6:])
    with open(output_file, 'w') as fout:
      for sent in summary:
        fout.write('%s\n' % ' '.join(sent))

  # evaluate
  evaluator = Evaluator(gold_dir, output_dir, log=sys.stdout)
  evaluator.evaluate()
  evaluator.clear_config()
Example #28
def inference_validation(args):
    '''Inference and calculate metrics on validation data. 
    
    Args: 
      dataset_dir: string, directory of dataset
      subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
      data_type: 'development'
      workspace: string, directory of workspace
      model_type: string, e.g. 'Cnn_9layers'
      iteration: int
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    subtask = args.subtask
    data_type = args.data_type
    workspace = args.workspace
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    visualize = args.visualize
    filename = args.filename
    
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    
    sources = get_sources(subtask)
    in_domain_classes_num = len(config.labels) - 1
    
    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''
        
    sub_dir = get_subdir(subtask, data_type)
    
    train_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup', 
        'fold1_train.csv')
        
    validate_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup', 
        'fold1_evaluate.csv')
                
    feature_hdf5_path = os.path.join(workspace, 'features', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}.h5'.format(sub_dir))
        
    scalar_path = os.path.join(workspace, 'scalars', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}.h5'.format(sub_dir))
        
    checkpoint_path = os.path.join(workspace, 'checkpoints', filename, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), 
        model_type, '{}_iterations.pth'.format(iteration))
    
    logs_dir = os.path.join(workspace, 'logs', filename, args.mode, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), 
        model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)
        
    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    Model = eval(model_type)
    
    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax')
        loss_func = nll_loss
        
    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy
        
    #checkpoint = torch.load(checkpoint_path)
    #model.load_state_dict(checkpoint['model'])
    
    if cuda:
        model.cuda()
        
    # Data generator
    data_generator = DataGenerator(
        feature_hdf5_path=feature_hdf5_path, 
        train_csv=train_csv, 
        validate_csv=validate_csv, 
        holdout_fold=holdout_fold, 
        scalar=scalar, 
        batch_size=batch_size)
    
    # Evaluator
    evaluator = Evaluator(
        model=model, 
        data_generator=data_generator, 
        subtask=subtask, 
        cuda=cuda)
    
    if subtask in ['a', 'c']:
        evaluator.evaluate(data_type='validate', source='a', verbose=True)
        
    elif subtask == 'b':
        evaluator.evaluate(data_type='validate', source='a', verbose=True)
        evaluator.evaluate(data_type='validate', source='b', verbose=True)
        evaluator.evaluate(data_type='validate', source='c', verbose=True)
    
    # Visualize log mel spectrogram
    if visualize:
        evaluator.visualize(data_type='validate', source='a')
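
get_sources and get_subdir are not shown. Plausible definitions for the DCASE2019 Task1 setup, inferred from how their results are consumed above (assumptions, not the project's code):

def get_sources(subtask):
    # Subtask (b) uses data recorded by three devices; (a) and (c) use device a only.
    return ['a', 'b', 'c'] if subtask == 'b' else ['a']

def get_subdir(subtask, data_type):
    # e.g. ('a', 'development') -> 'TAU-urban-acoustic-scenes-2019-development'
    middle = {'a': '', 'b': 'mobile-', 'c': 'openset-'}[subtask]
    return 'TAU-urban-acoustic-scenes-2019-{}{}'.format(middle, data_type)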
Example #29
def train(args):
    '''Training. Model will be saved after several iterations. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
      data_type: 'development' | 'evaluation'
      holdout_fold: '1' | 'none', set 1 for development and none for training 
          on all data without validation
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''
    
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    fixed = args.fixed
    finetune = args.finetune
    ite_train = args.ite_train
    ite_eva = args.ite_eva
    ite_store = args.ite_store
    
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = None      # Number of mini-batches to evaluate on training data
    reduce_lr = True
    
    sources_to_evaluate = get_sources(subtask)
    in_domain_classes_num = len(config.labels) - 1
    
    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''
    
    sub_dir = get_subdir(subtask, data_type)
    
    train_csv = os.path.join(dataset_dir, sub_dir, 'meta.csv')
        
    validate_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup', 
        'fold1_evaluate.csv')
                
    feature_hdf5_path = os.path.join(workspace, 'features', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}.h5'.format(sub_dir))
        
    scalar_path = os.path.join(workspace, 'scalars', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}.h5'.format(sub_dir))
        
    checkpoints_dir = os.path.join(workspace, 'checkpoints', filename, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), 
        model_type)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(workspace, 'statistics', filename, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), 
        model_type, 'validate_statistics.pickle')
    
    create_folder(os.path.dirname(validate_statistics_path))
    
    logs_dir = os.path.join(workspace, 'logs', filename, args.mode, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)
    
    # Model
    Model = eval(model_type)
    
    if subtask in ['a', 'b']:
        if fixed == 'True':
            model = Model(in_domain_classes_num, activation='logsoftmax', fixed=True)
        else:
            model = Model(in_domain_classes_num, activation='logsoftmax', fixed=False)
        loss_func = nll_loss
        
    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy

    if cuda:
        model.cuda()
    
    # Optimizer
    if fixed == 'True':
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                               lr=1e-3, betas=(0.9, 0.999), eps=1e-08,
                               weight_decay=0., amsgrad=True)
    else:
        optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
                               eps=1e-08, weight_decay=0., amsgrad=True)

    if finetune == 'True':
        model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/'+model_type+'/2000_iterations.pth'
        #model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/Logmel_Res38/2000_iterations.pth'
        #model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/Logmel_Cnn14/2000_iterations.pth'
        #model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/Logmel_Cnn10/2000_iterations.pth'
        #model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/Logmel_MobileNetV2/2000_iterations.pth'
        #model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/Logmel_MobileNetV1/2000_iterations.pth'
        #model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/Logmel_Wavegram_Cnn14/2000_iterations.pth'
        device = torch.device('cuda')
        checkpoint = torch.load(model_path, map_location=device)
        model.load_state_dict(checkpoint['model'])
        
    # Data generator
    data_generator = DataGenerator(
        feature_hdf5_path=feature_hdf5_path, 
        train_csv=train_csv, 
        validate_csv=validate_csv, 
        holdout_fold=holdout_fold, 
        scalar=scalar, 
        batch_size=batch_size)
    
    # Evaluator
    evaluator = Evaluator(
        model=model, 
        data_generator=data_generator, 
        subtask=subtask, 
        cuda=cuda)
    
    # Statistics
    validate_statistics_container = StatisticsContainer(validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0
    
    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():
        
        # Evaluate
        if iteration % 200 == 0 and iteration > ite_eva:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            for source in sources_to_evaluate:
                train_statistics = evaluator.evaluate(
                    data_type='train', 
                    source=source, 
                    max_iteration=None, 
                    verbose=False)
            
            if holdout_fold != 'none':
                for source in sources_to_evaluate:
                    validate_statistics = evaluator.evaluate(
                        data_type='validate', 
                        source=source, 
                        max_iteration=None, 
                        verbose=False)

                    validate_statistics_container.append_and_dump(
                        iteration, source, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'Train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 200 == 0 and iteration > ite_store:
            checkpoint = {
                'iteration': iteration, 
                'model': model.state_dict(), 
                'optimizer': optimizer.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))
                
            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))
            
        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.93
        
        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'feature_gamm', 'feature_mfcc', 'feature_panns', 'target']:
                batch_data_dict[key] = move_data_to_gpu(batch_data_dict[key], cuda)
        
        # Train
#         batch_output,batch_loss = model(batch_data_dict['feature'], batch_data_dict['feature_gamm'], batch_data_dict['feature_mfcc'], batch_data_dict['feature_panns'])
#         loss = loss_func(batch_output, batch_data_dict['target'])
    
        # Using Mixup
        model.train()
        mixed_x1, mixed_x2, mixed_x3, mixed_x4, y_a, y_b, lam = mixup_data(
            x1=batch_data_dict['feature'],
            x2=batch_data_dict['feature_gamm'],
            x3=batch_data_dict['feature_mfcc'],
            x4=batch_data_dict['feature_panns'],
            y=batch_data_dict['target'],
            alpha=0.2)
        batch_output, batch_loss = model(mixed_x1, mixed_x2, mixed_x3, mixed_x4)

        if batch_output.shape[1] == 10:  # single-scale models
            loss = mixup_criterion(loss_func, batch_output, y_a, y_b, lam)
        else:  # multi-scale models
            losses = []
            for ite in range(batch_output.shape[1] - 1):
                loss = mixup_criterion(loss_func, batch_output[:, ite, :], y_a, y_b, lam)
                losses.append(loss)
            loss = sum(losses)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        # 12000 for scratch
        if iteration == ite_train:
            break
            
        iteration += 1
Example #30
def train(args, i):
    '''Training. Model will be saved after several iterations. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      holdout_fold: '1' | 'none', set 1 for development and none for training 
          on all data without validation
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    audio_num = config.audio_num
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = None  # Number of mini-batches to evaluate on training data
    reduce_lr = True
    in_domain_classes_num = len(config.labels)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_csv = os.path.join(sys.path[0], 'fold' + str(i) + '_train.csv')

    validate_csv = os.path.join(sys.path[0], 'fold' + str(i) + '_test.csv')

    feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins.h5'.format(prefix, frames_per_second,
                                                mel_bins))

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins.h5'.format(prefix, frames_per_second,
                                                mel_bins),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')

    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Model
    Model = eval(model_type)

    model = Model(in_domain_classes_num, activation='logsoftmax')
    loss_func = nll_loss

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)
    #     optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-5)
    # Data generator
    data_generator = DataGenerator(feature_hdf5_path=feature_hdf5_path,
                                   train_csv=train_csv,
                                   validate_csv=validate_csv,
                                   holdout_fold=holdout_fold,
                                   batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 100 == 0 and iteration >= 1500:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            train_statistics = evaluator.evaluate(data_type='train',
                                                  iteration=iteration,
                                                  max_iteration=None,
                                                  verbose=False)

            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(data_type='validate',
                                                         iteration=iteration,
                                                         max_iteration=None,
                                                         verbose=False)
                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()


        # Save model
        if iteration % 100 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 100 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train
        for i in range(audio_num):
            model.train()
            data, target_a, target_b, lam = mixup_data(
                x=batch_data_dict['feature'][:, i, :, :],
                y=batch_data_dict['target'],
                alpha=0.2)
            batch_output = model(data)
            # batch_output = model(batch_data_dict['feature'])

            # Mixup loss: combine the losses for the two mixed targets
            loss = mixup_criterion(loss_func, batch_output, target_a, target_b,
                                   lam)

            # Backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Stop learning
        if iteration == 4000:
            break

        iteration += 1
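
The training loop above relies on mixup_data and mixup_criterion, which are not defined in this snippet. A minimal sketch of the standard mixup helpers (in the sense of Zhang et al.'s mixup; the project's actual implementation may differ):

import numpy as np
import torch

def mixup_data(x, y, alpha=0.2):
    # Draw lam ~ Beta(alpha, alpha) and mix the batch with a shuffled copy.
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0), device=x.device)
    mixed_x = lam * x + (1.0 - lam) * x[index]
    return mixed_x, y, y[index], lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    # The mixed loss is the same convex combination of the two target losses.
    return lam * criterion(pred, y_a) + (1.0 - lam) * criterion(pred, y_b)
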
Example #31
File: model.py Project: yysherlock/msae
    def train(self):
        outputPrefix=self.readField(self.config,self.name,"output_directory")
        outputDir=os.path.join(outputPrefix,self.name)
        if not os.path.exists(outputDir):
            os.makedirs(outputDir)

        showFreq = int(self.readField(self.config, self.name, "show_freq"))
        if showFreq > 0:
            visDir = os.path.join(outputDir,'vis')
            if not os.path.exists(visDir):
                os.mkdir(visDir)
        # Normalize images here if they were not normalized beforehand
        normalize=self.str2bool(self.readField(self.config, self.name, "normalize"))
        trainDataSize=int(self.readField(self.config, self.name, "train_size"))
        numBatch = trainDataSize // self.batchsize
        trainDataPath = self.readField(self.config, self.name, "train_data")
        if self.readField(self.config,self.name,"extract_reps")=="True":
            trainRepsPath=self.readField(self.config, self.name, "train_reps")
        else:
            trainRepsPath=None
        trainDataLoader=DataHandler(trainDataPath, trainRepsPath, self.vDim, self.hDim, self.batchsize,numBatch, normalize)

        evalFreq=int(self.readField(self.config,self.name,'eval_freq'))
        if evalFreq!=0:
            qsize=int(self.readField(self.config, self.name, "query_size"))
            evalPath=self.readField(self.config,self.name,"validation_data")
            labelPath=self.readField(self.config,self.name,"label")
            queryPath=self.readField(self.config, self.name, "query")
            label=np.load(labelPath)
            evaluator = Evaluator(queryPath, label, os.path.join(outputDir, 'perf'), self.name, query_size=qsize, verbose=self.verbose)
            validation_data=gp.garray(np.load(evalPath))
            if normalize:
                validation_data=trainDataLoader.doNormalization(validation_data)

        maxEpoch = int(self.readField(self.config, self.name, "max_epoch"))

        nCommon, nMetric, title=self.getDisplayFields()
        if self.verbose:
            print(title)
        for epoch in range(maxEpoch):
            perf=np.zeros( nMetric)
            trainDataLoader.reset()
            for i in range(numBatch):
                batch = trainDataLoader.getOneBatch()
                curr = self.trainOneBatch(batch, epoch, computeStat=True)
                perf=self.aggregatePerf(perf, curr)

            if showFreq != 0 and (1 + epoch) % showFreq == 0:
                validation_code = self.getReps(validation_data)
                np.save(os.path.join(visDir, '%dvis' % (1 + epoch)), validation_code)
            if evalFreq != 0 and (1 + epoch) % evalFreq == 0:
                validation_code = self.getReps(validation_data)
                evaluator.evalSingleModal(validation_code, epoch, self.name + 'V')
                validation_code = None
            if self.verbose:
                self.printEpochInfo(epoch,perf,nCommon)

        if self.readField(self.config,self.name,"checkpoint")=="True":
            self.doCheckpoint(outputDir)

        if self.readField(self.config,self.name,"extract_reps")=="True":
            if evalFreq!=0:
                validation_reps_path=self.readField(self.config, self.name, "validation_reps")
                self.extractValidationReps(validation_data, validation_reps_path)
            self.extractTrainReps(trainDataLoader, numBatch)

        self.saveConfig(outputDir)
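
Every hyperparameter in the train() method above comes from an INI-style config read through readField(config, section, key). A hypothetical section covering the keys queried above (the section name stands in for self.name, and all paths and values are illustrative, not taken from the project):

import configparser

config = configparser.ConfigParser()
config.read_string("""
[msae]
output_directory = ./output
show_freq = 10
normalize = True
train_size = 50000
train_data = ./data/train.npy
extract_reps = True
train_reps = ./output/train_reps.npy
eval_freq = 5
query_size = 100
validation_data = ./data/validation.npy
label = ./data/labels.npy
query = ./data/query.npy
max_epoch = 100
checkpoint = True
validation_reps = ./output/validation_reps.npy
""")
print(config.get('msae', 'max_epoch'))  # prints: 100
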
Example #32
def train(args):
    '''Training. Model will be saved after several iterations. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      holdout_fold: '1' | 'None', where '1' indicates using validation and 
          'None' indicates using full data for training
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    seq_len = 640
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = 10  # Number of mini-batches to evaluate on training data
    reduce_lr = True

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    validate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'validate.h5')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    _temp_submission_path = os.path.join(
        workspace, '_temp_submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type, '_submission.csv')
    create_folder(os.path.dirname(_temp_submission_path))

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    annotation_path = os.path.join(dataset_dir, 'annotations.csv')

    yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml')

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    model = Model(classes_num, seq_len, mel_bins, cuda)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)
    print('cliqueNet parameters:',
          sum(param.numel() for param in model.parameters()))
    # Data generator
    data_generator = DataGenerator(train_hdf5_path=train_hdf5_path,
                                   validate_hdf5_path=validate_hdf5_path,
                                   holdout_fold=holdout_fold,
                                   scalar=scalar,
                                   batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          taxonomy_level=taxonomy_level,
                          cuda=cuda,
                          verbose=False)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 200 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}, {} level statistics:'.format(
                iteration, taxonomy_level))

            train_fin_time = time.time()

            # Evaluate on training data
            if mini_data:
                raise Exception('`mini_data` flag must be set to False to use '
                                'the official evaluation tool!')

            train_statistics = evaluator.evaluate(data_type='train',
                                                  max_iteration=None)

            # Evaluate on validation data
            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    submission_path=_temp_submission_path,
                    annotation_path=annotation_path,
                    yaml_path=yaml_path,
                    max_iteration=None)

                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 1000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'fine_target', 'coarse_target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_output = model(batch_data_dict['feature'])

        # loss
        batch_target = batch_data_dict['{}_target'.format(taxonomy_level)]
        loss = binary_cross_entropy(batch_output, batch_target)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 3000:
            break

        iteration += 1
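
StatisticsContainer is only exercised through its constructor and append_and_dump(); a minimal pickle-backed sketch consistent with that usage (the method name comes from the snippet, the internals are assumed):

import pickle

class StatisticsContainer(object):
    """Accumulate per-iteration validation statistics and dump them to disk."""

    def __init__(self, statistics_path):
        self.statistics_path = statistics_path
        self.statistics = []  # list of (iteration, statistics) pairs

    def append_and_dump(self, iteration, statistics):
        self.statistics.append((iteration, statistics))
        # Rewrite the whole pickle each time so the file on disk is always complete.
        with open(self.statistics_path, 'wb') as f:
            pickle.dump(self.statistics, f)
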
Example #33
def main(_):
    num_gpus = get_num_gpus()
    assert num_gpus
    print('num_gpus', num_gpus)

    batch_size = FLAGS.batch_size

    # ordinal2 is better than ordinal since it needs only 4 bits instead of 5.
    # classification, sigmoid_regression and ordinal2_classification give
    # similar results; classification may be slightly better.
    # regression tends to predict class 4 less often.
    loss_types = ['classification', 'linear_regression', \
                  'sigmoid_regression', 'sigmoid_regression_mae', 'sigmoid2_regression', \
                  'ordinal_classification', 'ordinal2_classification',
                  'earth_classification', 'kappa_classification']

    loss_type = FLAGS.loss_type
    print('loss_type', loss_type)
    assert loss_type in loss_types

    #----------- read input
    df_train = pd.read_csv('../input/aptos2019-blindness-detection/train.csv')
    df_test = pd.read_csv('../input/aptos2019-blindness-detection/test.csv')

    x = df_train['id_code']
    y = df_train['diagnosis']
    x, y = shuffle(x, y, random_state=random_state)

    # https://stackoverflow.com/questions/48508036/sklearn-stratifiedkfold-valueerror-supported-target-types-are-binary-mul
    # StratifiedKFold raises: "Supported target types are: ('binary', 'multiclass'). Got 'multilabel-indicator' instead."
    # so the split cannot happen after to_categorical
    train_x, valid_x, train_y, valid_y = folds.get_train_valid(
        x, y, FLAGS.fold, FLAGS.num_folds, random_state=2019)

    # # check if exactly same as gen-folds, so fold 0 valid.csv should be same as ../input/train_0.csv
    # df = pd.DataFrame()
    # df['id_code'] = valid_x
    # df['diagnosis'] = valid_y
    # df.to_csv('valid.csv', index=False)

    train_y = to_categorical(train_y, num_classes=NUM_CLASSES)
    train_y = trans_y(train_y, loss_type)

    valid_y = to_categorical(valid_y, num_classes=NUM_CLASSES)
    valid_y = trans_y(valid_y, loss_type)

    #----------  init
    train_data = Dataset(train_x, train_y, 128, is_train=True)

    # batch_size_ = int(batch_size * (3 / 4)) if num_gpus == 1 else batch_size
    batch_size_ = int(batch_size * (3 / 4)) * FLAGS.multiplier  # 24 48

    print('batch_size_', batch_size_)
    train_mixup = Dataset(train_x,
                          train_y,
                          batch_size_,
                          is_train=True,
                          mix=False,
                          augment=True)
    valid_data = Dataset(valid_x, valid_y, batch_size, is_train=False)

    # Train step 1: warm up the model
    model = create_model(input_shape=(SIZE, SIZE, 3),
                         n_out=NUM_CLASSES,
                         loss_type=loss_type)

    for layer in model.layers:
        layer.trainable = False

    for i in range(-3, 0):
        model.layers[i].trainable = True

    if num_gpus > 1:
        smodel = model
        model = keras.utils.multi_gpu_model(model, num_gpus, cpu_merge=False)

    loss_fn = get_loss(loss_type)

    print('loss_fn', loss_fn)

    model.compile(loss=loss_fn, optimizer=Adam(1e-3))

    #model.summary()

    dir = '../working/{}/{}'.format(FLAGS.fold, loss_type)
    if FLAGS.multiplier > 1:
        dir += '_{}'.format(FLAGS.multiplier)
    if num_gpus > 1:
        dir += '_{}gpu'.format(num_gpus)

    print('dir:', dir)

    tb = TensorBoard(log_dir=dir,
                     histogram_freq=0,
                     write_graph=True,
                     write_images=False)

    evaluator = Evaluator(dir,
                          validation_data=(valid_data, valid_y),
                          interval=1,
                          loss_type=loss_type)

    ## For a quick sanity check of the evaluation; may cause wrong evaluation with multi-GPU. TODO FIXME
    # evaluator.model = model
    # evaluator.on_epoch_end(-1)
    # print('image_dict size', len(image_dict))

    model.fit_generator(train_data,
                        validation_data=valid_data,
                        epochs=2,
                        workers=WORKERS,
                        use_multiprocessing=True,
                        verbose=1,
                        callbacks=[evaluator, tb])

    # With use_multiprocessing=True, image_dict does not seem to get updated.
    print('image_dict size', len(image_dict))

    # Train step 2: train all layers
    checkpoint = ModelCheckpoint('{}/densenet_.h5'.format(dir),
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min',
                                 save_weights_only=True)
    reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss',
                                       factor=0.5,
                                       patience=4,
                                       verbose=1,
                                       mode='auto',
                                       epsilon=0.0001)
    early = EarlyStopping(monitor="val_loss", mode="min", patience=9)

    csv_logger = CSVLogger(filename='{}/training_log.csv'.format(dir),
                           separator=',',
                           append=True)

    # from lr import WarmUpCosineDecayScheduler
    # warmup_epoch = 2
    # warmup_steps = warmup_epoch * len(train_mixup)
    # warm_up_lr = WarmUpCosineDecayScheduler(learning_rate_base=2e-4,
    #                                         total_steps=len(train_mixup) * (epochs - 2),
    #                                         warmup_learning_rate=0.0,
    #                                         warmup_steps=warmup_steps,
    #                                         hold_base_rate_steps=0)

    if num_gpus > 1:
        model = smodel

    for layer in model.layers:
        layer.trainable = True

    callbacks_list = [checkpoint, csv_logger, reduceLROnPlat, early, evaluator, tb]
    # callbacks_list = [checkpoint, csv_logger, warm_up_lr, early, evaluator, tb]

    if num_gpus > 1:
        smodel = model
        model = keras.utils.multi_gpu_model(model, num_gpus, cpu_merge=False)

    lr = 1e-4
    lr *= FLAGS.multiplier
    # Note: if warm_up_lr is used, the lr set here has no effect.
    model.compile(loss=loss_fn, optimizer=Adam(lr=lr))

    epoch_now = 2
    model.fit_generator(
        train_mixup,
        validation_data=valid_data,
        epochs=epochs,
        verbose=1,
        ## FIXME: the checkpoint callback can fail to save the model: OSError: Unable to create file (unable to lock file, errno = 11, error message = 'Resource temporarily unavailable')
        ## Slower without multiprocessing (50 -> 57); the problem seems to occur only during validation when saving a checkpoint.
        # workers=WORKERS,
        # use_multiprocessing=True,
        workers=1,
        use_multiprocessing=False,
        callbacks=callbacks_list,
        initial_epoch=epoch_now)
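
The loss_type comment near the top notes that 'ordinal2' needs only 4 bits for the 5 diagnosis grades. A sketch of such a cumulative target encoding, as trans_y might produce it for 'ordinal2_classification' (the exact transform in this project is not shown, so this is an assumption based on that comment):

import numpy as np

def ordinal2_encode(y, num_classes=5):
    # Class k becomes k leading ones over num_classes - 1 sigmoid outputs,
    # e.g. with 5 classes: 0 -> 0000, 2 -> 1100, 4 -> 1111.
    y = np.asarray(y, dtype=int)
    out = np.zeros((len(y), num_classes - 1), dtype=np.float32)
    for i, k in enumerate(y):
        out[i, :k] = 1.0
    return out

At inference time the predicted grade is the count of outputs above 0.5, which is why 4 thresholds suffice for 5 ordered classes.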