Ejemplo n.º 1
0
def start_evaluating(run_config, train_config, data_config, model_config,
                     epoch=500):
    """Evaluate a saved checkpoint and pickle the evaluation output.

    Args:
        run_config: mapping of run settings. Reads 'cuda_device',
            'resume_path', 'log_root_path' and 'log_dir'; also passed to
            ``Logger``.
        train_config: mapping of training settings. Reads 'batch_size';
            also passed to ``eval_model``.
        data_config: passed to ``load_data``.
        model_config: passed to ``load_model``.
        epoch: checkpoint epoch handed to ``logger.load_epoch``. Defaults to
            500, the value that used to be hard-coded here.

    Raises:
        AssertionError: if ``run_config['resume_path']`` is None.
    """
    # hack to prevent the data loader from going on GPU 0
    import os
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(run_config['cuda_device'])
    # Only the configured device is left visible, so it is addressed as
    # index 0 rather than by run_config['cuda_device'].
    torch.cuda.set_device(0)

    logger = Logger(run_config)

    # load the data; only the test split is evaluated, falling back to the
    # validation split when no test split is returned
    _, val_data, test_data = load_data(data_config,
                                       train_config['batch_size'])
    if test_data is None:
        test_data = val_data

    # load the model
    print('Loading model...')
    model = load_model(model_config)

    assert run_config[
        'resume_path'] is not None, 'Run path must be set for evaluation.'
    print('Loading checkpoint ' + run_config['resume_path'])
    # Load the weights of one specific epoch (not logger.load_best).
    model = logger.load_epoch(model, epoch)

    print('Putting the model on the GPU...')
    model.cuda()

    model.eval()

    output = eval_model(test_data, model, train_config)
    # NOTE(review): the pickle is written to log_root_path/log_dir itself,
    # with no file name appended. If that path is a directory, open() will
    # fail - confirm against how Logger lays out its files.
    path = os.path.join(run_config['log_root_path'], run_config['log_dir'])
    with open(path, 'wb') as f:
        pickle.dump(output, f)
Ejemplo n.º 2
0
def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module,
                    data_loader: Iterable, optimizer: torch.optim.Optimizer,
                    device: torch.device, epoch: int, max_norm: float = 0):
    """Run one training pass over ``data_loader`` and return averaged meters.

    For every batch the input images are logged to wandb, the weighted loss
    is back-propagated (with optional gradient-norm clipping) and the loss
    terms are recorded in a ``utils.MetricLogger``. After the loop, the
    ground truth and predictions of the last batch are sent to ``Logger``.

    Args:
        model: network to train; switched to train mode.
        criterion: loss module, called as ``criterion(outputs, targets)``.
            It must expose ``weight_dict`` and its result must contain a
            'class_error' entry.
        data_loader: iterable of ``(samples, targets)`` batches. ``samples``
            must provide ``.to(device)`` and ``.tensors``; ``targets`` is an
            iterable of dicts whose values provide ``.to(device)``.
        optimizer: optimizer stepped once per batch.
        device: device the batch is moved to.
        epoch: only used in the progress header.
        max_norm: gradient-norm clipping threshold; clipping is applied only
            when this is greater than 0.

    Returns:
        dict mapping each meter name to its ``global_avg``.

    Note:
        The process is terminated with ``sys.exit(1)`` as soon as the
        reduced, weighted loss is not finite.
    """
    model.train()
    criterion.train()
    logger = Logger()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    metric_logger.add_meter('class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    header = 'Epoch: [{}]'.format(epoch)
    print_freq = 10

    # Most recent (samples, targets, outputs), kept for the post-epoch
    # logging. Stays None when data_loader yields no batch at all.
    last_batch = None

    for samples, targets in metric_logger.log_every(data_loader, print_freq, header):
        samples = samples.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        wandb.log({"Input Images": wandb.Image(samples.tensors, caption="Batch Input Images")})

        outputs = model(samples)
        last_batch = (samples, targets, outputs)

        loss_dict = criterion(outputs, targets)
        weight_dict = criterion.weight_dict
        # Only loss terms that have a weight contribute to the optimized loss.
        losses = sum(loss_dict[k] * weight_dict[k] for k in loss_dict if k in weight_dict)

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_unscaled = {f'{k}_unscaled': v
                                      for k, v in loss_dict_reduced.items()}
        loss_dict_reduced_scaled = {k: v * weight_dict[k]
                                    for k, v in loss_dict_reduced.items() if k in weight_dict}
        losses_reduced_scaled = sum(loss_dict_reduced_scaled.values())

        loss_value = losses_reduced_scaled.item()

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        if max_norm > 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        optimizer.step()

        metric_logger.update(loss=loss_value, **loss_dict_reduced_scaled, **loss_dict_reduced_unscaled)
        metric_logger.update(class_error=loss_dict_reduced['class_error'])
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    # Log output and GT of the last batch. Skipped when no batch was
    # processed: the loop variables would be unbound in that case and the
    # original code raised UnboundLocalError here.
    if last_batch is not None:
        last_samples, last_targets, last_outputs = last_batch
        logger.log_gt(last_samples.tensors, last_targets)
        logger.log_predictions(last_samples.tensors, last_outputs)
    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
def main():
    """Find pseudo-labels with the selected embedder/method and save them.

    Reads the command-line arguments, builds the embedder named by
    ``args.embedder`` and the pseudo-labeler named by ``args.method``, runs
    ``find_pseudo_labels`` and writes ``args.json`` plus
    ``pseudo_labels.jsonl`` into a newly created ``args.output`` directory.

    Raises:
        NotImplementedError: if the embedder or the method name is unknown.
    """
    args = get_args()
    args_dict = vars(args)
    check_args(args)

    if args.verbose:
        level = logging.DEBUG
    else:
        level = logging.WARNING
    logger = Logger('FSID', level=level)

    logger.info('Loading embedder...')
    # Factories are lazy so that only the chosen class is instantiated and
    # only the argument attributes it needs are read.
    embedder_factories = {
        "fastText": lambda: FastTextEmbedder(language=args.language),
        "elmo": lambda: ELMoEmbedder(language=args.language),
        "bert": lambda: BERTEmbedder(args.model_name_or_path),
    }
    make_embedder = embedder_factories.get(args.embedder)
    if make_embedder is None:
        raise NotImplementedError
    embedder = make_embedder()

    labeler_factories = {
        'nKNN': lambda: NaiveKNNPseudoLabeler(embedder=embedder),
        'spectral': lambda: SpectralPseudoLabeler(embedder=embedder),
        'fold-unfold': lambda: FoldUnfoldPseudoLabeler(embedder=embedder),
        'aggregated': lambda: AggregatedPseudoLabeler(embedder=embedder),
    }
    make_labeler = labeler_factories.get(args.method)
    if make_labeler is None:
        raise NotImplementedError
    pseudo_labeler = make_labeler()

    logger.info('Finding pseudo-labels...')
    pseudo_labels = pseudo_labeler.find_pseudo_labels(
        labeled_file_path=args.input_labeled_file,
        unlabeled_file_path=args.input_unlabeled_file,
        **args_dict)

    # The output directory is created only after labeling has finished;
    # os.makedirs raises if it already exists.
    output_dir = args.output
    os.makedirs(output_dir)
    args_path = os.path.join(output_dir, 'args.json')
    with open(args_path, 'w') as file:
        json.dump(args_dict, file, indent=2, ensure_ascii=False)
    labels_path = os.path.join(output_dir, 'pseudo_labels.jsonl')
    save_data_jsonl(pseudo_labels, labels_path)
Ejemplo n.º 4
0
import os
import re
import string
from typing import List

from util.constants import FASTTEXT_MODELS_PATH, ELMO_MODELS_PATH
from util.logging import Logger
import torch

logger = Logger('FSID')


class Embedder:
    """Base class for embedders.

    Stores the language and provides two hooks that do nothing here and
    return None.
    """

    def __init__(self, language=None):
        """Remember ``language`` on the instance."""
        self.language = language

    def check_download(self):
        """Hook with no behavior in the base class; returns None."""
        return None

    def embed_sentences(self, sentences, **kwargs):
        """Hook with no behavior in the base class; returns None."""
        return None


class FastTextEmbedder(Embedder):
    def __init__(self, language):
        """Store the language and load the fastText model.

        The model is loaded from whatever ``self.check_download()`` returns.
        """
        # Local import: fasttext is only imported when an instance is built.
        import fasttext

        super().__init__(language=language)
        self.language = language  # type: str
        model_location = self.check_download()
        self.fasttext = fasttext.load_model(model_location)
Ejemplo n.º 5
0
from util.optimization import load_opt_sched
from util.train_val import visualize

import matplotlib.pyplot as plt
import numpy as np
import cPickle
from util.plotting.audio_util import write_audio

import os
# Restrict this process to the configured GPU. These variables are set
# before the first torch.cuda call below.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]=str(run_config['cuda_device'])
# Index 0 is used instead of run_config['cuda_device'] because only the
# configured device is left visible to this process.
# torch.cuda.set_device(run_config['cuda_device'])
torch.cuda.set_device(0)

# initialize logging
logger = Logger(run_config)

# load the data; fall back to the test split when no validation split is
# returned
train_data, val_data, test_data = load_data(data_config, train_config['batch_size'])
if val_data is None:
    val_data = test_data

# the validation split (or its fallback) is what the rest of the script uses
data = val_data

# load the model, optimizers
print('Loading model...')
model = load_model(model_config)

# Only None is rejected here; an empty string would pass this check.
assert run_config['resume_path'] is not None, 'Model must be resumed from checkpoint.'

if run_config['resume_path']:
Ejemplo n.º 6
0
    # Parse the command line into a plain dict and echo it.
    args = vars(parser.parse_args())
    print(args)

    # Pull the individual options out of the dict.
    parameter_set_nr = args['parameter_set_nr']
    time_dim_df = args['time_dim_df']
    number_of_measurements = args['number_of_measurements']
    value_mask_file = args['value_mask_file']
    output_file = args['output_file']
    debug = args['debug']
    # The value mask is optional: it is loaded only when a file was given.
    if value_mask_file is not None:
        value_mask = np.load(value_mask_file)
    else:
        value_mask = None
    initial_individuals = args['initial_individuals']
    generations = args['generations']

    # Everything below runs inside the Logger context manager.
    with Logger(debug):
        # .format() binds to the string literal only (it is applied before
        # the +). The placeholder lives in that literal, so the parameter set
        # number is zero-padded to 5 characters as intended.
        p = np.loadtxt(
            SIMULATION_OUTPUT_DIR +
            '/model_dop_po4/time_step_0001/parameter_set_{:0>5}/parameters.txt'
            .format(parameter_set_nr))
        cf = CostFunction(p, time_dim_df=time_dim_df, value_mask=value_mask)
        p_opt = cf.optimize(number_of_measurements,
                            number_of_initial_individuals=initial_individuals,
                            number_of_generations=generations)
        # Store the optimization result in output_file.
        np.save(output_file, p_opt)

    print('FINISHED')
    # Exit with the default status.
    sys.exit()
Ejemplo n.º 7
0
from util.logging import Logger
from interface import plot_correlogram as plot

# One correlogram plot per minimum-measurements threshold, without the model,
# all inside a single Logger context.
with Logger():
    for min_measurements in [10, 25, 50, 100, 200, 500]:
        plot(min_measurements=min_measurements, show_model=False)
Ejemplo n.º 8
0
def start_training(run_config, train_config, data_config, model_config):
    """Train, validate, log and checkpoint in an endless epoch loop.

    Args:
        run_config: mapping of run settings. Reads 'cuda_device' and
            'resume_path'; also passed to ``Logger`` and ``Plotter``.
        train_config: mapping of training settings. Reads 'batch_size';
            also passed to ``load_opt_sched``, ``train``, ``validate`` and
            ``Plotter``.
        data_config: passed to ``load_data``, ``train``, ``validate`` and
            ``Plotter``.
        model_config: passed to ``load_model`` and ``Plotter``.

    This function never returns: the loop has no exit condition.
    """
    # hack to prevent the data loader from going on GPU 0
    environ = os.environ
    environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    environ["CUDA_VISIBLE_DEVICES"] = str(run_config['cuda_device'])
    # Only the configured device is visible, so it is selected as index 0.
    torch.cuda.set_device(0)

    # initialize logging and plotting
    logger = Logger(run_config)
    plotter = Plotter(logger.log_dir, run_config, train_config, model_config,
                      data_config)

    # load the data; validate on the test split when there is no
    # validation split
    batch_size = train_config['batch_size']
    train_data, val_data, test_data = load_data(data_config, batch_size)
    val_data = test_data if val_data is None else val_data

    # load the model, optimizers
    print('Loading model...')
    model = load_model(model_config)
    print('Loading optimizers...')
    optimizers, schedulers = load_opt_sched(train_config, model)

    resume_path = run_config['resume_path']
    if resume_path:
        print('Resuming checkpoint ' + resume_path)
        restored = logger.load_checkpoint(model, optimizers)
        model, optimizers, schedulers = restored

    print('Putting the model on the GPU...')
    model.cuda()

    while True:
        # training
        results = train(train_data, model, optimizers, train_config,
                        data_config)
        logger.log(results, 'Train')
        plotter.plot(results, 'Train')

        # validation
        if val_data:
            results = validate(val_data, model, train_config, data_config)
            logger.log(results, 'Val')
            plotter.plot(results, 'Val')

        # checkpoint only when the logger asks for it
        if logger.save_epoch():
            logger.save_checkpoint(model, optimizers)

        logger.step()
        plotter.step()
        # exactly the first two schedulers are stepped, in order
        for position in (0, 1):
            schedulers[position].step()
        plotter.save()
Ejemplo n.º 9
0
 def __init__(self):
     """Attach the default logger, then call the private ``__init_folders`` helper."""
     self.logger = Logger.defaultLogger()
     # Called after the logger is assigned. The helper is not visible here;
     # the double underscore means the name is mangled with the enclosing
     # class name.
     self.__init_folders()
Ejemplo n.º 10
0
from util.handlers import Handlers
from util.logging import Logger
from util.cache import Cache
from util.database import Database

# Command-line interface: a single optional --debug switch.
parser = argparse.ArgumentParser(description="Runs the FFF bot")
parser.add_argument(
    "--debug",
    action="store_true",
    help=("Add this argument if you wish to run the bot in debug mode which changes the token, prefix and loads some "
          "debug modules."),
)

# The command line is parsed when this module is loaded; the debug flag
# selects the logging level.
debug = parser.parse_args().debug
if debug:
    logging_level = "DEBUG"
else:
    logging_level = "INFO"
logger = Logger(level=logging_level).logger

# Configuration read through the project's JSON handler.
config = Handlers.JSON.read("config")


class FFF(commands.AutoShardedBot):
    def __init__(self, *args, **kwargs):
        """Create the bot and attach shared resources to the instance.

        All positional and keyword arguments are forwarded unchanged to the
        parent class constructor.
        """
        super().__init__(*args, **kwargs)
        # Module-level configuration; the 'bot' section is kept separately.
        self.config = config
        self.bot_config = self.config['bot']
        # HTTP client session, created at construction time.
        self.session = aiohttp.ClientSession()
        self.logger = logger
        self.cache = Cache()
        # Database wrapper built from the 'database' section of the config.
        self.database = Database(self.config['database'])
        # No pool yet; presumably assigned later - not visible here.
        self.pool = None
        # Debug flag taken from the module-level command-line parse.
        self.debug = debug