Beispiel #1
0
def main():
    """Train a binary classifier with early stopping on train/val splits."""
    args = get_parser().parse_args()
    print(args)

    # Resolve the split directories under the common data root.
    directories = {split: os.path.join(args.datadir, split)
                   for split in ('train', 'val')}

    print('loading train dataset')
    train_loader = get_dataloader(directories['train'], args.batch_size,
                                  args.pretrained, args.augmented)
    print('loading val dataset')
    # Validation data is never augmented.
    val_loader = get_dataloader(directories['val'], args.batch_size,
                                args.pretrained, False)

    # Binary task; fall back to CPU when CUDA is unavailable.
    args.num_class = 2
    args.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = Classifier(args=args)

    # One train pass + one validation pass per epoch, until the epoch
    # budget is spent or early stopping triggers.
    while model.counter['epochs'] < args.epochs:
        train(model=model, dataloader=train_loader)
        val(model=model, dataloader=val_loader)
        if model.early_stopping.early_stop:
            break

    if model.writer:
        model.writer.close()
Beispiel #2
0
def main():
    # Parse CLI arguments, then pin a few settings by hand.
    args = get_parser().parse_args()
    args.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    args.target_name = "LST_status"

    # One label per WSI folder, looked up by folder name (ID) in the table.
    table = pd.read_csv(args.table_data)
    list_wsi = os.listdir(args.wsi)
    list_lst = [
        table[table['ID'] == x][args.target_name].item() for x in list_wsi
    ]
    list_dataset = []

    ## Model initialisation
    model = Classifier(args=args)

    ## Dataset creation: temporarily point args.wsi at each slide folder,
    ## build its dataset, then restore args.wsi to the parent directory.
    for path in list_wsi:
        args.wsi = os.path.join(args.wsi, path)
        list_dataset.append(dataset(args))
        args.wsi = os.path.dirname(args.wsi)
    list_dataset = np.array(list_dataset)

    ## K-fold cross-validation, stratified on the labels.
    splitter = StratifiedKFold(n_splits=3)
    for r_eval, (id_train,
                 id_val) in enumerate(splitter.split(list_lst, list_lst)):

        model.name = 'repeat_val_{}'.format(r_eval)
        dataset_train = list_dataset[id_train]
        dataset_val = list_dataset[id_val]
        # Augmentation transforms only on the training folds.
        for db in dataset_train:
            db.transform = get_transform(train=True)
        for db in dataset_val:
            db.transform = get_transform(train=False)
        dataset_train = torch.utils.data.ConcatDataset(dataset_train)
        dataset_val = torch.utils.data.ConcatDataset(dataset_val)
        dataloader_train = DataLoader(dataset=dataset_train,
                                      batch_size=args.batch_size,
                                      num_workers=24)
        dataloader_val = DataLoader(dataset=dataset_val,
                                    batch_size=args.batch_size,
                                    num_workers=24)

        # NOTE(review): this call immediately discards the loaders built just
        # above (only the per-dataset transform assignments survive) — verify
        # which pair of loaders is actually intended here.
        dataloader_train, dataloader_val = make_loaders(args=args)

        # Train until the epoch budget is spent or early stopping fires.
        while model.counter['epochs'] < args.epochs:
            print("Begin training")
            train(model=model, dataloader=dataloader_train)
            val(model=model, dataloader=dataloader_val)
            if model.early_stopping.early_stop:
                break
        model.writer.close()
    def convert_to_arguments(self, configuration):
        """
        Translate a configuration dict into a dict of valid AWS API arguments.

        Keys with a dedicated parser are expanded through that parser; every
        other key is camel-cased and copied over unchanged.

        :param configuration: dict
        :return: dict
        """
        converted = {}

        for key, value in configuration.items():
            new_key = self.__to_camel_case(key)
            if arguments.parser_exists(key):
                converted.update(arguments.get_parser(key).parse(value))
            else:
                converted[new_key] = value

        return converted
Beispiel #4
0
    def convert_to_arguments(self, configuration):
        """
        Convert from the configuration dict to a valid dict to send as arguments to the AWS API
        :param configuration: dict
        :return: dict
        """
        configuration_converted = {}

        for key in configuration:
            # Camel-case the key up front; only used on the fallback branch.
            new_key = self.__to_camel_case(key)
            if arguments.parser_exists(key):
                # Delegate to the key-specific parser, which may expand one
                # configuration entry into several API arguments.
                configuration_converted.update(
                    arguments.get_parser(key).parse(configuration[key]))
            else:
                # No dedicated parser: copy the value under the camel-cased key.
                configuration_converted[new_key] = configuration[key]

        return configuration_converted
Beispiel #5
0
def main():
    """Entry point: parse args, then run VecRoad in test or training mode."""
    parser = get_parser()
    args = parser.parse_args()
    update_dir(args)

    print('===================== Parameters =======================')
    print('Device: {}'.format(args.device))
    print('Batch size: {}'.format(args.batch_size))
    print('Epoch number: {}'.format(args.epochs))
    print('Learning rate: {} || Decay rate: {}'.format(args.lr_rate,
                                                       args.weight_decay))
    print(
        '===================== Starting modified VecRoad ======================='
    )
    env = Environment(args)

    # Teacher forcing runs a single epoch at a fixed learning rate.
    if args.teacher_forcing_number > 0:
        args.epochs = 1
        args.lr_rate = 0.005

    def teacher_forcing_done():
        # Stop once the configured number of teacher-forcing steps is reached
        # (only meaningful when teacher forcing is enabled at all).
        return (env.training_step > args.teacher_forcing_number
                and args.teacher_forcing_number > 0)

    if args.test:
        time_start = time.time()
        env.network.val_mode()
        run_val(env, 0)
        print('Testing time usage: {}h'.format(
            round((time.time() - time_start) / 3600, 3)))
        return

    for _ in range(args.epochs):
        env.epoch_counter += 1
        for image_index, data in enumerate(env.network.dataloader_train):
            # Periodic validation every 1000 images (skipped at index 0);
            # keep a checkpoint whenever the F1 score improves.
            if image_index and image_index % 1000 == 0:
                env.network.val_mode()
                f1 = run_val(env, image_index)
                if f1 > env.network.best_f1:
                    env.network.best_f1 = f1
                    env.network.save_checkpoints(image_index)
            # Train on this single tiff image.
            env.network.train_mode()
            run_train(env, data, image_index)
            if teacher_forcing_done():
                break
        if teacher_forcing_done():
            break
Beispiel #6
0
def main():
    """Parse CLI args, prepare the save directory, and launch MORL."""
    print("Running")
    torch.set_default_dtype(torch.float64)

    args = get_parser().parse_args(sys.argv[1:])

    # A dataset path is mandatory.
    if args.dataset is None:
        print("No Dataset given, make argument for dataset path")
        return -1

    # Create the saving folder; ignore OS-level failures.
    try:
        os.makedirs(args.save_dir, exist_ok=True)
    except OSError:
        pass

    morl.run(args)
Beispiel #7
0
def main():
    """Train PPO via MORL with a fixed hyper-parameter preset.

    Command-line arguments are appended after the preset list, so they can
    override the defaults below. The resolved argument list is written to
    ``args.txt`` and stdout is teed into ``log.txt`` in the save folder.
    """
    torch.set_default_dtype(torch.float64)

    # ppo parameters
    args_list = ['--lr', '3e-4',
                 '--use-linear-lr-decay',
                 '--gamma', '0.995',
                 '--use-gae',
                 '--gae-lambda', '0.95',
                 '--entropy-coef', '0',
                 '--value-loss-coef', '0.5',
                 '--num-steps', '2048',
                 '--num-processes', '4',
                 '--ppo-epoch', '10',
                 '--num-mini-batch', '32',
                 '--use-proper-time-limits',
                 '--ob-rms',
                 '--obj-rms',
                 '--raw']

    solve_argv_conflict(args_list)
    parser = get_parser()
    args = parser.parse_args(args_list + sys.argv[1:])

    # Build the saving folder; ignore OS-level failures.
    save_dir = args.save_dir
    try:
        os.makedirs(save_dir, exist_ok=True)
    except OSError:
        pass

    # Record the effective argument list. Context manager closes the handle
    # even if the write fails (the original leaked it on error).
    with open(os.path.join(save_dir, 'args.txt'), 'w') as fp:
        fp.write(str(args_list + sys.argv[1:]))

    # Tee stdout into a log file for the duration of the run; the finally
    # clause guarantees the log file is closed even if morl.run raises.
    logfile = open(os.path.join(args.save_dir, 'log.txt'), 'w')
    sys.stdout = Logger(sys.stdout, logfile)
    try:
        morl.run(args)
    finally:
        logfile.close()
Beispiel #8
0
def parse_cl_args():
    #TODO: away with this function, in theory.
    '''
    Takes arguments from the command line and ignores as many of them as possible.

    Returns the parsed namespace with several attributes forced to fixed
    values regardless of what the user passed.
    '''

    # assume the user passes no args, and these are defaults/dummy
    #TODO: trash all of this
    parser = get_parser()

    parser.add_argument('--non-det', action='store_true', default=False,
                        help='whether to use a non-deterministic policy')
    parser.add_argument('--active-column', default=None, type=int,
                        help='Run only one vertical column of a fractal model to see what it\
                        has learnt independently')
    parser.add_argument('--evaluate', action='store_true', default=False,
                        help='record trained network\'s performance')
    # add any experiment-specific args here
    args = parser.parse_args()

    # Hard-coded overrides: in-memory rendering on, procedural randomness off.
    args.im_render = True
    args.random_terrain = False
    args.random_builds = False
    return args
Beispiel #9
0
import imageio
from PIL import Image, ImageDraw
import numpy as np
from multiprocessing import Pool
import subprocess
from scipy.spatial import cKDTree
import os
from skimage import measure
import pickle
import random
import json
import time
from arguments import get_parser, update_dir_candidate_train

# Parse CLI arguments and rewrite the record directories for the
# candidate-training stage.
parser = get_parser()
args = parser.parse_args()
update_dir_candidate_train(args)

# Input directories (from args) and the fixed skeleton output location.
tiff_dir = args.image_dir
gt_instance_mask_dir = args.instance_mask_dir
gt_mask_dir = args.mask_dir
skeleton_dir = './records/skeleton/train'

# Names of the images belonging to the training split.
with open('./dataset/data_split.json', 'r') as jf:
    images = json.load(jf)['train']


class Graph:
    """Container for curb polylines and the vertices they pass through."""

    def __init__(self):
        # Both collections start out empty and are filled in by the caller.
        self.curbs = []
        self.all_vertices = []
Beispiel #10
0
def main():
    """Train invrepnet end-to-end: per-epoch training, periodic validation,
    checkpointing, and an optional patch-matching task evaluation."""
    # create parser
    parser = arguments.get_parser()
    # global iteration counter, threaded through train_epoch
    n_iter = 0
    # cuDNN autotuner picks the fastest kernels for fixed-size inputs
    cudnn.benchmark = True
    # initialize training from arguments and create the args namespace
    training_writer, args = prepare_train.initialize_train(parser)
    # create data loaders
    train_loader, val_loader, args = prepare_train.create_dataloaders(args)
    # create invrepnet model
    invrep_net = prepare_train.create_model(args, device)
    # create optimizer
    optimizer = prepare_train.create_optimizer(args, invrep_net)

    # train loop by epoch
    for epoch in range(args.epochs):
        # train for one epoch
        train_loss, n_iter = run_epoch.train_epoch(args, train_loader,
                                                   invrep_net, optimizer,
                                                   args.epoch_size,
                                                   training_writer, epoch,
                                                   n_iter)
        print(' * Epoch: {}, Avg Train Loss : {:.3f}'.format(
            epoch, train_loss))

        # loss evaluation on the validation set, every validation_freq epochs
        if ((epoch + 1) % args.validation_freq
                == 0) and not args.skip_validation:
            val_loss, data_time, batch_time = run_epoch.validate_epoch(
                args, val_loader, invrep_net, args.epoch_size, training_writer,
                epoch)
            print(
                'Val: Epoch={}, AlgTime={:.3f}, DataTime={:.3f}, AvgLoss={:.4f}'
                .format(epoch, batch_time, data_time, val_loss))

        # save checkpoint of model
        utils.save_checkpoint(args.save_path, {
            'epoch': epoch + 1,
            'state_dict': invrep_net.module.state_dict()
        }, args.checkpoint_freq, epoch + 1)

        # patch matching task validation, at checkpoint epochs and at the end
        if args.with_task_val:
            if (epoch + 1) % args.checkpoint_freq == 0 or (epoch +
                                                           1) == args.epochs:
                # inference: run the freshly saved checkpoint on the task data
                print('==> Starting Inference\n')
                args.pretrained_model = os.path.join(
                    args.save_path, 'invrep_checkpoint.pth.tar')
                args.output = os.path.join(
                    args.save_path, 'inference_epoch_{}'.format(epoch + 1))
                run_inf(args)
                print('==> Finished Inference\n')
                # directories for image saving
                args.task_image_dirs = os.path.join(args.output, 'img')
                args.task_invrep_dirs = os.path.join(args.output, 'rep')

                print('==> Starting Template Matching\n')
                pmres, methods = run_pm(pm_mode=args.pm_mode, args=args)
                # log one scalar per method and per patch size (32/64/128)
                for rr, m_res in enumerate(pmres):
                    training_writer.add_scalar(
                        'val-task/pm_{}_32'.format(methods[rr]),
                        np.around(m_res[0], decimals=4), epoch + 1)
                    training_writer.add_scalar(
                        'val-task/pm_{}_64'.format(methods[rr]),
                        np.around(m_res[1], decimals=4), epoch + 1)
                    training_writer.add_scalar(
                        'val-task/pm_{}_128'.format(methods[rr]),
                        np.around(m_res[2], decimals=4), epoch + 1)
                print('==> Finished Template Matching\n')
Beispiel #11
0
def main():
    """Train and evaluate an SWWAE autoencoder on a CLI-selected dataset.

    Supported datasets: cifar10, mnist, fashion, stl10. Training uses
    Keras-style on-the-fly augmentation; evaluation runs over the held-out
    test batches.
    """
    clear_loss()
    parser = get_parser()
    parsed = parser.parse_args()

    # NOTE: `assert` is stripped under ``python -O``; kept as-is to preserve
    # the original CLI contract (output_dir and save_step go together).
    assert ((parsed.output_dir is None and parsed.save_step is None) or
            (parsed.output_dir is not None and parsed.save_step is not None)), "Save step and output directory must be " \
                                                                               "null at the same time or not null at the same time"

    ds_type = parsed.dataset

    # Select dataset and its image shape. Consistently tested via ds_type —
    # the original mixed ds_type and parsed.dataset in the same chain.
    if ds_type == 'cifar10':
        dataset = CIFAR10Dataset()
        dataset.process()
        img_shape = [32, 32, 3]
    elif ds_type == 'mnist':
        dataset = MNISTDataset()
        dataset.process()
        img_shape = [28, 28, 1]
    elif ds_type == 'fashion':
        dataset = FashionDataset()
        dataset.process()
        img_shape = [28, 28, 1]
    elif ds_type == 'stl10':
        dataset = STLDataset(is_ae=True)
        dataset.process()
        img_shape = [96, 96, 3]
    else:
        print("Unknown dataset")
        exit()

    layers = parse_layers(parsed.layer_str)
    fc_size = parsed.fc_layers

    # Build the model graph.
    sess = tf.Session()
    swwae = SWWAE(sess,
                  img_shape,
                  'autoencode',
                  layers,
                  learning_rate=parsed.learning_rate,
                  lambda_rec=parsed.lambda_rec,
                  lambda_M=parsed.lambda_M,
                  dtype=tf.float32,
                  tensorboard_id=parsed.tensorboard_id,
                  encoder_train=True,
                  rep_size=fc_size,
                  batch_size=parsed.batch_size,
                  sparsity=parsed.sparsity,
                  beta=parsed.beta)

    # Optionally warm-start from a restore directory.
    if parsed.rest_dir is not None:
        swwae.restore(parsed.rest_dir)

    X_test, _ = dataset.get_batches(parsed.batch_size, train=False)
    test_steps = len(X_test)

    print("Preprocessing")
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=
        False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=
        0.0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=
        0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=
        0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False)  # randomly flip images

    datagen.fit(dataset.training_data)

    train_steps = int(len(dataset.training_data) / parsed.batch_size)

    print("Started training.\nTrain steps: {}".format(train_steps))

    for e in range(parsed.num_epochs):
        total_loss = 0.0
        epoch_loss = 0.0
        batches = 0
        # datagen.flow loops forever; break manually after train_steps batches.
        for x_batch in datagen.flow(dataset.training_data,
                                    batch_size=parsed.batch_size):
            loss, global_step = swwae.train(x_batch)
            batches += 1

            total_loss += loss
            epoch_loss += loss

            # Periodic loss logging plus an eval pass over the test batches.
            if (batches + 1) % parsed.info_step == 0:
                avg_loss = total_loss / parsed.info_step
                save_loss(avg_loss)

                for test_step in range(test_steps):
                    X_test_step = X_test[test_step]
                    swwae.eval(input=X_test_step)

                total_loss = 0.0

            # Periodic checkpointing (only when save_step was provided).
            if parsed.save_step is not None:
                if (global_step + 1) % parsed.save_step == 0:
                    swwae.save(path=parsed.output_dir)

            if batches >= train_steps:
                break

        print("Train epoch {}: avg. loss: {}".format(e + 1,
                                                     epoch_loss / train_steps),
              flush=True)

    # Final checkpoint.
    if parsed.output_dir is not None:
        swwae.save(path=parsed.output_dir)

    print("Starting test..")

    total_loss = 0.0

    # Average reconstruction loss over the test set.
    for test_step in range(test_steps):
        X_test_step = X_test[test_step]

        loss = swwae.eval(input=X_test_step)

        total_loss += loss

    print("Test average loss: {}".format(total_loss / test_steps))
Beispiel #12
0
from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.agents.ppo.ppo_torch_policy import PPOTorchPolicy
from ray.rllib.env import BaseEnv
from ray.rllib.evaluation import MultiAgentEpisode, RolloutWorker
from ray.rllib.models import ModelCatalog
from ray.rllib.policy import Policy

import arguments
import constants
from MinerTraining import Metrics
from exploration_annealing import ExplorationAnnealing
from models import TorchRNNModel, SecondModel, ThirdModel, FourthModel, FifthModel, SixthModel, SeventhModel
from rllib_envs import v0
from utils import policy_mapping

# Parse command-line arguments once at import time; `params` exposes the
# same namespace as a plain dict.
parser = arguments.get_parser()
args = parser.parse_args()
params = vars(args)


class MinerCallbacks(DefaultCallbacks):
    def __init__(self):
        """Initialize callback state."""
        super().__init__()

        # Ids of the eight policies whose episode results are tracked.
        self.training_policies = [f"policy_{i}" for i in range(8)]

    def on_episode_end(self, worker: RolloutWorker, base_env: BaseEnv,
                       policies: Dict[str, Policy], episode: MultiAgentEpisode,
                       **kwargs):

        for (agent_name, policy), v in episode.agent_rewards.items():
Beispiel #13
0
def main():
    """Patch-matching evaluation entry point.

    Restores training-time arguments from the checkpoint's ``args_in_run.txt``,
    overrides them with the command-line values, runs inference, and finally
    runs the patch-matching (template-matching) evaluation.
    """
    parser = arguments.get_parser()
    cudnn.benchmark = True
    args = prepare_train.initialize_test(parser)

    # regular invrepnet and original images mode
    if args.additional_rep == False:
        # LOADING ARGS FROM COMMAND LINE (saved so they survive the file load)
        pretrained_model = args.pretrained_model
        use_cpu = args.use_cpu
        data_dir = args.data
        pm_mode = args.pm_mode

        # LOADING ARGS FROM FILE: parse '--name value' lines and coerce the
        # textual values back into Python types.
        args_file = (args.pretrained_model).rsplit('/', 1)[0]
        with open(os.path.join(args_file, 'args_in_run.txt')) as fp:
            for line in fp:
                if line.startswith('--'):
                    tokens = line[2:].strip().split()
                    if tokens[1].isdigit():
                        tokens[1] = int(tokens[1])
                    if tokens[1] == "True":
                        tokens[1] = True
                    if tokens[1] == "False":
                        tokens[1] = False
                    if tokens[1] == "None":
                        tokens[1] = None
                    if tokens[1] == "[]":
                        tokens[1] = []
                    # BUGFIX: was `is not 'None'` — identity comparison with a
                    # string literal (SyntaxWarning; implementation-defined).
                    if tokens[1] != 'None':
                        # print('arg.{}={}'.format(tokens[0], tokens[1]))
                        vars(args)[tokens[0]] = tokens[1]

        # EVAL ARGS
        args.freeze_model = True
        args.batch_size = 1

        # OVERRIDE ARGS FROM FILE WITH ARGS FROM COMMAND LINE
        args.pretrained_model = pretrained_model
        args.data = data_dir
        args.use_cpu = use_cpu
        args.pm_mode = pm_mode

        # inference output folder, derived from the checkpoint name
        args.output = args.pretrained_model.replace(".pth.tar",
                                                    "_patchmatching_images")
        if not os.path.exists(args.output):
            os.makedirs(args.output)
        print('\n=> Starting Inference\n')
        run_inf(args)
        print('=> Finished Inference\n')

        # task directories produced by the inference step
        args.task_image_dirs = os.path.join(args.output, 'img')
        args.task_invrep_dirs = os.path.join(args.output, 'rep')

        # start patch matching eval after inference
        print('=> Starting Template Matching\n')
        run_pm(pm_mode=args.pm_mode, args=args)
        print('=> Finished Template Matching\n')
    else:
        # additional-representation mode: evaluate data already on disk
        args.task_invrep_dirs = args.data
        args.task_image_dirs = args.data
        args.pm_mode = 2
        print('=> Starting --ADDITIONAL REPRESENTATION-- Patch Matching\n')
        run_pm(pm_mode=args.pm_mode, args=args)
        print('=> Finished --ADDITIONAL REPRESENTATION-- Patch Matching\n')
Beispiel #14
0
    try:
        table.insert(TO_BE_INSERTED, continue_on_error=True)
    except cm.pymongo.errors.DuplicateKeyError:
        pass
    except cm.pymongo.errors.OperationFailure as e:
        print(e, e.code)
    del TO_BE_INSERTED[:]


if __name__ == '__main__':
    # pylint: disable=C0103
    import sys
    import arguments
    from glob import glob
    sources = sorted(glob('all_*.csv'))
    parser = arguments.get_parser()
    args = parser.parse_args()
    print(args)
    db = cm.connect_to_db('foursquare', args.host, args.port)[0]
    TABLE = db['checkin']
    TABLE.ensure_index([('loc', cm.pymongo.GEOSPHERE),
                        ('city', cm.pymongo.ASCENDING)])
    csv.field_size_limit(sys.maxsize)
    total, unmatched = 0, 0
    for fn in sources:
        with open(fn, 'rb') as f:
            reader = csv.DictReader(f, delimiter=';')
            for i, row in enumerate(reader):
                checkin = reformat(row)
                total += 1
                if checkin:
def main():
    parser = get_parser(MODEL_TYPES, ALL_MODELS)
    args = parser.parse_args()

    if args.doc_stride >= args.max_seq_length - args.max_query_length:
        logger.warning(
            "WARNING - You've set a doc stride which may be superior to the document length in some "
            "examples. This could result in errors when building features from the examples. Please reduce the doc "
            "stride or increase the maximum length to ensure the features are correctly built."
        )

    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and args.do_train and not args.overwrite_output_dir):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))

    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd

        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of sychronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        args.fp16,
    )

    # Set seed
    set_seed(args)

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        # Make sure only the first process in distributed training will download model & vocab
        torch.distributed.barrier()

    args.model_type = args.model_type.lower()
    config = AutoConfig.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        cache_dir=args.cache_dir if args.cache_dir else None,
        output_attentions=args.uncertainty_model)
    tokenizer = AutoTokenizer.from_pretrained(
        args.tokenizer_name
        if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    model = AutoModelForQuestionAnswering.from_pretrained(
        args.model_name_or_path,
        from_tf=bool(".ckpt" in args.model_name_or_path),
        config=config,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )

    if args.uncertainty_model:
        uncertainty_inp_size = config.num_hidden_layers * config.num_attention_heads
        uncertainty_model = torch.nn.Sequential(
            torch.nn.Linear(uncertainty_inp_size, uncertainty_inp_size // 2),
            torch.nn.Dropout(0.1), torch.nn.ReLU(), torch.nn.Dropout(0.1),
            torch.nn.Linear(uncertainty_inp_size // 2, 2))
        uncertainty_model.to(args.device)

    if args.local_rank == 0:
        # Make sure only the first process in distributed training will download model & vocab
        torch.distributed.barrier()

    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    # Before we do anything with models, we want to ensure that we get fp16 execution of torch.einsum if args.fp16 is set.
    # Otherwise it'll default to "promote" mode, and we'll get fp32 operations. Note that running `--fp16_opt_level="O2"` will
    # remove the need for this code, but it is still valid.
    if args.fp16:
        try:
            import apex

            apex.amp.register_half_function(torch, "einsum")
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )

    # Training
    if args.do_train:
        train_dataset = load_and_cache_examples(args,
                                                tokenizer,
                                                evaluate=False,
                                                output_examples=False)
        global_step, tr_loss = train(
            args, train_dataset, model, tokenizer,
            uncertainty_model if args.uncertainty_model else None)
        logger.info(" global_step = %s, average loss = %s", global_step,
                    tr_loss)

    # Save the trained model and the tokenizer
    if args.do_train and (args.local_rank == -1
                          or torch.distributed.get_rank() == 0):
        # Create output directory if needed
        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(args.output_dir)

        logger.info("Saving model checkpoint to %s", args.output_dir)
        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        # Take care of distributed/parallel training
        model_to_save = model.module if hasattr(model, "module") else model
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)

        # Good practice: save your training arguments together with the trained model
        torch.save(args, os.path.join(args.output_dir, "training_args.bin"))

        # Load a trained model and vocabulary that you have fine-tuned
        model = AutoModelForQuestionAnswering.from_pretrained(
            args.output_dir)  # , force_download=True)
        tokenizer = AutoTokenizer.from_pretrained(
            args.output_dir, do_lower_case=args.do_lower_case)
        model.to(args.device)
        if args.uncertainty_model:
            uncertainty_model.to(args.device)

    # Evaluation - we can ask to evaluate all the checkpoints (sub-directories) in a directory
    results = {}
    if args.do_eval and args.local_rank in [-1, 0]:
        if args.do_train:
            logger.info(
                "Loading checkpoints saved during training for evaluation")
            checkpoints = [args.output_dir]
            if args.eval_all_checkpoints:
                checkpoints = list(
                    os.path.dirname(c) for c in sorted(
                        glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME,
                                  recursive=True)))
                logging.getLogger("transformers.modeling_utils").setLevel(
                    logging.WARN)  # Reduce model loading logs
        else:
            logger.info("Loading checkpoint %s for evaluation",
                        args.model_name_or_path)
            checkpoints = [args.model_name_or_path]

        logger.info("Evaluate the following checkpoints: %s", checkpoints)

        for checkpoint in checkpoints:
            # Reload the model
            global_step = checkpoint.split(
                "-")[-1] if len(checkpoints) > 1 else ""
            model = AutoModelForQuestionAnswering.from_pretrained(
                checkpoint)  # , force_download=True)
            model.to(args.device)

            # Evaluate
            result = evaluate(args, model, tokenizer, prefix=global_step)

            result = dict(
                (k + ("_{}".format(global_step) if global_step else ""), v)
                for k, v in result.items())
            results.update(result)

    logger.info("Results: {}".format(results))

    return results
Beispiel #16
0
        tak, tkl = DO_CLUSTER(sval, num_cluster)
        current_disto = vf.get_distorsion(tak, tkl, sval)
        if current_disto < min_disto:
            min_disto, ak, kl = current_disto, tak, tkl
    std_ord = np.argsort((np.argsort(ak)), 0)[:, -1]
    # vf.draw_classes(ak[std_ord, :], shift, chunk)
    # vf.plt.title('{}, {} venues'.format(city, len(enough)))
    # vf.plt.ylim([0, 0.28 if weekly else 0.9])
    city = 'times/' + city
    city += '_weekly' if weekly else '_daily'
    sio.savemat(city + '_time', {'t': ak[std_ord, :]}, do_compression=True)
    # vf.plt.savefig(city+'_time.png', dpi=160, transparent=False, frameon=False,
    #                bbox_inches='tight', pad_inches=0.1)
    # vf.plt.clf()


if __name__ == '__main__':
    # pylint: disable=C0103
    import arguments
    import persistent as p
    # Connect to the foursquare DB and plot the daily temporal profile
    # (5 clusters) of every known city, in reverse key order.
    args = arguments.get_parser().parse_args()
    DB, CLIENT = xp.cm.connect_to_db('foursquare', args.host, args.port)
    res = {}
    for city in reversed(xp.cm.cities.SHORT_KEY):
        print(city)
        plot_city(city, weekly=False, clusters=5)
        # plot_city(ciy, weekly=True, clusters=)
    #     venue_visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
    #     res.update({k: len(v) for k, v in venue_visits.iteritems()})
    # p.save_var('venue_visits', res)
Beispiel #17
0
    locale.setlocale(locale.LC_ALL, '')


def ordered(counts, cities, threshold=10):
    """Return `counts` ordered by `cities`, with a grand total appended.

    :param counts: iterable of mongo-style docs with '_id' (city) and 'count'
    :param cities: ordered city keys; cities absent from `counts` count as 0
    :param threshold: counts at or below this render as an empty string
    :return: list of locale-grouped count strings (total last)
    """
    as_dict = {v['_id']: v['count'] for v in counts}
    count = [as_dict.get(city, 0) for city in cities]
    count.append(sum(count))

    def fmt(v):
        # locale.format() was deprecated and removed in Python 3.12;
        # format_string is the supported replacement and behaves identically
        # for a plain '%d' conversion.
        return locale.format_string('%d', v, grouping=True)

    return [fmt(c) if c > threshold else '' for c in count]


if __name__ == '__main__':
    #pylint: disable=C0103
    import arguments
    args = arguments.get_parser().parse_args()
    foursquare, client = cm.connect_to_db('foursquare', args.host, args.port)
    checkins = foursquare.checkin
    venues = foursquare.venue
    photos = client.world.photos
    newer = dt(2001, 2, 1)
    t = pt.PrettyTable()
    t.junction_char = '|'
    # checkin = checkins.aggregate([{'$match': {'time': {'$lt': newer}}},
    #                               {'$project': {'city': 1}},
    #                               {'$group': {'_id': '$city',
    #                                           'count': {'$sum': 1}}},
    #                               {'$sort': {'count': -1}}])
    located = checkins.aggregate([{'$match': {'lid': {'$ne': None},
                                              'time': {'$lt': newer}}},
                                  {'$project': {'city': 1}},