Example #1
def main():

    parser = argparse.ArgumentParser(
        description="rfretrieval: Atmospheric retrieval with random forests.")
    parser.set_defaults(func=show_usage, parser=parser)
    parser.add_argument("--quiet", action='store_true')
    subparsers = parser.add_subparsers()

    parser_train = subparsers.add_parser('train', help="train a model")
    parser_train.add_argument(
        "training_dataset",
        type=str,
        help="JSON file with the training dataset description")
    parser_train.add_argument(
        "model_path",
        type=str,
        help="path where the trained model will be saved")
    parser_train.add_argument("--num-trees",
                              type=int,
                              default=1000,
                              help="number of trees in the forest")
    parser_train.add_argument(
        "--num-jobs",
        type=int,
        default=5,
        help="number of parallel jobs for fitting the random forest")
    parser_train.add_argument(
        "--feature-importance",
        action='store_true',
        help="compute feature importances after training")
    parser_train.set_defaults(func=main_train)

    parser_test = subparsers.add_parser(
        'predict', help="use a trained model to perform a prediction")
    parser_test.set_defaults(func=main_predict)
    parser_test.add_argument("model_path",
                             type=str,
                             help="path to the trained model")
    parser_test.add_argument("data_file",
                             type=str,
                             help="NPY file with the data for the prediction")
    parser_test.add_argument(
        "output_path",
        type=str,
        help="path to write the results of the prediction")
    parser_test.add_argument(
        "--plot-posterior",
        action='store_true',
        help="plot and save the scatter matrix of the posterior distribution")

    args = parser.parse_args()
    config_logger(level=logging.WARNING if args.quiet else logging.INFO)
    args.func(**vars(args))
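Because each sub-parser registers its handler with set_defaults(func=...), the final args.func(**vars(args)) call forwards the whole parsed namespace as keyword arguments. A minimal sketch of a compatible handler (the signature below is an assumption for illustration, not the project's actual code):

def main_train(training_dataset, model_path, num_trees=1000, num_jobs=5,
               feature_importance=False, **kwargs):
    # **kwargs absorbs the extra namespace entries injected above
    # (func, parser and the global quiet flag), which the handler ignores
    print(f"training on {training_dataset} with {num_trees} trees -> {model_path}")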
Example #2
    def __init__(self, envs, args, net):
        self.envs = envs
        self.args = args
        # define the network...
        self.net = net
        self.old_net = copy.deepcopy(self.net)
        # move the networks to the GPU if CUDA is enabled...
        if self.args.cuda:
            self.net.cuda()
            self.old_net.cuda()
        # define the optimizer...
        self.optimizer = optim.Adam(self.net.parameters(), self.args.lr, eps=self.args.eps)
        # create the saving folder if it does not exist..
        if not os.path.exists(self.args.save_dir):
            os.mkdir(self.args.save_dir)
        # env folder..
        self.model_path = os.path.join(self.args.save_dir, self.args.env_name)
        if not os.path.exists(self.model_path):
            os.mkdir(self.model_path)
        # logger folder
        if not os.path.exists(self.args.log_dir):
            os.mkdir(self.args.log_dir)
        self.log_path = self.args.log_dir + self.args.env_name + '.log'
        # get the initial observation
        self.batch_ob_shape = (self.args.num_workers * self.args.nsteps, ) + self.envs.observation_space.shape
        self.obs = np.zeros((self.args.num_workers, ) + self.envs.observation_space.shape, dtype=self.envs.observation_space.dtype.name)
        self.obs[:] = self.envs.reset()
        self.dones = [False for _ in range(self.args.num_workers)]
        self.logger = config_logger(self.log_path)
Example #3
def main():
    cmd_parser = argparse.ArgumentParser(
        description='Simcoin, a cryptocurrency simulator.',
        usage='''<command> [<args>]

        The commands are:
        nodes       creates the {} for a simulation
        network     creates the {} for a simulation
        ticks       creates the {} for a simulation
        simulate    executes a simulation based on the {}, {} and {}
        run         runs all above commands
        multi-run   run the simulation multiple times
        '''.format(
            config.nodes_csv_file_name,
            config.network_csv_file_name,
            config.ticks_csv_file_name,
            config.nodes_csv_file_name,
            config.network_csv_file_name,
            config.ticks_csv_file_name,
        ))

    cmd_parser.add_argument('command', help='Subcommand to run')

    # parse_args defaults to [1:] for args, but you need to
    # exclude the rest of the args too, or validation will fail
    args = cmd_parser.parse_args(sys.argv[1:2])
    command = args.command
    if command not in commands:
        print('Unrecognized command')
        cmd_parser.print_help()
        exit(1)
    # use dispatch pattern to invoke method with same name

    if not os.path.exists(config.data_dir):
        os.makedirs(config.data_dir)

    bitcoin.SelectParams('regtest')

    args = _parse_args()
    utils.config_logger(args.verbose)
    logging.info("Arguments called with: {}".format(sys.argv))
    logging.info("Parsed arguments in simcoin.py: {}".format(args))

    logging.info('Executing command={}'.format(command))
    commands[command]()
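The comments around parse_args(sys.argv[1:2]) describe the git-style dispatch pattern: only the sub-command name is parsed at the top level, so the sub-command's own flags do not trip validation, and the name is then looked up in a commands dict. A self-contained sketch of that pattern (the command names here are hypothetical):

import argparse
import sys


def cmd_nodes():
    print('creating nodes ...')


def cmd_network():
    print('creating network ...')


commands = {'nodes': cmd_nodes, 'network': cmd_network}


def main():
    parser = argparse.ArgumentParser(usage='<command> [<args>]')
    parser.add_argument('command', help='Subcommand to run')
    # parse only argv[1:2] so flags meant for the sub-command
    # do not fail validation here
    args = parser.parse_args(sys.argv[1:2])
    if args.command not in commands:
        print('Unrecognized command')
        parser.print_help()
        sys.exit(1)
    commands[args.command]()


if __name__ == '__main__':
    main()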
Example #4
def main():
    cmd_parser = argparse.ArgumentParser(description='BlockPerf',
                                         usage='''<command> [<args>]

    The commands are:
    nodes       creates the {} for a run
    network     creates peer {} for a run
    simulate    executes a simulation based on the {} and {}
    run         runs all above commands
    '''.format(
                                             config.nodes_csv_file_name,
                                             config.network_csv_file_name,
                                             config.nodes_csv_file_name,
                                             config.network_csv_file_name,
                                         ))

    cmd_parser.add_argument('command', help='Subcommand to run')

    args = cmd_parser.parse_args(sys.argv[1:2])
    command = args.command
    if command not in commands:
        print('Unrecognized command')
        cmd_parser.print_help()
        exit(1)
    # use dispatch pattern to invoke method with same name

    if not os.path.exists(config.data_dir):
        os.makedirs(config.data_dir)

    bitcoin.SelectParams('regtest')

    args = _parse_args()
    utils.config_logger(args.verbose)
    logging.info("Arguments called with: {}".format(sys.argv))

    logging.info('Executing command={}'.format(command))
    commands[command]()
Example #5
        "'multi2', 'multi2_ext'. Use multichar labels to reduce sequence size")
    parser.add_argument('--remove-splits',
                        action="store_true",
                        default=False,
                        help="Clean splits and chords during kern cleaning")
    args = parser.parse_args()

    data_prep_job = f'{args.id}_{args.label_encoder}{"_splits" if not args.remove_splits else ""}'  # noqa E501
    outdir = Path(args.out_dir) / data_prep_job
    outdir.mkdir(parents=True, exist_ok=True)
    args.out_dir = str(outdir)

    # Logging config.
    log_file = outdir / f'{datetime.now().strftime("%Y%m%d-%H%M%S")}_{data_prep_job}.log'  # noqa E501
    logger = config_logger('data_prep',
                           console_level='ERROR',
                           log_file=log_file)

    # Main execution.
    logger.info("Preprocessing humdrum data...")
    root = Path(args.data_dir)
    scores = sorted([x.relative_to(root) for x in root.rglob('*.krn')])

    logger.info("Splitting train/test samples...")
    scores_train, scores_test = train_test_split(scores,
                                                 test_size=args.test_split,
                                                 random_state=45)

    # Favor validation if the number of samples is odd.
    middle = round(len(scores_test) / 2)
    scores_val = scores_test[:middle]
Example #6
def main():
    config_logger()
    log.info('Start main')
    boo()
    goo()
    log.info('Finish main')
Example #7
import pika
import sys
import logging
from utils import config_logger

log = logging.getLogger(__name__)


def send_tasks(channel, queue_name, count):
    for i in range(count):
        msg = 'Task {}'.format(i)
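        # delivery_mode=2 marks the message as persistent (only effective with a durable queue)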
        channel.basic_publish(exchange='',
                              routing_key=queue_name,
                              body=msg,
                              properties=pika.BasicProperties(delivery_mode=2))
        log.info('{} published'.format(msg))


def main(queue_name='tasks', tasks_count=5):
    logging.getLogger('pika').setLevel(logging.ERROR)
    connection = pika.BlockingConnection(
        pika.ConnectionParameters(host='localhost'))
    channel = connection.channel()
    # channel.queue_declare(queue=queue_name, durable=True)
    send_tasks(channel, queue_name, tasks_count)
    connection.close()


if __name__ == '__main__':
    config_logger(log_file='logs/tasks_producer.log')
    main()
Example #8
def main(args):
    # init some setting
    # config logging
    log_path = os.path.join(args.log_root, '{}.log'.format(args.model_name))
    logger = config_logger(log_path)

    gpu_idx = args.gpu
    if not gpu_idx:
        ctx = mx.cpu()
    else:
        ctx = mx.gpu(gpu_idx - 1)
    logger.info("Using ctx: {}".format(ctx))

    # Loading vocab and model
    ch_bert, ch_vocab = gluonnlp.model.get_model(
        args.bert_model,
        dataset_name=args.ch_bert_dataset,
        pretrained=True,
        ctx=ctx,
        use_pooler=False,
        use_decoder=False,
        use_classifier=False)
    model = BertClass(bert=ch_bert, max_seq_len=args.max_seq_len, ctx=ctx)
    logger.info("Model Creation Completed.")

    # init or load params for model
    if args.istrain:
        model.output_dense.initialize(init.Xavier(), ctx)
    else:
        model.load_parameters(args.model_params_path, ctx=ctx)
    logger.info("Parameter Initialization and Loading Completed")

    model.hybridize()

    if args.istrain:
        # Loading dataloader
        assiant = DatasetAssiantTransformer(ch_vocab=ch_vocab,
                                            max_seq_len=args.max_seq_len)
        dataset = ClassDataset(args.train_file_path)
        train_dataset, dev_dataset = train_valid_split(dataset,
                                                       valid_ratio=0.1)
        train_dataiter = ClassDataLoader(train_dataset,
                                         batch_size=args.batch_size,
                                         assiant=assiant,
                                         shuffle=True).dataiter
        dev_dataiter = ClassDataLoader(dev_dataset,
                                       batch_size=args.batch_size,
                                       assiant=assiant,
                                       shuffle=True).dataiter
        logger.info("Data Loading Completed")
    else:
        assiant = DatasetAssiantTransformer(ch_vocab=ch_vocab,
                                            max_seq_len=args.max_seq_len,
                                            istrain=args.istrain)
        test_dataset = ClassTestDataset(args.test_file_path)
        test_dataiter = ClassDataLoader(test_dataset,
                                        batch_size=args.batch_size,
                                        assiant=assiant,
                                        shuffle=True).dataiter

    # build trainer
    finetune_trainer = gluon.Trainer(ch_bert.collect_params(), args.optimizer,
                                     {"learning_rate": args.finetune_lr})
    trainer = gluon.Trainer(model.collect_params("dense*"), args.optimizer,
                            {"learning_rate": args.train_lr})

    loss_func = gloss.SoftmaxCELoss()

    if args.istrain:
        logger.info("## Training Start ##")
        train_and_valid(ch_bert=ch_bert,
                        model=model,
                        ch_vocab=ch_vocab,
                        train_dataiter=train_dataiter,
                        dev_dataiter=dev_dataiter,
                        trainer=trainer,
                        finetune_trainer=finetune_trainer,
                        epochs=args.epochs,
                        loss_func=loss_func,
                        ctx=ctx,
                        lr=args.train_lr,
                        batch_size=args.batch_size,
                        params_save_step=args.params_save_step,
                        params_save_path_root=args.params_save_path_root,
                        eval_step=args.eval_step,
                        log_step=args.log_step,
                        check_step=args.check_step,
                        logger=logger,
                        num_train_examples=len(train_dataset),
                        warmup_ratio=args.warmup_ratio)
    else:
        predict(ch_bert=ch_bert,
                model=model,
                ch_vocab=ch_vocab,
                test_dataiter=test_dataiter,
                logger=logger,
                ctx=ctx)
Example #9
    'SMOD':
    'GHS_SMOD_POP_GLOBE_R2019A/GHS_SMOD_POP2015_GLOBE_R2019A_54009_1K/V2-0/tiles/'
    'GHS_SMOD_POP2015_GLOBE_R2019A_54009_1K_V2_0_{column}_{row}.zip',
    'POP':
    'GHS_POP_MT_GLOBE_R2019A/GHS_POP_E2015_GLOBE_R2019A_54009_1K/V1-0/tiles/'
    'GHS_POP_E2015_GLOBE_R2019A_54009_1K_V1_0_{column}_{row}.zip'
}
OUTPUT_GHS = {
    'SMOD': '{country_iso3}_SMOD_2015_1km_mosaic.tif',
    'POP': '{country_iso3}_POP_2015_1km_mosaic.tif'
}
GHS_CRS = 'ESRI:54009'
URBAN_MIN_MAX = (21, 30)
RURAL_MIN_MAX = (11, 13)

utils.config_logger()
logger = logging.getLogger(__name__)


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('country_iso3', help='Country ISO3')
    parser.add_argument('-d',
                        '--download-ghs',
                        action='store_true',
                        help='Download the GHS data')
    return parser.parse_args()


def main(country_iso3, download_ghs=False):
Example #10
    parser.add_argument('embeddings',
                        help='Text or numpy file with word embeddings')
    parser.add_argument('--vocab',
                        help='Vocabulary file (only needed if a numpy '
                        'embedding file is given)')
    parser.add_argument('-a',
                        help='Plot attention values graph',
                        dest='attention',
                        action='store_true')
    parser.add_argument('-i',
                        help='Run inference classifier',
                        dest='inference',
                        action='store_true')
    args = parser.parse_args()

    utils.config_logger(verbose=False)
    logger = utils.get_logger()
    params = ioutils.load_params(args.load)
    if args.inference:
        label_dict = ioutils.load_label_dict(args.load)
        number_to_label = {v: k for (k, v) in label_dict.items()}

    logger.info('Reading model')
    sess = tf.InteractiveSession()
    model_class = utils.get_model_class(params)
    model = model_class.load(args.load, sess)
    word_dict, embeddings = ioutils.load_embeddings(args.embeddings,
                                                    args.vocab,
                                                    generate=False,
                                                    load_extra_from=args.load,
                                                    normalize=True)
Example #11
parser.add_argument('--seed', default=45, type=int, help='Seed to generators')
parser.add_argument('--mixed-precision',
                    action='store_true',
                    help='Uses mixed precision to train a model '
                    '(suggested with volta and above)')

if __name__ == '__main__':
    args = parser.parse_args()
    config = configparser.ConfigParser()

    save_folder = os.path.dirname(args.model_path)
    os.makedirs(save_folder, exist_ok=True)  # Ensure save folder exists
    # Logging config.
    train_job = f"train_{Path(args.model_path).with_suffix('.log').name}"
    log_file = f'{save_folder}/{datetime.now().strftime("%Y%m%d-%H%M%S")}_{train_job}'
    logger = config_logger('train', log_file=log_file)

    # Main execution.
    # Get train config.
    config.read(args.config_path)
    model_name = config['train']['model']
    model_conf = config[model_name]
    audio_conf = config['audio']

    if model_name == "deepspeech":
        from deepspeech.loss import Loss
        from deepspeech.model import DeepSpeech as Model
    else:
        raise NotImplementedError

    train_conf = config['train']
Example #12
def combine_datasets(data_dir, dataset_config, sample_rate=22050):
    logger = config_logger('combine_datasets', console_level='INFO')

    if not isinstance(data_dir, Path):
        data_dir = Path(data_dir)

    group = data_dir.name

    # Label file must be the same. Just make a copy and rename.
    labels_path = [
        i for i in data_dir.rglob(f'*{dataset_config}/labels*.json')
    ][0]
    outpath = data_dir / f'labels_{group}_{dataset_config}.json'
    logger.info(f'Saving encoder labels to {outpath}')
    outpath.write_text(labels_path.read_text())

    dataset_partitions = ['test', 'train', 'val']
    extension = 'csv'
    for dataset_partition in dataset_partitions:
        logger.info(
            f'Looking for partition {dataset_partition} from configuration {dataset_config}.'
        )
        all_filenames = [
            i for i in data_dir.rglob(
                f'*{dataset_config}/{dataset_partition}*.{extension}')
        ]
        logger.info(f'Found {len(all_filenames)} files.')

        # combine all files in the list
        combined_csv = pd.concat([
            pd.read_csv(f, header=None).assign(filename=f.name)
            for f in all_filenames
        ]).rename(columns={
            0: "audio",
            1: "seq"
        })
        logger.info(combined_csv.groupby('filename')['audio'].count())
        logger.info(
            f"Combined partition {dataset_partition}: {combined_csv['audio'].count()} samples."
        )

        durations = []
        total_duration = 0
        audio_errors = 0
        for audio_file in combined_csv['audio'].tolist():
            try:
                y = load_audio(str(audio_file))
                duration = len(y) / sample_rate
            except Exception as e:
                logger.exception(
                    f"Exception while loading {audio_file} audio. Reason: {e}")
                audio_errors += 1
                durations.append(0)  # keep durations aligned with the rows
                continue
            durations.append(duration)

        combined_csv = combined_csv.assign(duration=durations)
        # SortaGrad
        combined_csv = combined_csv.sort_values(by='duration')

        total_duration = combined_csv['duration'].sum()
        logger.info(f'Total duration: {total_duration/60/60} hours.')
        logger.info(f'Found {audio_errors} errors during loading.')

        # export to csv
        outpath = data_dir / f'{dataset_partition}_{group}_{dataset_config}.{extension}'
        logger.info(f'Saving to {outpath}')
        combined_csv.drop(['filename', 'duration'],
                          axis=1).to_csv(outpath,
                                         index=False,
                                         header=False,
                                         encoding='iso-8859-1')
Example #13
    def run(self):
        utils.config_logger(self.args)
        tracker.submit(
            self.num_workers, fun_submit=self.submit(), pscmd=self.cmd)
Example #14
parser.add_argument(
    '--verbose',
    action="store_true",
    help="log the decoded output and error of each sample")
parser.add_argument('--output-path',
                    default=None,
                    type=str,
                    help="Where to save raw acoustic output")
args = parser.parse_args()

if __name__ == '__main__':
    save_folder = os.path.dirname(args.model_path)
    manifest_name = '_'.join([*Path(args.test_manifest).parts[-2:]])
    test_job = f"test_{manifest_name}_{Path(args.model_path).with_suffix('.log').name}"
    log_file = f'{save_folder}/{datetime.now().strftime("%Y%m%d-%H%M%S")}_{test_job}'
    logger = config_logger('test', log_file=log_file, console_level='ERROR')

    torch.set_grad_enabled(False)
    model, _ = load_model(args.model_path)
    device = torch.device("cuda" if args.cuda else "cpu")
    label_decoder = LabelDecoder(model.labels)
    model.eval()
    model = model.to(device)

    test_dataset = SpectrogramDataset(audio_conf=model.audio_conf,
                                      manifest_filepath=args.test_manifest,
                                      labels=model.labels)
    test_sampler = BucketingSampler(test_dataset, batch_size=args.batch_size)
    test_loader = AudioDataLoader(test_dataset,
                                  batch_sampler=test_sampler,
                                  num_workers=args.num_workers)
Example #15
    parser.add_argument('--cnn')
    parser.add_argument('--pred')

    parser.add_argument('--test-set')

    parser.add_argument('--output', default=None)

    parser.add_argument('--gpu', default='0')

    parser.add_argument('--keep-ratio', type=float, default=0.1)
    parser.add_argument('--consider-trains', action='store_true')
    parser.add_argument('--test-train', action='store_true')
    parser.add_argument('-v', '--verbose', action='store_true')

    args = parser.parse_args()
    config_logger(args.pred + '.' + args.test_set + '.log',
                  verbose=args.verbose)
    set_gpu(args.gpu)

    test_sets = json.load(open('materials/imagenet-testsets.json', 'r'))
    train_wnids = test_sets['train']
    test_wnids = test_sets[args.test_set]

    logging.info('pred: {}'.format(args.pred))
    logging.info('cnn: {}'.format(args.cnn))
    logging.info('test set: {}, {} classes, ratio={}'.format(
        args.test_set, len(test_wnids), args.keep_ratio))
    logging.info('consider train classifiers: {}'.format(args.consider_trains))

    pred_file = torch.load(args.pred)
    pred_wnids = pred_file['wnids']
    pred_vectors = pred_file['pred']
Example #16
    parser.add_argument("--batch_size", "--b", type=int)
    parser.add_argument("--mean", type=tuple)
    parser.add_argument("--std", type=tuple)
    parser.add_argument("--epochs", type=int)
    parser.add_argument("--pretrain_path", type=str)
    parser.add_argument("--logs", type=str, default="./logs/")
    parser.add_argument("--desc", type=str, default="no_description_specified")
    parser.add_argument("--kd", action="store_true")
    parser.add_argument("--save", action="store_true")
    parser.add_argument("--test", action="store_true")
    parser.add_argument("--logfile", type=str, required=True)

    parser.set_defaults(save=False)
    parser.set_defaults(kd=False)
    parser.set_defaults(test=False)
    args = parser.parse_args()

    loggername = datetime.now().strftime(f"D%d_%H:%M:%S_{args.desc}.log")
    loggername = os.path.join(args.logs, loggername)
    logger = config_logger(args.logfile, args.test)

    assert os.environ.get("CONFIG_PATH") is not None

    config = json.load(open(os.environ.get("CONFIG_PATH"), "r"))
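    # fill in any CLI argument left unset (falsy) from the JSON config file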
    for arg in vars(args):
        if not getattr(args, arg) and arg in config.keys():
            setattr(args, arg, config[arg])
            logger.info("{} : {}".format(arg, getattr(args, arg)))

    main(args)
Example #17
    print('Received: {}'.format(body))
    ch.basic_ack(delivery_tag=method.delivery_tag)
    log.info('Message {} received and processed'.format(body))


def main():
    logging.getLogger('pika').setLevel(logging.ERROR)
    if len(sys.argv) < 2:
        worker_id = 1
    else:
        worker_id = sys.argv[1]
    worker_name = 'Worker_{}'.format(worker_id)
    connection = pika.BlockingConnection(
        pika.ConnectionParameters('localhost'))
    channel = connection.channel()

    channel.queue_declare(queue='tasks')
    log.info('{} started'.format(worker_name))
    print("[*] {}: waiting for messages. To exit press CTRL+C".format(
        worker_name))

    # prefetch_count=1: fair dispatch, the broker sends this worker at most
    # one unacknowledged message at a time
    channel.basic_qos(prefetch_count=1)
    channel.basic_consume(callback, queue='tasks')

    channel.start_consuming()
    log.info('{} finished'.format(worker_name))


if __name__ == '__main__':
    config_logger(log_file='logs/worker.log')
    main()
Example #18
#!/usr/bin/env python

import logging
import re

from utils import config_logger, add_logger
logger = logging.getLogger(__name__)
config_logger(logger)


class Token(object):
    """A token"""
    name = None
    value = None

    def __init__(self, name, value):

        self.name = name
        self.value = value

    def __str__(self):
        return "{cls}({name},{value})".format(cls=self.__class__.__name__,
                                              name=self.name,
                                              value=repr(self.value))


class TokenizerException(Exception):
    pass


class Lexer(object):
Example #19
def main(args):
    # init some setting
    # config logging
    log_path = os.path.join(args.log_root, '{}.log'.format(args.model_name))
    if not os.path.exists(args.log_root):
        os.makedirs(args.log_root)
    logger = config_logger(log_path)

    gpu_idx = args.gpu
    if not gpu_idx:
        ctx = mx.cpu()
    else:
        ctx = mx.gpu(gpu_idx - 1)
    logger.info("Using ctx: {}".format(ctx))

    # Loading vocab and model
    src_bert, src_vocab = gluonnlp.model.get_model(
        args.bert_model,
        dataset_name=args.src_bert_dataset,
        pretrained=True,
        ctx=ctx,
        use_pooler=False,
        use_decoder=False,
        use_classifier=False)
    _, tgt_vocab = gluonnlp.model.get_model(args.bert_model,
                                            dataset_name=args.tgt_bert_dataset,
                                            pretrained=True,
                                            ctx=ctx,
                                            use_pooler=False,
                                            use_decoder=False,
                                            use_classifier=False)

    mt_model = MTModel_Hybird(src_vocab=src_vocab,
                              tgt_vocab=tgt_vocab,
                              embedding_dim=args.mt_emb_dim,
                              model_dim=args.mt_model_dim,
                              head_num=args.mt_head_num,
                              layer_num=args.mt_layer_num,
                              ffn_dim=args.mt_ffn_dim,
                              dropout=args.mt_dropout,
                              att_dropout=args.mt_att_dropout,
                              ffn_dropout=args.mt_ffn_dropout,
                              ctx=ctx)
    logger.info("Model Creation Completed.")

    # init or load params for model
    mt_model.initialize(init.Xavier(), ctx)

    if args.src_bert_load_path:
        src_bert.load_parameters(args.src_bert_load_path, ctx=ctx)
    if args.mt_model_load_path:
        mt_model.load_parameters(args.mt_model_load_path, ctx=ctx)
    logger.info("Parameter Initialization and Loading Completed")

    src_bert.hybridize()
    mt_model.hybridize()

    # Loading dataloader
    assiant = DatasetAssiantTransformer(src_vocab=src_vocab,
                                        tgt_vocab=tgt_vocab,
                                        max_src_len=args.max_src_len,
                                        max_tgt_len=args.max_tgt_len)
    train_dataset = MTDataset(args.train_data_path)
    eval_dataset = MTDataset(args.eval_data_path)

    train_dataiter = MTDataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  assiant=assiant,
                                  shuffle=True).dataiter
    dev_dataiter = MTDataLoader(eval_dataset,
                                batch_size=args.batch_size,
                                assiant=assiant,
                                shuffle=True).dataiter
    logger.info("Data Loading Completed")

    # build trainer
    finetune_trainer = gluon.Trainer(src_bert.collect_params(), args.optimizer,
                                     {"learning_rate": args.finetune_lr})
    trainer = gluon.Trainer(mt_model.collect_params(), args.optimizer,
                            {"learning_rate": args.train_lr})

    # loss function
    if args.label_smooth:
        loss_func = MaskedCELoss(sparse_label=False)
    else:
        loss_func = MaskedCELoss()

    logger.info("## Training Start ##")
    train_and_valid(src_bert=src_bert,
                    mt_model=mt_model,
                    src_vocab=src_vocab,
                    tgt_vocab=tgt_vocab,
                    train_dataiter=train_dataiter,
                    dev_dataiter=dev_dataiter,
                    trainer=trainer,
                    finetune_trainer=finetune_trainer,
                    epochs=args.epochs,
                    loss_func=loss_func,
                    ctx=ctx,
                    lr=args.train_lr,
                    batch_size=args.batch_size,
                    params_save_path_root=args.params_save_path_root,
                    eval_step=args.eval_step,
                    log_step=args.log_step,
                    check_step=args.check_step,
                    label_smooth=args.label_smooth,
                    logger=logger,
                    num_train_examples=len(train_dataset),
                    warmup_ratio=args.warmup_ratio)
Example #20
    parser.add_argument('--save-epoch', type=int, default=300)
    parser.add_argument('--save-path', default='gcn-dense-att')

    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--gpu', default='0')
    parser.add_argument('--no-pred', action='store_true')
    args = parser.parse_args()

    random.seed(args.seed)
    set_gpu(args.gpu)

    save_path = args.save_path

    save_path = osp.join('save', args.trainval, save_path)
    ensure_path(save_path)
    config_logger(save_path+'/train.log')
    logging.info(args)

    graph = json.load(open('materials/imagenet-dense-grouped-graph.json', 'r'))
    wnids = graph['wnids']
    n = len(wnids)

    edges_set = graph['edges_set']
    logging.info('edges_set{}'.format([len(l) for l in edges_set]))

    lim = 4
    for i in range(lim + 1, len(edges_set)):
        edges_set[lim].extend(edges_set[i])
    edges_set = edges_set[:lim + 1]
    logging.info('edges_set{}'.format([len(l) for l in edges_set]))
    
Example #21
from app import create_flask_app
from common_constants.constants import FLASK_CONFIG_MODULE
from database import session
from restful_apis import create_restful_api
from utils import config_logger
from flask_jwt_extended import JWTManager

app = create_flask_app('Flask Jwt')
app.config.from_object(FLASK_CONFIG_MODULE)
app.config['JWT_SECRET_KEY'] = 'demo_app'
jwt = JWTManager(app)
app.config['JWT_HEADER_TYPE'] = 'JWT'

config_logger(app)
create_restful_api(app)


def close_session(resp):
    session.close()  # used to remove actual session
    # session.remove() https://groups.google.com/forum/#!msg/sqlalchemy/twoHzgXcR60/nZqMKkCz9UwJ
    return resp

# close the DB session after every request and on app-context teardown
app.teardown_request(close_session)
app.teardown_appcontext(close_session)


if __name__ == '__main__':
    app.run()
Example #22
    parser.add_argument('--k', type=int, default=2)

    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--gpu', default='0')
    parser.add_argument('--no-pred', action='store_true')
    parser.add_argument('-v', '--verbose', action='store_true')
    args = parser.parse_args()

    random.seed(args.seed)
    set_gpu(args.gpu)

    save_path = args.save_path
    save_path += '-k={}'.format(args.k)
    save_path = osp.join('save', args.trainval, save_path)
    ensure_path(save_path)
    config_logger(save_path + '/train.log', verbose=args.verbose)
    logging.info(args)

    graph = json.load(open('materials/imagenet-induced-graph.json', 'r'))
    wnids = graph['wnids']
    n = len(wnids)
    edges = graph['edges']

    # add the reverse of every edge and a self-loop for every node
    edges = edges + [(v, u) for (u, v) in edges]
    edges = edges + [(u, u) for u in range(n)]

    word_vectors = torch.tensor(graph['vectors']).cuda()
    word_vectors = F.normalize(word_vectors)

    fcfile = json.load(open('materials/fc-weights.json', 'r'))
    train_wnids = [x[0] for x in fcfile]
Example #23
    return hit, tot


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--cnn')
    parser.add_argument('--pred')

    parser.add_argument('--gpu', default='0')
    parser.add_argument('--consider-trains', action='store_true')

    parser.add_argument('--output', default=None)
    args = parser.parse_args()

    config_logger(args.pred + '.awa2.log')

    set_gpu(args.gpu)

    awa2_split = json.load(open('materials/awa2-split.json', 'r'))
    train_wnids = awa2_split['train']
    test_wnids = awa2_split['test']

    logging.info('pred: {}'.format(args.pred))
    logging.info('cnn: {}'.format(args.cnn))
    logging.info('train: {}, test: {}'.format(len(train_wnids),
                                              len(test_wnids)))
    logging.info('consider train classifiers: {}'.format(args.consider_trains))

    accs = []
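    # evaluate the checkpoints saved between epochs 900 and 3000 (every 300 epochs)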
    for epoch in range(900, 3100, 300):
Example #24
#!/usr/bin/env python

import logging
from utils import add_logger, config_logger
config_logger(logging.getLogger())
Example #25
                        help='Number of batches between '
                        'performance reports',
                        default=100,
                        type=int)
    parser.add_argument('-v',
                        help='Verbose',
                        action='store_true',
                        dest='verbose')
    parser.add_argument('--optim',
                        help='Optimizer algorithm',
                        default='adagrad',
                        choices=['adagrad', 'adadelta', 'adam'])

    args = parser.parse_args()

    utils.config_logger(args.verbose)
    logger = utils.get_logger('train')
    logger.info('Training with the following options: %s' % ' '.join(sys.argv))
    #train_pairs = ioutils.read_corpus(args.train, args.lower, args.lang)
    #valid_pairs = ioutils.read_corpus(args.validation, args.lower, args.lang)
    # whether to generate embeddings for unknown, padding, null
    word_dict, embeddings = ioutils.load_embeddings(args.embeddings,
                                                    args.vocab,
                                                    True,
                                                    normalize=True)
    # test
    #print(word_dict)
    print(embeddings)
    logger.info('Converting words to indices')
    # find out which labels are there in the data
    # (more flexible to different datasets)
Example #26
def main(management, hps):
    # Setup Experiment
    task = hps.task
    config_logger(management.log_file, saving=management.save_logs)
    logger = logging.getLogger('Exp')
    train_logger = logging.getLogger('Exp.Train')
    eval_logger = logging.getLogger('Exp.Eval')
    try:
        state = State(hps.seed, management)
    except SingletonError:
        State.instance = None
        state = State(hps.seed, management)

    logger.info(
        f"Initializing experiment `{management.exp_name}` with hyperparameters:\n%s",
        repr(hps))
    stats = accumulator()

    # Setup Data
    if task == 'dirac':

        def train_data():
            dirac = State().convert(torch.Tensor(1, 1).fill_(hps.dirac_target))
            while True:
                yield dirac

        train_iter = train_data()
    else:
        dataset_cfg = dict(
            type=task,
            root=management.data_path,
            download=True,
            preload_to_gpu=management.preload_to_gpu,
            num_threads=management.num_workers,
            batch_size=hps.batch_size,
        )
        train_data = Dataset(**dataset_cfg, mode='train')
        eval_data = Dataset(**dataset_cfg, mode='test')
        train_iter = train_data.sampler(infinite=True, project=0)

    # Setup Generator
    if task == 'dirac':
        generator = dirac.DiracGenerator()
        stats.g_params.append(generator.param.clone().detach().cpu())
    else:
        generator = model.Generator(dimz=hps.generator_dimz,
                                    dimh=hps.generator_dimh,
                                    default_batch_size=hps.batch_size)
    test_generator = generator
    if hps.generator_alpha_ema is not None:
        test_generator = deepcopy(generator)
        test_generator.to(device=State().device)
        test_generator.train()
    generator.to(device=State().device)
    generator.train()
    generator_optim = optim.init_optimizer(generator.parameters(),
                                           type=hps.optimizer,
                                           lr=hps.generator_lr,
                                           betas=hps.generator_betas,
                                           wd=hps.generator_wd)
    logger.info("Generator:\n%s", generator)

    # Setup Critic
    if task == 'dirac':
        critic = dirac.DiracCritic()
        stats.c_params.append(critic.param.clone().detach().cpu())
    else:
        critic = model.Critic(dimh=hps.critic_dimh, sn=hps.critic_use_sn)
    critic.to(device=State().device)
    critic.train()
    critic_optim = optim.init_optimizer(critic.parameters(),
                                        type=hps.optimizer,
                                        lr=hps.critic_lr,
                                        betas=hps.critic_betas,
                                        wd=hps.critic_wd)
    logger.info("Critic:\n%s", critic)

    # Train
    step = 0
    train_loss_meter = running_average_meter()
    train_step = gan.make_train_step(hps.loss_type,
                                     critic_inner_iters=hps.critic_inner_iters,
                                     reg_type=hps.critic_reg_type,
                                     reg_cf=hps.critic_reg_cf,
                                     alpha_ema=hps.generator_alpha_ema)
    if task != 'dirac':
        eval_step = gan.make_eval_step(os.path.join(
            management.exp_path, task + '_inception_stats.npz'),
                                       eval_data.sampler(infinite=False,
                                                         project=0),
                                       hps.generator_dimz,
                                       persisting_Z=100,
                                       device=State().device)

    logger.info("Training")
    while True:
        if step >= hps.max_iters: break
        step += 1
        train_loss = train_step(train_iter, critic, critic_optim, generator,
                                test_generator, generator_optim)
        train_loss_meter.update(train_loss.clone().detach())

        if step % management.log_every == 0 and task != 'dirac':
            train_logger.info("step %d | loss(%s) %.3f (%.3f)", step,
                              hps.loss_type, train_loss_meter.avg.item(),
                              train_loss_meter.val.item())
        if task == 'dirac':
            stats.g_params.append(test_generator.param.clone().detach().cpu())
            stats.c_params.append(critic.param.clone().detach().cpu())

        if step % management.eval_every == 0 and task != 'dirac':
            eval_iter = eval_data.sampler(infinite=False, project=0)
            samples, results = eval_step(eval_iter, critic, test_generator)
            if management.viz:
                from IPython.display import clear_output, display, update_display
                grid_img = torchvision.utils.make_grid(samples,
                                                       nrow=10,
                                                       normalize=True,
                                                       value_range=(-1., 1.),
                                                       padding=0)
                plt.imshow(grid_img.permute(1, 2, 0).cpu())
                display(plt.gcf())
            eval_logger.info(
                "step %d | " +
                ' | '.join([f'{k} {v:.3f}' for k, v in results.items()]), step)
            torchvision.utils.save_image(samples.cpu(),
                                         os.path.join(management.log_path,
                                                      f'samples-{step}.png'),
                                         nrow=10,
                                         normalize=True,
                                         value_range=(-1., 1.),
                                         padding=0)

    logger.info("Final Evaluation")
    if task == 'dirac':
        g_params = torch.stack(stats.g_params)
        c_params = torch.stack(stats.c_params)
        trajectory = torch.cat([c_params, g_params], dim=-1).numpy()
        logger.info(f"Final point in parameter space: {trajectory[-1]}")
        anima = dirac.animate(trajectory, hps)
        if management.viz:
            from IPython.display import HTML, display
            display(HTML(anima.to_html5_video()))
        anima.save(os.path.join(management.log_path, 'evolution.mp4'))
    else:
        eval_iter = eval_data.sampler(infinite=False, project=0)
        samples, results = eval_step(eval_iter, critic, test_generator)
        logger.info(
            "step %d | " +
            ' | '.join([f'{k} {v:.3f}' for k, v in results.items()]), step)
        torchvision.utils.save_image(samples,
                                     os.path.join(management.log_path,
                                                  f'samples-final.png'),
                                     nrow=10,
                                     normalize=True,
                                     value_range=(-1., 1.),
                                     padding=0)