Code example #1
def main():
    printer = Printer('log.txt')
    utils = Utils(printer)
    utils.setup_and_verify()
    utils.evaluate_baseline()
    learner = Learner(utils.learner_utils)
    learner.train_and_evaluate()
    utils.printer.print('finished!')
Code example #2
File: main.py Project: ybw9000/pytorch-cifar
def test(args, model, testloader, criterion) -> Printer:
    model.eval()
    printer = Printer()
    with torch.no_grad():
        for batch_idx, (inputs, targets) in tqdm(enumerate(testloader),
                                                 total=len(testloader)):
            inputs, targets = inputs.to(args.device), targets.to(args.device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            printer.update(loss, outputs, targets)

    print(printer)
    return printer
Code example #3
File: main.py Project: ybw9000/pytorch-cifar
def train(args, model, trainloader, criterion, optimizer, scheduler) -> None:
    if args.transfer:
        print("transfer in eval mode")
        model.eval()
    else:
        model.train()
    printer = Printer()
    pbar = tqdm(range(args.epochs), total=args.epochs)
    for epoch in pbar:
        scheduler.step()
        for batch_idx, (inputs, targets) in enumerate(trainloader):
            inputs, targets = inputs.to(args.device), targets.to(args.device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            printer.update(loss, outputs, targets)

        # if epoch % args.print == 0:
        pbar.set_description(f'Epoch: {epoch}, {printer}')
Code example #4
    # for reproducibility
    np.random.seed(SEED)
    rn.seed(SEED)
    tf.set_random_seed(SEED)
    # Model.compile
    model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['acc'],
    )
    return model


# main code
printer = Printer(filepath=LOGS_FPATH)
D = printer.date
P = printer.print
D('Start!')
P('────────────────────────────────\n'
  '─── BioCreative VI ─────────────\n'
  '─────── Track 5 (CHEMPROT) ─────\n'
  '────────────────────────────────\n')
P('For the Keras model, see the \'build_model\' function '
  'in the saved \'-main.py\' script.\n')
P('main input arguments\n')
P('\tSEED\n' '\t\t{}\n'.format(SEED))
P('\tMODEL\n' '\t\t{}\n'.format(MODEL))
P('\tEXTERNAL_GROUPS\n' '\t\t{}\n'.format(EXTERNAL_GROUPS))
P('\tTRAINING_GROUPS\n' '\t\t{}\n'.format(TRAINING_GROUPS))
P('\tTEST_GROUPS\n' '\t\t{}\n'.format(TEST_GROUPS))
Code example #5
File: train.py Project: jjhu94/dgl-1
def main(rank, args):
    """
    Parameters
    ----------
    rank : int
        Subprocess id
    args : dict
        Configuration
    """
    if rank == 0:
        t1 = time.time()

    set_random_seed(args['seed'])
    # Removing the line below will cause problems for multiprocessing
    torch.set_num_threads(1)

    # Setup dataset and data loader
    dataset = MoleculeDataset(args['dataset'],
                              args['order'], ['train', 'val'],
                              subset_id=rank,
                              n_subsets=args['num_processes'])

    # Note that currently the batch size for the loaders should only be 1.
    train_loader = DataLoader(dataset.train_set,
                              batch_size=args['batch_size'],
                              shuffle=True,
                              collate_fn=dataset.collate)
    val_loader = DataLoader(dataset.val_set,
                            batch_size=args['batch_size'],
                            shuffle=True,
                            collate_fn=dataset.collate)

    if rank == 0:
        try:
            from tensorboardX import SummaryWriter
            writer = SummaryWriter(args['log_dir'])
        except ImportError:
            print(
                'If you want to use tensorboard, install tensorboardX with pip.'
            )
            writer = None
        train_printer = Printer(args['nepochs'], len(dataset.train_set),
                                args['batch_size'], writer)
        val_printer = Printer(args['nepochs'], len(dataset.val_set),
                              args['batch_size'])
    else:
        val_printer = None

    # Initialize model
    model = DGMG(atom_types=dataset.atom_types,
                 bond_types=dataset.bond_types,
                 node_hidden_size=args['node_hidden_size'],
                 num_prop_rounds=args['num_propagation_rounds'],
                 dropout=args['dropout'])

    if args['num_processes'] == 1:
        from utils import Optimizer
        optimizer = Optimizer(args['lr'],
                              Adam(model.parameters(), lr=args['lr']))
    else:
        from utils import MultiProcessOptimizer
        optimizer = MultiProcessOptimizer(
            args['num_processes'], args['lr'],
            Adam(model.parameters(), lr=args['lr']))

    if rank == 0:
        t2 = time.time()
    best_val_prob = 0

    # Training
    for epoch in range(args['nepochs']):
        model.train()
        if rank == 0:
            print('Training')

        for i, data in enumerate(train_loader):
            log_prob = model(actions=data, compute_log_prob=True)
            prob = log_prob.detach().exp()

            loss_averaged = -log_prob
            prob_averaged = prob
            optimizer.backward_and_step(loss_averaged)
            if rank == 0:
                train_printer.update(epoch + 1, loss_averaged.item(),
                                     prob_averaged.item())

        synchronize(args['num_processes'])

        # Validation
        val_log_prob = evaluate(epoch, model, val_loader, val_printer)
        if args['num_processes'] > 1:
            dist.all_reduce(val_log_prob, op=dist.ReduceOp.SUM)
        val_log_prob /= args['num_processes']
        # Strictly speaking, the probability computed here differs from the one computed
        # on the training set: we first average the log likelihood and then exponentiate.
        # By Jensen's inequality, the resulting value is a lower bound of the true
        # average probability.
        val_prob = (-val_log_prob).exp().item()
        val_log_prob = val_log_prob.item()
        if val_prob >= best_val_prob:
            if rank == 0:
                torch.save({'model_state_dict': model.state_dict()},
                           args['checkpoint_dir'])
                print(
                    'Old val prob {:.10f} | new val prob {:.10f} | model saved'
                    .format(best_val_prob, val_prob))
            best_val_prob = val_prob
        elif epoch >= args['warmup_epochs']:
            optimizer.decay_lr()

        if rank == 0:
            print('Validation')
            if writer is not None:
                writer.add_scalar('validation_log_prob', val_log_prob, epoch)
                writer.add_scalar('validation_prob', val_prob, epoch)
                writer.add_scalar('lr', optimizer.lr, epoch)
            print('Validation log prob {:.4f} | prob {:.10f}'.format(
                val_log_prob, val_prob))

        synchronize(args['num_processes'])

    if rank == 0:
        t3 = time.time()
        print('It took {} to setup.'.format(datetime.timedelta(seconds=t2 -
                                                               t1)))
        print('It took {} to finish training.'.format(
            datetime.timedelta(seconds=t3 - t2)))
        print(
            '--------------------------------------------------------------------------'
        )
        print('On average, an epoch takes {}.'.format(
            datetime.timedelta(seconds=(t3 - t2) / args['nepochs'])))
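
The comment in the validation loop above appeals to Jensen's inequality: averaging log likelihoods and then exponentiating gives a lower bound on the average probability. A minimal standalone sketch of that fact (illustration only, not part of the project; the sample values are made up):

import torch

# Hypothetical per-sample log likelihoods, for illustration only.
log_probs = torch.tensor([-0.5, -2.0, -4.0])

mean_of_probs = log_probs.exp().mean()    # E[p]
exp_of_mean_log = log_probs.mean().exp()  # exp(E[log p])

# Jensen's inequality for the convex function exp: exp(E[log p]) <= E[p].
assert exp_of_mean_log <= mean_of_probs
print(exp_of_mean_log.item(), mean_of_probs.item())  # approx. 0.1146 vs 0.2534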
Code example #6
# -*- coding: UTF-8 -*-

import os
import sys
import json

from contextlib import closing

ENV_ROOT = str(os.path.dirname(os.path.abspath(__file__)))
APP_ROOT = "{}/app".format(ENV_ROOT)
sys.path.append(ENV_ROOT)

from utils import Printer, Config

logger = Printer()
'''
Load environment packages.
'''


def load_packages():
    # Only import packages that actually exist
    # for package in sys.path:
    #     if not os.path.exists(package):
    #         logger.warning("Package not found, removing it from the environment: '{}'".format(package))
    #         sys.path.remove(package)
    #     else:
    #         logger.info("Checking '{}'".format(package))

    # If this is a module directory, it needs to be imported further
    # ??? To avoid importing redundant packages, do a conversion analogous to the package handling above
Code example #7
# (so we can restore during build turn)
N_REINFORCE_ARMIES = 0

# Stuff we want to keep track of from battle modes
ATTACKER = None
DEFENDER = None
N_DEFEND_ARMIES = None
N_ATTACK_ARMIES = None
# On load, we infer piece counts (n_attack_pieces, n_defend_pieces) and
# piece lists (att_pieces, def_pieces) from chessboard, to ensure consistency.
# Also must infer NRANKS, NFILES.
CHESSBOARD = None

from utils import UserInputter, Printer
UI = UserInputter()
prnt = Printer()

_CLASSIC_CHESSMEN_VALUES = {
                   'p': 1,
                   'k': 3,
                   'b': 3,
                   'r': 5,
                   'q': 9,
                   'g': 0
                   }

_BETTER_CHESSMEN_VALUES = {
                   'p': 1,
                   'k': 2,
                   'b': 2,
                   'r': 5,
Code example #8
class OperatorEntry(SuperBase):
    logger = Printer()

    '''Constructor'''
    def __init__(self, name):
        pass

    """
    用于启动子类算子
    接收参数:
    参数1:环境根目录
    参数2:模块
    参数3:执行脚本
    参数4:队列文件
    """
    def run_main(self, factory, args):
        # Create the operator
        operator = import_class(args.module, args.script)()
        
        # Create the execution environment
        env = factory.get_execution_environment()

        # Create the data source fetching object
        settings_conf = json.loads(args.settings)
        boot_conf = settings_conf["boot_conf"]

        # Merge the boot_conf contents
        boot_name = boot_conf.get("name", None)
        source_key = boot_conf.get("source_key")
        env_parallelism = boot_conf.get("parallelism")

        # Mainly for cases like a socket sink whose corresponding socket source has not been started first:
        # if the port is not opened for writing beforehand, an error is raised. More generally, this adjusts startup priority.
        if "boot_delay" in boot_conf:
            boot_delay = int(boot_conf.get("boot_delay"))
            self.logger.warning("Operator of '{}' delay for {} seconds".format(operator.__module__, boot_delay))
            time.sleep(boot_delay)
        
        # Configuration computed from the runtime environment
        dynamic_conf = {
            "source_key": source_key,
            "ref": operator.__module__
        }

        # Inner helper function
        def load_flow_class(load_type):
            load_type_name = boot_conf.get("{}_type".format(load_type))
            load_type_driver_name = boot_conf.get("{}_driver".format(load_type))
            load_type_conf_name = boot_conf.get("{}_conf".format(load_type))
            load_type_conf = settings_conf.get(load_type_conf_name)

            if load_type_conf is None:
                raise Exception("Cannot find section '{}' in settings.conf when loading type '{}'".format(load_type_conf_name, load_type))

            load_type_conf = dict(dynamic_conf, **load_type_conf)
            instance = import_class("{}s".format(load_type), "{}.{}".format(load_type_name, load_type_driver_name))(load_type_conf)
            self.logger.info("Operator [{}] -> [{}] = '{}' & [config] = '{}'".format(operator.__module__, load_type, instance.__module__, load_type_conf))
            return instance

        # Create the data source adapter
        data_source = load_flow_class("source")
        data_source.set_format_args(source_key)
        
        # Pre-format the dynamic part according to its KEY
        # Note: the same module may produce sinks for the same computation, in which case an explicit sink_key must be given
        sink = load_flow_class("sink")
        sink.set_format_args(boot_conf.get("sink_key", "{}_{}".format(
            operator.__module__, operator.__class__.__name__)))


        # Data source wrapper
        data_generator = Generator(operator.__module__, data_source)

        # Create the output adapters
        data_sinks = [sink, MarkProcess(data_source)]

        # Log output
        self.logger.info("Worker running locally at '{}'".format(str(os.path.dirname(os.path.abspath(__file__)))))
        self.logger.info("Worker committed to job '{}' by '{}'".format(boot_name, operator.__module__))

        # Run the operator dynamically
        operator.main(boot_name, env_parallelism,
                      env, data_generator, data_sinks)
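
For orientation, here is a hypothetical settings structure matching the keys that run_main and load_flow_class read above. Only the key names come from the code; every value, driver name, and section name is invented for illustration:

# Illustrative only: assumed shape of the parsed args.settings JSON used by run_main.
settings_conf = {
    "boot_conf": {
        "name": "example_job",
        "source_key": "example_source",
        "parallelism": 2,
        "boot_delay": 3,                      # optional startup delay, in seconds
        "source_type": "kafka",               # load_flow_class -> sources, "kafka.KafkaSource"
        "source_driver": "KafkaSource",
        "source_conf": "kafka_source_conf",   # name of a section below
        "sink_type": "socket",
        "sink_driver": "SocketSink",
        "sink_conf": "socket_sink_conf",
        "sink_key": "example_sink",           # optional; defaults to module_ClassName
    },
    "kafka_source_conf": {"host": "localhost", "port": 9092},
    "socket_sink_conf": {"host": "localhost", "port": 9999},
}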
Code example #9
# n2c2 train dataset
TRAIN_DPATH = os.path.join(REPO, 'data', 'n2c2', 'train')

# n2c2 test dataset
TEST_DPATH = os.path.join(REPO, 'data', 'n2c2', 'test')

# stopwords filepath
STOPWORDS_FPATH = os.path.join(REPO, 'data', 'wrd_stop.txt')

# logs file
FN = datetime.now().strftime('%Y-%m-%d-%H%M%S-%f')
LOGS_FPATH = os.path.join(REPO, 'logs', FN + '-logs.txt')

# printer (logging)
PRINTER = Printer(filepath=LOGS_FPATH)
D_ = PRINTER.date
P_ = PRINTER.print

# stopwords
STOPWORDS = load_stopwords(STOPWORDS_FPATH)

# cross-validation
SKF = StratifiedKFold(
    n_splits=3,
    shuffle=False,
    random_state=None,
)

RULES_TAGS = [
    'ADVANCED-CAD',