Example #1
def main():
    args = parser.parse_args()

    log_out_dir = opj(RESULT_DIR, 'logs', args.out_dir, 'fold%d' % args.fold)
    if not ope(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(opj(log_out_dir, 'log.train.txt'), mode='a')

    model_out_dir = opj(RESULT_DIR, 'models', args.out_dir,
                        'fold%d' % args.fold)
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(
        model_out_dir))
    if not ope(model_out_dir):
        os.makedirs(model_out_dir)

    # set cuda visible device
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
    cudnn.benchmark = True

    # set random seeds
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    np.random.seed(0)

    model_params = {}
    model_params['architecture'] = args.arch
    model = init_network(model_params)

    # move network to gpu
    model = DataParallel(model)
    model.cuda()

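    # optionally keep an exponential-moving-average (EMA) copy of the weights;
    # when enabled, validation below runs on the EMA model instead of the raw one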
    if args.ema:
        ema_model = copy.deepcopy(model)
        ema_model.cuda()
    else:
        ema_model = None

    # define loss function (criterion)
    try:
        criterion = eval(args.loss)().cuda()
    except:
        raise (RuntimeError("Loss {} not available!".format(args.loss)))

    start_epoch = 0
    best_epoch = 0
    best_dice = 0
    best_dice_arr = np.zeros(3)

    # define scheduler
    try:
        scheduler = eval(args.scheduler)()
    except:
        raise (RuntimeError("Scheduler {} not available!".format(
            args.scheduler)))
    optimizer = scheduler.schedule(model, start_epoch, args.epochs)[0]

    # optionally resume from a checkpoint
    if args.resume:
        model_fpath = os.path.join(model_out_dir, args.resume)
        if os.path.isfile(model_fpath):
            # load checkpoint weights and update model and optimizer
            log.write(">> Loading checkpoint:\n>> '{}'\n".format(model_fpath))

            checkpoint = torch.load(model_fpath)
            start_epoch = checkpoint['epoch']
            best_epoch = checkpoint['best_epoch']
            best_dice_arr = checkpoint['best_dice_arr']
            best_dice = np.max(best_dice_arr)
            model.module.load_state_dict(checkpoint['state_dict'])

            optimizer_fpath = model_fpath.replace('.pth', '_optim.pth')
            if ope(optimizer_fpath):
                log.write(">> Loading checkpoint:\n>> '{}'\n".format(
                    optimizer_fpath))
                optimizer.load_state_dict(
                    torch.load(optimizer_fpath)['optimizer'])

            if args.ema:
                ema_model_fpath = model_fpath.replace('.pth', '_ema.pth')
                if ope(ema_model_fpath):
                    log.write(">> Loading checkpoint:\n>> '{}'\n".format(
                        ema_model_fpath))
                    ema_model.module.load_state_dict(
                        torch.load(ema_model_fpath)['state_dict'])
            log.write(">>>> loaded checkpoint:\n>>>> '{}' (epoch {})\n".format(
                model_fpath, checkpoint['epoch']))
        else:
            log.write(">> No checkpoint found at '{}'\n".format(model_fpath))

    # Data loading code
    train_transform = train_multi_augment9
    train_split_file = opj(DATA_DIR, args.split_type, args.split_name,
                           'random_train_cv%d.csv' % args.fold)
    train_dataset = SiimDataset(
        train_split_file,
        img_size=args.img_size,
        mask_size=args.img_size,
        transform=train_transform,
        return_label=True,
        crop_version=args.crop_version,
        pseudo=args.pseudo,
        pseudo_ratio=args.pseudo_ratio,
        dataset='train',
    )
    if args.is_balance:
        train_sampler = BalanceClassSampler(
            train_dataset, args.sample_times * len(train_dataset))
    else:
        train_sampler = RandomSampler(train_dataset)
    train_loader = DataLoader(
        train_dataset,
        sampler=train_sampler,
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.workers,
        pin_memory=True,
    )
    valid_split_file = opj(DATA_DIR, args.split_type, args.split_name,
                           'random_valid_cv%d.csv' % args.fold)
    valid_dataset = SiimDataset(
        valid_split_file,
        img_size=args.img_size,
        mask_size=args.img_size,
        transform=None,
        return_label=True,
        crop_version=args.crop_version,
        dataset='val',
    )
    valid_loader = DataLoader(valid_dataset,
                              sampler=SequentialSampler(valid_dataset),
                              batch_size=max(int(args.batch_size // 2), 1),
                              drop_last=False,
                              num_workers=args.workers,
                              pin_memory=True)

    log.write('** start training here! **\n')
    log.write('\n')
    log.write(
        'epoch    iter      rate     | smooth_loss/dice | valid_loss/dice | best_epoch/best_score |  min \n'
    )
    log.write(
        '------------------------------------------------------------------------------------------------\n'
    )
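    # resume one epoch past the last completed checkpoint (epochs are 1-indexed)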
    start_epoch += 1
    for epoch in range(start_epoch, args.epochs + 1):
        end = time.time()

        # set manual seeds per epoch
        np.random.seed(epoch)
        torch.manual_seed(epoch)
        torch.cuda.manual_seed_all(epoch)

        # adjust learning rate for each epoch
        lr_list = scheduler.step(model, epoch, args.epochs)
        lr = lr_list[0]

        # train for one epoch on train set
        iter, train_loss, train_dice = train(train_loader,
                                             model,
                                             ema_model,
                                             criterion,
                                             optimizer,
                                             epoch,
                                             args,
                                             lr=lr)

        with torch.no_grad():
            if args.ema:
                valid_loss, valid_dice = validate(valid_loader, ema_model,
                                                  criterion, epoch)
            else:
                valid_loss, valid_dice = validate(valid_loader, model,
                                                  criterion, epoch)

        # remember best loss and save checkpoint
        is_best = valid_dice >= best_dice
        if is_best:
            best_epoch = epoch
            best_dice = valid_dice

        if args.ema:
            save_top_epochs(model_out_dir,
                            ema_model,
                            best_dice_arr,
                            valid_dice,
                            best_epoch,
                            epoch,
                            best_dice,
                            ema=True)
        best_dice_arr = save_top_epochs(model_out_dir,
                                        model,
                                        best_dice_arr,
                                        valid_dice,
                                        best_epoch,
                                        epoch,
                                        best_dice,
                                        ema=False)

        print('\r', end='', flush=True)
        log.write('%5.1f   %5d    %0.6f   |  %0.4f  %0.4f  |  %0.4f  %6.4f |  %6.1f     %6.4f    | %3.1f min \n' % \
                  (epoch, iter + 1, lr, train_loss, train_dice, valid_loss, valid_dice,
                   best_epoch, best_dice, (time.time() - end) / 60))

        model_name = '%03d' % epoch
        if args.ema:
            save_model(ema_model,
                       model_out_dir,
                       epoch,
                       model_name,
                       best_dice_arr,
                       is_best=is_best,
                       optimizer=optimizer,
                       best_epoch=best_epoch,
                       best_dice=best_dice,
                       ema=True)
        save_model(model,
                   model_out_dir,
                   epoch,
                   model_name,
                   best_dice_arr,
                   is_best=is_best,
                   optimizer=optimizer,
                   best_epoch=best_epoch,
                   best_dice=best_dice,
                   ema=False)
Example #2
import os
import sys
dir_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(dir_path)
import json
import pickle
from login_page import Login
from utils.path_manage import PathManage
from authority_management import AuthorityManage
from crawler_page import CrawlerManage
from office_page import OfficeManage
from image_page import ImageManage
from utils.log_util import Logger

mylog = Logger(__name__).getlog()
# disable logging
# mylog.disable(logging.DEBUG)


class BackgroundPage:
    '''Backend page'''
    def __init__(self):
        '''Initialize'''
        self.remove_user_session()

    def get_user_session(self):
        '''Read the session file'''
        with open(PathManage.db_path('session.pkl'), 'rb') as f:
            data = pickle.load(f)
            return data[0]
Example #3
    def run(self):
        self.create_directory(logs_directory)
        self.create_directory(pid_directory)
        poller_config = self.get_poller_config()

        if poller_config:
            poller_config = poller_config[0]
            subnet = poller_config["subnet"]
            community_string = poller_config["community_string"]
            interval = poller_config["interval"]
            table_name = poller_config["table_name"]
            poll_name = poller_config["poll_name"]
            self.logger = Logger(logs_directory, self.poller_id, table_name,
                                 'snmp_poller_logs')
            self.pid_util = ProcessIdUtil(pid_directory, self.poller_id,
                                          table_name, 'snmp_poller', None)
            self.logger.config_logging()

            if self.pid_util.is_process_running():
                self.logger.log('Already running.')
                sys.exit(1)
            else:
                self.logger.log("[{0}] : Poller running...".format(poll_name))
                self.poller_event(1)
                ip_list = self.get_ip_address(table_name)
                try:
                    self.pid_util.create_pid()
                    self.pid_util.save_pid()
                except Exception as err:
                    self.logger.log(err, 'critical')
                    sys.exit(1)
                runner = True

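                # main polling loop: query every device over SNMP, write the
                # results back to the database, and mark unresponsive hosts down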
                while runner:
                    # start
                    for ip_address in ip_list:
                        mdi_runner = main_device_info(ip_address,
                                                      community_string)
                        mdi_output = mdi_runner.run()
                        if mdi_output["is_valid"]:
                            try:
                                mdi_data = mdi_output["main_info"]
                                for_mdd = self.get_selected_oid(
                                    self.poller_id, mdi_data['brand'])
                                mdd_runner = main_device_details(
                                    mdi_data["ip_address"], for_mdd,
                                    community_string)
                                mdd_output = mdd_runner.run()
                                mdd_output["ip_address"] = mdi_data[
                                    "ip_address"]
                                self.insert_update(mdd_output, table_name)
                            except Exception as err:
                                self.logger.log(
                                    "[{0}] : Stopped due to an error : {1}".
                                    format(poll_name, err))
                                self.poller_event('-1')
                        else:
                            update_util = sql_utils.sql_templates[
                                "poll_update_down"].value
                            update_query = update_util.format(
                                table_name, ip_address)
                            self.conn.update_query(update_query)
                            self.logger.log(
                                "[{0}] : No SNMP response for {1}".format(
                                    poll_name, ip_address))
                        time.sleep(2)
                    # end of one pass over ip_list; wait before the next polling cycle
                    time.sleep(interval)
        else:
            print('Poller does not exist.')
Example #4
class start_polling:
    def __init__(self, poller_id=0):
        self.poller_id = poller_id
        # self.conn = DatabaseUtil(os.environ.get("DB_CONN"), os.environ.get("DB_USER"), os.environ.get("DB_PASSWORD"), os.environ.get("DB_NAME"))
        try:
            self.conn = DatabaseUtil(os.environ.get("DB_CONN"),
                                     os.environ.get("DB_USER"),
                                     os.environ.get("DB_PASSWORD"),
                                     os.environ.get("DB_NAME"))
            conn = self.conn.get_connection(interval=60)
            conn.close()
        except Exception as err:
            print(err)
            sys.exit()
        self.logger = None
        self.pid_util = None
        atexit.register(self.exit_handler)

    def poller_event(self, status):
        status = int(status)
        try:
            self.conn.jinja_update_query(
                sql_utils.sql_templates["update_event"].value, {
                    "status": status,
                    "id": self.poller_id
                })
            if status != 1:
                os.kill(os.getpid(), signal.SIGINT)
        except Exception as err:
            self.logger.log(
                'An error has occurred while updating poller status: {0}'.
                format(err), 'critical')
            sys.exit(1)

    def get_poller_config(self):
        sql_query = sql_utils.sql_templates["poller_config"].value
        return self.conn.jinja_select_query(sql_query,
                                            {'poll_id': self.poller_id})

    def get_selected_oid(self, id, brand):
        oid_list = {"oid_list": []}
        oid_inner = dict()
        sql_query = sql_utils.sql_templates["oid_config"].value
        oid_raw = self.conn.jinja_select_query(sql_query, {
            'poll_id': id,
            'brand': brand
        })
        for key, value in enumerate(oid_raw):
            oid_inner[value['oid_key']] = value['oid']
        oid_list['oid_list'] = [oid_inner]
        return oid_list

    def get_ip_address(self, table):
        ip_list = list()
        raw_list = self.conn.select_query(
            """SELECT ip_address from {0}""".format(table))
        for key, items in enumerate(raw_list):
            ip_list.append(items["ip_address"])
        return ip_list

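    # Build an UPDATE statement from the polled key/value pairs and apply it to the device row.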
    def insert_update(self, device_detail, table_name):
        ip_address = device_detail['ip_address']
        del device_detail['ip_address']

        placeholder = ', '.join('?' * len(device_detail))
        update_util = sql_utils.sql_templates["poll_update_up"].value

        update_query = update_util.format(
            table_name,
            ', '.join("{0}='{1}'".format(value, device_detail[value])
                      for key, value in enumerate(device_detail)), ip_address,
            self.poller_id)
        self.conn.update_query(update_query)

    def exit_handler(self):
        if self.pid_util:
            if str(self.pid_util.read_pid()) == str(os.getpid()):
                try:
                    self.pid_util.delete_pid()
                except Exception as err:
                    self.logger.log(err, 'critical')

    def signal_handler(self, signum, frame):
        self.exit_handler()
        if self.logger:
            self.logger.log('Service stopped.')
        sys.exit(1)

    def create_directory(self, directory):
        if not os.path.exists(os.path.join(os.getcwd(), directory)):
            os.mkdir(os.path.join(os.getcwd(), directory))
            print(directory, 'Directory created.')

    def run(self):
        self.create_directory(logs_directory)
        self.create_directory(pid_directory)
        poller_config = self.get_poller_config()

        if poller_config:
            poller_config = poller_config[0]
            subnet = poller_config["subnet"]
            community_string = poller_config["community_string"]
            interval = poller_config["interval"]
            table_name = poller_config["table_name"]
            poll_name = poller_config["poll_name"]
            self.logger = Logger(logs_directory, self.poller_id, table_name,
                                 'snmp_poller_logs')
            self.pid_util = ProcessIdUtil(pid_directory, self.poller_id,
                                          table_name, 'snmp_poller', None)
            self.logger.config_logging()

            if self.pid_util.is_process_running():
                self.logger.log('Already running.')
                sys.exit(1)
            else:
                self.logger.log("[{0}] : Poller running...".format(poll_name))
                self.poller_event(1)
                ip_list = self.get_ip_address(table_name)
                try:
                    self.pid_util.create_pid()
                    self.pid_util.save_pid()
                except Exception as err:
                    self.logger.log(err, 'critical')
                    sys.exit(1)
                runner = True

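                # main polling loop: query every device over SNMP, write the
                # results back to the database, and mark unresponsive hosts down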
                while runner:
                    # start
                    for ip_address in ip_list:
                        mdi_runner = main_device_info(ip_address,
                                                      community_string)
                        mdi_output = mdi_runner.run()
                        if mdi_output["is_valid"]:
                            try:
                                mdi_data = mdi_output["main_info"]
                                for_mdd = self.get_selected_oid(
                                    self.poller_id, mdi_data['brand'])
                                mdd_runner = main_device_details(
                                    mdi_data["ip_address"], for_mdd,
                                    community_string)
                                mdd_output = mdd_runner.run()
                                mdd_output["ip_address"] = mdi_data[
                                    "ip_address"]
                                self.insert_update(mdd_output, table_name)
                            except Exception as err:
                                self.logger.log(
                                    "[{0}] : Stopped due to an error : {1}".
                                    format(poll_name, err))
                                self.poller_event('-1')
                        else:
                            update_util = sql_utils.sql_templates[
                                "poll_update_down"].value
                            update_query = update_util.format(
                                table_name, ip_address)
                            self.conn.update_query(update_query)
                            self.logger.log(
                                "[{0}] : No SNMP response for {1}".format(
                                    poll_name, ip_address))
                        time.sleep(2)
                    # end of one pass over ip_list; wait before the next polling cycle
                    time.sleep(interval)
        else:
            print('Poller does not exist.')
Example #5
def main():
    args = parser.parse_args()

    log_out_dir = opj(RESULT_DIR, 'logs', args.out_dir, f'fold{args.fold}')
    if not ope(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(opj(log_out_dir, 'log.train.txt'), mode='a')

    model_out_dir = opj(RESULT_DIR, 'models', args.out_dir, f'fold{args.fold}')
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(
        model_out_dir))
    if not ope(model_out_dir):
        os.makedirs(model_out_dir)

    # set cuda visible device
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
    cudnn.benchmark = True

    # set random seeds
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    np.random.seed(0)

    model = resnet34(pretrained=False, num_classes=5)

    # move network to gpu
    model = DataParallel(model)
    model.cuda()
    ema_model = None

    # define loss function (criterion)
    try:
        criterion = eval(args.loss)().cuda()
    except:
        raise (RuntimeError("Loss {} not available!".format(args.loss)))

    start_epoch = 0
    best_epoch = 0
    best_dice = 0
    best_dice_arr = np.zeros(3)

    # define scheduler
    try:
        scheduler = eval(args.scheduler)()
    except:
        raise (RuntimeError("Scheduler {} not available!".format(
            args.scheduler)))
    optimizer = scheduler.schedule(model, start_epoch, args.epochs)[0]

    # optionally resume from a checkpoint
    if args.resume:
        model_fpath = os.path.join(model_out_dir, args.resume)
        if os.path.isfile(model_fpath):
            # load checkpoint weights and update model and optimizer
            log.write(">> Loading checkpoint:\n>> '{}'\n".format(model_fpath))

            checkpoint = torch.load(model_fpath)
            start_epoch = checkpoint['epoch']
            best_epoch = checkpoint['best_epoch']
            best_dice_arr = checkpoint['best_dice_arr']
            best_dice = np.max(best_dice_arr)
            model.module.load_state_dict(checkpoint['state_dict'])

            optimizer_fpath = model_fpath.replace('.pth', '_optim.pth')
            if ope(optimizer_fpath):
                log.write(">> Loading checkpoint:\n>> '{}'\n".format(
                    optimizer_fpath))
                optimizer.load_state_dict(
                    torch.load(optimizer_fpath)['optimizer'])
            log.write(">>>> loaded checkpoint:\n>>>> '{}' (epoch {})\n".format(
                model_fpath, checkpoint['epoch']))
        else:
            log.write(">> No checkpoint found at '{}'\n".format(model_fpath))

    # Data loading code
    train_transform = eval(args.train_transform)
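    # reshape train.csv from one row per (image, class) into one row per image
    # with the four RLE columns, then derive split labels and an 80/20 split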
    steel_df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
    steel_df['ImageId'], steel_df['ClassId'] = zip(
        *steel_df['ImageId_ClassId'].apply(lambda x: x.split('_')))
    steel_df = pd.pivot_table(steel_df,
                              index='ImageId',
                              columns='ClassId',
                              values='EncodedPixels',
                              aggfunc=lambda x: x,
                              dropna=False)
    steel_df = steel_df.reset_index()
    steel_df.columns = [str(i) for i in steel_df.columns.values]
    steel_df['class_count'] = steel_df[['1', '2', '3', '4']].count(axis=1)
    steel_df['split_label'] = steel_df[['1', '2', '3', '4', 'class_count'
                                        ]].apply(lambda x: make_split_label(x),
                                                 axis=1)
    steel_df['label'] = steel_df['split_label'].apply(lambda x: make_label(x))
    train_idx, valid_idx, _, _ = train_test_split(steel_df.index,
                                                  steel_df['split_label'],
                                                  test_size=0.2,
                                                  random_state=43)

    train_dataset = SteelDataset(
        steel_df.iloc[train_idx],
        img_size=args.img_size,
        mask_size=args.img_size,
        transform=train_transform,
        return_label=True,
        dataset='train',
    )
    train_loader = DataLoader(
        train_dataset,
        sampler=RandomSampler(train_dataset),
        # sampler=SequentialSampler(train_dataset),
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.workers,
        pin_memory=True,
    )
    valid_dataset = SteelDataset(
        steel_df.iloc[valid_idx],
        img_size=args.img_size,
        mask_size=args.img_size,
        transform=None,
        return_label=True,
        dataset='val',
    )
    valid_loader = DataLoader(valid_dataset,
                              sampler=SequentialSampler(valid_dataset),
                              batch_size=max(int(args.batch_size // 2), 1),
                              drop_last=False,
                              num_workers=args.workers,
                              pin_memory=True)

    log.write('** start training here! **\n')
    log.write('\n')
    log.write(
        'epoch    iter      rate     | smooth_loss/dice | valid_loss/dice | best_epoch/best_score |  min \n'
    )
    log.write(
        '------------------------------------------------------------------------------------------------\n'
    )
    start_epoch += 1
    for epoch in range(start_epoch, args.epochs + 1):
        end = time.time()

        # set manual seeds per epoch
        np.random.seed(epoch)
        torch.manual_seed(epoch)
        torch.cuda.manual_seed_all(epoch)

        # adjust learning rate for each epoch
        lr_list = scheduler.step(model, epoch, args.epochs)
        lr = lr_list[0]

        # train for one epoch on train set
        iter, train_loss, train_dice = train(train_loader,
                                             model,
                                             ema_model,
                                             criterion,
                                             optimizer,
                                             epoch,
                                             args,
                                             lr=lr)

        with torch.no_grad():
            valid_loss, valid_dice = validate(valid_loader, model, criterion,
                                              epoch)

        # remember best loss and save checkpoint
        is_best = valid_dice >= best_dice
        if is_best:
            best_epoch = epoch
            best_dice = valid_dice

        best_dice_arr = save_top_epochs(model_out_dir,
                                        model,
                                        best_dice_arr,
                                        valid_dice,
                                        best_epoch,
                                        epoch,
                                        best_dice,
                                        ema=False)

        print('\r', end='', flush=True)
        log.write('%5.1f   %5d    %0.6f   |  %0.4f  %0.4f  |  %0.4f  %6.4f |  %6.1f     %6.4f    | %3.1f min \n' % \
                  (epoch, iter + 1, lr, train_loss, train_dice, valid_loss, valid_dice,
                   best_epoch, best_dice, (time.time() - end) / 60))

        model_name = '%03d' % epoch
        save_model(model,
                   model_out_dir,
                   epoch,
                   model_name,
                   best_dice_arr,
                   is_best=is_best,
                   optimizer=optimizer,
                   best_epoch=best_epoch,
                   best_dice=best_dice,
                   ema=False)
Example #6
def main():
    args = parser.parse_args()

    log_out_dir = opj(RESULT_DIR, 'logs', args.out_dir, 'fold%d' % args.fold)
    if not ope(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(opj(log_out_dir, 'log.train.txt'), mode='a')

    model_out_dir = opj(RESULT_DIR, 'models', args.out_dir,
                        'fold%d' % args.fold)
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(
        model_out_dir))
    if not ope(model_out_dir):
        os.makedirs(model_out_dir)

    # set cuda visible device
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
    cudnn.benchmark = True

    # set random seeds
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    np.random.seed(0)

    model_params = {}
    model_params['architecture'] = args.arch
    model_params['num_classes'] = args.num_classes
    model_params['in_channels'] = args.in_channels
    model = init_network(model_params)

    # move network to gpu
    model = DataParallel(model)
    model.cuda()

    # define loss function (criterion)
    try:
        criterion = eval(args.loss)().cuda()
    except:
        raise (RuntimeError("Loss {} not available!".format(args.loss)))

    start_epoch = 0
    best_loss = 1e5
    best_epoch = 0
    best_focal = 1e5

    # define scheduler
    try:
        scheduler = eval(args.scheduler)()
    except:
        raise (RuntimeError("Scheduler {} not available!".format(
            args.scheduler)))
    optimizer = scheduler.schedule(model, start_epoch, args.epochs)[0]

    # optionally resume from a checkpoint
    if args.resume:
        args.resume = os.path.join(model_out_dir, args.resume)
        if os.path.isfile(args.resume):
            # load checkpoint weights and update model and optimizer
            log.write(">> Loading checkpoint:\n>> '{}'\n".format(args.resume))

            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            best_epoch = checkpoint['best_epoch']
            best_focal = checkpoint['best_score']
            model.module.load_state_dict(checkpoint['state_dict'])

            optimizer_fpath = args.resume.replace('.pth', '_optim.pth')
            if ope(optimizer_fpath):
                log.write(">> Loading checkpoint:\n>> '{}'\n".format(
                    optimizer_fpath))
                optimizer.load_state_dict(
                    torch.load(optimizer_fpath)['optimizer'])
            log.write(">>>> loaded checkpoint:\n>>>> '{}' (epoch {})\n".format(
                args.resume, checkpoint['epoch']))
        else:
            log.write(">> No checkpoint found at '{}'\n".format(args.resume))

    # Data loading code
    train_transform = train_multi_augment2
    train_split_file = opj(DATA_DIR, 'split', args.split_name,
                           'random_train_cv%d.csv' % args.fold)
    train_dataset = ProteinDataset(
        train_split_file,
        img_size=args.img_size,
        is_trainset=True,
        return_label=True,
        in_channels=args.in_channels,
        transform=train_transform,
        crop_size=args.crop_size,
        random_crop=True,
    )
    train_loader = DataLoader(
        train_dataset,
        sampler=RandomSampler(train_dataset),
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.workers,
        pin_memory=True,
    )
    valid_split_file = opj(DATA_DIR, 'split', args.split_name,
                           'random_valid_cv%d.csv' % args.fold)
    valid_dataset = ProteinDataset(
        valid_split_file,
        img_size=args.img_size,
        is_trainset=True,
        return_label=True,
        in_channels=args.in_channels,
        transform=None,
        crop_size=args.crop_size,
        random_crop=False,
    )
    valid_loader = DataLoader(valid_dataset,
                              sampler=SequentialSampler(valid_dataset),
                              batch_size=args.batch_size,
                              drop_last=False,
                              num_workers=args.workers,
                              pin_memory=True)

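    # focal loss is tracked separately on the validation set and drives model selection (best_focal)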
    focal_loss = FocalLoss().cuda()
    log.write('** start training here! **\n')
    log.write('\n')
    log.write(
        'epoch    iter      rate     |  train_loss/acc  |    valid_loss/acc/focal/kaggle     |best_epoch/best_focal|  min \n'
    )
    log.write(
        '-----------------------------------------------------------------------------------------------------------------\n'
    )
    start_epoch += 1
    for epoch in range(start_epoch, args.epochs + 1):
        end = time.time()

        # set manual seeds per epoch
        np.random.seed(epoch)
        torch.manual_seed(epoch)
        torch.cuda.manual_seed_all(epoch)

        # adjust learning rate for each epoch
        lr_list = scheduler.step(model, epoch, args.epochs)
        lr = lr_list[0]

        # train for one epoch on train set
        iter, train_loss, train_acc = train(train_loader,
                                            model,
                                            criterion,
                                            optimizer,
                                            epoch,
                                            clipnorm=args.clipnorm,
                                            lr=lr)

        with torch.no_grad():
            valid_loss, valid_acc, valid_focal_loss, kaggle_score = validate(
                valid_loader, model, criterion, epoch, focal_loss)

        # remember best loss and save checkpoint
        is_best = valid_focal_loss < best_focal
        best_loss = min(valid_focal_loss, best_loss)
        best_epoch = epoch if is_best else best_epoch
        best_focal = valid_focal_loss if is_best else best_focal

        print('\r', end='', flush=True)
        log.write('%5.1f   %5d    %0.6f   |  %0.4f  %0.4f  |    %0.4f  %6.4f %6.4f %6.4f    |  %6.1f    %6.4f   | %3.1f min \n' % \
                  (epoch, iter + 1, lr, train_loss, train_acc, valid_loss, valid_acc, valid_focal_loss, kaggle_score,
                   best_epoch, best_focal, (time.time() - end) / 60))

        save_model(model,
                   is_best,
                   model_out_dir,
                   optimizer=optimizer,
                   epoch=epoch,
                   best_epoch=best_epoch,
                   best_focal=best_focal)
Example #7
def main():
    args = parser.parse_args()

    log_out_dir = opj(RESULT_DIR, 'logs', args.out_dir, 'fold%d' % args.fold)
    if not ope(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(opj(log_out_dir, 'log.submit.txt'), mode='a')

    args.predict_epoch = 'final' if args.predict_epoch is None else '%03d' % args.predict_epoch
    network_path = opj(RESULT_DIR, 'models', args.out_dir,
                       'fold%d' % args.fold, '%s.pth' % args.predict_epoch)

    submit_out_dir = opj(RESULT_DIR, 'submissions', args.out_dir,
                         'fold%d' % args.fold, 'epoch_%s' % args.predict_epoch)
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(
        submit_out_dir))
    if not ope(submit_out_dir):
        os.makedirs(submit_out_dir)

    # setting up the visible GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

    args.augment = args.augment.split(',')
    for augment in args.augment:
        if augment not in augment_list:
            raise ValueError(
                'Unsupported or unknown test augmentation: {}!'.format(
                    augment))

    model_params = {}
    model_params['architecture'] = args.arch
    model_params['num_classes'] = args.num_classes
    model_params['in_channels'] = args.in_channels
    model = init_network(model_params)

    log.write(">> Loading network:\n>>>> '{}'\n".format(network_path))
    checkpoint = torch.load(network_path)
    model.load_state_dict(checkpoint['state_dict'])
    log.write(">>>> loaded network:\n>>>> epoch {}\n".format(
        checkpoint['epoch']))

    # moving network to gpu and eval mode
    model = DataParallel(model)
    model.cuda()
    model.eval()

    # Data loading code
    dataset = args.dataset
    if dataset == 'test':
        test_split_file = opj(DATA_DIR, 'split', 'test_11702.csv')
    elif dataset == 'val':
        test_split_file = opj(DATA_DIR, 'split', args.split_name,
                              'random_valid_cv%d.csv' % args.fold)
    else:
        raise ValueError('Unsupported or unknown dataset: {}!'.format(dataset))
    test_dataset = ProteinDataset(
        test_split_file,
        img_size=args.img_size,
        is_trainset=(dataset != 'test'),
        return_label=False,
        in_channels=args.in_channels,
        transform=None,
        crop_size=args.crop_size,
        random_crop=False,
    )
    test_loader = DataLoader(
        test_dataset,
        sampler=SequentialSampler(test_dataset),
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        pin_memory=True,
    )

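    # test-time augmentation: predict once per (seed, augmentation) pair; results
    # go to per-augmentation sub-directories (seed-specific when random cropping is used)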
    seeds = [args.seed] if args.seeds is None else [
        int(i) for i in args.seeds.split(',')
    ]
    for seed in seeds:
        test_dataset.random_crop = (seed != 0)
        for augment in args.augment:
            test_loader.dataset.transform = eval('augment_%s' % augment)
            if args.crop_size > 0:
                sub_submit_out_dir = opj(submit_out_dir,
                                         '%s_seed%d' % (augment, seed))
            else:
                sub_submit_out_dir = opj(submit_out_dir, augment)
            if not ope(sub_submit_out_dir):
                os.makedirs(sub_submit_out_dir)
            with torch.no_grad():
                predict(test_loader, model, sub_submit_out_dir, dataset)
Example #8
def main():
    args = parser.parse_args()

    log_out_dir = opj(RESULT_DIR, 'logs', args.out_dir)
    if not ope(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(opj(log_out_dir, 'log.submit.txt'), mode='a')

    args.predict_epoch = 'final' if args.predict_epoch is None else '%03d' % args.predict_epoch
    network_path = opj(RESULT_DIR, 'models', args.out_dir, '%s.pth' % args.predict_epoch)

    submit_out_dir = opj(RESULT_DIR, 'submissions', args.out_dir, 'epoch_%s' % args.predict_epoch)
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(submit_out_dir))
    if not ope(submit_out_dir):
        os.makedirs(submit_out_dir)

    # setting up the visible GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

    model_params = {}
    model_params['architecture'] = args.arch
    model_params['num_classes'] = args.num_classes
    model_params['in_channels'] = args.in_channels
    model = init_network(model_params)
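    # configure the network to extract features during inference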
    model.set_configs(extract_feature=True)

    log.write(">> Loading network:\n>>>> '{}'\n".format(network_path))
    checkpoint = torch.load(network_path)
    model.load_state_dict(checkpoint['state_dict'])
    log.write(">>>> loaded network:\n>>>> epoch {}\n".format(checkpoint['epoch']))

    # moving network to gpu and eval mode
    model = DataParallel(model)
    model.cuda()
    model.eval()

    # Data loading code
    dataset = args.dataset
    if dataset == 'test':
        test_split_file = opj(DATA_DIR, 'split', 'test_11702.csv')
    elif dataset == 'ext':
        test_split_file = opj(DATA_DIR, 'split', 'external_antibody_split.csv')
    elif dataset == 'train':
        test_split_file = opj(DATA_DIR, 'split', 'external_trainset_antibody_split.csv')
    elif dataset == 'val':
        test_split_file = opj(DATA_DIR, 'split', 'external_validset_antibody_split.csv')
    else:
        raise ValueError('Unsupported or unknown dataset: {}!'.format(dataset))
    test_dataset = ProteinMLDataset(
        test_split_file,
        img_size=args.img_size,
        is_trainset=False,
        return_label=False,
        in_channels=args.in_channels,
        transform=None,
    )
    test_loader = DataLoader(
        test_dataset,
        sampler=SequentialSampler(test_dataset),
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        pin_memory=True,
    )

    with torch.no_grad():
        predict(test_loader, model, submit_out_dir, dataset)
Example #9
from backend_api.app.config import config
from utils.log_util import Logger
from utils.pid_util import ProcessIdUtil
import subprocess
import datetime
import sys
import os
# Flask-stack imports used below (assumed packages; Api may come from flask_restful or flask_restx)
from flask import Flask
from flask_cors import CORS
from flask_restful import Api
from flask_sqlalchemy import SQLAlchemy
from flask_jwt_extended import JWTManager

environment = "production"

if len(sys.argv) == 2:
    print('python <api.py> <development|staging>')
    environment = sys.argv[1]

logger = Logger(logs_directory=config.LOG_PATH['logs'],
                module_id=1,
                module_name='API',
                table_name='api_logs')
logger.create_directory()
logger.config_logging()

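# Flask application bootstrap: REST API, CORS, ORM, and JWT auth share one app instance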
app = Flask(__name__)
CORS(app)
app.config.from_object(config.app_config['development'])
api = Api(app)
db = SQLAlchemy(app)
jwt = JWTManager(app)
blacklist = set()

from backend_api.app.routes import urls
from backend_api.app.config import default_handling
from backend_api.app.config import restart_service
Example #10
def main():
    args = parser.parse_args()

    log_out_dir = opj(RESULT_DIR, 'logs', args.out_dir, f'fold{args.fold}')
    if not ope(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(opj(log_out_dir, 'log.submit.txt'), mode='a')

    if args.ema:
        network_path = opj(RESULT_DIR, 'models', args.out_dir,
                           f'fold{args.fold}', f'{args.predict_epoch}_ema.pth')
    else:
        network_path = opj(RESULT_DIR, 'models', args.out_dir,
                           f'fold{args.fold}', f'{args.predict_epoch}.pth')

    submit_out_dir = opj(RESULT_DIR, 'submissions', args.out_dir,
                         f'fold{args.fold}', f'epoch_{args.predict_epoch}')
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(
        submit_out_dir))
    if not ope(submit_out_dir):
        os.makedirs(submit_out_dir)

    # setting up the visible GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

    args.augment = args.augment.split(',')
    for augment in args.augment:
        if augment not in augment_list:
            raise ValueError(
                'Unsupported or unknown test augmentation: {}!'.format(
                    augment))

    model_params = {}
    model_params['architecture'] = args.arch
    model = init_network(model_params)

    log.write(">> Loading network:\n>>>> '{}'\n".format(network_path))
    checkpoint = torch.load(network_path)
    model.load_state_dict(checkpoint['state_dict'])
    log.write(">>>> loaded network:\n>>>> epoch {}\n".format(
        checkpoint['epoch']))

    # moving network to gpu and eval mode
    model = DataParallel(model)
    model.cuda()
    model.eval()

    # Data loading code
    dataset = args.dataset
    if dataset == 'test':
        steel_test_df = pd.read_csv(opj('..', 'input',
                                        'sample_submission.csv'))
    elif dataset == 'val':
        steel_test_df = pd.read_csv(
            opj(DATA_DIR, args.split_type, args.split_name,
                f'random_valid_cv{args.fold}.csv'))
    else:
        raise ValueError('Unsupported or unknown dataset: {}!'.format(dataset))

    steel_test_df['ImageId'], steel_test_df['ClassId'] = zip(
        *steel_test_df['ImageId_ClassId'].apply(lambda x: x.split('_')))
    imageId = pd.DataFrame(steel_test_df['ImageId'].unique(),
                           columns=['ImageId'])

    test_dataset = SteelDataset(
        imageId,
        img_size=args.img_size,
        mask_size=args.img_size,
        transform=None,
        return_label=False,
        dataset=args.dataset,
    )
    test_loader = DataLoader(
        test_dataset,
        sampler=SequentialSampler(test_dataset),
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        pin_memory=True,
    )

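    # test-time augmentation: set each augmentation transform on the loader and
    # pass the matching unaugment_* function to predict()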
    for augment in args.augment:
        test_loader.dataset.transform = eval('augment_%s' % augment)
        unaugment_func = eval('unaugment_%s' % augment)
        sub_submit_out_dir = opj(submit_out_dir, augment)
        if not ope(sub_submit_out_dir):
            os.makedirs(sub_submit_out_dir)
        with torch.no_grad():
            predict(test_loader,
                    model,
                    sub_submit_out_dir,
                    dataset,
                    args,
                    unaugment_func=unaugment_func)
Example #11
def main():
    args = parser.parse_args()

    log_out_dir = opj(RESULT_DIR, 'logs', args.out_dir, 'fold%d' % args.fold)
    if not ope(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(opj(log_out_dir, 'log.submit.txt'), mode='a')

    if args.ema:
        network_path = opj(RESULT_DIR, 'models', args.out_dir,
                           'fold%d' % args.fold,
                           '%s_ema.pth' % args.predict_epoch)
    else:
        network_path = opj(RESULT_DIR, 'models', args.out_dir,
                           'fold%d' % args.fold, '%s.pth' % args.predict_epoch)

    submit_out_dir = opj(RESULT_DIR, 'submissions', args.out_dir,
                         'fold%d' % args.fold, 'epoch_%s' % args.predict_epoch)
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(
        submit_out_dir))
    if not ope(submit_out_dir):
        os.makedirs(submit_out_dir)

    # setting up the visible GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

    args.augment = args.augment.split(',')
    for augment in args.augment:
        if augment not in augment_list:
            raise ValueError(
                'Unsupported or unknown test augmentation: {}!'.format(
                    augment))

    model_params = {}
    model_params['architecture'] = args.arch
    model = init_network(model_params)

    log.write(">> Loading network:\n>>>> '{}'\n".format(network_path))
    checkpoint = torch.load(network_path)
    model.load_state_dict(checkpoint['state_dict'])
    log.write(">>>> loaded network:\n>>>> epoch {}\n".format(
        checkpoint['epoch']))

    # moving network to gpu and eval mode
    model = DataParallel(model)
    model.cuda()
    model.eval()

    # Data loading code
    dataset = args.dataset
    if dataset == 'train':
        test_split_file = opj(DATA_DIR, args.split_type, 'train.csv')
    elif dataset == 'test':
        test_split_file = opj(DATA_DIR, args.split_type, 'test.csv')
    elif dataset == 'val':
        test_split_file = opj(DATA_DIR, args.split_type, args.split_name,
                              'random_valid_cv%d.csv' % args.fold)
    elif dataset == 'nih':
        test_split_file = opj(DATA_DIR, args.split_type, 'nih_112120.csv')
    elif dataset == 'chexpert':
        test_split_file = opj(DATA_DIR, args.split_type, 'chexpert_188521.csv')
    else:
        raise ValueError('Unsupported or unknown dataset: {}!'.format(dataset))
    test_dataset = SiimDataset(
        test_split_file,
        img_size=args.img_size,
        mask_size=args.img_size,
        transform=None,
        return_label=False,
        crop_version=args.crop_version,
        dataset=args.dataset,
        predict_pos=args.predict_pos,
    )
    test_loader = DataLoader(
        test_dataset,
        sampler=SequentialSampler(test_dataset),
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        pin_memory=True,
    )

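    # test-time augmentation: set each augmentation transform on the loader and
    # pass the matching unaugment_* function to predict()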
    for augment in args.augment:
        test_loader.dataset.transform = eval('augment_%s' % augment)
        unaugment_func = eval('unaugment_%s' % augment)
        sub_submit_out_dir = opj(submit_out_dir, augment)
        if not ope(sub_submit_out_dir):
            os.makedirs(sub_submit_out_dir)
        with torch.no_grad():
            predict(test_loader,
                    model,
                    sub_submit_out_dir,
                    dataset,
                    args,
                    unaugment_func=unaugment_func)