Example #1
def check_resources_usage():
    mapping = {
        'CPU': {
            'used': psutil.cpu_percent(),
            'limit': CPU_LIMIT
        },
        'Memory': {
            'used': psutil.virtual_memory().percent,
            'limit': MEMORY_LIMIT
        },
        'Disc space': {
            'used': psutil.disk_usage('/').percent,
            'limit': DISC_LIMIT
        },
    }

    warning_messages = []
    info_messages = []

    for resource_name, params in mapping.items():
        used = params['used']
        limit = params['limit']
        if used >= limit:
            warning_messages.append(
                '*Warning:* {} usage: *{}%* (limit: {})'.format(
                    resource_name, used, limit))
        else:
            info_messages.append('{} usage: *{}%*'.format(resource_name, used))

    if warning_messages:
        warning_messages_str = '\n'.join(warning_messages)
        info_messages_str = '\n'.join(info_messages)
        message = '{}\n{}'.format(warning_messages_str, info_messages_str)
        send_slack_message(text=message)
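None of these examples include the send_slack_message helper itself. Below is a minimal sketch of such a helper, matching the keyword-argument call style of Examples #1 and #2 and assuming a Slack incoming webhook; the SLACK_WEBHOOK_URL constant is a hypothetical placeholder, not part of the original code.

import requests

# Hypothetical placeholder; the real webhook URL comes from your Slack workspace.
SLACK_WEBHOOK_URL = 'https://hooks.slack.com/services/XXX/YYY/ZZZ'

def send_slack_message(text):
    """Post a plain-text message to Slack through an incoming webhook."""
    response = requests.post(SLACK_WEBHOOK_URL, json={'text': text}, timeout=10)
    response.raise_for_status()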
Example #2
def check_celery_workers_heartbeat():
    """
    Notifies through Slack when there are no active Celery workers.
    """
    if not is_process_running('celery'):
        memory_info = psutil.virtual_memory()
        disk_info = psutil.disk_usage('/')
        error_message = (
            '*Error:* Celery workers are not running.\nMemory used: *{}%*\nDisk used: *{}%*'
        ).format(memory_info.percent, disk_info.percent)
        send_slack_message(text=error_message)
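Example #2 also relies on an is_process_running helper that is not shown here. A plausible sketch using psutil.process_iter (an assumption, not the original implementation):

import psutil

def is_process_running(name):
    """Return True if any running process name contains the given substring."""
    for proc in psutil.process_iter(['name']):
        try:
            if name in (proc.info['name'] or ''):
                return True
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
    return False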
Example #3
    metadata = dict(zip(metadata_list, metadata_values))

    # calculate result
    start_time = time.process_time()
    cur.execute(metadata['query_left'])
    result_left = cur.fetchone()[0]

    # evaluate the result, set the status, and send a Slack message when alerting
    if eval(str(result_left) + metadata['operand'] + str(metadata['value_right'])):
        status = 'ok'
    else:
        status = 'alerting'
        cur.execute("select ch.webhook from alerts.rules_channels rch join alerts.channels ch "
                    "on rch.channel_id = ch.id where rch.rule_id = " + str(rule_id[0]))
        webhook_urls = cur.fetchone()
        utils.send_slack_message(rule_id[0], metadata['name'], webhook_urls)

    if metadata['debug']:
        final_query = metadata['query_left'] + metadata['operand'] + str(metadata['value_right'])
    else:
        final_query = None
    end_time = time.process_time()

    # insert result to results table
    cur.execute("insert into alerts.results (rule_id, calculated_at, results, status, duration, query) "
                "VALUES(%s, %s, %s, %s, %s, %s)",
                (rule_id, datetime.datetime.now(), result_left, status, end_time - start_time, final_query))
    conn.commit()

cur.close()
conn.close()
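The eval() call above executes an expression assembled from database values, and the webhook lookup builds its SQL by string concatenation, so both run text that originates in the database. A safer variant (not the original author's code) maps the stored operand onto operator functions, and the webhook query could pass rule_id as a %s parameter instead of concatenating it:

import operator

# Comparison functions keyed by the textual operand stored with each rule.
OPERATORS = {'<': operator.lt, '<=': operator.le, '>': operator.gt,
             '>=': operator.ge, '==': operator.eq, '!=': operator.ne}

def evaluate_rule(result_left, operand, value_right):
    """Return 'ok' or 'alerting' without eval(), assuming numeric comparisons."""
    compare = OPERATORS[operand.strip()]
    return 'ok' if compare(float(result_left), float(value_right)) else 'alerting'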
Example #4
create_pull_result = functions.create_pull(args.head, args.base)
logger.info(f'Successfully created PR: {create_pull_result["html_url"]}')

# If Slack notification is not requested, stop here.
if not args.slack_notification:
    sys.exit()

# NOTE: Everything from here on exists for one need:
#       "post the PR's commit list to Slack as release notes".
#       The original flow went through a PR comment ("post the release notes as a
#       PR comment, then notify Slack about it"), which required labels and other
#       workarounds; it was simplified once we realized we could just message Slack directly.
# NOTE: The label-based Slack notification is kept here for reference.
#       It can be enabled with the following command:
#       /github subscribe OWNER/REPO pulls,comments,+label:"CONTINUOUS-PR"

# Fetch the PR's commit list via the API.
# NOTE: This builds the list that becomes the "release notes".
list_commits_on_pull_result = functions.list_commits_on_pull(
    create_pull_result['number'])
logger.info(f'Successfully listed commits, count: {len(list_commits_on_pull_result)}')  # noqa: E501
comment_body = functions.create_comment_body(
    list_commits_on_pull_result,
    args.base,
)

# The comment_body built above is treated as the release notes and sent to Slack.
utils.send_slack_message(comment_body)
logger.info('Successfully sent message to Slack')
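functions.list_commits_on_pull and functions.create_comment_body are defined elsewhere in this project. A minimal sketch of what the commit-listing helper might look like against the GitHub REST API (OWNER, REPO, and the GITHUB_TOKEN environment variable are placeholders, not values from the original script):

import os
import requests

OWNER = 'OWNER'  # placeholder
REPO = 'REPO'    # placeholder

def list_commits_on_pull(pull_number):
    """Return the commits on a pull request via the GitHub REST API."""
    url = f'https://api.github.com/repos/{OWNER}/{REPO}/pulls/{pull_number}/commits'
    headers = {'Authorization': f'token {os.environ["GITHUB_TOKEN"]}',
               'Accept': 'application/vnd.github+json'}
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    return response.json()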
Example #5
def main():
    # save input stats for later use

    print(args.work_dir, args.exp)
    work_dir = os.path.join(args.work_dir, args.exp)
    if not os.path.exists(work_dir):
        os.makedirs(work_dir)

    # copy this file to work dir to keep training configuration
    shutil.copy(__file__, os.path.join(work_dir, 'main.py'))
    with open(os.path.join(work_dir, 'args.pkl'), 'wb') as f:
        pickle.dump(args, f)

    # transform
    transform1 = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize([0.5], [0.5])])

    # 1.train_dataset

    train_path, test_path = loader.make_dataset(args.train_site,
                                                train_size=args.train_size,
                                                mode='train')

    np.save(os.path.join(work_dir, '{}_test_path.npy'.format(args.train_site)),
            test_path)

    train_image_path = train_path[0]
    train_label_path = train_path[1]
    test_image_path = test_path[0]
    test_label_path = test_path[1]

    train_dataset = loader.CustomDataset(train_image_path,
                                         train_label_path,
                                         args.train_site,
                                         args.input_size,
                                         transform1,
                                         arg_mode=args.arg_mode,
                                         arg_thres=args.arg_thres)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=4)

    val_dataset = loader.CustomDataset(test_image_path,
                                       test_label_path,
                                       args.train_site,
                                       args.input_size,
                                       transform1,
                                       arg_mode=False)
    val_loader = data.DataLoader(val_dataset,
                                 batch_size=1,
                                 shuffle=False,
                                 num_workers=4)

    Train_test_dataset = loader.CustomDataset(test_image_path, test_label_path,
                                              args.train_site, args.input_size,
                                              transform1)
    Train_test_loader = data.DataLoader(Train_test_dataset,
                                        batch_size=1,
                                        shuffle=True,
                                        num_workers=4)

    trn_logger = Logger(os.path.join(work_dir, 'train.log'))
    trn_raw_logger = Logger(os.path.join(work_dir, 'train_raw.log'))
    val_logger = Logger(os.path.join(work_dir, 'validation.log'))

    # 3.model_select
    my_net, model_name = model_select(
        args.arch,
        args.input_size,
    )

    # 4.gpu select
    my_net = nn.DataParallel(my_net).cuda()
    cudnn.benchmark = True

    # 5.optim

    if args.optim == 'adam':
        gen_optimizer = torch.optim.Adam(my_net.parameters(),
                                         lr=args.initial_lr,
                                         eps=args.eps)
    elif args.optim == 'sgd':
        gen_optimizer = torch.optim.SGD(my_net.parameters(),
                                        lr=args.initial_lr,
                                        momentum=0.9,
                                        weight_decay=args.weight_decay)

    # lr decay
    lr_schedule = args.lr_schedule
    lr_scheduler = optim.lr_scheduler.MultiStepLR(gen_optimizer,
                                                  milestones=lr_schedule[:-1],
                                                  gamma=args.gamma)

    # 6.loss
    if args.loss_function == 'bce':
        criterion = nn.BCEWithLogitsLoss(
            pos_weight=torch.Tensor([args.bce_weight])).cuda()
    elif args.loss_function == 'mse':
        criterion = nn.MSELoss().cuda()


    #####################################################################################

    # train

    send_slack_message(args.token, '#jm_private',
                       '{} : starting_training'.format(args.exp))
    best_iou = 0
    try:
        if args.train_mode:
            for epoch in range(lr_schedule[-1]):

                train(my_net, train_loader, gen_optimizer, epoch, criterion,
                      trn_logger, trn_raw_logger)
                iou = validate(val_loader,
                               my_net,
                               criterion,
                               epoch,
                               val_logger,
                               save_fig=False,
                               work_dir_name='jsrt_visualize_per_epoch')
                print(
                    'validation_iou **************************************************************'
                )

                lr_scheduler.step()

                if args.val_size == 0:
                    is_best = 1
                else:
                    is_best = iou > best_iou
                best_iou = max(iou, best_iou)
                checkpoint_filename = 'model_checkpoint_{:0>3}.pth'.format(
                    epoch + 1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': my_net.state_dict(),
                        'optimizer': gen_optimizer.state_dict()
                    },
                    is_best,
                    work_dir,
                    filename='checkpoint.pth')

        print("train end")
    except RuntimeError as e:
        send_slack_message(
            args.token, '#jm_private',
            '-----------------------------------  error train : send to message JM  & Please send a kakao talk ----------------------------------------- \n error message : {}'
            .format(e))
        import ipdb
        ipdb.set_trace()

    draw_curve(work_dir, trn_logger, val_logger)
    send_slack_message(args.token, '#jm_private',
                       '{} : end_training'.format(args.exp))

    if args.test_mode:
        print('Test mode ...')
        main_test(model=my_net, test_loader=test_data_list, args=args)
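validate() in this example is assumed to return a mean IoU for the predicted masks. A minimal sketch of the usual IoU computation for binary segmentation outputs (probabilities in [0, 1]; not the author's exact implementation):

import torch

def binary_iou(pred, target, threshold=0.5, eps=1e-7):
    """Mean intersection-over-union for a batch of binary masks."""
    pred = (pred > threshold).float()
    target = (target > threshold).float()
    intersection = (pred * target).sum(dim=(-2, -1))
    union = pred.sum(dim=(-2, -1)) + target.sum(dim=(-2, -1)) - intersection
    return ((intersection + eps) / (union + eps)).mean().item()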
Example #6
def main():
    if args.server == 'server_A':
        work_dir = os.path.join('/data1/JM/lung_segmentation', args.exp)
        print(work_dir)
    elif args.server == 'server_B':
        work_dir = os.path.join('/data1/workspace/JM_gen/lung_seg', args.exp)
        print(work_dir)

    if not os.path.exists(work_dir):
        os.makedirs(work_dir)

    # copy this file to work dir to keep training configuration
    shutil.copy(__file__, os.path.join(work_dir, 'main.py'))
    with open(os.path.join(work_dir, 'args.pkl'), 'wb') as f:
        pickle.dump(args, f)

    # transform
    transform1 = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize([0.5], [0.5])])

    # 1.train_dataset
    if args.val_size == 0:
        train_path, test_path = loader.make_dataset(args.server,
                                                    args.train_dataset +
                                                    '_dataset',
                                                    train_size=args.train_size)

        np.save(
            os.path.join(work_dir,
                         '{}_test_path.npy'.format(args.train_dataset)),
            test_path)

        train_image_path = train_path[0]
        train_label_path = train_path[1]
        test_image_path = test_path[0]
        test_label_path = test_path[1]

        train_dataset = loader.CustomDataset(train_image_path,
                                             train_label_path,
                                             transform1,
                                             arg_mode=args.arg_mode,
                                             arg_thres=args.arg_thres,
                                             arg_range=args.arg_range,
                                             dataset=args.train_dataset)
        train_loader = data.DataLoader(train_dataset,
                                       batch_size=args.batch_size,
                                       shuffle=True,
                                       num_workers=4)

        # Organize images and labels differently.
        train_dataset_random = loader.CustomDataset(train_image_path,
                                                    train_label_path,
                                                    transform1,
                                                    arg_mode=args.arg_mode,
                                                    arg_thres=args.arg_thres,
                                                    arg_range=args.arg_range,
                                                    dataset=args.train_dataset)
        train_loader_random = data.DataLoader(train_dataset_random,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=4)

        val_dataset = loader.CustomDataset(test_image_path,
                                           test_label_path,
                                           transform1,
                                           arg_mode=False,
                                           dataset=args.train_dataset)
        val_loader = data.DataLoader(val_dataset,
                                     batch_size=1,
                                     shuffle=False,
                                     num_workers=4)

        # 'JSRT' test_dataset
        Train_test_dataset = loader.CustomDataset(test_image_path,
                                                  test_label_path,
                                                  transform1,
                                                  dataset=args.train_dataset)
        Train_test_loader = data.DataLoader(Train_test_dataset,
                                            batch_size=1,
                                            shuffle=True,
                                            num_workers=4)

        # 2.test_dataset_path

        # 'MC'test_dataset
        test_data1_path, _ = loader.make_dataset(args.server,
                                                 args.test_dataset1 +
                                                 '_dataset',
                                                 train_size=1)
        test_data1_dataset = loader.CustomDataset(test_data1_path[0],
                                                  test_data1_path[1],
                                                  transform1,
                                                  dataset=args.test_dataset1)
        test_data1_loader = data.DataLoader(test_data1_dataset,
                                            batch_size=1,
                                            shuffle=True,
                                            num_workers=4)

        # 'sh'test_dataset
        test_data2_path, _ = loader.make_dataset(args.server,
                                                 args.test_dataset2 +
                                                 '_dataset',
                                                 train_size=1)
        test_data2_dataset = loader.CustomDataset(test_data2_path[0],
                                                  test_data2_path[1],
                                                  transform1,
                                                  dataset=args.test_dataset2)
        test_data2_loader = data.DataLoader(test_data2_dataset,
                                            batch_size=1,
                                            shuffle=True,
                                            num_workers=0)

        test_data_list = [
            Train_test_loader, test_data1_loader, test_data2_loader
        ]

        # np.save(os.path.join(work_dir, 'input_stats.npy'), train_dataset.input_stats)

        trn_logger = Logger(os.path.join(work_dir, 'train.log'))
        trn_raw_logger = Logger(os.path.join(work_dir, 'train_raw.log'))
        val_logger = Logger(os.path.join(work_dir, 'validation.log'))

    # 3.model_select
    model_seg, model_name = model_select(args.arch_seg)
    model_ae, _ = model_select(args.arch_ae)

    # 4.gpu select
    model_seg = nn.DataParallel(model_seg).cuda()
    model_ae = nn.DataParallel(model_ae).cuda()

    #model_seg = model_seg.cuda()
    #model_ae = model_ae.cuda()

    cudnn.benchmark = True

    # 5.optim
    if args.optim == 'adam':
        optimizer_seg = torch.optim.Adam(model_seg.parameters(),
                                         lr=args.initial_lr)
        optimizer_ae = torch.optim.Adam(model_ae.parameters(),
                                        lr=args.initial_lr)

    elif args.optim == 'sgd':
        optimizer_seg = torch.optim.SGD(model_seg.parameters(),
                                        lr=args.initial_lr,
                                        weight_decay=args.weight_decay)

        optimizer_ae = torch.optim.SGD(model_ae.parameters(),
                                       lr=args.initial_lr,
                                       weight_decay=args.weight_decay)

    # if args.clip_grad :
    #
    #     import torch.nn.utils as torch_utils
    #     max_grad_norm = 1.
    #
    #     torch_utils.clip_grad_norm_(model_seg.parameters(),
    #                                 max_grad_norm
    #                                 )
    #     torch_utils.clip_grad_norm_(model_ae.parameters(),
    #                                 max_grad_norm
    #                                 )

    # lr decay
    lr_schedule = args.lr_schedule
    lr_scheduler_seg = optim.lr_scheduler.MultiStepLR(
        optimizer_seg, milestones=lr_schedule[:-1], gamma=args.gamma)

    lr_scheduler_ae = optim.lr_scheduler.MultiStepLR(
        optimizer_ae, milestones=lr_schedule[:-1], gamma=args.gamma)

    # 6.loss

    criterion_seg = loss_function_select(args.seg_loss_function)
    criterion_ae = loss_function_select(args.ae_loss_function)
    criterion_embedding = loss_function_select(args.embedding_loss_function)

    #####################################################################################

    # train

    send_slack_message('#jm_private',
                       '{} : starting_training'.format(args.exp))
    best_iou = 0
    try:
        if args.train_mode:
            for epoch in range(lr_schedule[-1]):

                train(model_seg=model_seg,
                      model_ae=model_ae,
                      train_loader=train_loader,
                      train_loder_random=train_loader_random,
                      optimizer_seg=optimizer_seg,
                      optimizer_ae=optimizer_ae,
                      criterion_seg=criterion_seg,
                      criterion_ae=criterion_ae,
                      criterion_embedding=criterion_embedding,
                      epoch=epoch,
                      logger=trn_logger,
                      sublogger=trn_raw_logger)

                iou = validate(model=model_seg,
                               val_loader=val_loader,
                               criterion=criterion_seg,
                               epoch=epoch,
                               logger=val_logger,
                               work_dir=work_dir,
                               save_fig=False,
                               work_dir_name='{}_visualize_per_epoch'.format(
                                   args.train_dataset))
                print(
                    'validation result **************************************************************'
                )

                lr_scheduler_seg.step()
                lr_scheduler_ae.step()

                if args.val_size == 0:
                    is_best = 1
                else:
                    is_best = iou > best_iou

                best_iou = max(iou, best_iou)
                checkpoint_filename = 'model_checkpoint_{:0>3}.pth'.format(
                    epoch + 1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model_seg.state_dict(),
                        'optimizer': optimizer_seg.state_dict()
                    },
                    is_best,
                    work_dir,
                    filename='checkpoint.pth')

        print("train end")
    except RuntimeError as e:
        send_slack_message(
            '#jm_private',
            '-----------------------------------  error train : send to message JM  & Please send a kakao talk ----------------------------------------- \n error message : {}'
            .format(e))

        import ipdb
        ipdb.set_trace()

    draw_curve(work_dir, trn_logger, val_logger)
    send_slack_message('#jm_private', '{} : end_training'.format(args.exp))
    #--------------------------------------------------------------------------------------------------------#
    # Load the best checkpoint (model_best.pth) and record its epoch in a marker text file.
    load_filename = os.path.join(work_dir, 'model_best.pth')
    checkpoint = torch.load(load_filename)
    ch_epoch = checkpoint['epoch']
    save_check_txt = os.path.join(work_dir, str(ch_epoch))
    f = open("{}_best_checkpoint.txt".format(save_check_txt), 'w')
    f.close()

    # --------------------------------------------------------------------------------------------------------#

    # validation
    if args.test_mode:
        print('Test mode ...')
        main_test(model=model_seg, test_loader=test_data_list, args=args)
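save_checkpoint is another helper that is not shown. Given that main() loads work_dir/model_best.pth after training, a plausible sketch (an assumption about its behavior, not its actual source) is:

import os
import shutil
import torch

def save_checkpoint(state, is_best, work_dir, filename='checkpoint.pth'):
    """Save the latest checkpoint; copy it to model_best.pth when it is the best so far."""
    checkpoint_path = os.path.join(work_dir, filename)
    torch.save(state, checkpoint_path)
    if is_best:
        shutil.copyfile(checkpoint_path, os.path.join(work_dir, 'model_best.pth'))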