Example #1
# Imports assumed by this snippet (not shown in the original):
import argparse
import traceback
from distutils.util import strtobool  # note: distutils was removed from the stdlib in Python 3.12
from pathlib import Path


def main():

    parser = argparse.ArgumentParser()

    parser.add_argument('--debug', default=False, type=strtobool)

    parser.add_argument('--dir_dataset', type=Path, default='../../dataset')

    parser.add_argument('--batch_size', type=int, default=8)

    parser.add_argument('--epochs', type=int, default=1)

    parser.add_argument('--alpha', type=float, default=-1)

    parser.add_argument('--seed', type=int, default=1048)

    params = parser.parse_args()

    if params.debug:
        logger = init_logger('_log/debug.log', level=10)
    else:
        logger = init_logger('_log/main.log', level=20)

    logger.info(vars(params))

    try:
        run(**vars(params))
    except Exception:
        logger.info(traceback.format_exc())
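
# `init_logger` is not defined in any of these snippets. A minimal sketch of
# what it might look like (an assumption, not the original implementation):
# a logger writing to the given file and to stderr at the requested level.
import logging
from pathlib import Path


def init_logger(path='_log/main.log', level=logging.INFO):
    Path(path).parent.mkdir(parents=True, exist_ok=True)
    logger = logging.getLogger(path)
    logger.setLevel(level)
    if not logger.handlers:  # avoid adding duplicate handlers on repeat calls
        fmt = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        for handler in (logging.FileHandler(path), logging.StreamHandler()):
            handler.setFormatter(fmt)
            logger.addHandler(handler)
    return logger

Example #2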
def main():

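    # NOTE: several of these examples reference a module-level `params`
    # (an argparse.Namespace parsed at import time) plus module-level
    # imports (torch, numpy as np, pandas as pd) and project helpers
    # (`init_logger`, `train`); none of those are shown in the snippets.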
    if params.debug:
        logger = init_logger('_log/train_debug.log', level=10)
    else:
        logger = init_logger(f'_log/train_{params.model_name}.log', level=20)

    logger.info(f'torch version {torch.__version__}')
    logger.info(f'numpy version {np.__version__}')
    logger.info(f'pandas version {pd.__version__}')

    logger.info(vars(params))

    train(**vars(params))
Example #3
# (`db`, `read_config`, the renderer/qualifier classes, `accuracy`, and
# `monitor` are assumed to come from the surrounding project; TestLoader and
# TextTestRunner are from unittest, basename from os.path.)
# from lib import dl
# dl.trace_start("trace.html",interval=5,auto=True)
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('command', metavar='COMMAND', help='command: reset, tests')
    parser.add_argument('-c', '--config', metavar='CONFIG', help='config file')
    args = parser.parse_args()

    if args.command == 'tests':
        suite = TestLoader().discover('tests', pattern='*.py')
        result = TextTestRunner(verbosity=2).run(suite)
        result = 0 if result.wasSuccessful() else 1
        exit(result)

    cfg = read_config(args.config)
    logger = init_logger()

    renderer = DistributedRenderer()
    qualifier = DistributedQualifier()
    base_image_path = cfg['main']['populationPath'] + basename(cfg['main']['baseImage'])
    fitnessMachine = MeshFitnessMachine(base_image_path, renderer, qualifier)
    population = Population(MeshGenome, fitnessMachine)
    population.generation = int(db.get('generation', default=0))

    accuracy.register(population)
    monitor.register(population)

    if args.command == 'reset' or not population.generation:
        population.initialize()
    else:
        population.load()
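
# `read_config` is not shown. A plausible sketch, assuming an INI-style file
# (the `cfg['main']['populationPath']` lookup above matches configparser's
# mapping interface):
import configparser


def read_config(path):
    cfg = configparser.ConfigParser()
    cfg.read(path)
    return cfg

Example #4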
def main():

    tic = time.time()

    if params.debug:
        logger = init_logger('_log/03_make_sub_debug.log', level=10)
    else:
        logger = init_logger('_log/03_make_sub.log', level=20)

    level5data = LyftDataset(data_path='../../dataset/test',
                             json_path='../../dataset/test/data/',
                             verbose=True)

    sub_pre = pd.read_csv(params.path_sub_pre)
    logger.debug(sub_pre.head())

    sample = pd.read_csv('../../dataset/sample_submission.csv')
    logger.debug(sample.head())

    target_tokens = sample['Id']

    if params.debug:
        target_tokens = target_tokens[:20]

    list_subs = list()

    img_size = 2048
    lidar_range = 100

    for i, target_token in enumerate(tqdm(target_tokens)):

        target_subs = sub_pre.query('token==@target_token')

        list_sub_token = list()

        for _, target_sub in target_subs.iterrows():

            x = target_sub['x']
            y = target_sub['y']
            z = target_sub['z']
            length = target_sub['length']
            width = target_sub['width']
            height = target_sub['height']
            rotate = target_sub['rotate']

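            # Convert from BEV pixel units back to meters: the img_size-pixel
            # image spans 2 * lidar_range meters, so each pixel covers
            # (lidar_range * 2) / (img_size - 1) meters; x/y/z are then
            # shifted by -lidar_range to re-center on the ego vehicle.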
            width = width * (lidar_range * 2) / (img_size - 1)
            length = length * (lidar_range * 2) / (img_size - 1)
            height = height * (lidar_range * 2) / (img_size - 1)

            x = x * (lidar_range * 2) / (img_size - 1) - lidar_range
            y = y * (lidar_range * 2) / (img_size - 1) - lidar_range
            z = z * (lidar_range * 2) / (img_size - 1) - lidar_range

            rotate = -rotate

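            # Yaw-only rotation: quaternion components are (w, x, y, z).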
            quat = Quaternion(math.cos(rotate / 2), 0, 0, math.sin(rotate / 2))
            logger.debug(quat.yaw_pitch_roll)

            pred_box = Box([x, y, z], [width, length, height], quat)

            my_sample = level5data.get('sample', target_token)
            rev_token = level5data.get('sample_data',
                                       my_sample['data']['LIDAR_TOP'])['token']

            rev_box = reverse_box(pred_box, level5data, rev_token)

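            # PredictionString entry: "confidence x y z w l h yaw class_name"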
            sub_i = '{:.9f} '.format(target_sub['confidence']) + \
                    ' '.join(['{:.3f}'.format(v) for v in rev_box.center]) + \
                    ' ' + ' '.join(['{:.3f}'.format(v) for v in rev_box.wlh]) + \
                    ' {:.3f}'.format(rev_box.orientation.yaw_pitch_roll[0]) + ' {}'.format(target_sub['name'])

            logger.debug('sub_i')
            logger.debug(sub_i)

            list_sub_token.append(sub_i)

        if len(list_sub_token) == 0:
            sub_token = ''
        else:
            sub_token = ' '.join(list_sub_token)

        logger.info('submission token:')
        logger.info(sub_token)

        list_subs.append(sub_token)

    submission = pd.DataFrame()
    submission['Id'] = target_tokens
    submission['PredictionString'] = list_subs

    dir_sub = Path('_submission')
    dir_sub.mkdir(exist_ok=True)

    submission.to_csv(dir_sub / params.path_sub_pre.name, index=False)

    logger.info('elapsed time: {:.1f} [min]'.format(
        (time.time() - tic) / 60.0))
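
# `reverse_box` is not shown in the snippet. A plausible sketch, assuming the
# predicted box lives in the LIDAR_TOP sensor frame and the submission expects
# global coordinates; it chains the calibrated-sensor and ego-pose transforms
# in the usual lyft_dataset_sdk fashion:
import numpy as np
from pyquaternion import Quaternion


def reverse_box(box, level5data, sample_data_token):
    sd_record = level5data.get('sample_data', sample_data_token)
    cs_record = level5data.get('calibrated_sensor',
                               sd_record['calibrated_sensor_token'])
    pose_record = level5data.get('ego_pose', sd_record['ego_pose_token'])

    # sensor frame -> ego vehicle frame
    box.rotate(Quaternion(cs_record['rotation']))
    box.translate(np.array(cs_record['translation']))

    # ego vehicle frame -> global frame
    box.rotate(Quaternion(pose_record['rotation']))
    box.translate(np.array(pose_record['translation']))
    return box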
Example #5
def main():

    random.seed(params.seed)
    np.random.seed(params.seed)

    tic = time.time()
    logger = init_logger('_log/log_b{:d}_l{:d}_nd{:d}_seed{:d}'.format(
        params.batch_size, params.num_layer, params.node_dim, params.seed))

    logger.info('parameters')
    logger.info(vars(params))

    train, valid, test, train_moles, valid_moles = load_dataset(params.seed)

    train_moles = sorted(train_moles)
    valid_moles = sorted(valid_moles)
    valid.sort_values('molecule_name', inplace=True)

    logger.info('train moles: {} ...'.format(train_moles[:5]))
    logger.info('valid moles: {} ...'.format(valid_moles[:5]))

    test_moles = sorted(list(set(test['molecule_name'])))
    test.sort_values('molecule_name', inplace=True)

    logger.info('train data: {}'.format(train.shape))
    logger.info('valid data: {}'.format(valid.shape))
    logger.info('test data: {}'.format(test.shape))

    structures = pd.read_csv('../../input/structures.csv')
    structures_groups = structures.groupby('molecule_name')

    bonds = pd.read_csv('../../input/bonds.csv')
    bonds_gp = bonds.groupby('molecule_name')

    train_charges = pd.read_csv('../../input/train_ob_charges.csv')
    train_charges_gp = train_charges.groupby('molecule_name')

    test_charges = pd.read_csv('../../input/test_ob_charges.csv')
    test_charges_gp = test_charges.groupby('molecule_name')

    train_targets = train.groupby('molecule_name')
    valid_targets = valid.groupby('molecule_name')
    test_targets = test.groupby('molecule_name')

    if params.debug:
        random.shuffle(train_moles)
        train_moles = train_moles[:5000]
        test_moles = test_moles[:1000]

    valid.sort_values('id', inplace=True)
    test.sort_values('id', inplace=True)

    list_atoms = sorted(set(structures['atom']))  # sorted so the atom-feature order is reproducible across runs

    train_graphs = dict()
    for mole in tqdm(train_moles):
        train_graphs[mole] = Graph(structures_groups.get_group(mole),
                                   bonds_gp.get_group(mole), list_atoms,
                                   train_charges_gp.get_group(mole))

    valid_graphs = dict()
    for mole in tqdm(valid_moles):
        valid_graphs[mole] = Graph(structures_groups.get_group(mole),
                                   bonds_gp.get_group(mole), list_atoms,
                                   train_charges_gp.get_group(mole))

    test_graphs = dict()
    for mole in tqdm(test_moles):
        test_graphs[mole] = Graph(structures_groups.get_group(mole),
                                  bonds_gp.get_group(mole), list_atoms,
                                  test_charges_gp.get_group(mole))

    model = EdgeUpdateNet(num_layer=params.num_layer,
                          node_dim=params.node_dim,
                          edge_dim=params.edge_dim,
                          gpu=params.gpu)
    if params.gpu >= 0:
        logger.info('transfer model to GPU {}'.format(params.gpu))
        model.to_gpu(params.gpu)

    optimizer = optimizers.Adam(alpha=5e-4)
    optimizer.setup(model)
    model.cleargrads()

    epoch = 2 if params.debug else params.epoch

    for ep in range(epoch):

        logger.info('')
        logger.info('')
        logger.info('start epoch {}'.format(ep))
        logger.info('')

        # -------------------------
        logger.info('')
        logger.info('training')

        loss_value = 0
        random.shuffle(train_moles)
        train_batches_moles = generate_batches(structures_groups, train_moles,
                                               params.batch_size)
        random.shuffle(train_batches_moles)

        for batch_moles in tqdm(train_batches_moles):

            list_train_X = list()
            list_train_y = list()

            for target_mol in batch_moles:
                list_train_X.append(train_graphs[target_mol])
                list_train_y.append(train_targets.get_group(target_mol))

            with chainer.using_config('train', ep == 0):

                loss = model(list_train_X, list_train_y)

                model.cleargrads()
                loss.backward()
                optimizer.update()

            loss_value += cuda.to_cpu(loss.data)

        logger.info('train loss: {:.3f}'.format(
            float(loss_value) / len(train_moles)))

        # -------------------------
        logger.info('')
        logger.info('validation')

        valid_df = predicts(structures_groups, valid_moles, valid_graphs,
                            valid_targets, model, params.batch_size)

        valid_pred = valid_df[['fc', 'sd', 'pso', 'dso']]

        valid_score = calc_score(valid, valid_pred.values)
        logger.info('valid score: {:.3f}'.format(valid_score))

        # -------------------------

        optimizer.alpha = optimizer.alpha * 0.95

        logger.info('change learning rate: {:.6f}'.format(optimizer.alpha))

        if (ep + 1) % 20 == 0:

            # -------------------------
            # save model

            dir_model = Path('_model')
            logger.info('save model')
            dir_model.mkdir(exist_ok=True)
            serializers.save_npz(
                dir_model / 'model_ep{}_seed{}.npz'.format(ep, params.seed),
                model)

            # -------------------------
            # make submission

            logger.info('')
            logger.info('test')

            test_df = predicts(structures_groups, test_moles, test_graphs,
                               test_targets, model, params.batch_size)
            make_submission(test_df,
                            ep,
                            valid_score,
                            params.seed,
                            dir_sub=Path('_submission'))
            make_submission(valid_df,
                            ep,
                            valid_score,
                            params.seed,
                            dir_sub=Path('_valid'))

    toc = time.time() - tic
    logger.info('Elapsed time {:.1f} [min]'.format(toc / 60))
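
# `generate_batches` is not defined above. A minimal sketch (an assumption):
# group molecule names into fixed-size batches, sorting by atom count first so
# molecules of similar size land in the same batch:
def generate_batches(structures_groups, moles, batch_size):
    moles = sorted(moles, key=lambda m: len(structures_groups.get_group(m)))
    return [moles[i:i + batch_size] for i in range(0, len(moles), batch_size)]

Example #6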
def main():

    if params.debug:
        logger = init_logger('_log/01_train_debug.log', level=10)
    else:
        logger = init_logger('_log/01_train.log', level=20)

    tic = time.time()

    logger.info('parameters')
    logger.info(vars(params))

    num_iter = 100 if params.debug else params.num_iter
    num_epoch = 2 if params.debug else params.num_epoch

    list_train_img_all = os.listdir(f'../../input/{params.dataset}/train')
    random.shuffle(list_train_img_all)

    coords = pd.read_csv(f'../../input/{params.dataset}/coordinates.csv')
    target_classes = params.target_classes.split(',')

    logger.info(f'target_classes: {target_classes}')
    coords = coords.query('name in @target_classes')
    target_imgs = [f'{s}.png' for s in coords['token']]

    list_train_img_all = list(set(list_train_img_all) & set(target_imgs))

    rate_valid = 0.1

    list_train_img = list_train_img_all[:-int(
        len(list_train_img_all) * rate_valid)]
    list_valid_img = list_train_img_all[
        -int(len(list_train_img_all) * rate_valid):]

    if params.debug:
        list_train_img = list(list_train_img[:16])
        list_valid_img = list(list_train_img)
    else:
        assert len(set(list_train_img) & set(list_valid_img)) == 0

    logger.info('number of train images: {}'.format(len(list_train_img)))
    logger.info('number of valid images: {}'.format(len(list_valid_img)))

    # build model
    model = build_model()

    # optimizer

    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()),
                           lr=1e-4)

    dir_model = Path('_models')
    dir_model.mkdir(exist_ok=True)

    # train for each epoch

    for ep in range(num_epoch):

        logger.info('')
        logger.info('==> start epoch {}'.format(ep))

        # train
        model = train_main(model, params.dataset, optimizer, list_train_img,
                           target_classes, num_iter, ep,
                           params.distance_upper_bound)

        # validate
        validate(model,
                 params.dataset,
                 list_valid_img,
                 target_classes,
                 epoch=ep)

        # change learning rate
        for param_group in optimizer.param_groups:

            param_group['lr'] *= 0.95
            logger.info('change learning rate into: {:.6f}'.format(
                param_group['lr']))

        # save model
        torch.save(model, dir_model / 'model_ep{}.pt'.format(ep))

    # save model
    torch.save(model, dir_model / 'model.pt')

    # show elapsed time

    toc = time.time() - tic
    logger.info('Elapsed time: {:.1f} [min]'.format(toc / 60.0))
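
# Note: `torch.save(model, ...)` pickles the entire module, so `torch.load`
# (used in `predict_main` below) needs the model class importable under the
# same module path; saving `model.state_dict()` is the more portable option.

Example #7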
def predict_main(model_name, dataset_name, threshold, threshold2, overlap):

    sample_submission = pd.read_csv('../../dataset/sample_submission.csv')

    list_test_img = sample_submission['Id'].tolist()

    if params.debug:
        logger = init_logger('_log/02_detect_debug.log', level=10)
        logger.info('debug mode !')

        list_test_img = list_test_img[:100]  # use only 100 images for dry-run

    else:
        logger = init_logger('_log/02_detect.log', level=20)

    logger.info('parameters')
    logger.info(vars(params))

    tic = time.time()

    # load model
    dir_model = Path('_models')
    model = torch.load(dir_model / model_name)

    cpu_count = multiprocessing.cpu_count()
    logger.info('num_cpu: {}'.format(cpu_count))

    model.eval()

    list_subs = list()

    dataset = SampleDataset(
        dir_img=f'../../input/{dataset_name}/test',
        coord_path=f'../../input/{dataset_name}/coordinates.csv',
        crop_type=0
    )

    dir_save = Path('./_test')
    dir_save.mkdir(exist_ok=True)

    step = 64

    predict_one_wrap = partial(predict_one,
                               dir_save=dir_save,
                               threshold=threshold,
                               threshold2=threshold2,
                               overlap=overlap)

    for j in tqdm(range(0, len(list_test_img), step), desc='batch loop'):

        list_inputs = list()

        # for i, target_name in enumerate(tqdm(list_test_img)):
        for i in tqdm(range(j, min(j+step, len(list_test_img))), desc='gpu loop'):

            target_name = list_test_img[i]
            img_input, _, original = dataset[f'{target_name}.png']

            input_tensor = torch.unsqueeze(img_input, 0)
            # input_tensor = augment_input(img_input)

            with torch.no_grad():
                net_out = model(input_tensor.cuda())

            net_out_numpy = [tensor.cpu().data.numpy() for tensor in net_out]

            net_out_numpy_batch = [tensor[0, :, :, :] for tensor in net_out_numpy]

            list_inputs.append((i, target_name, net_out_numpy_batch))

            if i < 50:
                original.save(dir_save / '{}_original_{}.png'.format(i, target_name[:12]))

        # list_subs_batch = [predict_one_wrap(ip) for ip in tqdm(list_inputs)]

        with Pool(cpu_count) as p:
            list_subs_batch = list(tqdm(p.imap_unordered(predict_one_wrap, list_inputs),
                                        total=len(list_inputs), desc='nmp loop'))

        list_subs.extend(list_subs_batch)

    submission = pd.concat(list_subs, axis=0)

    # save submission

    dir_submission = Path('_submission_pre')

    dir_submission.mkdir(exist_ok=True)
    submission.to_csv(dir_submission / 'submit_{}_th{}_gth{}_ov{}.csv'.format(
        params.model.stem, params.threshold, params.threshold2, params.overlap),
                      index=False, float_format='%.9f')

    logger.info('elapsed time: {:.1f} [min]'.format((time.time() - tic) / 60.0))
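
# `predict_one` is not shown. Judging from the call sites above, it receives
# one `(i, target_name, net_out_numpy_batch)` tuple plus the keyword arguments
# bound via `partial` (dir_save, threshold, threshold2, overlap), decodes the
# heatmaps into boxes, and returns one per-image submission DataFrame that
# `pd.concat` stacks into the final table.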