Example #1
0
    # Rebind both frames to whatever artgor_utils.reduce_mem_usage returns
    # (presumably a dtype-downcast copy -- the helper is not shown here).
    X = artgor_utils.reduce_mem_usage(X)
    X_test = artgor_utils.reduce_mem_usage(X_test)
    # Run a collection pass now that the previous frame objects were rebound.
    gc.collect()

    if debug:
        # Debug runs dump the training features (without the index column)
        # to a fixed relative path for offline inspection.
        print('saving debug data...')
        X.to_csv("../data/debug_data/debug_acsf.csv", index=False)

    print("training models...")
    # Train the LightGBM regressor through the shared helper. X, X_test, y,
    # params, folds, n_estimators and result_filename all come from code
    # above this excerpt.
    result_dict_lgb = artgor_utils.train_model_regression(
        X=X,
        X_test=X_test,
        y=y,
        params=params,
        folds=folds,
        model_type='lgb',
        eval_metric='group_mae',
        plot_feature_importance=True,
        verbose=100,
        early_stopping_rounds=1000,
        n_estimators=n_estimators,
        res_filename=result_filename)

    # Nothing is persisted in debug mode; only real runs write artifacts.
    if not debug:
        print("saving results...")
        # NOTE(review): result_dict_lgb is indexed by string key below, so it
        # looks like a dict; np.save would then pickle it as an object array
        # and np.load needs allow_pickle=True to read it back -- confirm.
        np.save(result_filename, result_dict_lgb)

        print("making submission...")
        # Copy the helper's 'prediction' entry into the submission frame and
        # write it out without the index column.
        sub['scalar_coupling_constant'] = result_dict_lgb['prediction']
        sub.to_csv(sub_filename, index=False)
Example #2
0
        # Boolean mask selecting the test rows whose 'type' equals the
        # current value. The matching train mask, index_type, is built above
        # this excerpt.
        # NOTE(review): the variable named `type` shadows the builtin; it is
        # bound outside this excerpt, so it is left untouched here.
        index_type_test = (test['type'] == type)

        # Work on independent copies of the per-type subsets so the helper
        # cannot modify train/test in place.
        X_t = train.loc[index_type].copy()
        X_test_t = test.loc[index_type_test].copy()
        y_t = y[index_type]

        print('X_test_t.shape ', X_test_t.shape)
        print('X_t.shape ', X_t.shape)
        print('Training...')
        # Train on this type's subset only, restricted to the columns listed
        # in best_features.
        result_dict_lgb = artgor_utils.train_model_regression(
            X=X_t,
            y=y_t,
            X_test=X_test_t,
            params=params,
            columns=best_features,
            folds=folds,
            n_folds=n_folds,
            model_type='lgb',
            eval_metric='group_mae',
            verbose=50,
            early_stopping_rounds=1000,
            n_estimators=n_estimators,
            res_filename=result_filename)

        # Drop the per-type copies and collect before the next step.
        del X_t, X_test_t
        gc.collect()

        # Results are persisted only outside debug mode; the path is passed
        # through str() before np.save.
        if not debug:
            np.save(str(result_filename), result_dict_lgb)

        # Write the helper's 'oof' entry into the 'oof' column of the rows of
        # this type. This happens in debug mode as well.
        # assumes result_dict_lgb['oof'] is ordered like those rows -- TODO confirm
        X_short.loc[X_short['type'] == type, 'oof'] = result_dict_lgb['oof']
        X_short_test.loc[X_short_test['type'] == type,
Example #3
0
            # Pick the regression target: the coupling constant itself, or
            # another column of folder_data named by oof_colomn.
            if oof_colomn == 'scalar_coupling_constant':
                y = coupling_constant
            else:
                y = folder_data[oof_colomn]

            # Cast the categorical columns to pandas 'category' dtype.
            # NOTE(review): only folder_data is cast in this excerpt; confirm
            # that test receives the same treatment elsewhere.
            folder_data[categorical_cols] = folder_data[
                categorical_cols].astype('category')

            print('test_shape', test[features].shape)
            print('train shape', folder_data[features].shape)

            # The full frames are passed in; `columns=features` tells the
            # helper which columns to use.
            result_dict = artgor_utils.train_model_regression(
                X=folder_data,
                X_test=test,
                y=y,
                params=params,
                folds=folds,
                columns=features,
                model_type='lgb',
                eval_metric='group_mae',
                verbose=50,
                early_stopping_rounds=20,
                n_estimators=n_estimators,
                res_filename=result_filename,
            )

            # NOTE(review): the message below is printed even when
            # result_filename is falsy and nothing is written.
            print("saving results...")
            if result_filename:
                np.save(str(result_filename), result_dict)
Example #4
0
        # type_test is just another name for type_dist_test (no copy is made).
        type_test = type_dist_test
        gc.collect()

        # Per-type hyper-parameters and output path. The `/` operator implies
        # result_filename_prefix is a path-like object (presumably
        # pathlib.Path -- defined outside this excerpt).
        params = types_config[type]
        result_filename = result_filename_prefix / f'distance_kernel_{type}.npy'
        # Target values for the rows selected by train_mask.
        type_y = full_train[train_mask]['scalar_coupling_constant']

        print('X_test_t.shape ', type_test.shape)
        print('X_t.shape ', type_train.shape)
        print('Training...')
        # This call uses eval_metric='mae' and does not pass n_estimators, so
        # the helper's own default applies (value not visible here).
        result_dict_lgb = artgor_utils.train_model_regression(
            X=type_train, y=type_y, X_test=type_test,
            params=params,
            columns=best_features,
            folds=folds,
            n_folds=n_folds,
            model_type='lgb',
            eval_metric='mae',
            verbose=50,
            early_stopping_rounds=1000,
            res_filename=result_filename
        )

        # NOTE(review): type_test aliases type_dist_test, so this del only
        # removes the local name; the underlying object stays alive for as
        # long as type_dist_test still references it.
        del type_train, type_test
        gc.collect()

        # Results are persisted only outside debug mode.
        if not debug:
            np.save(str(result_filename), result_dict_lgb)

        # Write the helper's 'oof' and 'prediction' entries into the rows of
        # the current type (done in debug mode as well).
        # assumes both arrays are ordered like the selected rows -- TODO confirm
        X_short.loc[X_short['type'] == type, 'oof'] = result_dict_lgb['oof']
        X_short_test.loc[X_short_test['type'] == type, 'prediction'] = result_dict_lgb['prediction']