Example no. 1
import sys
from argparse import ArgumentParser
from pathlib import Path

import pandas as pd

if __name__ == '__main__':
    sys.path.append("../..")
    sys.path.append("../")
    sys.path.append("./")

    from script.common.utils import timer, get_logger
    from feature_tools import robust_denoised_data

    parser = ArgumentParser()
    parser.add_argument("--n_dims", default=160, type=int)

    args = parser.parse_args()
    outdir = Path(f"../features/robust-denoising/{args.n_dims}")
    outdir.mkdir(exist_ok=True, parents=True)

    logger = get_logger(
        name="robust-denoising", tag=f"robust-denoising/{args.n_dims}")
    meta_train = pd.read_csv("../input/metadata_train.csv")
    meta_test = pd.read_csv("../input/metadata_test.csv")

    train_path = Path("../input/train.parquet")
    test_path = Path("../input/test.parquet")

    # each measurement spans three phase signals, so the measurement count is
    # one third of the metadata rows
    n_line = int(meta_train.shape[0] // 3)
    nchunk_train = 2
    step = (n_line // nchunk_train) * 3
    current_head = meta_train.signal_id[0]
    logger.info(f"step: {step}")
    logger.info(f"initial head: {current_head}")
    X = []
    for i in range(nchunk_train):
        with timer(f"chunk{i+1}", logger):
Example no. 2
    parser.add_argument("--seed", default=42, type=int)
    parser.add_argument("--enable_local_test", action="store_true")
    parser.add_argument("--test_size", default=0.3, type=float)
    parser.add_argument("--scaling", action="store_true")

    parser.add_argument("--device", default="cpu")

    parser.add_argument("--n_epochs", default=50, type=int)
    parser.add_argument("--sample_ratio", default=10.0, type=float)

    parser.add_argument("--features", help="paths of features", nargs="*")
    parser.add_argument("--metadata", help="metadata to retrieve answer")

    args = parser.parse_args()

    logger = get_logger("lstm-attention", "lstm-attention")
    logger.info(
        f"hidden_size: {args.hidden_size}, linear_size: {args.linear_size}")
    logger.info(f"n_attention: {args.n_attention}, anneal: {args.anneal}")
    logger.info(
        f"train_batch: {args.train_batch}, val_batch: {args.val_batch}")
    logger.info(f"n_splits: {args.n_splits}, seed: {args.seed}")
    logger.info(f"enable_local_test: {args.enable_local_test}")
    logger.info(f"test_size: {args.test_size}")
    logger.info(f"n_epochs: {args.n_epochs}")
    logger.info(f"Under sampling ratio: {args.sample_ratio}")
    logger.info(f"features: {args.features}")

    features = []

    for path in args.features:
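        # Truncated at the loop header; the body below is reproduced from the
        # identical loop shown in full in Examples 6 and 14:
        path = Path(path)
        assert path.exists()
        with open(path, "rb") as f:
            feats = pickle.load(f)
        if isinstance(feats, list):
            feats = np.concatenate(feats)
        features.append(feats)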
Example no. 3
    parser.add_argument("--enable_local_test", action="store_true")
    parser.add_argument("--test_size", default=0.3, type=float)
    parser.add_argument("--scaling", action="store_true")

    parser.add_argument("--device", default="cpu")

    parser.add_argument("--n_epochs", default=50, type=int)

    parser.add_argument("--features", help="paths of features", nargs="*")
    parser.add_argument("--metadata",
                        help="metadata to retrieve answer",
                        nargs="*")

    args = parser.parse_args()

    logger = get_logger("lstm-cnn", "lstm-cnn")
    logger.info(
        f"hidden_size: {args.hidden_size}, linear_size: {args.linear_size}")
    logger.info(f"o_channels: {args.o_channels}, anneal: {args.anneal}")
    logger.info(
        f"train_batch: {args.train_batch}, val_batch: {args.val_batch}")
    logger.info(f"n_splits: {args.n_splits}, seed: {args.seed}")
    logger.info(f"enable_local_test: {args.enable_local_test}")
    logger.info(f"test_size: {args.test_size}")
    logger.info(f"n_epochs: {args.n_epochs}")
    logger.info(f"features: {args.features}")

    features = []

    for path in args.features:
        path = Path(path)
Example no. 4
    parser.add_argument("--n_epochs", default=50, type=int)
    parser.add_argument("--fit_to_loc", action="store_true")

    parser.add_argument("--model", default="LSTMAttentionNet")

    parser.add_argument("--train_set")
    parser.add_argument("--validation_set")

    args = parser.parse_args()

    if args.fit_to_loc:
        from script.common.adversarial_trainer import NNTrainer
    else:
        from trainer import NNTrainer

    logger = get_logger("av-lstm-attention", "av-lstm-attention")
    logger.info(
        f"hidden_size: {args.hidden_size}, linear_size: {args.linear_size}")
    logger.info(f"n_attention: {args.n_attention}")
    logger.info(f"o_channels: {args.o_channels}")
    logger.info(f"train_batch: {args.train_batch}")
    logger.info(f"n_splits: {args.n_splits}, seed: {args.seed}")
    logger.info(f"loc_lambda: {args.loc_lambda}")
    logger.info(f"n_epochs: {args.n_epochs}")
    logger.info(f"train_set: {args.train_set}")
    logger.info(f"validation_set: {args.validation_set}")

    with open(args.train_set, "rb") as f:
        train_set = pickle.load(f)

    with open(args.validation_set, "rb") as f:
        validation_set = pickle.load(f)
Example no. 5
    parser.add_argument("--n_trial", default=10, type=int)

    parser.add_argument("--device", default="cpu")
    parser.add_argument("--n_epochs", default=30, type=int)

    parser.add_argument("--allow", default=0.3, type=float)

    parser.add_argument(
        "--train", default="../features/basic-features/160d/train_basic.pkl")
    parser.add_argument(
        "--test", default="../features/basic-features/160d/basic_test.pkl")
    args = parser.parse_args()

    data_path = Path(args.train)
    rel_path = data_path.parent.relative_to("../features")
    logger = get_logger("adversarial-validation", str(rel_path))
    logger.info(
        f"hidden_size: {args.hidden_size}, linear_size: {args.linear_size}")
    logger.info(f"n_attention: {args.n_attention}")
    logger.info(f"n_splits: {args.n_splits}, n_trial: {args.n_trial}")
    logger.info(f"device: {args.device}, n_epochs: {args.n_epochs}")
    logger.info(f"allow: {args.allow}")
    logger.info(f"train: {args.train}")
    logger.info(f"test: {args.test}")

    with open(Path(args.train), "rb") as f:
        train = pickle.load(f)

    with open(Path(args.test), "rb") as f:
        test = pickle.load(f)
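    # Hypothetical continuation: adversarial validation labels train rows 0
    # and test rows 1, then cross-validates a classifier on the combined set;
    # an AUC near 0.5 means the two feature distributions are
    # indistinguishable. A minimal sketch, not the source's actual code:
    X_adv = np.concatenate([train, test])
    y_adv = np.concatenate([np.zeros(len(train)), np.ones(len(test))])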
Example no. 6
    sys.path.append("../..")
    sys.path.append("..")
    sys.path.append("./")
    from script.common.utils import get_logger, timer
    from script.common.train_helpers import sigmoid
    from model import LSTMAttentionNet

    parser = ArgumentParser()
    parser.add_argument("--tag")
    parser.add_argument("--device", default="cpu")
    parser.add_argument("--features", help="path of features", nargs="*")
    parser.add_argument("--sample", default="../input/sample_submission.csv")
    parser.add_argument("--scaling", action="store_true")

    args = parser.parse_args()
    logger = get_logger("av-lstm-attention-test", "av-lstm-attention-test")
    logger.info(f"tag: {args.tag}")
    logger.info(f"device: {args.device}")
    logger.info(f"features: {args.features}")

    features = []
    batch_size = 512

    for path in args.features:
        path = Path(path)
        assert path.exists()
        with open(path, "rb") as f:
            feats = pickle.load(f)
        if isinstance(feats, list):
            feats = np.concatenate(feats)
        features.append(feats)
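    # Hypothetical continuation of the test-time pass: stack the feature
    # arrays and push them through the network in batches. The concatenation
    # axis, the checkpoint path, and a default-constructible model are all
    # assumptions, not details from the source.
    import torch

    X = np.concatenate(features, axis=2)
    model = LSTMAttentionNet()  # constructor arguments are not shown here
    model.load_state_dict(
        torch.load(f"../bin/{args.tag}.pth", map_location=args.device))
    model.to(args.device).eval()

    preds = []
    with torch.no_grad():
        for start in range(0, len(X), batch_size):
            batch = torch.tensor(X[start:start + batch_size],
                                 dtype=torch.float32, device=args.device)
            preds.append(model(batch).cpu().numpy())
    preds = sigmoid(np.concatenate(preds).reshape(-1))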
Example no. 7
    from feature_extraction import prep_data_feature_wise

    parser = ArgumentParser()
    parser.add_argument("--metadata")
    parser.add_argument("--path", default="../input/train.parquet")
    parser.add_argument("--name", default="train.pkl")
    parser.add_argument("--n_dims", default=160, type=int)
    parser.add_argument("--tag")
    parser.add_argument("--nchunk", default=2, type=int)
    parser.add_argument("--func", nargs="*")

    args = parser.parse_args()
    outdir = Path(f"../features/feature-wise/{args.n_dims}d/{args.tag}")
    outdir.mkdir(exist_ok=True, parents=True)

    logger = get_logger(
        name="feature-wise", tag=f"feature-wise/{args.n_dims}d")
    logger.info(f"path: {args.path}, name: {args.name}")
    logger.info(f"tag: {args.tag}")
    logger.info(f"func: {args.func}")
    logger.info(f"n_dims: {args.n_dims}, nchunk: {args.nchunk}")

    meta = pd.read_csv(args.metadata)
    n_line = int(meta.shape[0] // 3)
    logger.info(f"n_line: {n_line}")

    # if the measurements don't split evenly, an extra chunk picks up the
    # remainder
    if n_line % args.nchunk == 0:
        nchunk = args.nchunk
    else:
        nchunk = args.nchunk + 1
    step = (n_line // args.nchunk) * 3
    current_head = meta.signal_id[0]
Example no. 8
    sys.path.append("..")
    sys.path.append("./")

    from script.common.utils import get_logger
    from vae import VAETrainer, VariationalAutoEncoder

    parser = ArgumentParser()
    parser.add_argument("--n_dim", default=5, type=int)
    parser.add_argument("--train")
    parser.add_argument("--test")

    args = parser.parse_args()
    outdir = Path("../features/vae")
    outdir.mkdir(exist_ok=True, parents=True)

    logger = get_logger(name="vae", tag="vae/basic")

    meta = pd.read_csv("../input/metadata_train.csv")
    target = meta.target.values[::3]

    # average predicted targets across all previous submissions; rows where
    # every submission predicted 0 are treated as confidently negative
    submissions = os.listdir("../submission")
    sub_dfs = [pd.read_csv("../submission/" + f) for f in submissions]
    targets = [s.target.values for s in sub_dfs]
    sub_means = np.asarray(targets).mean(axis=0)
    idx = np.argwhere(sub_means == 0.0).reshape(-1)
    # each measurement spans three signal ids, so map signal-level indices to
    # measurement-level indices
    idx = (idx[::3] / 3).astype(int)

    with open(Path(args.train), "rb") as f:
        X = pickle.load(f)

    with open(Path(args.test), "rb") as f:
        X_test = pickle.load(f)  # variable name assumed; the snippet is cut off here
Example no. 9
    from script.common.utils import timer, get_logger
    from feature_extraction import prep_data, _transform_ts

    parser = ArgumentParser()
    parser.add_argument("--metadata")
    parser.add_argument("--path", default="../input/train.parquet")
    parser.add_argument("--name", default="train_basic.pkl")
    parser.add_argument("--n_dims", default=160, type=int)
    parser.add_argument("--nchunk", default=2, type=int)

    args = parser.parse_args()
    outdir = Path(f"../features/basic-features/{args.n_dims}d")
    outdir.mkdir(exist_ok=True, parents=True)

    logger = get_logger(name="basic", tag=f"basic_features/{args.n_dims}d")
    logger.info(f"path: {args.path}, name: {args.name}")

    meta = pd.read_csv(args.metadata)
    n_line = int(meta.shape[0] // 3)
    logger.info(f"n_line: {n_line}")
    if n_line % args.nchunk == 0:
        nchunk = args.nchunk
    else:
        nchunk = args.nchunk + 1
    step = (n_line // args.nchunk) * 3
    current_head = meta.signal_id[0]
    logger.info(f"step: {step}")
    logger.info(f"initial head: {current_head}")
    X = []
    for i in range(nchunk):
Example no. 10
    from feature_extraction import square_data

    parser = ArgumentParser()
    parser.add_argument(
        "--path", default="../features/basic-features/160d/train_basic.pkl")
    parser.add_argument("--scaler", default="")
    parser.add_argument("--name", default="square_train.pkl")

    args = parser.parse_args()
    path = Path(args.path)

    rel_path = path.parent.relative_to("../features")
    outdir = Path("../features/square-features") / rel_path
    outdir.mkdir(exist_ok=True, parents=True)

    logger = get_logger(name="square",
                        tag=f"{str('square_features' / rel_path)}")
    logger.info(f"path: {args.path}, name: {args.name}")

    if args.scaler == "":
        scaler = None
    else:
        scaler_path = Path(args.scaler)
        with open(scaler_path, "rb") as f:
            scaler = pickle.load(f)

    scaler, features = square_data(path, scaler)
    logger.info(f"X_shape: {features.shape}")
    with open(outdir / args.name, "wb") as f:
        pickle.dump(features, f)

    with open(outdir / "scaler.pkl", "wb") as f:
Example no. 11
    from lgbm_features_extractor import signal_origins, prep_features
    # from feature_extraction import flatiron
    from script.common.utils import timer, get_logger

    parser = ArgumentParser()
    parser.add_argument("--metadata", default="../input/metadata_train.csv")
    parser.add_argument("--parquet", default="../input/train.parquet")
    parser.add_argument("--name", default="train.pkl")
    parser.add_argument("--nchunk", default=2, type=int)

    args = parser.parse_args()
    outdir = Path("../features/lgbm-features")
    outdir.mkdir(exist_ok=True, parents=True)

    logger = get_logger("lgbm", tag="lgbm")
    logger.info(f"parquet: {args.parquet}")

    metadata = pd.read_csv(args.metadata)
    n_line = int(metadata.shape[0] // 3)
    if n_line % args.nchunk == 0:
        nchunk = args.nchunk
    else:
        nchunk = args.nchunk + 1
    step = (n_line // args.nchunk) * 3
    current_head = metadata.signal_id[0]

    logger.info(f"n_line: {n_line}")
    logger.info(f"nchunk: {nchunk}")
    logger.info(f"step: {step}")
    logger.info(f"current_head: {current_head}")
Example no. 12
    parser = ArgumentParser()
    parser.add_argument("--metadata")
    parser.add_argument("--path", default="../input/train.parquet")
    parser.add_argument("--name", default="train.pkl")
    parser.add_argument("--n_dims", default=160, type=int)
    parser.add_argument("--nchunk", default=2, type=int)
    parser.add_argument("--dn", default="dn")
    parser.add_argument("--hp", default="hp")

    args = parser.parse_args()
    outdir = Path(
        f"../features/denoised-basic/{args.n_dims}d/flat/{args.dn}{args.hp}")
    outdir.mkdir(exist_ok=True, parents=True)

    logger = get_logger(name="denoised", tag=f"denoised-basic/{args.n_dims}d")
    logger.info(f"path: {args.path}, name: {args.name}")

    if args.dn == "dn":
        dn = True
    else:
        dn = False

    if args.hp == "hp":
        hp = True
    else:
        hp = False

    meta = pd.read_csv(args.metadata)
    n_line = int(meta.shape[0] // 3)
    logger.info(f"n_line: {n_line}")
Example no. 13
    parser.add_argument("--n_jobs", default=2, type=int)
    parser.add_argument("--parameters",
                        default="tsfresh-features/init_parameters.json")
    args = parser.parse_args()
    feats_list = []

    with open(args.parameters) as f:
        parameters = json.load(f)

    parameters = parse_dict(parameters)
    filename = re.search(r"[a-zA-Z_]+\.json$", args.parameters).group()

    outdir = Path(f"../features/tsfresh-features/{filename}")
    outdir.mkdir(exist_ok=True, parents=True)

    logger = get_logger(name="tsfresh", tag=f"tsfresh_features/{filename}")
    logger.info(f"path: {args.path}, name: {args.name}, nchunk: {args.nchunk}")

    meta = pd.read_csv(args.metadata)
    n_line = int(meta.shape[0] // 3)
    logger.info(f"n_line: {n_line}")
    if n_line % args.nchunk == 0:
        nchunk = args.nchunk
    else:
        nchunk = args.nchunk + 1
    step = n_line // args.nchunk
    current_head = meta.signal_id[0]
    logger.info(f"step: {step}")
    logger.info(f"initial head: {current_head}")
    logger.info(f"parameters: {parameters}")
Example no. 14
if __name__ == "__main__":
    sys.path.append("../..")
    sys.path.append("..")
    sys.path.append("./")
    from script.common.utils import get_logger, timer
    from script.common.train_helpers import sigmoid
    from model import LSTMAttentionNet

    parser = ArgumentParser()
    parser.add_argument("--tag")
    parser.add_argument("--device", default="cpu")
    parser.add_argument("--features", help="path of features", nargs="*")
    parser.add_argument("--sample", default="../input/sample_submission.csv")

    args = parser.parse_args()
    logger = get_logger("lstm-attention-test", "lstm-gru-attention-test")
    logger.info(f"tag: {args.tag}")
    logger.info(f"device: {args.device}")
    logger.info(f"features: {args.features}")

    features = []
    batch_size = 512

    for path in args.features:
        path = Path(path)
        assert path.exists()
        with open(path, "rb") as f:
            feats = pickle.load(f)
        if isinstance(feats, list):
            feats = np.concatenate(feats)
        features.append(feats)
Example no. 15
    parser.add_argument("--n_splits", default=5, type=int)
    parser.add_argument("--seed", default=42, type=int)
    parser.add_argument("--enable_local_test", action="store_true")
    parser.add_argument("--test_size", default=0.3, type=float)
    parser.add_argument("--scaling", action="store_true")

    parser.add_argument("--device", default="cpu")

    parser.add_argument("--n_epochs", default=50, type=int)

    parser.add_argument("--train_set", help="paths of features")

    args = parser.parse_args()

    logger = get_logger("denoised-av-lstm-attention",
                        "denoised-av-lstm-attention")
    logger.info(
        f"hidden_size: {args.hidden_size}, linear_size: {args.linear_size}")
    logger.info(f"n_attention: {args.n_attention}, anneal: {args.anneal}")
    logger.info(
        f"train_batch: {args.train_batch}, val_batch: {args.val_batch}")
    logger.info(f"n_splits: {args.n_splits}, seed: {args.seed}")
    logger.info(f"enable_local_test: {args.enable_local_test}")
    logger.info(f"test_size: {args.test_size}")
    logger.info(f"n_epochs: {args.n_epochs}")
    logger.info(f"features: {args.train_set}")

    with open(Path(args.train_set), "rb") as f:
        train_set = pickle.load(f)

    train = train_set[0]
Example no. 16
    parser = ArgumentParser()
    parser.add_argument("--train")
    parser.add_argument("--test")
    parser.add_argument("--train_name", default="train.pkl")
    parser.add_argument("--test_name", default="test.pkl")

    args = parser.parse_args()
    train_path = Path(args.train)
    test_path = Path(args.test)

    rel_path = train_path.parent.relative_to("../features")
    outdir = Path("../features/scaled-features") / rel_path
    outdir.mkdir(exist_ok=True, parents=True)

    logger = get_logger(name="scaled",
                        tag=f"(str('scaled_features') / rel_path)")
    logger.info(f"train: {args.train}, test: {args.test}")
    logger.info(f"trian_name: {args.train_name}, test_name: {args.test_name}")

    with open(args.train, "rb") as f:
        train = pickle.load(f)

    with open(args.test, "rb") as f:
        test = pickle.load(f)

    X_all = np.concatenate([train, test])
    scalers = {}
    for row in range(X_all.shape[1]):
        scalers[row] = StandardScaler()
        scalers[row].fit(X_all[:, row, :])
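    # Hypothetical continuation: apply the fitted scalers row-wise to both
    # splits (StandardScaler.transform is standard scikit-learn API; the
    # snippet is truncated before this step):
    for row in range(X_all.shape[1]):
        train[:, row, :] = scalers[row].transform(train[:, row, :])
        test[:, row, :] = scalers[row].transform(test[:, row, :])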
Example no. 17
    parser.add_argument("--min_child_weight", default=1e-3, type=float)
    parser.add_argument("--subsample", default=0.8, type=float)
    parser.add_argument("--subsample_freq", default=5, type=int)
    parser.add_argument("--colsample_bytree", default=0.8, type=float)
    parser.add_argument("--reg_alpha", default=0.01, type=float)
    parser.add_argument("--reg_lambda", default=0.01, type=float)
    parser.add_argument("--n_jobs", default=2, type=int)

    parser.add_argument("--metadata")
    parser.add_argument("--features")

    parser.add_argument("--test_features")

    args = parser.parse_args()

    logger = get_logger("lightgbm", "lightgbm")

    logger.info(
        f"num_leaves: {args.num_leaves}, learning_rate: {args.learning_rate}")
    logger.info(f"min_child_weight: {args.min_child_weight}")
    logger.info(
        f"subsample: {args.subsample}, subsample_freq: {args.subsample_freq}")
    logger.info(f"colsample_bytree: {args.colsample_bytree}")
    logger.info(f"reg_alpha: {args.reg_alpha}, reg_lambda: {args.reg_lambda}")
    logger.info(f"n_splits: {args.n_splits}, seed: {args.seed}")
    logger.info(f"n_epochs: {args.n_epochs}")

    with open(args.features, "rb") as f:
        train = pickle.load(f)

    answer = pd.read_csv(args.metadata).target.values
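    # Hypothetical continuation: the parsed arguments map one-to-one onto
    # lightgbm.LGBMClassifier's standard constructor parameters (the actual
    # training loop is not shown in the snippet):
    import lightgbm as lgb

    model = lgb.LGBMClassifier(num_leaves=args.num_leaves,
                               learning_rate=args.learning_rate,
                               min_child_weight=args.min_child_weight,
                               subsample=args.subsample,
                               subsample_freq=args.subsample_freq,
                               colsample_bytree=args.colsample_bytree,
                               reg_alpha=args.reg_alpha,
                               reg_lambda=args.reg_lambda,
                               n_jobs=args.n_jobs,
                               random_state=args.seed)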