# Fragment of the robust-denoising feature-extraction script's __main__ section.
# NOTE(review): this chunk is whitespace-mangled (all newlines collapsed onto
# one line, which is a SyntaxError) and is truncated mid-`with timer(...)` —
# the per-chunk processing body is not visible here, so the code is left
# byte-identical rather than reconstructed.
# Visible flow: extend sys.path -> parse --n_dims -> create output dir
# ../features/robust-denoising/{n_dims} -> set up logger -> load train/test
# metadata CSVs -> compute a chunking step over signal lines (3 signals per
# line, 2 train chunks) -> iterate chunks under a timer.
if __name__ == '__main__': sys.path.append("../..") sys.path.append("../") sys.path.append("./") from script.common.utils import timer, get_logger from feature_tools import robust_denoised_data parser = ArgumentParser() parser.add_argument("--n_dims", default=160, type=int) args = parser.parse_args() outdir = Path(f"../features/robust-denoising/{args.n_dims}") outdir.mkdir(exist_ok=True, parents=True) logger = get_logger( name="robust-denoising", tag=f"robust-denoising/{args.n_dims}") meta_train = pd.read_csv("../input/metadata_train.csv") meta_test = pd.read_csv("../input/metadata_test.csv") train_path = Path("../input/train.parquet") test_path = Path("../input/test.parquet") n_line = int(meta_train.shape[0] // 3) nchunk_train = 2 step = (n_line // nchunk_train) * 3 current_head = meta_train.signal_id[0] logger.info(f"step: {step}") logger.info(f"initial head: {current_head}") X = [] for i in range(nchunk_train): with timer(f"chunk{i+1}", logger):
# Fragment of the lstm-attention training script's __main__ section.
# NOTE(review): whitespace-mangled chunk, truncated at a bodiless
# `for path in args.features:` — the feature-loading loop body is outside this
# view, so the code is left byte-identical. The chunk also begins after
# `parser = ArgumentParser()`; `parser` and the logged args (hidden_size,
# n_attention, train_batch, ...) are defined before this view.
# Visible flow: register CLI options -> parse -> log every hyperparameter ->
# start collecting feature arrays from the paths given via --features.
parser.add_argument("--seed", default=42, type=int) parser.add_argument("--enable_local_test", action="store_true") parser.add_argument("--test_size", default=0.3, type=float) parser.add_argument("--scaling", action="store_true") parser.add_argument("--device", default="cpu") parser.add_argument("--n_epochs", default=50, type=int) parser.add_argument("--sample_ratio", default=10.0, type=float) parser.add_argument("--features", help="paths of features", nargs="*") parser.add_argument("--metadata", help="metadata to retrieve answer") args = parser.parse_args() logger = get_logger("lstm-attention", "lstm-attention") logger.info( f"hidden_size: {args.hidden_size}, linear_size: {args.linear_size}") logger.info(f"n_attention: {args.n_attention}, anneal: {args.anneal}") logger.info( f"train_batch: {args.train_batch}, val_batch: {args.val_batch}") logger.info(f"n_splits: {args.n_splits}, seed: {args.seed}") logger.info(f"enable_local_test: {args.enable_local_test}") logger.info(f"test_size: {args.test_size}") logger.info(f"n_epochs: {args.n_epochs}") logger.info(f"Under sampling ratio: {args.sample_ratio}") logger.info(f"features: {args.features}") features = [] for path in args.features:
# Fragment of the lstm-cnn training script's __main__ section, restored from a
# whitespace-mangled single-line chunk (collapsed newlines made it a
# SyntaxError). Tokens are unchanged; only line structure and comments added.
# NOTE(review): the chunk begins after `parser = ArgumentParser()` and ends
# inside the feature-loading loop — `parser` and several logged args
# (hidden_size, o_channels, train_batch, ...) are defined outside this view.
parser.add_argument("--enable_local_test", action="store_true")
parser.add_argument("--test_size", default=0.3, type=float)
parser.add_argument("--scaling", action="store_true")
parser.add_argument("--device", default="cpu")
parser.add_argument("--n_epochs", default=50, type=int)
parser.add_argument("--features", help="paths of features", nargs="*")
parser.add_argument("--metadata", help="metadata to retrieve answer", nargs="*")
args = parser.parse_args()

logger = get_logger("lstm-cnn", "lstm-cnn")
# Log every hyperparameter up front so each run is reproducible from its log.
logger.info(
    f"hidden_size: {args.hidden_size}, linear_size: {args.linear_size}")
logger.info(f"o_channels: {args.o_channels}, anneal: {args.anneal}")
logger.info(
    f"train_batch: {args.train_batch}, val_batch: {args.val_batch}")
logger.info(f"n_splits: {args.n_splits}, seed: {args.seed}")
logger.info(f"enable_local_test: {args.enable_local_test}")
logger.info(f"test_size: {args.test_size}")
logger.info(f"n_epochs: {args.n_epochs}")
logger.info(f"features: {args.features}")

# Load each pickled feature array listed on the command line.
features = []
for path in args.features:
    path = Path(path)
    # NOTE(review): the loop body continues past the end of this chunk.
# Fragment of the av-lstm-attention training script's __main__ section.
# NOTE(review): whitespace-mangled chunk, truncated at a bodiless
# `with open(args.validation_set, "rb") as f:` — left byte-identical.
# Visible flow: register CLI options -> parse -> choose NNTrainer
# implementation (adversarial variant when --fit_to_loc is set, plain local
# trainer otherwise) -> log hyperparameters -> unpickle the train set, then
# begin unpickling the validation set (load truncated).
parser.add_argument("--n_epochs", default=50, type=int) parser.add_argument("--fit_to_loc", action="store_true") parser.add_argument("--model", default="LSTMAttentionNet") parser.add_argument("--train_set") parser.add_argument("--validation_set") args = parser.parse_args() if args.fit_to_loc: from script.common.adversarial_trainer import NNTrainer else: from trainer import NNTrainer logger = get_logger("av-lstm-attention", "av-lstm-attention") logger.info( f"hidden_size: {args.hidden_size}, linear_size: {args.linear_size}") logger.info(f"n_attention: {args.n_attention}") logger.info(f"o_channels: {args.o_channels}") logger.info(f"train_batch: {args.train_batch}") logger.info(f"n_splits: {args.n_splits}, seed: {args.seed}") logger.info(f"loc_lambda: {args.loc_lambda}") logger.info(f"n_epochs: {args.n_epochs}") logger.info(f"train_set: {args.train_set}") logger.info(f"validation_set: {args.validation_set}") with open(args.train_set, "rb") as f: train_set = pickle.load(f) with open(args.validation_set, "rb") as f:
# Fragment of the adversarial-validation script's __main__ section, restored
# from a whitespace-mangled single-line chunk. Tokens unchanged; only line
# structure and comments added.
# NOTE(review): `parser` is created before this view; `get_logger` comes from
# a project import outside this chunk.
parser.add_argument("--n_trial", default=10, type=int)
parser.add_argument("--device", default="cpu")
parser.add_argument("--n_epochs", default=30, type=int)
parser.add_argument("--allow", default=0.3, type=float)
parser.add_argument(
    "--train", default="../features/basic-features/160d/train_basic.pkl")
parser.add_argument(
    "--test", default="../features/basic-features/160d/basic_test.pkl")
args = parser.parse_args()

# The log tag mirrors the feature set's location under ../features.
data_path = Path(args.train)
rel_path = data_path.parent.relative_to("../features")
logger = get_logger("adversarial-validation", str(rel_path))
logger.info(
    f"hidden_size: {args.hidden_size}, linear_size: {args.linear_size}")
logger.info(f"n_attention: {args.n_attention}")
logger.info(f"n_splits: {args.n_splits}, n_trial: {args.n_trial}")
logger.info(f"device: {args.device}, n_epochs: {args.n_epochs}")
logger.info(f"allow: {args.allow}")
logger.info(f"train: {args.train}")
logger.info(f"test: {args.test}")

# Load the pickled train/test feature matrices used for the validation run.
with open(Path(args.train), "rb") as f:
    train = pickle.load(f)
with open(Path(args.test), "rb") as f:
    test = pickle.load(f)
# Fragment of the av-lstm-attention inference script's __main__ section,
# restored from a whitespace-mangled single-line chunk. Tokens unchanged;
# only line structure and comments added.
sys.path.append("../..")
sys.path.append("..")
sys.path.append("./")
from script.common.utils import get_logger, timer
from script.common.train_helpers import sigmoid
from model import LSTMAttentionNet

parser = ArgumentParser()
parser.add_argument("--tag")
parser.add_argument("--device", default="cpu")
parser.add_argument("--features", help="path of features", nargs="*")
parser.add_argument("--sample", default="../input/sample_submission.csv")
parser.add_argument("--scaling", action="store_true")
args = parser.parse_args()

logger = get_logger("av-lstm-attention-test", "av-lstm-attention-test")
logger.info(f"tag: {args.tag}")
logger.info(f"device: {args.device}")
logger.info(f"features: {args.features}")

# Load every pickled feature file; a pickled list of chunks is concatenated
# into one array before use.
features = []
batch_size = 512
for path in args.features:
    path = Path(path)
    assert path.exists()
    with open(path, "rb") as f:
        feats = pickle.load(f)
    # assumes the `if` sits at loop level, not inside the `with` —
    # indentation was lost in the mangled source; TODO confirm.
    if isinstance(feats, list):
        feats = np.concatenate(feats)
    features.append(feats)
# Fragment of the feature-wise extraction script's __main__ section, restored
# from a whitespace-mangled single-line chunk. Tokens unchanged; only line
# structure and comments added.
from feature_extraction import prep_data_feature_wise

parser = ArgumentParser()
parser.add_argument("--metadata")
parser.add_argument("--path", default="../input/train.parquet")
parser.add_argument("--name", default="train.pkl")
parser.add_argument("--n_dims", default=160, type=int)
parser.add_argument("--tag")
parser.add_argument("--nchunk", default=2, type=int)
parser.add_argument("--func", nargs="*")
args = parser.parse_args()

outdir = Path(f"../features/feature-wise/{args.n_dims}d/{args.tag}")
outdir.mkdir(exist_ok=True, parents=True)
logger = get_logger(
    name="feature-wise", tag=f"feature-wise/{args.n_dims}d")
logger.info(f"path: {args.path}, name: {args.name}")
logger.info(f"tag: {args.tag}")
logger.info(f"func: {args.func}")
logger.info(f"n_dims: {args.n_dims}, nchunk: {args.nchunk}")

# Metadata has 3 signal rows per measurement line; chunk over lines so each
# chunk boundary stays aligned to a whole line (hence the *3 step below).
meta = pd.read_csv(args.metadata)
n_line = int(meta.shape[0] // 3)
logger.info(f"n_line: {n_line}")
# One extra chunk absorbs the remainder when lines don't divide evenly.
if n_line % args.nchunk == 0:
    nchunk = args.nchunk
else:
    nchunk = args.nchunk + 1
step = (n_line // args.nchunk) * 3
current_head = meta.signal_id[0]
# Fragment of the VAE feature script's __main__ section.
# NOTE(review): whitespace-mangled chunk, truncated at a bodiless
# `with open(Path(args.test), "rb") as f:` — left byte-identical.
# Visible flow: extend sys.path -> import project VAE classes -> parse
# --n_dim/--train/--test -> create ../features/vae -> average the `target`
# columns of every CSV in ../submission and keep the indices where all
# submissions agree on 0.0 (one index per measurement line, hence the [::3]/3)
# -> unpickle the train features, then begin unpickling test (truncated).
sys.path.append("..") sys.path.append("./") from script.common.utils import get_logger from vae import VAETrainer, VariationalAutoEncoder parser = ArgumentParser() parser.add_argument("--n_dim", default=5, type=int) parser.add_argument("--train") parser.add_argument("--test") args = parser.parse_args() outdir = Path("../features/vae") outdir.mkdir(exist_ok=True, parents=True) logger = get_logger(name="vae", tag="vae/basic") meta = pd.read_csv("../input/metadata_train.csv") target = meta.target.values[::3] submissions = os.listdir("../submission") sub_dfs = [pd.read_csv("../submission/" + f) for f in submissions] targets = [s.target.values for s in sub_dfs] sub_means = np.asarray(targets).mean(axis=0) idx = np.argwhere(sub_means == 0.0).reshape(-1) idx = (idx[::3] / 3).astype(int) with open(Path(args.train), "rb") as f: X = pickle.load(f) with open(Path(args.test), "rb") as f:
# Fragment of the basic-features extraction script's __main__ section.
# NOTE(review): whitespace-mangled chunk, truncated at a bodiless
# `for i in range(nchunk):` — the per-chunk extraction body is not visible,
# so the code is left byte-identical.
# Visible flow: import project helpers -> parse CLI -> create
# ../features/basic-features/{n_dims}d -> read metadata (3 signal rows per
# measurement line) -> compute chunk count (one extra chunk for the
# remainder) and the row step (*3 keeps chunks aligned to whole lines) ->
# start iterating chunks.
from script.common.utils import timer, get_logger from feature_extraction import prep_data, _transform_ts parser = ArgumentParser() parser.add_argument("--metadata") parser.add_argument("--path", default="../input/train.parquet") parser.add_argument("--name", default="train_basic.pkl") parser.add_argument("--n_dims", default=160, type=int) parser.add_argument("--nchunk", default=2, type=int) args = parser.parse_args() outdir = Path(f"../features/basic-features/{args.n_dims}d") outdir.mkdir(exist_ok=True, parents=True) logger = get_logger(name="basic", tag=f"basic_features/{args.n_dims}d") logger.info(f"path: {args.path}, name: {args.name}") meta = pd.read_csv(args.metadata) n_line = int(meta.shape[0] // 3) logger.info(f"n_line: {n_line}") if n_line % args.nchunk == 0: nchunk = args.nchunk else: nchunk = args.nchunk + 1 step = (n_line // args.nchunk) * 3 current_head = meta.signal_id[0] logger.info(f"step: {step}") logger.info(f"initial head: {current_head}") X = [] for i in range(nchunk):
# Fragment of the square-features script's __main__ section.
# NOTE(review): whitespace-mangled chunk, truncated at a bodiless
# `with open(outdir / "scaler.pkl", "wb") as f:` — left byte-identical.
# Visible flow: parse CLI -> mirror the input's location under
# ../features/square-features ("str" / Path works via PurePath.__rtruediv__)
# -> load an optional pre-fitted scaler (--scaler "", the default, means fit
# a new one) -> run square_data -> pickle the features, then begin pickling
# the scaler (truncated).
from feature_extraction import square_data parser = ArgumentParser() parser.add_argument( "--path", default="../features/basic-features/160d/train_basic.pkl") parser.add_argument("--scaler", default="") parser.add_argument("--name", default="square_train.pkl") args = parser.parse_args() path = Path(args.path) rel_path = path.parent.relative_to("../features") outdir = Path("../features/square-features") / rel_path outdir.mkdir(exist_ok=True, parents=True) logger = get_logger(name="square", tag=f"{str('square_features' / rel_path)}") logger.info(f"path: {args.path}, name: {args.name}") if args.scaler == "": scaler = None else: scaler_path = Path(args.scaler) with open(scaler_path, "rb") as f: scaler = pickle.load(f) scaler, features = square_data(path, scaler) logger.info(f"X_shape: {features.shape}") with open(outdir / args.name, "wb") as f: pickle.dump(features, f) with open(outdir / "scaler.pkl", "wb") as f:
# Fragment of the lgbm-features extraction script's __main__ section, restored
# from a whitespace-mangled single-line chunk. Tokens unchanged; only line
# structure and comments added.
from lgbm_features_extractor import signal_origins, prep_features
# from feature_extraction import flatiron
from script.common.utils import timer, get_logger

parser = ArgumentParser()
parser.add_argument("--metadata", default="../input/metadata_train.csv")
parser.add_argument("--parquet", default="../input/train.parquet")
parser.add_argument("--name", default="train.pkl")
parser.add_argument("--nchunk", default=2, type=int)
args = parser.parse_args()

outdir = Path("../features/lgbm-features")
outdir.mkdir(exist_ok=True, parents=True)
logger = get_logger("lgbm", tag="lgbm")
logger.info(f"parquet: {args.parquet}")

# Metadata has 3 signal rows per measurement line; chunk over whole lines
# (*3 step) and add an extra chunk for any remainder.
metadata = pd.read_csv(args.metadata)
n_line = int(metadata.shape[0] // 3)
if n_line % args.nchunk == 0:
    nchunk = args.nchunk
else:
    nchunk = args.nchunk + 1
step = (n_line // args.nchunk) * 3
current_head = metadata.signal_id[0]
logger.info(f"n_line: {n_line}")
logger.info(f"nchunk: {nchunk}")
logger.info(f"step: {step}")
logger.info(f"current_head: {current_head}")
# Fragment of the denoised-basic feature script's __main__ section, restored
# from a whitespace-mangled single-line chunk. Only line structure, comments,
# and one idiom change (boolean flags via comparison instead of if/else) —
# behavior is identical.
parser = ArgumentParser()
parser.add_argument("--metadata")
parser.add_argument("--path", default="../input/train.parquet")
parser.add_argument("--name", default="train.pkl")
parser.add_argument("--n_dims", default=160, type=int)
parser.add_argument("--nchunk", default=2, type=int)
parser.add_argument("--dn", default="dn")
parser.add_argument("--hp", default="hp")
args = parser.parse_args()

outdir = Path(
    f"../features/denoised-basic/{args.n_dims}d/flat/{args.dn}{args.hp}")
outdir.mkdir(exist_ok=True, parents=True)
logger = get_logger(name="denoised", tag=f"denoised-basic/{args.n_dims}d")
logger.info(f"path: {args.path}, name: {args.name}")

# String-valued CLI switches: exactly "dn" enables denoising, exactly "hp"
# enables high-pass; any other value disables the step.
dn = args.dn == "dn"
hp = args.hp == "hp"

# Metadata has 3 signal rows per measurement line.
meta = pd.read_csv(args.metadata)
n_line = int(meta.shape[0] // 3)
logger.info(f"n_line: {n_line}")
# Fragment of the tsfresh-features script's __main__ section, restored from a
# whitespace-mangled single-line chunk.
# NOTE(review): `parser`, `parse_dict`, and args.path/name/metadata/nchunk are
# defined before this view.
parser.add_argument("--n_jobs", default=2, type=int)
parser.add_argument("--parameters", default="tsfresh-features/init_parameters.json")
args = parser.parse_args()

feats_list = []
with open(args.parameters) as f:
    parameters = json.load(f)
parameters = parse_dict(parameters)
# BUG FIX: '.' was unescaped, so the pattern also matched e.g. "xjson";
# escape it to match a literal ".json" suffix.
filename = re.search(r"[a-zA-Z_]+\.json$", args.parameters).group()
# NOTE(review): "(unknown)" below looks like an extraction-mangled placeholder
# (the f-strings have no {...} and the computed `filename` is otherwise
# unused) — kept byte-identical; confirm against the original script.
outdir = Path(f"../features/tsfresh-features/(unknown)")
outdir.mkdir(exist_ok=True)
logger = get_logger(name="tsfresh", tag=f"tsfresh_features/(unknown)")
logger.info(f"path: {args.path}, name: {args.name}, nchunk: {args.nchunk}")

# Metadata has 3 signal rows per measurement line; one extra chunk absorbs
# the remainder. NOTE(review): unlike the sibling scripts, `step` here is in
# lines (no *3) — confirm intended.
meta = pd.read_csv(args.metadata)
n_line = int(meta.shape[0] // 3)
logger.info(f"n_line: {n_line}")
if n_line % args.nchunk == 0:
    nchunk = args.nchunk
else:
    nchunk = args.nchunk + 1
step = n_line // args.nchunk
current_head = meta.signal_id[0]
logger.info(f"step: {step}")
logger.info(f"initial head: {current_head}")
logger.info(f"parameters: {parameters}")
# lstm-attention inference script entry point, restored from a
# whitespace-mangled single-line chunk. Tokens unchanged; only line structure
# and comments added.
if __name__ == "__main__":
    sys.path.append("../..")
    sys.path.append("..")
    sys.path.append("./")
    from script.common.utils import get_logger, timer
    from script.common.train_helpers import sigmoid
    from model import LSTMAttentionNet

    parser = ArgumentParser()
    parser.add_argument("--tag")
    parser.add_argument("--device", default="cpu")
    parser.add_argument("--features", help="path of features", nargs="*")
    parser.add_argument("--sample", default="../input/sample_submission.csv")
    args = parser.parse_args()

    logger = get_logger("lstm-attention-test", "lstm-gru-attention-test")
    logger.info(f"tag: {args.tag}")
    logger.info(f"device: {args.device}")
    logger.info(f"features: {args.features}")

    # Load every pickled feature file; a pickled list of chunks is
    # concatenated into one array before use.
    features = []
    batch_size = 512
    for path in args.features:
        path = Path(path)
        assert path.exists()
        with open(path, "rb") as f:
            feats = pickle.load(f)
        # assumes the `if` sits at loop level, not inside the `with` —
        # indentation was lost in the mangled source; TODO confirm.
        if isinstance(feats, list):
            feats = np.concatenate(feats)
        features.append(feats)
# Fragment of the denoised-av-lstm-attention training script's __main__
# section, restored from a whitespace-mangled single-line chunk. Tokens
# unchanged; only line structure and comments added.
# NOTE(review): `parser` and the logged args (hidden_size, n_attention, ...)
# are defined before this view.
parser.add_argument("--n_splits", default=5, type=int)
parser.add_argument("--seed", default=42, type=int)
parser.add_argument("--enable_local_test", action="store_true")
parser.add_argument("--test_size", default=0.3, type=float)
parser.add_argument("--scaling", action="store_true")
parser.add_argument("--device", default="cpu")
parser.add_argument("--n_epochs", default=50, type=int)
parser.add_argument("--train_set", help="paths of features")
args = parser.parse_args()

logger = get_logger("denoised-av-lstm-attention",
                    "denoised-av-lstm-attention")
logger.info(
    f"hidden_size: {args.hidden_size}, linear_size: {args.linear_size}")
logger.info(f"n_attention: {args.n_attention}, anneal: {args.anneal}")
logger.info(
    f"train_batch: {args.train_batch}, val_batch: {args.val_batch}")
logger.info(f"n_splits: {args.n_splits}, seed: {args.seed}")
logger.info(f"enable_local_test: {args.enable_local_test}")
logger.info(f"test_size: {args.test_size}")
logger.info(f"n_epochs: {args.n_epochs}")
logger.info(f"features: {args.train_set}")

# The pickled train set is a sequence; element 0 is the training data.
with open(Path(args.train_set), "rb") as f:
    train_set = pickle.load(f)
train = train_set[0]
# Fragment of the scaled-features script's __main__ section, restored from a
# whitespace-mangled single-line chunk.
parser = ArgumentParser()
parser.add_argument("--train")
parser.add_argument("--test")
parser.add_argument("--train_name", default="train.pkl")
parser.add_argument("--test_name", default="test.pkl")
args = parser.parse_args()

# Mirror the input's location under ../features/scaled-features.
train_path = Path(args.train)
test_path = Path(args.test)
rel_path = train_path.parent.relative_to("../features")
outdir = Path("../features/scaled-features") / rel_path
outdir.mkdir(exist_ok=True, parents=True)
# BUG FIX: the original tag was f"(str('scaled_features') / rel_path)" — an
# f-string with no {...} placeholder, so the tag was that literal source
# text. Build the real path string, mirroring the square-features script.
logger = get_logger(name="scaled", tag=str(Path("scaled_features") / rel_path))
logger.info(f"train: {args.train}, test: {args.test}")
# (typo fix in log text: "trian_name" -> "train_name")
logger.info(f"train_name: {args.train_name}, test_name: {args.test_name}")

# Fit one StandardScaler per middle-axis row over train+test combined,
# so train and test share the same scaling.
with open(args.train, "rb") as f:
    train = pickle.load(f)
with open(args.test, "rb") as f:
    test = pickle.load(f)
X_all = np.concatenate([train, test])
scalers = {}
for row in range(X_all.shape[1]):
    scalers[row] = StandardScaler()
    scalers[row].fit(X_all[:, row, :])
# Fragment of the lightgbm training script's __main__ section, restored from a
# whitespace-mangled single-line chunk. Tokens unchanged; only line structure
# and comments added.
# NOTE(review): `parser` and the logged args (num_leaves, learning_rate,
# n_splits, seed, n_epochs) are defined before this view.
parser.add_argument("--min_child_weight", default=1e-3, type=float)
parser.add_argument("--subsample", default=0.8, type=float)
parser.add_argument("--subsample_freq", default=5, type=int)
parser.add_argument("--colsample_bytree", default=0.8, type=float)
parser.add_argument("--reg_alpha", default=0.01, type=float)
parser.add_argument("--reg_lambda", default=0.01, type=float)
parser.add_argument("--n_jobs", default=2, type=int)
parser.add_argument("--metadata")
parser.add_argument("--features")
parser.add_argument("--test_features")
args = parser.parse_args()

logger = get_logger("lightgbm", "lightgbm")
# Log every hyperparameter up front so each run is reproducible from its log.
logger.info(
    f"num_leaves: {args.num_leaves}, learning_rate: {args.learning_rate}")
logger.info(f"min_child_weight: {args.min_child_weight}")
logger.info(
    f"subsample: {args.subsample}, subsample_freq: {args.subsample_freq}")
logger.info(f"colsample_bytree: {args.colsample_bytree}")
logger.info(f"reg_alpha: {args.reg_alpha}, reg_lambda: {args.reg_lambda}")
logger.info(f"n_splits: {args.n_splits}, seed: {args.seed}")
logger.info(f"n_epochs: {args.n_epochs}")

# Training features come pickled; labels from the metadata CSV's target column.
with open(args.features, "rb") as f:
    train = pickle.load(f)
answer = pd.read_csv(args.metadata).target.values