def main():
    """Run the end-to-end pipeline: load, preprocess, model, predict, evaluate.

    Expects the CSV path as the first command-line argument (``sys.argv[1]``).
    Writes the MLP predictions to ``pred.csv`` and prints the evaluation
    metrics to stdout.
    """
    # instantiate the preprocessing helper
    d = DataPrep()

    # read the data (path supplied on the command line)
    # path = '/content/bank-additional-full.csv'
    path = sys.argv[1]
    data = d.read_data(path)
    print('Original shape:', data.shape)

    # preprocessing
    data = d.treat_null(data)
    data = d.outlier_correcter(data)
    data = d.generate_features(data)
    print('After feature generation:', data.shape)
    data = d.scaler(data)
    print('After scaling:', data.shape)
    data = d.encoder(data)
    print('After encoding:', data.shape)
    data = d.over_sample(data)
    print('After resampling:', data.shape)
    data = drop_unwanted(data)
    print('After dropping unwanted features:', data.shape)
    print(data.head())

    # split features/target
    t = Transform()
    x, y = t.split(data)

    # modeling — using mlp (best predictor of the 3)
    m = Model(x, y)
    pred = m.mlp()
    pred_df = pd.DataFrame(pred, columns=['y'])  # wrap predictions in a df
    pred_df.to_csv('pred.csv')  # save predictions to csv

    # evaluation
    # NOTE(review): pred was produced above, presumably over all of x, yet it
    # is scored against y_test here — verify the intended evaluation split.
    x_train, x_test, y_train, y_test = split(x, y)
    e = Evaluation()
    precision, recall, fscore, support = e.precision_recall_f1_support(y_test,
                                                                       pred)
    # BUG FIX: the original printed 'precision' four times; report each
    # unpacked metric exactly once.
    print('precision:', precision)
    print('recall:', recall)
    print('fscore:', fscore)
    print('support:', support)
# NOTE(review): Barlow Twins / PyTorch Lightning launch fragment. The parser
# construction begins before this view, and the pl.Trainer.from_argparse_args(
# ...) call at the end is left open — its remaining arguments and the closing
# parenthesis lie beyond this chunk, so the code below is kept verbatim.
# Flow: register CLI args -> let Lightning add its own trainer args -> parse ->
# build the ImageNet datamodule (swappable for CIFAR10), the BarlowTwins model
# (fixed hyperparameters), and finally the fp16 DDP trainer.
parser.add_argument('--batch_size', default=256, type=int, help='Per device batch size.') parser.add_argument( '--data_dir', default='./', type=str, help='Directory for pre-downloaded ImageNet or cache for CIFAR10.') parser = pl.Trainer.add_argparse_args(parser) args = parser.parse_args() # Can be swapped to CIFAR10DataModule dm = ImagenetDataModule(batch_size=args.batch_size, data_dir=args.data_dir, train_transforms=Transform(), test_transforms=Transform(), val_transforms=Transform()) model = BarlowTwins(lr=0.2, weight_decay=1e-6, lambd=0.0051, projector=[8192, 8192, 8192], scale_loss=0.024, per_device_batch_size=args.batch_size) trainer = pl.Trainer.from_argparse_args( args, max_epochs=1000, precision=16, accelerator='ddp',
# NOTE(review): WaveNet/LJSpeech data-pipeline fragment, truncated at BOTH
# edges: it opens inside a parser.add_argument(...) call whose start is outside
# this view, and it ends inside the second DataCollector(...) call whose
# remaining arguments follow beyond this chunk — kept verbatim for that reason.
# Flow: parse CLI args -> load the YAML config -> read audio/STFT parameters
# from config["data"] -> build Transform + TransformDataset over the LJSpeech
# metadata -> carve a head slice for validation and the rest for training ->
# compute the model's receptive field ("context size") from n_loop/n_layer/
# filter_size and hand it to the batch collators.
help="path to save the synthesized audio") args = parser.parse_args() with open(args.config, 'rt') as f: config = ruamel.yaml.safe_load(f) ljspeech_meta = LJSpeechMetaData(args.data) data_config = config["data"] sample_rate = data_config["sample_rate"] n_fft = data_config["n_fft"] win_length = data_config["win_length"] hop_length = data_config["hop_length"] n_mels = data_config["n_mels"] train_clip_seconds = data_config["train_clip_seconds"] transform = Transform(sample_rate, n_fft, win_length, hop_length, n_mels) ljspeech = TransformDataset(ljspeech_meta, transform) valid_size = data_config["valid_size"] ljspeech_valid = SliceDataset(ljspeech, 0, valid_size) ljspeech_train = SliceDataset(ljspeech, valid_size, len(ljspeech)) model_config = config["model"] n_loop = model_config["n_loop"] n_layer = model_config["n_layer"] filter_size = model_config["filter_size"] context_size = 1 + n_layer * sum([filter_size**i for i in range(n_loop)]) print("context size is {} samples".format(context_size)) train_batch_fn = DataCollector(context_size, sample_rate, hop_length, train_clip_seconds) valid_batch_fn = DataCollector(context_size,
# ----- audio/text transform configuration -----
# Pull every preprocessing knob out of the transform section of the config,
# then build the Transform and wrap the metadata dataset with it.
# (Variable name keeps the original "pronounciation" spelling; the config key
# is the correctly spelled one.)
replace_pronounciation_prob = transform_config[
    "replace_pronunciation_prob"]
(sample_rate, preemphasis, n_fft, win_length, hop_length, fmin, fmax,
 n_mels, min_level_db, ref_level_db, max_norm, clip_norm) = (
     transform_config[key] for key in (
         "sample_rate", "preemphasis", "n_fft", "win_length", "hop_length",
         "fmin", "fmax", "n_mels", "min_level_db", "ref_level_db",
         "max_norm", "clip_norm"))
transform = Transform(replace_pronounciation_prob, sample_rate, preemphasis,
                      n_fft, win_length, hop_length, fmin, fmax, n_mels,
                      min_level_db, ref_level_db, max_norm, clip_norm)
ljspeech = TransformDataset(meta, transform)

# =========================dataiterator=========================
# use meta data's text length as a sort key for the sampler
train_config = config["train"]
batch_size = train_config["batch_size"]
text_lengths = [len(example[2]) for example in meta]
sampler = PartialyRandomizedSimilarTimeLengthSampler(text_lengths,
                                                     batch_size)

# some hyperparameters affect how we process data, so create a data collector!
model_config = config["model"]
downsample_factor = model_config["downsample_factor"]
r = model_config["outputs_per_step"]