Example #1
import sys

import pandas as pd

# DataPrep, Transform, Model, Evaluation (and split) are project-local
# helpers assumed to be importable from the surrounding package.

def main():
    # instantiate the class
    d = DataPrep()

    # read the data
    # path = '/content/bank-additional-full.csv'
    path = sys.argv[1]
    data = d.read_data(path)
    print('Original shape:', data.shape)

    # preprocessing 
    data = d.treat_null(data)
    data = d.outlier_correcter(data)
    data = d.generate_features(data)
    print('After feature generation:', data.shape)
    data = d.scaler(data)
    print('After scaling:', data.shape)
    data = d.encoder(data)
    print('After encoding:', data.shape)
    data = d.over_sample(data)
    print('After resampling:', data.shape)
    data = d.drop_unwanted(data)
    print('After dropping unwanted features:', data.shape)
    print(data.head())

    # split data
    t = Transform()
    x, y = t.split(data)

    # modeling
    m = Model(x, y)
    # use the MLP (the best-performing of the three models tried)
    pred = m.mlp()
    pred_df = pd.DataFrame(pred, columns=['y'])  # collect predictions in a DataFrame
    pred_df.to_csv('pred.csv')  # save predictions to csv

    # evaluation
    x_train, x_test, y_train, y_test = split(x, y)
    e = Evaluation()
    precision, recall, fscore, support = e.precision_recall_f1_support(y_test, pred)
    print('precision:', precision)
    print('recall:', recall)
    print('f-score:', fscore)
    print('support:', support)
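
In example #1, split() and Evaluation.precision_recall_f1_support() are project-local helpers that are not shown. A minimal sketch of an equivalent evaluation step using scikit-learn (the 80/20 split ratio and random seed are assumptions, not taken from the original):

from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support

# hypothetical stand-ins for the project-local split() and Evaluation class
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2,
                                                    random_state=42)
precision, recall, fscore, support = precision_recall_fscore_support(y_test, pred)

Note that, as in the original, pred must be predictions for x_test for the comparison to be meaningful.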
Example #2
    parser.add_argument('--batch_size',
                        default=256,
                        type=int,
                        help='Per device batch size.')
    parser.add_argument('--data_dir',
                        default='./',
                        type=str,
                        help='Directory for pre-downloaded ImageNet '
                             'or cache for CIFAR10.')
    parser = pl.Trainer.add_argparse_args(parser)
    args = parser.parse_args()

    # Can be swapped to CIFAR10DataModule
    dm = ImagenetDataModule(batch_size=args.batch_size,
                            data_dir=args.data_dir,
                            train_transforms=Transform(),
                            test_transforms=Transform(),
                            val_transforms=Transform())

    model = BarlowTwins(lr=0.2,
                        weight_decay=1e-6,
                        lambd=0.0051,
                        projector=[8192, 8192, 8192],
                        scale_loss=0.024,
                        per_device_batch_size=args.batch_size)

    trainer = pl.Trainer.from_argparse_args(
        args,
        max_epochs=1000,
        precision=16,
        accelerator='ddp',
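
The example is cut off inside the Trainer constructor. A minimal sketch of how such a script typically finishes, assuming standard PyTorch Lightning usage (the remaining Trainer arguments in the original are unknown, so only the call is closed and a fit step added):

    trainer = pl.Trainer.from_argparse_args(
        args,
        max_epochs=1000,
        precision=16,
        accelerator='ddp',
    )
    # hypothetical continuation: start training
    trainer.fit(model, datamodule=dm)

Note that accelerator='ddp' is legacy usage; PyTorch Lightning 1.5+ moved distributed settings to strategy='ddp'.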
Example #3
                        help="path to save the synthesized audio")

    args = parser.parse_args()
    with open(args.config, 'rt') as f:
        config = ruamel.yaml.safe_load(f)
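    # note: newer ruamel.yaml releases deprecate the module-level safe_load();
    # the replacement API is ruamel.yaml.YAML(typ='safe').load(f)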

    ljspeech_meta = LJSpeechMetaData(args.data)

    data_config = config["data"]
    sample_rate = data_config["sample_rate"]
    n_fft = data_config["n_fft"]
    win_length = data_config["win_length"]
    hop_length = data_config["hop_length"]
    n_mels = data_config["n_mels"]
    train_clip_seconds = data_config["train_clip_seconds"]
    transform = Transform(sample_rate, n_fft, win_length, hop_length, n_mels)
    ljspeech = TransformDataset(ljspeech_meta, transform)

    valid_size = data_config["valid_size"]
    ljspeech_valid = SliceDataset(ljspeech, 0, valid_size)
    ljspeech_train = SliceDataset(ljspeech, valid_size, len(ljspeech))

    model_config = config["model"]
    n_loop = model_config["n_loop"]
    n_layer = model_config["n_layer"]
    filter_size = model_config["filter_size"]
    context_size = 1 + n_layer * sum(filter_size**i for i in range(n_loop))
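    # e.g. with hypothetical config values n_loop=10, n_layer=3, filter_size=2:
    # sum(2**i for i in range(10)) = 1023, so context_size = 1 + 3 * 1023 = 3070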
    print("context size is {} samples".format(context_size))
    train_batch_fn = DataCollector(context_size, sample_rate, hop_length,
                                   train_clip_seconds)
    valid_batch_fn = DataCollector(context_size,
Example #4
File: train.py  Project: zhouwei25/Parakeet
    replace_pronunciation_prob = transform_config[
        "replace_pronunciation_prob"]
    sample_rate = transform_config["sample_rate"]
    preemphasis = transform_config["preemphasis"]
    n_fft = transform_config["n_fft"]
    win_length = transform_config["win_length"]
    hop_length = transform_config["hop_length"]
    fmin = transform_config["fmin"]
    fmax = transform_config["fmax"]
    n_mels = transform_config["n_mels"]
    min_level_db = transform_config["min_level_db"]
    ref_level_db = transform_config["ref_level_db"]
    max_norm = transform_config["max_norm"]
    clip_norm = transform_config["clip_norm"]
    transform = Transform(replace_pronunciation_prob, sample_rate,
                          preemphasis, n_fft, win_length, hop_length, fmin,
                          fmax, n_mels, min_level_db, ref_level_db, max_norm,
                          clip_norm)
    ljspeech = TransformDataset(meta, transform)

    # =========================dataiterator=========================
    # use meta data's text length as a sort key for the sampler
    train_config = config["train"]
    batch_size = train_config["batch_size"]
    text_lengths = [len(example[2]) for example in meta]
    sampler = PartialyRandomizedSimilarTimeLengthSampler(text_lengths,
                                                         batch_size)

    # some hyperparameters affect how we process data, so create a data collector!
    model_config = config["model"]
    downsample_factor = model_config["downsample_factor"]
    r = model_config["outputs_per_step"]
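
The thirteen positional arguments to Transform in example #4 are easy to misorder. A hedged alternative sketch, assuming Transform also accepts these names as keyword arguments (not verified against the project) and relying on the config keys matching the parameter names:

    # hypothetical refactor: build the Transform from named config fields
    field_names = [
        "replace_pronunciation_prob", "sample_rate", "preemphasis", "n_fft",
        "win_length", "hop_length", "fmin", "fmax", "n_mels", "min_level_db",
        "ref_level_db", "max_norm", "clip_norm",
    ]
    transform = Transform(**{name: transform_config[name] for name in field_names})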