Exemple #1
0
    # Split sizes: the training share is truncated to an int and the
    # validation set takes the remainder, so the two lengths always sum to
    # len(all_data).
    train_length = int(len(all_data) * args.train_ratio)
    val_length = len(all_data) - train_length

    # Seed the global torch RNG right before the split so that the
    # train/validation partition is reproducible for a given
    # validation_split_seed (random_split is called without an explicit
    # generator here).
    if args.validation_split_seed is not None:
        torch.manual_seed(args.validation_split_seed)
    train_data, val_data = torch.utils.data.random_split(
        all_data, [train_length, val_length])

    # Re-seed after the split so the rest of the run follows args.seed when
    # one is given.
    # NOTE(review): in the elif branch torch.manual_seed() has already been
    # called with validation_split_seed, so torch.initial_seed() should return
    # that same value; this line then rewinds the RNG to its pre-split state
    # instead of restoring non-deterministic seeding. If the intent was to
    # "un-seed" the run, torch.seed() may be what is wanted -- confirm.
    if args.seed is not None:
        torch.manual_seed(args.seed)
    elif args.validation_split_seed is not None:
        torch.manual_seed(torch.initial_seed())

    # shuffle=True is passed only for the training loader; the validation
    # loader relies on the helper's default for that argument.
    train_loader = get_variable_length_protein_dataLoader(
        train_data,
        batch_size=args.batch_size,
        shuffle=True,
        use_weights=args.use_weights)
    val_loader = get_variable_length_protein_dataLoader(
        val_data, batch_size=args.batch_size, use_weights=args.use_weights)

    print("Data loaded!")

    model = WaveNet(input_channels=NUM_TOKENS,
                    residual_channels=args.residual_channels,
                    out_channels=NUM_TOKENS,
                    stacks=args.stacks,
                    layers_per_stack=args.layers,
                    total_samples=train_length,
                    l2_lambda=args.L2,
                    bias=args.bias,
    # Seed the global torch RNG only when a seed was supplied, and report it.
    if args.seed is not None:
        torch.manual_seed(args.seed)
        print(f"Random seed set to {args.seed}")

    # Device handed to the datasets: the requested device, overridden to the
    # CPU whenever multi-GPU mode is on.
    # NOTE(review): presumably so a multi-GPU wrapper can distribute batches
    # itself -- confirm against the training loop.
    data_device = torch.device(args.device)
    if args.multi_gpu:
        data_device = torch.device("cpu")

    # Load data
    train_data = IterProteinDataset(args.train_data, device = data_device)
    validation_data = IterProteinDataset(args.validation_data, device = data_device)
    val_len = len(validation_data)
    # Nine training sequences per validation sequence.
    # NOTE(review): presumably assumes a 90/10 train/validation ratio -- confirm.
    train_seqs_per_epoch = val_len * 9

    # Both loaders are built with the same batch size; no other options are
    # passed, so the helper's defaults apply.
    train_loader = get_variable_length_protein_dataLoader(train_data, batch_size = args.batch_size)
    val_loader = get_variable_length_protein_dataLoader(validation_data, batch_size = args.batch_size)
    print("Data loaded!")

    # Hard-coded sample count, passed to WaveNet as total_samples below.
    # NOTE(review): presumably the number of sequences in the full training
    # set; it is not derived from train_data here -- confirm and consider
    # making it a CLI argument.
    total_samples = 39_069_211 # magic number

    model = WaveNet(
        input_channels = NUM_TOKENS,
        residual_channels = args.residual_channels,
        out_channels = NUM_TOKENS,
        stacks = args.stacks,
        layers_per_stack = args.layers,
        total_samples = total_samples,
        l2_lambda = args.L2,
		bias = args.bias,
        dropout = args.dropout,