Esempio n. 1
0
def train_small_model_on_read_tensors_and_annotations(args):
    '''Train the small 2D read-tensor plus annotation CNN, then optionally plot ROC curves.

    The architecture looks at reads, read flags, reference sequence, and variant annotations.
    Tensors must already have been generated by write_read_and_annotation_tensors().
    Training uses early stopping; afterwards the test tensors are loaded and,
    when args.image_dir is set, performance curves are plotted on them.

    Arguments:
        args.data_dir: must be set to an appropriate directory with
            subdirectories of test, valid and train, each containing
            subdirectories for each label with tensors stored as hd5 files.
        args.samples: passed as per_class_max when loading the test tensors.
        args.image_dir: when truthy, ROC curves are plotted with this value as prefix;
            when falsy, plotting is skipped.
        args.labels, args.id, args.batch_size: forwarded to the ROC plotting call.

    Returns:
        None.
    '''
    train_dirs, valid_dirs, test_dirs = get_train_valid_test_paths(args)

    # Training and validation data are streamed through generators.
    train_generator = tensor_generator_from_label_dirs_and_args(args, train_dirs)
    valid_generator = tensor_generator_from_label_dirs_and_args(args, valid_dirs)

    weights_file = vqsr_cnn.weight_path_from_args(args)
    cnn = vqsr_cnn.build_small_2d_annotation_model(args)
    cnn = vqsr_cnn.train_model_from_generators(
        args, cnn, train_generator, valid_generator, weights_file)

    # The test split is loaded whether or not plots were requested.
    held_out = load_tensors_and_annotations_from_class_dirs(
        args, test_dirs, per_class_max=args.samples)
    if not args.image_dir:
        return

    # The first two loaded entries are the model inputs, the third is the truth.
    model_inputs = [held_out[0], held_out[1]]
    truth = held_out[2]
    vqsr_cnn.plot_roc_per_class(
        cnn, model_inputs, truth, args.labels, args.id,
        prefix=args.image_dir, batch_size=args.batch_size)
Esempio n. 2
0
def train_small_model_on_read_tensors_and_annotations(args):
    '''Train the small 2D read-tensor plus annotation CNN, then optionally plot ROC curves.

    The architecture looks at reads, read flags, reference sequence, and variant annotations.
    Tensors must already have been generated by write_read_and_annotation_tensors().
    Training uses early stopping; afterwards the test tensors are loaded and,
    when args.image_dir is set, performance curves are plotted on them.

    Arguments:
        args.data_dir: must be set to an appropriate directory with
            subdirectories of test, valid and train, each containing
            subdirectories for each label with tensors stored as hd5 files.
        args.samples: passed as per_class_max when loading the test tensors.
        args.image_dir: when truthy, ROC curves are plotted with this value as prefix;
            when falsy, plotting is skipped.
        args.labels, args.id, args.batch_size: forwarded to the ROC plotting call.

    Returns:
        None.
    '''
    train_dirs, valid_dirs, test_dirs = get_train_valid_test_paths(args)

    # Training and validation data are streamed through generators.
    train_generator = tensor_generator_from_label_dirs_and_args(
        args,
        train_dirs,
    )
    valid_generator = tensor_generator_from_label_dirs_and_args(
        args,
        valid_dirs,
    )

    weights_file = vqsr_cnn.weight_path_from_args(args)
    cnn = vqsr_cnn.build_small_2d_annotation_model(args)
    cnn = vqsr_cnn.train_model_from_generators(
        args,
        cnn,
        train_generator,
        valid_generator,
        weights_file,
    )

    # The test split is loaded whether or not plots were requested.
    held_out = load_tensors_and_annotations_from_class_dirs(
        args,
        test_dirs,
        per_class_max=args.samples,
    )
    if not args.image_dir:
        return

    # The first two loaded entries are the model inputs, the third is the truth.
    model_inputs = [held_out[0], held_out[1]]
    truth = held_out[2]
    vqsr_cnn.plot_roc_per_class(
        cnn,
        model_inputs,
        truth,
        args.labels,
        args.id,
        prefix=args.image_dir,
        batch_size=args.batch_size,
    )
Esempio n. 3
0
def train_on_reference_tensors_and_annotations(args):
    '''Train the 1D convolution over reference tracks combined with an MLP over annotations.

    Reference and annotation tensors must already have been generated by
    write_reference_and_annotation_tensors(). After training, the test data
    is loaded and, when args.image_dir is set, performance curves for the CNN
    are plotted on it.

    Arguments:
        args.data_dir: must be set to an appropriate directory with
            subdirectories of test, valid and train, each containing
            subdirectories for each label with tensors stored as hd5 files.
        args.samples: passed as per_class_max when loading the test data.
        args.image_dir: when truthy, ROC curves are plotted with this value as prefix;
            when falsy, plotting is skipped.
        args.labels, args.id: forwarded to the ROC plotting call.

    Returns:
        None.
    '''
    train_dirs, valid_dirs, test_dirs = get_train_valid_test_paths(args)

    # Training and validation data are streamed through generators.
    train_generator = dna_annotation_generator(args, train_dirs)
    valid_generator = dna_annotation_generator(args, valid_dirs)

    weights_file = vqsr_cnn.weight_path_from_args(args)
    cnn = vqsr_cnn.build_reference_annotation_model(args)
    cnn = vqsr_cnn.train_model_from_generators(
        args, cnn, train_generator, valid_generator, weights_file)

    # The test split is loaded whether or not plots were requested.
    held_out = load_dna_annotations_positions_from_class_dirs(
        args, test_dirs, per_class_max=args.samples)
    if not args.image_dir:
        return

    # Only the first three loaded entries are used: two model inputs and the truth.
    model_inputs = [held_out[0], held_out[1]]
    truth = held_out[2]
    vqsr_cnn.plot_roc_per_class(
        cnn, model_inputs, truth, args.labels, args.id, prefix=args.image_dir)
Esempio n. 4
0
def train_on_reference_tensors_and_annotations(args):
    '''Train the 1D convolution over reference tracks combined with an MLP over annotations.

    Reference and annotation tensors must already have been generated by
    write_reference_and_annotation_tensors(). After training, the test data
    is loaded and, when args.image_dir is set, performance curves for the CNN
    are plotted on it.

    Arguments:
        args.data_dir: must be set to an appropriate directory with
            subdirectories of test, valid and train, each containing
            subdirectories for each label with tensors stored as hd5 files.
        args.samples: passed as per_class_max when loading the test data.
        args.image_dir: when truthy, ROC curves are plotted with this value as prefix;
            when falsy, plotting is skipped.
        args.labels, args.id: forwarded to the ROC plotting call.

    Returns:
        None.
    '''
    train_dirs, valid_dirs, test_dirs = get_train_valid_test_paths(args)

    # Training and validation data are streamed through generators.
    train_generator = dna_annotation_generator(args, train_dirs)
    valid_generator = dna_annotation_generator(args, valid_dirs)

    weights_file = vqsr_cnn.weight_path_from_args(args)
    cnn = vqsr_cnn.build_reference_annotation_model(args)
    cnn = vqsr_cnn.train_model_from_generators(
        args,
        cnn,
        train_generator,
        valid_generator,
        weights_file,
    )

    # The test split is loaded whether or not plots were requested.
    held_out = load_dna_annotations_positions_from_class_dirs(
        args,
        test_dirs,
        per_class_max=args.samples,
    )
    if not args.image_dir:
        return

    # Only the first three loaded entries are used: two model inputs and the truth.
    model_inputs = [held_out[0], held_out[1]]
    truth = held_out[2]
    vqsr_cnn.plot_roc_per_class(
        cnn,
        model_inputs,
        truth,
        args.labels,
        args.id,
        prefix=args.image_dir,
    )