def parse_arguments():
    """Parse the command line for the SELEX simulation script.

    The motif to simulate from is loaded from ``--pwm`` when that option is
    given, and from ``--energy-model`` otherwise.

    Side effects:
        * sets ``pyTFbindtools.VERBOSE`` from ``--verbose``;
        * seeds numpy's global random number generator when
          ``--random-seed`` is given.

    Returns:
        A tuple ``(motif, prot_conc, dna_conc, sim_sizes,
        random_seq_pool_size)``. ``sim_sizes`` is the list of ints passed to
        ``--sim-sizes`` (``None`` when the option is omitted) and
        ``random_seq_pool_size`` is converted to ``int``.

    Raises:
        AssertionError: if neither ``--pwm`` nor ``--energy-model`` is given,
            or if the PWM file does not yield exactly one entry.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description='Simulate a SELEX experiment.')

    # argparse.FileType replaces the Python-2-only ``file`` builtin; it opens
    # the path for reading and reports unopenable paths as argparse errors.
    parser.add_argument('--energy-model', type=argparse.FileType('r'),
                        help='An energy model to simulate from.')
    parser.add_argument('--pwm', type=argparse.FileType('r'),
                        help='A pwm to simulate from.')

    parser.add_argument('--sim-sizes', type=int, nargs='+',
                        help='Number of reads to simulate for each round.')

    parser.add_argument('--prot-conc', type=float, default=7.75e-10,
                        help='The protein concentration.')
    parser.add_argument('--dna-conc', type=float, default=2e-8,
                        help='The DNA concentration.')

    parser.add_argument('--random-seed', type=int,
                        help='Set the random number generator seed.')
    # Parsed as float so that scientific notation (e.g. 1e5) is accepted;
    # converted to int on return.
    parser.add_argument('--random-seq-pool-size', type=float, default=1e5,
                        help='The random pool size for the bootstrap.')

    parser.add_argument('--verbose', default=False, action='store_true',
                        help='Print extra status information.')

    args = parser.parse_args()

    pyTFbindtools.VERBOSE = args.verbose

    if args.random_seed is not None:
        np.random.seed(args.random_seed)

    if args.pwm is not None:
        pyTFbindtools.log("Loading PWM starting location", 'VERBOSE')
        motifs = load_motifs(args.pwm.name)
        assert len(motifs) == 1, "Motif file contains multiple motifs"
        # Take the first element of the single value in the mapping
        # (presumably a list of motifs per factor -- confirm against
        # load_motifs). list() keeps this working with dict views.
        motif = list(motifs.values())[0][0]
        args.pwm.close()
    else:
        assert args.energy_model is not None, \
            "Either --energy-model or --pwm must be specified"
        pyTFbindtools.log("Loading energy data", 'VERBOSE')
        motif = load_energy_data(args.energy_model.name)
        # This parser defines --energy-model (not --starting-energy-model),
        # so this is the handle that has to be closed.
        args.energy_model.close()

    return (motif, args.prot_conc, args.dna_conc,
            args.sim_sizes,
            int(args.random_seq_pool_size))
# Example #2
# 0
def parse_arguments():
    """Parse the command line for the SELEX energy-model estimation script.

    The starting motif is taken, in order of precedence, from
    ``--starting-pwm``, from ``--starting-energy-model``, or from a k-mer
    search (``find_pwm``) over the loaded SELEX reads using
    ``--initial-binding-site-len``.

    Side effects:
        * sets ``pyTFbindtools.VERBOSE`` and ``pyTFbindtools.DEBUG``;
        * sets ``pyTFbindtools.selex.CONVERGENCE_MAX_LHD_CHANGE``,
          ``pyTFbindtools.selex.MAX_NUM_ITER`` and
          ``pyTFbindtools.selex.MOMENTUM`` from the matching options;
        * seeds numpy's global random number generator when
          ``--random-seed`` is given.

    Returns:
        A tuple ``(motif, rnds_and_seqs, random_seq_pool_size)`` where
        ``rnds_and_seqs`` is whatever ``load_sequences`` returns for the
        ``--selex-files`` paths and ``random_seq_pool_size`` is an ``int``.

    Raises:
        AssertionError: if both ``--starting-pwm`` and
            ``--starting-energy-model`` are given, if ``--momentum`` is
            outside ``[0, 1)``, or if the PWM file does not yield exactly
            one entry.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description='Estimate energy models from a SELEX experiment.')

    # argparse.FileType replaces the Python-2-only ``file`` builtin; it opens
    # the path for reading and reports unopenable paths as argparse errors.
    readable_file = argparse.FileType('r')

    parser.add_argument('--selex-files', nargs='+', type=readable_file,
                        required=True,
                        help='Files containing SELEX reads.')

    parser.add_argument('--background-sequence', type=readable_file,
                        help='File containing reads sequenced from round 0.')

    parser.add_argument('--starting-pwm', type=readable_file,
                        help='A PWM to start from.')
    parser.add_argument('--starting-energy-model', type=readable_file,
                        help='An energy model to start from.')
    parser.add_argument(
        '--initial-binding-site-len', type=int, default=6,
        help='The starting length of the binding site (this will grow)')

    parser.add_argument('--lhd-convergence-eps', type=float, default=1e-8,
                        help='Convergence tolerance for lhd change.')
    # Parsed as float so that scientific notation (e.g. 1e5) is accepted;
    # converted to int before use.
    parser.add_argument('--max-iter', type=float, default=1e5,
                        help='Maximum number of optimization iterations.')
    parser.add_argument('--momentum', type=float, default=0.1,
                        help='Optimization tuning param (between 0 and 1).')

    parser.add_argument('--random-seed', type=int,
                        help='Set the random number generator seed.')
    parser.add_argument('--random-seq-pool-size', type=float, default=1e6,
                        help='The random pool size for the bootstrap.')

    parser.add_argument('--verbose', default=False, action='store_true',
                        help='Print extra status information.')
    parser.add_argument('--debug-verbose', default=False,
                        action='store_true',
                        help='Print debug information.')

    args = parser.parse_args()
    assert not (args.starting_pwm and args.starting_energy_model), \
        "Can not set both --starting-pwm and --starting-energy-model"

    pyTFbindtools.VERBOSE = args.verbose or args.debug_verbose
    pyTFbindtools.DEBUG = args.debug_verbose

    pyTFbindtools.selex.CONVERGENCE_MAX_LHD_CHANGE = args.lhd_convergence_eps
    pyTFbindtools.selex.MAX_NUM_ITER = int(args.max_iter)
    assert 0 <= args.momentum < 1
    pyTFbindtools.selex.MOMENTUM = args.momentum

    if args.random_seed is not None:
        np.random.seed(args.random_seed)

    # --background-sequence is accepted but not used anywhere in this
    # function, so just release the handle argparse opened for it.
    if args.background_sequence is not None:
        args.background_sequence.close()

    pyTFbindtools.log("Loading sequences", 'VERBOSE')
    # Only the file names are handed to load_sequences; close the handles
    # argparse opened so they are not leaked.
    selex_fnames = []
    for selex_fp in args.selex_files:
        selex_fnames.append(selex_fp.name)
        selex_fp.close()
    rnds_and_seqs = load_sequences(selex_fnames)

    if args.starting_pwm is not None:
        pyTFbindtools.log("Loading PWM starting location", 'VERBOSE')
        # NOTE(review): the open handle (not its .name) is passed to
        # load_motifs and the single dict value is used as the motif as-is;
        # confirm both against load_motifs's contract.
        try:
            motifs = load_motifs(args.starting_pwm)
            assert len(motifs) == 1, "Motif file contains multiple motifs"
            # list() keeps the indexing working with dict views.
            motif = list(motifs.values())[0]
        finally:
            args.starting_pwm.close()
    elif args.starting_energy_model is not None:
        pyTFbindtools.log("Loading energy data", 'VERBOSE')
        motif = load_energy_data(args.starting_energy_model.name)
        args.starting_energy_model.close()
    else:
        bs_len = args.initial_binding_site_len
        pyTFbindtools.log(
            "Initializing starting location from %imer search" % bs_len,
            'VERBOSE')
        factor_name = 'TEST'
        pwm = find_pwm(rnds_and_seqs, bs_len)
        motif = Motif("aligned_%imer" % bs_len, factor_name, pwm)

    return motif, rnds_and_seqs, int(args.random_seq_pool_size)