def parse_arguments():
    """Parse the command line options for the SELEX simulation script.

    Side effects:
        Sets ``pyTFbindtools.VERBOSE`` from ``--verbose`` and, when
        ``--random-seed`` is supplied, seeds numpy's global random number
        generator.

    Returns:
        A tuple ``(motif, prot_conc, dna_conc, sim_sizes, pool_size)``.
        ``motif`` is loaded from ``--pwm`` when it is given, otherwise from
        ``--energy-model``; ``pool_size`` is ``--random-seq-pool-size``
        converted to ``int``.

    Raises:
        AssertionError: if the loaded motif collection does not have exactly
            one entry, or if neither ``--pwm`` nor ``--energy-model`` was
            specified.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='Simulate a SELEX experiment.')

    # ``open`` replaces the Python 2 only ``file`` builtin: it returns the
    # same kind of read-mode handle on Python 2 and also exists on Python 3.
    parser.add_argument(
        '--energy-model', type=open,
        help='An energy model to simulate from.')
    parser.add_argument(
        '--pwm', type=open,
        help='A pwm to simulate from.')

    parser.add_argument(
        '--sim-sizes', type=int, nargs='+',
        help='Number of reads to simulate for each round.')

    parser.add_argument(
        '--prot-conc', type=float, default=7.75e-10,
        help='The protein concentration.')
    parser.add_argument(
        '--dna-conc', type=float, default=2e-8,
        help='The DNA concentration.')

    parser.add_argument(
        '--random-seed', type=int,
        help='Set the random number generator seed.')
    # Parsed as a float so that values such as "1e5" are accepted; converted
    # to an int when returned.
    parser.add_argument(
        '--random-seq-pool-size', type=float, default=1e5,
        help='The random pool size for the bootstrap.')

    parser.add_argument(
        '--verbose', default=False, action='store_true',
        help='Print extra status information.')

    args = parser.parse_args()

    pyTFbindtools.VERBOSE = args.verbose

    if args.random_seed is not None:
        np.random.seed(args.random_seed)

    if args.pwm is not None:
        pyTFbindtools.log("Loading PWM starting location", 'VERBOSE')
        # The loader is given the file *name*; the handle argparse opened
        # only validated the path, so release it whether or not loading
        # succeeds.
        try:
            motifs = load_motifs(args.pwm.name)
        finally:
            args.pwm.close()
            # --pwm takes precedence, so an --energy-model handle that was
            # also opened would otherwise never be closed.
            if args.energy_model is not None:
                args.energy_model.close()
        assert len(motifs) == 1, "Motif file contains multiple motifs"
        # list(...) keeps the indexing valid on Python 3, where
        # dict.values() returns a non-indexable view.
        motif = list(motifs.values())[0][0]
    else:
        assert args.energy_model is not None, \
            "Either --energy-model or --pwm must be specified"
        pyTFbindtools.log("Loading energy data", 'VERBOSE')
        try:
            motif = load_energy_data(args.energy_model.name)
        finally:
            # Previously this closed ``args.starting_energy_model``, an
            # attribute this parser never defines, which raised
            # AttributeError after the model had been loaded.
            args.energy_model.close()

    return (
        motif,
        args.prot_conc,
        args.dna_conc,
        args.sim_sizes,
        int(args.random_seq_pool_size),
    )
def parse_arguments():
    """Parse the command line options for SELEX energy model estimation.

    Side effects:
        Sets ``pyTFbindtools.VERBOSE`` and ``pyTFbindtools.DEBUG`` from the
        verbosity flags, sets ``CONVERGENCE_MAX_LHD_CHANGE``,
        ``MAX_NUM_ITER`` and ``MOMENTUM`` on ``pyTFbindtools.selex`` from the
        optimization options and, when ``--random-seed`` is supplied, seeds
        numpy's global random number generator.

    Returns:
        A tuple ``(motif, rnds_and_seqs, pool_size)``. ``motif`` comes from
        ``--starting-pwm``, from ``--starting-energy-model``, or otherwise
        from ``find_pwm`` run on the loaded sequences with
        ``--initial-binding-site-len``. ``rnds_and_seqs`` is the result of
        ``load_sequences`` on the ``--selex-files`` names, and ``pool_size``
        is ``--random-seq-pool-size`` converted to ``int``.

    Raises:
        AssertionError: if both starting-point options are given, if
            ``--momentum`` is outside ``[0, 1)``, or if the loaded motif
            collection does not have exactly one entry.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='Estimate energy models from a SELEX experiment.')

    # ``open`` replaces the Python 2 only ``file`` builtin: it returns the
    # same kind of read-mode handle on Python 2 and also exists on Python 3.
    parser.add_argument(
        '--selex-files', nargs='+', type=open, required=True,
        help='Files containing SELEX reads.')
    # NOTE(review): this option is parsed but its value is not used anywhere
    # in this function and is not returned -- confirm whether it should be
    # forwarded to the caller.
    parser.add_argument(
        '--background-sequence', type=open,
        help='File containing reads sequenced from round 0.')

    parser.add_argument(
        '--starting-pwm', type=open,
        help='A PWM to start from.')
    parser.add_argument(
        '--starting-energy-model', type=open,
        help='An energy model to start from.')
    parser.add_argument(
        '--initial-binding-site-len', type=int, default=6,
        help='The starting length of the binding site (this will grow)')

    parser.add_argument(
        '--lhd-convergence-eps', type=float, default=1e-8,
        help='Convergence tolerance for lhd change.')
    # Parsed as a float so that values such as "1e5" are accepted; converted
    # to an int before it is stored.
    parser.add_argument(
        '--max-iter', type=float, default=1e5,
        help='Maximum number of optimization iterations.')
    parser.add_argument(
        '--momentum', type=float, default=0.1,
        help='Optimization tuning param (between 0 and 1).')

    parser.add_argument(
        '--random-seed', type=int,
        help='Set the random number generator seed.')
    parser.add_argument(
        '--random-seq-pool-size', type=float, default=1e6,
        help='The random pool size for the bootstrap.')

    parser.add_argument(
        '--verbose', default=False, action='store_true',
        help='Print extra status information.')
    parser.add_argument(
        '--debug-verbose', default=False, action='store_true',
        help='Print debug information.')

    args = parser.parse_args()

    # The message now names the real flag (--starting-energy-model).
    assert not (args.starting_pwm and args.starting_energy_model), \
        "Can not set both --starting-pwm and --starting-energy-model"

    # Debug output implies verbose output.
    pyTFbindtools.VERBOSE = args.verbose or args.debug_verbose
    pyTFbindtools.DEBUG = args.debug_verbose

    pyTFbindtools.selex.CONVERGENCE_MAX_LHD_CHANGE = args.lhd_convergence_eps
    pyTFbindtools.selex.MAX_NUM_ITER = int(args.max_iter)
    assert 0 <= args.momentum < 1
    pyTFbindtools.selex.MOMENTUM = args.momentum

    if args.random_seed is not None:
        np.random.seed(args.random_seed)

    pyTFbindtools.log("Loading sequences", 'VERBOSE')
    try:
        rnds_and_seqs = load_sequences(x.name for x in args.selex_files)
    finally:
        # Only the file names are handed to the loader, so the handles that
        # argparse opened can be released here; previously they were never
        # closed.
        for selex_fp in args.selex_files:
            selex_fp.close()
        if args.background_sequence is not None:
            args.background_sequence.close()

    if args.starting_pwm is not None:
        pyTFbindtools.log("Loading PWM starting location", 'VERBOSE')
        # NOTE(review): the open handle itself (not its ``.name``) is passed
        # to load_motifs, whereas load_energy_data below receives a file
        # name -- confirm that load_motifs accepts a file object.
        try:
            motifs = load_motifs(args.starting_pwm)
            assert len(motifs) == 1, "Motif file contains multiple motifs"
            # list(...) keeps the indexing valid on Python 3, where
            # dict.values() returns a non-indexable view.
            motif = list(motifs.values())[0]
        finally:
            args.starting_pwm.close()
    elif args.starting_energy_model is not None:
        pyTFbindtools.log("Loading energy data", 'VERBOSE')
        try:
            motif = load_energy_data(args.starting_energy_model.name)
        finally:
            args.starting_energy_model.close()
    else:
        pyTFbindtools.log(
            "Initializing starting location from %imer search"
            % args.initial_binding_site_len,
            'VERBOSE')
        factor_name = 'TEST'
        pwm = find_pwm(rnds_and_seqs, args.initial_binding_site_len)
        motif = Motif(
            "aligned_%imer" % args.initial_binding_site_len,
            factor_name,
            pwm)

    return motif, rnds_and_seqs, int(args.random_seq_pool_size)