import logging
import os
import sys
from multiprocessing import Pool

import numpy as np


def main():
    parser = create_parser()
    args = parser.parse_args()

    min_overlap = args.min_overlap
    query_file = args.input_file
    out_file = args.output_file

    # log to stdout
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    logger_fmt = '%(asctime)s [%(levelname)s] %(message)s'
    formatter = logging.Formatter(logger_fmt)
    console_handler = logging.StreamHandler(stream=sys.stdout)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # parse input query meme file
    model_set = parse_meme(query_file)

    # pre-compute entropy for all the models
    models = update_models(model_set['models'])

    # filter models using affinity propagation
    new_models = filter_pwms(models, min_overlap)

    # update the model set after filtering
    new_model_set = {}
    new_model_set['version'] = model_set['version']
    new_model_set['alphabet'] = model_set['alphabet']
    new_model_set['bg_freq'] = model_set['bg_freq']
    new_model_set['models'] = new_models

    # write out the meme file
    write_meme(new_model_set, out_file)
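

# filter_pwms() is defined elsewhere in this module. As a rough illustration
# of the clustering step named above: assuming the pairwise model
# similarities (computed over alignments with at least min_overlap
# overlapping columns) have already been collected into a square matrix,
# affinity propagation can pick one exemplar per cluster of near-redundant
# PWMs. This is a hypothetical sketch, not the module's actual
# implementation.
from sklearn.cluster import AffinityPropagation


def filter_pwms_sketch(models, similarity_matrix):
    ap = AffinityPropagation(affinity='precomputed', random_state=0)
    ap.fit(similarity_matrix)
    # keep only the exemplar model of each cluster
    return [models[i] for i in ap.cluster_centers_indices_]

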
def main():
    parser = create_parser()
    args = parser.parse_args()

    ipath = args.meme_file
    out_dir = os.path.dirname(ipath)
    basename = os.path.splitext(os.path.basename(ipath))[0]

    motifset = parse_meme(ipath)
    models = motifset['models']

    # write one pair of BaMM flat files per motif; for a zeroth-order model
    # the probabilities and the conditional probabilities coincide, so the
    # same PWM is written to both the .ihbcp and the .ihbp file
    for num, model in enumerate(models, start=1):
        filepath_v = os.path.join(out_dir, '%s_motif_%d.ihbcp' % (basename, num))
        filepath_p = os.path.join(out_dir, '%s_motif_%d.ihbp' % (basename, num))
        write_bamm(model['pwm'], filepath_v)
        write_bamm(model['pwm'], filepath_p)
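

# write_bamm() lives elsewhere in this package. For the zeroth-order case it
# has to serialize a PWM into the BaMM flat-file layout: one block per motif
# position, one line of probabilities per model order, and a blank line
# between blocks. The function below is a sketch under that assumption; the
# exact number formatting is a guess.
def write_bamm_sketch(pwm, filepath):
    with open(filepath, 'w') as fh:
        for position in pwm:
            # order-0 block: a single line of four nucleotide probabilities
            print(' '.join('%.4e' % p for p in position), file=fh)
            # blank line separating position blocks
            print(file=fh)

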
def main():
    parser = create_parser()
    args = parser.parse_args()

    ipath = args.meme_file
    if args.o is None:
        out_dir = os.path.dirname(ipath)
    else:
        out_dir = args.o
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    basename = os.path.splitext(os.path.basename(ipath))[0]

    motifset = parse_meme(ipath)
    models = motifset['models']

    for num, model in enumerate(models, start=1):
        filepath_v = os.path.join(out_dir, '%s_motif_%d.ihbcp' % (basename, num))
        filepath_p = os.path.join(out_dir, '%s_motif_%d.ihbp' % (basename, num))
        write_bamm(model['pwm'], filepath_v)
        write_bamm(model['pwm'], filepath_p)
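

# create_parser() is shared across these entry points and defined elsewhere.
# A minimal sketch of what it must provide for this converter, inferred from
# the attributes used above (args.meme_file and args.o); the help texts and
# defaults are assumptions.
import argparse


def create_parser_sketch():
    parser = argparse.ArgumentParser(
        description='convert MEME PWMs into BaMM flat files')
    parser.add_argument('meme_file',
                        help='input motif file in MEME format')
    parser.add_argument('-o', default=None,
                        help='output directory (defaults to the input directory)')
    return parser

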
def main():
    parser = create_parser()
    args = parser.parse_args()

    min_overlap = args.min_overlap
    query_file = args.input_file
    db_file = args.model_db
    out_file = args.output_file
    output_score_file = args.output_score_file

    # log to stdout
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    logger_fmt = '%(asctime)s [%(levelname)s] %(message)s'
    formatter = logging.Formatter(logger_fmt)
    console_handler = logging.StreamHandler(stream=sys.stdout)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # parse input query meme file
    model_set = parse_meme(query_file)

    # pre-compute entropy for all the models
    models = update_models(model_set['models'])

    # if model_db is not given, search the model files against themselves
    db_models = models
    if db_file is not None:
        model_db = parse_meme(db_file)
        db_models = update_models(model_db['models'])
    db_size = len(db_models)

    # filter models using affinity propagation
    new_models = filter_pwms(models, min_overlap)

    # update the model set after filtering
    new_model_set = {}
    new_model_set['version'] = model_set['version']
    new_model_set['alphabet'] = model_set['alphabet']
    new_model_set['bg_freq'] = model_set['bg_freq']
    new_model_set['models'] = new_models

    # write out the meme file
    write_meme(new_model_set, out_file)

    if output_score_file is not None:

        # initializer that shares read-only state with the worker processes
        def init_workers():
            global highscore_fraction_g
            highscore_fraction_g = args.highscore_fraction
            global evalue_thresh_g
            evalue_thresh_g = args.evalue_threshold
            np.random.seed(args.seed)
            global db_models_g
            db_models_g = db_models
            global db_size_g
            db_size_g = db_size
            global n_neg_perm_g
            n_neg_perm_g = args.n_neg_perm
            global min_overlap_g
            min_overlap_g = args.min_overlap

        logger.info('Queuing %s search jobs', len(models))
        with open(output_score_file, 'w') as out:
            print('model_id', 'db_id', 'simscore', 'e-value',
                  'start_query', 'end_query', 'start_hit', 'end_hit',
                  'bg_score', 'cross_score', 'pad_score',
                  sep='\t', file=out)
            with Pool(args.n_processes, initializer=init_workers) as pool:
                jobs = []
                for model in models:
                    job = pool.apply_async(motif_search, args=(model,))
                    jobs.append(job)
                total_jobs = len(jobs)
                for job_index, job in enumerate(jobs, start=1):
                    hits = job.get()
                    # sort each model's hits by e-value before writing
                    hits.sort(key=lambda x: x[3])
                    for hit in hits:
                        print(*hit, sep='\t', file=out)
                    logger.info('Finished (%s/%s)', job_index, total_jobs)
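

# motif_search() runs inside the worker processes and is defined elsewhere.
# Judging from the globals shared by init_workers() (db_size_g, n_neg_perm_g,
# the random seed), the reported e-value is plausibly derived from a
# permutation null: score column-shuffled copies of the query against the
# hit, convert the observed score into an empirical p-value, and scale by the
# database size. The sketch below illustrates that idea; score_pair() is a
# hypothetical stand-in for the real similarity scoring, and query_pwm is
# assumed to be an (L, 4) numpy array.
def empirical_evalue_sketch(observed_score, query_pwm, db_model, db_size,
                            n_neg_perm, score_pair):
    null_scores = np.empty(n_neg_perm)
    for i in range(n_neg_perm):
        # shuffling the columns destroys the motif but keeps its composition
        shuffled = query_pwm[np.random.permutation(len(query_pwm))]
        null_scores[i] = score_pair(shuffled, db_model)
    # add-one estimator so the empirical p-value is never exactly zero
    pvalue = (np.sum(null_scores >= observed_score) + 1) / (n_neg_perm + 1)
    return pvalue * db_size

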
def main():
    parser = create_parser()
    args = parser.parse_args()

    query_file = args.input_file
    target_db_path = args.db_path
    output_score_file = args.output_score_file

    # log to stdout
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    logger_fmt = '%(asctime)s [%(levelname)s] %(message)s'
    formatter = logging.Formatter(logger_fmt)
    console_handler = logging.StreamHandler(stream=sys.stdout)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # parse the input query file
    model_format = args.input_format
    if model_format == 'PWM':
        # parse input meme file
        query_model_set = parse_meme(query_file)['models']
    elif model_format == 'BaMM':
        # parse input bamm file
        query_model_set = parse_bamm_file(query_file)
    else:
        logger.error('Input model format %s is not recognised.', model_format)
        sys.exit(1)

    # pre-compute entropy for all query models
    models = update_models(query_model_set)

    # parse bamms from the target database
    logger.info('Reading in BaMMs from the target database')
    target_db = parse_bamm_db(target_db_path)

    # pre-compute entropy for all models in the target database
    db_models = update_models(target_db)
    db_size = len(db_models)

    # initializer that shares read-only state with the worker processes
    def init_workers():
        np.random.seed(args.seed)
        global highscore_fraction_g
        highscore_fraction_g = args.highscore_fraction
        global pvalue_thresh_g
        pvalue_thresh_g = args.pvalue_threshold
        global db_models_g
        db_models_g = db_models
        global db_size_g
        db_size_g = db_size
        global n_neg_perm_g
        n_neg_perm_g = args.n_neg_perm
        global min_overlap_g
        min_overlap_g = args.min_overlap

    logger.info('Queuing %s search jobs', len(models))
    with open(output_score_file, 'w') as out:
        print('model_id', 'db_id', 'p-value', 'e-value',
              'sim_score', 'model_width', sep='\t', file=out)
        with Pool(args.n_processes, initializer=init_workers) as pool:
            jobs = []
            for model in models:
                job = pool.apply_async(motif_search, args=(model,))
                jobs.append(job)
            total_jobs = len(jobs)
            for job_index, job in enumerate(jobs, start=1):
                hits = job.get()
                # sort each model's hits by e-value before writing
                hits.sort(key=lambda x: x[3])
                for hit in hits:
                    print(*hit, sep='\t', file=out)
                logger.info('Finished (%s/%s)', job_index, total_jobs)
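

# parse_bamm_db() is defined elsewhere. Since the converters above emit one
# *.ihbcp file per motif, a plausible sketch is a recursive walk over the
# database directory that feeds every conditional-probability file to
# parse_bamm_file(), which is assumed (as in main()) to return a list of
# models per file.
import glob


def parse_bamm_db_sketch(db_path):
    models = []
    pattern = os.path.join(db_path, '**', '*.ihbcp')
    for filepath in sorted(glob.glob(pattern, recursive=True)):
        models.extend(parse_bamm_file(filepath))
    return models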