Code example #1
def main():
    parser = create_parser()
    args = parser.parse_args()

    min_overlap = args.min_overlap
    query_file = args.input_file
    out_file = args.output_file

    # set up console logging to stdout
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    logger_fmt = '%(asctime)s [%(levelname)s]  %(message)s'
    formatter = logging.Formatter(logger_fmt)
    console_handler = logging.StreamHandler(stream=sys.stdout)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # parse input query meme file
    model_set = parse_meme(query_file)
    # pre-compute entropy for all the models
    models = update_models(model_set['models'])

    # filter models using affinity propagation
    new_models = filter_pwms(models, min_overlap)

    # update the model set after filtering
    new_model_set = {}
    new_model_set['version']    = model_set['version']
    new_model_set['alphabet']   = model_set['alphabet']
    new_model_set['bg_freq']    = model_set['bg_freq']
    new_model_set['models']     = new_models
    # write out the meme file
    write_meme(new_model_set, out_file)
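
The create_parser helper is not shown in any of these examples. Below is a minimal sketch of what it could look like for example #1, assuming argparse and inferring the argument names from the attributes used above (input_file, output_file, min_overlap); the types, defaults and help texts are assumptions, not the project's actual definitions:

import argparse

def create_parser():
    # hypothetical sketch: only the argument names used in example #1 are known
    parser = argparse.ArgumentParser(
        description='filter redundant PWMs in a MEME file')
    parser.add_argument('input_file', help='query motifs in MEME format')
    parser.add_argument('output_file', help='path for the filtered MEME file')
    parser.add_argument('--min_overlap', type=int, default=2,
                        help='minimum overlap between two models (assumed type and default)')
    return parser
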
Code example #2
def main():
    parser = create_parser()
    args = parser.parse_args()

    # derive output paths from the input MEME file location
    ipath = args.meme_file
    dir = os.path.dirname(ipath)
    basename = os.path.splitext(os.path.basename(ipath))[0]
    # parse the MEME file and extract the motif models
    motifset = parse_meme(ipath)
    models = motifset['models']

    # write each motif out in BaMM format (.ihbcp / .ihbp files)
    for num in range(len(models)):
        filepath_v = dir + '/' + basename + "_motif_" + str(num + 1) + ".ihbcp"
        filepath_p = dir + '/' + basename + "_motif_" + str(num + 1) + ".ihbp"
        write_bamm(models[num]['pwm'], filepath_v)
        write_bamm(models[num]['pwm'], filepath_p)
Code example #3
File: pwm2bamm.py  Project: maryam1353/BaMMmotif2
def main():
    parser = create_parser()
    args = parser.parse_args()

    ipath = args.meme_file
    # write next to the input file unless an output directory is given
    if args.o is None:
        dir = os.path.dirname(ipath)
    else:
        dir = args.o
        if not os.path.exists(dir):
            os.makedirs(dir)
    basename = os.path.splitext(os.path.basename(ipath))[0]
    # parse the MEME file and extract the motif models
    motifset = parse_meme(ipath)
    models = motifset['models']

    for num in range(len(models)):
        filepath_v = os.path.join(
            dir, basename + "_motif_" + str(num + 1) + ".ihbcp")
        filepath_p = os.path.join(
            dir, basename + "_motif_" + str(num + 1) + ".ihbp")
        write_bamm(models[num]['pwm'], filepath_v)
        write_bamm(models[num]['pwm'], filepath_p)
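
Example #3 appears to be a variant of example #2 from the same pwm2bamm.py script: it adds an optional output directory (args.o), creating it if it does not exist, and builds the output paths with os.path.join instead of string concatenation. In both variants the .ihbcp and .ihbp files are written from the same models[num]['pwm'] array. An invocation might look like "python pwm2bamm.py motifs.meme -o converted/"; the exact flag names are inferred from args.meme_file and args.o and are not confirmed here.
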
Code example #4
def main():
    parser = create_parser()
    args = parser.parse_args()

    min_overlap = args.min_overlap
    query_file = args.input_file
    db_file = args.model_db
    out_file = args.output_file
    output_score_file = args.output_score_file

    # set up console logging to stdout
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    logger_fmt = '%(asctime)s [%(levelname)s]  %(message)s'
    formatter = logging.Formatter(logger_fmt)
    console_handler = logging.StreamHandler(stream=sys.stdout)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # parse input query meme file
    model_set = parse_meme(query_file)
    # pre-compute entropy for all the models
    models = update_models(model_set['models'])

    # if model_db is not given, search the query models against themselves
    db_models = models
    if db_file is not None:
        model_db = parse_meme(db_file)
        db_models = update_models(model_db['models'])
    db_size = len(db_models)

    # filter models using affinity propagation
    new_models = filter_pwms(models, min_overlap)
    # update the model set after filtering
    new_model_set = {}
    new_model_set['version']    = model_set['version']
    new_model_set['alphabet']   = model_set['alphabet']
    new_model_set['bg_freq']    = model_set['bg_freq']
    new_model_set['models']     = new_models
    # write out the meme file
    write_meme(new_model_set, out_file)

    if output_score_file is not None:
        # the initializer exposes shared read-only data to each worker process
        def init_workers():
            global highscore_fraction_g
            highscore_fraction_g = args.highscore_fraction
            global evalue_thresh_g
            evalue_thresh_g = args.evalue_threshold
            np.random.seed(args.seed)
            global db_models_g
            db_models_g = db_models
            global db_size_g
            db_size_g = db_size
            global n_neg_perm_g
            n_neg_perm_g = args.n_neg_perm
            global min_overlap_g
            min_overlap_g = args.min_overlap

        logger.info('Queuing %s search jobs', len(models))

        with open(output_score_file, 'w') as out:
            print('model_id', 'db_id', 'simscore', 'e-value',
                  'start_query', 'end_query', 'start_hit', 'end_hit', 'bg_score', 'cross_score', 'pad_score',
                  sep='\t', file=out)
            with Pool(args.n_processes, initializer=init_workers) as pool:
                jobs = []
                for model in models:
                    job = pool.apply_async(motif_search, args=(model,))
                    jobs.append(job)

                total_jobs = len(jobs)
                for job_index, job in enumerate(jobs, start=1):
                    hits = job.get()
                    # sort the hits of this model by the e-value column before writing
                    hits.sort(key=lambda x: x[3])
                    for hit in hits:
                        print(*hit, sep='\t', file=out)
                    logger.info('Finished (%s/%s)', job_index, total_jobs)
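
Examples #4 and #5 share the same multiprocessing pattern: the initializer function runs once in every worker process and stores read-only search parameters in module-level globals, so motif_search can read them without the data being passed along with every apply_async call. Below is a minimal, self-contained sketch of that pattern with generic names (the examples above rely on a closure over args instead of initargs):

from multiprocessing import Pool

def init_workers(params):
    # runs once per worker process; expose shared read-only data as a global
    global params_g
    params_g = params

def search(item):
    # worker task: reads the global set up by init_workers
    return item * params_g['scale']

if __name__ == '__main__':
    shared = {'scale': 10}
    with Pool(2, initializer=init_workers, initargs=(shared,)) as pool:
        jobs = [pool.apply_async(search, args=(i,)) for i in range(5)]
        results = [job.get() for job in jobs]
    print(results)  # [0, 10, 20, 30, 40]
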
Code example #5
File: BaMMmatch.py  Project: maryam1353/BaMMmotif2
def main():
    parser = create_parser()
    args = parser.parse_args()

    query_file = args.input_file
    target_db_path = args.db_path
    output_score_file = args.output_score_file

    # print out logs
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    logger_fmt = '%(asctime)s [%(levelname)s]  %(message)s'
    formatter = logging.Formatter(logger_fmt)
    console_handler = logging.StreamHandler(stream=sys.stdout)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # parse input query file
    model_format = args.input_format
    query_model_set = []
    if model_format == "PWM":
        # parse input meme file
        query_model_set = parse_meme(query_file)['models']
    elif model_format == "BaMM":
        # parse input bamm file
        query_model_set = parse_bamm_file(query_file)
    else:
        logger.error('Input model format is not recognised.')

    # pre-compute entropy for all query meme models
    models = update_models(query_model_set)

    # parse bamms from the target database
    logger.info('Reading in BaMMs from the target database')
    target_db = parse_bamm_db(target_db_path)

    # pre-compute entropy for all models in target database
    db_models = update_models(target_db)

    db_size = len(db_models)

    # initializer for the parallel worker processes
    def init_workers():
        np.random.seed(args.seed)
        global highscore_fraction_g
        highscore_fraction_g = args.highscore_fraction
        global pvalue_thresh_g
        pvalue_thresh_g = args.pvalue_threshold
        global db_models_g
        db_models_g = db_models
        global db_size_g
        db_size_g = db_size
        global n_neg_perm_g
        n_neg_perm_g = args.n_neg_perm
        global min_overlap_g
        min_overlap_g = args.min_overlap

    logger.info('Queuing %s search jobs', len(models))

    with open(output_score_file, 'w') as out:
        print('model_id',
              'db_id',
              'p-value',
              'e-value',
              'sim_score',
              'model_width',
              sep='\t',
              file=out)
        with Pool(args.n_processes, initializer=init_workers) as pool:
            jobs = []
            for model in models:
                job = pool.apply_async(motif_search, args=(model, ))
                jobs.append(job)

            total_jobs = len(jobs)
            for job_index, job in enumerate(jobs, start=1):
                hits = job.get()
                # sort the hits of this model by the e-value column before writing
                hits.sort(key=lambda x: x[3])
                for hit in hits:
                    print(*hit, sep='\t', file=out)
                logger.info('Finished (%s/%s)', job_index, total_jobs)
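
In both search scripts the worker function motif_search returns a list of hits for one query model; judging from the header rows written above, each hit is a tuple whose fields line up with those columns, so the sort key x[3] presumably orders a model's hits by e-value before they are written out tab-separated.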