예제 #1
0
파일: _finder.py 프로젝트: menis/abtools
def main(args):
    """Score every sequence collection against every standard.

    For each collection returned by ``mongodb.get_collections``, sequences
    are exported with ``get_sequences`` and each standard is run through
    ``run_jobs``. A figure is made when ``args.output_dir`` is set, and the
    database is updated when ``args.update`` is set. The exported sequence
    files are passed to ``clean_up`` once all standards have been processed
    for the collection.

    Args:
        args: Parsed options. This function reads ``db``, ``ip``, ``port``,
            ``user``, ``password``, ``collection``, ``collection_prefix``,
            ``remove_padding``, ``temp_dir``, ``output_dir`` and ``update``;
            the object is also forwarded whole to several helpers.
    """
    print_abfinder_start()
    db = mongodb.get_db(args.db, args.ip, args.port, args.user, args.password)
    make_directories(args)

    standards = get_standards(args)
    print_standards_info(standards)

    collections = mongodb.get_collections(
        db, args.collection, prefix=args.collection_prefix)
    print_collections_info(collections)

    for collection in collections:
        # The 'seq_id' index is requested at most once per collection,
        # right before the first database update.
        needs_index = True
        print_single_collection(collection)

        if args.remove_padding:
            print_remove_padding()
            mongodb.remove_padding(db, collection)

        seq_files = get_sequences(db, collection, args.temp_dir, args)

        for standard in standards:
            print_single_standard(standard)
            scores = run_jobs(seq_files, standard, args)

            if args.output_dir:
                make_figure(standard.id, scores, collection, args)

            if not args.update:
                continue
            if needs_index:
                mongodb.index(db, collection, 'seq_id')
                needs_index = False
            update_db(db, standard.id, scores, collection, args)

        clean_up(seq_files)
예제 #2
0
def main(args):
    """Compute and report similarity scores for each pair of collections.

    Pairs come from ``get_collection_pairs``. For every pair the similarity
    distribution is calculated, written with ``write_output`` and its median
    is folded into a running score table. When ``args.control_similarity``
    is set, the same is done for control similarities. Both score tables are
    printed at the end.

    Args:
        args: Parsed options. This function reads ``db``, ``ip``, ``port``,
            ``user``, ``password``, ``method``, ``chain`` and
            ``control_similarity``; the object is also forwarded whole to
            several helpers.
    """
    db = mongodb.get_db(args.db, args.ip, args.port, args.user, args.password)
    print_method(args.method)

    pairs = get_collection_pairs(db, args)
    index_collections(db, pairs)

    scores = {}
    cscores = {}
    previous_s1 = None

    for s1, s2 in pairs:
        # The first collection's V-gene data is only (re)loaded when the
        # first member of the pair differs from the previous iteration's.
        if previous_s1 != s1:
            print_collection_info(s1)
            s1_all_vgenes = get_vgenes(db, s1, args.chain)

        print_pair_info(s1, s2)
        s1_vgenes, s2_vgenes = get_vgenes(
            db, s2, args.chain, prev_data=s1_all_vgenes)

        logger.info('')
        logger.info('Calculating similarities...')
        median, counts, bins, similarities = calculate_similarities(
            s1_vgenes, s2_vgenes, args)
        write_output(s1, s2, median, counts, bins, similarities, args)
        scores = update_scores(s1, s2, median, scores)

        if args.control_similarity:
            logger.info('')
            logger.info('Calculating control similarities...')
            cmedian, ccounts, cbins, csimilarities = calculate_control_similarities(
                s1_vgenes, s2_vgenes, args)
            write_output(s1, s2, cmedian, ccounts, cbins, csimilarities, args)
            cscores = update_scores(s1, s2, cmedian, cscores)

        previous_s1 = s1

    print_final_results(scores)
    print_final_results(cscores, control=True)
예제 #3
0
def main(args):
    """Cluster, or de-duplicate, the sequences of every input collection.

    Exactly one input source is selected, checked in this order:

    1. JSON input (``args.json``), when no database/collection is given.
    2. Minimal text input (``args.minimal_input``), when no
       database/collection is given.
    3. Otherwise, collections are read from MongoDB.

    The three cases form a single ``if``/``elif``/``else`` chain. Previously
    the MongoDB fallback was attached only to the minimal-input check, so it
    also ran after the JSON branch and overwrote ``db``, ``collections`` and
    ``sample_names``.

    For each collection, either a non-redundant output is written
    (``args.non_redundant``) or the sequences are clustered and the
    resulting sequences and sizes are written with ``write_output``.

    Args:
        args: Parsed options. This function reads ``sleep``, ``output``,
            ``temp_dir``, ``consensus``, ``germs``, ``json``,
            ``minimal_input``, ``db``, ``collection``, ``ip``, ``port``,
            ``user``, ``password``, ``non_redundant`` and ``min_seqs``; the
            object is also forwarded whole to several helpers.
    """
    _print_start_info(args)
    if args.sleep:
        countdown(args)
    for d in [args.output, args.temp_dir]:
        make_dir(d)

    # NOTE(review): `germs` is not used again inside this function; the
    # parse_germs() call is kept in case it validates or has side effects.
    if args.consensus and args.germs:
        germs = parse_germs(args.germs)
    else:
        germs = args.germs

    # File-based input is only considered when no MongoDB source was given.
    no_mongo_source = args.db is None and args.collection is None

    if args.json is not None and no_mongo_source:
        # JSON input: either a single '.json' file or a directory of them.
        if os.path.isfile(args.json) and args.json.endswith('.json'):
            collections = [args.json, ]
        else:
            collections = list_files(args.json, extension='json')
        db = None
        sample_names = [os.path.basename(c).replace('.json', '') for c in collections]
    elif args.minimal_input is not None and no_mongo_source:
        # Minimal input: either a single '.txt' file or a directory of them.
        if os.path.isfile(args.minimal_input) and args.minimal_input.endswith('.txt'):
            collections = [args.minimal_input, ]
        else:
            collections = list_files(args.minimal_input, extension='txt')
        db = None
        sample_names = [os.path.basename(c).replace('.txt', '') for c in collections]
    else:
        # Otherwise, get sequences from MongoDB.
        db = mongodb.get_db(args.db, args.ip, args.port, args.user, args.password)
        collections = mongodb.get_collections(db, collection=args.collection)
        sample_names = collections

    for collection, sample_name in zip(collections, sample_names):
        collection_start = time.time()
        print_collection_info(collection, sample_name)
        if args.non_redundant:
            seqs = get_seqs(db, collection, args, make_seq_db=False)
            unique_file = unix_sort_unique(seqs, args)
            write_nr_output(collection, unique_file, collection_start, args)
        else:
            seq_db_path = get_seqs(db, collection, args)
            initial_clusters = initial_clustering(seq_db_path, args)
            if args.min_seqs == 1:
                # Single-sequence clusters are split off and handled by a
                # dedicated routine; the rest go through normal processing.
                singletons = [ic for ic in initial_clusters if ic.size == 1]
                initial_clusters = [ic for ic in initial_clusters if ic.size > 1]
                logger.info('{} clusters contained only a single sequence. Processing singletons...'.format(len(singletons)))
                singleton_consentroids = process_singleton_clusters(singletons, seq_db_path, args)
                logger.info('')
            else:
                singleton_consentroids = []
            consentroids = process_initial_clusters(initial_clusters, seq_db_path, args)
            consentroids += singleton_consentroids
            sequences, sizes = zip(*consentroids)
            write_output(sample_name, sequences, sizes, collection_start, args)
            for ic in initial_clusters:
                ic.cleanup()
            remove_sqlite_db(args)
예제 #4
0
def main(args):
    """Produce germline, CDR3 and V/J heatmap plots for every collection.

    Each collection in the database is queried for sequences of
    ``args.chain``; collections that yield no sequences are skipped. V and J
    germline plots are always made; the D germline plot is made only when
    ``args.chain`` is ``'heavy'``.

    Args:
        args: Parsed options. This function reads ``db``, ``ip``, ``port``,
            ``user``, ``password``, ``chain``, ``output``, ``species``,
            ``var_plot``, ``div_plot``, ``join_plot``, ``cdr3_plot`` and
            ``heatmap``.
    """
    database = mongodb.get_db(
        args.db,
        ip=args.ip,
        port=args.port,
        user=args.user,
        password=args.password,
    )

    for coll_name in mongodb.get_collections(database):
        print_collection_info(coll_name)
        seqs = query(database, coll_name, args.chain)
        if len(seqs) == 0:
            continue

        out_dir = args.output
        species = args.species
        chain = args.chain

        germline_plot(seqs, 'V', coll_name, out_dir, args.var_plot, species, chain)
        if chain == 'heavy':
            germline_plot(seqs, 'D', coll_name, out_dir, args.div_plot, species, chain)
        germline_plot(seqs, 'J', coll_name, out_dir, args.join_plot, species, chain)

        cdr3_plot(seqs, coll_name, args.cdr3_plot, chain, out_dir)
        vj_heatmap(seqs, coll_name, args.heatmap, species, chain, out_dir)
예제 #5
0
파일: _finder.py 프로젝트: menis/abtools
def update(db, collection, data, standard, version, args):
    """Write one identity score onto every document whose seq_id is listed.

    The score is stored under ``mab_identity_aa.<standard>`` when
    ``args.is_aa`` is truthy and under ``mab_identity_nt.<standard>``
    otherwise, with ``standard`` lower-cased.

    Args:
        db: Ignored; a new handle is obtained from ``mongodb.get_db`` using
            the connection settings in ``args``.
        collection: Name of the collection to update.
        data: Indexable pair of ``(score, ids)``.
        standard: Name of the standard; used lower-cased in the field key.
        version: Server version string. A major version below 3 selects the
            legacy ``update(..., multi=True)`` call, otherwise
            ``update_many`` is used.
        args: Parsed options; reads ``db``, ``ip``, ``port``, ``user``,
            ``password``, ``is_aa`` and ``debug``.
    """
    db = mongodb.get_db(args.db, args.ip, args.port, args.user, args.password)
    target = db[collection]
    score, ids = data[0], data[1]

    if args.is_aa:
        mab_id_field = 'mab_identity_aa'
    else:
        mab_id_field = 'mab_identity_nt'

    major_version = int(version.split('.')[0])
    selector = {'seq_id': {'$in': ids}}
    field_key = '{}.{}'.format(mab_id_field, standard.lower())
    change = {'$set': {field_key: float(score)}}

    if major_version < 3:
        target.update(selector, change, multi=True)
        return

    outcome = target.update_many(selector, change)
    # Match/modify counts are only reported on the update_many path.
    if args.debug:
        print('matched: {}'.format(outcome.matched_count))
        print('modified: {}'.format(outcome.modified_count))
예제 #6
0
파일: _finder.py 프로젝트: menis/abtools
def update_db(db, standard, scores, collection, args):
    """Push identity scores for one standard into a collection, in batches.

    Scores are grouped by their ``'identity'`` value and each group is
    written by a separate ``update`` call running in its own thread. Threads
    are started in batches of ``args.update_threads`` and each batch is
    joined before the next one starts.

    Args:
        db: Ignored; a new handle is obtained from ``mongodb.get_db`` using
            the connection settings in ``args``.
        standard: Name of the standard. Any ``'.'`` is replaced with ``'_'``
            before it is passed on to ``update``.
        scores: Object supporting ``groupby('identity')`` and ``len()``.
            NOTE(review): presumably a pandas DataFrame — confirm.
        collection: Name of the collection to update.
        args: Parsed options; reads ``db``, ``ip``, ``port``, ``user``,
            ``password`` and ``update_threads``, and is forwarded to
            ``update``.
    """
    db = mongodb.get_db(args.db, args.ip, args.port, args.user, args.password)
    print_index_info()
    mongodb.index(db, collection, ['seq_id'])
    print_update_info()
    start = time.time()

    conn = mongodb.get_connection(args.ip, args.port,
                                  args.user, args.password)
    mongo_version = conn.server_info()['version']
    standard = standard.replace('.', '_')

    groups = regroup(scores.groupby('identity').groups)
    total = len(groups)
    batch_size = args.update_threads

    for offset in range(0, total, batch_size):
        workers = []
        for group in groups[offset:offset + batch_size]:
            worker = Thread(target=update,
                            args=(db, collection, group, standard, mongo_version, args))
            worker.start()
            workers.append(worker)
        for worker in workers:
            worker.join()
        # The final batch may be partial; clamp so the reported progress
        # never exceeds the total number of groups.
        progbar.progress_bar(min(offset + batch_size, total), total)

    print('')
    run_time = time.time() - start
    logger.info('Updating took {} seconds. ({} sequences per second)'.format(round(run_time, 2),
        round(len(scores) / run_time, 1)))