Ejemplo n.º 1
0
def do(assemblies, labels, downloaded_dirpath, corrected_dirpath, ref_txt_fpath=None):
    """Find reference genome files for the given assemblies.

    The species to look for come from ``parse_refs_list(ref_txt_fpath)`` when
    ``ref_txt_fpath`` is given, otherwise from ``process_blast``. They are then
    passed to ``search_references`` together with the FASTA files already
    present under ``downloaded_dirpath``.

    Args:
        assemblies: assembly objects; each must expose an ``fpath`` attribute.
        labels: forwarded unchanged to ``check_blast``, ``process_blast`` and
            ``search_references``.
        downloaded_dirpath: directory in which ``blast.err``, ``blast.check``
            and ``blast.res`` are located and which is scanned for FASTA files.
        corrected_dirpath: forwarded unchanged to ``process_blast``.
        ref_txt_fpath: optional path handed to ``parse_refs_list``; when
            truthy, ``process_blast`` is not called.

    Returns:
        The list returned by ``search_references``, sorted in place.
    """
    logger.print_timestamp()
    err_fpath = os.path.join(downloaded_dirpath, 'blast.err')
    blast_check_fpath = os.path.join(downloaded_dirpath, 'blast.check')
    blast_res_fpath = os.path.join(downloaded_dirpath, 'blast.res')
    # Both lookup tables are keyed by the assembly file path.
    files_md5 = dict((assembly.fpath, md5(assembly.fpath)) for assembly in assemblies)
    assemblies_fpaths = dict((assembly.fpath, assembly) for assembly in assemblies)
    blast_assemblies, downloaded_organisms, not_founded_organisms = check_blast(
        blast_check_fpath, blast_res_fpath, files_md5, assemblies_fpaths, assemblies, labels)

    species_list = []
    replacement_list = None
    if ref_txt_fpath:
        species_list = parse_refs_list(ref_txt_fpath)
        species_by_assembly = None
    else:
        species_scores, species_by_assembly, replacement_dict = process_blast(
            blast_assemblies, downloaded_dirpath, corrected_dirpath, labels, blast_check_fpath, err_fpath)
        if species_scores:
            # NOTE(review): the tuples are unpacked below as (species, query_id, score),
            # so this default sort orders by species first, not by score -- confirm
            # that this is the intended ranking.
            species_scores = sorted(species_scores, reverse=True)
            species_list = [species for (species, query_id, score) in species_scores]
            replacement_list = [replacement_dict[query_id] for (species, query_id, score) in species_scores]

    # os.walk reports file names relative to the directory currently being
    # visited, so they must be joined with `path` (not with the walk root) to
    # get a valid path for files that live in subdirectories.
    downloaded_ref_fpaths = [os.path.join(path, fname)
                             for (path, dirs, fnames) in os.walk(downloaded_dirpath)
                             for fname in fnames if qutils.check_is_fasta_file(fname)]

    ref_fpaths = search_references(species_list, assemblies, labels, downloaded_dirpath, not_founded_organisms,
                                   downloaded_ref_fpaths, blast_check_fpath, err_fpath, species_by_assembly,
                                   replacement_list)

    if not ref_fpaths:
        logger.main_info('Reference genomes are not found.')
    # The error log is kept only when debugging is enabled.
    if not qconfig.debug and os.path.exists(err_fpath):
        os.remove(err_fpath)
    ref_fpaths.sort()
    return ref_fpaths
Ejemplo n.º 2
0
def do(assemblies, labels, downloaded_dirpath, corrected_dirpath, ref_txt_fpath=None):
    """Find reference genome files for the given assemblies.

    The organisms to look for come from ``parse_refs_list(ref_txt_fpath)`` when
    ``ref_txt_fpath`` is given, otherwise from ``process_blast``. They are then
    passed to ``process_refs`` together with the FASTA files already present
    under ``downloaded_dirpath``.

    Args:
        assemblies: assembly objects; each must expose an ``fpath`` attribute.
        labels: forwarded unchanged to ``check_blast``, ``process_blast`` and
            ``process_refs``.
        downloaded_dirpath: directory in which ``blast.err``, ``blast.check``
            and ``blast.res`` are located and which is scanned for FASTA files.
        corrected_dirpath: forwarded unchanged to ``process_blast``.
        ref_txt_fpath: optional path handed to ``parse_refs_list``; when
            truthy, ``process_blast`` is not called.

    Returns:
        The list returned by ``process_refs``, sorted in place.
    """
    logger.print_timestamp()
    err_fpath = os.path.join(downloaded_dirpath, 'blast.err')
    blast_check_fpath = os.path.join(downloaded_dirpath, 'blast.check')
    blast_res_fpath = os.path.join(downloaded_dirpath, 'blast.res')
    # Both lookup tables are keyed by the assembly file path.
    files_sizes = dict((assembly.fpath, os.path.getsize(assembly.fpath)) for assembly in assemblies)
    assemblies_fpaths = dict((assembly.fpath, assembly) for assembly in assemblies)
    blast_assemblies, downloaded_organisms, not_founded_organisms = check_blast(
        blast_check_fpath, blast_res_fpath, files_sizes, assemblies_fpaths, assemblies, labels)
    organisms = []

    if ref_txt_fpath:
        organisms = parse_refs_list(ref_txt_fpath)
        organisms_assemblies = None
    else:
        scores_organisms, organisms_assemblies = process_blast(
            blast_assemblies, downloaded_dirpath, corrected_dirpath, labels, blast_check_fpath, err_fpath)
        if scores_organisms:
            # Tuples are (score, organism), so a reversed sort puts the highest score first.
            scores_organisms = sorted(scores_organisms, reverse=True)
            organisms = [organism for (score, organism) in scores_organisms]

    # os.walk reports file names relative to the directory currently being
    # visited, so they must be joined with `path` (not with the walk root) to
    # get a valid path for files that live in subdirectories.
    downloaded_ref_fpaths = [os.path.join(path, fname)
                             for (path, dirs, fnames) in os.walk(downloaded_dirpath)
                             for fname in fnames if qutils.check_is_fasta_file(fname)]

    ref_fpaths = process_refs(organisms, assemblies, labels, downloaded_dirpath, not_founded_organisms,
                              downloaded_ref_fpaths, blast_check_fpath, err_fpath, organisms_assemblies)

    if not ref_fpaths:
        logger.main_info('Reference genomes are not found.')
    # The error log is kept only when debugging is enabled.
    if not qconfig.debug and os.path.exists(err_fpath):
        os.remove(err_fpath)
    ref_fpaths.sort()
    return ref_fpaths
def do(assemblies, labels, downloaded_dirpath, corrected_dirpath, ref_txt_fpath=None):
    """Find reference genome files for the given assemblies.

    The organisms to look for come from ``parse_refs_list(ref_txt_fpath)`` when
    ``ref_txt_fpath`` is given, otherwise from ``process_blast``. They are then
    passed to ``process_refs`` together with the FASTA files already present
    under ``downloaded_dirpath``.

    Args:
        assemblies: assembly objects; each must expose an ``fpath`` attribute.
        labels: forwarded unchanged to ``check_blast``, ``process_blast`` and
            ``process_refs``.
        downloaded_dirpath: directory in which ``blast.err``, ``blast.check``
            and ``blast.res`` are located and which is scanned for FASTA files.
        corrected_dirpath: forwarded unchanged to ``process_blast``.
        ref_txt_fpath: optional path handed to ``parse_refs_list``; when
            truthy, ``process_blast`` is not called.

    Returns:
        The list returned by ``process_refs``, sorted in place.
    """
    logger.print_timestamp()
    err_fpath = os.path.join(downloaded_dirpath, 'blast.err')
    blast_check_fpath = os.path.join(downloaded_dirpath, 'blast.check')
    blast_res_fpath = os.path.join(downloaded_dirpath, 'blast.res')
    # Both lookup tables are keyed by the assembly file path.
    files_md5 = dict((assembly.fpath, md5(assembly.fpath)) for assembly in assemblies)
    assemblies_fpaths = dict((assembly.fpath, assembly) for assembly in assemblies)
    blast_assemblies, downloaded_organisms, not_founded_organisms = check_blast(
        blast_check_fpath, blast_res_fpath, files_md5, assemblies_fpaths, assemblies, labels)
    organisms = []

    if ref_txt_fpath:
        organisms = parse_refs_list(ref_txt_fpath)
        organisms_assemblies = None
    else:
        scores_organisms, organisms_assemblies = process_blast(
            blast_assemblies, downloaded_dirpath, corrected_dirpath, labels, blast_check_fpath, err_fpath)
        if scores_organisms:
            # Tuples are (score, organism), so a reversed sort puts the highest score first.
            scores_organisms = sorted(scores_organisms, reverse=True)
            organisms = [organism for (score, organism) in scores_organisms]

    # os.walk reports file names relative to the directory currently being
    # visited, so they must be joined with `path` (not with the walk root) to
    # get a valid path for files that live in subdirectories.
    downloaded_ref_fpaths = [os.path.join(path, fname)
                             for (path, dirs, fnames) in os.walk(downloaded_dirpath)
                             for fname in fnames if qutils.check_is_fasta_file(fname)]

    ref_fpaths = process_refs(organisms, assemblies, labels, downloaded_dirpath, not_founded_organisms,
                              downloaded_ref_fpaths, blast_check_fpath, err_fpath, organisms_assemblies)

    if not ref_fpaths:
        logger.main_info('Reference genomes are not found.')
    # The error log is kept only when debugging is enabled.
    if not qconfig.debug and os.path.exists(err_fpath):
        os.remove(err_fpath)
    ref_fpaths.sort()
    return ref_fpaths
Ejemplo n.º 4
0
def do(assemblies, labels, downloaded_dirpath, corrected_dirpath, ref_txt_fpath=None):
    """Find reference genome files for the given assemblies.

    The species to look for come from ``parse_refs_list(ref_txt_fpath)`` when
    ``ref_txt_fpath`` is given, otherwise from ``process_blast``. They are then
    passed to ``search_references`` together with the FASTA files already
    present under ``downloaded_dirpath``.

    Args:
        assemblies: assembly objects; each must expose an ``fpath`` attribute.
        labels: forwarded unchanged to ``check_blast``, ``process_blast`` and
            ``search_references``.
        downloaded_dirpath: directory in which ``blast.err``, ``blast.check``
            and ``blast.res`` are located and which is scanned for FASTA files.
        corrected_dirpath: forwarded unchanged to ``process_blast``.
        ref_txt_fpath: optional path handed to ``parse_refs_list``; when
            truthy, ``process_blast`` is not called.

    Returns:
        The list returned by ``search_references``, sorted in place.
    """
    logger.print_timestamp()
    err_fpath = os.path.join(downloaded_dirpath, 'blast.err')
    blast_check_fpath = os.path.join(downloaded_dirpath, 'blast.check')
    blast_res_fpath = os.path.join(downloaded_dirpath, 'blast.res')
    # Both lookup tables are keyed by the assembly file path.
    files_sizes = dict((assembly.fpath, os.path.getsize(assembly.fpath)) for assembly in assemblies)
    assemblies_fpaths = dict((assembly.fpath, assembly) for assembly in assemblies)
    blast_assemblies, downloaded_organisms, not_founded_organisms = check_blast(
        blast_check_fpath, blast_res_fpath, files_sizes, assemblies_fpaths, assemblies, labels)

    species_list = []
    replacement_list = None
    if ref_txt_fpath:
        species_list = parse_refs_list(ref_txt_fpath)
        species_by_assembly = None
    else:
        species_scores, species_by_assembly, replacement_dict = process_blast(
            blast_assemblies, downloaded_dirpath, corrected_dirpath, labels, blast_check_fpath, err_fpath)
        if species_scores:
            # NOTE(review): the tuples are unpacked below as (species, query_id, score),
            # so this default sort orders by species first, not by score -- confirm
            # that this is the intended ranking.
            species_scores = sorted(species_scores, reverse=True)
            species_list = [species for (species, query_id, score) in species_scores]
            replacement_list = [replacement_dict[query_id] for (species, query_id, score) in species_scores]

    # os.walk reports file names relative to the directory currently being
    # visited, so they must be joined with `path` (not with the walk root) to
    # get a valid path for files that live in subdirectories.
    downloaded_ref_fpaths = [os.path.join(path, fname)
                             for (path, dirs, fnames) in os.walk(downloaded_dirpath)
                             for fname in fnames if qutils.check_is_fasta_file(fname)]

    ref_fpaths = search_references(species_list, assemblies, labels, downloaded_dirpath, not_founded_organisms,
                                   downloaded_ref_fpaths, blast_check_fpath, err_fpath, species_by_assembly,
                                   replacement_list)

    if not ref_fpaths:
        logger.main_info('Reference genomes are not found.')
    # The error log is kept only when debugging is enabled.
    if not qconfig.debug and os.path.exists(err_fpath):
        os.remove(err_fpath)
    ref_fpaths.sort()
    return ref_fpaths
Ejemplo n.º 5
0
def parse_meta_references(option, opt_str, value, parser, logger):
    """Expand a comma-separated reference option into a list of file paths.

    Each comma-separated entry of ``value`` is either a directory, which is
    walked recursively for files accepted by ``qutils.check_is_fasta_file``
    (added in sorted order), or a single file, which is checked with
    ``assert_file_exists`` and added as-is. The collected paths are appended
    to the list obtained from ``ensure_value(qconfig, option.dest, [])``.

    ``opt_str`` and ``parser`` are accepted but not used here; the signature
    appears to follow the optparse callback convention plus an extra logger.
    """
    collected = []
    for entry in value.split(','):
        if not os.path.isdir(entry):
            # Plain file: validate it and keep the path exactly as given.
            assert_file_exists(entry, 'reference')
            collected.append(entry)
            continue

        # Directory: gather every FASTA file below it, then add them sorted.
        found = []
        for dirpath, _subdirs, fnames in os.walk(entry):
            for fname in fnames:
                if qutils.check_is_fasta_file(fname, logger=logger):
                    found.append(join(dirpath, fname))
        found.sort()
        collected.extend(found)

    ensure_value(qconfig, option.dest, []).extend(collected)
Ejemplo n.º 6
0
def parse_meta_references(option, opt_str, value, parser, logger):
    """Expand a comma-separated reference option into a list of file paths.

    Each comma-separated entry of ``value`` is either a directory, which is
    walked recursively for files accepted by ``qutils.check_is_fasta_file``
    (added in sorted order), or a single file, which is checked with
    ``assert_file_exists`` and added as-is. The collected paths are appended
    to the list obtained from ``ensure_value(qconfig, option.dest, [])``.

    ``opt_str`` and ``parser`` are accepted but not used here; the signature
    appears to follow the optparse callback convention plus an extra logger.
    """
    collected = []
    for entry in value.split(','):
        if not os.path.isdir(entry):
            # Plain file: validate it and keep the path exactly as given.
            assert_file_exists(entry, 'reference')
            collected.append(entry)
            continue

        # Directory: gather every FASTA file below it, then add them sorted.
        found = []
        for dirpath, _subdirs, fnames in os.walk(entry):
            for fname in fnames:
                if qutils.check_is_fasta_file(fname, logger=logger):
                    found.append(join(dirpath, fname))
        found.sort()
        collected.extend(found)

    ensure_value(qconfig, option.dest, []).extend(collected)