def do(contigs_fpaths, output_dir, logger):
    """Run BUSCO for every assembly and record the results in the reports.

    Steps: obtain and compile Augustus, obtain the BLAST binaries, create
    ``output_dir`` and its ``tmp`` subdirectory, fetch the lineage database
    selected by ``qconfig.prokaryote`` / ``qconfig.is_fungus``, write a BUSCO
    config, export ``BUSCO_CONFIG_FILE`` and ``AUGUSTUS_CONFIG_PATH``, run
    ``busco_main_handler`` for all assemblies via ``run_parallel`` and parse
    the summary file returned for each assembly.

    For each summary with a non-zero total, ``BUSCO_COMPLETE`` and
    ``BUSCO_PART`` are added to the assembly report as percentages formatted
    with two decimals. Every existing summary file is copied to
    ``output_dir``. Unless ``qconfig.debug`` is set, ``cleanup(output_dir)``
    is called at the end.

    :param contigs_fpaths: paths of the assemblies to process.
    :param output_dir: directory for logs and results (created if missing).
    :param logger: logger used for all messages.
    :return: None; every failure is logged and ends the function early.
    """
    logger.print_timestamp()
    logger.info('Running BUSCO...')

    compilation_success = True
    augustus_dirpath = download_augustus(logger)
    if not augustus_dirpath:
        compilation_success = False
    elif not compile_tool('Augustus', augustus_dirpath, [join('bin', 'augustus')], logger=logger):
        compilation_success = False

    if compilation_success and not download_blast_binaries(logger=logger, filenames=blast_filenames):
        compilation_success = False

    if not compilation_success:
        logger.info('Failed finding conservative genes.')
        return

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    tmp_dir = join(output_dir, 'tmp')
    if not os.path.isdir(tmp_dir):
        os.makedirs(tmp_dir)

    # Without this guard an empty assembly list makes n_jobs zero and the
    # thread computation below raises ZeroDivisionError.
    if not contigs_fpaths:
        logger.info('No assemblies provided, skipping BUSCO.')
        return

    n_jobs = max(1, min(len(contigs_fpaths), qconfig.max_threads))
    busco_threads = max(1, qconfig.max_threads // n_jobs)

    clade_dirpath = download_db(logger, is_prokaryote=qconfig.prokaryote, is_fungus=qconfig.is_fungus)
    if not clade_dirpath:
        logger.info('Failed finding conservative genes.')
        return

    config_fpath = make_config(output_dir, tmp_dir, busco_threads, clade_dirpath, augustus_dirpath)
    logger.info('Logs and results will be saved under ' + output_dir + '...')

    os.environ['BUSCO_CONFIG_FILE'] = config_fpath
    # Validate before exporting: os.environ only accepts strings, so storing a
    # None result would raise TypeError before the error could be reported.
    augustus_config_dirpath = copy_augustus_configs(augustus_dirpath, tmp_dir)
    if not augustus_config_dirpath:
        logger.error('Augustus configs not found, failed to run BUSCO without them.')
        return
    os.environ['AUGUSTUS_CONFIG_PATH'] = augustus_config_dirpath

    busco_args = [[contigs_fpath, qutils.label_from_fpath_for_fname(contigs_fpath)]
                  for contigs_fpath in contigs_fpaths]
    summary_fpaths = run_parallel(busco_main_handler, busco_args, qconfig.max_threads)
    if not any(summary_fpaths):
        logger.error('Failed running BUSCO for all the assemblies. See log files in ' + output_dir +
                     ' for information '
                     '(rerun with --debug to keep all intermediate files).')
        return

    # saving results
    zero_output_for_all = True
    for contigs_fpath, summary_fpath in zip(contigs_fpaths, summary_fpaths):
        report = reporting.get(contigs_fpath)

        if summary_fpath and os.path.isfile(summary_fpath):
            total_buscos, part_buscos, complete_buscos = 0, 0, 0
            with open(summary_fpath) as summary_file:
                for line in summary_file:
                    fields = line.split()
                    # Only lines that start with a count are parsed; header or
                    # blank lines may mention a keyword (e.g. inside an
                    # assembly path) and would otherwise break int().
                    if not fields or not fields[0].isdigit():
                        continue
                    count = int(fields[0])
                    if 'Complete BUSCOs' in line:
                        complete_buscos = count
                    elif 'Fragmented' in line:
                        part_buscos = count
                    elif 'Total' in line:
                        total_buscos = count

            if total_buscos != 0:
                report.add_field(reporting.Fields.BUSCO_COMPLETE,
                                 ('%.2f' % (float(complete_buscos) * 100.0 / total_buscos)))
                report.add_field(reporting.Fields.BUSCO_PART,
                                 ('%.2f' % (float(part_buscos) * 100.0 / total_buscos)))
            if complete_buscos + part_buscos > 0:
                zero_output_for_all = False
            shutil.copy(summary_fpath, output_dir)
        else:
            logger.error('Failed running BUSCO for ' + contigs_fpath +
                         '. See the log for detailed information'
                         ' (rerun with --debug to keep all intermediate files).')

    if zero_output_for_all:
        # NOTE(review): the contact address at the end of this message reads
        # "[email protected]" -- it looks scrubbed; restore the real address.
        logger.warning(
            'BUSCO did not fail explicitly but found nothing for all assemblies! '
            'Possible reasons and workarounds:\n'
            ' 1. Provided assemblies are so small that they do not contain even a single partial BUSCO gene. Not likely but may happen -- nothing to worry then.\n'
            ' 2. Incorrect lineage database was used. To run with fungi DB use --fungus, to run with eukaryota DB use --eukaryote, otherwise BUSCO uses bacteria DB.\n'
            ' 3. Problem with BUSCO dependencies, most likely Augustus. Check that the binaries in ' + augustus_dirpath + '/bin/ are working properly.\n'
            ' If something is wrong with Augustus, you may try to install it yourself (https://github.com/Gaius-Augustus/Augustus) and add "augustus" binary to PATH.\n'
            ' 4. Some other problem with BUSCO. Check the logs (you may need to rerun QUAST with --debug to see all intermediate files).\n'
            ' If you cannot solve the problem yourself, post an issue at https://github.com/ablab/quast/issues or write to [email protected]'
        )

    if not qconfig.debug:
        cleanup(output_dir)
    logger.info('Done.')
def process_blast(blast_assemblies, downloaded_dirpath, corrected_dirpath, labels, blast_check_fpath, err_fpath):
    """Run BlastN for the assemblies and pick reference candidates from the hits.

    The BLAST database is either the custom one from
    ``qconfig.custom_blast_db_fpath`` (a directory containing ``<dbname>.nsq``
    or the ``.nsq`` file itself) or the one obtained by ``download_blastdb``.
    ``parallel_blast`` is run for every entry of ``blast_assemblies``; then the
    result file of every label is parsed.

    Within a result file each ``#`` line resets the per-query counter. The
    first accepted hit after such a line is treated as the best hit of the
    query and competes for a place in the per-assembly list (one entry per
    species); further accepted hits, up to ``max_entries`` per query, are
    recorded in the replacement dictionary.

    :param blast_assemblies: objects with ``fpath`` and ``label`` to run BlastN on.
    :param downloaded_dirpath: directory holding the ``blast.res`` output.
    :param corrected_dirpath: passed through to ``parallel_blast``.
    :param labels: assembly labels whose result files are parsed.
    :param blast_check_fpath: passed through to ``parallel_blast``.
    :param err_fpath: passed through to ``parallel_blast``.
    :return: ``(species_scores, species_by_assembly, replacement_dict)`` where
        ``species_scores`` is a list of ``(seqname, query_id, score)`` tuples,
        ``species_by_assembly`` maps a label to its list of sequence names and
        ``replacement_dict`` maps a query id to alternative sequence names;
        ``(None, None, None)`` if prerequisites are missing or nothing was found.
    """
    global db_fpath

    if not download_blast_binaries(filenames=blast_filenames):
        return None, None, None

    if qconfig.custom_blast_db_fpath:
        db_fpath = qconfig.custom_blast_db_fpath
        if isdir(db_fpath):
            db_aux_files = [f for f in os.listdir(db_fpath) if f.endswith('.nsq')]
            if db_aux_files:
                # Strip only the extension; str.replace would also remove
                # '.nsq' occurring elsewhere in the file name.
                db_fpath = join(qconfig.custom_blast_db_fpath, db_aux_files[0][:-len('.nsq')])
        elif isfile(db_fpath) and db_fpath.endswith('.nsq'):
            db_fpath = db_fpath[:-len('.nsq')]
        if not os.path.isfile(db_fpath + '.nsq'):
            logger.error('You should specify path to BLAST database obtained by running makeblastdb command: '
                         'either path to directory containing <dbname>.nsq file or path to <dbname>.nsq file itself.'
                         ' Also you can rerun MetaQUAST without --blast-db option. MetaQUAST uses SILVA 16S RNA database by default.',
                         exit_with_code=2)
    elif not download_blastdb():
        return None, None, None

    blast_res_fpath = os.path.join(downloaded_dirpath, 'blast.res')

    if len(blast_assemblies) > 0:
        logger.main_info('Running BlastN..')
        n_jobs = min(qconfig.max_threads, len(blast_assemblies))
        blast_threads = max(1, qconfig.max_threads // n_jobs)
        if is_python2():
            from joblib2 import Parallel, delayed
        else:
            from joblib3 import Parallel, delayed
        Parallel(n_jobs=n_jobs)(
            delayed(parallel_blast)(assembly.fpath, assembly.label, corrected_dirpath, err_fpath,
                                    blast_res_fpath, blast_check_fpath, blast_threads)
            for assembly in blast_assemblies)

    logger.main_info()
    species_scores = []
    species_by_assembly = dict()
    max_entries = 4
    replacement_dict = defaultdict(list)
    for label in labels:
        assembly_scores = []
        assembly_species = []
        res_fpath = get_blast_output_fpath(blast_res_fpath, label)
        if os.path.exists(res_fpath):
            refs_for_query = 0
            with open(res_fpath) as res_file:
                # Start from the same positions the "Fields" parsing falls back
                # to, so a hit line seen before any header is still parseable
                # instead of being compared against None.
                query_id_col, subj_id_col, idy_col, len_col, score_col = 0, 1, 2, 3, 11
                for line in res_file:
                    if line.startswith('#'):
                        refs_for_query = 0
                        # Fields: query id, subject id, % identity, alignment length, mismatches,
                        # gap opens, q. start, q. end, s. start, s. end, evalue, bit score
                        if 'Fields' in line:
                            names = line.strip().split('Fields: ')[-1].split(', ')
                            query_id_col = names.index('query id') if 'query id' in names else 0
                            subj_id_col = names.index('subject id') if 'subject id' in names else 1
                            idy_col = names.index('% identity') if '% identity' in names else 2
                            len_col = names.index('alignment length') if 'alignment length' in names else 3
                            score_col = names.index('bit score') if 'bit score' in names else 11
                        continue

                    fs = line.split()
                    if refs_for_query >= max_entries or len(fs) <= score_col:
                        continue

                    query_id = fs[query_id_col]
                    organism_id = fs[subj_id_col]
                    idy = float(fs[idy_col])
                    length = int(fs[len_col])
                    score = float(fs[score_col])
                    if not (idy >= qconfig.identity_threshold and length >= qconfig.min_length
                            and score >= qconfig.min_bitscore):
                        continue

                    seqname, taxons = parse_organism_id(organism_id)
                    if not seqname:
                        continue
                    species_name = get_species_name(seqname)
                    if not species_name or 'uncultured' in seqname:
                        continue

                    if refs_for_query != 0:
                        # Not the first accepted hit of this query: keep it
                        # only as a possible replacement.
                        if seqname not in replacement_dict[query_id]:
                            replacement_dict[query_id].append(seqname)
                            refs_for_query += 1
                        continue

                    if species_name not in assembly_species:
                        assembly_scores.append((seqname, query_id, score))
                        if taxons:
                            taxons_for_krona[correct_name(seqname)] = taxons
                        assembly_species.append(species_name)
                        refs_for_query += 1
                    else:
                        # Species already present: keep whichever hit scores higher.
                        same_species = [entry for entry in assembly_scores
                                        if get_species_name(entry[0]) == species_name]
                        if same_species and score > same_species[0][2]:
                            assembly_scores.remove(same_species[0])
                            assembly_scores.append((seqname, query_id, score))
                            if taxons:
                                taxons_for_krona[correct_name(seqname)] = taxons
                            refs_for_query += 1

        # Rank by bit score (name as tie-breaker) before truncating. Sorting
        # the raw (seqname, query_id, score) tuples would order by name and
        # keep an arbitrary subset instead of the best-scoring references.
        assembly_scores = sorted(assembly_scores, key=lambda entry: (entry[2], entry[0]), reverse=True)
        assembly_scores = assembly_scores[:qconfig.max_references]
        for entry in assembly_scores:
            # Skip sequences already selected for a previously processed label.
            if not any(entry[0] in species_list for species_list in species_by_assembly.values()):
                species_scores.append(entry)
        species_by_assembly[label] = [entry[0] for entry in assembly_scores]

    if not species_scores:
        return None, None, None
    return species_scores, species_by_assembly, replacement_dict
def do(contigs_fpaths, output_dir, logger):
    """Run BUSCO for every assembly and add completeness figures to the reports.

    Obtains and compiles Augustus, obtains the BLAST binaries, registers the
    Augustus directory with ``set_augustus_dir``, creates ``output_dir`` and
    its ``tmp`` subdirectory, fetches the lineage database chosen by
    ``qconfig.prokaryote`` and calls ``busco.main`` for all assemblies through
    ``run_parallel``. Each returned summary file is parsed; when its total is
    non-zero, ``BUSCO_COMPLETE`` and ``BUSCO_PART`` are added to the assembly
    report as percentages formatted with two decimals.

    :param contigs_fpaths: paths of the assemblies to process.
    :param output_dir: directory for results and ``busco.log`` (created if missing).
    :param logger: logger used for all messages.
    :return: None; every failure is logged and ends the function early.
    """
    logger.print_timestamp()
    logger.info('Running BUSCO...')

    compilation_success = True
    augustus_dirpath = download_augustus(logger)
    if not augustus_dirpath:
        compilation_success = False
    elif not compile_tool('Augustus', augustus_dirpath, [join('bin', 'augustus')], logger=logger):
        compilation_success = False

    if compilation_success and not download_blast_binaries(logger=logger, filenames=blast_filenames):
        compilation_success = False

    if not compilation_success:
        logger.info('Failed finding conservative genes.')
        return

    set_augustus_dir(augustus_dirpath)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    tmp_dir = join(output_dir, 'tmp')
    if not os.path.isdir(tmp_dir):
        os.makedirs(tmp_dir)

    # An empty assembly list would make n_jobs zero and the division below
    # raise ZeroDivisionError.
    if not contigs_fpaths:
        logger.info('No assemblies provided, skipping BUSCO.')
        return

    n_jobs = max(1, min(len(contigs_fpaths), qconfig.max_threads))
    busco_threads = max(1, qconfig.max_threads // n_jobs)

    clade_dirpath = download_db(logger, is_prokaryote=qconfig.prokaryote)
    if not clade_dirpath:
        logger.info('Failed finding conservative genes.')
        return

    log_fpath = join(output_dir, 'busco.log')
    logger.info('Logging to ' + log_fpath + '...')

    augustus_parameters = ('--augustus_parameters=\'--AUGUSTUS_CONFIG_PATH=' +
                           join(augustus_dirpath, 'config') + '\'')
    busco_args = [(['-i', contigs_fpath,
                    '-o', qutils.label_from_fpath_for_fname(contigs_fpath),
                    '-l', clade_dirpath,
                    '-m', 'genome', '-f', '-z',
                    '-c', str(busco_threads),
                    '-t', tmp_dir,
                    augustus_parameters], output_dir)
                  for contigs_fpath in contigs_fpaths]
    summary_fpaths = run_parallel(busco.main, busco_args, qconfig.max_threads)
    if not any(summary_fpaths):
        logger.error('Failed running BUSCO for all the assemblies. See ' + log_fpath + ' for information.')
        return

    # saving results
    for contigs_fpath, summary_fpath in zip(contigs_fpaths, summary_fpaths):
        report = reporting.get(contigs_fpath)

        if not (summary_fpath and os.path.isfile(summary_fpath)):
            logger.error('Failed running BUSCO for ' + contigs_fpath + '. See ' + log_fpath + ' for information.')
            continue

        total_buscos, part_buscos, complete_buscos = 0, 0, 0
        with open(summary_fpath) as summary_file:
            for line in summary_file:
                fields = line.split()
                # Parse only lines that begin with a count: header or blank
                # lines can contain a keyword (e.g. in an assembly path) and
                # would make int() raise ValueError.
                if not fields or not fields[0].isdigit():
                    continue
                count = int(fields[0])
                if 'Complete BUSCOs' in line:
                    complete_buscos = count
                elif 'Fragmented' in line:
                    part_buscos = count
                elif 'Total' in line:
                    total_buscos = count

        if total_buscos != 0:
            report.add_field(reporting.Fields.BUSCO_COMPLETE,
                             ('%.2f' % (float(complete_buscos) * 100.0 / total_buscos)))
            report.add_field(reporting.Fields.BUSCO_PART,
                             ('%.2f' % (float(part_buscos) * 100.0 / total_buscos)))

    logger.info('Done.')
def do(contigs_fpaths, output_dir, logger):
    """Run BUSCO for every assembly and store the results in the reports.

    Obtains and compiles Augustus, obtains the BLAST binaries, creates
    ``output_dir`` and its ``tmp`` subdirectory, fetches the lineage database
    selected by ``qconfig.prokaryote`` / ``qconfig.is_fungus``, writes a BUSCO
    config and exports ``BUSCO_CONFIG_FILE`` and ``AUGUSTUS_CONFIG_PATH``
    before running ``busco_main_handler`` for all assemblies through
    ``run_parallel``.

    Each existing summary file is parsed and copied to ``output_dir``; when
    its total is non-zero, ``BUSCO_COMPLETE`` and ``BUSCO_PART`` are added to
    the assembly report as percentages formatted with two decimals. Unless
    ``qconfig.debug`` is set, ``cleanup(output_dir)`` is called at the end.

    :param contigs_fpaths: paths of the assemblies to process.
    :param output_dir: directory for logs and results (created if missing).
    :param logger: logger used for all messages.
    :return: None; every failure is logged and ends the function early.
    """
    logger.print_timestamp()
    logger.info('Running BUSCO...')

    compilation_success = True
    augustus_dirpath = download_augustus(logger)
    if not augustus_dirpath:
        compilation_success = False
    elif not compile_tool('Augustus', augustus_dirpath, [join('bin', 'augustus')], logger=logger):
        compilation_success = False

    if compilation_success and not download_blast_binaries(logger=logger, filenames=blast_filenames):
        compilation_success = False

    if not compilation_success:
        logger.info('Failed finding conservative genes.')
        return

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    tmp_dir = join(output_dir, 'tmp')
    if not os.path.isdir(tmp_dir):
        os.makedirs(tmp_dir)

    # Guard against an empty assembly list: n_jobs would be zero and the
    # thread computation would raise ZeroDivisionError.
    if not contigs_fpaths:
        logger.info('No assemblies provided, skipping BUSCO.')
        return

    n_jobs = max(1, min(len(contigs_fpaths), qconfig.max_threads))
    busco_threads = max(1, qconfig.max_threads // n_jobs)

    clade_dirpath = download_db(logger, is_prokaryote=qconfig.prokaryote, is_fungus=qconfig.is_fungus)
    if not clade_dirpath:
        logger.info('Failed finding conservative genes.')
        return

    config_fpath = make_config(output_dir, tmp_dir, busco_threads, clade_dirpath, augustus_dirpath)
    logger.info('Logs and results will be saved under ' + output_dir + '...')

    os.environ['BUSCO_CONFIG_FILE'] = config_fpath
    # Check the helper's result before exporting it: os.environ rejects
    # non-string values, so a None result would raise TypeError instead of
    # reaching the error message.
    augustus_config_dirpath = copy_augustus_contigs(augustus_dirpath, tmp_dir)
    if not augustus_config_dirpath:
        logger.error('Augustus configs not found, failed to run BUSCO without them.')
        return
    os.environ['AUGUSTUS_CONFIG_PATH'] = augustus_config_dirpath

    busco_args = [[contigs_fpath, qutils.label_from_fpath_for_fname(contigs_fpath)]
                  for contigs_fpath in contigs_fpaths]
    summary_fpaths = run_parallel(busco_main_handler, busco_args, qconfig.max_threads)
    if not any(summary_fpaths):
        logger.error('Failed running BUSCO for all the assemblies. See log files in ' + output_dir +
                     ' for information.')
        return

    # saving results
    for contigs_fpath, summary_fpath in zip(contigs_fpaths, summary_fpaths):
        report = reporting.get(contigs_fpath)

        if not (summary_fpath and os.path.isfile(summary_fpath)):
            logger.error('Failed running BUSCO for ' + contigs_fpath + '. See the log for detailed information.')
            continue

        total_buscos, part_buscos, complete_buscos = 0, 0, 0
        with open(summary_fpath) as summary_file:
            for line in summary_file:
                fields = line.split()
                # Header and blank lines can mention a keyword (e.g. inside
                # an assembly path); only lines starting with a count are
                # converted, so int() cannot raise ValueError.
                if not fields or not fields[0].isdigit():
                    continue
                count = int(fields[0])
                if 'Complete BUSCOs' in line:
                    complete_buscos = count
                elif 'Fragmented' in line:
                    part_buscos = count
                elif 'Total' in line:
                    total_buscos = count

        if total_buscos != 0:
            report.add_field(reporting.Fields.BUSCO_COMPLETE,
                             ('%.2f' % (float(complete_buscos) * 100.0 / total_buscos)))
            report.add_field(reporting.Fields.BUSCO_PART,
                             ('%.2f' % (float(part_buscos) * 100.0 / total_buscos)))
        shutil.copy(summary_fpath, output_dir)

    if not qconfig.debug:
        cleanup(output_dir)
    logger.info('Done.')
def do(contigs_fpaths, output_dir, logger):
    """Run BUSCO for every assembly and add completeness figures to the reports.

    Obtains and compiles Augustus, obtains the BLAST binaries, registers the
    Augustus directory with ``set_augustus_dir``, creates ``output_dir`` and
    its ``tmp`` subdirectory, fetches the lineage database chosen by
    ``qconfig.prokaryote`` / ``qconfig.is_fungus`` and calls ``busco.main``
    for all assemblies through ``run_parallel``. Each returned summary file is
    parsed; when its total is non-zero, ``BUSCO_COMPLETE`` and ``BUSCO_PART``
    are added to the assembly report as percentages with two decimals.

    :param contigs_fpaths: paths of the assemblies to process.
    :param output_dir: directory for results and ``busco.log`` (created if missing).
    :param logger: logger used for all messages.
    :return: None; every failure is logged and ends the function early.
    """
    logger.print_timestamp()
    logger.info('Running BUSCO...')

    augustus_dirpath = download_augustus(logger)
    compilation_success = bool(augustus_dirpath) and bool(
        compile_tool('Augustus', augustus_dirpath, [join('bin', 'augustus')], logger=logger))

    if compilation_success and not download_blast_binaries(logger=logger, filenames=blast_filenames):
        compilation_success = False

    if not compilation_success:
        logger.info('Failed finding conservative genes.')
        return

    set_augustus_dir(augustus_dirpath)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    tmp_dir = join(output_dir, 'tmp')
    if not os.path.isdir(tmp_dir):
        os.makedirs(tmp_dir)

    # With no assemblies n_jobs would be zero and the integer division below
    # would raise ZeroDivisionError.
    if not contigs_fpaths:
        logger.info('No assemblies provided, skipping BUSCO.')
        return

    n_jobs = max(1, min(len(contigs_fpaths), qconfig.max_threads))
    busco_threads = max(1, qconfig.max_threads // n_jobs)

    clade_dirpath = download_db(logger, is_prokaryote=qconfig.prokaryote, is_fungus=qconfig.is_fungus)
    if not clade_dirpath:
        logger.info('Failed finding conservative genes.')
        return

    log_fpath = join(output_dir, 'busco.log')
    logger.info('Logging to ' + log_fpath + '...')

    busco_args = []
    for contigs_fpath in contigs_fpaths:
        cmdline = ['-i', contigs_fpath,
                   '-o', qutils.label_from_fpath_for_fname(contigs_fpath),
                   '-l', clade_dirpath,
                   '-m', 'genome', '-f', '-z',
                   '-c', str(busco_threads),
                   '-t', tmp_dir,
                   '--augustus_parameters=\'--AUGUSTUS_CONFIG_PATH=' + join(augustus_dirpath, 'config') + '\'']
        busco_args.append((cmdline, output_dir))

    summary_fpaths = run_parallel(busco.main, busco_args, qconfig.max_threads)
    if not any(summary_fpaths):
        logger.error('Failed running BUSCO for all the assemblies. See ' + log_fpath + ' for information.')
        return

    # saving results
    for contigs_fpath, summary_fpath in zip(contigs_fpaths, summary_fpaths):
        report = reporting.get(contigs_fpath)

        if summary_fpath and os.path.isfile(summary_fpath):
            total_buscos, part_buscos, complete_buscos = 0, 0, 0
            with open(summary_fpath) as summary_file:
                for line in summary_file:
                    fields = line.split()
                    # Convert only lines that start with a count; a header
                    # line mentioning a keyword (e.g. in an assembly path)
                    # would otherwise make int() raise ValueError.
                    if not fields or not fields[0].isdigit():
                        continue
                    count = int(fields[0])
                    if 'Complete BUSCOs' in line:
                        complete_buscos = count
                    elif 'Fragmented' in line:
                        part_buscos = count
                    elif 'Total' in line:
                        total_buscos = count

            if total_buscos != 0:
                report.add_field(reporting.Fields.BUSCO_COMPLETE,
                                 ('%.2f' % (float(complete_buscos) * 100.0 / total_buscos)))
                report.add_field(reporting.Fields.BUSCO_PART,
                                 ('%.2f' % (float(part_buscos) * 100.0 / total_buscos)))
        else:
            logger.error('Failed running BUSCO for ' + contigs_fpath + '. See ' + log_fpath + ' for information.')

    logger.info('Done.')
def process_blast(blast_assemblies, downloaded_dirpath, corrected_dirpath, labels, blast_check_fpath, err_fpath):
    """Run BlastN for the assemblies and pick reference candidates from the hits.

    The BLAST database is either the custom one from
    ``qconfig.custom_blast_db_fpath`` (a directory containing ``<dbname>.nsq``
    or the ``.nsq`` file itself) or the one obtained by ``download_blastdb``.
    ``parallel_blast`` is run for every entry of ``blast_assemblies``; then the
    result file of every label is parsed.

    A species name is the first two ``_``-separated parts of a sequence name;
    sequences with fewer parts or containing ``uncultured`` are ignored.
    Within a result file each ``#`` line resets the per-query counter. The
    first accepted hit after such a line competes for a place in the
    per-assembly list (one entry per species); further accepted hits, up to
    ``max_entries`` per query, are recorded in the replacement dictionary.

    :param blast_assemblies: objects with ``fpath`` and ``label`` to run BlastN on.
    :param downloaded_dirpath: directory holding the ``blast.res`` output.
    :param corrected_dirpath: passed through to ``parallel_blast``.
    :param labels: assembly labels whose result files are parsed.
    :param blast_check_fpath: passed through to ``parallel_blast``.
    :param err_fpath: passed through to ``parallel_blast``.
    :return: ``(species_scores, species_by_assembly, replacement_dict)``, or
        ``(None, None, None)`` if prerequisites are missing or nothing was
        found. The failure value has three items so that it unpacks the same
        way as the success value.
    """
    global db_fpath

    if not download_blast_binaries(filenames=blast_filenames):
        return None, None, None

    if qconfig.custom_blast_db_fpath:
        db_fpath = qconfig.custom_blast_db_fpath
        if isdir(db_fpath):
            db_aux_files = [f for f in os.listdir(db_fpath) if f.endswith('.nsq')]
            if db_aux_files:
                # Strip only the extension; str.replace would also remove
                # '.nsq' occurring elsewhere in the file name.
                db_fpath = join(qconfig.custom_blast_db_fpath, db_aux_files[0][:-len('.nsq')])
        elif isfile(db_fpath) and db_fpath.endswith('.nsq'):
            db_fpath = db_fpath[:-len('.nsq')]
        if not os.path.isfile(db_fpath + '.nsq'):
            logger.error('You should specify path to BLAST database obtained by running makeblastdb command: '
                         'either path to directory containing <dbname>.nsq file or path to <dbname>.nsq file itself.'
                         ' Also you can rerun MetaQUAST without --blast-db option. MetaQUAST uses SILVA 16S RNA database by default.',
                         exit_with_code=2)
    elif not download_blastdb():
        return None, None, None

    blast_res_fpath = os.path.join(downloaded_dirpath, 'blast.res')

    if len(blast_assemblies) > 0:
        logger.main_info('Running BlastN..')
        n_jobs = min(qconfig.max_threads, len(blast_assemblies))
        blast_threads = max(1, qconfig.max_threads // n_jobs)
        if is_python2():
            from joblib import Parallel, delayed
        else:
            from joblib3 import Parallel, delayed
        Parallel(n_jobs=n_jobs)(
            delayed(parallel_blast)(assembly.fpath, assembly.label, corrected_dirpath, err_fpath,
                                    blast_res_fpath, blast_check_fpath, blast_threads)
            for assembly in blast_assemblies)

    logger.main_info()
    species_scores = []
    species_by_assembly = dict()
    max_entries = 4
    replacement_dict = defaultdict(list)
    # Column positions used until a "Fields" header says otherwise, and for
    # any column the header does not name.
    default_cols = {'query id': 0, 'subject id': 1, '% identity': 2, 'alignment length': 3, 'bit score': 11}
    for label in labels:
        assembly_scores = []
        assembly_species = []
        res_fpath = get_blast_output_fpath(blast_res_fpath, label)
        if os.path.exists(res_fpath):
            refs_for_query = 0
            with open(res_fpath) as res_file:
                cols = dict(default_cols)
                for line in res_file:
                    if line.startswith('#'):
                        refs_for_query = 0
                        # Fields: query id, subject id, % identity, alignment length, mismatches,
                        # gap opens, q. start, q. end, s. start, s. end, evalue, bit score
                        if 'Fields' in line:
                            names = line.strip().split('Fields: ')[-1].split(', ')
                            for col_name, default_idx in default_cols.items():
                                # A missing column keeps its default position
                                # instead of raising ValueError from index().
                                cols[col_name] = names.index(col_name) if col_name in names else default_idx
                        continue

                    fs = line.split()
                    if refs_for_query >= max_entries or len(fs) <= cols['bit score']:
                        continue

                    query_id = fs[cols['query id']]
                    organism_id = fs[cols['subject id']]
                    idy = float(fs[cols['% identity']])
                    length = int(fs[cols['alignment length']])
                    score = float(fs[cols['bit score']])
                    if not (idy >= qconfig.identity_threshold and length >= qconfig.min_length
                            and score >= qconfig.min_bitscore):
                        continue

                    seqname, taxons = parse_organism_id(organism_id)
                    if not seqname:
                        continue
                    name_parts = seqname.split('_')
                    if len(name_parts) < 2 or 'uncultured' in seqname:
                        continue
                    species_name = name_parts[0] + '_' + name_parts[1]

                    if refs_for_query != 0:
                        # Not the first accepted hit of this query: keep it
                        # only as a possible replacement.
                        if seqname not in replacement_dict[query_id]:
                            replacement_dict[query_id].append(seqname)
                            refs_for_query += 1
                        continue

                    if species_name not in assembly_species:
                        assembly_scores.append((seqname, query_id, score))
                        if taxons:
                            taxons_for_krona[correct_name(seqname)] = taxons
                        assembly_species.append(species_name)
                        refs_for_query += 1
                    else:
                        # Use a distinct loop variable: under Python 2 a list
                        # comprehension leaks its variables, which would
                        # overwrite seqname/query_id/score of the current hit.
                        same_species = [entry for entry in assembly_scores if species_name in entry[0]]
                        if same_species and score > same_species[0][2]:
                            assembly_scores.remove(same_species[0])
                            assembly_scores.append((seqname, query_id, score))
                            if taxons:
                                taxons_for_krona[correct_name(seqname)] = taxons
                            refs_for_query += 1

        # Rank by bit score (name as tie-breaker) before truncating. Sorting
        # the raw (seqname, query_id, score) tuples would order by name and
        # keep an arbitrary subset instead of the best-scoring references.
        assembly_scores = sorted(assembly_scores, key=lambda entry: (entry[2], entry[0]), reverse=True)
        assembly_scores = assembly_scores[:qconfig.max_references]
        for entry in assembly_scores:
            # Skip sequences already selected for a previously processed label.
            if not any(entry[0] in species_list for species_list in species_by_assembly.values()):
                species_scores.append(entry)
        species_by_assembly[label] = [entry[0] for entry in assembly_scores]

    if not species_scores:
        return None, None, None
    return species_scores, species_by_assembly, replacement_dict
def process_blast(blast_assemblies, downloaded_dirpath, corrected_dirpath, labels, blast_check_fpath, err_fpath):
    """Run BlastN for the assemblies and pick reference candidates from the hits.

    The BLAST database is either the custom one from
    ``qconfig.custom_blast_db_fpath`` (a directory containing ``<dbname>.nsq``
    or the ``.nsq`` file itself) or the one obtained by ``download_blastdb``.
    ``parallel_blast`` is run for every entry of ``blast_assemblies``; then the
    result file of every label is parsed.

    Only the first hit line after each ``#`` line is considered while no hit
    has been accepted for the current query. A species name is the first two
    ``_``-separated parts of a sequence name; sequences with fewer parts or
    containing ``uncultured`` are ignored. At most one sequence per species
    is kept per assembly (the higher-scoring one).

    :param blast_assemblies: objects with ``fpath`` and ``label`` to run BlastN on.
    :param downloaded_dirpath: directory holding the ``blast.res`` output.
    :param corrected_dirpath: passed through to ``parallel_blast``.
    :param labels: assembly labels whose result files are parsed.
    :param blast_check_fpath: passed through to ``parallel_blast``.
    :param err_fpath: passed through to ``parallel_blast``.
    :return: ``(scores_organisms, organisms_assemblies)`` where
        ``scores_organisms`` is a list of ``(score, seqname)`` tuples and
        ``organisms_assemblies`` maps a label to its list of sequence names;
        ``(None, None)`` if prerequisites are missing or nothing was found.
    """
    global db_fpath

    if not download_blast_binaries(filenames=blast_filenames):
        return None, None

    if qconfig.custom_blast_db_fpath:
        db_fpath = qconfig.custom_blast_db_fpath
        if isdir(db_fpath):
            db_aux_files = [f for f in os.listdir(db_fpath) if f.endswith('.nsq')]
            if db_aux_files:
                # Strip only the extension; str.replace would also remove
                # '.nsq' occurring elsewhere in the file name.
                db_fpath = join(qconfig.custom_blast_db_fpath, db_aux_files[0][:-len('.nsq')])
        elif isfile(db_fpath) and db_fpath.endswith('.nsq'):
            db_fpath = db_fpath[:-len('.nsq')]
        if not os.path.isfile(db_fpath + '.nsq'):
            logger.error('You should specify path to BLAST database obtained by running makeblastdb command: '
                         'either path to directory containing <dbname>.nsq file or path to <dbname>.nsq file itself.'
                         ' Also you can rerun MetaQUAST without --blast-db option. MetaQUAST uses SILVA 16S RNA database by default.',
                         exit_with_code=2)
    elif not download_blastdb():
        return None, None

    blast_res_fpath = os.path.join(downloaded_dirpath, 'blast.res')

    if len(blast_assemblies) > 0:
        logger.main_info('Running BlastN..')
        n_jobs = min(qconfig.max_threads, len(blast_assemblies))
        blast_threads = max(1, qconfig.max_threads // n_jobs)
        if is_python2():
            from joblib import Parallel, delayed
        else:
            from joblib3 import Parallel, delayed
        Parallel(n_jobs=n_jobs)(
            delayed(parallel_blast)(assembly.fpath, assembly.label, corrected_dirpath, err_fpath,
                                    blast_res_fpath, blast_check_fpath, blast_threads)
            for assembly in blast_assemblies)

    logger.main_info('')
    scores_organisms = []
    organisms_assemblies = {}
    # Positions of the needed columns in the tabular BLAST output.
    subj_id_col, idy_col, len_col, score_col = 1, 2, 3, 11
    for label in labels:
        all_scores = []
        organisms = []
        res_fpath = get_blast_output_fpath(blast_res_fpath, label)
        if os.path.exists(res_fpath):
            refs_for_query = 0
            with open(res_fpath) as res_file:
                for line in res_file:
                    if line.startswith('#'):
                        refs_for_query = 0
                        continue

                    # TODO: find and parse "Fields" line to detect each column indexes:
                    # Fields: query id, subject id, % identity, alignment length, mismatches,
                    # gap opens, q. start, q. end, s. start, s. end, evalue, bit score
                    # We need: identity, length, score, query and subject id.
                    fs = line.split()
                    # The score is read from index 11, so at least 12 fields
                    # are required (a "> 10" check lets an 11-field line
                    # through and raises IndexError).
                    if refs_for_query != 0 or len(fs) <= score_col:
                        continue

                    organism_id = fs[subj_id_col]
                    idy = float(fs[idy_col])
                    length = int(fs[len_col])
                    score = float(fs[score_col])
                    if not (idy >= qconfig.identity_threshold and length >= qconfig.min_length
                            and score >= qconfig.min_bitscore):
                        continue

                    seqname, taxons = parse_organism_id(organism_id)
                    if not seqname:
                        continue
                    name_parts = seqname.split('_')
                    if len(name_parts) < 2 or 'uncultured' in seqname:
                        continue
                    specie = name_parts[0] + '_' + name_parts[1]

                    if specie not in organisms:
                        all_scores.append((score, seqname))
                        if taxons:
                            taxons_for_krona[correct_name(seqname)] = taxons
                        organisms.append(specie)
                        refs_for_query += 1
                    else:
                        # Species already present: keep whichever hit scores higher.
                        same_species = [entry for entry in all_scores if specie in entry[1]]
                        if same_species and score > same_species[0][0]:
                            all_scores.remove(same_species[0])
                            all_scores.append((score, seqname))
                            if taxons:
                                taxons_for_krona[correct_name(seqname)] = taxons
                            refs_for_query += 1

        all_scores = sorted(all_scores, reverse=True)
        all_scores = all_scores[:qconfig.max_references]
        for entry in all_scores:
            # Skip sequences already selected for a previously processed label
            # (no builtin name such as ``list`` is used as a loop variable).
            if not any(entry[1] in known_names for known_names in organisms_assemblies.values()):
                scores_organisms.append(entry)
        organisms_assemblies[label] = [entry[1] for entry in all_scores]

    if not scores_organisms:
        return None, None
    return scores_organisms, organisms_assemblies