def bulk_search(
    self,
    fasta: hug.types.text,
    threshold: hug.types.float_number = 1.0,
    config: hug.types.text = None,
    score: hug.types.smart_boolean = False,
    format: hug.types.one_of(["json", "csv"]) = "json",
    stream: hug.types.smart_boolean = False,
):
    """Search every sequence of a FASTA file against the BIGSI index.

    Args:
        fasta: path to the FASTA file of query sequences.
        threshold: minimum k-mer proportion for a hit (default 1.0).
        config: path to a BIGSI config file; ``None`` uses the default.
        score: whether to compute per-hit scores.
        format: output format, ``"json"`` or ``"csv"``.
        stream: if True, print one result per query as it is computed
            instead of returning a combined string.

    Returns:
        When ``stream`` is False, the combined results as a CSV or
        JSON string; when ``stream`` is True, results are printed and
        nothing is returned.
    """
    config = get_config_from_file(config)
    fasta = Fasta(fasta)
    if not stream:
        # Fan the queries out over a process pool. Each worker gets a
        # copy of the config with nproc forced to 1 so workers do not
        # themselves try to parallelise.
        _config = copy.copy(config)
        _config["nproc"] = 1
        nproc = config.get("nproc", 1)
        with multiprocessing.Pool(processes=nproc) as pool:
            args = [(_config, str(seq), threshold, score) for seq in fasta.values()]
            # One chunk per worker; each call returns a list of result
            # dicts, flattened below.
            dd = pool.map_async(
                search_bigsi_parallel,
                chunks(args, math.ceil(len(args) / nproc)),
            ).get()
            dd = [item for sublist in dd for item in sublist]
            if format == "csv":
                return "\n".join([d_to_csv(d, False, False) for d in dd])
            else:
                return json.dumps(dd, indent=4)
    else:
        # Streaming mode: search sequentially and print each result as
        # soon as it is available.
        bigsi = BIGSI(config)
        for i, seq in enumerate(fasta.values()):
            seq = str(seq)
            d = {
                "query": seq,
                "threshold": threshold,
                "results": bigsi.search(seq, threshold, score),
                "citation": "http://dx.doi.org/10.1038/s41587-018-0010-1",
            }
            if format == "csv":
                # BUG FIX: both flags must be recomputed on every
                # iteration. Previously `with_header` was not reset in
                # the last-record branch, so a 2-sequence FASTA printed
                # the CSV header twice (stale True from iteration 0).
                with_header = i == 0
                carriage_return = i == len(fasta) - 1
                print(d_to_csv(d, with_header, carriage_return))
            else:
                print(json.dumps(d))
def bulk_search(
    self,
    fasta: hug.types.text,
    threshold: hug.types.float_number = 1.0,
    config: hug.types.text = None,
    score: hug.types.smart_boolean = False,
    format: hug.types.one_of(["json", "csv"]) = "json",
    stream: hug.types.smart_boolean = False,
):
    """Search every sequence of a FASTA file against a shared BIGSI index.

    Unlike the process-pool variant, a single BIGSI instance is shared
    across a thread pool (I/O-bound lookups release the GIL).

    Args:
        fasta: path to the FASTA file of query sequences.
        threshold: minimum k-mer proportion for a hit (default 1.0).
        config: path to a BIGSI config file; ``None`` uses the default.
        score: whether to compute per-hit scores.
        format: output format, ``"json"`` or ``"csv"``.
        stream: if True, print one result per query as it is computed
            instead of returning a combined string.

    Returns:
        When ``stream`` is False, the combined results as a CSV or
        JSON string; when ``stream`` is True, results are printed and
        nothing is returned.
    """
    config = get_config_from_file(config)
    bigsi = BIGSI(config)
    fasta = Fasta(fasta)
    if not stream:
        nproc = config.get("nproc", 1)
        with ThreadPool(processes=nproc) as pool:
            args = [(bigsi, str(seq), threshold, score) for seq in fasta.values()]
            dd = pool.starmap(search_bigsi, args)
            if format == "csv":
                return "\n".join([d_to_csv(d, False, False) for d in dd])
            else:
                return json.dumps(dd, indent=4)
    else:
        # Streaming mode: search sequentially and print each result as
        # soon as it is available.
        for i, seq in enumerate(fasta.values()):
            seq = str(seq)
            d = {
                "query": seq,
                "threshold": threshold,
                "results": bigsi.search(seq, threshold, score),
                "citation": "http://dx.doi.org/10.1038/s41587-018-0010-1",
            }
            if format == "csv":
                # BUG FIX: both flags must be recomputed on every
                # iteration. Previously `with_header` was not reset in
                # the last-record branch, so a 2-sequence FASTA printed
                # the CSV header twice (stale True from iteration 0).
                with_header = i == 0
                carriage_return = i == len(fasta) - 1
                print(d_to_csv(d, with_header, carriage_return))
            else:
                print(json.dumps(d))
def search(
    self,
    seq: hug.types.text,
    threshold: hug.types.float_number = 1.0,
    config: hug.types.text = None,
    score: hug.types.smart_boolean = False,
    format: hug.types.one_of(["json", "csv"]) = "json",
):
    """Search a single sequence against the BIGSI index.

    Args:
        seq: the query sequence.
        threshold: minimum k-mer proportion for a hit (default 1.0).
        config: path to a BIGSI config file; ``None`` uses the default.
        score: whether to compute per-hit scores.
        format: output format, ``"json"`` or ``"csv"``.

    Returns:
        The search result rendered as a CSV or JSON string.
    """
    loaded_config = get_config_from_file(config)
    index = BIGSI(loaded_config)
    result = {
        "query": seq,
        "threshold": threshold,
        "results": index.search(seq, threshold, score),
        "citation": "http://dx.doi.org/10.1038/s41587-018-0010-1",
    }
    return d_to_csv(result) if format == "csv" else json.dumps(result, indent=4)