Example #1
    def bulk_search(
        self,
        fasta: hug.types.text,
        threshold: hug.types.float_number = 1.0,
        config: hug.types.text = None,
        score: hug.types.smart_boolean = False,
        format: hug.types.one_of(["json", "csv"]) = "json",
        stream: hug.types.smart_boolean = False,
    ):
        config = get_config_from_file(config)

        fasta = Fasta(fasta)
        if not stream:
            # Each worker gets a copy of the config with nproc pinned to 1,
            # so the forked processes run their queries serially.
            _config = copy.copy(config)
            _config["nproc"] = 1
            nproc = config.get("nproc", 1)
            with multiprocessing.Pool(processes=nproc) as pool:
                args = [(_config, str(seq), threshold, score)
                        for seq in fasta.values()]
                batches = chunks(args, math.ceil(len(args) / nproc))
                dd = pool.map_async(search_bigsi_parallel, batches).get()
                # Flatten the per-batch result lists into a single list of dicts.
                dd = [item for sublist in dd for item in sublist]
            if format == "csv":
                return "\n".join([d_to_csv(d, False, False) for d in dd])
            else:
                return json.dumps(dd, indent=4)
        else:
            # Streaming mode prints each result as it is computed and returns nothing.
            bigsi = BIGSI(config)
            for i, seq in enumerate(fasta.values()):
                seq = str(seq)
                d = {
                    "query": seq,
                    "threshold": threshold,
                    "results": bigsi.search(seq, threshold, score),
                    "citation": "http://dx.doi.org/10.1038/s41587-018-0010-1",
                }
                if format == "csv":
                    # Emit the header only for the first record and a trailing
                    # newline only after the last one.
                    if i == 0:
                        with_header = True
                        carriage_return = False
                    elif i == len(fasta) - 1:
                        with_header = False
                        carriage_return = True
                    else:
                        with_header = False
                        carriage_return = False
                    print(d_to_csv(d, with_header, carriage_return))
                else:
                    print(json.dumps(d))
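Example #1 hands search_bigsi_parallel one batch of (config, sequence, threshold, score) tuples per worker, but the helper itself is not shown. A minimal sketch consistent with how it is called above, assuming each worker process opens its own index handle (an illustration, not the library's actual implementation):

    def search_bigsi_parallel(batch):
        # Each batch item mirrors the tuples built in Example #1:
        # (config, seq, threshold, score); one BIGSI handle per worker process.
        bigsi = BIGSI(batch[0][0])
        results = []
        for _config, seq, threshold, score in batch:
            results.append({
                "query": seq,
                "threshold": threshold,
                "results": bigsi.search(seq, threshold, score),
                "citation": "http://dx.doi.org/10.1038/s41587-018-0010-1",
            })
        return results

The per-batch lists returned here are what the flattening step after map_async().get() collapses into the dd list that the csv/json branches format.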
Example #2
    def bulk_search(
        self,
        fasta: hug.types.text,
        threshold: hug.types.float_number = 1.0,
        config: hug.types.text = None,
        score: hug.types.smart_boolean = False,
        format: hug.types.one_of(["json", "csv"]) = "json",
        stream: hug.types.smart_boolean = False,
    ):
        config = get_config_from_file(config)
        bigsi = BIGSI(config)
        fasta = Fasta(fasta)
        if not stream:
            # One shared index handle, queried from a pool of threads.
            nproc = config.get("nproc", 1)
            with ThreadPool(processes=nproc) as pool:
                args = [(bigsi, str(seq), threshold, score)
                        for seq in fasta.values()]
                dd = pool.starmap(search_bigsi, args)
            if format == "csv":
                return "\n".join([d_to_csv(d, False, False) for d in dd])
            else:
                return json.dumps(dd, indent=4)
        else:
            # Streaming mode prints each result as it is computed and returns nothing.
            for i, seq in enumerate(fasta.values()):
                seq = str(seq)
                d = {
                    "query": seq,
                    "threshold": threshold,
                    "results": bigsi.search(seq, threshold, score),
                    "citation": "http://dx.doi.org/10.1038/s41587-018-0010-1",
                }
                if format == "csv":
                    # Emit the header only for the first record and a trailing
                    # newline only after the last one.
                    if i == 0:
                        with_header = True
                        carriage_return = False
                    elif i == len(fasta) - 1:
                        with_header = False
                        carriage_return = True
                    else:
                        with_header = False
                        carriage_return = False
                    print(d_to_csv(d, with_header, carriage_return))
                else:
                    print(json.dumps(d))
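Example #2's thread pool calls search_bigsi via starmap, and that helper is also not shown. A sketch that matches the argument tuples built above and the result shape used in the streaming branch (again illustrative, not the library's actual code):

    def search_bigsi(bigsi, seq, threshold, score):
        # Run a single query against the shared index and package the result.
        return {
            "query": seq,
            "threshold": threshold,
            "results": bigsi.search(seq, threshold, score),
            "citation": "http://dx.doi.org/10.1038/s41587-018-0010-1",
        }

Unlike Example #1, this variant shares one open index across threads instead of handing each forked worker its own config and letting it open the index itself.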
Example #3
    def search(
        self,
        seq: hug.types.text,
        threshold: hug.types.float_number = 1.0,
        config: hug.types.text = None,
        score: hug.types.smart_boolean = False,
        format: hug.types.one_of(["json", "csv"]) = "json",
    ):
        config = get_config_from_file(config)
        bigsi = BIGSI(config)
        d = {
            "query": seq,
            "threshold": threshold,
            "results": bigsi.search(seq, threshold, score),
            "citation": "http://dx.doi.org/10.1038/s41587-018-0010-1",
        }
        if format == "csv":
            return d_to_csv(d)
        else:
            return json.dumps(d, indent=4)
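A minimal call sketch for Example #3. The wrapper class name, config path, and query sequence below are placeholders, not part of the source:

    import json

    api = BigsiAPI()  # hypothetical class exposing the search() method above
    query = "CGGCGAGGAAGCGTTAAATCTCTTTCTGACG"  # placeholder query sequence
    report = json.loads(api.search(query, threshold=0.8, config="bigsi.yaml"))
    print(report["results"])

With format="json" (the default) the method returns a JSON string, so the caller parses it before inspecting the hits.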