def setUp(self):
    """Build a canonical parsed-arguments dict and resolve tool paths.

    Side effects: calls utils.preflight (which may read/write the config
    files listed below) and snapshots $PATH and the working directory so
    tests that mutate them can restore the originals.
    """
    # Mirrors the dict produced by utils.parse_arguments for the command
    # line exercised in test_parse_args:
    # r2g -o OUTPUT -s SRXNNNNNN -q ATGC --cut 80,50 -p blastn --CPU 4 --retry
    self.args = {
        'CPU': 4,
        'browser': None,
        'chrome_proxy': None,
        'cleanup': False,
        'cut': '80,50',
        'docker': False,
        'evalue': 0.001,
        'firefox_proxy': None,
        'max_memory': '4G',
        'max_num_seq': 1000,
        'min_contig_length': 150,
        'outdir': 'OUTPUT',
        'program': 'blastn',
        'proxy': None,
        'query': 'ATGC',
        'retry': float('inf'),
        'sra': 'SRXNNNNNN',
        'stage': 'butterfly',
        'trim': False,
        'verbose': False
    }
    # Candidate config locations checked by preflight: the package-level
    # path.json first, then the per-user ~/.r2g.path.json.
    self.config_files = [
        os.path.abspath(os.path.join(r2g.__path__[0], "path.json")),
        os.path.abspath(os.path.join(os.path.expanduser('~'), ".r2g.path.json"))
    ]
    # Private copy of $PATH; test_check_apps overwrites os.environ['PATH'].
    self.path = deepcopy(os.environ['PATH'])
    self.app_json = utils.preflight(self.args)
    # self.app_json["chromedriver"] = os.environ.get("PRIVATE_WEBDRIVER", "http://127.0.0.1:4444/wd/hub")
    utils.log("app_json is {}".format(self.app_json))
    self.pwd = os.getcwd()
def test_format_seq_2(self):
    """_format_seq on a two-record FASTA file with cut "24,20".

    Both records ("A" and "B") share the same sequence body; the combined
    name must be "A_B" and the fragment list must match the stored fixture
    data/formatted_seq.json.
    """
    # Test 2:
    utils.log("Testing r2g.online.blast _format_seq 2.")
    query_file = tempfile.mkstemp(suffix=".fasta", prefix="r2g-test_tmp_", text=True)[-1]
    # Reuse the header-stripped sequence body for both records:
    body = self.query_fasta.strip().split('\n', 1)[1]
    fasta = ">{}\n{}\n>{}\n{}\n".format("A", body, "B", body)
    with open(query_file, 'w') as outf:
        outf.write(fasta)
    self.args['query'] = query_file
    self.args['cut'] = "24,20"
    name, seq = blast._format_seq(self.args)
    formatted_name = "A_B"
    with open(
            '{}/data/formatted_seq.json'.format(
                os.path.split(os.path.abspath(__file__))[0]), 'r') as inf:
        formatted_seq = json.loads(inf.read().strip())
    if name == formatted_name and seq == formatted_seq:
        assertion = True
    else:
        print(name)
        print(seq)
        assertion = False
    # BUG FIX: shutil.rmtree() only removes directories; called on this temp
    # FILE it failed, ignore_errors=True swallowed the failure, and the file
    # leaked. Use the same cleanup helper as the sibling tests:
    utils.delete_everything(query_file)
    self.assertTrue(assertion)
def test_check_sequences(self):
    """The preflight sequence check must reject an illegal FASTA character."""
    utils.log("Testing r2g.utils.utils _check_sequences")
    bad_fasta = tempfile.mkstemp(suffix=".fasta", prefix="r2g-test_tmp_", text=True)[-1]
    # "!" is not a valid residue character, so preflight should refuse it:
    with open(bad_fasta, 'w') as handle:
        handle.write(">some_gene\nATGC!\n")
    self.args['query'] = bad_fasta
    with self.assertRaises(errors.InputError):
        utils.preflight(self.args)
    utils.delete_everything(bad_fasta)
def test_fastq_dump_error(self):
    """fastq_dump must raise FetchError for non-numeric spot boundaries."""
    utils.log("Raising r2g.online.fetch fastq_dump error.")
    minimal_args = {'query': "ATGC", 'verbose': False, 'stage': 'butterfly'}
    apps = utils.preflight(minimal_args)
    # "X" and "J" are not valid spot numbers:
    with self.assertRaises(errors.FetchError):
        fetch.fastq_dump('SRR1812889', "X", "J", apps)
def test_query(self):
    """End-to-end blast.query against the private webdriver endpoint."""
    utils.log("Testing r2g.online.blast query.")
    ok = False
    try:
        gene, hits = blast.query(self.args, os.environ["PRIVATE_WEBDRIVER"])
    except Exception as err:
        utils.log("Error occurred while testing: {}".format(err))
    else:
        # Expect the fixture gene name and at least one SRR1812889 spot:
        ok = gene == 'some_gene' and len(hits.get('SRR1812889', [])) > 0
    self.assertTrue(ok)
def run(self):
    """Run Trinity synchronously; return (output_dir, log_path) on success.

    Raises errors.AssembleError on a non-zero exit code or any other
    failure. Note that the AssembleError raised for a non-zero exit code is
    itself caught by the broad `except Exception` below and re-wrapped, so
    callers see the combined message. Whatever output was captured is always
    flushed to self.log via the `finally` clause, even on failure.
    """
    utils.log("Trinity cmd: {}".format(' '.join(self.cmd)))
    utils.log("Trinity is running. Output dir: {}".format(self.output))
    utils.log("Trinity log file: {}".format(self.log))
    logs = ""
    try:
        # argv list with shell=False; stderr is merged into stdout so the
        # whole transcript lands in one buffer:
        p = subprocess.run(
            self.cmd,
            shell=False,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        logs = p.stdout.decode('utf-8')
        if p.returncode != 0:
            if self.args['verbose']:
                print(logs)
            raise errors.AssembleError("Trinity exited {}.".format(
                p.returncode))
        else:
            utils.log("Trinity done.")
            return self.output, self.log
    except Exception as err:
        if self.args['verbose'] and len(logs.strip()) > 0:
            print(logs)
        raise errors.AssembleError(
            "Errors raised when called Trinity. {}. "
            "Please check the Trinity log above.".format(err))
    finally:
        # Persist the captured transcript on both success and failure paths:
        if len(logs.strip()) > 0:
            with open(self.log, 'w') as outf:
                outf.write(logs)
def __call__(self, parser, namespace, values, option_string=None):
    """Run the bundled quick self-test (a full r2g dry run) and exit.

    Builds a command line from quick_test.json, runs it into a temporary
    output directory with a 10-minute timeout, reports the outcome, removes
    the temp dir, and exits with the subprocess's return code (2 on
    timeout, 3 on any other launch error).
    """
    args = utils.file2json(os.path.join(r2g.__path__[0], "quick_test.json"))
    output_dir = tempfile.mkdtemp(prefix="r2g-dryrun_tmp_")
    cmd = [
        self.r2g_script,
        "-q", args['query'],
        "-s", args['sra'],
        "-o", output_dir,
        "-c", args['cut'],
        "-p", args['program'],
        "--verbose",
    ]
    # Use a private webdriver endpoint when one is configured:
    try:
        webdriver_cmd = ["--browser", os.environ["PRIVATE_WEBDRIVER"]]
    except KeyError:
        webdriver_cmd = []
    cmd += webdriver_cmd
    err_msg = ""
    try:
        p = subprocess.run(cmd, shell=False, timeout=600)
    except subprocess.TimeoutExpired:
        # BUG FIX: the original wrote `except ... as err: err += "..."`,
        # which (a) rebound the string accumulator to the exception object,
        # making `+=` with a str a TypeError, and (b) left `err` undefined
        # after the except block (Python deletes the `as` target), so the
        # later `print(err)` raised NameError. Keep the message in a plain
        # string instead of reusing the exception name.
        err_msg = "\nThe quick test is supposed to finish in 10 minutes. Aborted."
        exit_code = 2
    except Exception as exc:
        err_msg = str(exc)
        exit_code = 3
    else:
        exit_code = p.returncode
    if exit_code != 0:
        print(err_msg)
        utils.log(
            "The quick test failed. Please check the error message above. "
            "Make sure the r2g was installed and configured correctly")
    else:
        utils.log(
            "The quick test done. Please feed me something real 😋")
    utils.delete_everything(output_dir)
    sys.exit(exit_code)
def test_parse_fastq_error(self):
    """_parse_fastq must reject malformed records and accept a valid one."""
    utils.log("Raising r2g.online.fetch _parse_fastq error.")
    # Five broken records (bad header, missing "@", short seq line, missing
    # "+" separator, wrong separator) followed by one valid record:
    fake_fastqs = [
        "@a\nATGC\n+\nAAAA\n",
        "a\nATGC\n+\nAAAA\n",
        "@a/1\nATG\n+\nAAAA\n",
        "@a/1\nATGC\n\nAAAA\n",
        "@a/1\nATGC\n?\nAAAA\n",
        "@a/1\nATGC\n+\nAAAA\n",
    ]

    def accepted(record):
        # True when _parse_fastq swallows the record without complaint.
        try:
            fetch._parse_fastq(record)
        except errors.FetchError:
            return False
        return True

    outcomes = [accepted(record) for record in fake_fastqs]
    self.assertEqual(outcomes, [False, False, False, False, False, True])
def _parse_xml(raw_results, args): download_list = {} r = -1 err = "" while r < int(args['retry']): try: results_tree = ET.fromstring(raw_results) except ET.ParseError as e: err = str(e) r += 1 print(raw_results) utils.log( "WARNING: couldn't get results from NCBI due to temporary errors. Retrying...", args['verbose'], 'debug') else: Iterations = results_tree.find('BlastOutput_iterations').findall( 'Iteration') for i in Iterations: hits = i.find('Iteration_hits') for hit in hits: hit = hit.find('Hit_accession').text.strip().split('.') try: sra = hit[0] spot = int(hit[1]) except (ValueError, IndexError): pass else: spots = deepcopy(download_list.get(sra, [])) spots.append(spot) download_list[sra] = deepcopy(spots) err = "" break if len(err) > 0: utils.log( "WARNING: couldn't get results for from NCBI due to temporary errors. " "The fragment was skipped.") if args['verbose']: with open( os.path.join(args['outdir'], "{}.xml".format(args['sra'])), 'w') as outf: outf.write(raw_results) return download_list
def test_format_seq_1(self):
    """_format_seq on a headerless 169-nt query: the name must default to
    "Undefined" and the sequence must be cut into overlapping fragments.
    """
    # Test 1 (total_length = 169, num_frag = 3):
    utils.log("Testing r2g.online.blast _format_seq 1.")
    # Drop the FASTA header line and pad with 29 "A"s to reach 169 nt:
    self.query_fasta = self.query_fasta.strip().split('\n', 1)[1] + 29 * "A"
    self.args['query'] = self.query_fasta
    name, seq = blast._format_seq(self.args)
    formatted_name = "Undefined"
    # NOTE(review): there are no commas between the string literals below,
    # so Python concatenates them and this list holds a SINGLE joined
    # string, not three fragments. Since this test reportedly passes,
    # _format_seq presumably returns a one-element list here — but TODO
    # confirm that commas were not accidentally dropped.
    formatted_seq = [
        ">Undefined_0\nAATCATTCCATTGATTAGACGATGGTTACACTTGGTTCACGTCGTGCGCGTTTCCCGTGTTCCCTCTAGACGTAGAAGTG\n"
        ">Undefined_1\nCTTGGTTCACGTCGTGCGCGTTTCCCGTGTTCCCTCTAGACGTAGAAGTGTTGGACTTTTTTTTTTGGGTGTTGTGCTGC\n"
        ">Undefined_2\nTCCCTCTAGACGTAGAAGTGTTGGACTTTTTTTTTTGGGTGTTGTGCTGCTATAAGCTGCTACTGCTGATTGAGGAAATT"
        "AAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
    ]
    if name == formatted_name and seq == formatted_seq:
        assertion = True
    else:
        # Dump the observed values to ease debugging on mismatch:
        print(name)
        print(seq)
        assertion = False
    self.assertTrue(assertion)
def test_parse_xml(self):
    """_parse_xml over three fixture reports: no hits, non-SRA hits, and a
    standard report yielding one spot for SRR1812889.
    """
    utils.log("Testing r2g.online.blast _parse_xml.")
    xml_dir = "{}/data".format(os.path.split(os.path.abspath(__file__))[0])
    xml_files = [
        "{}/no_result.xml".format(xml_dir),
        "{}/other_result.xml".format(xml_dir),
        "{}/standard_result.xml".format(xml_dir),
        # "{}/error_result.xml".format(xml_dir)
    ]
    parsed_results = [{}, {}, {'SRR1812889': [25821753]}]
    assertion = False
    # zip keeps each fixture paired with its expectation (replaces the
    # original range(len(...)) indexing):
    for xml_file, expected in zip(xml_files, parsed_results):
        with open(xml_file, 'r') as inf:
            download_list = blast._parse_xml(inf.read(), self.args)
        if download_list == expected:
            assertion = True
        else:
            assertion = False
            # Typo fixed in the log message ("occured" -> "occurred"):
            utils.log("Error occurred while parsing {}".format(xml_file))
            break
    self.assertTrue(assertion)
def test_fastq_dump(self):
    """fastq_dump must return the exact read pair and log for one known spot."""
    utils.log("Testing r2g.online.fetch fastq_dump.")
    minimal_args = {'query': "ATGC", 'verbose': False, 'stage': 'butterfly'}
    apps = utils.preflight(minimal_args)
    # Expected mate-pair reads for spot 232339 of SRR1812889:
    expected_fastq = {
        '1': '@FCC2U5KACXX:6:1101:9243:74192/1\n'
             'CACGTCGTGCGCGTTTTCCGTGTTCCCTCTAGCAGACCTCAAGGTTTTGGATTTTTTTTTGTGTGCTCAGTGCCAAAGTTGCTGATTGTC\n'
             '+SRR1812889.232339 FCC2U5KACXX:6:1101:9243:74192 length=90\n'
             'BB@FFFFDHHHHHHIJJJJJGHHHGIJIJIHJJJIJJJIJHIIJAHIJIICHHHHHDDDD?BCDDCDDDDCDCDDACCCCDDDDDCDCCD\n',
        '2': '@FCC2U5KACXX:6:1101:9243:74192/2\n'
             'TCCGGGAATCCACAGCAGCTCAGCAATGCGGGATTTTCCACTGCCCGATAAAAACAAGTTCTACTACTGATGATTTTTCACTTTCAGCTA\n'
             '+SRR1812889.232339 FCC2U5KACXX:6:1101:9243:74192 length=90\n'
             'CCCFFFFFHHHHHJJJJJJJIJJIJJIJJJJJIIJJJJIGIIJJJJJHIHHHFFFFDECEEEEDEDDDDDDDEEFEDDDCDDDDDCCDDD\n'
    }
    expected_log = ("SRR1812889 232339-232339:\n"
                    "b'Read 1 spots for SRR1812889\\nWritten 1 spots for SRR1812889\\n'----")
    utils.log("Testing fastq-dump.")
    fetched_fastq, fetched_log = fetch.fastq_dump('SRR1812889', 232339, 232339, apps)
    self.assertEqual((expected_fastq, expected_log), (fetched_fastq, fetched_log))
def test_check_apps(self):
    """Exercise utils.preflight's config-file resolution in two situations.

    Situation 1: tools absent from $PATH but config_files[0] (the package
    path.json) holds valid paths — preflight must return them unchanged.
    Situation 2: config_files[0] is read-only and invalid, config_files[1]
    points at a non-executable Trinity — preflight must fall back to the
    mocked interactive prompts and rewrite config_files[1].
    All environment changes are reverted at the end.
    """
    utils.log("Testing r2g.utils.utils configure files.")
    changing_app_json = deepcopy(self.app_json)
    # SITUATION 1: apps are not in $PATH and config_files[0] is configured.
    os.environ['PATH'] = '/usr/bin'
    os.chmod(self.config_files[0], S_IWUSR | S_IREAD)
    with open(self.config_files[0], 'w') as outf:
        json.dump(self.app_json, outf, indent=4, separators=(',', ': '))
    parsed_app_json = utils.preflight(self.args)
    if parsed_app_json == self.app_json:
        assertion1 = True
    else:
        assertion1 = False
    # SITUATION 2: apps are not in $PATH, config_files[0] is not writable, and both two configs are not configured.
    # Trinity is not found:
    with open(self.config_files[0], 'w') as outf:
        changing_app_json["Trinity"] = "/"
        json.dump(changing_app_json, outf, indent=4, separators=(',', ': '))
    # make the config_files[0] readable only:
    os.chmod(self.config_files[0], S_IREAD | S_IRGRP | S_IROTH)
    # Trinity is not executable:
    with open(self.config_files[1], 'w') as outf:
        changing_app_json["Trinity"] = "{}/data".format(os.path.split(os.path.abspath(__file__))[0])
        json.dump(changing_app_json, outf, indent=4, separators=(',', ': '))
    os.chmod("{}/data/Trinity".format(os.path.split(os.path.abspath(__file__))[0]), S_IREAD | S_IRGRP | S_IROTH)
    # Replace the interactive prompts with mocks that answer "yes" and
    # return the known-good tool paths:
    choose_yes = mock.Mock(return_value=True)
    trinity_dir = mock.Mock(return_value=self.app_json['Trinity'])
    fastq_dump_dir = mock.Mock(return_value=self.app_json['fastq-dump'])
    chromedriver_dir = mock.Mock(return_value=self.app_json['chromedriver'])
    utils._ask_yes_or_no = choose_yes
    utils._input_trinity_dir = trinity_dir
    utils._input_fastq_dump_dir = fastq_dump_dir
    utils._input_webdriver_dir = chromedriver_dir
    parsed_app_json = utils.preflight(self.args)
    # preflight should have persisted the corrected paths to the user config:
    with open(self.config_files[1], 'r') as inf:
        read_app_json = json.load(inf)
    if parsed_app_json == self.app_json and read_app_json == self.app_json:
        assertion2 = True
    else:
        assertion2 = False
    # Restore everything:
    os.environ['PATH'] = deepcopy(self.path)
    os.chmod(self.config_files[0], S_IWUSR | S_IREAD)
    utils.delete_everything(self.config_files[1])
    with open(self.config_files[0], 'w') as outf:
        outf.write("")
    utils.log("assertion 1 is {}".format(assertion1))
    utils.log("assertion 2 is {}".format(assertion2))
    self.assertTrue(assertion1 & assertion2)
def test_query_cut_error_2(self):
    """blast.query must reject a non-numeric --cut value."""
    utils.log("Raising r2g.online.blast query Error 2.")
    endpoint = "http://127.0.0.1:4444/wd/hub"
    # "X,J" cannot be parsed as fragment/overlap lengths:
    self.args["cut"] = "X,J"
    with self.assertRaises(errors.InputError):
        blast.query(self.args, endpoint)
def qblast(
        program,
        srx,  # only accept SRX
        query,
        query_from=None,
        query_to=None,
        max_num_seq=500,
        expect=10.0,
        repeat_filter=None,  # filter out low complexity regions
        short_query=None,
        word_size=None,
        job_title=None,
        format_type="XML",
        browser="http://127.0.0.1:4444/wd/hub",
        proxies=(None, None),  # (webdriver_proxy, general_proxy)
        verbose=False,
):
    """BLAST search using the selenium module:
    Submits the query through a Chrome webdriver, then polls NCBI over
    plain HTTP (reusing the browser's cookies) until the report is ready,
    and returns the report text (XML by default).

    Some useful parameters:
    - program        megaBlast, blastn, discoMegablast, or tblastn (capital sensitive)
    - sra            Which sra database to search against (srr or srx).
    - sequence       The sequence to search.
    - max_num_seq    The number of hits that NCBI returned.
    - expect         An expect value cutoff. Default 10.0.
    - repeat_filter  "L" turns on filtering low complexity regions. Default no filtering.
    - word_size      default: 28 for blastn, 6 for tblastn
    - format_type    "HTML", "Text", "ASN.1", or "XML". Default "XML".

    Raises errors.QueryError when NCBI reports the search failed or the
    ready results cannot be fetched.
    """
    # - base url:
    # https://blast.ncbi.nlm.nih.gov/blast/Blast.cgi?PAGE_TYPE=BlastSearch&BLAST_SPEC=SRA&DB_GROUP=Exp&
    # 1) PROGRAM = ['blastn', 'tblastn', 'tblastx']
    # 2) BLAST_PROGRAMS = ['megaBlast', 'blastn', 'discoMegablast']
    # e.g.
    # PROGRAM=blastn&BLAST_PROGRAMS=megaBlast&NUM_ORG=1&EQ_MENU=SRX000001
    # PROGRAM=tblastn&NUM_ORG=2&EQ_MENU=SRX000001&EQ_MENU1=SRX000002
    # Step 1 - Submit queries using the selenium module:
    url = "https://blast.ncbi.nlm.nih.gov/blast/Blast.cgi?PAGE_TYPE=BlastSearch&BLAST_SPEC=SRA&DB_GROUP=Exp"
    url += _add_eq_menus(srx)
    url += _add_program(program)
    chrome = _setup_chrome_webdriver(browser=browser, proxy=proxies[0])
    time.sleep(4)
    # Optional search parameters are appended to the URL; the query text
    # itself is typed into the form below instead:
    submit_params = [
        # ("QUERY", query),
        # ("QUERY_FROM", query_from),
        # ("QUERY_TO", query_to),
        ("MAX_NUM_SEQ", max_num_seq),
        ("EXPECT", expect),
        ("FILTER", repeat_filter),
        ("SHORT_QUERY_ADJUST", short_query),
        ("WORD_SIZE", word_size),
        ("JOB_TITLE", job_title)
    ]
    for p in submit_params:
        if p[1] is not None:
            url += "&{}={}".format(p[0], p[-1])
    chrome.get(url)
    time.sleep(4)
    chrome.find_element_by_name("QUERY").send_keys(query)
    if query_from is not None and query_to is not None:
        chrome.find_element_by_name("QUERY_FROM").send_keys(query_from)
        chrome.find_element_by_name("QUERY_TO").send_keys(query_to)
    time.sleep(4)
    chrome.find_element_by_class_name('blastbutton').click()
    wait_page = chrome.page_source
    try:
        rid, status, job_title, entrez_query, rtoe, max_num_seq = _parse_qblast_wait_page(
            wait_page)
    except errors.QueryError:
        # In my experience, the first submit may be blocked somehow, so try to submit again:
        time.sleep(4)
        chrome.find_element_by_class_name('blastbutton').click()
        wait_page = chrome.page_source
        rid, status, job_title, entrez_query, rtoe, max_num_seq = _parse_qblast_wait_page(
            wait_page)
    # Carry the browser session's cookies over to the requests session so
    # polling happens in the same NCBI session; the browser is no longer
    # needed after this point:
    cookies = chrome.get_cookies()
    _previous = time.time()
    chrome.quit()
    # Step 2 - Poll results from NCBI:
    # Actually, all parameters for polling results can be obtained from the wait page.
    # --
    # Poll NCBI until the results are ready.
    # https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=DeveloperInfo
    # 1. Do not contact the server more often than once every 10 seconds.
    # 2. Do not poll for any single RID more often than once a minute.
    # 3. Use the URL parameter email and tool, so that the NCBI
    #    can contact you if there is a problem.
    # 4. Run scripts weekends or between 9 pm and 5 am Eastern time
    #    on weekdays if more than 50 searches will be submitted.
    # --
    # Could start with a 10s delay, but expect most short queries
    # will take longer thus at least 70s with delay. Therefore,
    # start with 20s delay, thereafter once a minute.
    poll_params = [
        ("RID", rid),
        ("JOB_TITLE", job_title),
        ("ENTREZ_QUERY", entrez_query),
        ('MAX_NUM_SEQ', max_num_seq),
        ("CMD", "Get"),
    ]
    poll_params = [p for p in poll_params if p[1] is not None]
    delay = 20  # seconds
    session = requests.Session()
    for c in cookies:
        session.cookies.set(c['name'], c['value'])
    not_done_yet = True
    while not_done_yet:
        # Throttle: sleep until at least `delay` seconds after the previous
        # request.
        current = time.time()
        wait = _previous + delay - current
        if wait > 0:
            time.sleep(wait)
            _previous = current + wait
        else:
            _previous = current
        # delay by at least 60 seconds only if running the request against the public NCBI API
        if delay < 60:
            # Wasn't a quick return, must wait at least a minute
            delay = 60
        try:
            poll_response = session.get(
                "https://blast.ncbi.nlm.nih.gov/Blast.cgi",
                params=poll_params,
                headers=headers,
                timeout=120,
                proxies=proxies[-1],
            )
        except Exception as err:
            # Transient network error: log and fall through to the next
            # polling round.
            utils.log(
                "WARNING: Couldn't poll results from NCBI. {}. "
                "But don't panic, we will retry and are almost there.".format(
                    err),
                verbose=verbose,
                attr="debug")
        else:
            if poll_response.ok:
                poll_rid, poll_status, _, _, _, _ = _parse_qblast_wait_page(
                    poll_response.content.decode("utf-8"))
                utils.log("RID: {}, Status: {}.".format(poll_rid, poll_status),
                          verbose, "debug")
                if poll_rid == rid:
                    if poll_status.lower() in ["waiting", "searching"]:
                        continue
                    elif poll_status.lower() == "failed":
                        # Extract NCBI's error paragraph and strip inner tags:
                        err_msg = _search_keyword(
                            r'(<p class="error">.+?</p>)',
                            poll_response.content.decode("utf-8"), ">NA<")
                        err_msg = ''.join(
                            re.findall(r'>(.+?)<',
                                       err_msg))  # remove inside links <a></a>
                        raise errors.QueryError(
                            'Retrieving results failed. Error message from NCBI: "{}".'
                            .format(err_msg))
                    elif poll_status.lower() == "ready":
                        # Inner loop: fetch the report itself, retrying until
                        # the response really is a BLAST document:
                        poll_params.append(("FORMAT_TYPE", format_type))
                        while not_done_yet:
                            try:
                                poll_response = session.get(
                                    "https://blast.ncbi.nlm.nih.gov/Blast.cgi",
                                    params=poll_params,
                                    headers=headers,
                                    timeout=120,
                                    proxies=proxies[-1])
                            except Exception as err:
                                raise errors.QueryError(
                                    "Although the query was submitted, "
                                    "but the results couldn't be retrieved. {}"
                                    .format(err))
                            else:
                                if poll_response.ok:
                                    poll_format = _search_keyword(
                                        r'<!DOCTYPE ([\w]+?) PUBLIC',
                                        poll_response.content.decode("utf-8"),
                                        "NA")
                                    if poll_format.lower() == "blastoutput":
                                        blastoutput = poll_response.content.decode(
                                            "utf-8")  # XML
                                        not_done_yet = False
                                        break
                                    else:
                                        utils.log(
                                            "WARNING: Although the results are ready, "
                                            "they can't be retrieved somehow. "
                                            "Don't panic, we will retry and are almost there.",
                                            verbose=verbose,
                                            attr="debug")
                                        continue
                                else:
                                    utils.log(
                                        "WARNING: Although the query was submitted, "
                                        "but the results couldn't be retrieved probably because of network issues. "
                                        "Status code: {}.".format(
                                            poll_response.status_code),
                                        verbose=verbose,
                                        attr="debug")
                    else:
                        utils.log(
                            "WARNING: Something wrong while retrieving results from NCBI. "
                            "RID: {}. Status: {}. "
                            "But don't panic, we will retry and are almost there."
                            .format(poll_rid, poll_status),
                            verbose=verbose,
                            attr="debug")
                else:
                    utils.log(
                        "WARNING: The submitted RID ({}) "
                        "is different from the polled one ({}). "
                        "But don't panic, we will try to retrieve results again."
                        .format(rid, poll_rid),
                        verbose=verbose,
                        attr="debug")
            else:
                utils.log(
                    "WARNING: Couldn't get results from NCBI. Status code: {}. "
                    "But don't panic, we will retry and are almost there.".
                    format(poll_response.status_code),
                    verbose=verbose,
                    attr="debug")
    return blastoutput
def test_parse_args(self):
    """parse_arguments on a canonical command line must reproduce self.args."""
    utils.log("Testing r2g.utils.utils _parse_args")
    argv = ("r2g -o OUTPUT -s SRXNNNNNN -q ATGC "
            "--cut 80,50 -p blastn --CPU 4 --retry").split()
    self.assertEqual(utils.parse_arguments(argv), self.args)
def query(args, webdriver):
    """BLAST every sequence chunk against every SRX in args['sra'].

    Returns (query_name, download_list) where download_list maps SRA
    accessions to matched spot numbers (cleaned up by _clear_up_list).
    Each qblast call is retried up to args['retry'] times; polite spacing
    of at least ~10 seconds between NCBI submissions is enforced via the
    `interval` bookkeeping. Raises errors.QueryError when a chunk exhausts
    its retries.
    """
    download_list = {}
    name, seq_chunks = _format_seq(args)
    utils.log(seq_chunks, args['verbose'], 'debug')
    # De-duplicate the comma-separated accession list while keeping order:
    SRAs = {}.fromkeys(args['sra'].strip().split(',')).keys()
    formatted_SRAs = NCBIWWW_selenium.check_sra_validity(SRAs, proxy=args["proxy"])
    # formatted_SRAs = {species1: {srx1: [srr...], srx2: [srr...]}, species2: ...}
    interval = 10
    for i in formatted_SRAs.items():
        for j in i[-1].items():
            srx = j[0]
            srr = ','.join(j[-1])
            current = 0
            for chunk in seq_chunks:
                current += 1
                utils.processing(current, len(seq_chunks),
                                 "{} - {} ({})".format(i[0], srx, srr),
                                 "percent")
                r = -1
                err = ''
                while r < int(args['retry']):
                    # Do not contact the server more often than once every 10 seconds:
                    if interval < 10:
                        time.sleep(11 - interval)
                    start_time = time.time()
                    if len(err) > 0:
                        # utils.log("Retrying...", shift="\n")
                        utils.log("Retrying...")
                    try:
                        result = NCBIWWW_selenium.qblast(
                            program=args["program"],
                            srx=srx,
                            query=chunk,
                            # Spread the hit budget across chunks:
                            max_num_seq=(args["max_num_seq"] //
                                         (len(seq_chunks) * 20) + 1),
                            expect=args["evalue"],
                            # format_type='Tabular'
                            # Don't know why the number of returned hits can't be determined when the format is Tabular.
                            # So the XML format is required:
                            format_type='XML',
                            browser=webdriver,
                            proxies=(args["chrome_proxy"], args["proxy"]),
                            verbose=args["verbose"])
                        if args['verbose']:
                            with open(
                                    os.path.join(args['outdir'],
                                                 "{}.xml".format(srx)),
                                    'w') as outf:
                                outf.write(result)
                    except Exception as e:
                        err = str(e)
                        r += 1
                        utils.log("Error msg while querying: {}.".format(err),
                                  shift="\n")
                    else:
                        err = ''
                        break
                if len(err) > 0:
                    raise errors.QueryError(
                        "Couldn't get results from NCBI. Errors above must be investigated."
                    )
                else:
                    # Merge this chunk's spots into the running download list:
                    result = _parse_xml(result, args)
                    for sra in result.keys():
                        spots = deepcopy(download_list.get(sra, []))
                        spots += result[sra]
                        download_list[sra] = deepcopy(spots)
                # Time since the last submission, used to throttle the next one:
                interval = time.time() - start_time
    download_list = _clear_up_list(download_list)
    return name, download_list