def test_execute_script_consensus_taxonomy(self):
        """Pull consensus sequences with taxonomy output under two filters.

        For each filter the fasta and taxonomy files must exist, the fastq
        file must not, and the taxonomy file must contain the expected number
        of lines with the expected IDs as the first field of a line.
        """

        # Filtering on Sequence Name. Time: 1.06s
        pull_seqdb_seqs.execute_script([
            "-c",
            config_root.path() + '/config4tests.yaml', "-r", "fasta", "-t",
            "consensus", "--seqName", "Pyt_arrhenomanes_"
        ], self.output_file_name, self.output_taxon_file_name)
        self.assertTrue(os.path.isfile(self.output_fasta_file_name),
                        "Fasta file was not created.")
        self.assertTrue(os.path.isfile(self.output_taxon_file_name),
                        "Taxonomy file was not created.")
        self.assertFalse(os.path.isfile(self.output_fastq_file_name),
                         "Fastq file was created.")
        with open(self.output_taxon_file_name) as f:
            # First whitespace-separated field of every line.
            idList = [line.split()[0] for line in f]
        count = len(idList)
        self.assertEqual(5, count,
                         "Expected 5 sequences but got {}".format(count))
        # The message is built from the ID being checked so the two cannot
        # disagree.
        for expected_id in ('358301', '358327', '358485'):
            self.assertIn(
                expected_id, idList,
                "Expected taxonomy ID {} is not found in the file".format(
                    expected_id))

        # Filtering on Taxonomy Rank and Taxonomy Value. Time: 2.49s
        pull_seqdb_seqs.execute_script([
            "-c",
            config_root.path() + '/config4tests.yaml', "-r", "fasta", "-t",
            "consensus", "--taxRank", "species", "--taxValue", "megasperma"
        ], self.output_file_name, self.output_taxon_file_name)
        self.assertTrue(os.path.isfile(self.output_fasta_file_name),
                        "Fasta file was not created.")
        self.assertTrue(os.path.isfile(self.output_taxon_file_name),
                        "Taxonomy file was not created.")
        self.assertFalse(os.path.isfile(self.output_fastq_file_name),
                         "Fastq file was created.")
        with open(self.output_taxon_file_name) as f:
            idList = [line.split()[0] for line in f]
        count = len(idList)
        self.assertEqual(3, count,
                         "Expected 3 sequences but got {}".format(count))
        for expected_id in ('358368', '358385', '358394'):
            self.assertIn(
                expected_id, idList,
                "Expected taxonomy ID {} is not found in the file".format(
                    expected_id))
    def test_execute_script_its(self):
        """Pull every ITS sequence as fasta and check the resulting file.

        Only the fasta file may be created. The number of header lines and
        the presence of three known sequence IDs are verified.
        """

        # Getting all ITS Sequences. Time: 963.44s
        pull_seqdb_seqs.execute_script([
            "-c",
            config_root.path() + '/config4tests.yaml', "-r", "fasta", "its"
        ], self.output_file_name, self.output_taxon_file_name)
        self.assertTrue(os.path.isfile(self.output_fasta_file_name),
                        "Fasta file was not created.")
        self.assertFalse(os.path.isfile(self.output_fastq_file_name),
                         "Fastq file was created.")
        self.assertFalse(os.path.isfile(self.output_taxon_file_name),
                         "Taxonomy file was created.")
        with open(self.output_fasta_file_name) as f:
            # Keep the first field of every header line (lines starting
            # with '>').
            idList = [line.split()[0] for line in f if line.startswith('>')]
        count = len(idList)
        self.assertEqual(23517, count,
                         "Expected 23517 sequences but got {}".format(count))
        # The message is built from the ID being checked so the two cannot
        # disagree.
        for seq_id in ('131072', '111872', '131058'):
            self.assertIn(
                '>seqdb|' + seq_id, idList,
                "Expected sequence ID {} is not found in the file".format(
                    seq_id))
    def test_execute_script_all_taxonomy(self):
        """Pull all sequences for gene region "ACA" with taxonomy output.

        The fasta and taxonomy files must exist and the fastq file must not.
        The taxonomy file is then checked for its line count and for three
        known IDs in the first field of a line.
        """

        # Filtering on Gene Region Name. Time: 124.98s
        script_args = [
            "-c",
            config_root.path() + '/config4tests.yaml', "-r", "fasta", "-t",
            "all", "--geneRegion", "ACA"
        ]
        pull_seqdb_seqs.execute_script(script_args, self.output_file_name,
                                       self.output_taxon_file_name)

        fasta_exists = os.path.isfile(self.output_fasta_file_name)
        self.assertTrue(fasta_exists, "Fasta file was not created.")
        taxon_exists = os.path.isfile(self.output_taxon_file_name)
        self.assertTrue(taxon_exists, "Taxonomy file was not created.")
        fastq_exists = os.path.isfile(self.output_fastq_file_name)
        self.assertFalse(fastq_exists, "Fastq file was created.")

        # One entry per line: the line's first whitespace-separated field.
        with open(self.output_taxon_file_name) as taxon_file:
            taxon_ids = [row.split()[0] for row in taxon_file]
        line_total = len(taxon_ids)

        self.assertEqual(
            1041, line_total,
            "Expected 1,041 sequences but got {}".format(line_total))
        self.assertIn('358301', taxon_ids,
                      "Expected taxonomy ID 358301 is not found in the file")
        self.assertIn('37674', taxon_ids,
                      "Expected taxonomy ID 37674 is not found in the file")
        self.assertIn('148710', taxon_ids,
                      "Expected taxonomy ID 148710 is not found in the file")
    def test_execute_script_raw_taxonomy(self):
        """Pull raw sequences for one sample name with taxonomy output.

        The fasta and taxonomy files must exist and the fastq file must not.
        The taxonomy file is then checked for its line count and for three
        known IDs in the first field of a line.
        """

        # Filtering on Sample Name. Time: 3.40s
        pull_seqdb_seqs.execute_script([
            "-c",
            config_root.path() + '/config4tests.yaml', "-r", "fasta", "raw",
            "-t", "--sampleName", "INVITRO221"
        ], self.output_file_name, self.output_taxon_file_name)
        self.assertTrue(os.path.isfile(self.output_fasta_file_name),
                        "Fasta file was not created.")
        self.assertTrue(os.path.isfile(self.output_taxon_file_name),
                        "Taxonomy file was not created.")
        # assertFalse fails when the file exists, so the message must say
        # that it was created.
        self.assertFalse(os.path.isfile(self.output_fastq_file_name),
                         "Fastq file was created.")
        with open(self.output_taxon_file_name) as f:
            # First whitespace-separated field of every line.
            idList = [line.split()[0] for line in f]
        count = len(idList)
        self.assertEqual(6, count,
                         "Expected 6 sequences but got {}".format(count))
        for expected_id in ('961', '97830', '97847'):
            self.assertIn(
                expected_id, idList,
                "Expected taxonomy ID {} is not found in the file".format(
                    expected_id))
    def test_execute_script_raw_fastq(self):
        """Pull raw sequences for one sample name in fastq format.

        Only the fastq file may be created. Lines starting with '@' are
        counted and their first field is compared against three known IDs.
        """

        # Filtering on Sample Name. Time: 4.6s
        script_args = [
            "-c",
            config_root.path() + '/config4tests.yaml', "-r", "fastq", "raw",
            "--sampleName", "LEV6103"
        ]
        pull_seqdb_seqs.execute_script(script_args, self.output_file_name,
                                       self.output_taxon_file_name)

        fastq_exists = os.path.isfile(self.output_fastq_file_name)
        self.assertTrue(fastq_exists, "Fastq file was not created.")
        fasta_exists = os.path.isfile(self.output_fasta_file_name)
        self.assertFalse(fasta_exists, "Fasta file was created.")
        taxon_exists = os.path.isfile(self.output_taxon_file_name)
        self.assertFalse(taxon_exists, "Taxonomy file was created.")

        # NOTE(review): a FASTQ quality line may also begin with '@'; this
        # count treats every such line as a record header.
        with open(self.output_fastq_file_name) as fastq_file:
            header_ids = [
                row.split()[0] for row in fastq_file if row.startswith('@')
            ]
        header_total = len(header_ids)

        self.assertEqual(
            60, header_total,
            "Expected 60 sequences but got {}".format(header_total))
        self.assertIn('@seqdb|266400', header_ids,
                      "Expected sequence ID 266400 is not found in the file")
        self.assertIn('@seqdb|301609', header_ids,
                      "Expected sequence ID 301609 is not found in the file")
        self.assertIn('@seqdb|331086', header_ids,
                      "Expected sequence ID 331086 is not found in the file")
 def setUpClass(cls):
     """Build the shared DeterminationApi fixture from config4tests.yaml.

     The API key and base URL are read from the ``seqdb`` section of the
     test configuration file.
     """
     config_path = config_root.path() + '/config4tests.yaml'
     with open(config_path, 'r') as config_file:
         seqdb_settings = yaml.safe_load(config_file)['seqdb']
         cls.fixture = DeterminationApi(
             api_key=seqdb_settings['api_key'],
             base_url=seqdb_settings['base_url'])
 def setUpClass(self):
     """Build the feature API fixtures and set the file names used by tests.

     The API key and base URL are read from the ``seqdb`` section of
     config4tests.yaml.
     """
     # `file()` is a Python 2-only builtin and left the handle open;
     # `yaml.load` without a Loader can construct arbitrary objects.
     # Use a context manager and the safe loader instead.
     with open(config_root.path() + '/config4tests.yaml', 'r') as config_file:
         config = yaml.safe_load(config_file)
     seqdb_config = config['seqdb']
     self.featureTypeFixture = FeatureTypeApi(
         api_key=seqdb_config['api_key'],
         base_url=seqdb_config['base_url'])
     self.featureFixture = FeatureApi(
         api_key=seqdb_config['api_key'],
         base_url=seqdb_config['base_url'])
     self.itsx_positions_file_name = "data/test.positions.txt"
     self.push_to_seqdb_output_file_name = "seqdb_feature_ids.txt"
     self.failed_ids_output_file_name = "delete_failed_feature_ids.txt"
Beispiel #8
0
def set_up_logging():
    """Load the main configuration file and configure logging from it.

    Logs an error and exits through ``sys.exit`` when the configuration
    cannot be loaded. Otherwise applies the ``logging`` section with
    ``dictConfig`` and records the command line the script was started with.
    """
    config_path = config_root.path() + '/config.yaml'
    main_conf = tools_helper.load_config(config_path)

    if main_conf:
        logging.config.dictConfig(main_conf['logging'])

        start_message = "{} '{}'".format(
            tools_helper.log_msg_scriptExecutionWithParams, sys.argv)
        logging.info(start_message)
    else:
        logging.error(tools_helper.log_msg_noConfig)
        sys.exit(tools_helper.log_msg_sysExit)
Beispiel #9
0
def main():
    """Write provided information to SeqDB.

    Loads the main configuration and sets up logging, parses the command
    line, then pushes either ITS features or taxonomy lineage information
    depending on the parsed push type. Exits through ``sys.exit`` when the
    main configuration cannot be loaded.
    """

    ### Load main configuration file and set up logging for the script
    main_conf = tools_helper.load_config(config_root.path() + '/config.yaml')

    if not main_conf:
        logging.error(tools_helper.log_msg_noConfig)
        sys.exit(tools_helper.log_msg_sysExit)

    logging.config.dictConfig(main_conf['logging'])

    logging.info("{} '{}'".format(
        tools_helper.log_msg_scriptExecutionWithParams, sys.argv))

    ### Parse script's input arguments
    parsed_args = parse_input_args(sys.argv[1:])

    # Connection settings come from the tool config file when one is given,
    # otherwise directly from the command line.
    if parsed_args.config_file:
        tool_config = tools_helper.load_config(parsed_args.config_file)
        base_url = tool_config['seqdb']['base_url']
        api_key = tool_config['seqdb']['api_key']
    else:
        base_url = parsed_args.base_url
        api_key = parsed_args.api_key

    logging.info("{} '{}'".format(tools_helper.log_msg_apiUrl, base_url))

    ### Script execution
    if push_types_dict["its"] == parsed_args.push_type:
        log_msg = "Writing ITS features to SeqDB."
        logging.info(log_msg)
        featureTypeApi = FeatureTypeApi(api_key=api_key, base_url=base_url)
        featureApi = FeatureApi(api_key=api_key, base_url=base_url)
        success_feat_ids = push_its_features(featureTypeApi, featureApi,
                                             parsed_args.itsx_positions_file,
                                             parsed_args.itsx_extraction_file)
        # Function-call form: valid on Python 3 and prints the same text as
        # the Python 2 statement for a single argument.
        print(success_feat_ids)

    elif push_types_dict["taxonomy"] == parsed_args.push_type:
        log_msg = "Writing taxonomy lineage information to SeqDB."
        logging.info(log_msg)
        determinationApi = DeterminationApi(api_key=api_key, base_url=base_url)
        push_taxonomy_data(determinationApi, parsed_args.lca_results_file,
                           main_conf['galaxy']['ncbi_taxonomy_dir'])

    ### Post-execution: messages and logging
    print(tools_helper.log_msg_execEnded)
    logging.info(tools_helper.log_msg_execEnded)
def main():
    """Create a SeqDB configuration file for the API key given on the CLI.

    Loads the main configuration, sets up logging, parses the command line
    for an API key and asks ``SeqdbConfigMaker`` to write the configuration
    file. Exits through ``sys.exit`` when the main configuration cannot be
    loaded.
    """
    config_path = config_root.path() + '/config.yaml'
    main_conf = tools_helper.load_config(config_path)

    if not main_conf:
        logging.error("Could not load configuration file. Exiting...")
        sys.exit("Configuration not loaded.")

    logging.config.dictConfig(main_conf['logging'])

    invocation_message = (
        "Script executed with the following command and arguments: %s" %
        sys.argv)
    logging.info(invocation_message)

    # Parse command line arguments
    cli_args = sys.argv[1:]
    api_key = parse_input_args(cli_args)

    # Disabled: logging.info("Base URL for web services is: '%s'" % base_url)

    seqdb_url = main_conf['seqdb']['url']
    configMaker = SeqdbConfigMaker(api_url=seqdb_url)
    config_file = configMaker.createConfigFile(api_key)

    written_name = os.path.basename(config_file)
    print("Configuration is written to a file: '%s'" % written_name)
Beispiel #11
0
def main():
    """Load sequences matching the configured Entrez query into SeqDB.

    Loads the main configuration (``config.yaml`` under ``config_root``) and
    the tool configuration (``seqdb_gb_insert_config.yaml`` next to this
    file), runs the configured query through ``entrez_search`` and passes
    every returned ID to ``process_entrez_entry`` in batches of 50.

    Exits through ``sys.exit`` when either configuration file cannot be
    loaded.
    """
    main_config_path = config_root.path() + '/config.yaml'
    tool_config_path = (os.path.dirname(__file__) +
                        '/seqdb_gb_insert_config.yaml')

    print("Loading configuration file: {}".format(main_config_path))
    print("Loading tools configuration file: {}".format(tool_config_path))

    main_config = tools_helper.load_config(main_config_path)

    if not main_config:
        logging.error(tools_helper.log_msg_noConfig)
        sys.exit(tools_helper.log_msg_sysExit)

    tool_config = tools_helper.load_config(tool_config_path)

    if not tool_config:
        logging.error(tools_helper.log_msg_noConfig)
        sys.exit(tools_helper.log_msg_sysExit)

    url = main_config['seqdb']['url']
    api_key = tool_config['seqdb']['api_key']

    logging.config.dictConfig(main_config['logging'])

    http_client.HTTPConnection.debuglevel = main_config['http_connect'][
        'debug_level']

    # caching the entrez records shaved 2 minutes off the time to load
    # ~740 sequences from query: "(*DAOM*[source] and levesque and not
    # 'unplaced genomics scaffold')"
    # real    11m40.754s
    # user    1m31.726s
    # sys     0m14.760s
    # - vs -
    # real    9m21.112s
    # user    1m27.726s
    # sys     0m13.619s
    entrez_cache = shelve.open(tool_config['entrez']['cache'])

    # try/finally rather than `with`: Shelf objects are context managers
    # only from Python 3.4 on, and the cache must be closed even if a record
    # fails to load.
    try:
        # however, caching the lookup shaved an additional ~7 minutes off the
        # total time to load above query
        # real    2m35.773s
        # user    0m16.539s
        # sys     0m2.486s
        # TODO May not be necessary any longer; instead use API lookup by
        # feature type name
        feature_type_lookup = {}

        logging.info(
            "Script executed with the following command and arguments: {}".
            format(sys.argv))

        consensusSequenceEntity = ConsensusSequenceApi(api_key=api_key,
                                                       base_url=url)

        Entrez.email = tool_config['entrez']['email']
        query = tool_config['entrez']['query']

        logging.info("Querying GenBank: \'{}\'".format(query))

        # preliminary query to find out how many records there are
        record = entrez_search(query)

        # setup loop counters; retrieving records 50 at a time
        count = int(record["Count"])
        start = 0
        retrieve = 50
        logging.info(
            "Query returned {} records. Retrieving them in batches of {}".
            format(count, retrieve))

        # repeat until we have all records
        while start < count:

            # Function-call form: valid on Python 3 and prints the same text
            # as the Python 2 statement for a single argument.
            print('Count:' + str(count))
            print('Start:' + str(start))

            # retrieve block of records
            logging.debug("Retrieving {}..{}".format(start, start + retrieve))

            record = entrez_search(query, retstart=start, retmax=retrieve)
            # process each returned id in the batch
            for genbank_id in record["IdList"]:
                process_entrez_entry(consensusSequenceEntity,
                                     api_key,
                                     url,
                                     genbank_id,
                                     cache=entrez_cache,
                                     lookup=feature_type_lookup,
                                     delete=tool_config['gb_insert']['delete'],
                                     update=tool_config['gb_insert']['update'])
                print("\n >Seqid: {}".format(genbank_id))

            start += retrieve
    finally:
        entrez_cache.close()

    print("***Done***")
    def test_execute_script_consensus_fasta(self):
        """Pull consensus sequences as fasta: unfiltered, then two filters.

        For every run only the fasta file may be created. The number of
        header lines and the presence of three known sequence IDs are
        verified.
        """

        #  Time: 86.143s

        pull_seqdb_seqs.execute_script([
            "-c",
            config_root.path() + '/config4tests.yaml', "-r", "fasta",
            "consensus"
        ], self.output_file_name, self.output_taxon_file_name)
        self.assertTrue(os.path.isfile(self.output_fasta_file_name),
                        "Fasta file was not created.")
        self.assertFalse(os.path.isfile(self.output_fastq_file_name),
                         "Fastq file was created.")
        self.assertFalse(os.path.isfile(self.output_taxon_file_name),
                         "Taxonomy file was created.")
        with open(self.output_fasta_file_name) as f:
            # Keep the first field of every header line (lines starting
            # with '>').
            idList = [line.split()[0] for line in f if line.startswith('>')]
        count = len(idList)
        # Note that the number of all consensus sequences you get in SeqDB UI is 15037. This is a bug in
        # SeqDB that there are some sequences that are not deleted properly, so they are reported as there,
        # but they don't have any sequence information.
        # NOTE(review): the count asserted below equals the UI figure quoted
        # above - confirm which number the comment was meant to describe.
        self.assertEqual(15037, count,
                         "Expected 15037 sequences but got {}".format(count))
        # The message is built from the ID being checked so the two cannot
        # disagree.
        for seq_id in ('358301', '4823203', '4829279'):
            self.assertIn(
                '>seqdb|' + seq_id, idList,
                "Expected sequence ID {} is not found in the file".format(
                    seq_id))

        pull_seqdb_seqs.execute_script([
            "-c",
            config_root.path() + '/config4tests.yaml', "-r", "fasta",
            "consensus", "--geneRegion", "28s"
        ], self.output_file_name, self.output_taxon_file_name)
        self.assertTrue(os.path.isfile(self.output_fasta_file_name),
                        "Fasta file was not created.")
        self.assertFalse(os.path.isfile(self.output_fastq_file_name),
                         "Fastq file was created.")
        self.assertFalse(os.path.isfile(self.output_taxon_file_name),
                         "Taxonomy file was created.")
        with open(self.output_fasta_file_name) as f:
            idList = [line.split()[0] for line in f if line.startswith('>')]
        count = len(idList)
        self.assertEqual(1499, count,
                         "Expected 1499 sequences but got {}".format(count))
        for seq_id in ('1582548', '4825579', '4827758'):
            self.assertIn(
                '>seqdb|' + seq_id, idList,
                "Expected sequence ID {} is not found in the file".format(
                    seq_id))

        pull_seqdb_seqs.execute_script([
            "-c",
            config_root.path() + '/config4tests.yaml', "-r", "fasta",
            "consensus", "--specNums", "4405,4264"
        ], self.output_file_name, self.output_taxon_file_name)
        self.assertTrue(os.path.isfile(self.output_fasta_file_name),
                        "Fasta file was not created.")
        self.assertFalse(os.path.isfile(self.output_fastq_file_name),
                         "Fastq file was created.")
        self.assertFalse(os.path.isfile(self.output_taxon_file_name),
                         "Taxonomy file was created.")
        with open(self.output_fasta_file_name) as f:
            idList = [line.split()[0] for line in f if line.startswith('>')]
        count = len(idList)
        self.assertEqual(3, count,
                         "Expected 3 sequences but got {}".format(count))
        for seq_id in ('358301', '358302', '4825628'):
            self.assertIn(
                '>seqdb|' + seq_id, idList,
                "Expected sequence ID {} is not found in the file".format(
                    seq_id))
    def test_execute_script_all_fasta(self):
        """Pull all sequences as fasta under two different filters.

        For every run only the fasta file may be created. The number of
        header lines and the presence of three known sequence IDs are
        verified. The unfiltered pull is kept below as disabled code because
        it takes too long to run.
        """
        # Disabled - Getting All Sequences. Time: TOO LONG
        # pull_seqdb_seqs.execute_script(["-c", config_root.path() + '/config4tests.yaml', "-r", "fasta", "all"],
        #                     self.output_file_name, self.output_taxon_file_name)
        # self.assertTrue(os.path.isfile(self.output_fasta_file_name), "Fasta file was not created.")
        # self.assertFalse(os.path.isfile(self.output_fastq_file_name), "Fastq file was created.")
        # self.assertFalse(os.path.isfile(self.output_taxon_file_name), "Taxonomy file was created.")
        # count = 0
        # with open(self.output_fasta_file_name) as f:
        #     for line in f:
        #         if line.startswith('>'):
        #             count = count + 1
        # self.assertEqual(485643, count, "Expected 485,643 sequences but got {}".format(count))

        # Time: 32.8s
        pull_seqdb_seqs.execute_script([
            "-c",
            config_root.path() + '/config4tests.yaml', "-r", "fasta", "all",
            "--geneRegion", "EF-1a"
        ], self.output_file_name, self.output_taxon_file_name)
        self.assertTrue(os.path.isfile(self.output_fasta_file_name),
                        "Fasta file was not created.")
        self.assertFalse(os.path.isfile(self.output_fastq_file_name),
                         "Fastq file was created.")
        self.assertFalse(os.path.isfile(self.output_taxon_file_name),
                         "Taxonomy file was created.")
        with open(self.output_fasta_file_name) as f:
            # Keep the first field of every header line (lines starting
            # with '>').
            idList = [line.split()[0] for line in f if line.startswith('>')]
        count = len(idList)
        self.assertEqual(492, count,
                         "Expected 492 sequences but got {}".format(count))
        # The message is built from the ID being checked so the two cannot
        # disagree.
        for seq_id in ('1689', '103372', '149807'):
            self.assertIn(
                '>seqdb|' + seq_id, idList,
                "Expected sequence ID {} is not found in the file".format(
                    seq_id))

        pull_seqdb_seqs.execute_script([
            "-c",
            config_root.path() + '/config4tests.yaml', "-r", "fasta", "all",
            "--projectName", "Pythium Type Specimens"
        ], self.output_file_name, self.output_taxon_file_name)
        self.assertTrue(os.path.isfile(self.output_fasta_file_name),
                        "Fasta file was not created.")
        self.assertFalse(os.path.isfile(self.output_fastq_file_name),
                         "Fastq file was created.")
        self.assertFalse(os.path.isfile(self.output_taxon_file_name),
                         "Taxonomy file was created.")
        with open(self.output_fasta_file_name) as f:
            idList = [line.split()[0] for line in f if line.startswith('>')]
        count = len(idList)
        self.assertEqual(4373, count,
                         "Expected 4,373 sequences but got {}".format(count))
        for seq_id in ('358305', '196715', '356858'):
            self.assertIn(
                '>seqdb|' + seq_id, idList,
                "Expected sequence ID {} is not found in the file".format(
                    seq_id))
    def test_execute_script_raw_fasta(self):
        """Pull raw sequences as fasta under three different filters.

        For every run only the fasta file may be created. The number of
        header lines and the presence of three known sequence IDs are
        verified. The unfiltered pull is kept below as disabled code because
        it takes too long to run.
        """
        # Disabled - Getting all Raw Sequences. Time: TOO LONG
        # pull_seqdb_seqs.execute_script(["-c", config_root.path() + '/config4tests.yaml', "-r", "fasta", "raw"],
        #                     self.output_file_name, self.output_taxon_file_name)
        # self.assertTrue(os.path.isfile(self.output_fasta_file_name), "Fasta file was not created.")
        # self.assertFalse(os.path.isfile(self.output_fastq_file_name), "Fastq file was created.")
        # self.assertFalse(os.path.isfile(self.output_taxon_file_name), "Taxonomy file was created.")
        # count = 0
        # with open(self.output_fasta_file_name) as f:
        #     for line in f:
        #         if line.startswith('>'):
        #             count = count + 1
        # self.assertEqual(480088, count, "Expected 480,088 sequences but got {}".format(count))

        # Time: 10.5s
        pull_seqdb_seqs.execute_script([
            "-c",
            config_root.path() + '/config4tests.yaml', "-r", "fasta", "raw",
            "--seqName", "S-SH-"
        ], self.output_file_name, self.output_taxon_file_name)
        self.assertTrue(os.path.isfile(self.output_fasta_file_name),
                        "Fasta file was not created.")
        self.assertFalse(os.path.isfile(self.output_fastq_file_name),
                         "Fastq file was created.")
        self.assertFalse(os.path.isfile(self.output_taxon_file_name),
                         "Taxonomy file was created.")
        with open(self.output_fasta_file_name) as f:
            # Keep the first field of every header line (lines starting
            # with '>').
            idList = [line.split()[0] for line in f if line.startswith('>')]
        count = len(idList)
        self.assertEqual(134, count,
                         "Expected 134 sequences but got {}".format(count))
        # The message is built from the ID being checked so the two cannot
        # disagree.
        for seq_id in ('1', '79390', '126059'):
            self.assertIn(
                '>seqdb|' + seq_id, idList,
                "Expected sequence ID {} is not found in the file".format(
                    seq_id))

        pull_seqdb_seqs.execute_script([
            "-c",
            config_root.path() + '/config4tests.yaml', "-r", "fasta", "raw",
            "--collectionCode", "pm"
        ], self.output_file_name, self.output_taxon_file_name)
        self.assertTrue(os.path.isfile(self.output_fasta_file_name),
                        "Fasta file was not created.")
        self.assertFalse(os.path.isfile(self.output_fastq_file_name),
                         "Fastq file was created.")
        self.assertFalse(os.path.isfile(self.output_taxon_file_name),
                         "Taxonomy file was created.")
        with open(self.output_fasta_file_name) as f:
            idList = [line.split()[0] for line in f if line.startswith('>')]
        count = len(idList)
        self.assertEqual(148, count,
                         "Expected 148 sequences but got {}".format(count))
        for seq_id in ('268749', '308734', '356572'):
            self.assertIn(
                '>seqdb|' + seq_id, idList,
                "Expected sequence ID {} is not found in the file".format(
                    seq_id))

        pull_seqdb_seqs.execute_script([
            "-c",
            config_root.path() + '/config4tests.yaml', "-r", "fasta", "raw",
            "--specNums", "4405,4264"
        ], self.output_file_name, self.output_taxon_file_name)
        self.assertTrue(os.path.isfile(self.output_fasta_file_name),
                        "Fasta file was not created.")
        self.assertFalse(os.path.isfile(self.output_fastq_file_name),
                         "Fastq file was created.")
        self.assertFalse(os.path.isfile(self.output_taxon_file_name),
                         "Taxonomy file was created.")
        with open(self.output_fasta_file_name) as f:
            idList = [line.split()[0] for line in f if line.startswith('>')]
        count = len(idList)
        self.assertEqual(33, count,
                         "Expected 33 sequences but got {}".format(count))
        for seq_id in ('27755', '155033', '239733'):
            self.assertIn(
                '>seqdb|' + seq_id, idList,
                "Expected sequence ID {} is not found in the file".format(
                    seq_id))