def test_remove_identical_seqs():
    """Read precooked BLAST results into a scraper and check sequence counts.

    The test loads a pickled data object and a pickled accession map from
    ``tests/data/precooked``, builds a ``PhyscraperScrape`` from them, reads
    the BLAST files in ``tests/data/precooked/fixed/tte_blast_files`` and
    asserts the sizes of ``new_seqs``, ``new_seqs_otu_id`` and the alignment
    before and after ``align_query_seqs()``.

    Relies on the module-level names ``conf`` and ``absworkdir``.
    """
    # Fixtures are trusted, repository-local pickles; context managers make
    # sure the file handles are closed even if unpickling fails.
    with open("tests/data/precooked/tiny_dataobj.p", "rb") as data_file:
        data_obj = pickle.load(data_file)
    data_obj.workdir = absworkdir

    ids = IdDicts(conf, workdir=data_obj.workdir)
    with open("tests/data/precooked/tiny_acc_map.p", "rb") as acc_file:
        ids.acc_ncbi_dict = pickle.load(acc_file)

    scraper = PhyscraperScrape(data_obj, ids)
    scraper.ids.otu_rank = {}
    scraper.config.gifilename = False
    # Flag the scraper as already blasted so the precooked results are used
    # (presumably this skips a fresh BLAST run; confirm against
    # PhyscraperScrape).
    scraper._blasted = 1
    blast_dir = "tests/data/precooked/fixed/tte_blast_files"
    scraper.read_blast_wrapper(blast_dir=blast_dir)

    assert len(scraper.new_seqs) == 0
    assert len(scraper.data.aln) == 5
    assert len(scraper.new_seqs_otu_id) == 17
    # Now that we are pulling the full remote sequences, we don't have any
    # identical sequences in the test.

    # TODO: find an example where we do get identical sequences and need to
    # discard them, then re-enable the checks sketched below.
    #
    # Collect the new sequences:
    #     seqset = set()
    #     for otu in scraper.new_seqs_otu_id:
    #         seq = scraper.new_seqs_otu_id[otu]
    #         if seq in seqset:
    #             print(otu)
    #         seqset.add(seq)
    #
    # Check that every new sequence is unique in the new seqs set, and is
    # not a substring of another sequence:
    #     for otu in scraper.new_seqs_otu_id:
    #         qseq = scraper.new_seqs_otu_id[otu]
    #         count = 0
    #         for seq in seqset:
    #             if qseq in seq:
    #                 count += 1
    #         assert count == 1
    #
    # Check that every taxon in the tree has a known status:
    #     for taxon in scraper.data.tre.taxon_namespace:
    #         assert taxon.label in scraper.data.otu_dict
    #         status = scraper.data.otu_dict[taxon.label].get(u'^physcraper:status')
    #         assert status in ('original', 'query')

    # The return values are not needed; the calls are kept because they run
    # before the alignment step in the original test.
    scraper.data.write_aln()
    scraper.write_all_unaligned('test.fas')
    scraper.align_query_seqs()
    # 5 sequences in the alignment before + 17 new OTU sequences.
    assert len(scraper.data.aln) == 22