Esempio n. 1
0
    def test_speed(self):
        """Run create_original_sequences on random FASTA pairs of growing size.

        For 10, 100, ..., 100000 symbols, two files ('a.fa' and 'b.fa') are
        written into a new TempDirectory and then passed to
        ZipDirectory.create_original_sequences. The only assertion is that
        the three time.clock() readings are strictly increasing.
        """
        line_length = 100
        alphabet = ['a', 'c', 't', 'g', '-']

        for exponent in range(1, 6):
            sym_count = 10 ** exponent

            with TempDirectory() as tmp:
                started = time.clock()

                fasta_paths = [os.path.join(tmp.path, name)
                               for name in ('a.fa', 'b.fa')]
                for fasta_path in fasta_paths:
                    # The symbol count doubles as the buffering argument.
                    with open(fasta_path, 'w', sym_count) as out:
                        out.write('> seq\n')
                        for position in xrange(1, sym_count + 1):
                            out.write(random.choice(alphabet))
                            # Wrap the sequence every `line_length` symbols.
                            if position % line_length == 0:
                                out.write('\n')
                        out.write('\n')

                files_written = time.clock()

                ZipDirectory.create_original_sequences(
                    tmp.path, fasta_paths[0], fasta_paths[1])

                sequences_created = time.clock()
                self.assertTrue(started < files_written < sequences_created)
Esempio n. 2
0
 def test_create_cache(self):
     """Verify that create_cache(3) makes is_cached(3) turn true."""
     with TempDirectory() as workspace:
         fixture_dir = create_ziphmm_directory(workspace.path)
         zip_dir = ZipDirectory(fixture_dir)
         # The fixture is expected to start without a 3-state cache.
         self.assertFalse(zip_dir.is_cached(3))
         zip_dir.create_cache(3)
         self.assertTrue(zip_dir.is_cached(3))
    def test_speed(self):
        """Exercise create_original_sequences on inputs from 10 to 100000 symbols.

        Each round writes two random FASTA files into its own TempDirectory,
        runs ZipDirectory.create_original_sequences on them, and asserts that
        the time.clock() readings taken before, between and after the two
        phases are strictly increasing.
        """
        wrap_at = 100
        symbols = ['a', 'c', 't', 'g', '-']

        def write_random_fasta(path, length):
            # `length` is also passed as the buffering argument of open().
            with open(path, 'w', length) as handle:
                handle.write('> seq\n')
                for written in xrange(1, length + 1):
                    handle.write(random.choice(symbols))
                    if written % wrap_at == 0:
                        handle.write('\n')
                handle.write('\n')

        for sym_count in (10, 100, 1000, 10000, 100000):
            with TempDirectory() as tmp:
                before_write = time.clock()

                first_fasta = os.path.join(tmp.path, 'a.fa')
                write_random_fasta(first_fasta, sym_count)
                second_fasta = os.path.join(tmp.path, 'b.fa')
                write_random_fasta(second_fasta, sym_count)

                after_write = time.clock()

                ZipDirectory.create_original_sequences(
                    tmp.path, first_fasta, second_fasta)

                after_create = time.clock()
                self.assertTrue(before_write < after_write < after_create)
Esempio n. 4
0
def _init_alignments(options, fasta_index1, fasta_index2, group_dir):
    """Prepare the ZipHMM directories for one pair of FASTA files.

    Creates the root directory returned by ``_get_ziphmm_root_dir`` if it is
    missing, calls ``ZipDirectory.create_original_sequences`` on the selected
    FASTA pair, and then calls ``create_cache(NUM_STATES)`` on every entry
    found directly under the root that is not yet cached for ``NUM_STATES``.

    Args:
        options: object providing ``exp_folder`` (must be an existing
            directory), ``fasta`` (indexable collection of FASTA paths) and
            ``chunk_size``.
        fasta_index1: index into ``options.fasta`` of the first file.
        fasta_index2: index into ``options.fasta`` of the second file.
        group_dir: forwarded unchanged to ``_get_ziphmm_root_dir``.

    Raises:
        AssertionError: if ``options.exp_folder`` is not a directory or an
            index is not smaller than ``len(options.fasta)``.
    """
    # NOTE: these checks are asserts, so they vanish under ``python -O``.
    assert os.path.isdir(options.exp_folder), \
        'Directory not found: {0}'.format(options.exp_folder)
    for fasta_index in (fasta_index1, fasta_index2):
        assert len(options.fasta) > fasta_index, \
            'Not enough FASTA files specified'

    root_dir = _get_ziphmm_root_dir(options, group_dir)

    if not os.path.isdir(root_dir):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('# Creating directory: {0}'.format(root_dir))
        os.mkdir(root_dir)

    ZipDirectory.create_original_sequences(
        root_dir,
        options.fasta[fasta_index1],
        options.fasta[fasta_index2],
        options.chunk_size,
        sys.stdout)

    # Build the list eagerly, as Python 2's map() did.
    ziphmm_dirs = [ZipDirectory(path)
                   for path in glob.glob(os.path.join(root_dir, '*'))]

    for ziphmm_dir in ziphmm_dirs:
        if ziphmm_dir.is_cached(NUM_STATES):
            continue
        print('# Creating {0}-state alignment in directory: {1}'.format(
            NUM_STATES, ziphmm_dir.path))
        ziphmm_dir.create_cache(NUM_STATES)
Esempio n. 5
0
def _init_alignments(options, fasta_index1, fasta_index2, group_dir):
    """Set up the ZipHMM directory tree for two FASTA files.

    Steps, in order: validate the inputs with asserts, create the root
    directory from ``_get_ziphmm_root_dir`` when it does not exist, run
    ``ZipDirectory.create_original_sequences`` for the two chosen FASTA
    files, and call ``create_cache(NUM_STATES)`` on each entry under the
    root for which ``is_cached(NUM_STATES)`` is false.

    Args:
        options: object exposing ``exp_folder``, ``fasta`` and ``chunk_size``.
        fasta_index1: position of the first FASTA file in ``options.fasta``.
        fasta_index2: position of the second FASTA file in ``options.fasta``.
        group_dir: passed through to ``_get_ziphmm_root_dir``.

    Raises:
        AssertionError: if ``options.exp_folder`` is not a directory or
            ``options.fasta`` is too short for either index.
    """
    assert os.path.isdir(options.exp_folder), \
        'Directory not found: {0}'.format(options.exp_folder)
    assert len(options.fasta) > fasta_index1, \
        'Not enough FASTA files specified'
    assert len(options.fasta) > fasta_index2, \
        'Not enough FASTA files specified'

    root_dir = _get_ziphmm_root_dir(options, group_dir)

    if not os.path.isdir(root_dir):
        # print(...) with one argument is valid and identical on Python 2
        # and Python 3, unlike the bare print statement.
        print('# Creating directory: {0}'.format(root_dir))
        os.mkdir(root_dir)

    first_fasta = options.fasta[fasta_index1]
    second_fasta = options.fasta[fasta_index2]
    ZipDirectory.create_original_sequences(
        root_dir, first_fasta, second_fasta, options.chunk_size, sys.stdout)

    # A list (not a lazy map object) so every ZipDirectory is constructed
    # before the first cache is created, on both Python versions.
    entries = glob.glob(os.path.join(root_dir, '*'))
    ziphmm_dirs = [ZipDirectory(entry) for entry in entries]

    for ziphmm_dir in ziphmm_dirs:
        if not ziphmm_dir.is_cached(NUM_STATES):
            print('# Creating {0}-state alignment in directory: {1}'.format(
                NUM_STATES, ziphmm_dir.path))
            ziphmm_dir.create_cache(NUM_STATES)
Esempio n. 6
0
 def test_clear_cache(self):
     """Verify that clear_cache() removes the 'data_structure' file."""
     with TempDirectory() as workspace:
         fixture_dir = create_ziphmm_directory(workspace.path)
         zip_dir = ZipDirectory(fixture_dir)
         data_file = os.path.join(fixture_dir, 'data_structure')

         self.assertTrue(os.path.isfile(data_file))
         zip_dir.clear_cache()
         self.assertFalse(os.path.isfile(data_file))
Esempio n. 7
0
 def test_clear_cache(self):
     """The 'data_structure' file exists before clear_cache() and not after."""
     with TempDirectory() as scratch:
         ziphmm_path = create_ziphmm_directory(scratch.path)
         wrapper = ZipDirectory(ziphmm_path)

         def data_structure_exists():
             # Re-check the filesystem on every call.
             return os.path.isfile(
                 os.path.join(ziphmm_path, 'data_structure'))

         self.assertTrue(data_structure_exists())
         wrapper.clear_cache()
         self.assertFalse(data_structure_exists())
Esempio n. 8
0
 def test_is_cached(self):
     """Follow is_cached() for 2 and 3 states across create and clear."""
     with TempDirectory() as workspace:
         fixture_dir = create_ziphmm_directory(workspace.path)
         zip_dir = ZipDirectory(fixture_dir)

         # Initially only the 2-state cache is expected to exist.
         self.assertTrue(zip_dir.is_cached(2))
         self.assertFalse(zip_dir.is_cached(3))

         # Adding the 3-state cache must leave the 2-state one in place.
         zip_dir.create_cache(3)
         for state_count in (2, 3):
             self.assertTrue(zip_dir.is_cached(state_count))

         # Clearing drops both.
         zip_dir.clear_cache()
         for state_count in (2, 3):
             self.assertFalse(zip_dir.is_cached(state_count))
Esempio n. 9
0
 def test_init_without_data(self):
     """ZipDirectory exposes its path even if 'data_structure' was deleted."""
     with TempDirectory() as workspace:
         fixture_dir = create_ziphmm_directory(workspace.path)
         # Delete the data file before the wrapper is constructed.
         os.remove(os.path.join(fixture_dir, 'data_structure'))
         zip_dir = ZipDirectory(fixture_dir)
         self.assertEqual(fixture_dir, zip_dir.path)
Esempio n. 10
0
    def test_create_original_sequences(self):
        """Check the 'original_sequence' files produced for a.fa and b.fa.

        After calling ZipDirectory.create_original_sequences with
        chunk_size=10, each listed sub-directory of the temporary directory
        must contain an 'original_sequence' file whose text equals the
        expected string exactly.
        """
        with TempDirectory() as temp:
            ZipDirectory.create_original_sequences(temp.path,
                                                   locate("inputs/a.fa"),
                                                   locate("inputs/b.fa"),
                                                   chunk_size=10)

            def test(name, expected):
                path = os.path.join(temp.path, name, 'original_sequence')
                self.assertTrue(os.path.isfile(path))
                # Close the handle deterministically instead of leaving it
                # to garbage collection.
                with open(path, 'r') as handle:
                    actual = handle.read()
                self.assertEqual(expected, actual)

            test('s1.ziphmm0', '0 0 0 0 1 1 1 1 0 0 ')
            test('s1.ziphmm1', '0 0 1 1 1 1 0 0 0 0 ')
            test('s1.ziphmm2', '1 1 2 1 ')
            test('s2.ziphmm0', '0 0 0 0 1 1 0 0 0 0 ')
            test('s2.ziphmm1', '1 1 0 0 0 0 1 1 ')
Esempio n. 11
0
    def test_load(self):
        """Verify load() for 2, 3 and 4 states and its effect on is_cached().

        The 2-state data is expected to be cached beforehand; the 3- and
        4-state data are expected to be cached only after load() is called.
        """
        with TempDirectory() as workspace:
            fixture_dir = create_ziphmm_directory(workspace.path)
            zip_dir = ZipDirectory(fixture_dir)

            # Keep every loaded object referenced until the test returns,
            # matching the lifetime of separate local variables.
            loaded = []
            cases = ((2, True), (3, False), (4, False))
            for state_count, cached_before in cases:
                if cached_before:
                    self.assertTrue(zip_dir.is_cached(state_count))
                else:
                    self.assertFalse(zip_dir.is_cached(state_count))

                sequence = zip_dir.load(state_count)
                loaded.append(sequence)
                self.assertEqual(state_count, sequence.state_count)
                self.assertTrue(zip_dir.is_cached(state_count))
Esempio n. 12
0
    def test_create_original_sequences(self):
        """Compare every generated 'original_sequence' file with its expected text.

        ZipDirectory.create_original_sequences is run on inputs/a.fa and
        inputs/b.fa with chunk_size=10; the test then reads the
        'original_sequence' file in each expected sub-directory and checks
        its full contents.
        """
        with TempDirectory() as temp:
            ZipDirectory.create_original_sequences(
                temp.path,
                locate("inputs/a.fa"),
                locate("inputs/b.fa"),
                chunk_size=10)

            def test(name, expected):
                path = os.path.join(temp.path, name, 'original_sequence')
                self.assertTrue(os.path.isfile(path))
                # Use a context manager so the file is closed right after
                # reading rather than whenever the object is collected.
                with open(path, 'r') as sequence_file:
                    actual = sequence_file.read()
                self.assertEqual(expected, actual)

            test('s1.ziphmm0', '0 0 0 0 1 1 1 1 0 0 ')
            test('s1.ziphmm1', '0 0 1 1 1 1 0 0 0 0 ')
            test('s1.ziphmm2', '1 1 2 1 ')
            test('s2.ziphmm0', '0 0 0 0 1 1 0 0 0 0 ')
            test('s2.ziphmm1', '1 1 0 0 0 0 1 1 ')
Esempio n. 13
0
 def test_has_original_sequence(self):
     """has_original_sequence is false for the bare temp path, true for the fixture."""
     with TempDirectory() as workspace:
         before = ZipDirectory.has_original_sequence(workspace.path)
         self.assertFalse(before)

         fixture_dir = create_ziphmm_directory(workspace.path)
         after = ZipDirectory.has_original_sequence(fixture_dir)
         self.assertTrue(after)
Esempio n. 14
0
    def test_load(self):
        """Check that load(n) returns an n-state object and leaves n cached."""
        with TempDirectory() as scratch:
            ziphmm_path = create_ziphmm_directory(scratch.path)
            wrapper = ZipDirectory(ziphmm_path)

            # Two states: expected to be cached already, and still after load.
            self.assertTrue(wrapper.is_cached(2))
            two_state = wrapper.load(2)
            self.assertEqual(2, two_state.state_count)
            self.assertTrue(wrapper.is_cached(2))

            # Three states: not cached until load() is called.
            self.assertFalse(wrapper.is_cached(3))
            three_state = wrapper.load(3)
            self.assertEqual(3, three_state.state_count)
            self.assertTrue(wrapper.is_cached(3))

            # Four states: same expectation as for three.
            self.assertFalse(wrapper.is_cached(4))
            four_state = wrapper.load(4)
            self.assertEqual(4, four_state.state_count)
            self.assertTrue(wrapper.is_cached(4))
Esempio n. 15
0
 def test_init_with_data(self):
     """ZipDirectory built on the fixture directory reports that same path."""
     with TempDirectory() as workspace:
         fixture_dir = create_ziphmm_directory(workspace.path)
         zip_dir = ZipDirectory(fixture_dir)
         reported_path = zip_dir.path
         self.assertEqual(fixture_dir, reported_path)
Esempio n. 16
0
 def test_has_original_sequence(self):
     """Check has_original_sequence before and after the fixture is created."""
     with TempDirectory() as scratch:
         # Queried first on the temporary path itself.
         self.assertFalse(
             ZipDirectory.has_original_sequence(scratch.path))

         # Then on the directory returned by the fixture helper.
         ziphmm_path = create_ziphmm_directory(scratch.path)
         self.assertTrue(
             ZipDirectory.has_original_sequence(ziphmm_path))