예제 #1
0
 def Conway_closure(self, tmp_fpath=None):
     """Greedily accept every available sequence index as a barcode.

     Indices are taken in the order produced by ``self.seq_idx_iter_func()``.
     Each index for which ``self._idx_is_available`` is true is passed to
     ``self._add_barcode`` and logged. When ``tmp_fpath`` is truthy, the DNA
     form of each accepted barcode is appended to that file, one per line.
     """
     for candidate in self.seq_idx_iter_func():
         if not self._idx_is_available(candidate):
             continue
         self._add_barcode(candidate)
         dna = num2dna(candidate, self.bc_len)
         log.info('Found barcode {}: {}'.format(len(self.barcodes), dna))
         if not tmp_fpath:
             continue
         # Re-open in append mode for each barcode so progress is saved as we go.
         with open(tmp_fpath, 'a') as checkpoint:
             checkpoint.write('{}\n'.format(dna))
예제 #2
0
 def manual_barcodes_test(self):
     """Brute-force pairwise distance check over the current barcodes.

     Every unordered pair of barcodes is converted to DNA and measured with
     ``editmeasures.free_divergence``. The first pair whose distance is below
     ``self.max_err`` is logged as a failure and the check stops there; if no
     such pair exists, a pass message is logged.
     """
     idxs = list(self.barcodes)
     for pos, first_idx in enumerate(idxs):
         first = num2dna(first_idx, self.bc_len)
         # Only compare against later entries so each pair is visited once.
         for second_idx in idxs[pos + 1:]:
             second = num2dna(second_idx, self.bc_len)
             dist = editmeasures.free_divergence(first, second)
             if dist < self.max_err:
                 banner = '!'*10
                 log.error(banner + ' FAIL ' + banner)
                 log.error('Distance {} between {} and {}.'.format(dist, first, second))
                 return
     log.info('Barcodes Pass Manual Check')
예제 #3
0
 def iterate_approx_encode_sphere(self, center_idx):
     """Yield sequence indices from a FreeDivSphere shell around ``center_idx``.

     The sphere is built on the DNA word for ``center_idx`` with radius
     ``2 * self.max_err`` and ``min_r=self.max_err + 1``; indices come from
     its ``parallel_num_iterator()``.
     """
     center_word = num2dna(center_idx, self.bc_len)
     outer_r = 2 * self.max_err
     inner_r = self.max_err + 1
     shell = FreeDivSphere.FreeDivSphere(center_word, outer_r, min_r=inner_r)
     for idx in shell.parallel_num_iterator():
         yield idx
예제 #4
0
 def Conway_closure_until_satisfied(self, n_desired_barcodes):
     """Greedily add available barcodes until enough have been collected.

     Indices are taken in the order produced by ``self.seq_idx_iter_func()``.
     Each available index is added and logged; the search stops as soon as
     ``len(self.barcodes)`` reaches ``n_desired_barcodes``. The count is only
     checked after an addition.
     """
     for candidate in self.seq_idx_iter_func():
         if not self._idx_is_available(candidate):
             continue
         self._add_barcode(candidate)
         dna = num2dna(candidate, self.bc_len)
         log.info('Found barcode {}: {}'.format(len(self.barcodes), dna))
         if len(self.barcodes) >= n_desired_barcodes:
             break
예제 #5
0
 def exclude_barcodes(self, exclude_fpath):
     """Mark every barcode listed in a file as reserved.

     Each line of ``exclude_fpath`` is stripped and converted with ``dna2num``;
     the matching entry of ``self.reserved_words`` is then set to 1.

     Raises:
         AssertionError: if a listed word's entry in ``self.reserved_words``
             is not 0 when it is reached. The message is the offending DNA word.
     """
     log.info('Excluding barcodes in {}...'.format(exclude_fpath))
     # Read inside a context manager so the file handle is always closed.
     with open(exclude_fpath) as exclude_file:
         exclude_bc_idxs = [dna2num(line.strip()) for line in exclude_file]
     for seq_idx in exclude_bc_idxs:
         assert self.reserved_words[seq_idx] == 0, num2dna(
             seq_idx, self.bc_len)
         self.reserved_words[seq_idx] = 1
예제 #6
0
    def find_barcode_4sets(
        self,
        AT_max,
        GC_max,
        seqs_so_far=None,
        prev_spheres=None,
        tmp_fpath=None,
        last_prev_Aidx=None,
    ):
        """
        A barcode 4-set is here defined as a set of four barcodes such that no two barcodes have
        the same base in the same position. I.e., all four bases are in each position in exactly
        one barcode.

        The search is recursive: each level extends ``seqs_so_far`` by one sequence whose first
        base is ``bases[len(seqs_so_far)]``. The outermost level (empty ``seqs_so_far``) keeps
        iterating after each attempt; deeper levels return after the first candidate they
        recurse on or complete, so there is no backtracking.

        Args:
            AT_max, GC_max: forwarded to ``idx_seq_iterator_avoiding_prev_bases``.
            seqs_so_far: DNA strings already chosen for the set being built. ``None`` (the
                default) means an empty list.
            prev_spheres: decode-sphere sets of the sequences in ``seqs_so_far``. ``None`` (the
                default) means an empty list.
            tmp_fpath: if truthy, each completed 4-set is appended to this file, followed by a
                blank line.
            last_prev_Aidx: if not None, candidates are consumed and discarded up to and
                including the first index ``>= last_prev_Aidx`` before searching. It is not
                forwarded to recursive calls.
        """
        # Fresh lists per call instead of shared mutable default arguments.
        if seqs_so_far is None:
            seqs_so_far = []
        if prev_spheres is None:
            prev_spheres = []

        first_base = bases[len(seqs_so_far)]

        seq_idx_iter_func = idx_seq_iterator_avoiding_prev_bases(
            self.bc_len, AT_max, GC_max, first_base, seqs_so_far)

        # Restart previous run (For 'A' seqs only)
        seq_idx_iter = seq_idx_iter_func()
        if last_prev_Aidx is not None:
            for seq_idx in seq_idx_iter:
                if seq_idx >= last_prev_Aidx:
                    break

        for seq_idx in seq_idx_iter:
            if self._idx_is_available(seq_idx):
                seq_sphere = set(self.iterate_decode_sphere(seq_idx))
                for prev_sphere in prev_spheres:
                    # Reject the candidate if its sphere overlaps any earlier member's sphere.
                    if prev_sphere & seq_sphere:
                        break
                else:
                    new_seqs = seqs_so_far + [num2dna(seq_idx, self.bc_len)]
                    new_spheres = prev_spheres + [seq_sphere]
                    if len(new_seqs) == 4:
                        assert first_base == bases[-1], new_seqs
                        for seq in new_seqs:
                            self._add_barcode(dna2num(seq))
                        if tmp_fpath:
                            with open(tmp_fpath, 'a') as out:
                                out.write('\n'.join(new_seqs) + '\n\n')
                        self.dna_barcode_4sets.append(new_seqs)
                        log.info('Found barcode set {}: {}'.format(
                            len(self.dna_barcode_4sets), new_seqs))
                        return
                    else:
                        self.find_barcode_4sets(AT_max, GC_max, new_seqs,
                                                new_spheres, tmp_fpath)
                        # Only the outermost level (one sequence chosen) keeps searching.
                        if len(new_seqs) > 1:
                            return
            elif self.reserved_words[seq_idx] == 0:
                # Unavailable but not yet flagged: mark the word as reserved.
                self.reserved_words[seq_idx] = 1
예제 #7
0
    def restart_Conway_closure(self, prev_fpath, tmp_fpath=None):
        """Resume a Conway closure from a file of previously found barcodes.

        The barcodes listed in ``prev_fpath`` (one DNA word per line) are re-added via
        ``self._add_barcode``. The sequence iterator is then advanced until it yields an
        index that is not below the largest previous index, and the greedy closure continues
        with the indices after that one.

        Args:
            prev_fpath: path of the file holding the previously found barcodes.
            tmp_fpath: if truthy, this file is first overwritten with the previous barcodes
                and each newly found barcode is appended to it. When falsy, no file is
                written.
        """
        log.info('Restarting {}...'.format(prev_fpath))
        # Read inside a context manager so the file handle is always closed.
        with open(prev_fpath) as prev_file:
            prev_bc_idxs = [dna2num(line.strip()) for line in prev_file]
        for bc_idx in prev_bc_idxs:
            self._add_barcode(bc_idx)
            log.info('Adding previous {}: {}'.format(len(self.barcodes),
                                                     num2dna(bc_idx, self.bc_len)))
        # tmp_fpath is optional, so guard this write like the append further down.
        if tmp_fpath:
            with open(tmp_fpath, 'w') as out:
                for bc_idx in prev_bc_idxs:
                    out.write('{}\n'.format(num2dna(bc_idx, self.bc_len)))

        seq_iter = self.seq_idx_iter_func()
        max_prev = max(prev_bc_idxs)
        # Skip ahead: consume indices up to and including the first one >= max_prev.
        bc_idx = next(seq_iter)
        while bc_idx < max_prev:
            bc_idx = next(seq_iter)
        log.info('Reached last previous barcode: {}'.format(num2dna(max_prev, self.bc_len)))
        log.info('Restarting after {}'.format(num2dna(bc_idx, self.bc_len)))

        for seq_idx in seq_iter:
            if self._idx_is_available(seq_idx):
                self._add_barcode(seq_idx)
                log.info('Found barcode {}: {}'.format(len(self.barcodes),
                                                       num2dna(seq_idx, self.bc_len)))
                if tmp_fpath:
                    with open(tmp_fpath, 'a') as out:
                        out.write('{}\n'.format(num2dna(seq_idx, self.bc_len)))
예제 #8
0
    def iterator_test(self, iterator='self'):
        """Compare one of this object's iterators against a brute-force enumeration.

        ``iterator`` selects what is tested: ``'self'`` iterates the object directly, and
        ``'parallel_num'`` converts the output of ``parallel_num_iterator()`` with
        ``seqtools.num2dna``. The reference set holds every word over ``bases`` of length
        ``len(self.c)`` whose free divergence from ``self.c`` lies in ``[self.min_r, self.r]``.
        The outcome is logged rather than returned.

        Raises:
            ValueError: if ``iterator`` is not one of the two supported names.
        """
        log.info('Generating self set...')
        if iterator == 'self':
            self_set = set(self)
        elif iterator == 'parallel_num':
            numeric_seqs = self.parallel_num_iterator()
            self_set = set()
            for seq in numeric_seqs:
                self_set.add(seqtools.num2dna(seq, len(self.c)))
        else:
            raise ValueError('Invalid iterator to test: {}'.format(iterator))

        log.info('Generating brute force set...')
        bf_set = set()
        for tup in itertools.product(bases, repeat=len(self.c)):
            word = ''.join(tup)
            if self.min_r <= editmeasures.free_divergence(self.c, word) <= self.r:
                bf_set.add(word)

        log.info('Comparing...')
        if self_set == bf_set:
            log.info('PASS')
            return
        n_missing = len(bf_set - self_set)
        n_extra = len(self_set - bf_set)
        log.error('#### FAIL ####')
        log.error('{} missing seqs, {} extra seqs'.format(n_missing, n_extra))
예제 #9
0
 def iterate_decode_sphere(self, center_idx):
     """Yield sequence indices from the FreeDivSphere around ``center_idx``.

     The sphere is built on the DNA word for ``center_idx`` with radius
     ``self.max_err``; indices come from its ``parallel_num_iterator()``.
     """
     center_word = num2dna(center_idx, self.bc_len)
     sphere = FreeDivSphere.FreeDivSphere(center_word, self.max_err)
     for idx in sphere.parallel_num_iterator():
         yield idx
예제 #10
0
 def dna_barcodes(self):
     """Return a generator over the current barcodes converted to DNA strings."""
     barcode_idxs = self.barcodes
     return (num2dna(idx, self.bc_len) for idx in barcode_idxs)