def from_file(cls, name, fasta_file, force=False):
    '''
        Create a Fasta object from a file.

        Parameters
        ----------
        name : str
            Name handed to the class constructor (`cls(name)`).
        fasta_file : object
            Whatever `RawFile` accepts; it is opened with `RawFile` and
            read line by line.
        force : bool (default: False)
            Forwarded unchanged to `add_chrom` for every chromosome.

        Returns
        -------
        The newly created object, with one chromosome added per
        header ('>') line found in the file. A file without any
        header yields an object with no chromosomes added.

        Raises
        ------
        ValueError
            If sequence text appears before the first header line.
    '''
    self = cls(name)
    with RawFile(fasta_file) as IN, self._db as db:
        cur = db.cursor()
        # `chrom_name is None` means "no header seen yet". A separate local
        # is used so the `name` parameter is not shadowed.
        chrom_name, attrs = None, []
        seqs = []
        for line in IN:
            line = line.strip()
            if line.startswith('>'):
                # Finish the last chromosome before adding a new one.
                # Flushing is keyed on having seen a header rather than on
                # `seqs` being non-empty, so a record without sequence lines
                # is treated the same whether or not it is the last one.
                if chrom_name is not None:
                    cur_chrom = Chromosome(chrom_name, seqs, *attrs)
                    self.add_chrom(cur_chrom, cur=cur, force=force)
                # First token is the chromosome name, the rest are attributes
                chrom_name, *attrs = line.lstrip('>').split()
                seqs = []
            elif line:
                if chrom_name is None:
                    raise ValueError(
                        'sequence data found before the first header line'
                    )
                # Extending a list with a str appends one character per item
                seqs += line
        # Add the last chromosome (nothing to add when no header was seen)
        if chrom_name is not None:
            cur_chrom = Chromosome(chrom_name, seqs, *attrs)
            self.add_chrom(cur_chrom, cur=cur, force=force)
    return self
def __getitem__(self, chrom_name):
    '''
        Return the Chromosome stored under `chrom_name`.

        Parameters
        ----------
        chrom_name : str
            Name to look up. If the direct sequence lookup fails, the
            name is translated with `_get_nickname` and the lookup is
            retried once with the translated name.

        Returns
        -------
        A Chromosome built from the stored sequence array and the
        attributes recorded for it, in insertion (rowid) order.

        Raises
        ------
        ValueError
            If `chrom_name` is not contained in this object.
        Exception
            Whatever the retry raises if it also fails; it is
            propagated unchanged.
    '''
    if chrom_name not in self:
        raise ValueError(f'{chrom_name} not in {self._m80_name}')
    try:
        seq_array = self._bcolz_array(chrom_name)
    except Exception:
        # presumably `chrom_name` was an alias; _get_nickname is expected to
        # map it to the name the sequence is stored under -- TODO confirm
        chrom_name = self._get_nickname(chrom_name)
        seq_array = self._bcolz_array(chrom_name)
    # Deliberately NOT inside a `finally`: when both lookups fail there is
    # no sequence to return, so the original error must surface instead of
    # being replaced by an UnboundLocalError on `seq_array`.
    attrs = [
        x[0] for x in self._db.cursor().execute(
            '''
            SELECT attribute FROM attributes
            WHERE chrom = ?
            ORDER BY rowid -- This preserves the ordering of attrs
            ''', (chrom_name, ))
    ]
    return Chromosome(chrom_name, seq_array, *attrs)
def chr1():
    '''
        Build a Chromosome named 'chr1' whose sequence is the
        character 'A' repeated 500000 times.
    '''
    sequence = 'A' * 500000
    return Chromosome('chr1', sequence)
def test_init_from_seq():
    '''
        Smoke test: a Chromosome can be constructed from a list of
        single-character strings without raising.
    '''
    bases = ['a', 'c', 'g', 't']
    chrom = Chromosome('chr1', bases)
    # The only real check is that the constructor call above did not raise.
    assert True
def chr2():
    '''
        Build a Chromosome whose sequence is the character 'C'
        repeated 500000 times.
    '''
    # NOTE(review): the chromosome is named 'chr1' although this function is
    # called chr2 -- confirm whether that is intentional.
    sequence = 'C' * 500000
    return Chromosome('chr1', sequence)