def export_active_sequences(self):
    """
    Export RNA sequences with active cross-references.

    Executes the query from `get_active_sequences_sql()` and, for every
    new `upi` encountered, writes the FASTA record to:

    * `seq_active` - always;
    * `seq_example` - while the row index is below `self.examples`;
    * `nhmmer_db` or `nhmmer_db_excluded` - depending on whether the
      sequence consists solely of the characters accepted by the regex;
    * `species_specific` - once per distinct taxid among the xrefs with
      `deleted='N'`, using a `>{upi}_{taxid} {description}` header.

    Only consecutive rows sharing a `upi` are collapsed, so the query is
    presumably ordered by `upi` (TODO confirm). In test mode the method
    returns once the row index reaches `self.test_entries`.

    A `psycopg2.Error` is handed to `log_database_error` and the process
    then exits with status 1.
    """
    last_upi = ''
    allowed_chars = re.compile('^[ABCDGHKMNRSTVWXYU]+$', re.IGNORECASE)
    species_template = ">{upi}_{taxid} {description}\n{sequence}"
    try:
        with cursor() as cur:
            cur.execute(self.get_active_sequences_sql())
            for row_index, row in enumerate(cur):
                if self.test and row_index >= self.test_entries:
                    return

                upi = row['upi']
                if upi == last_upi:
                    # same sequence as the previous row, already exported
                    continue
                last_upi = upi

                rna = Rna(upi=upi,
                          seq_short=row['seq_short'],
                          seq_long=row['seq_long'])
                fasta = rna.get_sequence_fasta()

                self.filehandles['seq_active'].write(fasta)
                if row_index < self.examples:
                    self.filehandles['seq_example'].write(fasta)

                if allowed_chars.match(rna.get_sequence()):
                    nhmmer_target = 'nhmmer_db'
                else:
                    nhmmer_target = 'nhmmer_db_excluded'
                self.filehandles[nhmmer_target].write(fasta)

                # species specific identifiers: reuse the sequence body
                # and swap the header line for a per-taxid one
                body = re.sub(r'^>.+?\n', '', fasta)
                active_xrefs = rna.xrefs.filter(deleted='N')
                taxids = set(active_xrefs.values_list('taxid', flat=True))
                for taxid in taxids:
                    description = rna.get_description(taxid=taxid)
                    record = species_template.format(
                        upi=upi,
                        taxid=taxid,
                        sequence=body,
                        description=description)
                    self.filehandles['species_specific'].write(record)
    except psycopg2.Error as exc:
        self.log_database_error(exc)
        sys.exit(1)
def export_active_sequences(self):
    """
    Export RNA sequences with active cross-references.

    For each distinct consecutive `upi` returned by the active-sequences
    query, the FASTA record is written to the `seq_active` filehandle, to
    `seq_example` (while the row index is below `self.examples`), and to
    either `nhmmer_db` or `nhmmer_db_excluded` depending on whether the
    sequence matches the allowed-character regex. One additional record
    per taxid of the non-deleted xrefs goes to `species_specific`.

    In test mode (`self.test`) processing stops as soon as the row index
    reaches `self.test_entries`.

    Database errors (`psycopg2.Error`) are logged through
    `log_database_error`, after which the process exits with status 1.
    """
    try:
        current_upi = ''
        sequence_alphabet = re.compile('^[ABCDGHKMNRSTVWXYU]+$', re.IGNORECASE)
        with cursor() as cur:
            cur.execute(self.get_active_sequences_sql())
            for position, entry in enumerate(cur):
                if self.test and position >= self.test_entries:
                    return
                if entry['upi'] == current_upi:
                    # duplicate of the row just handled
                    continue
                current_upi = entry['upi']

                rna = Rna(
                    upi=entry['upi'],
                    seq_short=entry['seq_short'],
                    seq_long=entry['seq_long'],
                )
                fasta = rna.get_sequence_fasta()
                self.filehandles['seq_active'].write(fasta)
                if position < self.examples:
                    self.filehandles['seq_example'].write(fasta)

                is_searchable = sequence_alphabet.match(rna.get_sequence())
                destination = 'nhmmer_db' if is_searchable else 'nhmmer_db_excluded'
                self.filehandles[destination].write(fasta)

                # species specific identifiers
                sequence = re.sub(r'^>.+?\n', '', fasta)  # drop the header line
                template = ">{upi}_{taxid} {description}\n{sequence}"
                xrefs = rna.xrefs.filter(deleted='N')
                for taxid in set(xrefs.values_list('taxid', flat=True)):
                    self.filehandles['species_specific'].write(
                        template.format(
                            upi=entry['upi'],
                            taxid=taxid,
                            sequence=sequence,
                            description=rna.get_description(taxid=taxid),
                        )
                    )
    except psycopg2.Error as exc:
        self.log_database_error(exc)
        sys.exit(1)
def process_inactive_sequences():
    """
    Create inactive.fasta file.

    Iterates over `self.cursor` (taken from the enclosing scope), converts
    each row with `self.row_to_dict` and writes the FASTA record of every
    new `upi` to the `seq_inactive` filehandle. Only consecutive rows with
    the same `upi` are collapsed, so the rows are presumably ordered by
    `upi` (TODO confirm).

    In test mode (`self.test`) at most `self.test_entries` sequences are
    written; the limit check uses `>=`, the same comparison as in
    `process_active_sequences`.
    """
    counter = 0  # number of sequences written so far
    previous_upi = ''
    for row in self.cursor:
        # `>=` (not `>`) so that exactly `test_entries` sequences are
        # written rather than one more than requested.
        if self.test and counter >= self.test_entries:
            return
        result = self.row_to_dict(row)
        if result['upi'] == previous_upi:
            # duplicate of the sequence written on the previous row
            continue
        previous_upi = result['upi']
        rna = Rna(upi=result['upi'],
                  seq_short=result['seq_short'],
                  seq_long=read_lob(result['seq_long']))
        fasta = rna.get_sequence_fasta()
        self.filehandles['seq_inactive'].write(fasta)
        counter += 1
def export_inactive_sequences(self):
    """
    Export RNA sequences without active cross-references.

    Runs the query returned by `get_inactive_sequences_sql()` and writes
    the FASTA record of each new `upi` to the `seq_inactive` filehandle.
    Only consecutive rows with the same `upi` are collapsed, so the query
    is presumably ordered by `upi` (TODO confirm).

    In test mode (`self.test`) at most `self.test_entries` rows are read;
    the limit check uses `>=`, the same comparison as in
    `export_active_sequences`.

    A `psycopg2.Error` is passed to `log_database_error` and the process
    then exits with status 1.
    """
    try:
        previous_upi = ''
        with cursor() as cur:
            cur.execute(self.get_inactive_sequences_sql())
            for counter, result in enumerate(cur):
                # `>=` (not `>`) so that the limit is `test_entries` rows
                # rather than one more than requested.
                if self.test and counter >= self.test_entries:
                    return
                if result['upi'] == previous_upi:
                    # duplicate of the sequence handled on the previous row
                    continue
                previous_upi = result['upi']
                rna = Rna(upi=result['upi'],
                          seq_short=result['seq_short'],
                          seq_long=result['seq_long'])
                fasta = rna.get_sequence_fasta()
                self.filehandles['seq_inactive'].write(fasta)
    except psycopg2.Error as exc:
        self.log_database_error(exc)
        sys.exit(1)
def process_active_sequences():
    """
    Create the active.fasta file and the example.fasta file.

    Walks `self.cursor` (from the enclosing scope) and, for each new
    `upi`, writes the FASTA record to `seq_active`, to `seq_example` while
    fewer than `self.examples` sequences have been written, and to either
    `nhmmer_db` or `nhmmer_db_excluded` depending on whether the sequence
    matches the allowed-character regex. A record per taxid of the
    non-deleted xrefs is also written to `species_specific`.

    In test mode (`self.test`) the function returns once
    `self.test_entries` sequences have been written.
    """
    iupac_only = re.compile('^[ABCDGHKMNRSTVWXYU]+$', re.IGNORECASE)  # IUPAC
    species_template = ">{upi}_{taxid} {description}\n{sequence}"
    written = 0  # sequences exported so far (duplicate rows do not count)
    last_upi = ''

    for raw_row in self.cursor:
        if self.test and written >= self.test_entries:
            return

        entry = self.row_to_dict(raw_row)
        upi = entry['upi']
        if upi == last_upi:
            continue
        last_upi = upi

        rna = Rna(upi=upi,
                  seq_short=entry['seq_short'],
                  seq_long=read_lob(entry['seq_long']))
        fasta = rna.get_sequence_fasta()

        self.filehandles['seq_active'].write(fasta)
        if written < self.examples:
            self.filehandles['seq_example'].write(fasta)

        if iupac_only.match(rna.get_sequence()):
            nhmmer_target = 'nhmmer_db'
        else:
            nhmmer_target = 'nhmmer_db_excluded'
        self.filehandles[nhmmer_target].write(fasta)

        # species specific identifiers: keep the sequence body and
        # replace the header line with a per-taxid one
        body = re.sub(r'^>.+?\n', '', fasta)
        active_xrefs = rna.xrefs.filter(deleted='N')
        for taxid in set(active_xrefs.values_list('taxid', flat=True)):
            description = rna.get_description(taxid=taxid)
            record = species_template.format(upi=upi,
                                             taxid=taxid,
                                             sequence=body,
                                             description=description)
            self.filehandles['species_specific'].write(record)

        written += 1
def get_description():
    """
    Get species-specific entry description.

    Builds an `Rna` for the `upi` of `row` (a name taken from the
    enclosing scope) and returns its description for the row's `taxid`.
    """
    return Rna(upi=row['upi']).get_description(taxid=row['taxid'])