def test_strip_non_alphabetic(self):
    """Check strip_non_alphabetic on letter-only, mixed and digit-only input."""
    # (input, expected) pairs: letters are kept, digits are removed.
    cases = (
        ("abcdefghi", "abcdefghi"),
        ("abcd890", "abcd"),
        ("123456789", ""),
    )
    for raw, expected in cases:
        self.assertEqual(expected, _utils.strip_non_alphabetic(raw))
def genome_name(cls, contig):
    """
    Derive the name of a non-complete genome from one of its contig names.

    A 12-character contig name is passed through strip_non_alphabetic and
    has "00000000" appended; a name of any other length is returned as is.

    Args:
        contig(str): contig's accession name

    Returns:
        the genome name as a string
    """
    # Only 12-character accessions are rewritten; everything else passes through.
    if len(contig) != 12:
        return contig

    prefix = strip_non_alphabetic(contig)
    return prefix + "00000000"
def from_ftp(self, seqdata):
    """Load a FASTA sequence through the WGS genome pipeline.

    The accession (reduced by strip_non_alphabetic) is handed to
    self.download_file together with 'fsa_nt.gz' to obtain the sequence via
    the NCBI FTP server. 'tmp/loading.fasta' is then read into seqdata with
    self.read_fasta, and the sequence is labelled as being from WGS.

    Args:
        seqdata: a SequenceMetadata instance storing sequence-related data
            that would otherwise be a data clump
    """
    accession_prefix = strip_non_alphabetic(str(seqdata.accession))
    self.download_file(accession_prefix, 'fsa_nt.gz')

    # Presumably download_file leaves the sequence at this path -- confirm.
    fasta_path = generate_path('tmp/loading.fasta')
    with open(fasta_path, 'rb') as fasta_file:
        self.read_fasta(fasta_file, seqdata)

    seqdata.dict["is_from"] = "WGS"
def get_seqdata(self, contigswrapper):
    """
    Fetch the sequence data for a genome and pass it to self.load_contigs.

    The genome is first requested from the Entrez "nuccore" database as
    FASTA text. When a returned record's description contains "complete",
    the genome is tagged "CORE" and fetched a second time for loading.
    Otherwise the 'fsa_nt.gz' file is downloaded through self.download_file
    and 'tmp/loading.fasta' is loaded, tagged "WGS". An HTTPError restarts
    the whole attempt; at most three attempts are made.

    Args:
        contigswrapper: a ContigsWrapper instance that holds contig metadata
            for a genome

    Returns:
        None. Results are delivered through self.load_contigs and
        contigswrapper.dict["is_from"].

    Raises:
        TypeError: if every attempt ended in an HTTPError, i.e. no file
            could be retrieved for analysis.
    """
    Entrez.email = "*****@*****.**"
    max_attempts = 3
    fetch_args = dict(
        db="nuccore",
        id=contigswrapper.genome,
        rettype="fasta",
        retmode="text"
    )

    for _attempt in range(max_attempts):
        try:
            print("Getting data from Entrez...")
            probe_handle = Entrez.efetch(**fetch_args)
            # NOTE(review): the original layout was lost; the else below is
            # read as belonging to the if, so the WGS branch runs for each
            # record that is not "complete". Presumably a response holds a
            # single record -- confirm.
            for record in SeqIO.parse(probe_handle, 'fasta'):
                if "complete" in record.description.lower():
                    contigswrapper.dict["is_from"] = "CORE"
                    print("Getting data from Entrez...")
                    # The probe handle is being consumed by the parser above,
                    # so a fresh response is requested for loading.
                    core_handle = Entrez.efetch(**fetch_args)
                    self.load_contigs(core_handle, contigswrapper)
                    break
                else:
                    print("Downloading data from WGS")
                    self.download_file(
                        strip_non_alphabetic(str(contigswrapper.genome)),
                        'fsa_nt.gz'
                    )
                    with open(
                        generate_path('tmp/loading.fasta'), 'rb'
                    ) as wgs_file:
                        contigswrapper.dict["is_from"] = "WGS"
                        self.load_contigs(wgs_file, contigswrapper)
        except HTTPError:
            # Transient failure: start the next attempt from scratch.
            continue
        break
    else:
        # Reached only when no attempt completed without an HTTPError. The
        # previous check (`handle is None` guarded by `except NameError`)
        # could never fire, so exhausted retries went unnoticed.
        raise TypeError("Could not retrieve file for analysis")