Example #1
0
    def test_strip_non_alphabetic(self):
        """Check strip_non_alphabetic on letter-only, mixed and digit-only input."""
        # (input, expected) pairs, checked in order: a purely alphabetic
        # string comes back unchanged, trailing digits are removed, and a
        # digits-only string collapses to the empty string.
        cases = (
            ("abcdefghi", "abcdefghi"),
            ("abcd890", "abcd"),
            ("123456789", ""),
        )
        for raw, expected in cases:
            stripped = _utils.strip_non_alphabetic(raw)
            self.assertEqual(expected, stripped)
Example #2
0
    def genome_name(cls, contig):
        """
        Derives the name of a non-complete genome from one of its contig
        names.

        A 12-character contig name is passed through strip_non_alphabetic
        and has "00000000" appended; a name of any other length is returned
        unchanged.

        Args:
            contig(str): contig's accession name

        Returns:
            str: the genome name derived from the contig name
        """
        # Only 12-character names are rewritten; everything else is already
        # treated as the genome name.
        if len(contig) != 12:
            return contig

        prefix = strip_non_alphabetic(contig)
        return prefix + "00000000"
Example #3
0
    def from_ftp(self, seqdata):
        """Loads the FASTA sequence through the WGS genome pipeline (NCBI FTP
        server) and labels the sequence as coming from the WGS pipeline.

        Args:
            seqdata: a SequenceMetadata instance storing sequence-related data
            that would otherwise be a data clump
        """
        # The download is keyed on the accession after it has been passed
        # through strip_non_alphabetic.
        accession = str(seqdata.accession)
        self.download_file(strip_non_alphabetic(accession), 'fsa_nt.gz')

        # The FASTA is read back from the fixed temporary location.
        fasta_path = generate_path('tmp/loading.fasta')
        with open(fasta_path, 'rb') as fasta:
            self.read_fasta(fasta, seqdata)

        # Record the origin only once the sequence has been read.
        seqdata.dict["is_from"] = "WGS"
Example #4
0
    def get_seqdata(self, contigswrapper):
        """
        Fetches the sequence data for a genome and loads its contigs.

        The genome is first requested from the Entrez "nuccore" database in
        FASTA format. For each record returned:

        * if the description contains "complete", the genome is labelled
          "CORE" and loaded from a freshly fetched Entrez handle, and no
          further records are inspected;
        * otherwise the WGS file is fetched with self.download_file, the
          genome is labelled "WGS" and loaded from tmp/loading.fasta.

        The whole procedure is attempted up to three times; an attempt is
        abandoned and retried when it raises HTTPError.

        Args:
            contigswrapper: a ContigsWrapper instance that holds contig
            metadata for a genome

        Returns:
            None. The sequence data is handed to self.load_contigs.

        Raises:
            TypeError: if all three attempts failed with an HTTPError
        """
        Entrez.email = "*****@*****.**"

        for _ in range(3):
            try:
                print("Getting data from Entrez...")
                probe = Entrez.efetch(
                    db="nuccore", id=contigswrapper.genome,
                    rettype="fasta", retmode="text"
                )
                try:
                    for record in SeqIO.parse(probe, 'fasta'):
                        if "complete" in record.description.lower():
                            contigswrapper.dict["is_from"] = "CORE"
                            # The probe handle has been partly consumed by
                            # the parser, so fetch the data again for
                            # loading.
                            print("Getting data from Entrez...")
                            fasta = Entrez.efetch(
                                db="nuccore", id=contigswrapper.genome,
                                rettype="fasta", retmode="text"
                            )
                            try:
                                self.load_contigs(fasta, contigswrapper)
                            finally:
                                fasta.close()
                            break
                        else:
                            # NOTE(review): there is no break on this path,
                            # so the download and load run once per
                            # non-complete record -- confirm this is
                            # intended.
                            print("Downloading data from WGS")
                            self.download_file(
                                strip_non_alphabetic(
                                    str(contigswrapper.genome)
                                ),
                                'fsa_nt.gz'
                            )
                            with open(
                                generate_path('tmp/loading.fasta'),
                                'rb'
                            ) as wgs_fasta:
                                contigswrapper.dict["is_from"] = "WGS"
                                self.load_contigs(wgs_fasta, contigswrapper)
                finally:
                    # Release the network handle whether or not the attempt
                    # succeeded.
                    probe.close()
            except HTTPError:
                # Transient failure: move on to the next attempt.
                continue
            # The attempt finished without an HTTPError; stop retrying.
            break
        else:
            # Every attempt ended in an HTTPError, so nothing was loaded.
            raise TypeError("Could not retrieve file for analysis")