def test_store_biopython_seq_record_DOI(self):
    """Tests - SequenceLoader built with a DOI, then the remove_file command."""
    doi = "10.1186/s12864-016-2535-300002"

    # Store the publication that carries the DOI from a bibtex entry.
    bib_db = BibDatabase()
    bib_db.entries = [{
        "journal": "Nice Journal",
        "comments": "A comment",
        "pages": "12--23",
        "month": "jan",
        "abstract": "This is an abstract. This line should be "
                    "long enough to test multilines...",
        "title": "An amazing title",
        "year": "2013",
        "doi": doi,
        "volume": "12",
        "ID": "Teste2018",
        "author": "Foo, b. and Foo1, b. and Foo b.",
        "keyword": "keyword1, keyword2",
        "ENTRYTYPE": "article",
    }]
    for bib_entry in bib_db.entries:
        PublicationLoader().store_bibtex_entry(bib_entry)

    # The stored publication must be linked to a dbxref holding the DOI.
    pub = Pub.objects.get(uniquename="Teste2018")
    pub_dbxref = PubDbxref.objects.get(pub=pub)
    bib_dbxref = Dbxref.objects.get(dbxref_id=pub_dbxref.dbxref_id)
    self.assertEqual(doi, bib_dbxref.accession)

    # Store one sequence through a loader created with that DOI.
    Organism.objects.create(genus="Mus", species="musculus")
    loader = SequenceLoader(filename="sequence_doi.fasta", doi=doi)
    record = SeqRecord(
        Seq("acgtgtgtgcatgctagatcgatgcatgca"),
        id="chr2",
        description="chromosome 2",
    )
    loader.store_biopython_seq_record(record, "assembly", "Mus musculus")

    feature = Feature.objects.get(name="chromosome 2")
    self.assertEqual("chr2", feature.uniquename)

    # Walk feature_pub -> pub_dbxref -> dbxref and check the DOI again.
    feature_pub = FeaturePub.objects.get(pub_id=pub.pub_id)
    feature_pub_dbxref = PubDbxref.objects.get(pub_id=feature_pub.pub_id)
    feature_dbxref = Dbxref.objects.get(
        dbxref_id=feature_pub_dbxref.dbxref_id)
    self.assertEqual(doi, feature_dbxref.accession)

    # test remove_file
    file_props = Dbxrefprop.objects.filter(value="sequence_doi.fasta")
    self.assertTrue(file_props.exists())
    call_command("remove_file", "--name=sequence_doi.fasta",
                 "--verbosity=0")
    # A fresh queryset is built so the check reflects the post-removal state.
    self.assertFalse(
        Dbxrefprop.objects.filter(value="sequence_doi.fasta").exists())
def handle(
    self, file: str, soterm: str, verbosity: int = 1, cpu: int = 1, **options
):
    """Execute the main function.

    Validates ``file``, parses it as FASTA and submits one
    ``SequenceLoader.add_sequence_to_feature`` call per record to a
    thread pool.

    Args:
        file: Path to the FASTA file to read.
        soterm: Term passed unchanged to ``add_sequence_to_feature``.
        verbosity: Progress messages are written to stdout when > 0.
        cpu: Maximum number of worker threads.
        **options: Remaining command options; not used here.

    Raises:
        CommandError: If validating the file or creating the loader
            raises ``ImportingError``.
    """
    try:
        FileValidator().validate(file)
    except ImportingError as e:
        raise CommandError(e) from e

    # retrieve only the file name
    filename = os.path.basename(file)
    try:
        sequence_file = SequenceLoader(filename=filename)
    except ImportingError as e:
        raise CommandError(e) from e

    if verbosity > 0:
        self.stdout.write("Processing file: {}".format(filename))

    # The pool is a context manager so it is always shut down, even when
    # a task fails.
    with ThreadPoolExecutor(max_workers=cpu) as pool:
        # SeqIO.parse is lazy: the handle has to stay open until every
        # record has been read, and is closed right after.
        with open(file) as fasta_handle:
            tasks = [
                pool.submit(
                    sequence_file.add_sequence_to_feature, fasta, soterm
                )
                for fasta in SeqIO.parse(fasta_handle, "fasta")
            ]

        if verbosity > 0:
            self.stdout.write("Loading")
        try:
            for task in tqdm(as_completed(tasks), total=len(tasks)):
                # result() re-raises worker exceptions; a truthy return
                # value is raised as well, as before.
                outcome = task.result()
                if outcome:
                    raise outcome
        finally:
            # On failure this drops tasks that have not started, so the
            # pool shutdown does not process the rest of the file.
            # Cancelling a finished task is a no-op.
            for pending in tasks:
                pending.cancel()

    if verbosity > 0:
        self.stdout.write(self.style.SUCCESS("Done with {}".format(filename)))
def test_store_biopython_seq_record(self):
    """Tests - SequenceLoader creation and store_biopython_seq_record."""
    residues = "acgtgtgtgcatgctagatcgatgcatgca"

    # test insert sequence
    Organism.objects.create(genus="Mus", species="musculus")
    loader = SequenceLoader(filename="sequence.fasta")
    record = SeqRecord(Seq(residues), id="chr1", description="chromosome 1")
    loader.store_biopython_seq_record(record, "assembly", "Mus musculus")

    stored = Feature.objects.get(
        uniquename="chr1",
        organism__genus="Mus",
        organism__species="musculus",
    )
    self.assertEqual("chr1", stored.uniquename)
    self.assertEqual("chromosome 1", stored.name)
    self.assertEqual(residues, stored.residues)

    # test insert no sequence
    record = SeqRecord(Seq(residues), id="chr2")
    loader.store_biopython_seq_record(
        record, "assembly", "Mus musculus", ignore_residues=True)
    stored = Feature.objects.get(uniquename="chr2")
    self.assertEqual("chr2", stored.uniquename)
    self.assertEqual("", stored.residues)

    # test fail insert same id, different organism
    # dbxref.accession must be unique
    Organism.objects.create(genus="H**o", species="sapiens")
    loader = SequenceLoader(filename="sequence2.fasta")
    record = SeqRecord(
        Seq("atgctagctagcatgactgactggtgcagtgcatgca"),
        id="chr1",
        description="chromosome 1",
    )
    with self.assertRaises(IntegrityError):
        loader.store_biopython_seq_record(
            record, "assembly", "H**o sapiens")
def handle(self,
           file: str,
           organism: str,
           soterm: str,
           nosequence: bool = False,
           cpu: int = 1,
           description: str = None,
           url: str = None,
           doi: str = None,
           verbosity: int = 1,
           **options) -> None:
    """Execute the main function.

    Validates ``file``, parses it as FASTA and submits one
    ``SequenceLoader.store_biopython_seq_record`` call per record to a
    thread pool.

    Args:
        file: Path to the FASTA file to read.
        organism: Passed unchanged to ``store_biopython_seq_record``.
        soterm: Passed unchanged to ``store_biopython_seq_record``.
        nosequence: Passed as the fourth positional argument of
            ``store_biopython_seq_record``.
        cpu: Maximum number of worker threads.
        description: Forwarded to the ``SequenceLoader`` constructor.
        url: Forwarded to the ``SequenceLoader`` constructor.
        doi: Forwarded to the ``SequenceLoader`` constructor.
        verbosity: Progress messages are written to stdout when > 0.
        **options: Remaining command options; not used here.

    Raises:
        CommandError: If validating the file or creating the loader
            raises ``ImportingError``.
    """
    if verbosity > 0:
        self.stdout.write("Preprocessing")

    try:
        FileValidator().validate(file)
    except ImportingError as e:
        raise CommandError(e) from e

    # retrieve only the file name
    filename = os.path.basename(file)
    try:
        sequence_file = SequenceLoader(filename=filename,
                                       description=description,
                                       url=url,
                                       doi=doi)
    except ImportingError as e:
        raise CommandError(e) from e

    # The pool is a context manager so it is always shut down, even when
    # a task fails.
    with ThreadPoolExecutor(max_workers=cpu) as pool:
        # SeqIO.parse is lazy: the handle has to stay open until every
        # record has been read, and is closed right after.
        with open(file) as fasta_handle:
            tasks = [
                pool.submit(
                    sequence_file.store_biopython_seq_record,
                    fasta,
                    soterm,
                    organism,
                    nosequence,
                )
                for fasta in SeqIO.parse(fasta_handle, "fasta")
            ]

        if verbosity > 0:
            self.stdout.write("Loading")
        try:
            for task in tqdm(as_completed(tasks), total=len(tasks)):
                # result() re-raises worker exceptions; a truthy return
                # value is raised as well, as before.
                outcome = task.result()
                if outcome:
                    raise outcome
        finally:
            # On failure this drops tasks that have not started, so the
            # pool shutdown does not process the rest of the file.
            # Cancelling a finished task is a no-op.
            for pending in tasks:
                pending.cancel()

    if verbosity > 0:
        self.stdout.write(self.style.SUCCESS("Done"))
def test_add_sequence_to_feature(self):
    """Tests - add_sequence_to_feature on an already stored feature."""
    new_residues = "aaaaaaaaaaaaaaaaaaaa"

    # test insert sequence
    Organism.objects.create(genus="Mus", species="musculus")
    loader = SequenceLoader(filename="sequence.fasta")
    initial_record = SeqRecord(
        Seq("acgtgtgtgcatgctagatcgatgcatgca"),
        id="chr1",
        description="chromosome 1",
    )
    loader.store_biopython_seq_record(
        initial_record, "assembly", "Mus musculus")

    # test add_sequence_to_feature
    updated_record = SeqRecord(
        Seq(new_residues), id="chr1", description="chromosome 1")
    loader.add_sequence_to_feature(updated_record, "assembly")

    # The feature is expected to hold the residues of the second record.
    stored = Feature.objects.get(uniquename="chr1")
    self.assertEqual(new_residues, stored.residues)
def test_fail_biopython_seq_record(self):
    """Tests - ImportingError when an identical record is stored twice."""
    # sequence already registered
    Organism.objects.create(genus="Mus", species="musculus")
    with self.assertRaises(ImportingError):
        # Loader creation stays inside the block, as in the original test,
        # so the same set of calls is covered by the assertion.
        loader = SequenceLoader(filename="sequence.fasta")
        # Two identical records are built and stored one after the other.
        for _ in range(2):
            record = SeqRecord(
                Seq("acgtgtgtgcatgctagatcgatgcatgca"),
                id="chr1",
                description="chromosome 1",
            )
            loader.store_biopython_seq_record(
                record, "assembly", "Mus musculus")