Beispiel #1
0
    def test_store_organism_publication(self):
        """Tests - link a stored publication to an organism through its DOI."""
        doi = "10.1186/s12864-016-2535-300002"
        mouse = Organism.objects.create(genus="Mus", species="musculus")

        # Mock bibtexparser database holding the single publication to load.
        bib_db = BibDatabase()
        bib_db.entries = [{
            "journal": "Nice Journal",
            "comments": "A comment",
            "pages": "12--23",
            "month": "jan",
            "abstract": ("This is an abstract. This line should be "
                         "long enough to test multilines..."),
            "title": "An amazing title",
            "year": "2013",
            "doi": doi,
            "volume": "12",
            "ID": "Teste2018",
            "author": "Foo, b. and Foo1, b. and Foo b.",
            "keyword": "keyword1, keyword2",
            "ENTRYTYPE": "article",
        }]
        # A fresh loader is used for every entry.
        for record in bib_db.entries:
            PublicationLoader().store_bibtex_entry(record)

        OrganismLoader().store_organism_publication(organism="Mus musculus",
                                                    doi=doi)

        # The organism must now be linked to the publication loaded above.
        linked = OrganismPub.objects.get(organism=mouse)
        self.assertEqual("An amazing title", linked.pub.title)
Beispiel #2
0
    def test_store_biopython_seq_record_DOI(self):
        """Tests - SequenceLoader and store_biopython_seq_record with a DOI."""
        doi = "10.1186/s12864-016-2535-300002"
        fasta_name = "sequence_doi.fasta"

        # Load the publication that the sequence file will reference.
        bib_db = BibDatabase()
        bib_db.entries = [{
            "journal": "Nice Journal",
            "comments": "A comment",
            "pages": "12--23",
            "month": "jan",
            "abstract": ("This is an abstract. This line should be "
                         "long enough to test multilines..."),
            "title": "An amazing title",
            "year": "2013",
            "doi": doi,
            "volume": "12",
            "ID": "Teste2018",
            "author": "Foo, b. and Foo1, b. and Foo b.",
            "keyword": "keyword1, keyword2",
            "ENTRYTYPE": "article",
        }]
        for record in bib_db.entries:
            PublicationLoader().store_bibtex_entry(record)

        # The stored publication must expose the DOI through its dbxref.
        pub = Pub.objects.get(uniquename="Teste2018")
        pub_dbxref = PubDbxref.objects.get(pub=pub)
        doi_dbxref = Dbxref.objects.get(dbxref_id=pub_dbxref.dbxref_id)
        self.assertEqual(doi, doi_dbxref.accession)

        # Store one sequence record through a loader created with the DOI.
        Organism.objects.create(genus="Mus", species="musculus")
        loader = SequenceLoader(filename=fasta_name, doi=doi)
        seq_record = SeqRecord(
            Seq("acgtgtgtgcatgctagatcgatgcatgca"),
            id="chr2",
            description="chromosome 2",
        )
        loader.store_biopython_seq_record(seq_record, "assembly",
                                          "Mus musculus")

        chromosome = Feature.objects.get(name="chromosome 2")
        self.assertEqual("chr2", chromosome.uniquename)

        # Walk FeaturePub -> PubDbxref -> Dbxref back to the DOI.
        feature_pub = FeaturePub.objects.get(pub_id=pub.pub_id)
        linked_pub_dbxref = PubDbxref.objects.get(pub_id=feature_pub.pub_id)
        linked_dbxref = Dbxref.objects.get(
            dbxref_id=linked_pub_dbxref.dbxref_id)
        self.assertEqual(doi, linked_dbxref.accession)

        # remove_file: the Dbxrefprop holding the file name must disappear.
        self.assertTrue(Dbxrefprop.objects.filter(value=fasta_name).exists())
        call_command("remove_file", "--name=" + fasta_name, "--verbosity=0")
        self.assertFalse(Dbxrefprop.objects.filter(value=fasta_name).exists())
 def test_store_pub_record(self):
     """Tests - PublicationLoader.store_bibtex_entry and remove_publication."""
     # A plain dict is enough to exercise PublicationLoader directly.
     chado_entry = {
         "ENTRYTYPE": "article",
         "ID": "Chado2006",
         "title": "A mock test title",
         "year": "2006",
         "pages": "12000",
         "doi": "10.1111/s12122-012-1313-4",
         "author": "Foo, b. and Foo1, b. and Foo b.",
         "volume": "v2",
         "journal": "Journal of Testing",
     }
     PublicationLoader().store_bibtex_entry(chado_entry)
     chado_pub = Pub.objects.get(uniquename="Chado2006")
     self.assertEqual("v2", chado_pub.volume)

     # Mock bibtexparser database whose only entry omits "pages".
     cesar_doi = "10.1111/s12122-012-1313-5"
     bib_db = BibDatabase()
     bib_db.entries = [{
         "journal": "Nice Journal",
         "comments": "A comment",
         "month": "jan",
         "abstract": ("This is an abstract. This line should be "
                      "long enough to test multilines..."),
         "title": "An amazing title",
         "year": "2013",
         "doi": cesar_doi,
         "volume": "12",
         "ID": "Cesar2013",
         "author": "Foo, b. and Foo1, b. and Foo b.",
         "keyword": "keyword1, keyword2",
         "ENTRYTYPE": "article",
     }]
     for record in bib_db.entries:
         PublicationLoader().store_bibtex_entry(record)

     cesar_pub = Pub.objects.get(uniquename="Cesar2013")
     self.assertEqual("12", cesar_pub.volume)
     # No "pages" key was supplied, so the stored value must be None.
     self.assertEqual(None, cesar_pub.pages)
     cesar_pub_dbxref = PubDbxref.objects.get(pub_id=cesar_pub.pub_id)
     self.assertEqual(cesar_pub.pub_id, cesar_pub_dbxref.pub_id)

     # remove_publication must delete the publication ...
     self.assertTrue(Pub.objects.filter(uniquename="Cesar2013").exists())
     call_command("remove_publication", "--doi=" + cesar_doi,
                  "--verbosity=0")
     self.assertFalse(Pub.objects.filter(uniquename="Cesar2013").exists())
     # ... while the dbxref carrying its DOI must remain.
     self.assertTrue(Dbxref.objects.filter(accession=cesar_doi).exists())
    def handle(self, file: str, verbosity: int = 1, cpu: int = 1, **options):
        """Load every entry of a BibTeX file using a pool of worker threads.

        Args:
            file: Path of the BibTeX file to load.
            verbosity: Progress messages and the progress bar are shown only
                when this is greater than zero.
            cpu: Maximum number of worker threads storing entries.
            **options: Remaining command options; not used here.

        Raises:
            CommandError: If ``FileValidator`` rejects the file, if reading or
                parsing it raises ``ValueError``, or if storing an entry
                raises ``ImportingError``.
        """
        if verbosity > 0:
            self.stdout.write("Preprocessing")

        try:
            FileValidator().validate(file)
        except ImportingError as e:
            raise CommandError(e) from e

        try:
            # The context manager closes the file even when parsing fails.
            with open(file) as bibtex_file:
                bib_database = bibtexparser.load(bibtex_file)
        except ValueError as e:
            # Must be raised: returning the exception would hide the failure.
            raise CommandError(e) from e

        bibtex = PublicationLoader()

        # The context manager shuts the pool down on every exit path,
        # including when a task failure is re-raised below.
        with ThreadPoolExecutor(max_workers=cpu) as pool:
            # Submit one storing task per entry that has an entry type.
            tasks = [
                pool.submit(bibtex.store_bibtex_entry, entry)
                for entry in bib_database.entries
                if entry["ENTRYTYPE"]
            ]
            if verbosity > 0:
                self.stdout.write("Loading")
            for task in tqdm(
                as_completed(tasks),
                total=len(tasks),
                disable=verbosity <= 0,
            ):
                try:
                    # result() re-raises any exception raised in the worker.
                    task.result()
                except ImportingError as e:
                    raise CommandError(e) from e

        if verbosity > 0:
            self.stdout.write(self.style.SUCCESS("Done"))
Beispiel #5
0
    def test_store_feature_publication(self):
        """Tests - link a stored publication to a feature through its DOI."""
        doi = "10.1186/s12864-016-2535-300002"

        # RO term: contained in
        ro_db = Db.objects.create(name="RO")
        ro_dbxref = Dbxref.objects.create(accession="00002", db=ro_db)
        relationship_cv = Cv.objects.create(name="relationship")
        Cvterm.objects.create(
            name="contained in",
            cv=relationship_cv,
            dbxref=ro_dbxref,
            is_obsolete=0,
            is_relationshiptype=0,
        )

        # SO terms: gene (kept, it types the feature below), then the rest
        so_db = Db.objects.create(name="SO")
        gene_dbxref = Dbxref.objects.create(accession="12345", db=so_db)
        sequence_cv = Cv.objects.create(name="sequence")
        gene_term = Cvterm.objects.create(
            name="gene",
            cv=sequence_cv,
            dbxref=gene_dbxref,
            is_obsolete=0,
            is_relationshiptype=0,
        )
        for accession, term_name in (
            ("123456", "polypeptide"),
            ("123455", "protein_match"),
        ):
            term_dbxref = Dbxref.objects.create(accession=accession, db=so_db)
            Cvterm.objects.create(
                name=term_name,
                cv=sequence_cv,
                dbxref=term_dbxref,
                is_obsolete=0,
                is_relationshiptype=0,
            )

        mouse = Organism.objects.create(genus="Mus", species="musculus")

        # The gene feature that will be linked to the publication.
        gff_db = Db.objects.create(name="GFF_SOURCE")
        gene_feature_dbxref = Dbxref.objects.create(accession="feat_gene",
                                                    db=gff_db)
        gene_feature = Feature.objects.create(
            organism=mouse,
            uniquename="feat_gene",
            dbxref=gene_feature_dbxref,
            is_analysis=False,
            type_id=gene_term.cvterm_id,
            is_obsolete=False,
            timeaccessioned=datetime.now(timezone.utc),
            timelastmodified=datetime.now(timezone.utc),
        )

        # Mock bibtexparser database holding the publication to load.
        bib_db = BibDatabase()
        bib_db.entries = [{
            "journal": "Nice Journal",
            "comments": "A comment",
            "pages": "12--23",
            "month": "jan",
            "abstract": ("This is an abstract. This line should be "
                         "long enough to test multilines..."),
            "title": "An amazing title",
            "year": "2013",
            "doi": doi,
            "volume": "12",
            "ID": "Teste2018",
            "author": "Foo, b. and Foo1, b. and Foo b.",
            "keyword": "keyword1, keyword2",
            "ENTRYTYPE": "article",
        }]
        for record in bib_db.entries:
            PublicationLoader().store_bibtex_entry(record)

        loader = FeatureLoader(filename="file.name", source="GFF_loader")
        loader.store_feature_publication(feature="feat_gene",
                                         soterm="gene",
                                         doi=doi)

        # The feature must now be linked to the publication loaded above.
        linked = FeaturePub.objects.get(feature=gene_feature)
        self.assertEqual("An amazing title", linked.pub.title)
Beispiel #6
0
    def test_store_tabix_GFF_feature(self):
        """Tests - store tabix GFF features and their relationships."""
        doi = "10.1186/s12864-016-2535-300002"

        # _global terms: exact (synonym_type cv) and part_of (sequence cv)
        global_db = Db.objects.create(name="_global")
        exact_dbxref = Dbxref.objects.create(accession="exact", db=global_db)
        synonym_cv = Cv.objects.create(name="synonym_type")
        Cvterm.objects.create(
            name="exact",
            cv=synonym_cv,
            dbxref=exact_dbxref,
            is_obsolete=0,
            is_relationshiptype=0,
        )
        part_of_dbxref = Dbxref.objects.create(accession="part_of",
                                               db=global_db)
        sequence_cv = Cv.objects.create(name="sequence")
        Cvterm.objects.create(
            name="part_of",
            cv=sequence_cv,
            dbxref=part_of_dbxref,
            is_obsolete=0,
            is_relationshiptype=0,
        )

        # SO terms, created in accession order and kept by name
        so_db = Db.objects.create(name="SO")
        so_terms = {}
        for accession, term_name in (
            ("00001", "assembly"),
            ("00002", "gene"),
            ("00003", "exon"),
            ("00004", "polypeptide"),
            ("00005", "protein_match"),
        ):
            term_dbxref = Dbxref.objects.create(accession=accession, db=so_db)
            so_terms[term_name] = Cvterm.objects.create(
                name=term_name,
                cv=sequence_cv,
                dbxref=term_dbxref,
                is_obsolete=0,
                is_relationshiptype=0,
            )

        # RO term: contained in
        ro_db = Db.objects.create(name="RO")
        ro_dbxref = Dbxref.objects.create(accession="00002", db=ro_db)
        relationship_cv = Cv.objects.create(name="relationship")
        Cvterm.objects.create(
            name="contained in",
            cv=relationship_cv,
            dbxref=ro_dbxref,
            is_obsolete=0,
            is_relationshiptype=0,
        )

        # Organism and the source feature (contig1) the GFF lines refer to
        mouse = Organism.objects.create(genus="Mus", species="musculus")
        fasta_db = Db.objects.create(name="FASTA_SOURCE")
        contig_dbxref = Dbxref.objects.create(accession="contig1",
                                              db=fasta_db)
        contig = Feature.objects.create(
            dbxref=contig_dbxref,
            organism=mouse,
            name="contig1",
            type=so_terms["assembly"],
            uniquename="contig1",
            is_analysis=False,
            is_obsolete=False,
            timeaccessioned=datetime.now(timezone.utc),
            timelastmodified=datetime.now(timezone.utc),
        )

        # DOI testing: load a publication and check the dbxref holding its DOI
        bib_db = BibDatabase()
        bib_db.entries = [{
            "journal": "Nice Journal",
            "comments": "A comment",
            "pages": "12--23",
            "month": "jan",
            "abstract": ("This is an abstract. This line should be "
                         "long enough to test multilines..."),
            "title": "An amazing title",
            "year": "2013",
            "doi": doi,
            "volume": "12",
            "ID": "Teste2018",
            "author": "Foo, b. and Foo1, b. and Foo b.",
            "keyword": "keyword1, keyword2",
            "ENTRYTYPE": "article",
        }]
        for record in bib_db.entries:
            PublicationLoader().store_bibtex_entry(record)
        pub = Pub.objects.get(uniquename="Teste2018")
        pub_dbxref = PubDbxref.objects.get(pub=pub)
        doi_dbxref = Dbxref.objects.get(dbxref_id=pub_dbxref.dbxref_id)
        self.assertEqual(doi, doi_dbxref.accession)

        # Link the contig to the publication and read the DOI back through it.
        feature_pub = FeaturePub.objects.create(
            feature_id=contig.feature_id,
            pub_id=pub_dbxref.pub_id,
        )
        linked_pub = Pub.objects.get(pub_id=feature_pub.pub_id)
        self.assertEqual("An amazing title", linked_pub.title)
        linked_pub_dbxref = PubDbxref.objects.get(pub=linked_pub)
        linked_dbxref = Dbxref.objects.get(
            dbxref_id=linked_pub_dbxref.dbxref_id)
        self.assertEqual(doi, linked_dbxref.accession)

        class TabixFeature(object):
            """mock tabix feature."""

            def __init__(self, **fields):
                # Expose every keyword argument as an instance attribute.
                vars(self).update(fields)

        gene_line = TabixFeature(
            contig="contig1",
            feature="gene",
            start="10",
            end="100",
            strand="+",
            frame="1",
            attributes="id=id1;name=name1",
        )
        exon_line = TabixFeature(
            contig="contig1",
            feature="exon",
            start="10",
            end="100",
            strand="-",
            frame="2",
            attributes="id=id2;name=name2;parent=id1",
        )

        loader = FeatureLoader(filename="file.name", source="GFF_source")
        organism = "Mus musculus"
        qtl = False
        for gff_line in (gene_line, exon_line):
            loader.store_tabix_GFF_feature(gff_line, organism, qtl)

        # Store the relationships collected while loading the features.
        for relation in loader.relationships:
            loader.store_relationship(organism, relation["subject_id"],
                                      relation["object_id"])

        exon = Feature.objects.get(uniquename="id2")
        exon_loc = Featureloc.objects.get(feature=exon)
        exon_relationship = FeatureRelationship.objects.get(
            object=exon.feature_id)
        related_feature = Feature.objects.get(
            feature_id=exon_relationship.subject.feature_id)
        self.assertEqual("name2", exon.name)
        self.assertEqual(10, exon_loc.fmin)
        self.assertEqual("id1", related_feature.uniquename)
Beispiel #7
0
    def test_store_tabix_VCF_feature(self):
        """Tests - store tabix VCF features and check their locations.

        Two mock VCF records are stored on the source feature ``contig1``.
        The second one (``id2``, ref ``G``, alts ``C,A``) is then expected
        to have feature locations ranked 0, 1 and 2 carrying ``G``, ``C``
        and ``A`` as residue info.
        """
        doi = "10.1186/s12864-016-2535-300002"

        # _global terms: exact (synonym_type cv) and part_of (sequence cv)
        test_db_global = Db.objects.create(name="_global")
        test_dbxref = Dbxref.objects.create(accession="exact",
                                            db=test_db_global)
        test_cv = Cv.objects.create(name="synonym_type")
        Cvterm.objects.create(
            name="exact",
            cv=test_cv,
            dbxref=test_dbxref,
            is_obsolete=0,
            is_relationshiptype=0,
        )
        test_dbxref = Dbxref.objects.create(accession="part_of",
                                            db=test_db_global)
        test_cv = Cv.objects.create(name="sequence")
        Cvterm.objects.create(
            name="part_of",
            cv=test_cv,
            dbxref=test_dbxref,
            is_obsolete=0,
            is_relationshiptype=0,
        )

        # SO terms, created in accession order and kept by name
        test_db = Db.objects.create(name="SO")
        so_terms = {}
        for accession, term_name in (
            ("00001", "assembly"),
            ("00002", "snv"),
            ("00003", "snp"),
            ("00004", "polypeptide"),
            ("00005", "protein_match"),
            ("00006", "quality_value"),
        ):
            test_dbxref = Dbxref.objects.create(accession=accession,
                                                db=test_db)
            so_terms[term_name] = Cvterm.objects.create(
                name=term_name,
                cv=test_cv,
                dbxref=test_dbxref,
                is_obsolete=0,
                is_relationshiptype=0,
            )

        # RO term: contained in
        test_db = Db.objects.create(name="RO")
        test_dbxref = Dbxref.objects.create(accession="00002", db=test_db)
        test_cv = Cv.objects.create(name="relationship")
        Cvterm.objects.create(
            name="contained in",
            cv=test_cv,
            dbxref=test_dbxref,
            is_obsolete=0,
            is_relationshiptype=0,
        )

        # Organism and the source feature (contig1) the VCF records refer to
        test_organism = Organism.objects.create(genus="Mus",
                                                species="musculus")
        test_db = Db.objects.create(name="FASTA_SOURCE")
        test_dbxref = Dbxref.objects.create(accession="contig1", db=test_db)
        feature = Feature.objects.create(
            dbxref=test_dbxref,
            organism=test_organism,
            name="contig1",
            type=so_terms["assembly"],
            uniquename="contig1",
            is_analysis=False,
            is_obsolete=False,
            timeaccessioned=datetime.now(timezone.utc),
            timelastmodified=datetime.now(timezone.utc),
        )

        # DOI testing: load a publication and check the dbxref holding its DOI
        db2 = BibDatabase()
        db2.entries = [{
            "journal": "Nice Journal",
            "comments": "A comment",
            "pages": "12--23",
            "month": "jan",
            "abstract": "This is an abstract. This line should be "
            "long enough to test multilines...",
            "title": "An amazing title",
            "year": "2013",
            "doi": doi,
            "volume": "12",
            "ID": "Teste2018",
            "author": "Foo, b. and Foo1, b. and Foo b.",
            "keyword": "keyword1, keyword2",
            "ENTRYTYPE": "article",
        }]
        for entry in db2.entries:
            PublicationLoader().store_bibtex_entry(entry)
        test_bibtex3 = Pub.objects.get(uniquename="Teste2018")
        test_bibtex3_pubdbxref = PubDbxref.objects.get(pub=test_bibtex3)
        test_bibtex3_dbxref = Dbxref.objects.get(
            dbxref_id=test_bibtex3_pubdbxref.dbxref_id)
        self.assertEqual(doi, test_bibtex3_dbxref.accession)

        # Link the contig to the publication. Both objects were just fetched
        # or created above, so no None fallback is needed here.
        featurepub_test = FeaturePub.objects.create(
            feature_id=feature.feature_id,
            pub_id=test_bibtex3_pubdbxref.pub_id)
        test_pub = Pub.objects.get(pub_id=featurepub_test.pub_id)
        self.assertEqual("An amazing title", test_pub.title)
        test_pubdbxref = PubDbxref.objects.get(pub=test_pub)
        test_dbxref = Dbxref.objects.get(dbxref_id=test_pubdbxref.dbxref_id)
        self.assertEqual(doi, test_dbxref.accession)

        # create the tabix features
        class TabixFeature(object):
            """mock tabix feature."""

        test_tabix_feature1 = TabixFeature()
        test_tabix_feature1.contig = "contig1"
        test_tabix_feature1.feature = "snp"
        test_tabix_feature1.pos = 10
        test_tabix_feature1.id = "id1"
        test_tabix_feature1.ref = "A"
        test_tabix_feature1.alt = "T,C"
        test_tabix_feature1.info = "TSA=snv"
        test_tabix_feature1.qual = 10

        test_tabix_feature2 = TabixFeature()
        test_tabix_feature2.contig = "contig1"
        test_tabix_feature2.feature = "snv"
        test_tabix_feature2.pos = 100
        test_tabix_feature2.id = "id2"
        test_tabix_feature2.ref = "G"
        test_tabix_feature2.alt = "C,A"
        test_tabix_feature2.info = "VC=snp;SAO=0"
        test_tabix_feature2.qual = 20

        # instantiate the loader
        test_feature_file = FeatureLoader(filename="file.name",
                                          source="VCF_SOURCE")

        organism = "Mus musculus"
        # store the tabix features
        test_feature_file.store_tabix_VCF_feature(test_tabix_feature1,
                                                  organism)
        test_feature_file.store_tabix_VCF_feature(test_tabix_feature2,
                                                  organism)

        test_feature = Feature.objects.get(uniquename="id2")
        # Order explicitly and fetch once: indexing an unordered queryset
        # runs one query per index with no guaranteed row order.
        test_featurelocs = list(
            Featureloc.objects.filter(feature=test_feature).order_by("rank"))
        # Fail with a clear message instead of an IndexError below.
        self.assertGreaterEqual(len(test_featurelocs), 3)
        self.assertEqual(100, test_featurelocs[0].fmin)
        self.assertEqual("G", test_featurelocs[0].residue_info)
        self.assertEqual("C", test_featurelocs[1].residue_info)
        self.assertEqual("A", test_featurelocs[2].residue_info)
        self.assertEqual(0, test_featurelocs[0].rank)
        self.assertEqual(1, test_featurelocs[1].rank)
        self.assertEqual(2, test_featurelocs[2].rank)
        self.assertEqual("contig1", test_featurelocs[0].srcfeature.uniquename)