예제 #1
0
    def test_insert_geo(self):
        """Insert the words of dataset GDS987 and count the stored records."""
        gds = Dataset('GDS987').populate()
        Word2Geo.insert_geo(gds)

        # Pull back every record carrying this dataset's geo_id.
        cursor = Word2Geo.mongo().find({'geo_id': gds.geo_id})
        stored = list(cursor)

        self.assertEqual(len(stored), 249)
예제 #2
0
    def test_insert_geo(self):
        """Run Word2Geo.insert_geo on GDS987 and verify the record count."""
        populated = Dataset('GDS987').populate()
        Word2Geo.insert_geo(populated)

        # Query the collection for the records keyed by this geo_id.
        geo_filter = {'geo_id': populated.geo_id}
        found = list(Word2Geo.mongo().find(geo_filter))

        self.assertEqual(len(found), 249)
예제 #3
0
    def test_get_field_words(self):
        """Check the number of words extracted per field for dataset GDS987."""
        gds = Dataset('GDS987').populate()
        words = Word2Geo.get_field_words(gds)

        # NOTE(review): this reference listing is never compared against
        # `words`; only the per-field counts below are asserted, and those
        # counts differ from the lengths of these lists.
        description_words = [
            u'analysis', u'of', u'kidneys', u'from', u'adult', u'renal',
            u'transplant', u'recipients', u'subjected', u'to',
            u'calcineurin', u'inhibitor-free', u'immunosuppression',
            u'using', u'sirolimus', u'patients', u'treated', u'with',
            u'sirolimus', u'have', u'a', u'lower', u'prevalence', u'of',
            u'chronic', u'allograft', u'nephropathy', u'compared', u'to',
            u'those', u'treated', u'with', u'cyclosporine', u'a',
            u'calcineurin', u'inhibitor'
        ]
        title_words = [
            'kidney', u'transplant', u'response', u'to', u'calcineurin',
            u'inhibitor-free', u'immunosuppression', u'using', u'sirolimus'
        ]
        expected = {
            "description": description_words,
            "title": title_words,
            "summary": []
        }

        # Expected word count for each field of the dataset.
        expected_counts = {
            "description": 105,
            "title": 9 + 8 + 7,
            "summary": 0
        }
        for tag, n_words in expected_counts.items():
            found = len(words[tag])
            failure_msg = "%s: got: %d, expected: %d" % (tag, found, n_words)
            self.assertEqual(found, n_words, msg=failure_msg)
예제 #4
0
def insert_words(geo, words):
    '''
    Replace the stored word records for one GEO object.

    geo: GEO object; its geo_id selects the records to replace
    words: dict [k=source (aka 'tag'); v=list of words (maybe with dups)]

    Removes every existing record for geo.geo_id, then saves one record with
    keys [geo_id, word, source, count] per distinct (source, word) pair, where
    count is the number of times the word occurs in that source's list.

    Returns a dict [k=source; v=total number of words, dups included].
    Sources whose word list is empty do not appear in the result.
    '''
    # Local import so the function carries its own dependency.
    from collections import Counter

    mongo = Word2Geo.mongo()
    mongo.remove({'geo_id': geo.geo_id})  # remove the records for this geo

    totals = {}  # k=source, v=count
    for source, source_words in words.items():
        # All records for this geo were just removed, so duplicates can be
        # tallied in memory instead of a find_one/save round trip per word.
        tally = Counter(source_words)
        for word, count in tally.items():
            mongo.save({
                'geo_id': geo.geo_id,
                'word': word,
                'source': source,
                'count': count,
            })

        n_words = sum(tally.values())
        if n_words:  # a source only gets a total once it contributed a word
            totals[source] = n_words

    return totals
예제 #5
0
    def test_get_field_words(self):
        """Check the per-field word counts for the GEO object GSE10072."""
        series = Factory().newGEO('GSE10072')
        field_words = Word2Geo.get_field_words(series)

        n_title = len(field_words['title'])
        n_description = len(field_words['description'])
        n_summary = len(field_words['summary'])

        self.assertEqual(n_title, 42)
        self.assertEqual(n_description, 0)
        # not quite sure why this isn't 741
        self.assertEqual(n_summary, 738)
예제 #6
0
    def test_get_field_words(self):
        """Verify how many words each field of GSE10072 yields."""
        factory = Factory()
        geo_obj = factory.newGEO("GSE10072")
        by_field = Word2Geo.get_field_words(geo_obj)

        title_count = len(by_field["title"])
        self.assertEqual(title_count, 42)

        description_count = len(by_field["description"])
        self.assertEqual(description_count, 0)

        # not quite sure why this isn't 741
        summary_count = len(by_field["summary"])
        self.assertEqual(summary_count, 738)
예제 #7
0
    def test_get_pubmed_words(self):
        """Check the words fetched for the pubmed id of dataset GDS987."""
        gds = Dataset('GDS987').populate()
        expected_pubmed_id = str(15476476)
        self.assertEqual(gds.pubmed_id, expected_pubmed_id)
        words = Word2Geo.get_pubmed_words(gds.pubmed_id)

        # Expected number of words under each tag of the result.
        expected_counts = {"MeshHeading": 86, "AbstractText": 412, "ArticleTitle": 34}
        for tag, n_words in expected_counts.items():
            self.assertIn(tag, words)
            tag_words = words[tag]
            self.assertIsInstance(tag_words, list)
            self.assertEqual(len(tag_words), n_words)
예제 #8
0
    def test_get_field_words(self):
        """Verify the per-field word counts produced for dataset GDS987."""
        populated = Dataset('GDS987').populate()
        words = Word2Geo.get_field_words(populated)

        # NOTE(review): `expected` is built but never compared against
        # `words`; the assertions below check word counts only, and those
        # counts do not match the lengths of these lists.
        expected = {
            "description": [
                u'analysis', u'of', u'kidneys', u'from', u'adult', u'renal',
                u'transplant', u'recipients', u'subjected', u'to',
                u'calcineurin', u'inhibitor-free', u'immunosuppression',
                u'using', u'sirolimus', u'patients', u'treated', u'with',
                u'sirolimus', u'have', u'a', u'lower', u'prevalence', u'of',
                u'chronic', u'allograft', u'nephropathy', u'compared', u'to',
                u'those', u'treated', u'with', u'cyclosporine', u'a',
                u'calcineurin', u'inhibitor',
            ],
            "title": [
                'kidney', u'transplant', u'response', u'to', u'calcineurin',
                u'inhibitor-free', u'immunosuppression', u'using',
                u'sirolimus',
            ],
            "summary": [],
        }

        # Word count each field is expected to yield.
        counts_by_tag = {"description": 105, "title": 9+8+7, "summary": 0}
        for tag, n_words in counts_by_tag.items():
            actual = len(words[tag])
            self.assertEqual(
                actual,
                n_words,
                msg="%s: got: %d, expected: %d" % (tag, actual, n_words),
            )
예제 #9
0
    def test_get_pubmed_words(self):
        """Verify the per-tag word counts returned for GDS987's pubmed id."""
        dataset = Dataset('GDS987').populate()
        pubmed_id = dataset.pubmed_id
        self.assertEqual(pubmed_id, str(15476476))
        pubmed_words = Word2Geo.get_pubmed_words(dataset.pubmed_id)

        # Each tag must be present, hold a list, and have the expected size.
        wanted = {
            "MeshHeading": 86,
            "AbstractText": 412,
            "ArticleTitle": 34
        }
        for tag, n_words in wanted.items():
            self.assertIn(tag, pubmed_words)
            self.assertIsInstance(pubmed_words[tag], list)
            n_found = len(pubmed_words[tag])
            self.assertEqual(n_found, n_words)
예제 #10
0
 def test_insert_GSE10072(self):
     """Populate series GSE10072 and pass it to Word2Geo.insert_geo."""
     # No assertion follows: the test passes when neither call raises.
     populated = Series('GSE10072').populate()
     Word2Geo.insert_geo(populated)
예제 #11
0
import sys, os
sys.path.append(os.path.join(os.environ['AUREA_HOME'], 'src'))
sys.path.append(os.path.join(os.environ['TRENDS_HOME'], 'pylib'))

import GEO
from GEO.word2geo import Word2Geo
from warn import *

# For each GEO class, feed the first record its collection returns to
# Word2Geo.insert_geo.
geo_classes = [
    GEO.Series.Series,
    GEO.Dataset.Dataset,
    GEO.DatasetSubset.DatasetSubset,
]
for geo_cls in geo_classes:
    for doc in geo_cls.mongo().find():
        geo_obj = geo_cls(doc)
        warn("inserting %s" % geo_obj.geo_id)
        Word2Geo.insert_geo(geo_obj)
        # Stop after the first record of this class.
        break
예제 #12
0
 def test_insert_GSE10072(self):
     """Insert the words of series GSE10072 via Word2Geo.insert_geo."""
     # The test has no assertion; it succeeds if the insert does not raise.
     gse = Series('GSE10072')
     Word2Geo.insert_geo(gse.populate())
예제 #13
0
import sys, os
sys.path.append(os.path.join(os.environ['AUREA_HOME'], 'src'))
sys.path.append(os.path.join(os.environ['TRENDS_HOME'], 'pylib'))

import GEO
from GEO.word2geo import Word2Geo
from warn import *

# Walk the GEO classes and insert the words of the first record found in
# each class's collection.
classes_to_index = [
    GEO.Series.Series,
    GEO.Dataset.Dataset,
    GEO.DatasetSubset.DatasetSubset,
]
for geo_class in classes_to_index:
    records = geo_class.mongo().find()
    for raw in records:
        instance = geo_class(raw)
        warn("inserting %s" % instance.geo_id)
        Word2Geo.insert_geo(instance)
        # Only one record per class is processed.
        break