def test_insert_geo(self):
    # Populate GDS987, index its words, then count the records stored for it.
    gds = Dataset('GDS987').populate()
    Word2Geo.insert_geo(gds)

    # Fetch every word2geo record filed under this dataset's geo_id.
    collection = Word2Geo.mongo()
    stored = list(collection.find({'geo_id': gds.geo_id}))
    self.assertEqual(len(stored), 249)
def test_insert_geo(self):
    # Insert the populated GDS987 dataset and verify the number of
    # word2geo records that end up in mongo under its geo_id.
    populated = Dataset('GDS987').populate()
    Word2Geo.insert_geo(populated)
    matches = Word2Geo.mongo().find({'geo_id': populated.geo_id})
    n_records = len(list(matches))
    self.assertEqual(n_records, 249)
def test_get_field_words(self):
    # Check how many words get_field_words() returns for each text field
    # of the populated GDS987 dataset.
    dataset = Dataset('GDS987').populate()
    words = Word2Geo.get_field_words(dataset)

    # Only the per-field word counts are verified; the words themselves
    # are not compared against a reference list.
    # NOTE(review): the title count is written as a sum of three terms,
    # presumably one per contributing title string -- confirm.
    expected_counts = {
        "description": 105,
        "title": 9 + 8 + 7,
        "summary": 0,
    }
    for tag, n_words in expected_counts.items():
        self.assertEqual(
            len(words[tag]),
            n_words,
            msg="%s: got: %d, expected: %d" % (tag, len(words[tag]), n_words))
def insert_words(geo, words):
    '''
    Store per-word occurrence counts for one GEO object.

    geo:   GEO object; only its geo_id attribute is read here
    words: dict [k=source (aka 'tag'); v=list of words (maybe with dups)]

    Removes every existing record carrying this geo_id, then creates
    records with keys [geo_id, word, source, count] and adds them to the db.
    A word that occurs several times under the same source ends up as one
    record whose count equals the number of occurrences.

    Returns a dict [k=source; v=number of words processed for that source,
    duplicates included].  A source whose word list is empty does not
    appear in the result.
    '''
    mongo = Word2Geo.mongo()
    mongo.remove({'geo_id': geo.geo_id})    # remove the records for this geo

    totals = dict()                         # k=source, v=count
    # The inner list gets its own name so it does not shadow the `words`
    # argument while that dict is being iterated.
    for source, source_words in words.items():
        for word in source_words:
            query = {'geo_id': geo.geo_id, 'word': word, 'source': source}
            record = mongo.find_one(query)
            if not record:
                # record not found, construct it from query:
                record = query
            # A missing 'count' key (new or legacy record) counts as zero.
            record['count'] = record.get('count', 0) + 1
            mongo.save(record)

            # Explicit default instead of a bare except, so real errors
            # are never swallowed by the bookkeeping.
            totals[source] = totals.get(source, 0) + 1
    return totals
def test_get_field_words(self):
    # Word counts per field for series GSE10072, obtained via the Factory.
    series = Factory().newGEO('GSE10072')
    field_words = Word2Geo.get_field_words(series)

    n_title = len(field_words['title'])
    self.assertEqual(n_title, 42)

    n_description = len(field_words['description'])
    self.assertEqual(n_description, 0)

    n_summary = len(field_words['summary'])
    self.assertEqual(n_summary, 738)  # not quite sure why this isn't 741
def test_get_field_words(self):
    # Build the GSE10072 GEO object through the Factory and check the
    # number of words extracted from each of its text fields, in order.
    gse = Factory().newGEO("GSE10072")
    extracted = Word2Geo.get_field_words(gse)
    checks = (
        ("title", 42),
        ("description", 0),
        ("summary", 738),  # not quite sure why this isn't 741
    )
    for field, expected_len in checks:
        self.assertEqual(len(extracted[field]), expected_len)
def test_get_pubmed_words(self):
    # GDS987 must point at the expected pubmed id before its words are
    # fetched.
    gds = Dataset('GDS987').populate()
    self.assertEqual(gds.pubmed_id, str(15476476))

    pubmed_words = Word2Geo.get_pubmed_words(gds.pubmed_id)

    # Each tag must be present, hold a list, and have the expected length.
    expected_sizes = {"MeshHeading": 86, "AbstractText": 412, "ArticleTitle": 34}
    for tag, n_words in expected_sizes.items():
        self.assertIn(tag, pubmed_words)
        tag_words = pubmed_words[tag]
        self.assertIsInstance(tag_words, list)
        self.assertEqual(len(tag_words), n_words)
def test_get_field_words(self):
    # Verify the per-field word counts that get_field_words() produces
    # for the populated GDS987 dataset.
    dataset = Dataset('GDS987').populate()
    words = Word2Geo.get_field_words(dataset)

    # Counts only: the returned words are not compared against a
    # reference list.
    # NOTE(review): "title" is expressed as 9+8+7, presumably the sum of
    # several title strings -- confirm.
    expected_counts = {"description": 105, "title": 9 + 8 + 7, "summary": 0}
    for tag, n_words in expected_counts.items():
        got = len(words[tag])
        self.assertEqual(got, n_words,
                         msg="%s: got: %d, expected: %d" % (tag, got, n_words))
def test_get_pubmed_words(self):
    # Fetch the pubmed words for GDS987 and check the size of each
    # tagged word list.
    populated = Dataset('GDS987').populate()
    self.assertEqual(populated.pubmed_id, str(15476476))
    by_tag = Word2Geo.get_pubmed_words(populated.pubmed_id)
    for tag, n_words in {
            "MeshHeading": 86,
            "AbstractText": 412,
            "ArticleTitle": 34,
    }.items():
        self.assertIn(tag, by_tag)
        self.assertIsInstance(by_tag[tag], list)
        actual = len(by_tag[tag])
        self.assertEqual(actual, n_words)
def test_insert_GSE10072(self):
    # Smoke test: contains no assertions, so it passes as long as
    # populating and inserting the series does not raise.
    Word2Geo.insert_geo(Series('GSE10072').populate())
# Walk the mongo collections of the GEO classes and feed their records
# to Word2Geo.insert_geo().
import sys
import os

# Both environment variables must be set; a missing one raises KeyError.
sys.path.append(os.path.join(os.environ['AUREA_HOME'], 'src'))
sys.path.append(os.path.join(os.environ['TRENDS_HOME'], 'pylib'))

import GEO
from GEO.word2geo import Word2Geo
from warn import *

for cls in [
        GEO.Series.Series,
        GEO.Dataset.Dataset,
        GEO.DatasetSubset.DatasetSubset,
]:
#for cls in [GEO.Series.Series, GEO.Dataset.Dataset]:
    cursor = cls.mongo().find()
    for record in cursor:
        geo = cls(record)
        warn("inserting %s" % (geo.geo_id))
        Word2Geo.insert_geo(geo)
        # NOTE(review): the nesting level of this `break` could not be
        # recovered from the flattened source; it is assumed to end the
        # inner loop after the first record of each class -- confirm.
        break
def test_insert_GSE10072(self):
    # No assertions here: the test only exercises populate() followed by
    # insert_geo() for series GSE10072 and fails if either raises.
    gse = Series('GSE10072')
    populated = gse.populate()
    Word2Geo.insert_geo(populated)
# Script: push records from the GEO mongo collections through
# Word2Geo.insert_geo().
import sys
import os

# Make the project sources importable (KeyError if a variable is unset).
sys.path.append(os.path.join(os.environ['AUREA_HOME'], 'src'))
sys.path.append(os.path.join(os.environ['TRENDS_HOME'], 'pylib'))

import GEO
from GEO.word2geo import Word2Geo
from warn import *

for cls in [GEO.Series.Series, GEO.Dataset.Dataset, GEO.DatasetSubset.DatasetSubset]:
#for cls in [GEO.Series.Series, GEO.Dataset.Dataset]:
    cursor = cls.mongo().find()
    for record in cursor:
        geo = cls(record)
        warn("inserting %s" % (geo.geo_id))
        Word2Geo.insert_geo(geo)
        # NOTE(review): original indentation was lost; `break` is assumed
        # to belong to the inner loop (one record per class) -- confirm
        # against the original file before relying on this.
        break