def expression_data(self, id_type='gene'):
    """Read this sample's expression data from disk.

    If id_type is None, choose one based on which data exists
    (via self._get_id_type()).

    Returns (id_type, data) where data is a dict: k=id, v=expression
    value (float).  Raises exceptions on errors, so be careful.
    """
    # determine id_type if necessary, checking for existence as well:
    if id_type is None:
        id_type = self._get_id_type()
    # BUG FIX: the original regex '^gene|probe$' parsed as
    # (^gene)|(probe$), accepting e.g. 'geneXYZ'; use an exact test.
    if id_type not in ('gene', 'probe'):
        raise Exception('id_type must be one of "gene" or "probe"')

    data = {}
    # 'with' guarantees the file is closed even if a parse error raises
    with open(self.data_path(id_type=id_type), 'r') as data_file:
        if id_type == 'probe':
            # probe files carry two header lines; skip them
            data_file.readline()
            data_file.readline()
        for line in data_file:
            fields = re.split(r'[,\s]+', line)
            value = float(fields[1])
            if fields[0] in data:
                # BUG FIX: fields[1] is a str; the original passed it
                # straight to %f, raising TypeError on duplicate ids
                warn("Sample.expression_data: overwriting %s %f->%f"
                     % (fields[0], data[fields[0]], value))
            data[fields[0]] = value
    return (id_type, data)
def sh_cmdline(self):
    """Build the shell command line for this step by interpolating
    self into the step's 'usage' template.

    Returns the interpolated string; raises ConfigError when the
    template references missing or malformed values.
    """
    try:
        usage = self['usage']
        if usage is None:
            usage = ''
    except KeyError:
        usage = ''

    # look for exe in path, unless exe is an absolute path
    try:
        if os.path.abspath(self['exe']) != self['exe']:
            self['exe'] = os.path.join(
                RnaseqGlobals.conf_value('rnaseq', 'root_dir'),
                'programs', self['exe'])
    except KeyError:
        # not all steps have self['exe']; eg header, footer
        pass

    try:
        return usage % self
    except (KeyError, AttributeError) as e:
        # both duplicate arms collapsed: missing template value
        raise ConfigError("Missing value %s in\n%s" % (e.args, self.name))
    except ValueError as e:
        warn(e)
        warn("%s.usage: %s" % (self.name, usage))
        # BUG FIX: 'raise "string"' is a TypeError in Python; raise a
        # ConfigError carrying the same diagnostic text instead
        raise ConfigError("%s.keys(): %s"
                          % (self.name, ", ".join(self.__dict__.keys())))
    except TypeError as te:
        raise ConfigError("step %s: usage='%s': %s" % (self.name, usage, te))
def insert_geo(self, geo):
    """Insert all words associated with a geo object into the word2geo db.

    Removes any existing records for the geo first, then upserts one
    record per (geo_id, word, source) with an occurrence count.
    """
    self.mongo().remove({'geo_id': geo.geo_id})

    # words: k=source tag, v=list of sanitized words (may have dups)
    words = self.get_field_words(geo)
    if hasattr(geo, 'pubmed_id'):
        if isinstance(geo.pubmed_id, list):
            for pmid in [int(x) for x in geo.pubmed_id]:
                words.update(self.get_pubmed_words(pmid))
        else:
            words.update(self.get_pubmed_words(int(geo.pubmed_id)))

    totals = dict()
    # loop variable renamed (was also 'words', shadowing the dict)
    for source, word_list in words.items():
        for word in word_list:
            warn("%s: adding %s:%s" % (geo.geo_id, source, word))
            query = {'geo_id': geo.geo_id, 'word': word, 'source': source}
            record = self.mongo().find_one(query)
            if record:
                record['count'] = record.get('count', 0) + 1
            else:
                record = query
                record['count'] = 1
            self.mongo().save(record)
            # replaced a bare 'except:' counter with an explicit get()
            totals[source] = totals.get(source, 0) + 1
    warn("%s: %s" % (geo.geo_id, totals))
    return
def expression_data(self, id_type='gene'):
    """Read this sample's expression data from disk.

    If id_type is None, choose one based on which data exists
    (via self._get_id_type()).

    Returns (id_type, data) where data is a dict: k=id, v=expression
    value (float).  Raises exceptions on errors, so be careful.
    """
    # determine id_type if necessary, checking for existence as well:
    if id_type is None:
        id_type = self._get_id_type()
    # BUG FIX: the original regex '^gene|probe$' parsed as
    # (^gene)|(probe$), accepting e.g. 'geneXYZ'; use an exact test.
    if id_type not in ('gene', 'probe'):
        raise Exception('id_type must be one of "gene" or "probe"')

    data = {}
    # 'with' guarantees the file is closed even if a parse error raises
    with open(self.data_path(id_type=id_type), 'r') as data_file:
        if id_type == 'probe':
            # probe files carry two header lines; skip them
            data_file.readline()
            data_file.readline()
        for line in data_file:
            fields = re.split(r'[,\s]+', line)
            value = float(fields[1])
            if fields[0] in data:
                # BUG FIX: fields[1] is a str; the original passed it
                # straight to %f, raising TypeError on duplicate ids
                warn("Sample.expression_data: overwriting %s %f->%f"
                     % (fields[0], data[fields[0]], value))
            data[fields[0]] = value
    return (id_type, data)
def get_field_words(geo):
    """Collect words from certain fields in the record.

    Returns a dict keyed by field name ('title', 'description',
    'summary'); each value is a list of word "windows" (1-, 2- and
    3-word spans) drawn from that field's text.
    """
    debug = 'DEBUG' in os.environ
    word_fields = ['title', 'description', 'summary']
    words = {}  # k=field, v=[w1, w2, w3, ...] (w's can be "windows")
    for field in word_fields:
        words[field] = []
        if not hasattr(geo, field):
            continue
        field_words = getattr(geo, field)
        # can be a string, a list of single words, or a list of paragraphs
        if not isinstance(field_words, list):
            field_words = [field_words]
        if not field_words:
            if debug:
                warn("does this ever happen?" % ())
            continue
        for wl in field_words:
            # collect sliding windows of n=1..3 words from each chunk
            for n in (1, 2, 3):
                if len(wl) >= n:
                    for w in str_windows(wl, n, '[-_\s]+'):
                        words[field].append(w)
                elif debug:
                    warn("skipping %s(%d): len(wl)=%d" % (field, n, len(wl)))
    return words
def test_bowtie(self):
    """Loading bowtie.syml twice should resolve exe to 'bowtie'."""
    loader = superyaml(domain="/proj/hoodlab/share/vcassen/rna-seq/rnaseq/templates")
    parsed = loader.load("bowtie.syml", {})
    warn(yaml.dump(parsed))
    parsed = loader.load("bowtie.syml", parsed)
    warn(yaml.dump(parsed))
    self.assertEquals(parsed["exe"], "bowtie",
                      'got d[exe]=%s, expected "bowtie"' % parsed["exe"])
def test_all_ids_with_data(self):
    """all_ids_with_data returns a list containing the expected GSM ids
    for both id types."""
    for id_type in ['probe', 'gene']:
        warn("calling Sample.all_ids_with_data(id_type='%s'...)" % id_type)
        # BUG FIX: the loop variable was ignored and 'probe' was always
        # passed, so the 'gene' case was never actually exercised
        all_samples = Sample.all_ids_with_data(id_type=id_type)
        self.assertIsInstance(all_samples, list)
        self.assertIn('GSM32106', all_samples)  # it's in both lists
        self.assertNotIn('GSM1', all_samples)
def test_all_ids_with_data(self):
    """all_ids_with_data returns a list containing the expected GSM ids
    for both id types."""
    for id_type in ['probe', 'gene']:
        warn("calling Sample.all_ids_with_data(id_type='%s'...)" % id_type)
        # BUG FIX: the loop variable was ignored and 'probe' was always
        # passed, so the 'gene' case was never actually exercised
        all_samples = Sample.all_ids_with_data(id_type=id_type)
        self.assertIsInstance(all_samples, list)
        self.assertIn('GSM32106', all_samples)  # it's in both lists
        self.assertNotIn('GSM1', all_samples)
def ensure_indexes(self):
    """Create any mongo indexes declared on this class.

    self.indexes (if present) is a list of specs, each with 'keys'
    and optional 'options' (keyword args for ensure_index).
    """
    if not hasattr(self, 'indexes'):
        return
    for index_spec in self.indexes:
        try:
            # BUG FIX: 'options' was read into kwargs but never passed
            # to ensure_index; also missing 'options' raised KeyError,
            # which the original AttributeError handler did not catch
            kwargs = index_spec.get('options', {})
            self.mongo().ensure_index(index_spec['keys'], **kwargs)
        except (AttributeError, KeyError) as ae:
            # a malformed spec shouldn't abort the remaining indexes
            warn("caught %s for %s" % (ae, index_spec))
def test_mongo(self):
    """Sample.mongo() yields a usable pymongo Collection with records."""
    collection = Sample.mongo()
    self.assertIsInstance(collection, pymongo.collection.Collection)
    cursor = collection.find()
    warn("cursor: got %d records" % cursor.count())
    self.assertTrue(cursor.count() > 1)
    record = cursor.next()
    self.assertTrue('geo_id' in record)
    self.assertTrue('_id' in record)
def read_config(self, config_file):
    """Load the global YAML config file into self.config.

    Dies with a UserError if the file cannot be read.
    """
    try:
        # 'with' closes the file even if read() fails partway
        with open(config_file) as f:
            yml = f.read()
        # safe_load avoids executing arbitrary YAML tags from the file
        self.config = yaml.safe_load(yml)
    except IOError as ioe:
        warn("error trying to load global config file:")
        die(UserError(ioe))
def subsets(self):
    """Return populated DatasetSubset objects for this dataset.

    Returns [] (with an optional debug warning) when n_subsets is absent.
    """
    if not hasattr(self, 'n_subsets'):
        if 'DEBUG' in os.environ:
            warn("%s: no subsets?" % (self.geo_id))
        return []
    result = []
    for index in range(1, int(self.n_subsets) + 1):
        subset_id = "%s_%d" % (self.geo_id, index)
        result.append(DatasetSubset(subset_id).populate())
    return result
def id_file_callback(option, opt, value, parser, *args, **kwargs): warn("options is %s" % (option)) warn("opt is %s" % (opt)) warn("value is %s" % (value)) warn("args are %s" % (args)) warn("kwargs are %s" % (kwargs)) f = open(value) idlist = [x for x in f if type(x) == int] f.close() parser.values.idlist.extend(idlist)
def test_with_phenos(self):
    """Every sample returned by with_pheno carries that phenotype."""
    phenos = [
        "adenocarcinoma", "normal", "asthma", "squamous cell carcinoma",
        "chronic obstructive pulmonary disease", "large cell lung carcinoma",
    ]
    for pheno in phenos:
        matched = GEO.Sample.Sample.with_pheno(pheno)
        warn("%s: got %d samples" % (pheno, len(matched)))
        for sample in matched:
            self.assertEqual(sample.phenotype, pheno)
def id_file_callback(option, opt, value, parser, *args, **kwargs): warn("options is %s" % (option)) warn("opt is %s" % (opt)) warn("value is %s" % (value)) warn("args are %s" % (args)) warn("kwargs are %s" % (kwargs)) f=open(value) idlist=[x for x in f if type(x) == int] f.close() parser.values.idlist.extend(idlist)
def get_geo_ids(options):
    """Return the ids to process: the explicit idlist if given,
    otherwise every Series geo_id in the db."""
    if len(options.idlist):
        return options.idlist
    # only doing series now, and everything else goes through series
    geo_ids = []
    for cls in [GEO.Series.Series]:
        cursor = cls.mongo().find({}, {'_id': 0, 'geo_id': 1})
        warn("got %d %s records" % (cursor.count(), cls.__name__))
        geo_ids.extend(record['geo_id'] for record in cursor)
    return geo_ids
def _all_geo_pmids():
    """Gets pmids from all geo objects (Series, Dataset) in the db."""
    pmidlist = []
    for cls in [Series, Dataset]:
        # BUG FIX: {'$ne': 'null'} compared against the *string* 'null';
        # the intent is "records that actually have a pubmed_id".  The
        # inner guard below is kept, so the processed set is unchanged.
        cursor = cls.mongo().find({'pubmed_id': {'$exists': True}})
        for record in cursor:
            if 'pubmed_id' in record:
                pmids = record['pubmed_id']
                # pubmed_id may be a single value or a list
                if isinstance(pmids, list):
                    pmidlist.extend([int(x) for x in pmids])
                else:
                    pmidlist.append(int(pmids))
                warn("pmids are %s" % (pmids))
    return pmidlist
def _all_geo_pmids():
    """Gets pmids from all geo objects (Series, Dataset) in the db."""
    pmidlist = []
    for cls in [Series, Dataset]:
        # BUG FIX: {'$ne': 'null'} compared against the *string* 'null';
        # the intent is "records that actually have a pubmed_id".  The
        # inner guard below is kept, so the processed set is unchanged.
        cursor = cls.mongo().find({'pubmed_id': {'$exists': True}})
        for record in cursor:
            if 'pubmed_id' in record:
                pmids = record['pubmed_id']
                # pubmed_id may be a single value or a list
                if isinstance(pmids, list):
                    pmidlist.extend([int(x) for x in pmids])
                else:
                    pmidlist.append(int(pmids))
                warn("pmids are %s" % (pmids))
    return pmidlist
def fetch(self):
    """Return Document object for this pubmed id, obtained from NCBI
    if not already cached on disk at self.path()."""
    if os.access(self.path(), os.R_OK):
        # BUG FIX: the original formatted "%d" against an empty tuple,
        # which raised TypeError on every cache hit
        warn("%d: already on disk" % (self.pubmed_id))
        with open(self.path(), 'r') as f:
            xml_doc = f.read()
    else:
        warn("%d: fetching from pubmed" % (self.pubmed_id))
        xml_doc = Entrez.efetch(db="pubmed", id=self.pubmed_id,
                                retmode='xml').read()
        # cache the raw XML for next time
        with open(self.path(), 'w') as f:
            f.write(xml_doc + "\n")
    return parseString(xml_doc)
def _geolist2pmidlist(geo_ids): ''' converts a list of mixed pmids and geo_ids to all pmids by doing the lookups on the geo objects ''' pmidlist=[] for id in geo_ids: if re.match('^\d+$', id): pmidlist.append(id) else: try: geo=Factory().newGEO(id) pmids=geo.pubmed_id # might be single value or list, so: except Exception as e: warn("caught %s" % (e)) continue # id not a geo id, or geo didn't have any pubmed_id try: pmidlist.append(pmids) except: pmidlist.extend(pmids) return pmidlist
def test_all_ids_with_pheno(self):
    """all_ids_with_pheno returns a non-empty list for each known phenotype."""
    phenos = [
        "normal", "adenocarcinoma", "squamous cell carcinoma", "asthma",
        "chronic obstructive pulmonary disease", "large cell lung carcinoma",
    ]
    for pheno in phenos:
        ids = Sample.all_ids_with_pheno(pheno)
        self.assertIsInstance(ids, list)
        warn("got %d '%s' samples" % (len(ids), pheno))
        self.assertTrue(len(ids) > 0)
        # count how many also have a readable data file on disk
        ids_with_data = [
            geo_id for geo_id in ids
            if os.access(Sample.data_path_of(geo_id=geo_id), os.R_OK)
        ]
        warn("got %d '%s' samples with data" % (len(ids_with_data), pheno))
def fetch(self):
    """Return Document object for this pubmed id, obtained from NCBI
    if not already cached on disk at self.path()."""
    if os.access(self.path(), os.R_OK):
        # BUG FIX: the original formatted "%d" against an empty tuple,
        # which raised TypeError on every cache hit
        warn("%d: already on disk" % (self.pubmed_id))
        with open(self.path(), 'r') as f:
            xml_doc = f.read()
    else:
        warn("%d: fetching from pubmed" % (self.pubmed_id))
        xml_doc = Entrez.efetch(db="pubmed", id=self.pubmed_id,
                                retmode='xml').read()
        # cache the raw XML for next time
        with open(self.path(), 'w') as f:
            f.write(xml_doc + "\n")
    return parseString(xml_doc)
def _ref_dc(self) -> int:
    """
    Get the position of the reference dataset from the results file
    as a 0-based index

    Returns
    -------
    ref_dc : int

    Raises
    ------
    KeyError
        If self.meta lacks the reference-dataset attribute (a warning
        is emitted before re-raising).
    """
    ref_dc = 0
    try:
        val_ref = self.meta[globals._ref_ds_attr]
        # parse() extracts the dataset index from the short-name
        # pattern; [0] takes the first captured field
        ref_dc = parse(globals._ds_short_name_attr, val_ref)[0]
    except KeyError as e:
        warn("The netCDF file does not contain the attribute {}".format(globals._ref_ds_attr))
        raise e
    return ref_dc
def test_all_ids_with_pheno(self):
    """all_ids_with_pheno returns a non-empty id list for each phenotype."""
    phenos = [
        "normal", "adenocarcinoma", "squamous cell carcinoma", "asthma",
        "chronic obstructive pulmonary disease", "large cell lung carcinoma"
    ]
    for pheno in phenos:
        ids = Sample.all_ids_with_pheno(pheno)
        self.assertIsInstance(ids, list)
        warn("got %d '%s' samples" % (len(ids), pheno))
        self.assertTrue(len(ids) > 0)
        # count how many of the ids also have a readable data file on disk
        ids_with_data = [
            x for x in ids
            if os.access(Sample.data_path_of(geo_id=x), os.R_OK)
        ]
        warn("got %d '%s' samples with data" % (len(ids_with_data), pheno))
def _geolist2pmidlist(geo_ids): ''' converts a list of mixed pmids and geo_ids to all pmids by doing the lookups on the geo objects ''' pmidlist = [] for id in geo_ids: if re.match('^\d+$', id): pmidlist.append(id) else: try: geo = Factory().newGEO(id) pmids = geo.pubmed_id # might be single value or list, so: except Exception as e: warn("caught %s" % (e)) continue # id not a geo id, or geo didn't have any pubmed_id try: pmidlist.append(pmids) except: pmidlist.extend(pmids) return pmidlist
def test_store(self):
    """Storing a Pubmed object writes its file and one record per text tag."""
    warn("\n")
    pmid = 18297132
    pubmed = Pubmed(pmid)
    self.assertIsInstance(pubmed, Pubmed)
    self.assertEqual(pubmed.pubmed_id, pmid)
    pubmed.remove()
    pubmed.store()
    self.assertTrue(os.access(pubmed.path(), os.R_OK))
    cursor = Pubmed.mongo().find({'pubmed_id': pmid})
    self.assertEqual(cursor.count(), len(Pubmed.text_tags))
    # known word counts per tag for this specific article
    expected_counts = {'MeshHeading': 22, 'AbstractText': 247, 'ArticleTitle': 15}
    for record in cursor:
        self.assertEqual(len(record['words']), expected_counts[record['tag']])
def test_store(self):
    """Pubmed.store() should write the XML to disk and create one mongo
    record per text tag, with the expected word counts."""
    warn("\n")
    pmid = 18297132
    pubmed = Pubmed(pmid)
    self.assertIsInstance(pubmed, Pubmed)
    self.assertEqual(pubmed.pubmed_id, pmid)
    pubmed.remove()  # start from a clean slate
    pubmed.store()
    self.assertTrue(os.access(pubmed.path(), os.R_OK))
    cursor = Pubmed.mongo().find({'pubmed_id': pmid})
    self.assertEqual(cursor.count(), len(Pubmed.text_tags))
    # expected word counts per tag for this specific article
    tag2count = {
        'MeshHeading': 22,
        'AbstractText': 247,
        'ArticleTitle': 15
    }
    for record in cursor:
        tag = record['tag']
        self.assertEqual(len(record['words']), tag2count[tag])
def expression_data(self, id_type="gene"):
    """
    Read this sample's expression data from disk.

    If id_type == None, choose one based on which data exists.
    Returns (id_type, data) where data is a dict: k=id, v=expression value.
    Raises exceptions on errors, so be careful.
    """
    # determine id_type if necessary, checking for existence as well:
    if id_type is None:
        if os.access(self.data_path(id_type="gene"), os.R_OK):
            id_type = "gene"
        elif os.access(self.data_path(id_type="probe"), os.R_OK):
            id_type = "probe"
        else:
            raise Exception("No data for sample %s" % self.geo_id)
    # BUG FIX: '^gene|probe$' parsed as (^gene)|(probe$), accepting
    # e.g. 'geneXYZ'; use an exact membership test instead
    elif id_type not in ("gene", "probe"):
        raise Exception('id_type must be one of "gene" or "probe"')

    data = {}
    # 'with' guarantees the file is closed even if a parse error raises
    with open(self.data_path(id_type=id_type), "r") as data_file:
        if id_type == "probe":
            # probe files carry two header lines; skip them
            data_file.readline()
            data_file.readline()
        for line in data_file:
            fields = re.split(r"[,\s]+", line)
            value = float(fields[1])
            if fields[0] in data:
                # BUG FIX: fields[1] is a str; the original passed it
                # straight to %f, raising TypeError on duplicate ids
                warn("Sample.expression_data: overwriting %s %f->%f"
                     % (fields[0], data[fields[0]], value))
            data[fields[0]] = value
    return (id_type, data)
def main():
    """Insert words for every geo id, honoring the debugging fuse."""
    options = get_options()
    geo_ids = get_geo_ids(options)
    factory = Factory()
    warn("insert_geo_words starting: %s" % (datetime.datetime.now().__str__()))
    fuse = options.fuse
    for geo_id in geo_ids:
        geo = factory.newGEO(geo_id)
        warn("inserting %s" % (geo.geo_id))
        stats = insert_series(geo)
        warn("%s: %s" % (geo_id, stats))
        fuse -= 1
        if fuse == 0:
            break
    warn("insert_geo_words done: %s" % (datetime.datetime.now().__str__()))
    return 0
def insert_geo(self, geo):
    """Insert all words associated with a geo object into the word2geo db.

    Removes any existing records for the geo first, then upserts one
    record per (geo_id, word, source) with an occurrence count.
    """
    self.mongo().remove({'geo_id': geo.geo_id})

    # words: k=source tag, v=list of sanitized words (may have dups)
    words = self.get_field_words(geo)
    if hasattr(geo, 'pubmed_id'):
        if isinstance(geo.pubmed_id, list):
            for pmid in [int(x) for x in geo.pubmed_id]:
                words.update(self.get_pubmed_words(pmid))
        else:
            words.update(self.get_pubmed_words(int(geo.pubmed_id)))

    totals = dict()
    # loop variable renamed (was also 'words', shadowing the dict)
    for source, word_list in words.items():
        for word in word_list:
            warn("%s: adding %s:%s" % (geo.geo_id, source, word))
            query = {'geo_id': geo.geo_id, 'word': word, 'source': source}
            record = self.mongo().find_one(query)
            if record:
                record['count'] = record.get('count', 0) + 1
            else:
                record = query
                record['count'] = 1
            self.mongo().save(record)
            # replaced a bare 'except:' counter with an explicit get()
            totals[source] = totals.get(source, 0) + 1
    warn("%s: %s" % (geo.geo_id, totals))
    return
def get_session(self):
    """Return the SQLAlchemy session, creating engine/metadata/tables on
    first use and caching them on self."""
    debug=self.conf_value('debug')  # NOTE(review): 'debug' is never used below — confirm before removing
    try:
        return self.session
    except AttributeError:
        # first call: no session cached yet, so build the whole stack
        db_name=self.get_db_file()
        self.make_db_dir(db_name)
        engine=create_engine('sqlite:///%s' % db_name, echo=False)
        warn("connected to %s" % (db_name))
        metadata=MetaData()
        # have to import these explicitly because we're in a classmethod: (or something)
        from Rnaseq import Pipeline, Step, Readset, StepRun, PipelineRun, FileOutput
        classes=[Pipeline,Readset,PipelineRun,StepRun,FileOutput] # omit step
        tables={}
        for cls in classes:
            tables[cls]=cls.create_table(metadata,engine)
        Session=sessionmaker(bind=engine)
        session=Session()
        # cache everything for subsequent calls:
        self.engine=engine
        self.metadata=metadata
        self.session=session
        return session
def main(options):
    """Fetch-and-store every pubmed id in the list, honoring the fuse."""
    warn("getting pmid list..." % ())
    idlist = get_pmidlist(options)
    warn("processing %d ids: %s" % (len(idlist), idlist))
    if options.dry_run:
        exit(0)
    fuse = options.fuse
    for pmid in idlist:
        pubmed = Pubmed(pmid)
        warn("pmid is %s" % (pmid))
        # BUG FIX: a leftover debugging 'continue' here made the store()
        # call below unreachable, so nothing was ever fetched or stored
        try:
            pubmed.store()  # does the fetching automatically
        except Exception as e:
            warn("%d: caught %s" % (pmid, e))
        fuse -= 1
        if fuse == 0:
            break
    exit(0)
def main(options):
    """Fetch-and-store every pubmed id in the list, honoring the fuse."""
    warn("getting pmid list..." % ())
    idlist = get_pmidlist(options)
    warn("processing %d ids: %s" % (len(idlist), idlist))
    if options.dry_run:
        exit(0)
    fuse = options.fuse
    for pmid in idlist:
        pubmed = Pubmed(pmid)
        warn("pmid is %s" % (pmid))
        # BUG FIX: a leftover debugging 'continue' here made the store()
        # call below unreachable, so nothing was ever fetched or stored
        try:
            pubmed.store()  # does the fetching automatically
        except Exception as e:
            warn("%d: caught %s" % (pmid, e))
        fuse -= 1
        if fuse == 0:
            break
    exit(0)
def test_composite(self):
    """Loading readset.syml then bowtie.syml should compose the bowtie
    usage string from exe/ewbt/args plus the input/output placeholders."""
    sy=superyaml(domain='/proj/hoodlab/share/vcassen/rna-seq/rnaseq/templates')
    readset=sy.load('readset.syml',{})
    bowtie=sy.load('bowtie.syml',readset)
    # NOTE(review): warn() is called with two args here but one arg
    # everywhere else in this file — confirm warn's signature
    warn("bowtie 1",yaml.dump(bowtie))
    self.assertEquals(bowtie['exe'],'bowtie','got d[exe]=%s, expected "bowtie"' % bowtie['exe'])
    bowtie.update(readset)
    bowtie=sy.load('bowtie.syml',bowtie)
    warn("bowtie 2",yaml.dump(bowtie))
    # expected usage: "<exe> <ewbt> <args> ${input} ${output}"
    expected=("%(exe)s %(ewbt)s %(args)s" % hashslice(bowtie,'exe','ewbt','args')) + " ${input} ${output}"
    warn("expected is %s" % expected)
    self.assertEquals(bowtie['usage'], expected, 'got bowtie[usage]=%s, expected %s' % (bowtie['usage'], expected))
def __init__(self, *args):
    """Create a Pubmed wrapper; the single argument must coerce to int."""
    assert len(args) == 1
    (raw_id,) = args
    try:
        self.pubmed_id = int(raw_id)
    except Exception as e:
        # non-numeric input is logged; pubmed_id is left unset
        warn("args[0]: %s; caught %s" % (raw_id, e))
f.close() parser.values.idlist.extend(idlist) parser = OptionParser() parser.add_option('-f', '--id_file', action='callback', callback=id_file_callback, help='file containing list of ids') parser.add_option('-n', '--dry-run', dest='dry_run', action='store_true', default=False, help='do not actually store/fetch any ids') parser.add_option('--fuse', dest='fuse', type='int', default=-1, help='debugging fuse (limits iterations in main loop)') (options, args) = parser.parse_args() warn("options are %s" % (options)) warn("args are %s" % (args)) if hasattr(options, 'idlist'): options.idlist.extend(args) else: options.idlist = args main(options)
def setUp(self):
    """Start each test's diagnostic output on a fresh line."""
    warn("\n")
+ else: + with file: + import ConfigParser + config = ConfigParser.ConfigParser() + config.readfp(file) + separator = config.get('parse_qs', envvar_name) + _default_qs_separator = separator + config_source = _QS_SEPARATOR_CONFIG_FILENAME + if separator is None: + # The default is '&', but warn if not specified explicitly + if ';' in qs: + from warnings import warn + warn("The default separator of urlparse.parse_qsl and " + + "parse_qs was changed to '&' to avoid a web cache " + + "poisoning issue (CVE-2021-23336). " + + "By default, semicolons no longer act as query field " + + "separators. " + + "See https://access.redhat.com/articles/5860431 for " + + "more details.", + _QueryStringSeparatorWarning, stacklevel=2) + separator = '&' + elif separator == 'legacy': + separator = _legacy + elif len(separator) != 1: + raise ValueError( + '{} (from {}) must contain '.format(envvar_name, config_source) + + '1 character, or "legacy". See ' + + 'https://access.redhat.com/articles/5860431 for more details.' + ) + # If max_num_fields is defined then check that the number of fields # is less than max_num_fields. This prevents a memory exhaustion DOS
def test_n_too_big(self):
    """Asking for windows longer than the string yields an empty list."""
    sentence = 'this is a string with some stuff in it'
    n_words = len(sentence.split(' '))
    warn("n_words is %d" % (n_words))
    self.assertEqual(str_windows(sentence, n_words + 1), [])
def test_all_with_data_gene(self):
    """There should be well over 100 samples with gene-level data."""
    samples = Sample.all_with_data(id_type='gene')
    warn("got %d 'gene' samples" % (len(samples)))
    self.assertTrue(len(samples) > 100)
def test_all_with_data_probe(self):
    """There should be well over 100 probe samples (ids only)."""
    samples = Sample.all_with_data(id_type='probe', ids_only=True)
    warn("got %d 'probe' samples" % (len(samples)))
    self.assertTrue(len(samples) > 100)
def insert_series(series):
    """Insert words for a series plus its datasets, subsets and samples.

    Dataset/subset words are inserted only once per dataset (tracked in
    the seen_dataset global); every sample then receives the words
    accumulated from all objects in the series.  Returns the totals dict.
    """
    global seen_dataset
    global seen_pubmed
    seen_pubmed = {}
    debug = 'DEBUG' in os.environ

    # gather ALL the words!
    words = gather_words(series)
    totals = insert_words(series, words)
    if debug:
        warn("series %s: %s" % (series.geo_id, totals))
    if type(series) != Series:
        # BUG FIX: originally returned None here, unlike the normal path
        return totals

    # build up words from datasets and subsets, and insert words as we go:
    # (but only insert dataset/subset words once)
    datasets = series.datasets()
    warn("%s: %d datasets" % (series.geo_id, len(datasets)))
    for dataset in datasets:
        warn(" %s: inserting %s" % (series.geo_id, dataset.geo_id))
        ds_words = gather_words(dataset)
        add_words(words, ds_words)
        if dataset.geo_id not in seen_dataset:
            ds_totals = insert_words(dataset, ds_words)
            if debug:
                # BUG FIX: logged the series 'totals' instead of ds_totals
                warn("dataset %s: %s" % (dataset.geo_id, ds_totals))
            add_totals(totals, ds_totals)

        try:
            warn("%s: %d subsets" % (dataset.geo_id, dataset.n_subsets))
        except AttributeError:
            warn("%s: subsets not defined???" % (dataset.geo_id))
        for subset in dataset.subsets():
            warn(" %s: inserting %s" % (series.geo_id, subset.geo_id))
            ss_words = gather_words(subset)
            add_words(words, ss_words)
            if dataset.geo_id not in seen_dataset:
                ss_totals = insert_words(subset, ss_words)
                if debug:
                    # BUG FIX: logged 'totals' instead of ss_totals
                    warn("subset %s: %s" % (subset.geo_id, ss_totals))
                add_totals(totals, ss_totals)
        seen_dataset[dataset.geo_id] = True

    # add the sum of words from all objects to every sample in the series:
    samples = series.samples()
    warn("%d samples for %s" % (len(samples), series.geo_id))
    for sample in samples:
        warn(" %s: inserting %s" % (series.geo_id, sample.geo_id))
        s_totals = insert_words(sample, words)
        if debug:
            # BUG FIX: logged 'totals' instead of s_totals
            warn("sample %s: %s" % (sample.geo_id, s_totals))
        add_totals(totals, s_totals)
    return totals
if __name__ == '__main__':
    def id_file_callback(option, opt, value, parser, *args, **kwargs):
        """optparse callback: load numeric ids from the named file into
        parser.values.idlist."""
        warn("options is %s" % (option))
        warn("opt is %s" % (opt))
        warn("value is %s" % (value))
        warn("args are %s" % (args))
        warn("kwargs are %s" % (kwargs))
        # BUG FIX: iterating a file yields strings, so 'type(x) == int'
        # was never true and the idlist stayed empty; parse the lines
        with open(value) as f:
            idlist = [int(line) for line in f if line.strip().isdigit()]
        parser.values.idlist.extend(idlist)

    parser = OptionParser()
    # BUG FIX: a callback option needs an explicit type to consume its
    # argument; without it 'value' arrived at the callback as None
    parser.add_option('-f', '--id_file', action='callback', type='string',
                      callback=id_file_callback,
                      help='file containing list of ids')
    parser.add_option('-n', '--dry-run', dest='dry_run', action='store_true',
                      default=False,
                      help='do not actually store/fetch any ids')
    parser.add_option('--fuse', dest='fuse', type='int', default=-1,
                      help='debugging fuse (limits iterations in main loop)')
    (options, args) = parser.parse_args()
    warn("options are %s" % (options))
    warn("args are %s" % (args))
    # positional args are treated as additional ids
    if hasattr(options, 'idlist'):
        options.idlist.extend(args)
    else:
        options.idlist = args
    main(options)
def test_all_ids_with_data(self):
    """There should be well over 1000 probe ids with data."""
    ids = Sample.all_ids_with_data(id_type='probe')
    warn("len(ids)=%d" % (len(ids)))
    self.assertTrue(len(ids) > 1000)
def test_all_with_data_gene(self):
    """Expect well over 100 samples with gene-level expression data."""
    samples = Sample.all_with_data(id_type='gene')
    warn("got %d 'gene' samples" % (len(samples)))
    self.assertTrue(len(samples) > 100)
def test_all_with_data_probe(self):
    """Expect well over 100 probe samples when fetching ids only."""
    samples = Sample.all_with_data(id_type='probe', ids_only=True)
    warn("got %d 'probe' samples" % (len(samples)))
    self.assertTrue(len(samples) > 100)
def test_with_phenos(self):
    """Every sample returned by with_pheno should carry that phenotype."""
    for pheno in ["adenocarcinoma","normal","asthma","squamous cell carcinoma","chronic obstructive pulmonary disease","large cell lung carcinoma"]:
        samples=GEO.Sample.Sample.with_pheno(pheno)
        warn("%s: got %d samples" % (pheno, len(samples)))
        for sample in samples:
            self.assertEqual(sample.phenotype, pheno)
def dump_words(words, msg):
    """Log the number of items collected under each tag, then a blank line."""
    for tag, items in words.items():
        warn("%s: %s: %d items" % (msg, tag, len(items)))
    warn("\n")
def test_connection(self):
    """A default pymongo Connection can be established."""
    warn(sys._getframe().f_code.co_name)  # testing framework does this anyway...
    conn = pymongo.Connection()
    self.assertIsInstance(conn, pymongo.Connection)
    self.connection = conn
from sandbox.markers import ArucoMarkers, MarkerDetection
from sandbox.sensor import Sensor
from sandbox import _test_data, _calibration_dir

im_folder = _test_data['test']

import numpy as np
import matplotlib.pyplot as plt

# load the stored test frame (depth + color arrays)
frame = np.load(im_folder + 'frame1.npz')
depth = frame['arr_0']
color = frame['arr_1']

try:
    sensor = Sensor(calibsensor=_calibration_dir + "sensorcalib.json",
                    name='kinect_v2')
except Exception:
    # BUG FIX: 'import warnings as warn' bound the *module*, so the
    # warn(...) call below raised TypeError; import the function instead
    from warnings import warn
    warn("Testing will be performed without the sensor")
    sensor = None


def test_plot_image():
    """Show the stored depth and color frames."""
    depth = frame['arr_0']
    col = frame['arr_1']
    plt.imshow(depth)
    plt.show()
    plt.imshow(col)
    plt.show()


def test_aruco_detect():
    """Detect aruco markers in the color frame."""
    aruco = ArucoMarkers()
    corners, ids, rejected = aruco.aruco_detect(color)
    print(corners, ids, rejected)
import sys, os
sys.path.append(os.path.join(os.environ['AUREA_HOME'], 'src'))
sys.path.append(os.path.join(os.environ['TRENDS_HOME'], 'pylib'))
import GEO
from GEO.word2geo import Word2Geo
from warn import *

# Walk every GEO class's collection and insert its words into word2geo.
for cls in [
    GEO.Series.Series, GEO.Dataset.Dataset, GEO.DatasetSubset.DatasetSubset
]:
#for cls in [GEO.Series.Series, GEO.Dataset.Dataset]:
    cursor = cls.mongo().find()
    for record in cursor:
        geo = cls(record)
        warn("inserting %s" % (geo.geo_id))
        # NOTE(review): insert_geo is defined as an instance method
        # elsewhere but called on the class here — confirm it is a
        # classmethod in word2geo
        Word2Geo.insert_geo(geo)
        # NOTE(review): this break stops after the first record of each
        # class — looks like a debugging fuse; confirm before removing
        break