def __init__(self, mgids=[], metadata=True, stats=True, auth=None, def_name=None, cache=None): self._auth = auth self._stats = stats # hack to get variable name if def_name == None: try: (filename,line_number,function_name,text)=traceback.extract_stack()[-2] def_name = text[:text.find('=')].strip() except: pass self.defined_name = def_name # get metagenomes self._mgids = mgids self.metagenomes = self._get_metagenomes(mgids, metadata, stats, cdir=cache) self.rarefaction = Rarefaction(mgObjs=self.metagenomes.values())
class Collection(object): """Class representation of Collection object: metagenomes : [ 'hash', 'key = metagenome_id, value = Metagenome() object'] rarefaction : Rarefaction object for collection metagenomes _mgids : [ 'list', 'inputted metagenome ids' ] see: help(Metagenome) """ def __init__(self, mgids=[], metadata=True, stats=True, auth=None, def_name=None, cache=None): self._auth = auth self._stats = stats # hack to get variable name if def_name == None: try: (filename,line_number,function_name,text)=traceback.extract_stack()[-2] def_name = text[:text.find('=')].strip() except: pass self.defined_name = def_name # get metagenomes self._mgids = mgids self.metagenomes = self._get_metagenomes(mgids, metadata, stats, cdir=cache) self.rarefaction = Rarefaction(mgObjs=self.metagenomes.values()) def _get_metagenomes(self, mgids, metadata, stats, cdir=None): mgs = {} for mg in mgids: keyArgs = { 'metadata': metadata, 'stats': stats, 'auth': self._auth, 'cache': cdir, 'def_name': '%s.metagenomes["%s"]'%(self.defined_name, mg) } if cdir and os.path.isfile(cdir+'/'+mg+'.json'): keyArgs['mfile'] = cdir+'/'+mg+'.json' mgs[mg] = Metagenome(mg, **keyArgs) return mgs def _set_statistics(self): self._stats = True for mg in self.metagenomes.itervalues(): mg._set_statistics() def mgids(self): return self._mgids def get_stat(self, mgid=None, stat=None, mgid_set=[]): if not (mgid and stat and (mgid in self._mgids)): return [] if not self._stats: self._set_statistics() if stat not in self.metagenomes[mgid].stats['sequence_stats']: return [] stat_list = [ toNum(self.metagenomes[mgid].stats['sequence_stats'][stat]) ] if not mgid_set: mgid_set = self._mgids for m in mgid_set: if m == mgid: continue if stat in self.metagenomes[m].stats['sequence_stats']: stat_list.append( toNum(self.metagenomes[m].stats['sequence_stats'][stat]) ) return stat_list def sub_mgs(self, category=None, field=None, value=None): sub_mgs = set() all_fields = [] for f in self.metadata_fields(table=False).itervalues(): all_fields.extend(list(f)) if not (category and (category in Ipy.MD_CATS)): sys.stderr.write("category must be one of: %s\n"%", ".join(Ipy.MD_CATS)) return self.mgids() if not (field and value and (field in all_fields)): sys.stderr.write("field '%s' does not exist\n"%field) return self.mgids() for mid, mg in self.metagenomes.iteritems(): if not (hasattr(mg, 'metadata') and (category in mg.metadata)): continue for key, val in mg.metadata[category]['data'].iteritems(): if key == field: x = str(val).find(value) if x != -1: sub_mgs.add(mid) return list(sub_mgs) def metadata_fields(self, table=True): tdata = [] mdata = dict([(x, set()) for x in Ipy.MD_CATS]) for mg in self.metagenomes.itervalues(): if not hasattr(mg, 'metadata'): continue for cat in Ipy.MD_CATS: if cat not in mg.metadata: continue for key in mg.metadata[cat]['data'].iterkeys(): mdata[cat].add(key) if not table: return mdata for cat in mdata.iterkeys(): for field in sorted(mdata[cat]): tdata.append([cat, field]) keyArgs = { 'width': 400, 'height': 600, 'target': 'metadata_fields_'+random_str(), 'data': {'data': tdata, 'header': ['category', 'field']}, 'rows_per_page': 20 } if Ipy.DEBUG: print keyArgs try: Ipy.RETINA.table(**keyArgs) except: sys.stderr.write("Error producing metadata table\n") def show_metadata(self, mgids=None, arg_list=False): header = [] tdata = [] mdata = dict([(x, {}) for x in Ipy.MD_CATS]) submgs = mgids if mgids and (len(mgids) > 0) else self.mgids() for mid in submgs: if (mid in self.metagenomes) and hasattr(self.metagenomes[mid], 'metadata'): header.append(mid) if len(header) == 0: sys.stderr.write("No metadata to display\n") for i, mid in enumerate(header): for cat, data in self.metagenomes[mid].metadata.iteritems(): for field, value in data['data'].iteritems(): if field not in mdata[cat]: mdata[cat][field] = ['' for x in range(len(header))] mdata[cat][field][i] = value for cat in mdata.iterkeys(): for field in mdata[cat].iterkeys(): tdata.append( [cat, field] + mdata[cat][field] ) keyArgs = { 'width': 700, 'height': 600, 'target': "metadata_table_"+random_str(), 'data': {'data': tdata, 'header': ['category', 'field'] + header}, 'rows_per_page': 20 } if Ipy.DEBUG: print keyArgs if arg_list: return keyArgs else: try: Ipy.RETINA.table(**keyArgs) except: sys.stderr.write("Error producing metadata table\n") return None def plot_rarefaction(self, mgids=None, width=800, height=300, title="", x_title="", y_title="", legend=True, arg_list=False): return self.rarefaction.plot(mgids=mgids, width=width, height=height, title=title, x_title=x_title, y_title=y_title, legend=legend, arg_list=arg_list) def barchart_taxon(self, level='domain', parent=None, width=800, height=0, x_rotate='0', title="", legend=True): children = get_taxonomy(level, parent) if parent is not None else None self._barchart('taxonomy', level, width, height, x_rotate, title, legend, names=children) def barchart_function(self, source='Subsystems', width=800, height=0, x_rotate='0', title="", legend=True): self._barchart('ontology', source, width, height, x_rotate, title, legend) def _barchart(self, atype, level, width, height, x_rotate, title, legend, names=None): if not self._stats: self._set_statistics() data = [] annD = {} colors = google_palette(len(self.metagenomes)) for i, item in enumerate(self.metagenomes.iteritems()): mid, mg = item data.append({'name': mid, 'data': [], 'fill': colors[i]}) for d in mg.stats[atype][level]: if (names is not None) and (d[0] not in names): continue annD[ d[0] ] = 1 annL = sorted(annD.keys()) for d in data: annMG = {} for a, v in self.metagenomes[d['name']].stats[atype][level]: annMG[a] = v for a in annL: if a in annMG: d['data'].append(int(annMG[a])) else: d['data'].append(0) height = height if height else len(annL)*len(self.metagenomes)*7.5 lheight = min(height, len(self.metagenomes)*35) lwidth = len(max(annL, key=len)) * 7.2 cwidth = 0.85 if legend else 0.99 keyArgs = { 'btype': 'row', 'width': width+lwidth, 'height': height, 'x_labels': annL, 'x_labels_rotation': x_rotate, 'title': title, 'target': '_'.join(self.mgids())+"_"+level+'_'+random_str(), 'show_legend': legend, 'legendArea': [0.87, 0.05, 0.2, lheight], 'chartArea': [lwidth, 0.02, cwidth, 0.95], 'data': data } if Ipy.DEBUG: print keyArgs try: Ipy.RETINA.graph(**keyArgs) except: sys.stderr.write("Error producing chart") return None