def SummarizeMetaData(X):
    """Render the metadata dictionary ``X`` as an HTML fragment.

    The following keys are rendered specially when present:

    * ``image`` -- path of an image; emitted as an ``<img>`` tag whose
      width/height are scaled so that neither exceeds 125 pixels.
    * ``title``, ``author``, ``description``, ``keywords`` -- emitted with a
      bold label; newlines in the description become ``<br/>``.
    * ``signature`` / ``DIR_signature`` -- emitted as a sentence describing
      the file (or directory) type.
    * keys ending in ``nrows`` / ``ncols`` -- emitted as a row/column count
      sentence (the first matching key of each kind is used).
    * keys ending in ``colnames`` / ``coltypes`` / ``coldescrs`` -- emitted
      as a per-column listing.
    * ``frequentwords`` -- emitted via ``repr``.

    Every remaining key (except ``coloring``, ``colformats``, ``nfiles`` and
    ``npaths``, which are suppressed) is listed under "Other MetaData".

    ``X`` is not modified.  Returns the HTML text as a string; sections are
    separated by ``<br/><br/>``.
    """
    # --- thumbnail ---------------------------------------------------------
    if 'image' in X:
        # Imported lazily so PIL is only required when an image is present.
        from PIL import Image
        K = 125  # maximum thumbnail edge length, in pixels
        try:
            x = Image.open('..' + X['image'])
        except Exception:
            # Best effort: report the problem and emit the tag without an
            # explicit size rather than failing the whole summary.
            print('Importing image %s failed. here is the error:' % (X['image'],))
            print_exc()
            sizetag = ''
        else:
            (w, h) = x.size
            if w > K or h > K:
                # Scale both edges by the same factor so the longer one is K.
                r = float(max(w, h))
                w = int(w * K / r)
                h = int(h * K / r)
            sizetag = 'width="' + str(w) + '" height="' + str(h) + '"'
        image = '<img src="' + X['image'] + '" ' + sizetag + '/><br/>'
    else:
        image = ''

    # --- simple labelled fields -------------------------------------------
    if 'description' in X:
        description = ('<strong>Description: </strong>'
                       + X['description'].replace('\n', '<br/>'))
    else:
        description = ''
    author = '<strong>Author: </strong>' + X['author'] if 'author' in X else ''
    title = '<strong>Title: </strong>' + X['title'] if 'title' in X else ''
    if 'keywords' in X:
        keywords = '<strong>Keywords: </strong>' + X['keywords']
    else:
        keywords = ''

    # --- signature ---------------------------------------------------------
    # Work on a local copy instead of writing a default back into X.  An
    # empty signature is treated the same as a missing one.
    sig = X.get('signature', '')
    if sig == '':
        signature = ''
    elif sig != 'directory':
        signature = ('<strong>Signature: </strong> This appears to be a '
                     + sig + ' file.')
    elif 'DIR_signature' in X:
        signature = ('<strong>Signature: </strong> This is a directory '
                     'consisting of ' + X['DIR_signature'] + ' files.')
    else:
        signature = ''

    # --- row / column counts ----------------------------------------------
    nr = [k for k in X if k.endswith('nrows')]
    nc = [k for k in X if k.endswith('ncols')]
    if sig == 'tabular':
        preamble = 'It has'
    elif (sig == 'directory' and 'DIR_signature' in X
          and X['DIR_signature'] == 'tabular'):
        preamble = 'Its constituent datasets commonly have'
    else:
        preamble = 'This data has'

    if nr and nc:
        ending = str(X[nr[0]]) + ' rows and ' + str(X[nc[0]]) + ' columns.'
    elif nr:
        ending = str(X[nr[0]]) + ' rows.'
    elif nc:
        ending = str(X[nc[0]]) + ' columns.'
    else:
        ending = ''
    tabulartext = preamble + ' ' + ending if ending != '' else ''

    # --- column listing ----------------------------------------------------
    nn = [k for k in X if k.endswith('colnames')]
    if nn:
        names = X[nn[0]]
        # Only accept a type list whose length matches the name list.
        nt = [k for k in X
              if k.endswith('coltypes') and len(X[k]) == len(names)]
        if nt:
            types = [' (' + t + ')' for t in X[nt[0]]]
        else:
            types = [''] * len(names)
        # Only accept a description dict that mentions at least one column.
        nd = [k for k in X
              if k.endswith('coldescrs') and isinstance(X[k], dict)
              and set(X[k].keys()).intersection(names)]
        if nd:
            descr_map = X[nd[0]]
            descrs = [': ' + descr_map[n] if n in descr_map else ''
                      for n in names]
        else:
            descrs = [''] * len(names)
        coltext = 'The columns are:<br/>' + '<br/>'.join(
            ['<strong>' + n + '</strong>' + t + d
             for (n, t, d) in zip(names, types, descrs)])
    else:
        coltext = ''
        nt = []
        nd = []

    # --- frequent words ----------------------------------------------------
    if 'frequentwords' in X:
        frequentwords = ('Frequent words in this data include: '
                         + repr(X['frequentwords']) + '.')
    else:
        frequentwords = ''

    sections = [image, title, author, description, signature, tabulartext,
                coltext, frequentwords, keywords]
    text = '<br/><br/>'.join([s for s in sections if s != ''])

    # --- everything else ---------------------------------------------------
    handled = ['image', 'coloring', 'description', 'author', 'title',
               'keywords', 'signature', 'frequentwords', 'colformats',
               'nfiles', 'npaths'] + nr + nc + nn + nt + nd
    # Sorted so the generated HTML does not depend on set iteration order.
    OtherKeys = sorted(set(X).difference(handled))
    if OtherKeys:
        text += ('<br/><br/><strong>Other MetaData</strong>:'
                 + '<br/><br/>'.join(
                     ['<strong>' + k + ': </strong>'
                      + (X[k] if is_string_like(X[k]) else repr(X[k]))
                      for k in OtherKeys]))
    return text
def ConsolidateSources(metapath, objname=None, extensions=None):
    """Merge per-source metadata for ``metapath`` into one flat dictionary.

    ``CombineSources(metapath, extensions=extensions)`` is called and its
    result is used here as a dict mapping each metadata key to a dict of
    ``{source extension: value}``.  Each key is then collapsed to a single
    value:

    * ``Resources`` -- union of all sources, de-duplicated.
    * ``image`` -- union of all sources; string values are split on commas.
    * ``author``, ``title`` -- source values joined with ``'; '``.
    * ``description`` -- taken from ``description`` or, failing that, from
      ``Verbose``.  A single source is used verbatim; several sources are
      joined as ``"<extension>: <text>"`` paragraphs.
    * ``keywords`` -- all sources flattened to one comma-separated string,
      de-duplicated.
    * ``signature`` -- the value if all sources agree, otherwise ``''``.
    * ``nrows``, ``ncols``, ``coloring``, ``wordassoc``, ``colformats``,
      ``coltypes``, ``colnames``, ``frequentwords``, ``nfiles``, ``npaths``
      and every ``DIR_*`` key -- the value from the first source present in
      the order Automatic, Attached, Associated, Inherited.
    * ``coldescrs`` -- per-column descriptions from all dict-valued sources,
      joined with newlines.
    * anything not consolidated above -- source values joined with spaces
      (non-strings via ``repr``).

    ``objname`` is accepted but not used by this function.  ``extensions``
    defaults to ``['Attached', 'Associated', 'Automatic', 'Inherited']``.

    Returns the consolidated dictionary.
    """
    if extensions is None:
        extensions = ['Attached', 'Associated', 'Automatic', 'Inherited']
    combined = CombineSources(metapath, extensions=extensions)
    consolidated = {}

    if 'Resources' in combined:
        consolidated['Resources'] = uniqify(
            ListUnion(list(combined['Resources'].values())))

    if 'image' in combined:
        consolidated['image'] = ListUnion(
            [v.split(',') if is_string_like(v) else v
             for v in combined['image'].values()])

    for joined_key in ('author', 'title'):
        if joined_key in combined:
            consolidated[joined_key] = '; '.join(combined[joined_key].values())

    # 'description' wins over 'Verbose'; only the first one present is used.
    for descr_key in ('description', 'Verbose'):
        if descr_key in combined:
            # list() so the pairs can be indexed on Python 3 as well.
            descrs = list(combined[descr_key].items())
            if len(descrs) == 1:
                consolidated['description'] = descrs[0][1]
            else:
                consolidated['description'] = '\n\n'.join(
                    [e + ': ' + d for (e, d) in descrs])
            break

    if 'keywords' in combined:
        # Normalise every source to a comma-separated string first.
        per_source = [v if is_string_like(v) else ','.join(v)
                      for v in combined['keywords'].values()]
        all_words = ','.join(per_source).split(',')
        consolidated['keywords'] = ','.join(
            [w.strip() for w in uniqify(all_words)])

    if 'signature' in combined:
        s = uniqify(list(combined['signature'].values()))
        # Conflicting signatures are reported as "unknown" (empty string).
        consolidated['signature'] = s[0] if len(s) == 1 else ''

    L = ['nrows', 'ncols', 'coloring', 'wordassoc', 'colformats', 'coltypes',
         'colnames', 'frequentwords', 'nfiles', 'npaths']
    LL = L + [k for k in combined if k.startswith('DIR_')]
    priority = ('Automatic', 'Attached', 'Associated', 'Inherited')
    for key in LL:
        if key not in combined:
            continue
        sources = combined[key]
        for ext in priority:
            if ext in sources:
                # Read the value from the same source that was matched.
                consolidated[key] = sources[ext]
                break

    if 'coldescrs' in combined:
        merged = {}
        for c in combined['coldescrs'].values():
            if isinstance(c, dict):
                for k in c:
                    merged.setdefault(k, []).append(c[k])
        consolidated['coldescrs'] = dict(
            (k, '\n'.join(v)) for (k, v) in merged.items())

    # Whatever has not been handled yet is flattened to a single string.
    OtherKeys = set(combined).difference(consolidated)
    for k in OtherKeys:
        consolidated[k] = ' '.join(
            [v if is_string_like(v) else repr(v)
             for v in combined[k].values()])
    return consolidated