def dedupeContacts(self, _contacts):
    from vyperlogix.hash import lists
    # Group contacts by Email; duplicate keys accumulate into lists.
    d = lists.HashedLists()
    for c in _contacts:
        d[c['Email']] = lists.HashedLists2(c)
    contacts = []
    ascii_only = _utils.ascii_only
    for k, v in d.iteritems():
        if (misc.isList(v)):
            # Multiple contacts share this Email; merge their assets.
            assets = lists.HashedLists2()
            for item in v:
                try:
                    for aKey in item.keys():
                        item[aKey] = ascii_only(item[aKey])
                    assets[item['Asset_Id']] = item['Asset_Name']
                    del item['Asset_Id']
                    del item['Asset_Name']
                except Exception as details:
                    info_string = _utils.formattedException(details=details)
                    appendText(self.__child_frame.textboxLog, info_string)
            v[0]['Asset_Ids'] = ','.join(misc.sortCopy(list(set(assets.keys()))))
            contacts.append(v[0])
        else:
            try:
                for aKey in v.keys():
                    v[aKey] = ascii_only(v[aKey])
            except:
                pass
            contacts.append(v)
    return contacts
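
# A minimal, self-contained sketch of the dedupe-by-Email idea above, using a
# plain dict of lists in place of vyperlogix's HashedLists; the helper name
# and sample fields are illustrative, not part of the original API.
def dedupe_by_email_sketch(contacts):
    grouped = {}
    for c in contacts:
        grouped.setdefault(c['Email'], []).append(c)
    deduped = []
    for email, items in grouped.iteritems():
        first = items[0]
        # Merge the per-contact asset ids into one sorted, comma-separated field.
        first['Asset_Ids'] = ','.join(sorted(set(i['Asset_Id'] for i in items)))
        deduped.append(first)
    return deduped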
def split_column(self, colName, colName1, colName2, split_callback=__column_split_func__):
    '''Splits the contents of colName into colName1 and colName2.

    This function has not been tested and will not handle a split that
    results in more than two items per split; it is recommended that the
    user split the data manually in that case.
    '''
    split_callback = split_callback if (callable(split_callback)) else __column_split_func__
    num = self.column_number_for_name(colName)
    if (num > -1):
        # Replace the column header for the column at num, insert a new
        # column, then split the data.  Iterate over a copy of the items
        # because keys are deleted from __by_col__ inside the loop.
        for k, v in self.__by_col__.items():
            if (k.lower() == colName):
                recs = [rec for rec in self.__by_col__[k]]
                del self.__by_col__[k]
                _recs = [split_callback(rec) for rec in recs]
                self.__by_col__[colName1] = [_rec[0] for _rec in _recs]
                self.__by_col__[colName2] = [_rec[-1] for _rec in _recs]
                d = self.__row_by_col__[k]
                new_d = lists.HashedLists()
                for aKey, aValue in d.iteritems():
                    _aKey = split_callback(aKey)
                    new_d[_aKey[0]] = aValue
                    new_d[_aKey[-1]] = aValue
                del self.__row_by_col__[k]
                self.__row_by_col__[colName1] = new_d
                self.__row_by_col__[colName2] = new_d
                self.header[num] = colName1
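
# A hedged usage sketch for split_column; __column_split_func__ is not shown
# here, so this assumes the callback takes one cell value and returns a
# two-item sequence.  The table variable and column names are hypothetical:
#
#   split = lambda cell: [t.strip() for t in cell.split(',')][:2]
#   split('Doe, Jane')  # -> ['Doe', 'Jane']
#   csv.split_column('Name', 'LastName', 'FirstName', split_callback=split)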
def getSolutionsViewsFrequencies(self, num_days=30):
    from vyperlogix.sf.magma.solution_views import SalesForceMoltenSolutionViews
    sf_solution_views = SalesForceMoltenSolutionViews(self.sfQuery)
    num_days = int(num_days) if (str(num_days).isdigit()) else num_days
    d = lists.HashedLists()
    views = sf_solution_views.getSolutionsViews(num_days=num_days)
    try:
        for view in views:
            if (view.has_key('Solution__c')):
                d[view['Solution__c']] = view
    except:
        pass
    d_freqs = lists.HashedLists()
    for k, v in d.iteritems():
        d_freqs[len(v)] = k
    l_freqs = misc.sort(d_freqs.keys())
    return (l_freqs, d_freqs, d)
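
# The frequency inversion above relies on repeated-key assignments
# accumulating into lists; a stdlib-only sketch of the same two-step
# grouping (names are illustrative):
def view_frequencies_sketch(views):
    by_solution = {}
    for view in views:
        if 'Solution__c' in view:
            by_solution.setdefault(view['Solution__c'], []).append(view)
    by_count = {}
    for solution, items in by_solution.iteritems():
        by_count.setdefault(len(items), []).append(solution)
    return sorted(by_count.keys()), by_count, by_solution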
def normalizedSortedKeys(self, options=NormalizedKeyOptions.via_list):
    '''Get a list of keys sorted in a fashion that allows for easier
    manipulation - padded or normalized so that sorting works.

    options = see also the NormalizedKeyOptions enumeration.
    '''
    from vyperlogix.hash import lists
    try:
        _asDict = options.value & NormalizedKeyOptions.via_dict.value
    except:
        _asDict = False
    try:
        _useHash1 = options.value & NormalizedKeyOptions.use_hash1.value
    except:
        _useHash1 = False
    try:
        _useHash2 = options.value & NormalizedKeyOptions.use_hash2.value
    except:
        _useHash2 = False
    _useHash2 = ((not _useHash1) and (not _useHash2)) or _useHash2
    _useHash1 = False if (_useHash1 and _useHash2) else _useHash1
    d_keys = lists.HashedLists() if _useHash1 else lists.HashedLists2() if _useHash2 else {}

    def normalizeKey(k, m):
        # Left-pad the first comma-separated token to width m so that plain
        # string sorting orders the keys by that token.
        toks = k.split(',')
        n = toks[0]
        del toks[0]
        getLen = lambda m, n: m - len(n)
        formatter = lambda p, m, n, f: p % (' ' * getLen(m, n), f(n))
        _isString = misc.isString(n)
        _pattern = '%s%s' if _isString else '%s%d'
        _func = str if _isString else int
        _value = formatter(_pattern, m, n, _func)
        toks.insert(0, _value)
        s = ','.join(toks)
        if (_asDict):
            d_keys[s] = k
        return s

    l_keys = [len(k.split(',')[0]) for k in self.keys()]
    l_keys.sort()
    _max_len = l_keys[-1] if len(l_keys) > 0 else 0
    _keys = [normalizeKey(k, _max_len) for k in self.keys()]
    _keys.sort()
    _keys.reverse()
    return _keys if (not _asDict) else d_keys
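
# What normalizeKey buys you: left-padding the first comma-separated token so
# plain string sorting orders keys by that token.  A self-contained
# illustration (not the original code path):
keys = ['9,foo', '10,bar', '2,baz']
width = max(len(k.split(',')[0]) for k in keys)
padded = sorted('%s%s' % (' ' * (width - len(k.split(',')[0])), k) for k in keys)
# padded -> [' 2,baz', ' 9,foo', '10,bar'] - numeric order via string sort;
# normalizedSortedKeys then reverses this for descending order.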
def _process(self, xml=None):
    d = lists.HashedLists()

    def recurse_into(aTag):
        _d_ = lists.HashedLists()
        is_tag = lambda t: (misc.ObjectTypeName.typeClassName(t) == 'xml.dom.minidom.Element')
        if (is_tag(aTag)):
            _d_[aTag.nodeName] = lists.HashedLists(dict(aTag.attributes.items()))
            for aKid in aTag.childNodes:
                if (is_tag(aKid)):
                    if (self.verbose):
                        print misc.ObjectTypeName.typeClassName(aKid), aKid.nodeName
                    node = recurse_into(aKid)
                    for k, v in node.iteritems():
                        misc._unpack_(_d_[aTag.nodeName])[k] = misc._unpack_(v)
                        if (callable(self.callback)):
                            self.callback(k, misc._unpack_(misc._unpack_(_d_[aTag.nodeName])[k]))
                        if (callable(self.callbackNodeName)):
                            _k_ = self.callbackNodeName(k)
                            if (k != _k_):
                                _x_ = misc._unpack_(_d_[aTag.nodeName])
                                del _x_[k]
                                _x_[_k_] = misc._unpack_(v)
                else:
                    try:
                        misc._unpack_(_d_[aTag.nodeName])['data'] = aKid.data
                    except:
                        pass
        return _d_

    if (xml is not None):
        self.xml = _utils.ascii_only(xml)
    dom = parseString(self.xml)
    for aChild in dom.childNodes:
        d[aChild.nodeName] = recurse_into(aChild)
        try:
            __d = misc._unpack_(d[aChild.nodeName])
            if (__d.has_key(aChild.nodeName)) and (len(list(set(__d.keys()) - set([aChild.nodeName]))) == 0):
                d = __d
        except Exception as e:
            print 'ERROR', _utils.formattedException(details=e)
        if (self.verbose):
            print '=' * 40
    return d
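
# The recursion above, reduced to the stdlib: minidom elements become nested
# dicts of attributes, children, and text.  A runnable sketch, not the
# original API (no callbacks, no HashedLists):
from xml.dom.minidom import parseString

def xml_to_dict_sketch(node):
    d = dict(node.attributes.items()) if (node.attributes is not None) else {}
    for kid in node.childNodes:
        if (kid.nodeType == kid.ELEMENT_NODE):
            d[kid.nodeName] = xml_to_dict_sketch(kid)
        elif (kid.nodeType == kid.TEXT_NODE) and (len(kid.data.strip()) > 0):
            d['data'] = kid.data
    return d

print xml_to_dict_sketch(parseString('<root a="1"><child>text</child></root>').documentElement)
# -> {u'a': u'1', u'child': {'data': u'text'}}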
def getAccountTree(self, account, tree=None, skip_ancestors=False):
    '''Account tree in a list sorted by ParentId.'''
    # Avoid the mutable-default-argument pitfall: a shared default list
    # would accumulate results across calls.
    tree = [] if (tree is None) else tree
    _tree = self._getAccountTree_(account, tree=tree, skip_ancestors=skip_ancestors)
    d_tree = lists.HashedLists()
    for item in _tree:
        pid = item['ParentId']
        d_tree[pid if (pid is not None) else ''] = item
    for k, v in d_tree.iteritems():
        if (not isinstance(v, list)):
            v = [v]
        for _v in v:
            tree.append(_v)
    return tree
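
# getAccountTree guards against v not being a list, which points at the
# behavior these snippets share: assigning to an existing HashedLists key
# accumulates values instead of overwriting.  A minimal stand-in with that
# inferred semantic (not the vyperlogix class):
class MultiDictSketch(dict):
    def __setitem__(self, key, value):
        self.setdefault(key, []).append(value)

d = MultiDictSketch()
d['pid'] = 'child-1'
d['pid'] = 'child-2'
print d['pid']  # -> ['child-1', 'child-2']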
def associationsBy(self, name_or_number, callback=None):
    '''Returns a dict whose keys are composed of values from the
    name_or_number column and whose values are the matching rows.'''
    d = lists.HashedLists()
    iCol = self.column_number_for_name(name_or_number)
    for row in self.rows:
        key = row[iCol]
        if (callable(callback)):
            try:
                key = callback(key)
            except:
                pass
        d[key] = row
    return d
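
# Hypothetical usage: index rows by a (possibly transformed) column value;
# rows sharing a key accumulate together rather than clobbering one another.
#
#   by_state = table.associationsBy('State', callback=lambda s: s.upper())
#   # by_state['CA'] -> the row(s) whose State column upper-cases to 'CA'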
def post_from_sqlalchemy(request):
    # Build nested dicts from hyphen-delimited POST keys, e.g. 'a-b-c'
    # becomes d['a']['b']['c'].
    d = lists.HashedLists()
    for k, v in request.POST.iteritems():
        toks = k.split('-')
        _d_ = d
        for t in toks[0:-1]:
            _d = _d_[t]
            if (_d is None):
                _d = lists.HashedLists2()
                _d_[t] = _d
            else:
                _d = _d[0] if (misc.isList(_d)) else _d
            _d_ = _d
        _d_[toks[-1]] = v[0] if (misc.isList(v)) else v
    return d
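
# The same hyphen-to-nesting idea with plain dicts, runnable as-is; the form
# data below is made up for illustration:
def nest_post_sketch(post):
    root = {}
    for key, value in post.iteritems():
        toks = key.split('-')
        node = root
        for t in toks[:-1]:
            node = node.setdefault(t, {})
        node[toks[-1]] = value
    return root

print nest_post_sketch({'user-name': 'ann', 'user-addr-city': 'Austin'})
# -> {'user': {'name': 'ann', 'addr': {'city': 'Austin'}}}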
def getNormalizedDaemons(prefix, fpath):
    h = lists.HashedLists()
    fs = []
    dms = getDaemons(prefix, fpath)
    for f in dms:
        h[f.split('.')[0]] = f.split('.')[-1]
    for k, v in h.iteritems():
        # Prefer .py sources, then .pyc, then .pyo byte-code files.
        x = [n for n in v if n == 'py']
        if (len(x) == 0):
            x = [n for n in v if n == 'pyc']
        if (len(x) == 0):
            x = [n for n in v if n == 'pyo']
        if (len(x) > 0):
            fs.append('.'.join([k, x[0]]))
    return fs
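
# The grouping above picks one file per basename, preferring .py sources over
# byte-code.  A stdlib-only sketch with made-up file names:
def prefer_source_sketch(names):
    exts_by_base = {}
    for f in names:
        exts_by_base.setdefault(f.split('.')[0], []).append(f.split('.')[-1])
    out = []
    for base, exts in exts_by_base.iteritems():
        for ext in ('py', 'pyc', 'pyo'):  # preference order
            if (ext in exts):
                out.append('%s.%s' % (base, ext))
                break
    return out

print prefer_source_sketch(['svc.py', 'svc.pyc', 'job.pyo'])
# -> ['svc.py', 'job.pyo'] (dict ordering may vary)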
def process(self, html=None):
    l = lists.HashedLists()
    n = 1

    def recurse_into(aTag, _list=lists.HashedLists(), n=1):
        is_tag = lambda t: (misc.ObjectTypeName.typeClassName(t) == 'BeautifulSoup.Tag')
        is_valid_tag = lambda t: (t.name in ['table', 'tr', 'th', 'td'])
        if (is_tag(aTag)) and (is_valid_tag(aTag)):
            for aKid in aTag.childGenerator() if (is_valid_tag(aTag)) else []:
                if (is_tag(aKid)):
                    print misc.ObjectTypeName.typeClassName(aKid), aKid.name,
                    if (aKid.name == 'table'):
                        recurse_into(aKid, _list=_list, n=n)
                    elif (aKid.name == 'tr'):
                        n += 1
                        print
                        recurse_into(aKid, _list=_list, n=n)
                    elif (aKid.name in ['th', 'td']):
                        if (misc.isList(aKid.contents)) and (len(aKid.contents) == 1):
                            data = aKid.contents
                        else:
                            data = [misc.unpack([t for t in aKid.contents if (not is_tag(t))])]
                        print data
                        _list['%s' % (n)] = data
        return

    if (html is not None):
        self.html = _utils.ascii_only(html)
    soup = BeautifulSoup.BeautifulSoup(self.html)
    tables = soup.findAll('table')
    for aTable in tables:
        recurse_into(aTable, _list=l, n=n)
        print '=' * 40
    keys = misc.sortCopy([int(k) for k in l.keys()])
    items = ListWrapper()
    for k in keys:
        _items = l['%s' % (k)]
        items.append([_items])
    return items
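
# A hedged usage sketch for process: every <table> in the HTML is walked and
# the th/td contents are collected row by row under a running row counter.
# The parser variable below is hypothetical:
#
#   items = parser.process(html='<table><tr><td>a</td><td>b</td></tr></table>')
#   # items is expected to hold the cell contents grouped per row.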
def __init__(self, filename='', dict2_factory=lists.HashedLists2):
    self.__header__ = []
    self.__num_headers__ = len(self.__header__)
    self.__dict2_factory__ = dict2_factory
    self.__rows__ = []
    self.__rows_dicts__ = []
    self.__by_col__ = lists.HashedLists()
    self.__row_by_col__ = self.dict2_factory()
    self.__problem_rows__ = []
    self.__isValid__ = True
    self.__filename__ = ''
    self.codec = 'UTF-8'
    if (len(filename) > 0):
        if (isCSVFile(filename)):
            self.__filename__ = filename
            self.parse()
        else:
            raise ValueError('The filename "%s" must have one of the following file types: %s.' % (filename, csv_file_types))
def performGoogleSearch(searchString, start=-1):
    results = lists.HashedLists()
    # The constants below are stored with the high bit set on every byte and
    # decoded at runtime by masking each character with 0x7F.
    # Decodes to: 'did not match any documents'
    __failedSearch_symbol = '\xe4\xe9\xe4\xa0\xee\xef\xf4\xa0\xed\xe1\xf4\xe3\xe8\xa0\xe1\xee\xf9\xa0\xe4\xef\xe3\xf5\xed\xe5\xee\xf4\xf3'
    failedSearch_symbol = ''.join([chr(ord(ch) & 127) for ch in __failedSearch_symbol])
    # Decodes to: 'No standard web pages containing all your search terms were found.'
    __failedSearch2_symbol = "\xce\xef\xa0\xf3\xf4\xe1\xee\xe4\xe1\xf2\xe4\xa0\xf7\xe5\xe2\xa0\xf0\xe1\xe7\xe5\xf3\xa0\xe3\xef\xee\xf4\xe1\xe9\xee\xe9\xee\xe7\xa0\xe1\xec\xec\xa0\xf9\xef\xf5\xf2\xa0\xf3\xe5\xe1\xf2\xe3\xe8\xa0\xf4\xe5\xf2\xed\xf3\xa0\xf7\xe5\xf2\xe5\xa0\xe6\xef\xf5\xee\xe4\xae"
    failedSearch2_symbol = ''.join([chr(ord(ch) & 127) for ch in __failedSearch2_symbol])
    # Decodes to: 'www.google.com:80'
    __url = "\xf7\xf7\xf7\xae\xe7\xef\xef\xe7\xec\xe5\xae\xe3\xef\xed\xba\xb8\xb0"
    url = ''.join([chr(ord(ch) & 127) for ch in __url])
    # Decodes to the '/search?q=%s&...' query template.
    __term = "\xaf\xf3\xe5\xe1\xf2\xe3\xe8\xbf\xf1\xbd\xa5\xf3\xa6\xe9\xe5\xbd\xf5\xf4\xe6\xad\xb8\xa6\xef\xe5\xbd\xf5\xf4\xe6\xad\xb8\xa6\xe1\xf1\xbd\xf4\xa6\xf2\xec\xf3\xbd\xef\xf2\xe7\xae\xed\xef\xfa\xe9\xec\xec\xe1\xba\xe5\xee\xad\xd5\xd3\xba\xef\xe6\xe6\xe9\xe3\xe9\xe1\xec\xa6\xe3\xec\xe9\xe5\xee\xf4\xbd\xe6\xe9\xf2\xe5\xe6\xef\xf8\xad\xe1"
    _term = ''.join([chr(ord(ch) & 127) for ch in __term])
    if (start > -1):
        _term += '&start=%s' % start
    print 'url="%s"' % url
    print '_term="%s"' % _term
    conn = httplib.HTTPConnection(url)
    conn.request("GET", _term % (urllib.quote(searchString)))
    isError = False
    try:
        r1 = conn.getresponse()
    except:
        isError = True
    if ((isError == False) and (r1.status == 200) and (r1.reason == 'OK')):
        data1 = r1.read()
        myParser = TargetedHTMLParser()
        myParser.targetTag('a')
        myParser.feed(data1)
        fname = urllib.quote(searchString).replace('/', '+')
        # results is only populated when a failure marker appears in the
        # page or no anchor tags were parsed.
        if ((data1.find(failedSearch_symbol) == -1) and (data1.find(failedSearch2_symbol) == -1) and (myParser.tagCount > 0)):
            pass
        else:
            results['data'] = data1.split('\n')
            results['search'] = searchString
            results['parser'] = myParser
    return results
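
# The masked-string trick above: each constant is stored with the high bit
# set and recovered by ANDing every byte with 0x7F.  A runnable illustration:
s = '\xe8\xe9'  # 'hi' with the high bit set on each byte
print ''.join([chr(ord(ch) & 127) for ch in s])  # -> 'hi'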
def parse(self, dict2_factory=dict):
    '''dict2_factory is optional and specifies a class to be used when making each record.'''
    self.__header__ = None
    self.fIn = open(self.filename, 'r')
    _re = re.compile('"[^"\r\n]*"|[^,\r\n]*', re.MULTILINE)
    for l in self.fIn:
        l = str(l).strip()
        if (self.__header__ is None):
            # The first line carries the column headers.
            self.__header__ = [h.strip() for h in l.split(',') if (len(h) > 0)]
            self.__num_headers__ = len(self.header)
            for i in xrange(0, len(self.header)):
                self.__row_by_col__[self.header[i]] = lists.HashedLists()
            continue
        recs = [match.group().replace('"', '') for match in _re.finditer(l)]
        if (len(recs) != len(self.header)):
            # Drop the zero-length matches the regex emits after each field.
            i = 0
            n = len(recs)
            while (i < (n - 1)):
                can_remove = (len(recs[i]) > 0) and (len(recs[i + 1]) == 0)
                if (can_remove):
                    del recs[i + 1]
                    n = len(recs)
                i += 1
        len_header = len(self.header)
        len_recs = len(recs)
        _num_missing_cols = len_header - len_recs
        if (_num_missing_cols > 0):
            _msg = '(%s.%s) :: CSV parser warning "%s", the number of fields does not match the first line; padding with "MISSING" data.' % (ObjectTypeName.typeName(self), misc.funcName(), l)
            logging.warning(_msg)
            for i in xrange(0, _num_missing_cols):
                recs.append('MISSING')
            _num_missing_cols = len(self.header) - len(recs)
            self.__problem_rows__.append(recs)
        elif (_num_missing_cols == 0):
            for i in xrange(0, len(self.header)):
                self.__by_col__[self.header[i]] = recs[i]
                self.__row_by_col__[self.header[i]][recs[i]] = recs
            yield self.__rowAsRecords__(recs, dict2_factory=dict2_factory) if (callable(dict2_factory)) else recs
        elif (_num_missing_cols < 0):
            yield self.__rowAsRecords__(recs, dict2_factory=dict2_factory) if (callable(dict2_factory)) else recs
    self.fIn.close()
def parse(self):
    fIn = open(self.filename, 'r')
    try:
        lines = [l.strip() for l in fIn.readlines()]
    finally:
        fIn.close()
    # The first line carries the column headers.
    self.__header__ = [h.strip() for h in lines[0].split(',') if (len(h) > 0)]
    self.__num_headers__ = len(self.header)
    for i in xrange(0, len(self.header)):
        self.__row_by_col__[self.header[i]] = lists.HashedLists()
    _re = re.compile('"[^"\r\n]*"|[^,\r\n]*', re.MULTILINE)
    for l in lines[1:]:
        recs = [match.group().replace('"', '') for match in _re.finditer(l)]
        if (len(recs) != len(self.header)):
            # Drop the zero-length matches the regex emits after each field.
            i = 0
            n = len(recs)
            while (i < (n - 1)):
                can_remove = (len(recs[i]) > 0) and (len(recs[i + 1]) == 0)
                if (can_remove):
                    del recs[i + 1]
                    n = len(recs)
                i += 1
        len_header = len(self.header)
        len_recs = len(recs)
        _num_missing_cols = len_header - len_recs
        if (_num_missing_cols > 0):
            _msg = '(%s.%s) :: CSV parser warning "%s", the number of fields does not match the first line; padding with "MISSING" data.' % (ObjectTypeName.typeName(self), misc.funcName(), l)
            logging.warning(_msg)
            for i in xrange(0, _num_missing_cols):
                recs.append('MISSING')
            _num_missing_cols = len(self.header) - len(recs)
            self.__problem_rows__.append(recs)
        elif (_num_missing_cols == 0):
            self.__rows__.append(recs)
            for i in xrange(0, len(self.header)):
                self.__by_col__[self.header[i]] = recs[i]
                self.__row_by_col__[self.header[i]][recs[i]] = recs
        elif (_num_missing_cols < 0):
            self.__rows__.append(recs)
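
# Why the empty-match cleanup in both parsers is needed: the alternation
# '"[^"\r\n]*"|[^,\r\n]*' emits a zero-length match after every quoted field
# and at the end of the line, so raw finditer output interleaves real fields
# with empties.  A runnable illustration:
import re
_re = re.compile('"[^"\r\n]*"|[^,\r\n]*', re.MULTILINE)
print [m.group().replace('"', '') for m in _re.finditer('a,"b,c",d')]
# -> ['a', '', 'b,c', '', 'd', '']
# Dropping each empty match that follows a non-empty one leaves
# ['a', 'b,c', 'd'], which lines up with a three-column header.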
def getZipFilesAnalysis(_zip, prefix='', _acceptable_types=[]):
    import os
    from vyperlogix.misc import ObjectTypeName
    from vyperlogix.hash import lists
    _analysis = lists.HashedLists()
    try:
        iterable = None
        if (ObjectTypeName.typeClassName(_zip) == 'zipfile.ZipFile'):
            iterable = (f.filename for f in _zip.filelist)
        elif (lists.isDict(_zip)):
            iterable = (f for f in _zip.keys())
        for f in iterable:
            toks = os.path.splitext(f)
            if (len(_acceptable_types) == 0) or (toks[-1].split('.')[-1] in _acceptable_types) or ((len(prefix) > 0) and (toks[0].startswith(prefix))):
                _analysis[toks[0]] = toks[-1] if (len(toks) > 1) else ''
    except:
        pass
    return _analysis
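
# Usage sketch, assuming this module is importable; builds a tiny in-memory
# archive and groups its entries by path-without-extension:
import io
import zipfile
buf = io.BytesIO()
zf = zipfile.ZipFile(buf, 'w')
zf.writestr('pkg/mod.py', 'pass')
zf.writestr('pkg/mod.txt', 'notes')
zf.close()
analysis = getZipFilesAnalysis(zipfile.ZipFile(buf, 'r'), _acceptable_types=['py', 'txt'])
# Expect analysis['pkg/mod'] to accumulate both extensions ('.py', '.txt').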
def parse(self):
    try:
        import xlrd
        book = xlrd.open_workbook(self.filename)
        sheet = book.sheets()[0]
        self.__header__ = [h.strip() for h in sheet.row_values(0, 0)]
        self.__num_headers__ = len(self.header)
        for i in xrange(0, self.__num_headers__):
            self.__row_by_col__[self.header[i]] = lists.HashedLists()
        for rowNum in xrange(1, sheet.nrows):
            try:
                recs = sheet.row_values(rowNum, 0)
                self.__rows__.append(recs)
                for i in xrange(0, self.__num_headers__):
                    self.__by_col__[self.header[i]] = recs[i]
                    self.__row_by_col__[self.header[i]][recs[i]] = recs
            except:
                pass
    except Exception as details:
        info_string = _utils.formattedException(details=details)
        print >>sys.stderr, info_string
def __init__(self):
    self.d_cache = lists.HashedLists()
def procs_by_parentId(self):
    d = lists.HashedLists()
    for node in self.root.preOrder():
        d[str(node.parentId)] = node
    return d
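
# Hypothetical usage: with accumulate-on-duplicate semantics the result maps
# each parent id to its child node(s), e.g. to walk a process tree top-down:
#
#   kids_by_parent = tree.procs_by_parentId()
#   children_of_root = kids_by_parent[str(tree.root.parentId)]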