Example #1
0
	def makeTermAndIdMap (self):
		"""
		Index the entries of self.lexiconData by pretty term and by id.

		An entry whose prettyTerm or id was already seen is reported on
		stdout and skipped entirely, so the first occurrence wins and both
		maps always hold the same set of entries.

		Returns a (termMap, idMap) tuple of UserDict instances.
		"""
		termMap = UserDict()
		idMap = UserDict()
		for term in self.lexiconData:
			termKey = term.prettyTerm
			idKey = term.id
			# "in" replaces the deprecated has_key(); %s formatting keeps the
			# report from raising TypeError when a key is not a string
			if termKey in termMap:
				print("dup term: %s" % (termKey,))
				continue
			if idKey in idMap:
				print("dup id: %s" % (idKey,))
				continue
			termMap[termKey] = term
			idMap[idKey] = term
		return termMap, idMap
Example #2
0
class EadCollection(EadComponent):
    """
    collection elements correspond to the "c02" level of the EAD document

    Every "c03" child of the element is wrapped in an EadItem, stored under
    its id, and added to the Box named by the item's ``box`` attribute.
    """
    def __init__(self, element, parent):
        EadComponent.__init__(self, element, parent)
        self._items = UserDict()  # item id -> EadItem
        self.boxes = UserDict()  # box key -> Box
        for node in XmlUtils.selectNodes(element, "c03"):
            self.add(EadItem(node, self))

    def getItems(self):
        """Return the items that were added to this collection."""
        return self._items.values()

    def getBox(self, key):
        """Return the Box stored under key, creating a new one if absent."""
        if key not in self.boxes:
            self.boxes[key] = Box()
        return self.boxes[key]

    def getFolder(self, box_key, folder_key):
        """
        Return the folder folder_key from the box stored under box_key.

        Raises Exception when either the box or the folder is unknown.
        """
        if box_key not in self.boxes:
            raise Exception("box not found for %s" % box_key)
        box = self.boxes[box_key]

        # Box is defined elsewhere, so its own has_key() API is kept as is
        if not box.has_key(folder_key):
            raise Exception("folder not found in box %s for %s" %
                            (box_key, folder_key))
        return box.getFolder(folder_key)

    def add(self, item):
        """Register item under its id and add it to its box."""
        self._items[item.id] = item
        self.getBox(item.box).addItem(item)

    def report(self):
        """Print the base-class report followed by this id and item count."""
        EadComponent.report(self)
        print("id: %s" % self.id)
        print("items: %d" % len(self._items))
Example #3
0
 def __init__(self, path):
     """
     Iterate the WOSXlsReader for path and count populated fields.

     self.tally maps every field name seen in any record to the number of
     records holding a truthy value for it; a field that is always empty
     is still present with a count of 0.
     """
     self.wos = WOSXlsReader(path)
     tally = UserDict()
     for rec in self.wos:
         for field in rec.keys():
             # get() replaces the deprecated has_key() test-then-lookup
             count = tally.get(field, 0)
             if rec[field]:
                 count = count + 1
             tally[field] = count
     self.tally = tally
Example #4
0
	def __init__ (self, path):
		"""
		Read the file at path and index its records (self.data).

		- self.order_detail_id_map maps each 'orderdetailid' to its record
		- self.order_id_map maps each 'orderid' to the list of
		  'orderdetailid' values sharing that order, in self.data order
		"""
		CsvFile.__init__(self)
		self.read(path)
		order_id_map = UserDict()
		order_detail_id_map = {}
		for rec in self.data:
			detail_id = rec['orderdetailid']
			order_detail_id_map[detail_id] = rec
			# setdefault() replaces the fragile "has_key and x or []" idiom
			order_id_map.setdefault(rec['orderid'], []).append(detail_id)
		self.order_id_map = order_id_map
		self.order_detail_id_map = order_detail_id_map
Example #5
0
 def __init__(self, path, field):
     """
     Iterate the WOSXlsReader for path and count the values of one field.

     self.tally maps each truthy value of rec[field] to the number of
     records carrying it; records with a falsy value are ignored.
     """
     self.wos = WOSXlsReader(path)
     self.field = field
     tally = UserDict()
     for rec in self.wos:
         val = rec[field]
         if not val:
             continue
         # val is truthy past the guard, so every remaining record counts
         tally[val] = tally.get(val, 0) + 1
     self.tally = tally
Example #6
0
 def __init__(self, path):
     """
     Iterate the WOSXlsReader for path and count publication names.

     self.tally maps each truthy result of rec._getPubname() to the number
     of records returning it; records with a falsy result are ignored.
     """
     self.wos = WOSXlsReader(path)
     self.field = "pubname"
     tally = UserDict()
     for rec in self.wos:
         val = rec._getPubname()
         if not val:
             continue
         # val is truthy past the guard, so every remaining record counts
         tally[val] = tally.get(val, 0) + 1
     self.tally = tally
Example #7
0
 def __init__(self, path):
     """
     Iterate the WOSXlsReader for path and count author occurrences.

     self.tally maps each truthy author yielded by rec._getAuthors() to the
     number of times it occurs across all records; falsy authors are
     ignored.
     """
     self.wos = WOSXlsReader(path)
     tally = UserDict()
     for rec in self.wos:
         for author in rec._getAuthors():
             if not author:
                 continue
             # author is truthy past the guard, so it is always counted
             tally[author] = tally.get(author, 0) + 1
     self.tally = tally
Example #8
0
    def doTally(self, field, ip):
        """
        Count the values of field over the log entries whose 'IP' equals ip.

        Prints the number of matching entries and returns a UserDict mapping
        each value of field to its number of occurrences. Entries are not
        checked for the field first: logLine[field] is evaluated for every
        matching entry.
        """
        tally = UserDict()

        filtered = self.logFile.filter(lambda x: x['IP'] == ip)
        print('%d filtered entries' % len(filtered))
        for logLine in filtered:
            val = logLine[field]
            # get() replaces the "has_key and x or 0" idiom
            tally[val] = tally.get(val, 0) + 1

        return tally
Example #9
0
    def processResults(self):
        """
        Tally result formats and collect the url-rewriting records.

        Iterates self and sets:
        - self.formatTally: xmlFormat -> number of results with that format
        - self.records: recId -> record built from the result
        - self.bscs_urls: the records' protected urls, without duplicates,
          in first-seen order
        """
        print('processResults ...')
        formats = UserDict()
        records = UserDict()
        bscs_urls = []
        for result in self:
            xmlFormat = result.xmlFormat
            # get() replaces the "has_key and x or 0" idiom
            formats[xmlFormat] = formats.get(xmlFormat, 0) + 1
            record = urlRewritingRecordFromSearchResult(result)
            records[result.recId] = record
            for url in record.getProtectedUrls():
                if url not in bscs_urls:
                    bscs_urls.append(url)

        self.formatTally = formats
        self.records = records
        self.bscs_urls = bscs_urls
Example #10
0
class CollectionInfoSearcher(RepositorySearcher):
    """
    Searches the collection of collection records and exposes
    "collections" as a mapping from collection id to collectionInfo
    """
    batchSize = 500
    verbose = True
    # callable applied to each search result to build its info object
    collection_info_constructor = DleseCollectionInfo

    def __init__(self, collection, xmlFormat, baseUrl):
        # self.collections is created before the base constructor runs
        # NOTE(review): presumably because RepositorySearcher.__init__
        # ends up calling processResults() - confirm
        self.collections = UserDict()
        RepositorySearcher.__init__(self,
                                    collection=collection,
                                    xmlFormat=xmlFormat,
                                    baseUrl=baseUrl)

    def get_paramsOFF(self, collection, xmlFormat):
        """
        Print and return the params computed by RepositorySearcher.get_params.

        NOTE(review): the "OFF" suffix looks like a way to disable a
        get_params override - confirm before renaming.
        """
        params = RepositorySearcher.get_params(self, collection, xmlFormat)
        for p in params:
            print('- %s: %s' % (p, params[p]))
        return params

    def processResults(self):
        """
        Build an info object for every result and store it in
        self.collections under info.key.
        """
        for result in self:
            info = self.collection_info_constructor(result)
            self.collections[info.key] = info

    def getCollectionInfo(self, key):
        """
        return collection info for specified collection

        When key is unknown, the known keys are printed and None is returned.
        """
        print('lookinfo for key:"%s"' % key)
        if key not in self.collections:
            # a separate loop name keeps the `key` argument from being
            # overwritten while the known keys are listed
            for known in self.collections.keys():
                print('-  %s' % (known,))
            return None
        return self.collections[key]
Example #11
0
class DataHubItemsMgr(ProvItemsMgr):
    """
    maps provCode to provItem for all items in dataHub worksheet
    - countryMap organizes the provItems by country
      (countryCode -> {provCode -> provItem})
    """
    def __init__(self):
        """
        ProvItemsMgr calls self.load(), so the worksheet and countryMap
        are set up before the base constructor runs
        """
        self.xls = DataHubWorksheet()
        # holds all provItems for given country (key is country code)
        self.countryMap = UserDict()

        ProvItemsMgr.__init__(self)

    def load(self):
        """
        obtain items from the data hub xls and
        populate this class's data structures
        """
        for provItem in self.xls:
            self[provItem.provCode] = provItem
            country_code = provItem.countryCode
            # "in" replaces the deprecated has_key()
            if country_code not in self.countryMap:
                self.countryMap[country_code] = UserDict()
            self.countryMap[country_code][provItem.provCode] = provItem

    def getProvItem(self, provCode):
        """
        get the provItem for specified provCode (e.g. 'US-CO')
        """
        return self[provCode]

    def getProvItems(self, countryCode):
        """
        get all the provItems having specified country code

        An unknown countryCode is not handled here: the countryMap lookup
        is performed directly.
        """
        return self.countryMap[countryCode].values()
Example #12
0
class Year(OrderedDict):
    """
    data is months: self.data maps a month number to the Month holding
    the points that fall in it
    """
    def __init__(self, year):
        self.year = year
        self.data = UserDict()
        # NOTE(review): the values stored in self.data are Month instances;
        # confirm that Month exposes a .year attribute for this sort key
        self.sortKey = lambda x: x.year

    def add(self, point):
        """File point under the month of point.date, creating it if needed."""
        month = point.date.month
        # "in" replaces the deprecated has_key()
        if month not in self.data:
            self.data[month] = Month(month)
        self.data[month].add(point)

    def report(self):
        """Print a header for this year, then each month's report."""
        print('\n** {} **'.format(self.year))

        for month in self.sorted_values():
            month.report()
Example #13
0
class Visitors(OrderedDict):
    """
    data is years: self.data maps a year number to the Year holding the
    data points that fall in it
    """
    def __init__(self, path):
        """
        Load the JSON file at path and group obj['visitors']['data'] by the
        year of each DataPoint's date.
        """
        # the with block closes the file as soon as its content is read
        with open(path, 'r') as fp:
            content = fp.read()
        obj = json.loads(content)
        # a list (not a lazy map object) so that len() is always valid
        data = [DataPoint(item) for item in obj['visitors']['data']]
        print('there are {} data points'.format(len(data)))

        self.data = UserDict()
        self.sortKey = lambda x: x.year
        for point in data:
            year = point.date.year
            # "in" replaces the deprecated has_key()
            if year not in self.data:
                self.data[year] = Year(year)
            self.data[year].add(point)

    def report(self):
        """Print the report of every year, in sorted_values() order."""
        for year in self.sorted_values():
            year.report()
Example #14
0
 def has_key(self, key):
     """
     Report whether key is present: the mapping is populated first, then
     the key is looked up in its transformed form.
     """
     self.__populate()
     transformed = self._keyTransform(key)
     return UserDict.has_key(self, transformed)
Example #15
0
class MetadataScanner (SimpleScanner):
	"""
	Scans xml files for protected urls and records, per url and per
	record, where they were found.
	"""

	# default file written by writeProtectedUrls()
	outpath = 'METADATA_SCAN_URLS.txt'

	def __init__ (self, baseDir):
		# the collections are created before the base constructor runs
		# NOTE(review): presumably because SimpleScanner.__init__ starts the
		# scan and therefore calls processPath() - confirm
		self.unique_protected_urls = []
		self.protected_url_count = 0
		self.unique_asset_filenames = []
		self.metadata_info = UserDict() # url -> [RecordInfo, ...]
		self.recordsToUpdate = UserDict() # recordId -> RecordInfo
		SimpleScanner.__init__(self, baseDir)

	def getRecordInfo (self, recordId):
		"""
		return the recordInfo stored for recordId, or None if there is none
		"""
		return self.recordsToUpdate.get(recordId)

	def getRecordInfoForPath (self, path):
		"""
		return existing recordInfo if possible, otherwise
		create and return new recordInfo
		"""
		# a RecordInfo is built first because it supplies the recordId
		tmpInfo = RecordInfo(path)
		recordId = tmpInfo.recordId
		if recordId not in self.recordsToUpdate:
			self.recordsToUpdate[recordId] = tmpInfo
		return self.recordsToUpdate[recordId]

	def acceptFileToScan (self, path):
		"""
		boolean determining whether this file is scanned
		e.g., typically look for xml files when processing metadata
		"""
		return os.path.isfile(path) and os.path.basename(path).endswith('.xml')

	def processPath (self, path):
		"""
		Find the protected urls in the file at path and update the url
		lists, the counters, metadata_info and the file's recordInfo.
		"""
		SimpleScanner.processPath(self, path)

		# try/finally so the file handle is closed even if the read fails
		fp = open(path, 'r')
		try:
			content = fp.read()
		finally:
			fp.close()

		# raw string: same pattern text, without relying on unknown escapes
		urlPattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'

		for m in re.finditer(urlPattern, content):
			url = m.group()
			# urlPattern doesn't stop at the closing tag at the tail of urls
			for ending in [
				'</primaryURL>',
				'</url>',
				'</urlEntry>',
				'</standardURL>'
			]:
				if url.endswith(ending):
					# strip the trailing tag only; str.replace would also
					# remove any earlier occurrence inside the match
					url = url[:-len(ending)]
			if bscs.protected.isAnyProtectedUrl (url):
				filename = os.path.basename(url)
				if url not in self.unique_protected_urls:
					self.unique_protected_urls.append(url)

				if filename not in self.unique_asset_filenames:
					self.unique_asset_filenames.append(filename)

				self.protected_url_count = self.protected_url_count + 1
				# getMetadataInfo returns None for an unseen url
				infos = self.getMetadataInfo(url) or []
				info = self.getRecordInfoForPath(path)
				info.protectedUrls.append(url)
				infos.append(info)
				self.metadata_info[url] = infos

	def report (self):
		"""
		print the base-class report followed by the url and filename counts
		"""
		SimpleScanner.report(self)
		print('\nunique protected URLs: %d (out of %d total)' % (len(self.unique_protected_urls), self.protected_url_count))
		print('unique asset filenames: %d' % len(self.unique_asset_filenames))

	def reportMetadataInfo (self):
		"""
		print every url (sorted) with the recordIds of its recordInfos
		"""
		urls = sorted(self.metadata_info.keys())
		print("metadata info report (%d records)" % len(urls))
		for url in urls:
			print('- %s' % url)
			for info in self.metadata_info[url]:
				print('  - %s' % (info.recordId,))

	def getRecordMap (self):
		"""
		OBSOLETE - Use self.recordsToUpdate

		returns a dict mapping recordId to the list of urls found for it
		"""
		recordMap = {}
		for url in self.metadata_info.keys():
			for info in self.metadata_info[url]:
				# setdefault() replaces the has_key() test-then-assign
				recordMap.setdefault(info.recordId, []).append(url)
		return recordMap

	def reportRecordMap (self):
		"""
		OBSOLETE - see self.recordsToUpdate()
		"""
		recordMap = self.getRecordMap()
		keys = sorted(recordMap.keys())
		print('\n%d records containing protectedUrls' % len(keys))
		for recId in keys:
			print("- %s" % recId)
			for url in recordMap[recId]:
				print('  - %s' % url)

	def getMetadataInfo (self, url):
		"""
		returns a LIST of metadata infos for given url
		(None when the url has not been seen)
		"""
		return self.metadata_info.get(url)

	def writeProtectedUrls (self, outpath=None):
		"""
		sort self.unique_protected_urls in place and write them, one per
		line, to outpath (defaults to self.outpath)
		"""
		if outpath is None:
			outpath = self.outpath
		self.unique_protected_urls.sort()
		fp = open(outpath, 'w')
		try:
			fp.write ('\n'.join(self.unique_protected_urls))
		finally:
			# close the file even when the write fails
			fp.close()
		print('wrote unique protected URLs to  %s' % (outpath,))
Example #16
0
# Check every path through every method of UserDict
from test_support import verify, verbose
from UserDict import UserDict, IterableUserDict
# Plain-dict fixtures holding 0, 1 and 2 items
d0 = {}
d1 = {"one": 1}
d2 = {"one": 1, "two": 2}
# Test constructors
# (no argument, from a plain dict, and from another UserDict)
u = UserDict()
u0 = UserDict(d0)
u1 = UserDict(d1)
u2 = IterableUserDict(d2)
uu = UserDict(u)
uu0 = UserDict(u0)
uu1 = UserDict(u1)
uu2 = UserDict(u2)
# Test __repr__
# (backticks are the Python 2 shorthand for repr())
verify(str(u0) == str(d0))
verify(repr(u1) == repr(d1))
verify( ` u2 ` == ` d2 `)
# Test __cmp__ and __len__
# Fixtures of equal size were built from equal contents, so comparing any
# pair is expected to agree with comparing their lengths.
# NOTE(review): `all` shadows the builtin of the same name from here on.
all = [d0, d1, d2, u, u0, u1, u2, uu, uu0, uu1, uu2]
for a in all:
    for b in all:
        verify(cmp(a, b) == cmp(len(a), len(b)))
# Test __getitem__
verify(u2["one"] == 1)
# u1 only holds "one", so looking up "two" is expected to raise KeyError.
# NOTE(review): as written, nothing is reported if the lookup succeeds.
try:
    u1["two"]
except KeyError:
    pass
Example #17
0
class CurriculumScanner(SimpleScanner):
    """
    Scans xml files for protected urls and records, per url, the
    RecordInfo of every file the url was found in.
    """

    counter = 0

    def __init__(self, baseDir):
        # the collections are created before the base constructor runs
        # NOTE(review): presumably because SimpleScanner.__init__ starts the
        # scan and therefore calls processPath() - confirm
        self.unique_protected_urls = []
        self.protected_url_count = 0
        self.unique_asset_filenames = []
        self.metadata_info = UserDict()  # url -> [RecordInfo, ...]
        SimpleScanner.__init__(self, baseDir)

    def acceptFileToScan(self, path):
        """
        boolean determining whether this file is scanned
        e.g., typically look for xml files when processing metadata
        """
        return os.path.isfile(path) and os.path.basename(path).endswith('.xml')

    def processPath(self, path):
        """
        Find the protected urls in the file at path and update the url
        lists, the counters and metadata_info.
        """
        SimpleScanner.processPath(self, path)

        # try/finally so the file handle is closed even if the read fails
        fp = open(path, 'r')
        try:
            content = fp.read()
        finally:
            fp.close()

        # raw string: same pattern text, without relying on unknown escapes
        urlPattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'

        for m in re.finditer(urlPattern, content):
            url = m.group()
            # urlPattern doesn't stop at the closing tag at the tail of urls
            for ending in [
                    '</primaryURL>', '</url>', '</urlEntry>', '</standardURL>'
            ]:
                if url.endswith(ending):
                    # strip the trailing tag only; str.replace would also
                    # remove any earlier occurrence inside the match
                    url = url[:-len(ending)]
            if bscs.protected.isAnyProtectedUrl(url):
                filename = os.path.basename(url)
                if url not in self.unique_protected_urls:
                    self.unique_protected_urls.append(url)

                if filename not in self.unique_asset_filenames:
                    self.unique_asset_filenames.append(filename)

                self.protected_url_count = self.protected_url_count + 1
                # getMetadataInfo returns None for an unseen url
                infos = self.getMetadataInfo(url) or []
                infos.append(RecordInfo(path))
                self.metadata_info[url] = infos

    def report(self):
        """
        print the base-class report followed by the url and filename counts
        """
        SimpleScanner.report(self)
        print('\nunique protected URLs: %d (out of %d total)' % (len(
            self.unique_protected_urls), self.protected_url_count))
        print('unique asset filenames: %d' % len(self.unique_asset_filenames))

    def getMetadataInfo(self, url):
        """
        returns a LIST of metadata infos for given url
        (None when the url has not been seen)
        """
        return self.metadata_info.get(url)

    def writeProtectedUrls(self):
        """
        sort self.unique_protected_urls in place and write them, one per
        line, to CURRICULUM_SCAN_URLS.txt
        """
        out = 'CURRICULUM_SCAN_URLS.txt'
        self.unique_protected_urls.sort()
        fp = open(out, 'w')
        try:
            fp.write('\n'.join(self.unique_protected_urls))
        finally:
            # close the file even when the write fails
            fp.close()
        print('wrote unique protected URLs to  %s' % (out,))
Example #18
0
class VdxRecord(MetaDataRecord):
    """
    Metadata record loaded from a VDX template, with registries for the
    shapes, edges and nodes added to it.
    """
    verbose = 0
    xpath_delimiter = '/'
    default_vdx_template = util.getTemplate('VDX-TEMPLATE')

    def __init__(self, template=None):
        """Load template, falling back to default_vdx_template."""
        template = template or self.default_vdx_template
        MetaDataRecord.__init__(self, path=template)
        self.shapes = UserDict()
        self.edges = UserDict()  # edge ID -> edge shape (see addEdge)
        self.nodes = UserDict()

    def getShapeNodes(self):
        """
        return the shape elements from DOM
        """
        return self.selectNodes(self.dom,
                                'VisioDocument/Pages/Page/Shapes/Shape')

    def getShapeId(self):
        """
        return an id for a new shape: the number of shape elements
        currently in the DOM plus one, as a string
        """
        return str(len(self.getShapeNodes()) + 1)

    def _shapeGetter(self, shapeId):
        """lookup used by addEdge to resolve its source and target"""
        return self.getShape(shapeId)

    def getShapeByName(self, name):
        """
        return the first registered shape whose Name equals name,
        or None if there is none
        """
        for node in self.shapes.values():
            if node.Name == name:
                return node
        return None

    def getShape(self, shapeId):
        """
        return the Shape instance for provided shapeId or None if not defined
        """
        # get() also returns a stored shape that happens to be falsy, which
        # the former "has_key and x or None" idiom turned into None
        return self.shapes.get(shapeId)

    def makeEdgeShape(self, source, target, relation, id=None):
        """
        return a connector Line labelled with relation

        id defaults to self.getShapeId(). source and target are accepted
        but not used here: no geometry is computed for the line.
        """
        id = id or self.getShapeId()
        line_args = {
            'name': 'connector',
            'label': {
                'text': relation
            },
        }
        return Line(id, line_args)

    def addEdge(self, sourceId, targetId, relation):
        """
        Connect the shapes sourceId and targetId with an edge labelled
        relation, and return the ID of the new edge.

        If either shape cannot be resolved (or the lookup itself fails),
        the known shape keys are printed and the program exits.
        """
        try:
            source = self._shapeGetter(sourceId)
            if not source:
                raise Exception("sourceId '%s'" % sourceId)
            target = self._shapeGetter(targetId)
            if not target:
                raise Exception("targetId '%s'" % targetId)
        except Exception:
            # sys.exc_info() avoids the Python-2-only "except E, name" form
            msg = sys.exc_info()[1]
            print("addEdge Error: could not find a shape (%s)" % msg)
            print("SHAPE KEYS: %s" % self.shapes.keys())
            for key in self.shapes.keys():
                print(" -  %s" % (key,))
            print("HALTInG ...")
            sys.exit()

        edge = self.makeEdgeShape(source, target, relation, self.getShapeId())

        self.edges[edge.ID] = edge
        self.addShapeObj(edge)

        # add the connect element
        self.addConnect(edge, source, target)

        return edge.ID
Example #19
0
 def has_key(self, key):
     """Report whether key, mapped through self._key(), is present."""
     return UserDict.has_key(self, self._key(key))
Example #20
0
# SF bug #476616 -- copy() of UserDict subclass shared data
# m2 and m2a are defined before this excerpt (presumably m2a is a copy of
# m2 - verify); changing m2 must leave m2a different from it.
m2['foo'] = 'bar'
verify(m2a != m2)

# Test keys, items, values
# (the wrapper must report the same content as the plain dict d2)

verify(u2.keys() == d2.keys())
verify(u2.items() == d2.items())
verify(u2.values() == d2.values())

# Test has_key and "in".
# Each wrapper (u2, u1, u0) must answer like its plain-dict counterpart,
# through both the has_key() method and the "in" operator.

for i in u2.keys():
    verify(u2.has_key(i) == 1)
    verify((i in u2) == 1)
    verify(u1.has_key(i) == d1.has_key(i))
    verify((i in u1) == (i in d1))
    verify(u0.has_key(i) == d0.has_key(i))
    verify((i in u0) == (i in d0))

# Test update
# (updating an empty UserDict from u2 must make it equal to u2)

t = UserDict()
t.update(u2)
verify(t == u2)

# Test get
# (get() must agree with indexing on u2 and with dict.get() for u1)

for i in u2.keys():
    verify(u2.get(i) == u2[i])
    verify(u1.get(i) == d1.get(i))
Example #21
0
 def has_key(self,key):
   """Report whether the lower()-ed form of key is present."""
   lowered = lower(key)
   return UserDict.has_key(self, lowered)
Example #22
0
# Check every path through every method of UserDict
from test_support import verify, verbose
from UserDict import UserDict, IterableUserDict
# Plain-dict fixtures holding 0, 1 and 2 items
d0 = {}
d1 = {"one": 1}
d2 = {"one": 1, "two": 2}
# Test constructors
# (no argument, from a plain dict, and from another UserDict)
u = UserDict()
u0 = UserDict(d0)
u1 = UserDict(d1)
u2 = IterableUserDict(d2)
uu = UserDict(u)
uu0 = UserDict(u0)
uu1 = UserDict(u1)
uu2 = UserDict(u2)
# Test __repr__
# (backticks are the Python 2 shorthand for repr())
verify(str(u0) == str(d0))
verify(repr(u1) == repr(d1))
verify(`u2` == `d2`)
# Test __cmp__ and __len__
# Fixtures of equal size were built from equal contents, so comparing any
# pair is expected to agree with comparing their lengths.
# NOTE(review): `all` shadows the builtin of the same name from here on.
all = [d0, d1, d2, u, u0, u1, u2, uu, uu0, uu1, uu2]
for a in all:
    for b in all:
        verify(cmp(a, b) == cmp(len(a), len(b)))
# Test __getitem__
verify(u2["one"] == 1)
# u1 only holds "one", so looking up "two" is expected to raise KeyError.
# NOTE(review): as written, nothing is reported if the lookup succeeds.
try:
    u1["two"]
except KeyError:
    pass
Example #23
0
 def has_key(self, key):
     """
     Report whether key is present: the mapping is populated first, then
     the key is looked up in its transformed form.
     """
     self.__populate()
     transformed = self._keyTransform(key)
     return UserDict.has_key(self, transformed)
# SF bug #476616 -- copy() of UserDict subclass shared data
# m2 and m2a are defined before this excerpt (presumably m2a is a copy of
# m2 - verify); changing m2 must leave m2a different from it.
m2['foo'] = 'bar'
verify(m2a != m2)

# Test keys, items, values
# (the wrapper must report the same content as the plain dict d2)

verify(u2.keys() == d2.keys())
verify(u2.items() == d2.items())
verify(u2.values() == d2.values())

# Test has_key and "in".
# Each wrapper (u2, u1, u0) must answer like its plain-dict counterpart,
# through both the has_key() method and the "in" operator.

for i in u2.keys():
    verify(u2.has_key(i) == 1)
    verify((i in u2) == 1)
    verify(u1.has_key(i) == d1.has_key(i))
    verify((i in u1) == (i in d1))
    verify(u0.has_key(i) == d0.has_key(i))
    verify((i in u0) == (i in d0))

# Test update
# (updating an empty UserDict from u2 must make it equal to u2)

t = UserDict()
t.update(u2)
verify(t == u2)

# Test get
# (get() must agree with indexing on u2 and with dict.get() for u1)

for i in u2.keys():
    verify(u2.get(i) == u2[i])
    verify(u1.get(i) == d1.get(i))
Example #25
0
    def display(self): print self

# A copy of a UserDict subclass instance must compare equal to the original
m2 = MyUserDict(u2)
m2a = m2.copy()
assert m2a == m2

# Test keys, items, values
# (the wrapper must report the same content as the plain dict d2)

assert u2.keys() == d2.keys()
assert u2.items() == d2.items()
assert u2.values() == d2.values()

# Test has_key
# (each wrapper must answer like its plain-dict counterpart)

for i in u2.keys():
    assert u2.has_key(i) == 1
    assert u1.has_key(i) == d1.has_key(i)
    assert u0.has_key(i) == d0.has_key(i)

# Test update
# (updating an empty UserDict from u2 must make it equal to u2)

t = UserDict()
t.update(u2)
assert t == u2

# Test get
# (get() must agree with indexing on u2 and with dict.get() for u1 and u0)

for i in u2.keys():
    assert u2.get(i) == u2[i]
    assert u1.get(i) == d1.get(i)
    assert u0.get(i) == d0.get(i)
Example #26
0
 def has_key(self, key):
     """Report whether key, mapped through self._key(), is present."""
     return UserDict.has_key(self, self._key(key))