def archiveData(key, datasources):
    """Add ``datasources`` to the archive stored under ``key``.

    The value currently stored under ``key`` in the ``DatasourceArchive``
    model is loaded (``[]`` is passed to ``cmapi.getItemValue`` as its second
    argument -- presumably the default when nothing is stored; confirm
    against cmapi). A non-empty stored list is extended with ``datasources``;
    otherwise ``datasources`` itself becomes the stored value. The result is
    saved back under the same key.
    """
    archived = cmapi.getItemValue(key, [], modelname=DatasourceArchive)
    if not archived:
        # Nothing archived yet (or an empty value): store the input as-is.
        archived = datasources
    else:
        archived.extend(datasources)
    cmapi.saveItem(key, archived, modelname=DatasourceArchive)
def updateUuids(uuid):
    """Put ``uuid`` at the front of the stored "uuids" list.

    A falsy ``uuid`` is ignored and nothing is read or written. Otherwise the
    list stored under "uuids" in the "RunStatus" model is loaded, ``uuid`` is
    inserted at index 0, and only the first 100 entries are saved back.
    """
    if not uuid:
        return
    maxItemCount = 100
    uuids = cmapi.getItemValue("uuids", [], modelname="RunStatus")
    uuids.insert(0, uuid)
    # Newest entries sit at the front, so truncation drops the oldest ones.
    cmapi.saveItem("uuids", uuids[:maxItemCount], modelname="RunStatus")
def _saveDatasourceHistory(datasource, items):
    """Refresh the stored history of ``datasource`` with its newest pages.

    The history obtained from ``getDatasourceHistory(slug)`` gets its
    'source' replaced by ``datasource``. Items whose 'added' equals the
    datasource's own 'added' are placed in front of the existing 'pages'
    (keeping the order they have in ``items``), and the page list is cut to
    its first 20 entries before the history is saved.
    """
    maxCount = 20
    slug = datasource['slug']
    history = getDatasourceHistory(slug)
    history['source'] = datasource
    pages = history.get('pages', [])
    # Walk the items back-to-front, then flip the matches so they end up in
    # front of the old pages in their original order.
    fresh = [
        item for item in reversed(items)
        if item['added'] == datasource['added']
    ]
    fresh.reverse()
    pages[:0] = fresh
    history['pages'] = pages[:maxCount]
    historyKey = _getDatasourceHistoryKey(slug)
    cmapi.saveItem(historyKey, history, modelname=DatasourceHistory)
def _saveHistory(datasource, items): sourceadded = datasource.get('added') if not sourceadded: return key = _getDatasourceHistoryKey() latestItems = getDatasourceHistory() for item in items: itemadded = item.get('added') if not itemadded: continue # An item only need archive the first time it appears. if itemadded < sourceadded: continue item['source'] = datasource latestItems.insert(0, item) latestItems.sort(key=lambda item: item.get('added'), reverse=True) cmapi.saveItem(key, latestItems, modelname=DatasourceHistory)
def getUrlAdded(url, added):
    """Count a sighting of ``url`` and return the 'added' value kept for it.

    The records stored under ``_getKeyname()`` are searched with
    ``_getItem(records, url)``. A truthy match has its 'count' incremented;
    otherwise a new record with count 1, the url and ``added`` is appended.
    The record's 'updated' is set from the current UTC time, records whose
    'updated' is not greater than ``dateutil.getHoursAs14(24)`` are dropped
    (presumably a 24-hour cutoff -- confirm against dateutil), and the
    remaining records are saved.

    Returns the record's 'added': the stored one for a known url, otherwise
    the ``added`` argument.
    """
    records = cmapi.getItemValue(_getKeyname(), [], modelname='RunStatus')
    record = _getItem(records, url)
    if not record:
        record = {'count': 1, 'url': url, 'added': added}
        records.append(record)
    else:
        record['count'] += 1
    record['updated'] = dateutil.getDateAs14(datetime.datetime.utcnow())
    cutoff14 = dateutil.getHoursAs14(24)
    recent = [entry for entry in records if entry['updated'] > cutoff14]
    cmapi.saveItem(_getKeyname(), recent, modelname='RunStatus')
    return record['added']
def increaseIncomingBandwidth(bytes):
    """Add one fetch of ``bytes`` bytes to the stored 'inbandwidth' counters.

    Three parts of the stored dict are maintained:

    * 'all'     -- running totals ('bytes', 'fetch') plus a 'start' stamp
                   set when the totals are first created.
    * 'current' -- counters for the current day; the day key is the
                   ``%Y%m%d`` date in the stored 'tz' timezone
                   ('US/Pacific' when none is stored).
    * 'history' -- previous 'current' buckets, newest first, cut to
                   'historycount' entries (7 when none is stored).

    Defaults for 'tz' and 'historycount' are written back into the dict when
    they are used.
    """
    itemKey = 'inbandwidth'
    stats = cmapi.getItemValue(itemKey, {}, modelname='RunStatus')

    # Running totals.
    totals = stats.get('all')
    if not totals:
        totals = {'start': dateutil.getDateAs14(datetime.datetime.utcnow())}
        stats['all'] = totals
    totals['bytes'] = totals.get('bytes', 0) + bytes
    totals['fetch'] = totals.get('fetch', 0) + 1

    # Day key in the configured timezone.
    tzName = stats.get('tz')
    if not tzName:
        tzName = 'US/Pacific'
        stats['tz'] = tzName
    utcNow = datetime.datetime.now(tz=pytz.utc)
    dayKey = utcNow.astimezone(pytz.timezone(tzName)).strftime('%Y%m%d')

    today = stats.get('current')
    if today and today.get('key') == dayKey:
        # Same day: just bump the existing bucket.
        today['fetch'] += 1
        today['bytes'] += bytes
    else:
        keep = stats.get('historycount')
        if not keep:
            keep = 7
            stats['historycount'] = keep
        if today:
            # Day rolled over: archive the finished bucket, newest first.
            past = stats.get('history') or []
            past.insert(0, today)
            stats['history'] = past[:keep]
        stats['current'] = {'key': dayKey, 'bytes': bytes, 'fetch': 1}
    cmapi.saveItem(itemKey, stats, modelname='RunStatus')
def savePageHistory(url):
    """Record a visit to ``url`` in the stored 'page.history' list.

    The first entry whose 'url' equals ``url`` gets its 'count' incremented;
    when there is no truthy match a new entry with count 1 is appended. The
    entry's 'updated' is set to the current UTC time (``%Y%m%d%H%M%S``). The
    list is ordered by 'count' descending, ties broken by 'updated'
    descending, and once it holds more than 1000 entries only the first 200
    are kept. The list is then saved.
    """
    maxCount = 1000
    resetCount = 200
    pages = cmapi.getItemValue('page.history', [], modelname='RunStatus')
    entry = next(
        (candidate for candidate in pages if candidate.get('url') == url),
        None,
    )
    if not entry:
        entry = {'count': 1, 'url': url}
        pages.append(entry)
    else:
        entry['count'] += 1
    entry['updated'] = datetime.datetime.utcnow().strftime('%Y%m%d%H%M%S')
    # One stable sort on (count, updated) yields the same order as sorting
    # by 'updated' first and then by 'count'.
    pages.sort(
        key=lambda candidate: (candidate['count'], candidate['updated']),
        reverse=True,
    )
    if len(pages) > maxCount:
        pages = pages[:resetCount]
    cmapi.saveItem('page.history', pages, modelname='RunStatus')
def _saveNow(datasource, items, keyname):
    """Store ``items`` as the latest pages of ``datasource`` under ``keyname``.

    Entries whose ``['source']['added']`` is smaller than
    ``dateutil.getHoursAs14(7 * 24)`` are dropped first (presumably a
    seven-day cutoff -- confirm against dateutil). The first remaining entry
    with the same source 'slug' as ``datasource`` is replaced by
    ``{'source': datasource, 'pages': items}``; when none matches, that dict
    is appended. The list is then saved to the ``LatestItem`` model.
    """
    days = 7
    cutoff = dateutil.getHoursAs14(days * 24)
    stored = cmapi.getItemValue(keyname, [], modelname=LatestItem)
    recent = [entry for entry in stored if entry['source']['added'] >= cutoff]
    latest = {
        'source': datasource,
        'pages': items,
    }
    for index, entry in enumerate(recent):
        if entry['source'].get('slug') == datasource.get('slug'):
            recent[index] = latest
            break
    else:
        # No entry for this slug yet.
        recent.append(latest)
    cmapi.saveItem(keyname, recent, modelname=LatestItem)
def isConstantTitle(titleConfig, url, title, sideEffect):
    """Tell whether ``title`` has already been seen often enough on url's host.

    Per-title records are stored in the ``PageConstantTitle`` model under the
    network location of ``url``. Each record holds 'c' (occurrence count) and
    'u' (last-update stamp written by ``dateutil.getDateAs14``).

    Args:
        titleConfig: mapping read for 'occurrence' (default 1), the count at
            which a title is considered constant, and 'cache.day'
            (default 7), the age in days at which records are pruned.
        url: page url; a falsy value returns ``False`` immediately.
        title: the title to look up.
        sideEffect: when truthy, the title's count and stamp are updated,
            stale records are pruned once more than 20 titles are stored,
            and the result is saved.

    Returns:
        ``True`` when the count stored *before* this call is at least
        ``titleConfig['occurrence']``, else ``False``.
    """
    if not url:
        return False
    key = urlparse.urlparse(url).netloc
    value = cmapi.getItemValue(key, {}, modelname=PageConstantTitle)
    record = value.get(title) or {}
    count = record.get('c', 0)
    isconstant = count >= titleConfig.get('occurrence', 1)
    if sideEffect:
        nnow = datetime.datetime.utcnow()
        record['c'] = count + 1
        record['u'] = dateutil.getDateAs14(nnow)
        if len(value) > 20:
            cacheDays = titleConfig.get('cache.day', 7)
            # Iterate over a snapshot: deleting from a dict while iterating
            # its live items() view raises RuntimeError on Python 3. On
            # Python 2 the extra copy is harmless.
            for ik, iv in list(value.items()):
                if (nnow - dateutil.parseDate14(iv['u'])).days >= cacheDays:
                    del value[ik]
        # Written after pruning so the current title is always kept.
        value[title] = record
        cmapi.saveItem(key, value, modelname=PageConstantTitle)
    return isconstant
def savePosters(posters):
    """Save ``posters`` under the poster-list key.

    Returns whatever ``cmapi.saveItem`` returns.
    """
    posterKey = _getPosterListKey()
    return cmapi.saveItem(posterKey, posters)
def saveWordsRequest(keyname, data):
    """Save ``data`` under ``keyname`` in the ``WordsRequest`` model."""
    cmapi.saveItem(
        keyname,
        data,
        modelname=WordsRequest,
    )
def addDeprecatedSource(slug):
    """Add ``slug`` to the list stored under ``_getKeyname()`` if missing.

    The list lives in the "RunStatus" model; it is saved only when ``slug``
    was actually appended.
    """
    slugs = cmapi.getItemValue(_getKeyname(), [], modelname="RunStatus")
    if slug in slugs:
        # Already recorded; nothing to store.
        return
    slugs.append(slug)
    cmapi.saveItem(_getKeyname(), slugs, modelname="RunStatus")
def saveWords(keyname, value):
    """Save ``value`` under ``keyname`` in the ``HotWord`` model."""
    cmapi.saveItem(
        keyname,
        value,
        modelname=HotWord,
    )
def addDeprecatedSource(slug):
    """Append ``slug`` to the list stored under ``_getKeyname()`` when absent.

    The list is read from and written to the 'RunStatus' model; no save
    happens when ``slug`` is already in the list.
    """
    known = cmapi.getItemValue(_getKeyname(), [], modelname='RunStatus')
    if slug in known:
        return
    known.append(slug)
    cmapi.saveItem(_getKeyname(), known, modelname='RunStatus')
def removeDeprecatedSource(slug):
    """Remove ``slug`` from the list stored under ``_getKeyname()``.

    The list is read from and written to the 'RunStatus' model; no save
    happens when ``slug`` is not in the list.
    """
    known = cmapi.getItemValue(_getKeyname(), [], modelname='RunStatus')
    if slug not in known:
        return
    known.remove(slug)
    cmapi.saveItem(_getKeyname(), known, modelname='RunStatus')
def saveDatasourceHistory(datasourceHistory):
    """Save ``datasourceHistory`` under ``_getDatasourceHistoryKey()``.

    The value is written to the ``DatasourceHistory`` model.
    """
    historyKey = _getDatasourceHistoryKey()
    cmapi.saveItem(
        historyKey,
        datasourceHistory,
        modelname=DatasourceHistory,
    )
def saveEvent(scope, event):
    """Save ``event`` in the ``HotEvent`` model.

    The storage key is ``scope``, a dot, and the string form of
    ``event['id']``.
    """
    eventKey = scope + '.' + str(event['id'])
    cmapi.saveItem(eventKey, event, modelname=HotEvent)
def removeDeprecatedSource(slug):
    """Drop ``slug`` from the list stored under ``_getKeyname()`` if present.

    The list lives in the "RunStatus" model; it is saved only when ``slug``
    was actually removed.
    """
    slugs = cmapi.getItemValue(_getKeyname(), [], modelname="RunStatus")
    if slug not in slugs:
        # Not recorded; nothing to store.
        return
    slugs.remove(slug)
    cmapi.saveItem(_getKeyname(), slugs, modelname="RunStatus")
def saveEvents(scope, value):
    """Save ``value`` under the key ``scope`` in the ``HotEvents`` model."""
    cmapi.saveItem(
        scope,
        value,
        modelname=HotEvents,
    )
def saveHistoryEvents(scope, value):
    """Save ``value`` under ``scope + '.history'`` in the ``HotEvents`` model."""
    historyKey = scope + '.history'
    cmapi.saveItem(historyKey, value, modelname=HotEvents)