Example #1
0
def archiveData(key, datasources):
    """Append ``datasources`` to the value archived under ``key`` and save it.

    The current value is read with ``cmapi.getItemValue`` (``[]`` is passed
    as its default argument).  A truthy current value is extended in place;
    otherwise ``datasources`` itself becomes the value that is saved.
    """
    archived = cmapi.getItemValue(key, [], modelname=DatasourceArchive)
    if not archived:
        # Nothing usable was read back: the incoming list is saved as-is.
        archived = datasources
    else:
        archived.extend(datasources)
    cmapi.saveItem(key, archived, modelname=DatasourceArchive)
Example #2
0
def updateUuids(uuid):
    """Put ``uuid`` at the front of the "uuids" list and save the first 100.

    A falsy ``uuid`` is ignored: nothing is read or saved.
    """
    if not uuid:
        return
    maxItemCount = 100
    recent = cmapi.getItemValue("uuids", [], modelname="RunStatus")
    recent.insert(0, uuid)
    # Only the newest ``maxItemCount`` entries are kept.
    cmapi.saveItem("uuids", recent[:maxItemCount], modelname="RunStatus")
Example #3
0
def _saveDatasourceHistory(datasource, items):
    """Update and save the history record of ``datasource``.

    The record returned by ``getDatasourceHistory(slug)`` gets its 'source'
    replaced by ``datasource``.  Items whose 'added' equals the datasource's
    'added' are placed, in their original order, in front of the existing
    'pages', and the page list is cut to the first 20 entries.
    """
    maxCount = 20
    slug = datasource['slug']
    history = getDatasourceHistory(slug)
    history['source'] = datasource

    pages = history.get('pages', [])
    fresh = [item for item in items if item['added'] == datasource['added']]
    # Prepending the whole batch keeps the items in their incoming order.
    pages[:0] = fresh
    history['pages'] = pages[:maxCount]

    cmapi.saveItem(_getDatasourceHistoryKey(slug), history,
                   modelname=DatasourceHistory)
Example #4
0
def _saveHistory(datasource, items):
    """Add qualifying ``items`` to the stored history and save it.

    Does nothing when ``datasource`` has no truthy 'added' value.  Each kept
    item gets its 'source' set to ``datasource`` and is inserted at the front
    of the list returned by ``getDatasourceHistory()``; the list is then
    sorted by 'added', newest first, and saved.
    """
    sourceAdded = datasource.get('added')
    if not sourceAdded:
        return
    historyKey = _getDatasourceHistoryKey()
    history = getDatasourceHistory()
    for entry in items:
        entryAdded = entry.get('added')
        # Skip entries without a timestamp, and entries added before the
        # source's own 'added' value: an item only needs to be archived the
        # first time it appears.
        if not entryAdded or entryAdded < sourceAdded:
            continue
        entry['source'] = datasource
        history.insert(0, entry)
    history.sort(key=lambda entry: entry.get('added'), reverse=True)
    cmapi.saveItem(historyKey, history, modelname=DatasourceHistory)
Example #5
0
def getUrlAdded(url, added):
    """Count a sighting of ``url`` and return the 'added' value tracked for it.

    The entry for ``url`` is looked up with ``_getItem``.  An existing entry
    has its 'count' incremented; otherwise a new entry is created with
    count 1 and the given ``added``.  The entry's 'updated' stamp is
    refreshed, entries whose 'updated' is not greater than
    ``dateutil.getHoursAs14(24)`` are dropped, and the rest is saved.

    Returns the entry's 'added' value, i.e. the stored one for a known url
    and ``added`` for a new one.
    """
    entries = cmapi.getItemValue(_getKeyname(), [],
                    modelname='RunStatus')
    entry = _getItem(entries, url)
    if not entry:
        entry = {'count': 1, 'url': url, 'added': added}
        entries.append(entry)
    else:
        entry['count'] += 1
    entry['updated'] = dateutil.getDateAs14(datetime.datetime.utcnow())

    # presumably the 14-digit timestamp of 24 hours ago -- TODO confirm
    cutoff = dateutil.getHoursAs14(24)
    recent = [candidate for candidate in entries
              if candidate['updated'] > cutoff]
    cmapi.saveItem(_getKeyname(), recent,
                modelname='RunStatus')
    return entry['added']
Example #6
0
def increaseIncomingBandwidth(bytes):
    """Add one fetch of ``bytes`` bytes to the stored 'inbandwidth' statistics.

    Three parts of the stored dict are maintained:

    * 'all': running totals ('bytes', 'fetch') plus the 'start' stamp set
      when the totals are first created.
    * 'current': totals for the current day; the day key is the
      ``%Y%m%d`` date in the timezone named by 'tz' (set to 'US/Pacific'
      when missing).
    * 'history': previous 'current' buckets, newest first, cut to
      'historycount' entries (set to 7 when missing).
    """
    storeKey = 'inbandwidth'
    stats = cmapi.getItemValue(storeKey, {}, modelname='RunStatus')

    # Running totals over all fetches.
    totals = stats.get('all')
    if not totals:
        totals = {'start': dateutil.getDateAs14(datetime.datetime.utcnow())}
        stats['all'] = totals
    totals['bytes'] = totals.get('bytes', 0) + bytes
    totals['fetch'] = totals.get('fetch', 0) + 1

    tzName = stats.get('tz')
    if not tzName:
        tzName = 'US/Pacific'
        stats['tz'] = tzName

    # Day key of "now" as seen in the configured timezone.
    utcNow = datetime.datetime.now(tz=pytz.utc)
    dayKey = utcNow.astimezone(pytz.timezone(tzName)).strftime('%Y%m%d')

    bucket = stats.get('current')
    if bucket and bucket.get('key') == dayKey:
        # Same day: accumulate into the existing bucket.
        bucket['fetch'] += 1
        bucket['bytes'] += bytes
    else:
        keep = stats.get('historycount')
        if not keep:
            keep = 7
            stats['historycount'] = keep
        if bucket:
            # A new day started: push the finished bucket onto the history.
            past = stats.get('history') or []
            past.insert(0, bucket)
            stats['history'] = past[:keep]
        stats['current'] = {'key': dayKey, 'bytes': bytes, 'fetch': 1}

    cmapi.saveItem(storeKey, stats, modelname='RunStatus')
Example #7
0
def savePageHistory(url):
    """Count a visit to ``url`` in the stored 'page.history' list.

    The first entry whose 'url' equals ``url`` has its 'count' incremented;
    if there is none, a new entry with count 1 is appended.  The entry's
    'updated' stamp is refreshed, the list is ordered by 'count' and then
    'updated' (both descending), and once it holds more than 1000 entries
    only the first 200 are kept.
    """
    maxCount = 1000
    resetCount = 200
    pages = cmapi.getItemValue('page.history', [], modelname='RunStatus')

    entry = next((page for page in pages if page.get('url') == url), None)
    if not entry:
        entry = {'count': 1, 'url': url}
        pages.append(entry)
    else:
        entry['count'] += 1
    entry['updated'] = datetime.datetime.utcnow().strftime('%Y%m%d%H%M%S')

    # One stable sort on (count, updated) orders by count first and breaks
    # ties by the most recent update.
    pages.sort(key=lambda page: (page['count'], page['updated']),
               reverse=True)
    if len(pages) > maxCount:
        pages = pages[:resetCount]
    cmapi.saveItem('page.history', pages, modelname='RunStatus')
Example #8
0
def savePageHistory(url):
    """Record one more visit to ``url`` in the 'page.history' list.

    An existing entry for ``url`` gets its 'count' increased, otherwise a
    fresh entry (count 1) is appended; either way its 'updated' stamp is set
    to the current UTC time.  Entries are then sorted by 'count' descending,
    with 'updated' descending as the tie-breaker.  When the list has grown
    beyond 1000 entries it is cut back to its first 200 before saving.
    """
    history = cmapi.getItemValue('page.history', [], modelname='RunStatus')

    for hit in history:
        if hit.get('url') == url:
            break
    else:
        hit = None

    if hit:
        hit['count'] += 1
    else:
        hit = {'count': 1, 'url': url}
        history.append(hit)
    hit['updated'] = datetime.datetime.utcnow().strftime('%Y%m%d%H%M%S')

    # Stable sorts applied from the least to the most significant field.
    for field in ('updated', 'count'):
        history.sort(key=lambda page, field=field: page[field], reverse=True)

    maxCount, resetCount = 1000, 200
    toSave = history[:resetCount] if len(history) > maxCount else history
    cmapi.saveItem('page.history', toSave, modelname='RunStatus')
Example #9
0
def _saveNow(datasource, items, keyname):
    """Store ``datasource`` with its ``items`` in the list saved at ``keyname``.

    Entries whose source 'added' value is below
    ``dateutil.getHoursAs14(7 * 24)`` are dropped first.  The new
    ``{'source': ..., 'pages': ...}`` record then replaces the first entry
    with the same source 'slug', or is appended when no entry matches.
    """
    stored = cmapi.getItemValue(keyname, [], modelname=LatestItem)

    windowDays = 7
    # presumably the 14-digit timestamp of 7 days ago -- TODO confirm
    cutoff = dateutil.getHoursAs14(windowDays * 24)
    kept = [entry for entry in stored
            if entry['source']['added'] >= cutoff]

    record = {'source': datasource, 'pages': items}

    for index, entry in enumerate(kept):
        if entry['source'].get('slug') == datasource.get('slug'):
            kept[index] = record
            break
    else:
        # No entry for this slug yet.
        kept.append(record)
    cmapi.saveItem(keyname, kept, modelname=LatestItem)
Example #10
0
def isConstantTitle(titleConfig, url, title, sideEffect):
    """Tell whether ``title`` was already seen often enough on ``url``'s host.

    Per-host records are stored under the URL's netloc as a dict mapping a
    title to ``{'c': <count>, 'u': <last update stamp>}``.

    Args:
        titleConfig: mapping read for 'occurrence' (count threshold,
            default 1) and 'cache.day' (record age in days at which a
            record is pruned, default 7).
        url: page URL; a falsy value makes the function return False.
        title: the title to look up.
        sideEffect: when truthy, the title's count and stamp are updated
            and the host's records are saved.

    Returns:
        True when the count stored *before* this call is at least
        ``titleConfig['occurrence']``, otherwise False.
    """
    if not url:
        return False
    key = urlparse.urlparse(url).netloc
    value = cmapi.getItemValue(key, {}, modelname=PageConstantTitle)
    record = value.get(title) or {}
    count = record.get('c', 0)
    isconstant = count >= titleConfig.get('occurrence', 1)
    if sideEffect:
        nnow = datetime.datetime.utcnow()
        record['c'] = count + 1
        record['u'] = dateutil.getDateAs14(nnow)
        # Pruning only starts once the host holds more than 20 titles.
        if len(value) > 20:
            cacheDays = titleConfig.get('cache.day', 7)
            # Iterate over a snapshot: deleting entries while iterating the
            # live items() view raises RuntimeError on Python 3.
            for ik, iv in list(value.items()):
                if (nnow - dateutil.parseDate14(iv['u'])).days >= cacheDays:
                    del value[ik]
        value[title] = record
        cmapi.saveItem(key, value, modelname=PageConstantTitle)
    return isconstant
Example #11
0
def savePosters(posters):
    """Save ``posters`` under the key given by ``_getPosterListKey()``.

    Returns whatever ``cmapi.saveItem`` returns.
    """
    listKey = _getPosterListKey()
    return cmapi.saveItem(listKey, posters)
Example #12
0
def saveWordsRequest(keyname, data):
    """Save ``data`` under ``keyname`` using the ``WordsRequest`` model."""
    cmapi.saveItem(
        keyname,
        data,
        modelname=WordsRequest)
Example #13
0
def addDeprecatedSource(slug):
    """Append ``slug`` to the list stored under ``_getKeyname()`` if missing.

    Nothing is saved when ``slug`` is already in the list.
    """
    slugs = cmapi.getItemValue(_getKeyname(), [], modelname="RunStatus")
    if slug in slugs:
        return
    slugs.append(slug)
    cmapi.saveItem(_getKeyname(), slugs, modelname="RunStatus")
Example #14
0
def saveWords(keyname, value):
    """Save ``value`` under ``keyname`` using the ``HotWord`` model."""
    cmapi.saveItem(
        keyname,
        value,
        modelname=HotWord)
Example #15
0
def savePosters(posters):
    """Store the ``posters`` value at the poster-list key.

    The key comes from ``_getPosterListKey()``; the result of
    ``cmapi.saveItem`` is passed back to the caller.
    """
    posterListKey = _getPosterListKey()
    result = cmapi.saveItem(posterListKey, posters)
    return result
Example #16
0
def addDeprecatedSource(slug):
    """Make sure ``slug`` is in the list stored under ``_getKeyname()``.

    The list is saved only when ``slug`` had to be appended.
    """
    known = cmapi.getItemValue(_getKeyname(), [], modelname='RunStatus')
    alreadyListed = slug in known
    if alreadyListed:
        return
    known.append(slug)
    cmapi.saveItem(_getKeyname(), known, modelname='RunStatus')
Example #17
0
def removeDeprecatedSource(slug):
    """Remove ``slug`` from the list stored under ``_getKeyname()``.

    Nothing is saved when ``slug`` is not in the list.
    """
    slugs = cmapi.getItemValue(_getKeyname(), [], modelname='RunStatus')
    if slug not in slugs:
        return
    slugs.remove(slug)
    cmapi.saveItem(_getKeyname(), slugs, modelname='RunStatus')
Example #18
0
def saveDatasourceHistory(datasourceHistory):
    """Save ``datasourceHistory`` under ``_getDatasourceHistoryKey()``.

    The value is stored with the ``DatasourceHistory`` model.
    """
    cmapi.saveItem(_getDatasourceHistoryKey(), datasourceHistory,
                   modelname=DatasourceHistory)
Example #19
0
def saveEvent(scope, event):
    """Save ``event`` under the key ``scope + '.' + str(event['id'])``.

    The event is stored with the ``HotEvent`` model.
    """
    eventKey = scope + '.' + str(event['id'])
    cmapi.saveItem(eventKey, event, modelname=HotEvent)
Example #20
0
def removeDeprecatedSource(slug):
    """Drop ``slug`` from the list stored under ``_getKeyname()`` if present.

    The list is saved only when an entry was actually removed.
    """
    known = cmapi.getItemValue(_getKeyname(), [], modelname="RunStatus")
    isListed = slug in known
    if not isListed:
        return
    known.remove(slug)
    cmapi.saveItem(_getKeyname(), known, modelname="RunStatus")
Example #21
0
def saveEvents(scope, value):
    """Save ``value`` under the key ``scope`` using the ``HotEvents`` model."""
    cmapi.saveItem(
        scope,
        value,
        modelname=HotEvents)
Example #22
0
def saveHistoryEvents(scope, value):
    """Save ``value`` under the key ``scope + '.history'``.

    The value is stored with the ``HotEvents`` model.
    """
    historyKey = scope + '.history'
    cmapi.saveItem(historyKey, value, modelname=HotEvents)