Esempio n. 1
0
def getProcessingWindow(configContext,tableName, productVersionRestriction,cursor,logger, **kwargs):
  """
  ProcessingWindow is a single time window over which to aggregate materialized view data.

  Returns (startWindow,deltaWindow,endWindow) using this heuristic:
  kwargs beats configContext which beats latest table row
  String values of startWindow, endWindow and processingDay are converted with
  cm.dateTimeConverter; a string deltaWindow is converted with cm.timeDeltaConverter.
  A value that is falsy (missing, None, or a zero timedelta) counts as "not given".
  if two among startWindow, endWindow, deltaWindow in config or kwargs: the third is calculated.
    if all three: they must satisfy startWindow + deltaWindow == endWindow
    if only one: that is insufficient and is treated as a failure
  Backward compatibility: if processingDay is present and windowXxx are not:
    startWindow = midnight of given day, deltaWindow = timedelta(days=1)
    (processingDay must itself be a midnight, otherwise it is a failure)
  else: the pair returned by getLastWindowAndSizeFromTable for the given table is used as
    (startWindow, deltaWindow); endWindow is calculated only if that startWindow is truthy
  On inconsistency or failure, control is handed to lib_util.reportExceptionAndAbort(logger)
  BEWARE: consistency is checked only after kwargs are merged over configContext, so one item
  in config and the other two in kwargs can combine into an inconsistent triple: BEWARE
  """
  config = {}
  config.update(configContext)
  config.update(kwargs)
  startWindow = config.get('startWindow')
  if isinstance(startWindow, str):
    startWindow = cm.dateTimeConverter(startWindow)
  deltaWindow = config.get('deltaWindow')
  if isinstance(deltaWindow, str):
    deltaWindow = cm.timeDeltaConverter(deltaWindow)
  endWindow = config.get('endWindow')
  if isinstance(endWindow, str):
    endWindow = cm.dateTimeConverter(endWindow)
  processingDay = config.get('processingDay')
  if isinstance(processingDay, str):
    processingDay = cm.dateTimeConverter(processingDay)
  try:
    # Validation raises AssertionError explicitly instead of using 'assert' statements, so the
    # checks are still performed when Python runs with -O (which strips assert statements).
    if startWindow or deltaWindow or endWindow:
      if startWindow and endWindow and deltaWindow:
        if not (startWindow + deltaWindow == endWindow):
          raise AssertionError("inconsistent: %s + %s != %s"%(startWindow,deltaWindow,endWindow))
      elif startWindow and endWindow:
        deltaWindow = endWindow - startWindow
      elif startWindow and deltaWindow:
        endWindow = startWindow + deltaWindow
      elif deltaWindow and endWindow:
        startWindow = endWindow - deltaWindow
      else:
        # exactly one of the three was given: not enough to define a window
        raise AssertionError("insufficient: Need two of window ...Start: %s, ...Delta: %s, ...End:%s"%(startWindow,deltaWindow,endWindow))
    elif processingDay:
      # Round-trip through a time tuple so anything providing timetuple() is accepted;
      # a time tuple carries whole seconds only.
      dayt = datetime.datetime.fromtimestamp(time.mktime(processingDay.timetuple()))
      startWindow = dayt.replace(hour=0,minute=0,second=0,microsecond=0)
      if not (startWindow == dayt):
        raise AssertionError('processingDay must be some midnight, but was %s'%dayt)
      deltaWindow = datetime.timedelta(days=1)
      endWindow = startWindow + deltaWindow
    else: # no params: try table
      startWindow,deltaWindow = getLastWindowAndSizeFromTable(cursor,tableName, productVersionRestriction,logger)
      if startWindow:
        endWindow = startWindow+deltaWindow
    return (startWindow,deltaWindow,endWindow)
  except:
    # Deliberately broad: every failure in this section is routed to the shared abort helper.
    lib_util.reportExceptionAndAbort(logger)
Esempio n. 2
0
 def fixupContextByProcessingDay(self,context):
   """
   If context holds a truthy 'processingDay', derive a one-day interval from it.

   The day is converted with cm.dateTimeConverter, truncated to midnight, and stored
   in context (which is modified in place) as:
     context['startDate']  = midnight of the processing day
     context['deltaDate']  = timedelta(days=1)
     context['startWindw'] = the same midnight value
   Does nothing when 'processingDay' is missing or falsy. Returns None.
   """
   pday = context.get('processingDay')
   if pday:
     logger.info("Adjusting startDate and deltaDate per processingDay %s",pday)
     pday = cm.dateTimeConverter(pday)
     startDate = datetime.datetime.fromtimestamp(time.mktime(pday.timetuple()))
     # datetime objects are immutable: replace() returns a new value, so the result has to
     # be rebound or the truncation to midnight is lost.
     startDate = startDate.replace(hour=0,minute=0,second=0,microsecond=0)
     context['startDate'] = startDate
     context['deltaDate'] = datetime.timedelta(days=1)
     # NOTE(review): 'startWindw' looks like a typo for 'startWindow'. The key is kept as-is
     # because readers of this context are not visible from here - confirm before renaming.
     context['startWindw'] = startDate
Esempio n. 3
0
def getProcessingDates(configContext, tableName, productVersionRestriction, cursor, logger, **kwargs):
  """
  A processing interval is a time interval greater or equal to a processing window. Used to
  calculate a series of adjacent materialized view aggregates.

  Returns (startDate, deltaDate, endDate) using this heuristic:
  kwargs beats configContext
  if none are provided, calculates based on latest row of table, now()
  if only one is provided, logs the insufficiency and aborts
  if two among startDate, deltaDate, endDate: they are used
  Checks the table for most recent window_end
    if startDate < window_end:
      startDate = window_end
      logger.info(...that change...)
  if startDate >= endDate, or deltaDate <= 0, or three provided are inconsistent:
    logs the inconsistency and aborts
  """
  config = {}
  config.update(configContext)
  config.update(kwargs)
  delta0 = datetime.timedelta(days=0)
  delay = config.get('processingDelay', datetime.timedelta(hours=2))
  startDate = config.get('startDate')
  if startDate:
    startDate = "%s"%(startDate)
    startDate = cm.dateTimeConverter(startDate)
  deltaDate = config.get('deltaDate')
  if type(deltaDate) is str:
    deltaDate = cm.timeDeltaConverter(deltaDate)
  endDate = config.get('endDate')
  if endDate:
    endDate = "%s"%(endDate)
    endDate = cm.dateTimeConverter(endDate)
  initialDeltaDate = config.get('initialDeltaDate',config.get('deltaDate'))
  if not initialDeltaDate: initialDeltaDate = globalInitialDeltaDate
  defaultDeltaWindow = config.get('defaultDeltaWindow',config.get('deltaWindow'))
  if not defaultDeltaWindow: defaultDeltaWindow = globalDefaultDeltaWindow
  try:
    try:
      logger.debug('trying getDefaultDateInterval')
      startDateFromTable,endDateFromTable,latestWindowEnd = getDefaultDateInterval(cursor,tableName,delay,initialDeltaDate,defaultDeltaWindow,productVersionRestriction,logger)
    except Exception, x:
      print x
    if startDate and endDate and deltaDate:
      assert startDate + deltaDate == endDate,"inconsistent: %s + %s != %s"%(startDate,deltaDate,endDate)
    elif startDate and endDate:
      assert startDate < endDate, 'inconsistent: startDate %s >= endDate %s'%(startDate,endDate)
      deltaDate = endDate - startDate
    elif startDate and deltaDate:
      assert deltaDate > delta0, 'inconsistent: deltaDate %s <= 0'%(deltaDate)
      endDate = startDate + deltaDate
    elif deltaDate and endDate:
      assert deltaDate > delta0, 'inconsistent: deltaDate %s <= 0'%(deltaDate)
      startDate = endDate - deltaDate
    else:
      assert not (startDate or deltaDate or endDate), "insufficient: Need two xxxDate: start: %s, delta: %s, end:%s"%(startDate,deltaDate,endDate)
      startDate = startDateFromTable
      endDate = endDateFromTable
      deltaDate = endDate - startDate
    if latestWindowEnd and startDate < latestWindowEnd:
      logger.info("given/calculated startDate: %s < latest row in %s. Changing to %s",startDate,tableName,latestWindowEnd)
      startDate = latestWindowEnd
      deltaDate = endDate - startDate
      assert deltaDate > delta0, 'inconsistent (after check with db table %s): deltaDate %s <= 0'%(tableName,deltaDate)
    return (startDate,deltaDate,endDate)