def _makeRegex(pattern, startp):
    """Build a line-break regex string anchored on a literal pattern.

    The pattern is first passed through learning.safeRegexLiteral()
    (presumably to escape regex metacharacters -- confirm against that
    helper). The returned string is a regex containing one capture group,
    ``([\\r\\n]+)``, for the line-break characters, followed by a trailing
    ``#`` comment describing the rule.

    Args:
        pattern: Text that marks where a break belongs.
        startp: Truthy to break BEFORE the pattern (pattern at or near the
            start of a line); falsy to break AFTER it (pattern at or near
            the end of a line).

    Returns:
        The regex string. When the literal touches the line boundary (it
        starts with a newline for ``startp``, or ends with one otherwise),
        that newline is stripped and the pattern is matched directly next
        to the break. Otherwise up to MAX_FORWARD_CHARS /
        MAX_BACKWARDS_CHARS arbitrary characters are allowed between the
        break and the pattern.
    """
    literal = learning.safeRegexLiteral(pattern)

    if startp:
        if not literal.startswith('\n'):
            # Pattern sits somewhere after the line start: allow a bounded gap.
            template = '([\\r\\n]+).{0,%s}%s # BREAK BEFORE SEEING "%s" NEAR THE BEGINNING OF A LINE'
            return template % (MAX_FORWARD_CHARS, literal, literal)
        # Drop the leading newline; the capture group already matches it.
        literal = literal[1:]
        template = '([\\r\\n]+)%s # BREAK BEFORE SEEING "%s" AT THE START OF A LINE'
        return template % (literal, literal)

    if not literal.endswith('\n'):
        # Pattern sits somewhere before the line end: allow a bounded gap.
        template = '%s.{0,%s}([\\r\\n]+) # BREAK AFTER SEEING "%s" NEAR THE END OF A LINE'
        return template % (literal, MAX_BACKWARDS_CHARS, literal)
    # Drop the trailing newline; the capture group already matches it.
    literal = literal[:-1]
    template = '%s([\\r\\n]+) # BREAK AFTER SEEING "%s" AT THE END OF A LINE'
    return template % (literal, literal)
def learnSTRPTime(text, timevalues):
    """Learn a timestamp prefix regex and strptime format from one example.

    Args:
        text: Sample text that contains the timestamp.
        timevalues: Comma-separated string of the timestamp's field values.
            Entry ``i`` is paired with ``FIELDNAMES[i]``.

    Returns:
        ``(prefixRegex, strpformat, suffix)`` when verify() accepts the
        learned format. ``prefixRegex`` is generated from the text before
        the earliest located field, and ``suffix`` is the text after the
        end of the last located field.
        ``(None, None)`` when getPositions() returns None or a required
        field was not located.
        ``None`` when verify() raises.

    NOTE(review): the two failure paths return different shapes (a 2-tuple
    versus a bare None) and neither matches the 3-tuple returned on
    success. They are left unchanged because callers are not visible here;
    confirm how callers unpack the result before unifying them.

    NOTE(review): a second definition of learnSTRPTime appears later in
    this file and replaces this one when the module is loaded.
    """
    timevalues = timevalues.split(',')
    positions = getPositions(text, timevalues)
    if positions is None:
        return None, None

    # Indexes 0, 1, 3 and 4 must have been located (-1 means "not found").
    # Per the warning text these are month, day, hour and minute --
    # presumably matching the order of FIELDNAMES; verify against it.
    if any(positions[index] == -1 for index in (0, 1, 3, 4)):
        # Single-argument parenthesised print produces the same output
        # under Python 2's print statement and Python 3's print function.
        print("Warning: month, day, hour, and minute are required.")
        return None, None

    posDict = {}
    valueDict = {}
    first = -1  # start offset of the earliest located field
    last = -1   # end offset (exclusive) of the latest located field
    for count, position in enumerate(positions):
        fieldname = FIELDNAMES[count]
        value = timevalues[count]
        posDict[fieldname] = position
        valueDict[fieldname] = value
        if position < 0:
            # Field not present in the text; it cannot bound the timestamp.
            continue
        if first < 0 or position < first:
            first = position
        # Compare END offsets. Comparing a start offset with the previous
        # end offset missed a field that begins exactly where the previous
        # one ended (e.g. "20050112"), leaving part of the timestamp in
        # the suffix.
        end = position + len(value)
        if end > last:
            last = end

    # Order the fields by where they occur in the text.
    fieldAndPos = sorted(posDict.items(), key=lambda item: item[1])

    prefix = text[:first]
    suffix = text[last:]

    prefixRegex = learning.generateSearchRegex(prefix)
    strpformat = buildSTRPTime(text, fieldAndPos, valueDict)
    print("TIME_PREFIX: '%s'" % prefixRegex)
    print("TIME_FORMAT: %s" % (strpformat,))
    try:
        verify(text, prefixRegex, strpformat, suffix)
    except Exception:
        # Best effort: report the failure instead of propagating it, but
        # let KeyboardInterrupt / SystemExit through.
        print("Error determining timeformat")
        return None
    return (prefixRegex, strpformat, suffix)
def learnSTRPTime(text, timevalues):
    """Learn a timestamp prefix regex and strptime format from one example.

    Args:
        text: Sample text that contains the timestamp.
        timevalues: Comma-separated string of the timestamp's field values.
            Entry ``i`` is paired with ``FIELDNAMES[i]``.

    Returns:
        ``(prefixRegex, strpformat, suffix)`` when verify() accepts the
        learned format. ``prefixRegex`` is generated from the text before
        the earliest located field, and ``suffix`` is the text after the
        end of the last located field.
        ``(None, None)`` when getPositions() returns None or a required
        field was not located.
        ``None`` when verify() raises.

    NOTE(review): the two failure paths return different shapes (a 2-tuple
    versus a bare None) and neither matches the 3-tuple returned on
    success. They are left unchanged because callers are not visible here;
    confirm how callers unpack the result before unifying them.

    NOTE(review): an earlier definition of learnSTRPTime precedes this one
    in the file; this later definition is the one that takes effect when
    the module is loaded.
    """
    timevalues = timevalues.split(',')
    positions = getPositions(text, timevalues)
    if positions is None:
        return None, None

    # Indexes 0, 1, 3 and 4 must have been located (-1 means "not found").
    # Per the warning text these are month, day, hour and minute --
    # presumably matching the order of FIELDNAMES; verify against it.
    if any(positions[index] == -1 for index in (0, 1, 3, 4)):
        # Single-argument parenthesised print produces the same output
        # under Python 2's print statement and Python 3's print function.
        print("Warning: month, day, hour, and minute are required.")
        return None, None

    posDict = {}
    valueDict = {}
    first = -1  # start offset of the earliest located field
    last = -1   # end offset (exclusive) of the latest located field
    for count, position in enumerate(positions):
        fieldname = FIELDNAMES[count]
        value = timevalues[count]
        posDict[fieldname] = position
        valueDict[fieldname] = value
        if position < 0:
            # Field not present in the text; it cannot bound the timestamp.
            continue
        if first < 0 or position < first:
            first = position
        # Compare END offsets. Comparing a start offset with the previous
        # end offset missed a field that begins exactly where the previous
        # one ended (e.g. "20050112"), leaving part of the timestamp in
        # the suffix.
        end = position + len(value)
        if end > last:
            last = end

    # Order the fields by where they occur in the text.
    fieldAndPos = sorted(posDict.items(), key=lambda item: item[1])

    prefix = text[:first]
    suffix = text[last:]

    prefixRegex = learning.generateSearchRegex(prefix)
    strpformat = buildSTRPTime(text, fieldAndPos, valueDict)
    print("TIME_PREFIX: '%s'" % prefixRegex)
    print("TIME_FORMAT: %s" % (strpformat,))
    try:
        verify(text, prefixRegex, strpformat, suffix)
    except Exception:
        # Best effort: report the failure instead of propagating it, but
        # let KeyboardInterrupt / SystemExit through.
        print("Error determining timeformat")
        return None
    return (prefixRegex, strpformat, suffix)