コード例 #1
0
def _makeRegex(pattern, startp):
    """Build a line-break regex string around *pattern*.

    The pattern is first passed through ``learning.safeRegexLiteral``. The
    returned string holds a single capture group, ``([\\r\\n]+)``, matching
    the run of line-break characters, followed by a ``#`` explanation
    (presumably meant for a verbose-mode regex; confirm against the caller).

    pattern -- text that marks where a break belongs.
    startp  -- true if the text appears after the break (at or near the start
               of a line); false if it appears before the break (at or near
               the end of a line).
    """
    literal = learning.safeRegexLiteral(pattern)

    if startp:
        if not literal.startswith('\n'):
            # Text may sit a bounded number of characters into the line.
            return '([\\r\\n]+).{0,%s}%s     # BREAK BEFORE SEEING "%s" NEAR THE BEGINNING OF A LINE' % (MAX_FORWARD_CHARS, literal, literal)
        # A leading newline means the text sits right after the break.
        anchored = literal[1:]
        return '([\\r\\n]+)%s            # BREAK BEFORE SEEING "%s" AT THE START OF A LINE' % (anchored, anchored)

    if not literal.endswith('\n'):
        # Text may be followed by a bounded number of characters before the break.
        return '%s.{0,%s}([\\r\\n]+)     # BREAK AFTER SEEING "%s" NEAR THE END OF A LINE' % (literal, MAX_BACKWARDS_CHARS, literal)
    # A trailing newline means the text sits right before the break.
    anchored = literal[:-1]
    return '%s([\\r\\n]+)            # BREAK AFTER SEEING "%s" AT THE END OF A LINE' % (anchored, anchored)
コード例 #2
0
def learnSTRPTime(text, timevalues):
    """Learn a timestamp prefix regex and strptime format from a sample.

    Args:
        text: Sample text containing the timestamp.
        timevalues: Comma-separated field values, indexed in parallel with
            FIELDNAMES and with the positions returned by getPositions().

    Returns:
        ``(prefixRegex, strpformat, suffix)`` when verify() accepts the
        learned format; ``(None, None)`` when getPositions() returns None or
        a required field has position -1; ``None`` when verify() raises.
        NOTE(review): the three shapes differ; they are kept as-is so
        existing callers are not broken.
    """
    timevalues = timevalues.split(',')
    positions = getPositions(text, timevalues)
    if positions is None:
        return None, None
    # Indices 0, 1, 3 and 4 are the fields the warning below names as required.
    if any(positions[i] == -1 for i in (0, 1, 3, 4)):
        print("Warning: month, day, hour, and minute are required.")
        return None, None

    posDict = {}
    valueDict = {}
    first = -1  # smallest start offset of any located field
    last = -1   # largest end offset (exclusive) of any located field
    for index, position in enumerate(positions):
        fieldname = FIELDNAMES[index]
        value = timevalues[index]
        posDict[fieldname] = position
        valueDict[fieldname] = value
        if position < 0:
            # Field was not located in text; it contributes no span.
            continue
        if first < 0 or position < first:
            first = position
        # Compare END offsets. Comparing the start offset against the previous
        # end offset misses a field that directly follows another one
        # (e.g. "202401051230"), leaving timestamp text in the suffix.
        end = position + len(value)
        if end > last:
            last = end

    # Order fields by where they were found in the text.
    fieldAndPos = sorted(posDict.items(), key=lambda item: item[1])
    prefix = text[:first]
    suffix = text[last:]
    prefixRegex = learning.generateSearchRegex(prefix)

    strpformat = buildSTRPTime(text, fieldAndPos, valueDict)
    print("TIME_PREFIX: '%s'" % prefixRegex)
    print("TIME_FORMAT: %s" % (strpformat,))
    try:
        verify(text, prefixRegex, strpformat, suffix)
    except Exception:
        # Best effort: report the failure, but let KeyboardInterrupt and
        # SystemExit propagate.
        print("Error determining timeformat")
        return None
    return (prefixRegex, strpformat, suffix)
コード例 #3
0
def learnSTRPTime(text, timevalues):
    """Learn a timestamp prefix regex and strptime format from a sample.

    Args:
        text: Sample text containing the timestamp.
        timevalues: Comma-separated field values, indexed in parallel with
            FIELDNAMES and with the positions returned by getPositions().

    Returns:
        ``(prefixRegex, strpformat, suffix)`` when verify() accepts the
        learned format; ``(None, None)`` when getPositions() returns None or
        a required field has position -1; ``None`` when verify() raises.
        NOTE(review): the three shapes differ; they are kept as-is so
        existing callers are not broken.
    """
    timevalues = timevalues.split(',')
    positions = getPositions(text, timevalues)
    if positions is None:
        return None, None
    # Indices 0, 1, 3 and 4 are the fields the warning below names as required.
    if any(positions[i] == -1 for i in (0, 1, 3, 4)):
        print("Warning: month, day, hour, and minute are required.")
        return None, None

    posDict = {}
    valueDict = {}
    first = -1  # smallest start offset of any located field
    last = -1   # largest end offset (exclusive) of any located field
    for index, position in enumerate(positions):
        fieldname = FIELDNAMES[index]
        value = timevalues[index]
        posDict[fieldname] = position
        valueDict[fieldname] = value
        if position < 0:
            # Field was not located in text; it contributes no span.
            continue
        if first < 0 or position < first:
            first = position
        # Compare END offsets. Comparing the start offset against the previous
        # end offset misses a field that directly follows another one
        # (e.g. "202401051230"), leaving timestamp text in the suffix.
        end = position + len(value)
        if end > last:
            last = end

    # Order fields by where they were found in the text.
    fieldAndPos = sorted(posDict.items(), key=lambda item: item[1])
    prefix = text[:first]
    suffix = text[last:]
    prefixRegex = learning.generateSearchRegex(prefix)

    strpformat = buildSTRPTime(text, fieldAndPos, valueDict)
    print("TIME_PREFIX: '%s'" % prefixRegex)
    print("TIME_FORMAT: %s" % (strpformat,))
    try:
        verify(text, prefixRegex, strpformat, suffix)
    except Exception:
        # Best effort: report the failure, but let KeyboardInterrupt and
        # SystemExit propagate.
        print("Error determining timeformat")
        return None
    return (prefixRegex, strpformat, suffix)