예제 #1
0
def parseLine(line):
    """Split one tab-separated record into its first five fields.

    The line is expected to be laid out as
    ``url<TAB>category<TAB>subcategory<TAB>title<TAB>content[<TAB>...]``.
    Anything after a fifth tab is ignored, and each returned field has its
    surrounding whitespace stripped.

    Args:
        line: A single record containing at least four tab characters.

    Returns:
        A tuple ``(category, url, title, content, subcategory)``; note that
        this order differs from the order of the fields in the line.

    When the line holds fewer than four tabs, ``Utils.error`` is called with
    the message ``"len(pos)<4"``.
    NOTE(review): ``Utils.error`` is defined elsewhere; presumably it aborts
    or raises. If it returns normally, the field access below fails with an
    ``IndexError`` -- confirm against its definition.
    """
    fields = line.split("\t")
    # Four tab separators produce five fields, so this is the same check as
    # "fewer than four tabs were found".
    if len(fields) < 5:
        Utils.error("len(pos)<4")

    url = fields[0].strip()
    category = fields[1].strip()
    subcategory = fields[2].strip()
    title = fields[3].strip()
    # fields[4] ends at the fifth tab when one exists, otherwise it runs to
    # the end of the line.
    content = fields[4].strip()
    return category, url, title, content, subcategory