def _skipTrashFolder(iterator):
    """Consume records from *iterator* through the end of a trash folder.

    Called right after the trash folder's own FOLDER record has been read.
    Nested FOLDER records increase the depth and DASH records decrease it;
    consumption stops at the DASH that closes the trash folder itself (or
    when the iterator is exhausted).
    """
    depth = 1
    for _lineno, kind, name, _attrs in iterator:
        if kind == FOLDER:
            depth += 1
            log.info('drop %s', name)
        elif kind == DASH:
            depth -= 1
            if depth == 0:
                break


def parseFile(fp):
    """Parse the bookmark records read from *fp* into a folder tree.

    Records come from ``iterRecords(fp)`` as ``(lineno, type, name, attrs)``
    tuples.  FOLDER records open a new ``import_util.Folder``, DASH records
    close the most recently opened one, URL records become
    ``import_util.Bookmark`` children of the current folder, and SEPERATOR
    records are ignored.  Everything below a folder carrying the
    ``trashfolder`` attribute is dropped.

    Returns the unnamed root ``import_util.Folder`` holding the parsed tree.

    Raises ``RuntimeError`` when a DASH record has no open folder to close.
    """
    root_folder = import_util.Folder('')
    folder_stack = [root_folder]

    # A single shared iterator: the trash helper advances the same stream.
    iterator = iterRecords(fp)

    for lineno, kind, name, attrs in iterator:

        if kind == FOLDER:
            # `in` instead of dict.has_key(), which is gone in Python 3.
            if 'trashfolder' in attrs:
                # skipping everything under the trash folder
                _skipTrashFolder(iterator)
                continue

            if not name:
                log.warning('Invalid name line %s', lineno+1)
                # The nameless folder is dropped, but its closing DASH will
                # still arrive.  Push the current parent again so that DASH
                # pops this placeholder instead of the real parent; the
                # folder's children are attached to the parent meanwhile.
                folder_stack.append(folder_stack[-1])
                continue

            folder = import_util.Folder(name)
            folder_stack[-1].children.append(folder)
            folder_stack.append(folder)

        elif kind == URL:
            if not name:
                log.warning('Invalid name line %s', lineno+1)
                continue

            created = attrs.get('created', '')
            created = import_util._ctime_str_2_iso8601(created)
            # Opera doesn't have modified. Map visited to modified.
            modified = attrs.get('visited', '')
            modified = import_util._ctime_str_2_iso8601(modified)

            page = import_util.Bookmark(
                name,
                url         = attrs.get('url', ''),
                description = attrs.get('description', ''),
                created     = created,
                modified    = modified,
            )
            folder_stack[-1].children.append(page)

        elif kind == SEPERATOR:
            # Separators carry no bookmark data.
            pass

        elif kind == DASH:
            # The root folder is never closed by a DASH.
            if len(folder_stack) <= 1:
                raise RuntimeError('Unmatched "-" line: %s' % (lineno+1,))
            folder_stack.pop()

    return root_folder
def parseLink(tokens, attrs):
    """Read the remainder of an ``<a>`` element from *tokens*.

    *attrs* are the attributes of the opening ``<a>`` tag; ``href``,
    ``last_modified`` and ``add_date`` are looked up with ``_get_attr`` and
    the two dates are passed through ``import_util._ctime_str_2_iso8601``.

    *tokens* yields ``(kind, data, attrs)`` triples and must provide
    ``push_back``.  DATA tokens are accumulated as the link title until the
    closing ``</a>`` is seen.  If a ``dl``/``dt``/``dd`` token turns up
    first, the link is treated as malformed: that token is pushed back onto
    *tokens* and collection stops.

    Returns ``(title, url, add_date, last_modified)``.
    """
    href = _get_attr(attrs, 'href')
    raw_modified = _get_attr(attrs, 'last_modified')
    raw_added = _get_attr(attrs, 'add_date')

    modified_iso = import_util._ctime_str_2_iso8601(raw_modified)
    added_iso = import_util._ctime_str_2_iso8601(raw_added)

    text_parts = []
    for kind, data, token_attrs in tokens:
        if kind == DATA:
            text_parts.append(data)
            continue

        if kind == ENDTAG and data == 'a':
            # Proper end of the link.
            break

        if data in ('dl', 'dt', 'dd'):
            # Malformed markup: the link was never closed.  Hand the token
            # back so the caller can process it.
            tokens.push_back((kind, data, token_attrs))
            break

    return _join_text(text_parts), href, added_iso, modified_iso