Ejemplo n.º 1
0
        response = request.urlopen(url)
    except error.HTTPError as err:
        failed.append(url)
        continue

    # Read the full response body and decode it as Latin-1; that codec maps
    # every byte value to a code point, so decoding cannot fail.
    the_page = response.read()
    content = the_page.decode(encoding='latin-1')

    # NOTE(review): the accession number is derived from `links`, while the
    # amendment check below reads `link`. Neither name is defined in the
    # visible code -- confirm that both are intended.
    parts = links.split("/")

    # str.find() returns -1 when the marker is absent. Using -1 as a slice
    # start would silently select only the last character, so a missing start
    # marker is detected here instead.
    begin = content.find("<SEC-DOCUMENT>")

    if len(parts) not in (4, 5) or begin == -1:
        # Unsupported link shape or no document marker: previously this
        # either raised NameError on `accNum` or reused the value left over
        # from the previous iteration. Record the URL in the same `failed`
        # list the HTTP-error path uses and do not parse it.
        # NOTE(review): if more loop-body code follows this block, a
        # `continue` may be wanted here as well.
        failed.append(url)
    else:
        # Accession identifier: every path component from the third one on,
        # joined with "/", with the extension stripped from the last one.
        # For 5 components this yields "p2/p3/stem(p4)"; for 4 components,
        # "p2/stem(p3)".
        tail = parts[2:]
        tail[-1] = tail[-1].split('.')[0]
        accNum = '/'.join(tail)

        # A link containing "4/A" is treated as an amended filing.
        documentType = "4/A" if "4/A" in link else "4"

        # Start parsing: keep the text from the start marker up to the
        # "-----END" trailer. The trailer is searched for only after the
        # start marker; when it is absent, keep everything to the end of the
        # content instead of dropping the final character.
        end = content.find("-----END", begin)
        if end == -1:
            end = len(content)
        xmlFile = content[begin:end]

        soup = BeautifulSoup(xmlFile, 'xml')
        parser = Parser(soup, accNum, documentType, conn)
        parser.parseHead()
        parser.parseTransacs()