def addRSSentry(entry):
    """Store the Form D filing referenced by one RSS feed entry.

    Entries whose title does not start with ``'D -'`` or ``'D/A'`` are
    ignored.  For the others, the filing index page is downloaded, the link
    to ``primary_doc.xml`` is extracted from it, and that XML document is
    fetched and parsed.  Only filings whose industry group type is
    ``'Pooled Investment Fund'`` are stored: an ``Issuer`` row (with a
    location tier) is added and committed, followed by a ``FormD`` row that
    is linked to it.

    Args:
        entry: Mapping describing one feed entry.  ``entry["title"]`` and
            ``entry["link"]["@href"]`` are read.

    Returns:
        None.

    Raises:
        AttributeError: If the filing index page contains no link to
            ``primary_doc.xml`` (``re.search`` returns ``None``).
        KeyError: If the parsed XML lacks one of the elements read below.
    """
    formTitle = entry["title"][0:3]
    # Guard clause: only new ('D -') and amended ('D/A') Form D entries.
    if formTitle not in ('D -', 'D/A'):
        return

    filingURL = entry["link"]["@href"]
    r2 = requests.get(filingURL)
    # TODO: following amended filings via addAmendedDocuments() was only ever
    # present as commented-out code and is not implemented here.
    form = "http://www.sec.gov" + re.search(
        '<a href="(.*?primary_doc.xml)">primary_doc.xml', r2.text).group(1)
    print(form)
    r3 = requests.get(form)
    doc = xmltodict.parse(r3.text)

    submission = doc["edgarSubmission"]
    offering = submission["offeringData"]
    industryGroup = offering["industryGroup"]
    if industryGroup["industryGroupType"] != 'Pooled Investment Fund':
        return

    primaryIssuer = submission["primaryIssuer"]
    issuerAddress = primaryIssuer["issuerAddress"]

    # Locations are matched against the upper-case description string.
    tierOneLocations = ('NEW YORK', 'JERSEY', 'NEW JERSEY', 'CONNECTICUT',
                        'CALIFORNIA', 'DELAWARE', 'MASSACHUSETTS')
    # 'FLORIDA' was previously misspelled 'FLORIA', which sent every Florida
    # issuer to tier 3.
    tierTwoLocations = ('FLORIDA', 'PENNSYLVANIA', 'ILLINOIS', 'WASHINGTON',
                        'TEXAS')

    i = Issuer()
    i.Name = primaryIssuer["entityName"]
    i.City = issuerAddress["city"]
    i.Country = issuerAddress["stateOrCountryDescription"]
    if i.Country in tierOneLocations:
        i.TierID = '1'
    elif i.Country in tierTwoLocations:
        i.TierID = '2'
    else:
        i.TierID = '3'
    db_session.add(i)
    db_session.commit()

    j = FormD()
    j.url = form
    # NOTE(review): entryUpdatedTime is not defined in this function; it is
    # presumably a module-level value set by the caller -- confirm.
    j.SubmissionDate = entryUpdatedTime
    j.IndustryGroupType = industryGroup["industryGroupType"]
    j.InvestmentFundType = (
        industryGroup["investmentFundInfo"]["investmentFundType"])

    typeOfFiling = offering["typeOfFiling"]
    # 1 is true; 0 is false
    j.Amended = 1 if typeOfFiling["newOrAmendment"]["isAmendment"] == 'true' else 0

    firstSaleDict = typeOfFiling["dateOfFirstSale"]
    # list(...) keeps the comparison meaningful on Python 3, where keys()
    # returns a view that never equals a list.
    if list(firstSaleDict.keys()) == ['yetToOccur']:
        j.IsDateOfFirstSaleYetToOccur = 1
    else:
        # The value is sliced as YYYY-MM-DD.
        unformattedDate = firstSaleDict['value']
        j.DateOfFirstSale = date(int(unformattedDate[0:4]),
                                 int(unformattedDate[5:7]),
                                 int(unformattedDate[8:10]))

    j.MinimumInvestmentAccepted = offering["minimumInvestmentAccepted"]

    salesAmounts = offering["offeringSalesAmounts"]
    j.TotalAmountSold = salesAmounts["totalAmountSold"]

    # 'Indefinite' is stored as a NULL amount plus a flag.
    totalOffering = salesAmounts["totalOfferingAmount"]
    if totalOffering == 'Indefinite':
        j.TotalOfferingAmount = None
        j.IsTotalOfferingAmountIndefinite = 1
    else:
        j.TotalOfferingAmount = totalOffering
        j.IsTotalOfferingAmountIndefinite = 0

    totalRemaining = salesAmounts["totalRemaining"]
    if totalRemaining == 'Indefinite':
        j.TotalRemaining = None
        j.IsTotalRemainingIndefinite = 1
    else:
        j.TotalRemaining = totalRemaining
        j.IsTotalRemainingIndefinite = 0

    j.Issuer = i
    db_session.add(j)
    db_session.commit()
    print('parsing complete, added it to the DB')
def addRSSentry(entry):
    """Store the Form D filing referenced by one RSS feed entry.

    Entries whose title does not start with ``'D -'`` or ``'D/A'`` are
    ignored.  For the others, the filing index page is downloaded, the link
    to ``primary_doc.xml`` is extracted from it, and that XML document is
    fetched and parsed.  An ``Issuer`` row is added and committed first,
    then a ``FormD`` row linked to it.  Filings of every industry group are
    stored; the investment fund type is only read for
    ``'Pooled Investment Fund'`` filings and is ``None`` otherwise.

    Args:
        entry: Mapping describing one feed entry.  ``entry["title"]`` and
            ``entry["link"]["@href"]`` are read.

    Returns:
        None.

    Raises:
        AttributeError: If the filing index page contains no link to
            ``primary_doc.xml`` (``re.search`` returns ``None``).
        KeyError: If the parsed XML lacks one of the elements read below.
    """
    formTitle = entry["title"][0:3]
    # Guard clause: only new ('D -') and amended ('D/A') Form D entries.
    if formTitle not in ('D -', 'D/A'):
        return

    filingURL = entry["link"]["@href"]
    r2 = requests.get(filingURL)
    # TODO: following amended filings via addAmendedDocuments() was only ever
    # present as commented-out code and is not implemented here.
    form = "http://www.sec.gov" + re.search(
        '<a href="(.*?primary_doc.xml)">primary_doc.xml', r2.text).group(1)
    print(form)
    r3 = requests.get(form)
    doc = xmltodict.parse(r3.text)

    submission = doc["edgarSubmission"]
    primaryIssuer = submission["primaryIssuer"]
    issuerAddress = primaryIssuer["issuerAddress"]

    i = Issuer()
    i.Name = primaryIssuer["entityName"]
    i.City = issuerAddress["city"]
    i.Country = issuerAddress["stateOrCountryDescription"]
    db_session.add(i)
    db_session.commit()

    # Offering data is read only after the issuer commit, as before.
    offering = submission["offeringData"]
    industryGroup = offering["industryGroup"]

    j = FormD()
    j.url = form
    # NOTE(review): entryUpdatedTime is not defined in this function; it is
    # presumably a module-level value set by the caller -- confirm.
    j.SubmissionDate = entryUpdatedTime
    j.IndustryGroupType = industryGroup["industryGroupType"]
    # investmentFundInfo is only looked up for pooled investment funds.
    if j.IndustryGroupType == 'Pooled Investment Fund':
        j.InvestmentFundType = (
            industryGroup["investmentFundInfo"]["investmentFundType"])
    else:
        j.InvestmentFundType = None

    typeOfFiling = offering["typeOfFiling"]
    # 1 is true; 0 is false
    j.Amended = 1 if typeOfFiling["newOrAmendment"]["isAmendment"] == 'true' else 0

    firstSaleDict = typeOfFiling["dateOfFirstSale"]
    # list(...) keeps the comparison meaningful on Python 3, where keys()
    # returns a view that never equals a list.
    if list(firstSaleDict.keys()) == ['yetToOccur']:
        j.IsDateOfFirstSaleYetToOccur = 1
    else:
        # The value is sliced as YYYY-MM-DD.
        unformattedDate = firstSaleDict['value']
        j.DateOfFirstSale = date(int(unformattedDate[0:4]),
                                 int(unformattedDate[5:7]),
                                 int(unformattedDate[8:10]))

    j.MinimumInvestmentAccepted = offering["minimumInvestmentAccepted"]

    salesAmounts = offering["offeringSalesAmounts"]
    j.TotalAmountSold = salesAmounts["totalAmountSold"]

    # 'Indefinite' is stored as a NULL amount plus a flag.
    totalOffering = salesAmounts["totalOfferingAmount"]
    if totalOffering == 'Indefinite':
        j.TotalOfferingAmount = None
        j.IsTotalOfferingAmountIndefinite = 1
    else:
        j.TotalOfferingAmount = totalOffering
        j.IsTotalOfferingAmountIndefinite = 0

    totalRemaining = salesAmounts["totalRemaining"]
    if totalRemaining == 'Indefinite':
        j.TotalRemaining = None
        j.IsTotalRemainingIndefinite = 1
    else:
        j.TotalRemaining = totalRemaining
        j.IsTotalRemainingIndefinite = 0

    j.Issuer = i
    db_session.add(j)
    db_session.commit()
    print('parsing complete, added it to the DB')
# "Schwab Capital Trust" : "/Users/jeffca/documents/blackrock/flask_base/forms/Schwab Capital Trust/Schwab Capital Trust.csv", # "BlackRock Capital Appreciation Fund" : "/Users/jeffca/documents/blackrock/flask_base/forms/BlackRock Capital Appreciation Fund/BlackRock Capital Appreciation Fund.csv", # "BlackRock Focus Growth Fund" : "/Users/jeffca/documents/blackrock/flask_base/forms/BlackRock Focus Growth Fund/BlackRock Focus Growth Fund.csv" # } filings = { "BlackRock Bond Index Fund" : "/Users/jeffca/documents/blackrock/flask_base/forms/BlackRock Bond Index Fund/BlackRock Bond Index Fund.csv", "BlackRock Emerging Markets Fund" : "/Users/jeffca/documents/blackrock/flask_base/forms/BlackRock Emerging Markets Fund/BlackRock Emerging Markets Fund.csv" } for filing in filings.keys(): reg = Registrant() doc = Registrant_Document() db_session.add(reg) db_session.add(doc) db_session.commit() doc.Registrant = reg # url = "http://localhost:8080/rest/xbrl/view?file=" + filings[filing] + "&view=facts&media=csv" # data = requests.get(url).text # f = StringIO.StringIO(data) f = open("forms/" + filing + "/" + filing + '.csv' , 'r') reader = csv.reader(f, delimiter=',') r2 = requests.get(html[filing]) print 'parsing ' + html[filing] doc.url = html[filing] soup = BeautifulSoup(r2.text) for row in reader: if row[0] == 'Label': continue if row[0] == 'Registrant Name' or 'Effective' in row[0] or 'Objective' in row[0] or 'objective' in row[0] or 'Narrative' in row[0] or '[Text Block]' in row[0] or 'Annual Return' in row[0] or '200' in row[0] or 'Inception' in row[0] or 'Fee' in row[0] or 'Expense' in row[0]: rdt = Registrant_Document_Term()