예제 #1
0
# Issuer locations, as reported in stateOrCountryDescription, grouped by tier.
# Tuples keep the plain equality comparison a chain of `==` tests would give.
_TIER_ONE_LOCATIONS = (
    'NEW YORK', 'JERSEY', 'NEW JERSEY', 'CONNECTICUT', 'CALIFORNIA',
    'DELAWARE', 'MASSACHUSETTS',
)
_TIER_TWO_LOCATIONS = (
    'FLORIDA', 'PENNSYLVANIA', 'ILLINOIS', 'WASHINGTON', 'TEXAS',
)


def _tierForLocation(location):
    """Return the tier id ('1', '2' or '3') for an issuer location string."""
    if location in _TIER_ONE_LOCATIONS:
        return '1'
    if location in _TIER_TWO_LOCATIONS:
        return '2'
    return '3'


def addRSSentry(entry):
    """Store the issuer and Form D data behind one RSS entry.

    Entries whose title does not start with 'D -' or 'D/A' are ignored. For
    the others, the page at entry["link"]["@href"] is fetched, the link to
    primary_doc.xml is extracted from it, and that XML document is parsed.
    Only filings whose industry group type is 'Pooled Investment Fund' are
    stored: an Issuer and a FormD object are added to db_session and
    committed (the Issuer is committed first, on its own).

    Args:
        entry: mapping with a "title" string and a "link" mapping that
            carries the filing URL under "@href".

    Returns:
        None.

    Raises:
        AttributeError: if the fetched page contains no primary_doc.xml link
            (re.search returns None).
        KeyError: if an element read from the parsed XML is missing.
    """
    formTitle = entry["title"][0:3]
    if formTitle not in ('D -', 'D/A'):
        return

    filingURL = entry["link"]["@href"]
    r2 = requests.get(filingURL)
    # TODO: following amended filings via addAmendedDocuments was disabled in
    # the earlier version of this function; the unused BeautifulSoup parse
    # that only fed it has been dropped.
    form = "http://www.sec.gov" + re.search('<a href="(.*?primary_doc.xml)">primary_doc.xml', r2.text).group(1)
    print(form)
    r3 = requests.get(form)
    doc = xmltodict.parse(r3.text)

    offering = doc["edgarSubmission"]["offeringData"]
    if offering["industryGroup"]["industryGroupType"] != 'Pooled Investment Fund':
        return

    issuerInfo = doc["edgarSubmission"]["primaryIssuer"]
    i = Issuer()
    i.Name = issuerInfo["entityName"]
    i.City = issuerInfo["issuerAddress"]["city"]
    i.Country = issuerInfo["issuerAddress"]["stateOrCountryDescription"]
    i.TierID = _tierForLocation(i.Country)
    db_session.add(i)
    db_session.commit()

    j = FormD()
    j.url = form
    # NOTE(review): entryUpdatedTime is not defined in this function;
    # presumably a module-level name set by the caller -- confirm.
    j.SubmissionDate = entryUpdatedTime
    j.IndustryGroupType = offering["industryGroup"]["industryGroupType"]
    j.InvestmentFundType = offering["industryGroup"]["investmentFundInfo"]["investmentFundType"]

    typeOfFiling = offering["typeOfFiling"]
    # 1 is true; 0 is false
    j.Amended = 1 if typeOfFiling["newOrAmendment"]["isAmendment"] == 'true' else 0

    firstSaleDict = typeOfFiling["dateOfFirstSale"]
    # list(...) keeps the comparison valid whether keys() is a list or a view.
    if list(firstSaleDict.keys()) == ['yetToOccur']:
        j.IsDateOfFirstSaleYetToOccur = 1
    else:
        # The value is sliced as YYYY?MM?DD (positions 0-4, 5-7, 8-10).
        unformattedDate = firstSaleDict['value']
        year = int(unformattedDate[0:4])
        month = int(unformattedDate[5:7])
        day = int(unformattedDate[8:10])
        j.DateOfFirstSale = date(year, month, day)

    j.MinimumInvestmentAccepted = offering["minimumInvestmentAccepted"]

    sales = offering["offeringSalesAmounts"]
    j.TotalAmountSold = sales["totalAmountSold"]

    # 'Indefinite' amounts are stored as None plus a flag column.
    totalOffering = sales["totalOfferingAmount"]
    if totalOffering == 'Indefinite':
        j.TotalOfferingAmount = None
        j.IsTotalOfferingAmountIndefinite = 1
    else:
        j.TotalOfferingAmount = totalOffering
        j.IsTotalOfferingAmountIndefinite = 0

    totalRemaining = sales["totalRemaining"]
    if totalRemaining == 'Indefinite':
        j.TotalRemaining = None
        j.IsTotalRemainingIndefinite = 1
    else:
        j.TotalRemaining = totalRemaining
        j.IsTotalRemainingIndefinite = 0

    j.Issuer = i
    db_session.add(j)
    db_session.commit()
    print('parsing complete, added it to the DB')
예제 #2
0
def addRSSentry(entry):
    """Store the issuer and Form D data behind one RSS entry.

    Entries whose title does not start with 'D -' or 'D/A' are ignored. For
    the others, the page at entry["link"]["@href"] is fetched, the link to
    primary_doc.xml is extracted from it, and that XML document is parsed.
    An Issuer and a FormD object are then added to db_session and committed
    (the Issuer is committed first, on its own). The investment fund type is
    read only when the industry group type is 'Pooled Investment Fund';
    otherwise it is stored as None.

    Args:
        entry: mapping with a "title" string and a "link" mapping that
            carries the filing URL under "@href".

    Returns:
        None.

    Raises:
        AttributeError: if the fetched page contains no primary_doc.xml link
            (re.search returns None).
        KeyError: if an element read from the parsed XML is missing.
    """
    formTitle = entry["title"][0:3]
    if formTitle not in ('D -', 'D/A'):
        return

    filingURL = entry["link"]["@href"]
    r2 = requests.get(filingURL)
    # TODO: following amended filings via addAmendedDocuments was disabled in
    # the earlier version of this function; the unused BeautifulSoup parse
    # that only fed it has been dropped.
    form = "http://www.sec.gov" + re.search(
        '<a href="(.*?primary_doc.xml)">primary_doc.xml', r2.text).group(1)
    print(form)
    r3 = requests.get(form)
    doc = xmltodict.parse(r3.text)

    issuerInfo = doc["edgarSubmission"]["primaryIssuer"]
    i = Issuer()
    i.Name = issuerInfo["entityName"]
    i.City = issuerInfo["issuerAddress"]["city"]
    i.Country = issuerInfo["issuerAddress"]["stateOrCountryDescription"]
    db_session.add(i)
    db_session.commit()

    j = FormD()
    j.url = form
    # NOTE(review): entryUpdatedTime is not defined in this function;
    # presumably a module-level name set by the caller -- confirm.
    j.SubmissionDate = entryUpdatedTime

    offering = doc["edgarSubmission"]["offeringData"]
    j.IndustryGroupType = offering["industryGroup"]["industryGroupType"]
    if j.IndustryGroupType == 'Pooled Investment Fund':
        j.InvestmentFundType = offering["industryGroup"][
            "investmentFundInfo"]["investmentFundType"]
    else:
        j.InvestmentFundType = None

    typeOfFiling = offering["typeOfFiling"]
    # 1 is true; 0 is false
    j.Amended = 1 if typeOfFiling["newOrAmendment"]["isAmendment"] == 'true' else 0

    firstSaleDict = typeOfFiling["dateOfFirstSale"]
    # list(...) keeps the comparison valid whether keys() is a list or a view.
    if list(firstSaleDict.keys()) == ['yetToOccur']:
        j.IsDateOfFirstSaleYetToOccur = 1
    else:
        # The value is sliced as YYYY?MM?DD (positions 0-4, 5-7, 8-10).
        unformattedDate = firstSaleDict['value']
        year = int(unformattedDate[0:4])
        month = int(unformattedDate[5:7])
        day = int(unformattedDate[8:10])
        j.DateOfFirstSale = date(year, month, day)

    j.MinimumInvestmentAccepted = offering["minimumInvestmentAccepted"]

    sales = offering["offeringSalesAmounts"]
    j.TotalAmountSold = sales["totalAmountSold"]

    # 'Indefinite' amounts are stored as None plus a flag column.
    totalOffering = sales["totalOfferingAmount"]
    if totalOffering == 'Indefinite':
        j.TotalOfferingAmount = None
        j.IsTotalOfferingAmountIndefinite = 1
    else:
        j.TotalOfferingAmount = totalOffering
        j.IsTotalOfferingAmountIndefinite = 0

    totalRemaining = sales["totalRemaining"]
    if totalRemaining == 'Indefinite':
        j.TotalRemaining = None
        j.IsTotalRemainingIndefinite = 1
    else:
        j.TotalRemaining = totalRemaining
        j.IsTotalRemainingIndefinite = 0

    j.Issuer = i
    db_session.add(j)
    db_session.commit()
    print('parsing complete, added it to the DB')
예제 #3
0
# 	 	"Schwab Capital Trust" : "/Users/jeffca/documents/blackrock/flask_base/forms/Schwab Capital Trust/Schwab Capital Trust.csv",
# 	 	"BlackRock Capital Appreciation Fund" : "/Users/jeffca/documents/blackrock/flask_base/forms/BlackRock Capital Appreciation Fund/BlackRock Capital Appreciation Fund.csv",
# 	 	"BlackRock Focus Growth Fund" : "/Users/jeffca/documents/blackrock/flask_base/forms/BlackRock Focus Growth Fund/BlackRock Focus Growth Fund.csv"
# 	 }

# Funds to process: each key is a fund name, each value the path of that
# fund's CSV file.
filings = {
    "BlackRock Bond Index Fund":
        "/Users/jeffca/documents/blackrock/flask_base/forms/BlackRock Bond Index Fund/BlackRock Bond Index Fund.csv",
    "BlackRock Emerging Markets Fund":
        "/Users/jeffca/documents/blackrock/flask_base/forms/BlackRock Emerging Markets Fund/BlackRock Emerging Markets Fund.csv",
}

for filing in filings.keys():
	reg = Registrant()
	doc = Registrant_Document()
	db_session.add(reg)
	db_session.add(doc)
	db_session.commit()
	doc.Registrant = reg
	# url = "http://localhost:8080/rest/xbrl/view?file=" + filings[filing] + "&view=facts&media=csv"
	# data = requests.get(url).text
	# f = StringIO.StringIO(data)
	f = open("forms/" + filing + "/" + filing + '.csv' , 'r')
	reader = csv.reader(f, delimiter=',')	
	r2 = requests.get(html[filing])
	print 'parsing ' + html[filing]
	doc.url = html[filing]
	soup = BeautifulSoup(r2.text)
	for row in reader:
		if row[0] == 'Label':
			continue
		if row[0] == 'Registrant Name' or 'Effective' in row[0] or 'Objective' in row[0] or 'objective' in row[0] or 'Narrative' in row[0] or '[Text Block]' in row[0] or 'Annual Return' in row[0] or '200' in row[0] or 'Inception' in row[0] or 'Fee' in row[0] or 'Expense' in row[0]:
			rdt = Registrant_Document_Term()