-
Notifications
You must be signed in to change notification settings - Fork 0
/
sec.py
110 lines (87 loc) · 3.03 KB
/
sec.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/python
import os, sys
import requests
import pandas as pd
from bs4 import BeautifulSoup
import dateparser
from timesetter import *
# Determine whether the given time is within the specified 'news_interval' days
def sec_time_filter(news_interval, dtg):
    """Tell whether the date *dtg* lies within *news_interval* days of today.

    "Today" is the current date rendered in the ``nyc`` timezone (a name
    supplied by the ``timesetter`` star import) and re-parsed at day
    resolution, so the time of day does not affect the comparison.

    Args:
        news_interval: Maximum allowed age, in days.
        dtg: Date string understood by ``dateparser.parse``.

    Returns:
        True when ``today - dtg`` is at most ``news_interval`` days, False
        otherwise. A date later than today yields a negative difference and
        therefore also passes for a non-negative ``news_interval``.
    """
    # Round-trip through a 'YYYY-MM-DD' string to drop the time-of-day part.
    nyc_day_text = datetime.today().astimezone(nyc).strftime('%Y-%m-%d')
    reference_day = dateparser.parse(nyc_day_text)
    candidate_day = dateparser.parse(dtg)
    age = reference_day - candidate_day
    return age <= timedelta(days=news_interval)
def update_sec(ticker, days_interval):
    """Fetch recent EDGAR filings for *ticker* and return them as a DataFrame.

    Requests the EDGAR company-browse page, takes the first table with class
    ``tableFile2`` and keeps the rows whose filing date passes
    ``sec_time_filter(days_interval, ...)``.

    Args:
        ticker: Value sent as the ``CIK`` query parameter.
        days_interval: Age limit in days, forwarded to ``sec_time_filter``.

    Returns:
        A DataFrame with the columns ``Type``, ``Date`` (formatted as
        ``"%d %b"``) and ``Links`` (the documents URL, or ``'no link'`` when
        the row has no documents button). When no table is found or no row
        qualifies, an empty DataFrame without columns is returned.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or
            does not complete within the timeout.
    """
    endpoint = r"https://www.sec.gov/cgi-bin/browse-edgar"
    base_url_sec = r"https://www.sec.gov"
    param_dict = {
        'CIK': ticker,
        'action': 'getcompany',
        'owner': 'exclude',
    }
    # Only this slice of the table's <tr> elements is examined.
    first_index = 0
    last_index = 10

    # A timeout keeps the call from blocking forever on a stalled connection.
    # NOTE(review): SEC.gov may require a declared User-Agent header for
    # automated access -- confirm against the current EDGAR access policy.
    response = requests.get(url=endpoint, params=param_dict, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')
    doc_table = soup.find_all('table', class_='tableFile2')

    df_list = []
    if doc_table:
        for row in doc_table[0].find_all('tr')[first_index:last_index]:
            cols = row.find_all('td')
            # Skip rows that lack the cells read below (type at index 0,
            # links at index 1, filing date at index 3), e.g. a header row.
            if len(cols) < 4:
                continue

            # Parse the date once; skip the row if it cannot be parsed.
            filing_date = dateparser.parse(cols[3].text.strip())
            if filing_date is None:
                continue
            if not sec_time_filter(days_interval, filing_date.strftime('%Y-%m-%d')):
                continue

            filing_doc_href = cols[1].find('a', {'href': True, 'id': 'documentsbutton'})
            if filing_doc_href is not None:
                filing_doc_link = base_url_sec + filing_doc_href['href']
            else:
                filing_doc_link = 'no link'

            df_list.append([
                cols[0].text.strip(),
                filing_date.strftime("%d %b"),
                filing_doc_link,
            ])

    df = pd.DataFrame(df_list)
    # Column labels are only assigned when there is data, so the empty result
    # stays a bare DataFrame with no columns.
    if not df.empty:
        df.columns = ['Type', 'Date', 'Links']
    return df
def main():
    """Run ``update_sec`` with the module-level ``ticker`` and ``days_interval``.

    Both names are globals assigned under the ``__main__`` guard before this
    function is called. The DataFrame returned by ``update_sec`` is discarded.
    """
    update_sec(ticker=ticker, days_interval=days_interval)
if __name__ == '__main__':
    # Command line: <ticker> <days_interval>. Exit with a usage message
    # instead of a bare IndexError when an argument is missing.
    if len(sys.argv) < 3:
        sys.exit('usage: %s TICKER DAYS_INTERVAL' % sys.argv[0])
    # These are module-level globals; main() reads them by name.
    ticker = sys.argv[1].upper()
    days_interval = int(sys.argv[2])
    main()