def _load(self):
    """Load the company list from the server and populate the internal Crp list."""
    url = '{}?method=download&searchType=13'.format(self._COMPANY_LIST_URL_)
    if self.market_type == 'allMkt':
        pass
    elif self.market_type == 'etcMkt':
        raise ValueError('ETC market is not supported')
    else:
        url = '{}&marketType={}'.format(url, self.market_type)
    resp = request_get(url=url, timeout=120)
    soup = BeautifulSoup(resp.text, 'html.parser')
    rows = soup.find_all('tr')
    for row in rows:
        cols = row.find_all('td')
        # Skip header and malformed rows; data rows carry at least four columns
        if len(cols) >= 4:
            crp_nm = cols[0].text.strip()
            crp_cd = cols[1].text.strip()
            crp_ctp = cols[2].text.strip()
            crp_prod = cols[3].text.strip()
            crp_info = {
                'crp_cd': crp_cd,
                'crp_nm': crp_nm,
                'crp_ctp': crp_ctp,
                'crp_prod': crp_prod
            }
            self.__crp_list.append(Crp(**crp_info))
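The parsed columns above are handed straight to `Crp(**crp_info)`, so the constructor must accept exactly those four fields. A minimal stand-in illustrating the implied shape; the project's real `Crp` class carries additional behavior, and `CrpSketch` is a hypothetical name used only for illustration:

class CrpSketch:
    """Hypothetical minimal stand-in for the Crp container used above."""
    def __init__(self, crp_cd, crp_nm, crp_ctp, crp_prod):
        self.crp_cd = crp_cd      # stock code
        self.crp_nm = crp_nm      # company name
        self.crp_ctp = crp_ctp    # sector / company type
        self.crp_prod = crp_prod  # main products

    def __repr__(self):
        return '[{}] {}'.format(self.crp_cd, self.crp_nm)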
def _get_report(self):
    """Fetch the report HTML page."""
    params = dict(rcpNo=self.rcp_no)
    if self.dcm_no:
        params['dcmNo'] = self.dcm_no
    resp = request_get(url=self._REPORT_URL_, params=params)
    self.html = BeautifulSoup(resp.text, 'html.parser')
def api_key(self, api_key: str) -> None:
    """Validate the DART API key against the server before storing it."""
    if not isinstance(api_key, str):
        raise ValueError(
            'The DART API key must be provided as a string through the api_key variable'
        )
    url = 'http://opendart.fss.or.kr/api/list.json'
    params = dict()
    params['crtfc_key'] = api_key
    resp = request_get(url=url, params=params)
    data = resp.json()
    check_err_code(**data)  # raises if the server rejects the key
    self.__api_key = api_key
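A usage sketch of the setter, assuming `DartAuth` exposes it as the `api_key` property (the matching getter is read the same way in `load` below); the key string is a placeholder:

auth = DartAuth()
auth.api_key = 'YOUR_OPEN_DART_API_KEY'  # placeholder; validated against list.json
# A rejected key raises inside check_err_code, so reaching this line means it was accepted
print('API key accepted')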
def load(self):
    """Load company information from the DART API."""
    api_key = DartAuth().api_key
    url = self._SEARCH_URL_ + 'company.json'
    params = dict(
        auth=api_key,
        crp_cd=self.crp_cd
    )
    resp = request_get(url=url, params=params)
    data = resp.json()
    # The API returns the stock code as 'stock_cd'; normalize it to 'crp_cd'.
    # The default avoids a KeyError on error responses, letting check_err_code
    # report the actual failure.
    data['crp_cd'] = data.pop('stock_cd', None)
    check_err_code(**data)
    self.crp_nm = data.get('crp_nm')
    self._info = {key: value for key, value in data.items()
                  if key not in ['err_code', 'err_msg', 'crp_cd', 'crp_nm']}
def extract_attached_files(self):
    """Extract and return the list of attached files.

    Returns
    -------
    list of AttachedFile
        Files attached to the report
    """
    if self.html is None:
        self._get_report()
    results = []
    a_href = self.html.find('a', href='#download')
    if a_href is None:  # report has no download link
        return results
    a_onclick = a_href.attrs.get('onclick', '')
    raw_data = re.search(r'openPdfDownload\(.*?(\d+).*?(\d+).*?\)', a_onclick)
    if raw_data is None:
        return results
    rcp_no = raw_data.group(1)
    dcm_no = raw_data.group(2)
    params = dict(rcp_no=rcp_no, dcm_no=dcm_no)
    resp = request_get(url=self._DOWNLOAD_URL_, params=params)
    soup = BeautifulSoup(resp.text, 'html.parser')
    tr_list = soup.find_all('tr')
    attached_files = []
    for tr in tr_list:
        if tr.find('a'):
            td_list = tr.find_all('td')
            filename = td_list[0].text.strip()
            file_url = td_list[1].a.get('href')
            if not file_url:
                continue
            info = dict()
            info['rcp_no'] = self.rcp_no
            info['url'] = file_url
            info['filename'] = filename
            attached_files.append(AttachedFile(**info))
    self._attached_files = attached_files
    return self._attached_files
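The regex above pulls the two numeric arguments (rcpNo and dcmNo) out of the download link's onclick handler. A self-contained check of the pattern, using a made-up onclick string in the page's format:

import re

onclick = "openPdfDownload('20190401004781', '6616741')"  # made-up example value
match = re.search(r'openPdfDownload\(.*?(\d+).*?(\d+).*?\)', onclick)
print(match.group(1), match.group(2))  # -> 20190401004781 6616741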
def download(self, path):
    """Download the attached file.

    Parameters
    ----------
    path: str
        Directory to download the file into

    Returns
    -------
    str
        Path of the downloaded file
    """
    from dart_fss.spinner import Spinner
    create_folder(path)
    url = self.url
    r = request_get(url=url, stream=True)
    headers = r.headers.get('Content-Disposition')
    if headers is None or not re.search('attachment', headers):
        raise Exception('invalid data found')
    # total_size = int(r.headers.get('content-length', 0))
    block_size = 8192
    filename = unquote(re.findall(r'filename="(.*?)"', headers)[0])
    filename = '{}_{}'.format(self.rcp_no, filename)
    spinner = Spinner('Downloading ' + filename)
    spinner.start()
    file_path = os.path.join(path, filename)
    with open(file_path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=block_size):
            if chunk:  # skip keep-alive chunks
                f.write(chunk)
    r.close()
    spinner.stop()
    return file_path
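A usage sketch combining the two methods above; `report` stands for an already-loaded report object from this library, and the target directory is an arbitrary example:

files = report.extract_attached_files()
for attached in files:
    saved_path = attached.download(path='./attachments')
    print('saved to', saved_path)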
def load(self):
    """Load the page and rewrite relative DART links to absolute URLs."""
    def change_url(bs, tag):
        # Prefix every href/src attribute (relative on these pages) with the DART host
        tags = bs.find_all(attrs={tag: re.compile(r'.*')})
        if tags:
            for t in tags:
                t[tag] = "http://dart.fss.or.kr" + t[tag]
        return bs

    def add_prefix(match_obj):
        return r"window.open('http://dart.fss.or.kr" + match_obj.group(1) + r"'"

    params = {
        'rcpNo': self.rcp_no,
        'dcmNo': self.dcm_no,
        'eleId': self.ele_id,
        'offset': self._offset,
        'length': self._length,
        'dtd': self._dtd
    }
    html = request_get(url=self._BASE_URL_, params=params).content
    try:
        html = html.decode()
    except UnicodeDecodeError:
        # Older filings are served as EUC-KR/CP949 rather than UTF-8
        html = html.decode('cp949')
    soup = BeautifulSoup(html, 'html.parser')
    meta = soup.find('meta', {'content': re.compile(r'charset')})
    if meta:
        meta['content'] = meta['content'].replace('euc-kr', 'utf-8')
    soup = change_url(soup, 'href')
    soup = change_url(soup, 'src')
    html = str(soup)
    html = re.sub(r'window\.open\(\'(.*?)\'', add_prefix, html)
    self._html = html
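The window.open rewrite is easiest to verify on a standalone string. A self-contained check of the substitution, with a made-up inline handler in the viewer's format:

import re

def add_prefix(match_obj):
    return r"window.open('http://dart.fss.or.kr" + match_obj.group(1) + r"'"

html = '<a onclick="window.open(\'/report/viewer.do?rcpNo=123\', \'popup\')">open</a>'
print(re.sub(r'window\.open\(\'(.*?)\'', add_prefix, html))
# -> ...window.open('http://dart.fss.or.kr/report/viewer.do?rcpNo=123', 'popup')...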
def search_report(crp_cd: str = None, start_dt: str = None, end_dt: str = None,
                  fin_rpt: bool = False, dsp_tp: List_or_str = None,
                  bsn_tp: List_or_str = None, sort: str = 'date',
                  series: str = 'desc', page_no: int = 1, page_set: int = 10,
                  return_dict: bool = False) -> SearchResults_or_dict:
    """Search disclosures filed with DART.

    Searches information disclosed on DART; when no parameters are set,
    returns the 10 filings received today.

    Parameters
    ----------
    crp_cd: str
        Stock code
    start_dt: str
        Search start date (YYYYMMDD)
    end_dt: str
        Search end date (YYYYMMDD)
    fin_rpt: bool
        Whether to search final reports only, default: False
    dsp_tp: list of str or str
        Disclosure type (DSP_TYPES)
    bsn_tp: list of str or str
        Detailed disclosure type (BSN_TYPES)
    sort: str
        Sort key: received date (date), company name (crp), report name (rpt), default: date
    series: str
        Ascending (asc) or descending (desc), default: desc
    page_no: int
        Page number, default: 1
    page_set: int
        Results per page (1-100), default: 10, maximum: 100
    return_dict: bool
        Whether to return a dict instead of SearchResults, default: False

    Returns
    -------
    SearchResults or dict of str
        Search results
    """
    api_key = DartAuth().api_key
    url = 'http://dart.fss.or.kr/api/search.json'
    params = dict()
    params['auth'] = api_key
    params = _set_params(params, crp_cd=crp_cd, start_dt=start_dt, end_dt=end_dt,
                         fin_rpt=fin_rpt, dsp_tp=dsp_tp, bsn_tp=bsn_tp,
                         sort=sort, series=series)
    if isinstance(page_no, int) and page_no >= 1:
        params['page_no'] = page_no
    if isinstance(page_set, int) and 1 <= page_set <= 100:
        params['page_set'] = page_set
    resp = request_get(url=url, params=params)
    data = resp.json()
    data['report_list'] = data.pop('list')
    params.pop('auth')  # don't expose the API key in the returned params
    if return_dict:
        return {'params': params, 'data': data}
    return SearchResults(params=params, data=data)
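A usage sketch of the search function; the stock code ('005930', Samsung Electronics) and the date range are example values, and iterating over `SearchResults` is assumed to yield individual reports:

results = search_report(crp_cd='005930', start_dt='20190101', end_dt='20191231',
                        fin_rpt=True, page_set=50)
for report in results:
    print(report)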