import json
import os
import re
import zipfile
from datetime import datetime

import pandas as pd
from bs4 import BeautifulSoup

# Project-local helpers assumed to be provided elsewhere in this package:
# crawler.connect, rm_escape, make_blank, e_to_s (EDINET/JPX crawlers) and
# request, CONF, connect, deconstruct_address_string, geocode, to_json (check_node).


def get_n(url):
    """Return the hit count ("〜件") shown in the page-link element of a result page."""
    res = crawler.connect(url)
    soup = BeautifulSoup(res, "lxml")
    kekka = soup.find("p", attrs={"class": "pageLink"}).string
    kekka = rm_escape(str(kekka))
    num = int(kekka.split("件")[0])
    return num
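# Usage sketch (assumptions: crawler.connect returns the page HTML and the target
# page contains a <p class="pageLink"> element whose text starts with the hit count;
# the URL below is a placeholder, not a real query):
#
#     results_url = "https://disclosure.edinet-fsa.go.jp/..."  # placeholder
#     print(get_n(results_url))  # e.g. 123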
def companies():
    """Download the JPX listed-company Excel file and convert it to ~/c_info/companies.csv."""
    url = "http://www.jpx.co.jp/markets/statistics-equities/misc/01.html"
    root = "http://www.jpx.co.jp"
    res = crawler.connect(url)
    soup = BeautifulSoup(res, "lxml")
    link = soup.table.td.a["href"]
    res = crawler.connect(root + link)
    if res != "":
        with open(os.path.expanduser("~/c_info/data_j.xls"), "wb") as f:
            f.write(res)
        # Map each JPX market/product segment name to a numeric code.
        market = {"出資証券": "0",
                  "市場第一部(内国株)": "1",
                  "市場第二部(内国株)": "2",
                  "マザーズ(内国株)": "3",
                  "JASDAQ(スタンダード・内国株)": "4",
                  "ETF・ETN": "5",
                  "REIT・ベンチャーファンド・カントリーファンド・インフラファンド": "6"}
        df = pd.read_excel(os.path.expanduser("~/c_info/data_j.xls"))
        df["市場・商品区分"] = df["市場・商品区分"].map(market)
        df = df.loc[:, [
            "コード",          # securities code
            "銘柄名",          # company name
            "市場・商品区分",  # market/product segment
            "33業種区分",      # 33-sector classification
            "17業種区分",      # 17-sector classification
        ]].applymap(make_blank)
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
        timestamps = pd.Series([timestamp] * len(df))
        df["created_at"] = timestamps
        df["updated_at"] = timestamps
        df.to_csv(os.path.expanduser("~/c_info/companies.csv"),
                  index=False, header=False, encoding="utf-8")
    else:
        print("更新に失敗しました。")  # "Update failed."
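# Usage sketch (assumptions: the ~/c_info directory exists and is writable, and the
# downloaded data_j.xls keeps the column names referenced above):
#
#     companies()  # writes ~/c_info/data_j.xls and ~/c_info/companies.csv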
def check_node():
    """
    Checks the current status of a node. This is a live result, so response times
    will be longer - to view a saved result see /api/check_historic_node.
    :param node: connection string, e.g. ok:127.0.0.1:6970 - port is optional if it is the network default
    :param to_services (integer, optional): outgoing services to broadcast, default=0
    :param from_services (integer, optional): outgoing services to broadcast, default=0
    :param version (integer, optional): version code to broadcast, default varies by network
    :param user_agent (string, optional): user agent to broadcast, default="/oknodes:0.1/"
    :param height (integer, optional): block height to broadcast during handshake, default=network median
    :param p2p_nodes (bool, optional): issues a getaddr call and returns the list of connected nodes, default=False
    :return: json dict {"result":{"user_agent":"/oktoshi:5.0.0.2/", "version":" .... },
             "nodes":[["127.0.0.1:6970, 157532132191], ...]}
    """
    dat = request.form
    node = dat.get("node")
    network, address, port, resp = deconstruct_address_string(node)
    network_data = CONF['networks'][network]
    if dat.get("height"):
        network_data['height'] = dat.get("height")
    else:
        # Fall back to the network's median height from the saved summaries.
        with open("static/network_summaries.json", 'r') as f:
            network_data['height'] = int(json.load(f)[network]['med'])
    network_data['protocol_version'] = dat.get("version") or network_data['protocol_version']
    result = connect(network, address, port,
                     to_services=dat.get("to_services") or network_data['services'],
                     network_data=network_data,
                     user_agent=dat.get("user_agent") or None,
                     p2p_nodes=False,
                     explicit_p2p=dat.get("p2p_nodes") or False,
                     from_services=dat.get('from_services') or None,
                     keepalive=False)
    resp['result'] = result[0]
    resp['nodes'] = result[1]
    resp['result'] = geocode(resp['result'])
    return to_json(resp)
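# Usage sketch (assumptions: this handler is exposed by the surrounding web app as a
# POST endpoint, e.g. /api/check_node; the route path and host below are illustrative,
# not confirmed by this file):
#
#     import requests
#     r = requests.post("http://localhost:5000/api/check_node",
#                       data={"node": "ok:127.0.0.1:6970"})
#     print(r.json()["result"])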
def e_code_list():
    """Download the EDINET code list (EdinetcodeDlInfo.zip) and extract it into ~/c_info."""
    root = "https://disclosure.edinet-fsa.go.jp"
    top = "/E01EW/download?1512538021191"
    uji_bean = "ee.bean.W1E62071.EEW1E62071Bean"
    uji_verb = "W1E62071EdinetCodeDownload"
    SESSIONKEY = top.split("?")[1]
    file_info = "lgKbn=2&dflg=0&iflg=0&dispKbn=1"
    _id = "W1E62071"
    components = [
        top,
        "uji.bean=" + uji_bean,
        "uji.verb=" + uji_verb,
        "TID=" + _id,
        "PID=" + _id,
        "SESSIONKEY=" + SESSIONKEY,
        file_info
    ]
    url = root + "&".join(components)
    res = crawler.connect(url)
    with open(os.path.expanduser("~/c_info/EdinetcodeDlInfo.zip"), "wb") as f:
        f.write(res)
    with zipfile.ZipFile(os.path.expanduser("~/c_info/EdinetcodeDlInfo.zip"), "r") as input_file:
        input_file.extractall(path=os.path.expanduser("~/c_info"))
    os.remove(os.path.expanduser("~/c_info/EdinetcodeDlInfo.zip"))
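# Usage sketch (assumptions: ~/c_info exists and is writable, and the hard-coded
# EDINET download parameters above are still accepted by the server):
#
#     e_code_list()  # leaves the extracted EDINET code list files under ~/c_info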
def get_components(url):
    """Scrape an EDINET result page and return parallel lists of
    securities codes, report titles, PDF links, and XBRL download links."""
    root = "https://disclosure.edinet-fsa.go.jp"
    pdfs = []
    xbrls = []
    titles = []
    codes = []
    res = crawler.connect(url)
    soup = BeautifulSoup(res, "lxml")
    for row in soup.find_all("tr"):
        for cell in row.find_all("td", class_="table_border_1 table_cellpadding_1 "):
            if cell.find("a", attrs={"onclick": re.compile("^return clickDocNameForNotPaper")}) is not None:
                # Store the report title; skip rows that are not securities reports ("有価...").
                title = str(cell.a.string)
                title = rm_escape(title)
                if not re.match("有価*", title):
                    break
                titles.append(title)
            elif cell.find("img", attrs={"alt": "PDF"}) is not None:
                # Store the PDF link.
                pdf = root + cell.a["href"]
                pdfs.append(pdf)
            elif cell.find("img", attrs={"alt": "XBRL"}) is not None:
                # Rebuild the XBRL download URL from the onclick handler's arguments.
                xbrl = cell.a["onclick"]
                components = xbrl.split("\'")
                top = components[7]
                uji_bean = components[3]
                uji_verb = components[1]
                SESSIONKEY = top.split("?")[1]
                file_info = components[5]
                components = [
                    top,
                    "uji.bean=" + uji_bean,
                    "uji.verb=" + uji_verb,
                    "SESSIONKEY=" + SESSIONKEY,
                    file_info
                ]
                xbrl = root + "&".join(components)
                xbrls.append(xbrl)
            elif cell.div is not None:
                # Store the EDINET code, converted to a securities code.
                content = str(cell.div.string)
                content = rm_escape(content)
                if re.match("^E", content):
                    code = content
                    code = e_to_s(code)
                    codes.append(code)
                elif cell.div.br is not None:
                    content = str(cell.div)
                    content = rm_escape(content)
                    code = content.split(">")[1].replace("/<br/", "")
                    code = e_to_s(code)
                    codes.append(code)
    # Only return results if all four lists line up one-to-one.
    if len(pdfs) == len(xbrls) and len(titles) == len(codes) and len(pdfs) == len(titles):
        return codes, titles, pdfs, xbrls
    else:
        print(len(codes), len(titles), len(pdfs), len(xbrls))
        return [], [], [], []
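# Usage sketch (the URL is a placeholder for an EDINET search-result page, not a real
# query; empty lists are returned when the scraped lists do not line up):
#
#     codes, titles, pdfs, xbrls = get_components("https://disclosure.edinet-fsa.go.jp/...")
#     for code, title, pdf, xbrl in zip(codes, titles, pdfs, xbrls):
#         print(code, title, pdf, xbrl)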