def http_get_with_retry(url, times=3):
    for i in range(times):
        try:
            if i == 0:
                return HttpHelper.http_get(url)
            else:
                time.sleep(1)
                # sometimes the bidDate field is unavailable, so drop it and retry.
                url = url.replace('bidDate,', '')
                return HttpHelper.http_get(url)
        except Exception as e:
            print str(e)
    raise Exception('Failed to get data from %s' % url)
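A minimal usage sketch of the retry helper above; the option-quote URL is illustrative only and not part of this module, and HttpHelper/time are assumed to be imported as elsewhere in this code.

# Hypothetical usage of http_get_with_retry; the URL and its fields are for illustration.
option_url = 'http://www.cboe.com/delayedquote/quotedata?fields=bidDate,bid,ask&symbol=VIX'
try:
    content = http_get_with_retry(option_url)   # retries with 'bidDate,' stripped after the first failure
except Exception as e:
    print str(e)                                # all retries exhausted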
def get_vix_future():
    url = 'http://www.cboe.com/delayedquote/'
    content = HttpHelper.http_get(url)
    content = string_fetch(content, 'FutureDataTabs', 'sf_colsIn')
    items = content.split(' <a href="futures-quotes?')
    vix_items = filter(lambda x: 'VIX/' in x, items)
    return map(CBOEScraper.parse_vix_future, vix_items)
def get_data_by_symbol(self, symbol):
    url = 'https://www.marketwatch.com/investing/fund/%s' % symbol
    get_logger().info('Http request to: %s' % url, False)
    content = HttpHelper.http_get(url)
    content = string_fetch(content, 'mw-rangeBar precision=', 'Day Low')
    value = string_fetch(content, '\"last-value\">', '</span>')
    return float(value.replace(',', ''))
def get_data_by_symbols(self, symbols):
    sina_symbols = ','.join(map(lambda x: 'gb_%s' % x.replace('.', '$').lower(), symbols))
    url = 'http://hq.sinajs.cn/?list=%s' % sina_symbols
    content = HttpHelper.http_get(url)
    items = content.split(';')[:-1]
    values = map(lambda x: float(string_fetch(x, ',', ',')), items)
    return values
def get_vxmt_daily():
    url = 'http://www.cboe.com/publish/ScheduledTask/MktData/datahouse/vxmtdailyprices.csv'
    content = HttpHelper.http_get(url)
    records = content.split('\r\n')[3:-1]
    yahoo_records = ['Date,Open,High,Low,Close,Adj Close,Volume'] + map(CBOEScraper.to_yahoo_format, records)
    yahoo_content = '\r\n'.join(yahoo_records)
    path = PathMgr.get_historical_etf_path('^VXMT')
    write_to_file(path, yahoo_content)
def ingest_credit():
    content = HttpHelper.http_get(CREDIT_URL)
    content = string_fetch(content, ' View All Years', '')
    sub_contents = content.split('Securities market credit')[1:]
    all_credits = []
    for sub_content in sub_contents:
        credits = parse_table(sub_content)
        all_credits.extend(credits)
    return all_credits
def get_option_content(symbol, url_template):
    # url = 'http://bigcharts.marketwatch.com/quickchart/options.asp?symb={}&showAll=True'.format(symbol)
    url = url_template.format(symbol)
    content = HttpHelper.http_get(url)
    if 'showAll' in url:
        file_path = PathMgr.get_bigcharts_option_symbol_path(symbol + '2')
    else:
        file_path = PathMgr.get_bigcharts_option_symbol_path(symbol + '1')
    write_to_file(file_path, content)
    return content
def get_current_data(symbols, logger=Logger(__name__, None)):
    url_template = "http://marketdata.websol.barchart.com/getQuote.json?apikey=7aa9a38e561042d48e32f3b469b730d8&symbols={}"
    url = url_template.format(','.join(symbols))
    try:
        content = HttpHelper.http_get(url)
    except Exception as e:
        logger.error('Trace: ' + traceback.format_exc(), False)
        logger.error('Error: get action arguments failed:' + str(e))
        content = ''
    return list(BarchartScraper.parse_content(content))
def get_current_data(self, symbol):
    yahoo_symbol = Symbols.get_mapped_symbol(symbol)
    url = 'https://finance.yahoo.com/quote/%s/' % yahoo_symbol
    content = HttpHelper.http_get(url)
    try:
        sub_content = string_fetch(content, 'Currency in USD', 'At close:')
        sub_content = string_fetch(sub_content, 'react-text', 'react-text')
        value = string_fetch(sub_content, '-->', '<!--')
        return float(value.replace(',', ''))
    except Exception:
        sub_content = string_fetch(content, '\"close\":', ',')
        value = round(float(sub_content), 2)
        return value
def get_data_by_symbol(symbol):
    url = 'https://www.marketwatch.com/investing/fund/%s' % symbol
    headers = {
        "Accept-Language": "en-US,en;q=0.5",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0",
        "Connection": "keep-alive"
    }
    content = HttpHelper.http_get(url, headers)
    content = string_fetch(content, 'mw-rangeBar precision=', 'Day Low')
    value = string_fetch(content, '\"last-value\">', '</span>')
    return float(value.replace(',', ''))
def history_min(self, symbol, window):
    url_template = 'http://ondemand.websol.barchart.com/getHistory.json?apikey={}&symbol={}.BZ&type=formTMinutes&startDate={}00'
    # window is in one-minute bars: ~391 bars per trading day, plus a two-day buffer.
    days_window = window / 391 + 2
    from_date = TradeTime.get_from_date_by_window(days_window)
    start_time = datetime.datetime(from_date.year, from_date.month, from_date.day, 0, 0)
    url = url_template.format(self.api_key, symbol, start_time.strftime('%Y%m%d%M%S'))
    content = HttpHelper.http_get(url)
    data = json.loads(content)
    if data['status']['code'] != 200:
        raise Exception('http response unexpected, the content is: %s' % content)
    else:
        rows = map(lambda x: [datetime.datetime.strptime(x['timestamp'][:-6], '%Y-%m-%dT%H:%M:%S'), x['close']],
                   data['results'])
        rows = filter(lambda x: TradeTime.is_valid_trade_time(x[0]), rows)
        rows.sort(key=lambda x: x[0])
        return rows[-window:]
def get_data_by_symbol(symbol):
    logger = Logger(__name__, PathMgr.get_log_path())
    yahoo_symbol = Symbols.get_mapped_symbol(symbol)
    url = 'https://finance.yahoo.com/quote/%s/' % yahoo_symbol
    logger.info('Http request to: %s' % url, False)
    content = HttpHelper.http_get(url)
    try:
        sub_content = string_fetch(content, 'Currency in USD', 'At close:')
        sub_content = string_fetch(sub_content, 'react-text', 'react-text')
        value = string_fetch(sub_content, '-->', '<!--')
        return float(value.replace(',', ''))
    except Exception:
        sub_content = string_fetch(content, '\"close\":', ',')
        value = round(float(sub_content), 2)
        return value
def download_quote2(symbol, date_from, date_to):
    url_template = 'https://query1.finance.yahoo.com/v7/finance/download/{}?period1={}&period2={}&interval=1d&events=history&crumb={}'
    time_stamp_from = calendar.timegm(datetime.datetime.strptime(date_from, "%Y-%m-%d").timetuple())
    time_stamp_to = calendar.timegm(datetime.datetime.strptime(date_to, "%Y-%m-%d").timetuple())
    crumble_str, cookie = YahooScraper.get_crumble_and_cookie_with_cache()
    url = url_template.format(symbol, time_stamp_from, time_stamp_to, crumble_str)
    attempts = 0
    while attempts < 5:
        try:
            content = HttpHelper.http_get_with_time_out(url, {'Cookie': cookie})
            return content
        except urllib2.URLError:
            print "{} failed at attempt # {}".format(symbol, attempts)
            attempts += 1
            time.sleep(2 * attempts)
    return ""
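A usage sketch for download_quote2 above; the symbol, date range, and the choice of PathMgr.get_historical_etf_path as the destination are illustrative assumptions, not fixed by this module.

# Hypothetical usage: fetch one year of SPY daily bars and save the raw CSV.
csv_content = download_quote2('SPY', '2017-01-01', '2017-12-31')
if csv_content:
    write_to_file(PathMgr.get_historical_etf_path('SPY'), csv_content)
else:
    print 'download_quote2 returned empty content after 5 attempts'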
def get_crumble_and_cookie2():
    # url_template = 'https://finance.yahoo.com/quote/{0}/history?p={0}'
    url = 'https://finance.yahoo.com/quote/SPY'
    # content = HttpHelper.http_get(url, headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'})
    chrome_driver_path = ConfigMgr.get_others_config()['chromedriver']
    # PhantomJS does not seem to work here, so use the Chrome webdriver instead.
    # phantomjs_path = ConfigMgr.get_others_config()['phantomjs']
    # content = HttpHelper.webdriver_http_get2(url, phantomjs_path)
    content = HttpHelper.webdriver_http_get(url, chrome_driver_path)
    crumb = string_fetch(content, 'CrumbStore\":{\"crumb\":\"', '\"}')
    cookie_value = string_fetch(content, 'guid=', ';')
    cookie = 'B=%s' % cookie_value
    return crumb, cookie
def history(self, symbol, field, window):
    from_date = TradeTime.get_from_date_by_window(window)
    url_template = 'http://ondemand.websol.barchart.com/getHistory.json?apikey={}&symbol={}.BZ&type=daily&startDate={}000000'
    url = url_template.format(self.api_key, symbol, from_date.strftime('%Y%m%d'))
    fields_dic = {'open': 'open', 'close': 'close', 'high': 'high', 'low': 'low', 'price': 'close', 'unadj': 'close'}
    fields = fields_dic.keys()
    if field.lower() not in fields:
        raise Exception('the field should be in %s...' % fields)
    price_field = fields_dic[field.lower()]
    content = HttpHelper.http_get(url)
    data = json.loads(content)
    if data['status']['code'] != 200:
        raise Exception('http response unexpected, the content is: %s' % content)
    else:
        rows = map(lambda x: [datetime.datetime.strptime(x['tradingDay'], '%Y-%m-%d'), x[price_field]],
                   data['results'])
        return rows
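A brief usage sketch of the daily history method above; 'client' is a placeholder for whatever instance defines history() and holds api_key, since the enclosing class is not shown here.

# Hypothetical usage: roughly one trading year of daily closes for SPY.
rows = client.history('SPY', 'close', 252)        # [[datetime, close], ...]
for trading_day, close_price in rows[-5:]:
    print trading_day.strftime('%Y-%m-%d'), close_price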
def get_data_by_symbol(self, symbol):
    url = 'http://hq.sinajs.cn/?list=gb_%s' % symbol.replace('.', '$').lower()
    content = HttpHelper.http_get(url)
    value = string_fetch(content, ',', ',')
    return float(value)
def get_minutes_equites(self, symbol):
    url_template = 'https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol=%s&interval=1min&apikey=%s'
    url = url_template % (symbol, self.apikey)
    content = HttpHelper.http_get(url)
    return list(self.parse_record(content, symbol))
def get_current_data(self, symbol):
    url = 'https://www.marketwatch.com/investing/fund/%s' % symbol
    content = HttpHelper.http_get(url)
    content = string_fetch(content, 'mw-rangeBar precision=', 'Day Low')
    value = string_fetch(content, '\"last-value\">', '</span>')
    return float(value.replace(',', ''))
def get_data_by_symbol(self, symbol):
    url = 'https://www.laohu8.com/hq/s/%s' % symbol
    content = HttpHelper.http_get(url)
    value = string_fetch(content, 'class=\"price\">', '</td>')
    return float(value)
def get_data_by_symbol(self, symbol):
    url = 'https://www.cnbc.com/quotes/?symbol=%s' % symbol
    content = HttpHelper.http_get(url)
    value = string_fetch(content, '\"previous_day_closing\":\"', '\"')
    return float(value.replace(',', ''))