def Dump2(channel_id):
    """Record the UDP stream of *channel_id* into per-channel HTML path forever.

    Looks up the channel's UDP port, creates the channel's output directory
    under globv.html_path if needed, then loops calling dump() in 60-second
    chunks. Failures are mirrored into the module-level error_map and the
    channel is marked stopped in the DB on exit.

    :param channel_id: channel identifier; also used as directory/log name.
    """
    global error_map
    globv.update_logger.info('start record %s' % channel_id)
    port = dbutil.get_udp_port(channel_id)
    if port is None:
        # No port in the channel's address record: flag and bail out.
        error_map[channel_id] = 'wrong stream'
        globv.update_logger.error(
            'channel %s \'s address don\'t have a port number!' % channel_id)
        dbutil.set_start(channel_id, False)
        return
    channel_path = os.path.join(globv.html_path, channel_id)
    if not os.path.exists(channel_path):
        globv.update_logger.info('create path: %s' % channel_path)
        os.makedirs(channel_path)
    logger = logutil.getLogger(os.path.join(channel_path, channel_id + '.log'),
                               name=channel_id)
    try:
        while True:
            error_map[channel_id] = "success"
            logger.debug('%s start record with dump()' % channel_id)
            # dump() presumably records 60s of the stream; non-zero means
            # it stopped/failed -- TODO confirm against dump()'s contract.
            res = dump(port, channel_path.encode(), 60)
            if res != 0:
                error_map[channel_id] = 'no stream'
                # BUG FIX: original called undefined `ldbutil.update_err`;
                # every other DB call in this function goes through `dbutil`.
                dbutil.update_err(channel_id, 'no stream')
                logger.debug(channel_id + ' stopped by dump()!')
                globv.update_logger.error('channel %s stopped by dump()!'
                                          % channel_id)
                # Back off 5 minutes before retrying a dead stream.
                time.sleep(60 * 5)
    except Exception as e:
        logger.exception(e)
    finally:
        # Always mark the channel as no longer recording.
        dbutil.set_start(channel_id, False)
def initConfigFile():
    """Read replay.conf next to the program and populate module-level config.

    Sets UPDATE_FREQUENCY, IP, EXPIRE, update_logger, html_path, PORT and
    EPG_URL. On any failure the exception is logged and the process exits
    with status -1.
    """
    global UPDATE_FREQUENCY, IP, EXPIRE, update_logger, html_path, PORT, EPG_URL
    config_file_name = 'replay.conf'
    try:
        config_file = os.path.join(cur_path, config_file_name)
        if not os.path.exists(config_file):
            raise ValueError('%s is not exists in current folder!' % config_file_name)
        cf.read(config_file)
        html_path = cf.get('dump', 'html_path')
        UPDATE_FREQUENCY = cf.getint('dump', 'update_frequence')
        IP = cf.get('record_server', 'ip')
        # +1 day of slack over the configured expiry window.
        EXPIRE = cf.getint('dump', 'expire') + 1
        # Translate the configured level name to a logging constant;
        # anything unrecognised falls back to DEBUG.
        level_by_name = {
            'DEBUG': logging.DEBUG,
            'INFO': logging.INFO,
            'WARNING': logging.WARNING,
            'ERROR': logging.ERROR,
        }
        LOG_LEVEL = level_by_name.get(cf.get('log', 'level'), logging.DEBUG)
        print('log level is', logging.getLevelName(LOG_LEVEL))
        update_logger = logutil.getLogger(os.path.join(html_path, 'dump.log'),
                                          name='dump', level=LOG_LEVEL)
        PORT = cf.getint('record_server', 'port')
        EPG_URL = cf.get('dump', 'epg_url')
    except Exception as e:
        logging.exception(e)
        exit(-1)
def stub_runtime():
    """Build a minimal Runtime for tests: no parent-version lookup, no stage,
    no branch, and an empty group_config Model."""
    stub = runtime.Runtime(
        latest_parent_version=False,
        logger=logutil.getLogger(__name__),
        stage=False,
        branch=None,
    )
    stub.group_config = Model()
    return stub
def download_stock_list(td_processor=get_stock_dict, proxy_flag=False,
                        retry_time=3, retry_delay=10, timeout=PAGE_TIMEOUT):
    """Download the HKEX (Chinese) stock-list page and process each table row.

    :param td_processor: callable applied to each row's list of <td> tags;
        defaults to get_stock_dict.
    :param proxy_flag: when True, route the request through a random proxy.
    :param retry_time: number of download attempts before giving up.
    :param retry_delay: seconds slept between failed attempts.
    :param timeout: per-request timeout passed to the HTTP layer.
    :return: list of td_processor results, one per matched row; empty list
        when every attempt fails.
    """
    # Define the destination URL
    hkex_list_url = HKEXNEWS_URL_CHI
    # CSS classes of the alternating (odd/even) data rows on the page.
    hkex_list_tr_class_list = [
        "ms-rteTableOddRow-BlueTable_CHI", "ms-rteTableEvenRow-BlueTable_CHI"
    ]
    # Section of downloading stock list
    logger = logutil.getLogger(__name__)
    logger.info('It starts to download stock list. Please wait.')
    tr_list = []
    for _ in range(retry_time):
        try:
            proxy_server = None
            if proxy_flag:
                proxy_server = webutil.get_random_proxy()
                logger.info('download via a proxy server: %s',
                            proxy_server['ip'] + ':' + proxy_server['port'])
            response = webutil.create_get_request(url=hkex_list_url,
                                                  proxy_server=proxy_server,
                                                  timeout=timeout)
            if response.status_code != 200:
                response.raise_for_status()
            decoded_result = response.content.decode('utf-8', 'ignore')
            if len(decoded_result) <= 0:
                logger.error('Failed to retrieve content.')
                continue
            # Create a BeautifulSoup object
            soup = BeautifulSoup(decoded_result, 'html.parser')
            # Search by CSS Selector
            tr_list = soup.findAll("tr", {"class": hkex_list_tr_class_list})
            break
        except Timeout:
            logger.error('socket timed out - URL %s', hkex_list_url)
            time.sleep(retry_delay)
        except RequestException as error:
            logger.error('Data not retrieved because %s\nURL: %s', error,
                         hkex_list_url)
            time.sleep(retry_delay)
        except Exception as error:
            logger.error('Unexpected error of %s\nURL: %s', error,
                         hkex_list_url)
            time.sleep(retry_delay)
    else:
        # for/else: runs only when no attempt broke out, i.e. all retries failed.
        logger.error('No response after %d retry.' % retry_time)
    logger.info('Downloading stock list completed with length of %d.',
                len(tr_list))
    # Prepare list of stock triple from list of table TR objects
    return [td_processor(tr.findAll("td")) for tr in tr_list]
def find_crumb_store(lines):
    '''
    Return the first line of *lines* containing the CrumbStore token.

    Yahoo! Finance embeds ,"CrumbStore":{"crumb":"9q.A4D1c.b9 in its page
    source; the caller extracts the crumb from the returned line. Logs an
    error and returns None when nothing matches.
    '''
    logger = logutil.getLogger(__name__)
    hit = next((line for line in lines if re.findall(r'CrumbStore', line)),
               None)
    if hit is None:
        logger.error("Did not find CrumbStore")
    return hit
def initialize_logging(self):
    """Wire up root and command loggers once; subsequent calls are no-ops.

    Output modes (from command flags):
      --verbose  prints logs to the CLI as well as to files
      --debug    raises verbosity for detailed internal-behavior logging
      --quiet    opposes both verbose and debug
    """
    if self.initialized:
        return

    # Choose the console level from the mutually opposing flags.
    if self.debug:
        console_level = logging.DEBUG
    elif self.quiet:
        console_level = logging.WARN
    else:
        console_level = logging.INFO

    plain_formatter = logging.Formatter(
        '%(asctime)s %(levelname)s %(message)s')

    root_logger = logging.getLogger()
    root_logger.setLevel(logging.WARN)
    console_for_root = logging.StreamHandler()
    console_for_root.setFormatter(plain_formatter)
    root_logger.addHandler(console_for_root)

    # Outside debug mode only children of "ocp" may log through root;
    # in debug mode every module is allowed.
    if not self.debug:
        root_logger.addFilter(logging.Filter("ocp"))

    # The command's own logger (ocp_cd_tools): keep the master level low
    # and let each handler pick what to emit.
    self.logger = logutil.getLogger()
    self.logger.propagate = False
    self.logger.setLevel(logging.DEBUG)

    console_handler = logging.StreamHandler()
    console_handler.setFormatter(plain_formatter)
    console_handler.setLevel(console_level)
    self.logger.addHandler(console_handler)

    self.debug_log_path = os.path.join(self.working_dir, "debug.log")
    file_handler = logging.FileHandler(self.debug_log_path)
    # Thread id in the debug log so interleaved output can be untangled.
    file_handler.setFormatter(
        logging.Formatter(
            '%(asctime)s %(levelname)s (%(thread)d) %(message)s'))
    file_handler.setLevel(logging.DEBUG)
    self.logger.addHandler(file_handler)
def download_dividend_hist(stock_code, proxy_flag=False, retry_time=3,
                           retry_delay=10, timeout=PAGE_TIMEOUT):
    """Scrape the AAStocks dividend-history page for *stock_code*.

    :param stock_code: stock symbol used in the AAStocks URL.
    :param proxy_flag: when True, route the request through a random proxy.
    :param retry_time: number of download attempts before giving up.
    :param retry_delay: seconds slept between failed attempts.
    :param timeout: per-request timeout passed to the HTTP layer.
    :return: list built by get_dividend_list from the page's table rows;
        empty list when every attempt fails.
    """
    logger = logutil.getLogger(__name__)
    dividend_list = []
    data_url = 'http://www.aastocks.com/en/stocks/analysis/dividend.aspx?symbol={}'.format(
        stock_code)
    for _ in range(retry_time):
        try:
            proxy_server = None
            if proxy_flag:
                proxy_server = webutil.get_random_proxy()
                logger.info('download via a proxy server: %s',
                            proxy_server['ip'] + ':' + proxy_server['port'])
            response = webutil.create_get_request(url=data_url,
                                                  proxy_server=proxy_server,
                                                  timeout=timeout)
            if response.status_code != 200:
                response.raise_for_status()
            decoded_result = response.content.decode('utf-8', 'ignore')
            if len(decoded_result) <= 0:
                logger.error('Failed to retrieve content.')
                continue
            stock_soup = BeautifulSoup(decoded_result, 'html.parser')
            # Helpers (defined elsewhere) extract the dividend table rows
            # and convert them into the result records.
            tr_list = get_dividend_trlist(stock_soup)
            dividend_list = get_dividend_list(tr_list, stock_code)
            break
        except Timeout:
            logger.error('socket timed out - URL %s', data_url)
            time.sleep(retry_delay)
        except RequestException as error:
            logger.error('Data not retrieved because %s\nURL: %s', error,
                         data_url)
            time.sleep(retry_delay)
        except Exception as error:
            logger.error('Unexpected error of %s\nURL: %s', error, data_url)
            time.sleep(retry_delay)
    else:
        # for/else: all retries failed without a successful break.
        logger.error('No response after %d retry.' % retry_time)
    return dividend_list
def download_bloomberg_df(stock_code_list, max_workers=10, proxy_flag=False):
    """Fetch Bloomberg quotes for every code in *stock_code_list* in parallel.

    :param stock_code_list: iterable of stock symbols.
    :param max_workers: thread-pool size for the concurrent downloads.
    :param proxy_flag: forwarded to download_bloomberg_quote.
    :return: DataFrame of all quote dicts, indexed by 'stock_code'.
    """
    logger = logutil.getLogger(__name__)
    # Threads start and it takes quite a long time due to multiple network I/O
    logger.info('It starts to download stock quotes. Please wait.')
    quotes = []
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=max_workers) as pool:
        pending = {
            pool.submit(download_bloomberg_quote, code, proxy_flag): code
            for code in stock_code_list
        }
        # Collect results as each thread finishes (completion order).
        for done in concurrent.futures.as_completed(pending):
            quotes.append(done.result())
    logger.info('Downloading stock quotes completed.')
    return pd.DataFrame(data=quotes).set_index('stock_code', append=False)
def _info(message):
    """Log *message* at INFO level through the CommitServer logger."""
    logutil.getLogger("CommitServer").info(message)
def get_stock_quote(stock_code, proxy_flag=False, retry_time=3,
                    retry_delay=10, timeout=PAGE_TIMEOUT):
    '''
    Get stock quote in format of dictionary by a given stock Code

    Parameters
    ----------
    stock_code : string
        stock Code
    proxy_flag : boolean
        Whether retrieval uses a random Proxy Server
    retry_time : int
        number of time to retry if each connection fails
    retry_delay : int
        How long does it wait if retry fails to get the next
    timeout : number
        per-request timeout passed to the HTTP layer

    Returns
    -------
    stock quote in format of dictionary; always contains at least the
    'stock_code' key, and is otherwise empty when every attempt fails
    '''
    logger = logutil.getLogger(__name__)
    stock_url = 'https://hk.finance.yahoo.com/quote/' + stock_code
    # CSS class of the label cells in Yahoo's quote summary table.
    td_class = "C(black) W(51%)"
    for _ in range(retry_time):
        try:
            td_list = []
            proxy_server = None
            if proxy_flag:
                proxy_server = webutil.get_random_proxy()
                logger.info('download via a proxy server: %s',
                            proxy_server['ip'] + ':' + proxy_server['port'])
            response = webutil.create_get_request(url=stock_url,
                                                  proxy_server=proxy_server,
                                                  timeout=timeout)
            if response.status_code != 200:
                response.raise_for_status()
            stock_page = response.content.decode('utf-8', 'ignore')
            stock_soup = BeautifulSoup(stock_page, 'html.parser')
            td_list = stock_soup.findAll('td', {"class": td_class})
            logger.info('Result of %s has %d records', stock_code,
                        len(td_list))
            # Each label <td> is immediately followed by its value <td>.
            pair_list = {
                td.get_text(): td.findNext('td').get_text()
                for td in td_list
            }
            break
        except Timeout:
            logger.error('socket timed out - URL %s', stock_url)
            time.sleep(retry_delay)
        except RequestException as error:
            logger.error('Data not retrieved because %s\nURL: %s', error,
                         stock_url)
            time.sleep(retry_delay)
        except Exception as error:
            logger.error('Unexpected error of %s\nURL: %s', error, stock_url)
            time.sleep(retry_delay)
    else:
        # for/else: all retries failed -- fall back to an empty result so the
        # update below cannot raise NameError.
        logger.error('No response after %d retry.' % retry_time)
        pair_list = {}
    pair_list.update({'stock_code': stock_code})
    return pair_list
def download_bloomberg_quote(stock_code, proxy_flag=False, retry_time=3,
                             retry_delay=10, timeout=PAGE_TIMEOUT):
    """Scrape a single Bloomberg quote page into a flat dictionary.

    :param stock_code: symbol used in the Bloomberg quote URL.
    :param proxy_flag: when True, route the request through a random proxy.
    :param retry_time: number of download attempts before giving up.
    :param retry_delay: seconds slept between failed attempts.
    :param timeout: per-request timeout passed to the HTTP layer.
    :return: dict always containing 'stock_code'; on success also the
        scraped price/volume/category fields (empty strings when a section
        is absent from the page).

    NOTE(review): the CSS class names (e.g. 'priceText__1853e8a5') look like
    build-generated suffixes and will break when Bloomberg redeploys --
    scraping here is inherently fragile.
    """
    logger = logutil.getLogger(__name__)
    data_dict = {'stock_code': stock_code}
    data_url = 'https://www.bloomberg.com/quote/{}'.format(stock_code)
    for _ in range(retry_time):
        try:
            proxy_server = None
            if proxy_flag:
                proxy_server = webutil.get_random_proxy()
                logger.info('download via a proxy server: %s',
                            proxy_server['ip'] + ':' + proxy_server['port'])
            response = webutil.create_get_request(url=data_url,
                                                  proxy_server=proxy_server,
                                                  timeout=timeout)
            if response.status_code != 200:
                response.raise_for_status()
            decoded_result = response.content.decode('utf-8', 'ignore')
            if len(decoded_result) <= 0:
                logger.error('Failed to retrieve content.')
                continue
            stock_soup = BeautifulSoup(decoded_result, 'html.parser')
            # Each dataBox section holds one labelled figure; the value
            # lives in the first <div> after the section tag.
            # Open Price
            open_price_section = stock_soup.find(
                'section', {"class": 'dataBox openprice numeric'})
            open_price = ''
            if open_price_section is not None:
                open_price_div = open_price_section.findNext('div')
                if open_price_div is not None:
                    open_price = open_price_div.get_text()
            # Previous Close
            prev_close_section = stock_soup.find('section', {
                "class": 'dataBox previousclosingpriceonetradingdayago numeric'
            })
            prev_close = ''
            if prev_close_section is not None:
                prev_close_div = prev_close_section.findNext('div')
                if prev_close_div is not None:
                    prev_close = prev_close_div.get_text()
            # Volume
            volume_section = stock_soup.find(
                'section', {"class": 'dataBox volume numeric'})
            volume = ''
            if volume_section is not None:
                volume_div = volume_section.findNext('div')
                if volume_div is not None:
                    volume = volume_div.get_text()
            # Market Cap
            marketcap_section = stock_soup.find(
                'section', {"class": 'dataBox marketcap numeric'})
            marketcap = ''
            if marketcap_section is not None:
                marketcap_div = marketcap_section.findNext('div')
                if marketcap_div is not None:
                    marketcap = marketcap_div.get_text()
            # Range one day
            rangeoneday_section = stock_soup.find(
                'section', {"class": 'dataBox rangeoneday'})
            rangeoneday = ''
            if rangeoneday_section is not None:
                rangeoneday_div = rangeoneday_section.findNext('div')
                if rangeoneday_div is not None:
                    rangeoneday = rangeoneday_div.get_text()
            # Range 52 weeks
            range52weeks_section = stock_soup.find(
                'section', {"class": 'dataBox range52weeks'})
            range52weeks = ''
            if range52weeks_section is not None:
                range52weeks_div = range52weeks_section.findNext('div')
                if range52weeks_div is not None:
                    range52weeks = range52weeks_div.get_text()
            # Industry Category
            industry_div = stock_soup.find(
                'div', {"class": 'industry labelText__6f58d7c0'})
            industry = ''
            if industry_div is not None:
                industry = industry_div.get_text()
            # Sector Category
            sector_div = stock_soup.find(
                'div', {"class": 'sector labelText__6f58d7c0'})
            sector = ''
            if sector_div is not None:
                sector = sector_div.get_text()
            # Nominal Price
            nominal_price_div = stock_soup.find(
                'span', {"class": 'priceText__1853e8a5'})
            nominal_price = ''
            if nominal_price_div is not None:
                nominal_price = nominal_price_div.get_text()
            data_dict.update({
                'prev_close': prev_close,
                'open_price': open_price,
                'nominal_price': nominal_price,
                'volume': volume,
                'marketcap': marketcap,
                'rangeoneday': rangeoneday,
                'range52weeks': range52weeks,
                'industry': industry,
                'sector': sector
            })
            # Generic key/value rows: label in the first following <span>,
            # value in the fieldValue span inside the row.
            div_list = stock_soup.findAll(
                'div', {"class": 'rowListItemWrap__4121c877'})
            for div in div_list or []:
                key_str = div.findNext('span').get_text()
                val_str = div.find('span', {
                    "class": 'fieldValue__2d582aa7'
                }).get_text()
                data_dict.update({key_str: val_str})
            next_announce_date_span = stock_soup.find(
                'span', {'class': 'nextAnnouncementDate__0dd98bb1'})
            if next_announce_date_span is not None:
                next_announce_date = next_announce_date_span.get_text()
                data_dict.update({'next_announce_date': next_announce_date})
            break
        except Timeout:
            logger.error('socket timed out - URL %s', data_url)
            time.sleep(retry_delay)
        except RequestException as error:
            logger.error('Data not retrieved because %s\nURL: %s', error,
                         data_url)
            time.sleep(retry_delay)
        except Exception as error:
            logger.error('Unexpected error of %s\nURL: %s', error, data_url)
            time.sleep(retry_delay)
    else:
        # for/else: all retries failed without a successful break.
        logger.error('No response after %d retry.' % retry_time)
    return data_dict
def _info(message):
    """Log *message* at INFO level through the GenerateAppSchemes logger."""
    logutil.getLogger("GenerateAppSchemes").info(message)
def _info(message):
    """Log *message* at INFO level through the CheckIpadOnly logger."""
    logutil.getLogger("CheckIpadOnly").info(message)
def download_yahoo_hist(stock_code, from_date='2000-01-01', to_date=None,
                        proxy_flag=False, retry_time=3, retry_delay=10,
                        timeout=PAGE_TIMEOUT):
    '''
    This function is to download historical stock prices from Yahoo! Finance.

    Parameters
    ----------
    stock_code : string
        Stock ID, usually a 4 digit number
    from_date: string
        Starting Date in yyyy-mm-dd format
    to_date: string
        Ending Date in yyyy-mm-dd format; defaults to today's date,
        evaluated per call.  (BUG FIX: the original default called
        datetime.now() once at import time, so the default end date went
        stale in long-running processes.)
    proxy_flag : boolean
        Whether retrieval uses a random proxy server
    retry_time : int
        number of time to retry if each connection fails
    retry_delay : int
        seconds to wait between retries

    Returns
    -------
    Pandas DataFrame indexed by date, or None when the date range is
    invalid or every download attempt fails
    '''
    logger = logutil.getLogger(__name__)
    if to_date is None:
        to_date = datetime.now().strftime(YAHOO_DATE_FORMAT)
    df = None
    from_timestamp = int(round(datetime.strptime(from_date, YAHOO_DATE_FORMAT).timestamp()))
    to_timestamp = int(round(datetime.strptime(to_date, YAHOO_DATE_FORMAT).timestamp()))
    if from_timestamp >= to_timestamp:
        # invalid time range
        return None
    CSV_FORMAT = 'https://query1.finance.yahoo.com/v7/finance/download/{}?period1={}&period2={}&interval=1d&events=history&crumb={}'
    # BUG FIX: csv_url must be bound before the first attempt.  If
    # get_cookie_crumb() raised on attempt one, the except handlers below
    # referenced an unassigned csv_url and crashed with NameError.
    csv_url = CSV_FORMAT.format(stock_code, from_timestamp, to_timestamp, '')
    for _ in range(retry_time):
        try:
            proxy_server = None
            if proxy_flag:
                proxy_server = webutil.get_random_proxy()
                logger.info('download via a proxy server: %s',
                            proxy_server['ip'] + ':' + proxy_server['port'])
            # Yahoo requires a session cookie plus a matching "crumb" token.
            cookie, crumb = get_cookie_crumb(stock_code,
                                             proxy_server=proxy_server,
                                             timeout=timeout)
            csv_url = CSV_FORMAT.format(stock_code, from_timestamp,
                                        to_timestamp, crumb)
            response = webutil.create_get_request(url=csv_url,
                                                  cookies=cookie,
                                                  proxy_server=proxy_server,
                                                  timeout=timeout)
            if response.status_code != 200:
                response.raise_for_status()
            else:
                df = pd.read_csv(io.StringIO(response.content.decode('utf-8')))
                break
        except Timeout:
            logger.error('socket timed out - URL %s', csv_url)
            time.sleep(retry_delay)
        except RequestException as error:
            logger.error('Data not retrieved because %s\nURL: %s', error,
                         csv_url)
            time.sleep(retry_delay)
        except Exception as error:
            logger.error('Unexpected error of %s\nURL: %s', error, csv_url)
            time.sleep(retry_delay)
    else:
        # for/else: all retries failed without a successful break.
        logger.error('No historical data after %d retry.' % retry_time)
    if df is None:
        return df
    # Change correct Data Type
    df[LABEL_DATE] = pd.to_datetime(df[LABEL_DATE], format=YAHOO_DATE_FORMAT)
    df[LABEL_OPEN] = pd.to_numeric(df[LABEL_OPEN], errors='ignore',
                                   downcast='float')
    df[LABEL_HIGH] = pd.to_numeric(df[LABEL_HIGH], errors='ignore',
                                   downcast='float')
    df[LABEL_LOW] = pd.to_numeric(df[LABEL_LOW], errors='ignore',
                                  downcast='float')
    df[LABEL_CLOSE] = pd.to_numeric(df[LABEL_CLOSE], errors='ignore',
                                    downcast='float')
    df[LABEL_ADJCLOSE] = pd.to_numeric(df[LABEL_ADJCLOSE], errors='ignore',
                                       downcast='float')
    df[LABEL_VOLUME] = pd.to_numeric(df[LABEL_VOLUME], errors='ignore',
                                     downcast='integer')
    return df.set_index(LABEL_DATE, append=False)
def _debug(message):
    """Log *message* at DEBUG level through the CommitServer logger."""
    logutil.getLogger("CommitServer").debug(message)
def stub_runtime():
    """Return a bare Runtime for tests: no parent-version lookup, no stage."""
    kwargs = {
        'latest_parent_version': False,
        'logger': logutil.getLogger(__name__),
        'stage': False,
    }
    return runtime.Runtime(**kwargs)
def _info(message):
    """Log *message* at INFO level through the GuidInstallApp logger."""
    logutil.getLogger("GuidInstallApp").info(message)
import subprocess import time import shlex import os from fcntl import fcntl, F_GETFL, F_SETFL from os import O_NONBLOCK, read import logutil import pushd import assertion from util import red_print, green_print, yellow_print SUCCESS = 0 logger = logutil.getLogger(__name__) class RetryException(Exception): """ Provide a custom exception for retry failures """ pass def retry(retries, task_f, check_f=bool, wait_f=None): """ Try a function up to n times. Raise an exception if it does not pass in time :param retries int: The number of times to retry