def __get_history__(code_date):
    """
    Fetch the price history for a single (code, start, end) request.

    ``self`` and ``as_json`` are read from the enclosing scope.

    :Parameters:
    code_date: sequence
        ``(code, start, end)``. ``start`` and ``end`` may each be a
        ``datetime`` or a date string; strings are parsed day-first.
    :return: pandas DataFrame, or its JSON string when ``as_json`` is
        truthy. The frame is empty when the code is not valid.
    """
    code = code_date[0].upper()
    frames = []
    if self.is_valid_code(code):
        # Bind both bounds up front so that values which already are
        # datetimes are used as-is instead of being left undefined.
        start, end = code_date[1], code_date[2]
        if not isinstance(start, datetime):
            start = parse(start, dayfirst=True)
        if not isinstance(end, datetime):
            end = parse(end, dayfirst=True)

        def fetch(window_start, window_end):
            # Download and parse the history table for one date window.
            url = self.build_url_for_history(code_date[0],
                                             window_start.strftime('%d-%m-%Y'),
                                             window_end.strftime('%d-%m-%Y'))
            res = read_url(url, self.headers)
            return pd.read_html(res.read(), header=0, index_col='Date')[0]

        # Ranges longer than 100 days are requested in consecutive batches
        # of 100 days; the remainder is fetched after the loop.
        if (end - start).days > 100:
            curr_end = start + timedelta(days=100)
            while curr_end < end:
                frames.append(fetch(start, curr_end))
                start = curr_end + timedelta(days=1)
                curr_end += timedelta(days=100)
        frames.append(fetch(start, end))

    # Concatenate once at the end: DataFrame.append is gone in pandas 2.0
    # and copied the whole frame on every call.
    history_df = pd.concat(frames) if frames else pd.DataFrame()
    if as_json:
        return history_df.to_json()
    return history_df
def get_stock_codes(self):
    """
    Retrieves the equity list from NSE and returns it as a dataframe.

    The first line of the download is treated as the header and skipped.
    Every other line containing a comma becomes one row, indexed by its
    line number in the download. Fields are mapped positionally onto the
    column names below; fields beyond the eighth are ignored.

    :return: pandas DataFrame (empty if no data rows were found)
    """
    columns = [
        'Symbol',
        'Name',
        'Series',
        'Date of Listing',
        'Paid up Value',
        'Market Lot',
        'ISIN Number',
        'Face Value',
    ]
    res = read_url(self.stocks_csv_url, self.headers)

    line_numbers = []
    records = []
    for i, line in enumerate(res.read().split('\n')):
        # Line 0 holds the column names; lines without a comma are not
        # valid csv rows and are skipped.
        if i == 0 or ',' not in line:
            continue
        line_numbers.append(i)
        records.append(dict(zip(columns, line.split(','))))

    if not records:
        return pd.DataFrame()
    # Build the frame in one go instead of cell-by-cell: DataFrame.set_value
    # was removed in pandas 1.0 and reallocated on every new row/column.
    return pd.DataFrame(records, index=line_numbers, columns=columns)
def __parse_holiday_list__(self):
    """
    Download the NSE market-holiday page and pull out its table rows.

    :Returns: a list with one record per matched ``tr`` element; each record
        is the list of that row's ``td`` texts with commas removed
    """
    holiday_url = 'https://www.nseindia.com/products/content/equities/equities/mrkt_timing_holidays.htm'
    headers = {
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'Host': 'nseindia.com',
        'Referer': "https://www.nseindia.com/live_market/dynaContent/live_watch/get_quote/GetQuote.jsp?symbol=INFY&illiquid=0&smeFlag=0&itpFlag=0",
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:28.0) Gecko/20100101 Firefox/28.0',
        'X-Requested-With': 'XMLHttpRequest'
    }
    page = read_url(holiday_url, headers).read()
    soup = BeautifulSoup(page, 'html.parser')

    # NOTE(review): recursive=False restricts the search to direct children
    # of the parsed document; rows nested inside <table> elements would
    # presumably not be matched -- confirm this is intended.
    rows = soup.find_all('tr', recursive=False)
    return [
        [cell.text.replace(',', '') for cell in row.find_all('td')]
        for row in rows
    ]
def get_index_list(self, as_json=False):
    """
    Fetch the index listing and return the index names.

    params:
        as_json: True | False
    returns:
        the names rendered through ``render_response`` (list | json)
    """
    resp = read_url(self.index_url, self.headers)
    entries = json.load(resp)['data']

    names = []
    for entry in entries:
        # Names are coerced to plain ``str`` before rendering.
        names.append(str(entry['name']))
    return self.render_response(names, as_json)
def get_advances_declines(self, as_json=False):
    """
    Fetch the advances/declines data.

    :return: JSON when ``as_json`` is truthy, otherwise a pandas DataFrame
        indexed by the 'indice' column
    """
    raw = read_url(self.advances_declines_url, self.headers)
    payload = json.load(raw)

    # Each entry goes through clean_server_response before rendering.
    cleaned = []
    for entry in payload['data']:
        cleaned.append(self.clean_server_response(entry))

    rendered = self.render_response(cleaned, as_json)
    if not as_json:
        return pd.DataFrame(rendered).set_index('indice')
    return rendered
def get_most_active(self, as_json=False):
    """
    Fetch the most-active equities data.

    :return: JSON when ``as_json`` is truthy, otherwise a pandas DataFrame
        indexed by the 'symbol' column
    """
    payload = json.load(read_url(self.most_active_url, self.headers))

    # Every entry is passed through clean_server_response before rendering.
    cleaned = list(map(self.clean_server_response, payload['data']))
    rendered = self.render_response(cleaned, as_json)

    if as_json:
        return rendered
    frame = pd.DataFrame(rendered)
    return frame.set_index('symbol')
def get_index_quote(self, code, as_json=False):
    """
    Look up the quote for one index.

    params:
        code : string index code
        as_json: True|False
    returns:
        a dict | json quote for the given index, or None when the code is
        not a valid index or no entry carries that name
    """
    url = self.index_url
    if not self.is_valid_index(code):
        return None

    resp = read_url(url, self.headers)
    # The 'data' member is a list of dictionaries, one per index.
    quotes = [self.clean_server_response(entry)
              for entry in json.load(resp)['data']]

    # Return the first entry whose name equals the upper-cased code.
    for quote in quotes:
        if quote['name'] == code.upper():
            return self.render_response(quote, as_json)
    return None
def get_peer_companies(self, code, as_json=False):
    """
    Find the peer companies of a given company.

    :Parameters:
    code: str
        The code of the company to find peers of
    as_json: bool
        Whether to render the response as json
    :returns: a pandas DataFrame (or its JSON string) with one row per peer
        company, or None when the code is not valid
    :raises: AttributeError when the response holds no 'data:' marker
    """
    code = code.upper()
    if not self.is_valid_code(code):
        return None

    url = self.peer_companies_url + code
    res = read_url(url, self.headers).read()

    # Keep only what follows the 'data:' marker, skipping one more
    # character (presumably the opening bracket of the list -- confirm).
    string_index = re.search('data:', res).span()[1]
    res = res[string_index + 1:]

    # HACK: the payload is not parsed as a whole. It is cut at every
    # closing brace and each slice is tried as a standalone JSON object.
    records = []
    start = 0
    for item in re.finditer(r'({*})', res):
        # Include the closing brace so the slice can parse as JSON.
        end = item.span()[1]
        try:
            company_info = json.loads(res[start:end])
            del company_info['industry']
        except (ValueError, KeyError, TypeError):
            # Slice is not valid JSON, or not an object carrying an
            # 'industry' key: it is not a company record, drop it.
            pass
        else:
            records.append(company_info)
        # Skip the separator character between two objects.
        start = end + 1

    # Build the frame once: DataFrame.append is gone in pandas 2.0 and
    # copied the whole frame for every appended row.
    data = pd.DataFrame(records)
    return data.to_json() if as_json else data
def __get_quote__(code):
    """
    Fetch and render the quote for a single code.

    ``self`` and ``as_json`` are read from the enclosing scope.

    :return: the rendered quote, or None when the code is not valid
    :raises: Exception('Symbol Not Traded today') when the quote cannot be
        extracted from the response
    """
    code = code.upper()
    if not self.is_valid_code(code):
        return None

    url = self.build_url_for_quote(code)
    page = read_url(url, self.headers).read()

    # The quote is embedded in the hidden "responseDiv" element of the page.
    match = re.search(
        r'\{<div\s+id="responseDiv"\s+style="display:none">\s+(\{.*?\{.*?\}.*?\})',
        page, re.S)

    # Any failure while extracting, adapting or evaluating the payload
    # (including a page with no match) is reported as one generic error.
    try:
        payload = js_adaptor(match.group(1))
        quote = ast.literal_eval(payload)['data'][0]
        response = self.clean_server_response(quote)
    except Exception:
        raise Exception('Symbol Not Traded today')

    return self.render_response(response, as_json)