def get_symbol_eod_data(
        self, symbol_list=None, index=None, index_type=None,
        start=None, end=None, min_rows=0, missing_count=0
    ):
    '''
    If SYMBOL_DATA_PATH exists, grab data from the file.
    Update the data if the data in the file is older than 5 days.
    Otherwise fetch symbol data from the NSE website.
    '''
    if NSE.__SYMBOL_EOD_DATA_KEY in get_store_keys(NSE.__NSE_DATA_PATH):
        eod_data = pd.read_hdf(NSE.__NSE_DATA_PATH, NSE.__SYMBOL_EOD_DATA_KEY)
        eod_data = eod_data.reset_index()
    else:
        self.force_load_data(force_load='symbol_eod_data')
        eod_data = pd.read_hdf(NSE.__NSE_DATA_PATH, NSE.__SYMBOL_EOD_DATA_KEY)
        eod_data = eod_data.reset_index()

    # Restrict to the requested symbols
    symbol_list = self.get_symbol_list(
        symbol_list=symbol_list, index=index, index_type=index_type,
        start=start, missing_count=missing_count, min_rows=min_rows
    )
    eod_data = eod_data[eod_data.symbol.isin(symbol_list)]

    # Restrict to the requested date range
    start = get_date(start, out='dt', start=True)
    end = get_date(end, out='dt', start=False)
    eod_data = eod_data.loc[
        (eod_data.date >= start) & (eod_data.date <= end)
    ]
    return eod_data
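# Hypothetical usage sketch (assumption, not from the source): assuming the
# method above lives on an NSE class with a no-argument constructor, pull EOD
# rows for two symbols over one calendar year. The symbol names and date
# formats below are illustrative only.
nse = NSE()
eod = nse.get_symbol_eod_data(
    symbol_list=['INFY', 'TCS'], start='2016-01-01', end='2016-12-31'
)
print(eod[['symbol', 'date', 'close', 'volume']].head())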
def get_index_eod_data(
        self, index_list=None, index_type=None, start=None, end=None,
    ):
    '''
    TODO
    If SYMBOL_DATA_PATH exists, grab data from the file.
    Update the data if the data in the file is older than 5 days.
    Otherwise fetch symbol data from the NSE website.
    '''
    if NSE.__INDEX_EOD_DATA_KEY in get_store_keys(NSE.__NSE_DATA_PATH):
        eod_data = pd.read_hdf(NSE.__NSE_DATA_PATH, NSE.__INDEX_EOD_DATA_KEY)
        eod_data = eod_data.reset_index()
    else:
        self.force_load_data(force_load='index_eod_data')
        eod_data = pd.read_hdf(NSE.__NSE_DATA_PATH, NSE.__INDEX_EOD_DATA_KEY)
        eod_data = eod_data.reset_index()

    index_list = self.get_index_list(
        index_list=index_list, index_type=index_type
    )
    eod_data = eod_data[eod_data.symbol.isin(index_list)]

    start = get_date(start, out='dt', start=True)
    end = get_date(end, out='dt', start=False)
    # .loc replaces the deprecated .ix indexer
    eod_data = eod_data.loc[
        (eod_data.date >= start) & (eod_data.date <= end)
    ]
    return eod_data
def get_symbol_eod_values(
        self, data='returns', symbol_list=None, index=None, index_type=None,
        start=None, end=None, min_rows=0, missing_count=0
    ):
    '''Get a single EOD field (e.g. close prices or returns) for the
    selected symbols as a separate dataframe, one column per symbol.'''
    symbol_list = self.get_symbol_list(
        symbol_list=symbol_list, index=index, index_type=index_type,
        start=start, missing_count=missing_count, min_rows=min_rows
    )
    eod_data_schema = [
        'symbol', 'date', 'prev_close', 'open', 'high', 'low', 'last',
        'close', 'vwap', 'trades', 'volume', 'turnover', 'pct_deliverble',
        'simple_returns', 'log_returns', 'high_low_spread', 'open_close_spread'
    ]
    if data in eod_data_schema:
        values = data
    elif data == 'returns':
        values = 'log_returns'
    elif data == 'deliverble':
        values = 'pct_deliverble'
    else:
        warnings.warn(
            'Invalid type of data requested. Returning returns data'
        )
        values = 'log_returns'

    if 'symbol_eod_values_{0}'.format(values) in get_store_keys(NSE.__NSE_DATA_PATH):
        data = pd.read_hdf(
            NSE.__NSE_DATA_PATH, 'symbol_eod_values_{0}'.format(values)
        )
    else:
        self.force_load_data(force_load='symbol_eod_values', values=values)
        data = pd.read_hdf(
            NSE.__NSE_DATA_PATH, 'symbol_eod_values_{0}'.format(values)
        )

    # Keep only the columns for the requested symbols
    column_list = data.columns.intersection(symbol_list)
    data = data[column_list]

    start = get_date(start, 'str', True)
    end = get_date(end, 'str', False)
    data = data[start:end]
    data = data.dropna(how='all', axis=1)
    return data
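# Hypothetical usage sketch (assumption): the wide per-symbol frames returned
# above, one column per symbol. 'returns' maps to log_returns; any column from
# eod_data_schema (e.g. 'close') can also be requested directly.
nse = NSE()
log_returns = nse.get_symbol_eod_values(
    data='returns', symbol_list=['INFY', 'TCS'], start='2016', end='2017'
)
close_prices = nse.get_symbol_eod_values(data='close', symbol_list=['INFY', 'TCS'])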
def rosario_3(self):
    """
    Returns all the URLs from the 'policiales' (crime) section of the
    rosario3 news portal. self.limit_date defines how far back in time
    URLs will be collected.
    """
    date_limit = get_date(self.limit_date)
    self.name = 'rosario 3'
    self.section_url = config.rosario3_url
    driver = self.instance_driver()

    # Read the date of the last article in the listing and check that it is
    # not older than the requested limit.
    logging.info('Loading news up to {}.'.format(date_limit))
    for i in range(1, 10000):
        try:
            date = driver.find_element_by_xpath(
                '//div[2]//div[2]/article[{}]/div[2]/time'.format(i * 20)).text
        except NoSuchElementException:
            # No more articles in the listing; stop loading.
            logging.exception("Exception occurred", exc_info=True)
            break
        if re.match('.*-.*-.*', date):
            date = get_date(date)
            if date < date_limit:
                break
        # Still within the requested time window, keep loading more news.
        try:
            driver.find_element_by_xpath(
                '(//a[@class="btn btn-medium"])[1]').click()
            time.sleep(5)
        except NoSuchElementException:
            logging.exception("Exception occurred", exc_info=True)

    # Collect the URLs of every loaded article.
    elements = driver.find_elements_by_xpath("//div[2]/article//h1/a")
    if len(elements) > 0:
        self.urls = [e.get_attribute('href') for e in elements]
    else:
        logging.exception('Failed because elements list is empty.')
    logging.info('Collected {} urls.'.format(len(self.urls)))
    driver.quit()
def main(client):
    console_print(
        "You can check failed requests with Grafana - grafana.tools.signin.service.gov.uk/d/NYxHTZqmz/matomo"
    )
    start_date = get_date(
        "What date did the failed requests begin (dd/mm/yy)?")
    end_date = get_date("What date did the failed requests end (dd/mm/yy)?",
                        start_date)

    start_datetime, records_count = return_date_and_records_count_from_completed_query(
        client, query_limits(client, start_date, end_date, "asc"))
    end_datetime, _ = return_date_and_records_count_from_completed_query(
        client, query_limits(client, start_date, end_date, "desc"))

    confirm_or_abort(
        f"There were {int(records_count)} failed requests between {start_datetime.strftime(DATETIME_FORMAT)} "
        f"and {end_datetime.strftime(DATETIME_FORMAT)}.\nIs this correct? (yes/no)"
    )
    return start_datetime, end_datetime
def __init__(self, symbol_list=None, index=None, index_type=None,
             start=None, end=None, force_load=None):
    self.force_load_data(force_load)
    self.symbol_meta = self.get_symbol_meta()
    self.index_meta = self.get_index_meta()
    self.start = get_date(start, start=True)
    self.end = get_date(end, start=False)
    self.symbol_list = self.get_symbol_list(symbol_list=symbol_list,
                                            index=index,
                                            index_type=index_type,
                                            start=start)
    self.index_list = self.get_index_list(index_list=index,
                                          index_type=index_type)
def get_traded_dates(self, start=None, end=None):
    'Generate traded dates for NSE'
    if Market.__TRADED_DATES_KEY in get_store_keys(Market.__Market_PATH):
        traded_dates = pd.read_hdf(Market.__Market_PATH, Market.__TRADED_DATES_KEY)
    else:
        self.force_load_data('traded_dates')
        traded_dates = pd.read_hdf(Market.__Market_PATH, Market.__TRADED_DATES_KEY)

    start = get_date(start, 'str', True)
    end = get_date(end, 'str', False)
    traded_dates = traded_dates[start:end]
    traded_dates['specific_date_count'] = [
        i + 1 for i in range(len(traded_dates))
    ]
    return traded_dates
def get_course_info(course_url):
    response = requests.get(course_url)
    # If the course does not exist, Coursera redirects to another URL
    if response.url != course_url:
        return None
    course_info = {'url': course_url}
    soup = BeautifulSoup(response.content, 'html.parser')
    course_info['rating'] = helpers.get_rating(soup)
    course_info['start_date'], course_info['end_date'] = helpers.get_date(soup)
    course_info['week_count'] = helpers.get_week_count(soup)
    course_info['language'] = helpers.get_language(soup)
    # If any field is None, return None
    return course_info if None not in course_info.values() else None
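# Hypothetical usage sketch (assumption): the course URL below is illustrative
# only. None is returned when Coursera redirects (course missing) or when any
# field could not be parsed.
info = get_course_info('https://www.coursera.org/learn/machine-learning')
if info is not None:
    print(info['start_date'], info['end_date'], info['week_count'])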
def get_symbol_list(
        self, symbol_list=None, index=None, index_type=None, start=None,
    ):
    '''
    Get a symbol list based on the criteria provided.
    Pass index to get the symbols in that index, or a list of indexes
    to get the union of symbols of all indexes in the list.
    start: {year as int or string, string format of date, None}
    '''
    symbol_meta = self.symbol_meta.copy()
    if symbol_list is None:
        if 'symbol_list' in dir(self):
            symbol_list = symbol_meta[symbol_meta.index.isin(
                self.symbol_list)].date_of_listing
        else:
            symbol_list = symbol_meta.date_of_listing
    elif isinstance(symbol_list, str) and symbol_list == 'all':
        symbol_list = symbol_meta.date_of_listing
        return symbol_list
    elif isinstance(symbol_list, str):
        symbol_list = symbol_meta[symbol_meta.index == symbol_list].date_of_listing
    elif isinstance(symbol_list, list):
        symbol_list = symbol_meta[symbol_meta.index.isin(
            symbol_list)].date_of_listing
    else:
        warnings.warn('Could not fetch proper symbol_list. '
                      'Loading default symbol_list')
        symbol_list = symbol_meta.date_of_listing.copy()
    symbol_list = symbol_list.copy()

    if index is not None or index_type is not None:
        symbol_list_temp = self.get_index_components(index=index,
                                                     index_type=index_type)
        symbol_list = symbol_list[symbol_list.index.isin(symbol_list_temp)]
    if start is not None:
        # Keep only symbols listed on or before the requested start date
        start = get_date(start, 'dt')
        symbol_list = symbol_list[symbol_list <= start]
    return symbol_list.index.tolist()
def _group_listings(self, time_period):  # parsed may or may not be an attribute
    """Returns a nested dictionary of grouped, vectorized listings, where the
    groups are categories, then time periods.
    Ex:
    lexicon = ["big", "dog", "barks", "loudly"]
    parsed_listings = [([1,2], 'business', '2012-01-12'),
                       ([1,3], 'business', '2013-03-01'),
                       ([0,1,2], 'marketing', '2014-04-01'),
                       ([0,2], 'marketing', '2013-01-01')]
    group_listings(parsed_listings, lexicon_size=4, time_period='year') =>
    {"business": {datetime.date(2012, 1, 1): [[0,1,1,0]],
                  datetime.date(2013, 1, 1): [[0,1,0,1]]},
     "marketing": {datetime.date(2013, 1, 1): [[1,0,1,0]],
                   datetime.date(2014, 1, 1): [[1,1,1,0]]}}"""
    lexicon_size = len(self._lexicon)
    for (int_tokens, categories, date) in self._parsed_listings:
        # Truncate the listing date to its period and vectorize its tokens
        date_period = trunc_date(get_date(date), time_period)
        vector_listing = get_vectorized_listing(int_tokens, lexicon_size)
        for category in categories:
            self._groups.setdefault(category, {})
            self._groups[category].setdefault(date_period, [])
            self._groups[category][date_period].append(vector_listing)
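# A minimal sketch (assumption, not the project's actual helper) of the
# trunc_date behaviour the docstring above relies on: truncate a date to the
# start of its 'year' or 'month' period.
import datetime

def trunc_date(date, time_period):
    if time_period == 'year':
        return datetime.date(date.year, 1, 1)
    if time_period == 'month':
        return datetime.date(date.year, date.month, 1)
    return date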
def run_email(event, context):
    student = event['key'] if 'key' in event else 'Lehman, Hayes'
    case = event[
        'case'] if 'case' in event else '0a608d99-9484-4333-a735-29179e1e1ef5'
    student_parsed = student.split(', ')
    student_name = student_parsed[1] + ' ' + student_parsed[0]
    print(student_name)
    recipients = ['*****@*****.**', '*****@*****.**']
    if 'queryStringParameters' in event and 'recipients' in event[
            'queryStringParameters']:
        recipients = ast.literal_eval(
            event['queryStringParameters']['recipients'])
    datetime_object = get_date(event)
    current_date = '{d.month}/{d.day}/{d.year}'.format(d=datetime_object)
    cb = ChromeBrowser(datetime_object)
    cb.log_in()
    cb.select_student(case)
    tantrum_graph = cb.get_tantrum_graph(case)
    rm_list = cb.get_recently_mastered(student_name)
    tc_list = cb.get_trial_count(student_name)
    attachment_list = cb.get_attachments()
    if attachment_list:
        send_email(current_date, recipients, attachment_list, rm_list,
                   tc_list, tantrum_graph, student_name)
    response = {"statusCode": 200}
    return response
]

# Files in dataframe
files_df = images['file_name'].str.extract(
    '(?P<camera>[^\d]+)(?P<file_number>\d+)')
files_df['file_number'] = files_df['file_number'].astype(str)
images = images.merge(files_df, left_index=True, right_index=True)

# Get dates info into separate cols
images['created_month'] = images['created'].str[2:4]
images['created_day'] = images['created'].str[0:2]
images['created_year'] = images['created'].str[4:6]

# Construct a sensible date
images['created'] = get_date(images['created_year'], images['created_month'],
                             images['created_day'])

# For file names, month needs to be A, B, C for 10, 11 and 12
images.loc[images['created_month'] == '10', 'created_month'] = 'A'
images.loc[images['created_month'] == '11', 'created_month'] = 'B'
images.loc[images['created_month'] == '12', 'created_month'] = 'C'
images['created_month'] = images['created_month'].str.strip('0')

# Join IMG_ files normally, treat P files separately further down
# images['file_exists_img'] = images['created_year'] + '/' + images['file_name']
# images['file_exists_img'] = images['file_exists_img'].isin(file_names_list)
# Doesn't seem to be any of these, but there might be in the future so let's leave it in

# More complicated cases for P files
images['real_file_name'] = images['camera'] + images[
def test_handles_1990s(self):
    year, month, day = (pd.Series(['98', '10']), pd.Series(['09', '11']),
                        pd.Series(['01', '02']))
    expected = pd.Series(['1998-09-01', '2010-11-02'])
    assert_series_equal(get_date(year, month, day), expected)
def test_constructs_date(self):
    year, month, day = (pd.Series(['19', '20']), pd.Series(['09', '11']),
                        pd.Series(['01', '02']))
    expected = pd.Series(['2019-09-01', '2020-11-02'])
    assert_series_equal(get_date(year, month, day), expected)
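# A minimal sketch (assumption, not the tested implementation) of a
# get_date(year, month, day) that satisfies both tests above: two-digit years
# of 90 and above are read as 19xx, everything else as 20xx, and the result is
# a Series of 'YYYY-MM-DD' strings. The cutoff of 90 is an assumption.
import pandas as pd

def get_date(year, month, day):
    century = year.astype(int).apply(lambda y: 1900 if y >= 90 else 2000)
    full_year = (century + year.astype(int)).astype(str)
    return full_year + '-' + month + '-' + day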
LOGGER = get_logger()
client = boto3.client('logs')
LOGGER.info(f"Starting at the '{starting_stage[1]}' stage.")

if starting_stage_index == 1:
    LOGGER.info("Starting check logs")
    start_datetime, end_datetime = check_logs(client)
    LOGGER.info("Finished check logs")

if starting_stage_index <= 2:
    LOGGER.info(
        "Downloading failed requests from cloudwatch. This may take a few minutes..."
    )
    if starting_stage_index == 2:
        start_datetime = get_date(
            "What date did the failed requests begin? (dd/mm/yy)")
        end_datetime = get_date(
            "What date did the failed requests end? (dd/mm/yy)"
        ) + timedelta(1)
    output_filename = download_failed_requests(
        client,
        start_datetime - timedelta(seconds=1),
        end_datetime + timedelta(seconds=1),
    )
    LOGGER.info("Downloading complete")
    confirm_or_abort(
        f"You should check the contents of '{os.getenv('HOST_WORKING_DIR') + '/' if os.getenv('HOST_WORKING_DIR') else ''}"
        f"{output_filename}' and ensure the requests to be replayed are correct.\nOnce you've done this enter 'yes'. Or enter 'no' to abort."
    )
def fetch_eod_data(
        self, symbol, start=None, index=False
    ):
    'Fetch all End of Day (EOD) data from NSE'
    from_date = get_date(start, start=True)
    to_date = get_date(start=False)

    # Get data from NSE
    try:
        eod_data = get_history(
            symbol=symbol, index=index,
            start=from_date, end=to_date, series='EQ'
        )
        if eod_data.empty:
            warnings.warn(
                'No data received from NSE for {0} from {1} to {2}'.
                format(symbol, from_date.date(), to_date.date())
            )
            return eod_data
        eod_data.index = pd.to_datetime(eod_data.index)
        if index:
            eod_data['prev_close'] = eod_data['Close'].shift(1)
            eod_data['pct_deliverble'] = 100
            eod_data['vwap'] = eod_data['Close']
            eod_data['last'] = eod_data['Close']
            eod_data['trades'] = 0
        else:
            eod_data.drop(
                ['Series', 'Deliverable Volume'], axis=1, inplace=True
            )
    except Exception as exception:
        warnings.warn(
            'Could not get data for {0} from NSE due to {1}'.format(symbol, exception)
        )
        return pd.DataFrame()

    rename_columns(eod_data)
    eod_data['symbol'] = [symbol for i in range(len(eod_data))]
    eod_data = eod_data.reset_index().sort_values(['symbol', 'date', 'close'])
    eod_data = eod_data.drop_duplicates(
        subset=['symbol', 'date'], keep='last'
    )

    # Handle prev_close = 0
    eod_data['prev_close_test'] = eod_data.close.shift(1)
    eod_data.loc[eod_data.prev_close == 0, 'prev_close'] = eod_data.loc[
        eod_data.prev_close == 0, 'prev_close_test']
    eod_data.drop(['prev_close_test'], axis=1, inplace=True)

    # Fill rows for traded dates missing from the NSE response
    from_date = eod_data.date.min()
    to_date = eod_data.date.max()
    traded_dates = self.get_traded_dates(
        start=from_date, end=to_date
    )
    traded_dates = pd.DataFrame(index=traded_dates.index)
    missing_dates = traded_dates.index.difference(eod_data.date)
    eod_data = traded_dates.join(eod_data.set_index('date'), how='outer')
    traded_dates = pd.DataFrame(index=eod_data.index)
    traded_dates['date_count'] = [i + 1 for i in range(len(traded_dates))]
    if len(missing_dates) > 0:
        for i in missing_dates:
            date_count = traded_dates.loc[i]['date_count']
            back_date = traded_dates[traded_dates.date_count == date_count - 1].index.values[0]
            next_first_valid_date = eod_data.loc[i:].symbol.first_valid_index()
            if next_first_valid_date is None:
                next_first_valid_date = TODAY
            if eod_data.loc[back_date, 'close'] == eod_data.loc[next_first_valid_date, 'prev_close']:
                close = eod_data.loc[back_date, 'close']
                eod_data.loc[i, ['symbol']] = symbol
                eod_data.loc[i, ['prev_close', 'open', 'high', 'low',
                                 'last', 'close', 'vwap']] = close
                eod_data.loc[i, ['volume', 'turnover', 'trades',
                                 'pct_deliverble']] = 0
    missing_count = len(traded_dates) - eod_data.symbol.count()
    if missing_count > 0:
        warnings.warn(
            ' {0} missing rows in {1}'.format(missing_count, symbol)
        )

    # Derived return and spread columns
    eod_data['simple_returns'] = (
        (eod_data.close - eod_data.prev_close) / eod_data.prev_close
    )
    eod_data['log_returns'] = np.log(eod_data.close / eod_data.prev_close)
    eod_data['high_low_spread'] = (eod_data.high - eod_data.low) / eod_data.low * 100
    eod_data['open_close_spread'] = (eod_data.close - eod_data.open) / eod_data.open * 100
    eod_data['pct_deliverble'] = eod_data['pct_deliverble'] * 100

    # Handle abnormal returns, i.e. splits
    abnormal_condition = (eod_data.simple_returns < -0.35) | (eod_data.simple_returns > 0.35)
    eod_data.loc[abnormal_condition, ['simple_returns']] = (
        (eod_data[abnormal_condition].high_low_spread +
         eod_data[abnormal_condition].open_close_spread) / (2 * 100)
    )
    eod_data.loc[abnormal_condition, ['log_returns']] = (
        (eod_data[abnormal_condition].high_low_spread +
         eod_data[abnormal_condition].open_close_spread) / (2 * 100)
    )

    eod_data.index.name = 'date'
    eod_data = eod_data.reset_index().set_index(['symbol', 'date'])
    eod_data = eod_data.astype(float)  # np.float is removed in recent NumPy
    if index:
        eod_data = eod_data.drop(['pct_deliverble', 'vwap', 'last', 'trades'],
                                 axis=1)
    return eod_data
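# Hypothetical usage sketch (assumption): fetch EOD prices for a single symbol
# since 2015 and inspect the derived return columns computed above. The class
# name, constructor defaults and start-date format are assumptions.
nse = NSE()
infy = nse.fetch_eod_data(symbol='INFY', start='2015')
print(infy[['close', 'simple_returns', 'log_returns']].tail())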