Example #1
    def get_symbol_eod_data(
            self, symbol_list=None,
            index=None, index_type=None, start=None, end=None,
            min_rows=0, missing_count=0
        ):
        '''
        If the data key exists in the local store, grab data from the file.
        Update the data if the file is older than 5 days.
        Else fetch symbol data from the NSE website.
        '''
        # Load from the local HDF store, forcing a fresh download first
        # if the key is not present yet.
        if NSE.__SYMBOL_EOD_DATA_KEY not in get_store_keys(NSE.__NSE_DATA_PATH):
            self.force_load_data(force_load='symbol_eod_data')
        eod_data = pd.read_hdf(NSE.__NSE_DATA_PATH, NSE.__SYMBOL_EOD_DATA_KEY)
        eod_data = eod_data.reset_index()

        symbol_list = self.get_symbol_list(
            symbol_list=symbol_list, index=index, index_type=index_type,
            start=start, missing_count=missing_count, min_rows=min_rows
        )
        eod_data = eod_data[eod_data.symbol.isin(symbol_list)]

        start = get_date(start, out='dt', start=True)
        end = get_date(end, out='dt', start=False)
        eod_data = eod_data.loc[
            (eod_data.date >= start) & (eod_data.date <= end)
        ]
        return eod_data
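
The get_date helper these NSE examples lean on is not shown on this page, so its exact behavior is an assumption. A minimal sketch consistent with the calls above (get_date(start, out='dt', start=True) and get_date(start, 'str', True)): it normalizes None, a bare year, or a date string into either a datetime or a 'YYYY-MM-DD' string, defaulting to an early market date for start=True and to today for start=False. The 1996 floor is illustrative, not confirmed by the source; note that other examples on this page (#4, #5, #8, #12) use unrelated helpers that share the name.

import datetime
import pandas as pd

def get_date(date=None, out='dt', start=True):
    # Hypothetical sketch of the date-normalizing helper assumed above.
    if date is None:
        # Assumed defaults: an early market date, or today.
        date = datetime.datetime(1996, 1, 1) if start else datetime.datetime.today()
    elif isinstance(date, int):
        # A bare year such as 2015 maps to its first or last day.
        date = datetime.datetime(date, 1, 1) if start else datetime.datetime(date, 12, 31)
    else:
        date = pd.to_datetime(date)
    return date.strftime('%Y-%m-%d') if out == 'str' else date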
Example #2
    def get_index_eod_data(
            self, index_list=None, index_type=None,
            start=None, end=None,
        ):
        '''
        TODO
        If the data key exists in the local store, grab data from the file.
        Update the data if the file is older than 5 days.
        Else fetch index data from the NSE website.
        '''
        # Load from the local HDF store, forcing a fresh download first
        # if the key is not present yet.
        if NSE.__INDEX_EOD_DATA_KEY not in get_store_keys(NSE.__NSE_DATA_PATH):
            self.force_load_data(force_load='index_eod_data')
        eod_data = pd.read_hdf(NSE.__NSE_DATA_PATH, NSE.__INDEX_EOD_DATA_KEY)
        eod_data = eod_data.reset_index()

        index_list = self.get_index_list(
            index_list=index_list, index_type=index_type
        )

        eod_data = eod_data[eod_data.symbol.isin(index_list)]
        start = get_date(start, out='dt', start=True)
        end = get_date(end, out='dt', start=False)
        # .ix was removed from pandas long ago; use .loc for boolean indexing.
        eod_data = eod_data.loc[
            (eod_data.date >= start) & (eod_data.date <= end)
        ]
        return eod_data
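
A hedged usage sketch, assuming an initialized NSE instance; the index name 'NIFTY 50' is illustrative, not taken from this page.

# Hypothetical usage: one year of EOD rows for a single index.
nse = NSE()
nifty = nse.get_index_eod_data(
    index_list=['NIFTY 50'], start='2015-01-01', end='2015-12-31'
)
print(nifty[['symbol', 'date', 'close']].head())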
Example #3
    def get_symbol_eod_values(
            self, data='returns', symbol_list=None,
            index=None, index_type=None, start=None, end=None,
            min_rows=0, missing_count=0
        ):
        '''Get one EOD field for all symbols as a wide date-by-symbol dataframe'''

        symbol_list = self.get_symbol_list(
            symbol_list=symbol_list, index=index, index_type=index_type,
            start=start, missing_count=missing_count, min_rows=min_rows
        )
        eod_data_schema = [
            'symbol', 'date', 'prev_close', 'open', 'high',
            'low', 'last', 'close', 'vwap',
            'trades', 'volume', 'turnover', 'pct_deliverble',
            'simple_returns', 'log_returns', 'high_low_spread', 'open_close_spread'
        ]
        if data in eod_data_schema:
            values = data
        elif data == 'returns':
            values = 'log_returns'
        elif data == 'deliverble':
            values = 'pct_deliverble'
        else:
            warnings.warn(
                'Invalid type of data requested. Returning log returns data'
            )
            values = 'log_returns'
        # Load from the local HDF store, forcing a fresh download first
        # if the key is not present yet.
        key = 'symbol_eod_values_{0}'.format(values)
        if key not in get_store_keys(NSE.__NSE_DATA_PATH):
            self.force_load_data(force_load='symbol_eod_values', values=values)
        data = pd.read_hdf(NSE.__NSE_DATA_PATH, key)

        column_list = data.columns.intersection(symbol_list)
        data = data[column_list]
        start = get_date(start, 'str', True)
        end = get_date(end, 'str', False)
        data = data[start:end]
        data = data.dropna(how='all', axis=1)
        return data
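
Because any column in eod_data_schema is accepted, data='close' yields a wide date-indexed frame with one column per symbol, which makes cross-symbol work a one-liner. A hedged sketch, with illustrative symbol names:

# Hypothetical usage: wide frames of closes and log returns,
# then pairwise return correlations.
closes = nse.get_symbol_eod_values(data='close', symbol_list=['SBIN', 'INFY'])
returns = nse.get_symbol_eod_values(data='returns', symbol_list=['SBIN', 'INFY'])
print(returns.corr())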
Example #4
def rosario_3(self):
    """ Return every article url from the policiales (crime) section of the
    rosario3 news portal. limit_date bounds how far back urls are collected.
    """
    date_limit = get_date(self.limit_date)

    self.name = 'rosario 3'
    self.section_url = config.rosario3_url

    driver = self.instance_driver()

    # Get the date of the last article in the listing and check that it is
    # not older than the requested limit.
    logging.info('Loading articles up to {}.'.format(date_limit))

    for i in range(1, 10000):
        try:
            date = driver.find_element_by_xpath(
                '//div[2]//div[2]/article[{}]/div[2]/time'.format(i * 20)).text
        except NoSuchElementException:
            # Without a date element we cannot keep paging safely.
            logging.exception("Exception occurred", exc_info=True)
            break

        if re.match('.*-.*-.*', date):
            date = get_date(date)
            if date < date_limit:
                break

        # Still inside the requested time window, so keep loading articles.
        try:
            driver.find_element_by_xpath(
                '(//a[@class="btn btn-medium"])[1]').click()
            time.sleep(5)
        except NoSuchElementException:
            logging.exception("Exception occurred", exc_info=True)
            break

    # Collect the urls of all loaded articles.
    elements = driver.find_elements_by_xpath("//div[2]/article//h1/a")

    self.urls = [e.get_attribute('href') for e in elements]
    if not self.urls:
        logging.error('Failed because the elements list is empty.')

    logging.info('Collected {} urls.'.format(len(self.urls)))
    driver.quit()
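
A hedged usage sketch; the enclosing class name and its configuration are assumptions, since only limit_date and urls are visible above.

# Hypothetical usage of the scraper class that owns rosario_3.
scraper = Scraper()                # class name is an assumption
scraper.limit_date = '2020-01-01'  # collect urls back to this date
scraper.rosario_3()
for url in scraper.urls:
    print(url)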
Example #5
def main(client):
    console_print(
        "You can check failed requests with Grafana - grafana.tools.signin.service.gov.uk/d/NYxHTZqmz/matomo"
    )
    start_date = get_date(
        "What date did the failed requests begin (dd/mm/yy)?")
    end_date = get_date("What date did the failed requests end (dd/mm/yy)?",
                        start_date)

    start_datetime, records_count = return_date_and_records_count_from_completed_query(
        client, query_limits(client, start_date, end_date, "asc"))
    end_datetime, _ = return_date_and_records_count_from_completed_query(
        client, query_limits(client, start_date, end_date, "desc"))
    confirm_or_abort(
        f"There were {int(records_count)} failed requests between {start_datetime.strftime(DATETIME_FORMAT)} "
        f"and {end_datetime.strftime(DATETIME_FORMAT)}.\nIs this correct? (yes/no)"
    )

    return start_datetime, end_datetime
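
get_date and confirm_or_abort here are interactive prompt helpers, not the pandas-oriented ones elsewhere on this page; neither is shown in the source. A minimal sketch of what they plausibly look like, given the dd/mm/yy prompts and the yes/no confirmation above:

import sys
from datetime import datetime

def get_date(prompt, not_before=None):
    # Hypothetical: keep asking until a valid dd/mm/yy date is entered.
    while True:
        raw = input(prompt + ' ')
        try:
            date = datetime.strptime(raw, '%d/%m/%y')
        except ValueError:
            print('Please use dd/mm/yy.')
            continue
        if not_before is not None and date < not_before:
            print('End date cannot be before the start date.')
            continue
        return date

def confirm_or_abort(prompt):
    # Hypothetical: anything but an explicit 'yes' aborts the run.
    if input(prompt + ' ').strip().lower() != 'yes':
        sys.exit('Aborted.')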
Example #6
    def __init__(self,
                 symbol_list=None,
                 index=None,
                 index_type=None,
                 start=None,
                 end=None,
                 force_load=None):
        self.force_load_data(force_load)
        self.symbol_meta = self.get_symbol_meta()
        self.index_meta = self.get_index_meta()
        self.start = get_date(start, start=True)
        self.end = get_date(end, start=False)

        self.symbol_list = self.get_symbol_list(symbol_list=symbol_list,
                                                index=index,
                                                index_type=index_type,
                                                start=start)

        self.index_list = self.get_index_list(index_list=index,
                                              index_type=index_type)
Example #7
    def get_traded_dates(self, start=None, end=None):
        'Generate Traded dates for NSE'

        # Load from the local HDF store, forcing a fresh load first
        # if the key is not present yet.
        if Market.__TRADED_DATES_KEY not in get_store_keys(Market.__Market_PATH):
            self.force_load_data('traded_dates')
        traded_dates = pd.read_hdf(Market.__Market_PATH,
                                   Market.__TRADED_DATES_KEY)

        start = get_date(start, 'str', True)
        end = get_date(end, 'str', False)

        traded_dates = traded_dates[start:end]
        traded_dates['specific_date_count'] = [
            i + 1 for i in range(len(traded_dates))
        ]

        return traded_dates
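
A hedged usage sketch, assuming an initialized Market instance:

# Hypothetical usage: trading days in January 2016; the running
# specific_date_count makes day-offset arithmetic easy.
market = Market()
days = market.get_traded_dates(start='2016-01-01', end='2016-01-31')
print(len(days), days['specific_date_count'].iloc[-1])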
Example #8
def get_course_info(course_url):
    response = requests.get(course_url)

    # If the course does not exist, Coursera redirects to a different url
    if response.url != course_url:
        return None

    course_info = {'url': course_url}

    soup = BeautifulSoup(response.content, 'html.parser')

    course_info['rating'] = helpers.get_rating(soup)
    course_info['start_date'], course_info['end_date'] = helpers.get_date(soup)
    course_info['week_count'] = helpers.get_week_count(soup)
    course_info['language'] = helpers.get_language(soup)

    # If any field is None, return None
    return course_info if None not in course_info.values() else None
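
A hedged usage sketch; the course urls are illustrative placeholders, not taken from the source.

# Hypothetical usage: collect info for several courses, silently
# skipping pages that redirect or are missing a field.
course_urls = [
    'https://www.coursera.org/learn/machine-learning',
    'https://www.coursera.org/learn/python',
]
courses = []
for url in course_urls:
    info = get_course_info(url)
    if info is not None:
        courses.append(info)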
Example #9
    def get_symbol_list(
        self,
        symbol_list=None,
        index=None,
        index_type=None,
        start=None,
    ):
        '''
        Get a symbol list based on the criteria provided.
        Pass index to get the symbols in that index, or a list of
        indexes to get the union of symbols across all of them.
        start: {year as int or string, string format of date, None}
        '''
        symbol_meta = self.symbol_meta.copy()
        if symbol_list is None:
            if 'symbol_list' in dir(self):
                symbol_list = symbol_meta[symbol_meta.index.isin(
                    self.symbol_list)].date_of_listing
            else:
                symbol_list = symbol_meta.date_of_listing
        elif isinstance(symbol_list, str) and symbol_list == 'all':
            symbol_list = symbol_meta.date_of_listing
            return symbol_list
        elif isinstance(symbol_list, str):
            symbol_list = symbol_meta[symbol_meta.index ==
                                      symbol_list].date_of_listing
        elif isinstance(symbol_list, list):
            symbol_list = symbol_meta[symbol_meta.index.isin(
                symbol_list)].date_of_listing
        else:
            warnings.warn('Could not fetch a proper symbol_list. '
                          'Loading the default symbol_list')
            symbol_list = symbol_meta.date_of_listing.copy()

        symbol_list = symbol_list.copy()
        if index is not None or index_type is not None:
            symbol_list_temp = self.get_index_components(index=index,
                                                         index_type=index_type)
            symbol_list = symbol_list[symbol_list.index.isin(symbol_list_temp)]

        if start is not None:
            start = get_date(start, 'dt')
            symbol_list = symbol_list[symbol_list <= start]
        return symbol_list.index.tolist()
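
A hedged usage sketch, assuming an initialized NSE instance; the index name is illustrative. Passing a year as start keeps only symbols whose date_of_listing falls on or before the resulting start date.

# Hypothetical usage: index constituents already listed by 2010.
symbols = nse.get_symbol_list(index='nifty_50', start=2010)
print(len(symbols), symbols[:5])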
Example #10
	def _group_listings(self, time_period):  # parsed may or may not be an attribute
		"""Returns a nested dictionary of grouped, vectorized listings, where the
		groups are categories, then time periods.

		Ex:
		lexicon = ["big", "dog", "barks", "loudly"]
		parsed_listings = [([1,2], ['business'], '2012-01-12'), ([1,3], ['business'], '2013-03-01'),
							([0,1,2], ['marketing'], '2014-04-01'), ([0,2], ['marketing'], '2013-01-01')]
		group_listings(parsed_listings, lexicon_size=4, time_period='year')

		=> {"business": {datetime.date(2012, 1, 1): [[0,1,1,0]], datetime.date(2013, 1, 1): [[0,1,0,1]]},
			"marketing": {datetime.date(2013, 1, 1): [[1,0,1,0]], datetime.date(2014, 1, 1): [[1,1,1,0]]}}"""
		lexicon_size = len(self._lexicon)
		for (int_tokens, categories, date) in self._parsed_listings:
			date_period = trunc_date(get_date(date), time_period)
			vector_listing = get_vectorized_listing(int_tokens, lexicon_size)
			for category in categories:
				group = self._groups.setdefault(category, {})
				group.setdefault(date_period, []).append(vector_listing)
		return self._groups
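
trunc_date and get_vectorized_listing are assumed by this method but not shown in the source. A sketch consistent with the docstring example, where the vector is a binary bag-of-words over lexicon indices and dates truncate to the start of their period:

import datetime

def get_vectorized_listing(int_tokens, lexicon_size):
    # Hypothetical: 1 at every lexicon index that occurs in the listing.
    vector = [0] * lexicon_size
    for token in int_tokens:
        vector[token] = 1
    return vector

def trunc_date(date, time_period):
    # Hypothetical: truncate a date to the start of its year or month.
    if time_period == 'year':
        return datetime.date(date.year, 1, 1)
    if time_period == 'month':
        return datetime.date(date.year, date.month, 1)
    return date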
Example #11
def run_email(event, context):
    student = event.get('key', 'Lehman, Hayes')
    case = event.get('case', '0a608d99-9484-4333-a735-29179e1e1ef5')
    student_parsed = student.split(', ')
    student_name = student_parsed[1] + ' ' + student_parsed[0]
    print(student_name)
    recipients = ['*****@*****.**', '*****@*****.**']
    if 'queryStringParameters' in event and 'recipients' in event[
            'queryStringParameters']:
        recipients = ast.literal_eval(
            event['queryStringParameters']['recipients'])
    datetime_object = get_date(event)
    current_date = '{d.month}/{d.day}/{d.year}'.format(d=datetime_object)

    cb = ChromeBrowser(datetime_object)
    cb.log_in()
    cb.select_student(case)

    tantrum_graph = cb.get_tantrum_graph(case)
    rm_list = cb.get_recently_mastered(student_name)
    tc_list = cb.get_trial_count(student_name)
    attachment_list = cb.get_attachments()

    if attachment_list:
        send_email(current_date, recipients, attachment_list, rm_list, tc_list,
                   tantrum_graph, student_name)

    return {"statusCode": 200}
Example #12
        ]

    # Files in dataframe
    files_df = images['file_name'].str.extract(
        r'(?P<camera>[^\d]+)(?P<file_number>\d+)')
    files_df['file_number'] = files_df['file_number'].astype(str)
    images = images.merge(files_df, left_index=True, right_index=True)

    # Get dates info into separate cols
    images['created_month'] = images['created'].str[2:4]
    images['created_day'] = images['created'].str[0:2]
    images['created_year'] = images['created'].str[4:6]

    # Construct a sensible date
    images['created'] = get_date(images['created_year'],
                                 images['created_month'],
                                 images['created_day'])

    # For file names, month needs to be A, B, C for 10, 11 and 12
    images.loc[images['created_month'] == '10', 'created_month'] = 'A'
    images.loc[images['created_month'] == '11', 'created_month'] = 'B'
    images.loc[images['created_month'] == '12', 'created_month'] = 'C'
    images['created_month'] = images['created_month'].str.strip('0')

    # Join IMG_ files normally, treat P files separately further down
    # images['file_exists_img'] = images['created_year'] + '/' + images['file_name']
    # images['file_exists_img'] = images['file_exists_img'].isin(file_names_list)
    # Doesn't seem to be any of these, but there might be in the future so let's leave it in

    # More complicated cases for P files
    images['real_file_name'] = images['camera'] + images[
Example #13
    def test_handles_1990s(self):
        year, month, day = pd.Series(['98', '10']), pd.Series(['09', '11']), pd.Series(['01', '02'])
        expected = pd.Series(['1998-09-01', '2010-11-02'])
        assert_series_equal(get_date(year, month, day), expected)
Example #14
    def test_constructs_date(self):
        year, month, day = pd.Series(['19', '20']), pd.Series(['09', '11']), pd.Series(['01', '02'])
        expected = pd.Series(['2019-09-01', '2020-11-02'])
        assert_series_equal(get_date(year, month, day), expected)
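
Together the two tests pin down a century pivot: '98' must map to 1998 while '10', '19', and '20' map to 20xx, so the pivot can sit anywhere in 21..98. A sketch consistent with both tests; the exact pivot of 90 is an assumption:

import pandas as pd

def get_date(year, month, day):
    # Hypothetical: two-digit years >= 90 read as 19xx, the rest as 20xx.
    century = year.astype(int).map(lambda y: '19' if y >= 90 else '20')
    return century + year + '-' + month + '-' + day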
Example #15
    LOGGER = get_logger()
    client = boto3.client('logs')

    LOGGER.info(f"Starting at the '{starting_stage[1]}' stage.")
    if starting_stage_index == 1:
        LOGGER.info("Starting check logs")
        start_datetime, end_datetime = check_logs(client)
        LOGGER.info("Finished check logs")

    if starting_stage_index <= 2:
        LOGGER.info(
            "Downloading failed requests from cloudwatch. This may take a few minutes..."
        )
        if starting_stage_index == 2:
            start_datetime = get_date(
                "What date did the failed requests begin? (dd/mm/yy)")
            end_datetime = get_date(
                "What date did the failed requests end? (dd/mm/yy)"
            ) + timedelta(1)

        output_filename = download_failed_requests(
            client,
            start_datetime - timedelta(seconds=1),
            end_datetime + timedelta(seconds=1),
        )
        LOGGER.info("Downloading complete")

        confirm_or_abort(
            f"You should check the contents of '{os.getenv('HOST_WORKING_DIR') + '/' if os.getenv('HOST_WORKING_DIR') else ''}"
            f"{output_filename}' and ensure the requests to be replayed are correct.\nOnce you've done this enter 'yes'. Or enter 'no' to abort."
        )
Example #16
    def fetch_eod_data(
            self, symbol, start=None, index=False
        ):
        'Fetch all End of Day (EOD) data from NSE'
        from_date = get_date(start, start=True)
        to_date = get_date(start=False)
        # Get data from NSE
        try:
            eod_data = get_history(
                symbol=symbol, index=index,
                start=from_date, end=to_date, series='EQ'
            )
            if eod_data.empty:
                warnings.warn(
                    'No data received from NSE for {0} from {1} to {2}'.
                    format(symbol, from_date.date(), to_date.date())
                )
                return eod_data
            eod_data.index = pd.to_datetime(eod_data.index)
            if index:
                eod_data['prev_close'] = eod_data['Close'].shift(1)
                eod_data['pct_deliverble'] = 100
                eod_data['vwap'] = eod_data['Close']
                eod_data['last'] = eod_data['Close']
                eod_data['trades'] = 0
            else:
                eod_data.drop(
                    ['Series', 'Deliverable Volume'], axis=1, inplace=True
                )
        except Exception as exception:
            warnings.warn(
                'Could not get data for {0} from NSE due to {1}'.format(symbol, exception)
            )
            return pd.DataFrame()

        rename_columns(eod_data)
        eod_data['symbol'] = symbol
        eod_data = eod_data.reset_index().sort_values(['symbol', 'date', 'close'])
        eod_data = eod_data.drop_duplicates(
            subset=['symbol', 'date'], keep='last'
        )

        # Handle prev_close = 0
        eod_data['prev_close_test'] = eod_data.close.shift(1)
        eod_data.loc[eod_data.prev_close == 0, 'prev_close'] = eod_data.loc[eod_data.prev_close == 0, 'prev_close_test']
        eod_data.drop(['prev_close_test'], axis=1, inplace=True)

        from_date = eod_data.date.min()
        to_date = eod_data.date.max()
        traded_dates = self.get_traded_dates(
            start=from_date,
            end=to_date
        )
        traded_dates = pd.DataFrame(index=traded_dates.index)
        missing_dates = traded_dates.index.difference(eod_data.date)
        eod_data = traded_dates.join(eod_data.set_index('date'), how='outer')
        traded_dates = pd.DataFrame(index=eod_data.index)
        traded_dates['date_count'] = [i+1 for i in range(len(traded_dates))]

        if len(missing_dates) > 0:
            for i in missing_dates:
                date_count = traded_dates.loc[i]['date_count']
                back_date = traded_dates[traded_dates.date_count == date_count-1].index.values[0]
                next_first_valid_date = eod_data.loc[i:].symbol.first_valid_index()
                if next_first_valid_date is None:
                    next_first_valid_date = TODAY
                if eod_data.loc[back_date, 'close'] == eod_data.loc[next_first_valid_date, 'prev_close']:
                    close = eod_data.loc[back_date, 'close']
                    eod_data.loc[i, ['symbol']] = symbol
                    eod_data.loc[i, ['prev_close', 'open', 'high', 'low', 'last', 'close', 'vwap']] = close
                    eod_data.loc[i, ['volume', 'turnover', 'trades', 'pct_deliverble']] = 0
        missing_count = len(traded_dates) - eod_data.symbol.count()
        if missing_count > 0:
            warnings.warn(
                ' {0} missing rows in {1}'.format(missing_count, symbol)
            )
        eod_data['simple_returns'] = (
            (eod_data.close - eod_data.prev_close) / eod_data.prev_close
        )
        eod_data['log_returns'] = np.log(eod_data.close / eod_data.prev_close)
        eod_data['high_low_spread'] = (eod_data.high - eod_data.low) / eod_data.low * 100
        eod_data['open_close_spread'] = (eod_data.close - eod_data.open) / eod_data.open * 100
        eod_data['pct_deliverble'] = eod_data['pct_deliverble'] * 100

        # Handle abnormal returns. i.e Splits
        abnormal_condition = (eod_data.simple_returns < -0.35) | (eod_data.simple_returns > 0.35)
        eod_data.loc[abnormal_condition, ['simple_returns']] = (
            (eod_data[abnormal_condition].high_low_spread + eod_data[abnormal_condition].open_close_spread) / (2 * 100)
        )
        eod_data.loc[abnormal_condition, ['log_returns']] = (
            (eod_data[abnormal_condition].high_low_spread + eod_data[abnormal_condition].open_close_spread) / (2 * 100)
        )
        eod_data.index.name = 'date'
        eod_data = eod_data.reset_index().set_index(['symbol', 'date'])
        eod_data = eod_data.astype(float)
        if index:
            eod_data = eod_data.drop(['pct_deliverble', 'vwap', 'last', 'trades'], axis=1)
        return eod_data
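
A hedged usage sketch, assuming this method lives on the same class as get_traded_dates above; the symbol is illustrative.

# Hypothetical usage: EOD history for one symbol from 2018 onwards,
# returned with a (symbol, date) MultiIndex.
market = Market()
sbin = market.fetch_eod_data('SBIN', start=2018)
print(sbin[['close', 'log_returns']].tail())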