Ejemplo n.º 1
0
def write_content_to_csv(content, alpha_index):
    """Write one batch of scraped listings to its own raw CSV file.

    The file is named ``lse-full-scrape-<alpha_index>.csv`` and its location
    is obtained from ``utils.get_raw_file``. No header row is written; every
    field is quoted and rows use the column order company, currency, price,
    symbol, isin.

    Args:
        content: Iterable of mappings providing the keys ``company``,
            ``currency``, ``price``, ``symbol`` and ``isin``.
        alpha_index: Value interpolated into the output file name.

    Raises:
        KeyError: If an item lacks one of the expected keys.
    """
    # Imported locally because this block cannot see the module's imports.
    import csv

    columns = ('company', 'currency', 'price', 'symbol', 'isin')
    path = utils.get_raw_file('lse-full-scrape-{}.csv'.format(alpha_index))
    with open(path, 'w') as file:
        # csv.writer doubles any embedded '"', so a value such as
        # 'ACME "A" SHARES' no longer yields a malformed row.
        writer = csv.writer(file, quoting=csv.QUOTE_ALL, lineterminator='\n')
        for item in content:
            # '{}'.format() keeps the previous rendering of non-string
            # values (for example None is still written as "None").
            writer.writerow(
                ['{}'.format(item[column]) for column in columns])
Ejemplo n.º 2
0
def write_content_to_csv(content, index_label, page):
    """Write a partial scrape result to its own raw CSV file.

    The file is named ``<index_label>-partial-page-upto-<page>.csv`` and its
    location is obtained from ``utils.get_raw_file``. No header row is
    written; every field is quoted and rows use the column order symbol,
    company, isin, currency, price.

    Args:
        content: Iterable of mappings providing the keys ``symbol``,
            ``company``, ``isin``, ``currency`` and ``price``.
        index_label: First value interpolated into the output file name.
        page: Second value interpolated into the output file name.

    Raises:
        KeyError: If an item lacks one of the expected keys.
    """
    # Imported locally because this block cannot see the module's imports.
    import csv

    columns = ('symbol', 'company', 'isin', 'currency', 'price')
    path = utils.get_raw_file(
        '{}-partial-page-upto-{}.csv'.format(index_label, page))
    with open(path, 'w') as file:
        # csv.writer doubles any embedded '"', so a value such as
        # 'ACME "A" SHARES' no longer yields a malformed row.
        writer = csv.writer(file, quoting=csv.QUOTE_ALL, lineterminator='\n')
        for item in content:
            # '{}'.format() keeps the previous rendering of non-string
            # values (for example None is still written as "None").
            writer.writerow(
                ['{}'.format(item[column]) for column in columns])
Ejemplo n.º 3
0
def scrape_index_constituents(index_label, index_name):
    """Fetch an index's constituents, save them as CSV and return them.

    The page at ``CONSTITUENTS_BASE_URL + index_name`` is fetched with
    ``fetch_content`` and converted with ``convert_content_to_dataframe``.
    The result is written, without the index column, to
    ``<index_label>_components.csv`` at the location given by
    ``utils.get_raw_file``.

    Args:
        index_label: Label passed to ``fetch_content`` and used as the prefix
            of the output file name.
        index_name: Suffix appended to ``CONSTITUENTS_BASE_URL``.

    Returns:
        The object produced by ``convert_content_to_dataframe``.
    """
    raw_content = fetch_content(index_label,
                                CONSTITUENTS_BASE_URL + index_name)
    constituents = convert_content_to_dataframe(raw_content)

    output_name = '%s_components.csv' % index_label
    logger.info('Saving raw output to %s' % output_name)
    output_path = utils.get_raw_file(output_name)
    constituents.to_csv(output_path, index=False)

    return constituents
Ejemplo n.º 4
0
def load_international_passengers():
    """Load the international routes passenger file with derived columns.

    Reads ``international-routes-pax.csv`` from the location given by
    ``utils.get_raw_file`` and sets three columns:

    * ``date``: ``yearMonth`` parsed with the ``%Y%m`` format; values that
      cannot be parsed are coerced instead of raising.
    * ``regionDest``: the original value with every character other than an
      ASCII letter removed.
    * ``route``: ``"<airportOrigin> - <airportDest> (<countryDest>)"``.

    Returns:
        pandas.DataFrame: The loaded rows including the columns above.
    """
    raw = pd.read_csv(utils.get_raw_file('international-routes-pax.csv'))
    raw = raw.assign(
        date=lambda x: pd.to_datetime(
            x.yearMonth, format='%Y%m', errors='coerce'))
    # regex=True is spelled out because the default became False in
    # pandas 2.0, which would treat the character class as a literal string
    # and silently leave regionDest untouched.
    raw = raw.assign(
        regionDest=lambda x: x.regionDest.str.replace(
            '[^a-zA-Z]', '', regex=True))

    raw['route'] = raw.apply(
        lambda x: '{} - {} ({})'.format(
            x.airportOrigin, x.airportDest, x.countryDest),
        axis=1)
    return raw
Ejemplo n.º 5
0
def load_all():
    """Load every sheet listed in ``company.files`` into one DataFrame.

    Each entry of ``company.files`` provides a ``year`` and a list of
    ``sheets``. The sheet at position ``idx`` is loaded through
    ``load_excel_sheet`` with ``dt(year, idx + 1, 1)`` as its load date.
    A sheet that fails to load is logged and skipped so one bad sheet does
    not abort the whole run. When at least one sheet loaded, the combined
    data is also written to ``company_files_all.csv`` at the location given
    by ``utils.get_raw_file``.

    Returns:
        pandas.DataFrame: Concatenation of all successfully loaded sheets,
        or an empty DataFrame when nothing could be loaded.
    """
    data = []
    missed_sheets = []

    for load_def in company.files:
        for idx, sheet in enumerate(load_def['sheets']):
            load_date = dt(load_def['year'], idx + 1, 1)
            label = load_date.strftime('%Y-%B')
            logger.info('Loading %s', label)
            try:
                data.append(load_excel_sheet(load_date, load_def, idx, sheet))
            except Exception:
                # Exception (not a bare except) so Ctrl-C and SystemExit
                # still interrupt a long load.
                missed_sheets.append(label)
                logger.exception('LOAD SHEET FAIL')

    if missed_sheets:
        logger.info('Missing: %s', missed_sheets)

    if not data:
        # Previously this path hit ``return df`` with ``df`` never assigned
        # and raised UnboundLocalError.
        return pd.DataFrame()

    df = pd.concat(data)
    filename = 'company_files_all.csv'
    logger.info('Writing raw output file %s', filename)
    df.to_csv(utils.get_raw_file(filename), index=False)

    return df
Ejemplo n.º 6
0
    for alpha_index in list(string.ascii_uppercase):
        try:
            content = symbols.fetch_content('all', base_url + alpha_index)
            write_content_to_csv(content, alpha_index)
            master.extend(content)
        except:
            logger.exception('SCRAPE FAILURE')
            return master

    return master


def write_content_to_csv(content, alpha_index):
    """Write one batch of scraped listings to its own raw CSV file.

    The file is named ``lse-full-scrape-<alpha_index>.csv`` and its location
    is obtained from ``utils.get_raw_file``. No header row is written; every
    field is quoted and rows use the column order company, currency, price,
    symbol, isin.

    Args:
        content: Iterable of mappings providing the keys ``company``,
            ``currency``, ``price``, ``symbol`` and ``isin``.
        alpha_index: Value interpolated into the output file name.

    Raises:
        KeyError: If an item lacks one of the expected keys.
    """
    # Imported locally because this block cannot see the module's imports.
    import csv

    columns = ('company', 'currency', 'price', 'symbol', 'isin')
    path = utils.get_raw_file('lse-full-scrape-{}.csv'.format(alpha_index))
    with open(path, 'w') as file:
        # csv.writer doubles any embedded '"', so a value such as
        # 'ACME "A" SHARES' no longer yields a malformed row.
        writer = csv.writer(file, quoting=csv.QUOTE_ALL, lineterminator='\n')
        for item in content:
            # '{}'.format() keeps the previous rendering of non-string
            # values (for example None is still written as "None").
            writer.writerow(
                ['{}'.format(item[column]) for column in columns])


if __name__ == "__main__":
    # Script entry point: collect the scraped content, then persist the
    # combined result as a single CSV next to the other raw files.
    content = scrape_lse_full()

    try:
        df = symbols.convert_content_to_dataframe(content)
        df.to_csv(utils.get_raw_file('lse_all_components.csv'), index=False)
    except Exception:
        # Exception instead of a bare except: conversion/write failures are
        # still logged with a traceback, but Ctrl-C and SystemExit are no
        # longer swallowed.
        logger.exception('DF FAILURE')
Ejemplo n.º 7
0
def fetch_ftse_aim_allshare_symbols():
    """Download adjusted daily prices for the FTSE AIM All-Share file.

    Symbols are read with ``get_symbols`` from
    ``ftse_aim_allshare_components-20180809.csv`` (located through
    ``utils.get_raw_file``) and handed to ``download_adj_daily_prices``.
    Nothing is returned.
    """
    components_path = utils.get_raw_file(
        'ftse_aim_allshare_components-20180809.csv')
    symbol_list = get_symbols(components_path)
    download_adj_daily_prices(symbol_list)
Ejemplo n.º 8
0
def fetch_ftse_smallcap_symbols():
    """Download adjusted daily prices for the FTSE SmallCap file.

    Symbols are read with ``get_symbols`` from
    ``ftse_smallcap_components-20180808.csv`` (located through
    ``utils.get_raw_file``) and handed to ``download_adj_daily_prices``.
    Nothing is returned.
    """
    components_path = utils.get_raw_file(
        'ftse_smallcap_components-20180808.csv')
    symbol_list = get_symbols(components_path)
    download_adj_daily_prices(symbol_list)
Ejemplo n.º 9
0
def fetch_aim100_symbols():
    """Download adjusted daily prices for the AIM 100 file.

    Symbols are read with ``get_symbols`` from
    ``aim100_components-20180808.csv`` (located through
    ``utils.get_raw_file``) and handed to ``download_adj_daily_prices``.
    Nothing is returned.
    """
    components_path = utils.get_raw_file('aim100_components-20180808.csv')
    symbol_list = get_symbols(components_path)
    download_adj_daily_prices(symbol_list)
Ejemplo n.º 10
0
def get_currencies():
    """Return the sorted distinct currencies other than GBP and GBX.

    The values come from the ``currency`` column of
    ``lse_all_components-20180808.csv``, located through
    ``utils.get_raw_file``.

    Returns:
        list: Unique ``currency`` values, excluding ``'GBP'`` and ``'GBX'``,
        in ascending order.
    """
    components = pd.read_csv(
        utils.get_raw_file('lse_all_components-20180808.csv'),
        low_memory=False)
    excluded = components.currency.isin(['GBP', 'GBX'])
    remaining = components.loc[~excluded]
    return sorted(remaining.currency.unique())