def __init__(self, app_info):
    """Populate ``self.attrs`` from a mapping of raw field values.

    Each key has '(' and ')' removed and '/' replaced with '_'. Values equal
    to '' or '#REF!' are stored as None. Any other value is converted with
    ``_convert_type`` according to which of DATE_FIELDS, INT_FIELDS,
    FLOAT_FIELDS or BOOLEAN_FIELDS contains the cleaned key; for int and
    float fields a value of 'n/a' (any case) is first replaced by '0'.
    Values for keys in none of those lists are kept, re-encoded from latin1
    to utf-8 only when running on Python 2.

    If both 'x-coordinate' and 'y-coordinate' end up non-None, 'lat' and
    'lon' are added from ``Coord(x, y).as_wgs84()``.
    """
    self.logger = logging.getLogger(__name__)
    self.attrs = {}

    for raw_key in app_info.keys():
        val = app_info[raw_key]
        key = raw_key.replace('(', '').replace(')', '').replace('/', '_')

        # Placeholder values carry no data.
        if val in ['', '#REF!']:
            self.attrs[key] = None
            continue

        numeric_fields = self.INT_FIELDS + self.FLOAT_FIELDS
        if key in numeric_fields and val.lower() == 'n/a':
            val = '0'

        # Work out which conversion, if any, applies to this field.
        if key in self.DATE_FIELDS:
            kind = 'date'
        elif key in self.INT_FIELDS:
            kind = 'int'
        elif key in self.FLOAT_FIELDS:
            kind = 'float'
        elif key in self.BOOLEAN_FIELDS:
            kind = 'bool'
        else:
            kind = None

        if kind is not None:
            val = _convert_type(val, kind)
        elif sys.version_info < (3, 0):
            val = val.decode('latin1').encode('utf-8')

        self.attrs[key] = val

    x_val = self.attrs.get('x-coordinate')
    y_val = self.attrs.get('y-coordinate')
    if x_val is not None and y_val is not None:
        coord = Coord(x_val, y_val)
        self.attrs['lat'], self.attrs['lon'] = coord.as_wgs84()
def _parse_content(self, content):
    """Extract auction rows from an HTML page and refresh period averages.

    Every table whose first header cell reads 'Auction Date' is scanned.
    Each data row is appended to ``self.auctions`` as a dict with the keys
    'date', 'average_price', 'lowest_price', 'total_volume',
    'co_fired_volume' and 'period' (the auction date as YYYYMM).
    ``self.periods`` is then updated so each period maps to its average
    price, or to the mean of the average prices when a period has several
    auctions.

    :param content: HTML document to parse
    :returns: True
    """
    document = html5lib.parse(content, treebuilder="lxml",
                              namespaceHTMLElements=False)
    for tbl in document.xpath('.//table'):
        ths = tbl.xpath('.//tr//th')
        if len(ths) == 0 or ths[0].text != 'Auction Date':
            continue
        for row in tbl.xpath('.//tr')[1:]:
            row_data = row.xpath('td')
            if len(row_data) == 0:
                continue
            # lxml reports the text of an empty cell as None, so normalise
            # before stripping; otherwise a blank row raises AttributeError
            # instead of being skipped.
            raw_date = (row_data[0].text or '').strip()
            if raw_date == '':
                continue
            # Drop the ordinal suffix ("1st", "22nd", ...) from the day.
            # [A-Za-z] replaces [A-z], which also matched punctuation
            # sitting between 'Z' and 'a' in ASCII.
            str_good_date = sub(r'([0-9]*)[a-z]*( [A-Za-z]* [0-9]*)',
                                r'\1\2', raw_date)
            dtt = datetime.strptime(str_good_date, "%d %B %Y").date()
            auction_info = {
                'date': dtt,
                # [1:] drops the leading character before the number
                # (presumably a currency symbol; confirm against the page).
                'average_price': _convert_type(row_data[1].text.strip()[1:], 'float'),
                'lowest_price': _convert_type(row_data[2].text.strip()[1:], 'float'),
                'total_volume': _convert_type(row_data[3].text or '0', 'int'),
                'co_fired_volume': _convert_type(row_data[4].text or '0', 'int'),
                'period': "{}{:02d}".format(dtt.year, dtt.month),
            }
            self.auctions.append(auction_info)

    # Collect prices per period: a single price is stored bare, several
    # prices are gathered into a list that is averaged below.
    # NOTE(review): this walks all of self.auctions, so calling the method
    # twice on the same instance would re-count earlier auctions - confirm
    # it is only ever called once per instance.
    for info in self.auctions:
        period = info['period']
        price = info['average_price']
        if period not in self.periods:
            self.periods[period] = price
            continue
        previous = self.periods[period]
        if isinstance(previous, list):
            previous.append(price)
        else:
            self.periods[period] = [price, previous]

    for key, value in self.periods.items():
        if isinstance(value, list):
            self.periods[key] = sum(value) / len(value)
    return True
def __init__(self, app_info):
    """Populate ``self.attrs`` from a mapping of raw field values.

    Each key has '(' and ')' removed and '/' replaced with '_'. Values equal
    to '' or '#REF!' are stored as None. Any other value is converted with
    ``_convert_type`` according to which of DATE_FIELDS, INT_FIELDS,
    FLOAT_FIELDS or BOOLEAN_FIELDS contains the cleaned key; for int and
    float fields a value of 'n/a' (any case) is first replaced by '0'.
    Values for keys in none of those lists are kept, re-encoded from latin1
    to utf-8 only when running on Python 2.

    If both 'x-coordinate' and 'y-coordinate' end up non-None, 'lat' and
    'lon' are added from ``Coord(x, y).as_wgs84()``.
    """
    self.logger = logging.getLogger(__name__)
    self.attrs = {}

    for raw_key in app_info.keys():
        val = app_info[raw_key]
        key = raw_key.replace('(', '').replace(')', '').replace('/', '_')

        # Placeholder values carry no data.
        if val in ['', '#REF!']:
            self.attrs[key] = None
            continue

        numeric_fields = self.INT_FIELDS + self.FLOAT_FIELDS
        if key in numeric_fields and val.lower() == 'n/a':
            val = '0'

        # Work out which conversion, if any, applies to this field.
        if key in self.DATE_FIELDS:
            kind = 'date'
        elif key in self.INT_FIELDS:
            kind = 'int'
        elif key in self.FLOAT_FIELDS:
            kind = 'float'
        elif key in self.BOOLEAN_FIELDS:
            kind = 'bool'
        else:
            kind = None

        if kind is not None:
            val = _convert_type(val, kind)
        elif sys.version_info < (3, 0):
            val = val.decode('latin1').encode('utf-8')

        self.attrs[key] = val

    x_val = self.attrs.get('x-coordinate')
    y_val = self.attrs.get('y-coordinate')
    if x_val is not None and y_val is not None:
        coord = Coord(x_val, y_val)
        self.attrs['lat'], self.attrs['lon'] = coord.as_wgs84()
def __init__(self, el):
    """Create a fuel record from a single FUEL element.

    The element is expected to look like:

        <FUEL TYPE="OTHER" IC="N" VAL="13528" PCT="1.6"/>

    TYPE and IC are stored unchanged as ``type`` and ``icr``. VAL and PCT
    are passed through ``_convert_type`` as 'int' and 'float' and stored as
    ``val`` and ``pct``.
    """
    self.type = el.get("TYPE")
    self.icr = el.get("IC")

    raw_val = el.get("VAL")
    self.val = _convert_type(raw_val, 'int')

    raw_pct = el.get("PCT")
    self.pct = _convert_type(raw_pct, 'float')
def _extract_row_data(self, wbb, sht, rownum):
    """Read one worksheet row and append it to ``self.units``.

    Columns 0-6 of row ``rownum`` are stored under the keys 'sett_id',
    'ngc_id', 'name', 'reg_capacity', 'date_added', 'bmunit' and 'cap'.
    'date_added' comes from ``_mkdate``; 'bmunit' and 'cap' are converted
    with ``_convert_type`` as 'bool' and 'float'. A row whose 'ngc_id' cell
    is '' is not appended.
    """
    def cell_value(col):
        return sht.cell(rownum, col).value

    row_data = {}
    row_data['sett_id'] = cell_value(0)
    row_data['ngc_id'] = cell_value(1)
    row_data['name'] = cell_value(2)
    row_data['reg_capacity'] = cell_value(3)
    row_data['date_added'] = _mkdate(wbb, sht, rownum, 4)
    row_data['bmunit'] = _convert_type(cell_value(5), 'bool')
    row_data['cap'] = _convert_type(cell_value(6), 'float')

    if row_data['ngc_id'] != '':
        self.units.append(row_data)
def _parse_content(self, content):
    """Extract auction rows from an HTML page and refresh period averages.

    Every table whose first header cell reads 'Auction Date' is scanned.
    Each data row is appended to ``self.auctions`` as a dict with the keys
    'date', 'average_price', 'lowest_price', 'total_volume',
    'co_fired_volume' and 'period' (the auction date as YYYYMM).
    ``self.periods`` is then updated so each period maps to its average
    price, or to the mean of the average prices when a period has several
    auctions.

    :param content: HTML document to parse
    :returns: True
    """
    document = html5lib.parse(content, treebuilder="lxml",
                              namespaceHTMLElements=False)
    for tbl in document.xpath('.//table'):
        ths = tbl.xpath('.//tr//th')
        if len(ths) == 0 or ths[0].text != 'Auction Date':
            continue
        for row in tbl.xpath('.//tr')[1:]:
            row_data = row.xpath('td')
            if len(row_data) == 0:
                continue
            # lxml reports the text of an empty cell as None, so normalise
            # before stripping; otherwise a blank row raises AttributeError
            # instead of being skipped.
            raw_date = (row_data[0].text or '').strip()
            if raw_date == '':
                continue
            # Drop the ordinal suffix ("1st", "22nd", ...) from the day.
            # [A-Za-z] replaces [A-z], which also matched punctuation
            # sitting between 'Z' and 'a' in ASCII.
            str_good_date = sub(r'([0-9]*)[a-z]*( [A-Za-z]* [0-9]*)',
                                r'\1\2', raw_date)
            dtt = parse_date_string(str_good_date)
            auction_info = {
                'date': dtt,
                # [1:] drops the leading character before the number
                # (presumably a currency symbol; confirm against the page).
                'average_price': _convert_type(row_data[1].text.strip()[1:], 'float'),
                'lowest_price': _convert_type(row_data[2].text.strip()[1:], 'float'),
                'total_volume': _convert_type(row_data[3].text or '0', 'int'),
                'co_fired_volume': _convert_type(row_data[4].text or '0', 'int'),
                'period': "{}{:02d}".format(dtt.year, dtt.month),
            }
            self.auctions.append(auction_info)

    # Collect prices per period: a single price is stored bare, several
    # prices are gathered into a list that is averaged below.
    # NOTE(review): this walks all of self.auctions, so calling the method
    # twice on the same instance would re-count earlier auctions - confirm
    # it is only ever called once per instance.
    for info in self.auctions:
        period = info['period']
        price = info['average_price']
        if period not in self.periods:
            self.periods[period] = price
            continue
        previous = self.periods[period]
        if isinstance(previous, list):
            previous.append(price)
        else:
            self.periods[period] = [price, previous]

    for key, value in self.periods.items():
        if isinstance(value, list):
            self.periods[key] = sum(value) / len(value)
    return True
def main():
    """Search Ofgem for stations and export their certificates to Excel.

    Parses the start and end periods (YYYYMM) from the command line, then
    gathers station names from ``--stations``, from the optional input file
    and from an interactive prompt. Each name is looked up with
    ``StationSearch``; every station found gets a ``CertificateSearch`` for
    the requested date range, and the results are written to an ``.xls``
    workbook.

    Exits with status 0 if no station names were supplied.
    """
    parser = commandline_parser(
        'Download bulk information from Ofgem to produce an Excel spreadsheet')
    parser.add_argument('start', type=int, help='Period to start (YYYYMM)')
    parser.add_argument('end', type=int, help='Period to finish (YYYYMM)')
    parser.add_argument('--filename',
                        default='certificates.xls',
                        help='Filename to export to')
    parser.add_argument('--stations', nargs='*', help='Stations to search for')
    args = parser.parse_args()
    print(args)

    if not args.filename.endswith('.xls'):
        args.filename += '.xls'

    # Build the first day of every month in the requested range; only the
    # count is reported below.
    periods = []
    start_dt = _convert_type(args.start, 'period')
    end_dt = _convert_type(args.end, 'period')
    for yyy in range(start_dt.year, end_dt.year + 1):
        first_month = start_dt.month if start_dt.year == yyy else 1
        last_month = end_dt.month if end_dt.year == yyy else 12
        for mon in range(first_month, last_month + 1):
            periods.append(date(yyy, mon, 1))
    print("Period covered will be {} to {}. A total of {} periods".format(
        start_dt.strftime("%b-%Y"), end_dt.strftime("%b-%Y"), len(periods)))

    # raw_input only exists on Python 2; on Python 3 the equivalent is input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    stations = []
    station_names = args.stations or []

    # NOTE(review): no --input option is added here, so this assumes
    # commandline_parser defines it - confirm.
    if args.input is not None:
        with open(args.input) as fh:
            for line in fh:
                # Keep only the text before any '#' and skip lines that are
                # blank or hold nothing but a comment.
                station = line.split('#', 1)[0].strip()
                if station:
                    station_names.append(station)

    if len(station_names) > 0:
        print("Station names to be searched for:")
        for stat in station_names:
            print(" - {}".format(stat))

    while True:
        station = read_line("Enter a station name (or blank to finish)")
        if station.strip() == '':
            break
        # Accept one name or a comma separated list; surrounding whitespace
        # and empty entries are dropped so they are not searched for.
        for part in station.split(','):
            part = part.strip()
            if part:
                station_names.append(part)

    if len(station_names) == 0:
        print("No stations to process. Exiting...")
        sys.exit(0)

    print("\nSearching for stations...")
    for name in station_names:
        print(" - {}".format(name))
        sss = StationSearch()
        sss.start()
        if sss.filter_name(name) and sss.get_data():
            stations.extend(sss.stations)
            print(" found")
        else:
            print(" no stations found")

    print("A total of {} stations will be recorded".format(len(stations)))
    wbb = Workbook()
    add_station_sheet(wbb, stations)

    print("\nGetting certificate data (this is quicker)...")
    # NOTE(review): this dict is filled from ocs.cert_list but never read,
    # while the sheet uses ocs.certificates - confirm which is intended.
    certificates = {}
    for station in stations:
        print(" - {}".format(station.name))
        ocs = CertificateSearch()
        ocs.start()
        if ocs.filter_generator_id(station.generator_id) and \
                ocs.set_start_month(start_dt.month) and \
                ocs.set_start_year(start_dt.year) and \
                ocs.set_finish_month(end_dt.month) and \
                ocs.set_finish_year(end_dt.year) and \
                ocs.get_data():
            certificates[station.name] = ocs.cert_list
            add_certificate_sheet(wbb, station, ocs.certificates)
            print(" added to spreadsheet")
        else:
            print(" nothing to add")

    wbb.save(args.filename)
    print("\nData saved to {}".format(args.filename))
def main():
    """Search Ofgem for stations and export their certificates to Excel.

    Parses the start and end periods (YYYYMM) from the command line, then
    gathers station names from ``--stations``, from the optional input file
    and from an interactive prompt. Each name is looked up with
    ``StationSearch``; every station found gets a ``CertificateSearch`` for
    the requested date range, and the results are written to an ``.xls``
    workbook.

    Exits with status 0 if no station names were supplied.
    """
    parser = commandline_parser(
        'Download bulk information from Ofgem to produce an Excel spreadsheet')
    parser.add_argument('start', type=int, help='Period to start (YYYYMM)')
    parser.add_argument('end', type=int, help='Period to finish (YYYYMM)')
    parser.add_argument('--filename',
                        default='certificates.xls',
                        help='Filename to export to')
    parser.add_argument('--stations', nargs='*', help='Stations to search for')
    args = parser.parse_args()
    print(args)

    if not args.filename.endswith('.xls'):
        args.filename += '.xls'

    # Build the first day of every month in the requested range; only the
    # count is reported below.
    periods = []
    start_dt = _convert_type(args.start, 'period')
    end_dt = _convert_type(args.end, 'period')
    for yyy in range(start_dt.year, end_dt.year + 1):
        first_month = start_dt.month if start_dt.year == yyy else 1
        last_month = end_dt.month if end_dt.year == yyy else 12
        for mon in range(first_month, last_month + 1):
            periods.append(date(yyy, mon, 1))
    print("Period covered will be {} to {}. A total of {} periods".format(
        start_dt.strftime("%b-%Y"), end_dt.strftime("%b-%Y"), len(periods)))

    # raw_input only exists on Python 2; on Python 3 the equivalent is input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    stations = []
    station_names = args.stations or []

    # NOTE(review): no --input option is added here, so this assumes
    # commandline_parser defines it - confirm.
    if args.input is not None:
        with open(args.input) as fh:
            for line in fh:
                # Keep only the text before any '#' and skip lines that are
                # blank or hold nothing but a comment.
                station = line.split('#', 1)[0].strip()
                if station:
                    station_names.append(station)

    if len(station_names) > 0:
        print("Station names to be searched for:")
        for stat in station_names:
            print(" - {}".format(stat))

    while True:
        station = read_line("Enter a station name (or blank to finish)")
        if station.strip() == '':
            break
        # Accept one name or a comma separated list; surrounding whitespace
        # and empty entries are dropped so they are not searched for.
        for part in station.split(','):
            part = part.strip()
            if part:
                station_names.append(part)

    if len(station_names) == 0:
        print("No stations to process. Exiting...")
        sys.exit(0)

    print("\nSearching for stations...")
    for name in station_names:
        print(" - {}".format(name))
        sss = StationSearch()
        sss.start()
        if sss.filter_name(name) and sss.get_data():
            stations.extend(sss.stations)
            print(" found")
        else:
            print(" no stations found")

    print("A total of {} stations will be recorded".format(len(stations)))
    wbb = Workbook()
    add_station_sheet(wbb, stations)

    print("\nGetting certificate data (this is quicker)...")
    # NOTE(review): this dict is filled from ocs.cert_list but never read,
    # while the sheet uses ocs.certificates - confirm which is intended.
    certificates = {}
    for station in stations:
        print(" - {}".format(station.name))
        ocs = CertificateSearch()
        ocs.start()
        if ocs.filter_generator_id(station.generator_id) and \
                ocs.set_start_month(start_dt.month) and \
                ocs.set_start_year(start_dt.year) and \
                ocs.set_finish_month(end_dt.month) and \
                ocs.set_finish_year(end_dt.year) and \
                ocs.get_data():
            certificates[station.name] = ocs.cert_list
            add_certificate_sheet(wbb, station, ocs.certificates)
            print(" added to spreadsheet")
        else:
            print(" nothing to add")

    wbb.save(args.filename)
    print("\nData saved to {}".format(args.filename))