def write_to_local(file_path: str, data: Union[StringIO, bytes, BytesIO, str]) -> int:
    """Write ``data`` as text to ``file_path`` below the configured static folder.

    The destination is ``settings.static_folder_path`` joined with
    ``file_path`` (leading slashes stripped so the path stays relative to
    the static folder). Missing parent directories are created.

    Args:
        file_path: Destination path, relative to the static folder.
        data: Payload to write. ``StringIO`` contents are used as-is;
            ``bytes`` and ``BytesIO`` contents are passed through
            ``decode_bytes`` first; any other truthy value is written
            unchanged.

    Returns:
        The value returned by the file object's ``write`` call (for a file
        opened in text mode this is the number of characters written), or
        ``0`` when there was nothing to write.
    """
    save_folder = settings.static_folder_path
    save_file_path = Path(save_folder) / file_path.lstrip("/")

    # Create the target directory up front. ``exist_ok=True`` avoids the
    # check-then-create race of ``is_dir()`` followed by ``makedirs()`` when
    # several writers target the same folder at once.
    dir_path = save_file_path.resolve().parent
    dir_path.mkdir(parents=True, exist_ok=True)

    # Normalise every supported input type to a plain string.
    write_data: Optional[str] = None

    if isinstance(data, StringIO):
        write_data = data.getvalue()
    elif isinstance(data, BytesIO):
        write_data = decode_bytes(data.getvalue())
    elif isinstance(data, bytes):
        write_data = decode_bytes(data)
    elif data:
        write_data = data

    if not write_data:
        logger.info("No data to write to {}".format(file_path))
        return 0

    # NOTE(review): no explicit encoding is passed, so the platform default
    # applies -- confirm whether UTF-8 should be forced here.
    with open(save_file_path, "w") as fh:
        bytes_written = fh.write(write_data)

    logger.info("Wrote {} to {}".format(bytes_written, save_file_path))

    return bytes_written
def process_item(self, item, spider):
    """Fetch the resource behind ``item["link"]`` and attach it to the item.

    Items without a ``"link"`` key are returned untouched. Otherwise the
    link is first downloaded with ``_fallback_download_handler`` and decoded
    with ``decode_bytes``; when that yields nothing, the link is opened with
    ``open`` and read directly. The result (possibly ``None`` when both
    attempts fail) is stored under ``"content"`` and the link's file
    extension under ``"extension"``.

    Args:
        item: Mapping-like pipeline item.
        spider: The calling spider (unused).

    Returns:
        The same ``item`` object.
    """
    if "link" not in item:
        return item

    url = item["link"]
    _, file_extension = os.path.splitext(url)

    # First attempt: the fallback download handler. Failures are logged and
    # deliberately swallowed so the second attempt below can still run.
    content = None

    try:
        content = decode_bytes(_fallback_download_handler(url))
    except Exception as e:
        logger.error(e)

    if not content:
        # Second attempt: open the link directly.
        fh = None

        try:
            logger.info("Grabbing: {}".format(url))
            fh = open(url)
        except RequestException:
            logger.info("Bad link: {}".format(url))
        except Exception as e:
            logger.error("Error: {}".format(e))

        if fh:
            # Always release the handle, even if the read fails part-way.
            try:
                content = fh.read()
            finally:
                fh.close()

    item["content"] = content
    item["extension"] = file_extension

    return item
def parse(self, response) -> Generator[Dict, None, None]:
    """Yield a single item holding the decoded content of ``response``.

    The MIME type is taken from ``mime_from_content`` and, failing that,
    from ``mime_from_url``. For ``application/zip`` responses every archive
    member is opened (nested ``.zip`` members go through ``_handle_zip``)
    and the streams are concatenated with ``chain_streams``; any other
    response is used as-is. The content is then passed through
    ``decode_bytes``.

    Args:
        response: Response object exposing ``body`` and ``url``.

    Yields:
        A dict with the keys ``"content"``, ``"extension"`` (always
        ``".csv"``) and ``"mime_type"``. Nothing is yielded when the
        response has no content.
    """
    file_mime = mime_from_content(response.body)

    if not file_mime:
        file_mime = mime_from_url(response.url)

    # The zip branch needs a bytes-like body for ``BytesIO(...)`` while the
    # plain branch used to call ``.getvalue()`` on it; those cannot both
    # work on the same type. Normalise once so either a bytes body or a
    # BytesIO-like body is accepted on both paths.
    body = response.body

    if hasattr(body, "getvalue"):
        body = body.getvalue()

    content = None

    if file_mime == "application/zip":
        with ZipFile(BytesIO(body)) as zf:
            streams = []

            for filename in zf.namelist():
                if filename.endswith(".zip"):
                    # Nested archive: delegate to the zip helper.
                    streams.append(_handle_zip(zf.open(filename), "r"))
                else:
                    streams.append(zf.open(filename))

            # Must be read while the archive is still open.
            content = chain_streams(streams).read()
    else:
        content = body

    if not content:
        logger.info("No content from scrapy request")
        return

    content = decode_bytes(content)

    yield {
        "content": content,
        "extension": ".csv",
        "mime_type": file_mime,
    }
def import_nem_interconnects() -> None:
    """Import NEM interconnectors from the bundled MMS CSV into the database.

    Loads ``mms/PUBLIC_DVD_INTERCONNECTOR_202006010000.CSV`` through
    ``load_data``, parses it with ``parse_aemo_csv`` and, for every record
    of the ``MARKET_CONFIG_INTERCONNECTOR`` table, fetches or creates a
    ``Station`` and a ``Facility`` keyed on the interconnector id. Records
    whose from/to region is ``SNOWY1`` are skipped. All changes are
    committed once after the loop.

    A parser failure is logged and ends the import without raising.

    Raises:
        Exception: If a parsed record is not a ``MarketConfigInterconnector``.
    """
    session = SessionLocal()

    # The session is released on every exit path, including the early
    # return after a parser error and any exception raised mid-import.
    try:
        # Load the MMS CSV file that contains interconnector info
        csv_data = load_data(
            "mms/PUBLIC_DVD_INTERCONNECTOR_202006010000.CSV",
            from_project=True,
        )

        # The parser is given a string, so decode anything else first
        if not isinstance(csv_data, str):
            csv_data = decode_bytes(csv_data)

        # Parse the AEMO CSV into schemas
        try:
            aemo_table_set = parse_aemo_csv(csv_data)
        except AEMOParserException as e:
            logger.error(e)
            return None

        records: List[MarketConfigInterconnector] = aemo_table_set.get_table(
            "MARKET_CONFIG_INTERCONNECTOR"
        ).get_records()

        for interconnector in records:
            if not isinstance(interconnector, MarketConfigInterconnector):
                raise Exception("Not what we're looking for ")

            # skip SNOWY
            # @TODO do these need to be remapped for historical
            if (
                interconnector.regionfrom in ["SNOWY1"]
                or interconnector.regionto in ["SNOWY1"]
            ):
                continue

            logger.debug(interconnector)

            # Station: the interconnector id is used as both code and
            # network code.
            interconnector_station = (
                session.query(Station)
                .filter_by(code=interconnector.interconnectorid)
                .filter_by(network_code=interconnector.interconnectorid)
                .one_or_none()
            )

            if not interconnector_station:
                interconnector_station = Station(
                    code=interconnector.interconnectorid,
                    network_code=interconnector.interconnectorid,
                )

            interconnector_station.approved = False
            interconnector_station.created_by = "opennem.importer.interconnectors"

            if not interconnector_station.location:
                interconnector_station.location = Location(
                    state=state_from_network_region(interconnector.regionfrom)
                )

            interconnector_station.name = interconnector.description

            # NOTE: only one facility is created per interconnector, on the
            # ``regionfrom`` side. A loop over both regionfrom and regionto
            # was sketched here at some point but never enabled.
            int_facility = (
                session.query(Facility)
                .filter_by(code=interconnector.interconnectorid)
                .filter_by(dispatch_type=DispatchType.GENERATOR)
                .filter_by(network_id="NEM")
                .filter_by(network_region=interconnector.regionfrom)
                .one_or_none()
            )

            if not int_facility:
                int_facility = Facility(  # type: ignore
                    code=interconnector.interconnectorid,
                    dispatch_type=DispatchType.GENERATOR,
                    network_id="NEM",
                    network_region=interconnector.regionfrom,
                )

            int_facility.status_id = "operating"
            int_facility.approved = False
            int_facility.created_by = "opennem.importer.interconnectors"
            int_facility.fueltech_id = None
            int_facility.interconnector = True
            int_facility.interconnector_region_to = interconnector.regionto

            interconnector_station.facilities.append(int_facility)

            session.add(interconnector_station)

            logger.debug(
                "Created interconnector station: {}".format(
                    interconnector_station.code
                )
            )

        session.commit()
    finally:
        session.close()

    return None