def get_sample_records(self, name: str, storage: storage.Storage) -> Records:
    """Yield records parsed from a sample of the object stored under ``name``.

    The object is fetched through ``storage.get_api().get(name)`` and must be
    a ``SampleableIterator``. The sample is whatever its
    ``head(SAMPLE_SIZE_LINES)`` returns, and that sample is handed to
    ``read_csv``; each resulting record is yielded in turn.
    """
    api = storage.get_api()
    stored = api.get(name)
    assert isinstance(stored, SampleableIterator)
    head_lines = stored.head(SAMPLE_SIZE_LINES)
    for record in read_csv(head_lines):
        yield record
def str_as_dataframe(
    env: Environment,
    test_data: str,
    module: Optional[SnapflowModule] = None,
    nominal_schema: Optional[Schema] = None,
) -> DataFrame:
    """Build a DataFrame from test data given as a file name or raw CSV text.

    If ``test_data`` ends with ``.csv`` or ``.json`` it is treated as the name
    of a file opened through ``module.open_module_file``: CSV files are parsed
    with ``read_csv`` and JSON files are parsed line by line with
    ``read_json``. Any other value is parsed as raw CSV text with
    ``read_raw_string_csv``.

    The parsed records are also put into ``env._local_python_storage`` under a
    random ``_test_obj_*`` name. When ``nominal_schema`` is None a schema is
    inferred for that stored object; otherwise the stored object is cast to
    ``nominal_schema``. The temporary object is not removed here.

    Raises:
        RuntimeError: if ``test_data`` names a ``.csv``/``.json`` file but
            ``module`` is None.
    """
    # TODO: add conform_dataframe_to_schema option
    is_csv_file = test_data.endswith(".csv")
    is_json_file = test_data.endswith(".json")
    if is_csv_file or is_json_file:
        if module is None:
            # An explicit error replaces a bare ``raise``, which only produced
            # "No active exception to re-raise". The type stays RuntimeError.
            raise RuntimeError(
                f"A module is required to load test data file {test_data!r}"
            )
        with module.open_module_file(test_data) as f:
            if is_csv_file:
                raw_records = list(read_csv(f.readlines()))
            else:
                raw_records = [read_json(line) for line in f]
    else:
        # Raw str csv
        raw_records = list(read_raw_string_csv(test_data))
    tmp = "_test_obj_" + rand_str()
    env._local_python_storage.get_api().put(tmp, raw_records)
    if nominal_schema is None:
        nominal_schema = infer_schema_for_name(tmp, env._local_python_storage)
    else:
        PythonRecordsHandler().cast_to_schema(
            tmp, env._local_python_storage, nominal_schema
        )
    # NOTE(review): the DataFrame is built from ``raw_records``, not from the
    # stored object, so it reflects the cast only if ``cast_to_schema``
    # mutates the records in place -- confirm.
    return DataFrame.from_records(raw_records)
def fetch_prices(params: Dict, tries: int = 0) -> Optional[Records]:
    """Fetch price records from the Alpha Vantage endpoint as parsed CSV rows.

    A response body that parses as JSON is inspected first: if
    ``is_alphavantage_error`` matches, the failure is printed and None is
    returned; if ``is_alphavantage_rate_limit`` matches, the call sleeps 60
    seconds and retries. A body that is not JSON is parsed with ``read_csv``.

    Args:
        params: Query parameters passed to ``conn.get``.
        tries: Number of attempts already made; callers normally leave this
            at the default.

    Returns:
        The list of records produced by ``read_csv``, or None when the API
        reports an error or more than three attempts have been made.
    """
    if tries > 2:
        return None
    resp = conn.get(ALPHAVANTAGE_API_BASE_URL, params, stream=True)
    try:
        record = resp.json()
    except ValueError:
        # Not JSON: this is the normal path, the body is the CSV payload.
        # Only the decode is guarded, so Ctrl-C during the sleep below and
        # failures inside the retry are no longer silently swallowed.
        pass
    else:
        # Json response means error
        if is_alphavantage_error(record):
            # TODO: Log this failure?
            print(f"Error for {params} {record}")
            return None
        if is_alphavantage_rate_limit(record):
            time.sleep(60)
            return fetch_prices(params, tries=tries + 1)
    records = list(read_csv(resp.iter_lines()))
    return records
def test_records_to_file():
    """Copy in-memory records to a CSV file, read them back, and compare.

    The records read from the file are put back into the in-memory storage
    as ``"output"``, cast to ``test_records_schema``, and must then equal the
    records that were originally written.
    """
    tmp_dir = tempfile.gettempdir()
    file_storage: Storage = Storage.from_url(f"file://{tmp_dir}")
    file_api: FileSystemStorageApi = file_storage.get_api()
    memory_storage = new_local_python_storage()
    memory_api: PythonStorageApi = memory_storage.get_api()
    object_name = f"_test_{rand_str()}"
    expected = [{"f1": "hi", "f2": 2}]
    memory_api.put(object_name, expected)

    # Copy the in-memory records to a CSV file with the same name.
    copy_request = CopyRequest(
        object_name, memory_storage, object_name, file_storage, CsvFileFormat
    )
    RecordsToCsvFile().copy(copy_request)

    with file_api.open(object_name, newline="") as csv_file:
        loaded = list(read_csv(csv_file))
        handler_cls = get_handler(RecordsFormat, memory_storage.storage_engine)
        memory_api.put("output", loaded)
        # Cast before comparing; presumably this restores the value types
        # after the CSV round trip -- confirm against the handler.
        handler_cls().cast_to_schema(
            "output", memory_storage, schema=test_records_schema
        )
        round_tripped = memory_api.get("output")
        assert round_tripped == expected
def read_to_object(self, f: IOBase):
    """Read all lines from ``f`` and return the ``read_csv`` results as a list."""
    lines = f.readlines()
    parsed = read_csv(lines)
    return list(parsed)