Beispiel #1
0
 def get_sample_records(self, name: str,
                        storage: storage.Storage) -> Records:
     """Yield sample records parsed from the stored object ``name``.

     The object is fetched through ``storage.get_api()`` and must be a
     ``SampleableIterator`` (asserted). Its ``head(SAMPLE_SIZE_LINES)``
     result is fed to ``read_csv`` and each parsed record is yielded.
     """
     stored = storage.get_api().get(name)
     assert isinstance(stored, SampleableIterator)
     head_lines = stored.head(SAMPLE_SIZE_LINES)
     yield from read_csv(head_lines)
Beispiel #2
0
def str_as_dataframe(
    env: Environment,
    test_data: str,
    module: Optional[SnapflowModule] = None,
    nominal_schema: Optional[Schema] = None,
) -> DataFrame:
    """Build a DataFrame from test data given as a module file name or raw CSV.

    Args:
        env: Environment whose ``_local_python_storage`` is used to stage the
            parsed records under a random temporary name.
        test_data: Either the name of a ``.csv`` / ``.json`` file to open via
            ``module.open_module_file``, or a raw CSV string.
        module: Module used to open ``test_data`` when it names a file.
            Required for ``.csv`` and ``.json`` inputs.
        nominal_schema: If given, the staged records are passed to
            ``PythonRecordsHandler().cast_to_schema`` with this schema;
            otherwise ``infer_schema_for_name`` is run on the staged records.

    Returns:
        ``DataFrame.from_records`` of the parsed records.

    Raises:
        ValueError: If ``test_data`` names a ``.csv`` / ``.json`` file but no
            ``module`` was supplied to open it with.
    """
    # TODO: add conform_dataframe_to_schema option
    if test_data.endswith((".csv", ".json")):
        if module is None:
            # Previously a bare `raise`, which only produced an opaque
            # "No active exception to reraise" RuntimeError.
            raise ValueError(
                f"A module is required to load test data file {test_data!r}"
            )
        with module.open_module_file(test_data) as f:
            if test_data.endswith(".csv"):
                raw_records = list(read_csv(f.readlines()))
            else:
                # One JSON document per line.
                raw_records = [read_json(line) for line in f]
    else:
        # Raw str csv
        raw_records = list(read_raw_string_csv(test_data))
    tmp = "_test_obj_" + rand_str()
    env._local_python_storage.get_api().put(tmp, raw_records)
    if nominal_schema is None:
        # The inferred schema is not consumed yet (see TODO above); the call
        # is kept so schema inference still runs against the staged records.
        nominal_schema = infer_schema_for_name(tmp, env._local_python_storage)
    else:
        PythonRecordsHandler().cast_to_schema(
            tmp, env._local_python_storage, nominal_schema
        )
    return DataFrame.from_records(raw_records)
Beispiel #3
0
 def fetch_prices(params: Dict, tries: int = 0) -> Optional[Records]:
     """Fetch price records from the AlphaVantage endpoint as parsed CSV rows.

     A JSON body signals a problem rather than data: an API error aborts and
     returns ``None``; a rate-limit notice sleeps 60 seconds and retries.
     Any non-JSON body is parsed as CSV.

     Args:
         params: Query parameters passed to ``conn.get``.
         tries: Number of retries already performed; callers normally leave
             this at 0.

     Returns:
         A list of records parsed by ``read_csv``, or ``None`` on an API error
         or once more than two retries have been used.
     """
     if tries > 2:
         return None
     resp = conn.get(ALPHAVANTAGE_API_BASE_URL, params, stream=True)
     try:
         record = resp.json()
     except ValueError:
         # Not JSON, so this is the normal CSV payload.
         # NOTE(review): assumes a requests-style response whose JSON decode
         # errors subclass ValueError -- confirm for `conn`.
         pass
     else:
         # Json response means error. Handled outside the `try` so failures
         # here (or in the retry) are not swallowed and mis-read as CSV.
         if is_alphavantage_error(record):
             # TODO: Log this failure?
             print(f"Error for {params} {record}")
             return None
         if is_alphavantage_rate_limit(record):
             time.sleep(60)
             return fetch_prices(params, tries=tries + 1)
     return list(read_csv(resp.iter_lines()))
Beispiel #4
0
def test_records_to_file():
    """Copy in-memory records to a CSV file and check they read back equal.

    The records are written through ``RecordsToCsvFile``, re-read with
    ``read_csv``, cast to ``test_records_schema`` and compared with the
    original list.
    """
    tmp_dir = tempfile.gettempdir()
    file_storage: Storage = Storage.from_url(f"file://{tmp_dir}")
    file_api: FileSystemStorageApi = file_storage.get_api()
    memory_storage = new_local_python_storage()
    memory_api: PythonStorageApi = memory_storage.get_api()
    obj_name = f"_test_{rand_str()}"
    expected = [{"f1": "hi", "f2": 2}]
    memory_api.put(obj_name, expected)
    # Same object name on both sides: memory storage -> file storage.
    copy_request = CopyRequest(
        obj_name, memory_storage, obj_name, file_storage, CsvFileFormat
    )
    RecordsToCsvFile().copy(copy_request)
    with file_api.open(obj_name, newline="") as csv_file:
        parsed = list(read_csv(csv_file))
        handler_cls = get_handler(RecordsFormat, memory_storage.storage_engine)
        memory_api.put("output", parsed)
        # Cast the re-read values back to the schema's types before comparing.
        handler_cls().cast_to_schema(
            "output", memory_storage, schema=test_records_schema
        )
        assert memory_api.get("output") == expected
Beispiel #5
0
 def read_to_object(self, f: IOBase):
     """Read every line of ``f``, parse them with ``read_csv`` and return a list."""
     lines = f.readlines()
     return list(read_csv(lines))