Ejemplo n.º 1
0
def run_stripe_test(api_key: str):
    """Import and clean Stripe charges, then sanity-check the produced records.

    Builds an environment with a ``"sqlite://"`` metadata storage and a
    temporary SQLite storage, chains ``stripe.import_charges`` into
    ``stripe.clean_charges``, and produces the cleaning node with a
    one-second execution time limit.

    Args:
        api_key: Stripe API key. A falsy value falls back to ``TEST_API_KEY``.

    Raises:
        AssertionError: If fewer than 100 records come back, or the first
            record's ``amount`` is not positive.
    """
    from snapflow_stripe import module as stripe

    api_key = api_key or TEST_API_KEY
    tmp_storage = get_tmp_sqlite_db_url()
    dataspace = DataspaceCfg(metadata_storage="sqlite://", storages=[tmp_storage])
    env = Environment(dataspace)
    env.add_module(stripe)

    # Two-node pipeline: the raw import feeds the cleaning step.
    import_node = GraphCfg(
        key="import_charges",
        function="stripe.import_charges",
        params={"api_key": api_key},
    )
    clean_node = GraphCfg(
        key="clean_charges",
        function="stripe.clean_charges",
        input="import_charges",
    )
    pipeline = GraphCfg(nodes=[import_node, clean_node])

    results = env.produce(
        clean_node.key,
        pipeline,
        target_storage=tmp_storage,
        execution_timelimit_seconds=1,
    )
    first_result = results[0]
    records = first_result.stdout().as_records()
    assert len(records) >= 100
    assert records[0]["amount"] > 0
 def as_dataframe(self, env: Environment, sess: Session):
     """Convert ``self.data`` to a dataframe via ``str_as_dataframe``.

     The nominal schema is resolved through ``env.get_schema`` only when
     ``self.schema`` is truthy; otherwise ``None`` is passed along.
     """
     nominal = env.get_schema(self.schema, sess) if self.schema else None
     return str_as_dataframe(
         self.data, module=self.module, nominal_schema=nominal
     )
Ejemplo n.º 3
0
 def as_dataframe(self, env: Environment):
     """Convert ``self.data`` to a dataframe via ``str_as_dataframe``.

     ``env`` is forwarded as the first positional argument. The nominal
     schema is looked up with ``env.get_schema`` only when ``self.schema``
     is truthy; otherwise ``None`` is used.
     """
     nominal = env.get_schema(self.schema) if self.schema else None
     return str_as_dataframe(
         env, self.data, package=self.package, nominal_schema=nominal
     )
Ejemplo n.º 4
0
def ensure_function(
        env: Environment, function_like: Union[DataFunctionLike,
                                               str]) -> DataFunction:
    """Coerce ``function_like`` into a ``DataFunction``.

    Args:
        env: Environment used to look up functions referenced by name.
        function_like: An existing ``DataFunction`` (returned unchanged), a
            string (resolved with ``env.get_function``), or anything else
            (wrapped with ``make_function``).

    Returns:
        The resolved ``DataFunction``.
    """
    if isinstance(function_like, DataFunction):
        resolved = function_like
    elif isinstance(function_like, str):
        resolved = env.get_function(function_like)
    else:
        resolved = make_function(function_like)
    return resolved
Ejemplo n.º 5
0
 def get_translation_to(self, env: Environment, sess: Session,
                        other: Schema) -> Optional[SchemaTranslation]:
     """Return the translation to ``other`` from the first matching implementation.

     Each entry of ``self.implementations`` is resolved to a schema with
     ``env.get_schema``; the first one whose ``key`` equals ``other.key``
     supplies the translation. Returns ``None`` when there are no
     implementations or none of them match.
     """
     # A falsy ``implementations`` is treated as "nothing to search".
     for candidate in self.implementations or ():
         if env.get_schema(candidate.schema_key, sess).key != other.key:
             continue
         return candidate.as_schema_translation(env, sess, other)
     return None
Ejemplo n.º 6
0
def test_bigcommerce_order_products():
    """Produce the BigCommerce order-products import and expect some records.

    Uses a ``"sqlite://"`` metadata storage, credentials obtained from
    ``ensure_api_key`` / ``ensure_store_id``, and a two-second execution
    time limit.
    """
    from snapflow_bigcommerce import module as snapflow_bigcommerce

    env = Environment(metadata_storage="sqlite://")
    env.add_module(snapflow_bigcommerce)

    order_graph = graph()

    # Single-node graph: just the importer, configured with the credentials.
    credentials = {
        "api_key": ensure_api_key(),
        "store_id": ensure_store_id(),
    }
    import_node = order_graph.create_node(
        snapflow_bigcommerce.functions.import_order_products,
        params=credentials,
    )

    blocks = env.produce(import_node, order_graph, execution_timelimit_seconds=2)
    first_block_records = blocks[0].as_records()
    assert len(first_block_records) > 0
Ejemplo n.º 7
0
def produce_function_output_for_static_input(
        function: DataFunction,
        params: Dict[str, Any] = None,
        input: Any = None,
        inputs: Any = None,
        env: Optional[Environment] = None,
        module: Optional[SnapflowModule] = None,
        target_storage: Optional[Storage] = None,
        upstream: Any = None,  # TODO: DEPRECATED
) -> Iterator[List[DataBlock]]:
    """Run ``function`` once against static input data and yield what it produces.

    Each non-self-referencing input of the function gets a
    ``core.import_dataframe`` node fed from the supplied data, and a final
    node running ``function`` is wired to those nodes. The result of
    ``env.produce`` for that node is yielded exactly once, while the storage
    and metadata contexts are still open.

    Args:
        function: The function under test.
        params: Parameters passed to the function's node.
        input: Static input data; the first truthy value among ``input``,
            ``inputs`` and ``upstream`` is used.
        inputs: Alias for ``input``.
        env: Environment to run in. When ``None``, one is created with a
            temporary SQLite metadata storage.
        module: Optional module added to the environment before running.
        target_storage: Storage handed to ``provide_test_storages``; the
            value that context manager yields is what is actually used.
        upstream: Deprecated alias for ``input``.

    Yields:
        The blocks returned by ``env.produce`` for the test node.

    Raises:
        AssertionError: If a non-dict input is given for a function that
            does not have exactly one non-recursive input, if an input has no
            name, or if an input value is neither a ``str`` nor a
            ``DataInput``.
    """
    inputs = input or inputs or upstream
    if env is None:
        env = Environment(metadata_storage=get_tmp_sqlite_db_url())
    if module:
        env.add_module(module)
    with provide_test_storages(function, target_storage) as target_storage:
        if target_storage:
            target_storage = env.add_storage(target_storage)
        with env.md_api.begin():
            test_graph = Graph(env)
            interface = function.get_interface()

            # Normalize the supplied data to a mapping of input name -> data.
            if isinstance(inputs, dict):
                data_by_name = inputs
            else:
                # A bare value is only unambiguous for a single-input function.
                assert len(interface.get_non_recursive_inputs()) == 1
                sole_input = interface.get_single_non_recursive_input()
                data_by_name = {sole_input.name: inputs}

            input_nodes: Dict[str, Node] = {}
            for declared in interface.inputs.values():
                if declared.is_self_reference:
                    continue
                assert declared.name is not None
                static_data = data_by_name[declared.name]
                if isinstance(static_data, str):
                    # Raw strings are wrapped so they can be converted below.
                    static_data = DataInput(data=static_data)
                assert isinstance(static_data, DataInput)
                input_nodes[declared.name] = test_graph.create_node(
                    key=f"_input_{declared.name}",
                    function="core.import_dataframe",
                    params={
                        "dataframe": static_data.as_dataframe(env),
                        "schema": static_data.get_schema_key(),
                    },
                )

            test_node = test_graph.create_node(
                key=f"{function.name}",
                function=function,
                params=params,
                inputs=input_nodes,
            )
            yield env.produce(
                test_node, to_exhaustion=False, target_storage=target_storage
            )
Ejemplo n.º 8
0
def update_matching_field_definitions(env: Environment, schema: Schema,
                                      update_with_schema: Schema) -> Schema:
    """Replace fields of ``schema`` with same-named fields of ``update_with_schema``.

    Args:
        env: Environment in which a newly generated schema is registered.
        schema: Schema whose field list is the starting point.
        update_with_schema: Schema queried (via ``get_field``) for a
            replacement of each field, by name.

    Returns:
        ``schema`` itself when no field name matched; otherwise a new schema
        named ``"<schema.name>_with_<update_with_schema.name>"`` carrying the
        merged field list, after registering it with
        ``env.add_new_generated_schema``.
    """
    merged_fields = []
    any_replaced = False
    for original_field in schema.fields:
        try:
            replacement = update_with_schema.get_field(original_field.name)
        except NameError:
            # No field of that name in the other schema: keep the original.
            merged_fields.append(original_field)
        else:
            merged_fields.append(replacement)
            any_replaced = True

    if not any_replaced:
        return schema

    as_dict = asdict(schema)
    as_dict["name"] = f"{schema.name}_with_{update_with_schema.name}"
    as_dict["fields"] = merged_fields
    generated = Schema.from_dict(as_dict)
    env.add_new_generated_schema(generated)
    return generated
Ejemplo n.º 9
0
def test_shopify():
    """Produce the Shopify order import and expect at least one record.

    Uses a ``"sqlite://"`` metadata storage plus a temporary SQLite storage
    and a one-second execution time limit. The value returned by
    ``ensure_api_key`` is passed as the ``admin_url`` parameter.
    """
    # NOTE(review): the imported module is never passed to env.add_module
    # in this test -- confirm that this is intended.
    from snapflow_shopify import module as shopify

    admin_url = ensure_api_key()
    tmp_storage = get_tmp_sqlite_db_url()
    dataspace = DataspaceCfg(metadata_storage="sqlite://", storages=[tmp_storage])
    env = Environment(dataspace)

    # Single-node graph: just the order importer.
    import_orders = GraphCfg(
        key="import_orders",
        function="shopify.import_orders",
        params={"admin_url": admin_url},
    )
    pipeline = GraphCfg(nodes=[import_orders])
    results = env.produce(
        import_orders.key,
        pipeline,
        target_storage=tmp_storage,
        execution_timelimit_seconds=1,
    )
    first_result = results[0]
    records = first_result.stdout().as_records()
    assert len(records) > 0
Ejemplo n.º 10
0
def produce_pipe_output_for_static_input(
    pipe: Pipe,
    config: Dict[str, Any] = None,
    input: Any = None,
    upstream: Any = None,
    env: Optional[Environment] = None,
    module: Optional[SnapflowModule] = None,
    target_storage: Optional[Storage] = None,
) -> Iterator[Optional[DataBlock]]:
    """Run ``pipe`` once against static input data and yield what it produces.

    Each non-self-referencing input of the pipe gets a
    ``core.extract_dataframe`` node fed from the supplied data, and a final
    node running ``pipe`` is wired to those nodes. The result of
    ``env.produce`` for that node is yielded exactly once, inside the
    environment's session scope.

    Args:
        pipe: The pipe under test.
        config: Configuration passed to the pipe's node.
        input: Static input data; falls back to ``upstream`` when falsy.
        upstream: Alias for ``input``.
        env: Environment to run in. When ``None``, one is created with a
            temporary SQLite metadata storage.
        module: Accepted but not used by this function.
        target_storage: Optional storage, added to the environment when truthy.

    Yields:
        The value returned by ``env.produce`` for the test node.

    Raises:
        AssertionError: If a non-dict input is given for a pipe that does not
            have exactly one non-recursive input, or if an input has no name.
    """
    input = input or upstream
    if env is None:
        env = Environment(metadata_storage=get_tmp_sqlite_db_url())
    if target_storage:
        target_storage = env.add_storage(target_storage)
    with env.session_scope() as sess:
        test_graph = Graph(env)
        interface = pipe.get_interface()

        # Normalize the supplied data to a mapping of input name -> data.
        if isinstance(input, dict):
            data_by_name = input
        else:
            # A bare value is only unambiguous for a single-input pipe.
            assert len(interface.get_non_recursive_inputs()) == 1
            sole_input = interface.get_non_recursive_inputs()[0]
            data_by_name = {sole_input.name: input}

        input_nodes: Dict[str, Node] = {}
        for declared in interface.inputs:
            if declared.is_self_ref:
                continue
            assert declared.name is not None
            static_data = data_by_name[declared.name]
            if isinstance(static_data, str):
                # Raw strings are wrapped so they can be converted below.
                static_data = DataInput(data=static_data)
            input_nodes[declared.name] = test_graph.create_node(
                key=f"_input_{declared.name}",
                pipe="core.extract_dataframe",
                config={
                    "dataframe": static_data.as_dataframe(env, sess),
                    "schema": static_data.get_schema_key(),
                },
            )

        test_node = test_graph.create_node(
            key=f"{pipe.name}",
            pipe=pipe,
            config=config,
            upstream=input_nodes,
        )
        produced = env.produce(
            test_node, to_exhaustion=False, target_storage=target_storage
        )
        yield produced
Ejemplo n.º 11
0
def run_test_crunchbase_import_funding_rounds(user_key):
    """Produce the Crunchbase funding-rounds import in sample mode and check its size.

    Args:
        user_key: Crunchbase user key forwarded to the importer.

    Raises:
        AssertionError: If the first output block's dataframe does not have
            exactly 50 rows.
    """
    from snapflow_crunchbase import module as snapflow_crunchbase

    env = Environment()
    env.add_module(snapflow_crunchbase)
    env.add_storage("file://.")
    funding_graph = graph()

    # Funding-rounds importer, run with ``use_sample`` switched on.
    importer_params = {"user_key": user_key, "use_sample": True}
    importer_node = funding_graph.create_node(
        snapflow_crunchbase.functions.import_funding_rounds,
        params=importer_params,
    )
    output = env.produce(node_like=importer_node, graph=funding_graph)
    first_frame = output[0].as_dataframe()
    assert len(first_frame) == 50
Ejemplo n.º 12
0
def test_fred():
    """Produce FRED observations for series ``gdp`` and bound the record count.

    The number of records must fall inside a window derived from the current
    UTC year: four per year counted from 1946, minus one.
    """
    api_key = ensure_api_key()

    from snapflow_fred import module as fred

    env = Environment(metadata_storage="sqlite://")

    fred_graph = graph()

    # Single-node graph: the observations importer for the series.
    importer_params = {"api_key": api_key, "series_id": "gdp"}
    gdp_node = fred_graph.create_node(
        "fred.import_observations",
        params=importer_params,
    )
    blocks = produce(gdp_node, env=env, modules=[fred])
    records = blocks[0].as_records()

    # Presumably one observation per quarter since 1946 -- verify against
    # the series definition.
    base_year = 1946
    per_year = 4
    assert len(records) >= (utcnow().year - base_year) * per_year - 1
    assert len(records) < (utcnow().year + 1 - base_year) * per_year - 1
Ejemplo n.º 13
0
 def schema(self, env: Environment, sess: Session) -> Schema:
     """Resolve ``self.schema_key`` to a schema through ``env.get_schema``."""
     key = self.schema_key
     return env.get_schema(key, sess)
Ejemplo n.º 14
0
    return "select customer, sum(amount) as amount from txs group by customer"
    # Can use jinja templates too
    # return template("sql/customer_lifetime_sales.sql", ctx)


# Declare a three-node graph in YAML: import Stripe charges, accumulate them
# with core.accumulator, then feed the result to customer_lifetime_sales.
# NOTE(review): customer_lifetime_sales is presumably the SQL function whose
# tail appears just above -- its definition is outside this excerpt.
# NOTE(review): the api_key below looks like Stripe's published
# documentation test key -- confirm it is not a private credential.
g = graph_from_yaml("""
nodes:
  - key: stripe_charges
    function: stripe.import_charges
    params:
      api_key: sk_test_4eC39HqLyjWDarjtT1zdp7dc
  - key: accumulated_stripe_charges
    function: core.accumulator
    input: stripe_charges
  - key: stripe_customer_lifetime_sales
    function: customer_lifetime_sales
    input: accumulated_stripe_charges
""")

# print(g)
# Sanity check: all three declared nodes were loaded into the graph.
assert len(g._nodes) == 3

# Run the graph in an environment that has the stripe module, with a
# one-second execution time limit.
env = Environment(modules=[stripe])
run(g, env=env, execution_timelimit_seconds=1)

# Get the final output block
datablock = env.get_latest_output("stripe_customer_lifetime_sales", g)
df = datablock.as_dataframe()
# The dataframe is expected to have exactly two columns.
assert len(df.columns) == 2
# Row count is only bounded (more than 1, at most 100), not pinned.
assert len(df) > 1 and len(df) <= 100  # Stripe data varies
Ejemplo n.º 15
0
def ensure_pipe(env: Environment, pipe_like: Union[PipeLike, str]) -> Pipe:
    """Coerce ``pipe_like`` into a ``Pipe``.

    Args:
        env: Environment used to look up pipes referenced by name.
        pipe_like: An existing ``Pipe`` (returned unchanged), a string
            (resolved with ``env.get_pipe``), or anything else (wrapped with
            ``make_pipe``).

    Returns:
        The resolved ``Pipe``.
    """
    if isinstance(pipe_like, Pipe):
        resolved = pipe_like
    elif isinstance(pipe_like, str):
        resolved = env.get_pipe(pipe_like)
    else:
        resolved = make_pipe(pipe_like)
    return resolved
Ejemplo n.º 16
0
def ensure_snap(env: Environment, snap_like: Union[SnapLike, str]) -> _Snap:
    """Coerce ``snap_like`` into a ``_Snap``.

    Args:
        env: Environment used to look up snaps referenced by name.
        snap_like: An existing ``_Snap`` (returned unchanged), a string
            (resolved with ``env.get_snap``), or anything else (wrapped with
            ``make_snap``).

    Returns:
        The resolved ``_Snap``.
    """
    if isinstance(snap_like, _Snap):
        resolved = snap_like
    elif isinstance(snap_like, str):
        resolved = env.get_snap(snap_like)
    else:
        resolved = make_snap(snap_like)
    return resolved