def test_side_effects_from_different_boxes(self):
    # Saves a workspace whose side effects write three snapshots (one of them
    # produced by a custom box that also returns the graph), triggers every
    # saved side effect, and checks the resulting directory listing.
    lk = lynx.kite.LynxKite()
    saved_folder = 'side effect snapshots example folder'

    @lk.workspace_with_side_effects(parameters=[text('snapshot_path')])
    def save_graph_to_snapshot(sec, graph):
        vertices = graph.sql('select * from vertices')
        snapshot = vertices.saveToSnapshot(path=pp('$snapshot_path'))
        snapshot.register(sec)

    @lk.workspace_with_side_effects(parameters=[text('snapshot_path')])
    def save_and_return_graph(sec, graph):
        vertices = graph.sql('select * from vertices')
        snapshot = vertices.saveToSnapshot(path=pp('$snapshot_path'))
        snapshot.register(sec)
        # Unlike save_graph_to_snapshot, this box also passes the graph on.
        return dict(graph=graph)

    @lk.workspace_with_side_effects('Muliple graph snapshots')
    def eg_snapshots(sec):
        example_graph = lk.createExampleGraph()
        save_graph_to_snapshot(
            example_graph, snapshot_path='side effect snapshots/a').register(sec)
        first = save_and_return_graph(
            example_graph, snapshot_path='side effect snapshots/b')
        first.register(sec)
        # The third snapshot is taken from the output of the second box.
        save_graph_to_snapshot(
            first, snapshot_path='side effect snapshots/c').register(sec)

    eg_snapshots.save(saved_folder)
    # Remove 'side effect snapshots' up front (force=True), presumably so that
    # leftovers from earlier runs cannot affect the listing -- TODO confirm.
    lk.remove_name('side effect snapshots', force=True)
    for side_effect_path in eg_snapshots.side_effect_paths():
        eg_snapshots.trigger_saved(side_effect_path, saved_folder)

    entries = lk.list_dir('side effect snapshots')
    found_names = [entry.name for entry in entries]
    expected = [
        'side effect snapshots/a',
        'side effect snapshots/b',
        'side effect snapshots/c',
    ]
    self.assertEqual(found_names, expected)
def test_ws_decorator_with_ws_parameters(self):
    # A decorated workspace with three text parameters derives a graph
    # attribute from their sum; calling it with 2, 3 and 4 must yield '9'.
    lk = lynx.kite.LynxKite()

    @lk.workspace(parameters=[text('a'), text('b'), text('c')])
    def add_ws():
        vertices = lk.createVertices(size='5')
        total_expr = pp('${a.toInt+b.toInt+c.toInt}')
        return vertices.deriveGraphAttribute(output='total', expr=total_expr)

    graph = add_ws(a='2', b='3', c='4').get_graph()

    # Fetch every graph attribute, keyed by its title.
    attributes_by_title = {}
    for attribute in graph.graphAttributes:
        attributes_by_title[attribute.title] = lk.get_graph_attribute(attribute.id)

    self.assertEqual(attributes_by_title['total'].string, '9')
def parametric_ws(self):
    """Return the 'ws params' workspace built around a parametric SQL box.

    The workspace has a single terminal box: the example graph queried with
    SQL that references the workspace parameter ``ap``, exposed as the output
    named 'table'. The parameter is declared as ``text('ap', '18.2')``.
    """
    from lynx.kite import pp, text
    lk = lynx.kite.LynxKite()
    example_graph = lk.createExampleGraph()
    names_query = pp('select name from `vertices` where age = $ap')
    table_output = example_graph.sql(names_query).output(name='table')
    return lynx.kite.Workspace(
        [table_output],
        name='ws params',
        ws_parameters=[text('ap', '18.2')],
    )
def test_multiple_ws_decorators(self):
    # Chains three decorated workspaces: graph -> table -> filtered table,
    # then checks the names and incomes that pass the 'income > 500' filter.
    lk = lynx.kite.LynxKite()

    @lk.workspace(parameters=[text('field'), text('limit')])
    def filter_table(table):
        query = pp('select name, income from input where ${field} > ${limit}')
        return dict(table=table.sql(query))

    @lk.workspace()
    def graph_to_table(graph):
        all_vertices = graph.sql('select * from vertices')
        return dict(table=all_vertices)

    @lk.workspace()
    def full_workflow():
        vertices_table = graph_to_table(lk.createExampleGraph())
        filtered = filter_table(vertices_table, field='income', limit=500)
        return dict(result=filtered)

    result_table = full_workflow().get_table_data()
    values = []
    for row in result_table.data:
        name_cell, income_cell = row[0], row[1]
        values.append((name_cell.string, income_cell.string))

    self.assertEqual(values, [('Adam', '1000'), ('Bob', '2000')])
def test_side_effects_unsaved_workspace(self):
    # Triggers all side effects of a workspace that was never saved and
    # checks that both snapshots show up under 'unsaved'.
    lk = lynx.kite.LynxKite()

    @lk.workspace_with_side_effects(parameters=[text('snapshot_path')])
    def save_graph_to_snapshot(sec, graph):
        vertices = graph.sql('select * from vertices')
        snapshot = vertices.saveToSnapshot(path=pp('$snapshot_path'))
        snapshot.register(sec)

    @lk.workspace_with_side_effects()
    def eg_snapshots(sec):
        example_graph = lk.createExampleGraph()
        save_graph_to_snapshot(example_graph, snapshot_path='unsaved/a').register(sec)
        save_graph_to_snapshot(example_graph, snapshot_path='unsaved/b').register(sec)

    # Remove 'unsaved' up front (force=True), presumably so that leftovers
    # from earlier runs cannot affect the listing -- TODO confirm.
    lk.remove_name('unsaved', force=True)
    eg_snapshots.trigger_all_side_effects()

    found_names = [entry.name for entry in lk.list_dir('unsaved')]
    expected = ['unsaved/a', 'unsaved/b']
    self.assertEqual(found_names, expected)
def get_components_by_date_wss(lk):
    """Return a WorkspaceSequence computing component metrics of a random graph.

    The wrapped workspace takes a ``date`` parameter, builds 1000 vertices
    with random edges seeded from ``date.hashCode()``, finds connected
    components and summarises their sizes with SQL. The sequence uses the
    schedule '30 * * * *', starts at 2018-07-13 and has no input recipes.

    Args:
        lk: LynxKite connection used to create the boxes and the workspace.
    """
    @lk.workspace(parameters=[text('date')])
    def components():
        vertices = lk.createVertices(size=1000)
        graph = vertices.createRandomEdges(degree=2, seed=pp('${date.hashCode()}'))
        with_components = graph.findConnectedComponents()
        metrics_query = pp(''' select "$date" as date_id, max(size) as max_size, min(size) as min_size, count(*) as num_components from `connected_components.vertices`''')
        return dict(metrics=with_components.sql(metrics_query))

    return lynx.automation.WorkspaceSequence(
        ws=components,
        schedule='30 * * * *',
        start_date=datetime(2018, 7, 13),
        lk_root='tedx_components_by_date',
        input_recipes=[],
    )
def get_components_from_inputs_wss(input_folder, lk):
    """Return a WorkspaceSequence computing component metrics of input tables.

    The wrapped workspace takes a ``date`` parameter and one table input,
    turns the table into a graph via its 'src' and 'dst' columns, finds
    connected components and summarises their sizes with SQL. The sequence
    uses the schedule '30 * * * *', starts at 2018-07-13 and gets its input
    from ``CSVRecipe(input_folder, lk)``.

    Args:
        input_folder: Passed to CSVRecipe as its first argument.
        lk: LynxKite connection used to create the boxes and the workspace.
    """
    @lk.workspace(parameters=[text('date')])
    def components(table):
        graph = table.useTableAsGraph(src='src', dst='dst')
        with_components = graph.findConnectedComponents()
        metrics_query = pp(''' select "$date" as date_id, max(size) as max_size, min(size) as min_size, count(*) as num_components from `connected_components.vertices`''')
        return dict(metrics=with_components.sql(metrics_query))

    csv_input = CSVRecipe(input_folder, lk)
    return lynx.automation.WorkspaceSequence(
        ws=components,
        schedule='30 * * * *',
        start_date=datetime(2018, 7, 13),
        lk_root='tedx_components_from_inputs',
        input_recipes=[csv_input],
    )
def test_ws_with_side_effects(self):
    # Exports two SQL results of the example graph to CSV through a custom
    # side-effect box, triggers the saved side effects, then re-imports both
    # files and checks the first cell of each.
    lk = lynx.kite.LynxKite()
    saved_folder = 'side effect example folder'
    export_a = 'DATA$/side effect exports/a'
    export_b = 'DATA$/side effect exports/b'

    @lk.workspace_with_side_effects(parameters=[text('export_path')])
    def csv_exporter(sec, table):
        export = table.exportToCSV(path=pp('$export_path'))
        export.register(sec)

    @lk.workspace_with_side_effects('Example graph exports')
    def eg_exports(sec):
        example_graph = lk.createExampleGraph()
        young = example_graph.sql('select name, age from vertices where age < 10')
        rich = example_graph.sql('select name, income from vertices where income > 1000')
        csv_exporter(young, export_path=export_a).register(sec)
        csv_exporter(rich, export_path=export_b).register(sec)

    eg_exports.save(saved_folder)
    for side_effect_path in eg_exports.side_effect_paths():
        eg_exports.trigger_saved(side_effect_path, saved_folder)

    def first_cell(imported):
        return imported.get_table_data().data[0][0].string

    # Both imports are issued before either assertion runs.
    imported_a = lk.importCSVNow(filename=export_a)
    imported_b = lk.importCSVNow(filename=export_b)
    self.assertEqual(first_cell(imported_a), 'Isolated Joe')
    self.assertEqual(first_cell(imported_b), 'Bob')
def get_export_results_wss(input_folder, output_folder, lk):
    """Return a WorkspaceSequence that computes and exports component metrics.

    The wrapped workspace takes a ``date`` parameter and one table input,
    turns the table into a graph via its 'src' and 'dst' columns, finds
    connected components and summarises their sizes with SQL. The metrics are
    exported to Parquet at ``output_folder + '/${date}'`` as a registered
    side effect and are also returned as the 'metrics' output. The sequence
    uses the schedule '30 * * * *', starts at 2018-07-13 and gets its input
    from ``CSVRecipe(input_folder, lk)``.

    Args:
        input_folder: Passed to CSVRecipe as its first argument.
        output_folder: Prefix of the parametric Parquet export path.
        lk: LynxKite connection used to create the boxes and the workspace.
    """
    @lk.workspace_with_side_effects(parameters=[text('date')])
    def components(sec_collector, table):
        graph = table.useTableAsGraph(src='src', dst='dst')
        with_components = graph.findConnectedComponents()
        metrics_query = pp(''' select "$date" as date_id, max(size) as max_size, min(size) as min_size, count(*) as num_components from `connected_components.vertices`''')
        component_metrics = with_components.sql(metrics_query)
        export_path = pp(output_folder + '/${date}')
        component_metrics.exportToParquet(path=export_path).register(sec_collector)
        return dict(metrics=component_metrics)

    csv_input = CSVRecipe(input_folder, lk)
    return lynx.automation.WorkspaceSequence(
        ws=components,
        schedule='30 * * * *',
        start_date=datetime(2018, 7, 13),
        lk_root='tedx_export_results',
        input_recipes=[csv_input],
    )
class CSVRecipe(lynx.automation.InputRecipe): def __init__(self, lk): self.lk = lk def is_ready(self, date): import os.path return os.path.isfile(filename('index', date)) def build_boxes(self, date): prefixed_path = self.lk.upload(open(filename('index', date))) return self.lk.importCSVNow(filename=prefixed_path) @lk.workspace(parameters=[text('date')]) def save(table): table_with_timestamp = table.sql( pp(''' select "$date" as date_id, cast(supporters as integer) as supporters, cast(amount as integer) as amount from input''')) return dict(hourly=table_with_timestamp) save_wss = lynx.automation.WorkspaceSequence(ws=save, schedule='0 * * * *', start_date=datetime( 2018, 11, 21, 12, 0),