Exemple #1
0
  def test_side_effects_from_different_boxes(self):
    """Check snapshots written by side effects that come from different custom boxes.

    Two side-effect workspaces are used three times inside an outer
    workspace. The outer workspace is saved, trigger_saved() is called for
    every path returned by side_effect_paths(), and the listing of the
    snapshot directory is compared with the three expected entries.
    """
    lk = lynx.kite.LynxKite()

    @lk.workspace_with_side_effects(parameters=[text('snapshot_path')])
    def save_graph_to_snapshot(sec, graph):
      vertices = graph.sql('select * from vertices')
      vertices.saveToSnapshot(path=pp('$snapshot_path')).register(sec)

    @lk.workspace_with_side_effects(parameters=[text('snapshot_path')])
    def save_and_return_graph(sec, graph):
      vertices = graph.sql('select * from vertices')
      vertices.saveToSnapshot(path=pp('$snapshot_path')).register(sec)
      return dict(graph=graph)

    @lk.workspace_with_side_effects('Muliple graph snapshots')
    def eg_snapshots(sec):
      example = lk.createExampleGraph()
      save_graph_to_snapshot(example, snapshot_path='side effect snapshots/a').register(sec)
      first = save_and_return_graph(example, snapshot_path='side effect snapshots/b')
      first.register(sec)
      # The third snapshot is taken from the output of the second box.
      save_graph_to_snapshot(first, snapshot_path='side effect snapshots/c').register(sec)

    folder = 'side effect snapshots example folder'
    eg_snapshots.save(folder)
    # Remove earlier results before triggering.
    lk.remove_name('side effect snapshots', force=True)
    for path in eg_snapshots.side_effect_paths():
      eg_snapshots.trigger_saved(path, folder)
    names = [entry.name for entry in lk.list_dir('side effect snapshots')]
    self.assertEqual(names, [
        'side effect snapshots/a',
        'side effect snapshots/b',
        'side effect snapshots/c',
    ])
    def test_ws_decorator_with_ws_parameters(self):
        """Check that a decorated workspace evaluates its text parameters.

        The workspace derives a graph attribute 'total' from the parameters
        a, b and c; with the values '2', '3' and '4' it must read '9'.
        """
        lk = lynx.kite.LynxKite()

        @lk.workspace(parameters=[text('a'), text('b'), text('c')])
        def add_ws():
            vertices = lk.createVertices(size='5')
            return vertices.deriveGraphAttribute(
                output='total', expr=pp('${a.toInt+b.toInt+c.toInt}'))

        graph = add_ws(a='2', b='3', c='4').get_graph()
        # Map every graph attribute title to its fetched value.
        scalars = {}
        for attribute in graph.graphAttributes:
            scalars[attribute.title] = lk.get_graph_attribute(attribute.id)
        self.assertEqual(scalars['total'].string, '9')
 def parametric_ws(self):
   """Build and return the 'ws params' workspace.

   The workspace outputs a table named 'table' produced by a parametric SQL
   query on the example graph; its single workspace parameter 'ap' is
   declared with the value '18.2'.
   """
   from lynx.kite import pp, text
   lk = lynx.kite.LynxKite()
   example = lk.createExampleGraph()
   query = pp('select name from `vertices` where age = $ap')
   table_output = example.sql(query).output(name='table')
   return lynx.kite.Workspace(
       [table_output], name='ws params', ws_parameters=[text('ap', '18.2')])
    def test_multiple_ws_decorators(self):
        """Check that decorated workspaces can be nested inside each other.

        full_workflow feeds the example graph through graph_to_table and
        filter_table; the resulting table must contain Adam and Bob with
        their incomes.
        """
        lk = lynx.kite.LynxKite()

        @lk.workspace(parameters=[text('field'), text('limit')])
        def filter_table(table):
            filtered = table.sql(pp(
                'select name, income from input where ${field} > ${limit}'))
            return {'table': filtered}

        @lk.workspace()
        def graph_to_table(graph):
            vertices = graph.sql('select * from vertices')
            return {'table': vertices}

        @lk.workspace()
        def full_workflow():
            as_table = graph_to_table(lk.createExampleGraph())
            filtered = filter_table(as_table, field='income', limit=500)
            return {'result': filtered}

        rows = full_workflow().get_table_data().data
        values = [(cells[0].string, cells[1].string) for cells in rows]
        self.assertEqual(values, [('Adam', '1000'), ('Bob', '2000')])
Exemple #5
0
  def test_side_effects_unsaved_workspace(self):
    """Check trigger_all_side_effects() on a workspace that was never saved.

    The outer workspace registers two snapshot-saving boxes; after
    triggering, the 'unsaved' directory must list both snapshots.
    """
    lk = lynx.kite.LynxKite()

    @lk.workspace_with_side_effects(parameters=[text('snapshot_path')])
    def save_graph_to_snapshot(sec, graph):
      vertices = graph.sql('select * from vertices')
      vertices.saveToSnapshot(path=pp('$snapshot_path')).register(sec)

    @lk.workspace_with_side_effects()
    def eg_snapshots(sec):
      example = lk.createExampleGraph()
      save_graph_to_snapshot(example, snapshot_path='unsaved/a').register(sec)
      save_graph_to_snapshot(example, snapshot_path='unsaved/b').register(sec)

    # Remove earlier results before triggering.
    lk.remove_name('unsaved', force=True)
    eg_snapshots.trigger_all_side_effects()
    names = [entry.name for entry in lk.list_dir('unsaved')]
    self.assertEqual(names, ['unsaved/a', 'unsaved/b'])
Exemple #6
0
def get_components_by_date_wss(lk):
    """Return a WorkspaceSequence that computes component metrics on a random graph.

    The wrapped workspace takes a 'date' parameter, uses it to seed random
    edge generation over 1000 vertices, and outputs a 'metrics' table with
    the largest and smallest connected component sizes and the component
    count. The sequence has no input recipes.
    """
    @lk.workspace(parameters=[text('date')])
    def components():
        vertices = lk.createVertices(size=1000)
        graph = vertices.createRandomEdges(
            degree=2, seed=pp('${date.hashCode()}'))
        with_components = graph.findConnectedComponents()
        component_metrics = with_components.sql(pp('''
          select
          "$date" as date_id,
          max(size) as max_size,
          min(size) as min_size,
          count(*) as num_components
          from `connected_components.vertices`'''))
        return {'metrics': component_metrics}

    return lynx.automation.WorkspaceSequence(
        ws=components,
        schedule='30 * * * *',
        start_date=datetime(2018, 7, 13),
        lk_root='tedx_components_by_date',
        input_recipes=[],
    )
Exemple #7
0
def get_components_from_inputs_wss(input_folder, lk):
    """Return a WorkspaceSequence that computes component metrics from an input table.

    The wrapped workspace takes a 'date' parameter and a table input, turns
    the table into a graph via its 'src' and 'dst' columns, and outputs a
    'metrics' table with the largest and smallest connected component sizes
    and the component count. The input comes from a CSVRecipe built from
    input_folder and lk.
    """
    @lk.workspace(parameters=[text('date')])
    def components(table):
        graph = table.useTableAsGraph(src='src', dst='dst')
        with_components = graph.findConnectedComponents()
        component_metrics = with_components.sql(pp('''
          select
          "$date" as date_id,
          max(size) as max_size,
          min(size) as min_size,
          count(*) as num_components
          from `connected_components.vertices`'''))
        return {'metrics': component_metrics}

    recipes = [CSVRecipe(input_folder, lk)]
    return lynx.automation.WorkspaceSequence(
        ws=components,
        schedule='30 * * * *',
        start_date=datetime(2018, 7, 13),
        lk_root='tedx_components_from_inputs',
        input_recipes=recipes,
    )
Exemple #8
0
  def test_ws_with_side_effects(self):
    """Check CSV exports registered as side effects of a saved workspace.

    Two filtered tables of the example graph are exported through a custom
    exporter box. After saving the workspace and calling trigger_saved() for
    every side effect path, both files are imported back and the first cell
    of each is verified.
    """
    lk = lynx.kite.LynxKite()

    @lk.workspace_with_side_effects(parameters=[text('export_path')])
    def csv_exporter(sec, table):
      export = table.exportToCSV(path=pp('$export_path'))
      export.register(sec)

    @lk.workspace_with_side_effects('Example graph exports')
    def eg_exports(sec):
      example = lk.createExampleGraph()
      by_age = example.sql('select name, age from vertices where age < 10')
      by_income = example.sql('select name, income from vertices where income > 1000')
      csv_exporter(by_age, export_path='DATA$/side effect exports/a').register(sec)
      csv_exporter(by_income, export_path='DATA$/side effect exports/b').register(sec)

    folder = 'side effect example folder'
    eg_exports.save(folder)
    for path in eg_exports.side_effect_paths():
      eg_exports.trigger_saved(path, folder)
    imported_a = lk.importCSVNow(filename='DATA$/side effect exports/a')
    imported_b = lk.importCSVNow(filename='DATA$/side effect exports/b')
    self.assertEqual(imported_a.get_table_data().data[0][0].string, 'Isolated Joe')
    self.assertEqual(imported_b.get_table_data().data[0][0].string, 'Bob')
Exemple #9
0
def get_export_results_wss(input_folder, output_folder, lk):
    """Return a WorkspaceSequence that computes and exports component metrics.

    The wrapped workspace takes a 'date' parameter and a table input, turns
    the table into a graph via its 'src' and 'dst' columns, and computes a
    'metrics' table with the largest and smallest connected component sizes
    and the component count. The table is also exported to Parquet under
    output_folder + '/${date}', registered as a side effect. The input comes
    from a CSVRecipe built from input_folder and lk.
    """
    @lk.workspace_with_side_effects(parameters=[text('date')])
    def components(sec_collector, table):
        graph = table.useTableAsGraph(src='src', dst='dst')
        with_components = graph.findConnectedComponents()
        component_metrics = with_components.sql(pp('''
          select
          "$date" as date_id,
          max(size) as max_size,
          min(size) as min_size,
          count(*) as num_components
          from `connected_components.vertices`'''))
        export_path = pp(output_folder + '/${date}')
        component_metrics.exportToParquet(path=export_path).register(sec_collector)
        return {'metrics': component_metrics}

    recipes = [CSVRecipe(input_folder, lk)]
    return lynx.automation.WorkspaceSequence(
        ws=components,
        schedule='30 * * * *',
        start_date=datetime(2018, 7, 13),
        lk_root='tedx_export_results',
        input_recipes=recipes,
    )

class CSVRecipe(lynx.automation.InputRecipe):
    """Input recipe that imports the 'index' CSV file belonging to a date.

    The file path for a date is obtained from ``filename('index', date)``,
    a helper defined outside this class.
    """

    def __init__(self, lk):
        """Store the LynxKite connection used for uploading and importing."""
        self.lk = lk

    def is_ready(self, date):
        """Return True if the CSV file for ``date`` is an existing regular file."""
        import os.path
        return os.path.isfile(filename('index', date))

    def build_boxes(self, date):
        """Upload the CSV file for ``date`` and return the ``importCSVNow`` result."""
        # Use a context manager so the file handle is closed as soon as the
        # upload call returns instead of lingering until garbage collection.
        # NOTE(review): assumes upload() consumes the stream before
        # returning; confirm against the lynx.kite implementation.
        with open(filename('index', date)) as csv_file:
            prefixed_path = self.lk.upload(csv_file)
        return self.lk.importCSVNow(filename=prefixed_path)


@lk.workspace(parameters=[text('date')])
def save(table):
    """Tag the input table with the 'date' parameter and cast its numeric columns.

    Returns a dict with a single 'hourly' entry: the result of a SQL query
    that emits date_id plus supporters and amount cast to integer.
    """
    hourly_query = pp('''
    select
    "$date" as date_id,
    cast(supporters as integer) as supporters,
    cast(amount as integer) as amount
    from input''')
    return {'hourly': table.sql(hourly_query)}


save_wss = lynx.automation.WorkspaceSequence(ws=save,
                                             schedule='0 * * * *',
                                             start_date=datetime(
                                                 2018, 11, 21, 12, 0),