Example 1
def expand(self, pcoll):
    return (
        pcoll
        | 'Filter Relevant Events' >> beam.Filter(self.filter_event)
        | 'Key by ID and Environment' >> beam.Map(tupleByIdAndEnvironment)
        | 'Window {}'.format(self.session_window) >> beam.WindowInto(
            window.Sessions(self.session_window))
        | beam.GroupByKey()
        | 'Calculate Time Between' >> beam.FlatMap(self.calculate_time)
    )
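This expand() is the body of a composite PTransform whose helpers are not in the excerpt. A minimal sketch of an enclosing class it could belong to follows; the class name, event schema, and the bodies of filter_event, calculate_time, and tupleByIdAndEnvironment are assumptions for illustration, not the original code:

import apache_beam as beam
from apache_beam.transforms import window


def tupleByIdAndEnvironment(event):
    # Assumed: key each event dict by (id, environment).
    return ((event['id'], event['environment']), event)


class TimeBetweenEvents(beam.PTransform):
    # Hypothetical name for the enclosing composite transform.
    def __init__(self, session_window=30 * 60):
        super().__init__()
        self.session_window = session_window

    def filter_event(self, event):
        # Assumed: only events carrying a timestamp are relevant.
        return 'timestamp' in event

    def calculate_time(self, keyed_events):
        key, events = keyed_events
        stamps = sorted(e['timestamp'] for e in events)
        # Emit the gap between consecutive events within each session.
        for earlier, later in zip(stamps, stamps[1:]):
            yield key, later - earlier

    def expand(self, pcoll):
        return (
            pcoll
            | 'Filter Relevant Events' >> beam.Filter(self.filter_event)
            | 'Key by ID and Environment' >> beam.Map(tupleByIdAndEnvironment)
            | 'Window {}'.format(self.session_window) >> beam.WindowInto(
                window.Sessions(self.session_window))
            | beam.GroupByKey()
            | 'Calculate Time Between' >> beam.FlatMap(self.calculate_time)
        )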
Example 2
import apache_beam as beam
from apache_beam.io import ReadFromText, WriteToText
from apache_beam.transforms import window


def analyze(args, opts):
    """Core of the pipeline: assemble events into per-trace sessions."""
    with beam.Pipeline(options=opts) as p:
        lines = p | ReadFromText(args.input, coder=JsonCoder())
        output = (lines
                  | beam.Map(lambda x: (x['trace'], x))   # key each record by trace id
                  | beam.WindowInto(window.Sessions(10))  # 10-second session gap
                  | beam.GroupByKey()
                  | beam.ParDo(AssembleTrace()))
        output | WriteToText(args.output)
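JsonCoder and AssembleTrace come from elsewhere in the original project. A plausible minimal JsonCoder, consistent with how it is used here (each input line decoded into a dict), might look like the sketch below; AssembleTrace would be a project-specific DoFn that stitches the grouped events of one trace back together:

import json

import apache_beam as beam


class JsonCoder(beam.coders.Coder):
    # Hypothetical: one JSON object per text line.
    def encode(self, value):
        return json.dumps(value).encode('utf-8')

    def decode(self, value):
        return json.loads(value)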
Example 3
# Requires: import apache_beam as beam
#           from apache_beam.testing.test_pipeline import TestPipeline
#           from apache_beam.testing.util import assert_that, equal_to
def test_setting_session_windows(self):
    p = TestPipeline()
    unkeyed_items = p | beam.Create([2, 11, 16, 27])
    items = (unkeyed_items
             | 'key' >> beam.Map(
                 lambda x: beam.window.TimestampedValue(('k', x), x)))
    # [START setting_session_windows]
    from apache_beam import window
    session_windowed_items = (
        items | 'window' >> beam.WindowInto(window.Sessions(10)))
    # [END setting_session_windows]
    summed = (session_windowed_items
              | 'group' >> beam.GroupByKey()
              | 'combine' >> beam.CombineValues(sum))
    unkeyed = summed | 'unkey' >> beam.Map(lambda x: x[1])
    # Timestamps 2, 11 and 16 each fall within 10s of the previous one, so
    # they merge into a single session summing to 29; 27 starts its own.
    assert_that(unkeyed, equal_to([29, 27]))
    p.run()
Example 4

import apache_beam as beam
from apache_beam.transforms import window
from datetime import datetime

file_in = 'tags.csv'
skip_head = "userId,movieId,tag,timestamp"


class ParseNewMovies(beam.DoFn):
    def process(self, element):
        # Skip the CSV header row.
        if element != skip_head:
            user_id, movie_id, tag, raw_ts = element.split(",")
            ts = int(raw_ts)
            tag_time = datetime.utcfromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
            # Attach the parsed epoch seconds as the element's event timestamp,
            # so the session windowing downstream operates on real event times
            # rather than the single default timestamp of a batch read.
            yield window.TimestampedValue((tag, (movie_id, tag_time)), ts)

with beam.Pipeline() as pipeline:
    item = (
        pipeline
        | 'Read lines' >> beam.io.ReadFromText(file_in)
        | 'Par D1' >> beam.ParDo(ParseNewMovies())
    )
    x = (
        item
        | 'Par D3' >> beam.WindowInto(window.Sessions(10 * 60))  # 10-minute gap
        | 'Par D2' >> beam.combiners.Count.PerKey()
        | 'Par D4' >> beam.Map(print)
    )
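For reference, the header row fixes the column order. A hypothetical tags.csv data line and the keyed element ParseNewMovies would emit for it (values invented for illustration):

    15,339,funny,1138537770
    ('funny', ('339', '2006-01-29 12:29:30'))

Count.PerKey then prints one (tag, count) pair per tag for every 10-minute session of tagging activity.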

Example 5
def expand(self, pcoll):
    return (pcoll
            # Sessions close after a one-hour gap; then count how many
            # times each element occurs within each session.
            | 'ComputeSessionsWindow' >> beam.WindowInto(
                window.Sessions(gap_size=ONE_HOUR_IN_SECONDS))
            | combiners.Count.PerElement())
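This expand() leans on module-level context that the excerpt omits; presumably something along these lines:

import apache_beam as beam
from apache_beam.transforms import combiners, window

ONE_HOUR_IN_SECONDS = 3600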
Example 6

import argparse
import json
import logging

import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.transforms import trigger as tr
from apache_beam.transforms import window

# READ_TOPIC and TOPIC are module-level Pub/Sub topic paths of the form
# 'projects/<project>/topics/<name>'; their definitions are not shown.


def main(argv=None):
    def json_parser(x):
        # Decode one Pub/Sub message payload into a dict.
        return json.loads(x)

    def bye(x):
        # Log each element as it flows past, then pass it through unchanged.
        logging.info('outing: %s', x)
        return x

    parser = argparse.ArgumentParser()
    parser.add_argument("--input_topic")
    parser.add_argument("--output_topic")
    # parse_known_args returns (namespace, leftover args); the leftovers are
    # what PipelineOptions expects.
    known_args, pipeline_args = parser.parse_known_args(argv)

    p = beam.Pipeline(options=PipelineOptions(pipeline_args))

    data = (p
            | 'ReadData' >>
            beam.io.ReadFromPubSub(topic=READ_TOPIC).with_output_types(bytes)
            | "JSONParse" >> beam.Map(json_parser))

    # Per-ride session windows (60s gap) with early firings roughly every
    # two seconds of processing time; results go straight back to Pub/Sub.
    (data
     | "AddingKeyToSumUp" >> beam.WithKeys(lambda x: x["ride_id"])
     | "Windowing" >> beam.WindowInto(
         window.Sessions(60),
         trigger=tr.AfterWatermark(early=tr.Repeatedly(
             tr.AfterAll(tr.AfterCount(1), tr.AfterProcessingTime(2)))),
         accumulation_mode=tr.AccumulationMode.DISCARDING,
         allowed_lateness=0)
     | 'ToBytes' >> beam.Map(lambda x: json.dumps(x, indent=2).encode('utf-8'))
     | 'Bye' >> beam.Map(bye)
     | 'WriteToPubSub' >> beam.io.WriteToPubSub(TOPIC))

    # Fixed 60-second windows: sum the meter increments into a global
    # dollar-per-minute run rate, with both early and late firings.
    (data
     | "SlidWindowing" >> beam.WindowInto(
         window.FixedWindows(60),
         trigger=(tr.AfterWatermark(early=tr.Repeatedly(
             tr.AfterAll(tr.AfterCount(1), tr.AfterProcessingTime(1))),
                                    late=tr.Repeatedly(tr.AfterCount(1)))),
         allowed_lateness=300,
         accumulation_mode=tr.AccumulationMode.ACCUMULATING)
     | "Extract" >> beam.Map(lambda x: x["meter_increment"])
     | "Sum_up" >> beam.CombineGlobally(sum).without_defaults()
     | "Reformat" >> beam.Map(lambda x: {"dollar_run_rate_per_minute": x})
     | "Enrich with time data" >> beam.ParDo(Enrich())
     | "ToBytesCount" >>
     beam.Map(lambda x: json.dumps(x, indent=2).encode('utf-8'))
     | 'Bye2' >> beam.Map(bye)
     | "WriteCount" >> beam.io.WriteToPubSub(TOPIC))

    # Per-ride session windows again, this time combined with PickupFn so
    # that only each ride's "pickup" record is kept and written out.
    (data
     | "AddingKey" >> beam.WithKeys(lambda x: x["ride_id"])
     | "SessionWindowing" >> beam.WindowInto(
         window.Sessions(60),
         trigger=tr.AfterWatermark(early=tr.Repeatedly(
             tr.AfterAll(tr.AfterCount(1), tr.AfterProcessingTime(1)))),
         accumulation_mode=tr.AccumulationMode.ACCUMULATING,
         allowed_lateness=0)
     | "GroupInPickup" >> beam.CombinePerKey(PickupFn())
     | "Discarding Key" >> beam.Map(lambda x: x[1])
     | "Filter not pickup" >>
     beam.Map(lambda x: x if str(x["ride_status"]) == "pickup" else None)
     | "ToBytesPickup" >>
     beam.Map(lambda x: json.dumps(x, indent=2).encode('utf-8'))
     | 'Bye3' >> beam.Map(bye)
     | "WritePickup" >> beam.io.WriteToPubSub(TOPIC))

    result = p.run()
    result.wait_until_finish()
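Enrich and PickupFn are referenced above but not defined in the excerpt. A rough sketch of what they might do, inferred only from how they are used (both bodies are guesses, not the original code):

import apache_beam as beam


class Enrich(beam.DoFn):
    # Hypothetical: stamp each summary record with its window's end time.
    def process(self, element, window=beam.DoFn.WindowParam):
        element["timestamp"] = window.end.to_utc_datetime().isoformat()
        yield element


class PickupFn(beam.CombineFn):
    # Hypothetical: reduce a ride's session to a single representative
    # event (here, simply the last one seen; Beam does not guarantee
    # input order, so a real implementation would compare event times).
    def create_accumulator(self):
        return {}

    def add_input(self, accumulator, input):
        return input

    def merge_accumulators(self, accumulators):
        accumulators = list(accumulators)
        return accumulators[-1] if accumulators else {}

    def extract_output(self, accumulator):
        return accumulator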