Example #1
0
def load(events, metadata=None):
  """Keep bids whose auction id is divisible by ``auction_skip``.

  Args:
    events: input collection; it is first piped through
      ``nexmark_query_util.JustBids()`` (presumably narrowing it to bid
      events -- confirm against that helper).
    metadata: mapping that provides ``'auction_skip'``. It is effectively
      required even though the signature defaults it to None.

  Returns:
    The collection of ``auction_price.AuctionPrice(auction, price)`` values
    built from the bids that pass the filter.

  Raises:
    AttributeError: while the pipeline is being built, if ``metadata`` is
      None.
  """
  # Resolve the divisor once at construction time: the per-element predicate
  # then closes over a plain value instead of calling metadata.get() for
  # every bid, and a missing metadata mapping fails here rather than on a
  # worker.
  auction_skip = metadata.get('auction_skip')
  return (
      events
      | nexmark_query_util.JustBids()
      | 'filter_by_skip' >>
      beam.Filter(lambda bid: bid.auction % auction_skip == 0)
      | 'project' >>
      beam.Map(lambda bid: auction_price.AuctionPrice(bid.auction, bid.price)))
Example #2
0
def load(events, metadata=None):
    """Select bids whose auction id is a multiple of ``auction_skip``.

    Args:
        events: input collection; it is first piped through
            ``nexmark_query_util.JustBids()`` (presumably narrowing it to
            bid events -- confirm against that helper).
        metadata: mapping that provides ``'auction_skip'``. It is
            effectively required even though the signature defaults it to
            None.

    Returns:
        The collection of dicts keyed by ``ResultNames.AUCTION_ID`` and
        ``ResultNames.PRICE``, one per bid that passes the filter.

    Raises:
        AttributeError: while the pipeline is being built, if ``metadata``
            is None.
    """
    # Look the divisor up once while building the pipeline so the filter
    # predicate closes over a plain value instead of repeating the dict
    # lookup for every bid; a missing metadata mapping also fails here
    # instead of on a worker.
    auction_skip = metadata.get('auction_skip')
    return (events
            | nexmark_query_util.JustBids()
            | 'filter_by_skip' >> beam.Filter(
                lambda bid: bid.auction % auction_skip == 0)
            | 'project' >> beam.Map(lambda bid: {
                ResultNames.AUCTION_ID: bid.auction,
                ResultNames.PRICE: bid.price
            }))
Example #3
0
def load(events, metadata=None, pipeline_options=None):
    """Run ``SelectMaxBidFn`` over fixed-window bids with the window's top price.

    Args:
        events: input collection; piped through
            ``nexmark_query_util.JustBids()`` before windowing.
        metadata: mapping that provides ``'window_size_sec'``, the size
            passed to ``window.FixedWindows``.
        pipeline_options: accepted but not used by this function.

    Returns:
        The output of ``SelectMaxBidFn`` applied to the windowed bids, with
        the per-window maximum price supplied as a singleton side input.
    """
    bids = events | nexmark_query_util.JustBids()
    # Despite being a "window" query, the windows here are fixed, not sliding.
    windowed_bids = bids | beam.WindowInto(
        window.FixedWindows(metadata.get('window_size_sec')))

    # Reduce every window to the single largest bid price it contains.
    prices = windowed_bids | beam.Map(lambda bid: bid.price)
    top_price = prices | beam.CombineGlobally(max).without_defaults()

    return windowed_bids | 'select_bids' >> beam.ParDo(
        SelectMaxBidFn(), beam.pvalue.AsSingleton(top_price))
Example #4
0
def load(events, metadata=None):
  """Count bids per bidder in the global window with repeated firings.

  Args:
    events: input collection; piped through
      ``nexmark_query_util.JustBids()`` first.
    metadata: mapping that provides ``'window_size_sec'``, the delay handed
      to ``trigger.AfterProcessingTime``.

  Returns:
    A collection of dicts keyed by ``ResultNames.BIDDER_ID`` and
    ``ResultNames.BID_COUNT``.
  """
  bidder_ids = (
      events
      | nexmark_query_util.JustBids()
      | 'query12_extract_bidder' >> beam.Map(lambda bid: bid.bidder))

  # windowing with processing time trigger, currently not supported in batch
  global_window = window.GlobalWindows()
  repeated_firing = trigger.Repeatedly(
      trigger.AfterProcessingTime(metadata.get('window_size_sec')))
  windowed_ids = bidder_ids | beam.WindowInto(
      global_window,
      trigger=repeated_firing,
      accumulation_mode=trigger.AccumulationMode.DISCARDING,
      allowed_lateness=0)

  counts = (
      windowed_ids
      | 'query12_bid_count' >> beam.combiners.Count.PerElement())
  # Each counted element is indexed as (bidder id, number of bids).
  return counts | 'query12_output' >> beam.Map(
      lambda bidder_count: {
          ResultNames.BIDDER_ID: bidder_count[0],
          ResultNames.BID_COUNT: bidder_count[1]
      })
Example #5
0
  def expand(self, pcoll):
    """Window the input, then co-group auctions and bids and join them.

    Args:
      pcoll: input collection; windowed with
        ``self.auction_or_bid_windowFn`` before being split.

    Returns:
      The output of ``JoinAuctionBidFn`` applied to the ``CoGroupByKey``
      result of the auction branch (tagged
      ``nexmark_query_util.AUCTION_TAG``) and the bid branch (tagged
      ``nexmark_query_util.BID_TAG``).
    """
    windowed = pcoll | beam.WindowInto(self.auction_or_bid_windowFn)

    # Auction branch (AuctionByIdFn presumably keys each auction by its id).
    auctions = windowed | nexmark_query_util.JustAuctions()
    keyed_auctions = auctions | 'auction_by_id' >> beam.ParDo(
        nexmark_query_util.AuctionByIdFn())

    # Bid branch (BidByAuctionIdFn presumably keys each bid by auction id).
    bids = windowed | nexmark_query_util.JustBids()
    keyed_bids = bids | 'bid_by_auction' >> beam.ParDo(
        nexmark_query_util.BidByAuctionIdFn())

    tagged_inputs = {
        nexmark_query_util.AUCTION_TAG: keyed_auctions,
        nexmark_query_util.BID_TAG: keyed_bids,
    }
    grouped = tagged_inputs | beam.CoGroupByKey()
    return grouped | beam.ParDo(JoinAuctionBidFn())
Example #6
0
def load(events, metadata=None):
    """Report, per sliding window, the auction(s) chosen by MostBidCombineFn.

    Args:
        events: input collection; piped through
            ``nexmark_query_util.JustBids()`` first.
        metadata: mapping that provides ``'window_size_sec'`` and
            ``'window_period_sec'``, the two arguments passed to
            ``window.SlidingWindows``.

    Returns:
        A collection of dicts keyed by ``ResultNames.AUCTION_ID`` and
        ``ResultNames.NUM``: one dict for every auction in the first item of
        the combined result, all sharing the second item as the count.
    """
    bids = events | nexmark_query_util.JustBids()
    sliding = window.SlidingWindows(metadata.get('window_size_sec'),
                                    metadata.get('window_period_sec'))
    auction_ids = (
        bids
        | 'query5_sliding_window' >> beam.WindowInto(sliding)
        # only the auction id of each bid is needed from here on
        | 'extract_bid_auction' >> beam.Map(lambda bid: bid.auction))

    bids_per_auction = (
        auction_ids
        | 'bid_count_per_auction' >> beam.combiners.Count.PerElement())

    # TODO(leiyiz): fanout with sliding window produces duplicated results,
    #   add .with_fanout(metadata.get('fanout')) to the combine below once it
    #   is fixed [BEAM-10617]
    most_bid = bids_per_auction | 'bid_max_count' >> beam.CombineGlobally(
        MostBidCombineFn()).without_defaults()

    # The combined value is indexed as (auctions, count); emit one row each.
    return most_bid | beam.FlatMap(lambda auctions_and_num: [{
        ResultNames.AUCTION_ID: auction_id,
        ResultNames.NUM: auctions_and_num[1]
    } for auction_id in auctions_and_num[0]])
Example #7
0
def load(events, metadata=None, pipeline_options=None):
    """Count bids per bidder inside session windows.

    Args:
        events: input collection; piped through
            ``nexmark_query_util.JustBids()`` first.
        metadata: mapping that provides ``'window_size_sec'`` (passed to
            ``window.Sessions``), ``'max_log_events'`` (element count for
            the early firing) and ``'occasional_delay_sec'`` (halved with
            integer division to give the allowed lateness).
        pipeline_options: accepted but not used by this function.

    Returns:
        A collection of dicts keyed by ``ResultNames.BIDDER_ID`` and
        ``ResultNames.BID_COUNT``.
    """
    # Reduce the stream to bids, then to just the bidder id of each bid.
    bidder_ids = (
        events
        | nexmark_query_util.JustBids()
        | 'query11_extract_bidder' >> beam.Map(lambda bid: bid.bidder))

    # Put the bidder ids into session windows that fire early after a fixed
    # number of elements and discard what was already emitted.
    sessions = window.Sessions(metadata.get('window_size_sec'))
    watermark_trigger = trigger.AfterWatermark(
        early=trigger.AfterCount(metadata.get('max_log_events')))
    sessioned_ids = bidder_ids | 'query11_session_window' >> beam.WindowInto(
        sessions,
        trigger=watermark_trigger,
        accumulation_mode=trigger.AccumulationMode.DISCARDING,
        allowed_lateness=metadata.get('occasional_delay_sec') // 2)

    # Occurrences of each bidder id, indexed below as (bidder id, count).
    counts = sessioned_ids | beam.combiners.Count.PerElement()
    return counts | beam.Map(
        lambda id_and_count: {
            ResultNames.BIDDER_ID: id_and_count[0],
            ResultNames.BID_COUNT: id_and_count[1]
        })
Example #8
0
def load(events, query_args=None):
    """Rebuild every bid with its price multiplied by ``USD_TO_EURO``.

    Args:
        events: input collection; piped through
            ``nexmark_query_util.JustBids()`` first.
        query_args: accepted but not used by this function.

    Returns:
        A collection of ``nexmark_model.Bid`` objects that copy the auction,
        bidder, date_time and extra fields of each input bid and carry the
        scaled price.
    """
    def to_euro(bid):
        # Only the price changes; all other fields are carried over as-is.
        euro_price = bid.price * USD_TO_EURO
        return nexmark_model.Bid(
            bid.auction, bid.bidder, euro_price, bid.date_time, bid.extra)

    bids = events | nexmark_query_util.JustBids()
    return bids | 'ConvertToEuro' >> beam.Map(to_euro)