def process_invites(db_session, message, account, invites):
    """Persist iCalendar invite events carried by *message*.

    New invites are attached to the account's 'emailed events' calendar
    and flushed to the db; invites whose uid already exists locally are
    merged into the existing event, but only when the incoming
    sequence_number is at least the stored one (i.e. the incoming copy
    is not older).

    Args:
        db_session: active SQLAlchemy session.
        message: the message the invites arrived on; assigned to each
            stored event so the ORM flushes it (see comment below).
        account: account owning the namespace / emailed-events calendar.
        invites: Event objects parsed from ICS data, not yet bound to a
            session (presumably produced by events_from_ics -- see the
            in-line comment below).
    """
    new_uids = [event.uid for event in invites]

    # Get the list of events which share a uid with those we received.
    # Note that we're limiting this query to events in the 'emailed events'
    # calendar, because that's where all the invites go.
    existing_events = (db_session.query(Event).filter(
        Event.calendar_id == account.emailed_events_calendar_id,
        Event.namespace_id == account.namespace.id,
        Event.uid.in_(new_uids),
    ).all())

    # uid -> stored Event, for O(1) lookup inside the loop below.
    existing_events_table = {event.uid: event for event in existing_events}

    for event in invites:
        if event.uid not in existing_events_table:
            # This is some SQLAlchemy trickery -- the events returned
            # by events_from_ics aren't bound to a session yet. Because of
            # this, we don't care if they get garbage-collected. This is
            # important because we only want to keep events we haven't seen
            # yet --- updates are merged with the existing events and are
            # dropped immediately afterwards.
            # By associating the event to the message we make sure it
            # will be flushed to the db.
            event.calendar = account.emailed_events_calendar
            event.message = message

            # Flush first so the event has an identity before contact
            # associations are derived from it.
            db_session.flush()
            update_contacts_from_event(db_session, event,
                                       account.namespace.id)
        else:
            # This is an event we already have in the db.
            # Let's see if the version we have is older or newer.
            existing_event = existing_events_table[event.uid]

            if existing_event.sequence_number <= event.sequence_number:
                merged_participants = existing_event._partial_participants_merge(
                    event)

                existing_event.update(event)
                existing_event.message = message

                # We have to do this mumbo-jumbo because MutableList does
                # not register changes to nested elements.
                # We could probably change MutableList to handle it (see:
                # https://groups.google.com/d/msg/sqlalchemy/i2SIkLwVYRA/mp2WJFaQxnQJ)
                # but this sounds very brittle.
                existing_event.participants = []
                for participant in merged_participants:
                    existing_event.participants.append(participant)

                # Rebuild contact associations from the merged participant
                # list; flush first so the changes above are visible.
                db_session.flush()
                existing_event.contacts = []
                update_contacts_from_event(db_session, existing_event,
                                           account.namespace.id)
def handle_event_updates(namespace_id, calendar_id, events, log, db_session):
    """Persists new or updated Event objects to the database."""
    num_added = 0
    num_updated = 0

    # One EXISTS query up front: if this calendar has no local events at
    # all (e.g. a first sync) we can skip the per-event lookup entirely.
    any_local_events = db_session.query(
        db_session.query(Event).filter(
            Event.namespace_id == namespace_id,
            Event.calendar_id == calendar_id).exists()
    ).scalar()

    for remote in events:
        assert remote.uid is not None, "Got remote item with null uid"

        match = None
        if any_local_events:
            match = (
                db_session.query(Event)
                .filter(
                    Event.namespace_id == namespace_id,
                    Event.calendar_id == calendar_id,
                    Event.uid == remote.uid,
                )
                .first()
            )

        if match is None:
            # Brand-new event: claim it for this namespace/calendar and
            # add it to the session.
            match = remote
            match.namespace_id = namespace_id
            match.calendar_id = calendar_id
            db_session.add(match)
            num_added += 1
        else:
            # Cancelling a recurring event must also cancel all of its
            # overrides. Note the stored event may not itself be recurring
            # (recurrence may have been added remotely since).
            is_cancellation = (
                isinstance(match, RecurringEvent)
                and remote.status == "cancelled"
                and match.status != "cancelled"
            )
            if is_cancellation:
                for override in match.overrides:
                    override.status = "cancelled"
            match.update(remote)
            match.participants = remote.participants
            num_updated += 1

        # Flush before rebuilding contact associations so the event has
        # an identity in the session.
        db_session.flush()
        match.contacts = []
        update_contacts_from_event(db_session, match, namespace_id)

        # Keep recurring events/overrides linked to their master event.
        if isinstance(remote, (RecurringEvent, RecurringEventOverride)):
            link_events(db_session, remote)

        # Batch commits to avoid long transactions that may lock calendar
        # rows. Every iteration bumps exactly one counter, so this fires
        # once per ten processed events.
        if (num_added + num_updated) % 10 == 0:
            db_session.commit()

    log.info(
        "synced added and updated events",
        calendar_id=calendar_id,
        added=num_added,
        updated=num_updated,
    )
def process_shard(shard_id, dry_run, id_start=0):
    """Backfill Event -> Contact associations for one database shard.

    Walks events in ascending id order in fixed-size batches; each event
    that has no contacts yet gets its associations rebuilt via
    update_contacts_from_event. Progress, commits, and a final summary
    are logged with running counters.

    Args:
        shard_id: database shard whose events to process.
        dry_run: when True, count what would change but write nothing.
        id_start: resume point -- only events with id > id_start are
            examined; advanced as the scan progresses.

    Fix: n_skipped was initialized and reported in every log line but
    never incremented -- events skipped because they already had
    contacts were uncounted, so logs always showed n_skipped=0.
    """
    # At 500K events, we need to process 6 events per second to finish
    # within a day. The throttle is taken once per batch, hence
    # rps = events-per-second / batch_size.
    batch_size = 100
    rps = 6 / batch_size
    window = 5

    throttle = limitlion.throttle_wait("create-event-contact-associations",
                                       rps=rps, window=window)

    with session_scope_by_shard_id(shard_id) as db_session:
        # NOTE: The session is implicitly autoflushed, which ensures no
        # duplicate contacts are created.
        n = 0
        n_skipped = 0
        n_updated = 0

        while True:
            # Keyset pagination on Event.id: cheaper than OFFSET and
            # stable while rows are being modified.
            event_query = list(
                db_session.query(Event).filter(Event.id > id_start).order_by(
                    asc(Event.id)).limit(batch_size))

            if not event_query:
                break

            for event in event_query:
                n += 1
                id_start = event.id

                if n % batch_size == 0:
                    log.info(
                        "progress",
                        shard_id=shard_id,
                        id_start=id_start,
                        n=n,
                        n_skipped=n_skipped,
                        n_updated=n_updated,
                    )

                if event.contacts:
                    # Already associated -- count the skip so the logged
                    # n_skipped figure is meaningful (previously it was
                    # never incremented and always reported as 0).
                    n_skipped += 1
                    continue

                if not dry_run:
                    event.contacts = []
                    update_contacts_from_event(db_session, event,
                                               event.namespace_id)
                    n_updated += 1

                    # Commit in batches to keep transactions short.
                    if n_updated % batch_size == 0:
                        db_session.commit()
                        log.info(
                            "committed",
                            shard_id=shard_id,
                            n=n,
                            n_skipped=n_skipped,
                            n_updated=n_updated,
                        )
                        throttle()

        log.info("finished",
                 shard_id=shard_id,
                 n=n,
                 n_skipped=n_skipped,
                 n_updated=n_updated)