def make_user_messages(
        zerver_message: List[ZerverFieldsT],
        subscriber_map: Dict[int, Set[int]],
        is_pm_data: bool,
        mention_map: Dict[int, Set[int]]) -> List[ZerverFieldsT]:
    """Fan each message out into one UserMessage row per receiving user.

    The receiving users of a message are the subscribers registered in
    ``subscriber_map`` for the message's recipient id (none if the recipient
    is absent from the map), plus the sender of the message.

    Args:
        zerver_message: Message rows; each must carry the keys ``'id'``,
            ``'recipient'`` and ``'sender'``.
        subscriber_map: Recipient id -> set of subscribed user ids.
        is_pm_data: Forwarded unchanged as ``is_private`` for every row.
        mention_map: Message id -> set of mentioned user ids.  Must contain
            an entry for every message id; a missing id raises ``KeyError``.

    Returns:
        The list of rows produced by ``build_user_message``.
    """
    rows = []

    for msg in zerver_message:
        msg_id = msg['id']
        target_recipient = msg['recipient']
        author_id = msg['sender']
        mentioned = mention_map[msg_id]

        # The sender always gets a row, even when not subscribed.
        audience = subscriber_map.get(target_recipient, set()) | {author_id}

        rows.extend(
            build_user_message(
                user_id=uid,
                message_id=msg_id,
                is_private=is_pm_data,
                is_mentioned=uid in mentioned,
            )
            for uid in audience
        )

    return rows
def make_user_messages(zerver_message: List[ZerverFieldsT],
                       zerver_subscription: List[ZerverFieldsT],
                       mention_map: Dict[int, Set[int]]) -> List[ZerverFieldsT]:
    """Create a UserMessage row for every subscriber of every message.

    Args:
        zerver_message: Message rows; each must carry ``'id'`` and
            ``'recipient'``.
        zerver_subscription: Subscription rows; each must carry
            ``'user_profile'`` and ``'recipient'``.
        mention_map: Message id -> set of mentioned user ids.  Must contain
            an entry for every message id; a missing id raises ``KeyError``.

    Returns:
        The list of rows produced by ``build_user_message``.  Row ids are
        taken from ``NEXT_ID('user_message')``, one call per row.
    """
    # Group subscribed user ids by the recipient they are subscribed to.
    users_by_recipient = dict()  # type: Dict[int, Set[int]]
    for subscription in zerver_subscription:
        subscriber = subscription['user_profile']
        target = subscription['recipient']
        users_by_recipient.setdefault(target, set()).add(subscriber)

    rows = []
    for msg in zerver_message:
        msg_id = msg['id']
        target = msg['recipient']
        mentioned = mention_map[msg_id]

        # Messages whose recipient has no subscribers yield no rows.
        for uid in users_by_recipient.get(target, set()):
            was_mentioned = uid in mentioned
            rows.append(build_user_message(
                id=NEXT_ID('user_message'),
                user_id=uid,
                message_id=msg_id,
                is_mentioned=was_mentioned,
            ))

    return rows
def write_message_data(realm_id: int,
                       zerver_recipient: List[ZerverFieldsT],
                       zerver_subscription: List[ZerverFieldsT],
                       zerver_userprofile: List[ZerverFieldsT],
                       data_dir: str,
                       output_dir: str) -> None:
    """Convert all room history files under ``data_dir`` into one message dump.

    Every file matching ``<data_dir>/rooms/*/history.json`` is read.  The name
    of the directory containing each file is parsed as an integer stream id.
    Each entry of a history file must have a ``'UserMessage'`` key.

    Messages get ids 1..N in the order they are read, UserMessage rows get
    ids starting at 1, and the result is handed to
    ``create_converted_data_files`` with the file name
    ``"/messages-000001.json"``.

    Args:
        realm_id: Not read by this function.
        zerver_recipient: Recipient rows; those whose ``'type'`` equals
            ``Recipient.STREAM`` map a stream id (``'type_id'``) to a
            recipient id (``'id'``).
        zerver_subscription: Subscription rows carrying ``'user_profile'``
            and ``'recipient'``.
        zerver_userprofile: User rows; looked up by ``'id'`` and expected to
            provide ``'short_name'`` and ``'full_name'`` for mention rewriting.
        data_dir: Root directory of the exported data.
        output_dir: Passed through to ``create_converted_data_files``.

    Raises:
        KeyError: If a mentioned user id is not in ``zerver_userprofile`` or a
            message's stream id has no stream recipient.
        ValueError: If a room directory name is not an integer.
    """
    room_dir_glob = os.path.join(data_dir, 'rooms', '*', 'history.json')
    history_files = glob.glob(room_dir_glob)

    user_map = {user['id']: user for user in zerver_userprofile}

    def fix_mentions(content: str, mention_user_ids: List[int]) -> str:
        # Rewrite "@short_name" into "@**Full Name**" for each mentioned user,
        # then map the "@here" wildcard onto "@**all**".
        for user_id in mention_user_ids:
            user = user_map[user_id]
            hipchat_mention = '@{short_name}'.format(**user)
            zulip_mention = '@**{full_name}**'.format(**user)
            content = content.replace(hipchat_mention, zulip_mention)

        content = content.replace('@here', '@**all**')
        return content

    def process(fn: str) -> List[ZerverFieldsT]:
        # The parent directory name of the history file is the room id.
        rooms_dir = os.path.dirname(fn)
        room_id = os.path.basename(rooms_dir)
        stream_id = int(room_id)

        # Close the file deterministically instead of leaking the handle
        # until garbage collection.
        with open(fn) as history_file:
            data = json.load(history_file)

        flat_data = [d['UserMessage'] for d in data]

        return [
            dict(
                stream_id=stream_id,
                sender_id=d['sender']['id'],
                content=d['message'],
                mention_user_ids=d['mentions'],
                pub_date=str_date_to_float(d['timestamp']),
            )
            for d in flat_data
        ]

    raw_messages = [
        message
        for fn in history_files
        for message in process(fn)
    ]

    stream_id_to_recipient_id = {
        d['type_id']: d['id']
        for d in zerver_recipient
        if d['type'] == Recipient.STREAM
    }

    mention_map = dict()  # type: Dict[int, Set[int]]

    def make_message(message_id: int, raw_message: ZerverFieldsT) -> ZerverFieldsT:
        # One side effect here: record who was mentioned so the UserMessage
        # fan-out below can set is_mentioned.
        mention_map[message_id] = set(raw_message['mention_user_ids'])

        content = fix_mentions(
            content=raw_message['content'],
            mention_user_ids=raw_message['mention_user_ids'],
        )
        pub_date = raw_message['pub_date']
        stream_id = raw_message['stream_id']
        recipient_id = stream_id_to_recipient_id[stream_id]
        rendered_content = None
        subject = 'archived'
        user_id = raw_message['sender_id']

        return build_message(
            content=content,
            message_id=message_id,
            pub_date=pub_date,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            subject=subject,
            user_id=user_id,
        )

    zerver_message = [
        make_message(message_id=i + 1, raw_message=raw_message)
        for i, raw_message in enumerate(raw_messages)
    ]

    # Group subscribed user ids by recipient id.
    subscriber_map = dict()  # type: Dict[int, Set[int]]
    for sub in zerver_subscription:
        user_id = sub['user_profile']
        recipient_id = sub['recipient']
        subscriber_map.setdefault(recipient_id, set()).add(user_id)

    zerver_usermessage = []
    usermessage_id = 1

    for message in zerver_message:
        message_id = message['id']
        recipient_id = message['recipient']
        mention_user_ids = mention_map[message_id]
        # Recipients without subscribers produce no UserMessage rows.
        user_ids = subscriber_map.get(recipient_id, set())
        for user_id in user_ids:
            is_mentioned = user_id in mention_user_ids
            user_message = build_user_message(
                id=usermessage_id,
                user_id=user_id,
                message_id=message_id,
                is_mentioned=is_mentioned,
            )
            zerver_usermessage.append(user_message)
            usermessage_id += 1

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    dump_file_id = 1
    message_file = "/messages-%06d.json" % (dump_file_id, )
    create_converted_data_files(message_json, output_dir, message_file)
def process_message_file(fn: str,
                         get_recipient_id: Callable[[ZerverFieldsT], int],
                         message_key: str,
                         user_map: Dict[int, ZerverFieldsT],
                         zerver_subscription: List[ZerverFieldsT],
                         data_dir: str,
                         output_dir: str) -> None:
    """Convert one exported history file into one message dump file.

    The name of the directory containing ``fn`` is parsed as an integer and
    stored on each raw message as ``fn_id``.  Only entries of the file that
    contain ``message_key`` are converted; other entries are skipped.

    Message ids come from ``NEXT_ID('message')``, UserMessage ids from
    ``NEXT_ID('user_message')``, and the output file number from
    ``NEXT_ID('dump_file_id')``.  The result is handed to
    ``create_converted_data_files`` as ``"/messages-<6-digit id>.json"``.

    Args:
        fn: Path of the JSON history file to read.
        get_recipient_id: Maps a raw message dict (keys ``fn_id``,
            ``sender_id``, ``receiver_id``, ``content``, ``mention_user_ids``,
            ``pub_date``) to the recipient id of the message.
        message_key: Key under which each entry stores its message payload.
        user_map: User id -> user row providing ``'short_name'`` and
            ``'full_name'`` for mention rewriting.
        zerver_subscription: Subscription rows carrying ``'user_profile'``
            and ``'recipient'``.
        data_dir: Not read by this function.
        output_dir: Passed through to ``create_converted_data_files``.

    Raises:
        KeyError: If a mentioned user id is missing from ``user_map``.
        ValueError: If the parent directory name of ``fn`` is not an integer.
    """

    def fix_mentions(content: str, mention_user_ids: List[int]) -> str:
        # Rewrite "@short_name" into "@**Full Name**" for each mentioned user,
        # then map the "@here" wildcard onto "@**all**".
        for user_id in mention_user_ids:
            user = user_map[user_id]
            hipchat_mention = '@{short_name}'.format(**user)
            zulip_mention = '@**{full_name}**'.format(**user)
            content = content.replace(hipchat_mention, zulip_mention)

        content = content.replace('@here', '@**all**')
        return content

    def get_raw_messages(fn: str) -> List[ZerverFieldsT]:
        # Renamed from ``dir`` so the builtin is not shadowed.
        parent_dir = os.path.dirname(fn)
        fn_id = int(os.path.basename(parent_dir))

        # Close the file deterministically instead of leaking the handle
        # until garbage collection.
        with open(fn) as message_file_handle:
            data = json.load(message_file_handle)

        flat_data = [
            d[message_key]
            for d in data
            if message_key in d
        ]

        return [
            dict(
                fn_id=fn_id,
                sender_id=d['sender']['id'],
                # 'receiver' may be absent, in which case receiver_id is None.
                receiver_id=d.get('receiver', {}).get('id'),
                content=d['message'],
                mention_user_ids=d['mentions'],
                pub_date=str_date_to_float(d['timestamp']),
            )
            for d in flat_data
        ]

    raw_messages = get_raw_messages(fn)

    mention_map = dict()  # type: Dict[int, Set[int]]

    def make_message(message_id: int, raw_message: ZerverFieldsT) -> ZerverFieldsT:
        # One side effect here: record who was mentioned so the UserMessage
        # fan-out below can set is_mentioned.
        mention_map[message_id] = set(raw_message['mention_user_ids'])

        content = fix_mentions(
            content=raw_message['content'],
            mention_user_ids=raw_message['mention_user_ids'],
        )
        pub_date = raw_message['pub_date']
        recipient_id = get_recipient_id(raw_message)
        rendered_content = None
        subject = 'archived'
        user_id = raw_message['sender_id']

        return build_message(
            content=content,
            message_id=message_id,
            pub_date=pub_date,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            subject=subject,
            user_id=user_id,
        )

    zerver_message = [
        make_message(
            message_id=NEXT_ID('message'),
            raw_message=raw_message
        )
        for raw_message in raw_messages
    ]

    # Group subscribed user ids by recipient id.
    subscriber_map = dict()  # type: Dict[int, Set[int]]
    for sub in zerver_subscription:
        user_id = sub['user_profile']
        recipient_id = sub['recipient']
        subscriber_map.setdefault(recipient_id, set()).add(user_id)

    zerver_usermessage = []

    for message in zerver_message:
        message_id = message['id']
        recipient_id = message['recipient']
        mention_user_ids = mention_map[message_id]
        # Recipients without subscribers produce no UserMessage rows.
        user_ids = subscriber_map.get(recipient_id, set())
        for user_id in user_ids:
            is_mentioned = user_id in mention_user_ids
            user_message = build_user_message(
                id=NEXT_ID('user_message'),
                user_id=user_id,
                message_id=message_id,
                is_mentioned=is_mentioned,
            )
            zerver_usermessage.append(user_message)

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    dump_file_id = NEXT_ID('dump_file_id')
    message_file = "/messages-%06d.json" % (dump_file_id,)
    create_converted_data_files(message_json, output_dir, message_file)