def _get_submission_payloads(submission):
    """Yield a submission's payloads.

    Most submissions only contain one payload, but if the long-polling
    connection was closed while something happened, there can be multiple
    payloads.
    """
    for sub in javascript.loads(submission):

        # the submission number, increments with each payload
        # sub_num = sub[0]
        # the submission type
        sub_type = sub[1][0]

        if sub_type == 'c':

            # session ID, should be the same for every request
            # session_id = sub[1][1][0]
            # payload type
            payload_type = sub[1][1][1][0]

            if payload_type == 'bfo':
                # Payload is submessages in the list format. These are the
                # payloads we care about.
                yield javascript.loads(sub[1][1][1][1])
            elif payload_type == 'tm':
                # Payload is object format. I'm not sure what these are for,
                # but they don't seem very important.
                pass
            elif payload_type == 'wh':
                # Payload is null. These messages don't contain any
                # information other than the session_id, and appear to be
                # just heartbeats.
                pass
            elif payload_type == 'otr':
                # Not sure what this is for, might be something to do with
                # XMPP.
                pass
            elif payload_type == 'ho:hin':
                # Sent when a video call starts/stops.
                pass
            else:
                logger.warning(
                    'Got submission with unknown payload type {}:\n{}'.format(
                        payload_type, sub))
        elif sub_type == 'noop':
            # These contain no information and only seem to appear once as
            # the first message when a channel is opened.
            pass
        else:
            logger.warning(
                'Got submission with unknown submission type: {}\n{}'.format(
                    sub_type, sub))

def _get_submission_payloads(submission):
    """Yield a submission's payloads.

    Most submissions only contain one payload, but if the long-polling
    connection was closed while something happened, there can be multiple
    payloads.
    """
    for sub in javascript.loads(submission):

        # the submission number, increments with each payload
        # sub_num = sub[0]
        # the submission type
        sub_type = sub[1][0]

        if sub_type == 'c':

            # session ID, should be the same for every request
            # session_id = sub[1][1][0]
            # payload type
            payload_type = sub[1][1][1][0]

            if payload_type == 'bfo':
                # Payload is submessages in the list format. These are the
                # payloads we care about.
                yield javascript.loads(sub[1][1][1][1])
            elif payload_type == 'tm':
                # Payload is object format. I'm not sure what these are for,
                # but they don't seem very important.
                pass
            elif payload_type == 'wh':
                # Payload is null. These messages don't contain any
                # information other than the session_id, and appear to be
                # just heartbeats.
                pass
            elif payload_type == 'otr':
                # Not sure what this is for, might be something to do with
                # XMPP.
                pass
            elif payload_type == 'ho:hin':
                # Sent when a video call starts/stops.
                pass
            else:
                logger.warning(
                    'Got submission with unknown payload type {}:\n{}'
                    .format(payload_type, sub)
                )
        elif sub_type == 'noop':
            # These contain no information and only seem to appear once as
            # the first message when a channel is opened.
            pass
        else:
            logger.warning('Got submission with unknown submission type: {}\n{}'
                           .format(sub_type, sub))

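# Illustrative sketch (not hangups code): the nested-list structure that the
# two _get_submission_payloads variants above index into, shown as
# already-parsed Python data. The concrete values are hypothetical; only the
# positions (sub[1][0] = submission type, sub[1][1][0] = session ID,
# sub[1][1][1][0] = payload type, sub[1][1][1][1] = JS-encoded payload)
# come from the functions' own comments.
example_parsed_submission = [
    [1, ['noop']],  # ignored
    [2, ['c', ['SESSION_ID', ['bfo', '[["submessage"]]']]]],
]
sub = example_parsed_submission[1]
assert sub[1][0] == 'c'
assert sub[1][1][1][0] == 'bfo'
assert sub[1][1][1][1] == '[["submessage"]]'  # parsed again by javascript.loads
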
def _pb_request(self, endpoint, request_pb, response_pb):
    """Send a Protocol Buffer formatted chat API request.

    Args:
        endpoint (str): The chat API endpoint to use.
        request_pb: The request body as a Protocol Buffer message.
        response_pb: The response body as a Protocol Buffer message.

    Raises:
        NetworkError: If the request fails.
    """
    logger.debug('Sending Protocol Buffer request %s:\n%s', endpoint,
                 request_pb)
    res = yield from self._base_request(
        'https://clients6.google.com/chat/v1/{}'.format(endpoint),
        'application/json+protobuf',  # The request body is pblite.
        'protojson',  # The response should be pblite.
        json.dumps(pblite.encode(request_pb))
    )
    pblite.decode(response_pb, javascript.loads(res.body.decode()),
                  ignore_first_item=True)
    logger.debug('Received Protocol Buffer response:\n%s', response_pb)
    status = response_pb.response_header.status
    if status != hangouts_pb2.RESPONSE_STATUS_OK:
        description = response_pb.response_header.error_description
        raise exceptions.NetworkError(
            'Request failed with status {}: \'{}\''
            .format(status, description)
        )

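# Illustrative sketch (assumptions, not hangups code): roughly what the
# pblite ("protojson") body of a _pb_request looks like. The usual pblite
# convention serializes a Protocol Buffer message as a nested JSON list with
# the value of field number N at index N - 1 and unset fields as null; the
# field numbers and values below are made up for illustration only.
import json

toy_request_pblite = [
    ['toy-header', None, None],  # field 1: request header (hypothetical)
    1048576,                     # field 2: max response size (hypothetical)
]
body = json.dumps(toy_request_pblite)  # what gets POSTed to the endpoint
# Responses come back in the same list form; pblite.decode() with
# ignore_first_item=True skips a leading element before mapping list indices
# back onto response_pb's fields.
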
def syncallnewevents(self, timestamp):
    """List all events occurring at or after timestamp.

    This method requests protojson rather than json so we have one chat
    message parser rather than two.

    timestamp: datetime.datetime instance specifying the time after
    which to return all events occurring in.

    Raises hangups.NetworkError if the request fails.
    """
    try:
        res = yield self._request('conversations/syncallnewevents', [
            self._get_request_header(),
            int(timestamp.timestamp()) * 1000000,
            [], None, [], False, [],
            1048576  # max response size? (number of bytes in a MB)
        ], use_json=False)
    except (httpclient.HTTPError, IOError) as e:
        # In addition to HTTPError, httpclient can raise IOError (which
        # includes socket.gaierror).
        raise exceptions.NetworkError(e)
    # can return 200 but still contain an error
    res = javascript.loads(res.body.decode())
    res_status = res[1][0]
    if res_status != 1:
        raise exceptions.NetworkError('Response status is \'{}\''
                                      .format(res_status))
    return res

def syncallnewevents(self, timestamp):
    """List all events occurring at or after timestamp.

    This method requests protojson rather than json so we have one chat
    message parser rather than two.

    timestamp: datetime.datetime instance specifying the time after
    which to return all events occurring in.

    Raises hangups.NetworkError if the request fails.

    Returns a ClientSyncAllNewEventsResponse.
    """
    res = yield from self._request('conversations/syncallnewevents', [
        self._get_request_header(),
        # last_sync_timestamp
        parsers.to_timestamp(timestamp),
        [], None, [], False, [],
        1048576  # max_response_size_bytes
    ], use_json=False)
    try:
        res = schemas.CLIENT_SYNC_ALL_NEW_EVENTS_RESPONSE.parse(
            javascript.loads(res.body.decode())
        )
    except ValueError as e:
        raise exceptions.NetworkError('Response failed to parse: {}'
                                      .format(e))
    # can return 200 but still contain an error
    status = res.response_header.status
    if status != 1:
        raise exceptions.NetworkError('Response status is \'{}\''
                                      .format(status))
    return res

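# Illustrative sketch (assumption, not hangups code): the conversion that
# parsers.to_timestamp is presumed to perform in the newer syncallnewevents
# above, matching the explicit int(timestamp.timestamp()) * 1000000 in the
# older version: a datetime.datetime becomes microseconds since the UNIX
# epoch.
import datetime

def to_timestamp_sketch(dt):
    """Convert a datetime to a microsecond UNIX timestamp (sketch)."""
    return int(dt.timestamp() * 1000000)

example = datetime.datetime(2014, 1, 1, tzinfo=datetime.timezone.utc)
assert to_timestamp_sketch(example) == 1388534400000000
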
def _get_submission_payloads(submission):
    """Yield a submission's payloads.

    Most submissions only contain one payload, but if the long-polling
    connection was closed while something happened, there can be multiple
    payloads.
    """
    for sub in javascript.loads(submission):
        if sub[1][0] != 'noop':
            wrapper = javascript.loads(sub[1][0]['p'])
            # pylint: disable=invalid-sequence-index
            if '3' in wrapper and '2' in wrapper['3']:
                client_id = wrapper['3']['2']
                # Hack to pass the client ID back to Client
                yield {'client_id': client_id}
            if '2' in wrapper:
                yield javascript.loads(wrapper['2']['2'])

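# Illustrative sketch (not hangups code): the wrapper structure that this
# newer _get_submission_payloads unpacks. The keys '3'/'2' and their
# positions are taken from the code above; the concrete values are
# hypothetical.
example_wrapper = {
    '3': {'2': 'example-client-id'},  # wrapper['3']['2'] -> client ID
    '2': {'2': '[["submessage"]]'},   # wrapper['2']['2'] -> JS payload string
}
assert example_wrapper['3']['2'] == 'example-client-id'
# The '2' payload string is itself parsed again with javascript.loads before
# being yielded.
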
def _parse_sid_response(res):
    """Parse response format for request for new channel SID.

    Example format (after parsing JS):
    [   [0,["c","SID_HERE","",8]],
        [1,[{"gsid":"GSESSIONID_HERE"}]]]

    Returns (SID, gsessionid) tuple.
    """
    res = javascript.loads(list(PushDataParser().get_submissions(res))[0])
    sid = res[0][1][1]
    gsessionid = res[1][1][0]['gsid']
    return (sid, gsessionid)

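# Illustrative sketch (not hangups code): indexing into the example response
# format given in the docstring above, using its placeholder values.
parsed_sid_response = [
    [0, ["c", "SID_HERE", "", 8]],
    [1, [{"gsid": "GSESSIONID_HERE"}]],
]
assert parsed_sid_response[0][1][1] == 'SID_HERE'                 # sid
assert parsed_sid_response[1][1][0]['gsid'] == 'GSESSIONID_HERE'  # gsessionid
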
def getconversation(self, conversation_id, event_timestamp, max_events=50):
    """Return conversation events.

    This is mainly used for retrieving conversation scrollback. Events
    occurring before event_timestamp are returned, in order from oldest to
    newest.

    Raises hangups.NetworkError if the request fails.
    """
    res = yield from self._request(
        'conversations/getconversation', [
            self._get_request_header(),
            [[conversation_id], [], []],  # conversationSpec
            False,  # includeConversationMetadata
            True,  # includeEvents
            None,  # ???
            max_events,  # maxEventsPerConversation
            # eventContinuationToken (specifying timestamp is sufficient)
            [
                None,  # eventId
                None,  # storageContinuationToken
                parsers.to_timestamp(event_timestamp),  # eventTimestamp
            ]
        ], use_json=False)
    try:
        res = schemas.CLIENT_GET_CONVERSATION_RESPONSE.parse(
            javascript.loads(res.body.decode()))
    except ValueError as e:
        raise exceptions.NetworkError(
            'Response failed to parse: {}'.format(e))
    # can return 200 but still contain an error
    status = res.response_header.status
    if status != 1:
        raise exceptions.NetworkError(
            'Response status is \'{}\''.format(status))
    return res

def _parse_sid_response(res):
    """Parse response format for request for new channel SID.

    Returns (SID, header_client, gsessionid).
    """
    sid = None
    header_client = None
    gsessionid = None

    p = longpoll.PushDataParser()
    res = javascript.loads(list(p.get_submissions(res.decode()))[0])

    for segment in res:
        num, message = segment
        if num == 0:
            sid = message[1]
        elif message[0] == 'c':
            type_ = message[1][1][0]
            if type_ == 'cfj':
                header_client = message[1][1][1].split('/')[1]
            elif type_ == 'ei':
                gsessionid = message[1][1][1]

    return (sid, header_client, gsessionid)

def _parse_sid_response(res):
    """Parse response format for request for new channel SID.

    Returns (SID, email, header_client, gsessionid).
    """
    sid = None
    header_client = None
    gsessionid = None

    p = PushDataParser()
    res = javascript.loads(list(p.get_submissions(res))[0])

    for segment in res:
        num, message = segment
        if num == 0:
            sid = message[1]
        elif message[0] == 'c':
            type_ = message[1][1][0]
            if type_ == 'cfj':
                email, header_client = message[1][1][1].split('/')
            elif type_ == 'ei':
                gsessionid = message[1][1][1]

    return (sid, email, header_client, gsessionid)

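# Illustrative sketch (not hangups code): a hypothetical segment list for the
# extended SID response handled by the two _parse_sid_response variants
# above. Segment 0 carries the SID; a 'cfj' message carries "email/client"
# and an 'ei' message carries the gsessionid. All values are placeholders.
example_segments = [
    [0, ["c", "SID_HERE", "", 8]],
    [1, ["c", ["SID_HERE", ["cfj", "user@example.com/EXAMPLE_CLIENT"]]]],
    [2, ["c", ["SID_HERE", ["ei", "GSESSIONID_HERE"]]]],
]
# Walking these segments the way the functions do:
for num, message in example_segments:
    if num == 0:
        assert message[1] == 'SID_HERE'
    elif message[0] == 'c':
        type_ = message[1][1][0]
        if type_ == 'cfj':
            email, header_client = message[1][1][1].split('/')
        elif type_ == 'ei':
            gsessionid = message[1][1][1]
assert (email, header_client) == ('user@example.com', 'EXAMPLE_CLIENT')
assert gsessionid == 'GSESSIONID_HERE'
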
def getentitybyid(self, chat_id_list):
    """Return information about a list of contacts.

    Raises hangups.NetworkError if the request fails.
    """
    res = yield from self._request('contacts/getentitybyid', [
        self._get_request_header(),
        None,
        [[str(chat_id)] for chat_id in chat_id_list],
    ], use_json=False)
    try:
        res = schemas.CLIENT_GET_ENTITY_BY_ID_RESPONSE.parse(
            javascript.loads(res.body.decode())
        )
    except ValueError as e:
        raise exceptions.NetworkError('Response failed to parse: {}'
                                      .format(e))
    # can return 200 but still contain an error
    status = res.response_header.status
    if status != 1:
        raise exceptions.NetworkError('Response status is \'{}\''
                                      .format(status))
    return res

def getentitybyid(self, chat_id_list):
    """Return information about a list of contacts.

    Raises hangups.NetworkError if the request fails.
    """
    res = yield from self._request('contacts/getentitybyid', [
        self._get_request_header(),
        None,
        [[str(chat_id)] for chat_id in chat_id_list],
    ], use_json=False)
    try:
        res = schemas.CLIENT_GET_ENTITY_BY_ID_RESPONSE.parse(
            javascript.loads(res.body.decode()))
    except ValueError as e:
        raise exceptions.NetworkError(
            'Response failed to parse: {}'.format(e))
    # can return 200 but still contain an error
    status = res.response_header.status
    if status != 1:
        raise exceptions.NetworkError(
            'Response status is \'{}\''.format(status))
    return res

def getconversation(self, conversation_id, event_timestamp, max_events=50):
    """Return conversation events.

    This is mainly used for retrieving conversation scrollback. Events
    occurring before event_timestamp are returned, in order from oldest to
    newest.

    Raises hangups.NetworkError if the request fails.
    """
    res = yield from self._request('conversations/getconversation', [
        self._get_request_header(),
        [[conversation_id], [], []],  # conversationSpec
        False,  # includeConversationMetadata
        True,  # includeEvents
        None,  # ???
        max_events,  # maxEventsPerConversation
        # eventContinuationToken (specifying timestamp is sufficient)
        [
            None,  # eventId
            None,  # storageContinuationToken
            parsers.to_timestamp(event_timestamp),  # eventTimestamp
        ]
    ], use_json=False)
    try:
        res = schemas.CLIENT_GET_CONVERSATION_RESPONSE.parse(
            javascript.loads(res.body.decode())
        )
    except ValueError as e:
        raise exceptions.NetworkError('Response failed to parse: {}'
                                      .format(e))
    # can return 200 but still contain an error
    status = res.response_header.status
    if status != 1:
        raise exceptions.NetworkError('Response status is \'{}\''
                                      .format(status))
    return res

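# Illustrative sketch (not hangups code): what the pblite request body
# assembled by getconversation above would look like for hypothetical
# arguments, assuming parsers.to_timestamp yields a microsecond timestamp.
example_getconversation_body = [
    'REQUEST_HEADER_PLACEHOLDER',     # self._get_request_header()
    [['EXAMPLE_CONV_ID'], [], []],    # conversationSpec (made-up ID)
    False,                            # includeConversationMetadata
    True,                             # includeEvents
    None,
    50,                               # maxEventsPerConversation
    [None, None, 1388534400000000],   # eventContinuationToken (timestamp only)
]
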
def _init_talkgadget_1(self):
    """Make first talkgadget request and parse response.

    The response body is an HTML document containing a series of script
    tags containing JavaScript objects. We need to parse the objects to
    get at the data.
    """
    url = 'https://talkgadget.google.com/u/0/talkgadget/_/chat'
    params = {
        'prop': 'aChromeExtension',
        'fid': 'gtn-roster-iframe-id',
        'ec': '["ci:ec",true,true,false]',
    }
    headers = {
        # appears to require a browser user agent
        'user-agent': ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
                       '(KHTML, like Gecko) Chrome/34.0.1847.132 Safari/537.36'),
    }
    res = yield http_utils.fetch(url, cookies=self._cookies, params=params,
                                 headers=headers,
                                 connect_timeout=CONNECT_TIMEOUT,
                                 request_timeout=REQUEST_TIMEOUT)
    logger.debug('First talkgadget request result:\n{}'.format(res.body))
    if res.code != 200:
        raise ValueError(
            "First talkgadget request failed with {}: {}".format(
                res.code, res.body))
    res = res.body.decode()

    # Parse the response by using a regex to find all the JS objects, and
    # parsing them.
    res = res.replace('\n', '')
    regex = re.compile(
        r"(?:<script>AF_initDataCallback\((.*?)\);</script>)")
    data_dict = {}
    for data in regex.findall(res):
        try:
            data = javascript.loads(data)
            data_dict[data['key']] = data['data']
        except ValueError as e:
            # not everything will be parsable, but we don't care
            logger.debug('Failed to parse JavaScript: {}\n{}'.format(
                e, data))

    # TODO: handle errors here
    self._api_key = data_dict['ds:7'][0][2]
    self._header_date = data_dict['ds:2'][0][4]
    self._header_version = data_dict['ds:2'][0][6]
    self._header_id = data_dict['ds:4'][0][7]
    self._channel_path = data_dict['ds:4'][0][1]
    self._clid = data_dict['ds:4'][0][7]
    self._channel_ec_param = data_dict['ds:4'][0][4]
    self._channel_prop_param = data_dict['ds:4'][0][5]

    # build dict of conversations and their participants
    initial_conversations = {}
    self.initial_users = {}  # {UserID: User}

    # add self to the contacts
    self_contact = data_dict['ds:20'][0][2]
    self.self_user_id = UserID(chat_id=self_contact[8][0],
                               gaia_id=self_contact[8][1])
    self.initial_users[self.self_user_id] = User(
        id_=self.self_user_id, full_name=self_contact[9][1],
        first_name=self_contact[9][2], is_self=True)

    conversations = data_dict['ds:19'][0][3]
    for c in conversations:
        id_ = c[1][0][0]
        participants = c[1][13]
        last_modified = c[1][3][12]
        # With every conversation, we get a list of up to 20 of the most
        # recent messages, sorted oldest to newest.
        messages = []
        for raw_message in c[2]:
            message = longpoll._parse_chat_message([raw_message])
            # A message may parse to None if it's just a conversation name
            # change.
            if message is not None:
                messages.append(message[1:])
        initial_conversations[id_] = {
            'participants': [],
            'last_modified': last_modified,
            'name': c[1][2],
            'messages': messages,
        }
        # Add the participants for this conversation.
        for p in participants:
            user_id = UserID(chat_id=p[0][0], gaia_id=p[0][1])
            initial_conversations[id_]['participants'].append(user_id)
            # Add the participant to our list of contacts as a fallback, in
            # case they can't be found later by other methods.
            # TODO We should note who these users are and try to request
            # them.
            # p[1] can be a full name, None, or out of range.
            try:
                display_name = p[1]
            except IndexError:
                display_name = None
            if display_name is None:
                display_name = 'Unknown'
            self.initial_users[user_id] = User(
                id_=user_id, first_name=display_name.split()[0],
                full_name=display_name,
                is_self=(user_id == self.self_user_id))

    # build dict of contacts and their names (doesn't include users not in
    # contacts)
    contacts_main = data_dict['ds:21'][0]
    # contacts_main[2] has some, but the format is slightly different
    contacts = (contacts_main[4][2] + contacts_main[5][2] +
                contacts_main[6][2] + contacts_main[7][2] +
                contacts_main[8][2])
    for c in contacts:
        user_id = UserID(chat_id=c[0][8][0], gaia_id=c[0][8][1])
        self.initial_users[user_id] = User(
            id_=user_id, full_name=c[0][9][1], first_name=c[0][9][2],
            is_self=(user_id == self.self_user_id))

    # Create a dict of the known conversations.
    self.initial_conversations = {
        conv_id: Conversation(
            self, conv_id,
            [
                self.initial_users[user_id]
                for user_id in conv_info['participants']
            ],
            conv_info['last_modified'],
            conv_info['name'],
            conv_info['messages'],
        )
        for conv_id, conv_info in initial_conversations.items()
    }

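# Illustrative sketch (not hangups code): how the AF_initDataCallback regex
# used above pulls JS objects out of the talkgadget HTML. The script content
# is hypothetical but mirrors the 'key'/'data' fields the code reads;
# javascript.loads (rather than json) is needed because the keys are
# unquoted JS identifiers.
import re

example_html = (
    "<script>AF_initDataCallback({key: 'ds:7', data: [[null, null, "
    "'API_KEY_HERE']]});</script>"
    "<script>AF_initDataCallback({key: 'ds:2', data: [[0]]});</script>"
)
regex = re.compile(r"(?:<script>AF_initDataCallback\((.*?)\);</script>)")
matches = regex.findall(example_html)
assert matches[0] == "{key: 'ds:7', data: [[null, null, 'API_KEY_HERE']]}"
assert len(matches) == 2
# Each match is then handed to javascript.loads and stored in data_dict
# under its 'key'.
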
def test_loads_parse_error():
    """Test loading invalid JS that fails parsing."""
    with pytest.raises(ValueError):
        javascript.loads('{"foo": 1}}')

def test_loads_lex_error():
    """Test loading invalid JS that fails lexing."""
    with pytest.raises(ValueError):
        javascript.loads('{""": 1}')

def test_loads(input_, expected):
    """Test loading JS from a string."""
    assert javascript.loads(input_) == expected

def test_loads(input_, expected):
    """Test loading JavaScript from a string."""
    assert javascript.loads(input_) == expected

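# Illustrative sketch (not the project's actual test data): the kind of
# (input_, expected) pairs the parametrized test_loads cases above are
# presumably driven by, i.e. JS literals that plain JSON would reject.
EXAMPLE_LOADS_CASES = [
    ("{foo: 1}", {'foo': 1}),           # unquoted object key
    ("['a', true, null]", ['a', True, None]),
    ('"hello"', 'hello'),
]
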
def _init_talkgadget_1(self):
    """Make first talkgadget request and parse response.

    The response body is an HTML document containing a series of script
    tags containing JavaScript objects. We need to parse the objects to
    get at the data.
    """
    url = 'https://talkgadget.google.com/u/0/talkgadget/_/chat'
    params = {
        'prop': 'aChromeExtension',
        'fid': 'gtn-roster-iframe-id',
        'ec': '["ci:ec",true,true,false]',
    }
    headers = {
        # appears to require a browser user agent
        'user-agent': (
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/34.0.1847.132 Safari/537.36'
        ),
    }
    res = yield http_utils.fetch(
        url, cookies=self._cookies, params=params, headers=headers,
        connect_timeout=CONNECT_TIMEOUT, request_timeout=REQUEST_TIMEOUT
    )
    logger.debug('First talkgadget request result:\n{}'.format(res.body))
    if res.code != 200:
        raise ValueError("First talkgadget request failed with {}: {}"
                         .format(res.code, res.body))
    res = res.body.decode()

    # Parse the response by using a regex to find all the JS objects, and
    # parsing them.
    res = res.replace('\n', '')
    regex = re.compile(
        r"(?:<script>AF_initDataCallback\((.*?)\);</script>)"
    )
    data_dict = {}
    for data in regex.findall(res):
        try:
            data = javascript.loads(data)
            data_dict[data['key']] = data['data']
        except ValueError as e:
            # not everything will be parsable, but we don't care
            logger.debug('Failed to parse JavaScript: {}\n{}'
                         .format(e, data))

    # TODO: handle errors here
    self._api_key = data_dict['ds:7'][0][2]
    self._header_date = data_dict['ds:2'][0][4]
    self._header_version = data_dict['ds:2'][0][6]
    self._header_id = data_dict['ds:4'][0][7]
    self._channel_path = data_dict['ds:4'][0][1]
    self._clid = data_dict['ds:4'][0][7]
    self._channel_ec_param = data_dict['ds:4'][0][4]
    self._channel_prop_param = data_dict['ds:4'][0][5]

    # build dict of conversations and their participants
    initial_conversations = {}
    self.initial_users = {}  # {UserID: User}

    # add self to the contacts
    self_contact = data_dict['ds:20'][0][2]
    self.self_user_id = UserID(chat_id=self_contact[8][0],
                               gaia_id=self_contact[8][1])
    self.initial_users[self.self_user_id] = User(
        id_=self.self_user_id, full_name=self_contact[9][1],
        first_name=self_contact[9][2], is_self=True
    )

    conversations = data_dict['ds:19'][0][3]
    for c in conversations:
        id_ = c[1][0][0]
        participants = c[1][13]
        last_modified = c[1][3][12]
        initial_conversations[id_] = {
            'participants': [],
            'last_modified': last_modified,
        }
        for p in participants:
            user_id = UserID(chat_id=p[0][0], gaia_id=p[0][1])
            initial_conversations[id_]['participants'].append(
                user_id
            )
            # Add the user to our list of contacts if their name is
            # present. This is a hack to deal with some contacts not being
            # found via the other methods.
            # TODO We should note who these users are and try to request
            # them.
            if len(p) > 1:
                display_name = p[1]
                self.initial_users[user_id] = User(
                    id_=user_id, first_name=display_name.split()[0],
                    full_name=display_name,
                    is_self=(user_id == self.self_user_id)
                )

    # build dict of contacts and their names (doesn't include users not in
    # contacts)
    contacts_main = data_dict['ds:21'][0]
    # contacts_main[2] has some, but the format is slightly different
    contacts = (contacts_main[4][2] + contacts_main[5][2] +
                contacts_main[6][2] + contacts_main[7][2] +
                contacts_main[8][2])
    for c in contacts:
        user_id = UserID(chat_id=c[0][8][0], gaia_id=c[0][8][1])
        self.initial_users[user_id] = User(
            id_=user_id, full_name=c[0][9][1], first_name=c[0][9][2],
            is_self=(user_id == self.self_user_id)
        )

    # Create a dict of the known conversations.
    self.initial_conversations = {conv_id: Conversation(
        self, conv_id,
        [self.initial_users[user_id] for user_id in
         conv_info['participants']],
        conv_info['last_modified'],
    ) for conv_id, conv_info in initial_conversations.items()}

def _initialize_chat(self):
    """Request push channel creation and initial chat data.

    Returns instance of InitialData.

    The response body is an HTML document containing a series of script
    tags containing JavaScript objects. We need to parse the objects to
    get at the data.
    """
    # We first need to fetch the 'pvt' token, which is required for the
    # initialization request (otherwise it will return 400).
    try:
        res = yield from http_utils.fetch(
            'get', PVT_TOKEN_URL, cookies=self._cookies,
            connector=self._connector
        )
        CHAT_INIT_PARAMS['pvt'] = javascript.loads(res.body.decode())[1]
        logger.info('Found PVT token: {}'.format(CHAT_INIT_PARAMS['pvt']))
    except (exceptions.NetworkError, ValueError) as e:
        raise exceptions.HangupsError('Failed to fetch PVT token: {}'
                                      .format(e))
    # Now make the actual initialization request:
    try:
        res = yield from http_utils.fetch(
            'get', CHAT_INIT_URL, cookies=self._cookies,
            params=CHAT_INIT_PARAMS, connector=self._connector
        )
    except exceptions.NetworkError as e:
        raise exceptions.HangupsError('Initialize chat request failed: {}'
                                      .format(e))

    # Parse the response by using a regex to find all the JS objects, and
    # parsing them. Not everything will be parsable, but we don't care if
    # an object we don't need can't be parsed.
    data_dict = {}
    for data in CHAT_INIT_REGEX.findall(res.body.decode()):
        try:
            logger.debug("Attempting to load javascript: {}..."
                         .format(repr(data[:100])))
            data = javascript.loads(data)
            # pylint: disable=invalid-sequence-index
            data_dict[data['key']] = data['data']
        except ValueError as e:
            try:
                data = data.replace("data:function(){return", "data:")
                data = data.replace("}}", "}")
                data = javascript.loads(data)
                data_dict[data['key']] = data['data']
            except ValueError as e:
                raise

            # logger.debug('Failed to parse initialize chat object: {}\n{}'
            #              .format(e, data))

    # Extract various values that we will need.
    try:
        self._api_key = data_dict['ds:7'][0][2]
        self._email = data_dict['ds:34'][0][2]
        self._header_date = data_dict['ds:2'][0][4]
        self._header_version = data_dict['ds:2'][0][6]
        self._header_id = data_dict['ds:4'][0][7]
        _sync_timestamp = parsers.from_timestamp(
            # cgserp?
            # data_dict['ds:21'][0][1][4]
            # data_dict['ds:35'][0][1][4]
            data_dict['ds:21'][0][1][4]
        )
    except KeyError as e:
        raise exceptions.HangupsError('Failed to get initialize chat '
                                      'value: {}'.format(e))

    # Parse the entity representing the current user.
    self_entity = schemas.CLIENT_GET_SELF_INFO_RESPONSE.parse(
        # cgsirp?
        # data_dict['ds:20'][0]
        # data_dict['ds:35'][0]
        data_dict['ds:20'][0]
    ).self_entity

    # Parse every existing conversation's state, including participants.
    initial_conv_states = schemas.CLIENT_CONVERSATION_STATE_LIST.parse(
        # csrcrp?
        # data_dict['ds:19'][0][3]
        # data_dict['ds:36'][0][3]
        data_dict['ds:19'][0][3]
    )
    initial_conv_parts = []
    for conv_state in initial_conv_states:
        initial_conv_parts.extend(conv_state.conversation.participant_data)

    # Parse the entities for the user's contacts (doesn't include users
    # not in contacts). If this fails, continue without the rest of the
    # entities.
    initial_entities = []
    try:
        entities = schemas.INITIAL_CLIENT_ENTITIES.parse(
            # cgserp?
            # data_dict['ds:21'][0]
            # data_dict['ds:37'][0]
            data_dict['ds:21'][0]
        )
    except ValueError as e:
        logger.warning('Failed to parse initial client entities: {}'
                       .format(e))
    else:
        initial_entities.extend(entities.entities)
        initial_entities.extend(e.entity for e in itertools.chain(
            entities.group1.entity, entities.group2.entity,
            entities.group3.entity, entities.group4.entity,
            entities.group5.entity
        ))

    return InitialData(initial_conv_states, self_entity, initial_entities,
                       initial_conv_parts, _sync_timestamp)

def _initialize_chat(self):
    """Request push channel creation and initial chat data.

    Returns instance of InitialData.

    The response body is an HTML document containing a series of script
    tags containing JavaScript objects. We need to parse the objects to
    get at the data.
    """
    # We first need to fetch the 'pvt' token, which is required for the
    # initialization request (otherwise it will return 400).
    try:
        res = yield from http_utils.fetch('get', PVT_TOKEN_URL,
                                          cookies=self._cookies,
                                          connector=self._connector)
        CHAT_INIT_PARAMS['pvt'] = javascript.loads(res.body.decode())[1]
        logger.info('Found PVT token: {}'.format(CHAT_INIT_PARAMS['pvt']))
    except (exceptions.NetworkError, ValueError) as e:
        raise exceptions.HangupsError(
            'Failed to fetch PVT token: {}'.format(e))
    # Now make the actual initialization request:
    try:
        res = yield from http_utils.fetch('get', CHAT_INIT_URL,
                                          cookies=self._cookies,
                                          params=CHAT_INIT_PARAMS,
                                          connector=self._connector)
    except exceptions.NetworkError as e:
        raise exceptions.HangupsError(
            'Initialize chat request failed: {}'.format(e))

    # Parse the response by using a regex to find all the JS objects, and
    # parsing them. Not everything will be parsable, but we don't care if
    # an object we don't need can't be parsed.
    data_dict = {}
    for data in CHAT_INIT_REGEX.findall(res.body.decode()):
        try:
            logger.debug("Attempting to load javascript: {}...".format(
                repr(data[:100])))
            data = javascript.loads(data)
            # pylint: disable=invalid-sequence-index
            data_dict[data['key']] = data['data']
        except ValueError as e:
            try:
                data = data.replace("data:function(){return", "data:")
                data = data.replace("}}", "}")
                data = javascript.loads(data)
                data_dict[data['key']] = data['data']
            except ValueError as e:
                raise

            # logger.debug('Failed to parse initialize chat object: {}\n{}'
            #              .format(e, data))

    # Extract various values that we will need.
    try:
        self._api_key = data_dict['ds:7'][0][2]
        self._email = data_dict['ds:34'][0][2]
        self._header_date = data_dict['ds:2'][0][4]
        self._header_version = data_dict['ds:2'][0][6]
        self._header_id = data_dict['ds:4'][0][7]
        _sync_timestamp = parsers.from_timestamp(
            # cgserp?
            # data_dict['ds:21'][0][1][4]
            # data_dict['ds:35'][0][1][4]
            data_dict['ds:21'][0][1][4])
    except KeyError as e:
        raise exceptions.HangupsError('Failed to get initialize chat '
                                      'value: {}'.format(e))

    # Parse the entity representing the current user.
    self_entity = schemas.CLIENT_GET_SELF_INFO_RESPONSE.parse(
        # cgsirp?
        # data_dict['ds:20'][0]
        # data_dict['ds:35'][0]
        data_dict['ds:20'][0]).self_entity

    # Parse every existing conversation's state, including participants.
    initial_conv_states = schemas.CLIENT_CONVERSATION_STATE_LIST.parse(
        # csrcrp?
        # data_dict['ds:19'][0][3]
        # data_dict['ds:36'][0][3]
        data_dict['ds:19'][0][3])
    initial_conv_parts = []
    for conv_state in initial_conv_states:
        initial_conv_parts.extend(conv_state.conversation.participant_data)

    # Parse the entities for the user's contacts (doesn't include users
    # not in contacts). If this fails, continue without the rest of the
    # entities.
    initial_entities = []
    try:
        entities = schemas.INITIAL_CLIENT_ENTITIES.parse(
            # cgserp?
            # data_dict['ds:21'][0]
            # data_dict['ds:37'][0]
            data_dict['ds:21'][0])
    except ValueError as e:
        logger.warning(
            'Failed to parse initial client entities: {}'.format(e))
    else:
        initial_entities.extend(entities.entities)
        initial_entities.extend(e.entity for e in itertools.chain(
            entities.group1.entity, entities.group2.entity,
            entities.group3.entity, entities.group4.entity,
            entities.group5.entity))

    return InitialData(initial_conv_states, self_entity, initial_entities,
                       initial_conv_parts, _sync_timestamp)

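# Illustrative sketch (not hangups code): the string surgery that the inner
# except-branch above performs when an AF_initDataCallback object wraps its
# data in a function. The input is hypothetical but shaped like the case the
# code handles.
wrapped = "{key: 'ds:7', data:function(){return [[1, 2]]}}"
unwrapped = wrapped.replace("data:function(){return", "data:")
unwrapped = unwrapped.replace("}}", "}")
assert unwrapped == "{key: 'ds:7', data: [[1, 2]]}"
# The result is then parseable by javascript.loads like any other object.
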
def _initialize_chat(self):
    """Request push channel creation and initial chat data.

    Returns instance of InitialData.

    The response body is an HTML document containing a series of script
    tags containing JavaScript objects. We need to parse the objects to
    get at the data.
    """
    try:
        res = yield from http_utils.fetch('get', CHAT_INIT_URL,
                                          cookies=self._cookies,
                                          params=CHAT_INIT_PARAMS,
                                          connector=self._connector)
    except exceptions.NetworkError as e:
        raise exceptions.HangupsError(
            'Initialize chat request failed: {}'.format(e))

    # Parse the response by using a regex to find all the JS objects, and
    # parsing them. Not everything will be parsable, but we don't care if
    # an object we don't need can't be parsed.
    data_dict = {}
    for data in CHAT_INIT_REGEX.findall(res.body.decode()):
        try:
            data = javascript.loads(data)
            # pylint: disable=invalid-sequence-index
            data_dict[data['key']] = data['data']
        except ValueError as e:
            logger.debug(
                'Failed to parse initialize chat object: {}\n{}'.format(
                    e, data))

    # Extract various values that we will need.
    try:
        self._api_key = data_dict['ds:7'][0][2]
        self._header_date = data_dict['ds:2'][0][4]
        self._header_version = data_dict['ds:2'][0][6]
        self._header_id = data_dict['ds:4'][0][7]
        self._channel_path = data_dict['ds:4'][0][1]
        self._clid = data_dict['ds:4'][0][7]
        self._channel_ec_param = data_dict['ds:4'][0][4]
        self._channel_prop_param = data_dict['ds:4'][0][5]
        _sync_timestamp = parsers.from_timestamp(
            data_dict['ds:21'][0][1][4])
    except KeyError as e:
        raise exceptions.HangupsError('Failed to get initialize chat '
                                      'value: {}'.format(e))

    # Parse the entity representing the current user.
    self_entity = schemas.CLIENT_GET_SELF_INFO_RESPONSE.parse(
        data_dict['ds:20'][0]).self_entity

    # Parse every existing conversation's state, including participants.
    initial_conv_states = schemas.CLIENT_CONVERSATION_STATE_LIST.parse(
        data_dict['ds:19'][0][3])
    initial_conv_parts = []
    for conv_state in initial_conv_states:
        initial_conv_parts.extend(conv_state.conversation.participant_data)

    # Parse the entities for the user's contacts (doesn't include users
    # not in contacts). If this fails, continue without the rest of the
    # entities.
    initial_entities = []
    try:
        entities = schemas.INITIAL_CLIENT_ENTITIES.parse(
            data_dict['ds:21'][0])
    except ValueError as e:
        logger.warning(
            'Failed to parse initial client entities: {}'.format(e))
    else:
        initial_entities.extend(entities.entities)
        initial_entities.extend(e.entity for e in itertools.chain(
            entities.group1.entity, entities.group2.entity,
            entities.group3.entity, entities.group4.entity,
            entities.group5.entity))

    return InitialData(initial_conv_states, self_entity, initial_entities,
                       initial_conv_parts, _sync_timestamp)

def _initialize_chat(self):
    """Request push channel creation and initial chat data.

    Returns instance of InitialData.

    The response body is an HTML document containing a series of script
    tags containing JavaScript objects. We need to parse the objects to
    get at the data.
    """
    try:
        res = yield from http_utils.fetch(
            'get', CHAT_INIT_URL, cookies=self._cookies,
            params=CHAT_INIT_PARAMS, connector=self._connector
        )
    except exceptions.NetworkError as e:
        raise exceptions.HangupsError('Initialize chat request failed: {}'
                                      .format(e))

    # Parse the response by using a regex to find all the JS objects, and
    # parsing them. Not everything will be parsable, but we don't care if
    # an object we don't need can't be parsed.
    data_dict = {}
    for data in CHAT_INIT_REGEX.findall(res.body.decode()):
        try:
            data = javascript.loads(data)
            # pylint: disable=invalid-sequence-index
            data_dict[data['key']] = data['data']
        except ValueError as e:
            logger.debug('Failed to parse initialize chat object: {}\n{}'
                         .format(e, data))

    # Extract various values that we will need.
    try:
        self._api_key = data_dict['ds:7'][0][2]
        self._header_date = data_dict['ds:2'][0][4]
        self._header_version = data_dict['ds:2'][0][6]
        self._header_id = data_dict['ds:4'][0][7]
        self._channel_path = data_dict['ds:4'][0][1]
        self._clid = data_dict['ds:4'][0][7]
        self._channel_ec_param = data_dict['ds:4'][0][4]
        self._channel_prop_param = data_dict['ds:4'][0][5]
        _sync_timestamp = parsers.from_timestamp(
            data_dict['ds:21'][0][1][4]
        )
    except KeyError as e:
        raise exceptions.HangupsError('Failed to get initialize chat '
                                      'value: {}'.format(e))

    # Parse the entity representing the current user.
    self_entity = schemas.CLIENT_GET_SELF_INFO_RESPONSE.parse(
        data_dict['ds:20'][0]
    ).self_entity

    # Parse every existing conversation's state, including participants.
    initial_conv_states = schemas.CLIENT_CONVERSATION_STATE_LIST.parse(
        data_dict['ds:19'][0][3]
    )
    initial_conv_parts = []
    for conv_state in initial_conv_states:
        initial_conv_parts.extend(conv_state.conversation.participant_data)

    # Parse the entities for the user's contacts (doesn't include users
    # not in contacts). If this fails, continue without the rest of the
    # entities.
    initial_entities = []
    try:
        entities = schemas.INITIAL_CLIENT_ENTITIES.parse(
            data_dict['ds:21'][0]
        )
    except ValueError as e:
        logger.warning('Failed to parse initial client entities: {}'
                       .format(e))
    else:
        initial_entities.extend(entities.entities)
        initial_entities.extend(e.entity for e in itertools.chain(
            entities.group1.entity, entities.group2.entity,
            entities.group3.entity, entities.group4.entity,
            entities.group5.entity
        ))

    return InitialData(initial_conv_states, self_entity, initial_entities,
                       initial_conv_parts, _sync_timestamp)