def check_remote_mailbox_identical_to_local(the_self, gmvaulter, extra_labels=None): #pylint: disable=C0103,R0912,R0914,R0915
    """
       Check that the remote mailbox is identical to the local one attached to gmvaulter.

       Need a connected gmvaulter.

       For every gmail id stored on disk, the matching remote message is looked up with an
       IMAP search built from the stored subject, message id and x-gmail-received header.
       Its header fields, internal date (with a one hour tolerance), labels and flags are
       then compared with the metadata stored on disk.

       the_self     : test case like object; assertEqual, assertTrue and fail are called on it
       gmvaulter    : object exposing gstorer (local storage) and src (remote imap access)
       extra_labels : optional labels expected online in addition to the ones stored on disk
    """
    # copy into a fresh list so that no mutable default is shared between calls
    extra_labels = list(extra_labels) if extra_labels else []

    # get all email data from gmvault-db
    pivot_dir = None
    gmail_ids = gmvaulter.gstorer.get_all_existing_gmail_ids(pivot_dir)

    print("gmail_ids = %s\n" % (gmail_ids,))

    #need to check that all labels are there for emails in essential
    gmvaulter.src.select_folder('ALLMAIL')

    # check the number of id on disk
    imap_ids = gmvaulter.src.search({'type': 'imap', 'req': 'ALL'}) #get everything

    the_self.assertEqual(len(imap_ids), \
                         len(gmail_ids), \
                         "Error. Should have the same number of emails: local nb of emails %d,"\
                         " remote nb of emails %d" % (len(gmail_ids), len(imap_ids)))

    for gm_id in gmail_ids:

        print("Fetching id %s with request %s" % (gm_id, imap_utils.GIMAPFetcher.GET_ALL_BUT_DATA))

        #get disk_metadata
        disk_metadata = gmvaulter.gstorer.unbury_metadata(gm_id)

        print("disk metadata %s\n" % (disk_metadata,))

        subject = disk_metadata.get('subject', None)
        msgid = disk_metadata.get('msg_id', None)
        received = disk_metadata.get('x_gmail_received', None)

        # collect the search criteria and join them with a single space
        criteria = []

        if subject:
            #split on ' and " when contained in subject to keep only the first part
            subject = subject.split("'")[0]
            subject = subject.split('"')[0]
            # NOTE(review): encode() yields a byte string; under Python 3 str.format
            # would embed its b'...' repr - confirm the target interpreter
            criteria.append('SUBJECT "{subject}"'.format(subject=subject.strip().encode('utf-8')))

        if msgid:
            criteria.append('HEADER MESSAGE-ID {msgid}'.format(msgid=msgid.strip()))

        if received:
            criteria.append('HEADER X-GMAIL-RECEIVED {received}'.format(received=received.strip()))

        req = "(" + " ".join(criteria) + ")"

        print("Req = %s\n" % (req,))

        imap_ids = gmvaulter.src.search({'type': 'imap', 'req': req, 'charset': 'utf-8'})

        print("imap_ids = %s\n" % (imap_ids,))

        # zero matches is as wrong as several ones: report the real count
        if len(imap_ids) != 1:
            the_self.fail("expected exactly one imap_id but got %d (%s) for request %s" \
                          % (len(imap_ids), imap_ids, req))

        imap_id = imap_ids[0]

        # get online_metadata
        online_metadata = gmvaulter.src.fetch(imap_id, \
                                              imap_utils.GIMAPFetcher.GET_ALL_BUT_DATA)

        print("online_metadata = %s\n" % (online_metadata,))
        print("disk_metadata = %s\n" % (disk_metadata,))

        header_fields = online_metadata[imap_id]['BODY[HEADER.FIELDS (MESSAGE-ID SUBJECT X-GMAIL-RECEIVED)]']

        subject, msgid, received = gmvault_db.GmailStorer.parse_header_fields(header_fields)

        #compare metadata
        the_self.assertEqual(subject, disk_metadata.get('subject', None))
        the_self.assertEqual(msgid, disk_metadata.get('msg_id', None))
        the_self.assertEqual(received, disk_metadata.get('x_gmail_received', None))

        # check internal date it is plus or minus 1 hour
        online_date = online_metadata[imap_id].get('INTERNALDATE', None)
        disk_date = disk_metadata.get('internal_date', None)

        if online_date != disk_date:
            # a missing date cannot be within the tolerance: fail instead of raising TypeError
            if online_date is None or disk_date is None:
                the_self.fail("online_date (%s) and disk_date (%s) are different" \
                              % (online_date, disk_date))

            min_date = disk_date - datetime.timedelta(hours=1)
            max_date = disk_date + datetime.timedelta(hours=1)

            if min_date <= online_date <= max_date:
                print("online_date (%s) and disk_date (%s) differs but "\
                      "within one hour. This is OK (timezone pb) *****" % (online_date, disk_date))
            else:
                the_self.fail("online_date (%s) and disk_date (%s) are different" \
                              % (online_date, disk_date))

        #check labels
        # work on a copy so that the metadata list is not modified, None means no labels
        disk_labels = list(disk_metadata.get('labels', None) or [])

        #add extra labels
        disk_labels.extend(extra_labels)

        online_labels = imap_utils.decode_labels(online_metadata[imap_id].get('X-GM-LABELS', None)) or []

        #clean potential labels with multiple spaces
        disk_labels = [gmvault_utils.remove_consecutive_spaces_and_strip(label) for label in disk_labels]
        online_labels = [gmvault_utils.remove_consecutive_spaces_and_strip(label) for label in online_labels]

        if not disk_labels: #no disk_labels check that there are no online_labels
            the_self.assertTrue(not online_labels)

        print("disk_labels = %s\n" % (disk_labels,))
        print("online_labels = %s\n" % (online_labels,))
        the_self.assertEqual(len(disk_labels), len(online_labels))

        for label in disk_labels:
            #change label Migrated (lower and uppercase) to gmv-migrated because reserved by Gmail
            if label.lower() == "migrated":
                label = "gmv-migrated"
            elif label.lower() == r"\muted":
                label = "gmv-muted"

            if label not in online_labels:
                the_self.fail("label %s should be in online_labels %s as"\
                              " it is in disk_labels %s" % (label, online_labels, disk_labels))

        # check flags
        # None is handled as no flags so that len() and the iteration are always valid
        disk_flags = disk_metadata.get('flags', None) or []
        online_flags = online_metadata[imap_id].get('FLAGS', None) or []

        if not disk_flags: #no disk flags
            the_self.assertTrue(not online_flags)

        the_self.assertEqual(len(disk_flags), len(online_flags))

        for flag in disk_flags:
            if flag not in online_flags:
                the_self.fail("flag %s should be in "\
                              "online_flags %s as it is in disk_flags %s" \
                              % (flag, online_flags, disk_flags))
def _build_imap_search_request(disk_metadata):
    """
       Build the parenthesised IMAP search request identifying one stored email from
       the subject, msg_id and x_gmail_received entries of its disk metadata.
    """
    criteria = []

    subject = disk_metadata.get('subject', None)
    if subject:
        #keep only the part of the subject located before the first ' or "
        subject = subject.split("'")[0].split('"')[0]
        # NOTE(review): encode() yields a byte string; under Python 3 str.format
        # would embed its b'...' repr - confirm the target interpreter
        criteria.append('SUBJECT "{subject}"'.format(
            subject=subject.strip().encode('utf-8')))

    msgid = disk_metadata.get('msg_id', None)
    if msgid:
        criteria.append('HEADER MESSAGE-ID {msgid}'.format(msgid=msgid.strip()))

    received = disk_metadata.get('x_gmail_received', None)
    if received:
        criteria.append('HEADER X-GMAIL-RECEIVED {received}'.format(
            received=received.strip()))

    return "(" + " ".join(criteria) + ")"


def _assert_internal_dates_match(the_self, online_date, disk_date):
    """
       Fail the_self unless both dates are equal or less than one hour apart.
    """
    if online_date == disk_date:
        return

    # a missing date cannot be within the tolerance: fail instead of raising TypeError
    if online_date is None or disk_date is None:
        the_self.fail("online_date (%s) and disk_date (%s) are different" %
                      (online_date, disk_date))

    tolerance = datetime.timedelta(hours=1)

    if disk_date - tolerance <= online_date <= disk_date + tolerance:
        print("online_date (%s) and disk_date (%s) differs but "\
              "within one hour. This is OK (timezone pb) *****" % (online_date, disk_date))
    else:
        the_self.fail("online_date (%s) and disk_date (%s) are different" %
                      (online_date, disk_date))


def _assert_labels_match(the_self, disk_labels, online_labels):
    """
       Fail the_self unless every cleaned disk label is found in the cleaned online labels
       and both lists have the same length.
    """
    #clean potential labels with multiple spaces
    disk_labels = [
        gmvault_utils.remove_consecutive_spaces_and_strip(label)
        for label in disk_labels
    ]
    online_labels = [
        gmvault_utils.remove_consecutive_spaces_and_strip(label)
        for label in online_labels
    ]

    if not disk_labels: #no disk_labels check that there are no online_labels
        the_self.assertTrue(not online_labels)

    print("disk_labels = %s\n" % (disk_labels,))
    print("online_labels = %s\n" % (online_labels,))
    the_self.assertEqual(len(disk_labels), len(online_labels))

    for label in disk_labels:
        #change label Migrated (lower and uppercase) to gmv-migrated because reserved by Gmail
        if label.lower() == "migrated":
            label = "gmv-migrated"
        elif label.lower() == r"\muted":
            label = "gmv-muted"

        if label not in online_labels:
            the_self.fail("label %s should be in online_labels %s as"\
                          " it is in disk_labels %s" % (label, online_labels, disk_labels))


def _assert_flags_match(the_self, disk_flags, online_flags):
    """
       Fail the_self unless every disk flag is found in the online flags
       and both collections have the same length.
    """
    if not disk_flags: #no disk flags
        the_self.assertTrue(not online_flags)

    the_self.assertEqual(len(disk_flags), len(online_flags))

    for flag in disk_flags:
        if flag not in online_flags:
            the_self.fail("flag %s should be in "\
                          "online_flags %s as it is in disk_flags %s" \
                          % (flag, online_flags, disk_flags))


def check_remote_mailbox_identical_to_local(the_self, gmvaulter, extra_labels=None): #pylint: disable=C0103,R0912,R0914,R0915
    """
       Check that the remote mailbox is identical to the local one attached to gmvaulter.

       Need a connected gmvaulter.

       Each gmail id stored on disk is searched remotely, then its header fields,
       internal date, labels and flags are compared with the metadata stored on disk.

       the_self     : test case like object; assertEqual, assertTrue and fail are called on it
       gmvaulter    : object exposing gstorer (local storage) and src (remote imap access)
       extra_labels : optional labels expected online in addition to the ones stored on disk
    """
    # copy into a fresh list so that no mutable default is shared between calls
    extra_labels = list(extra_labels) if extra_labels else []

    # get all email data from gmvault-db
    pivot_dir = None
    gmail_ids = gmvaulter.gstorer.get_all_existing_gmail_ids(pivot_dir)

    print("gmail_ids = %s\n" % (gmail_ids,))

    #need to check that all labels are there for emails in essential
    gmvaulter.src.select_folder('ALLMAIL')

    # check the number of id on disk
    imap_ids = gmvaulter.src.search({
        'type': 'imap',
        'req': 'ALL'
    }) #get everything

    the_self.assertEqual(len(imap_ids), \
                         len(gmail_ids), \
                         "Error. Should have the same number of emails: local nb of emails %d,"\
                         " remote nb of emails %d" % (len(gmail_ids), len(imap_ids)))

    for gm_id in gmail_ids:

        print("Fetching id %s with request %s" %
              (gm_id, imap_utils.GIMAPFetcher.GET_ALL_BUT_DATA))

        #get disk_metadata
        disk_metadata = gmvaulter.gstorer.unbury_metadata(gm_id)

        print("disk metadata %s\n" % (disk_metadata,))

        req = _build_imap_search_request(disk_metadata)

        print("Req = %s\n" % (req,))

        imap_ids = gmvaulter.src.search({
            'type': 'imap',
            'req': req,
            'charset': 'utf-8'
        })

        print("imap_ids = %s\n" % (imap_ids,))

        # zero matches is as wrong as several ones: report the real count
        if len(imap_ids) != 1:
            the_self.fail(
                "expected exactly one imap_id but got %d (%s) for request %s" %
                (len(imap_ids), imap_ids, req))

        imap_id = imap_ids[0]

        # get online_metadata
        online_metadata = gmvaulter.src.fetch(imap_id, \
                                              imap_utils.GIMAPFetcher.GET_ALL_BUT_DATA)

        print("online_metadata = %s\n" % (online_metadata,))
        print("disk_metadata = %s\n" % (disk_metadata,))

        online_msg = online_metadata[imap_id]

        header_fields = online_msg[
            'BODY[HEADER.FIELDS (MESSAGE-ID SUBJECT X-GMAIL-RECEIVED)]']

        subject, msgid, received = gmvault_db.GmailStorer.parse_header_fields(
            header_fields)

        #compare metadata
        the_self.assertEqual(subject, disk_metadata.get('subject', None))
        the_self.assertEqual(msgid, disk_metadata.get('msg_id', None))
        the_self.assertEqual(received,
                             disk_metadata.get('x_gmail_received', None))

        # check internal date it is plus or minus 1 hour
        _assert_internal_dates_match(the_self,
                                     online_msg.get('INTERNALDATE', None),
                                     disk_metadata.get('internal_date', None))

        #check labels
        # work on a copy so that the metadata list is not modified, None means no labels
        disk_labels = list(disk_metadata.get('labels', None) or [])
        disk_labels.extend(extra_labels)

        online_labels = imap_utils.decode_labels(
            online_msg.get('X-GM-LABELS', None)) or []

        _assert_labels_match(the_self, disk_labels, online_labels)

        # check flags
        # None is handled as no flags so that len() and the iteration are always valid
        _assert_flags_match(the_self,
                            disk_metadata.get('flags', None) or [],
                            online_msg.get('FLAGS', None) or [])