def _restore(self, args, credential):
    """ Execute all restore operations """
    LOG.critical("Connect to Gmail server.")

    # create a GMVaulter with read_only_access disabled (a restore needs write access)
    restorer = gmvault.GMVaulter(args['db-dir'], args['host'], args['port'],
                                 args['email'], credential, read_only_access=False)

    #full restore is the default mode
    if args.get('type', '') == 'full':
        #call restore
        labels = [args['label']] if args['label'] else []
        restorer.restore(extra_labels=labels, restart=args['restart'])

    elif args.get('type', '') == 'quick':
        #take the last 2 to 3 months depending on the current date (today - 2 months)
        today = datetime.date.today()
        begin = today - datetime.timedelta(2 * 365 / 12)
        starting_dir = gmvault_utils.get_ym_from_datetime(begin)

        #call restore
        labels = [args['label']] if args['label'] else []
        restorer.restore(pivot_dir=starting_dir, extra_labels=labels, restart=args['restart'])

    else:
        raise ValueError("Unknown synchronisation mode %s. Please use full (default) or quick."
                         % (args.get('type', '')))

    #print error report
    LOG.critical(restorer.get_error_report())
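# Illustration only (not part of gmvault): a standalone sketch of how the "quick"
# restore pivot above is derived. It assumes gmvault_utils.get_ym_from_datetime()
# renders a date as a "YYYY-MM" string, i.e. the name of a per-month directory in
# the gmvault db; the helper name below is hypothetical.
import datetime

def _quick_restore_pivot(today=None):
    """Return the year-month string roughly two months before 'today'."""
    today = today or datetime.date.today()
    begin = today - datetime.timedelta(days=2 * 365 / 12)  # ~60 days back, as above
    return begin.strftime('%Y-%m')  # assumed get_ym_from_datetime() output format

# e.g. _quick_restore_pivot(datetime.date(2012, 7, 15)) returns '2012-05'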
def _create_update_sync(self, imap_ids, compress, ownership_control=True):
    """ First part of the double pass strategy: create and update emails in db """
    gstorer = GmailStorer(self.db_root_dir, self.use_encryption)

    #check ownership
    self._check_email_db_ownership(gstorer, ownership_control)

    #save db_owner for next time
    gstorer.store_db_owner(self.login)

    total_nb_emails_to_process = len(imap_ids)  # total number of emails to get
    LOG.critical("%d emails to be fetched." % (total_nb_emails_to_process))

    nb_emails_processed = 0
    timer = gmvault_utils.Timer()  # used to report progress to the user
    timer.start()

    for the_id in imap_ids:
        try:
            gid = None

            LOG.debug("\nProcess imap id %s" % (the_id))

            #fetch everything except the email body
            new_data = self.src.fetch(the_id, imap_utils.GIMAPFetcher.GET_ALL_BUT_DATA)

            if new_data.get(the_id, None):
                gid = new_data[the_id][imap_utils.GIMAPFetcher.GMAIL_ID]
                the_dir = gmvault_utils.get_ym_from_datetime(
                              new_data[the_id][imap_utils.GIMAPFetcher.IMAP_INTERNALDATE])

                LOG.critical("Process email num %d (imap_id:%s) from %s." % (nb_emails_processed, the_id, the_dir))

                #pass the dir and the ID
                curr_metadata = GMVaulter.check_email_on_disk(gstorer,
                                                              new_data[the_id][imap_utils.GIMAPFetcher.GMAIL_ID],
                                                              the_dir)

                #if already on disk, check that the data is not different
                if curr_metadata:
                    LOG.debug("metadata for %s already exists. Check if different." % (gid))

                    if self._metadata_needs_update(curr_metadata, new_data[the_id]):
                        #for now, rewrite the metadata entirely
                        gid = gstorer.bury_metadata(new_data[the_id], local_dir=the_dir)
                        LOG.debug("update email with imap id %s and gmail id %s." % (the_id, gid))
                        #update local index id gid => index per directory to be thought out
                else:
                    #fetch the email body
                    email_data = self.src.fetch(the_id, imap_utils.GIMAPFetcher.GET_DATA_ONLY)

                    new_data[the_id][imap_utils.GIMAPFetcher.EMAIL_BODY] = \
                        email_data[the_id][imap_utils.GIMAPFetcher.EMAIL_BODY]

                    #store data on disk within the year-month dir
                    gid = gstorer.bury_email(new_data[the_id], local_dir=the_dir, compress=compress)

                    #update local index id gid => index per directory to be thought out
                    LOG.debug("Create and store email with imap id %s, gmail id %s." % (the_id, gid))
            else:
                # case when the Gmail IMAP server returns OK without any data whatsoever
                # e.g. imap uid 142221L: ignore it
                self.error_report['empty'].append((the_id, None))

            nb_emails_processed += 1

            #every 50 messages, indicate the number of messages left to process
            left_emails = (total_nb_emails_to_process - nb_emails_processed)

            if (nb_emails_processed % 50) == 0 and (left_emails > 0):
                elapsed = timer.elapsed()  # elapsed time in seconds
                LOG.critical("\n== Processed %d emails in %s. %d left to be stored (time estimate %s).==\n" % \
                             (nb_emails_processed, timer.seconds_to_human_time(elapsed),
                              left_emails, timer.estimate_time_left(nb_emails_processed, elapsed, left_emails)))

            # save the last gmail id every 20 processed emails
            if (nb_emails_processed % 20) == 0:
                if gid:
                    self.save_lastid(self.OP_SYNC, gid)

        except imaplib.IMAP4.abort, _:
            # imap abort error: ignore the message for now
            # will have to do something with these ignored messages
            LOG.critical("Error while fetching message with imap id %s." % (the_id))
            LOG.critical("\n=== Exception traceback ===\n")
            LOG.critical(gmvault_utils.get_exception_traceback())
            LOG.critical("=== End of Exception traceback ===\n")

            try:
                #try to get the gmail_id
                curr = self.src.fetch(the_id, imap_utils.GIMAPFetcher.GET_GMAIL_ID)
            except Exception, _:  #pylint:disable-msg=W0703
                curr = None
                LOG.critical("Error when trying to get gmail id for message with imap id %s." % (the_id))
                LOG.critical("Disconnect, wait for 20 sec then reconnect.")
                #could not fetch the gm_id so disconnect, sleep 20 sec and reconnect
                self.src.disconnect()
                time.sleep(20)
                LOG.critical("Reconnecting ...")
                self.src.connect()

            if curr:
                gmail_id = curr[the_id][imap_utils.GIMAPFetcher.GMAIL_ID]
            else:
                gmail_id = None

            #add ignored id
            self.error_report['cannot_be_fetched'].append((the_id, gmail_id))
            LOG.critical("Forced to ignore message with imap id %s, (gmail id %s)." % \
                         (the_id, (gmail_id if gmail_id else "cannot be read")))
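# Illustration only: the abort handler above uses a "disconnect, back off, reconnect"
# recovery pattern. The standalone sketch below shows the same idea; 'src' stands for
# any object exposing connect()/disconnect()/fetch() the way GIMAPFetcher does above.
# The helper itself (fetch_with_reconnect) is hypothetical and not part of gmvault.
import time

def fetch_with_reconnect(src, the_id, request, wait_sec=20):
    """Try a fetch once; on failure reconnect and return None instead of raising."""
    try:
        return src.fetch(the_id, request)
    except Exception:  #pylint:disable-msg=W0703
        src.disconnect()
        time.sleep(wait_sec)  # give the IMAP server some breathing room
        src.connect()
        return None  # the caller then records the id as 'cannot_be_fetched'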
def _create_update_sync(self, imap_ids, compress, ownership_control=True):
    """ First part of the double pass strategy: create and update emails in db """
    gstorer = GmailStorer(self.db_root_dir, self.use_encryption)

    #check ownership
    self._check_email_db_ownership(gstorer, ownership_control)

    #save db_owner for next time
    gstorer.store_db_owner(self.login)

    total_nb_emails_to_process = len(imap_ids)  # total number of emails to get
    LOG.critical("%d emails to be fetched." % (total_nb_emails_to_process))

    nb_emails_processed = 0
    timer = gmvault_utils.Timer()  # used to report progress to the user
    timer.start()

    for the_id in imap_ids:
        try:
            LOG.debug("\nProcess imap id %s" % (the_id))

            #fetch everything except the email body in one go
            new_data = self.src.fetch(the_id, imap_utils.GIMAPFetcher.GET_ALL_BUT_DATA)

            if new_data.get(the_id, None):
                the_dir = gmvault_utils.get_ym_from_datetime(
                              new_data[the_id][imap_utils.GIMAPFetcher.IMAP_INTERNALDATE])

                LOG.critical("Process email num %d (imap_id:%s) from %s." % (nb_emails_processed, the_id, the_dir))

                #pass the dir and the ID
                curr_metadata = GMVaulter.check_email_on_disk(gstorer,
                                                              new_data[the_id][imap_utils.GIMAPFetcher.GMAIL_ID],
                                                              the_dir)

                #if already on disk, check that the data is not different
                if curr_metadata:
                    LOG.debug("metadata for %s already exists. Check if different."
                              % (new_data[the_id][imap_utils.GIMAPFetcher.GMAIL_ID]))

                    if self._metadata_needs_update(curr_metadata, new_data[the_id]):
                        #for now, rewrite the metadata entirely
                        gid = gstorer.bury_metadata(new_data[the_id], local_dir=the_dir)
                        LOG.debug("update email with imap id %s and gmail id %s." % (the_id, gid))
                        #update local index id gid => index per directory to be thought out
                else:
                    #fetch the email body
                    email_data = self.src.fetch(the_id, imap_utils.GIMAPFetcher.GET_DATA_ONLY)

                    new_data[the_id][imap_utils.GIMAPFetcher.EMAIL_BODY] = \
                        email_data[the_id][imap_utils.GIMAPFetcher.EMAIL_BODY]

                    #store data on disk within the year-month dir
                    gid = gstorer.bury_email(new_data[the_id], local_dir=the_dir, compress=compress)

                    #update local index id gid => index per directory to be thought out
                    LOG.debug("Create and store email with imap id %s, gmail id %s." % (the_id, gid))
            else:
                # case when the Gmail IMAP server returns OK without any data whatsoever
                # e.g. imap uid 142221L: ignore it
                self.error_report['empty'].append((the_id, None))

            nb_emails_processed += 1

            #every 50 messages, indicate the number of messages left to process
            left_emails = (total_nb_emails_to_process - nb_emails_processed)

            if (nb_emails_processed % 50) == 0 and (left_emails > 0):
                elapsed = timer.elapsed()  # elapsed time in seconds
                LOG.critical("\n== Processed %d emails in %s. %d left to be stored (time estimate %s).==\n" % \
                             (nb_emails_processed, timer.seconds_to_human_time(elapsed),
                              left_emails, timer.estimate_time_left(nb_emails_processed, elapsed, left_emails)))

        except imaplib.IMAP4.error, error:
            # check whether this is a "cannot be fetched" error
            # string guessing within an exception handler is ugly, but there is no other choice here
            LOG.exception("Error [%s]" % (error.message), error)

            if error.message == "fetch failed: 'Some messages could not be FETCHed (Failure)'":
                try:
                    #try to get the gmail_id
                    curr = self.src.fetch(the_id, imap_utils.GIMAPFetcher.GET_GMAIL_ID)
                except Exception, _:  #pylint:disable-msg=W0703
                    curr = None

                if curr:
                    gmail_id = curr[the_id][imap_utils.GIMAPFetcher.GMAIL_ID]
                else:
                    gmail_id = None

                #add ignored id
                self.error_report['cannot_be_fetched'].append((the_id, gmail_id))
            else:
                raise error  #re-raise any other IMAP error
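# Illustration only: a plausible reading of Timer.estimate_time_left() as used in the
# progress message above. It assumes the remaining time is the average time spent per
# processed email multiplied by the number of emails left; this is an assumption about
# gmvault_utils.Timer, not a copy of its implementation.
def estimate_time_left(nb_processed, elapsed_sec, nb_left):
    """Rough ETA in seconds: average cost per email so far times emails remaining."""
    if nb_processed <= 0:
        return 0
    return (float(elapsed_sec) / nb_processed) * nb_left

# e.g. 50 emails processed in 100 seconds with 150 left -> 300.0 seconds remaining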