Example #1
0
 def _restore(self, args, credential):
     """
        Execute all restore operations.

        :param args: dict of command-line options; keys used here are
                     'db-dir', 'host', 'port', 'email', 'type', 'label'
                     and 'restart'.
        :param credential: credential object handed to GMVaulter to
                           authenticate against the Gmail IMAP server.
        :raises ValueError: if args['type'] is neither 'full' nor 'quick'.
     """
     LOG.critical("Connect to Gmail server.")
     # Create a gmvault with write access (read_only_access = False) so the
     # restore can push emails/labels back to the server
     restorer = gmvault.GMVaulter(args['db-dir'], args['host'], args['port'], \
                                    args['email'], credential, read_only_access = False)
     
     # optional extra label applied to every restored email
     labels = [args['label']] if args['label'] else []
     
     restore_type = args.get('type', '')
     
     #full sync is the first one
     if restore_type == 'full':
         
         #call restore over the whole db
         restorer.restore(extra_labels = labels, restart = args['restart'])
         
     elif restore_type == 'quick':
         
         #take the last two to 3 months depending on the current date
         
         # today - 2 months (approximated as 2*365/12 days)
         today = datetime.date.today()
         begin = today - datetime.timedelta(2*365/12)
         
         # pivot on the year-month directory of the computed start date
         starting_dir = gmvault_utils.get_ym_from_datetime(begin)
         
         #call restore starting from that directory only
         restorer.restore(pivot_dir = starting_dir, extra_labels = labels, restart = args['restart'])
     
     else:
         # BUG FIX: the %s placeholder was never filled in; report the
         # actual unknown mode to the user
         raise ValueError("Unknown synchronisation mode %s. Please use full (default), quick." % restore_type)
     
     #print error report
     LOG.critical(restorer.get_error_report()) 
Example #2
0
 def _restore(self, args, credential):
     """
        Execute all restore operations.

        :param args: dict of command-line options; keys used here are
                     'db-dir', 'host', 'port', 'email', 'type', 'label'
                     and 'restart'.
        :param credential: credential object handed to GMVaulter to
                           authenticate against the Gmail IMAP server.
        :raises ValueError: if args['type'] is neither 'full' nor 'quick'.
     """
     LOG.critical("Connect to Gmail server.")
     # Create a gmvault with write access (read_only_access = False) so the
     # restore can push emails/labels back to the server
     restorer = gmvault.GMVaulter(args['db-dir'], args['host'], args['port'], \
                                    args['email'], credential, read_only_access = False)
     
     # optional extra label applied to every restored email
     labels = [args['label']] if args['label'] else []
     
     restore_type = args.get('type', '')
     
     #full sync is the first one
     if restore_type == 'full':
         
         #call restore over the whole db
         restorer.restore(extra_labels = labels, restart = args['restart'])
         
     elif restore_type == 'quick':
         
         #take the last two to 3 months depending on the current date
         
         # today - 2 months (approximated as 2*365/12 days)
         today = datetime.date.today()
         begin = today - datetime.timedelta(2*365/12)
         
         # pivot on the year-month directory of the computed start date
         starting_dir = gmvault_utils.get_ym_from_datetime(begin)
         
         #call restore starting from that directory only
         restorer.restore(pivot_dir = starting_dir, extra_labels = labels, restart = args['restart'])
     
     else:
         # BUG FIX: the %s placeholder was never filled in; report the
         # actual unknown mode to the user
         raise ValueError("Unknown synchronisation mode %s. Please use full (default), quick." % restore_type)
     
     #print error report
     LOG.critical(restorer.get_error_report()) 
Example #3
0
 def _create_update_sync(self, imap_ids, compress, ownership_control = True ):
     """
        First part of the double pass strategy: 
        create and update emails in db
     """
     gstorer =  GmailStorer(self.db_root_dir, self.use_encryption)
     
     #check ownership 
     self._check_email_db_ownership(gstorer, ownership_control)
         
     #save db_owner for next time
     gstorer.store_db_owner(self.login)
     
     total_nb_emails_to_process = len(imap_ids) # total number of emails to get
     
     LOG.critical("%d emails to be fetched." % (total_nb_emails_to_process))
     
     nb_emails_processed = 0
     timer = gmvault_utils.Timer() # needed for enhancing the user information
     timer.start()
     
     for the_id in imap_ids:
         
         try:
             
             gid = None
             
             LOG.debug("\nProcess imap id %s" % ( the_id ))
             
             #if the_id == 12:
             #    print("we have to break")
             
             #get everything but data
             new_data = self.src.fetch(the_id, imap_utils.GIMAPFetcher.GET_ALL_BUT_DATA )
             
             #print("data = %s" %(new_data[the_id]))
             
             #if 0 in new_data[the_id][imap_utils.GIMAPFetcher.GMAIL_LABELS]:
             #    print("we have to break")
             
             if new_data.get(the_id, None):
                 
                 gid = new_data[the_id][imap_utils.GIMAPFetcher.GMAIL_ID]
                 
                 the_dir      = gmvault_utils.get_ym_from_datetime(new_data[the_id][imap_utils.GIMAPFetcher.IMAP_INTERNALDATE])
                 
                 LOG.critical("Process email num %d (imap_id:%s) from %s." % (nb_emails_processed, the_id, the_dir))
             
                 #pass the dir and the ID
                 curr_metadata = GMVaulter.check_email_on_disk( gstorer , \
                                                                new_data[the_id][imap_utils.GIMAPFetcher.GMAIL_ID], \
                                                                the_dir)
                 
                 #if on disk check that the data is not different
                 if curr_metadata:
                     
                     LOG.debug("metadata for %s already exists. Check if different." % (gid))
                     
                     if self._metadata_needs_update(curr_metadata, new_data[the_id]):
                         #restore everything at the moment
                         gid  = gstorer.bury_metadata(new_data[the_id], local_dir = the_dir)
                         
                         LOG.debug("update email with imap id %s and gmail id %s." % (the_id, gid))
                         
                         #update local index id gid => index per directory to be thought out
                 else:  
                     
                     #get the data
                     email_data = self.src.fetch(the_id, imap_utils.GIMAPFetcher.GET_DATA_ONLY )
                     
                     new_data[the_id][imap_utils.GIMAPFetcher.EMAIL_BODY] = email_data[the_id][imap_utils.GIMAPFetcher.EMAIL_BODY]
                     
                     # store data on disk within year month dir 
                     gid  = gstorer.bury_email(new_data[the_id], local_dir = the_dir, compress = compress)
                     
                     #update local index id gid => index per directory to be thought out
                     LOG.debug("Create and store email with imap id %s, gmail id %s." % (the_id, gid))   
                 
             else:
                 # case when gmail IMAP server returns OK without any data whatsoever
                 # eg. imap uid 142221L ignore it
                 self.error_report['empty'].append((the_id, None))
             
             nb_emails_processed += 1
             
             #indicate every 50 messages the number of messages left to process
             left_emails = (total_nb_emails_to_process - nb_emails_processed)
             
             if (nb_emails_processed % 50) == 0 and (left_emails > 0):
                 elapsed = timer.elapsed() #elapsed time in seconds
                 LOG.critical("\n== Processed %d emails in %s. %d left to be stored (time estimate %s).==\n" % \
                              (nb_emails_processed,  timer.seconds_to_human_time(elapsed), left_emails, timer.estimate_time_left(nb_emails_processed, elapsed, left_emails)))
             
             # save id every 20 restored emails
             if (nb_emails_processed % 20) == 0:
                 if gid:
                     self.save_lastid(self.OP_SYNC, gid)
                     
         except imaplib.IMAP4.abort, _:
             # imap abort error 
             # ignore it 
             # will have to do something with these ignored messages
             LOG.critical("Error while fetching message with imap id %s." % (the_id))
             LOG.critical("\n=== Exception traceback ===\n")
             LOG.critical(gmvault_utils.get_exception_traceback())
             LOG.critical("=== End of Exception traceback ===\n")
             try:
                 #try to get the gmail_id
                 raise Exception("Error")
                 curr = self.src.fetch(the_id, imap_utils.GIMAPFetcher.GET_GMAIL_ID) 
             except Exception, _: #pylint:disable-msg=W0703
                 curr = None
                 LOG.critical("Error when trying to get gmail id for message with imap id %s." % (the_id))
                 LOG.critical("Disconnect, wait for 20 sec then reconnect.")
                 self.src.disconnect()
                 #could not fetch the gm_id so disconnect and sleep
                 #sleep 20 sec
                 time.sleep(20)
                 LOG.critical("Reconnecting ...")
                 self.src.connect()
                 
             if curr:
                 gmail_id = curr[the_id][imap_utils.GIMAPFetcher.GMAIL_ID]
             else:
                 gmail_id = None
                 
             #add ignored id
             self.error_report['cannot_be_fetched'].append((the_id, gmail_id))
             
             LOG.critical("Forced to ignore message with imap id %s, (gmail id %s)." % (the_id, (gmail_id if gmail_id else "cannot be read")))
Example #4
0
 def _create_update_sync(self, imap_ids, compress, ownership_control = True ):
     """
        First part of the double pass strategy:
        create and update emails in db.

        :param imap_ids: iterable of imap uids to fetch and store.
        :param compress: if True, email data is stored compressed on disk.
        :param ownership_control: if True, verify that the local db belongs
                                  to the currently logged-in account before
                                  writing to it.
     """
     gstorer =  GmailStorer(self.db_root_dir, self.use_encryption)
     
     #check ownership 
     self._check_email_db_ownership(gstorer, ownership_control)
         
     #save db_owner for next time
     gstorer.store_db_owner(self.login)
     
     total_nb_emails_to_process = len(imap_ids) # total number of emails to get
     
     LOG.critical("%d emails to be fetched." % (total_nb_emails_to_process))
     
     nb_emails_processed = 0
     timer = gmvault_utils.Timer() # needed for enhancing the user information
     timer.start()
     
     for the_id in imap_ids:
         
         try:
             
             LOG.debug("\nProcess imap id %s" % ( the_id ))
             
             #fetch metadata only (everything but the email body)
             new_data = self.src.fetch(the_id, imap_utils.GIMAPFetcher.GET_ALL_BUT_DATA )
             
             if new_data.get(the_id, None):
                 # year-month directory derived from the email internal date
                 the_dir      = gmvault_utils.get_ym_from_datetime(new_data[the_id][imap_utils.GIMAPFetcher.IMAP_INTERNALDATE])
                 
                 LOG.critical("Process email num %d (imap_id:%s) from %s." % (nb_emails_processed, the_id, the_dir))
             
                 #pass the dir and the ID
                 curr_metadata = GMVaulter.check_email_on_disk( gstorer , \
                                                                new_data[the_id][imap_utils.GIMAPFetcher.GMAIL_ID], \
                                                                the_dir)
                 
                 #if on disk check that the data is not different
                 if curr_metadata:
                     
                     LOG.debug("metadata for %s already exists. Check if different." % (new_data[the_id][imap_utils.GIMAPFetcher.GMAIL_ID]))
                     
                     if self._metadata_needs_update(curr_metadata, new_data[the_id]):
                         #restore everything at the moment
                         gid  = gstorer.bury_metadata(new_data[the_id], local_dir = the_dir)
                         
                         LOG.debug("update email with imap id %s and gmail id %s." % (the_id, gid))
                         
                         #update local index id gid => index per directory to be thought out
                 else:  
                     
                     #new email: fetch the body data as well
                     email_data = self.src.fetch(the_id, imap_utils.GIMAPFetcher.GET_DATA_ONLY )
                     
                     new_data[the_id][imap_utils.GIMAPFetcher.EMAIL_BODY] = email_data[the_id][imap_utils.GIMAPFetcher.EMAIL_BODY]
                     
                     # store data on disk within year month dir 
                     gid  = gstorer.bury_email(new_data[the_id], local_dir = the_dir, compress = compress)
                     
                     #update local index id gid => index per directory to be thought out
                     LOG.debug("Create and store email with imap id %s, gmail id %s." % (the_id, gid))   
                 
             else:
                 # case when gmail IMAP server returns OK without any data whatsoever
                 # eg. imap uid 142221L ignore it
                 self.error_report['empty'].append((the_id, None))
             
             nb_emails_processed += 1
             
             #indicate every 50 messages the number of messages left to process
             left_emails = (total_nb_emails_to_process - nb_emails_processed)
             
             if (nb_emails_processed % 50) == 0 and (left_emails > 0):
                 elapsed = timer.elapsed() #elapsed time in seconds
                 LOG.critical("\n== Processed %d emails in %s. %d left to be stored (time estimate %s).==\n" % \
                              (nb_emails_processed,  timer.seconds_to_human_time(elapsed), left_emails, timer.estimate_time_left(nb_emails_processed, elapsed, left_emails)))
         
         except imaplib.IMAP4.error, error:
             # check if this is a cannot be fetched error 
             # I do not like to do string guessing within an exception but I do not have any choice here
             
             LOG.exception("Error [%s]" % error.message, error )
             
             if error.message == "fetch failed: 'Some messages could not be FETCHed (Failure)'":
                 try:
                     #try to get the gmail_id
                     curr = self.src.fetch(the_id, imap_utils.GIMAPFetcher.GET_GMAIL_ID) 
                 except Exception, _: #pylint:disable-msg=W0703
                     curr = None
                 
                 if curr:
                     gmail_id = curr[the_id][imap_utils.GIMAPFetcher.GMAIL_ID]
                 else:
                     gmail_id = None
                 
                 #add ignored id
                 self.error_report['cannot_be_fetched'].append((the_id, gmail_id))
             else:
                 # BUG FIX: bare raise preserves the original traceback;
                 # 'raise error' would reset it to this line
                 raise
Example #5
0
    def _create_update_sync(self, imap_ids, compress, ownership_control=True):
        """
           First part of the double pass strategy:
           create and update emails in db.

           :param imap_ids: iterable of imap uids to fetch and store.
           :param compress: if True, email data is stored compressed on disk.
           :param ownership_control: if True, verify that the local db
                                     belongs to the currently logged-in
                                     account before writing to it.
        """
        gstorer = GmailStorer(self.db_root_dir, self.use_encryption)

        #check ownership
        self._check_email_db_ownership(gstorer, ownership_control)

        #save db_owner for next time
        gstorer.store_db_owner(self.login)

        total_nb_emails_to_process = len(
            imap_ids)  # total number of emails to get

        LOG.critical("%d emails to be fetched." % (total_nb_emails_to_process))

        nb_emails_processed = 0
        timer = gmvault_utils.Timer(
        )  # needed for enhancing the user information
        timer.start()

        for the_id in imap_ids:

            try:

                LOG.debug("\nProcess imap id %s" % (the_id))

                #fetch metadata only (everything but the email body)
                new_data = self.src.fetch(
                    the_id, imap_utils.GIMAPFetcher.GET_ALL_BUT_DATA)

                if new_data.get(the_id, None):
                    # year-month directory derived from the email internal date
                    the_dir = gmvault_utils.get_ym_from_datetime(
                        new_data[the_id][
                            imap_utils.GIMAPFetcher.IMAP_INTERNALDATE])

                    LOG.critical("Process email num %d (imap_id:%s) from %s." %
                                 (nb_emails_processed, the_id, the_dir))

                    #pass the dir and the ID
                    curr_metadata = GMVaulter.check_email_on_disk( gstorer , \
                                                                   new_data[the_id][imap_utils.GIMAPFetcher.GMAIL_ID], \
                                                                   the_dir)

                    #if on disk check that the data is not different
                    if curr_metadata:

                        LOG.debug(
                            "metadata for %s already exists. Check if different."
                            %
                            (new_data[the_id][imap_utils.GIMAPFetcher.GMAIL_ID]
                             ))

                        if self._metadata_needs_update(curr_metadata,
                                                       new_data[the_id]):
                            #restore everything at the moment
                            gid = gstorer.bury_metadata(new_data[the_id],
                                                        local_dir=the_dir)

                            LOG.debug(
                                "update email with imap id %s and gmail id %s."
                                % (the_id, gid))

                            #update local index id gid => index per directory to be thought out
                    else:

                        #new email: fetch the body data as well
                        email_data = self.src.fetch(
                            the_id, imap_utils.GIMAPFetcher.GET_DATA_ONLY)

                        new_data[the_id][
                            imap_utils.GIMAPFetcher.EMAIL_BODY] = email_data[
                                the_id][imap_utils.GIMAPFetcher.EMAIL_BODY]

                        # store data on disk within year month dir
                        gid = gstorer.bury_email(new_data[the_id],
                                                 local_dir=the_dir,
                                                 compress=compress)

                        #update local index id gid => index per directory to be thought out
                        LOG.debug(
                            "Create and store email with imap id %s, gmail id %s."
                            % (the_id, gid))

                else:
                    # case when gmail IMAP server returns OK without any data whatsoever
                    # eg. imap uid 142221L ignore it
                    self.error_report['empty'].append((the_id, None))

                nb_emails_processed += 1

                #indicate every 50 messages the number of messages left to process
                left_emails = (total_nb_emails_to_process -
                               nb_emails_processed)

                if (nb_emails_processed % 50) == 0 and (left_emails > 0):
                    elapsed = timer.elapsed()  #elapsed time in seconds
                    LOG.critical("\n== Processed %d emails in %s. %d left to be stored (time estimate %s).==\n" % \
                                 (nb_emails_processed,  timer.seconds_to_human_time(elapsed), left_emails, timer.estimate_time_left(nb_emails_processed, elapsed, left_emails)))

            except imaplib.IMAP4.error, error:
                # check if this is a cannot be fetched error
                # I do not like to do string guessing within an exception but I do not have any choice here

                LOG.exception("Error [%s]" % error.message, error)

                if error.message == "fetch failed: 'Some messages could not be FETCHed (Failure)'":
                    try:
                        #try to get the gmail_id
                        curr = self.src.fetch(
                            the_id, imap_utils.GIMAPFetcher.GET_GMAIL_ID)
                    except Exception, _:  #pylint:disable-msg=W0703
                        curr = None

                    if curr:
                        gmail_id = curr[the_id][
                            imap_utils.GIMAPFetcher.GMAIL_ID]
                    else:
                        gmail_id = None

                    #add ignored id
                    self.error_report['cannot_be_fetched'].append(
                        (the_id, gmail_id))
                else:
                    # BUG FIX: bare raise preserves the original traceback;
                    # 'raise error' would reset it to this line
                    raise