def get_recent_chat_ids(self, fromdatetime=None):
    """
       Return the ids of the chats stored in the chats area of the db
       that are more recent than fromdatetime.

       A *.meta file is kept only when both conditions hold:
         - the path of its directory contains the 'YYYY-MM' string built
           from fromdatetime,
         - the 'internal_date' of its metadata (read with unbury_metadata)
           is strictly greater than fromdatetime.

       :param fromdatetime: datetime used as exclusive lower bound.
                            When None, no filtering is applied and every
                            chat id is returned (it used to raise an
                            AttributeError on the first file found).
       :return: OrderedDict {gmail id (long): name of the directory holding
                the meta file}, sorted by increasing gmail id. It is empty
                when the chats area does not exist.
    """
    # NOTE(review): only the month of fromdatetime is looked at, so files
    # stored in the directories of later months are ignored - confirm that
    # this is the intended meaning of "recent".

    # collect in a normal dict first: the OrderedDict preserves the insertion
    # order and not the key order, so it is built from the sorted items below
    gmail_ids = {}

    chat_dir = '%s/%s' % (self._db_dir, self.CHATS_AREA)

    if os.path.exists(chat_dir):
        # loop invariant: compute the month marker only once
        recent_dir = None
        if fromdatetime is not None:
            recent_dir = fromdatetime.strftime('%Y') + '-' + fromdatetime.strftime('%m')

        for filepath in gmvault_utils.ordered_dirwalk(chat_dir, "*.meta"):
            directory, fname = os.path.split(filepath)
            gmail_id = os.path.splitext(fname)[0]

            if recent_dir is not None:
                if recent_dir not in directory:
                    continue
                # the metadata is only read for the files of the right month
                if not self.unbury_metadata(gmail_id)['internal_date'] > fromdatetime:
                    continue

            gmail_ids[long(gmail_id)] = os.path.basename(directory)

    # sort by key
    # used own orderedDict to be compliant with version 2.5
    return collections_utils.OrderedDict(sorted(gmail_ids.items(), key=lambda t: t[0]))
def get_all_existing_gmail_ids(self, pivot_dir=None, ignore_sub_dir=['chats']): #pylint:disable=W0102
    """
       Get all the gmail ids found in the database, i.e. one id per
       *.meta file.

       :param pivot_dir: when None the whole db dir is walked. Otherwise only
                         the directories returned by
                         gmvault_utils.get_all_dirs_posterior_to(pivot_dir, ...)
                         are walked (presumably the passed month and all
                         posterior months - see that helper).
       :param ignore_sub_dir: sub directories that are passed to the dir
                              helpers to be ignored. The default list is
                              shared between calls: it is only read here and
                              must not be modified.
       :return: OrderedDict {gmail id (long): name of the directory holding
                the meta file}, sorted by increasing gmail id.
    """
    # collect in a normal dict first: the OrderedDict preserves the insertion
    # order and not the key order, so it is built from the sorted items below
    gmail_ids = {}

    if pivot_dir is None:
        the_iter = gmvault_utils.ordered_dirwalk(self._db_dir, "*.meta", ignore_sub_dir)
    else:
        # get all yy-mm dirs to list
        dirs = gmvault_utils.get_all_dirs_posterior_to(
            pivot_dir,
            gmvault_utils.get_all_dirs_under(self._db_dir, ignore_sub_dir))

        # create all iterators and chain them to keep the same interface
        iter_dirs = [gmvault_utils.ordered_dirwalk('%s/%s' % (self._db_dir, the_dir),
                                                   "*.meta", ignore_sub_dir)
                     for the_dir in dirs]

        the_iter = itertools.chain.from_iterable(iter_dirs)

    # get all ids: the file name without its extension is the gmail id
    for filepath in the_iter:
        directory, fname = os.path.split(filepath)
        gmail_ids[long(os.path.splitext(fname)[0])] = os.path.basename(directory)

    # sort by key
    # used own orderedDict to be compliant with version 2.5
    return collections_utils.OrderedDict(sorted(gmail_ids.items(), key=lambda t: t[0]))
def get_recent_gmail_ids(self, pivot_dir=None, ignore_sub_dir=('chats',), fromdatetime=None):
    """
       Get the gmail ids found in the database that are more recent than
       fromdatetime.

       A *.meta file is kept only when both conditions hold:
         - the path of its directory contains the 'YYYY-MM' string built
           from fromdatetime,
         - the 'internal_date' of its metadata (read with unbury_metadata)
           is strictly greater than fromdatetime.

       :param pivot_dir: when None the whole db dir is walked. Otherwise only
                         the directories returned by
                         gmvault_utils.get_all_dirs_posterior_to(pivot_dir, ...)
                         are walked.
       :param ignore_sub_dir: sub directories that are passed to the dir
                              helpers to be ignored.
       :param fromdatetime: datetime used as exclusive lower bound.
                            When None, no filtering is applied and every id
                            is returned (it used to raise an AttributeError
                            on the first file found).
       :return: OrderedDict {gmail id (long): name of the directory holding
                the meta file}, sorted by increasing gmail id.
    """
    # NOTE(review): only the month of fromdatetime is looked at, so files
    # stored in the directories of later months are ignored - confirm that
    # this is the intended meaning of "recent".

    # collect in a normal dict first: the OrderedDict preserves the insertion
    # order and not the key order, so it is built from the sorted items below
    gmail_ids = {}

    if pivot_dir is None:
        the_iter = gmvault_utils.ordered_dirwalk(self._db_dir, "*.meta", ignore_sub_dir)
    else:
        # get all yy-mm dirs to list
        dirs = gmvault_utils.get_all_dirs_posterior_to(
            pivot_dir,
            gmvault_utils.get_all_dirs_under(self._db_dir, ignore_sub_dir))

        # create all iterators and chain them to keep the same interface
        iter_dirs = [gmvault_utils.ordered_dirwalk('%s/%s' % (self._db_dir, the_dir),
                                                   "*.meta", ignore_sub_dir)
                     for the_dir in dirs]

        the_iter = itertools.chain.from_iterable(iter_dirs)

    # loop invariant: compute the month marker only once
    recent_dir = None
    if fromdatetime is not None:
        recent_dir = fromdatetime.strftime('%Y') + '-' + fromdatetime.strftime('%m')

    # get recent ids
    for filepath in the_iter:
        directory, fname = os.path.split(filepath)
        gmail_id = os.path.splitext(fname)[0]

        if recent_dir is not None:
            if recent_dir not in directory:
                continue
            # the metadata is only read for the files of the right month
            if not self.unbury_metadata(gmail_id)['internal_date'] > fromdatetime:
                continue

        gmail_ids[long(gmail_id)] = os.path.basename(directory)

    # sort by key
    # used own orderedDict to be compliant with version 2.5
    return collections_utils.OrderedDict(sorted(gmail_ids.items(), key=lambda t: t[0]))
def get_all_chats_gmail_ids(self):
    """
       List every chat stored in the chats area of the db.

       :return: OrderedDict {gmail id (long): name of the directory holding
                the meta file}, sorted by increasing gmail id. It is empty
                when the chats area does not exist.
    """
    chats_root = '%s/%s' % (self._db_dir, self.CHATS_AREA)

    # plain dict for the collection phase: the OrderedDict keeps the
    # insertion order (not the key order) so it is fed with sorted items
    found = {}

    if os.path.exists(chats_root):
        for meta_path in gmvault_utils.ordered_dirwalk(chats_root, "*.meta"):
            parent_dir, meta_name = os.path.split(meta_path)
            # the meta file name without its extension is the gmail id
            found[long(os.path.splitext(meta_name)[0])] = os.path.basename(parent_dir)

    # own OrderedDict used to stay compliant with python 2.5
    by_id = sorted(found.items(), key=lambda item: item[0])
    return collections_utils.OrderedDict(by_id)
def test_read_lots_of_files(self):
    """
       Measure how long it takes to list more than 100 000 meta files.

       Reference timings.

       On server:
         250 000 meta files in 50 dirs (50,5000) => 9.74  sec to list them
         100 000 meta files in 20 dirs (20,5000) => 3.068 sec to list them
          60 000 meta files in 60 dirs (60,1000) => 1.826 sec to list them
       On linux macbook pro linux virtual machine:
         250 000 meta files in 50 dirs (50,5000) => 9.91 sec to list them
         100 000 meta files in 20 dirs (20,5000) => 6.59 sec to list them
          60 000 meta files in 60 dirs (60,1000) => 2.26 sec to list them
       On Win7 laptop machine:
         250 000 meta files in 50 dirs (50,5000) => 56.50 sec (3min 27 sec if dir created and listed afterward) to list them
         100 000 meta files in 20 dirs (20,5000) => 20.1 sec to list them
          60 000 meta files in 60 dirs (60,1000) => 9.96 sec to list them
    """
    root_dir = '/tmp/dirs'

    # The tree under root_dir is expected to exist already. The disabled
    # set-up code used to build it with self._create_dirs('/tmp/dirs', 50, 5000)
    # and to print the time spent creating it.

    meta_files = gmvault_utils.dirwalk(root_dir, a_wildcards= '*.meta')

    started = datetime.datetime.now()

    ids_to_dir = collections_utils.OrderedDict()

    for meta_path in meta_files:
        parent_dir, meta_name = os.path.split(meta_path)
        # key: file name without extension, value: name of the parent dir
        ids_to_dir[os.path.splitext(meta_name)[0]] = os.path.basename(parent_dir)

    finished = datetime.datetime.now()

    print("\nnb of files = %s" % (len(ids_to_dir.keys())))
    print("\nTime to read all meta files : %s\n" % (finished-started))
class GMVaultLauncher(object):
    """
       GMVault launcher handling the command parsing
    """

    # accepted values of the -t/--type option for each command
    SYNC_TYPES = ['full', 'quick', 'custom']
    RESTORE_TYPES = ['full', 'quick']
    CHECK_TYPES = ['full']

    # export type name => exporter class ('maildir' maps to the OfflineIMAP exporter)
    EXPORT_TYPES = collections_utils.OrderedDict([
        ('offlineimap', gmvault_export.OfflineIMAP),
        ('dovecot', gmvault_export.Dovecot),
        ('maildir', gmvault_export.OfflineIMAP),
        ('mbox', gmvault_export.MBox)
    ])
    EXPORT_TYPE_NAMES = ", ".join(EXPORT_TYPES)

    # $HOME/gmvault-db, or ./gmvault-db when HOME is not defined
    DEFAULT_GMVAULT_DB = "%s/gmvault-db" % (os.getenv("HOME", "."))

    def __init__(self):
        """ constructor """
        super(GMVaultLauncher, self).__init__()

    @gmvault_utils.memoized
    def _create_parser(self): #pylint: disable=R0915
        """
           Create the argument parser with its four subcommands:
           sync, restore, check and export.

           Each subparser sets a 'verb' default holding the subcommand name.
           The authentication options are store_const options defaulting to
           'not_seen' and set to 'empty' when given on the command line: this
           allows to distinguish an option that has not been seen from an
           option seen without any value.

           Return the created parser
        """
        parser = CmdLineParser()
        parser.epilogue = GLOBAL_HELP_EPILOGUE

        parser.add_argument("-v", '--version', action='version',
                            version='Gmvault v%s' % (GMVAULT_VERSION))

        subparsers = parser.add_subparsers(title='subcommands', help='valid subcommands.')

        # ---- sync command ----
        sync_parser = subparsers.add_parser('sync',
                                            help='synchronize with a given gmail account.')

        # email argument can be optional so it should be an option
        sync_parser.add_argument('email',
                                 action='store', default='empty_$_email',
                                 help='email to sync with.')

        # sync type
        sync_parser.add_argument('-t', '-type', '--type',
                                 action='store', dest='type',
                                 default='full',
                                 help='type of synchronisation: full|quick|custom. (default: full)')

        sync_parser.add_argument("-d", "--db-dir",
                                 action='store',
                                 help="Database root directory. (default: $HOME/gmvault-db)",
                                 dest="db_dir", default= self.DEFAULT_GMVAULT_DB)

        # for both when seen add const empty otherwise not_seen
        # this allow to distinguish between an empty value and a non seen option
        sync_parser.add_argument("-o", "--oauth",
                                 help="use oauth for authentication. (default recommended method)",
                                 action='store_const', dest="oauth_token",
                                 const='empty', default='not_seen')

        sync_parser.add_argument("-p", "--passwd",
                                 help="use interactive password authentication. (not recommended)",
                                 action= 'store_const', dest="passwd",
                                 const='empty', default='not_seen')

        sync_parser.add_argument("-2", "--2-legged-oauth",
                                 help="use 2 legged oauth for authentication. (Google Apps Business or Education accounts)",
                                 action='store_const', dest="two_legged_oauth_token",
                                 const='empty', default='not_seen')

        # the three options below reuse the dest of -o / -p and only change the stored const
        sync_parser.add_argument("--renew-oauth-tok",
                                 help="renew the stored oauth token (two legged or normal) via an interactive authentication session.",
                                 action= 'store_const', dest="oauth_token", const='renew')

        sync_parser.add_argument("--renew-passwd",
                                 help="renew the stored password via an interactive authentication session. (not recommended)",
                                 action= 'store_const', dest="passwd", const='renew')

        sync_parser.add_argument("--store-passwd",
                                 help="use interactive password authentication, encrypt and store the password. (not recommended)",
                                 action= 'store_const', dest="passwd", const='store')

        sync_parser.add_argument("-r", "--imap-req", metavar = "REQ",
                                 help="Imap request to restrict sync.",
                                 dest="imap_request", default=None)

        # a space was missing between "defined in" and the url
        sync_parser.add_argument("-g", "--gmail-req", metavar = "REQ",
                                 help="Gmail search request to restrict sync as defined in "
                                      "https://support.google.com/mail/bin/answer.py?hl=en&answer=7190",
                                 dest="gmail_request", default=None)

        # activate the resume mode --restart is deprecated
        sync_parser.add_argument("--resume", "--restart",
                                 action='store_true', dest='restart',
                                 default=False,
                                 help= 'Resume the sync action from the last saved gmail id.')

        # restrict the sync to the emails
        sync_parser.add_argument("--emails-only",
                                 action='store_true', dest='only_emails',
                                 default=False, help= 'Only sync emails.')

        # restrict the sync to the chats
        sync_parser.add_argument("--chats-only",
                                 action='store_true', dest='only_chats',
                                 default=False, help= 'Only sync chats.')

        sync_parser.add_argument("-e", "--encrypt",
                                 help="encrypt stored email messages in the database.",
                                 action='store_true', dest="encrypt", default=False)

        sync_parser.add_argument("-c", "--check-db", metavar = "VAL",
                                 help="enable/disable the removal from the gmvault db of the emails "
                                      "that have been deleted from the given gmail account. VAL = yes or no.",
                                 dest="db_cleaning", default=None)

        sync_parser.add_argument("-m", "--multiple-db-owner",
                                 help="Allow the email database to be synchronized with emails from multiple accounts.",
                                 action='store_true', dest="allow_mult_owners", default=False)

        # compression is on by default, the option switches it off
        sync_parser.add_argument("--no-compression",
                                 action='store_false', dest='compression',
                                 default=True, help= 'disable email storage compression (gzip).')

        sync_parser.add_argument("--server", metavar = "HOSTNAME",
                                 action='store',
                                 help="Gmail imap server hostname. (default: imap.gmail.com)",
                                 dest="host", default="imap.gmail.com")

        sync_parser.add_argument("--port", metavar = "PORT",
                                 action='store',
                                 help="Gmail imap server port. (default: 993)",
                                 dest="port", default=993)

        sync_parser.add_argument("--debug", "-debug",
                                 action='store_true', help="Activate debugging info",
                                 dest="debug", default=False)

        sync_parser.set_defaults(verb='sync')

        sync_parser.epilogue = SYNC_HELP_EPILOGUE

        # ---- restore command ----
        rest_parser = subparsers.add_parser('restore',
                                            help='restore gmvault-db to a given email account.')

        # email argument can be optional so it should be an option
        rest_parser.add_argument('email',
                                 action='store', default='empty_$_email',
                                 help='email account to restore.')

        # restore type
        rest_parser.add_argument('-t', '-type', '--type',
                                 action='store', dest='type',
                                 default='full',
                                 help='type of restoration: full|quick. (default: full)')

        # add a label
        rest_parser.add_argument('-a', '--apply-label',
                                 action='store', dest='apply_label',
                                 default=None, help='Apply a label to restored emails')

        # activate the resume mode --restart is deprecated
        rest_parser.add_argument("--resume", "--restart",
                                 action='store_true', dest='restart',
                                 default=False, help= 'Restart from the last saved gmail id.')

        # restrict the restore to the emails
        rest_parser.add_argument("--emails-only",
                                 action='store_true', dest='only_emails',
                                 default=False, help= 'Only sync emails.')

        # restrict the restore to the chats
        rest_parser.add_argument("--chats-only",
                                 action='store_true', dest='only_chats',
                                 default=False, help= 'Only sync chats.')

        rest_parser.add_argument("-d", "--db-dir",
                                 action='store',
                                 help="Database root directory. (default: $HOME/gmvault-db)",
                                 dest="db_dir", default= self.DEFAULT_GMVAULT_DB)

        # for both when seen add const empty otherwise not_seen
        # this allow to distinguish between an empty value and a non seen option
        rest_parser.add_argument("-o", "--oauth",
                                 help="use oauth for authentication. (default method)",
                                 action='store_const', dest="oauth_token",
                                 const='empty', default='not_seen')

        rest_parser.add_argument("-p", "--passwd",
                                 help="use interactive password authentication. (not recommended)",
                                 action='store_const', dest="passwd",
                                 const='empty', default='not_seen')

        rest_parser.add_argument("-2", "--2-legged-oauth",
                                 help="use 2 legged oauth for authentication. (Google Apps Business or Education accounts)",
                                 action='store_const', dest="two_legged_oauth_token",
                                 const='empty', default='not_seen')

        rest_parser.add_argument("--server", metavar = "HOSTNAME",
                                 action='store',
                                 help="Gmail imap server hostname. (default: imap.gmail.com)",
                                 dest="host", default="imap.gmail.com")

        rest_parser.add_argument("--port", metavar = "PORT",
                                 action='store',
                                 help="Gmail imap server port. (default: 993)",
                                 dest="port", default=993)

        rest_parser.add_argument("--debug", "-debug",
                                 action='store_true', help="Activate debugging info",
                                 dest="debug", default=False)

        rest_parser.set_defaults(verb='restore')

        rest_parser.epilogue = REST_HELP_EPILOGUE

        # ---- check_db command ----
        check_parser = subparsers.add_parser('check',
                                             help='check and clean the gmvault-db disk database.')

        # email argument
        check_parser.add_argument('email',
                                  action='store', default='empty_$_email',
                                  help='gmail account against which to check.')

        check_parser.add_argument("-d", "--db-dir",
                                  action='store',
                                  help="Database root directory. (default: $HOME/gmvault-db)",
                                  dest="db_dir", default= self.DEFAULT_GMVAULT_DB)

        # for both when seen add const empty otherwise not_seen
        # this allow to distinguish between an empty value and a non seen option
        check_parser.add_argument("-o", "--oauth",
                                  help="use oauth for authentication. (default method)",
                                  action='store_const', dest="oauth_token",
                                  const='empty', default='not_seen')

        check_parser.add_argument("-p", "--passwd",
                                  help="use interactive password authentication. (not recommended)",
                                  action='store_const', dest="passwd",
                                  const='empty', default='not_seen')

        check_parser.add_argument("-2", "--2-legged-oauth",
                                  help="use 2 legged oauth for authentication. (Google Apps Business or Education accounts)",
                                  action='store_const', dest="two_legged_oauth_token",
                                  const='empty', default='not_seen')

        check_parser.add_argument("--server", metavar = "HOSTNAME",
                                  action='store',
                                  help="Gmail imap server hostname. (default: imap.gmail.com)",
                                  dest="host", default="imap.gmail.com")

        check_parser.add_argument("--port", metavar = "PORT",
                                  action='store',
                                  help="Gmail imap server port. (default: 993)",
                                  dest="port", default=993)

        check_parser.add_argument("--debug", "-debug",
                                  action='store_true', help="Activate debugging info",
                                  dest="debug", default=False)

        check_parser.set_defaults(verb='check')

        # ---- export command ----
        export_parser = subparsers.add_parser('export',
                                              help='Export the gmvault-db database to another format.')

        export_parser.add_argument('output_dir',
                                   action='store', help='destination directory to export to.')

        export_parser.add_argument("-d", "--db-dir",
                                   action='store',
                                   help="Database root directory. (default: $HOME/gmvault-db)",
                                   dest="db_dir", default= self.DEFAULT_GMVAULT_DB)

        export_parser.add_argument('-t', '-type', '--type',
                                   action='store', dest='type',
                                   default='mbox',
                                   help='type of export: %s. (default: mbox)' % self.EXPORT_TYPE_NAMES)

        # the option can be repeated: each label is appended to the list
        export_parser.add_argument('-l', '--label',
                                   action='append', dest='label',
                                   default=None,
                                   help='specify a label to export')

        export_parser.add_argument("--debug", "-debug",
                                   action='store_true', help="Activate debugging info",
                                   dest="debug", default=False)

        export_parser.set_defaults(verb='export')

        export_parser.epilogue = EXPORT_HELP_EPILOGUE

        return parser

    @classmethod
    def _parse_common_args(cls, options, parser, parsed_args, list_of_types=[]): #pylint:disable=W0102
        """
           Parse the common arguments for sync and restore and copy them
           into parsed_args.

           :param options: the namespace produced by the argument parser. The
                           attributes email, debug, restart, passwd,
                           oauth_token, two_legged_oauth_token, type, db_dir,
                           host and port are read. options.oauth_token is set
                           to 'empty' when no authentication method has been
                           given (oauth is the default method).
           :param parser: the parser, used to report the errors with
                          parser.error().
           :param parsed_args: dict to fill. Its 'command' key is read to
                               build the unknown type error message.
           :param list_of_types: the accepted values for the type option,
                                 'auto' being always accepted. The default
                                 list is only read and never modified.
           :return: parsed_args with the keys email, debug, restart, passwd,
                    db-dir, host and port set, plus type when options.type is
                    set and valid, and oauth/two_legged when an oauth flavour
                    is selected.
        """
        # add email
        parsed_args['email'] = options.email

        parsed_args['debug'] = options.debug

        parsed_args['restart'] = options.restart

        # user entered both authentication methods
        if options.passwd == 'empty' and \
           (options.oauth_token == 'empty' or options.two_legged_oauth_token == 'empty'):
            parser.error('You have to use one authentication method. '
                         'Please choose between XOAuth and password (recommend XOAuth).')

        # user entered no authentication methods => go to default oauth
        if options.passwd == 'not_seen' and options.oauth_token == 'not_seen' \
           and options.two_legged_oauth_token == 'not_seen':
            # default to xoauth
            options.oauth_token = 'empty'

        # add passwd
        parsed_args['passwd'] = options.passwd

        # add oauth tok: nothing is set when only a password option was given
        if options.oauth_token == 'empty':
            parsed_args['oauth'] = options.oauth_token
            parsed_args['two_legged'] = False
        elif options.oauth_token == 'renew':
            parsed_args['oauth'] = 'renew'
            parsed_args['two_legged'] = (options.two_legged_oauth_token == 'empty')
        elif options.two_legged_oauth_token == 'empty':
            parsed_args['oauth'] = options.two_legged_oauth_token
            parsed_args['two_legged'] = True

        # add ops type (compared case insensitively)
        if options.type:
            tempo_list = ['auto']
            tempo_list.extend(list_of_types)
            if options.type.lower() in tempo_list:
                parsed_args['type'] = options.type.lower()
            else:
                parser.error('Unknown type for command %s. The type should be one of %s' \
                             % (parsed_args['command'], list_of_types))

        # add db_dir
        parsed_args['db-dir'] = options.db_dir

        LOG.critical("Use gmvault-db located in %s.\n" % (parsed_args['db-dir']))

        # add host
        parsed_args['host'] = options.host

        # convert to int if necessary: the default is already an int while a
        # value coming from the command line is a string
        port = options.port
        if isinstance(port, str):
            try:
                port = int(port)
            except ValueError:
                # report the raw option value: the converted port does not
                # exist when the conversion fails
                parser.error("--port option %s is not a number. Please check the port value" \
                             % (options.port))

        # add port
        parsed_args['port'] = port

        return parsed_args