Example #1
0
 def __init__(self, acct_directory, xml_dir, acct_name):
     """Initialise walker state and open the account for output.

     Args:
         acct_directory: stored unchanged as ``account_directory``.
         xml_dir: stored as ``xml_dir`` and handed to ``Account``.
         acct_name: stored as ``account_name`` and handed to ``Account``.
     """
     # Constructor arguments, kept exactly as received.
     self.account_directory = acct_directory
     self.xml_dir = xml_dir
     self.account_name = acct_name

     # Build the account object first; it is started further down once the
     # bookkeeping attributes exist.
     account = Account(acct_name, xml_dir)
     self.account = account

     # Position / progress bookkeeping, all starting out empty.
     self.current_folder = None
     self.current_relpath = None  # type: str
     self.messages = []
     self.total_messages_processed = 0

     self.logger = logging.getLogger("EmlWalker")
     self.message_pack = DefaultListOrderedDict()

     # Start the account and write its global id.
     account.start_account()
     account.write_global_id()

     self.chunks = 0
     self.new_account = True

     # Settings pulled from CommonMethods.
     self.from_tomes = CommonMethods.get_tomes_tool()
     process_root = CommonMethods.get_process_paths()
     self.data_dir = os.path.join(process_root, "mboxes")

     self.folder_map = {}
     self.new_dir = True
     # NOTE: these two hold the builtin ``str`` type object itself (not an
     # empty string) until they are assigned elsewhere.
     self.expanded_path = str
     self.cur_fn = str
    def _arg_parse(self):
        """Parse the command line and configure this instance from it.

        Builds the argument parser, parses the process arguments
        (``parser.parse_args()`` with no explicit list), prints every parsed
        option as ``name:value``, and then:

        * sets the ``CommonMethods`` base path, RTF-body, hash-dict, dedupe
          and from-tomes settings;
        * sets ``account_name`` and ``account_directory`` (the latter
          defaults to ``<base path>/mboxes`` unless ``--directory`` is given);
        * sets the ``eaxs``, ``mboxes``, ``emls`` and ``psts`` paths under
          the base path;
        * applies the optional ``--from-eml``, ``--no_subdirectories``,
          ``--data-dir``, ``--chunk`` and ``--stitch`` switches.

        argparse exits the process itself if the required ``--account``
        option is missing.
        """
        parser = argparse.ArgumentParser(description='Convert mbox into XML.')

        parser.add_argument('--account',
                            '-a',
                            dest='account_name',
                            required=True,
                            help='email account name')

        parser.add_argument(
            '--directory',
            '-d',
            dest='account_directory',
            help='directory to hold all files for this account')

        parser.add_argument(
            '--chunk',
            '-c',
            dest='chunk',
            default=self.NO_CHUNK,
            help=
            'An approximate number of messages to put in one output XML file. '
            'NOTE: This will be approximate because '
            'DarcMailCLI maintains the integrity of the Folder. A chunk test is made after a '
            'folder is processed. If the total number of processed messages is below the chunk '
            'size, the next folder will be added to the current XML file.  If it exceeds '
            'the chunk limit then a new file will be opened. Each file will have the '
            'starting LocalID as part of the filename.  default = no limit or all messages '
            'in one file.')

        parser.add_argument(
            '--no_subdirectories',
            '-n',
            dest='no_subdirectories',
            action='store_true',
            help='do NOT make subdirectories to hold external content '
            '(default = make subdirectories)')

        parser.add_argument(
            '--data-dir',
            '-dd',
            dest='data_dir',
            type=str,
            default='attachments',
            help='path to store the account attachments. DEFAULT: "attachments"'
        )

        parser.add_argument('--from-eml',
                            '-fe',
                            dest='from_eml',
                            help='The destination is a series of emls',
                            action='store_true')

        parser.add_argument(
            '--stitch',
            '-st',
            dest='stitch',
            action='store_true',
            help=
            'Stitch can only be used in conjunction with the --chunk switch. The purpose is to '
            'reduce memory load.  If you find that DarcMailCLI is crashing on your email accounts '
            'due to memory errors, but you still want to have a single EAXS file, then chunk the '
            'process, which clears memory faster, and stitch will rebuild a single file at the end '
            'of the process')
        parser.add_argument(
            '--tomes_tool',
            '-tt',
            dest='tomes_tool',
            action='store_true',
            help=
            'Indicates that the source tree is built from the Dockerized pst_extractor.'
        )

        args = parser.parse_args()
        argdict = vars(args)
        # Echo the effective options so a run can be reconstructed from its output.
        for k, v in argdict.items():
            print("{}:{}".format(k, v))

        CommonMethods.set_base_path(CommonMethods.get_process_paths())

        self.account_name = argdict['account_name'].strip()
        # Default location; replaced below when --directory is supplied.
        self.account_directory = os.path.join(CommonMethods.get_base_path(),
                                              'mboxes')
        if argdict['account_directory']:
            self.account_directory = os.path.normpath(
                os.path.abspath(argdict['account_directory'].strip()))

        # Initialize common features and common attributes

        CommonMethods.set_store_rtf_body(False)
        CommonMethods.init_hash_dict()
        CommonMethods.set_dedupe()

        self.eaxs = os.path.join(CommonMethods.get_base_path(), 'eaxs')
        # Fixed: this previously pointed at 'eaxs' (copy of the line above).
        self.mboxes = os.path.join(CommonMethods.get_base_path(), 'mboxes')
        self.emls = os.path.join(CommonMethods.get_base_path(), 'emls')
        self.psts = os.path.join(CommonMethods.get_base_path(), 'pst')

        if argdict['from_eml']:
            self.eml_struct = True

        CommonMethods.set_from_tomes(False)
        if argdict['tomes_tool']:
            CommonMethods.set_from_tomes(True)

        # TODO: Remove this maybe
        # No 'max_internal' option is registered on the parser above, so this
        # branch is not taken with the arguments defined in this method.
        if 'max_internal' in argdict:
            self.max_internal = int(argdict['max_internal'])

        if argdict['no_subdirectories']:
            self.levels = self.NO_LEVELS

        if argdict['data_dir']:
            self._data_dir(argdict['data_dir'])
        else:
            self._data_dir()

        if argdict['chunk']:
            # NOTE(review): --chunk has no ``type=`` so a value given on the
            # command line is stored as a string, and the shared chunk size is
            # set to 1000 regardless of the requested value - confirm both are
            # intended.
            self.chunksize = argdict['chunk']
            CommonMethods.set_chunk_size(1000)
            # Stitching is only configured when chunking is active.
            CommonMethods.set_stitch(argdict['stitch'])