Esempio n. 1
0
    def do_walk(self):
        """
        Index every eml file under the account directory, then process them.

        When ``self.from_tomes`` is set, the account directory becomes
        ``<data_dir>/<account_name>`` and its ``folder_map.tsv`` is handed to
        ``_build_folder_map``; if that file does not exist the shared
        from-tomes flag is switched off through ``CommonMethods``.

        The account directory is then walked and the names of files ending
        in "eml" are grouped by containing folder in ``self.message_pack``.
        Finally ``process_folders`` is called.

        :return: None
        """
        if self.from_tomes:
            # A TOMES_TOOL structure keeps the account below the data dir.
            self.account_directory = os.path.join(self.data_dir,
                                                  self.account_name)
            folder_map = os.path.join(self.account_directory,
                                      "folder_map.tsv")
            if not os.path.exists(folder_map):
                # The folder map is missing, so drop the from-tomes flag.
                CommonMethods.set_from_tomes(False)
            else:
                self._build_folder_map(folder_map)

        print("Scanning data structure for emails.")
        for folder, _subdirs, filenames in os.walk(self.account_directory):
            for filename in filenames:
                # Any folder holding at least one file gets an entry, even
                # when none of its files turn out to be emls.
                bucket = self.message_pack.setdefault(folder, [])
                if filename.endswith("eml"):
                    bucket.append(filename)
        self.process_folders()
    def _arg_parse(self):
        """
        Parse the command line and configure this instance and CommonMethods.

        Options are read from ``sys.argv`` through ``argparse``; every parsed
        option is echoed to stdout as ``name:value``. The results are then
        stored on ``self`` and pushed into the shared ``CommonMethods`` state.

        Attributes always set on ``self``: ``account_name``,
        ``account_directory``, ``eaxs``, ``mboxes``, ``emls`` and ``psts``.
        Attributes set only when the matching option is given:
        ``eml_struct`` (``--from-eml``), ``levels`` (``--no_subdirectories``)
        and ``chunksize`` (``--chunk``).

        :return: None
        :raises SystemExit: raised by argparse on invalid arguments
            (including a non-integer ``--chunk``) or when help is requested.
        """
        parser = argparse.ArgumentParser(description='Convert mbox into XML.')

        parser.add_argument('--account',
                            '-a',
                            dest='account_name',
                            required=True,
                            help='email account name')

        parser.add_argument(
            '--directory',
            '-d',
            dest='account_directory',
            help='directory to hold all files for this account')

        parser.add_argument(
            '--chunk',
            '-c',
            dest='chunk',
            default=self.NO_CHUNK,
            help=
            'An approximate number of messages to put in one output XML file. '
            'NOTE: This will be approximate because '
            'DarcMailCLI maintains the integrity of the Folder. A chunk test is made after a '
            'folder is processed. If the total number of processed messages is below the chunk '
            'size, the next folder will be added to the current XML file.  If it exceeds '
            'the chunk limit then a new file will be opened. Each file will have the '
            'starting LocalID as part of the filename.  default = no limit or all messages '
            'in one file.')

        parser.add_argument(
            '--no_subdirectories',
            '-n',
            dest='no_subdirectories',
            action='store_true',
            help='do NOT make subdirectories to hold external content '
            '(default = make subdirectories)')

        parser.add_argument(
            '--data-dir',
            '-dd',
            dest='data_dir',
            type=str,
            default='attachments',
            help='path to store the account attachments. DEFAULT: "attachments"'
        )

        parser.add_argument('--from-eml',
                            '-fe',
                            dest='from_eml',
                            help='The destination is a series of emls',
                            action='store_true')

        parser.add_argument(
            '--stitch',
            '-st',
            dest='stitch',
            action='store_true',
            help=
            'Stitch can only be used in conjunction with the --chunk switch. The purpose is to '
            'reduce memory load.  If you find that DarcMailCLI is crashing on your email accounts '
            'due to memory errors, but you still want to have a single EAXS file, then chunk the '
            'process, which clears memory faster, and stitch will rebuild a single file at the end '
            'of the process')
        parser.add_argument(
            '--tomes_tool',
            '-tt',
            dest='tomes_tool',
            action='store_true',
            help=
            'Indicates that the source tree is built from the Dockerized pst_extractor.'
        )

        args = parser.parse_args()
        argdict = vars(args)
        # Echo the effective options so a run can be reconstructed from logs.
        for k, v in argdict.items():
            print("{}:{}".format(k, v))

        CommonMethods.set_base_path(CommonMethods.get_process_paths())
        base_path = CommonMethods.get_base_path()

        self.account_name = argdict['account_name'].strip()
        # Default to <base>/mboxes unless an explicit directory was given.
        self.account_directory = os.path.join(base_path, 'mboxes')
        if argdict['account_directory']:
            self.account_directory = os.path.normpath(
                os.path.abspath(argdict['account_directory'].strip()))

        # Initialize common features and common attributes

        CommonMethods.set_store_rtf_body(False)
        CommonMethods.init_hash_dict()
        CommonMethods.set_dedupe()

        self.eaxs = os.path.join(base_path, 'eaxs')
        # Was joined with 'eaxs' (copy-paste of the line above); the default
        # account directory above uses 'mboxes', so this now matches it.
        self.mboxes = os.path.join(base_path, 'mboxes')
        self.emls = os.path.join(base_path, 'emls')
        self.psts = os.path.join(base_path, 'pst')

        if argdict['from_eml']:
            self.eml_struct = True

        CommonMethods.set_from_tomes(False)
        if argdict['tomes_tool']:
            CommonMethods.set_from_tomes(True)

        # TODO: Remove this maybe -- no 'max_internal' option is declared on
        # the parser above, so this branch is not reachable from here.
        if 'max_internal' in argdict:
            self.max_internal = int(argdict['max_internal'])

        if argdict['no_subdirectories']:
            self.levels = self.NO_LEVELS

        if argdict['data_dir']:
            self._data_dir(argdict['data_dir'])
        else:
            self._data_dir()

        chunk = argdict['chunk']
        if chunk:
            # argparse hands the value over as text. Convert it once so the
            # instance and CommonMethods share the same integer, instead of
            # storing a string here and a hard-coded 1000 there.
            try:
                self.chunksize = int(chunk)
            except (TypeError, ValueError):
                parser.error(
                    '--chunk must be an integer, got {!r}'.format(chunk))
            CommonMethods.set_chunk_size(self.chunksize)
            # Stitching is only honoured together with chunking.
            CommonMethods.set_stitch(argdict['stitch'])