Example n. 1
0
    def sync(self):
        """Acquire files and/or metadata for every non-directory item.

        In "full" mode both the file contents and the per-file metadata are
        queued for download; in any other mode only the metadata endpoint is
        queued. Work items are handed to a threaded Downloader and the run
        is timed end to end.
        """
        d1 = datetime.now()
        d = Downloader.Downloader(self.project, self.oauth_provider.http_intercept, self._save_file, self.oauth_provider.get_auth_header, self.project.threads)
        if self.project.args.mode == "full":
            self.project.log("transaction", "Full acquisition initiated", "info", True)
        else:
            self.project.log("transaction", "Metadata acquisition initiated", "info", True)

        self.initialize_items()
        cnt = len(self.files)

        self.project.log("transaction", "Total items queued for acquisition: " + str(cnt), "info", True)
        self.metadata()

        for file in self.files:
            self.project.log("transaction", "Calculating " + file['path'], "info", True)

            if not file['is_dir']:
                # The default argument binds the *current* item; a plain
                # `lambda: self._get_download_uri(file)` would late-bind to
                # the last loop value.
                download_uri = lambda f=file: self._get_download_uri(f)
                metadata_download_uri = self.oauth_provider.config['API_ENDPOINT'] + '/metadata/auto' + file['path']
                parentmap = self._get_parent_mapping(file)
                filetitle = self._get_file_name(file)
                orig = os.path.basename(file['path'])
                if filetitle != orig:
                    self.project.log("exception", "Normalized '{}' to '{}'".format(orig, filetitle), "warning", True)

                if 'bytes' in file:
                    self.file_size_bytes += int(file['bytes'])

                # os.path.join is variadic; no need to nest calls.
                save_metadata_path = Common.assert_path(os.path.normpath(os.path.join(self.project.project_folders['metadata'], parentmap, filetitle + ".json")), self.project)
                if save_metadata_path:
                    self.project.log("transaction", "Queueing {} for download...".format(orig), "info", True)
                    d.put(Downloader.DownloadSlip(metadata_download_uri, file, save_metadata_path, 'path'))

                if self.project.args.mode == "full":
                    save_download_path = Common.assert_path(os.path.normpath(os.path.join(self.project.project_folders['data'], parentmap, filetitle)), self.project)
                    if save_download_path:
                        self.project.log("transaction", "Queueing {} for download...".format(orig), "info", True)
                        d.put(Downloader.DownloadSlip(download_uri, file, save_download_path, 'path'))

        self.project.log("transaction", "Total size of files to be acquired is {}".format(Common.sizeof_fmt(self.file_size_bytes, "B")), "highlight", True)
        if self.project.args.prompt:
            IO.get("Press ENTER to begin acquisition...")

        d.start()
        d.wait_for_complete()
        d2 = datetime.now()
        delt = d2 - d1

        self.project.log("transaction", "Acquisition completed in {}".format(str(delt)), "highlight", True)
Example n. 2
0
    def _save_raw_mail(self, data, slip):
        """Persist a Gmail "raw" message under each of its labels.

        For every label the message carries: append it to that label's mbox,
        save the decoded RFC 2822 text to the data folder, and write each
        attachment (parts with a Content-Disposition header) into a
        per-message directory.

        data -- file-like HTTP response whose body is the Gmail message JSON
        slip -- download slip carrying the relative save path
        """
        data = data.read().decode('utf-8')
        msg = json.loads(data)
        msg_data = msg["raw"]
        # Gmail returns the raw RFC 2822 message base64url-encoded.
        msg_data = base64.urlsafe_b64decode(msg_data).decode('utf-8')
        labels = msg["labelIds"]
        data_dir = self.project.project_folders["data"]
        for label in labels:
            mbox = mailbox.mbox(os.path.join(self.mbox_dir, label))
            mbox_msg = email.message_from_bytes(msg_data.encode(), mailbox.mboxMessage)
            mbox.add(mbox_msg)
            label_path = os.path.join(data_dir, label)
            save_path = os.path.join(label_path, slip.savepath)
            save_path = Common.assert_path(save_path, self.project)
            if save_path:
                # exist_ok makes the isdir pre-check redundant.
                os.makedirs(os.path.dirname(save_path), exist_ok=True)
                self.project.savedata(msg_data, save_path, False)
                self.project.log("transaction", "Saved file to " + save_path, "info", True)

            for part in mbox_msg.walk():
                content_disposition = part.get("Content-Disposition", None)
                if content_disposition:
                    # Don't rebind `data`: shadowing the outer variable made
                    # the code harder to follow.
                    payload = part.get_payload(decode=True)
                    att_name = part.get_filename()
                    if att_name:
                        # partition() tolerates savepaths without a dot,
                        # where index('.') raised ValueError.
                        att_dir = os.path.join(label_path, slip.savepath.partition('.')[0])
                        att_path = os.path.join(att_dir, att_name)
                        os.makedirs(att_dir, exist_ok=True)
                        with open(att_path, 'wb') as f:
                            f.write(payload)
                        # Fixed: previously logged save_path (the mail file,
                        # possibly None) instead of the attachment's path.
                        self.project.log("transaction", "Saved attachment to " + att_path, "info", True)
            mbox.flush()
Example n. 3
0
    def _save_metadata(self, data, slip):
        """Save metadata for a Gmail thread and each of its messages.

        Per message/label: writes message JSON into label/thread/message/ and
        thread JSON into label/thread/. Also appends one summary row per
        message to the shared metadata file.

        data -- file-like HTTP response whose body is the thread JSON
        slip -- download slip (unused here; kept for the handler signature)
        """
        data = data.read().decode('utf-8')
        thread = json.loads(data)
        # Context manager guarantees the summary file is closed even if a
        # write below raises (the original leaked the handle on error).
        with open(self.metadata_file, 'ab') as f:
            for message in thread['messages']:
                for label in message['labelIds']:
                    label_dir = os.path.join(self.project.project_folders['metadata'], label)
                    thread_dir = os.path.join(label_dir, thread['id'])
                    message_dir = os.path.join(thread_dir, message['id'])
                    msg_metadata_path = os.path.join(message_dir, message['id'] + ".json")
                    msg_metadata_path = Common.assert_path(msg_metadata_path, self.project)
                    # Save metadata of each message individually, inside label/thread/message directory
                    if msg_metadata_path:
                        os.makedirs(message_dir, exist_ok=True)
                        self.project.savedata(json.dumps(message, sort_keys=True, indent=4), msg_metadata_path, False)
                        self.project.log("transaction", "Saving metadata to {}".format(msg_metadata_path), "info", True)
                    thread_metadata_path = os.path.join(thread_dir, thread['id'] + ".json")

                    # Save metadata of each thread individually inside label/thread directory
                    thread_metadata_path = Common.assert_path(thread_metadata_path, self.project)
                    if thread_metadata_path:
                        os.makedirs(thread_dir, exist_ok=True)
                        self.project.savedata(json.dumps(thread, sort_keys=True, indent=4), thread_metadata_path, False)
                        self.project.log("transaction", "Saving metadata to {}".format(thread_metadata_path), "info", True)
                headers = message['payload']['headers']
                label_list = ",".join(message['labelIds'])
                # internalDate is epoch milliseconds; render as UTC.
                internal_date = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(int(message['internalDate']) / 1000))
                # `x or 'N/A'` replaces the original pattern that called
                # extract_header_value twice per header.
                header_date = self.extract_header_value(headers, 'Date') or 'N/A'
                header_to = self.extract_header_value(headers, 'To') or 'N/A'
                header_from = self.extract_header_value(headers, 'From') or 'N/A'
                header_subject = self.extract_header_value(headers, 'Subject') or 'N/A'
                # NOTE(review): fields are quoted but not escaped; an embedded
                # '"' in e.g. the snippet will break this row — confirm the
                # downstream consumer before switching to the csv module.
                f.write('"{id}","{internaldate}","{labels}","{headerdate}","{to}","{xfrom}","{subject}","{snippet}","{threadid}"{sep}'.format(id=message['id'], internaldate=internal_date, labels=label_list, headerdate=header_date, to=header_to, xfrom=header_from, subject=header_subject, snippet=message['snippet'], threadid=thread['id'], sep=os.linesep).encode('utf-8'))
Example n. 4
0
    def _save_raw_mail(self, data, slip):
        """Persist a Gmail "raw" message under each of its labels.

        For every label the message carries: append it to that label's mbox,
        save the decoded RFC 2822 text to the data folder, and write each
        attachment (parts with a Content-Disposition header) into a
        per-message directory.

        data -- file-like HTTP response whose body is the Gmail message JSON
        slip -- download slip carrying the relative save path
        """
        data = data.read().decode('utf-8')
        msg = json.loads(data)
        msg_data = msg["raw"]
        # Gmail returns the raw RFC 2822 message base64url-encoded.
        msg_data = base64.urlsafe_b64decode(msg_data).decode('utf-8')
        labels = msg["labelIds"]
        data_dir = self.project.project_folders["data"]
        for label in labels:
            mbox = mailbox.mbox(os.path.join(self.mbox_dir, label))
            mbox_msg = email.message_from_bytes(msg_data.encode(),
                                                mailbox.mboxMessage)
            mbox.add(mbox_msg)
            label_path = os.path.join(data_dir, label)
            save_path = os.path.join(label_path, slip.savepath)
            save_path = Common.assert_path(save_path, self.project)
            if save_path:
                # exist_ok makes the isdir pre-check redundant.
                os.makedirs(os.path.dirname(save_path), exist_ok=True)
                self.project.savedata(msg_data, save_path, False)
                self.project.log("transaction", "Saved file to " + save_path,
                                 "info", True)

            for part in mbox_msg.walk():
                content_disposition = part.get("Content-Disposition", None)
                if content_disposition:
                    # Don't rebind `data`: shadowing the outer variable made
                    # the code harder to follow.
                    payload = part.get_payload(decode=True)
                    att_name = part.get_filename()
                    if att_name:
                        # partition() tolerates savepaths without a dot,
                        # where index('.') raised ValueError.
                        att_dir = os.path.join(
                            label_path,
                            slip.savepath.partition('.')[0])
                        att_path = os.path.join(att_dir, att_name)
                        os.makedirs(att_dir, exist_ok=True)
                        with open(att_path, 'wb') as f:
                            f.write(payload)
                        # Fixed: previously logged save_path (the mail file,
                        # possibly None) instead of the attachment's path.
                        self.project.log("transaction",
                                         "Saved attachment to " + att_path,
                                         "info", True)
            mbox.flush()
Example n. 5
0
    def _save_metadata(self, data, slip):
        """Save metadata for a Gmail thread and each of its messages.

        Per message/label: writes message JSON into label/thread/message/ and
        thread JSON into label/thread/. Also appends one summary row per
        message to the shared metadata file.

        data -- file-like HTTP response whose body is the thread JSON
        slip -- download slip (unused here; kept for the handler signature)
        """
        data = data.read().decode('utf-8')
        thread = json.loads(data)
        # Context manager guarantees the summary file is closed even if a
        # write below raises (the original leaked the handle on error).
        with open(self.metadata_file, 'ab') as f:
            for message in thread['messages']:
                for label in message['labelIds']:
                    label_dir = os.path.join(
                        self.project.project_folders['metadata'], label)
                    thread_dir = os.path.join(label_dir, thread['id'])
                    message_dir = os.path.join(thread_dir, message['id'])
                    msg_metadata_path = os.path.join(message_dir,
                                                     message['id'] + ".json")
                    msg_metadata_path = Common.assert_path(msg_metadata_path,
                                                           self.project)
                    # Save metadata of each message individually, inside label/thread/message directory
                    if msg_metadata_path:
                        os.makedirs(message_dir, exist_ok=True)
                        self.project.savedata(
                            json.dumps(message, sort_keys=True, indent=4),
                            msg_metadata_path, False)
                        self.project.log(
                            "transaction",
                            "Saving metadata to {}".format(msg_metadata_path),
                            "info", True)
                    thread_metadata_path = os.path.join(thread_dir,
                                                        thread['id'] + ".json")

                    # Save metadata of each thread individually inside label/thread directory
                    thread_metadata_path = Common.assert_path(
                        thread_metadata_path, self.project)
                    if thread_metadata_path:
                        os.makedirs(thread_dir, exist_ok=True)
                        self.project.savedata(
                            json.dumps(thread, sort_keys=True, indent=4),
                            thread_metadata_path, False)
                        self.project.log(
                            "transaction",
                            "Saving metadata to {}".format(thread_metadata_path),
                            "info", True)
                headers = message['payload']['headers']
                label_list = ",".join(message['labelIds'])
                # internalDate is epoch milliseconds; render as UTC.
                internal_date = time.strftime(
                    "%Y-%m-%d %H:%M:%S",
                    time.gmtime(int(message['internalDate']) / 1000))
                # `x or 'N/A'` replaces the original pattern that called
                # extract_header_value twice per header.
                header_date = self.extract_header_value(headers, 'Date') or 'N/A'
                header_to = self.extract_header_value(headers, 'To') or 'N/A'
                header_from = self.extract_header_value(headers, 'From') or 'N/A'
                header_subject = self.extract_header_value(headers, 'Subject') or 'N/A'
                # NOTE(review): fields are quoted but not escaped; an embedded
                # '"' in e.g. the snippet will break this row — confirm the
                # downstream consumer before switching to the csv module.
                f.write(
                    '"{id}","{internaldate}","{labels}","{headerdate}","{to}","{xfrom}","{subject}","{snippet}","{threadid}"{sep}'
                    .format(id=message['id'],
                            internaldate=internal_date,
                            labels=label_list,
                            headerdate=header_date,
                            to=header_to,
                            xfrom=header_from,
                            subject=header_subject,
                            snippet=message['snippet'],
                            threadid=thread['id'],
                            sep=os.linesep).encode('utf-8'))
Example n. 6
0
    def sync(self):
        """Acquire Google Drive items and/or their metadata for the project.

        In "full" mode file contents are queued for download — skipping files
        whose local md5 already matches the remote checksum — and each file is
        recorded in the verification list. Metadata JSON is saved for every
        item in any mode. Trashed items are diverted to dedicated trash
        folders under the acquisition directory.
        """
        d1 = datetime.now()
        # Always instantiate the downloader: d.start()/d.wait_for_complete()
        # run unconditionally below, so leaving `d` bound to the bare class
        # in metadata-only mode (as the original did) raised TypeError.
        d = Downloader.Downloader(self.project,
                                  self.oauth_provider.http_intercept,
                                  self._save_file,
                                  self.oauth_provider.get_auth_header,
                                  self.project.threads)
        if self.project.args.mode == "full":
            self.project.log("transaction", "Full acquisition initiated",
                             "info", True)
        else:
            self.project.log("transaction", "Metadata acquisition initiated",
                             "info", True)

        self.initialize_items()
        cnt = len(self.files)
        self.project.log("transaction",
                         "Total items queued for acquisition: " + str(cnt),
                         "info", True)
        self.metadata()

        trash_folder = os.path.join(self.project.acquisition_dir, "trash")
        trash_metadata_folder = os.path.join(self.project.acquisition_dir,
                                             "trash_metadata")

        for file in self.files:
            self.project.log("transaction", "Calculating " + file['title'],
                             "info", True)
            download_uri = self._get_download_url(file)
            parentmap = self._get_parent_mapping(file, self.files)

            filetitle = self._get_file_name(file)
            if filetitle != file['title']:
                self.project.log(
                    "exception", "Normalized '" + file['title'] + "' to '" +
                    filetitle + "'", "warning", True)

            if file['labels']['trashed']:
                save_download_path = os.path.join(trash_folder, parentmap)
                save_metadata_path = os.path.join(trash_metadata_folder,
                                                  parentmap)
                save_download_path = os.path.normpath(
                    os.path.join(save_download_path, filetitle))
                save_metadata_path = os.path.normpath(
                    os.path.join(save_metadata_path, filetitle + '.json'))
            else:
                # os.path.join is variadic; no need to nest calls.
                save_download_path = os.path.normpath(
                    os.path.join(self.project.project_folders["data"],
                                 parentmap, filetitle))
                save_metadata_path = os.path.normpath(
                    os.path.join(self.project.project_folders["metadata"],
                                 parentmap, filetitle + ".json"))

            save_download_path = Common.assert_path(save_download_path,
                                                    self.project)
            save_metadata_path = Common.assert_path(save_metadata_path,
                                                    self.project)

            if self.project.args.mode == "full":
                if save_download_path:
                    v = {
                        "remote_file": os.path.join(parentmap, file['title']),
                        "local_file": save_download_path
                    }

                    download_file = True
                    if 'md5Checksum' in file:
                        v['remote_hash'] = file['md5Checksum']

                    if os.path.isfile(save_download_path):
                        if 'md5Checksum' in file:
                            # `with` closes the handle the original leaked.
                            with open(save_download_path, 'rb') as existing:
                                file_hash = Common.hashfile(existing,
                                                            hashlib.md5())
                            if file_hash == file['md5Checksum']:
                                download_file = False
                                self.project.log(
                                    "exception",
                                    "Local and remote hash matches for " +
                                    file['title'] + " ... Skipping download",
                                    "warning", True)
                            else:
                                self.project.log(
                                    "exception",
                                    "Local and remote hash differs for " +
                                    file['title'] +
                                    " ... Queuing for download", "critical",
                                    True)

                        else:
                            self.project.log(
                                "exception",
                                "No hash information for file ' " +
                                file['title'] + "'", "warning", True)

                    if download_file and download_uri:
                        self.project.log(
                            "transaction",
                            "Queueing " + file['title'] + " for download...",
                            "info", True)
                        d.put(
                            Downloader.DownloadSlip(download_uri, file,
                                                    save_download_path,
                                                    'title'))
                        if 'fileSize' in file:
                            self.file_size_bytes += int(file['fileSize'])

                    # If it's a file we can add it to verification file
                    if download_uri:
                        self.verification.append(v)

            if save_metadata_path:
                self._save_file(
                    json.dumps(file, sort_keys=True, indent=4),
                    Downloader.DownloadSlip(download_uri, file,
                                            save_metadata_path, 'title'),
                    False)

        self.project.log(
            "transaction", "Total size of files to be acquired is {}".format(
                Common.sizeof_fmt(self.file_size_bytes, "B")), "highlight",
            True)

        if self.project.args.prompt:
            IO.get("Press ENTER to begin acquisition...")

        d.start()
        d.wait_for_complete()
        d2 = datetime.now()
        delt = d2 - d1
        self.verify()
        self.project.log("transaction",
                         "Acquisition completed in {}".format(str(delt)),
                         "highlight", True)
    def sync(self):
        """Acquire Google Drive items and/or their metadata for the project.

        In "full" mode file contents are queued for download — skipping files
        whose local md5 already matches the remote checksum — and each file is
        recorded in the verification list. Metadata JSON is saved for every
        item in any mode. Trashed items are diverted to dedicated trash
        folders under the acquisition directory.
        """
        d1 = datetime.now()
        # Always instantiate the downloader: d.start()/d.wait_for_complete()
        # run unconditionally below, so leaving `d` bound to the bare class
        # in metadata-only mode (as the original did) raised TypeError.
        d = Downloader.Downloader(self.project, self.oauth_provider.http_intercept, self._save_file, self.oauth_provider.get_auth_header,
                                  self.project.threads)
        if self.project.args.mode == "full":
            self.project.log("transaction", "Full acquisition initiated", "info", True)
        else:
            self.project.log("transaction", "Metadata acquisition initiated", "info", True)

        self.initialize_items()
        cnt = len(self.files)
        self.project.log("transaction", "Total items queued for acquisition: " + str(cnt), "info", True)
        self.metadata()

        trash_folder = os.path.join(self.project.acquisition_dir, "trash")
        trash_metadata_folder = os.path.join(self.project.acquisition_dir, "trash_metadata")

        for file in self.files:
            self.project.log("transaction", "Calculating " + file['title'], "info", True)
            download_uri = self._get_download_url(file)
            parentmap = self._get_parent_mapping(file, self.files)

            filetitle = self._get_file_name(file)
            if filetitle != file['title']:
                self.project.log("exception", "Normalized '" + file['title'] + "' to '" + filetitle + "'", "warning",
                                 True)

            if file['labels']['trashed']:
                save_download_path = os.path.join(trash_folder, parentmap)
                save_metadata_path = os.path.join(trash_metadata_folder, parentmap)
                save_download_path = os.path.normpath(os.path.join(save_download_path, filetitle))
                save_metadata_path = os.path.normpath(os.path.join(save_metadata_path, filetitle + '.json'))
            else:
                # os.path.join is variadic; no need to nest calls.
                save_download_path = os.path.normpath(os.path.join(self.project.project_folders["data"], parentmap, filetitle))
                save_metadata_path = os.path.normpath(os.path.join(self.project.project_folders["metadata"], parentmap, filetitle + ".json"))

            save_download_path = Common.assert_path(save_download_path, self.project)
            save_metadata_path = Common.assert_path(save_metadata_path, self.project)

            if self.project.args.mode == "full":
                if save_download_path:
                    v = {"remote_file": os.path.join(parentmap, file['title']),
                         "local_file": save_download_path}

                    download_file = True
                    if 'md5Checksum' in file:
                        v['remote_hash'] = file['md5Checksum']

                    if os.path.isfile(save_download_path):
                        if 'md5Checksum' in file:
                            # `with` closes the handle the original leaked.
                            with open(save_download_path, 'rb') as existing:
                                file_hash = Common.hashfile(existing, hashlib.md5())
                            if file_hash == file['md5Checksum']:
                                download_file = False
                                self.project.log("exception", "Local and remote hash matches for " + file[
                                    'title'] + " ... Skipping download", "warning", True)
                            else:
                                self.project.log("exception", "Local and remote hash differs for " + file[
                                    'title'] + " ... Queuing for download", "critical", True)

                        else:
                            self.project.log("exception", "No hash information for file ' " + file['title'] + "'", "warning", True)

                    if download_file and download_uri:
                        self.project.log("transaction", "Queueing " + file['title'] + " for download...", "info", True)
                        d.put(Downloader.DownloadSlip(download_uri, file, save_download_path, 'title'))
                        if 'fileSize' in file:
                            self.file_size_bytes += int(file['fileSize'])

                    # If it's a file we can add it to verification file
                    if download_uri:
                        self.verification.append(v)

            if save_metadata_path:
                self._save_file(json.dumps(file, sort_keys=True, indent=4), Downloader.DownloadSlip(download_uri, file, save_metadata_path, 'title'), False)

        self.project.log("transaction", "Total size of files to be acquired is {}".format(
            Common.sizeof_fmt(self.file_size_bytes, "B")), "highlight", True)

        if self.project.args.prompt:
            IO.get("Press ENTER to begin acquisition...")

        d.start()
        d.wait_for_complete()
        d2 = datetime.now()
        delt = d2 - d1
        self.verify()
        self.project.log("transaction", "Acquisition completed in {}".format(str(delt)), "highlight", True)
Example n. 8
0
    def sync(self):
        """Acquire files and/or metadata for every non-directory item.

        In "full" mode both the file contents and the per-file metadata are
        queued for download; in any other mode only the metadata endpoint is
        queued. Work items are handed to a threaded Downloader and the run
        is timed end to end.
        """
        d1 = datetime.now()
        d = Downloader.Downloader(self.project,
                                  self.oauth_provider.http_intercept,
                                  self._save_file,
                                  self.oauth_provider.get_auth_header,
                                  self.project.threads)
        if self.project.args.mode == "full":
            self.project.log("transaction", "Full acquisition initiated",
                             "info", True)
        else:
            self.project.log("transaction", "Metadata acquisition initiated",
                             "info", True)

        self.initialize_items()
        cnt = len(self.files)

        self.project.log("transaction",
                         "Total items queued for acquisition: " + str(cnt),
                         "info", True)
        self.metadata()

        for file in self.files:
            self.project.log("transaction", "Calculating " + file['path'],
                             "info", True)

            if not file['is_dir']:
                # The default argument binds the *current* item; a plain
                # `lambda: self._get_download_uri(file)` would late-bind to
                # the last loop value.
                download_uri = lambda f=file: self._get_download_uri(f)
                metadata_download_uri = self.oauth_provider.config[
                    'API_ENDPOINT'] + '/metadata/auto' + file['path']
                parentmap = self._get_parent_mapping(file)
                filetitle = self._get_file_name(file)
                orig = os.path.basename(file['path'])
                if filetitle != orig:
                    self.project.log(
                        "exception",
                        "Normalized '{}' to '{}'".format(orig, filetitle),
                        "warning", True)

                if 'bytes' in file:
                    self.file_size_bytes += int(file['bytes'])

                # os.path.join is variadic; no need to nest calls.
                save_metadata_path = Common.assert_path(
                    os.path.normpath(
                        os.path.join(self.project.project_folders['metadata'],
                                     parentmap, filetitle + ".json")),
                    self.project)
                if save_metadata_path:
                    self.project.log(
                        "transaction",
                        "Queueing {} for download...".format(orig), "info",
                        True)
                    d.put(
                        Downloader.DownloadSlip(metadata_download_uri, file,
                                                save_metadata_path, 'path'))

                if self.project.args.mode == "full":
                    save_download_path = Common.assert_path(
                        os.path.normpath(
                            os.path.join(self.project.project_folders['data'],
                                         parentmap, filetitle)), self.project)
                    if save_download_path:
                        self.project.log(
                            "transaction",
                            "Queueing {} for download...".format(orig), "info",
                            True)
                        d.put(
                            Downloader.DownloadSlip(download_uri, file,
                                                    save_download_path,
                                                    'path'))

        self.project.log(
            "transaction", "Total size of files to be acquired is {}".format(
                Common.sizeof_fmt(self.file_size_bytes, "B")), "highlight",
            True)
        if self.project.args.prompt:
            IO.get("Press ENTER to begin acquisition...")

        d.start()
        d.wait_for_complete()
        d2 = datetime.now()
        delt = d2 - d1

        self.project.log("transaction",
                         "Acquisition completed in {}".format(str(delt)),
                         "highlight", True)