コード例 #1
0
ファイル: _base.py プロジェクト: uptake/updoc
    def extract_docs_from_tarball(self,
                                  category: str,
                                  doc_name: str,
                                  tmp_tarball_fp: str):
        """
        Extracts the tarball into a static html folder for serving in the application and adds the doc to the registry.

        The archive is unpacked into ``docserver/static/<category>``. Any existing
        ``docserver/static/<category>/<doc_name>`` directory is removed before extraction,
        but only after the archive has been opened and its member paths validated, so an
        unreadable or malicious upload does not destroy the previously served document.
        Finally the registry entry ``<category>_<doc_name>`` is set to
        ``static/<category>/<doc_name>``.

        Args:
            category (str): Category of the incoming document tarball.
            doc_name (str): Document name of the incoming tarball.
            tmp_tarball_fp (str): File path to the tarball to store and extract.

        Raises:
            ValueError: If a member of the tarball would be written outside of the category directory.
        """
        logger.info("Extracting document: {doc_filename} with category: "
                    "{doc_category} from tarball.".format(doc_filename=doc_name,
                                                          doc_category=category))
        doc_category_dir = os.path.join('docserver', 'static', category)
        this_doc_dir = os.path.join(doc_category_dir, doc_name)
        flask_doc_path = os.path.join('static', category, doc_name)

        # Create the category directory (and any missing parents); exist_ok avoids the
        # check-then-create race of a separate os.path.exists() test.
        os.makedirs(doc_category_dir, exist_ok=True)

        # TODO: Add check to make sure extracted tarball folder has the same name as doc_name.
        # TODO: Add check to make sure index.html exists.
        with tarfile.open(tmp_tarball_fp, mode="r:gz") as tar:
            # The tarball may come from an upload, so refuse member names that are absolute
            # or use ".." to escape the category directory.
            # NOTE(review): link members (symlinks / hard links) are not inspected here;
            # consider tarfile's extraction filters where the runtime supports them.
            extraction_root = os.path.abspath(doc_category_dir)
            for member in tar.getmembers():
                member_dest = os.path.abspath(os.path.join(extraction_root, member.name))
                if os.path.commonpath([extraction_root, member_dest]) != extraction_root:
                    raise ValueError(
                        "Refusing to extract tarball member outside of the category "
                        "directory: {member_name}".format(member_name=member.name))

            # Remove previous document html if it's already there.
            if os.path.exists(this_doc_dir):
                rmtree(this_doc_dir, ignore_errors=True)

            tar.extractall(path=doc_category_dir)

        self.registry.set(name=category + "_" + doc_name,
                          value=flask_doc_path)
コード例 #2
0
def home_post():
    """Handles the upload of a document tarball sent as the ``file`` part of a POST request.

    The uploaded file name is expected to look like ``<category>_<doc_name>.tar.gz``. The
    upload is copied to a temporary directory and its path is handed to ``doc_storage``
    under the id ``<category>_<doc_name>``.

    Returns:
        A redirect to the request URL when the request has no ``file`` part, or a
        ``(message, 201)`` tuple when the upload was processed without error.

    Raises:
        Aborts with ``ERROR_CODE`` when anything fails while saving or storing the
        tarball, and with 400 when the ``file`` part is present but empty.
    """
    if 'file' not in request.files:
        return redirect(request.url)
    doc_obj = request.files['file']
    if doc_obj:
        try:
            logger.info(
                'Attempting to save {filename} document tarball.'.format(
                    filename=doc_obj.filename))
            # The file name is supplied by the client: keep only its final path component
            # so it cannot steer the temporary file outside of the temporary directory.
            upload_name = os.path.basename(doc_obj.filename.replace("\\", "/"))
            # Strip the extension only when it is a real suffix, not wherever it appears.
            tarball_suffix = ".tar.gz"
            if upload_name.endswith(tarball_suffix):
                doc_id = upload_name[:-len(tarball_suffix)]
            else:
                doc_id = upload_name
            # Raises ValueError (handled below) when the name has no "_" separator.
            _, doc_name = doc_id.split('_', 1)
            doc_obj.seek(0)

            with TemporaryDirectory() as tmp_dir:
                tmp_tarball_fp = os.path.join(tmp_dir, doc_name + ".tar.gz")

                with open(tmp_tarball_fp, 'wb') as tmp_tarball_file:
                    copyfileobj(doc_obj, tmp_tarball_file, length=16384)

                # Must happen inside the ``with`` block: the temporary file is deleted
                # as soon as the temporary directory is cleaned up.
                doc_storage[doc_id] = tmp_tarball_fp
        except Exception as e:
            log_exception(raised_exception=e)
            abort(
                ERROR_CODE,
                'Something failed with uploading, storing, or extracting your document tarball.'
            )
        else:
            msg = "Document: {doc_name} was correctly uploaded, stored, and extracted.\n".format(
                doc_name=doc_name)
            return msg, 201

    return abort(400,
                 'You must upload a tarball file to use the POST endpoint.')
コード例 #3
0
    def store_tarball(self,
                      category: str,
                      doc_name: str,
                      tmp_tarball_fp: str):
        """Stores the tarball in the folder specified by ``category`` in the S3 bucket.

        The object key is ``<s3_folder>/<category>/<doc_name>.tar.gz``.

        Args:
            category (str): Category of the incoming document tarball.
            doc_name (str): Document name of the incoming tarball.
            tmp_tarball_fp (str): File path to the tarball to store.
        """
        # Local import keeps this block self-contained; posixpath is part of the standard library.
        import posixpath

        logger.info("Storing document: {doc_filename} with category: "
                    "{doc_category} in S3 bucket.".format(doc_filename=doc_name,
                                                          doc_category=category))
        # S3 keys always use "/" as the separator, so build the key with posixpath rather
        # than os.path, which would produce backslashes on Windows.
        s3_tarball_loc = posixpath.join(self.s3_folder, category, doc_name + ".tar.gz")

        self.bucket.upload_file(tmp_tarball_fp, s3_tarball_loc)
コード例 #4
0
    def initialize_storage(self):
        """Pulls down any previously stored tarballs from S3 and initializes the static html for each.

        Every object key under ``s3_folder`` is expected to end in
        ``<category>/<filename>``. Keys ending in "/" (directory placeholders) and keys
        without a category component are skipped. Each remaining object is downloaded
        to a temporary directory and passed to ``extract_docs_from_tarball``.
        """
        logger.info("Initializing S3 storage, pulling down any docs from S3 if they exist.")
        # The single-argument join leaves the prefix unchanged.
        available_docs = [obj.key for obj in self.bucket.objects.filter(Prefix=os.path.join(self.s3_folder))]
        tarball_suffix = ".tar.gz"

        for document_key in available_docs:
            key_parts = document_key.split("/")

            # A key without any "/" has no category component; skip it instead of
            # failing the whole initialization on the unpacking below.
            if len(key_parts) < 2:
                logger.info("Skipping S3 object without a category: {document_key}".format(
                    document_key=document_key))
                continue

            doc_category, doc_filename = key_parts[-2:]

            # Only want to download non-directories.
            if doc_filename == '':
                continue

            logger.info("Downloading document: {doc_filename} with category: "
                        "{doc_category} from S3.".format(doc_filename=doc_filename,
                                                         doc_category=doc_category))

            # Strip the extension only when it is a real suffix, not wherever it appears.
            if doc_filename.endswith(tarball_suffix):
                doc_name = doc_filename[:-len(tarball_suffix)]
            else:
                doc_name = doc_filename

            with TemporaryDirectory() as tmp_dir:
                target_path = os.path.join(tmp_dir, doc_filename)
                self.bucket.download_file(document_key, target_path)
                self.extract_docs_from_tarball(category=doc_category,
                                               doc_name=doc_name,
                                               tmp_tarball_fp=target_path)
コード例 #5
0
ファイル: _local.py プロジェクト: uptake/updoc
    def store_tarball(self, category: str, doc_name: str, tmp_tarball_fp: str):
        """Stores an incoming tarball locally in ``tarball_dir``.

        The tarball is copied to ``<tarball_dir>/<category>/<doc_name>.tar.gz``,
        replacing whatever was stored under that path before.

        Args:
            category (str): Category of the incoming document tarball.
            doc_name (str): Document name of the incoming tarball.
            tmp_tarball_fp (str): File path to the tarball to store.
        """
        logger.info("Storing document: {doc_filename} with category: "
                    "{doc_category} locally.".format(doc_filename=doc_name,
                                                     doc_category=category))
        tarball_category_dir = os.path.join(self.tarball_dir, category)
        dest_tarball_fp = os.path.join(tarball_category_dir,
                                       doc_name + ".tar.gz")

        # Create the category directory (and any missing parents); exist_ok avoids the
        # check-then-create race of a separate os.path.exists() test.
        os.makedirs(tarball_category_dir, exist_ok=True)

        # copyfile() overwrites an existing tarball by itself, so only a directory that
        # happens to occupy the destination path has to be cleared out first.
        if os.path.isdir(dest_tarball_fp):
            rmtree(dest_tarball_fp, ignore_errors=True)

        copyfile(tmp_tarball_fp, dest_tarball_fp)