Example #1
0
 def _parse_title(self, tex_source: str) -> str:
     """Return the long title from the last ``title`` command in the source.

     The command is parsed with an optional square-bracket ``short_title``
     argument followed by a required curly-brace ``long_title`` argument;
     only the latter is returned.

     Raises
     ------
     RuntimeError
         If parsing ``tex_source`` yields no ``title`` command.
     """
     short_arg = LaTeXCommandElement(
         name="short_title", required=False, bracket="[")
     long_arg = LaTeXCommandElement(
         name="long_title", required=True, bracket="{")
     title_command = LaTeXCommand("title", short_arg, long_arg)

     matches = list(title_command.parse(tex_source))
     if not matches:
         raise RuntimeError("Could not parse a title command.")

     # A later command overrides earlier ones, so use the final match.
     final_match = matches[-1]
     return final_match["long_title"]
Example #2
0
 def _parse_date(self, tex_source: str) -> str:
     """Return the argument of the last ``date`` command in the source.

     Raises
     ------
     RuntimeError
         If parsing ``tex_source`` yields no ``date`` command.
     """
     date_arg = LaTeXCommandElement(name="date", required=True, bracket="{")
     date_command = LaTeXCommand("date", date_arg)

     matches = list(date_command.parse(tex_source))
     if not matches:
         raise RuntimeError("Could not parse a date command.")

     # A later command overrides earlier ones, so use the final match.
     final_match = matches[-1]
     return final_match["date"]
Example #3
0
    def _parse_title(self, tex_source: str) -> str:
        """Return the lsstdoc title converted from LaTeX to plain text.

        The ``long_title`` argument of the last ``title`` command is passed
        through ``prep_lsstdoc_latex`` and then ``convert_text``. If no
        ``title`` command is found, a warning is logged and an empty string
        is returned.
        """
        title_command = LaTeXCommand(
            "title",
            LaTeXCommandElement(
                name="short_title", required=False, bracket="["),
            LaTeXCommandElement(
                name="long_title", required=True, bracket="{"),
        )

        matches = list(title_command.parse(tex_source))
        if not matches:
            logger.warning("lsstdoc has no title")
            return ""

        # A later command overrides earlier ones, so use the final match.
        latex_title = prep_lsstdoc_latex(matches[-1]["long_title"])
        return convert_text(
            content=latex_title, source_fmt="latex", output_fmt="plain")
Example #4
0
    def _parse_doc_ref(self, tex_source: str) -> Optional[str]:
        """Return the handle given to the last ``setDocRef`` command.

        If no ``setDocRef`` command is found, a warning is logged and
        `None` is returned.
        """
        handle_arg = LaTeXCommandElement(
            name="handle", required=True, bracket="{")
        doc_ref_command = LaTeXCommand("setDocRef", handle_arg)

        matches = list(doc_ref_command.parse(tex_source))
        if matches:
            # A later command overrides earlier ones.
            return matches[-1]["handle"]

        logger.warning("lsstdoc has no setDocRef")
        return None
Example #5
0
    def _parse_abstract(self, tex_source: str) -> Optional[FormattedString]:
        """Return the abstract from the last ``setDocAbstract`` command.

        The command's argument is stripped of surrounding whitespace, passed
        through ``prep_lsstdoc_latex`` and wrapped with
        ``FormattedString.from_latex``. If no ``setDocAbstract`` command is
        found, a warning is logged and `None` is returned.
        """
        abstract_arg = LaTeXCommandElement(
            name="abstract", required=True, bracket="{")
        abstract_command = LaTeXCommand("setDocAbstract", abstract_arg)

        matches = list(abstract_command.parse(tex_source))
        if matches:
            # A later command overrides earlier ones.
            raw_abstract = matches[-1]["abstract"].strip()
            prepared = prep_lsstdoc_latex(raw_abstract)
            return FormattedString.from_latex(prepared)

        logger.warning("lsstdoc has no abstract")
        return None
Example #6
0
    def _parse_author(tex_source: str) -> List[Person]:
        r"""Parse the author command from TeX source.

        Goal is to parse::

           \author{
           A.~Author,
           B.~Author,
           and
           C.~Author}

        into one entry per name (``A. Author``, ``B. Author``,
        ``C. Author``), each built with
        ``LsstDocParser._parse_individual_author``.

        Names are separated by commas, by the standalone word ``and``, or by
        the LaTeX ``\and`` separator. The letters "and" inside a name (for
        example "Sandra" or "Alexander") are not treated as a separator, and
        empty names (for example from a trailing comma) are skipped.

        If no ``author`` command is found, a warning is logged and an empty
        list is returned. When the command appears more than once, the last
        occurrence is used.
        """
        # Imported locally because the module's import block is not part of
        # this block.
        import re

        command = LaTeXCommand(
            "author",
            LaTeXCommandElement(name="authors", required=True, bracket="{"),
        )
        parsed_commands = list(command.parse(tex_source))
        if not parsed_commands:
            logger.warning("lsstdoc has no author")
            return []

        # Clean content: newlines and non-breaking ties become plain spaces.
        content = parsed_commands[-1]["authors"]
        content = content.replace("\n", " ").replace("~", " ").strip()

        # Either the LaTeX ``\and`` control word, or "and" as a token that is
        # delimited by whitespace (or the string edges). A plain substring
        # match would cut names such as "Sandra" in two.
        separator = re.compile(r"\\and(?![A-Za-z])|(?<!\S)and(?!\S)")

        # Split content into list of individual authors
        authors: List[Person] = []
        for part in content.split(","):
            for name in separator.split(part):
                name = name.strip()
                if name:
                    authors.append(
                        LsstDocParser._parse_individual_author(name))

        return authors
Example #7
0
    def _parse_date(self, tex_source: str) -> Optional[datetime.datetime]:
        r"""Parse the ``\date`` command as a datetime.

        Resolution order:

        1. If there is no ``\date`` command, log a warning and return `None`.
        2. If the command's content is a ``YYYY-MM-DD`` date, return noon
           US/Pacific on that day, normalized to UTC.
        3. Otherwise (content is ``\today`` or is not parseable), fall back
           to ``self.git_repository.compute_date_modified``.

        Returns
        -------
        datetime.datetime or None
            The document date, or `None` if there is no ``\date`` command or
            the Git fallback raises.
        """
        command = LaTeXCommand(
            "date",
            LaTeXCommandElement(name="content", required=True, bracket="{"),
        )
        parsed = list(command.parse(tex_source))
        if not parsed:
            logger.warning("lsstdoc has no date command")
            return None

        # A later command overrides earlier ones, so use the final match.
        command_content = parsed[-1]["content"].strip()

        # ``\today`` carries no fixed date; go straight to the Git fallback.
        if command_content != r"\today":
            try:
                doc_datetime = datetime.datetime.strptime(
                    command_content, "%Y-%m-%d")
            except ValueError:
                logger.warning(
                    "Could not parse a datetime from "
                    "lsstdoc date command: %r",
                    command_content,
                )
            else:
                # Assume Noon LSST project time (Pacific) given a precise
                # date is not available.
                project_tz = tz.gettz("US/Pacific")
                doc_datetime = doc_datetime.replace(hour=12, tzinfo=project_tz)
                # Normalize to UTC
                return doc_datetime.astimezone(tz.UTC)

        # Fallback to parsing from Git. This is best-effort, so a failure is
        # reported in the log rather than raised.
        try:
            return self.git_repository.compute_date_modified(
                extensions=["tex", "bib", "pdf", "jpg", "png", "csv"])
        except Exception as exc:
            logger.warning(
                "Could not compute a modification date from Git: %r", exc)
            return None