Ejemplo n.º 1
0
    SerializableEntity,
    SerializableSymbol,
    SerializableToken,
)
from entities.sentences.commands.extract_contexts import make_extract_contexts_command
from entities.sentences.types import TexWrapper
from scripts.pipelines import EntityPipeline, register_entity_pipeline

from .colorize import adjust_color_positions
from .commands.collect_symbol_locations import CollectSymbolLocations
from .commands.extract_symbols import ExtractSymbols
from .commands.find_symbol_matches import FindSymbolMatches
from .commands.locate_composite_symbols import LocateCompositeSymbols
from .upload import upload_symbols

# Directory names used by this module. Each one is passed to
# directories.register, in the order listed.
for _directory_name in (
    "detected-equation-tokens",
    "detected-symbols",
    "symbol-matches",
    "contexts-for-symbols",
    "sources-with-colorized-equation-tokens",
    "compiled-sources-with-colorized-equation-tokens",
    "paper-images-with-colorized-equation-tokens",
    "diffed-images-with-colorized-equation-tokens",
    "equation-tokens-locations",
    "composite-symbols-locations",
    "sources-with-colorized-symbols-with-affixes",
    "compiled-sources-with-colorized-symbols-with-affixes",
    "paper-images-with-colorized-symbols-with-affixes",
    "diffed-images-with-colorized-symbols-with-affixes",
    "symbols-with-affixes-locations",
    "symbols-locations",
):
    directories.register(_directory_name)
Ejemplo n.º 2
0
from common import directories
from common.commands.base import Command, CommandList
from common.commands.detect_entities import make_detect_entities_command
from common.commands.locate_entities import make_locate_entities_command
from common.commands.upload_entities import make_upload_entities_command
from entities.common import create_entity_localization_command_sequence
from scripts.pipelines import EntityPipeline, register_entity_pipeline

# from .colorize import get_definition_color_positions
from .commands.detect_definitions import DetectDefinitions
from .commands.embellish_sentences import EmbellishSentences
from .types import Definiendum, Definition, TermReference
from .upload import upload_definitions

# Register directories for output from intermediate pipeline stages. Each
# name is passed to directories.register, in the order listed.
for _directory_name in (
    "embellished-sentences",
    "detected-definitions",
    "sources-with-colorized-definitions",
    "compiled-sources-with-colorized-definitions",
    "paper-images-with-colorized-definitions",
    "diffed-images-with-colorized-definitions",
    "definitions-locations",
):
    directories.register(_directory_name)

upload_command = make_upload_entities_command(
    "definitions",
    upload_definitions,
    DetectedEntityType={
        "entities-definiendums.csv": Definiendum,
        "entities-definitions.csv": Definition,
        "entities-term-references.csv": TermReference,
    },
Ejemplo n.º 3
0
from common import directories
from common.commands.base import CommandList
from common.commands.locate_entities import make_locate_entities_command
from scripts.pipelines import EntityPipeline, register_entity_pipeline

from .colorize import colorize_citations
from .commands.extract_bibitems import ExtractBibitems
from .commands.locate_citations import LocateCitations
from .commands.resolve_bibitems import ResolveBibitems
from .commands.upload_citations import UploadCitations
from .make_digest import make_digest
from .types import Bibitem

# Directory names used by this module. Each one is passed to
# directories.register, in the order listed.
for _directory_name in (
    "detected-citations",
    "bibitem-resolutions",
    "sources-with-colorized-citations",
    "compiled-sources-with-colorized-citations",
    "paper-images-with-colorized-citations",
    "diffed-images-with-colorized-citations",
    "citations-locations",
    "citation-cluster-locations",
    "sources-with-annotated-symbols",
):
    directories.register(_directory_name)

commands: CommandList = [
    ExtractBibitems,
    ResolveBibitems,
    make_locate_entities_command("citations",
                                 DetectedEntityType=Bibitem,
                                 colorize_func=colorize_citations),
    LocateCitations,
    UploadCitations,
Ejemplo n.º 4
0
from .colorize import adjust_color_positions
from .extractor import GlossaryTermExtractor
from .upload import upload_terms

# Start from the standard command sequence produced by the shared helper.
commands = create_entity_localization_command_sequence(
    "glossary-terms",
    GlossaryTermExtractor,
    DetectedEntityType=Term,
    colorize_options=ColorizeOptions(adjust_color_positions=adjust_color_positions),
    upload_func=upload_terms,
)

# Before uploading entities, extract contexts that each term appeared in.
# The context command goes directly in front of the upload command. If several
# commands carry the upload name, the last one is used; if none does, the
# context command is added at the end of the list.
upload_command_index = max(
    (
        position
        for position, candidate in enumerate(commands)
        if candidate.get_name() == "upload-glossary-terms"
    ),
    default=len(commands),
)

directories.register("contexts-for-glossary-terms")
_extract_contexts_command = make_extract_contexts_command(
    "glossary-terms",
    EntityType=Term,
    tex_wrapper=TexWrapper(before="**", after="**"),
)
commands.insert(upload_command_index, _extract_contexts_command)

# Wrap the finished command list in a pipeline and register it.
terms_pipeline = EntityPipeline("glossary-terms", commands)
register_entity_pipeline(terms_pipeline)
Ejemplo n.º 5
0
from common import directories
from common.commands.base import CommandList
from common.commands.locate_entities import make_locate_entities_command
from common.commands.upload_entities import make_upload_entities_command
from common.types import ColorizeOptions, SerializableEntity
from entities.sentences.commands.extract_contexts import make_extract_contexts_command
from scripts.pipelines import EntityPipeline, register_entity_pipeline

from .commands.create_annotation_files import CreateAnnotationFiles
from .commands.detect_definitions import DetectDefinitions
from .commands.tokenize_sentences import TokenizeSentences
from .types import Definiendum, Definition, TermReference
from .upload import upload_definitions

# Register directories for output from intermediate pipeline stages. Each
# name is passed to directories.register, in the order listed.
for _directory_name in (
    "sentence-tokens",
    "annotation-files",
    "detected-definitions",
    "contexts-for-definitions",
    "sources-with-colorized-definitions",
    "compiled-sources-with-colorized-definitions",
    "paper-images-with-colorized-definitions",
    "diffed-images-with-colorized-definitions",
    "definitions-locations",
):
    directories.register(_directory_name)

upload_command = make_upload_entities_command(
    "definitions",
    upload_definitions,
    DetectedEntityType={
        "entities-definiendums.csv": Definiendum,
        "entities-definitions.csv": Definition,
Ejemplo n.º 6
0
from common import directories
from common.commands.base import CommandList
from entities.common import create_entity_localization_command_sequence
from scripts.pipelines import EntityPipeline, register_entity_pipeline

from .colorize import get_sentence_color_positions
from .extractor import SentenceExtractor
from .types import Sentence
from .upload import Sentence as SentenceModel
from .upload import upload_sentences

# Build the command sequence for sentences with the shared helper.
commands = create_entity_localization_command_sequence(
    entity_name="sentences",
    EntityExtractorType=SentenceExtractor,
    DetectedEntityType=Sentence,
    get_color_positions=get_sentence_color_positions,
    upload_func=upload_sentences,
)

# Register additional directories to be used by the upload function.
directories.register("sentences-model-ids")

# Wrap the commands in a pipeline, passing the sentence database model along,
# and register the pipeline.
sentences_pipeline = EntityPipeline(
    "sentences",
    commands,
    database_models=[SentenceModel],
)
register_entity_pipeline(sentences_pipeline)
Ejemplo n.º 7
0
from common.commands.raster_pages import make_raster_pages_command
from common.make_digest import make_default_paper_digest
from common.types import ArxivId, EntityProcessingDigest
from scripts.pipelines import EntityPipeline, register_entity_pipeline

from ..sentences.commands.find_entity_sentences import (
    make_find_entity_sentences_command,
)
from .commands.colorize_equation_tokens import ColorizeEquationTokens
from .commands.extract_symbols import ExtractSymbols
from .commands.find_symbol_matches import FindSymbolMatches
from .commands.find_symbol_sentences import FindSymbolSentences
from .commands.locate_symbols import LocateSymbols
from .commands.upload_symbols import UploadSymbols

# Directory names used by this module. Each one is passed to
# directories.register, in the order listed.
for _directory_name in (
    "detected-equation-tokens",
    "symbol-matches",
    "sentences-for-equation-tokens",
    "sentences-for-symbols",
    "sources-with-colorized-equation-tokens",
    "compiled-sources-with-colorized-equation-tokens",
    "paper-with-colorized-equation-tokens-images",
    "diff-images-with-colorized-equation-tokens",
    "hue-locations-for-equation-tokens",
    "symbol-locations",
):
    directories.register(_directory_name)


commands = [
    ExtractSymbols,
    FindSymbolMatches,
    make_find_entity_sentences_command("equation-tokens"),
Ejemplo n.º 8
0
def create_entity_localization_command_sequence(
    entity_name: str,
    EntityExtractorType: Type[EntityExtractor],
    extract_contexts: bool = False,
    DetectedEntityType: Optional[Type[SerializableEntity]] = None,
    upload_func: Optional[EntityUploadCallable] = None,
    colorize_options: ColorizeOptions = ColorizeOptions(),
    colorize_func: Optional[ColorizeFunc] = None,
) -> List[Type[Command]]:  # type: ignore
    """
    Build the list of commands used to locate a new type of entity.

    In the simplest case you only provide an 'entity_name', which is used to name the output
    directories and is passed to every command, and an 'EntityExtractorType' that locates all
    instances of the entity in the TeX. The returned commands detect the entities and locate
    them (colorize the entities, compile the LaTeX, raster the pages, and locate the colors in
    the pages). Additional parameters (e.g., 'colorize_options') fine-tune the commands.

    Parameters:
    - entity_name: name inserted into every registered directory name and passed to every
      command factory.
    - EntityExtractorType: extractor class passed to the detection command.
    - extract_contexts: set to True to also extract the contexts for an entity (i.e., the
      sentences in which the entities appear); the context command is placed right after
      detection.
    - DetectedEntityType: forwarded to the locate command and to the upload command.
    - upload_func: when not None, an upload command built from it is appended last.
    - colorize_options, colorize_func: forwarded unchanged to the locate command.

    Returns the commands in order: detect, (extract contexts), locate, (upload).

    If you are trying to find the locations of a new type of entity, it is highly recommended
    that you use this convenience function instead of creating new commands yourself.
    """
    # NOTE(review): the default 'colorize_options' instance is created once, when this function
    # is defined, and is shared by every call that omits the argument. Confirm it is never
    # mutated downstream.

    # Detection is always the first command.
    directories.register(f"detected-{entity_name}")
    sequence: CommandList = [
        make_detect_entities_command(entity_name, EntityExtractorType)
    ]

    # Context extraction is optional and follows detection.
    if extract_contexts:
        directories.register(f"contexts-for-{entity_name}")
        sequence.append(make_extract_contexts_command(entity_name))

    # Register the directories for the locate command before creating it.
    for directory_name in (
        f"sources-with-colorized-{entity_name}",
        f"compiled-sources-with-colorized-{entity_name}",
        f"paper-images-with-colorized-{entity_name}",
        f"diffed-images-with-colorized-{entity_name}",
        f"{entity_name}-locations",
    ):
        directories.register(directory_name)
    locate_command = make_locate_entities_command(
        entity_name, None, DetectedEntityType, colorize_options, colorize_func
    )
    sequence.append(locate_command)

    # Without an upload function there is nothing more to add.
    if upload_func is None:
        return sequence

    sequence.append(
        make_upload_entities_command(
            entity_name, upload_func, DetectedEntityType=DetectedEntityType
        )
    )
    return sequence
Ejemplo n.º 9
0
from common import directories
from common.commands.base import CommandList
from common.commands.compile_tex import make_compile_tex_command
from common.commands.diff_images import make_diff_images_command
from common.commands.locate_hues import make_locate_hues_command
from common.commands.raster_pages import make_raster_pages_command
from scripts.pipelines import EntityPipeline, register_entity_pipeline

from .commands.colorize_citations import ColorizeCitations
from .commands.extract_bibitems import ExtractBibitems
from .commands.locate_citations import LocateCitations
from .commands.resolve_bibitems import ResolveBibitems
from .commands.upload_citations import UploadCitations
from .make_digest import make_digest

# Directory names used by this module. Each one is passed to
# directories.register, in the order listed.
for _directory_name in (
    "bibitems",
    "bibitem-resolutions",
    "sources-with-colorized-citations",
    "compiled-sources-with-colorized-citations",
    "paper-with-colorized-citations-images",
    "diff-images-with-colorized-citations",
    "hue-locations-for-citations",
    "citation-locations",
    "sources-with-annotated-symbols",
):
    directories.register(_directory_name)


commands: CommandList = [
    ExtractBibitems,
    ResolveBibitems,
    ColorizeCitations,
    make_compile_tex_command("citations"),
Ejemplo n.º 10
0
def create_entity_localization_command_sequence(
    entity_name: str,
    EntityExtractorType: Type[EntityExtractor],
    DetectedEntityType: Optional[Type[SerializableEntity]] = None,
    upload_func: Optional[EntityUploadCallable] = None,
    colorize_entity_when: Optional[ColorWhenFunc] = None,
    get_color_positions: Optional[ColorPositionsFunc] = None,
) -> List[Type[Command]]:  # type: ignore
    """
    Build the list of commands used to locate a new type of entity.

    In the simplest case you only provide an 'entity_name', which is used to name the output
    directories and is passed to every command, and an 'EntityExtractorType' that locates all
    instances of the entity in the TeX. The returned commands detect the entities, colorize
    them, compile the LaTeX, raster the pages, diff the images, and locate the colors in the
    pages. Additional parameters (e.g., 'colorize_entity_when') fine-tune the commands.

    Parameters:
    - entity_name: name inserted into every registered directory name and passed to every
      command factory.
    - EntityExtractorType: extractor class passed to the detection command.
    - DetectedEntityType: forwarded to the colorize command and to the upload command.
    - upload_func: when not None, an upload command built from it is appended last.
    - colorize_entity_when: forwarded to the colorize command as its 'when' argument.
    - get_color_positions: forwarded unchanged to the colorize command.

    Returns the commands in order: detect, colorize, compile, raster, diff, locate hues,
    (upload).

    If you are trying to find the locations of a new type of entity, it is highly recommended
    that you use this convenience function instead of creating new commands yourself.
    """

    # Register directories for output from intermediate pipeline stages.
    for stage_directory in (
        f"detected-{entity_name}",
        f"sources-with-colorized-{entity_name}",
        f"compiled-sources-with-colorized-{entity_name}",
        f"paper-with-colorized-{entity_name}-images",
        f"diff-images-with-colorized-{entity_name}",
        f"hue-locations-for-{entity_name}",
    ):
        directories.register(stage_directory)

    # Create the commands one by one, in the order they appear in the result.
    sequence: CommandList = []
    sequence.append(make_detect_entities_command(entity_name, EntityExtractorType))
    sequence.append(
        make_colorize_tex_command(
            entity_name=entity_name,
            DetectedEntityType=DetectedEntityType,
            when=colorize_entity_when,
            get_color_positions=get_color_positions,
        )
    )
    sequence.append(make_compile_tex_command(entity_name))
    sequence.append(make_raster_pages_command(entity_name))
    sequence.append(make_diff_images_command(entity_name))
    sequence.append(make_locate_hues_command(entity_name))

    # Without an upload function there is nothing more to add.
    if upload_func is None:
        return sequence

    sequence.append(
        make_upload_entities_command(
            entity_name, upload_func, DetectedEntityType=DetectedEntityType
        )
    )
    return sequence