예제 #1
0
def get_pipeline_api_cln_simi_db(api_track: list = None,
                                 threshold_output: int = 200,
                                 verbosity: bool = False,
                                 *,
                                 rec_lvl: int = 1):
    """ Gets a pipeline instance consisting of
            - FeedFromAPIPipe
            - CleaningPipe
            - SimiPipe
            - DBPipe
        chained in that order and wrapped in a Pipeline.

        Params:
            - api_track: Terms handed to FeedFromAPIPipe as `track`.
              None (the default) selects ["to", "and", "from"]; the
              list is created anew on every call so that separate
              pipelines never share one default object.
            - threshold_output: Forwarded unchanged to every pipe.
            - verbosity: Forwarded unchanged to every pipe.
            - rec_lvl: Keyword-only. Forwarded to SimiPipe as
              `recursion_level`; defaults to 1, the value that used
              to be hard-coded here.

        NOTE(review): earlier documentation described the return value
        as a closure driving two threads (API + processing). What this
        function visibly returns is the Pipeline object itself; confirm
        the threading behaviour in packages.pipes.collection.*
    """
    # A list literal in the signature would be evaluated once and then
    # shared by every call; build the default here instead.
    if api_track is None:
        api_track = ["to", "and", "from"]

    api_pipe = FeedFromAPIPipe(track=api_track,
                               threshold_output=threshold_output,
                               verbosity=verbosity)
    cln_pipe = CleaningPipe(previous_pipe=api_pipe,
                            threshold_output=threshold_output,
                            verbosity=verbosity)
    simi_pipe = SimiPipe(previous_pipe=cln_pipe,
                         threshold_output=threshold_output,
                         verbosity=verbosity,
                         recursion_level=rec_lvl)
    db_pipe = DBPipe(previous_pipe=simi_pipe,
                     start_fresh=True,
                     threshold_output=threshold_output,
                     verbosity=verbosity)
    return Pipeline(pipes=[api_pipe, cln_pipe, simi_pipe, db_pipe])
예제 #2
0
def get_pipeline_dsk_cln_simi_db(filepath: str,
                                 threshold_output: int = 200,
                                 verbosity: bool = False,
                                 *,
                                 rec_lvl: int = 1):  # @ Not tested.
    """ Gets a pipeline instance consisting of
            - FeedFromDiskPipe
            - CleaningPipe
            - SimiPipe
            - DBPipe
        chained in that order and wrapped in a Pipeline.

        Params:
            - filepath: Handed to FeedFromDiskPipe as `filepath`.
            - threshold_output: Forwarded unchanged to every pipe.
            - verbosity: Forwarded unchanged to every pipe.
            - rec_lvl: Keyword-only. Forwarded to SimiPipe as
              `recursion_level`; defaults to 1, the value that used
              to be hard-coded here.

        NOTE(review): the previous docstring was copied from the API
        variant (it listed FeedFromAPIPipe and an "API" thread). This
        function reads from disk and visibly returns the Pipeline
        object itself; confirm any threading behaviour in
        packages.pipes.collection.*
    """
    dsk_pipe = FeedFromDiskPipe(filepath=filepath,
                                threshold_output=threshold_output,
                                verbosity=verbosity)
    cln_pipe = CleaningPipe(previous_pipe=dsk_pipe,
                            threshold_output=threshold_output,
                            verbosity=verbosity)
    simi_pipe = SimiPipe(previous_pipe=cln_pipe,
                         threshold_output=threshold_output,
                         verbosity=verbosity,
                         recursion_level=rec_lvl)
    db_pipe = DBPipe(previous_pipe=simi_pipe,
                     start_fresh=True,
                     threshold_output=threshold_output,
                     verbosity=verbosity)
    return Pipeline(pipes=[dsk_pipe, cln_pipe, simi_pipe, db_pipe])
예제 #3
0
def get_pipeline_dsk_cln_simi_js(
        filepath: str,
        initial_query: list,
        rec_lvl: int = 1,
        threshold_output: int = 200,
        verbosity: bool = False
    ):
    """ Builds a disk-fed pipeline that ends in the Python/JS bridge.

        Stages, in order:
            - FeedFromDiskPipe
            - CleaningPipe
            - SimiPipe
            - PyJSBridgePipe
        The returned Pipeline is started with .run()

        Params:
            - filepath: Path to input dataset.
            - initial_query: Words tracked by system.
            - rec_lvl: Recursion lvl for SimiPipe(v2w).
            - threshold_output: Max data cap in each pipe.
            - verbosity: Whether or not pipes are verbose.
    """
    # Settings every stage receives unchanged.
    shared = {"threshold_output": threshold_output, "verbosity": verbosity}

    reader = FeedFromDiskPipe(filepath=filepath, **shared)
    cleaner = CleaningPipe(previous_pipe=reader, **shared)
    similarity = SimiPipe(previous_pipe=cleaner, **shared,
                          recursion_level=rec_lvl)
    bridge = PyJSBridgePipe(previous_pipe=similarity, query=initial_query,
                            **shared)

    stages = [reader, cleaner, similarity, bridge]
    return Pipeline(pipes=stages)
예제 #4
0
def get_pipeline_api_cln_simi_db(
        api_track: list = None,
        rec_lvl: int = 1,
        threshold_output: int = 200,
        verbosity: bool = False
    ):
    """ Gets a pipeline instance consisting of
            - FeedFromAPIPipe
            - CleaningPipe
            - SimiPipe
            - DBPipe
        The return is started with .run()

        Params:
            - api_track: What the Twitter API will track. None (the
              default) selects
              ["to", "and", "from", "but", "how", "why"]; the list is
              created anew on every call so that separate pipelines
              never share one default object.
            - rec_lvl: Recursion lvl for SimiPipe(v2w).
            - threshold_output: Max data cap in each pipe.
            - verbosity: Whether or not pipes are verbose.

    """
    # A list literal in the signature would be evaluated once and then
    # shared by every call; build the default here instead.
    if api_track is None:
        api_track = ["to", "and", "from", "but", "how", "why"]

    api_pipe = FeedFromAPIPipe(
        track=api_track,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    cln_pipe = CleaningPipe(
        previous_pipe=api_pipe,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    simi_pipe = SimiPipe(
        previous_pipe=cln_pipe,
        threshold_output=threshold_output,
        verbosity=verbosity,
        recursion_level=rec_lvl
    )
    db_pipe = DBPipe(
        previous_pipe=simi_pipe,
        start_fresh=True,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    return Pipeline(
        pipes=[api_pipe, cln_pipe, simi_pipe, db_pipe]
    )
예제 #5
0
def get_pipeline_api_cln_simi_js(
        api_track: list = None,
        initial_query: list = None,
        rec_lvl: int = 1,
        threshold_output: int = 200,
        verbosity: bool = False
    ):
    """ Gets a pipeline instance consisting of
            - FeedFromAPIPipe
            - CleaningPipe
            - SimiPipe
            - PyJSBridgePipe
        The return is started with .run()

        Params:
            - api_track: What the Twitter API will track. None (the
              default) selects
              ["to", "and", "from", "but", "how", "why"].
            - initial_query: Words tracked by system. None (the
              default) selects ["python"].
            - rec_lvl: Forwarded to SimiPipe as `recursion_level`.
            - threshold_output: Max data cap in each pipe.
            - verbosity: Whether or not pipes are verbose.

        Both list defaults are created anew on every call so that
        separate pipelines never share one default object.
    """
    # List literals in the signature would be evaluated once and then
    # shared by every call; build the defaults here instead.
    if api_track is None:
        api_track = ["to", "and", "from", "but", "how", "why"]
    if initial_query is None:
        initial_query = ["python"]

    api_pipe = FeedFromAPIPipe(
        track=api_track,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    cln_pipe = CleaningPipe(
        previous_pipe=api_pipe,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    simi_pipe = SimiPipe(
        previous_pipe=cln_pipe,
        threshold_output=threshold_output,
        verbosity=verbosity,
        recursion_level=rec_lvl
    )
    bridge_pipe = PyJSBridgePipe(
        previous_pipe=simi_pipe,
        query=initial_query,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    return Pipeline(
        pipes=[api_pipe, cln_pipe, simi_pipe, bridge_pipe]
    )
예제 #6
0
def get_pipeline_dsk_cln_simi_db(
        filepath: str,
        rec_lvl: int = 1,
        threshold_output: int = 200,
        verbosity: bool = False
    ):
    """ Builds a disk-fed pipeline that ends in the DB stage.

        Stages, in order:
            - FeedFromDiskPipe
            - CleaningPipe
            - SimiPipe
            - DBPipe
        The returned Pipeline is started with .run()

        Params:
            - filepath: Path to input dataset.
            - rec_lvl: Recursion lvl for SimiPipe(v2w).
            - threshold_output: Max data cap in each pipe.
            - verbosity: Whether or not pipes are verbose.
    """
    # Settings every stage receives unchanged.
    shared = {"threshold_output": threshold_output, "verbosity": verbosity}

    reader = FeedFromDiskPipe(filepath=filepath, **shared)
    cleaner = CleaningPipe(previous_pipe=reader, **shared)
    similarity = SimiPipe(previous_pipe=cleaner, **shared,
                          recursion_level=rec_lvl)
    storage = DBPipe(previous_pipe=similarity, start_fresh=True, **shared)

    stages = [reader, cleaner, similarity, storage]
    return Pipeline(pipes=stages)
예제 #7
0
def get_pipeline_dsk_cln_simi_js(filepath: str,
                                 initial_query: list,
                                 threshold_output: int = 200,
                                 verbosity: bool = False,
                                 *,
                                 rec_lvl: int = 1):
    """ Gets a pipeline instance consisting of
            - FeedFromDiskPipe
            - CleaningPipe
            - SimiPipe
            - PyJSBridgePipe
        chained in that order and wrapped in a Pipeline.

        Params:
            - filepath: Handed to FeedFromDiskPipe as `filepath`.
            - initial_query: Handed to PyJSBridgePipe as `query`.
            - threshold_output: Forwarded unchanged to every pipe.
            - verbosity: Forwarded unchanged to every pipe.
            - rec_lvl: Keyword-only. Forwarded to SimiPipe as
              `recursion_level`; defaults to 1, the value that used
              to be hard-coded here.
    """
    dsk_pipe = FeedFromDiskPipe(filepath=filepath,
                                threshold_output=threshold_output,
                                verbosity=verbosity)
    cln_pipe = CleaningPipe(previous_pipe=dsk_pipe,
                            threshold_output=threshold_output,
                            verbosity=verbosity)
    simi_pipe = SimiPipe(previous_pipe=cln_pipe,
                         threshold_output=threshold_output,
                         verbosity=verbosity,
                         recursion_level=rec_lvl)
    bridge_pipe = PyJSBridgePipe(previous_pipe=simi_pipe,
                                 query=initial_query,
                                 threshold_output=threshold_output,
                                 verbosity=verbosity)
    return Pipeline(pipes=[dsk_pipe, cln_pipe, simi_pipe, bridge_pipe])
예제 #8
0
def get_pipeline_api_cln_simi_js(api_track: list = None,
                                 initial_query: list = None,
                                 threshold_output: int = 200,
                                 verbosity: bool = False,
                                 *,
                                 rec_lvl: int = 1):
    """ Gets a pipeline instance consisting of
            - FeedFromAPIPipe
            - CleaningPipe
            - SimiPipe
            - PyJSBridgePipe
        chained in that order and wrapped in a Pipeline.

        Params:
            - api_track: Terms handed to FeedFromAPIPipe as `track`.
              None (the default) selects ["to", "and", "from"].
            - initial_query: Handed to PyJSBridgePipe as `query`.
              None (the default) selects ["python"].
            - threshold_output: Forwarded unchanged to every pipe.
            - verbosity: Forwarded unchanged to every pipe.
            - rec_lvl: Keyword-only. Forwarded to SimiPipe as
              `recursion_level`; defaults to 1, the value that used
              to be hard-coded here.

        Both list defaults are created anew on every call so that
        separate pipelines never share one default object.
    """
    # List literals in the signature would be evaluated once and then
    # shared by every call; build the defaults here instead.
    if api_track is None:
        api_track = ["to", "and", "from"]
    if initial_query is None:
        initial_query = ["python"]

    api_pipe = FeedFromAPIPipe(track=api_track,
                               threshold_output=threshold_output,
                               verbosity=verbosity)
    cln_pipe = CleaningPipe(previous_pipe=api_pipe,
                            threshold_output=threshold_output,
                            verbosity=verbosity)
    simi_pipe = SimiPipe(previous_pipe=cln_pipe,
                         threshold_output=threshold_output,
                         verbosity=verbosity,
                         recursion_level=rec_lvl)
    bridge_pipe = PyJSBridgePipe(previous_pipe=simi_pipe,
                                 query=initial_query,
                                 threshold_output=threshold_output,
                                 verbosity=verbosity)
    return Pipeline(pipes=[api_pipe, cln_pipe, simi_pipe, bridge_pipe])