def get_pipeline_api_cln_simi_db(
    api_track: list = None,
    threshold_output: int = 200,
    verbosity: bool = False,
    *,
    rec_lvl: int = 1
):
    """
    Gets a pipeline consisting of;
        - FeedFromAPIPipe
        - CleaningPipe
        - SimiPipe
        - DBPipe

    Each pipe receives the one before it as its `previous_pipe`, and the
    DBPipe is always created with start_fresh=True.

    Params:
        - api_track: Forwarded to FeedFromAPIPipe as `track`. When left
            as None, a new ["to", "and", "from"] list is used.
        - threshold_output: Forwarded to every pipe as `threshold_output`.
        - verbosity: Forwarded to every pipe as `verbosity`.
        - rec_lvl: Keyword-only. Forwarded to SimiPipe as
            `recursion_level`.

    Returns the Pipeline built from the four pipes, in the order above.

    For more information, see;
        packages.pipes.collection.*
    """
    # A list literal in the signature is created once and shared by every
    # call, so the default is built per call instead.
    if api_track is None:
        api_track = ["to", "and", "from"]

    api_pipe = FeedFromAPIPipe(
        track=api_track,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    cln_pipe = CleaningPipe(
        previous_pipe=api_pipe,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    simi_pipe = SimiPipe(
        previous_pipe=cln_pipe,
        threshold_output=threshold_output,
        verbosity=verbosity,
        recursion_level=rec_lvl
    )
    db_pipe = DBPipe(
        previous_pipe=simi_pipe,
        start_fresh=True,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    return Pipeline(pipes=[api_pipe, cln_pipe, simi_pipe, db_pipe])
def get_pipeline_dsk_cln_simi_db(
    filepath: str,
    threshold_output: int = 200,
    verbosity: bool = False,
    *,
    rec_lvl: int = 1
):
    # @ Not tested.
    """
    Gets a pipeline consisting of;
        - FeedFromDiskPipe
        - CleaningPipe
        - SimiPipe
        - DBPipe

    Each pipe receives the one before it as its `previous_pipe`, and the
    DBPipe is always created with start_fresh=True.

    Params:
        - filepath: Forwarded to FeedFromDiskPipe as `filepath`.
        - threshold_output: Forwarded to every pipe as `threshold_output`.
        - verbosity: Forwarded to every pipe as `verbosity`.
        - rec_lvl: Keyword-only. Forwarded to SimiPipe as
            `recursion_level`.

    Returns the Pipeline built from the four pipes, in the order above.

    For more information, see;
        packages.pipes.collection.*
    """
    dsk_pipe = FeedFromDiskPipe(
        filepath=filepath,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    cln_pipe = CleaningPipe(
        previous_pipe=dsk_pipe,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    simi_pipe = SimiPipe(
        previous_pipe=cln_pipe,
        threshold_output=threshold_output,
        verbosity=verbosity,
        recursion_level=rec_lvl
    )
    db_pipe = DBPipe(
        previous_pipe=simi_pipe,
        start_fresh=True,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    return Pipeline(pipes=[dsk_pipe, cln_pipe, simi_pipe, db_pipe])
def get_pipeline_dsk_cln_simi_js(
    filepath:str,
    initial_query:list,
    rec_lvl:int = 1,
    threshold_output:int = 200,
    verbosity:bool = False
):
    """
    Assembles a pipeline instance out of four chained pipes:
        - FeedFromDiskPipe
        - CleaningPipe
        - SimiPipe
        - PyJSBridgePipe

    The return is started with .run()

    Params:
        - filepath: Path to input dataset.
        - initial_query: Words tracked by system.
        - rec_lvl: Recursion lvl for SimiPipe(v2w).
        - threshold_output: Max data cap in each pipe.
        - verbosity: Whether or not pipes are verbose.
    """
    # Settings that every stage receives unchanged.
    shared = {"threshold_output": threshold_output, "verbosity": verbosity}

    # Each stage is linked to its predecessor through `previous_pipe`.
    reader = FeedFromDiskPipe(filepath=filepath, **shared)
    cleaner = CleaningPipe(previous_pipe=reader, **shared)
    similarity = SimiPipe(previous_pipe=cleaner, **shared, recursion_level=rec_lvl)
    bridge = PyJSBridgePipe(previous_pipe=similarity, query=initial_query, **shared)

    stages = [reader, cleaner, similarity, bridge]
    return Pipeline(pipes=stages)
def get_pipeline_api_cln_simi_db(
    api_track:list = None,
    rec_lvl:int = 1,
    threshold_output:int = 200,
    verbosity:bool = False
):
    """
    Gets a pipeline instance consisting of
        - FeedFromAPIPipe
        - CleaningPipe
        - SimiPipe
        - DBPipe

    The return is started with .run()

    Params:
        - api_track: What the Twitter API will track. When left as None,
            a new ["to", "and", "from", "but", "how", "why"] list is used.
        - rec_lvl: Recursion lvl for SimiPipe(v2w).
        - threshold_output: Max data cap in each pipe.
        - verbosity: Whether or not pipes are verbose.
    """
    # A list literal in the signature is created once and shared by every
    # call, so the default is built per call instead.
    if api_track is None:
        api_track = ["to", "and", "from", "but", "how", "why"]

    api_pipe = FeedFromAPIPipe(
        track=api_track,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    cln_pipe = CleaningPipe(
        previous_pipe=api_pipe,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    simi_pipe = SimiPipe(
        previous_pipe=cln_pipe,
        threshold_output=threshold_output,
        verbosity=verbosity,
        recursion_level=rec_lvl
    )
    # The database pipe is always created with start_fresh=True.
    db_pipe = DBPipe(
        previous_pipe=simi_pipe,
        start_fresh=True,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    return Pipeline(
        pipes=[api_pipe, cln_pipe, simi_pipe, db_pipe]
    )
def get_pipeline_api_cln_simi_js(
    api_track:list = None,
    initial_query:list = None,
    rec_lvl:int = 1,
    threshold_output:int = 200,
    verbosity:bool = False
):
    """
    Gets a pipeline instance consisting of
        - FeedFromAPIPipe
        - CleaningPipe
        - SimiPipe
        - PyJSBridgePipe

    The return is started with .run()

    Params:
        - api_track: What the Twitter API will track. When left as None,
            a new ["to", "and", "from", "but", "how", "why"] list is used.
        - initial_query: Words tracked by system. When left as None,
            a new ["python"] list is used.
        - rec_lvl: Recursion lvl for SimiPipe(v2w).
        - threshold_output: Max data cap in each pipe.
        - verbosity: Whether or not pipes are verbose.
    """
    # List literals in the signature are created once and shared by every
    # call, so both defaults are built per call instead.
    if api_track is None:
        api_track = ["to", "and", "from", "but", "how", "why"]
    if initial_query is None:
        initial_query = ["python"]

    api_pipe = FeedFromAPIPipe(
        track=api_track,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    cln_pipe = CleaningPipe(
        previous_pipe=api_pipe,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    simi_pipe = SimiPipe(
        previous_pipe=cln_pipe,
        threshold_output=threshold_output,
        verbosity=verbosity,
        recursion_level=rec_lvl
    )
    bridge_pipe = PyJSBridgePipe(
        previous_pipe=simi_pipe,
        query=initial_query,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    return Pipeline(
        pipes=[api_pipe, cln_pipe, simi_pipe, bridge_pipe]
    )
def get_pipeline_dsk_cln_simi_db(
    filepath:str,
    rec_lvl:int = 1,
    threshold_output:int = 200,
    verbosity:bool = False
):
    """
    Assembles a pipeline instance out of four chained pipes:
        - FeedFromDiskPipe
        - CleaningPipe
        - SimiPipe
        - DBPipe

    The return is started with .run()

    Params:
        - filepath: Path to input dataset.
        - rec_lvl: Recursion lvl for SimiPipe(v2w).
        - threshold_output: Max data cap in each pipe.
        - verbosity: Whether or not pipes are verbose.
    """
    # Settings that every stage receives unchanged.
    shared = {"threshold_output": threshold_output, "verbosity": verbosity}

    # Each stage is linked to its predecessor through `previous_pipe`.
    reader = FeedFromDiskPipe(filepath=filepath, **shared)
    cleaner = CleaningPipe(previous_pipe=reader, **shared)
    similarity = SimiPipe(previous_pipe=cleaner, **shared, recursion_level=rec_lvl)
    # The database stage is always created with start_fresh=True.
    storage = DBPipe(previous_pipe=similarity, start_fresh=True, **shared)

    stages = [reader, cleaner, similarity, storage]
    return Pipeline(pipes=stages)
def get_pipeline_dsk_cln_simi_js(
    filepath: str,
    initial_query: list,
    threshold_output: int = 200,
    verbosity: bool = False,
    *,
    rec_lvl: int = 1
):
    """
    Gets a pipeline consisting of;
        - FeedFromDiskPipe
        - CleaningPipe
        - SimiPipe
        - PyJSBridgePipe

    Each pipe receives the one before it as its `previous_pipe`.

    Params:
        - filepath: Forwarded to FeedFromDiskPipe as `filepath`.
        - initial_query: Forwarded to PyJSBridgePipe as `query`.
        - threshold_output: Forwarded to every pipe as `threshold_output`.
        - verbosity: Forwarded to every pipe as `verbosity`.
        - rec_lvl: Keyword-only. Forwarded to SimiPipe as
            `recursion_level`.

    Returns the Pipeline built from the four pipes, in the order above.
    """
    dsk_pipe = FeedFromDiskPipe(
        filepath=filepath,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    cln_pipe = CleaningPipe(
        previous_pipe=dsk_pipe,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    simi_pipe = SimiPipe(
        previous_pipe=cln_pipe,
        threshold_output=threshold_output,
        verbosity=verbosity,
        recursion_level=rec_lvl
    )
    bridge_pipe = PyJSBridgePipe(
        previous_pipe=simi_pipe,
        query=initial_query,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    return Pipeline(pipes=[dsk_pipe, cln_pipe, simi_pipe, bridge_pipe])
def get_pipeline_api_cln_simi_js(
    api_track: list = None,
    initial_query: list = None,
    threshold_output: int = 200,
    verbosity: bool = False,
    *,
    rec_lvl: int = 1
):
    """
    Gets a pipeline consisting of;
        - FeedFromAPIPipe
        - CleaningPipe
        - SimiPipe
        - PyJSBridgePipe

    Each pipe receives the one before it as its `previous_pipe`.

    Params:
        - api_track: Forwarded to FeedFromAPIPipe as `track`. When left
            as None, a new ["to", "and", "from"] list is used.
        - initial_query: Forwarded to PyJSBridgePipe as `query`. When
            left as None, a new ["python"] list is used.
        - threshold_output: Forwarded to every pipe as `threshold_output`.
        - verbosity: Forwarded to every pipe as `verbosity`.
        - rec_lvl: Keyword-only. Forwarded to SimiPipe as
            `recursion_level`.

    Returns the Pipeline built from the four pipes, in the order above.
    """
    # List literals in the signature are created once and shared by every
    # call, so both defaults are built per call instead.
    if api_track is None:
        api_track = ["to", "and", "from"]
    if initial_query is None:
        initial_query = ["python"]

    api_pipe = FeedFromAPIPipe(
        track=api_track,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    cln_pipe = CleaningPipe(
        previous_pipe=api_pipe,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    simi_pipe = SimiPipe(
        previous_pipe=cln_pipe,
        threshold_output=threshold_output,
        verbosity=verbosity,
        recursion_level=rec_lvl
    )
    bridge_pipe = PyJSBridgePipe(
        previous_pipe=simi_pipe,
        query=initial_query,
        threshold_output=threshold_output,
        verbosity=verbosity
    )
    return Pipeline(pipes=[api_pipe, cln_pipe, simi_pipe, bridge_pipe])