class BatchProgram(Program):
    """CLI program that executes a previously assembled batch of requests."""

    class Config(ProgramConfig):
        title = "batch"
        aliases = ("b",)
        description = "Execute previously created batch of requests."

    settings: NastySettings = Argument(
        alias="config", description="Overwrite default config file path."
    )
    batch_file: Path = Argument(
        alias="batch-file",
        short_alias="b",
        description="Batch file to which requests have been appended.",
        metavar="FILE",
        group=_BATCH_ARGUMENT_GROUP,
    )
    results_dir: Path = Argument(
        alias="results-dir",
        short_alias="r",
        description="Directory to which results will be written.",
        metavar="DIR",
        group=_BATCH_ARGUMENT_GROUP,
    )

    @overrides
    def run(self) -> None:
        # Load the persisted batch file, then execute every contained request,
        # writing results below the configured results directory.
        loaded = Batch()
        loaded.load(self.batch_file)
        loaded.execute(self.results_dir)
class _ExportProgram(Program):
    """CLI program that exports a queried dataset subset to a CSV file."""

    class Config(ProgramConfig):
        title = "export"
        aliases = ("e",)
        description = "Export a dataset subset to CSV."

    settings: NastyAnalysisSettings = Argument(
        alias="config", description="Overwrite default config file path."
    )
    dataset: Optional[str] = Argument(
        short_alias="d",
        description="Name of the dataset.",
        metavar="NAME",
        group=_INDEX_ARGUMENT_GROUP,
    )
    query: str = Argument(
        short_alias="q",
        description="Elasticsearch query string for the exported subset.",
        group=_EXPORT_ARGUMENT_GROUP,
    )
    output: Path = Argument(
        short_alias="o",
        description="CSV-File to which the output will be written.",
        metavar="FILE",
        group=_EXPORT_ARGUMENT_GROUP,
    )

    @overrides
    def run(self) -> None:
        # Resolve the dataset, connect to Elasticsearch, then export the
        # query-selected subset to the output CSV file.
        target = _make_dataset(self.settings, self.dataset)
        self.settings.setup_elasticsearch_connection()
        target.export(self.query, self.output)
class RequestProgram(Program):
    """Base class for CLI programs that build one request and either execute
    it immediately or append it to a batch file."""

    max_tweets: Optional[int] = Argument(
        100,
        alias="max-tweets",
        short_alias="n",
        description=(
            "Maximum number of tweets to retrieve. Set to -1 to receive as many as "
            "possible. Defaults to 100."
        ),
        metavar="N",
        group=_REQUEST_ARGUMENT_GROUP,
    )

    @validator("max_tweets")
    def _max_tweets_validator(cls, v: Optional[int]) -> Optional[int]:  # noqa: N805
        # -1 is the CLI sentinel for "no limit"; internally that is None.
        if v == -1:
            return None
        return v

    batch_size: int = Argument(
        -1,
        alias="batch-size",
        short_alias="i",
        description=(
            "Batch size to retrieve Tweets in. Set to -1 for default behavior. Only "
            "change when necessary."
        ),
        metavar="N",
        group=_REQUEST_ARGUMENT_GROUP,
    )

    @validator("batch_size")
    def _batch_size_validator(cls, v: int) -> int:  # noqa: N805
        # -1 selects the library's default batch size.
        if v == -1:
            return DEFAULT_BATCH_SIZE
        return v

    to_batch: Optional[Path] = Argument(
        alias="to-batch",
        short_alias="b",
        description="Append request to batch file instead of executing.",
        metavar="FILE",
        group=_BATCH_ARGUMENT_GROUP,
    )

    @overrides
    def run(self) -> None:
        built = self._build_request()
        if not self.to_batch:
            # Execute right away, streaming one JSON-serialized Tweet per line.
            for tweet in built.request():
                sys.stdout.write(json.dumps(tweet.to_json()) + "\n")
            return
        # Otherwise append to the (possibly pre-existing) batch file.
        batch = Batch()
        if self.to_batch.exists():
            batch.load(self.to_batch)
        self._batch_submit(batch, built)
        batch.dump(self.to_batch)

    def _build_request(self) -> Request:
        """Construct the request for this program; implemented by subclasses."""
        raise NotImplementedError()

    def _batch_submit(self, batch: Batch, request: Request) -> None:
        """Add *request* to *batch*; subclasses may expand it into several."""
        batch.append(request)
class ArgParsingProgram(Program):
    # Minimal example program used to exercise the argument-parsing machinery.
    class Config(ProgramConfig):
        title = "myprog"
        version = "0.0.0"
        description = "My program description."

    # Plain field with a default value and no Argument() metadata attached.
    foo: str = "foo"
    # Argument with a default value, a short CLI alias, help text, and a group.
    bar: int = Argument(5, short_alias="b", description="my arg desc", group=_MY_GROUP)
    # Argument with no default (hence required), a long alias, and a metavar.
    baz: int = Argument(alias="baz-alias", metavar="VALUE", group=_MY_GROUP)
    # Bare required boolean field with no Argument() metadata.
    qux: bool
class UnidifyProgram(Program):
    """CLI program that re-collects full Tweet information from Tweet-IDs via
    the official Twitter API, either from an idified batch-results directory
    or from IDs streamed on stdin."""

    class Config(ProgramConfig):
        title = "unidify"
        aliases = ("u", "unid")
        description = (
            "Collect full Tweet information from Tweet-IDs (via official Twitter API)."
        )

    settings: NastySettings = Argument(
        alias="config", description="Overwrite default config file path."
    )
    in_dir: Optional[Path] = Argument(
        alias="in-dir",
        short_alias="i",
        description="Directory with idified batch results.",
        metavar="DIR",
        group=_UNIDIFY_ARGUMENT_GROUP,
    )
    out_dir: Optional[Path] = Argument(
        alias="out-dir",
        short_alias="o",
        description=(
            "Directory to which unidified batch results will be written. If not "
            "given, will use input directory."
        ),
        metavar="DIR",
        group=_UNIDIFY_ARGUMENT_GROUP,
    )

    @validator("out_dir")
    def _out_dir_validator(
        cls, v: Optional[Path], values: Mapping[str, object]  # noqa: N805
    ) -> Optional[Path]:
        """Reject --out-dir when --in-dir was not given."""
        # Use .get(): fields that failed their own validation are omitted from
        # `values`, so plain indexing could raise an opaque KeyError instead of
        # this user-readable ValueError.
        if v and not values.get("in_dir"):
            raise ValueError("-o/--out-dir requires -i/--in-dir.")
        return v

    @overrides
    def run(self) -> None:
        if self.in_dir:
            # Directory mode: unidify a whole batch-results tree, writing to
            # out_dir if given, else in place into in_dir.
            batch_results = BatchResults(self.in_dir)
            batch_results.unidify(
                self.settings.twitter_api,
                self.out_dir if self.out_dir else self.in_dir,
            )
        else:
            # Stream mode: read one Tweet-ID per stdin line, emit one
            # JSON-serialized Tweet per stdout line (lookup misses are skipped).
            for tweet in statuses_lookup(
                (TweetId(line.strip()) for line in sys.stdin),
                self.settings.twitter_api,
            ):
                if tweet is not None:
                    sys.stdout.write(json.dumps(tweet.to_json()) + "\n")
class MyProgram(Program):
    # Example program demonstrating logging levels and tqdm progress bars.
    class Config(ProgramConfig):
        title = "myprog"
        version = nasty_utils.__version__
        description = "Description of my program."

    # Integer argument defaulting to 0.
    arg: int = Argument(0, short_alias="a", description="Description of my arg.")
    # Logging configuration; no Argument() metadata attached.
    settings: LoggingSettings

    @overrides
    def run(self) -> None:
        _LOGGER.debug("before")
        # Nested progress bars: the inner "Batch" bar is transient (leave=False).
        for i in tqdm(cast(Iterator[int], range(3)), desc="Epoch"):
            _LOGGER.debug("foo {} bar".format(i))
            for _ in tqdm(
                cast(Iterator[int], range(30)), desc="Batch {}".format(i), leave=False
            ):
                sleep(0.01)
        _LOGGER.debug("after")
        # NOTE(review): brace-style args assume _LOGGER supports {}-formatting
        # (loguru-style), unlike stdlib logging's %-style — confirm.
        _LOGGER.info("arg: '{}' {{{}}}", self.arg, bool(self.arg))
        # Emit one message at every severity to exercise log configuration.
        _LOGGER.debug("debug")
        _LOGGER.info("info")
        _LOGGER.warning("warning")
        _LOGGER.error("error")
        _LOGGER.critical("critical")
class IdifyProgram(Program):
    """CLI program that reduces a Tweet collection to bare Tweet-IDs (suitable
    for publishing), either over a batch-results directory or stdin/stdout."""

    class Config(ProgramConfig):
        title = "idify"
        aliases = ("i", "id")
        description = "Reduce Tweet-collection to Tweet-IDs (for publishing)."

    settings: NastySettings = Argument(
        alias="config", description="Overwrite default config file path."
    )
    in_dir: Optional[Path] = Argument(
        alias="in-dir",
        short_alias="i",
        description="Directory with results of a batch of requests.",
        metavar="DIR",
        group=_IDIFY_ARGUMENT_GROUP,
    )
    out_dir: Optional[Path] = Argument(
        alias="out-dir",
        short_alias="o",
        description=(
            "Directory to which Tweet-IDs will be written. If not given, will use "
            "input directory."
        ),
        metavar="DIR",
        group=_IDIFY_ARGUMENT_GROUP,
    )

    @validator("out_dir")
    def _out_dir_validator(
        cls, v: Optional[Path], values: Mapping[str, object]  # noqa: N805
    ) -> Optional[Path]:
        """Reject --out-dir when --in-dir was not given."""
        # Use .get(): fields that failed their own validation are omitted from
        # `values`, so plain indexing could raise an opaque KeyError instead of
        # this user-readable ValueError.
        if v and not values.get("in_dir"):
            raise ValueError("-o/--out-dir requires -i/--in-dir.")
        return v

    @overrides
    def run(self) -> None:
        if self.in_dir:
            # Directory mode: idify a whole batch-results tree, writing to
            # out_dir if given, else in place into in_dir.
            batch_results = BatchResults(self.in_dir)
            batch_results.idify(self.out_dir if self.out_dir else self.in_dir)
        else:
            # Stream mode: read one JSON Tweet per stdin line, write its ID.
            for line in sys.stdin:
                sys.stdout.write(str(Tweet(json.loads(line)).id) + "\n")
class MultipleSettingsProgram(Program):
    """Example program wiring two independent settings objects."""

    foo: FooSettings
    bar: BarSettings = Argument(
        short_alias="b", description="Overwrite bar setting path."
    )

    @overrides
    def run(self) -> None:
        # Multiply the two settings values and print the result.
        product = self.foo.n * self.bar.m
        print(product)  # noqa: T001
class _RetrieveProgram(Program):
    """CLI program that retrieves (downloads) a dataset."""

    class Config(ProgramConfig):
        title = "retrieve"
        aliases = ("r",)
        description = "Retrieve a dataset."

    settings: NastyAnalysisSettings = Argument(
        alias="config", description="Overwrite default config file path."
    )
    dataset: Optional[str] = Argument(
        short_alias="d",
        description="Name of the dataset.",
        metavar="NAME",
        group=_RETRIEVE_ARGUMENT_GROUP,
    )

    @overrides
    def run(self) -> None:
        # Resolve the named dataset and fetch it.
        target = _make_dataset(self.settings, self.dataset)
        target.retrieve()
class _IndexProgram(Program):
    """CLI program that indexes a dataset into Elasticsearch."""

    class Config(ProgramConfig):
        title = "index"
        aliases = ("i",)
        description = "Index a dataset into Elasticsearch."

    settings: NastyAnalysisSettings = Argument(
        alias="config", description="Overwrite default config file path."
    )
    dataset: Optional[str] = Argument(
        short_alias="d",
        description="Name of the dataset.",
        metavar="NAME",
        group=_INDEX_ARGUMENT_GROUP,
    )

    @overrides
    def run(self) -> None:
        # Resolve the dataset, connect to Elasticsearch, then index it.
        target = _make_dataset(self.settings, self.dataset)
        self.settings.setup_elasticsearch_connection()
        target.index()
class RepliesProgram(RequestProgram):
    """CLI program that retrieves all direct replies to a given Tweet."""

    class Config(ProgramConfig):
        title = "replies"
        aliases = ("r",)
        description = "Retrieve all directly replying Tweets to a Tweet."

    settings: NastySettings = Argument(
        alias="config", description="Overwrite default config file path."
    )
    tweet_id: TweetId = Argument(
        alias="tweet-id",
        short_alias="t",
        description="ID of the Tweet to retrieve replies for (required).",
        metavar="ID",
        group=_REPLIES_ARGUMENT_GROUP,
    )

    @overrides
    def _build_request(self) -> Request:
        """Build a Replies request from the parsed CLI arguments."""
        return Replies(
            self.tweet_id,
            max_tweets=self.max_tweets,
            batch_size=self.batch_size,
        )
class ThreadProgram(RequestProgram):
    """CLI program that retrieves all Tweets threaded under a given Tweet."""

    class Config(ProgramConfig):
        title = "thread"
        aliases = ("t",)
        description = "Retrieve all Tweets threaded under a Tweet."

    settings: NastySettings = Argument(
        alias="config", description="Overwrite default config file path."
    )
    tweet_id: TweetId = Argument(
        alias="tweet-id",
        short_alias="t",
        description="ID of the Tweet to retrieve threaded Tweets for (required).",
        metavar="ID",
        group=_THREAD_ARGUMENT_GROUP,
    )

    @overrides
    def _build_request(self) -> Request:
        """Build a Thread request from the parsed CLI arguments."""
        return Thread(
            self.tweet_id,
            max_tweets=self.max_tweets,
            batch_size=self.batch_size,
        )
class _GdeltProgram(Program):  # TODO: integrate into RetrieveProgram.
    # Thin wrapper that delegates entirely to nasty_analysis.gdelt.gdelt().
    class Config(ProgramConfig):
        title = "gdelt"
        aliases = ("g",)
        description = "TODO"

    settings: NastyAnalysisSettings = Argument(
        alias="config", description="Overwrite default config file path.")

    @overrides
    def run(self) -> None:
        # Import deferred to run-time so the (presumably heavy) gdelt module is
        # only loaded when this subcommand is actually invoked.
        from nasty_analysis.gdelt import gdelt

        gdelt()
class NastyProgram(Program):
    # Top-level CLI entry point: purely declarative, registers all subcommands.
    class Config(ProgramConfig):
        title = "nasty"
        version = nasty.__version__
        description = "NASTY Advanced Search Tweet Yielder."
        # Order here determines listing order in --help output — presumably;
        # confirm against the Program framework.
        subprograms = (
            SearchProgram,
            RepliesProgram,
            ThreadProgram,
            BatchProgram,
            IdifyProgram,
            UnidifyProgram,
        )

    settings: NastySettings = Argument(
        alias="config", description="Overwrite default config file path.")
class WallcropProgram(Program):
    """CLI entry point for the multi-monitor wallpaper cropping tool."""

    class Config(ProgramConfig):
        title = "wallcrop"
        version = wallcrop.__version__
        description = "Multi-monitor wallpaper cropping tool."

    settings: WallcropSettings = Argument(
        alias="config", description="Overwrite default config file path."
    )

    @overrides
    def run(self) -> None:
        # TODO: pydantic validation of settings.
        # Open the (currently hard-coded) wallpaper image and hand it, together
        # with the first configured workstation, to the cropping window.
        image_path = Path("assets/Nordic Landscape 1125x250.png")
        with Image.open(image_path) as image:
            main_window = Window(self.settings.workstations[0], image)
            main_window.mainloop()
class SearchProgram(RequestProgram):
    """CLI program that retrieves Tweets via the Twitter advanced search."""

    class Config(ProgramConfig):
        title = "search"
        aliases = ("s",)
        description = "Retrieve Tweets using the Twitter advanced search."

    settings: NastySettings = Argument(
        alias="config", description="Overwrite default config file path."
    )
    query: str = Argument(
        short_alias="q",
        description="Search string (required).",
        group=_SEARCH_ARGUMENT_GROUP,
    )
    since: Optional[date] = Argument(
        short_alias="s",
        description="Earliest date for Tweets (inclusive) as YYYY-MM-DD.",
        metavar="DATE",
        group=_SEARCH_ARGUMENT_GROUP,
    )

    @validator("since", pre=True)
    def _since_validator(cls, v: Optional[str]) -> Optional[date]:  # noqa: N805
        """Parse the raw CLI string into a date (pre=True: runs before coercion)."""
        return parse_yyyy_mm_dd(v) if v else None

    until: Optional[date] = Argument(
        short_alias="u",
        description="Latest date for Tweets (exclusive) as YYYY-MM-DD.",
        metavar="DATE",
        group=_SEARCH_ARGUMENT_GROUP,
    )

    # Dropped the redundant always=False (it is pydantic's default) for
    # consistency with _since_validator.
    @validator("until", pre=True)
    def _until_validator(cls, v: Optional[str]) -> Optional[date]:  # noqa: N805
        """Parse the raw CLI string into a date (pre=True: runs before coercion)."""
        return parse_yyyy_mm_dd(v) if v else None

    filter_: SearchFilter = Argument(
        DEFAULT_FILTER,
        alias="filter",
        short_alias="f",
        description=(
            "Sorting/filtering of Tweets (top, latest, photos, videos). Defaults "
            "to 'top'."
        ),
        group=_SEARCH_ARGUMENT_GROUP,
    )
    lang: str = Argument(
        "en",
        short_alias="l",
        # Fixed help text: language codes are ISO 639 (639-1 two-letter,
        # 639-2 three-letter); ISO 3166-1 designates *country* codes.
        description=(
            "Language for Tweets, presumably as ISO 639-1 two or ISO 639-2 three "
            "letter codes. Defaults to 'en'."
        ),
        group=_SEARCH_ARGUMENT_GROUP,
    )
    daily: bool = Argument(
        False,
        short_alias="d",
        description=(
            "For a request with since and until date, append one search request "
            "per day in the date-range with identical settings otherwise."
        ),
        group=_BATCH_ARGUMENT_GROUP,
    )

    @validator("daily")
    def _daily_validator(
        cls, v: bool, values: Mapping[str, object]  # noqa:N805
    ) -> bool:
        """--daily requires --to-batch plus both --since and --until."""
        # Use .get(): fields that failed their own validation are omitted from
        # `values`, so plain indexing could raise an opaque KeyError instead of
        # the intended, user-readable ValueError.
        if v and not values.get("to_batch"):
            raise ValueError("-d/--daily requires -b/--to-batch.")
        if v and (values.get("since") is None or values.get("until") is None):
            raise ValueError("-d/--daily requires -s/--since and -u/--until.")
        return v

    @overrides
    def _build_request(self) -> Search:
        """Assemble the Search request from the parsed CLI arguments."""
        return Search(
            self.query,
            since=self.since,
            until=self.until,
            filter_=self.filter_,
            lang=self.lang,
            max_tweets=self.max_tweets,
            batch_size=self.batch_size,
        )

    @overrides
    def _batch_submit(self, batch: Batch, request: Request) -> None:
        """Append to *batch*, expanding into one request per day when --daily."""
        request = checked_cast(Search, request)
        if self.daily:
            for daily_request in request.to_daily_requests():
                super()._batch_submit(batch, daily_request)
        else:
            super()._batch_submit(batch, request)
class _ServeProgram(Program):
    # CLI program that starts the Bokeh visualization server.
    class Config(ProgramConfig):
        title = "serve"
        aliases = ("s", )
        description = "Start Bokeh visualization server."

    settings: NastyAnalysisSettings = Argument(
        alias="config", description="Overwrite default config file path.")
    show: bool = Argument(
        False,
        short_alias="s",
        description="Open visualization server in a browser after startup.",
        group=_SERVE_ARGUMENTS_GROUP,
    )
    # NOTE(review): short_alias is usually a single character elsewhere in this
    # file — confirm "develop" is intended here.
    develop: bool = Argument(
        False,
        short_alias="develop",
        description="Run in development mode (autoreload scripts).",
        group=_SERVE_ARGUMENTS_GROUP,
    )

    @overrides
    def run(self) -> None:
        self.settings.setup_elasticsearch_connection()

        # The following is a simpler `bokeh serve src/nasty_analysis/visualization`.
        # Code for that is in `bokeh.commands.subcommands.serve.Serve.invoke`.
        # Also Bokeh provides this example:
        # https://github.com/bokeh/bokeh/blob/2.0.2/examples/howto/server_embed/standalone_embed.py

        # Server parameters come from settings; develop-mode overrides below.
        address = self.settings.analysis.serve.address
        port = self.settings.analysis.serve.port
        num_procs = self.settings.analysis.num_procs
        autoreload = False
        if self.develop:
            # Autoreload requires a single process; watch the settings file and
            # all bundled JS sources so edits trigger a reload.
            num_procs = 1
            autoreload = True
            watch(str(self.settings.find_settings_file()))
            for file in Path(nasty_analysis.__file__).parent.glob("**/*.js"):
                watch(str(file))

        # Serve the app directory, passing settings through to session contexts.
        application = ParameterPassingApplication(
            DirectoryHandler(filename=Path(serve.__file__).parent),
            server_context_params={"settings": self.settings},
        )

        with report_server_init_errors(address=address, port=port):
            server = Server(
                {"/": application},
                address=address,
                port=port,
                allow_websocket_origin=[f"{address}:{port}"],
                num_procs=num_procs,
                autoreload=autoreload,
            )
            server.start()

            if self.show:
                # Open "/" in a browser once the IO loop is running.
                server.io_loop.add_callback(server.show, "/")

            server.run_until_shutdown()