def addTweetToDatabase(repo: Dolt, table: str, data: dict): # TODO: Figure Out If Tweet Still Accessible Despite Some Error Messages if 'errors' in data and data['errors'][0]['parameter'] == 'id': errorMessage = archiveErrorMessage(data) create_table = ''' UPDATE {table} set isDeleted="{isDeleted}", json="{json}" where id={id} '''.format(table=table, id=errorMessage['id'], isDeleted=errorMessage['isDeleted'], json=errorMessage['json']) repo.sql(create_table, result_format='csv') return # Tweet Data tweet = extractTweet(data) df = getDataFrame(tweet) # Use `python3 this-script.py --log=VERBOSE` in order to see this output logger.log(VERBOSE, json.dumps(tweet, indent=4)) writeData(repo=repo, table=table, dataFrame=df, requiredKeys=['id'])
def setDeletedStatus(repo: Dolt, table: str, tweet_id: str, deleted: bool): tweets: Table = Table(table) query: QueryBuilder = Query.update(tweets) \ .set(tweets.isDeleted, int(deleted)) \ .where(tweets.id == tweet_id) repo.sql(query=query.get_sql(quote_char=None), result_format='csv')
def setStreamJSON(repo: Dolt, table: str, tweet_id: str, data: dict): sql_converter: conversion.MySQLConverter = conversion.MySQLConverter() escaped_json: str = sql_converter.escape(value=json.dumps(data)) tweets: Table = Table(table) query: QueryBuilder = Query.update(tweets) \ .set(tweets.stream_json, escaped_json) \ .where(tweets.id == tweet_id) repo.sql(query=query.get_sql(quote_char=None), result_format="csv")
def addMediaFiles(repo: Dolt, table: str, tweet_id: str, data: List[str]): sql_converter: conversion.MySQLConverter = conversion.MySQLConverter() escaped_json: str = sql_converter.escape(value=json.dumps(data)) media: Table = Table(table) query: QueryBuilder = Query.into(media) \ .insert(tweet_id, escaped_json) # query: QueryBuilder = Query.update(media) \ # .set(media.file, escaped_json) \ # .where(media.id == tweet_id) repo.sql(query=query.get_sql(quote_char=None), result_format="csv")
def lookupCurrentPresident(repo: Dolt) -> dict: current_time_query = ''' SELECT CURRENT_TIMESTAMP; ''' # I probably shouldn't be hardcoding the value of the query current_time = repo.sql(current_time_query, result_format='csv')[0]['CURRENT_TIMESTAMP()'] logger.debug("Current SQL Time: {}".format(current_time)) current_president_query = ''' select `Twitter User ID`, `Database Name` from presidents where `Start Term`<'{current_date}' and (`End Term`>'{current_date}' or `End Term` is null) limit 1; '''.format(current_date=current_time) return repo.sql(current_president_query, result_format='csv')
def createTableIfNotExists(repo: Dolt, table: str) -> str: columns = ''' `id` bigint unsigned NOT NULL, `date` datetime NOT NULL, `text` longtext NOT NULL, `device` longtext NOT NULL, `favorites` bigint unsigned NOT NULL, `retweets` bigint unsigned NOT NULL, `quoteTweets` bigint unsigned, `replies` bigint unsigned, `isRetweet` tinyint NOT NULL, `isDeleted` tinyint NOT NULL, `repliedToTweetId` bigint unsigned, `repliedToUserId` bigint unsigned, `repliedToTweetDate` datetime, `retweetedTweetId` bigint unsigned, `retweetedUserId` bigint unsigned, `retweetedTweetDate` datetime, `expandedUrls` longtext, `json` longtext, PRIMARY KEY (`id`) ''' settings = ''' ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; ''' create_table = ''' CREATE TABLE IF NOT EXISTS {table} ({columns}) {settings} '''.format(table=table, columns=columns, settings=settings) return repo.sql(create_table, result_format='csv')
def count_tweets(search_phrase: str, repo: Dolt, table: str, account_id: Optional[int] = None, hide_deleted_tweets: bool = False, only_deleted_tweets: bool = False) -> int: tweets: Table = Table(table) query: QueryBuilder = Query.from_(tweets) \ .select(Count(tweets.id)) \ .orderby(tweets.id, order=Order.desc) \ .where(Lower(tweets.text).like( search_phrase.lower() ) # TODO: lower(text) COLLATE utf8mb4_unicode_ci like lower('{search_phrase}') ) if account_id is not None: # Show Results For Specific Account query: QueryBuilder = query.where(tweets.twitter_user_id == account_id) if hide_deleted_tweets: # Filter Out Deleted Tweets query: QueryBuilder = query.where(tweets.isDeleted == 0) elif only_deleted_tweets: # Only Show Deleted Tweets query: QueryBuilder = query.where(tweets.isDeleted == 1) # Perform Count Query count_result = repo.sql(query=query.get_sql(quote_char=None), result_format="json")["rows"] # Retrieve Count of Tweets From Search for header in count_result[0]: return count_result[0][header] return -1
def retrieveAccountInfo(repo: Dolt, account_id: int) -> dict: government: Table = Table("government") query: QueryBuilder = Query.from_(government) \ .select(Star()) \ .where(government.twitter_user_id == account_id) return repo.sql(query=query.get_sql(quote_char=None), result_format='json')["rows"]
def lookupActiveAccounts(repo: Dolt) -> dict: government: Table = Table("government") query: QueryBuilder = Query.from_(government) \ .select(government.twitter_user_id, government.first_name, government.last_name) \ .where(government.archived == 0) return repo.sql(query=query.get_sql(quote_char=None), result_format='json')["rows"]
def retrieveMissingBroadcastInfo(repo: Dolt, table: str) -> dict: tweets: Table = Table(table) query: QueryBuilder = Query.from_(tweets) \ .select(tweets.id, tweets.expandedUrls) \ .where(tweets.expandedUrls.like("https://twitter.com/i/broadcasts/%")) \ .where(tweets.broadcast_json.isnull()) return repo.sql(query=query.get_sql(quote_char=None), result_format='json')["rows"]
def createTableIfNotExists(repo: Dolt, table: str): query: CreateQueryBuilder = Query.create_table(table=table) \ .columns( Column("id", "bigint unsigned", nullable=False), Column("twitter_user_id", "bigint unsigned", nullable=False), Column("date", "datetime", nullable=False), Column("text", "longtext", nullable=False), Column("device", "longtext", nullable=False), Column("favorites", "bigint unsigned", nullable=False), Column("retweets", "bigint unsigned", nullable=False), Column("quoteTweets", "bigint unsigned"), Column("replies", "bigint unsigned"), Column("isRetweet", "tinyint", nullable=False), Column("isDeleted", "tinyint", nullable=False), Column("repliedToTweetId", "bigint unsigned"), Column("repliedToUserId", "bigint unsigned"), Column("repliedToTweetDate", "datetime"), Column("retweetedTweetId", "bigint unsigned"), Column("retweetedUserId", "bigint unsigned"), Column("retweetedTweetDate", "datetime"), Column("expandedUrls", "longtext"), Column("json", "longtext"), Column("json_v1", "longtext"), Column("notes", "longtext") ).primary_key("id") # TODO: Figure Out How To Add The Below Parameters # -------------------------------------------------------------------------------------------------------------- # KEY `twitter_user_id_idx` (`twitter_user_id`), # CONSTRAINT `twitter_user_id_ref` FOREIGN KEY (`twitter_user_id`) REFERENCES `government` (`twitter_user_id`) # -------------------------------------------------------------------------------------------------------------- # ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; # -------------------------------------------------------------------------------------------------------------- repo.sql(query=query.get_sql(quote_char=None), result_format="csv")
def lookupLatestTweet(repo: Dolt, table: str) -> str: latest_tweet_id_query = ''' select id from {table} order by id desc limit 1; '''.format(table=table) tweet_id = repo.sql(latest_tweet_id_query, result_format='csv') # 1330487624402935808 # tweet_id = "1331393812728573952" if len(tweet_id) < 1 or 'id' not in tweet_id[0]: return None return tweet_id[0]['id']
def retrieveTweetJSON(repo: Dolt, table: str, tweet_id: str) -> Optional[str]: tweets: Table = Table(table) query: QueryBuilder = Query.from_(tweets) \ .select(tweets.id, tweets.json) \ .where(tweets.id == tweet_id) \ .limit(1) result = repo.sql(query=query.get_sql(quote_char=None), result_format='json')["rows"] if len(result) < 1: return None return result[0]
def retrieveMissingBroadcastFiles(repo: Dolt, tweets_table: str, media_table: str) -> dict: # select id from tweets where stream_json is not null and id not in (select id from media); media: Table = Table(media_table) sub_query: QueryBuilder = Query.from_(media) \ .select(media.id) tweets: Table = Table(tweets_table) query: QueryBuilder = Query.from_(tweets) \ .select(tweets.id, tweets.stream_json) \ .where(tweets.stream_json.notnull()) \ .where(tweets.id.notin(sub_query)) return repo.sql(query=query.get_sql(quote_char=None), result_format='json')["rows"]
def lookupLatestTweetId(repo: Dolt, table: str, twitter_user_id: str) -> Optional[int]: tweets: Table = Table(table) query: QueryBuilder = Query.from_(tweets) \ .select(tweets.id) \ .where(tweets.twitter_user_id == twitter_user_id) \ .orderby(tweets.id, order=Order.desc) \ .limit(1) tweet_id = repo.sql(query=query.get_sql(quote_char=None), result_format='json')["rows"] if len(tweet_id) < 1 or 'id' not in tweet_id[0]: return None return tweet_id[0]['id']
def pickRandomOfficials(repo: Dolt, max_results: int = 3) -> dict: # select first_name, last_name from government where twitter_user_id # is not null group by first_name, last_name order by rand() limit 3 randFunc: CustomFunction = CustomFunction("rand()") government: Table = Table("government") query: QueryBuilder = Query.from_(government) \ .select(government.first_name, government.last_name) \ .where(government.twitter_user_id.notnull()) \ .groupby(government.first_name, government.last_name) \ .orderby(randFunc.name) \ .limit(max_results) print(query.get_sql(quote_char=None)) return repo.sql(query=query.get_sql(quote_char=None), result_format='json')["rows"]
def retrieveTweet(repo: Dolt, table: str, tweet_id: str, hide_deleted_tweets: bool = False, only_deleted_tweets: bool = False) -> Optional[dict]: tweets: Table = Table(table) query: QueryBuilder = Query.from_(tweets) \ .select(Star()) \ .where(tweets.id == tweet_id) \ .limit(1) if hide_deleted_tweets: # Filter Out Deleted Tweets query: QueryBuilder = query.where(tweets.isDeleted == 0) elif only_deleted_tweets: # Only Show Deleted Tweets query: QueryBuilder = query.where(tweets.isDeleted == 1) return repo.sql(query=query.get_sql(quote_char=None), result_format='json')["rows"]
def __init__(self, repo: Dolt): ShowBase.__init__(self) # Load the environment model. self.scene = self.loader.loadModel("models/environment") # Re-parent the model to render. self.scene.reparentTo(self.render) # Apply scale and position transforms on the model. self.scene.setScale(0.25, 0.25, 0.25) self.scene.setPos(-8, 42, 0) # Add the spinCameraTask procedure to the task manager. self.taskMgr.add(self.spin_camera_task, "SpinCameraTask") # Load and transform the panda actor. self.pandaActor = Actor("models/panda-model", {"walk": "models/panda-walk4"}) self.pandaActor.setScale(0.005, 0.005, 0.005) self.pandaActor.reparentTo(self.render) # Loop its animation. self.pandaActor.loop("walk") # Create the four lerp intervals needed for the panda to # walk back and forth. posInterval1 = self.pandaActor.posInterval(13, Point3(0, -10, 0), startPos=Point3(0, 10, 0)) posInterval2 = self.pandaActor.posInterval(13, Point3(0, 10, 0), startPos=Point3(0, -10, 0)) hprInterval1 = self.pandaActor.hprInterval(3, Point3(180, 0, 0), startHpr=Point3(0, 0, 0)) hprInterval2 = self.pandaActor.hprInterval(3, Point3(0, 0, 0), startHpr=Point3(180, 0, 0)) # Create and play the sequence that coordinates the intervals. self.pandaPace = Sequence(posInterval1, hprInterval1, posInterval2, hprInterval2, name="pandaPace") self.pandaPace.loop() random_tweet_query: str = ''' select * from tweets order by rand() limit 1; ''' tweet: List[dict] = repo.sql(query=random_tweet_query, result_format='csv') tweet_text = OnscreenText(text=tweet[0]["text"], pos=(-0.5, 0.02), scale=0.07)
def fix_json(repo: Dolt, table: str, branch: str): get_json_query = ''' select * from {table} order by id asc '''.format(table=table) results = repo.sql(get_json_query, result_format='json')["rows"] for result in results: fixed_result = result try: fixed_result["json"] = json.dumps( ast.literal_eval(fixed_result["json"])) except ValueError: continue logger.warning( "Fixing Tweet {tweet_id}".format(tweet_id=fixed_result["id"])) df = getDataFrame(row=result) writeData(repo=repo, table=table, dataFrame=df, requiredKeys=['id'])
def search_tweets(search_phrase: str, repo: Dolt, table: str, max_responses: int = 10, account_id: Optional[int] = None, hide_deleted_tweets: bool = False, only_deleted_tweets: bool = False) -> dict: tweets: Table = Table(table) query: QueryBuilder = Query.from_(tweets) \ .select(tweets.id, tweets.twitter_user_id, tweets.date, tweets.text, tweets.device, tweets.favorites, tweets.retweets, tweets.quoteTweets, tweets.replies, tweets.isRetweet, tweets.isDeleted, tweets.repliedToTweetId, tweets.repliedToUserId, tweets.repliedToTweetDate, tweets.retweetedTweetId, tweets.retweetedUserId, tweets.retweetedTweetDate, tweets.hasWarning, tweets.warningLabel) \ .orderby(tweets.id, order=Order.desc) \ .limit(max_responses) \ .where(Lower(tweets.text).like( search_phrase.lower() ) # TODO: lower(text) COLLATE utf8mb4_unicode_ci like lower('{search_phrase}') ) if account_id is not None: # Show Results For Specific Account query: QueryBuilder = query.where(tweets.twitter_user_id, account_id) if hide_deleted_tweets: # Filter Out Deleted Tweets query: QueryBuilder = query.where(tweets.isDeleted == 0) elif only_deleted_tweets: # Only Show Deleted Tweets query: QueryBuilder = query.where(tweets.isDeleted == 1) # Perform Search Query # Use Commit https://github.com/dolthub/dolt/commit/6089d7e15d5fe4b02a4dc13630289baee7f937b0 Until JSON Escaping Bug Is Fixed return repo.sql(query=query.get_sql(quote_char=None), result_format="json")["rows"]
def latest_tweets(repo: Dolt, table: str, max_responses: int = 10, account_id: Optional[int] = None, hide_deleted_tweets: bool = False, only_deleted_tweets: bool = False, last_tweet_id: Optional[int] = None) -> dict: tweets: Table = Table(table) query: QueryBuilder = Query.from_(tweets) \ .select(tweets.id, tweets.twitter_user_id, tweets.date, tweets.text, tweets.device, tweets.favorites, tweets.retweets, tweets.quoteTweets, tweets.replies, tweets.isRetweet, tweets.isDeleted, tweets.repliedToTweetId, tweets.repliedToUserId, tweets.repliedToTweetDate, tweets.retweetedTweetId, tweets.retweetedUserId, tweets.retweetedTweetDate, tweets.hasWarning, tweets.warningLabel) \ .orderby(tweets.id, order=Order.desc) \ .limit(max_responses) if account_id is not None: # Show Results For Specific Account query: QueryBuilder = query.where(tweets.twitter_user_id, account_id) if last_tweet_id is not None: query: QueryBuilder = query.where(tweets.id > last_tweet_id) if hide_deleted_tweets: # Filter Out Deleted Tweets query: QueryBuilder = query.where(tweets.isDeleted == 0) elif only_deleted_tweets: # Only Show Deleted Tweets query: QueryBuilder = query.where(tweets.isDeleted == 1) # Retrieve Latest Tweets return repo.sql(query=query.get_sql(quote_char=None), result_format="json")["rows"]
def lookupLatestArchivedTweet(repo: Dolt, table: str) -> str: latest_tweet = ''' SELECT id FROM {table} ORDER BY date DESC LIMIT 1 '''.format(table=table) return repo.sql(latest_tweet, result_format='csv')