def next_batch(self):
    """Search one day of tweets and return them as database rows.

    The search window starts at ``self.next_datetime`` and ends one day
    later; both bounds are passed to ``premium_search`` formatted as
    ``%Y%m%d%H%M``.  Pages are requested until at least
    ``self.tweets_per_day`` tweets have been counted or the returned token
    equals ``"Not existed!"``.  The limit is checked only before each
    request, so the final page may push the total above it.

    Error handling inside the paging loop:
      * an exception whose text starts with ``88:`` or ``429:`` is logged
        as a rate limit, ``program_sleep(61)`` is called and the same page
        is requested again;
      * any other exception is written to the log and paging stops.

    In every case ``self.next_datetime`` is advanced by one day before
    returning.

    Returns:
        list[list]: one row per tweet, in the order ``tweet_id``,
        ``full_text``, ``created_at``, ``language``, ``hashtags_str``,
        ``mentions_str``, ``favorite_count``, ``retweet_count``.
    """
    # Column order of every row handed back to the caller.
    row_fields = (
        "tweet_id",
        "full_text",
        "created_at",
        "language",
        "hashtags_str",
        "mentions_str",
        "favorite_count",
        "retweet_count",
    )
    one_day = timedelta(days=1)

    rows = []
    from_date_str = self.next_datetime.strftime("%Y%m%d%H%M")
    window_end = (self.next_datetime + one_day).strftime("%Y%m%d%H%M")

    print(f"Searching for date {from_date_str}...")
    write_to_log(
        self.log_file,
        f'-------[{datetime.now().strftime("%H:%M:%S")}] Start searching for date {from_date_str}-------'
    )

    fetched = 0
    page_token = None
    while True:
        # Stop once the daily quota is met or the API reports no further page.
        if not (fetched < self.tweets_per_day) or page_token == "Not existed!":
            break
        try:
            page, page_token = premium_search(
                product=self.product,
                label=self.label,
                query=self.query,
                from_date=from_date_str,
                to_date=window_end,
                next_token=page_token,
            )
            fetched += len(page)
            for tweet in page:
                rows.append([tweet[field] for field in row_fields])
        except Exception as e:
            # The text before the first ":" is treated as the error code.
            error_code = str(e).partition(":")[0]
            if error_code not in ("88", "429"):
                write_to_log(self.log_file, f"{e}")
                break
            print("Rate limit exceeded!")
            write_to_log(
                self.log_file,
                f'**[{datetime.now().strftime("%H:%M:%S")}]** Rate limit exceeded! Next date to search: {from_date_str}'
            )
            program_sleep(61)

    # Move the cursor to the next day regardless of how paging ended.
    self.next_datetime += one_day
    write_to_log(
        self.log_file,
        f'-------[{datetime.now().strftime("%H:%M:%S")}] Finished searching for date {from_date_str}-------\n'
    )
    return rows
# Tail of a script entry point; its opening statements are above this view
# (`t` in the first create_table call is bound there).
# Visible flow:
#   1. create the table described by `t`, then commit;
#   2. if CLEAR_TABLE is truthy, clear the RETWEETS table and commit;
#   3. build JULY_RETWEET_SEARCH for the fixed window
#      2020-07-02 21:00:00 .. 2020-07-02 21:53:20 and call save_next_batch()
#      for as long as has_next_batch() returns true;
#   4. when an exception's text is exactly "Rate limit exceeded!", call
#      program_sleep(60) before the loop re-checks has_next_batch();
#   5. commit and close CONN.
# NOTE(review): exceptions with any other text are dropped by the
# `except Exception` handler without logging — confirm this is intended.
# NOTE(review): the trailing bare `except:` presumably pairs with a `try:`
# opened above this view — confirm against the full file.
create_table(table_name=t.name, cols_constraints_dict=t.cols_const, cur=CUR, primary_key=t.pk, foreign_keys=t.fks) CONN.commit() if CLEAR_TABLE: for t in [RETWEETS]: clear_table(t.name, CUR) CONN.commit() """recent search""" end_time = datetime.strptime("2020-07-02_21:53:20", "%Y-%m-%d_%H:%M:%S") start_time = datetime.strptime("2020-07-02_21:00:00", "%Y-%m-%d_%H:%M:%S") jrs = JULY_RETWEET_SEARCH(PRODUCT, LABEL, SEARCH_QUERY, start_time, end_time, BEARER_TOKEN, CONN, CUR) while jrs.has_next_batch(): try: jrs.save_next_batch() except Exception as e: if str(e) == "Rate limit exceeded!": program_sleep(60) except: print(traceback.format_exc()) """close db connection""" CONN.commit() CONN.close()
# The leading `else:` clause is the end of a method whose start is above this
# view; it sets self.next_id_ptr to len(self.tweets_id_list).
#
# Script entry point (`if __name__ == "__main__":`):
#   1. open the SQLite database DB_FILE and create a cursor;
#   2. build a BearerTokenAuth from consumer_key / consumer_secret;
#   3. create UPDATE_STREAM_RETWEETS and call add_text_column();
#   4. call save_next_batch() while has_next_batch() returns true;
#      - an exception whose text is exactly "[Self Defined]Limit Exceeded!"
#        triggers program_sleep(900) and the loop continues,
#      - any other exception is printed and ends the loop;
#   5. commit, close CONN, and print "Finished updating!".
# NOTE(review): the console message "Limite Exceeded!" looks like a typo for
# "Limit Exceeded!".
# NOTE(review): CONN is not closed if add_text_column() or has_next_batch()
# raises; a try/finally around the loop would guarantee the close.
else: self.next_id_ptr = len(self.tweets_id_list) if __name__ == "__main__": """db connection""" CONN = sqlite3.connect(DB_FILE) CUR = CONN.cursor() """authorization""" BEARER_TOKEN = BearerTokenAuth(consumer_key, consumer_secret) usr = UPDATE_STREAM_RETWEETS(BEARER_TOKEN, CONN, CUR) usr.add_text_column() while usr.has_next_batch(): try: usr.save_next_batch() except Exception as e: error_msg = str(e) if error_msg == "[Self Defined]Limit Exceeded!": print("Limite Exceeded!") program_sleep(900) else: print(error_msg) break """close db connection""" CONN.commit() CONN.close() print("Finished updating!")
# Tail of a script entry point; its opening statements are above this view.
# Visible flow, repeated forever by `while True`:
#   1. checkpoint_timestamp = current time converted to UTC, minus 1 minute;
#   2. start_time = checkpoint_timestamp - 7 days + 1 minute;
#   3. build RECENT_SEARCH for [start_time, checkpoint_timestamp] and call
#      save_next_batch() while has_next_batch() is true AND less than one day
#      has elapsed since checkpoint_timestamp;
#   4. when an exception's text contains "429" or "Rate limit exceeded",
#      call program_sleep(900); rs.update_start_time(now - 7 days + 1 minute)
#      follows that call;
#   5. if less than one day has elapsed since checkpoint_timestamp, call
#      program_sleep with the remaining time in seconds, rounded up.
# After the loop construct: a bare `except:` prints the traceback, then CONN
# is committed and closed.
# NOTE(review): the original indentation is lost here, so whether
# update_start_time() runs only on the rate-limit path, and whether step 5
# sits inside or after the inner `while`, must be confirmed against the file.
# NOTE(review): the bare `except:` presumably pairs with a `try:` opened
# above this view — confirm.
CONN.commit() """recent search""" while True: checkpoint_timestamp = datetime.now().astimezone( pytz.utc) - timedelta(minutes=1) start_time = checkpoint_timestamp - timedelta(days=7) + timedelta( minutes=1) rs = RECENT_SEARCH(SEARCH_QUERY, start_time, checkpoint_timestamp, BEARER_TOKEN, CONN, CUR) while rs.has_next_batch() and (datetime.now().astimezone( pytz.utc) - checkpoint_timestamp < timedelta(days=1)): try: rs.save_next_batch() except Exception as e: if "429" in str(e) or "Rate limit exceeded" in str(e): program_sleep(900) rs.update_start_time(datetime.now().astimezone(pytz.utc) - timedelta(days=7) + timedelta(minutes=1)) if datetime.now().astimezone( pytz.utc) - checkpoint_timestamp < timedelta(days=1): sleep_time = ( checkpoint_timestamp + timedelta(days=1) - datetime.now().astimezone(pytz.utc)).total_seconds() program_sleep(math.ceil(sleep_time)) except: print(traceback.format_exc()) """close db connection""" CONN.commit() CONN.close()
# Tail of a script entry point; its opening statements are above this view.
# Visible flow:
#   1. create the REGULAR_TWEETS and RETWEETS tables, then commit;
#   2. if CLEAR_TABLE is truthy, clear both tables and commit;
#   3. build FILTERED_STREAM from BEARER_TOKEN and FILTER_RULES and call
#      rules_setup();
#   4. loop forever calling stream_connect(); `timeout` is reset to 0 each
#      time the call returns without raising;
#      - when an exception's text starts with "429", call
#        program_sleep(2 ** timeout) and then increment `timeout`, so the
#        argument doubles on each consecutive 429;
#      - any other exception is printed and the loop reconnects at once;
#   5. a bare `except:` prints the traceback, then CONN is committed and
#      closed.
# NOTE(review): the backoff exponent has no upper bound — consider a cap.
# NOTE(review): the bare `except:` presumably pairs with a `try:` opened
# above this view — confirm.
for t in [REGULAR_TWEETS, RETWEETS]: create_table(table_name=t.name, cols_constraints_dict=t.cols_const, cur=CUR, primary_key=t.pk, foreign_keys=t.fks) CONN.commit() if CLEAR_TABLE: for t in [REGULAR_TWEETS, RETWEETS]: clear_table(t.name, CUR) CONN.commit() """filter stream""" fs = FILTERED_STREAM(BEARER_TOKEN, FILTER_RULES, CONN, CUR) fs.rules_setup() timeout = 0 while True: try: fs.stream_connect() timeout = 0 except Exception as e: if str(e).startswith("429"): program_sleep(2 ** timeout) timeout += 1 else: print(e) except: print(traceback.format_exc()) """close db connection""" CONN.commit() CONN.close()