def __init__(self):
    """Create the sentiment helper, connect, and cache the stored tweets.

    Calls, in order, ``Sentiment()``, ``self.connect()``,
    ``self.create_table()`` and ``self.get_all()``; the result of the last
    call is kept in ``self.all``.

    Raises:
        Exception: whatever ``self.connect()`` raised, re-raised after the
            failure has been printed.
    """
    self.st = Sentiment()
    try:
        self.connect()
    except Exception as e:
        print("Failure in connection: {}".format(str(e)))
        # Do not carry on without a connection: create_table()/get_all()
        # would only fail later with an unrelated error that hides the real
        # cause, so surface the original exception instead.
        raise
    self.create_table()
    self.all = self.get_all()
def __init__(self):
    """Create the sentiment helper, connect, and cache the stored tweets.

    Calls, in order, ``Sentiment()``, ``self.connect()``,
    ``self.create_table()`` and ``self.get_all()``; the result of the last
    call is kept in ``self.all``.

    Raises:
        Exception: whatever ``self.connect()`` raised, re-raised after the
            failure has been printed.
    """
    self.st = Sentiment()
    try:
        self.connect()
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # SystemExit are not reported as connection failures.
        print("Failure in connection")
        # Do not carry on without a connection: the calls below would only
        # fail later with an unrelated error that hides the real cause.
        raise
    self.create_table()
    self.all = self.get_all()
def __init__(self):
    """Build the collaborators and reset the collection state.

    Instantiates ``Database``, ``Sentiment`` and ``Report`` (in that order)
    and initialises ``index`` and ``count_tweets`` to 0 and ``state`` to the
    empty string.
    """
    database = Database()
    self.db = database
    sentiment = Sentiment()
    self.st = sentiment
    report = Report()
    self.rp = report

    # Tweepy OAuth set-up, currently disabled:
    # self.auth = tweepy.OAuthHandler(GLOBAL_CFG['consumer_key'], GLOBAL_CFG['consumer_secret'])
    # self.auth.set_access_token(GLOBAL_CFG['access_token'], GLOBAL_CFG['access_token_secret'])

    self.state = ""
    self.index = self.count_tweets = 0
class Database:
    """PostgreSQL store for collected tweets and per-state search progress.

    The instance keeps ``self.all``, an in-memory list of
    ``(id_twitter, content)`` pairs loaded at start-up and extended by
    ``main()``, which ``close_matches()`` uses for duplicate detection.

    All statements that take values pass them to ``cursor.execute`` as
    parameters instead of interpolating them into the SQL text.
    """

    # Value returned by get_since_date() when no row exists for a state.
    DEFAULT_SINCE_DATE = '2021-01-17'

    def __init__(self):
        """Connect, make sure the tables exist and load the stored tweets.

        Raises:
            Exception: whatever ``connect()`` raised, re-raised after the
                failure has been printed.
        """
        self.st = Sentiment()
        try:
            self.connect()
        except Exception as e:
            print("Failure in connection: {}".format(str(e)))
            # connect() is what sets self.cursor; continuing would only fail
            # in create_table() with an AttributeError hiding the real cause.
            raise
        self.create_table()
        self.all = self.get_all()

    def environment_config(self):
        """Return PROD_CFG when the ENV variable is 'prod', else DEV_CFG.

        Raises:
            KeyError: if the ENV environment variable is not set.
        """
        if os.environ['ENV'] == 'prod':
            return PROD_CFG
        return DEV_CFG

    def connect(self):
        """Open the connection (autocommit on) and create ``self.cursor``."""
        cfg = self.environment_config()
        self.connection = psycopg2.connect(cfg["database_url"],
                                           sslmode=cfg["sslmode"])
        self.connection.autocommit = True
        self.cursor = self.connection.cursor()

    def create_table(self):
        """Create the ``tweets`` and ``search_status`` tables if missing."""
        create_table_command = (
            "CREATE TABLE IF NOT EXISTS tweets("
            "id serial PRIMARY KEY, id_twitter varchar(50), "
            "name varchar(500), content text, image varchar(300), "
            "followers integer, location varchar(200), "
            "classification varchar(216), query varchar(200), "
            "state varchar(200), colected_at TIMESTAMP DEFAULT NOW(), "
            "created_at TIMESTAMP);"
        )
        self.cursor.execute(create_table_command)
        create_date_table = ''' CREATE TABLE IF NOT EXISTS search_status(since varchar(50), state varchar(200))'''
        self.cursor.execute(create_date_table)

    def insert(self, id_twitter, name, text, image, followers, location,
               query, state, created_at):
        """Insert one tweet row.

        Values are converted exactly as the former string-formatted
        statement converted them (``str()`` for text fields, integer for
        ``followers``, ``str_()`` for name/text/location) so stored data
        keeps the same shape, but they are now sent as query parameters.
        """
        print("entrou no insert")
        insert_command = (
            "INSERT INTO tweets(id_twitter, name, content, image, followers,"
            " location, query, state, created_at)"
            " VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
        )
        params = (
            str(id_twitter),
            self.str_(name),
            self.str_(text),
            str(image),
            int(followers),
            self.str_(location),
            str(query),
            str(state),
            # Sent as text, as before, so the server parses the timestamp
            # the same way it always did.
            str(created_at),
        )
        self.cursor.execute(insert_command, params)

    def get_all(self):
        """Return every stored tweet as ``(id_twitter, content)`` rows."""
        sql = "SELECT id_twitter,content FROM public.tweets ORDER BY id ASC"
        self.cursor.execute(sql)
        return list(self.cursor.fetchall())

    def get_since_date(self, state):
        """Return the stored ``since`` value for *state*.

        Falls back to ``DEFAULT_SINCE_DATE`` when no row exists.
        """
        sql = "SELECT since from search_status where state = %s"
        self.cursor.execute(sql, (state,))
        row = self.cursor.fetchone()
        if not row:
            return self.DEFAULT_SINCE_DATE
        return row[0]

    def set_since_date(self, date, state):
        """Store *date* as the ``since`` value of *state*.

        Updates the existing row(s); when there is none yet, inserts one, so
        that the value is actually persisted the first time a state is seen.
        """
        sql = "UPDATE search_status SET since = %s WHERE state = %s"
        self.cursor.execute(sql, (str(date), state))
        if self.cursor.rowcount == 0:
            self.cursor.execute(
                "INSERT INTO search_status(since, state) VALUES (%s, %s)",
                (str(date), state))

    def get_all_states(self):
        """Return the distinct ``state`` values as one-column rows."""
        sql = "SELECT DISTINCT state FROM tweets"
        self.cursor.execute(sql)
        return list(self.cursor.fetchall())

    def get_state_info(self, state):
        """Return the aggregate rows for *state*.

        Each row is ``(count_tweet, last_tweeted, last_collected)``.
        """
        sql = ("SELECT COUNT(content) AS count_tweet,"
               " MAX(created_at) AS last_tweeted,"
               " MAX(colected_at) AS last_collected"
               " FROM tweets WHERE state = %s")
        self.cursor.execute(sql, (state,))
        return list(self.cursor.fetchall())

    def main(self, id_twitter, name, text, image, followers, location,
             query, state, created_at):
        """Store the tweet if it passes the sentiment filter and is new.

        A tweet is skipped when ``self.st.sentiment_avg(text)`` is falsy or
        when ``close_matches(text)`` finds a stored tweet with the same
        prefix; otherwise it is added to ``self.all`` and inserted.
        """
        if not self.st.sentiment_avg(text):
            print("entrou no else")
            return
        if self.close_matches(text):
            print("tweet igual")
            return
        print("tweet add")
        self.all.append((id_twitter, text))
        self.insert(id_twitter, name, text, image, followers, location,
                    query, state, created_at)

    def delete(self, id):
        """Delete the tweet whose primary key is *id*."""
        sql = "DELETE FROM public.tweets WHERE id = %s"
        self.cursor.execute(sql, (id,))

    def close_matches(self, text):
        """Return the cached entries sharing *text*'s first third.

        The compared prefix is ``len(text) // 3`` characters long. For texts
        shorter than three characters that length is 0, so every cached
        entry counts as a match (unchanged historical behaviour).
        """
        prefix_len = int(len(text) / 3)
        if prefix_len == 0:
            return list(self.all)
        prefix = text[:prefix_len]
        matches = []
        for entry in self.all:
            stored = entry[1]
            # Entries without text (e.g. NULL content) can never match.
            if isinstance(stored, str) and stored[:prefix_len] == prefix:
                matches.append(entry)
        return matches

    def save(self, result, query, state):
        """Extract the fields of a scraped tweet and hand them to ``main()``.

        *result* must expose ``content``, ``id``, ``date`` and
        ``user.displayname`` / ``user.profileImageUrl`` /
        ``user.followersCount`` / ``user.location`` (presumably a snscrape
        tweet object; confirm against the caller).
        """
        print("save")
        user = result.user
        self.main(result.id, user.displayname, result.content,
                  user.profileImageUrl, user.followersCount, user.location,
                  query, state, result.date)

    def str_(self, string):
        """Return ``str(string)`` with apostrophes replaced by ``´``.

        Kept so newly stored text looks like previously stored text; it is
        not what protects the SQL (query parameters do that).
        """
        return str(string).replace("'", "´")
def __init__(self):
    """Attach a ``Database`` and a ``Sentiment`` instance to this object."""
    database = Database()
    self.db = database
    sentiment = Sentiment()
    self.st = sentiment
class Report:
    """Bookkeeping in small JSON files (``<name>.json``).

    The files hold a mapping ``{query: {param: value}}``. ``dic.json`` is
    used for per-query tweet counts and ``temp.json`` for the last tweet id
    seen per query.
    """

    def __init__(self):
        """Attach a ``Database`` and a ``Sentiment`` instance."""
        self.db = Database()
        self.st = Sentiment()

    def load_json_report(self, dic, name):
        """Write *dic* as JSON to ``<name>.json``.

        Despite the name this method writes rather than loads.

        Returns:
            The number of characters written (the result of ``write``).
        """
        with open('%s.json' % name, 'w') as file_json:
            return file_json.write(json.dumps(dic, ensure_ascii=False))

    def save_report(self, query, count, name, param):
        """Record *count* under ``[query][param]`` in ``<name>.json``.

        If the stored value is a list, *count* is appended to it; otherwise
        the whole entry for *query* is replaced by ``{param: count}``. A
        missing, unreadable or non-mapping file is treated as empty.
        """
        try:
            dic = self.open_json(name)
        except (OSError, ValueError):
            # Missing file or invalid JSON: start from scratch.
            dic = {}
        if not isinstance(dic, dict):
            dic = {}

        entry = dic.get(query)
        if isinstance(entry, dict) and isinstance(entry.get(param), list):
            entry[param].append(count)
        else:
            dic[query] = {param: count}
        self.load_json_report(dic, name)

    def open_json(self, name):
        """Return the parsed content of ``<name>.json``."""
        with open('%s.json' % name, 'r') as file_json:
            return json.loads(file_json.read())

    def rm_acentos(self, txt):
        """Return *txt* lower-cased with accents and non-ASCII removed."""
        temp = normalize('NFKD', txt).encode('ASCII', 'ignore').decode('ASCII')
        return temp.lower()

    def update(self):
        """Count, for each pending query, the stored tweets matching it.

        Each (accent-stripped) query is used as a regular expression against
        the accent-stripped tweet text, and the count is saved in
        ``dic.json`` under ``'count'``.
        """
        querys = self.last_attempt()
        tweets = self.db.get_all()
        print(len(tweets))
        # Normalise every tweet once instead of once per query.
        texts = [self.rm_acentos(row[1]) for row in tweets]
        for query in querys:
            query = self.rm_acentos(query)
            pattern = re.compile(query)
            count = sum(1 for text in texts if pattern.search(text))
            self.save_report(query, count, 'dic', 'count')

    def last_attempt(self):
        """Return the adjectives still to be processed.

        Resumes from the last key stored in ``dic.json`` when that key is
        found in ``self.st.adjectives()``; otherwise returns the full list.
        """
        adjs = self.st.adjectives()
        try:
            dic = self.open_json('dic')
            last_query = list(dic.items())[-1][0]
            # NOTE(review): update() stores accent-stripped keys; if
            # adjectives() yields accented words this lookup will not find
            # them and the full list is returned. Confirm intended.
            return adjs[adjs.index(last_query):]
        except (OSError, ValueError, IndexError, AttributeError, TypeError):
            return adjs

    def last_id_tweet(self, query, id_tweet):
        """Remember *id_tweet* as the last id for *query* in ``temp.json``."""
        self.save_report(query, id_tweet, 'temp', 'id')

    def last_id(self, query):
        """Return the id stored for *query* in ``temp.json``, or 0.

        When the file is missing or unreadable it is (re)created empty.
        """
        try:
            dic = self.open_json('temp')
        except (OSError, ValueError):
            self.load_json_report({}, 'temp')
            return 0
        try:
            return dic[query]['id']
        except (KeyError, IndexError, TypeError):
            return 0
class Collector():
    """Collect tweets state by state and store them through ``Database``.

    ``self.index`` walks over the entries of ``self.st.getData()``; each
    entry supplies the state name and two search terms for the query.
    """

    # Index of the last entry of ``self.st.getData()`` that is visited before
    # wrapping around to 0.
    LAST_STATE_INDEX = 13

    def __init__(self):
        """Create the collaborators and reset the collection state."""
        self.db = Database()
        self.st = Sentiment()
        self.rp = Report()
        # Tweepy OAuth set-up, currently disabled:
        # self.auth = tweepy.OAuthHandler(GLOBAL_CFG['consumer_key'], GLOBAL_CFG['consumer_secret'])
        # self.auth.set_access_token(GLOBAL_CFG['access_token'], GLOBAL_CFG['access_token_secret'])
        self.index = 0
        self.state = ""
        self.count_tweets = 0

    def collect(self, min_per_query = 15, min_search = 1440):
        """Run queries one state after another for *min_search* minutes.

        Args:
            min_per_query: minutes used to compute the ``timeout`` passed to
                ``doing()``.
            min_search: total duration of the collection, in minutes.

        A ``tweepy.error.TweepError`` pauses the collection for 30 seconds;
        the loop then carries on with the time that is left instead of
        restarting the whole search window recursively.
        """
        search_time = time.time() + min_search * 60
        print("Collect began at {}".format(time.asctime(time.localtime(time.time()))))
        while time.time() < search_time:
            timeout = time.time() + min_per_query * 60
            query = self.creating_query()
            try:
                self.doing(timeout, query)
            except tweepy.error.TweepError:
                error_time = time.time()
                print("Rate limit exception. Time: {}".format(error_time))
                time.sleep(30)
            print("tweets for state {}: {}".format(self.state, self.count_tweets))

    @staticmethod
    def _search_terms(query, since, until, last_id):
        """Build the scraper search string for one query."""
        search = '{} since:{} until:{}'.format(query, since, until)
        if last_id:
            # Only constrain by id when a previous id is actually known.
            search += ' since_id:{}'.format(last_id)
        return search

    def doing(self,timeout, query):
        """Scrape up to ~100 tweets for *query* and store them.

        The search is limited to tweets from the stored ``since`` date of the
        current state until today and, when known, to ids after the last id
        saved for the state. *timeout* is accepted but not used.
        """
        last = js.get_last_id(self.state)
        since = self.db.get_since_date(self.state)
        today_date = date.today()
        print('collecting tweets with key %s' %normalize('NFKD', query).encode('ASCII', 'ignore').decode('ASCII'))
        print("last: {}".format(last))
        search = self._search_terms(query, since, today_date, last)
        for i, tweet in enumerate(sntwitter.TwitterSearchScraper(search).get_items()):
            if i > 100:
                print("bateu 100 tweets")
                break
            if tweet:
                self.count_tweets += 1
                self.db.save(tweet, query, self.state)
                js.save_last_id(self.state, tweet.id)
                # NOTE(review): this writes back the same `since` that was
                # just read; confirm whether today's date was intended.
                self.db.set_since_date(since, self.state)

    def auth_(self):
        """Return a rate-limit-aware ``tweepy.API`` built from ``self.auth``.

        NOTE(review): ``self.auth`` is not set while the OAuth lines in
        ``__init__`` stay commented out, so calling this raises
        ``AttributeError``.
        """
        api = tweepy.API(self.auth,wait_on_rate_limit=True,wait_on_rate_limit_notify=True)
        return api

    def creating_query(self):
        """Return the query for the current state and advance the index.

        Reads entry ``self.index`` of ``self.st.getData()``: element 0
        becomes ``self.state``, elements 1 and 2 are the OR-ed search terms.
        """
        data = self.st.getData()[self.index]
        self.update_index()
        self.state = data[0]
        return "vacinas OR vacina OR vacinacao OR vacinação ({} OR {})".format(data[1], data[2])

    def update_index(self):
        """Advance ``self.index``, wrapping to 0 after ``LAST_STATE_INDEX``."""
        if self.index == self.LAST_STATE_INDEX:
            self.index = 0
        else:
            self.index += 1
class Database:
    """PostgreSQL store for collected tweets.

    The instance keeps ``self.all``, an in-memory list of
    ``(id_twitter, text)`` pairs loaded at start-up and extended by
    ``main()``, which ``close_matches()`` uses for duplicate detection.

    Values are passed to ``cursor.execute`` as parameters instead of being
    interpolated into the SQL text.
    """

    def __init__(self):
        """Connect, make sure the table exists and load the stored tweets.

        Raises:
            Exception: whatever ``connect()`` raised, re-raised after the
                failure has been printed.
        """
        self.st = Sentiment()
        try:
            self.connect()
        except Exception:
            print("Failure in connection")
            # connect() is what sets self.cursor; continuing would only fail
            # in create_table() with an AttributeError hiding the real cause.
            raise
        self.create_table()
        self.all = self.get_all()

    def connect(self):
        """Open the connection (autocommit on) and create ``self.cursor``.

        Uses the module-level ``dbname``, ``user``, ``host`` and ``password``
        names, passed as keyword arguments so that no connection string has
        to be assembled (and quoted) by hand.
        """
        self.connection = psycopg2.connect(
            dbname=dbname, user=user, host=host, password=password)
        self.connection.autocommit = True
        self.cursor = self.connection.cursor()

    def create_table(self):
        """Create the ``tweets`` table if it does not exist."""
        create_table_command = (
            "CREATE TABLE IF NOT EXISTS tweets("
            "id serial PRIMARY KEY, id_twitter varchar(50), "
            "name varchar(500), text varchar(500), image varchar(300), "
            "followers integer, location varchar(200), "
            "classification varchar(216), query varchar(200));"
        )
        self.cursor.execute(create_table_command)

    def insert(self, id_twitter, name, text, image, followers, location,
               query):
        """Insert one tweet row.

        Values are converted exactly as the former string-formatted
        statement converted them (``str()`` for text fields, integer for
        ``followers``, ``str_()`` for name/text/location) so stored data
        keeps the same shape, but they are now sent as query parameters.
        """
        insert_command = (
            "INSERT INTO tweets(id_twitter, name, text, image, followers,"
            " location, query)"
            " VALUES (%s, %s, %s, %s, %s, %s, %s)"
        )
        params = (
            str(id_twitter),
            self.str_(name),
            self.str_(text),
            str(image),
            int(followers),
            self.str_(location),
            str(query),
        )
        self.cursor.execute(insert_command, params)

    def get_all(self):
        """Return every stored tweet as ``(id_twitter, text)`` rows."""
        sql = "SELECT id_twitter,text FROM public.tweets ORDER BY id ASC"
        self.cursor.execute(sql)
        return list(self.cursor.fetchall())

    def main(self, id_twitter, name, text, image, followers, location,
             query):
        """Store the tweet if it passes the sentiment filter and is new.

        A tweet is skipped when ``self.st.sentiment_avg(text)`` is falsy or
        when ``close_matches(text)`` finds a stored tweet with the same
        prefix; otherwise it is added to ``self.all`` and inserted.
        """
        if not self.st.sentiment_avg(text):
            return
        if self.close_matches(text):
            return
        self.all.append((id_twitter, text))
        self.insert(id_twitter, name, text, image, followers, location,
                    query)

    def delete(self, id):
        """Delete the tweet whose primary key is *id*."""
        sql = "DELETE FROM public.tweets WHERE id = %s"
        self.cursor.execute(sql, (id,))

    def close_matches(self, text):
        """Return the cached entries sharing *text*'s first third.

        The compared prefix is ``len(text) // 3`` characters long. For texts
        shorter than three characters that length is 0, so every cached
        entry counts as a match (unchanged historical behaviour).
        """
        prefix_len = int(len(text) / 3)
        if prefix_len == 0:
            return list(self.all)
        prefix = text[:prefix_len]
        matches = []
        for entry in self.all:
            stored = entry[1]
            # Entries without text (e.g. NULL text) can never match.
            if isinstance(stored, str) and stored[:prefix_len] == prefix:
                matches.append(entry)
        return matches

    def save(self, result, query):
        """Extract the fields of an API status and hand them to ``main()``.

        Uses ``result.retweeted_status.full_text`` when the status has a
        ``retweeted_status``, else ``result.full_text``. *result* must also
        expose ``id`` and ``user.screen_name`` / ``user.profile_image_url`` /
        ``user.followers_count`` / ``user.location``.
        """
        try:
            text = result.retweeted_status.full_text
        except AttributeError:
            text = result.full_text
        user = result.user
        self.main(result.id, user.screen_name, text, user.profile_image_url,
                  user.followers_count, user.location, query)

    def str_(self, string):
        """Return ``str(string)`` with apostrophes replaced by ``´``.

        Kept so newly stored text looks like previously stored text; it is
        not what protects the SQL (query parameters do that).
        """
        return str(string).replace("'", "´")