Beispiel #1
0
 def __init__(self):
     """Create the sentiment helper, connect, then prepare the table and cache.

     After a successful ``connect()`` this calls ``create_table()`` and
     stores the result of ``get_all()`` in ``self.all``.

     Raises:
         Exception: whatever ``connect()`` raised; it is reported on stdout
             and then re-raised.
     """
     self.st = Sentiment()
     try:
         self.connect()
     except Exception as e:
         print("Failure in connection: {}".format(str(e)))
         # NOTE(review): create_table()/get_all() presumably need the
         # connection, so stop here with the real error instead of
         # continuing with a half-initialised object.
         raise
     self.create_table()
     self.all = self.get_all()
Beispiel #2
0
 def __init__(self):
     """Create the sentiment helper, connect, then prepare the table and cache.

     After a successful ``connect()`` this calls ``create_table()`` and
     stores the result of ``get_all()`` in ``self.all``.

     Raises:
         Exception: whatever ``connect()`` raised; it is reported on stdout
             and then re-raised.
     """
     self.st = Sentiment()
     try:
         self.connect()
     except Exception:
         # ``Exception`` instead of a bare ``except`` so KeyboardInterrupt
         # and SystemExit are not reported as connection failures.
         print("Failure in connection")
         # NOTE(review): create_table()/get_all() presumably need the
         # connection, so propagate the original error.
         raise
     self.create_table()
     self.all = self.get_all()
 def __init__(self):
     """Build the collaborator objects and reset the collection counters."""
     # Collaborators are constructed in this fixed order: database, sentiment
     # helper, report.
     self.db = Database()
     self.st = Sentiment()
     self.rp = Report()
     # The tweepy OAuth handler setup (consumer key/secret and access token
     # from GLOBAL_CFG) is currently disabled, so ``self.auth`` is not set.
     self.state, self.index, self.count_tweets = "", 0, 0
Beispiel #4
0
class Database:
    """PostgreSQL access layer for collected tweets and per-state search status.

    The instance keeps an in-memory list ``self.all`` of
    ``(id_twitter, content)`` rows which ``close_matches`` uses to detect
    near-duplicate tweets without querying the database again.
    """

    # Date returned by ``get_since_date`` when a state has no stored row.
    DEFAULT_SINCE = '2021-01-17'

    def __init__(self):
        """Connect, create the tables if needed and load the tweet cache.

        Raises:
            Exception: whatever ``connect`` raised; it is printed and then
                re-raised, because without ``self.cursor`` the following
                ``create_table`` call would only fail with an unrelated
                AttributeError.
        """
        self.st = Sentiment()
        try:
            self.connect()
        except Exception as e:
            print("Failure in connection: {}".format(str(e)))
            raise
        self.create_table()
        self.all = self.get_all()

    def environment_config(self):
        """Return PROD_CFG when the ENV variable is 'prod', otherwise DEV_CFG.

        An unset ENV variable is treated like any other non-'prod' value.
        """
        if os.environ.get('ENV') == 'prod':
            return PROD_CFG
        return DEV_CFG

    def connect(self):
        """Open the connection (autocommit on) and create ``self.cursor``."""
        cfg = self.environment_config()
        self.connection = psycopg2.connect(cfg["database_url"],
                                           sslmode=cfg["sslmode"])
        self.connection.autocommit = True
        self.cursor = self.connection.cursor()

    def create_table(self):
        """Create the ``tweets`` and ``search_status`` tables if missing."""
        create_table_command = (
            "CREATE TABLE IF NOT EXISTS tweets(id serial PRIMARY KEY, id_twitter varchar(50),\
        name varchar(500), content text, image varchar(300), followers integer, location varchar(200),\
        classification varchar(216), query varchar(200), state varchar(200), colected_at TIMESTAMP DEFAULT NOW(), created_at TIMESTAMP);"
        )
        self.cursor.execute(create_table_command)
        create_date_table = ''' CREATE TABLE IF NOT EXISTS search_status(since varchar(50), state varchar(200))'''
        self.cursor.execute(create_date_table)

    def insert(self, id_twitter, name, text, image, followers, location, query,
               state, created_at):
        """Insert one tweet row.

        Values are sent as query parameters, so quotes or SQL fragments in
        tweet text, user names or locations cannot alter the statement.
        """
        print("entrou no insert")
        insert_command = (
            "INSERT INTO tweets(id_twitter, name, content, image, followers,"
            " location, query, state, created_at)"
            " VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)")
        # str_ is still applied to the free-text fields so new rows keep the
        # same apostrophe rewriting as rows stored before parameterization.
        params = (str(id_twitter), self.str_(name), self.str_(text), image,
                  followers, self.str_(location), query, state, created_at)
        self.cursor.execute(insert_command, params)

    def get_all(self):
        """Return every ``(id_twitter, content)`` row ordered by primary key."""
        sql = "SELECT id_twitter,content FROM public.tweets ORDER BY id ASC"
        self.cursor.execute(sql)
        return list(self.cursor.fetchall())

    def get_since_date(self, state):
        """Return the stored ``since`` value for *state*.

        Returns ``DEFAULT_SINCE`` when no row exists for the state.
        """
        self.cursor.execute(
            "SELECT since from search_status where state = %s", (state,))
        row = self.cursor.fetchone()
        if not row:
            return self.DEFAULT_SINCE
        return row[0]

    def set_since_date(self, date, state):
        """Store *date* as the ``since`` value for *state*.

        The row is updated when it exists and inserted otherwise; a plain
        UPDATE would silently do nothing on a freshly created table.
        """
        since = str(date)
        self.cursor.execute(
            "UPDATE search_status SET since = %s WHERE state = %s",
            (since, state))
        if self.cursor.rowcount == 0:
            self.cursor.execute(
                "INSERT INTO search_status(since, state) VALUES (%s, %s)",
                (since, state))

    def get_all_states(self):
        """Return the distinct ``state`` values found in ``tweets``."""
        sql = "SELECT DISTINCT state FROM tweets"
        self.cursor.execute(sql)
        return [row[0] for row in self.cursor.fetchall()]

    def get_state_info(self, state):
        """Return the aggregate rows for *state*.

        Each row is ``(count_tweet, last_tweeted, last_collected)``.
        """
        sql = ("SELECT COUNT(content) AS count_tweet, MAX(created_at) AS last_tweeted,"
               " MAX(colected_at) AS last_collected FROM tweets WHERE state = %s")
        self.cursor.execute(sql, (state,))
        return list(self.cursor.fetchall())

    def main(self, id_twitter, name, text, image, followers, location, query,
             state, created_at):
        """Store the tweet if it passes the sentiment filter and is not a duplicate."""
        if not self.st.sentiment_avg(text):
            print("entrou no else")
            return
        if self.close_matches(text):
            print("tweet igual")
            return
        print("tweet add")
        # Insert first so a failed insert does not leave a phantom cache entry.
        self.insert(id_twitter, name, text, image, followers, location,
                    query, state, created_at)
        self.all.append((id_twitter, text))

    def delete(self, id):
        """Delete the tweet row whose primary key is *id*."""
        self.cursor.execute("DELETE FROM public.tweets WHERE id = %s", (id,))

    def close_matches(self, text):
        """Return the cached rows whose content shares the first third of *text*.

        A row matches when its content starts with the first
        ``len(text) // 3`` characters of *text*. When that prefix is empty
        (text shorter than three characters) every cached row matches.
        """
        prefix = text[:len(text) // 3]
        if not prefix:
            return list(self.all)
        return [
            row for row in self.all
            if isinstance(row[1], str) and row[1].startswith(prefix)
        ]

    def save(self, result, query, state):
        """Extract the stored fields from a scraped tweet and pass them to ``main``."""
        print("save")
        user = result.user
        self.main(result.id, user.displayname, result.content,
                  user.profileImageUrl, user.followersCount, user.location,
                  query, state, result.date)

    def str_(self, string):
        """Return ``str(string)`` with ASCII apostrophes replaced by ``´``."""
        return str(string).replace("'", "´")
 def __init__(self):
     """Create the database handle first, then the sentiment helper."""
     self.db, self.st = Database(), Sentiment()
class Report:
    """Bookkeeping in ``<name>.json`` files: keyword counts and last tweet ids."""

    def __init__(self):
        self.db = Database()
        self.st = Sentiment()

    def load_json_report(self, dic, name):
        """Write *dic* as JSON to ``<name>.json``.

        Despite its name this method writes the file; it returns the number
        of characters written.
        """
        # Serialize before opening so a serialization error cannot leave a
        # truncated file behind.
        payload = json.dumps(dic, ensure_ascii=False)
        with open('%s.json' % name, 'w') as file_json:
            return file_json.write(payload)

    def save_report(self, query, count, name, param):
        """Record *count* under ``dic[query][param]`` in ``<name>.json``.

        A missing or unreadable file is treated as an empty report. If the
        stored value is already a list, *count* is appended to it; otherwise
        the entry for *query* is replaced by ``{param: count}``.
        """
        try:
            dic = self.open_json(name)
        except (OSError, ValueError):
            dic = {}
        if not isinstance(dic, dict):
            dic = {}
        entry = dic.get(query)
        if isinstance(entry, dict) and isinstance(entry.get(param), list):
            entry[param].append(count)
        else:
            dic[query] = {param: count}
        self.load_json_report(dic, name)

    def open_json(self, name):
        """Return the parsed content of ``<name>.json``."""
        with open('%s.json' % name, 'r') as file_json:
            return json.loads(file_json.read())

    def rm_acentos(self, txt):
        """Return *txt* lower-cased with non-ASCII characters/accents removed."""
        temp = normalize('NFKD', txt).encode('ASCII', 'ignore').decode('ASCII')
        return temp.lower()

    def update(self):
        """Count, for each pending adjective, the tweets whose text matches it.

        Each count is saved in ``dic.json`` under the normalized adjective.
        """
        querys = self.last_attempt()
        tweets = self.db.get_all()
        print(len(tweets))
        # Normalize each tweet once instead of once per adjective.
        texts = [self.rm_acentos(row[1]) for row in tweets] if querys else []
        for adjective in querys:
            pattern = self.rm_acentos(adjective)
            count = sum(1 for text in texts if re.search(pattern, text))
            self.save_report(pattern, count, 'dic', 'count')

    def last_attempt(self):
        """Return the adjectives from the last one recorded in ``dic.json`` on.

        The keys in ``dic.json`` are written by ``update`` in normalized form,
        so the last key is also compared against the normalized adjectives.
        Falls back to the full adjective list when the file is missing,
        empty, malformed, or its last key is unknown.
        """
        adjs = self.st.adjectives()
        try:
            dic = self.open_json('dic')
            last_query = list(dic.items())[-1][0]
            if last_query in adjs:
                index = adjs.index(last_query)
            else:
                normalized = [self.rm_acentos(adj) for adj in adjs]
                index = normalized.index(last_query)
        except (OSError, ValueError, IndexError, TypeError, AttributeError):
            return adjs
        return adjs[index:]

    def last_id_tweet(self, query, id_tweet):
        """Store *id_tweet* as the last seen id for *query* in ``temp.json``."""
        self.save_report(query, id_tweet, 'temp', 'id')

    def last_id(self, query):
        """Return the id stored for *query* in ``temp.json``, or 0 if none.

        When ``temp.json`` is missing or unreadable it is (re)created as an
        empty report.
        """
        try:
            dic = self.open_json('temp')
        except (OSError, ValueError):
            self.load_json_report({}, 'temp')
            return 0
        try:
            return dic[query]['id']
        except (KeyError, IndexError, TypeError):
            return 0
class Collector():
    """Round-robin tweet collector that builds one search query per state entry."""

    # Maximum number of tweets processed per call to ``doing``.
    MAX_TWEETS_PER_QUERY = 100
    # Highest index into ``self.st.getData()`` before wrapping back to 0.
    LAST_DATA_INDEX = 13

    def __init__(self):
        self.db = Database()
        self.st = Sentiment()
        self.rp = Report()
        # The tweepy OAuth handler setup is disabled, so ``self.auth`` is
        # not assigned here.
        self.index = 0
        self.state = ""
        self.count_tweets = 0

    def collect(self, min_per_query=15, min_search=1440):
        """Run queries in a loop until *min_search* minutes have elapsed.

        Args:
            min_per_query: minutes used to compute the per-query timeout
                passed to ``doing``.
            min_search: total run time in minutes.

        On a tweepy rate-limit error the loop sleeps 30 seconds and carries
        on within the same overall deadline, rather than recursing into a
        fresh full-length run.
        """
        search_time = time.time() + min_search * 60
        print("Collect began at {}".format(time.asctime(time.localtime(time.time()))))
        while time.time() < search_time:
            timeout = time.time() + min_per_query * 60
            query = self.creating_query()
            try:
                self.doing(timeout, query)
            except tweepy.error.TweepError:
                error_time = time.time()
                print("Rate limit exception. Time: {}".format(error_time))
                time.sleep(30)

            print("tweets for state {}: {}".format(self.state, self.count_tweets))

    def _search_expression(self, query, since, until, last_id):
        """Return the search string: query, since/until dates, optional since_id."""
        expression = '{} since:{} until:{}'.format(query, since, until)
        if last_id:
            # Only add the operator when a previous id is actually known.
            expression += ' since_id:{}'.format(last_id)
        return expression

    def doing(self, timeout, query):
        """Scrape up to ``MAX_TWEETS_PER_QUERY`` tweets for *query* and save them.

        Args:
            timeout: accepted for interface compatibility; not used here.
            query: search keywords produced by ``creating_query``.
        """
        last = js.get_last_id(self.state)
        since = self.db.get_since_date(self.state)
        today_date = date.today()
        print('collecting tweets with key %s' %normalize('NFKD', query).encode('ASCII', 'ignore').decode('ASCII'))
        print("last: {}".format(last))
        search = self._search_expression(query, since, today_date, last)
        for i, tweet in enumerate(sntwitter.TwitterSearchScraper(search).get_items()):
            if i >= self.MAX_TWEETS_PER_QUERY:
                print("bateu 100 tweets")
                break
            if tweet:
                self.count_tweets += 1
                self.db.save(tweet, query, self.state)
                # NOTE(review): this stores the id of every processed tweet,
                # so the last one written wins - confirm that is the id
                # wanted for the next since_id.
                js.save_last_id(self.state, tweet.id)
        # NOTE(review): this writes back the value that was just read;
        # confirm whether the date should advance.
        self.db.set_since_date(since, self.state)

    def auth_(self):
        """Return a tweepy API client built from ``self.auth``.

        ``self.auth`` is not assigned in ``__init__`` (the OAuth setup is
        disabled), so it must be set before calling this.
        """
        api = tweepy.API(self.auth,wait_on_rate_limit=True,wait_on_rate_limit_notify=True)
        return api

    def creating_query(self):
        """Return the query for the current data row and advance the index.

        Also sets ``self.state`` to the row's first field.
        """
        data = self.st.getData()[self.index]
        self.update_index()
        self.state = data[0]
        return "vacinas OR vacina OR vacinacao OR vacinação ({} OR {})".format(data[1], data[2])

    def update_index(self):
        """Advance ``self.index``, wrapping to 0 after ``LAST_DATA_INDEX``."""
        if self.index == self.LAST_DATA_INDEX:
            self.index = 0
        else:
            self.index += 1
Beispiel #8
0
class Database:
    """PostgreSQL access layer for collected tweets.

    The instance keeps an in-memory list ``self.all`` of
    ``(id_twitter, text)`` rows which ``close_matches`` uses to detect
    near-duplicate tweets without querying the database again.
    """

    def __init__(self):
        """Connect, create the table if needed and load the tweet cache.

        Raises:
            Exception: whatever ``connect`` raised; it is reported and then
                re-raised, because without ``self.cursor`` the following
                ``create_table`` call would only fail with an unrelated
                AttributeError.
        """
        self.st = Sentiment()
        try:
            self.connect()
        except Exception:
            print("Failure in connection")
            raise
        self.create_table()
        self.all = self.get_all()

    def connect(self):
        """Open the connection (autocommit on) and create ``self.cursor``.

        ``dbname``, ``user``, ``host`` and ``password`` are names defined
        outside this class. They are passed as keyword arguments instead of
        being formatted into a DSN string.
        """
        self.connection = psycopg2.connect(
            dbname=dbname, user=user, host=host, password=password)
        self.connection.autocommit = True
        self.cursor = self.connection.cursor()

    def create_table(self):
        """Create the ``tweets`` table if it does not exist."""
        create_table_command = (
            "CREATE TABLE IF NOT EXISTS tweets(id serial PRIMARY KEY, id_twitter varchar(50),\
        name varchar(500), text varchar(500), image varchar(300), followers integer, location varchar(200),\
        classification varchar(216), query varchar(200));")
        self.cursor.execute(create_table_command)

    def insert(self, id_twitter, name, text, image, followers, location,
               query):
        """Insert one tweet row.

        Values are sent as query parameters, so quotes or SQL fragments in
        tweet text, user names or locations cannot alter the statement.
        """
        insert_command = (
            "INSERT INTO tweets(id_twitter, name, text, image, followers,"
            " location, query)"
            " VALUES (%s, %s, %s, %s, %s, %s, %s)")
        # str_ is still applied to the free-text fields so new rows keep the
        # same apostrophe rewriting as rows stored before parameterization.
        params = (str(id_twitter), self.str_(name), self.str_(text), image,
                  followers, self.str_(location), query)
        self.cursor.execute(insert_command, params)

    def get_all(self):
        """Return every ``(id_twitter, text)`` row ordered by primary key."""
        sql = "SELECT id_twitter,text FROM public.tweets ORDER BY id ASC"
        self.cursor.execute(sql)
        return list(self.cursor.fetchall())

    def main(self, id_twitter, name, text, image, followers, location, query):
        """Store the tweet if it passes the sentiment filter and is not a duplicate."""
        if not self.st.sentiment_avg(text):
            return
        if self.close_matches(text):
            return
        # Insert first so a failed insert does not leave a phantom cache entry.
        self.insert(id_twitter, name, text, image, followers, location, query)
        self.all.append((id_twitter, text))

    def delete(self, id):
        """Delete the tweet row whose primary key is *id*."""
        self.cursor.execute("DELETE FROM public.tweets WHERE id = %s", (id,))

    def close_matches(self, text):
        """Return the cached rows whose text shares the first third of *text*.

        A row matches when its text starts with the first ``len(text) // 3``
        characters of *text*. When that prefix is empty (text shorter than
        three characters) every cached row matches.
        """
        prefix = text[:len(text) // 3]
        if not prefix:
            return list(self.all)
        return [
            row for row in self.all
            if isinstance(row[1], str) and row[1].startswith(prefix)
        ]

    def save(self, result, query):
        """Extract the stored fields from an API status and pass them to ``main``.

        Uses the retweeted status' full text when the attribute is present,
        otherwise the status' own full text.
        """
        try:
            text = result.retweeted_status.full_text
        except AttributeError:
            text = result.full_text
        user = result.user
        self.main(result.id, user.screen_name, text, user.profile_image_url,
                  user.followers_count, user.location, query)

    def str_(self, string):
        """Return ``str(string)`` with ASCII apostrophes replaced by ``´``."""
        return str(string).replace("'", "´")