/
stream_poc.py
100 lines (72 loc) · 2.39 KB
/
stream_poc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# https://docs.tweepy.org/
# https://towardsdatascience.com/named-entity-recognition-with-nltk-and-spacy-8c4a7d88e7da
# TODO - TRUECASING?
# TODO - FINE TUNE NE MODEL TO NOT CARE ABOUT CASE
# TODO - Model needs to not grab as many words like 'yeah' and 's'
# TODO - Slack integration (ping me, ability to immediately sell, etc.)
# TODO - Can probably auto strip out words he uses all the time like FSD and TESLA and LOOP
# TODO - Add: mike burry, the virgin galactic guy, portnoy
import tweepy
import configparser
import nltk
import sqlite3
import spacy
from spacy import displacy
from collections import Counter
import en_core_web_sm
nlp = en_core_web_sm.load()
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.chunk import ne_chunk
def preprocess(sent):
    """Run a raw sentence through NLTK's tokenize -> POS-tag -> NE-chunk steps.

    Args:
        sent: The text to analyse (callers in this file pass ``tweet.text``).

    Returns:
        Whatever ``ne_chunk`` produces for the tagged tokens. Callers in this
        file iterate over it and pick out the ``nltk.tree.Tree`` children.
    """
    tokens = nltk.word_tokenize(sent)
    tagged_tokens = nltk.pos_tag(tokens)
    return ne_chunk(tagged_tokens)
class MyStreamListener(tweepy.StreamListener):
    """Stream listener that echoes incoming statuses and error codes to stdout."""

    def on_status(self, status):
        """Print the status text followed by an 'AAA' marker line."""
        print(status.text, 'AAA', sep='\n')

    def on_error(self, status_code):
        """Print the error code; return False on 420, otherwise None.

        Returning False from on_error disconnects the stream.
        """
        print(status_code)
        if status_code != 420:
            return None
        return False
# Read the Twitter credentials from the [twitter] section of the local
# secrets file.
config = configparser.ConfigParser()
config.read('./secrets/config.ini')
twitter = config['twitter']

# Application key pair first, then the access token pair, then the API client.
consumer_key = twitter['api_key']
consumer_secret = twitter['secret_key']
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)

access_token = twitter['access_token']
access_token_secret = twitter['access_token_secret']
auth.set_access_token(access_token, access_token_secret)

api = tweepy.API(auth)
# Lower-cased tokens the NE chunker picks up that should not count as entities.
# The original chain compared the bound method `ne.lower` (missing call
# parentheses) against 'yup' and 'yeah', so those two were never filtered.
_IGNORED_TOKENS = {'first', 's', 'yup', 'yeah'}

# Lower-cased named-entity tokens collected from the timeline below.
nes = []
# for tweet in api.user_timeline('cnbc'):
#     doc = nlp(tweet.text.lower())
#     for el in doc:
#         print(el)
#         print(el.ent_type_)

# NOTE(review): pages 0, 1 and 2 are requested; confirm against the Twitter
# API whether page 0 and page 1 return the same tweets (duplicates in `nes`).
for i in range(0, 3):
    for tweet in api.user_timeline('stoolpresidente', page=i):
        for node in preprocess(tweet.text):
            # Only subtree nodes are kept; everything else in the chunker
            # output is skipped.
            if not isinstance(node, nltk.tree.Tree):
                continue
            for tup in node:
                # Element 0 of each leaf is used as the word -- presumably
                # (word, tag) pairs coming from pos_tag.
                ne = tup[0]
                lowered = ne.lower()
                if lowered in _IGNORED_TOKENS:
                    continue
                nes.append(lowered)
print(nes)
# Match every collected entity against the names stored in the local symbols
# database and collect column 0 of each matching row.
symbols = []
con = sqlite3.connect('./data/symbols.db')
try:
    cursor = con.cursor()
    query = 'SELECT * FROM symbols'
    # The query does not depend on the entity, so run it once and reuse the
    # rows instead of re-executing it for every entity.
    rows = cursor.execute(query).fetchall()
    cursor.close()
finally:
    # The original closed only the cursor; release the connection as well.
    con.close()

for ne in nes:
    needle = ne.lower()
    for row in rows:
        # Column 1 is matched as the name and column 0 is collected --
        # presumably (symbol, name, ...); verify against the table schema.
        name = row[1]
        # NOTE(review): substring test against `name` as stored; if names are
        # not lower-case in the DB this will rarely match -- confirm intent.
        if needle in name:
            print(needle)
            print(name)
            symbols.append(row[0])
print(symbols)
# myStreamListener = MyStreamListener()
# myStream = tweepy.Stream(auth = api.auth, listener=myStreamListener)
# myStream.filter(track=['lebron'])