/
stubhub_id_getter.py
178 lines (150 loc) · 6.47 KB
/
stubhub_id_getter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
import requests
#from api_keys import
from dbconnection import start_db_connection
import fuzzy
from contextlib import closing
from api_keys import stubhub_x_stubhub_user_guid, stubhub_application_token
import datetime
from time import sleep
#https://api.stubhub.com/search/catalog/events/v3?status=active&
#q=jukebox+the+ghost+2016-03-22
#CUSTOM EXCEPTIONS:
# Parking Lots
# NOTE: events are only pulled for artists that have popularity data (the
# selection query inner-joins the popularity tables), and artists have no
# popularity data without an mbid_id.
#INSERT NEEDS PROTECTION
def get_events_to_update():
    """Fetch the artist/event rows that still need a StubHub listing id.

    Opens the 'AWS' database connection, runs a single query and returns
    everything ``fetchall()`` yields. Each row is, in order::

        (artist name, venue name, venue state, venue latitude,
         venue longitude, event date, onsale date, AVG(popularity value),
         event id)

    Selected events have no ``stubhub_listing`` row and an onsale date that is
    later than 14 days ago and earlier than 2 days from now. Popularity values
    are taken from each artist's most recent ``popularity_point``.

    NOTE(review): the query has no ORDER BY, while parse_events() only merges
    adjacent rows of the same event -- confirm the row order can be relied on.
    """
    # closing() releases the connection even when execute()/fetchall() raises;
    # a trailing conn.close() would be skipped on any error.
    with closing(start_db_connection('AWS')) as conn:
        with closing(conn.cursor()) as cur:
            # Continuation lines are flush-left so the query text is unchanged.
            cur.execute('''SELECT a.name, v.name, v.state, v.latitude, v.longitude,
e.event_date, e.onsale_date, AVG(pv.value), e.id
FROM artist a
JOIN event_artist ON a.id = event_artist.artist_id
JOIN event e ON e.id=event_artist.event_id
JOIN Venue v ON v.id = e.venue_id
LEFT JOIN stubhub_listing sl on e.id=sl.event_id
JOIN popularity_point pp ON a.id=pp.artist_id
JOIN popularity_value pv ON pp.id=pv.pp_id
JOIN popularity_type pt ON pt.id=pv.pt_id
WHERE sl.event_id IS NULL AND (e.onsale_date < (now()+'2 days'::interval))
AND (e.onsale_date > (now()-'14 days'::interval))
AND pp.id IN (SELECT id FROM (SELECT DISTINCT ON (artist_id) id, artist_id, update_date
FROM popularity_point ORDER BY artist_id, update_date DESC) as foo)
GROUP BY a.name, v.name, v.state, v.latitude, v.longitude, e.event_date,
e.onsale_date, e.id''')
            return cur.fetchall()
def parse_events(event_list):
    """Collapse per-artist rows into one entry per event.

    event_list: sequence of rows whose first field is the artist name and
        whose remaining fields describe the event (venue name at index 1,
        event date at index 5).

    Consecutive rows that share the same venue name and event date are
    treated as one event. Returns a list of ``(event_fields, artist_names)``
    pairs, where ``event_fields`` is the first row of the group without its
    artist name, or None when event_list is empty.
    """
    if not event_list:
        return None

    grouped = []
    details = None
    lineup = []
    for row in event_list:
        same_event = (details
                      and details[0] == row[1]
                      and details[4] == row[5])
        if same_event:
            lineup.append(row[0])
            continue
        # A new event starts here: flush the previous group, if there is one.
        if details:
            grouped.append((details, lineup))
        details, lineup = row[1:], [row[0]]
    # The last group is still pending when the rows run out.
    grouped.append((details, lineup))
    return grouped
def request_ids(event_info):
    """Look up the StubHub id of one event, trying each of its artists in turn.

    event_info: an ``(event_fields, artist_names)`` pair; index 7 of
        ``event_fields`` is used as the internal event id.

    Returns ``(internal_event_id, stubhub_event_id)`` for the first artist
    whose search produces a match, or None when no artist does.
    """
    details, lineup = event_info
    internal_id = details[7]
    headers = generate_authentication_header()
    print(lineup)
    for performer in lineup:
        query = generate_payload(details, performer)
        stubhub_id = get_event_id(query, headers, details, performer)
        if not stubhub_id:
            continue
        print('%s %s' % (performer, internal_id))
        return (internal_id, stubhub_id)
    return None
def generate_authentication_header():
    """Return the HTTP headers sent with every StubHub search request.

    The bearer token is the module-level ``stubhub_application_token``.
    """
    headers = {
        'Accept': 'application/json',
        'Accept-Encoding': 'application/json',
    }
    headers['Authorization'] = 'Bearer ' + stubhub_application_token
    return headers
#event_info: venue name, state, lat, long, event date, onsale date, avg popularity, event id
def generate_payload(event_info, artist):
    """Build the query-string parameters for a StubHub event search.

    event_info: event fields; index 1 is the state, indexes 2 and 3 are the
        latitude and longitude, index 4 is the event date (must provide
        ``strftime``).
    artist: artist name used as the quoted search phrase.

    The venue name (index 0) is not sent; earlier variants that added it to
    'q' or as a separate 'venue' parameter are disabled.
    """
    return {
        'point': gen_point(event_info[2], event_info[3]),
        'state': event_info[1],
        'q': gen_query(artist),
        'EventDateLocal': event_info[4].strftime('%Y-%m-%d'),
    }
def gen_point(lat, longitude):
    """Return the coordinates as a single 'lat,longitude' string."""
    coordinates = (str(lat), str(longitude))
    return ','.join(coordinates)
def gen_query(*args):
    """Build the exact-phrase search string used as the 'q' parameter.

    args: the text fragments of the phrase (for example an artist name).

    Returns the fragments joined by single spaces and wrapped in double
    quotes; with no fragments the result is '""'.
    """
    # Plain concatenation instead of '"{}"'.format(...): on Python 2 a
    # byte-string template raises UnicodeEncodeError when handed non-ASCII
    # unicode text, whereas concatenation keeps the text type of the input.
    return '"' + ' '.join(args) + '"'
def get_event_id(payload, authentication_header, event_info, artist,
                 delay=6, timeout=30):
    """Search StubHub and return the id of the first result that matches.

    payload: query-string parameters for the search request.
    authentication_header: HTTP headers to send with the request.
    event_info, artist: passed through to matches() to vet each result.
    delay: seconds to sleep before the request (default 6, the pause that was
        previously hard-coded; presumably a rate-limit throttle -- confirm).
    timeout: seconds to wait for the server before giving up.

    Returns the matching event's 'id', or None when the request fails, the
    status is not 200, the body is not JSON, or nothing matches.
    """
    sleep(delay)
    base_uri = 'https://api.stubhub.com/search/catalog/events/v3'
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        r = requests.get(base_uri, params=payload,
                         headers=authentication_header, timeout=timeout)
        if r.status_code != 200:
            return None
        data = r.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # RequestException covers connection errors and timeouts; ValueError
        # covers a body that is not valid JSON. Either way this one lookup is
        # skipped rather than aborting the whole run.
        print(e)
        return None
    if not data.get('numFound'):
        return None
    for event in data.get('events') or []:
        if matches(event, event_info, artist):
            return event.get('id')
    return None
def matches(event, event_info, artist, threshold=0.85):
    """Decide whether a StubHub search result is the event being looked up.

    event: one search result (a dict from the response's 'events' list).
    event_info: event fields; index 0 is the venue name and index 4 the event
        date (must provide ``strftime``).
    artist: artist name to look for among the result's performers.
    threshold: fuzzy score a name must exceed to count as a match.

    Returns True only when the artist, the venue and the date all match and
    the result is not a parking listing. Fields missing from the result make
    the corresponding check fail instead of raising.
    """
    def best_score(wanted, candidates):
        # NOTE(review): assumes fuzzy.best_match_all returns (candidate, score)
        # pairs with the best match first -- confirm against the fuzzy module.
        return fuzzy.best_match_all(wanted, candidates)[0][1]

    def artist_matches():
        performers = [x['name']
                      for x in event.get('performersCollection') or []
                      if 'name' in x]
        # No named performers means nothing to compare against (and avoids
        # indexing into an empty match list).
        if not performers:
            return False
        return best_score(artist, performers) > threshold

    def venue_matches():
        venue_name = (event.get('venue') or {}).get('name')
        if not venue_name:
            return False
        return best_score(event_info[0], [venue_name]) > threshold

    def date_matches():
        # Only the leading YYYY-MM-DD part of eventDateLocal is compared.
        local_date = event.get('eventDateLocal') or ''
        return local_date[:10] == event_info[4].strftime('%Y-%m-%d')

    def parking_exception():
        # True when the result is NOT a parking listing.
        for field in ('name', 'description'):
            if 'parking' in (event.get(field) or '').lower():
                return False
        return True

    return (artist_matches() and venue_matches()
            and date_matches() and parking_exception())
def upload_stubhub_ids(event_ids):
    """Store matched StubHub ids in the ``stubhub_listing`` table.

    event_ids: iterable of ``(event_id, stubhub_id)`` pairs.

    A pair is inserted only when no existing row already has the same
    stubhubid or the same event_id. Each pair is committed on its own, so
    earlier inserts survive a failure on a later one.
    """
    # closing() releases the connection even when an insert or commit raises;
    # a trailing conn.close() would be skipped on any error.
    with closing(start_db_connection('AWS')) as conn:
        with closing(conn.cursor()) as cur:
            for event_id, stubhub_id in event_ids:
                # Continuation lines are flush-left so the SQL text is unchanged.
                cur.execute('''INSERT INTO stubhub_listing (stubhubid, event_id)
SELECT %s, %s WHERE NOT EXISTS (SELECT * FROM stubhub_listing
WHERE stubhubid = %s OR event_id = %s)''', (stubhub_id,
                            event_id, str(stubhub_id), str(event_id)))
                conn.commit()
                print('StubHub Artist: ' + str(stubhub_id))
def run():
    """Find StubHub ids for all pending events and store them.

    Loads the pending rows, groups them per event, searches StubHub for each
    event and finally uploads every ``(event_id, stubhub_id)`` pair found.
    """
    print('Stubhub ID Getter')
    event_ids = []
    # parse_events() returns None when there is nothing to update; fall back
    # to an empty list so the loop is skipped instead of raising TypeError.
    for event_info in parse_events(get_events_to_update()) or []:
        event_id = request_ids(event_info)
        if event_id:
            event_ids.append(event_id)
        # Blank line between the per-event console output.
        print('')
    upload_stubhub_ids(event_ids)
# Run the full fetch / match / upload pass only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    run()