-
Notifications
You must be signed in to change notification settings - Fork 0
/
memeMatcher.py
272 lines (244 loc) · 10.1 KB
/
memeMatcher.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
#!/usr/bin/env python
# encoding: utf-8
"""
memeMatcher.py
Created by Benjamin Fields on 2013-12-07.
Copyright (c) 2013 . All rights reserved.
"""
import sys
import os, os.path
import unittest
import tempfile
import json
import random
import md5
import sqlite3
import requests
import pyechonest.track
import pygn
import psycopg2 as dbapi2
from itertools import izip_longest
from bs4 import BeautifulSoup
# Gracenote client id used for the pygn calls below.  It is read from the
# environment when the module is imported, so a missing GRACENOTE_KEY variable
# raises KeyError at import time.
GRACENOTE_KEY = os.environ['GRACENOTE_KEY']
def database_connection():
    """Open and return a new connection to the local ``burns`` database.

    Every call creates a fresh connection through the DB-API driver imported
    as ``dbapi2``; closing it is the caller's responsibility.
    """
    settings = {
        "database": "burns",
        "user": "burns",
        "password": "lol",
        "host": "127.0.0.1",
    }
    return dbapi2.connect(**settings)
def grouper(n, iterable, fillvalue=None):
    """Collect items into tuples of length n, padding the last one.

    grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx
    """
    # One iterator object is repeated n times, so every output tuple pulls n
    # consecutive items from the input.
    shared = iter(iterable)
    return izip_longest(*([shared] * n), fillvalue=fillvalue)
def base_path():
    """Return the directory that contains this module, with symlinks resolved."""
    module_file = os.path.realpath(__file__)
    directory, _module_name = os.path.split(module_file)
    return directory
def rep_genius_parser(artist='', title=''):
    """Look up a song on Rap Genius and return its lyrics grouped into phrases.

    The site search is queried with the artist and title, the first search hit
    is followed, and the text of the first ``div.lyrics p`` element on that
    page is split into phrases.

    Args:
        artist: Artist name used in the search query.
        title: Track title used in the search query.

    Returns:
        A list of phrases.  Each phrase is a list of consecutive lyric lines;
        blank lines separate phrases and lines starting with ``[`` (functional
        labels) are left out.

    Raises:
        IndexError: If the search page has no song link, or the song page has
            no lyrics paragraph.
    """
    rg_base = "http://rapgenius.com"
    # EN or something has produced artist and track name.  The query goes
    # through ``params`` so that requests percent-encodes it; splicing it into
    # the URL by hand broke on '&', '#' and non-ASCII names.
    r = requests.get(rg_base + "/search",
                     params={"hide_unexplained_songs": "false",
                             "q": artist + u' ' + title})
    soup = BeautifulSoup(r.content)
    song_links = soup.select(".song_list li a")
    if not song_links:
        # Same exception type as indexing the empty result list, but with a
        # message that says what was being looked up.
        raise IndexError("no Rap Genius search result for %r / %r"
                         % (artist, title))
    track_link = song_links[0].get('href')

    r = requests.get(rg_base + track_link)
    soup = BeautifulSoup(r.content)
    lyric_paragraphs = soup.select("div.lyrics p")
    if not lyric_paragraphs:
        raise IndexError("no lyrics found on Rap Genius page %r" % (track_link,))
    return _group_lyric_phrases(lyric_paragraphs[0].get_text())


def _group_lyric_phrases(raw_lyrics):
    """Split raw lyric text into phrases (lists of lines) at blank lines."""
    lyrics = []
    phrase = []
    for line in raw_lyrics.split('\n'):
        if not line:
            # A blank line closes the phrase collected so far, if any.
            if phrase:
                lyrics.append(phrase)
                phrase = []
            continue
        if line[0] != '[':
            # don't add functional phrase labels
            phrase.append(line)
    if phrase:
        # The text may end without a trailing blank line; keep the last phrase.
        lyrics.append(phrase)
    return lyrics
class memeMatcher(object):
    """Build a timed meme slideshow for an uploaded audio file.

    Creating an instance runs the whole pipeline at once (see ``_run``): the
    track is analysed through pyechonest, and a list of slide timings is
    either loaded from the ``timings`` table or generated from lyrics, cover
    art and the meme images listed in ``images.json``, then stored.

    ``self.timings`` is a list of dicts with the keys ``image_url``,
    ``transition_after``, ``top_text`` and ``bottom_text``.
    ``transition_after`` is a duration from the track analysis multiplied by
    1000 and truncated to an int (milliseconds if the analysis reports
    seconds -- TODO confirm).
    """

    def __init__(self, filepath):
        """Remember where the file lives and run the pipeline.

        Args:
            filepath: Path of the audio file.  Its last path component is the
                ``file_name`` used to find the row in the ``upload`` table.
        """
        self.filepath = filepath
        self.filename = os.path.split(filepath)[-1]
        self._run()

    def _run(self):
        """Load cached timings, or build and store them when none are cached.

        ``self.status`` is updated as the steps progress and ends as
        ``'done'`` on both the cached and the freshly built path.
        """
        sync_method = 'linear_seq'
        self.status = 'processing file'
        self._fetch_EN_analysis()
        try:
            # cache grab; deserialize_timing raises TypeError on a miss
            self.timings = self.deserialize_timing(sync_method)
        except TypeError:
            # otherwise make a new one
            self.status = 'gathering lyrics'
            self._fetch_lyrics()
            self.status = 'gathering cover art'
            self._fetch_cover_art()
            self.status = 'aligning to memes'
            # Align with the same method name the result is cached under.
            self.select_and_align_memes(method=sync_method)
            self.status = 'done'
            self.serialize_timing(sync_method, self.timings)
        else:
            # A cache hit is finished work too.
            self.status = 'done'

    def _fetch_EN_analysis(self):
        """Analyse the file with pyechonest and keep track, artist and title."""
        self.track = pyechonest.track.track_from_filename(self.filepath)
        self.track.get_analysis()
        self.artist = self.track.artist
        self.title = self.track.title

    @staticmethod
    def _clean_title(title):
        """Reduce a track title to the part used for the lyrics search."""
        # mild cleaning: keep the text before the first '/', drop what follows
        # the last " feat" and the last " - ", and cut at the first "(".
        title = title.split('/')[0]
        if " feat" in title:
            title = " ".join(title.split(" feat")[:-1])
        if " - " in title:
            title = " ".join(title.split(" - ")[:-1])
        if "(" in title:
            title = title.split("(")[0]
        return title

    def _fetch_lyrics(self):
        """Fetch the lyrics for the track and store them in ``self.lyrics``.

        ``self.lyrics`` becomes the list of phrases (lists of lines) returned
        by ``rep_genius_parser``.
        """
        self.lyrics = rep_genius_parser(self.artist,
                                        self._clean_title(self.title))

    def _fetch_cover_art(self):
        """Download the album art for the track and record where it is served.

        Sets ``self.album_art`` to ``'/audio_files/<md5 of the art URL>'``
        once the image is present in the ``tmp`` directory next to this
        module, or to ``None`` when no usable art URL is available or the
        download fails.
        """
        clientID = GRACENOTE_KEY
        userID = pygn.register(clientID)
        metadata = pygn.search(clientID=clientID, userID=userID,
                               artist=self.artist,
                               track=self.title.split('/')[0])
        try:
            art_url = metadata['album_art_url']
            artist_pic = os.path.join(base_path(), 'tmp',
                                      md5.md5(art_url).hexdigest())
            if not os.path.exists(artist_pic):
                r = requests.get(art_url)
                # Do not cache an HTTP error page as if it were the image.
                r.raise_for_status()
                with open(artist_pic, 'wb') as wh:
                    wh.write(r.content)
            self.album_art = '/audio_files/' + os.path.split(artist_pic)[1]
        except (KeyError, TypeError, requests.exceptions.RequestException):
            # Cover art is best effort.  KeyError: no art URL in the result.
            # TypeError: the search gave no subscriptable result (presumably
            # None when nothing matched -- confirm against pygn).
            # RequestException: bad URL or failed download.
            self.album_art = None

    def _lookup_song_id(self, cur):
        """Return the ``upload`` id for this file, querying it at most once.

        Raises:
            TypeError: If no ``upload`` row has this file name.
        """
        try:
            return self.song_id
        except AttributeError:
            cur.execute("SELECT id from upload WHERE file_name=%s",
                        (self.filename, ))
            # fetchone() gives None when nothing matches; subscripting it
            # raises the TypeError that callers treat as "not found".
            self.song_id = cur.fetchone()[0]
            return self.song_id

    def deserialize_timing(self, sync_method):
        """Load the cached timings for this song and alignment method.

        Args:
            sync_method: Name of the alignment method the timings were
                stored under.

        Returns:
            The decoded JSON value stored in the ``timings`` table.

        Raises:
            TypeError: If the song or its timings are not in the database
                (``_run`` relies on this to detect a cache miss).
        """
        conn = database_connection()
        try:
            cur = conn.cursor()
            song_id = self._lookup_song_id(cur)
            cur.execute("SELECT json from timings WHERE id=%s AND sync_method=%s",
                        (song_id, sync_method,))
            row = cur.fetchone()
            if row is None:
                raise TypeError("no cached %r timings for song id %r"
                                % (sync_method, song_id))
            return json.loads(row[0])
        finally:
            # The connection is opened per call, so release it here.
            conn.close()

    def serialize_timing(self, sync_method, timing):
        """Store ``timing`` as JSON for this song and alignment method.

        Args:
            sync_method: Name of the alignment method used.
            timing: JSON-serialisable timings to store.

        Raises:
            TypeError: If no ``upload`` row has this file name.
        """
        conn = database_connection()
        try:
            cur = conn.cursor()
            song_id = self._lookup_song_id(cur)
            cur.execute("INSERT INTO timings (id, sync_method, json) VALUES (%s, %s, %s)",
                        (song_id, sync_method, json.dumps(timing)))
            conn.commit()
        finally:
            conn.close()

    @staticmethod
    def _shuffled(items):
        """Return the items (the keys, for a dict) as a new shuffled list."""
        pool = list(items)
        return random.sample(pool, len(pool))

    def select_and_align_memes(self, method='linear_seq', intro_num_sections = 1, **kwargs):
        """Reset ``self.timings`` and fill it using the chosen method.

        When album art is available the first ``intro_num_sections`` sections
        become an uncaptioned album-art slide and alignment starts after
        them; otherwise alignment starts at the first section.

        Args:
            method: ``'random_shuffle'``, ``'maximum_spread'`` or
                ``'linear_seq'``.
            intro_num_sections: Number of leading sections covered by the
                album-art slide.
            **kwargs: Passed on to ``linear_sequence_memes`` only.

        Raises:
            ValueError: If ``method`` is not one of the known names.
        """
        with open(os.path.join(base_path(), 'images.json')) as rh:
            memes = json.load(rh)['memes']
        if self.album_art:
            intro_sections = self.track.sections[:intro_num_sections]
            intro_length = sum(sect[u'duration'] for sect in intro_sections)
            self.timings = [{"image_url": self.album_art,
                             "transition_after": int(intro_length * 1000),
                             "top_text": '',
                             "bottom_text": '',
                             }]
            start_section = intro_num_sections
        else:
            self.timings = []
            start_section = 0
        if method == 'random_shuffle':
            self.random_shuffle_memes(memes, start_section)
        elif method == 'maximum_spread':
            self.maximum_spread(memes, start_section)
        elif method == 'linear_seq':
            self.linear_sequence_memes(memes, start_section, **kwargs)
        else:
            raise ValueError('unknown alignment method')

    def linear_sequence_memes(self, memes, start_section, first_block=4, second_block=4):
        """Caption slides with the lyrics in order, two lines per slide.

        Bars starting at or after the start of section ``start_section`` are
        taken in groups of ``first_block + second_block``; each group yields
        two slides lasting ``first_block`` and ``second_block`` bars.  The
        lyrics start over when they run out, and captions are blank when
        there are no lyrics at all.

        Args:
            memes: Mapping of meme key to a dict with a ``source_image`` entry.
            start_section: Index of the first section to cover.
            first_block: Number of bars for the first slide of each group.
            second_block: Number of bars for the second slide of each group.
        """
        shuffled_img = self._shuffled(memes)
        # Work on trimmed copies so self.lyrics is left untouched: every
        # phrase is cut to an even number of lines so lines pair up.
        all_lines = []
        for lyric_block in self.lyrics:
            lines = list(lyric_block)
            if len(lines) % 2 == 1:
                if len(lines) > 4:
                    lines.pop(-3)
                else:
                    lines.pop(-1)
            all_lines += lines
        flat_lyrics = list(all_lines)
        bars_left = [bar for bar in self.track.bars
                     if bar[u'start'] >= self.track.sections[start_section][u'start']]
        for some_bars in grouper(first_block + second_block, bars_left):
            for block_length in (first_block, second_block):
                key = shuffled_img.pop()
                img_path = memes[key]["source_image"]
                if not shuffled_img:
                    shuffled_img = self._shuffled(memes)
                if len(flat_lyrics) < 2:
                    # Refill from a fresh copy; popping from a shared list
                    # would drain the lyrics for every later refill.
                    flat_lyrics = list(all_lines)
                top_line = flat_lyrics.pop(0) if flat_lyrics else ''
                bottom_line = flat_lyrics.pop(0) if flat_lyrics else ''
                # grouper pads the last group with None; skip the padding.
                duration = int(sum(bar[u'duration']
                                   for bar in some_bars[:block_length]
                                   if bar is not None) * 1000)
                some_bars = some_bars[block_length:]  # trim the bars that were used
                self.timings.append({"image_url": img_path,
                                     "transition_after": duration,
                                     "top_text": top_line,
                                     "bottom_text": bottom_line,
                                     })

    def random_shuffle_memes(self, memes, start_section):
        """Give every section a random image and a random pair of lines.

        One slide is appended per section from ``start_section`` on.  The
        captions are an adjacent (even, odd) pair of lines from a randomly
        drawn phrase; phrases with fewer than two lines are skipped and the
        captions are blank when no usable phrase is left.

        Args:
            memes: Mapping of meme key to a dict with a ``source_image`` entry.
            start_section: Index of the first section to cover.
        """
        shuffled_img = self._shuffled(memes)
        shuffled_phrases = self._shuffled(self.lyrics)
        for section in self.track.sections[start_section:]:
            # draw cards
            key = shuffled_img.pop()
            img_path = memes[key]["source_image"]
            this_phrase = []
            while len(this_phrase) < 2:
                if shuffled_phrases:
                    this_phrase = shuffled_phrases.pop()
                else:
                    this_phrase = ['', '']
            # reshuffle the decks, if needed
            if not shuffled_img:
                shuffled_img = self._shuffled(memes)
            if not shuffled_phrases:
                shuffled_phrases = self._shuffled(self.lyrics)
            # Pick the even index of a random complete line pair.
            lyric_idx = random.randrange(len(this_phrase) // 2) * 2
            self.timings.append({"image_url": img_path,
                                 "transition_after": int(section['duration'] * 1000),
                                 "top_text": this_phrase[lyric_idx],
                                 "bottom_text": this_phrase[lyric_idx + 1],
                                 })

    def maximum_spread(self, memes, start_section):
        """Spread the phrases evenly over the sections before the last one.

        Each phrase gets an equal number of consecutive sections.  The bars
        lying completely inside those sections are grouped 4 or 8 at a time
        and every full group becomes a slide captioned with the next two
        lines of the phrase.  A group that is incomplete, or that arrives
        after the lines ran out, extends the previous slide instead.  A final
        album-art slide covering the last section is always appended.

        Args:
            memes: Mapping of meme key to a dict with a ``source_image`` entry.
            start_section: Index of the first section to cover.
        """
        shuffled_img = self._shuffled(memes)
        # The last section is reserved for the closing album-art slide.
        body_sections = self.track.sections[start_section:-1]
        if self.lyrics:
            sections_per_phrase = len(body_sections) // len(self.lyrics)
        else:
            sections_per_phrase = 0
        current_section = start_section
        for lyric_block in self.lyrics:
            # Consume a copy so self.lyrics is left untouched.
            remaining = list(lyric_block)
            bars = []
            for section in self.track.sections[current_section:current_section + sections_per_phrase]:
                section_end = section[u'start'] + section[u'duration']
                bars += [bar for bar in self.track.bars
                         if bar[u'start'] >= section[u'start']
                         and bar[u'start'] + bar[u'duration'] <= section_end]
            # A phrase with fewer than two lines still counts as one pair,
            # which keeps the division below defined.
            line_pairs = max(len(remaining) // 2, 1)
            bars_per_linepair = 4 if len(bars) // line_pairs <= 4 else 8
            for some_bars in grouper(bars_per_linepair, bars):
                real_bars = [bar for bar in some_bars if bar is not None]
                duration = int(sum(bar[u'duration'] for bar in real_bars) * 1000)
                incomplete = len(real_bars) < len(some_bars)
                if (incomplete or len(remaining) < 2) and self.timings:
                    # combine with the last one and force last linepair of the
                    # phrase; only this group's bars extend the slide.
                    last = self.timings[-1]
                    last["transition_after"] += duration
                    if len(remaining) > 1:
                        last['bottom_text'] = remaining.pop(-1)
                        last['top_text'] = remaining.pop(-1)
                    continue
                key = shuffled_img.pop()
                img_path = memes[key]["source_image"]
                if not shuffled_img:
                    shuffled_img = self._shuffled(memes)
                # Lines can only be missing here when there was no earlier
                # slide to extend; caption with whatever is left.
                top = remaining.pop(0) if remaining else ''
                bottom = remaining.pop(0) if remaining else ''
                self.timings.append({"image_url": img_path,
                                     "transition_after": duration,
                                     "top_text": top,
                                     "bottom_text": bottom,
                                     })
            current_section += sections_per_phrase
        # last section gets the album art again
        self.timings.append({"image_url": self.album_art,
                             "transition_after": int(self.track.sections[-1]['duration'] * 1000),
                             "top_text": '',
                             "bottom_text": '',
                             })
class memeMatcherTests(unittest.TestCase):
    """Test case scaffold for memeMatcher; it sets up a fixture but has no tests."""

    def setUp(self):
        """Build a matcher for an empty file path before each test."""
        matcher = memeMatcher('')
        self.tester = matcher
# Run this module's unittest test cases when it is executed as a script.
if __name__ == '__main__':
    unittest.main()