def processItemFullSize(arr): data = arr['data'] q = Meme.objects.filter(threadLink = 'http://reddit.com' + data['permalink']) #Have we evaluated this submission yet? Might be worth considering only checking #memes within the last day. if q.count() > 1: raise Exception("More than one of the same permalink in db for permalink:" + data['permalink']) if q.count() == 1: #if we have, update the score and move on m = q[0] m.score = data['score'] m.save() elif ".jpeg" not in data['url']: #have not evaluated this submission yet, run tests and store filepath = dropBoxDir + 'target.jpg' if ".jpg" in data['url'] or ".png" in data['url']: fullSize = data['url'] else: fullSize = fullSizePhoto(data['url']) f = open(filepath, 'wb') f.write(urllib3.PoolManager().request('GET', fullSize).data) f.close() img_corrupt = False c2 = False #classify.classify() gets 2 elements: image macro/none, strong/weak classification = classify.classify(filepath) if classification[0] == None and classification[2] != None: c2 = True macro = None #try classifying on potential libs classification2 = classify.classify(filepath, directory = dropBoxDir + 'potential_libs/') if classification2[0] == None: #add image to potential_libs p = PotentialImageMacro(thumbnailLink = data['thumbnail'], fullSizeLink = fullSize, score = data['score'], submitter = data['author'], source = 'adviceanimals', created = data['created'] , threadLink = 'http://reddit.com' + data['permalink'], title = data['permalink'].replace('/', '') + '.jpg') p.save() potentialize(data['permalink'].replace('/', '')) elif classification2[2] < 20: #only classify as potential_lib if very confident librarize(classification2[0][8:]) macro = ImageMacro.objects.get(filename = 'library/' + classification2[0][8:]) print "Added " + classification2[0][8:] + " to the library while classifying: " + fullSize classification = classification2 elif classification[2] == None: macro = None img_corrupt = True else: macro = ImageMacro.objects.get(filename = classification[0]) m = Meme(classification = macro, thumbnailLink = data['thumbnail'], fullSizeLink = fullSize, score = data['score'], submitter = data['author'], topDist = classification[2] , topCorr = classification[3] , source = 'adviceanimals', created = data['created'], threadLink = 'http://reddit.com' + data['permalink'], strong_classification = classification[1], img_corrupt = img_corrupt) m.save() if classification[2] < 25 and classification[0] != None: if c2: merge(classification[0], macro, detract = 1) else: merge(classification[0], macro)
def librarize(fileName): print fileName shutil.copyfile(dropBoxDir + 'potential_libs/' + fileName, dropBoxDir + 'library/' + fileName) os.remove(dropBoxDir + 'potential_libs/' + fileName) pim = PotentialImageMacro.objects.get(title = fileName) im = ImageMacro.objects.create(filename = 'library/' + fileName) m = Meme.objects.get(threadLink = pim.threadLink) m.classification = im m.topdist = 0 m.strong_classification = True m.save() pim.delete()
def librarize(fileName): print fileName shutil.copyfile(dropBoxDir + 'potential_libs/' + fileName, dropBoxDir + 'library/' + fileName) os.remove(dropBoxDir + 'potential_libs/' + fileName) pim = PotentialImageMacro.objects.get(title=fileName) im = ImageMacro.objects.create(filename='library/' + fileName) m = Meme.objects.get(threadLink=pim.threadLink) m.classification = im m.topdist = 0 m.strong_classification = True m.save() pim.delete()
def processItem(arr): data = arr['data'] q = Meme.objects.filter(threadLink='http://reddit.com' + data['permalink']) #Have we evaluated this submission yet? Might be worth considering only checking #memes within the last day. if q.count() == 1: #if we have, update the score and move on m = q[0] m.score = data['score'] m.save() elif data['thumbnail'] != 'default': #have not evaluated this submission yet, run tests and store thumbnailPage = urllib3.PoolManager().request('GET', data['thumbnail']) filepath = dropBoxDir + 'target.jpg' f = open(filepath, 'wb') f.write(urllib3.PoolManager().request('GET', data['thumbnail']).data) f.close() #classify.classify() gets elements #1: image macro filepath that it belongs to- (0 if no match) #2: topTwoDist touple - (best match, 2nd best match) #3: topTwoCorr touple - (best match, 2nd best match) classification = classify.classify(filepath) if ".jpg" in data['url']: fullSize = data['url'] else: fullSize = fullSizePhoto(data['url']) if classification[0] == None: macro = None else: macro = ImageMacro.objects.get(filename=classification[0]) m = Meme(classification=macro, thumbnailLink=data['thumbnail'], fullSizeLink=fullSize, score=data['score'], submitter=data['author'], corrDict=repr(classification[2]), distDict=repr(classification[1]), source='adviceanimals', created=data['created'], threadLink='http://reddit.com' + data['permalink']) m.save()
def processItem(arr): data = arr['data'] q = Meme.objects.filter(threadLink = 'http://reddit.com' + data['permalink']) #Have we evaluated this submission yet? Might be worth considering only checking #memes within the last day. if q.count() == 1: #if we have, update the score and move on m = q[0] m.score = data['score'] m.save() elif data['thumbnail'] != 'default': #have not evaluated this submission yet, run tests and store thumbnailPage = urllib3.PoolManager().request('GET', data['thumbnail']) filepath = dropBoxDir + 'target.jpg' f = open(filepath, 'wb') f.write(urllib3.PoolManager().request('GET', data['thumbnail']).data) f.close() #classify.classify() gets elements #1: image macro filepath that it belongs to- (0 if no match) #2: topTwoDist touple - (best match, 2nd best match) #3: topTwoCorr touple - (best match, 2nd best match) classification = classify.classify(filepath) if ".jpg" in data['url']: fullSize = data['url'] else: fullSize = fullSizePhoto(data['url']) if classification[0] == None: macro = None else: macro = ImageMacro.objects.get(filename = classification[0]) m = Meme(classification = macro, thumbnailLink = data['thumbnail'], fullSizeLink = fullSize, score = data['score'], submitter = data['author'], corrDict = repr(classification[2]), distDict = repr(classification[1]), source = 'adviceanimals', created = data['created'], threadLink = 'http://reddit.com' + data['permalink']) m.save()
def processItemFullSize(arr): data = arr['data'] q = Meme.objects.filter(threadLink='http://reddit.com' + data['permalink']) #Have we evaluated this submission yet? Might be worth considering only checking #memes within the last day. if q.count() > 1: raise Exception( "More than one of the same permalink in db for permalink:" + data['permalink']) if q.count() == 1: #if we have, update the score and move on m = q[0] m.score = data['score'] m.save() elif ".jpeg" not in data['url']: #have not evaluated this submission yet, run tests and store filepath = dropBoxDir + 'target.jpg' if ".jpg" in data['url'] or ".png" in data['url']: fullSize = data['url'] else: fullSize = fullSizePhoto(data['url']) f = open(filepath, 'wb') f.write(urllib3.PoolManager().request('GET', fullSize).data) f.close() img_corrupt = False c2 = False #classify.classify() gets 2 elements: image macro/none, strong/weak classification = classify.classify(filepath) if classification[0] == None and classification[2] != None: c2 = True macro = None #try classifying on potential libs classification2 = classify.classify(filepath, directory=dropBoxDir + 'potential_libs/') if classification2[0] == None: #add image to potential_libs p = PotentialImageMacro( thumbnailLink=data['thumbnail'], fullSizeLink=fullSize, score=data['score'], submitter=data['author'], source='adviceanimals', created=data['created'], threadLink='http://reddit.com' + data['permalink'], title=data['permalink'].replace('/', '') + '.jpg') p.save() potentialize(data['permalink'].replace('/', '')) elif classification2[ 2] < 20: #only classify as potential_lib if very confident librarize(classification2[0][8:]) macro = ImageMacro.objects.get(filename='library/' + classification2[0][8:]) print "Added " + classification2[0][ 8:] + " to the library while classifying: " + fullSize classification = classification2 elif classification[2] == None: macro = None img_corrupt = True else: macro = ImageMacro.objects.get(filename=classification[0]) m = Meme(classification=macro, thumbnailLink=data['thumbnail'], fullSizeLink=fullSize, score=data['score'], submitter=data['author'], topDist=classification[2], topCorr=classification[3], source='adviceanimals', created=data['created'], threadLink='http://reddit.com' + data['permalink'], strong_classification=classification[1], img_corrupt=img_corrupt) m.save() if classification[2] < 25 and classification[0] != None: if c2: merge(classification[0], macro, detract=1) else: merge(classification[0], macro)