Ejemplo n.º 1
0
def processItemFullSize(arr):
  """Update or classify-and-store the reddit submission described by ``arr``.

  ``arr['data']`` must provide ``permalink``, ``url``, ``score``,
  ``thumbnail``, ``author`` and ``created``.

  Behaviour, keyed on how many Meme rows already carry this thread link:

  * more than one -- raises ``Exception``;
  * exactly one -- only its ``score`` is refreshed;
  * none -- unless the URL contains ".jpeg", the image is downloaded to
    ``dropBoxDir + 'target.jpg'`` (the same path on every call, so it is
    overwritten), classified, and stored as a new Meme with source
    'adviceanimals'.  Images that match no known macro are classified a
    second time against ``potential_libs/`` and are either recorded as a
    PotentialImageMacro or, on a close match, passed to ``librarize``.

  Returns None.
  """
  data = arr['data']
  thread_link = 'http://reddit.com' + data['permalink']
  q = Meme.objects.filter(threadLink=thread_link)
  # Have we evaluated this submission yet?  Might be worth considering only
  # checking memes within the last day.
  # Count once: each count() call is a separate query, and two calls could
  # disagree if rows change in between.
  existing = q.count()
  if existing > 1:
    raise Exception("More than one of the same permalink in db for permalink:" + data['permalink'])
  if existing == 1:
    # Already evaluated: update the score and move on.
    m = q[0]
    m.score = data['score']
    m.save()
    return
  if ".jpeg" in data['url']:
    # NOTE(review): URLs containing ".jpeg" are skipped entirely, exactly as
    # before; the reason is not visible from this function.
    return

  # Have not evaluated this submission yet: run tests and store.
  filepath = dropBoxDir + 'target.jpg'
  if ".jpg" in data['url'] or ".png" in data['url']:
    fullSize = data['url']
  else:
    fullSize = fullSizePhoto(data['url'])
  # Fetch first so a failed request does not truncate the target file, and
  # use a context manager so the handle is closed even if the write fails.
  payload = urllib3.PoolManager().request('GET', fullSize).data
  with open(filepath, 'wb') as f:
    f.write(payload)

  img_corrupt = False
  c2 = False  # True once the potential_libs classification has been tried
  macro = None
  # The classify.classify() result is indexed 0..3 here:
  #   [0] macro filename, or None when nothing matched
  #   [1] stored as Meme.strong_classification
  #   [2] stored as Meme.topDist; None is treated as a corrupt image
  #   [3] stored as Meme.topCorr
  classification = classify.classify(filepath)
  if classification[0] is None and classification[2] is not None:
    c2 = True
    # Try classifying against the potential libs.
    classification2 = classify.classify(filepath, directory=dropBoxDir + 'potential_libs/')
    if classification2[0] is None:
      # Add the image to potential_libs.
      p = PotentialImageMacro(
          thumbnailLink=data['thumbnail'], fullSizeLink=fullSize,
          score=data['score'], submitter=data['author'],
          source='adviceanimals', created=data['created'],
          threadLink=thread_link,
          title=data['permalink'].replace('/', '') + '.jpg')
      p.save()
      potentialize(data['permalink'].replace('/', ''))
    elif classification2[2] < 20:  # only promote if very confident
      # [8:] drops the first 8 characters of the label -- presumably a
      # directory prefix; TODO confirm against classify.classify().
      librarize(classification2[0][8:])
      macro = ImageMacro.objects.get(filename='library/' + classification2[0][8:])
      print("Added " + classification2[0][8:] + " to the library while classifying: " + fullSize)
      classification = classification2
    # Otherwise (weak potential match) the Meme is stored unclassified.
  elif classification[2] is None:
    img_corrupt = True
  else:
    macro = ImageMacro.objects.get(filename=classification[0])

  m = Meme(classification=macro, thumbnailLink=data['thumbnail'],
           fullSizeLink=fullSize, score=data['score'], submitter=data['author'],
           topDist=classification[2], topCorr=classification[3],
           source='adviceanimals', created=data['created'], threadLink=thread_link,
           strong_classification=classification[1], img_corrupt=img_corrupt)
  m.save()

  # Skip corrupt images explicitly: their distance is None, and `None < 25`
  # is True on Python 2 (and a TypeError on Python 3), which could otherwise
  # trigger a merge with macro=None.
  if classification[0] is not None and not img_corrupt and classification[2] < 25:
    if c2:
      merge(classification[0], macro, detract=1)
    else:
      merge(classification[0], macro)
Ejemplo n.º 2
0
def processItem(arr, target):
  """Update or classify-and-store the submission described by ``arr``.

  ``arr`` must provide ``threadLink``, ``score``, ``thumbnailLink``,
  ``fullSizeLink``, ``author``, ``source`` and ``created``.  ``target`` is
  handed unchanged to ``classify.classify``, ``potentialize`` and ``merge``.

  Behaviour, keyed on how many distinct Meme rows already carry this thread
  link:

  * more than one -- a message is printed and nothing else happens;
  * exactly one -- only its ``score`` is refreshed;
  * none -- the target is classified against 'macros' (falling back to
    'potentialmacros' when nothing matched), a new Meme is saved, and
    ``updateName`` is called when the Meme ended up with a classification.

  Returns None.
  """
  q = Meme.objects.filter(threadLink=arr['threadLink']).distinct()
  # Have we evaluated this submission yet?  Might be worth considering only
  # checking memes within the last day, or otherwise making the filter
  # stronger.
  print("Processing thread: " + arr['threadLink'])
  # Count once: each count() call is a separate query, and two calls could
  # disagree if rows change in between.
  existing = q.count()
  if existing > 1:
    print("More than one of the same permalink in db for permalink:" + arr['threadLink'])
    return
  if existing == 1:
    # Already evaluated: update the score and move on.
    print("Repeat submission. Updating score and moving on...")
    m = q[0]
    m.score = arr['score']
    m.save()
    return

  # Have not evaluated this submission yet: run tests and store.
  # The classify.classify() result is indexed 0..2 here:
  #   [0] macro key, or None when nothing matched
  #   [1] stored as Meme.topDist; None is treated as a corrupt image
  #   [2] stored as Meme.topCorr
  img_corrupt = False
  macro = None
  classification = classify.classify(target, 'macros')
  if classification[0] is None and classification[1] is not None:
    # Try classifying against the potential macros; from here on the stored
    # topDist/topCorr come from this second result.
    classification = classify.classify(target, 'potentialmacros')
    if classification[0] is None:
      # Add the image as a new potential macro.
      p = PotentialImageMacro(
          thumbnailLink=arr['thumbnailLink'], fullSizeLink=arr['fullSizeLink'],
          score=arr['score'], submitter=arr['author'], source=arr['source'],
          created=arr['created'], threadLink=arr['threadLink'],
          key=arr['threadLink'].replace('/', ''))
      p.save()
      potentialize(arr['threadLink'].replace('/', ''), target)
      print("Added as potential macro.")
    elif classification[1] < 14:  # only classify as potential if very confident
      librarize(classification[0])
      macro = ImageMacro.objects.get(key=classification[0])
      merge(macro, target)
      print("Moved " + classification[0] + " over to the library, and classified this item as such.")
    # Unaddressed case: weak classification.  Do not want to classify as
    # potential because doing so is going out on a limb without strong reason
    # to do so.  Also do not want to add it as a potential macro because it
    # is likely redundant.
  elif classification[1] is None:
    # Image must be corrupt because no closest-match distance was obtained.
    print("Image corrupt")
    img_corrupt = True
  else:
    macro = ImageMacro.objects.get(key=classification[0])
    if classification[1] < 25:
      merge(macro, target)
    print("Classified as " + classification[0])

  m = Meme(classification=macro, thumbnailLink=arr['thumbnailLink'],
           fullSizeLink=arr['fullSizeLink'], score=arr['score'], submitter=arr['author'],
           topDist=classification[1], topCorr=classification[2],
           source=arr['source'], created=arr['created'], threadLink=arr['threadLink'],
           img_corrupt=img_corrupt, name=name(arr['fullSizeLink']))
  m.save()
  if m.classification is not None:
    updateName(m.classification)
Ejemplo n.º 3
0
def processItemFullSize(arr):
    """Update or classify-and-store the reddit submission described by ``arr``.

    ``arr['data']`` must provide ``permalink``, ``url``, ``score``,
    ``thumbnail``, ``author`` and ``created``.

    Behaviour, keyed on how many Meme rows already carry this thread link:

    * more than one -- raises ``Exception``;
    * exactly one -- only its ``score`` is refreshed;
    * none -- unless the URL contains ".jpeg", the image is downloaded to
      ``dropBoxDir + 'target.jpg'`` (the same path on every call, so it is
      overwritten), classified, and stored as a new Meme with source
      'adviceanimals'.  Images that match no known macro are classified a
      second time against ``potential_libs/`` and are either recorded as a
      PotentialImageMacro or, on a close match, passed to ``librarize``.

    Returns None.
    """
    data = arr['data']
    thread_link = 'http://reddit.com' + data['permalink']
    q = Meme.objects.filter(threadLink=thread_link)
    # Have we evaluated this submission yet?  Might be worth considering only
    # checking memes within the last day.
    # Count once: each count() call is a separate query, and two calls could
    # disagree if rows change in between.
    existing = q.count()
    if existing > 1:
        raise Exception(
            "More than one of the same permalink in db for permalink:" +
            data['permalink'])
    if existing == 1:
        # Already evaluated: update the score and move on.
        m = q[0]
        m.score = data['score']
        m.save()
        return
    if ".jpeg" in data['url']:
        # NOTE(review): URLs containing ".jpeg" are skipped entirely, exactly
        # as before; the reason is not visible from this function.
        return

    # Have not evaluated this submission yet: run tests and store.
    filepath = dropBoxDir + 'target.jpg'
    if ".jpg" in data['url'] or ".png" in data['url']:
        fullSize = data['url']
    else:
        fullSize = fullSizePhoto(data['url'])
    # Fetch first so a failed request does not truncate the target file, and
    # use a context manager so the handle is closed even if the write fails.
    payload = urllib3.PoolManager().request('GET', fullSize).data
    with open(filepath, 'wb') as f:
        f.write(payload)

    img_corrupt = False
    c2 = False  # True once the potential_libs classification has been tried
    macro = None
    # The classify.classify() result is indexed 0..3 here:
    #   [0] macro filename, or None when nothing matched
    #   [1] stored as Meme.strong_classification
    #   [2] stored as Meme.topDist; None is treated as a corrupt image
    #   [3] stored as Meme.topCorr
    classification = classify.classify(filepath)
    if classification[0] is None and classification[2] is not None:
        c2 = True
        # Try classifying against the potential libs.
        classification2 = classify.classify(
            filepath, directory=dropBoxDir + 'potential_libs/')
        if classification2[0] is None:
            # Add the image to potential_libs.
            p = PotentialImageMacro(
                thumbnailLink=data['thumbnail'],
                fullSizeLink=fullSize,
                score=data['score'],
                submitter=data['author'],
                source='adviceanimals',
                created=data['created'],
                threadLink=thread_link,
                title=data['permalink'].replace('/', '') + '.jpg')
            p.save()
            potentialize(data['permalink'].replace('/', ''))
        elif classification2[2] < 20:  # only promote if very confident
            # [8:] drops the first 8 characters of the label -- presumably a
            # directory prefix; TODO confirm against classify.classify().
            librarize(classification2[0][8:])
            macro = ImageMacro.objects.get(
                filename='library/' + classification2[0][8:])
            print("Added " + classification2[0][8:] +
                  " to the library while classifying: " + fullSize)
            classification = classification2
        # Otherwise (weak potential match) the Meme is stored unclassified.
    elif classification[2] is None:
        img_corrupt = True
    else:
        macro = ImageMacro.objects.get(filename=classification[0])

    m = Meme(classification=macro,
             thumbnailLink=data['thumbnail'],
             fullSizeLink=fullSize,
             score=data['score'],
             submitter=data['author'],
             topDist=classification[2],
             topCorr=classification[3],
             source='adviceanimals',
             created=data['created'],
             threadLink=thread_link,
             strong_classification=classification[1],
             img_corrupt=img_corrupt)
    m.save()

    # Skip corrupt images explicitly: their distance is None, and `None < 25`
    # is True on Python 2 (and a TypeError on Python 3), which could otherwise
    # trigger a merge with macro=None.
    if (classification[0] is not None and not img_corrupt
            and classification[2] < 25):
        if c2:
            merge(classification[0], macro, detract=1)
        else:
            merge(classification[0], macro)