コード例 #1
0
def sendDupMessage(newE, dups, commenter, db, byCrc=True):
    """Queue a "duplicate"/"similar" notice for a new entry and log it.

    newE      -- mapping with the keys u'datetime', u'account_id', u'id'
                 and u'title' describing the freshly seen entry.
    dups      -- list of earlier sightings; each row is indexed as
                 (datetime, userurl, userid, link, title).  The list is
                 sorted in place.
    commenter -- object providing TYPE_COMMENT_OR_REPLY and a ``queue``
                 list; the built message is appended to that queue.
    db        -- object providing commentWritten(key) and
                 logComment(key, commentId).
    byCrc     -- True selects the str_duplicate* templates, False the
                 str_similar* templates.

    Returns None.  Nothing is queued when db.commentWritten() reports that
    the entry already has a comment.
    """
    # Flatten the entry into the same layout as the rows in ``dups``:
    # (datetime, userurl, account id, entry id, title).
    newE = (newE[u'datetime'], "", newE[u'account_id'], newE[u'id'], newE[u'title'])

    # The check must use the entry id (index 3), i.e. the same key that is
    # handed to db.logComment() below.  Index 2 is the account id.
    if db.commentWritten(newE[3]):
        return

    # shouldn't be needed as the db sorts it by date, but sometimes there is a bug ?! TODO:
    dups.sort()

    lastSeen = dups[-1]
    firstSeen = dups[0]
    seen = len(dups)
    # Only mention the first sighting when it is a different post than the last one.
    trip = firstSeen[3] != lastSeen[3]

    def describe(other):
        # (age relative to the new entry, link of the older post, title similarity bar)
        to, tn = (other[4].lower().strip(), newE[4].lower().strip())
        titleInf = u"Title similarity: " + genUnicodeSuccessBar(5, 1-levenshtein_n(to, tn), 1, [u'★'], u'☆', u'', u'')
        return (crudeTimeFormat(newE[0]-other[0]), other[3], titleInf)

    # Message layout follows the (commented-out) reference signature
    #   commenter.appendComment(galleryId, message, retries, parentCommentId=-1, childComment=None)
    # NOTE(review): 5 / 7 are presumably retry counts and -1 "no parent" - confirm against commenter.

    # last seen
    lastTemplate = str_duplicate if byCrc else str_similar
    message = [commenter.TYPE_COMMENT_OR_REPLY, newE[3], lastTemplate % describe(lastSeen), 5, -1]

    # first seen: appended as a nested message in the sixth slot
    if trip:
        firstInfo = describe(firstSeen)
        if seen > 2:
            firstTemplate = str_duplicate_firstTimes if byCrc else str_similar_firstTimes
            firstText = firstTemplate % ((seen-1,) + firstInfo)
        else:
            firstTemplate = str_duplicate_first if byCrc else str_similar_first
            firstText = firstTemplate % firstInfo
        message.append([commenter.TYPE_COMMENT_OR_REPLY, newE[3], firstText, 7, -1])

    commenter.queue.append(message)
    db.logComment(newE[3], -1)
コード例 #2
0
def shouldIPost(con, elem, dups, maxPostTime):
    """Decide whether a duplicate notice should be posted for ``elem``.

    con         -- object providing is_user_blocked(account_id).
    elem        -- mapping with the keys u'title', u'datetime',
                   u'account_url' and u'account_id' for the new entry.
    dups        -- list of earlier sightings; only the last row is used and
                   is unpacked as (datetime, userurl, userid, link, title).
    maxPostTime -- maximum age of ``elem`` (same unit as time()) for which
                   a notice is still posted.

    Returns True when a notice should be posted, otherwise False.
    """
    dupTime, userurl, userid, link, title = dups[-1]

    # The entry is too old to be commented on.
    if time() >= elem[u'datetime'] + maxPostTime:
        return False

    # Same uploader as the latest duplicate: no notice for self-reposts.
    # TODO: drop the url comparison when all users have a userid
    if elem[u'account_url'] is not None and elem[u'account_url'] == userurl:
        return False
    # A falsy id (0 / None) is treated as "unknown" and skips the comparison,
    # mirroring the None handling of the url check above.
    if elem[u'account_id'] and elem[u'account_id'] == userid:
        return False

    # TODO: use userblacklist by id plus names till all entries have ids
    if elem[u'account_url'] is not None and con.is_user_blocked(int(elem[u'account_id'])):
        return False

    # NOTE(review): levenshtein_n is presumably a normalised distance in [0, 1] - confirm.
    to, tn = (elem[u'title'].lower().strip(), title.lower().strip())
    if levenshtein_n(to, tn) < 0.4:
        return True

    # Titles differ a lot: only post for recent duplicates whose title is
    # not a "mrw" / "mfw" / "when ..." reaction caption.
    lowered = elem[u'title'].lower()
    words = lowered.split(u' ')
    return (
        abs(elem[u'datetime'] - dupTime) < 60*60*24*60  # time diff < 60 days
        and u'mrw' not in words
        and u'mfw' not in words
        and not lowered.startswith(u'when')
    )