def sendDupMessage(newE, dups, commenter, db, byCrc=True):
    """Queue a "you have been posted before" comment for a new entry.

    A comment referring to the most recent earlier post is built; when the
    oldest earlier post is a different link, a second comment referring to
    it is attached as a nested child of the first one.  The result is
    appended to ``commenter.queue`` and recorded with ``db.logComment``.

    newE      -- mapping with at least u'datetime', u'id' and u'title'.
    dups      -- non-empty list of earlier posts; each row is indexed as
                 (datetime, userurl, userid, link, title).  The list is
                 sorted IN PLACE.  An empty list raises IndexError.
    commenter -- object providing TYPE_COMMENT_OR_REPLY and a ``queue`` list.
    db        -- object providing commentWritten(id) and logComment(id, cid).
    byCrc     -- True selects the ``str_duplicate*`` templates, False the
                 ``str_similar*`` ones.

    Returns None.  Nothing is queued when the db reports that a comment
    for this entry was already written.
    """
    posted_at = newE[u'datetime']
    gallery_id = newE[u'id']

    # Fix: the guard used to pass index 2 of the internal tuple, which became
    # the account id once u'account_id' was added to that tuple.  Comments
    # are logged under the entry id (see db.logComment below), so the check
    # has to use the same key.
    # NOTE(review): assumes commentWritten() is keyed like logComment().
    if db.commentWritten(gallery_id):
        return

    # Should not be needed as the db sorts the rows by date, but that has
    # not always held, so sort again.
    dups.sort()
    lastSeen = dups[-1]
    firstSeen = dups[0]
    seen = len(dups)
    # A separate "first seen" comment only makes sense when the oldest and
    # the newest earlier post are different links.
    trip = firstSeen[3] != lastSeen[3]

    new_title = newE[u'title'].lower().strip()

    def similarity_line(old_title):
        # levenshtein_n is used here as a distance, so 1 - distance is shown
        # as a five-star similarity bar.
        score = 1 - levenshtein_n(old_title.lower().strip(), new_title)
        return u"Title similarity: " + genUnicodeSuccessBar(
            5, score, 1, [u'★'], u'☆', u'', u'')

    # Queue entry layout: [type, entry id, text, 5, -1(, child entry)].
    # NOTE(review): 5/7 look like retry counts and -1 like "no parent
    # comment" -- confirm against the commenter implementation.

    # last seen
    titleInf = similarity_line(lastSeen[4])
    template = str_duplicate if byCrc else str_similar
    message = [
        commenter.TYPE_COMMENT_OR_REPLY,
        gallery_id,
        template % (crudeTimeFormat(posted_at - lastSeen[0]), lastSeen[3], titleInf),
        5,
        -1,
    ]

    # first seen
    if trip:
        titleInf = similarity_line(firstSeen[4])
        age = crudeTimeFormat(posted_at - firstSeen[0])
        if seen > 2:
            # More than two earlier posts: also mention how many there were.
            template = str_duplicate_firstTimes if byCrc else str_similar_firstTimes
            text = template % (seen - 1, age, firstSeen[3], titleInf)
        else:
            template = str_duplicate_first if byCrc else str_similar_first
            text = template % (age, firstSeen[3], titleInf)
        message.append(
            [commenter.TYPE_COMMENT_OR_REPLY, gallery_id, text, 7, -1])

    commenter.queue.append(message)
    db.logComment(gallery_id, -1)
def shouldIPost(con, elem, dups, maxPostTime):
    """Decide whether a duplicate notice should be posted for ``elem``.

    con         -- object providing is_user_blocked(account_id).
    elem        -- mapping describing the new entry; u'datetime', u'title',
                   u'account_url' and u'account_id' are read.
    dups        -- non-empty list of earlier posts; only the last row is
                   used and is unpacked as
                   (datetime, userurl, userid, link, title).
    maxPostTime -- the entry is only eligible while
                   time() < elem[u'datetime'] + maxPostTime.

    Returns True when all of the following hold:
      * the entry is still inside the posting window,
      * its author is not blocked,
      * its author is not the author of the earlier post (by url or id),
      * the titles are close (normalised Levenshtein value below 0.4), or
        the earlier post is less than 60 days away and the title is not a
        reaction title (contains the word "mrw"/"mfw" or starts with
        "when").
    Otherwise returns False.
    """
    dup_time, userurl, userid, link, title = dups[-1]

    # Too old to be worth a comment.
    if time() >= elem[u'datetime'] + maxPostTime:
        return False

    # TODO: use userblacklist by id plus names till all entries have ids
    if elem[u'account_url'] is not None and con.is_user_blocked(int(elem[u'account_id'])):
        return False

    # Never comment on somebody reposting their own content.
    # TODO: drop the url comparison when all users have a userid
    if elem[u'account_url'] is not None and elem[u'account_url'] == userurl:
        return False
    # Fix: this used to read ``account_id != 0 or account_id != userid``,
    # which is true for every non-zero id and therefore never rejected a
    # same-author repost.  Mirrors the url check above: a zero id passes,
    # otherwise the ids must differ.
    # NOTE(review): assumes 0 is the id used for anonymous posts -- confirm.
    if elem['account_id'] != 0 and elem['account_id'] == userid:
        return False

    lowered = elem[u'title'].lower()
    levn = levenshtein_n(lowered.strip(), title.lower().strip())
    if levn < 0.4:
        return True

    # Titles differ noticeably: only post for recent earlier posts whose new
    # title is not a reaction caption.
    sixty_days = 60 * 60 * 24 * 60
    words = lowered.split(u' ')
    return (
        abs(elem[u'datetime'] - dup_time) < sixty_days
        and u'mrw' not in words
        and u'mfw' not in words
        and not lowered.startswith(u'when')
    )