Example #1
def request_Key_Word_Classifier(submission, phrase_set):
    '''
    A hand-made feature set to identify titles that make Reddit-typical
    requests for help but might not phrase the request as a question.
    '''
    text = ' '.join(summarizeText.parseStringSimple01(submission.title))
    # Each candidate phrase is passed through the same string parser below so
    # that matching is consistent with the parsed title
    #phrase_set = botHelperFunctions.load_autoreply_key_phrases(fl_path='misc/autoreplyKeyPhrases.txt')
    '''
    phrase_set = ['need help', '[help]', '[ help ]', '[question]', 
                    '[ question ]', 'noob ', 'n00b ', ' newb','please help', 
                    'noobie question', 'help!', 'help me', "isn't working",
                    'not working', 'issues with', 'issue with',
                    'looking for tutorial', 'Quick question', 'help needed',
                    'plz help', "what's wrong", "need some help", '[q]',
                    '[Beginner Question]']
    '''
    request_Made = False
    #print text
    for phrase in phrase_set:
        if ' '.join(summarizeText.parseStringSimple01(phrase)).lower() in text:
            logging.info(phrase + " was used in the post title")
            request_Made = True
            break
    
    #if submission.id not in submission.url:
        # Links off site 
        # This check was not in early versions of the bot (v pa0.1.01 and earlier)
    #    logging.debug(  '\t'+'Results: Error. Classification is dead,  (URL) Mismatch. ')
    #    request_Made = False

    return request_Made
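
A minimal usage sketch of the keyword matcher above. Because summarizeText and praw are not available in this snippet, the sketch stands in a plain lower-casing tokenizer for parseStringSimple01 and a stub submission object, and the phrase list is a shortened version of the commented-out default set; all of these stand-ins are assumptions, not part of the original bot.

# Hedged sketch: _parse_stub and the SimpleNamespace "submission" below are
# stand-ins for summarizeText.parseStringSimple01 and a praw submission.
from types import SimpleNamespace

def _parse_stub(s):
    # Stand-in parser: lower-case and split on whitespace
    return s.lower().split()

def keyword_match(title, phrase_set, parse=_parse_stub):
    text = ' '.join(parse(title))
    return any(' '.join(parse(p)).lower() in text for p in phrase_set)

phrases = ['need help', '[help]', 'not working', 'quick question']
post = SimpleNamespace(title='Quick question about list comprehensions')
print(keyword_match(post.title, phrases))   # True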
Example #2
def xrequest_Key_Word_Filter(submission, phrase_set):
    '''
    A hand-made feature set to identify titles that make Reddit-typical
    requests for help but might not phrase the request as a question.
    '''

    # Consider adding a time delay here?

    text = ' '.join(summarizeText.parseStringSimple01(submission.title))
    # Each candidate phrase is passed through the same string parser below so
    # that matching is consistent with the parsed title
    #phrase_set = botHelperFunctions.load_autoreply_key_phrases(fl_path='misc/autoreplyKeyPhrases.txt')
    '''
    phrase_set = ['need help', '[help]', '[ help ]', '[question]', 
                    '[ question ]', 'noob ', 'n00b ', ' newb','please help', 
                    'noobie question', 'help!', 'help me', "isn't working",
                    'not working', 'issues with', 'issue with',
                    'looking for tutorial', 'Quick question', 'help needed',
                    'plz help', "what's wrong", "need some help", '[q]',
                    '[Beginner Question]']
    '''
    request_Made = False
    #print text
    for phrase in phrase_set:
        if ' '.join(summarizeText.parseStringSimple01(phrase)).lower() in text:
            logging.info(phrase + " was used in the post title")
            request_Made = True
            break

    if request_Made:
        text = summarizeText.parseStringSimple01(submission.selftext)
        sents = nltk.sent_tokenize(' '.join(text))
        # Returning the last sentence is purely a guess; selecting
        # sentences by idf and entropy score would likely work better.
        try:
            return sents[-1]
        except IndexError:
            # No sentences were found, e.g. the submission links off-site
            logging.info(
                "Failed to grab last sentence: probably links off-site")

    return False
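
The comment in the block above notes that picking the last sentence is a guess and that an idf/entropy score would be more useful. Below is a minimal sketch of that idea: each sentence of a post is scored by the mean inverse document frequency of its words, computed over the post's own sentences. The whitespace tokenizer and the sample sentences are assumptions for illustration, not the bot's actual selection logic.

import math

def pick_sentence_by_idf(sentences):
    # Treat each sentence as a tiny "document" and score sentences by the
    # mean idf of their words; sentences with rarer words rank higher.
    tokenized = [set(s.lower().split()) for s in sentences]
    n = len(tokenized)

    def idf(word):
        df = sum(1 for toks in tokenized if word in toks)
        return math.log(n / df)

    scores = [sum(idf(w) for w in toks) / max(len(toks), 1)
              for toks in tokenized]
    return sentences[scores.index(max(scores))]

sents = ["I am new to Python.",
         "I am trying to parse a CSV file.",
         "The csv.DictReader call keeps raising a KeyError."]
print(pick_sentence_by_idf(sents))  # picks the most distinctive sentence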
Example #3
def basicQuestionClassify(submission, classifier):
    """
    A really simple classifier. if a submission is old enough, has low enough votes
    and asks a question, it's treated as a basic question that r/learnpython is 
    better suited for.
    Parameters
    ----------
    submission : praw submission object
    user : praw user object
    classifier : nltk classifier object
    tdm : term document matrix object
    Returns
    -------
     
    Notes
    -----
    It's not what I want, but it'll force me to
    References
    ----------
    Examples
    --------
    """
    #title = summarizeText.parseStringSimple01(submission.title)
    #text = summarizeText.parseStringSimple01(submission.selftext)
    #postText = title + text

    # created_utc is a Unix timestamp (float), so convert it before subtracting
    postAge = datetime.datetime.utcnow() - datetime.datetime.utcfromtimestamp(
        submission.created_utc)
    hours2 = datetime.timedelta(hours=2)
    #logging.debug(  '\t'+"Post Age: "+ str(postAge) )

    votes = submission.score 
    upvoteRatio = submission.upvote_ratio 


    #print title
    #logging.debug(  '\t'+ "Votes: "+ str(votes))
    #logging.debug(  '\t'+ "Upvote Ratio: "+str( upvoteRatio))

    if postAge < hours2:
        return False

    if votes > 0:
        return False

    if upvoteRatio > 0.41:
        return False

    if submission.id not in submission.url:
        # The submission links off-site, i.e. it is not a self post
        logging.debug('\tResults: skipped, submission links off-site (URL mismatch)')
        return False

    # ID if a question is here right now
    title = summarizeText.parseStringSimple01(submission.title, removeURL=True)
    text = summarizeText.parseStringSimple01(submission.selftext, removeURL=True)
    postText = title + text
    sents = nltk.sent_tokenize(' '.join(postText))# nltk.sent_tokenize(title) + nltk.sent_tokenize(text)
    #print " ".join(sents)
    question_Sents = []
    for sent in sents:
        #sentDisplay = ' '.join(sent.strip().split('\n'))
        #print '\t', sentDisplay.strip()
        classified = questionIdentifier.classifyString(sent, classifier)
        #print classified
        if "question" in classified.lower():
            question_Sents.append(sent)
            logging.debug(str(classified) +': '+ str(sent))
            print('\tSentence: ', sent)
            print('\tClassified As: ', classified)
        #print '\t', classified
    if len(question_Sents) > 0:
        logging.info('|'.join(question_Sents))
        return question_Sents
    logging.debug("\tNo Question Identified")



    
    # All else
    return False
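
questionIdentifier.classifyString is a project-local helper, so the example above only shows how its labels are consumed (any label containing "question" counts as a question sentence). One common way to build such a sentence-type classifier, sketched below, is NLTK's dialogue-act recipe trained on the nps_chat corpus, whose labels include 'whQuestion' and 'ynQuestion'; this is an assumption about what the helper could look like, not the bot's actual classifier.

import nltk

# Requires the corpora once: nltk.download('nps_chat'); nltk.download('punkt')
def dialogue_act_features(sentence):
    # Simple bag-of-words features over the tokenized sentence
    return {'contains({})'.format(w.lower()): True
            for w in nltk.word_tokenize(sentence)}

posts = nltk.corpus.nps_chat.xml_posts()
featuresets = [(dialogue_act_features(p.text), p.get('class')) for p in posts]
classifier = nltk.NaiveBayesClassifier.train(featuresets)

def classifyString(sentence, classifier=classifier):
    # Returns labels such as 'whQuestion', 'ynQuestion', 'Statement', ...
    return classifier.classify(dialogue_act_features(sentence))

print(classifyString("How do I reverse a list in Python?"))  # e.g. 'whQuestion'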