def parse_total_page(self, response):
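        '''Parse the total friend-circle page count and schedule requests for each page.'''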
        analyzer = Analyzer()
        total_pq = analyzer.get_html(response.body,
                                     'script:contains("W_pages")')
        friendcircle_analyzer = keyword_info_analyzer()
        total_pages = friendcircle_analyzer.get_totalpages(
            total_pq)  # number of Weibo friend-circle pages to crawl
        logger.info("the total_pages is: %d", total_pages)

        getweibopage = GetWeibopage()
        mainpage_url = response.meta['mainpage_url']
        user_id = response.meta['uid']
        is_search = response.meta['is_search']

        for page in range(total_pages):  # TODO: change this to total_pages
            GetWeibopage.data['uid'] = user_id
            GetWeibopage.data['page'] = page + 1
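            # request the three lazy-load segments that make up this page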
            firstload_url = mainpage_url + getweibopage.get_firstloadurl()
            yield Request(url=firstload_url,
                          cookies=random.choice(COOKIES),
                          meta={
                              'uid': user_id,
                              'is_search': is_search
                          },
                          callback=self.parse_load)

            secondload_url = mainpage_url + getweibopage.get_secondloadurl()
            yield Request(url=secondload_url,
                          cookies=random.choice(COOKIES),
                          meta={
                              'uid': user_id,
                              'is_search': is_search
                          },
                          callback=self.parse_load)

            thirdload_url = mainpage_url + getweibopage.get_thirdloadurl()
            yield Request(url=thirdload_url,
                          cookies=random.choice(COOKIES),
                          meta={
                              'uid': user_id,
                              'is_search': is_search
                          },
                          callback=self.parse_load,
                          dont_filter=True)
Code Example #2
File: application.py Project: AmrMKayid/Tweetyzer
def search():

    # validate screen_name
    screen_name = request.args.get("screen_name", "")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name)
    if tweets is None:
        return redirect(url_for("index"))
    
    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)
    
    positive, negative, neutral = 0.0, 0.0, 0.0
    
    for tweet in tweets:
        score = analyzer.analyze(tweet)
        if score > 0.0:
            positive += 1
        elif score < 0.0:
            negative += 1
        else:
            neutral += 1
    
    sentiments = positive + negative + neutral
    
    # Percentage 
    positive = (positive / sentiments) * 100
    negative = (negative / sentiments) * 100
    neutral = (neutral / sentiments) * 100
    
    #print("Positive: " + str(positive) + " Negative: " + str(negative) + " Neutral: " + str(neutral))

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
Code Example #3
def search():

    # validate screen_name
    screen_name = request.args.get("screen_name", "").lstrip("@")
    if not screen_name:
        return redirect(url_for("index"))

    # query Twitter's API for the user's most recent 100 tweets
    tweets = helpers.get_user_timeline(screen_name, 100)
    if not tweets:
        return redirect(url_for("index"))

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    tw_analyzer = Analyzer(positives, negatives)

    # analyze the sentiment of each tweet
    positive = 0
    negative = 0
    neutral = 0
    for tweet in tweets:
        score = tw_analyzer.analyze(tweet)
        if score > 0.0:
            positive += 1
        elif score < 0.0:
            negative += 1
        else:
            neutral += 1

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
Code Example #4
def main():
    # global fs, analyzer
    # fs = FakeSender()
    # analyzer = Analyzer(fs.get_queue(), 11)

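    # shared queues and lists wired between the pipeline components below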
    q = Queue()
    opq = Queue()
    receiving_list = []
    token_list = []
    context_list = []
    final_string = ""
    connected = [0]
    r = Receiver(q, receiving_list, connected, opq)
    a = Analyzer(receiving_list, token_list)
    s = Semantic(q, receiving_list, token_list, context_list, final_string)
    sender = Sender()

    interface = Interface(q, receiving_list, token_list, context_list,
                          final_string, s, connected, sender, opq)
Code Example #5
def search():

    # validate screen_name
    screen_name = request.args.get("screen_name", "")
    if not screen_name:
        return redirect(url_for("index"))

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    name = screen_name.lstrip('@')

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name, 100)
    if not tweets:
        return redirect(url_for("index"))

    # declare count variables
    count, positive_count, negative_count, neutral_count = 0, 0, 0, 0

    # get single tweets and count them
    for tweet in tweets:
        count += 1
        score = analyzer.analyze(tweet)
        if score > 0.0:
            positive_count += 1
        elif score < 0.0:
            negative_count += 1
        else:
            neutral_count += 1

    # convert counts to percentages
    positive = positive_count / count * 100
    negative = negative_count / count * 100
    neutral = neutral_count / count * 100

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=name)
Code Example #6
def search():

    # validate screen_name
    screen_name = request.args.get("screen_name", "")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name, count=100)
    if not tweets:
        return redirect(url_for("index"))

    # TODO
    analyzer = Analyzer('positive-words.txt', 'negative-words.txt')

    # analyze word
    positive = 0
    negative = 0
    neutral = 0
    total = 0

    for tweet in tweets:

        score = analyzer.analyze(tweet)
        if score > 0.0:
            positive += 1
            total += 1
        elif score < 0.0:
            negative += 1
            total += 1
        else:
            neutral += 1
            total += 1
    positive = positive / total
    negative = negative / total
    neutral = neutral / total
    print(positive, 'pos', negative, 'neg', neutral, 'neut')

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
Code Example #7
def search():

    # validate screen_name
    screen_name = request.args.get("screen_name", "").lstrip("@")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name, count=100)
    if not tweets:
        return redirect(url_for("index"))

    analyzer = Analyzer(positives="positive-words.txt", negatives="negative-words.txt")

    positive = 0
    negative = 0
    neutral = 0

    # score each tweet word by word, then classify the whole tweet once
    for tweet in tweets:
        score = 0
        for word in tweet.lower().split(" "):
            if word in analyzer.positive_words:
                score += 1
            elif word in analyzer.negative_words:
                score -= 1
        if score > 0:
            positive += 1
        elif score < 0:
            negative += 1
        else:
            neutral += 1

    # convert counts to percentages
    total = positive + negative + neutral
    positive = (positive / total) * 100
    negative = (negative / total) * 100
    neutral = (neutral / total) * 100

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
Code Example #8
    def parse_userinfo(self, response):
        item = WeibospiderItem()
        analyzer = Analyzer()
        try:
            total_pq1 = analyzer.get_html(response.body,
                                          'script:contains("pf_photo")')
            #item['image_urls'] = analyzer.get_userphoto_url(total_pq1)
            item['image_urls'] = None

            total_pq2 = analyzer.get_html(response.body,
                                          'script:contains("PCD_text_b")')
            item['userinfo'] = analyzer.get_userinfo(total_pq2)
        except Exception, e:
            item['userinfo'] = {}.fromkeys(
                ('昵称:'.decode('utf-8'), '所在地:'.decode('utf-8'),
                 '性别:'.decode('utf-8'), '博客:'.decode('utf-8'),
                 '个性域名:'.decode('utf-8'), '简介:'.decode('utf-8'),
                 '生日:'.decode('utf-8'), '注册时间:'.decode('utf-8')), '')
            item['image_urls'] = None
Code Example #9
def start_evaluator(predict_function):
    an = Analyzer()
    raw_input("\nPRESS ENTER TO CONTINUE")
    os.system('clear')
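    # read sentences until the user types 'exit'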
    sentence = raw_input(
        "\nEscriba una frase a analizar o 'exit' para salir > ")
    while sentence != 'exit':
        try:
            sentence = _correction(sentence)
            analized_sentence = [item['form'] for item in an.analyze(sentence)]
            Y = predict_function(analized_sentence)
            print '\nRESULTADO:'
            print ' '.join("\033[91m" + wd + "\033[0m" if tg else wd
                           for wd, tg in zip(analized_sentence, Y))
        except Exception:
            print '-- Ocurrio un error --'

        sentence = raw_input(
            "\nEscriba una opinion a analizar o exit para salir > ")
Code Example #10
File: application.py Project: lianagitsit/CS50
def search():

    # validate screen_name
    screen_name = request.args.get("screen_name", "").lstrip("@")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name)

    # TODO
    if tweets is None:
        return redirect(url_for("index"))
    
    tknzr = TweetTokenizer()
    
    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")
    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)
    
    positive, negative, neutral = 0.0, 0.0, 0.0
    
    for tweet in tweets:
        print(tknzr.tokenize(tweet))
        # analyze tweet
        score = analyzer.analyze(tweet)
        if score > 0.0:
            # positive = (positive / len(tweets)) * 100
            positive = positive + 1
            print("positive: {}".format(positive))
        elif score < 0.0:
            negative = negative + 1
            print("negative: {}".format(negative))
        else:
            neutral = neutral + 1
            
    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
Code Example #11
    def makeCore(self):
        calcWindow = self.getCurrentNodeEditorWidget()
        #print(calcWindow.filename)
        # if calcWindow.filename == "/Users/ouroboros/Desktop/df5.json":
        #     print("Core(0, 0) : labels :: -> b - > a - >, brackets :: -> (1 -> ( ->.")
        #     print("Core(1, 1) : labels :: -> b - > a -> b -> a - >, brackets :: -> (1 -> )1 -> ( -> ( ->.")
        #     print("Core(1, 1) : labels :: -> b - > b -> a -> a - >, brackets :: -> (1 -> ( -> )1 -> ( ->.")
        # elif calcWindow.filename == "/Users/ouroboros/Desktop/df6.json":
        #     print("Core(0, 0) : labels :: -> _ -> b - > b - >, brackets :: empty")
        #     print("Core(1, 1) : labels :: -> _ -> a - > b -> a -> b - >, brackets :: -> _ -> ( -> _ -> ) -> _ ->.")
        analyze_graph = Analyzer(calcWindow.scene.nodes,
                                 calcWindow.scene.edges)
        analyze_graph.checkCycles()
        brackets, labels, msg = analyze_graph.buildCore()
        if msg == '':
            QMessageBox.warning(self, "Ядро L-графа",
                                "Не получилось найти ядро.")
        else:
            QMessageBox.about(self, "Ядро L-графа", msg)
Code Example #12
def clear_background_for_image(source, target, colors, verbose=True):
    """Clear background of source image.
    
    Result is a black and white image in png format. White color 
    corresponds to foreground pixels, black color to background pixels."""
    print "%s -> %s" % (source, target)
    img = Image.open(source)
    gray = img.convert("L")
    # compute mean color
    total = (gray.size[0] * gray.size[1])
    mean = sum((col * n for col, n in gray.getcolors())) / total
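    # threshold slightly below the mean gray level to separate the background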
    try:
        analyzer = Analyzer(img, colors, verbose)
        pixels, groups, indexes = analyzer.filter_background(
            gray, color_threshold=mean - 30)
        analyzer.save_pixels(target, pixels)
    except AnalyzerError as e:
        print "Error: %s: %s" % (source, str(e))
Code Example #13
File: application.py Project: wdlsvnit/SMP-2017-Web
def search():

    # validate screen_name
    screen_name = request.args.get("screen_name", "")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name, 100)
    if tweets is None:
        return redirect(url_for("index"))

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")
    
    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)
    
    sump = 0
    sumn = 0
    sumg = 0
    for tweet in tweets:
        # analyze word
        score = analyzer.analyze(tweet)
        if score > 0.0:
            sump += 1
        elif score < 0.0:
            sumn += 1
        else:
            sumg += 1
    sumt = sump + sumn + sumg
    rp = (sump/sumt)*100
    rn = (sumn/sumt)*100
    rg = (sumg/sumt)*100
    positive, negative, neutral = rp, rn, rg

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
Code Example #14
def search():

    # validate screen_name
    screen_name = request.args.get("screen_name", "")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name)
    if tweets is None:
        return redirect(url_for("index"))

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    # analyze tweets
    positive = 0
    negative = 0
    neutral = 0
    for tweet in tweets:
        score = analyzer.analyze(tweet)
        if score > 0.0:
            positive += 1
        elif score < 0.0:
            negative += 1
        else:
            neutral += 1

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
Code Example #15
def search():

    #A LOT OF CODE DUPLICATION!
    # absolute paths to positive and negative lists.
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    # validate screen_name
    screen_name = request.args.get("screen_name", "").lstrip("@")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name, count=99)
    if tweets is None:
        return redirect(url_for("index"))

    positive = 0
    negative = 0
    neutral = 0
    totalScore = 0
    for tweet in tweets:
        score = analyzer.analyze(tweet)
        if score > 0:
            positive += 1
        elif score < 0:
            negative += 1
        else:
            neutral += 1
        totalScore += 1

    positive, negative, neutral = int((positive / totalScore) * 100), int(
        (negative / totalScore) * 100), int((neutral / totalScore) * 100)

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
Code Example #16
def search():

    # validate screen_name
    screen_name = request.args.get("screen_name", "").lstrip("@")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    userTweets = helpers.get_user_timeline(screen_name)
    if userTweets is None:
        return redirect(url_for("index"))

    positive, negative, neutral = 0.0, 0.0, 0.0

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)
    # iterate through at most the first 100 of the user's tweets

    # determine how many tweets the user has
    numTweets = len(userTweets)
    if numTweets > 100:
        numTweets = 100

    for i in range(numTweets):
        # analyze tweet
        score = analyzer.analyze(userTweets[i])
        if score > 0.0:
            positive = positive + 1.0
        elif score < 0.0:
            negative = negative + 1.0
        else:
            neutral = neutral + 1.0

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
Code Example #17
File: application.py Project: kevinwalsh23/CS50
def search():

    # validate screen_name
    screen_name = request.args.get("screen_name", "").lstrip("@")
    if not screen_name:
        return redirect(url_for("index"))

    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    positive, negative, neutral = 0.0, 0.0, 100.0

    analyzer = Analyzer(positives, negatives)
    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name)
    if tweets is None:
        return redirect(url_for("index"))

    # each scored tweet converts one neutral point into positive or negative
    for i in tweets:
        score = analyzer.analyze(i)
        if score > 0.0:
            positive += 1.0
            neutral -= 1.0
        elif score < 0.0:
            negative += 1.0
            neutral -= 1.0
    print(positive)

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
Code Example #18
def search2():

    # validate screen_name
    screen_name = request.args.get("screen_name", "")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name)
    if not tweets:
        return redirect(url_for("index"))

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")
    classics = os.path.join(sys.path[0], "classics-words.txt")
    arthistory = os.path.join(sys.path[0], "arthistory-words.txt")
    tech = os.path.join(sys.path[0], "tech-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives, classics, arthistory, tech)

    # initialise classics, arthistory, tech, neutral counters
    classics, arthistory, tech, neutral = 0.0, 0.0, 0.0, 0.0

    # loop through tweets list to analyse it, adding to counter
    for tweet in tweets:
        interest = analyzer.analyze2(tweet)
        if interest == 'classics':
            classics += 1
        elif interest == 'arthistory':
            arthistory += 1
        elif interest == 'tech':
            tech += 1
        else:
            neutral += 1

    # generate chart
    chart = helpers.chart2(classics, arthistory, tech, neutral)

    # render results
    return render_template("search2.html", chart=chart, screen_name=screen_name)
Code Example #19
def search():

    # validate screen_name
    screen_name = request.args.get("screen_name", "").lstrip("@")
    if not screen_name:
        return redirect(url_for("index"))

    # init analyzer
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    tokenize = TweetTokenizer().tokenize

    positive, negative, neutral = 0.0, 0.0, 0.0

    # get screen_name's 100 most recent tweets
    user_timeline = helpers.get_user_timeline(screen_name, 100)

    if user_timeline is None:
        return redirect(url_for("index"))

    for tweet in user_timeline:
        score = 0
        for word in tokenize(tweet):
            score += analyzer.analyze(word)
        if score > 0:
            positive += 1
        elif score < 0:
            negative += 1
        else:
            neutral += 1

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
Code Example #20
def search():

    # validate screen_name
    screen_name = request.args.get("screen_name", "")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name)

    # redirect if the tweets couldn't be fetched
    if tweets is None:
        print("We couldn't get the tweets from " + screen_name)
        return redirect(url_for("index"))

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    positive, negative, neutral = 0.0, 0.0, 0.0
    score = 0
    for tweet in tweets:
        # analyze word
        score = analyzer.analyze(tweet)

        if score > 0.0:
            positive = positive + 1
        elif score < 0.0:
            negative = negative + 1
        else:
            neutral = neutral + 1
    total = positive + negative + neutral
    positive, negative, neutral = positive / total, negative / total, neutral / total

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
Code Example #21
File: application.py Project: LazyHooks/cs50-2017
def search():

    # validate screen_name
    screen_name = request.args.get("screen_name", "").lstrip("@")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name, 100)

    # retry once if the fetch failed
    if tweets is None:
        tweets = helpers.get_user_timeline(screen_name, 100)

    if tweets is None:
        return redirect(url_for("index"))

    # set all counts to 0.0
    positive, negative, neutral = 0.0, 0.0, 0.0

    #list paths
    negatives = os.path.join(sys.path[0], "negative-words.txt")
    positives = os.path.join(sys.path[0], "positive-words.txt")

    #analyzer constructor
    analyzer = Analyzer(positives, negatives)

    #iterate over tweets classifying them
    for tweet in tweets:
        score = analyzer.analyze(tweet)
        if score > 0.0:
            positive = positive + 1
        elif score < 0.0:
            negative = negative + 1
        else:
            neutral = neutral + 1

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
Code Example #22
def run(article_title,
        article_text,
        article_url,
        article_doi,
        hero,
        institute,
        release_date,
        search_depth=2):
    searcher = Searcher(article_text,
                        hero,
                        institute,
                        search_depth=search_depth)
    dwld = Downloader()
    analyzer = Analyzer(article_title, article_text, article_url, article_doi,
                        hero, institute)
    result_count = 10
    i = 0
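    # download and score at most result_count of the discovered links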
    for url in searcher.gen():
        if not url:
            continue
        if i == result_count:
            break
        i += 1
        print('Обнаружена новая ссылка:\n{}\nОбработка...'.format(url))
        if 'pdf' in url:
            print(
                'Обработка PDF-документов не поддерживается. Документ будет добавлен для проверки'
            )
            page = None
        else:
            page = dwld.download(url, default_agent=True, timeout=15)
        print('Оценка близости текста - {:.2}/10'.format(
            analyzer.analyze(url, page)))
    results = sorted(analyzer.get_results(), key=lambda x: x[2], reverse=True)
    line_fmt = '{score:2.2f},{title:},{url:}\n'
    lines = []
    for url, title, score in results:
        lines.append(line_fmt.format(title=title, url=url, score=score))
    return lines
Code Example #23
def search():

    # validate screen_name
    screen_name = request.args.get("screen_name", "")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name, 100)

    # TODO

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    # return to index if screen_name doesn't exist
    if tweets is None:
        return redirect(url_for("index"))

    # create positive, negative and neutral counter
    # these variables are for chart plotting
    positive, negative, neutral = 0, 0, 0

    # analyze each tweet
    for tweet in tweets:
        score = analyzer.analyze(tweet)
        if score > 0.0:
            positive += 1
        elif score < 0.0:
            negative += 1
        else:
            neutral += 1

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
Code Example #24
def build_index(docs_dir, index_dir):
    """
    Indexes files in `docs_root` recursively, placing the built index in `store_dir`
    """
    if not exists(index_dir):
        mkdir(index_dir)
    storage = SimpleFSDirectory(Paths.get(index_dir))  # index kept on disk

    config = IndexWriterConfig(Analyzer())
    config.setOpenMode(
        IndexWriterConfig.OpenMode.CREATE)  # overwrite existing index

    writer = IndexWriter(storage, config)

    print 'Indexing documents:'
    index_docs(docs_dir, writer)

    print 'Writing index...'
    writer.commit()
    writer.close()
Code Example #25
    def atuser_uid_parser(self, atuser_list):
        '''Get the uid for each @-mentioned user's nickname'''
        analyzer = Analyzer()
        tmp_dict = {}
        for atuser_dict in atuser_list:
            if atuser_dict != {}:
                for key in atuser_dict.keys():
                    if key not in tmp_dict:
                        response = urllib2.urlopen("http://s.weibo.com/user/" +
                                                   quote(quote(str(key))) +
                                                   "&Refer=SUer_box")
                        #total_pq = analyzer.get_html(response.read(),'script:contains("W_texta")')
                        #uid = self.get_user_uid(total_pq)
                        #atuser_dict[key] = uid
                        #tmp_dict[key] = uid
                    else:
                        atuser_dict[key] = tmp_dict[key]
            else:
                continue
        return atuser_list
Code Example #26
File: tweets.py Project: sureyeaah/CS50
def main():
    if len(sys.argv) != 2:
        sys.exit("Usage: ./tweets @username")
    tweets = helpers.get_user_timeline(sys.argv[1].lstrip("@"), 50)
    
    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    for tweet in tweets:
        score = analyzer.analyze(tweet)
        if score > 0.0:
            print(colored(str(score) + " " + tweet, "green"))
        elif score < 0.0:
            print(colored(str(score) + " "  + tweet, "red"))
        else:
            print(colored(str(score) + " " + tweet, "yellow"))
Code Example #27
def run():
    analyzer = Analyzer(time_start, time_window, time_limit, preparsed_RIB,
                        selectedAS, text_output, size_x, size_y)

    # so we can calculate total time this program used
    t = time.time()

    # run each requested analysis pass
    for opt in analysis_list:
        if opt == ANALYSIS_LINK:
            analyzer.analyzeLinkBindings(gama, delta)
        if opt == ANALYSIS_PREF:
            analyzer.analyzePrefBindings(alpha)

    print "Total time spent: %.2f minutes" % ((time.time() - t) / 60)
Code Example #28
    def parse_public_userinfo(self, response):
        '''Parse the profile info of a public (official) account'''
        item = WeibospiderItem()
        analyzer = Analyzer()
        try:
            total_pq1 = analyzer.get_html(response.body,
                                          'script:contains("pf_photo")')
            #item['image_urls'] = analyzer.get_userphoto_url(total_pq1)
            item['image_urls'] = None
            item['userAlias_public'] = total_pq1("div.PCD_header")("h1").text()

            total_pq2 = analyzer.get_html(response.body,
                                          'script:contains("PCD_text_b")')
            item['userinfo'] = analyzer.get_public_userinfo(total_pq2)
        except Exception, e:
            item['userinfo'] = {}.fromkeys(
                ('联系人:'.decode('utf-8'), '电话:'.decode('utf-8'),
                 '邮箱:'.decode('utf-8'), '友情链接:'.decode('utf-8')), '')
            item['image_urls'] = None
            item['userAlias_public'] = ""
Code Example #29
    def analyze_ast_tree(self, code, src_file):
        try:
            tree = ast.parse(code, type_comments=True)

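            # collect tokens from the AST and persist them to disk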
            analyzer = Analyzer()
            analyzer.visit(tree)
            analyzer.refine_tokens()
            analyzer.write_tokens_to_file()

            if self.should_print_ast_tree:
                for node in tree.body:
                    print(ast.dump(node))

            if self.should_print_tokens:
                analyzer.print_statements()

            self.total_src_file_count += 1

        except Exception as error:
            print(str(error))
Code Example #30
    def run(self):
        queue = Queue()
        p = Process(target=_run_market_manager, args=(queue,))
        p.start()
        analyzer = Analyzer()
        # simple command loop: forward tasks to the market manager process
        running = True
        while running:
            command = input()
            if command == 'exit':
                queue.put('exit')
                # p.join()
                running = False
            elif 'register' in command:
                url, delay = command.split(' ')[1:]
                queue.put(Task(TaskType.HISTOGRAM, url, int(delay)))
            elif 'show' in command:
                url, duration = command.split(' ')[1:3]
                analyzer.show_stats(url, int(duration))
            else:
                print(f'Unknown command {command}')