Code example #1
def head(attr):
    # attr[0] is the tag name, attr[1] the content to wrap.
    if attr[0] not in getCompleted():
        code = tagger(attr[0])      # opening tag
        code += attr[1]             # content
        code += endtagger(attr[0])  # closing tag
        writer(code)
        addPending("head")
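In this first example, tagger/endtagger are evidently HTML tag generators rather than the sentiment classifier used in the examples below. A minimal sketch of what those helpers presumably look like, inferred from the call sites rather than taken from the original project:

def tagger(name):
    # hypothetical: render an opening HTML tag
    return '<%s>' % name

def endtagger(name):
    # hypothetical: render the matching closing tag
    return '</%s>' % name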
Code example #2
def test(request):
    # Classify the first 1000 test tweets and report the share of negative
    # tweets among those not tagged neutral.
    tweets = TestTweets.objects.all()[0:1000]
    count = 0
    neutral = 0
    for t in tweets:
        tag, pos, neg = tagger(t.tweet)
        if tag == -1:
            count += 1
        elif tag == 0:
            neutral += 1
    classified = len(tweets) - neutral
    ratio = float(count) / classified if classified else 0.0  # avoid ZeroDivisionError
    template = loader.get_template('classifier/test.html')
    context = RequestContext(request, {'ratio': str(ratio)})
    return HttpResponse(template.render(context))
Code example #3
File: views.py  Project: bedding/twinsight
def tag(request, testtweet_id):
    # Render classification details (bigrams, tag, scores) for one test tweet.
    testtweet = TestTweets.objects.get(id=testtweet_id)
    bigrams = get_bigram_bag(testtweet.tweet)
    tag, pos, neg = tagger(testtweet.tweet)
    tag_string = get_tag_string(tag)
    template = loader.get_template('classifier/tag.html')
    context = RequestContext(request, {
        'bigrams': bigrams,
        'testtweet': testtweet,
        'tag_string': tag_string,
        'tag': tag,
        'pos': str(pos),
        'neg': str(neg),
        })
    return HttpResponse(template.render(context))
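get_tag_string is not shown in this excerpt. A plausible sketch, assuming the -1/0/1 tag convention visible in the surrounding examples; the mapping below is illustrative, not the project's actual helper:

def get_tag_string(tag):
    # hypothetical mapping of the numeric sentiment tag to a label
    return {-1: 'negative', 0: 'neutral', 1: 'positive'}.get(tag, 'unknown')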
Code example #4
def initialize_cells(cells, parts, grid, get_id, doc_features):
    """
    Initializes cells in the grid and manages tags. After this call, parts will
    be populated with the part numbers found by the tagger in the table.
    """

    for cell in cells:
        cell.lemmas = lemmas_from(cell.text)
        cell.tags = set()
        cell.features = set()
        cell.mentions = []
        cell.ispart = False
        for tagger in taggers:
            for tag_info in tagger(cell.text):
                mention, tag, mention_features = tag_info
                cell.mentions.append(tag_info)
                cell.tags.add(tag)
                if tag == "part_num":
                    parts.add(mention)  # duplicates will have no effect on the set
                    cell.ispart = True
                elif tag == "part_suffix":
                    # Only add variants that I actually see in the document as a part
                    for base_part in doc_features.get("part_variants", dict()).keys():
                        for variant in doc_features["part_variants"].get(base_part, set()):
                            if variant.endswith(mention):
                                parts.add(variant)
                                cell.mentions.append((variant, "part_num", set()))
                                cell.ispart = True  # tag as potentially referencing a part
                elif tag == "polarity":
                    if "first_polarity" in doc_features:
                        continue
                    doc_features["first_polarity"] = mention
                    cell.features.add("first_polarity")

        # Hacky tags that need to become an actual tagger
        # if cell.lemmas & header_lemmas: cell.tags.add('header')
        # if cell.lemmas & symbol_lemmas: cell.tags.add('symbol')

        # Record the scope for number matches
        cell.scopes = [get_id() if cell.ispart else None]
        if len(cell.lemmas) < 2:
            cell.features.add("less_than_2_words")
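The taggers iterable is defined outside this excerpt; each tagger is called with the cell text and must yield (mention, tag, mention_features) triples. A toy tagger consistent with that contract, where the name and regex are illustrative assumptions:

import re

def part_num_tagger(text):
    # hypothetical tagger: emit a triple for anything shaped like a part number
    for match in re.finditer(r'\b[A-Z]{2,}\d{3,}[A-Z0-9-]*\b', text):
        yield (match.group(0), "part_num", set())

taggers = [part_num_tagger]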
Code example #5
File: views.py  Project: bedding/twinsight
def analysis_keyword(request, keyword):
    try:
        form = KeywordForm()
        format_tweets = []
        raw_tweets = []
        time_slot = 20
        pos_timeline = [0]*time_slot
        neg_timeline = [0]*time_slot
        time_timeline = []
        tso = TwitterSearchOrder()
        tso.setKeywords([keyword])
        tso.setLanguage('en')
        tso.setCount(100)
        tso.setIncludeEntities(False)
        count = 200
        i = 0
        start_time = datetime.max
        end_time = datetime.min

        ts = TwitterSearch(
            # placeholder credentials: supply your own keys, never commit real ones
            consumer_key = 'YOUR_CONSUMER_KEY',
            consumer_secret = 'YOUR_CONSUMER_SECRET',
            access_token = 'YOUR_ACCESS_TOKEN',
            access_token_secret = 'YOUR_ACCESS_TOKEN_SECRET'
            )
        # fetch
        for tweet in ts.searchTweetsIterable(tso):
            text = tweet['text']
            user = tweet['user']['screen_name']
            created_at = tweet['created_at']
            raw_tweets.append([text, user, created_at])
            if i >= count-1:
                break
            else:
                i += 1

        # tagging
        for tweet in raw_tweets:
            tag, pos_value, neg_value = tagger(tweet[0])
            if tag != 0:
                stime = time.strptime(tweet[2], "%a %b %d %H:%M:%S +0000 %Y")
                dt = datetime.fromtimestamp(mktime(stime))
                format_tweets.append([tweet[0], tweet[1], dt, tag, pos_value, neg_value])

        # statistics
        negative = 0
        for tweet in format_tweets:
            if tweet[3] == -1:
                negative += 1

        # generate timeline data
        for tweet in format_tweets:
            if tweet[2] < start_time:
                start_time = tweet[2]
            if tweet[2] > end_time:
                end_time = tweet[2]
        time_intvl = (end_time - start_time) / time_slot

        for tweet in format_tweets:
            slot = get_slot(time_intvl, tweet[2], start_time) - 1
            if tweet[3] == 1:
                pos_timeline[slot] += 1
            else:
                neg_timeline[slot] += -1

        # format final timeline data
        for i in range(time_slot):
            if i % 4 == 0:
                timestr = (start_time+i*time_intvl).strftime('%H:%M:%S')
            else:
                timestr = ''
            time_timeline.append([timestr, pos_timeline[i], neg_timeline[i]])

        template = loader.get_template('classifier/alys_result.html')
        context = RequestContext(request, {
            'format_tweets':format_tweets,
            'len':len(format_tweets),
            'neg': negative,
            'pos': len(format_tweets) - negative,
            'keyword': keyword,
            'timeline': time_timeline,
            'form': form,
            })
        return HttpResponse(template.render(context))
    except TwitterSearchException as e:
        template = loader.get_template('classifier/error.html')
        context = RequestContext(request, {
            'e_str': str(e),
            })
        return HttpResponse(template.render(context))
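get_slot is defined elsewhere in views.py. A sketch consistent with how it is called above, mapping a tweet's timestamp to a 1-based slot index over [start_time, end_time]; this is an assumption, not the project's code:

def get_slot(time_intvl, tweet_time, start_time, time_slot=20):
    # hypothetical: bucket tweet_time into one of time_slot intervals
    width = time_intvl.total_seconds()
    if width == 0:
        return 1  # all tweets share a single timestamp
    elapsed = (tweet_time - start_time).total_seconds()
    return max(1, min(int(elapsed // width) + 1, time_slot))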
Code example #6
def analysis_keyword(request, keyword):
    # The same view as the previous example, updated for the newer snake_case
    # TwitterSearch API (set_keywords, search_tweets_iterable, ...).
    try:
        form = KeywordForm()
        format_tweets = []
        raw_tweets = []
        time_slot = 20
        pos_timeline = [0]*time_slot
        neg_timeline = [0]*time_slot
        time_timeline = []
        tso = TwitterSearchOrder()
        tso.set_keywords([keyword])
        tso.set_language('en')
        tso.set_count(100)
        tso.set_include_entities(False)
        count = 200
        i = 0
        start_time = datetime.max
        end_time = datetime.min

        ts = TwitterSearch(
            # placeholder credentials: supply your own keys, never commit real ones
            consumer_key = 'YOUR_CONSUMER_KEY',
            consumer_secret = 'YOUR_CONSUMER_SECRET',
            access_token = 'YOUR_ACCESS_TOKEN',
            access_token_secret = 'YOUR_ACCESS_TOKEN_SECRET'
            )
        # fetch
        for tweet in ts.search_tweets_iterable(tso):
            text = tweet['text']
            user = tweet['user']['screen_name']
            location = tweet['user']['location']
            created_at = tweet['created_at']
            raw_tweets.append([text, user, created_at, location])
            if i >= count-1:
                break
            else:
                i += 1

        # tagging
        for tweet in raw_tweets:
            tag, pos_value, neg_value = tagger(tweet[0])
            if tag != 0:
                stime = time.strptime(tweet[2], "%a %b %d %H:%M:%S +0000 %Y")
                dt = datetime.fromtimestamp(mktime(stime))
                format_tweets.append([tweet[0], tweet[1], dt, tag, pos_value, neg_value])

        # statistics
        negative = 0
        for tweet in format_tweets:
            if tweet[3] == -1:
                negative += 1

        # generate timeline data
        for tweet in format_tweets:
            if tweet[2] < start_time:
                start_time = tweet[2]
            if tweet[2] > end_time:
                end_time = tweet[2]
        time_intvl = (end_time - start_time) / time_slot

        for tweet in format_tweets:
            slot = get_slot(time_intvl, tweet[2], start_time) - 1
            if tweet[3] == 1:
                pos_timeline[slot] += 1
            else:
                neg_timeline[slot] += -1

        # format final timeline data
        for i in range(time_slot):
            if i % 4 == 0:
                timestr = (start_time+i*time_intvl).strftime('%H:%M:%S')
            else:
                timestr = ''
            time_timeline.append([timestr, pos_timeline[i], neg_timeline[i]])

        template = loader.get_template('classifier/alys_result.html')
        context = RequestContext(request, {
            'format_tweets':format_tweets,
            'len':len(format_tweets),
            'neg': negative,
            'pos': len(format_tweets) - negative,
            'keyword': keyword,
            'timeline': time_timeline,
            'form': form,
            })
        return HttpResponse(template.render(context))
    except TwitterSearchException as e:
        template = loader.get_template('classifier/error.html')
        context = RequestContext(request, {
            'e_str': str(e),
            })
        return HttpResponse(template.render(context))
Code example #7
File: allPythonContent.py  Project: Mondego/pyreco
        tags = self.reader(text)
        tags = map(self.stemmer, tags)
        tags = self.rater(tags)

        return tags[:tags_number]


if __name__ == '__main__':

    import glob
    import pickle
    import sys

    if len(sys.argv) < 2:
        print 'No arguments given, running tests: '
        documents = glob.glob('tests/*')
    else:
        documents = sys.argv[1:]

    print 'Loading dictionary... '
    weights = pickle.load(open('data/dict.pkl', 'rb'))

    tagger = Tagger(Reader(), Stemmer(), Rater(weights))

    for doc in documents:
        with open(doc, 'r') as file:
            print 'Tags for ', doc, ':'
            print tagger(file.read())
Code example #8
parser.add_argument('-host', '--mongoDb_host', help='mongoDb host', required=False, default='localhost')
parser.add_argument('-p', '--mongoDb_port', help='mongoDb port', required=False, default=27017, type=int)
parser.add_argument('-d', '--dictionary', help='dictionary file', required=False, default='data/dict.pkl')
# note: type=bool is a trap for flags (bool('False') is True), so use a store_true action
parser.add_argument('-r', '--print_only', help='print only', action='store_true')
parser.add_argument('-v', '--verbose', dest='verbose', action='store_true')
parser.add_argument('--no-verbose', dest='verbose', action='store_false')
parser.set_defaults(verbose=False)


if __name__ == '__main__':
    args = parser.parse_args()
    client = MongoClient(args.mongoDb_host, args.mongoDb_port)

    notesCollection = client.notes.note
    # assumes a --language argument defined elsewhere in the script (not shown here)
    notes = notesCollection.find({'language': args.language})

    tagger = tagger.getTagger(args.dictionary)  # rebinds the module name to the returned callable

    for note in notes:
        if args.print_only:
            print(note)
        else:
            text_to_tag = note['text1'] + "   " + note['text2']
            pythonTags = tagger(text_to_tag)
            tags = [tag.string for tag in pythonTags]
            # tags = [{'text': tag.stem, 'rating': tag.rating} for tag in pythonTags]
            note['pythonTags'] = tags
            notesCollection.update({'_id': note['_id']}, note)  # pymongo 2.x API; use replace_one() on pymongo 3+
            if args.verbose:
                print(u"Message: {} \n\ttagged as: {}".format(text_to_tag, tags))
Code example #9
File: test_ui.py  Project: econner/Box.me
def tag():
    # Classify the ScrolledText contents, show the tags, then clear the widget.
    tags = tagger(st.get(1.0, END))
    output = ', '.join(t.string for t in tags)
    tkMessageBox.showinfo('Tags:', output)
    st.delete(1.0, END)
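For completeness, a minimal sketch of the Tkinter wiring this callback assumes; the widget name st and the button layout are guesses, since the original test_ui.py is not shown in full:

from Tkinter import Tk, Button, END   # Python 2 module names, matching tkMessageBox above
from ScrolledText import ScrolledText
import tkMessageBox

root = Tk()
st = ScrolledText(root)
st.pack()
Button(root, text='Tag', command=tag).pack()
root.mainloop()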