Example #1
def head(attr):
    # Skip elements whose tag has already been completed.
    if attr[0] not in getCompleted():
        # Wrap the content (attr[1]) in opening and closing tags for attr[0].
        code = tagger(attr[0]) + attr[1] + endtagger(attr[0])
        writer(code)
        addPending("head")
Example #2
def test(request):
    tweets = TestTweets.objects.all()[0:1000]
    count = 0    # tweets tagged negative
    neutral = 0  # tweets tagged neutral
    for t in tweets:
        tag, pos, neg = tagger(t.tweet)
        if tag == -1:
            count += 1
        elif tag == 0:
            neutral += 1
    # Share of negative tweets among the non-neutral ones; guard against
    # every tweet being neutral, which would otherwise divide by zero.
    non_neutral = len(tweets) - neutral
    ratio = float(count) / non_neutral if non_neutral else 0.0
    template = loader.get_template('classifier/test.html')
    context = RequestContext(request, {'ratio': str(ratio)})
    return HttpResponse(template.render(context))
Example #3
def tag(request, testtweet_id):
    testtweet = TestTweets.objects.get(id=testtweet_id)
    bigrams = get_bigram_bag(testtweet.tweet)
    tag, pos, neg = tagger(testtweet.tweet)
    tag_string = get_tag_string(tag)
    template = loader.get_template('classifier/tag.html')
    context = RequestContext(request, {
        'bigrams': bigrams,
        'testtweet': testtweet,
        'tag_string': tag_string,
        'tag': tag,
        'pos': str(pos),
        'neg': str(neg),
        })
    return HttpResponse(template.render(context))
Example #4
def initialize_cells(cells, parts, grid, get_id, doc_features):
    """
    Initializes cells in the grid and manages tags. After this call, parts will
    be populated with the part numbers found by the tagger in the table.
    """

    for cell in cells:
        cell.lemmas = lemmas_from(cell.text)
        cell.tags = set()
        cell.features = set()
        cell.mentions = []
        cell.ispart = False
        for tagger in taggers:
            for tag_info in tagger(cell.text):
                mention, tag, mention_features = tag_info
                cell.mentions.append(tag_info)
                cell.tags.add(tag)
                if tag == "part_num":
                    parts.add(mention)  # duplicates will have no effect on the set
                    cell.ispart = True
                elif tag == "part_suffix":
                    # Only add variants that I actually see in the document as a part
                    for base_part in doc_features.get("part_variants", dict()).keys():
                        for variant in doc_features["part_variants"].get(base_part, set()):
                            if variant.endswith(mention):
                                parts.add(variant)
                                cell.mentions.append((variant, "part_num", set()))
                                cell.ispart = True  # tag as potentially referencing a part
                elif tag == "polarity":
                    if "first_polarity" in doc_features:
                        continue
                    doc_features["first_polarity"] = mention
                    cell.features.add("first_polarity")

        # Hacky tags that need to become an actual tagger
        # if cell.lemmas & header_lemmas: cell.tags.add('header')
        # if cell.lemmas & symbol_lemmas: cell.tags.add('symbol')

        # Record the scope for number matches
        cell.scopes = [get_id() if cell.ispart else None]
        if len(cell.lemmas) < 2:
            cell.features.add("less_than_2_words")
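A minimal sketch of how initialize_cells might be driven. The Cell class, the stub part-number tagger, and the sample text below are illustrative assumptions rather than the project's real definitions, and the sketch further assumes that the module-level names taggers and lemmas_from resolve to the stubs shown:
# Hypothetical harness for initialize_cells; Cell, part_num_tagger and
# lemmas_from stand in for the project's real definitions.
class Cell(object):
    def __init__(self, text):
        self.text = text

def part_num_tagger(text):
    # Yield (mention, tag, features) triples for tokens that look like
    # part numbers, e.g. "LM317T".
    for token in text.split():
        if token[0].isalpha() and any(ch.isdigit() for ch in token):
            yield (token, "part_num", set())

taggers = [part_num_tagger]
lemmas_from = lambda text: set(text.lower().split())

cells = [Cell("LM317T regulator"), Cell("Output voltage")]
parts = set()
counter = iter(range(10000))
initialize_cells(cells, parts, grid=None,
                 get_id=lambda: next(counter), doc_features={})
print(parts)  # expected: a set containing 'LM317T'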
Example #5
def analysis_keyword(request, keyword):
    try:
        form = KeywordForm()
        format_tweets = []
        raw_tweets = []
        time_slot = 20
        pos_timeline = [0] * time_slot
        neg_timeline = [0] * time_slot
        time_timeline = []
        tso = TwitterSearchOrder()
        tso.setKeywords([keyword])
        tso.setLanguage('en')
        tso.setCount(100)
        tso.setIncludeEntities(False)
        count = 200
        i = 0
        start_time = datetime.max
        end_time = datetime.min

        ts = TwitterSearch(
            consumer_key='argHv5V9fa175ygapOHf1g',
            consumer_secret='pms9x6kFJ57WIz4SASnJQ6sMioCugsK2dnuMaD9CNo',
            access_token='167017116-jonEZIB9hyFH0waEsISJooIrat05RaZkDmFdCB41',
            access_token_secret='A9cCFgrHuRt2sgBhtyiWhmktFSot1SkdlVckkJ477ZpSi'
        )
        # fetch
        for tweet in ts.searchTweetsIterable(tso):
            text = tweet['text']
            user = tweet['user']['screen_name']
            created_at = tweet['created_at']
            raw_tweets.append([text, user, created_at])
            if i >= count - 1:
                break
            else:
                i += 1

        # tagging
        for tweet in raw_tweets:
            tag, pos_value, neg_value = tagger(tweet[0])
            if tag != 0:
                stime = time.strptime(tweet[2], "%a %b %d %H:%M:%S +0000 %Y")
                dt = datetime.fromtimestamp(mktime(stime))
                format_tweets.append(
                    [tweet[0], tweet[1], dt, tag, pos_value, neg_value])

        # statistics
        negative = 0
        for tweet in format_tweets:
            if tweet[3] == -1:
                negative += 1

        # generate timeline data
        for tweet in format_tweets:
            if tweet[2] < start_time:
                start_time = tweet[2]
            if tweet[2] > end_time:
                end_time = tweet[2]
        time_intvl = (end_time - start_time) / time_slot

        for tweet in format_tweets:
            slot = get_slot(time_intvl, tweet[2], start_time) - 1
            if tweet[3] == 1:
                pos_timeline[slot] += 1
            else:
                neg_timeline[slot] += -1

        # format final timeline data
        for i in range(time_slot):
            if i % 4 == 0:
                timestr = (start_time + i * time_intvl).strftime('%H:%M:%S')
            else:
                timestr = ''
            time_timeline.append([timestr, pos_timeline[i], neg_timeline[i]])

        template = loader.get_template('classifier/alys_result.html')
        context = RequestContext(
            request, {
                'format_tweets': format_tweets,
                'len': len(format_tweets),
                'neg': negative,
                'pos': len(format_tweets) - negative,
                'keyword': keyword,
                'timeline': time_timeline,
                'form': form,
            })
        return HttpResponse(template.render(context))
    except TwitterSearchException as e:
        template = loader.get_template('classifier/error.html')
        context = RequestContext(request, {
            'e_str': str(e),
        })
        return HttpResponse(template.render(context))
Example #6
def analysis_keyword(request, keyword):
    try:
        form = KeywordForm()
        format_tweets = []
        raw_tweets = []
        time_slot = 20
        pos_timeline = [0]*time_slot
        neg_timeline = [0]*time_slot
        time_timeline = []
        tso = TwitterSearchOrder()
        tso.set_keywords([keyword])
        tso.set_language('en')
        tso.set_count(100)
        tso.set_include_entities(False)
        count = 200
        i = 0
        start_time = datetime.max
        end_time = datetime.min

        ts = TwitterSearch(
            consumer_key='aUjZ7NR0b87m7lvC7NNFxmlQi',
            consumer_secret='vCNYJLewRPhMrQ6q6x1B7vJcCq1PkdOywhS7ajCY5xu9vm0u5Z',
            access_token='2940098420-wvLU4OftzQmtMjqN5NLBt4lL5kMUF5ubx6K1Oli',
            access_token_secret='4xWNY899n4JTVDKPFBEWSB2uzMI72gVF6weXqFX1xu3ID'
        )
        # fetch
        for tweet in ts.search_tweets_iterable(tso):
            text = tweet['text']
            user = tweet['user']['screen_name']
            location = tweet['user']['location']
            created_at = tweet['created_at']
            raw_tweets.append([text, user, created_at, location])
            if i >= count-1:
                break
            else:
                i += 1

        # tagging
        for tweet in raw_tweets:
            tag, pos_value, neg_value = tagger(tweet[0])
            if tag != 0:
                stime = time.strptime(tweet[2], "%a %b %d %H:%M:%S +0000 %Y")
                dt = datetime.fromtimestamp(mktime(stime))
                format_tweets.append([tweet[0], tweet[1], dt, tag, pos_value, neg_value])

        # statistics
        negative = 0
        for tweet in format_tweets:
            if tweet[3] == -1:
                negative += 1

        # generate timeline data
        for tweet in format_tweets:
            if tweet[2] < start_time:
                start_time = tweet[2]
            if tweet[2] > end_time:
                end_time = tweet[2]
        time_intvl = (end_time - start_time) / time_slot

        for tweet in format_tweets:
            slot = get_slot(time_intvl, tweet[2], start_time) - 1
            if tweet[3] == 1:
                pos_timeline[slot] += 1
            else:
                neg_timeline[slot] += -1

        # format final timeline data
        for i in range(time_slot):
            if i % 4 == 0:
                timestr = (start_time+i*time_intvl).strftime('%H:%M:%S')
            else:
                timestr = ''
            time_timeline.append([timestr, pos_timeline[i], neg_timeline[i]])

        template = loader.get_template('classifier/alys_result.html')
        context = RequestContext(request, {
            'format_tweets': format_tweets,
            'len': len(format_tweets),
            'neg': negative,
            'pos': len(format_tweets) - negative,
            'keyword': keyword,
            'timeline': time_timeline,
            'form': form,
        })
        return HttpResponse(template.render(context))
    except TwitterSearchException as e:
        template = loader.get_template('classifier/error.html')
        context = RequestContext(request, {
            'e_str': str(e),
            })
        return HttpResponse(template.render(context))
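The analysis_keyword examples above rely on a get_slot helper that is not part of this listing. Here is a plausible sketch, inferred from the call site get_slot(time_intvl, tweet[2], start_time) - 1 and the 20-slot timelines; the 1-based return convention and the clamping of the final tweet are assumptions:
def get_slot(time_intvl, timestamp, start_time):
    # Map a datetime into a 1-based bucket index; the caller subtracts 1
    # to index into the pos_timeline/neg_timeline lists.
    total = time_intvl.total_seconds()
    if total == 0:
        return 1  # every tweet shares one timestamp: use the first slot
    elapsed = (timestamp - start_time).total_seconds()
    slot = int(elapsed / total) + 1
    return min(slot, 20)  # clamp the tweet at end_time into the last slot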
Example #7
# The excerpt begins mid-way through the argument-parser setup. The next
# few lines reconstruct the likely missing preamble: imports, the parser
# itself, and the --language option implied by args.language below. The
# flag spelling and default are assumptions.
import argparse
import tagger
from pymongo import MongoClient

parser = argparse.ArgumentParser()
parser.add_argument('-l', '--language', help='note language', required=False, default='en')
parser.add_argument('-host', '--mongoDb_host', help='mongoDb host', required=False, default='localhost')
parser.add_argument('-p', '--mongoDb_port', help='mongoDb port', required=False, default=27017, type=int)
parser.add_argument('-d', '--dictionary', help='dictionary file', required=False, default='data/dict.pkl')
parser.add_argument('-r', '--print_only', help='print only', required=False, default=False, type=bool)
parser.add_argument('-v', '--verbose', dest='verbose', action='store_true')
parser.add_argument('--no-verbose', dest='verbose', action='store_false')
parser.set_defaults(verbose=False)


if __name__ == '__main__':
    args = parser.parse_args()
    client = MongoClient(args.mongoDb_host, args.mongoDb_port)

    notesCollection = client.notes.note
    notes = notesCollection.find({'language': args.language})

    tagger = tagger.getTagger(args.dictionary)

    for note in notes:
        if args.print_only:
            print(note)
        else:
            text_to_tag = note['text1'] + "   " + note['text2']
            pythonTags = tagger(text_to_tag)
            tags = [tag.string for tag in pythonTags]
            # tags = [{'text': tag.stem, 'rating': tag.rating} for tag in pythonTags]
            note['pythonTags'] = tags
            notesCollection.update({'_id': note['_id']}, note)
            if args.verbose:
                print(u"Message: {} \n\ttagged as: {}".format(text_to_tag, tags))
Example #8
        tags = self.reader(text)
        tags = map(self.stemmer, tags)
        tags = self.rater(tags)

        return tags[:tags_number]


if __name__ == '__main__':

    import glob
    import pickle
    import sys

    if len(sys.argv) < 2:
        print 'No arguments given, running tests: '
        documents = glob.glob('tests/*')
    else:
        documents = sys.argv[1:]

    print 'Loading dictionary... '
    weights = pickle.load(open('data/dict.pkl', 'rb'))

    tagger = Tagger(Reader(), Stemmer(), Rater(weights))

    for doc in documents:
        with open(doc, 'r') as file:
            print 'Tags for ', doc, ':'
            print tagger(file.read())

Example #9
def tag():
    # Tag the contents of the ScrolledText widget and show the result.
    tags = tagger(st.get(1.0, END))
    output = ', '.join(t.string for t in tags)
    tkMessageBox.showinfo('Tags:', output)
    st.delete(1.0, END)
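A minimal window this handler might be wired into, using the Python 2 Tkinter module names that match the snippet's tkMessageBox call; the widget layout and names are illustrative assumptions:
# Hypothetical setup for the tag() handler above (Python 2 module names).
from Tkinter import Tk, Button, END
from ScrolledText import ScrolledText
import tkMessageBox

root = Tk()
root.title('Tagger')
st = ScrolledText(root, width=60, height=10)
st.pack()
Button(root, text='Tag', command=tag).pack()
root.mainloop()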