def head(attr):
    if attr[0] not in getCompleted():
        # Build the element: opening tag, body, closing tag.
        code = tagger(attr[0])
        code = code + attr[1]
        code = code + endtagger(attr[0])
        writer(code)
        addPending("head")
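head() leans on several helpers defined elsewhere (getCompleted, tagger, endtagger, writer, addPending). A minimal sketch of plausible stand-ins, offered purely as assumptions about their contracts:

# Hypothetical stand-ins for the helpers head() assumes; names and
# behavior are guesses at the contract, not the original code.
_completed = set()
_pending = []

def getCompleted():
    return _completed

def tagger(name):
    return '<%s>' % name    # opening tag

def endtagger(name):
    return '</%s>' % name   # closing tag

def writer(code):
    print(code)             # could also append to an output buffer

def addPending(name):
    _pending.append(name)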
def test(request):
    tweets = TestTweets.objects.all()[0:1000]
    count = 0
    neutral = 0
    for t in tweets:
        tag, pos, neg = tagger(t.tweet)
        if tag == -1:
            count += 1
        elif tag == 0:
            neutral += 1
    ratio = float(count) / (len(tweets) - neutral)
    template = loader.get_template('classifier/test.html')
    context = RequestContext(request, {'ratio': str(ratio)})
    return HttpResponse(template.render(context))
def tag(request, testtweet_id):
    testtweet = TestTweets.objects.get(id=testtweet_id)
    bigrams = get_bigram_bag(testtweet.tweet)
    tag, pos, neg = tagger(testtweet.tweet)
    tag_string = get_tag_string(tag)
    template = loader.get_template('classifier/tag.html')
    context = RequestContext(request, {
        'bigrams': bigrams,
        'testtweet': testtweet,
        'tag_string': tag_string,
        'tag': tag,
        'pos': str(pos),
        'neg': str(neg),
    })
    return HttpResponse(template.render(context))
def initialize_cells(cells, parts, grid, get_id, doc_features):
    """
    Initializes cells in the grid and manages tags. After this call, parts
    will be populated with the part numbers found by the tagger in the table.
    """
    for cell in cells:
        cell.lemmas = lemmas_from(cell.text)
        cell.tags = set()
        cell.features = set()
        cell.mentions = []
        cell.ispart = False
        for tagger in taggers:
            for tag_info in tagger(cell.text):
                mention, tag, mention_features = tag_info
                cell.mentions.append(tag_info)
                cell.tags.add(tag)
                if tag == "part_num":
                    parts.add(mention)  # duplicates have no effect on the set
                    cell.ispart = True
                elif tag == "part_suffix":
                    # Only add variants actually seen in the document as a part
                    for base_part in doc_features.get("part_variants", dict()).keys():
                        for variant in doc_features["part_variants"].get(base_part, set()):
                            if variant.endswith(mention):
                                parts.add(variant)
                                cell.mentions.append((variant, "part_num", set()))
                                cell.ispart = True  # tag as potentially referencing a part
                elif tag == "polarity":
                    if "first_polarity" in doc_features:
                        continue
                    doc_features["first_polarity"] = mention
                    cell.features.add("first_polarity")
        # Hacky tags that need to become an actual tagger
        # if cell.lemmas & header_lemmas: cell.tags.add('header')
        # if cell.lemmas & symbol_lemmas: cell.tags.add('symbol')
        # Record the scope for number matches
        cell.scopes = [get_id() if cell.ispart else None]
        if len(cell.lemmas) < 2:
            cell.features.add("less_than_2_words")
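initialize_cells iterates over a module-level taggers list, where each tagger is a callable yielding (mention, tag, mention_features) triples. A minimal sketch of one such tagger, with an illustrative part-number regex that is an assumption, not the project's actual pattern:

import re

# Hypothetical member of the taggers list: yields (mention, tag,
# mention_features) triples as initialize_cells expects. The pattern
# below is illustrative only.
PART_NUM_RE = re.compile(r'\b[A-Z]{2,4}-\d{3,6}\b')

def part_num_tagger(text):
    for match in PART_NUM_RE.finditer(text):
        yield (match.group(0), "part_num", set())

taggers = [part_num_tagger]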
def analysis_keyword(request, keyword):
    try:
        form = KeywordForm()
        format_tweets = []
        raw_tweets = []
        time_slot = 20
        pos_timeline = [0] * time_slot
        neg_timeline = [0] * time_slot
        time_timeline = []
        tso = TwitterSearchOrder()
        tso.setKeywords([keyword])
        tso.setLanguage('en')
        tso.setCount(100)
        tso.setIncludeEntities(False)
        count = 200
        i = 0
        start_time = datetime.max
        end_time = datetime.min
        ts = TwitterSearch(
            consumer_key='argHv5V9fa175ygapOHf1g',
            consumer_secret='pms9x6kFJ57WIz4SASnJQ6sMioCugsK2dnuMaD9CNo',
            access_token='167017116-jonEZIB9hyFH0waEsISJooIrat05RaZkDmFdCB41',
            access_token_secret='A9cCFgrHuRt2sgBhtyiWhmktFSot1SkdlVckkJ477ZpSi'
        )
        # fetch
        for tweet in ts.searchTweetsIterable(tso):
            text = tweet['text']
            user = tweet['user']['screen_name']
            created_at = tweet['created_at']
            raw_tweets.append([text, user, created_at])
            if i >= count - 1:
                break
            else:
                i += 1
        # tagging
        for tweet in raw_tweets:
            tag, pos_value, neg_value = tagger(tweet[0])
            if tag != 0:
                stime = time.strptime(tweet[2], "%a %b %d %H:%M:%S +0000 %Y")
                dt = datetime.fromtimestamp(mktime(stime))
                format_tweets.append([tweet[0], tweet[1], dt, tag, pos_value, neg_value])
        # statistics
        negative = 0
        for tweet in format_tweets:
            if tweet[3] == -1:
                negative += 1
        # generate timeline data
        for tweet in format_tweets:
            if tweet[2] < start_time:
                start_time = tweet[2]
            if tweet[2] > end_time:
                end_time = tweet[2]
        time_intvl = (end_time - start_time) / time_slot
        for tweet in format_tweets:
            slot = get_slot(time_intvl, tweet[2], start_time) - 1
            if tweet[3] == 1:
                pos_timeline[slot] += 1
            else:
                neg_timeline[slot] += -1
        # format final timeline data
        for i in range(time_slot):
            if i % 4 == 0:
                timestr = (start_time + i * time_intvl).strftime('%H:%M:%S')
            else:
                timestr = ''
            time_timeline.append([timestr, pos_timeline[i], neg_timeline[i]])
        template = loader.get_template('classifier/alys_result.html')
        context = RequestContext(request, {
            'format_tweets': format_tweets,
            'len': len(format_tweets),
            'neg': negative,
            'pos': len(format_tweets) - negative,
            'keyword': keyword,
            'timeline': time_timeline,
            'form': form,
        })
        return HttpResponse(template.render(context))
    except TwitterSearchException as e:
        template = loader.get_template('classifier/error.html')
        context = RequestContext(request, {
            'e_str': str(e),
        })
        return HttpResponse(template.render(context))
def analysis_keyword(request, keyword):
    try:
        form = KeywordForm()
        format_tweets = []
        raw_tweets = []
        time_slot = 20
        pos_timeline = [0] * time_slot
        neg_timeline = [0] * time_slot
        time_timeline = []
        tso = TwitterSearchOrder()
        tso.set_keywords([keyword])
        tso.set_language('en')
        tso.set_count(100)
        tso.set_include_entities(False)
        count = 200
        i = 0
        start_time = datetime.max
        end_time = datetime.min
        ts = TwitterSearch(
            consumer_key='aUjZ7NR0b87m7lvC7NNFxmlQi',
            consumer_secret='vCNYJLewRPhMrQ6q6x1B7vJcCq1PkdOywhS7ajCY5xu9vm0u5Z',
            access_token='2940098420-wvLU4OftzQmtMjqN5NLBt4lL5kMUF5ubx6K1Oli',
            access_token_secret='4xWNY899n4JTVDKPFBEWSB2uzMI72gVF6weXqFX1xu3ID'
        )
        # fetch
        for tweet in ts.search_tweets_iterable(tso):
            text = tweet['text']
            user = tweet['user']['screen_name']
            location = tweet['user']['location']
            created_at = tweet['created_at']
            raw_tweets.append([text, user, created_at, location])
            if i >= count - 1:
                break
            else:
                i += 1
        # tagging
        for tweet in raw_tweets:
            tag, pos_value, neg_value = tagger(tweet[0])
            if tag != 0:
                stime = time.strptime(tweet[2], "%a %b %d %H:%M:%S +0000 %Y")
                dt = datetime.fromtimestamp(mktime(stime))
                format_tweets.append([tweet[0], tweet[1], dt, tag, pos_value, neg_value])
        # statistics
        negative = 0
        for tweet in format_tweets:
            if tweet[3] == -1:
                negative += 1
        # generate timeline data
        for tweet in format_tweets:
            if tweet[2] < start_time:
                start_time = tweet[2]
            if tweet[2] > end_time:
                end_time = tweet[2]
        time_intvl = (end_time - start_time) / time_slot
        for tweet in format_tweets:
            slot = get_slot(time_intvl, tweet[2], start_time) - 1
            if tweet[3] == 1:
                pos_timeline[slot] += 1
            else:
                neg_timeline[slot] += -1
        # format final timeline data
        for i in range(time_slot):
            if i % 4 == 0:
                timestr = (start_time + i * time_intvl).strftime('%H:%M:%S')
            else:
                timestr = ''
            time_timeline.append([timestr, pos_timeline[i], neg_timeline[i]])
        template = loader.get_template('classifier/alys_result.html')
        context = RequestContext(request, {
            'format_tweets': format_tweets,
            'len': len(format_tweets),
            'neg': negative,
            'pos': len(format_tweets) - negative,
            'keyword': keyword,
            'timeline': time_timeline,
            'form': form,
        })
        return HttpResponse(template.render(context))
    except TwitterSearchException as e:
        template = loader.get_template('classifier/error.html')
        context = RequestContext(request, {
            'e_str': str(e),
        })
        return HttpResponse(template.render(context))
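Both versions of analysis_keyword call get_slot(), which is not shown. A plausible implementation, assuming it maps a timestamp to a 1-based bucket index over the 20-slot window and clamps the final timestamp into the last slot:

# Hypothetical get_slot(): returns a 1-based slot index for dt within
# the window starting at start_time; clamping dt == end_time into the
# last slot is an assumption.
def get_slot(intvl, dt, start_time, time_slot=20):
    if intvl.total_seconds() == 0:
        return 1
    slot = int((dt - start_time).total_seconds() // intvl.total_seconds()) + 1
    return min(slot, time_slot)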
parser.add_argument('-host', '--mongoDb_host', help='mongoDb host',
                    required=False, default='localhost')
parser.add_argument('-p', '--mongoDb_port', help='mongoDb port',
                    required=False, default=27017, type=int)
parser.add_argument('-d', '--dictionary', help='dictionary file',
                    required=False, default='data/dict.pkl')
parser.add_argument('-r', '--print_only', help='print only',
                    required=False, default=False, type=bool)
parser.add_argument('-v', '--verbose', dest='verbose', action='store_true')
parser.add_argument('--no-verbose', dest='verbose', action='store_false')
parser.set_defaults(verbose=False)

if __name__ == '__main__':
    args = parser.parse_args()
    client = MongoClient(args.mongoDb_host, args.mongoDb_port)
    notesCollection = client.notes.note
    notes = notesCollection.find({'language': args.language})
    tagger = tagger.getTagger(args.dictionary)
    for note in notes:
        if args.print_only:
            print(note)
        else:
            text_to_tag = note['text1'] + " " + note['text2']
            pythonTags = tagger(text_to_tag)
            tags = [tag.string for tag in pythonTags]
            # tags = [{'text': tag.stem, 'rating': tag.rating} for tag in pythonTags]
            note['pythonTags'] = tags
            notesCollection.update({'_id': note['_id']}, note)
            if args.verbose:
                print(u"Message: {} \n\ttagged as: {}".format(text_to_tag, tags))
        tags = self.reader(text)
        tags = map(self.stemmer, tags)
        tags = self.rater(tags)
        return tags[:tags_number]

if __name__ == '__main__':
    import glob
    import pickle
    import sys

    if len(sys.argv) < 2:
        print 'No arguments given, running tests: '
        documents = glob.glob('tests/*')
    else:
        documents = sys.argv[1:]

    print 'Loading dictionary... '
    weights = pickle.load(open('data/dict.pkl', 'rb'))
    tagger = Tagger(Reader(), Stemmer(), Rater(weights))

    for doc in documents:
        with open(doc, 'r') as file:
            print 'Tags for ', doc, ':'
            print tagger(file.read())
def tag():
    tags = tagger(st.get(1.0, END))
    output = ', '.join(t.string for t in tags)
    tkMessageBox.showinfo('Tags:', output)
    st.delete(1.0, END)
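This handler assumes a module-level ScrolledText widget st and a tagger instance, with tag() bound to a button. A minimal window that would drive it (widget names and layout are assumptions):

# Hypothetical wiring for the tag() handler above (Python 2 Tkinter
# module names, matching the tkMessageBox usage).
from Tkinter import Tk, Button, END
from ScrolledText import ScrolledText
import tkMessageBox

root = Tk()
root.title('Tagger')
st = ScrolledText(root, width=80, height=20)
st.pack()
Button(root, text='Tag', command=tag).pack()
root.mainloop()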