def main(): args = argument_parser.main() global sql sql = SQLConnector(host=args.host, port=args.port, user=args.user, passwd=args.password, db=args.db) global bing bing = BingSearch() global new_software new_software = NewSoftware() global possible_tags possible_tags = [] mongo = MongoConnector(host=args.H, db=args.db) for page in range(1): res = sql.load_data(page) rows = res.num_rows() if not rows: print "No tweets left to analyse" break for _i_ in range(1): # rows): for tweet in res.fetch_row(): tweet_id = str(tweet[0]) text = tweet[1].lower() # text = "Version 2 Microsoft just released MS Office ver 3.20.2 for 99 cent 100c 10ps 13pence 10 pence" urls = find_url(text) for url in urls: text = text.replace(url, "").strip() versions = find_version(text) words = regexp_tokenize(text, pattern=r"\w+([.,]\w+)*|\S+") # print words prices = find_price(words) pos_ = pos(words) ngram = ngrams(words, 5) try: tagged_tweet = tag_tweets(ngram, tweet_id) tagged_tweet.add("tweet_text", text) tagged_tweet.add("sentiment", tweet[2]) tagged_tweet.add("url", urls) tagged_tweet.add("version", versions) tagged_tweet.add("price", prices) if tweet_id in possible_tags: print tweet_id else: if tagged_tweet.contains("software_id") or tagged_tweet.contains("operating_system_id"): print tweet print tagged_tweet print # mongo.insert(tagged_tweet) else: print tweet, "No software" # sql.setTagged(tagged_tweet.get('tweet_db_id')) except IncompleteTaggingError, e: # This will allow the tweet to be tagged again at a later stage print tweet_id + ":", e print tweet print
# NOTE(review): this chunk starts in the middle of a method whose `def` line
# (and enclosing class / loop) is outside the visible source.  The fragment and
# the `close` method are shown flush-left because the original indentation was
# lost; re-indent them under their real headers when merging.
#
# Fallback branch of that method: request a batch with max_results=350 and tag
# every tweet in it.
else:
    res = self._sql.load_data(max_results=350)
    rows = res.num_rows()
    if not rows:
        # Empty batch: report it and leave the enclosing loop.
        print "No tweets left to analyse"
        break
    # fetch_row() is called once per reported row; each call's result is
    # iterated as a sequence of tweets.
    for _i_ in range(rows):
        for tweet in res.fetch_row():
            count+=1
            try:
                total_tags.append(self._tag_tweet(tweet=tweet, store=store))
            # NOTE(review): the bare except skips the tweet on *any* error
            # without reporting it — confirm this best-effort behaviour is
            # intended.
            except:
                continue
# NOTE(review): how deeply these two statements are nested relative to the
# enclosing loop cannot be determined from this chunk.
print count
return total_tags

def close(self):
    """Close the SQL connection and then the Mongo connection held by this object."""
    self._sql.close()
    self._mongo.close()

def main(args):
    """Build a TweetTagger from the parsed arguments, run it once and clean up.

    ``args`` is converted to a dict with ``vars()`` and passed to
    ``TweetTagger`` as keyword arguments.  The result of ``tagger.tag(1)`` is
    printed, the tagger is closed and 0 is returned as the exit status.
    """
    args = vars(args)
    tagger = TweetTagger(**args)
    # NOTE(review): if tag() raises, close() below is never reached.
    print tagger.tag(1)
    tagger.close()
    return 0

# Script entry point: parse the command line and exit with main()'s return value.
if __name__ == "__main__":
    sys.exit(main(argument_parser.main()))
# NOTE(review): tail of a route-setup function whose `def` line is outside this
# chunk (presumably `setup_routes`, which the script section below calls —
# confirm).  The statements are shown flush-left because the original
# indentation was lost; re-indent them under that header when merging.
#
# Each call registers a named route on `d` that is handled by controller `w`.
# Apart from 'index', every URL pattern is registered twice: once without and
# once with a trailing slash (the "-1" names).
d.connect('index', '/', controller=w, action='index')
# No explicit action here: the ':action' URL segment is presumably used to pick
# the controller method — confirm against the dispatcher in use.
d.connect('main', '/:action', controller=w)
d.connect('main-1', '/:action/', controller=w)
d.connect('res', '/analysis/:name', controller=w, action='aggregate')
d.connect('res-1', '/analysis/:name/', controller=w, action='aggregate')
d.connect('search', '/twitter/:query', controller=w, action='tweets')
d.connect('search-1', '/twitter/:query/', controller=w, action='tweets')
d.connect('extract', '/extract/:query', controller=w, action='extract')
d.connect('extract-1', '/extract/:query/', controller=w, action='extract')
return d

# Script entry point: build the CherryPy configuration, mount it and run the
# engine until it is stopped.
if __name__ == '__main__':
    import os.path
    config = {
        # Application root: requests go through the dispatcher returned by
        # setup_routes(); static files are rooted at "<directory of this file>/web".
        # NOTE(review): `argparse.main()` is not part of the standard-library
        # argparse module; presumably a project-local module of that name — confirm.
        '/':{
            'request.dispatch': setup_routes(args=argparse.main()),
            'tools.staticdir.root': os.path.dirname(os.path.abspath(__file__)) + "/web"
        },
        # Static serving is switched on for /css and /js from the 'css' and
        # 'js' directories.
        '/css':{
            'tools.staticdir.on': True,
            'tools.staticdir.dir': 'css'
        },
        '/js':{
            'tools.staticdir.on': True,
            'tools.staticdir.dir': 'js'
        }
    }
    # No root object is mounted (None); only the configuration above is supplied.
    cherrypy.tree.mount(None, config=config)
    cherrypy.engine.start()
    cherrypy.engine.block()
# NOTE(review): tail of a route-setup function whose `def` line and first
# statements are outside this chunk (presumably `setup_routes`, which the
# script section below calls — confirm).  The statements are shown flush-left
# because the original indentation was lost; re-indent them under that header
# when merging.
#
# Each call registers a named route on `d` that is handled by controller `w`.
# The "-1" names are the trailing-slash variants of the same URL patterns.
d.connect('main-1', '/:action/', controller=w)
d.connect('res', '/analysis/:name', controller=w, action='aggregate')
d.connect('res-1', '/analysis/:name/', controller=w, action='aggregate')
d.connect('search', '/twitter/:query', controller=w, action='tweets')
d.connect('search-1', '/twitter/:query/', controller=w, action='tweets')
d.connect('extract', '/extract/:query', controller=w, action='extract')
d.connect('extract-1', '/extract/:query/', controller=w, action='extract')
return d

# Script entry point: read the classpath, build the CherryPy configuration,
# mount it and run the engine until it is stopped.
if __name__ == '__main__':
    import os.path
    # Only the first line of classpath.txt is used, with surrounding whitespace
    # removed.  The relative path is resolved against the current working
    # directory.
    with open('classpath.txt', 'r') as f:
        classpath=f.readline().strip()
    config = {
        # Application root: requests go through the dispatcher returned by
        # setup_routes(), which also receives the classpath read above; static
        # files are rooted at "<directory of this file>/web".
        # NOTE(review): `argparse.main()` is not part of the standard-library
        # argparse module; presumably a project-local module of that name — confirm.
        '/':{
            'request.dispatch': setup_routes(args=argparse.main(), classpath=classpath),
            'tools.staticdir.root': os.path.dirname(os.path.abspath(__file__)) + "/web"
        },
        # Static serving is switched on for /css and /js from the 'css' and
        # 'js' directories.
        '/css':{
            'tools.staticdir.on': True,
            'tools.staticdir.dir': 'css'
        },
        '/js':{
            'tools.staticdir.on': True,
            'tools.staticdir.dir': 'js'
        }
    }
    # No root object is mounted (None); only the configuration above is supplied.
    cherrypy.tree.mount(None, config=config)
    cherrypy.engine.start()
    cherrypy.engine.block()