コード例 #1
0
def main():
    """Run one debugging pass of the tweet tagger and print the outcome.

    Parses the command line, creates the shared connector/search objects,
    loads one page of tweets from SQL and, for each fetched tweet, extracts
    URLs, versions and prices, tags the 5-grams of its tokens and prints the
    result. Nothing is written back to either database in this version: the
    MongoDB insert and the SQL "tagged" update are both disabled.

    The output statements and the exception clause use forms that are valid
    on both Python 2.6+ and Python 3 and produce the same text as before.
    """
    args = argument_parser.main()

    # These objects are published as module globals rather than locals;
    # presumably helpers such as tag_tweets() read them -- verify against
    # the rest of the module before turning them into locals.
    global sql
    sql = SQLConnector(host=args.host, port=args.port, user=args.user, passwd=args.password, db=args.db)
    global bing
    bing = BingSearch()
    global new_software
    new_software = NewSoftware()
    global possible_tags
    possible_tags = []

    # NOTE(review): the connector is created but never used below because the
    # insert is commented out. It reads args.H while the SQL connector reads
    # args.host -- confirm this is the intended option.
    mongo = MongoConnector(host=args.H, db=args.db)

    # Debug limit: only the first page is processed.
    for page in range(1):
        res = sql.load_data(page)
        rows = res.num_rows()
        if not rows:
            print("No tweets left to analyse")
            break

        # Debug limit: a single fetch instead of one fetch per row.
        for _i_ in range(1):  # rows):
            for tweet in res.fetch_row():
                tweet_id = str(tweet[0])
                text = tweet[1].lower()
                # text = "Version 2 Microsoft just released MS Office ver 3.20.2 for 99 cent 100c 10ps 13pence 10 pence"

                # Strip every detected URL from the text before tokenising.
                urls = find_url(text)
                for url in urls:
                    text = text.replace(url, "").strip()

                versions = find_version(text)

                words = regexp_tokenize(text, pattern=r"\w+([.,]\w+)*|\S+")
                prices = find_price(words)

                # NOTE(review): the result is never read; the call is kept in
                # case pos() has side effects -- confirm and remove if not.
                pos_ = pos(words)
                ngram = ngrams(words, 5)

                try:
                    tagged_tweet = tag_tweets(ngram, tweet_id)
                    tagged_tweet.add("tweet_text", text)
                    tagged_tweet.add("sentiment", tweet[2])
                    tagged_tweet.add("url", urls)
                    tagged_tweet.add("version", versions)
                    tagged_tweet.add("price", prices)
                    if tweet_id in possible_tags:
                        print(tweet_id)
                    else:
                        if tagged_tweet.contains("software_id") or tagged_tweet.contains("operating_system_id"):
                            print(tweet)
                            print(tagged_tweet)
                            print("")
                            # mongo.insert(tagged_tweet)
                        else:
                            print("%s No software" % (tweet,))
                        # sql.setTagged(tagged_tweet.get('tweet_db_id'))
                except IncompleteTaggingError as e:
                    # This will allow the tweet to be tagged again at a later stage
                    print("%s: %s" % (tweet_id, e))
                    print(tweet)
                    print("")
コード例 #2
0
ファイル: tagger.py プロジェクト: taxomania/TwitterMining
            else:
                res = self._sql.load_data(max_results=350)
            rows = res.num_rows()
            if not rows:
                print "No tweets left to analyse"
                break
            for _i_ in range(rows):
                for tweet in res.fetch_row():
                    count+=1
                    try:
                        total_tags.append(self._tag_tweet(tweet=tweet, store=store))
                    except:
                        continue
        print count
        return total_tags

    def close(self):
        """Close the ``_sql`` and ``_mongo`` connections held by this object.

        ``_mongo`` is closed in a ``finally`` clause so that it is released
        even when closing ``_sql`` raises; that exception still propagates
        to the caller afterwards.
        """
        try:
            self._sql.close()
        finally:
            self._mongo.close()

def main(args):
    """Build a ``TweetTagger`` from parsed arguments, run it and print the tags.

    Args:
        args: Object whose attributes (obtained through ``vars``) are passed
            to ``TweetTagger`` as keyword arguments.

    Returns:
        Always 0.
    """
    tagger = TweetTagger(**vars(args))
    try:
        # NOTE(review): the meaning of the argument 1 is defined by
        # TweetTagger.tag, which is not fully visible here.
        print(tagger.tag(1))
    finally:
        # Release the tagger's connections even when tagging fails.
        tagger.close()
    return 0

if __name__ == "__main__":
    # Script entry point: parse the command line, run main() and use its
    # return value as the process exit status.
    parsed_args = argument_parser.main()
    sys.exit(main(parsed_args))

コード例 #3
0
ファイル: __main__.py プロジェクト: taxomania/Project-Report
    d.connect('index', '/', controller=w, action='index')
    d.connect('main', '/:action', controller=w)
    d.connect('main-1', '/:action/', controller=w)
    d.connect('res', '/analysis/:name', controller=w, action='aggregate')
    d.connect('res-1', '/analysis/:name/', controller=w, action='aggregate')
    d.connect('search', '/twitter/:query', controller=w, action='tweets')
    d.connect('search-1', '/twitter/:query/', controller=w, action='tweets')
    d.connect('extract', '/extract/:query', controller=w, action='extract')
    d.connect('extract-1', '/extract/:query/', controller=w, action='extract')
    return d

if __name__ == '__main__':
    import os.path

    # Build the route dispatcher before computing the static root, which is
    # the "web" directory next to this file.
    dispatcher = setup_routes(args=argparse.main())
    web_root = os.path.dirname(os.path.abspath(__file__)) + "/web"

    root_settings = {
        'request.dispatch': dispatcher,
        'tools.staticdir.root': web_root,
    }
    # Static directory settings for the /css and /js mount points.
    css_settings = {'tools.staticdir.on': True, 'tools.staticdir.dir': 'css'}
    js_settings = {'tools.staticdir.on': True, 'tools.staticdir.dir': 'js'}

    config = {
        '/': root_settings,
        '/css': css_settings,
        '/js': js_settings,
    }

    # No root object is mounted; the config supplies the dispatcher.
    cherrypy.tree.mount(None, config=config)
    cherrypy.engine.start()
    cherrypy.engine.block()
コード例 #4
0
ファイル: __main__.py プロジェクト: taxomania/TwitterMining
    d.connect('main-1', '/:action/', controller=w)
    d.connect('res', '/analysis/:name', controller=w, action='aggregate')
    d.connect('res-1', '/analysis/:name/', controller=w, action='aggregate')
    d.connect('search', '/twitter/:query', controller=w, action='tweets')
    d.connect('search-1', '/twitter/:query/', controller=w, action='tweets')
    d.connect('extract', '/extract/:query', controller=w, action='extract')
    d.connect('extract-1', '/extract/:query/', controller=w, action='extract')
    return d

if __name__ == '__main__':
    import os.path

    # The first line of classpath.txt, stripped of surrounding whitespace,
    # is handed to setup_routes() as the classpath.
    with open('classpath.txt', 'r') as classpath_file:
        first_line = classpath_file.readline()
    classpath = first_line.strip()

    # Build the route dispatcher before computing the static root, which is
    # the "web" directory next to this file.
    dispatcher = setup_routes(args=argparse.main(), classpath=classpath)
    web_root = os.path.dirname(os.path.abspath(__file__)) + "/web"

    config = {
        '/': {
            'request.dispatch': dispatcher,
            'tools.staticdir.root': web_root,
        },
        # Static directory settings for the /css and /js mount points.
        '/css': {
            'tools.staticdir.on': True,
            'tools.staticdir.dir': 'css',
        },
        '/js': {
            'tools.staticdir.on': True,
            'tools.staticdir.dir': 'js',
        },
    }

    # No root object is mounted; the config supplies the dispatcher.
    cherrypy.tree.mount(None, config=config)
    cherrypy.engine.start()
    cherrypy.engine.block()