예제 #1
0
파일: extractor.py 프로젝트: takayuk/says
    parser.add_argument("-i", "--items", default="items")
    parser.add_argument("-s", "--itemstats", default="itemstats")

    args = parser.parse_args()
    return args


if __name__ == "__main__":

    args = parse_args()

    db = Corpus(database=args.database, collection=args.items)
    db_stats = Corpus(database=args.database, collection=args.itemstats)
    
    # Resume point: take the "id" of the first document that findsorted()
    # returns from the stats collection. Presumably that is the most recently
    # processed item (sort direction is decided inside Corpus.findsorted,
    # which is not visible here -- TODO confirm).
    try:
        latstats = db_stats.findsorted({}, key="id")[0]["id"]
    except IndexError:
        # Nothing could be indexed at position 0, i.e. no stats documents
        # exist yet, so start from id 0. A plain ``0`` replaces the
        # Python 2-only ``0L`` literal, which is a SyntaxError on Python 3;
        # Python 2 promotes int to long automatically, so the value behaves
        # the same in the "$gt" comparison that follows.
        latstats = 0

    for i, item in enumerate(db.find({ "id": { "$gt": latstats }})):

        words = extractd.getwords(item)
        messages = extractd.getmessages(item)
        tags = extractd.gethashtags(item)
        urls = extractd.geturls(item)
        
        db_stats.append({
            "screen_name": item["screen_name"]
            , "words": words
            , "messages": messages
            , "hashtags": tags