keywords = []
# Read search keywords from the input file, one keyword per line.
with open(args.input) as f:
    kws = f.read().split("\n")
    # BUGFIX: filter on the *stripped* value — a whitespace-only line previously
    # passed `len(kw) > 0` and produced an empty keyword after strip().
    keywords = [kw.strip() for kw in kws if kw.strip()]

grap = TweetGrapper()

# Search Mode
# ------------------
if "search" == args.mode.lower():
    # Parenthesized print works identically under Python 2 and Python 3.
    print("Activating search mode")
    if args.location is not None and args.lang is not None:
        grap.search(keywords, writeTweet, args.location, args.lang)
    else:
        grap.search(keywords, writeTweet)
# STREAM Mode
# ------------------
elif "stream" == args.mode.lower():
    print("Activating stream mode")
    if args.location is not None and args.lang is not None:
        grap.stream(keywords, writeTweet, args.location, args.lang)
    else:
        grap.stream(keywords, writeTweet)
# STREAMLocation mode
# ------------------
elif "streamlocation" == args.mode.lower():
    # NOTE(review): this branch's body lies outside the visible chunk — confirm
    # against the full file before relying on this placeholder.
    pass
# Bootstrap loop: starting from a seed list of positive Arabic keywords, search
# Twitter for the exact phrase '"<word> و"' (word followed by the conjunction
# "and") and mine the token that follows the conjunction as a new candidate.

# GoodKeywords = ["محترم","جميل","محترمة"]
GoodKeywords = ["حصري","حلو","طيب","رائع","عادي","خلوق","مختلف","مميز","سهل","لطيف","سعيد","سلس","بسيط","الحمد","نعم","خاص","كويس","متألق","خفيف","راقي","متواضع","يسر","راح","جميل","محترم","رايق","محترمة","مؤدب","حلوة","ممتع","جديد","مبدع","فايق","متميز","حبوب"]
# GoodKeywords = ["و","انت","يا","ا"]

grap = TweetGrapper()
iteration = 0
while True:
    newGoodKeywords = []
    for w in GoodKeywords:
        # Exact-phrase query: keyword followed by " و " (the conjunction "and").
        searchString = "\"" + w + " و \""
        result = grap.search([searchString], None)
        if len(result) > 0:
            # Find 1-grams of the word that follows the conjunction.
            for tweet in result:
                r = tweet.clean(True)
                searchString = w + " و "
                # Hoisted: decode once per tweet instead of on every use.
                needle = searchString.decode("utf-8")
                pos = r.find(needle)
                w2 = ""
                # BUGFIX: was `pos is not -1` — an *identity* comparison against
                # an int literal that only works via CPython's small-int cache
                # (and is a SyntaxWarning on Python 3.8+). str.find signals
                # "not found" with the value -1, so compare with `!=`.
                if pos != -1:
                    pos2 = r[pos + len(needle):].find(" ")
                    if pos2 != -1:
                        # print r
                        # print r[pos+len(searchString.decode("utf-8")):]
                        # print pos
                        # print pos2
                        # NOTE(review): the rest of this branch (extraction of
                        # w2, use of newGoodKeywords / iteration) lies outside
                        # the visible chunk.
                        pass
keywords = []
# Read search keywords from the input file, one keyword per line.
with open(args.input) as f:
    kws = f.read().split("\n")
    # BUGFIX: filter on the *stripped* value — a whitespace-only line previously
    # passed `len(kw) > 0` and produced an empty keyword after strip().
    keywords = [kw.strip() for kw in kws if kw.strip()]

grap = TweetGrapper()

# Search Mode
# ------------------
if "search" == args.mode.lower():
    # Parenthesized print works identically under Python 2 and Python 3.
    print("Activating search mode")
    if args.location is not None and args.lang is not None:
        grap.search(keywords, writeTweet, args.location, args.lang)
    else:
        grap.search(keywords, writeTweet)
# STREAM Mode
# ------------------
elif "stream" == args.mode.lower():
    print("Activating stream mode")
    if args.location is not None and args.lang is not None:
        grap.stream(keywords, writeTweet, args.location, args.lang)
    else:
        grap.stream(keywords, writeTweet)
# STREAMLocation mode
# ------------------
"خفيف", "راقي", "متواضع", "يسر", "راح", "جميل", "محترم", "رايق", "محترمة", "مؤدب", "حلوة", "ممتع", "جديد", "مبدع", "فايق", "متميز", "حبوب" ] #GoodKeywords = ["و","انت","يا","ا"] grap = TweetGrapper() iteration = 0 while True: newGoodKeywords = [] for w in GoodKeywords: searchString = "\"" + w + " و \"" result = grap.search([searchString], None) if len(result) > 0: #find 1grams of the resulted word for tweet in result: r = tweet.clean(True) searchString = w + " و " pos = r.find(searchString.decode("utf-8")) w2 = "" if pos is not -1: pos2 = r[pos + len(searchString.decode("utf-8")):].find(" ") if pos2 is not -1: # print r # print r[pos+len(searchString.decode("utf-8")):] # print pos
''' import argparse from Classes.Tweet import * from TweetGrapper.TweetGrapper import * from PatternMatcher import * # t = Tweet(u"اصلها \n\n لو عرفت تخليك بني ادم محترم و عارف ربنا .. اوعي تسيبها !",language="ar",searchKeyword="محترم") # print t.clean() # print t.clean() # print t.cleanText grap = TweetGrapper() l = grap.search("محترم") for i in l: print str(i.id) + "\t" + i.clean().encode("utf-8") # parser = argparse.ArgumentParser(description='tool to extract set of Subjecitve Words and idioms depending on set of Patterns written in Config File') # parser.add_argument('-c','--config', help='Input Config file name',required=True) # parser.add_argument('-i','--input', help='Input Tweets files to Extract subjective words from',required=True) # parser.add_argument('-o','--output',help='Output file name - print in console if not specified', required= True) # parser.add_argument('-uf','--uniqandfilter',help='filter extracted lexicon words and save them to clean_uniq_output file with counts', required= False , action="store_true") # parser.add_argument('-sl','--seedlexicon', help='Input classified lexicon file name',required=False) # args = parser.parse_args() # if args.uniqandfilter is True and args.seedlexicon is None: # parser.error('must specify seedlexicon when choosing [-uf] option')
""" import argparse from Classes.Tweet import * from TweetGrapper.TweetGrapper import * from PatternMatcher import * # t = Tweet(u"اصلها \n\n لو عرفت تخليك بني ادم محترم و عارف ربنا .. اوعي تسيبها !",language="ar",searchKeyword="محترم") # print t.clean() # print t.clean() # print t.cleanText grap = TweetGrapper() l = grap.search("محترم") for i in l: print str(i.id) + "\t" + i.clean().encode("utf-8") # parser = argparse.ArgumentParser(description='tool to extract set of Subjecitve Words and idioms depending on set of Patterns written in Config File') # parser.add_argument('-c','--config', help='Input Config file name',required=True) # parser.add_argument('-i','--input', help='Input Tweets files to Extract subjective words from',required=True) # parser.add_argument('-o','--output',help='Output file name - print in console if not specified', required= True) # parser.add_argument('-uf','--uniqandfilter',help='filter extracted lexicon words and save them to clean_uniq_output file with counts', required= False , action="store_true") # parser.add_argument('-sl','--seedlexicon', help='Input classified lexicon file name',required=False) # args = parser.parse_args() # if args.uniqandfilter is True and args.seedlexicon is None: