# Example 1 (0)

# Read search keywords from the input file, one keyword per line.
# splitlines() handles both \n and \r\n endings (split("\n") would leave
# a trailing "\r" on Windows-edited files), and stripping BEFORE the
# emptiness test drops blank and whitespace-only lines — previously a
# line of spaces survived the len(kw) > 0 filter and produced an empty
# keyword.
keywords = []
with open(args.input) as f:
    keywords = [kw.strip() for kw in f.read().splitlines() if kw.strip()]

# TweetGrapper is project-local (imported elsewhere in this file);
# presumably it wraps the Twitter search/stream API — confirm in its module.
grap = TweetGrapper()

#Search Mode
#------------------
# Dispatch on the --mode CLI argument (Python 2 print statements).
# writeTweet is defined elsewhere in this file; it looks like the
# per-tweet callback handed to search/stream — TODO confirm.
if "search" == args.mode.lower():
    print "Activating search mode"
    # location and lang are forwarded only when BOTH are given;
    # supplying just one of them is silently ignored.
    if args.location is not None and args.lang is not None:
        grap.search(keywords, writeTweet, args.location, args.lang)
    else:
        grap.search(keywords, writeTweet)

#STREAM Mode
#------------------
elif "stream" == args.mode.lower():
    print "Activating stream mode"
    # same both-or-neither rule as the search branch above
    if args.location is not None and args.lang is not None:
        grap.stream(keywords, writeTweet, args.location, args.lang)
    else:
        grap.stream(keywords, writeTweet)

#STREAMLocation mode
#------------------
# NOTE(review): this branch is truncated — its body lies outside the
# visible chunk.
elif "streamlocation" == args.mode.lower():
# Example 2 (0)
#GoodKeywords = ["محترم","جميل","محترمة"] 
# Seed list of Arabic positive-sentiment words used to bootstrap
# lexicon expansion.
GoodKeywords = ["حصري","حلو","طيب","رائع","عادي","خلوق","مختلف","مميز","سهل","لطيف","سعيد","سلس","بسيط","الحمد","نعم","خاص","كويس","متألق","خفيف","راقي","متواضع","يسر","راح","جميل","محترم","رايق","محترمة","مؤدب","حلوة","ممتع","جديد","مبدع","فايق","متميز","حبوب"]

#GoodKeywords = ["و","انت","يا","ا"]

grap = TweetGrapper()

iteration = 0 

# Expansion loop: for each known-good word w, search the exact phrase
# "w و" (w + Arabic conjunction "and") and harvest the word that follows
# the conjunction as a new candidate keyword.
while True: 

	newGoodKeywords = []	
	for w in GoodKeywords:	
		# quoted exact-phrase query: "<w> و "
		searchString = "\""+w + " و \"" 
		result = grap.search([searchString],None)
		if len(result) > 0 :
			#find 1grams of the resulted word
			for tweet in result:
				r = tweet.clean(True)
				# searchString is rebound here to the unquoted form;
				# .decode("utf-8") on it marks this as Python 2 byte-string code
				searchString = w + " و "
				pos = r.find(searchString.decode("utf-8"))
				w2 = ""

				# NOTE(review): `is not -1` is an identity test, not equality;
				# it only works via CPython's small-int cache. Should be
				# `pos != -1` (same on the inner test below).
				if pos is not -1 :
					# locate the end of the word that follows " و "
					pos2 = r[pos+len(searchString.decode("utf-8")):].find(" ")
					if pos2 is not -1 :
						# print r 
						# print r[pos+len(searchString.decode("utf-8")):]
						# print pos
						# print pos2
# Example 3 (0)

# Read search keywords from the input file, one keyword per line.
# splitlines() handles both \n and \r\n endings (split("\n") would leave
# a trailing "\r" on Windows-edited files), and stripping BEFORE the
# emptiness test drops blank and whitespace-only lines — previously a
# line of spaces survived the len(kw) > 0 filter and produced an empty
# keyword.
keywords = []
with open(args.input) as f:
    keywords = [kw.strip() for kw in f.read().splitlines() if kw.strip()]

# TweetGrapper is project-local (imported elsewhere in this file);
# presumably it wraps the Twitter search/stream API — confirm in its module.
grap = TweetGrapper()

#Search Mode
#------------------
# Dispatch on the --mode CLI argument (Python 2 print statements).
# writeTweet is defined elsewhere in this file; it looks like the
# per-tweet callback handed to search/stream — TODO confirm.
if  "search" == args.mode.lower():   
  print "Activating search mode"  
  # location and lang are forwarded only when BOTH are given;
  # supplying just one of them is silently ignored.
  if args.location is not None and args.lang is not None : 
    grap.search(keywords,writeTweet,args.location,args.lang)
  else :     
    grap.search(keywords,writeTweet)

#STREAM Mode
#------------------
elif "stream"  == args.mode.lower():
  print "Activating stream mode"
  # same both-or-neither rule as the search branch above
  if args.location is not None and args.lang is not None : 
    grap.stream(keywords,writeTweet,args.location,args.lang)
  else : 
    grap.stream(keywords,writeTweet)


#STREAMLocation mode
#------------------
# Example 4 (0)
    "خفيف", "راقي", "متواضع", "يسر", "راح", "جميل", "محترم", "رايق", "محترمة",
    "مؤدب", "حلوة", "ممتع", "جديد", "مبدع", "فايق", "متميز", "حبوب"
]
# NOTE(review): the opening `GoodKeywords = [` of this seed list of Arabic
# positive-sentiment words lies above the visible chunk.

#GoodKeywords = ["و","انت","يا","ا"]

grap = TweetGrapper()

iteration = 0

# Expansion loop: for each known-good word w, search the exact phrase
# "w و" (w + Arabic conjunction "and") and harvest the word that follows
# the conjunction as a new candidate keyword.
while True:

    newGoodKeywords = []
    for w in GoodKeywords:
        # quoted exact-phrase query: "<w> و "
        searchString = "\"" + w + " و \""
        result = grap.search([searchString], None)
        if len(result) > 0:
            #find 1grams of the resulted word
            for tweet in result:
                r = tweet.clean(True)
                # searchString is rebound to the unquoted form;
                # .decode("utf-8") marks this as Python 2 byte-string code
                searchString = w + " و "
                pos = r.find(searchString.decode("utf-8"))
                w2 = ""

                # NOTE(review): `is not -1` is an identity test, not equality;
                # it only works via CPython's small-int cache. Should be
                # `pos != -1` (same on the inner test below).
                if pos is not -1:
                    pos2 = r[pos +
                             len(searchString.decode("utf-8")):].find(" ")
                    if pos2 is not -1:
                        # print r
                        # print r[pos+len(searchString.decode("utf-8")):]
                        # print pos
# Example 5 (0)
'''

import argparse

from Classes.Tweet import *
from TweetGrapper.TweetGrapper import *
from PatternMatcher import *

# t = Tweet(u"اصلها \n\n لو عرفت تخليك بني ادم محترم و عارف ربنا .. اوعي تسيبها !",language="ar",searchKeyword="محترم")

# print t.clean()
# print t.clean()
# print t.cleanText

# Ad-hoc smoke test: search for the Arabic word "محترم" ("respectful")
# and dump id + cleaned text of each returned result. The results expose
# .id and .clean(), presumably Tweet instances — confirm in Classes.Tweet.
grap = TweetGrapper()
l = grap.search("محترم")

# Python 2 print statement; .encode("utf-8") turns the cleaned unicode
# text back into a byte string for stdout.
for i in l:
    print str(i.id) + "\t" + i.clean().encode("utf-8")

# Commented-out CLI scaffolding for the pattern-based lexicon extractor,
# kept for reference.
# parser = argparse.ArgumentParser(description='tool to extract set of Subjecitve Words and idioms depending on set of Patterns written in Config File')
# parser.add_argument('-c','--config', help='Input Config file name',required=True)
# parser.add_argument('-i','--input', help='Input Tweets files to Extract subjective words from',required=True)
# parser.add_argument('-o','--output',help='Output file name - print in console if not specified', required= True)
# parser.add_argument('-uf','--uniqandfilter',help='filter extracted lexicon words and save them to clean_uniq_output file with counts', required= False , action="store_true")
# parser.add_argument('-sl','--seedlexicon', help='Input classified lexicon file name',required=False)
# args = parser.parse_args()

# if args.uniqandfilter is True and args.seedlexicon is None:
#   parser.error('must specify seedlexicon when choosing [-uf] option')
# Example 6 (0)
"""

import argparse

from Classes.Tweet import *
from TweetGrapper.TweetGrapper import *
from PatternMatcher import *

# t = Tweet(u"اصلها \n\n لو عرفت تخليك بني ادم محترم و عارف ربنا .. اوعي تسيبها !",language="ar",searchKeyword="محترم")

# print t.clean()
# print t.clean()
# print t.cleanText

# Ad-hoc smoke test: search for the Arabic word "محترم" ("respectful")
# and dump id + cleaned text of each returned result. The results expose
# .id and .clean(), presumably Tweet instances — confirm in Classes.Tweet.
grap = TweetGrapper()
l = grap.search("محترم")

# Python 2 print statement; .encode("utf-8") turns the cleaned unicode
# text back into a byte string for stdout.
for i in l:
    print str(i.id) + "\t" + i.clean().encode("utf-8")


# Commented-out CLI scaffolding for the pattern-based lexicon extractor,
# kept for reference.
# parser = argparse.ArgumentParser(description='tool to extract set of Subjecitve Words and idioms depending on set of Patterns written in Config File')
# parser.add_argument('-c','--config', help='Input Config file name',required=True)
# parser.add_argument('-i','--input', help='Input Tweets files to Extract subjective words from',required=True)
# parser.add_argument('-o','--output',help='Output file name - print in console if not specified', required= True)
# parser.add_argument('-uf','--uniqandfilter',help='filter extracted lexicon words and save them to clean_uniq_output file with counts', required= False , action="store_true")
# parser.add_argument('-sl','--seedlexicon', help='Input classified lexicon file name',required=False)
# args = parser.parse_args()


# if args.uniqandfilter is True and args.seedlexicon is None: