import hashlib

def find_this_file(filename: str, stopWords: bool):
    # hash the file contents so results can be cached and looked up by content
    file_contents = read_file(filename)
    hash_details = hashlib.sha256(bytes(file_contents, encoding='utf-8')).hexdigest()
    # check the MongoDB cache for a document keyed by this hash
    data = collections.find_one({hash_details: {'$exists': True}}, {'_id': False})
    if not data:
        # not cached yet: process the file and store the result under its hash
        data = manipulate_data(file_contents)
        data = {hash_details: data}
        # insert_one replaces the legacy insert(..., check_keys=False); hex-digest
        # keys contain no '.' or '$', so no key escaping is needed
        collections.insert_one(data)
    if stopWords:
        return removeStopWords(data[hash_details])
    return data[hash_details]
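# A minimal usage sketch (an assumption, not part of the original module):
# `collections` is presumed to be a pymongo Collection, and read_file,
# manipulate_data, and removeStopWords the helpers defined elsewhere here.
#
#     from pymongo import MongoClient
#     collections = MongoClient()["cache_db"]["file_cache"]
#     processed = find_this_file("input/sample.txt", stopWords=True)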
def get_input(inp):
    # returns the tweets for the provided input string
    mode, usr = "", ""
    try:
        tinp = inp.split(":")
        mode = tinp[0]
        usr = tinp[1]
    except IndexError:
        # no ":" in the input; fall through and treat it as a file path
        pass
    if mode == "twitter":
        if usr == "home":
            tweets = get_home_timeline()  # tweets from the home timeline
        elif usr == "none":
            tweets = get_tweets_of()  # tweets of the authorized API account
        else:
            tweets = get_tweets_of(usr)  # tweets of any user by user id
    else:
        tweets = text_utils.read_file(inp).split("\n")
    return tweets
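# The input formats this function accepts, for reference (examples are
# illustrative, not from the original source):
#
#     get_input("twitter:home")       # home timeline
#     get_input("twitter:none")       # authorized account's tweets
#     get_input("twitter:some_user")  # tweets of user id "some_user"
#     get_input("corpus/tweets.txt")  # newline-separated tweets from a file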
def __init__(self, euro_dirname, sew_dirname, tom_dirname):
    """
    Instantiate the Sentence class.

    :param euro_dirname: path of parsed Eurosense sentences
    :param sew_dirname: path of parsed SEW sentences
    :param tom_dirname: path of parsed TOM sentences
    """
    # dataset paths
    self.euro_dirname = euro_dirname
    self.sew_dirname = sew_dirname
    self.tom_dirname = tom_dirname
    # regex for HTML entities such as &amp;
    self.pattern = re.compile(r"&\w+;")
    # stopwords (~600)
    self.cachedStopWords = set(read_file(STOP_WORDS))
    # punctuation characters
    self.punctuation = set(string.punctuation)
    # stemming is possible, but it doesn't improve the score enough
    self.stemmer = nltk.stem.porter.PorterStemmer()
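# A hedged sketch (not in the original class) of how these fields could
# combine to normalize one sentence:
#
#     def _clean(self, sentence):
#         sentence = self.pattern.sub(" ", sentence)  # strip HTML entities
#         return [tok for tok in sentence.lower().split()
#                 if tok not in self.cachedStopWords
#                 and tok not in self.punctuation]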
import tweepy
import json
import pprint
import time
import text_utils

# authorize and access the Twitter API
path = "./essentials/twitter_credentials/access.json"
json_data = text_utils.read_file(path)
credentials = json.loads(json_data)
auth = tweepy.OAuthHandler(credentials["consumer_key"], credentials["consumer_secret"])
auth.set_access_token(credentials["access_token"], credentials["access_token_secret"])
api = tweepy.API(auth)


def get_home_timeline():
    # returns the top 20 tweets on your home timeline
    public_tweets = api.home_timeline()
    tweets = []
    for tweet in public_tweets:
        tweets.append(tweet.text)
    return tweets


def get_tweets_of(user=None):
    # returns the top 20 tweets of a user given the user id;
    # defaults to the authorized account when no user is given
    if user is None:
        public_tweets = api.user_timeline()
    else:
        public_tweets = api.user_timeline(user)
    tweets = []
    for tweet in public_tweets:
        tweets.append(tweet.text)
    return tweets
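# A minimal smoke test (an addition, not part of the original module):
if __name__ == "__main__":
    # quick manual check; assumes access.json holds valid credentials
    for text in get_home_timeline()[:3]:
        print(text)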