def check_keyword(self, keyword):
    """Report whether *keyword* is already recorded, registering it if not.

    The keyword list lives in ``list_keywords.csv`` (one ``keywords``
    column).  The method returns ``True`` only when the keyword is already
    listed there AND at least one of the two per-keyword data files
    (``Twitter`` / ``WebCrawler``) reports a falsy ``do_it`` flag.  In every
    other case the keyword is appended to the list and ``False`` is
    returned.

    Args:
        keyword: The search keyword to look up / register.

    Returns:
        bool: ``True`` when the early-exit condition above holds, otherwise
        ``False``.

    Side effects:
        Creates ``list_keywords.csv`` on the first run, or appends a row to
        it through ``ManageFile``.
    """
    file_name = 'list_keywords.csv'
    file_name_Ncsv = 'list_keywords'
    try:
        # Read the list and release the handle immediately; the original
        # kept it open (and leaked it on the early ``return True``).
        with open(file_name, "r") as save:
            df = pandas.read_csv(save)

        # Number of rows whose keyword equals the requested one; > 0 means
        # the keyword has been recorded before.
        num = len(df[df["keywords"] == keyword])

        # NOTE(review): ``do_it`` is defined by ManageFile, which is not
        # visible here -- presumably it tells whether the data file still
        # has to be produced; confirm against ManageFile.
        twitter = ManageFile("Twitter", keyword + "_Ncut" + self.lang,
                             ["time", "content", "places"], "r")
        try:
            crawler = ManageFile("WebCrawler", keyword + "_Ncut" + self.lang,
                                 ["time", "header", "content", "link"], "r")
            try:
                already_searched = (
                    num > 0 and (not twitter.do_it or not crawler.do_it)
                )
            finally:
                crawler.close()
        finally:
            # Always close both readers, including on the early-return path.
            twitter.close()

        if already_searched:
            return True

        # NOTE(review): this also runs when the keyword is already listed
        # but both ``do_it`` flags are truthy, so the keyword can be
        # appended more than once -- behaviour kept from the original.
        save = ManageFile("", file_name_Ncsv, ["keywords"], "a")
        try:
            save.managefile_main([keyword])
        finally:
            save.close()
    except FileNotFoundError:
        # First run: the keyword list does not exist yet, so create it with
        # its header row and the current keyword.
        with open(file_name, "w", newline='') as temp:
            temp.write("keywords\n")
            temp.write(f"{keyword}\n")
    return False
def test_managefile_main(self):
    """Check that writing rows through ManageFile grows the test CSV.

    Counts the rows of ``Test_write_file/test.csv``, writes a random number
    of rows (between 1 and 100) through a ``ManageFile`` opened in ``"w"``
    mode, counts again and asserts that the new count is strictly greater
    than the old one.

    NOTE(review): the outcome depends on the file's previous content and on
    a random row count, so the test can be flaky if ``"w"`` truncates the
    file -- confirm against ManageFile.
    """
    csv_path = "Test_write_file/" + "test" + ".csv"

    def count_rows():
        # Returns (number of CSV rows) - 1; the -1 presumably discounts the
        # header row.  NUL characters are stripped because csv.reader
        # rejects them.
        with open(csv_path, "r") as handle:
            reader = csv.reader(
                (line.replace('\0', '') for line in handle), delimiter=","
            )
            return sum(1 for _ in reader) - 1

    row_len_old = count_rows()

    writefile = ManageFile("Test_write_file", "test", ["a", "b", "c"], "w")
    try:
        for i in range(random.randint(0, 10), random.randint(11, 100)):
            writefile.managefile_main([i * 0, i * 1, i * 2])
    finally:
        # Close even if a write fails so the handle is not leaked and the
        # re-read below sees flushed data.
        writefile.close()

    row_len_new = count_rows()
    self.assertGreater(row_len_new, row_len_old)
class Twitter_API:
    """Thin wrapper around a tweepy ``API`` object that stores search results
    and trending topics in CSV files through ``ManageFile``.
    """

    # Number of consecutive failed search requests after which
    # ``main_twitter`` gives up instead of retrying forever.
    MAX_SEARCH_ERRORS = 5

    def __init__(self, nlp):
        """Authenticate against Twitter and keep the NLP helper.

        Args:
            nlp: Object providing ``detection_lang(text)``; used by
                ``write_csv`` to filter tweets by language.
        """
        import os  # local import: only needed to read credential overrides

        # SECURITY: credentials are committed in source.  They are kept as
        # fallbacks for backward compatibility, but should be rotated and
        # supplied through the environment variables below instead.
        consumer_key = os.environ.get(
            "TWITTER_CONSUMER_KEY", "ku1u0AkXp7DiD8UuDFBD5ejc7")  # aka API key
        consumer_secret = os.environ.get(
            "TWITTER_CONSUMER_SECRET",
            "3OifKHMc5Ik7VMUhjoGUu4BZBDLRDLUTeM6Qo2M70OYKqHgpGP")  # aka API key secret
        access_token = os.environ.get(
            "TWITTER_ACCESS_TOKEN",
            "1348183052179001347-Sy8D0nHWqhVjKYiQ2cVTNgkv6m1HYW")
        access_token_secret = os.environ.get(
            "TWITTER_ACCESS_TOKEN_SECRET",
            "Tars6ymAzSCwLTTxGfeqR78cJTAhm7c7mfen5UAXKa1WQ")

        # Authenticate to Twitter
        self.auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        self.auth.set_access_token(access_token, access_token_secret)

        # Create API object
        self.api = tweepy.API(self.auth,
                              wait_on_rate_limit=True,
                              wait_on_rate_limit_notify=True)
        self.nlp2 = nlp

    def main_twitter(self, lang, count, keyword, since, until, update=False):
        """Search tweets for *keyword* page by page and append them to a CSV.

        Pages are requested with a decreasing ``max_id`` until ``write_csv``
        reports a tweet older than *since*, until an empty page is seen once
        the round counter has reached 10, or until
        ``MAX_SEARCH_ERRORS`` consecutive requests have failed.

        Args:
            lang: Language code passed to the search API and to the language
                filter in ``write_csv`` (``"all"`` accepts ``en`` and ``th``).
            count: Number of results to try and retrieve per page.
            keyword: Search query.
            since: Inclusive start date, ``"YYYY-MM-DD"``.
            until: Inclusive end date, ``"YYYY-MM-DD"``; one day is added
                before it is sent to the API.
            update: Unused; kept for interface compatibility.
        """
        # Constant subtracted from the last seen tweet id to step the
        # ``max_id`` window back (original note: "a x 10^13").
        OFFSET = 38555555555555

        # Output file is opened in append mode.
        column = ['time', 'content', 'places']
        self.writer2 = ManageFile("Twitter", keyword + "_Ncut" + lang,
                                  column, "a")
        self.date_since = datetime.strptime(since + " 00:00:00",
                                            "%Y-%m-%d %H:%M:%S")
        until_day = datetime.strptime(until, "%Y-%m-%d") + timedelta(days=1)
        until = until_day.date().isoformat()

        count_ = 0  # round counter (also printed as progress)
        max_id = -1  # id of the last tweet of the previous page; -1 = none yet
        more_offset = 0  # grows after empty pages to step further back
        keep_going = True
        failures = 0  # consecutive TweepError count

        # NOTE(review): the standard search API documents result_type values
        # "mixed", "recent" and "popular"; confirm "current" is intended.
        search_args = dict(q=keyword, lang=lang, count=count,
                           tweet_mode="extended", result_type="current",
                           until=until)

        try:
            while keep_going:
                print(count_, "round Twitter")
                print(count_)
                try:
                    if max_id <= 0 and more_offset < 1:
                        # First round: no max_id constraint.
                        data = self.api.search(**search_args)
                    elif more_offset >= 1:
                        # An empty page was seen before: widen the step on
                        # every such round.
                        data = self.api.search(
                            **search_args,
                            max_id=str(max_id - OFFSET - 555555555
                                       - (100000000 * more_offset)))
                        count_ += 1
                        more_offset += 1
                    else:
                        # Regular follow-up round: constant step.
                        data = self.api.search(
                            **search_args, max_id=str(max_id - OFFSET))

                    max_id = data[-1].id  # IndexError when the page is empty
                    count_ += 1
                    # write_csv returns False once a tweet older than
                    # ``since`` is reached, which ends the loop.
                    keep_going = self.write_csv(data, keyword, lang,
                                                since, until)
                    failures = 0
                except IndexError:
                    # data[-1] failed, i.e. the search returned no tweets.
                    failures = 0
                    more_offset += 1
                    count_ += 1
                    if count_ >= 10:
                        keep_going = False
                except tweepy.error.TweepError as err:
                    # Previously swallowed silently, which made a persistent
                    # API error spin forever.  Retry a bounded number of
                    # times, then stop.
                    failures += 1
                    print("Twitter search failed:", err)
                    if failures >= self.MAX_SEARCH_ERRORS:
                        keep_going = False
        finally:
            # Release the output file even if an unexpected error escapes.
            self.writer2.close()
        print("Twitter Done", count_)

    def update_mode(self, query, lang, count, tweet_mode, result_type, since,
                    until):
        """Return a tweepy cursor iterator over search results for *query*.

        All arguments except *since* (unused) are forwarded to
        ``api.search`` through ``tweepy.Cursor``.
        """
        return tweepy.Cursor(self.api.search,
                             q=query,
                             lang=lang,
                             count=count,
                             tweet_mode=tweet_mode,
                             result_type=result_type,
                             until=until).items()

    def write_csv(self, data, keyword, lang, since, until):
        """Write the tweets in *data* to ``self.writer2``.

        Retweets (text containing ``"RT @"``) and tweets whose detected
        language does not match *lang* are skipped.  Requires
        ``self.writer2`` and ``self.date_since`` to be set (done by
        ``main_twitter``).

        Args:
            data: Iterable of tweet objects exposing ``created_at``,
                ``full_text`` and ``user.location``.
            keyword: Unused; kept for interface compatibility.
            lang: Required language code, or ``"all"`` for ``en``/``th``.
            since: Unused; kept for interface compatibility.
            until: Unused; kept for interface compatibility.

        Returns:
            bool: ``False`` as soon as a tweet created before
            ``self.date_since`` is met (nothing after it is written),
            ``True`` when every tweet was processed.
        """
        for infor in data:
            created_text = str(infor.created_at)
            created = datetime.strptime(created_text, "%Y-%m-%d %H:%M:%S")
            if created < self.date_since:
                # Older than the requested window: tell the caller to stop.
                return False

            detected = self.nlp2.detection_lang(infor.full_text)
            if lang == "all":
                lang_ok = detected in ("en", "th")
            else:
                lang_ok = lang == detected

            if "RT @" not in infor.full_text and lang_ok:
                self.writer2.managefile_main(
                    [created_text, infor.full_text, infor.user.location])
        return True

    def hit_trends(self):
        """Fetch the trending topics for Bangkok and write them to the
        ``Hit_Trends`` CSV (columns ``keyword``, ``tweet``), printing the
        elapsed time.
        """
        start = time.time()
        writer = ManageFile("Hit_Trends", "Hit_Trends",
                            ["keyword", "tweet"], "w")
        try:
            # WOEID of Bangkok
            woeid = 1225448

            # fetching the trends
            trends = self.api.trends_place(id=woeid)

            print("The top trends for the location are :")
            for value in trends:
                for trend in value['trends']:
                    writer.managefile_main(
                        [trend["name"], trend["tweet_volume"]])
        finally:
            # The original never closed this writer.
            writer.close()
        print(time.time() - start, "hittwitter")