def start_task(params, task_file_name=""):
    """Read task data, run tweet processing, and write the result to a file.

    Args:
        params: dict with parameters; keys read here: "user_name",
            "time_seconds" (run duration in seconds), "query" (search phrase).
        task_file_name: task file name, used to derive the matching result
            file name; if "" a name carrying a "_taskmade_" tag is generated.
    """
    global KPI_DICT_TEMPLATE
    print("Starting task for user %s" % params.get("user_name"))
    if not task_file_name:
        # BUG FIX: original referenced an undefined name `user` here.
        task_file_name = (str(current_timer()).replace(".", "")
                          + "_%s_taskmade_" % params.get("user_name"))
    results_file_name = task_file_name.replace("_new", "") + "_results"
    results_file_path = path.join(RESULTS_DIR, results_file_name)

    results_list = []
    twitts_id_cashe = []
    counter = 0
    # BUG FIX: pre-initialize so the summary below cannot raise NameError
    # when the loop body never assigns it (time_seconds <= 0, or every
    # query comes back empty).
    kpi_dict = KPI_DICT_TEMPLATE.copy()
    target_end_time = current_timer() + float(params.get("time_seconds"))
    while current_timer() <= target_end_time:
        # First pass fetches a larger batch to seed the result set.
        anserw_count = 100 if counter == 0 else 10
        twitts = query_to_twitter(params.get("query"), True, anserw_count)
        if not twitts:
            # BUG FIX: still delay before retrying — the original
            # `continue` skipped the sleep and hammered the API.
            sleep_timer(5)
            continue
        kpi_dict = KPI_DICT_TEMPLATE.copy()
        new_twitts_lst = process_twitts(twitts, twitts_id_cashe, kpi_dict)
        results_list.extend(new_twitts_lst)
        counter += 1
        sleep_timer(5)

    kpi_dict["twitts_counter"] = len(results_list)
    kpi_dict["queries_counter"] = counter
    kpi_dict["languages_counter"] = len(kpi_dict["languages"])
    kpi_dict["user"]["users_counter"] = len(kpi_dict["user"]["uniqe_users"])
    kpi_dict["user"]["time_zone_counter"] = len(kpi_dict["user"]["time_zones"])
    result_dict = {"kpi": kpi_dict, "statuses": results_list}
    # Context manager guarantees the file is closed even if the write fails
    # (the original opened it at function start and leaked on any error).
    with open(results_file_path, "a") as results_file:
        results_file.write(dumps(result_dict))
    print("End task for user {} , cicles - {}".format(params.get("user_name"), str(counter)))
def start_working():
    """Process query files from QUERIES_DIR in a never-ending loop.

    Each pass waits out the remainder of a nominal 60-second cycle, then
    reads every not-yet-done file, parses its first line as a dict, and
    hands it to start_task; successfully processed files are renamed/marked
    via change_query_file.
    """
    default_timer = run_time = 60
    while True:
        # Sleep for whatever is left of the cycle; never less than 1 second.
        waiting_time = max(default_timer - run_time, 1)
        print("Next run will start in - %s seconds" % waiting_time)
        sleep_timer(waiting_time)
        start_time = current_timer()
        print("Starting reading queries line")
        for filename in sorted(listdir(QUERIES_DIR)):
            # Files already processed carry a "_done" suffix — skip them.
            if filename.endswith("_done"):
                continue
            todo_file_path = path.join(QUERIES_DIR, filename)
            print("Start processing file: %s" % todo_file_path)
            # Only the first line of each task file is meaningful here.
            with open(todo_file_path, "r") as todo_file:
                line = todo_file.readline()
            try:
                # Task files use single-quoted dict repr; JSON needs double
                # quotes, hence the replace before decoding.
                dict_line = loads(line.replace("'", '"'))
            except ValueError as err:
                # BUG FIX: original caught an undefined name `Error`;
                # json decode failures raise ValueError (JSONDecodeError).
                print("Error in JSON loading: %s" % err)
                continue
            try:
                start_task(dict_line, filename)
            except Exception as err:
                # Broad catch is deliberate: one bad task must not kill
                # the worker loop. Logged and skipped.
                print("Error in task doing: %s" % err)
                continue
            change_query_file(todo_file_path)
        run_time = current_timer() - start_time
        print("Finished reading queries directory, reading time - %s" % run_time)
def query_to_twitter(words, only_twitts=False, count=15, lang="", geo=""):
    """Get data from the Twitter search API and decode it from JSON.

    Args:
        words: searched phrase.
        only_twitts: if True - extract and return the 'statuses' section
            from the response; if False - return the 'pure' decoded data.
        count: number of tweets to receive.
        lang: language filter (appended as `lang` when non-empty).
        geo: geo filter (appended as `geocode` when non-empty).
    Return:
        Decoded data, or "" when the server reports the query limit is
        exhausted (HTTP 503).
    """
    url = "https://api.twitter.com/1.1/search/tweets.json?"
    url += "q={}&count={}".format(words, str(count))
    if lang:
        url += "&lang=%s" % lang
    if geo:
        # BUG FIX: `geo` was accepted but never applied to the request.
        # Twitter's search endpoint names this parameter `geocode` —
        # NOTE(review): confirm the exact value format expected by callers.
        url += "&geocode=%s" % geo
    # BUG FIX: `print url` was a Python-2-only statement; the rest of the
    # file uses print() call syntax.
    print(url)
    client = make_client()
    resp, content = client.request(url, "GET")
    print("Server answer: {}".format(resp["status"]))
    if resp["status"] == "503":  # out of queries limits
        print("out of limits")
        sleep_timer(2)
        return ""
    data = loads(content.decode("utf-8"))
    if only_twitts:
        data = data.get("statuses")
    return data