Example #1
0
def start_task(params, task_file_name=""):
    """Collect tweets for one task during a fixed time window and store them.

    Queries Twitter for ``params["query"]`` again and again until
    ``params["time_seconds"]`` seconds have passed, feeds every non-empty
    answer to ``process_twitts`` and finally appends a single JSON document
    ``{"kpi": ..., "statuses": ...}`` to a file inside ``RESULTS_DIR``.

    Args:
        params: dict with task parameters; the keys read here are
            ``"user_name"``, ``"query"`` and ``"time_seconds"``.
        task_file_name: task file name used to derive the result file name
            ("_new" is removed and "_results" is appended). If empty, a
            name is built from the current timer value, the user name and
            the "_taskmade_" tag.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import section.
    from copy import deepcopy

    user_name = params.get("user_name")
    print("Staring task for user %s" % user_name)

    if not task_file_name:
        # The original referenced an undefined name `user` here.
        task_file_name = (
            str(current_timer()).replace(".", "") + "_%s_taskmade_" % user_name
        )

    results_file_name = task_file_name.replace("_new", "") + "_results"
    results_file_path = path.join(RESULTS_DIR, results_file_name)

    results_list = []
    twitts_id_cashe = []
    counter = 0  # number of queries that returned at least one tweet

    # One KPI dict per task, created before the loop so that it always exists
    # when the summary counters are written below, even if no query returned
    # anything. A deep copy is used because the template holds nested
    # containers (e.g. kpi_dict["user"]); a shallow copy would share them with
    # the template and leak data from one task into the next.
    kpi_dict = deepcopy(KPI_DICT_TEMPLATE)

    # The file is opened before polling starts (as before), so an unwritable
    # results location fails immediately; `with` guarantees it is closed even
    # when a query or the processing step raises.
    with open(results_file_path, "a") as results_file:
        target_end_time = current_timer() + float(params.get("time_seconds"))

        while current_timer() <= target_end_time:
            # Ask for a bigger batch until the first successful query.
            anserw_count = 100 if counter == 0 else 10

            twitts = query_to_twitter(params.get("query"), True, anserw_count)
            if twitts:
                new_twitts_lst = process_twitts(twitts, twitts_id_cashe, kpi_dict)
                results_list.extend(new_twitts_lst)
                counter += 1

            # Pause after every cycle, including empty answers, so an empty
            # result cannot turn the loop into back-to-back API requests.
            sleep_timer(5)

        kpi_dict["twitts_counter"] = len(results_list)
        kpi_dict["queries_counter"] = counter
        kpi_dict["languages_counter"] = len(kpi_dict["languages"])
        kpi_dict["user"]["users_counter"] = len(kpi_dict["user"]["uniqe_users"])
        kpi_dict["user"]["time_zone_counter"] = len(kpi_dict["user"]["time_zones"])

        result_dict = {"kpi": kpi_dict, "statuses": results_list}
        results_file.write(dumps(result_dict))

    print("End task for user {} , cicles - {}".format(user_name, str(counter)))
Example #2
0
def start_working():
    """Process the query files found in QUERIES_DIR in a never-ending loop.

    Every cycle waits ``max(default_timer - run_time, 1)`` seconds, where
    ``run_time`` is the duration of the previous cycle (initially equal to
    ``default_timer``, so the first wait is one second). It then walks the
    directory in sorted order. For each file whose name does not end with
    "_done", the first line is parsed as JSON (single quotes are replaced by
    double quotes first) and passed to ``start_task``; after a successful
    task ``change_query_file`` is called on the file. Files that fail to
    parse or to run are reported and skipped.

    This function does not return.
    """

    default_timer = run_time = 60

    while True:

        waiting_time = max(default_timer - run_time, 1)
        print("Next run will start in - %s seconds" % waiting_time)
        sleep_timer(waiting_time)
        start_time = current_timer()
        print("Starting reading queries line")

        for filename in sorted(listdir(QUERIES_DIR)):

            if filename.endswith("_done"):
                continue

            todo_file_path = path.join(QUERIES_DIR, filename)
            print("Start processing file: %s" % todo_file_path)

            # Only the first line carries the task; `with` closes the file
            # even if reading fails.
            with open(todo_file_path, "r") as todo_file:
                line = todo_file.readline()

            try:
                dict_line = loads(line.replace("'", '"'))
            except (Error, ValueError) as err:
                # ValueError is what the JSON parser raises for malformed or
                # empty input; catching it keeps one bad file from stopping
                # the whole loop. `Error` is defined outside this block.
                print("Error in JSON loading: %s" % err)
                continue

            try:
                start_task(dict_line, filename)
            except Error as err:
                print("Error in task doing: %s" % err)
                continue

            change_query_file(todo_file_path)

        run_time = current_timer() - start_time
        print("Finished reading queries directory, reading time - %s" % run_time)
Example #3
0
def query_to_twitter(words, only_twitts=False, count=15, lang="", geo=""):
    """Run a Twitter search request and return the decoded JSON answer.

    Args:
        words: searched phrase, placed in the ``q`` query parameter.
        only_twitts: if True, return only the ``"statuses"`` section of the
            answer; if False, return the whole decoded answer.
        count: number of tweets to request.
        lang: language filter; sent as ``lang`` when not empty.
        geo: geo filter; sent as ``geocode`` when not empty (the search API
            expects "latitude,longitude,radius").

    Returns:
        The decoded data (or its ``"statuses"`` section, which is None when
        the answer has no such key), or an empty string when the server
        answered with a rate-limit / overload status.
    """

    # NOTE(review): `words`, `lang` and `geo` are inserted verbatim;
    # presumably callers pass URL-safe values. Confirm, or percent-encode
    # them at the call sites.
    url = "https://api.twitter.com/1.1/search/tweets.json?"
    url += "q={}&count={}".format(words, str(count))
    if lang:
        url += "&lang=%s" % lang
    if geo:
        # The parameter was accepted and documented but never sent before.
        url += "&geocode=%s" % geo
    print(url)

    client = make_client()
    resp, content = client.request(url, "GET")
    print("Server answer: {}".format(resp["status"]))

    # 429 is the API v1.1 rate-limit status, 503 means the service is
    # overloaded; in both cases back off briefly and return a falsy value.
    if resp["status"] in ("429", "503"):
        print("out of limits")
        sleep_timer(2)
        return ""

    data = loads(content.decode("utf-8"))

    if only_twitts:
        data = data.get("statuses")

    return data