def clean_tweets(input, output):
    # Create the output text file
    output_file = open(output, 'w')
    # Create a JSON array out of the line-delimited JSON file
    json_input = utility.to_json_array(input)
    # Initialize the unicode counter
    unicode_count = 0
    # Iterate through all JSON objects within the array
    for current_obj in json_input:
        if 'text' in current_obj:
            text = parse.parse_json(current_obj, 'text')
            created_at = parse.parse_json(current_obj, 'created_at')
            # Count posts whose text or timestamp contains unicode
            if contain_unicode(text) or contain_unicode(created_at):
                unicode_count += 1
            text = utility.clean_text(text)
            created_at = utility.clean_text(created_at)
            # Write the cleaned post to the output file
            output_file.write(text + ' (timestamp: ' + created_at + ')\n')
    # Write the aggregate unicode count to the output file
    output_file.write(str(unicode_count) + ' tweet(s) contained unicode.')
    # Close the output file
    output_file.close()
    print("\nTweet clean completed\nOutput in: " + output)
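# A minimal usage sketch for clean_tweets (the file paths are hypothetical):
#
#   clean_tweets('tweet_input/tweets.txt', 'tweet_output/ft1.txt')
#
# Sketch of the contain_unicode check the function relies on, assuming
# "unicode" here means any character outside the 7-bit ASCII range; the
# real helper may differ.
def contain_unicode_sketch(s):
    # True if any character falls outside 7-bit ASCII
    return any(ord(ch) > 127 for ch in s)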
def average_degree(input, output):
    # Create the output text file
    output_file = open(output, 'w')
    # Create a JSON array out of the line-delimited JSON file
    json_input = utility.to_json_array(input)
    adjacency_list = {}
    previous_threshold_time = None
    # Iterate through all objects within the JSON array
    for current_obj in json_input:
        # Check that the post has a text field
        if 'text' in current_obj:
            # Extract the text and timestamp of the post
            text = parse.parse_json(current_obj, 'text')
            created_at = parse.parse_json(current_obj, 'created_at')
            # Clean the text and timestamp, then parse the timestamp
            text = utility.clean_text(text)
            created_at = utility.clean_text(created_at)
            parsed_time = parse.parse_timestamps(created_at)
            # Calculate the current post's threshold time
            threshold_time = utility.minute_offset(parsed_time)
            # Initialize the previous threshold time so it can be used
            # for comparison, skipping redundant checks
            if previous_threshold_time is None:
                previous_threshold_time = threshold_time
            # If the threshold time changed, remove all edges in the
            # adjacency list below it and reset the stored threshold
            # to the latest post's time
            if threshold_time != previous_threshold_time:
                adjacency_list = remove_outdated(adjacency_list, threshold_time)
                previous_threshold_time = threshold_time
            # Build the adjacency list from the post's hashtag list:
            # {'text': timestamp} -> {node: [{'edge': ..., 'timestamp': ...}, ...]}
            adjacency_list = identify_hashtags(adjacency_list, text, parsed_time)
            # Create the degree list and calculate the rolling average degree
            degree_list = create_degree_list(adjacency_list)
            avg_degree = utility.calculate_average(degree_list)
            # Write the rolling average degree to the output file
            output_file.write(str(avg_degree) + '\n')
    # Close the output file
    output_file.close()
    print("\nAverage degree completed\nOutput in: " + output)
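# Minimal sketches of the degree helpers, inferred only from the
# adjacency-list shape documented above
# ({node: [{'edge': ..., 'timestamp': ...}, ...]}); the real
# create_degree_list and utility.calculate_average may differ.
def create_degree_list_sketch(adjacency_list):
    # A node's degree is the number of edge records it holds
    return [len(edges) for edges in adjacency_list.values()]

def calculate_average_sketch(degree_list):
    # Average degree across all nodes; 0 for an empty graph
    return sum(degree_list) / len(degree_list) if degree_list else 0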
def incoming_sms():
    """Get the incoming message the user sent to our Twilio number."""
    resp = MessagingResponse()
    response_text = ''
    is_get_request = False
    logger.debug(request.args)
    if 'from' in request.args:
        is_get_request = True
        logger.info("Servicing a GET request")
        arg_from = request.args.get('from')
        body = request.args.get('message')
        logger.debug(f"Message phone number from: {arg_from}, message: {body}")
    elif 'Message' in request.args:
        is_get_request = True
        logger.info("Servicing a GET request FIRST TIME!!!")
        arg_from = request.args.get('PhoneNumber')
        body = request.args.get('Message')
        logger.debug(f"Message phone number from: {arg_from}, message: {body}")
    else:
        logger.info("Servicing a POST request")
        body = request.values.get('Body', None)
    if body is None:
        logger.error("There is no body. What is happening?")
        resp.message("THERE IS NO BODY!!! IS IT A ZOMBIE?")
        return str(resp)
    search_term = utility.clean_text(body)
    logger.info(f"Search term: {search_term}")
    # Helpful messages for common keywords
    if len(search_term) < 1 or search_term == "1usa":
        response_text = generic_message_first_time
    elif search_term == "hello" or search_term == "info":
        response_text = generic_message
    elif search_term == "source":
        logger.debug("SOURCE")
        response_text = source_reply
    elif search_term == "time":
        logger.debug("TIME")
        response_text = cases.get_last_refreshed_time()
    elif search_term == "feedback":
        response_text = ("Please text FEEDBACK followed by whatever "
                         "message you would like to leave")
    elif search_term.split() and search_term.split()[0] == "feedback":
        # Log the feedback message
        logger.info("FEEDBACK: {0}".format(search_term))
        response_text = "Thank you for your feedback!"
    elif search_term == "total":
        response_text = cases.get_total_cases()
    elif search_term == "cases":
        response_text = ("Please specify the name of a US county/parish, "
                         "US state, or global country.\n\n"
                         "For example: Cases in New York\n\n"
                         "Text TOTAL to get global stats")
    elif "cases in" in search_term:
        regexp = re.compile("cases in(.*)$")
        case_search = regexp.search(search_term).group(1)
        logger.debug("CASES Searching for cases in: {0}".format(case_search))
        response_text = cases.handle_cases(utility.clean_text(case_search))
    # Otherwise treat the message as a free-form question for the bot
    else:
        result = bot.handle_query(search_term)
        logger.debug(f"Returning answer to question: {' '.join(result.split())}")
        response_text = result
    if is_get_request:
        logger.debug(f"Returning response text to EXTexting: {response_text}")
        return response_text
    else:
        logger.debug(f"Returning response text to Twilio: {response_text}")
        resp.message(response_text)
        return str(resp)
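# The handler above runs inside a Flask request context and uses Twilio's
# MessagingResponse. A minimal wiring sketch (the route path "/sms" and the
# view name are assumptions, not necessarily this app's actual configuration):
from flask import Flask, request
from twilio.twiml.messaging_response import MessagingResponse

app = Flask(__name__)

@app.route("/sms", methods=['GET', 'POST'])
def sms_webhook():
    # Delegates to the handler above for both GET and POST requests
    return incoming_sms()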
def handle_cases(search_term):
    fetch_data()
    if len(search_term.split()) < 1:
        logger.debug("CASES search term is empty. Returning early")
        return apology_message
    logger.debug(f"In handle_cases. Looking for {search_term}")
    # Special cases: normalize common aliases to the names used by the APIs
    if "china" in search_term:
        search_term = "china (mainland)"
    if "korea" in search_term:
        search_term = "south korea"
    if search_term in ["united states", "unitedstates", "us", "america"]:
        search_term = "usa"
    if search_term == "washington dc":
        search_term = "district of columbia"
    # Total for a country
    if search_term in en.countries_worldometer:
        my_country = ''
        for country in world_data['reports'][0]['table'][0]:
            if utility.clean_text(country['Country']) == search_term:
                my_country = country
                break
        if my_country == '':
            # The country name may have changed upstream in the API
            logger.warning("Could not find country for some reason...")
            return apology_message
        logger.debug(f"Found number of cases for {my_country['Country']}")
        obj = {"totalConfirmed": my_country['TotalCases'],
               "totalRecovered": my_country['TotalRecovered'],
               "totalDeaths": my_country['TotalDeaths'],
               "displayName": my_country['Country']}
        return format_response_for_cases(obj)
    # Total for each US state
    elif search_term in en.ctp_state_to_abbreviation_mapping:
        my_state = ''
        for state in state_data:
            if state['state'] == en.ctp_state_to_abbreviation_mapping[search_term]:
                my_state = state
                break
        if my_state == '':
            # Should not get here
            logger.warning("Could not find state for some reason...")
            return apology_message
        obj = {"totalConfirmed": my_state['positive'],
               "totalRecovered": my_state['recovered'],
               "totalDeaths": my_state['death'],
               "displayName": search_term.title()}
        return format_response_for_cases(obj)
    # Otherwise fall back to a US county search
    else:
        return get_county(search_term)
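# A minimal sketch of format_response_for_cases, inferred from the dict
# shape built above; the real formatter's wording and layout may differ.
def format_response_for_cases_sketch(obj):
    # Renders one stat per line under the location's display name
    return ("{displayName}\n"
            "Confirmed: {totalConfirmed}\n"
            "Recovered: {totalRecovered}\n"
            "Deaths: {totalDeaths}").format(**obj)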