def find_frequent_entities(tweets, entity_key, k):
    '''
    Find entities where the number of times the specific entity occurs is
    at least fraction * the number of entities across the tweets.

    Input:
        tweets: a list of tweets
        entity_key: a pair ("hashtags", "text"),
            ("user_mentions", "screen_name"), etc.
        k: integer

    Returns: list of (entity, count) pairs, as produced by find_frequent.
    '''
    outer_key, inner_key = entity_key
    # Collect the inner value of every entity of the requested type,
    # lower-cased up front so the counting is case-insensitive.
    # (Replaces the original index-based double loop + separate
    # lower-casing pass with one comprehension; behavior is identical.)
    entity_list = [entity[inner_key].lower()
                   for tweet in tweets
                   for entity in tweet['entities'][outer_key]]
    return find_frequent(entity_list, k)
def find_frequent_entities(tweets, entity_key, value_key, k):
    '''
    Find entities where the number of times the specific entity occurs is
    at least fraction * the number of entities across the tweets.

    Input:
        tweets: a list of tweets
        entity_key: a string ("hashtags", "user_mentions", etc)
        value_key: string (appropriate value depends on the entity type)
        k: integer

    Returns: list of (entity, count) pairs sorted in non-decreasing order
        by count.
    '''
    # Delegate extraction to the helper, then count with find_frequent.
    extracted_values = helper_1_to_3(tweets, entity_key, value_key)
    return find_frequent(extracted_values, k)
def find_frequent_ngrams(tweets, n, k):
    '''
    Find the most frequently-occurring n-grams.

    Inputs:
        tweets: a list of tweets
        n: integer
        k: integer

    Returns: list of (ngram, value) pairs
    '''
    # make_n_grams builds the full list of n-grams across the tweets;
    # find_frequent tallies them.
    # (Removed the leftover "Your code goes here" placeholder string.)
    return find_frequent(make_n_grams(tweets, n), k)
def find_frequent_ngrams(tweets, n, k):
    '''
    Find frequently occurring n-grams.

    Inputs:
        tweets: a list of tweets
        n: integer
        k: integer

    Returns: list of (ngram, value) pairs
    '''
    # Build every n-gram across the tweets, then count with find_frequent.
    return find_frequent(make_big_ngram_array(tweets, n), k)
def find_frequent_ngrams(tweets, n, stop_words, stop_prefixes, k):
    '''
    Find frequently occurring n-grams.

    Inputs:
        tweets: a list of tweets
        n: integer
        stop_words: a set of strings to ignore
        stop_prefixes: a set of strings. Words with a prefix that appears
            in this set should be ignored.
        k: integer

    Returns: sorted list of pairs. Each pair has the form:
        ((year, month), (sorted top-k n-grams for that month with their
        counts))
    '''
    # ngram_helper applies the stop-word / stop-prefix filtering before
    # the n-grams are counted by find_frequent.
    filtered_ngrams = ngram_helper(tweets, n, stop_words, stop_prefixes)
    return find_frequent(filtered_ngrams, k)
def find_frequent_entities(tweets, entity_key, k):
    '''
    Find entities where the number of times the specific entity occurs is
    at least fraction * the number of entities across the tweets.

    Input:
        tweets: a list of tweets
        entity_key: a pair ("hashtags", "text"),
            ("user_mentions", "screen_name"), etc.
        k: integer

    Returns: list of (entity, count) pairs
    '''
    # make_entity_array pulls out every hashtag / screen name across the
    # tweets; find_frequent then tallies them.
    return find_frequent(make_entity_array(tweets, entity_key), k)
def find_frequent_entities(tweets, entity_key, k):
    '''
    Find entities where the number of times the specific entity occurs is
    at least 1/k * the number of entities across the tweets.

    Input:
        tweets: list of tweets
        entity_key: a pair ("hashtags", "text"),
            ("user_mentions", "screen_name"), etc.
        k: integer

    Returns: list of (entity, count) pairs
    '''
    # Extract the list of desired entities (key and subkey), then count.
    # (Removed the leftover "Your code goes here" placeholder string.)
    return find_frequent(extract_entities_list(tweets, entity_key), k)
def find_frequent_entities(tweets, entity_key, k):
    '''
    Find entities where the number of times the specific entity occurs is
    at least 1/k * the number of entities across the tweets.

    Input:
        tweets: list of tweets
        entity_key: a pair ("hashtags", "text"),
            ("user_mentions", "screen_name"), etc.
        k: integer

    Returns: list of (entity, count) pairs
    '''
    # hashtager extracts the entity values; find_frequent counts them.
    # (Removed the leftover "Your code goes here" placeholder string and
    # the throwaway single-letter temporaries.)
    return find_frequent(hashtager(tweets, entity_key), k)