def find_min_count_entities(tweets, entity_key, min_count):
    '''
    Find the entities that occur at least min_count times.

    Inputs:
        tweets: a list of tweets; each one is read as
            tweet['entities'][<entity type>]
        entity_key: a pair ("hashtags", "text"),
            ("user_mentions", "screen_name"), etc
        min_count: integer

    Returns: list of entity, count pairs (the result of find_min_count)
    '''
    entity_type, value_field = entity_key

    # First pass: pull the requested field out of every entity of the
    # requested type, tweet by tweet.
    raw_values = [
        entity[value_field]
        for tweet in tweets
        for entity in tweet['entities'][entity_type]
    ]

    # Second pass: lower-case the values so that "Python" and "python"
    # are handed to find_min_count as the same entity.
    lowered = [value.lower() for value in raw_values]

    return find_min_count(lowered, min_count)
def find_min_count_entities(tweets, entity_key, value_key, min_count):
    '''
    Find the entities that occur at least min_count times.

    Inputs:
        tweets: a list of tweets
        entity_key: a string ("hashtags", "user_mentions", etc)
        value_key: string (appropriate value depends on the entity type)
        min_count: integer

    Returns: list of entity, count pairs sorted in non-decreasing
        order by count.
    '''
    # The extraction is delegated to helper_1_to_3; its result is passed
    # straight to find_min_count together with the threshold.
    return find_min_count(
        helper_1_to_3(tweets, entity_key, value_key),
        min_count,
    )
def find_min_count_ngrams(tweets, n, min_count):
    '''
    Find n-grams that occur at least min_count times.

    Inputs:
        tweets: a list of tweets
        n: integer
        min_count: integer

    Returns: list of ngram/value pairs
    '''
    # pre_process_tweets supplies the items to count; find_min_count
    # applies the min_count threshold to them.
    return find_min_count(pre_process_tweets(tweets, n), min_count)
def find_min_count_ngrams(tweets, n, min_count):
    '''
    Find n-grams that occur at least min_count times.

    Inputs:
        tweets: a list of tweets
        n: integer
        min_count: integer

    Returns: list of ngram/value pairs
    '''
    # Build the n-gram collection first, then let find_min_count apply
    # the threshold.
    ngrams = make_n_grams(tweets, n)
    return find_min_count(ngrams, min_count)
def find_min_count_entities(tweets, entity_key, min_count):
    '''
    Find the entities that occur at least min_count times.

    Inputs:
        tweets: a list of tweets
        entity_key: a pair ("hashtags", "text"),
            ("user_mentions", "screen_name"), etc
        min_count: integer

    Returns: list of entity, count pairs
    '''
    # Extraction is delegated to extract_entities_list; the threshold is
    # applied by find_min_count.
    entities = extract_entities_list(tweets, entity_key)
    return find_min_count(entities, min_count)
def find_min_count_ngrams(tweets, n, min_count):
    '''
    Find n-grams that occur at least min_count times.

    Inputs:
        tweets: a list of tweets
        n: integer
        min_count: integer

    Returns: list of ngram/value pairs
    '''
    # make_big_ngram_array supplies the n-grams for the whole tweet list;
    # find_min_count applies the min_count threshold to them.
    return find_min_count(make_big_ngram_array(tweets, n), min_count)
def find_min_count_entities(tweets, entity_key, min_count):
    '''
    Find the entities that occur at least min_count times.

    Inputs:
        tweets: a list of tweets
        entity_key: a pair ("hashtags", "text"),
            ("user_mentions", "screen_name"), etc
        min_count: integer

    Returns: list of entity, count pairs
    '''
    # clean_tweets prepares the values to count for the given entity_key;
    # find_min_count then applies the min_count threshold.
    return find_min_count(clean_tweets(tweets, entity_key), min_count)
def find_min_count_entities(tweets, entity_key, min_count):
    '''
    Find the entities that occur at least min_count times.

    Inputs:
        tweets: a list of tweets
        entity_key: a pair ("hashtags", "text"),
            ("user_mentions", "screen_name"), etc
        min_count: integer

    Returns: list of entity, count pairs
    '''
    # make_entity_array gathers the hashtags / screen names (or whatever
    # entity_key selects); find_min_count applies the threshold.
    return find_min_count(make_entity_array(tweets, entity_key), min_count)
def find_min_count_ngrams(tweets, n, min_count):
    '''
    Find n-grams that occur at least min_count times across all tweets.

    Inputs:
        tweets: a list of tweets
        n: integer
        min_count: integer

    Returns: list of ngram/value pairs
    '''
    # Pool the n-grams of every tweet before counting: the threshold is
    # meant to apply to the whole collection, not to each tweet on its own.
    # NOTE(review): assumes gen_n_grams returns an iterable of n-grams for
    # a single tweet -- confirm against its definition.
    all_ngrams = []
    for tweet in tweets:
        # Use the caller-supplied n-gram size n.
        all_ngrams.extend(gen_n_grams(tweet, n))

    return find_min_count(all_ngrams, min_count)
def find_min_count_ngrams(tweets, n, stop_words, stop_prefixes, min_count):
    '''
    Find n-grams that occur at least min_count times.

    Inputs:
        tweets: a list of tweets
        n: integer
        stop_words: a set of strings to ignore
        stop_prefixes: a set of strings.  Words w/a prefix that
            appears in this list should be ignored.
        min_count: integer

    Returns: list of key/value pairs sorted in non-increasing order
        by value.
    '''
    # ngram_helper receives the stop words and prefixes and supplies the
    # n-grams to count; find_min_count applies the threshold.
    ngrams = ngram_helper(tweets, n, stop_words, stop_prefixes)
    return find_min_count(ngrams, min_count)