Exemple #1
0
def find_frequent_entities(tweets, entity_key, k):
    '''
    Gather one field from every entity of a given type across the
    tweets, lower-case it, and hand the resulting list to find_frequent.

    Input:
        tweets: an iterable of tweets; each tweet is read as
          tweet['entities'][entity_type], which must be iterable and
          yield mappings containing value_key
        entity_key: a pair (entity_type, value_key), such as
          ("hashtags", "text") or ("user_mentions", "screen_name")
        k: integer, forwarded unchanged to find_frequent

    Returns: whatever find_frequent returns for the collected values
      (described by the original author as a list of entity, count
      pairs).

    Raises: ValueError if entity_key does not unpack into exactly two
      items.
    '''
    entity_type, value_key = entity_key

    # Iterate tweets and their entities directly instead of through
    # range(len(...)) indices. Values are lower-cased so that strings
    # differing only in case become identical before being counted.
    entity_list = [
        entity[value_key].lower()
        for tweet in tweets
        for entity in tweet['entities'][entity_type]
    ]

    return find_frequent(entity_list, k)
Exemple #2
0
def find_frequent_entities(tweets, entity_key, value_key, k):
    '''
    Extract entity values from the tweets with helper_1_to_3 and pass
    them, together with k, to find_frequent.

    Input:
        tweets: a list of tweets
        entity_key: a string naming the entity type ("hashtags",
          "user_mentions", etc)
        value_key: a string; the appropriate value depends on the
          entity type
        k: integer, forwarded unchanged to find_frequent

    Returns: the result of find_frequent, documented by the original
      author as a list of entity, count pairs sorted in non-decreasing
      order by count (the sorting is not performed in this function).
    '''
    return find_frequent(helper_1_to_3(tweets, entity_key, value_key), k)
Exemple #3
0
def find_frequent_ngrams(tweets, n, k):
    '''
    Find the most frequently occurring n-grams by building them with
    make_n_grams and passing the result to find_frequent.

    Inputs:
        tweets: a list of tweets
        n: integer, forwarded to make_n_grams
        k: integer, forwarded to find_frequent

    Returns: the result of find_frequent (documented by the original
      author as a list of ngram/value pairs)
    '''
    ngrams = make_n_grams(tweets, n)
    return find_frequent(ngrams, k)
Exemple #4
0
def find_frequent_ngrams(tweets, n, k):
    '''
    Find frequently occurring n-grams by collecting them with
    make_big_ngram_array and passing the result to find_frequent.

    Inputs:
        tweets: a list of tweets
        n: integer, forwarded to make_big_ngram_array
        k: integer, forwarded to find_frequent

    Returns: the result of find_frequent (documented by the original
      author as a list of ngram/value pairs, where the n-grams are
      tuples)
    '''
    return find_frequent(make_big_ngram_array(tweets, n), k)
Exemple #5
0
def find_frequent_ngrams(tweets, n, stop_words, stop_prefixes, k):
    '''
    Find frequently occurring n-grams by building them with
    ngram_helper and passing the result to find_frequent.

    Inputs:
        tweets: a list of tweets
        n: integer
        stop_words: a set of strings to ignore
        stop_prefixes: a set of strings; words with a prefix that
          appears in this set should be ignored
        k: integer, forwarded to find_frequent

    Returns: the result of find_frequent for the n-grams produced by
      ngram_helper.

    NOTE(review): the previous docstring described the result as
      ((year, month), top-k n-grams) pairs, but this function does no
      grouping by month itself -- confirm against find_frequent.
    '''
    return find_frequent(ngram_helper(tweets, n, stop_words, stop_prefixes), k)
Exemple #6
0
def find_frequent_entities(tweets, entity_key, k):
    '''
    Collect the requested entity values with make_entity_array and pass
    them, together with k, to find_frequent.

    Input:
        tweets: a list of tweets
        entity_key: a pair such as ("hashtags", "text") or
          ("user_mentions", "screen_name")
        k: integer, forwarded unchanged to find_frequent

    Returns: the result of find_frequent (documented by the original
      author as a list of entity, count pairs)
    '''
    return find_frequent(make_entity_array(tweets, entity_key), k)
Exemple #7
0
def find_frequent_entities(tweets, entity_key, k):
    '''
    Find entities whose number of occurrences is at least 1/k times
    the number of entities across the tweets.

    The entity values are gathered by extract_entities_list and the
    frequency selection is delegated to find_frequent.

    Input:
        tweets: list of tweets
        entity_key: a pair (key, subkey), e.g. ("hashtags", "text") or
          ("user_mentions", "screen_name")
        k: integer, forwarded unchanged to find_frequent

    Returns: the result of find_frequent (documented by the original
      author as a list of entity, count pairs)
    '''
    # Pull out the desired entity values first, then count them.
    entities = extract_entities_list(tweets, entity_key)
    return find_frequent(entities, k)
Exemple #8
0
def find_frequent_entities(tweets, entity_key, k):
    '''
    Find entities whose number of occurrences is at least 1/k times
    the number of entities across the tweets.

    The entity values are gathered by hashtager and the frequency
    selection is delegated to find_frequent.

    Input:
        tweets: list of tweets
        entity_key: a pair such as ("hashtags", "text") or
          ("user_mentions", "screen_name")
        k: integer, forwarded unchanged to find_frequent

    Returns: the result of find_frequent (documented by the original
      author as a list of entity, count pairs)
    '''
    return find_frequent(hashtager(tweets, entity_key), k)