Esempio n. 1
0
def find_min_count_entities(tweets, entity_key, min_count):
    '''
    Find the entities that occur at least min_count times.

    Inputs:
        tweets: a list of tweets; each tweet is read as
          tweet['entities'][entity_type], which must yield a sequence
          of entity dictionaries
        entity_key: a pair ("hashtags", "text"),
          ("user_mentions", "screen_name"), etc
        min_count: integer

    Returns: the result of find_min_count applied to the lower-cased
      entity values (documented as a list of entity, count pairs)
    '''

    entity_type, value_key = entity_key

    # Iterate over tweets and their entities directly rather than by
    # index. Values are lower-cased so that entities differing only in
    # case are handed to find_min_count as the same string.
    entity_list = [
        entity[value_key].lower()
        for tweet in tweets
        for entity in tweet['entities'][entity_type]
    ]

    return find_min_count(entity_list, min_count)
Esempio n. 2
0
def find_min_count_entities(tweets, entity_key, value_key, min_count):
    '''
    Report the entities that appear min_count or more times.

    Inputs:
        tweets: a list of tweets
        entity_key: a string naming the entity type ("hashtags",
          "user_mentions", etc)
        value_key: a string; the appropriate value depends on the
          entity type
        min_count: integer

    Returns: the result of find_min_count applied to the output of
      helper_1_to_3; intended to be a list of entity, count pairs
      sorted in non-decreasing order by count.
    '''

    return find_min_count(
        helper_1_to_3(tweets, entity_key, value_key),
        min_count,
    )
Esempio n. 3
0
def find_min_count_ngrams(tweets, n, min_count):
    '''
    Report the n-grams that appear min_count or more times.

    Inputs:
        tweets: a list of tweets
        n: integer, forwarded to pre_process_tweets along with tweets
        min_count: integer threshold forwarded to find_min_count

    Returns: the result of find_min_count (intended to be a list of
      ngram/value pairs)
    '''

    return find_min_count(pre_process_tweets(tweets, n), min_count)
Esempio n. 4
0
def find_min_count_ngrams(tweets, n, min_count):
    '''
    Find n-grams that occur at least min_count times.

    Inputs:
        tweets: a list of tweets
        n: integer
        min_count: integer

    Returns: list of ngram/value pairs
    '''

    # make_n_grams builds the collection handed to find_min_count, which
    # applies the min_count threshold.
    ngrams = make_n_grams(tweets, n)
    return find_min_count(ngrams, min_count)
Esempio n. 5
0
def find_min_count_entities(tweets, entity_key, min_count):
    '''
    Find the entities that occur at least min_count times.

    Inputs:
        tweets: a list of tweets
        entity_key: a pair ("hashtags", "text"),
          ("user_mentions", "screen_name"), etc
        min_count: integer

    Returns: list of entity, count pairs
    '''

    # extract_entities_list builds the collection handed to
    # find_min_count, which applies the min_count threshold.
    entities = extract_entities_list(tweets, entity_key)
    return find_min_count(entities, min_count)
Esempio n. 6
0
def find_min_count_ngrams(tweets, n, min_count):
    '''
    Report the n-grams that appear min_count or more times.

    Inputs:
        tweets: a list of tweets
        n: integer, forwarded to make_big_ngram_array along with tweets
        min_count: integer threshold forwarded to find_min_count

    Returns: the result of find_min_count (intended to be a list of
      ngram/value pairs)
    '''

    return find_min_count(make_big_ngram_array(tweets, n), min_count)
Esempio n. 7
0
def find_min_count_entities(tweets, entity_key, min_count):
    '''
    Find the entities that occur at least min_count times.

    Inputs:
        tweets: a list of tweets
        entity_key: a pair ("hashtags", "text"),
          ("user_mentions", "screen_name"), etc
        min_count: integer

    Returns: list of entity, count pairs
    '''
    # clean_tweets (defined elsewhere) prepares the items to be counted;
    # find_min_count then applies the min_count threshold.
    good_tweets = clean_tweets(tweets, entity_key)
    return find_min_count(good_tweets, min_count)
Esempio n. 8
0
def find_min_count_entities(tweets, entity_key, min_count):
    '''
    Report the entities that appear min_count or more times.

    Inputs:
        tweets: a list of tweets
        entity_key: a pair ("hashtags", "text"),
          ("user_mentions", "screen_name"), etc
        min_count: integer threshold forwarded to find_min_count

    Returns: the result of find_min_count applied to the output of
      make_entity_array (intended to be a list of entity, count pairs)
    '''

    return find_min_count(make_entity_array(tweets, entity_key), min_count)
Esempio n. 9
0
def find_min_count_ngrams(tweets, n, min_count):
    '''
    Find n-grams that occur at least min_count times across all
    tweets.

    Inputs:
        tweets: a list of tweets
        n: integer
        min_count: integer

    Returns: list of ngram/value pairs
    '''
    # Pool the n-grams of every tweet before counting: occurrences must
    # be tallied across the whole collection, not tweet by tweet, or an
    # n-gram spread over several tweets would never reach min_count.
    all_ngrams = []
    for tweet in tweets:
        # Assumes gen_n_grams returns an iterable of the n-grams of one
        # tweet -- confirm against its definition.
        all_ngrams.extend(gen_n_grams(tweet, n))
    return find_min_count(all_ngrams, min_count)
Esempio n. 10
0
def find_min_count_ngrams(tweets, n, stop_words, stop_prefixes, min_count):
    '''
    Report the n-grams that appear min_count or more times.

    Inputs:
        tweets: a list of tweets
        n: integer
        stop_words: a set of strings to ignore
        stop_prefixes: a set of strings. Words with a prefix that
          appears in this set should be ignored.
        min_count: integer threshold forwarded to find_min_count

    Returns: the result of find_min_count applied to the output of
      ngram_helper; intended to be a list of key/value pairs sorted in
      non-increasing order by value.
    '''

    return find_min_count(
        ngram_helper(tweets, n, stop_words, stop_prefixes),
        min_count,
    )