# Assumes "import sys" plus the project helper modules referenced below as
# fp (file-path utilities), dg (n-gram utilities), and markov (Markov chain
# generator); those imports and modules are not shown in this file.
def main(argv):
    """
    Creates n-grams from a merged tweet file such as New Years Resolution_merged.tsv
    and writes the output in JSON format.
    """
    if validate_argv(argv) is False:
        print "Usage: tokenizeTweets.py <file name> <file directory> <n (for n gram)>"
        sys.exit()
    file_name = argv[0]
    input_directory_name = argv[1]
    n_for_ngrams = int(argv[2])
    input_path = fp.get_file_path(file_name, input_directory_name)
    tweets = extract_tweets(input_path)
    tweets_deduped = dedupe_and_tokenize(tweets)

    # Build an n-gram dictionary from the deduplicated tweets.
    ngrams = dg.create_ngrams(tweets_deduped, n_for_ngrams)
    ngram_dict = dg.create_ngram_dict(ngrams)
    output_path = fp.set_output_file_path('New Years Resolution_ngram_' + str(n_for_ngrams) + '.json', 'ngrams')
    output_ngram(ngram_dict, output_path)

    # Build a flat token list, also removing the words "new", "years", "resolution" and stray colons.

    tokens = break_down_sentences(tweets_deduped)
    tokens_cleaned = remove_tokens(tokens, ['new', 'years', 'resolution', ':'])
    output_path2 = fp.set_output_file_path('New Years Resolution_tokens.tsv', 'tokens')
    output_tokens(tokens_cleaned, output_path2)
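
# The dg helpers used above (create_ngrams, create_ngram_dict) live in a
# separate project module that is not shown here. Below is a minimal sketch of
# what they could look like, assuming each deduplicated tweet is a list of
# tokens and the dictionary maps the first token of each n-gram to its
# observed continuations; the real implementation may differ.
def create_ngrams(tokenized_tweets, n):
    """Return all n-grams (as tuples) found in a list of tokenized tweets."""
    ngrams = []
    for tokens in tokenized_tweets:
        for i in range(len(tokens) - n + 1):
            ngrams.append(tuple(tokens[i:i + n]))
    return ngrams

def create_ngram_dict(ngrams):
    """Map the first token of each n-gram to its possible continuations."""
    ngram_dict = {}
    for gram in ngrams:
        # Store continuations as lists so the dictionary serializes to JSON.
        ngram_dict.setdefault(gram[0], []).append(list(gram[1:]))
    return ngram_dict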
def main(argv):
    """
    Merges files of the format <search_term>_####.tsv
    in the data_raw directory and outputs into the "merged" directory.
    """
    if validate_argv(argv) is False:
        print "Usage: mergeFiles.py <search_term>"
        sys.exit()

    input_directory_name = 'data_raw'
    search_term = argv[0]
    output_file_name = search_term + '_merged.tsv'
    output_directory_name = 'merged'

    output_path = fp.set_output_file_path(output_file_name, output_directory_name)
    output = open(output_path, 'a')  # open in append mode so existing merged data is preserved
    # Input files carry a four-digit (HHMM-style) suffix; combinations with no matching file are skipped.
    for h1 in range(3):
        for h2 in range(10):
            for m1 in range(6):
                for m2 in range(10):
                    file_name = search_term + '_' + str(h1) + str(h2) + str(m1) + str(m2) + '.tsv'
                    file_path = fp.get_file_path(file_name, input_directory_name)
                    if fp.filename_exists(file_path):
                        input_file = open(file_path, 'r')
                        input_file.next()  # skip the header row
                        for line in input_file:
                            output.write(line)
                        input_file.close()
    output.close()
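
# The fp module (get_file_path, set_output_file_path, filename_exists) is a
# project helper that is not shown here. A minimal sketch follows, assuming the
# helpers are thin wrappers around os.path with directories relative to the
# working directory; the real module may differ.
import os

def get_file_path(file_name, directory_name):
    """Return the path of an input file inside the given directory."""
    return os.path.join(directory_name, file_name)

def set_output_file_path(file_name, directory_name):
    """Return the path of an output file, creating the directory if needed."""
    if not os.path.isdir(directory_name):
        os.makedirs(directory_name)
    return os.path.join(directory_name, file_name)

def filename_exists(file_path):
    """Return True if the path points to an existing regular file."""
    return os.path.isfile(file_path)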
def validate_argv(argv):
    """
    List[String, ...] -> Boolean
    Takes the command line arguments and ensures that a) there are exactly 2 arguments
    and b) the 1st and 2nd arguments resolve to an existing file in an existing directory.
    Returns False if any of the above are not true.
    """
    if len(argv) != 2:
        return False
    file_name = argv[0]
    input_directory_name = argv[1]
    file_path = fp.get_file_path(file_name, input_directory_name)
    if fp.filename_exists(file_path) is False:
        print "File doesn't exist"
        return False
    return True
def main(argv):
    """
    Uses a trigram Markov chain (adapted from https://gist.github.com/agiliq/131679) to randomly generate sentences from the token list
    """

    if validate_argv(argv) is False:
        print "Usage: tokenizeTweets.py <file name> <file directory>"
        sys.exit()
    file_name = argv[0]
    input_directory_name = argv[1]
    path = fp.get_file_path(file_name, input_directory_name)
    input_file = open(path, 'r')
    text_markov = markov.Markov(input_file)
    input_file.close()
    print text_markov.generate_markov_text()
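
# markov.Markov comes from the gist linked in the docstring above and is not
# reproduced here. Below is a compact sketch of a trigram Markov generator with
# the same interface used above (the constructor takes an open file and
# generate_markov_text() returns a string); the details are assumptions, not
# the gist verbatim.
import random

class Markov(object):
    def __init__(self, open_file):
        # Split the whole input into words and index every (w1, w2) pair by
        # the words observed to follow it.
        self.words = open_file.read().split()
        self.cache = {}
        for w1, w2, w3 in zip(self.words, self.words[1:], self.words[2:]):
            self.cache.setdefault((w1, w2), []).append(w3)

    def generate_markov_text(self, size=25):
        # Start from a random position and repeatedly draw a word that has
        # followed the current pair; fall back to any word for unseen pairs.
        seed = random.randint(0, len(self.words) - 3)
        w1, w2 = self.words[seed], self.words[seed + 1]
        generated = []
        for _ in range(size):
            generated.append(w1)
            w1, w2 = w2, random.choice(self.cache.get((w1, w2), self.words))
        generated.append(w2)
        return ' '.join(generated)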
def validate_argv(argv):
    """
    List[String, ...] -> Boolean
    Takes the command line arguments and ensures that a) there are exactly 3 arguments,
    b) the 1st and 2nd arguments resolve to an existing file in an existing directory, and
    c) the 3rd argument is an integer between 2 and 5. Returns False if any of the above are not true.
    """
    if len(argv) != 3:
        return False
    file_name = argv[0]
    input_directory_name = argv[1]
    n = argv[2]
    file_path = fp.get_file_path(file_name, input_directory_name)
    if fp.filename_exists(file_path) is False:
        print "File doesn't exist"
        return False
    try:
        n_int = int(n)
        if n_int < 2 or n_int > 5:
            print "n must be between 2 and 5"
            return False
    except ValueError:
        print "n must be an integer"
        return False
    return True
def main(argv):
    """
    Creates a randomly generated tweet based on a single initial keyword. Example: createTweets.py New\ Years\ Resolution_ngram_3.json ngrams stop
    """
    if validate_argv(argv) is False:
        print "Usage: tokenizeTweets.py <file name> <file directory> <start term>"
        sys.exit()
    file_name = argv[0]
    input_directory_name = argv[1]
    search_term = argv[2].lower()
    input_path = fp.get_file_path(file_name, input_directory_name)
    ngram_dictionary = extract_dictionary(input_path)
    if validate_search_term(search_term, ngram_dictionary) is False:
        print "'" + search_term + "' does not exist in tweet dictionary" 
        sys.exit()
    sentence_list = create_sentence(search_term, ngram_dictionary)
    count = 0
    while len(sentence_list) < 6 and count < 5: # up to 5 attempts to get a sentence of at least 6 tokens (including the '$' end marker)
        sentence_list = create_sentence(search_term, ngram_dictionary)
        count = count + 1
    if '$' in sentence_list:
        sentence_list.remove('$')
    sentence = ' '.join(sentence_list)
    print sentence
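
# The helpers used above (extract_dictionary, validate_search_term,
# create_sentence) are defined elsewhere in the project. Minimal sketches
# follow, assuming the JSON n-gram dictionary maps a token to the list of its
# observed continuations and that '$' marks the end of a tweet; max_length is
# a hypothetical cap added here, and the real helpers may differ.
import json
import random

def extract_dictionary(input_path):
    """Load the n-gram dictionary written by the tokenizing script."""
    with open(input_path, 'r') as json_file:
        return json.load(json_file)

def validate_search_term(search_term, ngram_dictionary):
    """Return True if the start term can seed a sentence."""
    return search_term in ngram_dictionary

def create_sentence(search_term, ngram_dictionary, max_length=30):
    """Chain random continuations from the start term until '$' or the cap."""
    sentence = [search_term]
    while sentence[-1] != '$' and len(sentence) < max_length:
        continuations = ngram_dictionary.get(sentence[-1])
        if not continuations:
            break
        sentence.extend(random.choice(continuations))
    return sentence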