def main(argv):
    """
    Build an n-gram dictionary and a token list from a file of tweets.

    argv is expected to hold, in order: the input file name, the directory
    containing it, and n (the n-gram size, converted with int()).

    The n-gram dictionary is written to
    'New Years Resolution_ngram_<n>.json' in the 'ngrams' output directory,
    and the token list (with the words 'new', 'years', 'resolution' and ':'
    removed) to 'New Years Resolution_tokens.tsv' in the 'tokens' output
    directory. Prints a usage message and exits when validate_argv rejects
    the arguments.
    """
    if validate_argv(argv) is False:
        print("Usage: tokenizeTweets.py <file name> <file directory> <n (for n gram)>")
        sys.exit()

    source_name, source_dir = argv[0], argv[1]
    gram_size = int(argv[2])

    # Load the raw tweets, then dedupe and tokenize them once; both outputs
    # below are derived from this same collection.
    source_path = fp.get_file_path(source_name, source_dir)
    unique_tweets = dedupe_and_tokenize(extract_tweets(source_path))

    # Output 1: the n-gram dictionary.
    gram_dict = dg.create_ngram_dict(dg.create_ngrams(unique_tweets, gram_size))
    ngram_file_name = 'New Years Resolution_ngram_' + str(gram_size) + '.json'
    ngram_out_path = fp.set_output_file_path(ngram_file_name, 'ngrams')
    output_ngram(gram_dict, ngram_out_path)

    # Output 2: the flat token list, without the search-term words themselves.
    unwanted = ['new', 'years', 'resolution', ':']
    kept_tokens = remove_tokens(break_down_sentences(unique_tweets), unwanted)
    token_out_path = fp.set_output_file_path('New Years Resolution_tokens.tsv', 'tokens')
    output_tokens(kept_tokens, token_out_path)
def main(argv):
    """
    Merge files of the format "<search_term>_####.tsv"
    in the data_raw directory and output into the "merged" directory.

    argv[0] is the search term. Every existing input file whose four-digit
    suffix lies in 0000-2959 (minutes part 00-59) is appended, in ascending
    suffix order and without its first line, to
    "<search_term>_merged.tsv".

    The output file is opened in append mode, so running the merge again
    adds to whatever the file already contains.

    Prints a usage message and exits when validate_argv rejects argv.
    """
    if validate_argv(argv) is False:
        print("Usage: mergeFiles.py <search_term>")
        sys.exit()

    input_directory_name = 'data_raw'
    search_term = argv[0]
    output_file_name = search_term + '_merged.tsv'
    output_directory_name = 'merged'

    output_path = fp.set_output_file_path(output_file_name, output_directory_name)

    # 'with' guarantees both handles are closed even if a read/write fails.
    with open(output_path, 'a') as output:
        # Same suffix range the original digit-by-digit loops produced:
        # first two digits 00-29, last two digits 00-59.
        # NOTE(review): the suffix looks like HHMM, in which case 24-29 can
        # never match a real file -- confirm before tightening to range(24).
        for hours in range(30):
            for minutes in range(60):
                file_name = '%s_%02d%02d.tsv' % (search_term, hours, minutes)
                file_path = fp.get_file_path(file_name, input_directory_name)
                if not fp.filename_exists(file_path):
                    continue
                with open(file_path, 'r') as part:
                    # Drop the first line (presumably a header row); the
                    # default keeps an empty file from raising StopIteration
                    # and aborting the whole merge.
                    next(part, None)
                    output.writelines(part)