def get_all_values_jan_4():
    """Run the ``jan_4`` experiment configuration.

    Gathers the fixed settings below and hands them, positionally, to
    ``get_features.write_to_csv``. The function takes no arguments and has
    no return statement; its only effect is whatever ``write_to_csv`` does
    with the given output path.
    """
    # Filesystem locations.
    data_root = "/home/ndg/projects/shared_datasets/reddit-style/"
    csv_path = "/home/ndg/projects/shared_datasets/reddit-style/data/get_all_values_jan_4.csv"

    # Which subreddits and which months of which year to process.
    target_subreddits = ["mcgill"]
    target_year = 2016
    first_month, last_month = 1, 2

    # Text-processing settings.
    n_grams = 4
    # TODO: make sure this is being used in all the right places
    min_text = 10
    max_text = 1000
    # TODO: are these truly the values that you want?

    # Values that define whether each calculation is restricted to just a
    # certain subreddit (None is passed through unchanged).
    interaction_scope = None
    prolificness_scope = None
    karma_scope = None

    categories = []  # TODO: fill this out

    # NOTE(review): arguments are positional, so their order must match the
    # signature of get_features.write_to_csv -- confirm against that module.
    get_features.write_to_csv(
        target_subreddits,
        target_year,
        first_month,
        last_month,
        n_grams,
        min_text,
        max_text,
        data_root,
        categories,
        csv_path,
        prolificness_scope,
        karma_scope,
        interaction_scope
    )
def test_dask_large():
    """Run the feature export over the large-subreddit list, one CSV per group.

    Reads one subreddit name per line from ``../data/large_subs.txt``
    (relative to the current working directory), splits the names into
    groups with ``partition(content, 10)``, and calls
    ``get_features.write_to_csv`` once per group. Each call writes to its own
    output file whose name embeds the group index and the run parameters.

    Takes no arguments and has no return statement.
    """
    with open("../data/large_subs.txt") as f:
        content = [line.strip() for line in f]
    # subreddits = content[:3]
    # NOTE(review): whether 10 is the number of groups or the group size
    # depends on partition(), which is defined elsewhere -- confirm.
    partitioned_subreddits = partition(content, 10)

    year = 2016
    start_month_pairs = 4
    end_month_pairs = 4
    num_months_back = 1
    ngrams = 5
    text_min = 5  # TODO: make sure this is being used in all the right places
    text_max = 10000
    num_pairs_cap = 5000
    num_pairs_min = 100
    # TODO: are these truly the values that you want?

    # values that define if you restrict value calculation to just a certain
    # subreddit
    restrict_to_subreddit_only = False

    # Every category must be its own list element: adjacent string literals
    # with no comma between them are silently joined into one string.
    relevant_categories = [
        "ppron", "i", "we", "you", "shehe", "they",
        "ipron", "article", "prep", "auxverb", "conj", "negate",
        "verb", "adj", "compare", "interrog", "number", "quant",
        "posemo", "negemo", "anx", "anger", "sad",
    ]
    base_path = "/home/ndg/projects/shared_datasets/reddit-style/"

    for i, subreddits in enumerate(partitioned_subreddits):
        # Single-argument call form prints identically under Python 2 and 3.
        print(subreddits)
        out_file = (
            "/home/ndg/projects/shared_datasets/reddit-style"
            "/output_data/TESTDASKlarge_subs_{}_get_all_values_jan_27"
            "_{}_{}_{}_{}_{}_{}.csv"
        ).format(i, year, start_month_pairs,
                 end_month_pairs - num_months_back,
                 ngrams, text_min, text_max)

        # NOTE(review): arguments are positional, so their order must match
        # the signature of get_features.write_to_csv -- confirm.
        get_features.write_to_csv(subreddits, year, start_month_pairs,
                                  end_month_pairs, ngrams, text_min,
                                  text_max, base_path, relevant_categories,
                                  out_file, restrict_to_subreddit_only,
                                  num_pairs_cap, num_pairs_min,
                                  num_months_back)
def get_all_values_jan_22():
    """Run the feature export for the first 100 entries of the large-subreddit list.

    Reads one subreddit name per line from ``../data/large_subs.txt``
    (relative to the current working directory), keeps the first 100 names,
    and passes them together with the fixed settings below to
    ``get_features.write_to_csv``. The output path embeds the year, the
    pair-month bounds, ``ngrams``, ``text_min`` and ``text_max``.

    Takes no arguments and has no return statement.
    """
    with open("../data/large_subs.txt") as f:
        content = [line.strip() for line in f]
    subreddits = content[:100]

    year = 2016
    start_month_pairs = 4
    end_month_pairs = 4
    start_month_metadata = 1
    end_month_metadata = 4
    ngrams = 5
    text_min = 0  # TODO: make sure this is being used in all the right places
    text_max = 10000
    num_pairs_cap = 10000
    num_pairs_min = 1000
    # TODO: are these truly the values that you want?

    # values that define if you restrict value calculation to just a certain
    # subreddit
    restrict_to_subreddit_only = False

    # Every category must be its own list element: adjacent string literals
    # with no comma between them are silently joined into one string.
    relevant_categories = [
        "ppron", "i", "we", "you", "shehe", "they",
        "ipron", "article", "prep", "auxverb", "conj", "negate",
        "verb", "adj", "compare", "interrog", "number", "quant",
        "posemo", "negemo", "anx", "anger", "sad",
    ]
    base_path = "/home/ndg/projects/shared_datasets/reddit-style/"
    # NOTE(review): the file name says "jan_23" while this function is named
    # jan_22 -- left unchanged in case existing outputs rely on it; confirm.
    out_file = (
        "/home/ndg/projects/shared_datasets/reddit-style/output_data/"
        "large_subs_get_all_values_jan_23_{}_{}_{}_{}_{}_{}.csv"
    ).format(year, start_month_pairs, end_month_pairs,
             ngrams, text_min, text_max)

    # Disabled step, kept for reference:
    # language_model.create_subreddit_language_models(subreddits, year,
    #                                                 start_month_pairs, end_month_pairs,
    #                                                 ngrams, text_min, text_max,
    #                                                 base_path)

    # NOTE(review): arguments are positional, so their order must match the
    # signature of get_features.write_to_csv -- confirm against that module.
    get_features.write_to_csv(subreddits, year, start_month_pairs,
                              end_month_pairs, start_month_metadata,
                              end_month_metadata, ngrams, text_min,
                              text_max, base_path, relevant_categories,
                              out_file, restrict_to_subreddit_only,
                              num_pairs_cap, num_pairs_min)