from collections import Counter
from pprint import pprint


def grid_search(method="NB"):
    results = {}

    # Get the data from the training file
    context_data = getTrainingContextData()
    print("Grabbed the context data")

    # Read in the word-to-vector dataframe
    word_vector_subset = readWordToVector()
    print("Grabbed the word_vector_subset")

    if method == "NB":
        total = (len(window_size_options) * len(n_components_options)
                 * len(perplexity_options) * len(std))
        print("Total steps: {0}".format(total))
        for window_size in window_size_options:
            for n_component in n_components_options:
                for perplexity in perplexity_options:
                    for s in std:
                        results[str((window_size, n_component, perplexity, s))] = controller(
                            method, context_data, word_vector_subset, window_size,
                            n_component, perplexity, naive_bayes_window[0],
                            "", "", "", s)
                        total -= 1
                        print("{0} to go".format(total))
    else:
        total = (len(c_range) * len(gamma_range) * len(window_size_options)
                 * len(svm_range) * len(std))
        print("Total steps: {0}".format(total))
        for window_size in window_size_options:
            for c in c_range:
                for g in gamma_range:
                    for srange in svm_range:
                        for s in std:
                            results[str((window_size, c, g, srange, s))] = controller(
                                method, context_data, word_vector_subset, window_size,
                                "", "", "", c, g, srange, s)
                            total -= 1
                            print("{0} to go".format(total))

    pprint(results)
    saveValidationData(results)
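
# A minimal sketch of how the saved grid-search results might be inspected.
# It assumes controller() returns a single comparable score where higher is
# better; neither assumption is confirmed here, and best_config is a
# hypothetical helper, not part of the pipeline above.
def best_config(results):
    # results maps str(param_tuple) -> score from controller().
    best_key = max(results, key=results.get)
    return best_key, results[best_key]
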
def main():
    # Get the data from the training file
    context_data = getTrainingContextData()
    print("Retrieved data from the training file")

    # Add data from the test file to the same context-data structure
    context_data = getTestContextData(context_data)
    print("Retrieved data from the test file")

    # Obtain training word counts to be used as (inverse) vector weights
    if norm_word_counts:
        word_freqs = getWordFreqs(context_data)
    else:
        word_freqs = Counter()

    # Read in the word-to-vector dataframe
    word_vector_subset = readWordToVector()
    print("Retrieved the word2vec dataset")

    # Create the feature vector for each instance id in the above data
    # structure and save it in JSON format
    context_feature_data = makeFeatureVectorForWordInstance(
        context_data, word_vector_subset, word_freqs)
    saveContextVectorData(context_feature_data)
    print("Created the word vectors for all word types and their instances")
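
# A hedged driver sketch, assuming this file is run as a script. The "grid"
# command-line convention below is hypothetical; the source defines only
# grid_search() and main(), not how they are invoked.
if __name__ == "__main__":
    import sys
    if len(sys.argv) > 1 and sys.argv[1] == "grid":
        # e.g. `python pipeline.py grid SVM` (script name assumed)
        grid_search(method=sys.argv[2] if len(sys.argv) > 2 else "NB")
    else:
        main()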