def get_fitness_gradient_for_single_solution(summ):
    """Return the fitness gradient of one candidate summary.

    Builds the term-frequency vector of ``summ`` over the module-level
    ``vocab`` and evaluates it against the module-level reference term
    frequencies ``ref_tf`` via ``fitness_gradient``.
    """
    # ref_tf and vocab are globally defined elsewhere in this script.
    return fitness_gradient(get_TF(summ, vocab), ref_tf)
# F5: Thematic Words — for each sentence, the count of thematic terms it
# contains, normalised by the maximum such count over all sentences.
thematic_words = [word for word, _count in freq.most_common(N)]
max_thematic = max(len(np.intersect1d(sent, thematic_words)) for sent in sents)
for sent in sents:
    overlap = np.intersect1d(sent, thematic_words)
    F5.append(len(overlap) / max_thematic)

# F6: Proper Noun — fraction of each sentence's tokens that are proper nouns.
for sent in sents:
    proper = np.intersect1d(sent, propernoun)
    F6.append(len(proper) / len(sent))

# F7: Similarities Between Sentences — each sentence's summed cosine
# similarity to every other sentence, normalised by the largest sum.
vocab = sorted(set(flat))
TF = get_TF(sents, vocab)
sim_SiSj = [
    sum(cosine(row_i, row_j) for j, row_j in enumerate(TF) if j != i)
    for i, row_i in enumerate(TF)
]
max_simSiSj = max(sim_SiSj)
for sim in sim_SiSj:
    F7.append(sim / max_simSiSj)

# F8: Term Weight
TFIDF = get_TFIDF(sents, vocab)
feature_file.close()

# Load the cleaned source document; use a context manager so the file
# handle is released even if read() raises (the original leaked it).
with open('./1.clean/001.txt') as doc_file:
    doc = doc_file.read()

doc_T, doc_label, doc_sents = sentences_from_document(doc)
doc_T = doc_T[2:]

# Map 1-based sentence positions to sentence text.
document = {pos: sent for pos, sent in enumerate(doc_sents, start=1)}

vocab = generate_vocab(doc_sents)
# Reference term frequencies, consumed globally by the fitness function.
ref_tf = get_TF(reference, vocab)

# STEP 1 - Initialize Set of Solutions A as weight of text features,
# Set of Velocities V, stage counter k = 1 and set the different
# parameters of SSO
random.seed()
best_solution = []
population_size = 100
no_of_features = 8          # one weight per text feature F1..F8
iterations = 100
sentences_to_extract = 12
velocity_multiplier = 0.75
inertia = 0.56
velocity_limiter_ratio = 0.37