Code example #1
    def get_fitness_gradient_for_single_solution(summ):
        # Term-frequency matrix of the candidate summary's sentences.
        sys_tf = get_TF(summ, vocab)
        # ref_tf (the reference summary's TF matrix) is globally defined.
        fit_grad = fitness_gradient(sys_tf, ref_tf)
        return fit_grad
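
The excerpts never show `fitness_gradient` itself. A minimal sketch of what it might look like, assuming the fitness is the cosine similarity between the candidate summary's aggregate term-frequency vector and the reference's; `get_TF`, `sys_tf`, and `ref_tf` come from the excerpt, everything else here is an assumption:

    import numpy as np

    def fitness_gradient(sys_tf, ref_tf):
        # Hypothetical implementation: collapse each TF matrix to a single
        # document-level vector, then score by cosine similarity.
        a = np.asarray(sys_tf, dtype=float).sum(axis=0)
        b = np.asarray(ref_tf, dtype=float).sum(axis=0)
        denom = np.linalg.norm(a) * np.linalg.norm(b)
        return float(a.dot(b) / denom) if denom else 0.0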
Code example #2
    # F5: Thematic Words (top-N most frequent terms)
    thematic_words = [w for w, _ in freq.most_common(N)]
    max_thematic = max(len(np.intersect1d(s, thematic_words)) for s in sents)

    for Si in sents:
        Si_thematic = np.intersect1d(Si, thematic_words)
        F5.append(len(Si_thematic) / max_thematic)

    # F6: Proper Noun
    for Si in sents:
        Si_propnouns = np.intersect1d(Si, propernoun)
        F6.append(len(Si_propnouns) / len(Si))

    # F7: Similarities Between Sentences
    vocab = sorted(set(flat))

    TF = get_TF(sents, vocab)

    sim_SiSj = []
    for i, Si in enumerate(TF):
        temp = []
        for j, Sj in enumerate(TF):
            if i == j:
                continue
            temp.append(cosine(Si, Sj))  # cosine() taken to return similarity
        sim_SiSj.append(sum(temp))
    max_simSiSj = max(sim_SiSj)

    for sim_Si in sim_SiSj:
        F7.append(sim_Si / max_simSiSj)

    # F8: Term Weight
    TFIDF = get_TFIDF(sents, vocab)
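
Neither `get_TF` nor `get_TFIDF` (nor the `cosine` used for F7) is shown in these excerpts. A minimal sketch under the assumption that `sents` is a list of token lists and `vocab` a sorted word list; the +1 smoothing in the IDF term is my choice, not necessarily the author's:

    import numpy as np

    def get_TF(sents, vocab):
        # One raw term-frequency row per sentence, columns ordered by vocab.
        index = {w: k for k, w in enumerate(vocab)}
        tf = np.zeros((len(sents), len(vocab)))
        for i, sent in enumerate(sents):
            for w in sent:
                if w in index:
                    tf[i, index[w]] += 1
        return tf

    def get_TFIDF(sents, vocab):
        # TF-IDF over sentences, with +1 smoothing on document frequency.
        tf = get_TF(sents, vocab)
        df = np.count_nonzero(tf, axis=0)
        return tf * np.log(len(sents) / (1 + df))

    def cosine(u, v):
        # Cosine *similarity* (scipy.spatial.distance.cosine is a distance).
        denom = np.linalg.norm(u) * np.linalg.norm(v)
        return float(np.dot(u, v) / denom) if denom else 0.0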
Code example #3
feature_file.close()

with open('./1.clean/001.txt') as doc_file:
    doc = doc_file.read()

doc_T, doc_label, doc_sents = sentences_from_document(doc)
doc_T = doc_T[2:]

# 1-indexed lookup from sentence number to sentence.
document = {i + 1: s for i, s in enumerate(doc_sents)}

vocab = generate_vocab(doc_sents)

# `reference` is assumed to hold the gold summary's sentences, loaded earlier.
ref_tf = get_TF(reference, vocab)

# STEP 1 - Initialize the set of solutions A as weights over the text
# features, the set of velocities V, the stage counter k = 1, and the
# remaining SSO parameters.
random.seed()

best_solution = []

population_size = 100
no_of_features = 8
iterations = 100
sentences_to_extract = 12
velocity_multiplier = 0.75
inertia = 0.56
velocity_limiter_ratio = 0.37
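
From here, STEP 1 would draw the initial population and velocities from these parameters. A minimal sketch, assuming weights start uniformly in [0, 1] and velocities are capped at ±velocity_limiter_ratio (the actual ranges are not given in the excerpt):

    # Assumed initialization: one weight vector and one velocity vector per
    # solution, plus the stage counter k = 1.
    A = [[random.random() for _ in range(no_of_features)]
         for _ in range(population_size)]
    V = [[random.uniform(-velocity_limiter_ratio, velocity_limiter_ratio)
          for _ in range(no_of_features)]
         for _ in range(population_size)]
    k = 1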