def generate_features(stances, dataset, name, bow_vectorizer, tfreq_vectorizer, tfidf_vectorizer):
    """Assemble the stacked feature matrix and the label list for ``stances``.

    Args:
        stances: re-iterable collection of mappings with 'Stance',
            'Headline' and 'Body ID' keys (it is also handed to
            ``gen_tf_idf_feats`` after the first pass).
        dataset: object whose ``articles`` mapping is indexed by 'Body ID'.
        name: tag inserted into the ``features/*.npy`` paths.
        bow_vectorizer, tfreq_vectorizer, tfidf_vectorizer: forwarded
            unchanged to ``gen_tf_idf_feats``.

    Returns:
        ``(X, y)`` where ``X`` is ``np.c_`` of the hand, polarity,
        refuting-headline, overlap and TF-cosine blocks and ``y`` is the
        list of ``LABELS`` indices.
    """
    headlines, bodies, labels = [], [], []
    for record in stances:
        labels.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    overlap = gen_or_load_feats(
        word_overlap_features, headlines, bodies, "features/overlap." + name + ".npy")

    # refuting_body: refuting-word feature for the body; computed but not
    # part of the stacked matrix below.
    refuting_head, refuting_body = refuting_features(headlines, bodies)

    polarity = gen_or_load_feats(
        polarity_features, headlines, bodies, "features/polarity." + name + ".npy")
    hand = gen_or_load_feats(
        hand_features, headlines, bodies, "features/hand." + name + ".npy")

    # Sentiment vectors of headline and body plus their cosine similarity;
    # computed but not part of the stacked matrix below.
    senti_head, senti_body, senti_cos = sentiment_features(headlines, bodies)

    # Cosine similarity between the TF vectors, and between the TF-IDF
    # vectors, of headline and body. Only the TF cosine is stacked.
    tf_cos, tf_idf_cos = gen_tf_idf_feats(
        stances, dataset.articles, bow_vectorizer, tfreq_vectorizer, tfidf_vectorizer)

    stacked = np.c_[hand, polarity, refuting_head, overlap, tf_cos]
    return stacked, labels
# generate_features (bag-of-words variant): collects LABELS indices, headlines
# and dataset.articles entries from `stances`, builds a vocabulary with
# vocabularyForm(h, b, number_of_words), and stacks the hand, polarity,
# refuting and overlap blocks from gen_or_load_feats together with the
# calculateBOW(h, b, vocabulary) output via np.c_. Returns (X, y).
# NOTE(review): this definition is collapsed onto a single physical line, so
# the extent of the `if name != "vocab":` body cannot be determined from here.
# Restore the original indentation before changing this logic. If only the
# gen_or_load_feats calls are guarded, name == "vocab" leaves X_overlap etc.
# unbound when np.c_ is evaluated -- TODO confirm the intended behaviour.
def generate_features(stances, dataset, name, number_of_words=5000): h, b, y = [], [], [] for stance in stances: y.append(LABELS.index(stance['Stance'])) h.append(stance['Headline']) b.append(dataset.articles[stance['Body ID']]) vocabulary = vocabularyForm(h, b, number_of_words) #print(vocabulary) if name != "vocab": X_overlap = gen_or_load_feats(word_overlap_features, h, b, "features/overlap." + name + ".npy") X_refuting = gen_or_load_feats(refuting_features, h, b, "features/refuting." + name + ".npy") X_polarity = gen_or_load_feats(polarity_features, h, b, "features/polarity." + name + ".npy") X_hand = gen_or_load_feats(hand_features, h, b, "features/hand." + name + ".npy") X_Bow = calculateBOW(h, b, vocabulary) X = np.c_[X_hand, X_polarity, X_refuting, X_overlap, X_Bow] return X, y
def generate_features(stances, dataset, name):
    """Build the feature matrix and labels, printing start/end timestamps.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline', 'Body ID'.
        dataset: object whose ``articles`` mapping is indexed by 'Body ID'.
        name: tag inserted into the ``features/*.npy`` paths.

    Returns:
        ``(X, y)``: ``np.c_`` of the hand, polarity, refuting, overlap and
        sentiment blocks, and the list of ``LABELS`` indices.
    """
    headlines, bodies, labels = [], [], []
    print('* GENERATING FEATURES *')
    print(datetime.now())

    for record in stances:
        labels.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    overlap = gen_or_load_feats(
        word_overlap_features, headlines, bodies, "features/overlap." + name + ".npy")
    refuting = gen_or_load_feats(
        refuting_features, headlines, bodies, "features/refuting." + name + ".npy")
    polarity = gen_or_load_feats(
        polarity_features, headlines, bodies, "features/polarity." + name + ".npy")
    hand = gen_or_load_feats(
        hand_features, headlines, bodies, "features/hand." + name + ".npy")
    sentiment = gen_or_load_feats(
        sentiment_features, headlines, bodies, "features/sentiment." + name + ".npy")

    stacked = np.c_[hand, polarity, refuting, overlap, sentiment]
    print('* FINISHED GENERATING FEATURES *')
    print(datetime.now())
    return stacked, labels
def generate_baseline_features(stances, dataset, name, binary=True):
    """Build the baseline feature matrix with multi-class or binary labels.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline', 'Body ID'.
        dataset: object whose ``articles`` mapping is indexed by 'Body ID'.
        name: tag inserted into the ``../baseline/features/*.npy`` paths.
        binary: when it compares equal to ``True``, labels are collapsed to
            0 (``LABELS`` index below 3) or 1 (otherwise); any other value
            keeps the raw ``LABELS`` index.

    Returns:
        ``(X, y)``: ``np.c_`` of the hand, polarity, refuting and overlap
        blocks, and the label list.
    """
    baseline_dir = '../baseline/'
    headlines, bodies, labels = [], [], []

    for record in stances:
        stance_index = LABELS.index(record['Stance'])
        if binary != True:  # noqa: E712 - comparison kept exactly as written
            labels.append(stance_index)
        else:
            labels.append(0 if stance_index < 3 else 1)
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    overlap = gen_or_load_feats(
        word_overlap_features, headlines, bodies,
        baseline_dir + "features/overlap." + name + ".npy")
    refuting = gen_or_load_feats(
        refuting_features, headlines, bodies,
        baseline_dir + "features/refuting." + name + ".npy")
    polarity = gen_or_load_feats(
        polarity_features, headlines, bodies,
        baseline_dir + "features/polarity." + name + ".npy")
    hand = gen_or_load_feats(
        hand_features, headlines, bodies,
        baseline_dir + "features/hand." + name + ".npy")

    stacked = np.c_[hand, polarity, refuting, overlap]
    return stacked, labels
def generate_features_keywords_with_IDs(stances, dataset, name, mode):
    """Build the feature matrix including a body-ID-aware keyword block.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline', 'Body ID'.
        dataset: object whose ``articles`` mapping is indexed by 'Body ID'.
        name: tag inserted into the ``features/*.npy`` paths.
        mode: ``'train'`` selects ``keywords_features_train``; any other
            value selects ``keywords_features_competition``. Both use the
            same keyword cache path.

    Returns:
        ``(X, y)``: ``np.c_`` of the hand, polarity, refuting, overlap and
        keyword blocks, and the list of ``LABELS`` indices.
    """
    headlines, bodies, labels, body_ids = [], [], [], []
    for record in stances:
        labels.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])
        body_ids.append(record['Body ID'])

    overlap = gen_or_load_feats(
        word_overlap_features, headlines, bodies, "features/overlap." + name + ".npy")
    refuting = gen_or_load_feats(
        refuting_features, headlines, bodies, "features/refuting." + name + ".npy")
    polarity = gen_or_load_feats(
        polarity_features_NLTK, headlines, bodies,
        "features/polarity_NLTK_full." + name + ".npy")
    hand = gen_or_load_feats(
        hand_features, headlines, bodies, "features/hand." + name + ".npy")

    if mode == 'train':
        keyword_extractor = keywords_features_train
    else:
        keyword_extractor = keywords_features_competition
    keywords = gen_or_load_feats_with_IDs(
        keyword_extractor, headlines, bodies, body_ids,
        "features/keywords_." + name + ".npy")

    stacked = np.c_[hand, polarity, refuting, overlap, keywords]
    return stacked, labels
def generate_features(stances, dataset, name, filters=False):
    """Build the feature matrix with raw or two-way collapsed labels.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline', 'Body ID'.
        dataset: object whose ``articles`` mapping is indexed by 'Body ID'.
        name: tag inserted into the ``features/*.npy`` paths.
        filters: when truthy, a ``LABELS`` index of 3 becomes label 0 and
            every other index becomes label 1; otherwise the raw index is
            used.

    Returns:
        ``(X, y)``: ``np.c_`` of the refuting, polarity, hand and overlap
        blocks (in that order), and the label list.
    """
    headlines, bodies, labels = [], [], []
    for record in stances:
        stance_index = LABELS.index(record['Stance'])
        if filters:
            labels.append(0 if stance_index == 3 else 1)
        else:
            labels.append(stance_index)
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    overlap = gen_or_load_feats(
        word_overlap_features, headlines, bodies, "features/overlap." + name + ".npy")
    refuting = gen_or_load_feats(
        refuting_features, headlines, bodies, "features/refuting." + name + ".npy")
    polarity = gen_or_load_feats(
        polarity_features, headlines, bodies, "features/polarity." + name + ".npy")
    hand = gen_or_load_feats(
        hand_features, headlines, bodies, "features/hand." + name + ".npy")

    stacked = np.c_[refuting, polarity, hand, overlap]
    return stacked, labels
def generate_features(stances, dataset, name):
    """Build the feature matrix using headlines, bodies and their keys.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline', 'Body ID'
            and 'Key' entries.
        dataset: object whose ``articles`` and ``keys`` mappings are both
            indexed by 'Body ID'.
        name: tag inserted into the ``.npy`` paths.

    Returns:
        ``(X, y)``: ``np.c_`` of the hand, feature-similarity, overlap,
        score and word-vector-similarity blocks, and the list of
        ``LABELS`` indices.
    """
    headlines, bodies, head_keys, body_keys, labels = [], [], [], [], []
    for record in stances:
        labels.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])
        head_keys.append(record['Key'])
        body_keys.append(dataset.keys[record['Body ID']])

    # The "your--path" prefixes are placeholders carried over verbatim; note
    # that the overlap path has no "stage1/" component while the rest do.
    overlap = gen_or_load_feats(
        word_overlap_features, headlines, bodies, head_keys, body_keys,
        "your--path/features/overlap." + name + ".npy")
    hand = gen_or_load_feats(
        hand_features, headlines, bodies, head_keys, body_keys,
        "your--path/stage1/features/hand." + name + ".npy")
    score = gen_or_load_feats(
        score_feature, headlines, bodies, head_keys, body_keys,
        "your--path/stage1/features/score." + name + ".npy")
    wordvec_sim = gen_or_load_feats(
        word_vec_sim, headlines, bodies, head_keys, body_keys,
        "your--path/stage1/features/wv_sim." + name + ".npy")
    feat_sim = gen_or_load_feats(
        features_sim, headlines, bodies, head_keys, body_keys,
        "your--path/stage1/features/feat_sim." + name + ".npy")

    stacked = np.c_[hand, feat_sim, overlap, score, wordvec_sim]
    return stacked, labels
def generate_features_all(stances, dataset, name, repl):
    """First-stage features: every article passes through here first.

    ``init_features(stances, dataset, repl)`` supplies the ids, headlines,
    bodies and labels.

    Returns:
        ``(X, y, ids)`` where ``X`` is ``np.c_`` of the overlap and hand
        blocks.
    """
    ids, headlines, bodies, labels = init_features(stances, dataset, repl)

    overlap = gen_or_load_feats(
        word_overlap_features, headlines, bodies, "features/overlap." + name + ".npy")
    hand = gen_or_load_feats(
        hand_features, headlines, bodies, "features/hand." + name + ".npy")

    stacked = np.c_[overlap, hand]
    return stacked, labels, ids
def generate_features_biased(stances, dataset, name, repl):
    """Third-stage features: biased articles pass through here third.

    ``init_features(stances, dataset, repl)`` supplies the ids, headlines,
    bodies and labels.

    Returns:
        ``(X, y, ids)`` where ``X`` is ``np.c_`` of the polarity, refuting
        and hand blocks.
    """
    ids, headlines, bodies, labels = init_features(stances, dataset, repl)

    refuting = gen_or_load_feats(
        refuting_features, headlines, bodies, "features/refuting." + name + ".npy")
    polarity = gen_or_load_feats(
        polarity_features, headlines, bodies, "features/polarity." + name + ".npy")
    hand = gen_or_load_feats(
        hand_features, headlines, bodies, "features/hand." + name + ".npy")

    stacked = np.c_[polarity, refuting, hand]
    return stacked, labels, ids
def generate_features_related(stances, dataset, name, repl):
    """Second-stage features: related articles pass through here second.

    ``init_features(stances, dataset, repl)`` supplies the ids, headlines,
    bodies and labels.

    Returns:
        ``(X, y, ids)`` where ``X`` is ``np.c_`` of the polarity, refuting,
        hand, naive-Bayes and format blocks.
    """
    ids, headlines, bodies, labels = init_features(stances, dataset, repl)

    bayes = gen_or_load_feats(
        naive_bayes_features, headlines, bodies, "features/bayes." + name + ".npy")
    refuting = gen_or_load_feats(
        refuting_features, headlines, bodies, "features/refuting." + name + ".npy")
    polarity = gen_or_load_feats(
        polarity_features, headlines, bodies, "features/polarity." + name + ".npy")
    hand = gen_or_load_feats(
        hand_features, headlines, bodies, "features/hand." + name + ".npy")
    layout = gen_or_load_feats(
        format_features, headlines, bodies, "features/format." + name + ".npy")

    stacked = np.c_[polarity, refuting, hand, bayes, layout]
    return stacked, labels, ids
def generate_features_related(stances, dataset, name):
    """Second-stage features: related articles pass through here second.

    ``init_features`` is called with the fixed mapping
    ``{'agree': 'disagree'}`` as its third argument and supplies the ids,
    headlines, bodies and labels.

    Returns:
        ``(X, y, ids)`` where ``X`` is ``np.c_`` of the polarity, refuting
        and hand blocks.
    """
    ids, headlines, bodies, labels = init_features(
        stances, dataset, {'agree': 'disagree'})

    refuting = gen_or_load_feats(
        refuting_features, headlines, bodies, "features/refuting." + name + ".npy")
    polarity = gen_or_load_feats(
        polarity_features, headlines, bodies, "features/polarity." + name + ".npy")
    hand = gen_or_load_feats(
        hand_features, headlines, bodies, "features/hand." + name + ".npy")

    stacked = np.c_[polarity, refuting, hand]
    return stacked, labels, ids
def generate_features(stances, dataset, name):
    """Build the extended feature matrix (sentiment, NER, Q, doc2vec).

    Args:
        stances: iterable of mappings with 'Stance', 'Headline', 'Body ID'.
        dataset: object whose ``articles`` mapping is indexed by 'Body ID'.
        name: tag inserted into the ``features/*.npy`` paths.

    Returns:
        ``(X, y)``: ``np.c_`` of the hand, sentiment, polarity, refuting,
        overlap, NER, question-mark and doc2vec blocks, and the list of
        ``LABELS`` indices.
    """
    headlines, bodies, labels = [], [], []
    for record in stances:
        labels.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    overlap = gen_or_load_feats(
        word_overlap_features, headlines, bodies, "features/overlap." + name + ".npy")
    refuting = gen_or_load_feats(
        refuting_features, headlines, bodies, "features/refuting." + name + ".npy")
    polarity = gen_or_load_feats(
        polarity_features, headlines, bodies, "features/polarity." + name + ".npy")
    sentiment = gen_or_load_feats(
        sentiment_analyzer, headlines, bodies, "features/sentiment." + name + ".npy")
    hand = gen_or_load_feats(
        hand_features, headlines, bodies, "features/hand." + name + ".npy")
    ner = gen_or_load_feats(
        name_entity_similarity, headlines, bodies, "features/ner." + name + ".npy")
    question = gen_or_load_feats(
        question_mark_ending, headlines, bodies, "features/Q." + name + ".npy")
    doc2vec = gen_or_load_feats(
        doc2vec_feature, headlines, bodies, "features/doc2vec." + name + ".npy")

    stacked = np.c_[hand, sentiment, polarity, refuting, overlap, ner, question, doc2vec]
    return stacked, labels
def generate_features(stances, dataset, name):
    """Build the four-block baseline feature matrix and the label list.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline', 'Body ID'.
        dataset: object whose ``articles`` mapping is indexed by 'Body ID'.
        name: tag inserted into the ``features/*.npy`` paths.

    Returns:
        ``(X, y)``: ``np.c_`` of the hand, polarity, refuting and overlap
        blocks, and the list of ``LABELS`` indices.
    """
    headlines, bodies, labels = [], [], []
    for record in stances:
        labels.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    overlap = gen_or_load_feats(
        word_overlap_features, headlines, bodies, "features/overlap."+name+".npy")
    refuting = gen_or_load_feats(
        refuting_features, headlines, bodies, "features/refuting."+name+".npy")
    polarity = gen_or_load_feats(
        polarity_features, headlines, bodies, "features/polarity."+name+".npy")
    hand = gen_or_load_feats(
        hand_features, headlines, bodies, "features/hand."+name+".npy")

    stacked = np.c_[hand, polarity, refuting, overlap]
    return stacked, labels
def generate_features(stances, dataset, name):
    """Return the baseline feature matrix and labels for ``stances``.

    Each stance contributes its ``LABELS`` index, its headline and the
    ``dataset.articles`` entry for its 'Body ID'. The overlap, refuting,
    polarity and hand blocks are requested from ``gen_or_load_feats`` in
    that order, each with a ``features/<kind>.<name>.npy`` path.

    Returns:
        ``(X, y)`` with ``X = np.c_[hand, polarity, refuting, overlap]``.
    """
    label_ids = []
    head_texts = []
    body_texts = []
    for item in stances:
        label_ids.append(LABELS.index(item['Stance']))
        head_texts.append(item['Headline'])
        body_texts.append(dataset.articles[item['Body ID']])

    feat_overlap = gen_or_load_feats(
        word_overlap_features, head_texts, body_texts, "features/overlap."+name+".npy")
    feat_refuting = gen_or_load_feats(
        refuting_features, head_texts, body_texts, "features/refuting."+name+".npy")
    feat_polarity = gen_or_load_feats(
        polarity_features, head_texts, body_texts, "features/polarity."+name+".npy")
    feat_hand = gen_or_load_feats(
        hand_features, head_texts, body_texts, "features/hand."+name+".npy")

    return np.c_[feat_hand, feat_polarity, feat_refuting, feat_overlap], label_ids
def generate_features(stances, dataset, name):
    """Build the hand/overlap/TF-IDF/sentiment feature matrix and labels.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline', 'Body ID'.
        dataset: object whose ``articles`` mapping is indexed by 'Body ID'.
        name: tag inserted into the ``features/*.npy`` paths.

    Returns:
        ``(X, y)``: ``np.c_`` of the hand, overlap, tf_idf and sentiment
        blocks, and the list of ``LABELS`` indices.
    """
    headlines, bodies, labels = [], [], []
    for record in stances:
        labels.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    overlap = gen_or_load_feats(
        word_overlap_features, headlines, bodies, "features/overlap."+name+".npy")
    hand = gen_or_load_feats(
        hand_features, headlines, bodies, "features/hand."+name+".npy")
    tf_idf = gen_or_load_feats(
        tf_idf_features, headlines, bodies, "features/tf_idf."+name+".npy")
    sentiment = gen_or_load_feats(
        sentiment_features, headlines, bodies, "features/sentiment."+name+".npy")

    stacked = np.c_[hand, overlap, tf_idf, sentiment]
    return stacked, labels
def generate_features(dataset):
    """Build the unlabeled baseline feature matrix for ``dataset.stances``.

    Args:
        dataset: object with a ``stances`` iterable (mappings with
            'Headline' and 'Body ID') and an ``articles`` mapping indexed
            by 'Body ID'.

    Returns:
        ``np.c_`` of the refuting, polarity, hand and overlap blocks (in
        that order). The ``.npy`` paths carry no per-split tag.
    """
    headlines, bodies = [], []
    for record in dataset.stances:
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    overlap = gen_or_load_feats(
        word_overlap_features, headlines, bodies, "features/overlap.npy")
    refuting = gen_or_load_feats(
        refuting_features, headlines, bodies, "features/refuting.npy")
    polarity = gen_or_load_feats(
        polarity_features, headlines, bodies, "features/polarity.npy")
    hand = gen_or_load_feats(
        hand_features, headlines, bodies, "features/hand.npy")

    return np.c_[refuting, polarity, hand, overlap]
def generate_features(stances, dataset, name):
    """Build the ten-block feature matrix with 2-class or full labels.

    Labels are indices into ``LABELS_RELATED`` when ``params.run_2_class``
    is truthy and ``name`` is not ``'competition'``; in every other case
    they are indices into ``LABELS``.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline', 'Body ID'.
        dataset: object whose ``articles`` mapping is indexed by 'Body ID'.
        name: tag inserted into the ``features/*.npy`` paths.

    Returns:
        ``(X, y)``: the ``np.c_`` stack of all ten feature blocks and the
        label list.
    """
    headlines, bodies, labels = [], [], []
    for record in stances:
        # Evaluated per stance, as in the nested-if form this replaces.
        two_class = params.run_2_class and name != 'competition'
        label_space = LABELS_RELATED if two_class else LABELS
        labels.append(label_space.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    overlap = gen_or_load_feats(
        word_overlap_features, headlines, bodies, "features/overlap." + name + ".npy")
    refuting = gen_or_load_feats(
        refuting_features, headlines, bodies, "features/refuting." + name + ".npy")
    polarity = gen_or_load_feats(
        polarity_features, headlines, bodies, "features/polarity." + name + ".npy")
    hand = gen_or_load_feats(
        hand_features, headlines, bodies, "features/hand." + name + ".npy")
    overlap_quotes = gen_or_load_feats(
        word_overlap_quotes_features, headlines, bodies,
        "features/overlap_quotes." + name + ".npy")
    overlap_pos = gen_or_load_feats(
        word_overlap_pos_features, headlines, bodies,
        "features/overlap_pos." + name + ".npy")
    overlap_pos_sentence = gen_or_load_feats(
        word_overlap_split_bodies_features, headlines, bodies,
        "features/overlap_pos_sentence_split_bodies." + name + ".npy")
    tfidf = gen_or_load_feats(
        word_tfidf_features, headlines, bodies, "features/tfidf_pos." + name + ".npy")
    tfidf_max = gen_or_load_feats(
        word_tfidf_pos_ss_features, headlines, bodies,
        "features/tfidf_pos_max." + name + ".npy")
    overlap_bpe = gen_or_load_feats(
        word_overlap_bpe_features, headlines, bodies,
        "features/overlap_bpe_nltk_tag3." + name + ".npy")

    stacked = np.c_[hand, polarity, refuting, overlap, overlap_quotes,
                    overlap_pos, overlap_pos_sentence, tfidf, tfidf_max,
                    overlap_bpe]
    return stacked, labels
def generate_features_nn(dataset):
    """Return the GloVe feature block for every stance in ``dataset``.

    Args:
        dataset: object with a ``stances`` iterable (mappings with
            'Headline' and 'Body ID') and an ``articles`` mapping indexed
            by 'Body ID'.

    Returns:
        Whatever ``gen_or_load_feats`` yields for ``glove_features`` with
        the path ``features/glove_features.npy``.
    """
    headlines, bodies = [], []
    for record in dataset.stances:
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    return gen_or_load_feats(
        glove_features, headlines, bodies, "features/glove_features.npy")
def generate_test_features(stances, dataset, name):
    """Build the unlabeled feature matrix (baseline blocks plus LDA).

    Args:
        stances: iterable of mappings with 'Headline' and 'Body ID'.
        dataset: object whose ``body`` mapping is indexed by 'Body ID'.
        name: tag inserted into the ``features/*.npy`` paths.

    Returns:
        ``np.c_`` of the hand, polarity, refuting, overlap and LDA blocks.
    """
    headlines, bodies = [], []
    for record in stances:
        headlines.append(record['Headline'])
        bodies.append(dataset.body[record['Body ID']])

    # NOTE(review): unlike the other paths, the LDA path has no "." between
    # the feature kind and `name`; kept as-is so existing files still match.
    lda = gen_or_load_feats(
        lda_features, headlines, bodies, "features/lda" + name + ".npy")
    overlap = gen_or_load_feats(
        word_overlap_features, headlines, bodies, "features/overlap." + name + ".npy")
    refuting = gen_or_load_feats(
        refuting_features, headlines, bodies, "features/refuting." + name + ".npy")
    polarity = gen_or_load_feats(
        polarity_features, headlines, bodies, "features/polarity." + name + ".npy")
    hand = gen_or_load_feats(
        hand_features, headlines, bodies, "features/hand." + name + ".npy")

    return np.c_[hand, polarity, refuting, overlap, lda]
def generate_baseline_test_features(stances, dataset, name):
    """Build the unlabeled baseline feature matrix for test stances.

    Args:
        stances: iterable of mappings with 'Headline' and 'Body ID'.
        dataset: object whose ``articles`` mapping is indexed by 'Body ID'.
        name: tag inserted into the ``../baseline/features/*_.npy`` paths
            (note the trailing underscore before the extension).

    Returns:
        ``np.c_`` of the hand, polarity, refuting and overlap blocks.
    """
    baseline_dir = '../baseline/'
    headlines, bodies = [], []
    for record in stances:
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    overlap = gen_or_load_feats(
        word_overlap_features, headlines, bodies,
        baseline_dir + "features/overlap." + name + "_.npy")
    refuting = gen_or_load_feats(
        refuting_features, headlines, bodies,
        baseline_dir + "features/refuting." + name + "_.npy")
    polarity = gen_or_load_feats(
        polarity_features, headlines, bodies,
        baseline_dir + "features/polarity." + name + "_.npy")
    hand = gen_or_load_feats(
        hand_features, headlines, bodies,
        baseline_dir + "features/hand." + name + "_.npy")

    return np.c_[hand, polarity, refuting, overlap]
def generate_features(stances, dataset, name):
    """Build the feature matrix including BERT and cosine blocks.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline', 'Body ID'.
        dataset: object whose ``articles`` mapping is indexed by 'Body ID'.
        name: tag inserted into the ``features/`` paths.

    Returns:
        ``(X, y)``: ``np.c_`` of the refuting, overlap, hand, sentiment,
        NER, polarity, BERT and cosine blocks, and the list of ``LABELS``
        indices.
    """
    headlines, bodies, labels = [], [], []
    for record in stances:
        labels.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    sentiment = gen_or_load_feats(
        sentiment_features, headlines, bodies, "features/sentiment." + name + ".npy")
    ner = gen_or_load_feats(
        ner_features, headlines, bodies, "features/ner." + name + ".npy")
    overlap = gen_or_load_feats(
        word_overlap_features, headlines, bodies, "features/overlap." + name + ".npy")
    refuting = gen_or_load_feats(
        refuting_features, headlines, bodies, "features/refuting." + name + ".npy")
    polarity = gen_or_load_feats(
        polarity_features, headlines, bodies, "features/polarity." + name + ".npy")
    hand = gen_or_load_feats(
        hand_features, headlines, bodies, "features/hand." + name + ".npy")

    # These two helpers take paths directly instead of headlines/bodies.
    bert = bert_features("features/combined_bert_" + name + ".csv")
    cosine = cosine_features(name, "features/cosine." + name + ".npy")

    stacked = np.c_[refuting, overlap, hand, sentiment, ner, polarity, bert, cosine]
    return stacked, labels
def generate_features(stances, dataset, name):
    """Build the baseline + bag-of-words feature matrix, with progress prints.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline', 'Body ID'.
        dataset: object whose ``articles`` mapping is indexed by 'Body ID'.
        name: tag inserted into the ``features/*.npy`` paths.

    Returns:
        ``(X, y)``: ``np.c_`` of the hand, polarity, refuting, overlap,
        averaged-BoW-vector and BoW-count blocks, and the list of
        ``LABELS`` indices.
    """
    print("Generating Features for :", name)

    headlines, bodies, labels = [], [], []
    for record in stances:
        labels.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    overlap = gen_or_load_feats(
        word_overlap_features, headlines, bodies, "features/overlap." + name + ".npy")
    refuting = gen_or_load_feats(
        refuting_features, headlines, bodies, "features/refuting." + name + ".npy")
    polarity = gen_or_load_feats(
        polarity_features, headlines, bodies, "features/polarity." + name + ".npy")
    hand = gen_or_load_feats(
        hand_features, headlines, bodies, "features/hand." + name + ".npy")

    # The two bag-of-words blocks are wrapped in np.array before stacking.
    bow_vectors = gen_or_load_feats(
        bow_averaged_vectors, headlines, bodies,
        "features/bowvec_200dnorm." + name + ".npy")
    bow_vectors = np.array(bow_vectors)
    bow_counts = gen_or_load_feats(
        bow_count_vectors, headlines, bodies,
        "features/bowcount_1000." + name + ".npy")
    bow_counts = np.array(bow_counts)

    stacked = np.c_[hand, polarity, refuting, overlap, bow_vectors, bow_counts]
    print("... Done. Features :", stacked.shape[1])
    return stacked, labels
def generate_features(headlines, bodies, name, possibility, bow_vectorizer, tfreq_vectorizer,
                      tfidf_vectorizer):
    """Stack the feature blocks selected by the ``possibility`` mask.

    All four blocks are always requested from ``gen_or_load_feats``, even
    when the mask does not select them, so they are saved for later
    possibilities.

    Args:
        headlines, bodies: inputs forwarded to ``gen_or_load_feats``.
        name: tag inserted into the ``features/*.npy`` paths.
        possibility: indexable of at least four items; position ``k`` equal
            to ``'1'`` selects overlap, refuting, grammar dependencies and
            TF-IDF for ``k`` = 0..3 respectively.
        bow_vectorizer, tfreq_vectorizer, tfidf_vectorizer: forwarded as
            extra arguments to the TF-IDF ``gen_or_load_feats`` call.

    Returns:
        ``(X, verbos)``: the ``stackFeatures`` result for the selected
        blocks and a message listing them.
    """
    X_overlap = gen_or_load_feats(
        word_overlap_features, headlines, bodies, "features/overlap." + name + ".npy")
    X_refuting = gen_or_load_feats(
        refuting_features, headlines, bodies, "features/refuting." + name + ".npy")
    X_grammar_dependencies = gen_or_load_feats(
        grammar_dependencies_count, headlines, bodies, "features/grammar" + name + ".npy")
    X_tf_idf = gen_or_load_feats(
        tfIdf_features, headlines, bodies, "features/tfidf." + name + ".npy",
        bow_vectorizer, tfreq_vectorizer, tfidf_vectorizer)

    # Mask position -> (feature block, tag appended to the message).
    candidates = (
        (X_overlap, " * X_Overlap"),
        (X_refuting, " * X_refuting"),
        (X_grammar_dependencies, " * X_grammar_dependencies"),
        (X_tf_idf, " * X_tf_idf"),
    )
    selected, tags = [], []
    for position, (block, tag) in enumerate(candidates):
        if possibility[position] == '1':
            selected.append(block)
            tags.append(tag)

    verbos = "Test with the following Features : " + "".join(tags)
    # Stack the selected feature blocks into one table.
    X = stackFeatures(selected)
    return X, verbos
# generate_features (bag-of-words + InferSent variant): collects LABELS
# indices, headlines and dataset.articles entries from `stances`, builds a
# vocabulary with vocabularyForm(h, b, number_of_words), and stacks the hand,
# polarity, refuting and overlap blocks from gen_or_load_feats with the
# calculateBOW output and the infersent.encode output via np.c_.
# Returns (X, y).
# NOTE(review): this definition is collapsed onto a single physical line, so
# the extent of the `if name != "vocab":` body and of the `for i, j in
# zip(h, b):` body cannot be determined from here; restore the original
# indentation before changing this logic.
# NOTE(review): the zip loop appends only `i` (the headline element); `j` is
# never used, so `sentences` holds headlines only -- confirm this is intended.
# NOTE(review): torch.load unpickles 'infersent.allnli.pickle'; only run this
# against a trusted checkpoint file.
def generate_features(stances, dataset, name, number_of_words=5000): h, b, y = [], [], [] for stance in stances: y.append(LABELS.index(stance['Stance'])) h.append(stance['Headline']) b.append(dataset.articles[stance['Body ID']]) vocabulary = vocabularyForm(h, b, number_of_words) #print(vocabulary) if name != "vocab": X_overlap = gen_or_load_feats(word_overlap_features, h, b, "features/overlap." + name + ".npy") X_refuting = gen_or_load_feats(refuting_features, h, b, "features/refuting." + name + ".npy") X_polarity = gen_or_load_feats(polarity_features, h, b, "features/polarity." + name + ".npy") X_hand = gen_or_load_feats(hand_features, h, b, "features/hand." + name + ".npy") X_Bow = calculateBOW(h, b, vocabulary) infersent = torch.load('infersent.allnli.pickle', map_location=lambda storage, loc: storage) infersent.set_glove_path('dataset/glove.840B.300d.txt') sentences = [] for i, j in zip(h, b): sentences.append(i) infersent.build_vocab(sentences, tokenize=True) X_embed = infersent.encode(sentences, tokenize=True) X = np.c_[X_hand, X_polarity, X_refuting, X_overlap, X_Bow, X_embed] return X, y
def generate_features_nn(stances, dataset, name, filters=False):
    """Return GloVe features and one-hot labels, optionally dropping 'unrelated'.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline', 'Body ID'.
        dataset: object whose ``articles`` mapping is indexed by 'Body ID'.
        name: tag inserted into the ``features/glove_features.*.npy`` path.
        filters: when truthy, stances whose 'Stance' is ``'unrelated'`` are
            skipped entirely.

    Returns:
        ``(X_glove, y)`` where ``y`` is ``np.asarray`` of the
        ``LABELS_ONE_HOT`` entries of the kept stances.
    """
    headlines, bodies, targets = [], [], []
    for record in stances:
        if record['Stance'] == 'unrelated' and filters:
            continue
        targets.append(LABELS_ONE_HOT[record['Stance']])
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    targets = np.asarray(targets)
    glove = gen_or_load_feats(
        glove_features, headlines, bodies, "features/glove_features." + name + ".npy")
    return glove, targets
def generate_features(stances, dataset, name, only_related=False):
    """Build the extended feature matrix plus multi-class and binary labels.

    Args:
        stances: iterable of mappings with 'Stance', 'Stance_biClass',
            'Headline' and 'Body ID' entries.
        dataset: object whose ``articles`` mapping is indexed by 'Body ID'.
        name: tag inserted into the ``features/*.npy`` paths.
        only_related: when truthy, every feature file name gets an ``re_``
            prefix; it has no other effect in this function.

    Returns:
        ``(X, y, y_bi)``: ``np.c_`` of the hand, sentiment, polarity,
        refuting, overlap, NER, question-mark and doc2vec blocks; the list
        of ``LABELS`` indices; and the list of 'Stance_biClass' values.
    """
    headlines, bodies, labels, binary_labels = [], [], [], []
    prefix = "re_" if only_related else ""

    for record in stances:
        binary_labels.append(record['Stance_biClass'])
        labels.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    X_overlap = gen_or_load_feats(
        word_overlap_features, headlines, bodies,
        "features/" + prefix + "overlap." + name + ".npy")
    X_refuting = gen_or_load_feats(
        refuting_features, headlines, bodies,
        "features/" + prefix + "refuting." + name + ".npy")
    X_polarity = gen_or_load_feats(
        polarity_features, headlines, bodies,
        "features/" + prefix + "polarity." + name + ".npy")
    X_sentiment = gen_or_load_feats(
        sentiment_analyzer, headlines, bodies,
        "features/" + prefix + "sentiment." + name + ".npy")
    X_hand = gen_or_load_feats(
        hand_features, headlines, bodies,
        "features/" + prefix + "hand." + name + ".npy")
    X_ner = gen_or_load_feats(
        name_entity_similarity, headlines, bodies,
        "features/" + prefix + "ner." + name + ".npy")
    # Fix: this block previously reused name_entity_similarity, so the "Q"
    # columns duplicated the NER block. NOTE(review): existing "Q.*.npy"
    # files may have been produced by the old call and need regenerating.
    X_Q = gen_or_load_feats(
        question_mark_ending, headlines, bodies,
        "features/" + prefix + "Q." + name + ".npy")
    X_doc2vec = gen_or_load_feats(
        doc2vec_feature, headlines, bodies,
        "features/" + prefix + "doc2vec." + name + ".npy")

    X = np.c_[X_hand, X_sentiment, X_polarity, X_refuting, X_overlap, X_ner, X_Q, X_doc2vec]
    return X, labels, binary_labels
def generate_features(stances, dataset, name):
    """Build the seven-block feature matrix (baseline + tf_idf, svd, sentiment).

    Args:
        stances: iterable of mappings with 'Stance', 'Headline', 'Body ID'.
        dataset: object whose ``articles`` mapping is indexed by 'Body ID'.
        name: tag inserted into the ``features/*.npy`` paths.

    Returns:
        ``(X, y)``: ``np.c_`` of the hand, polarity, refuting, overlap,
        tf_idf, svd and sentiment blocks, and the list of ``LABELS``
        indices.
    """
    headlines, bodies, labels = [], [], []
    for record in stances:
        labels.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    overlap = gen_or_load_feats(
        word_overlap_features, headlines, bodies, "features/overlap." + name + ".npy")
    refuting = gen_or_load_feats(
        refuting_features, headlines, bodies, "features/refuting." + name + ".npy")
    polarity = gen_or_load_feats(
        polarity_features, headlines, bodies, "features/polarity." + name + ".npy")
    hand = gen_or_load_feats(
        hand_features, headlines, bodies, "features/hand." + name + ".npy")
    tf_idf = gen_or_load_feats(
        tf_idf_features, headlines, bodies, "features/tf_idf." + name + ".npy")
    svd = gen_or_load_feats(
        svd_features, headlines, bodies, "features/svd." + name + ".npy")
    sentiment = gen_or_load_feats(
        sentiment_features, headlines, bodies, "features/sentiment." + name + ".npy")

    stacked = np.c_[hand, polarity, refuting, overlap, tf_idf, svd, sentiment]
    return stacked, labels
def generate_features(stances, dataset, name):
    """Build the baseline + topic-model feature matrix and export its columns.

    Besides returning the features, this fills the module-level
    ``comp_feature_data`` (for ``name == "competition"``) or
    ``train_feature_data`` (for ``name == "full"``) with the labels,
    headlines, bodies and one entry per feature column, but only when the
    matching CSV file does not exist yet. Nothing is written to disk here.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline', 'Body ID'.
        dataset: object whose ``articles`` mapping is indexed by 'Body ID'.
        name: tag inserted into the ``features/*.npy`` paths; also selects
            which export container is filled.

    Returns:
        ``(X, y)``: ``np.c_`` of the hand, polarity, refuting, overlap, NMF
        and LDA blocks, and the list of ``LABELS`` indices.
    """
    # The unused per-stance `rows` accumulator was removed: it kept a second
    # copy of every headline/body/label and was never read.
    h, b, y = [], [], []
    for stance in stances:
        y.append(LABELS.index(stance['Stance']))
        h.append(stance['Headline'])
        b.append(dataset.articles[stance['Body ID']])

    X_overlap = gen_or_load_feats(word_overlap_features, h, b, "features/overlap." + name + ".npy")
    X_refuting = gen_or_load_feats(refuting_features, h, b, "features/refuting." + name + ".npy")
    X_polarity = gen_or_load_feats(polarity_features, h, b, "features/polarity." + name + ".npy")
    X_hand = gen_or_load_feats(hand_features, h, b, "features/hand." + name + ".npy")

    # Topic-modelling features.
    X_NMF = gen_or_load_feats(NMF_cos_50, h, b, "features/nmf." + name + ".npy")
    X_LDA = gen_or_load_feats(LDA_cos_25, h, b, "features/lda-25." + name + ".npy")

    X = np.c_[X_hand, X_polarity, X_refuting, X_overlap, X_NMF, X_LDA]

    # NOTE(review): the 'body_id' entry receives the looked-up article
    # entries from dataset.articles, not the 'Body ID' keys.
    if name == "competition":
        if not os.path.isfile('comp_feature_data.csv'):
            comp_feature_data['stance'] = y
            comp_feature_data['headline'] = h
            comp_feature_data['body_id'] = b
            for i in range(X.shape[1]):
                comp_feature_data[i] = X[:, i]

    if name == "full":
        if not os.path.isfile('train_feature_data.csv'):
            train_feature_data['stance'] = y
            train_feature_data['headline'] = h
            train_feature_data['body_id'] = b
            for i in range(X.shape[1]):
                train_feature_data[i] = X[:, i]

    return X, y
def generate_features(stances, dataset, name):
    """Build the seven-block feature matrix and the label list.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline', 'Body ID'.
        dataset: object whose ``articles`` mapping is indexed by 'Body ID'.
        name: tag inserted into the ``features/*.npy`` paths.

    Returns:
        ``(X, y)``: ``np.c_`` of the hand, polarity, refuting, overlap,
        tf_idf, svd and sentiment blocks, and the list of ``LABELS``
        indices.
    """
    label_ids, head_texts, body_texts = [], [], []
    for item in stances:
        label_ids.append(LABELS.index(item['Stance']))
        head_texts.append(item['Headline'])
        body_texts.append(dataset.articles[item['Body ID']])

    feat_overlap = gen_or_load_feats(
        word_overlap_features, head_texts, body_texts, "features/overlap." + name + ".npy")
    feat_refuting = gen_or_load_feats(
        refuting_features, head_texts, body_texts, "features/refuting." + name + ".npy")
    feat_polarity = gen_or_load_feats(
        polarity_features, head_texts, body_texts, "features/polarity." + name + ".npy")
    feat_hand = gen_or_load_feats(
        hand_features, head_texts, body_texts, "features/hand." + name + ".npy")
    feat_tf_idf = gen_or_load_feats(
        tf_idf_features, head_texts, body_texts, "features/tf_idf." + name + ".npy")
    feat_svd = gen_or_load_feats(
        svd_features, head_texts, body_texts, "features/svd." + name + ".npy")
    feat_sentiment = gen_or_load_feats(
        sentiment_features, head_texts, body_texts, "features/sentiment." + name + ".npy")

    # The commented-out matplotlib per-feature plots that used to follow
    # were inactive code and are not reproduced here.
    matrix = np.c_[feat_hand, feat_polarity, feat_refuting, feat_overlap,
                   feat_tf_idf, feat_svd, feat_sentiment]
    return matrix, label_ids
def generate_features(stances, dataset, name):
    """Build baseline features plus label/feature views for staged classifiers.

    Returns an 8-tuple:
        X: the full ``np.c_`` stack of hand, polarity, refuting, overlap.
        y: the full list of ``LABELS`` indices.
        yur: same length as ``y``; 3 where ``y`` is 3 (unrelated), else 0
            (related).
        Xr: list of the rows of ``X`` whose label is not 3.
        yr: the labels matching ``Xr``.
        yr_discussVsAd: same length as ``yr``; 2 where the label is 2
            (discuss), else 0 (agree or disagree).
        Xad: list of the rows of ``X`` whose label is neither 3 nor 2.
        yad_agreeVsdisagree: the original labels matching ``Xad`` (per the
            original note, 0 for agree and 1 for disagree).

    To separate related vs unrelated, use ``X`` and ``yur``.
    To separate discuss vs agree/disagree, use ``Xr`` and ``yr_discussVsAd``.
    To separate agree vs disagree, use ``Xad`` and ``yad_agreeVsdisagree``.
    """
    h, b, y = [], [], []
    for record in stances:
        y.append(LABELS.index(record['Stance']))
        h.append(record['Headline'])
        b.append(dataset.articles[record['Body ID']])

    X_overlap = gen_or_load_feats(word_overlap_features, h, b, "features/overlap." + name + ".npy")
    X_refuting = gen_or_load_feats(refuting_features, h, b, "features/refuting." + name + ".npy")
    X_polarity = gen_or_load_feats(polarity_features, h, b, "features/polarity." + name + ".npy")
    X_hand = gen_or_load_feats(hand_features, h, b, "features/hand." + name + ".npy")
    X = np.c_[X_hand, X_polarity, X_refuting, X_overlap]

    # Related (0) versus unrelated (3), aligned with the full label list.
    yur = [3 if label == 3 else 0 for label in y]

    # Related-only subset.
    yr = [label for label in y if label != 3]
    Xr = [X[i] for i, label in enumerate(y) if label != 3]

    # Within the related subset: discuss (2) versus agree/disagree (0).
    yr_discussVsAd = [2 if label == 2 else 0 for label in y if label != 3]

    # Agree/disagree-only subset, keeping the original label values.
    Xad = [X[i] for i, label in enumerate(y) if not (label == 3 or label == 2)]
    yad_agreeVsdisagree = [label for label in y if not (label == 3 or label == 2)]

    return X, y, yur, Xr, yr, yr_discussVsAd, Xad, yad_agreeVsdisagree