def generate_baseline_features(stances, dataset, name, binary=True):
    """Build the baseline feature matrix and label list for a set of stances.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path.
        binary: When equal to ``True`` (the default), LABELS indices below 3
            become 0 and all others become 1; otherwise the raw LABELS index
            is used as the label.

    Returns:
        Tuple ``(X, y)``: ``X`` is the column-wise concatenation of the hand,
        polarity, refuting and overlap features (in that order) and ``y`` is
        the list of labels.
    """
    feature_root = '../baseline/'
    headlines, bodies, y = [], [], []
    for item in stances:
        label_idx = LABELS.index(item['Stance'])
        # Equality test against True (not a truthiness test) on purpose.
        if binary != True:
            y.append(label_idx)
        else:
            y.append(0 if label_idx < 3 else 1)
        headlines.append(item['Headline'])
        bodies.append(dataset.articles[item['Body ID']])

    def cached(extractor, prefix):
        # gen_or_load_feats is handed the .npy path for this split
        # (presumably used as an on-disk cache -- confirm in its definition).
        return gen_or_load_feats(extractor, headlines, bodies,
                                 feature_root + prefix + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")

    return np.c_[hand, polarity, refuting, overlap], y
def generate_features(stances, dataset, name, model, mode, binary=False):
    """Encode every headline/body pair with ``buildWordVector``.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Unused here; kept for signature compatibility.
        model: Passed through to ``buildWordVector``.
        mode: Passed through to ``buildWordVector``.
        binary: When equal to ``True``, LABELS indices below 3 become 0 and
            all others become 1; otherwise the raw LABELS index is used.

    Returns:
        Tuple ``(headline, body, y)`` where ``headline`` and ``body`` are
        dicts with parallel 'features' and 'lengths' lists, and ``y`` is the
        list of labels.
    """
    headline = {'features': [], 'lengths': []}
    body = {'features': [], 'lengths': []}
    y = []

    for item in stances:
        label_idx = LABELS.index(item['Stance'])
        # Equality test against True (not a truthiness test) on purpose.
        if binary != True:
            y.append(label_idx)
        else:
            y.append(0 if label_idx < 3 else 1)

        # Encode both texts before storing anything so the four lists only
        # grow together.
        head_vec, head_len = buildWordVector(item['Headline'], model, mode)
        body_vec, body_len = buildWordVector(
            dataset.articles[item['Body ID']], model, mode)

        headline['features'].append(head_vec)
        headline['lengths'].append(head_len)
        body['features'].append(body_vec)
        body['lengths'].append(body_len)

    return headline, body, y
def generate_features(stances, dataset, name, filters=False):
    """Build the feature matrix and labels, optionally as a two-way filter.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path.
        filters: When truthy, a stance whose LABELS index is 3 is labelled 0
            and every other stance is labelled 1; otherwise the raw LABELS
            index is used.

    Returns:
        Tuple ``(X, y)``: ``X`` concatenates the refuting, polarity, hand and
        overlap features column-wise (in that order); ``y`` is the label list.
    """
    headlines, bodies, y = [], [], []
    for item in stances:
        label_idx = LABELS.index(item['Stance'])
        if filters:
            y.append(0 if label_idx == 3 else 1)
        else:
            y.append(label_idx)
        headlines.append(item['Headline'])
        bodies.append(dataset.articles[item['Body ID']])

    def cached(extractor, prefix):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 prefix + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")
    # Sentiment, cosine TF-IDF and BLEU features were tried here previously
    # and are currently disabled.

    return np.c_[refuting, polarity, hand, overlap], y
def generate_features_keywords_with_IDs(stances, dataset, name, mode):
    """Build baseline features plus keyword features that need body IDs.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path.
        mode: ``'train'`` selects ``keywords_features_train``; any other
            value selects ``keywords_features_competition``.

    Returns:
        Tuple ``(X, y)``: ``X`` concatenates the hand, polarity, refuting,
        overlap and keyword features column-wise; ``y`` is the list of LABELS
        indices.
    """
    records = [
        (LABELS.index(item['Stance']),
         item['Headline'],
         dataset.articles[item['Body ID']],
         item['Body ID'])
        for item in stances
    ]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]
    body_ids = [rec[3] for rec in records]

    def cached(extractor, prefix):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 prefix + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features_NLTK, "features/polarity_NLTK_full.")
    hand = cached(hand_features, "features/hand.")

    # Both modes use the same output path; only the extractor differs.
    if mode == 'train':
        keyword_extractor = keywords_features_train
    else:
        keyword_extractor = keywords_features_competition
    keywords = gen_or_load_feats_with_IDs(
        keyword_extractor, headlines, bodies, body_ids,
        "features/keywords_." + name + ".npy")

    return np.c_[hand, polarity, refuting, overlap, keywords], y
def generate_features(stances, dataset, name, number_of_words=5000):
    """Build baseline plus bag-of-words features for a set of stances.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path. The special
            value ``"vocab"`` only triggers the ``vocabularyForm`` call.
        number_of_words: Forwarded to ``vocabularyForm``.

    Returns:
        Tuple ``(X, y)`` with ``X`` the column-wise concatenation of the hand,
        polarity, refuting, overlap and bag-of-words features and ``y`` the
        list of LABELS indices; ``None`` when ``name == "vocab"``.
    """
    records = [
        (LABELS.index(item['Stance']),
         item['Headline'],
         dataset.articles[item['Body ID']])
        for item in stances
    ]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    vocabulary = vocabularyForm(headlines, bodies, number_of_words)

    # NOTE(review): this assumes everything after the vocabulary step was
    # guarded by `name != "vocab"` in the original layout -- confirm.
    if name == "vocab":
        return None

    def cached(extractor, prefix):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 prefix + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")
    bow = calculateBOW(headlines, bodies, vocabulary)

    return np.c_[hand, polarity, refuting, overlap, bow], y
def generate_features(stances, dataset, name):
    """Build baseline plus sentiment features, printing start/end timestamps.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path.

    Returns:
        Tuple ``(X, y)``: ``X`` concatenates the hand, polarity, refuting,
        overlap and sentiment features column-wise; ``y`` is the list of
        LABELS indices.
    """
    print('* GENERATING FEATURES *')
    print(datetime.now())

    records = [
        (LABELS.index(item['Stance']),
         item['Headline'],
         dataset.articles[item['Body ID']])
        for item in stances
    ]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    def cached(extractor, prefix):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 prefix + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")
    sentiment = cached(sentiment_features, "features/sentiment.")
    # A TF-IDF feature was tried here previously and is currently disabled.

    X = np.c_[hand, polarity, refuting, overlap, sentiment]

    print('* FINISHED GENERATING FEATURES *')
    print(datetime.now())
    return X, y
def generate_features(stances, dataset, name):
    """Build baseline, sentiment, NER, question-mark and doc2vec features.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path.

    Returns:
        Tuple ``(X, y)``: ``X`` concatenates, column-wise, the hand,
        sentiment, polarity, refuting, overlap, NER, Q and doc2vec features;
        ``y`` is the list of LABELS indices.
    """
    records = [
        (LABELS.index(item['Stance']),
         item['Headline'],
         dataset.articles[item['Body ID']])
        for item in stances
    ]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    # (key, extractor, path prefix) in the order the extractors are run.
    specs = [
        ("overlap", word_overlap_features, "features/overlap."),
        ("refuting", refuting_features, "features/refuting."),
        ("polarity", polarity_features, "features/polarity."),
        ("sentiment", sentiment_analyzer, "features/sentiment."),
        ("hand", hand_features, "features/hand."),
        ("ner", name_entity_similarity, "features/ner."),
        ("Q", question_mark_ending, "features/Q."),
        ("doc2vec", doc2vec_feature, "features/doc2vec."),
    ]
    feats = {}
    for key, extractor, prefix in specs:
        feats[key] = gen_or_load_feats(extractor, headlines, bodies,
                                       prefix + name + ".npy")

    # The column order differs from the computation order above.
    X = np.c_[feats["hand"], feats["sentiment"], feats["polarity"],
              feats["refuting"], feats["overlap"], feats["ner"],
              feats["Q"], feats["doc2vec"]]
    return X, y
def generate_features(stances, dataset, name, model, binary=True):
    """Encode headlines and bodies with ``buildWordVector`` and join them.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID'
            keys; wrapped in ``tqdm`` for progress reporting.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Unused here; kept for signature compatibility.
        model: Passed through to ``buildWordVector``.
        binary: When equal to ``True`` (the default), LABELS indices below 3
            become 0 and all others become 1; otherwise the raw LABELS index
            is used.

    Returns:
        Tuple ``(concatenated, headline, body, y)`` where ``concatenated`` is
        ``np.c_[headline, body]``, ``headline`` and ``body`` are the lists of
        per-stance vectors, and ``y`` is the list of labels.
    """
    headline_vecs, body_vecs, y = [], [], []
    for item in tqdm(stances):
        label_idx = LABELS.index(item['Stance'])
        # Equality test against True (not a truthiness test) on purpose.
        if binary != True:
            y.append(label_idx)
        else:
            y.append(0 if label_idx < 3 else 1)

        headline_vecs.append(buildWordVector(item["Headline"], model))
        article_text = dataset.articles[item["Body ID"]]
        body_vecs.append(buildWordVector(article_text, model))

    joined = np.c_[headline_vecs, body_vecs]
    return joined, headline_vecs, body_vecs, y
def generate_features(stances, dataset, name):
    """Build baseline, sentiment, NER, BERT and cosine features.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path.

    Returns:
        Tuple ``(X, y)``: ``X`` concatenates, column-wise, the refuting,
        overlap, hand, sentiment, NER, polarity, BERT and cosine features;
        ``y`` is the list of LABELS indices.
    """
    records = [
        (LABELS.index(item['Stance']),
         item['Headline'],
         dataset.articles[item['Body ID']])
        for item in stances
    ]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    def cached(extractor, prefix):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 prefix + name + ".npy")

    sentiment = cached(sentiment_features, "features/sentiment.")
    ner = cached(ner_features, "features/ner.")
    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")

    # These two helpers take only a path (plus the split name for cosine),
    # not the headline/body lists.
    bert = bert_features("features/combined_bert_" + name + ".csv")
    cosine = cosine_features(name, "features/cosine." + name + ".npy")

    X = np.c_[refuting, overlap, hand, sentiment, ner, polarity, bert, cosine]
    return X, y
def generate_features(stances, dataset, name, bow_vectorizer,
                      tfreq_vectorizer, tfidf_vectorizer):
    """Build baseline features plus a TF cosine-similarity feature.

    Args:
        stances: Sequence of dicts with 'Stance', 'Headline' and 'Body ID'
            keys; iterated here and also passed to ``gen_tf_idf_feats``.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path.
        bow_vectorizer: Forwarded to ``gen_tf_idf_feats``.
        tfreq_vectorizer: Forwarded to ``gen_tf_idf_feats``.
        tfidf_vectorizer: Forwarded to ``gen_tf_idf_feats``.

    Returns:
        Tuple ``(X, y)``: ``X`` concatenates, column-wise, the hand,
        polarity, headline-refuting, overlap and TF-cosine features; ``y`` is
        the list of LABELS indices.
    """
    records = [
        (LABELS.index(item['Stance']),
         item['Headline'],
         dataset.articles[item['Body ID']])
        for item in stances
    ]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    def cached(extractor, prefix):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 prefix + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")

    # refuting_features is called directly and yields a headline part and a
    # body part (refuting words present in the body); only the headline part
    # is used in X.
    refuting_head, _refuting_body = refuting_features(headlines, bodies)

    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")

    # Headline sentiment vector, body sentiment vector and their cosine
    # similarity. Still computed, but none of them is used in X.
    _senti_head, _senti_body, _senti_cos = sentiment_features(headlines, bodies)

    # Cosine similarity of the TF vectors and of the TF-IDF vectors of
    # headline and body; only the TF variant is used in X.
    tf_cos, _tf_idf_cos = gen_tf_idf_feats(
        stances, dataset.articles, bow_vectorizer, tfreq_vectorizer,
        tfidf_vectorizer)

    return np.c_[hand, polarity, refuting_head, overlap, tf_cos], y
def generate_features(stances, dataset, name):
    """Build baseline plus bag-of-words vector/count features, with logging.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path.

    Returns:
        Tuple ``(X, y)``: ``X`` concatenates, column-wise, the hand, polarity,
        refuting, overlap, averaged-BoW-vector and BoW-count features; ``y``
        is the list of LABELS indices.
    """
    print("Generating Features for :", name)

    records = [
        (LABELS.index(item['Stance']),
         item['Headline'],
         dataset.articles[item['Body ID']])
        for item in stances
    ]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    def cached(extractor, prefix):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 prefix + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")
    # The two BoW results are explicitly converted to arrays before stacking.
    bow_vectors = np.array(
        cached(bow_averaged_vectors, "features/bowvec_200dnorm."))
    bow_counts = np.array(
        cached(bow_count_vectors, "features/bowcount_1000."))

    X = np.c_[hand, polarity, refuting, overlap, bow_vectors, bow_counts]
    print("... Done. Features :", X.shape[1])
    return X, y
def preprocess_and_write(dataset, stance_list, tier, out_dir):
    """Clean, tokenize, shuffle and write one data split to three text files.

    For every stance the headline and body are passed through ``clean`` and
    ``get_tokenized_sequences``; the tokens are joined with spaces. The
    examples are then shuffled and written via ``write_to_file`` to
    ``<tier>.headline``, ``<tier>.body`` and ``<tier>.stance`` inside
    ``out_dir``, in the same shuffled order in all three files.

    Args:
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        stance_list: Iterable of dicts with 'Stance', 'Headline' and
            'Body ID' keys.
        tier: Split name; used in the progress description and file names.
        out_dir: Directory in which the three output files are created.
    """
    examples = []
    for stance in tqdm(stance_list, desc="Preprocessing {}".format(tier)):
        label = LABELS.index(stance['Stance'])
        headline_tokens = get_tokenized_sequences(clean(stance['Headline']))
        body_tokens = get_tokenized_sequences(
            clean(dataset.articles[stance['Body ID']]))
        examples.append(
            (' '.join(headline_tokens), ' '.join(body_tokens), str(label)))

    print("Processed %i examples" % len(examples))

    # np.random.shuffle works in place and needs a mutable sequence; a bare
    # range() object is immutable on Python 3, so materialise it first.
    indices = list(range(len(examples)))
    np.random.shuffle(indices)

    with open(os.path.join(out_dir, tier + '.headline'), 'w') as headline_file, \
            open(os.path.join(out_dir, tier + '.body'), 'w') as body_file, \
            open(os.path.join(out_dir, tier + '.stance'), 'w') as stance_file:
        for i in indices:
            headline, body, stance = examples[i]
            # Write the tokenized data, one example per call, to each file.
            write_to_file(headline_file, headline)
            write_to_file(body_file, body)
            write_to_file(stance_file, stance)
def generate_features(stances, dataset, name):
    """Build hand, similarity, overlap, score and word-vector features.

    Besides headline and body text, every extractor also receives the
    stance's 'Key' value and the ``dataset.keys`` entry for its body ID.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline', 'Body ID' and
            'Key' keys.
        dataset: Object with ``articles`` and ``keys`` mappings indexed by
            body ID.
        name: Split tag inserted into every feature file path.

    Returns:
        Tuple ``(X, y)``: ``X`` concatenates, column-wise, the hand,
        feature-similarity, overlap, score and word-vector-similarity
        features; ``y`` is the list of LABELS indices.
    """
    records = [
        (LABELS.index(item['Stance']),
         item['Headline'],
         dataset.articles[item['Body ID']],
         item['Key'],
         dataset.keys[item['Body ID']])
        for item in stances
    ]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]
    head_keys = [rec[3] for rec in records]
    body_keys = [rec[4] for rec in records]

    def cached(extractor, prefix):
        return gen_or_load_feats(extractor, headlines, bodies, head_keys,
                                 body_keys, prefix + name + ".npy")

    # NOTE: the overlap path has no "stage1/" segment, unlike the others.
    overlap = cached(word_overlap_features, "your--path/features/overlap.")
    # Refuting and polarity features are currently disabled.
    hand = cached(hand_features, "your--path/stage1/features/hand.")
    score = cached(score_feature, "your--path/stage1/features/score.")
    wv_sim = cached(word_vec_sim, "your--path/stage1/features/wv_sim.")
    feat_sim = cached(features_sim, "your--path/stage1/features/feat_sim.")

    # Other column combinations were tried; this is the active one.
    return np.c_[hand, feat_sim, overlap, score, wv_sim], y
def generate_features(stances, dataset, name):
    """Build baseline plus topic-model (NMF/LDA) features for a set of stances.

    For the "competition" and "full" splits the labels, headlines, bodies and
    every feature column are also copied into the module-level tables
    ``comp_feature_data`` / ``train_feature_data``, unless the matching CSV
    file already exists in the working directory.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path.

    Returns:
        Tuple ``(X, y)``: ``X`` concatenates, column-wise, the hand, polarity,
        refuting, overlap, NMF and LDA features; ``y`` is the list of LABELS
        indices.
    """
    h, b, y = [], [], []
    for stance in stances:
        y.append(LABELS.index(stance['Stance']))
        h.append(stance['Headline'])
        b.append(dataset.articles[stance['Body ID']])

    def cached(extractor, prefix):
        return gen_or_load_feats(extractor, h, b, prefix + name + ".npy")

    X_overlap = cached(word_overlap_features, "features/overlap.")
    X_refuting = cached(refuting_features, "features/refuting.")
    X_polarity = cached(polarity_features, "features/polarity.")
    X_hand = cached(hand_features, "features/hand.")
    # Topic-modelling features.
    X_NMF = cached(NMF_cos_50, "features/nmf.")
    X_LDA = cached(LDA_cos_25, "features/lda-25.")

    X = np.c_[X_hand, X_polarity, X_refuting, X_overlap, X_NMF, X_LDA]

    if name == "competition":
        if not os.path.isfile('comp_feature_data.csv'):
            _fill_feature_table(comp_feature_data, y, h, b, X)
    if name == "full":
        if not os.path.isfile('train_feature_data.csv'):
            _fill_feature_table(train_feature_data, y, h, b, X)

    return X, y


def _fill_feature_table(table, labels, headlines, bodies, X):
    """Copy labels, texts and each column of ``X`` into ``table`` by key."""
    table['stance'] = labels
    table['headline'] = headlines
    # NOTE(review): despite the column name, this stores the body text, not
    # the body ID. Left as-is because readers of the table may rely on it.
    table['body_id'] = bodies
    for col in range(X.shape[1]):
        table[col] = X[:, col]
def generate_features(stances, dataset, name):
    """Build baseline plus overlap/TF-IDF variant features.

    Labels are indices into ``LABELS_RELATED`` when ``params.run_2_class`` is
    truthy and ``name`` is not ``'competition'``; in every other case they
    are indices into ``LABELS``.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path.

    Returns:
        Tuple ``(X, y)``: ``X`` concatenates, column-wise, the hand, polarity,
        refuting, overlap, quote-overlap, POS-overlap, split-body overlap,
        TF-IDF, max TF-IDF and BPE-overlap features; ``y`` is the label list.
    """
    headlines, bodies, y = [], [], []
    for item in stances:
        two_class = params.run_2_class and name != 'competition'
        label_space = LABELS_RELATED if two_class else LABELS
        y.append(label_space.index(item['Stance']))
        headlines.append(item['Headline'])
        bodies.append(dataset.articles[item['Body ID']])

    def cached(extractor, prefix):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 prefix + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")
    overlap_quotes = cached(word_overlap_quotes_features,
                            "features/overlap_quotes.")
    overlap_pos = cached(word_overlap_pos_features, "features/overlap_pos.")
    overlap_pos_sentence = cached(
        word_overlap_split_bodies_features,
        "features/overlap_pos_sentence_split_bodies.")
    tfidf = cached(word_tfidf_features, "features/tfidf_pos.")
    tfidf_max = cached(word_tfidf_pos_ss_features, "features/tfidf_pos_max.")
    overlap_bpe = cached(word_overlap_bpe_features,
                         "features/overlap_bpe_nltk_tag3.")

    X = np.c_[hand, polarity, refuting, overlap, overlap_quotes, overlap_pos,
              overlap_pos_sentence, tfidf, tfidf_max, overlap_bpe]
    return X, y
def init_features(stances, dataset, repl):
    """Collect stance IDs, headlines, bodies and label indices.

    Args:
        stances: Iterable of dicts with 'Stance ID', 'Stance', 'Headline' and
            'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        repl: Container mapping a stance name to its replacement; a stance
            found in it is swapped for ``repl[stance]`` before the LABELS
            lookup.

    Returns:
        Tuple ``(ids, headlines, bodies, labels)`` of four parallel lists.
    """
    stance_ids, headlines, bodies, labels = [], [], [], []
    for item in stances:
        stance_ids.append(item['Stance ID'])
        stance_name = item['Stance']
        if stance_name in repl:
            stance_name = repl[stance_name]
        labels.append(LABELS.index(stance_name))
        headlines.append(item['Headline'])
        bodies.append(dataset.articles[item['Body ID']])
    return stance_ids, headlines, bodies, labels
def generate_features(stances, dataset, name, model, mode, binary=True):
    """Encode headline/body pairs using a TF-IDF model fitted per pair.

    For each stance a fresh ``TfidfVectorizer`` is fitted on just that
    stance's cleaned headline and cleaned body; its vocabulary and TF-IDF
    matrix are handed to ``buildWordVector`` together with the raw text.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID'
            keys; wrapped in ``tqdm`` for progress reporting.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Unused here; kept for signature compatibility.
        model: Passed through to ``buildWordVector``.
        mode: Passed through to ``buildWordVector``.
        binary: When equal to ``True`` (the default), LABELS indices below 3
            become 0 and all others become 1; otherwise the raw LABELS index
            is used.

    Returns:
        Tuple ``(concatenated, headline, body, y)`` where ``concatenated`` is
        ``np.c_[headline, body]``, ``headline`` and ``body`` are the lists of
        per-stance vectors, and ``y`` is the list of labels.
    """
    headline_vecs, body_vecs, y = [], [], []
    for item in tqdm(stances):
        label_idx = LABELS.index(item['Stance'])
        # Equality test against True (not a truthiness test) on purpose.
        if binary != True:
            y.append(label_idx)
        else:
            y.append(0 if label_idx < 3 else 1)

        # Two-document corpus: cleaned headline first, cleaned body second.
        pair_docs = [
            ' '.join(cleantext(item["Headline"])),
            ' '.join(cleantext(dataset.articles[item["Body ID"]])),
        ]
        vectorizer = TfidfVectorizer(min_df=1, tokenizer=None)
        tfidf = vectorizer.fit_transform(pair_docs)
        vocab = vectorizer.vocabulary_

        # flag=1 marks the headline call and flag=0 the body call.
        headline_vecs.append(
            buildWordVector(item["Headline"], model, mode, vocab, tfidf,
                            flag=1))
        body_vecs.append(
            buildWordVector(dataset.articles[item["Body ID"]], model, mode,
                            vocab, tfidf, flag=0))

    joined = np.c_[headline_vecs, body_vecs]
    return joined, headline_vecs, body_vecs, y
def compute_ub(slaves, stances):
    """Print how many stances at least one classifier predicts correctly.

    Prints three lines: the number of stances for which any classifier in
    ``slaves`` predicted the true label, the total number of stances, and
    their ratio (0.0 when there are no stances).

    Args:
        slaves: Iterable of classifiers; each must offer ``predict(stances)``
            returning one label name (a member of LABELS) per stance.
        stances: Sequence of dicts with a 'Stance' key.
    """
    actual = [LABELS.index(stance['Stance']) for stance in stances]

    # One list of predicted label indices per classifier.
    predicted = [
        [LABELS.index(label) for label in classifier.predict(stances)]
        for classifier in slaves
    ]

    # With no classifiers any() is False for every stance, so the count
    # stays 0 instead of failing on an empty transposed list.
    oracle = sum(
        1 for i, cls in enumerate(actual)
        if any(pred[i] == cls for pred in predicted)
    )

    total = len(actual)
    print(oracle)
    print(total)
    # float() keeps the ratio a true division on Python 2 as well; the guard
    # avoids ZeroDivisionError on an empty stance list.
    print(float(oracle) / total if total else 0.0)
def generate_features(stances, dataset, name):
    """Build the baseline feature matrix and label list for a set of stances.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path.

    Returns:
        Tuple ``(X, y)``: ``X`` concatenates the hand, polarity, refuting and
        overlap features column-wise; ``y`` is the list of LABELS indices.
    """
    h, b, y = [], [], []
    for stance in stances:
        y.append(LABELS.index(stance['Stance']))
        h.append(stance['Headline'])
        b.append(dataset.articles[stance['Body ID']])

    X_overlap = gen_or_load_feats(word_overlap_features, h, b,
                                  "features/overlap." + name + ".npy")
    # The refuting, polarity and hand features were used in X below without
    # ever being assigned in this function; compute them the same way the
    # other feature builders in this file do.
    X_refuting = gen_or_load_feats(refuting_features, h, b,
                                   "features/refuting." + name + ".npy")
    X_polarity = gen_or_load_feats(polarity_features, h, b,
                                   "features/polarity." + name + ".npy")
    X_hand = gen_or_load_feats(hand_features, h, b,
                               "features/hand." + name + ".npy")

    X = np.c_[X_hand, X_polarity, X_refuting, X_overlap]
    return X, y
def generate_features(stances, dataset, name):
    """Build an embedding-only feature matrix for a set of stances.

    Also prints the type of the first element of the resulting matrix.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into the feature file path.

    Returns:
        Tuple ``(X, y)``: ``X`` is the embedding features passed through
        ``np.c_``; ``y`` is the list of LABELS indices.
    """
    records = [
        (LABELS.index(item['Stance']),
         item['Headline'],
         dataset.articles[item['Body ID']])
        for item in stances
    ]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    embedding = gen_or_load_feats(word_features, headlines, bodies,
                                  "features/embedding." + name + ".npy")
    X = np.c_[embedding]
    # Debug output: element type of the first entry.
    print(type(X[0][0]))
    return X, y
def generate_features(stances, dataset, name):
    """Build the baseline feature matrix and label list for a set of stances.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path.

    Returns:
        Tuple ``(X, y)``: ``X`` concatenates the hand, polarity, refuting and
        overlap features column-wise; ``y`` is the list of LABELS indices.
    """
    records = [
        (LABELS.index(item['Stance']),
         item['Headline'],
         dataset.articles[item['Body ID']])
        for item in stances
    ]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    def cached(extractor, prefix):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 prefix + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")

    return np.c_[hand, polarity, refuting, overlap], y
def generate_features(stances, dataset, name):
    """Build hand, overlap, TF-IDF and sentiment features.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path.

    Returns:
        Tuple ``(X, y)``: ``X`` concatenates the hand, overlap, TF-IDF and
        sentiment features column-wise; ``y`` is the list of LABELS indices.
    """
    records = [
        (LABELS.index(item['Stance']),
         item['Headline'],
         dataset.articles[item['Body ID']])
        for item in stances
    ]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    def cached(extractor, prefix):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 prefix + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    hand = cached(hand_features, "features/hand.")
    tf_idf = cached(tf_idf_features, "features/tf_idf.")
    sentiment = cached(sentiment_features, "features/sentiment.")

    return np.c_[hand, overlap, tf_idf, sentiment], y
def generate_features_second_layer(stances, dataset, name):
    """Build baseline plus bag-of-words overlap features.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path.

    Returns:
        Tuple ``(X, y)``: ``X`` concatenates, column-wise, the hand, refuting,
        polarity, overlap and BoW-overlap features; ``y`` is the list of
        LABELS indices.
    """
    records = [
        (LABELS.index(item['Stance']),
         item['Headline'],
         dataset.articles[item['Body ID']])
        for item in stances
    ]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    def cached(extractor, prefix):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 prefix + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")
    bow_overlap = cached(bow_overlap_features, "features/bow_overlap.")

    # Refuting comes before polarity in this layer's column order.
    return np.c_[hand, refuting, polarity, overlap, bow_overlap], y
def generate_features(stances, dataset, name):
    """Build baseline plus agree/discuss features.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path.

    Returns:
        Tuple ``(X, y)``: ``X`` concatenates, column-wise, the hand, polarity,
        discuss, agree, refuting and overlap features; ``y`` is the list of
        LABELS indices.
    """
    records = [
        (LABELS.index(item['Stance']),
         item['Headline'],
         dataset.articles[item['Body ID']])
        for item in stances
    ]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    def cached(extractor, prefix):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 prefix + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    agree = cached(agree_features, "features/agree.")
    discuss = cached(discuss_features, "features/discuss.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")

    return np.c_[hand, polarity, discuss, agree, refuting, overlap], y
def generate_features(stances, dataset, name):
    """Build an entity-only feature matrix for a set of stances.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into the feature file path.

    Returns:
        Tuple ``(X, y)``: ``X`` is whatever ``gen_or_load_feats`` returns for
        the ``entity`` extractor; ``y`` is the list of LABELS indices.
    """
    records = [
        (LABELS.index(item['Stance']),
         item['Headline'],
         dataset.articles[item['Body ID']])
        for item in stances
    ]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    # The baseline (overlap/refuting/polarity/hand) and word2vec features
    # are disabled here; only the entity feature is active.
    entity_path = "features/entity." + name + ".npy"
    X = gen_or_load_feats(entity, headlines, bodies, entity_path)
    return X, y
def generate_features(stances, dataset, name):
    """Build baseline plus TF-IDF, SVD and sentiment features.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path.

    Returns:
        Tuple ``(X, y)``: ``X`` concatenates, column-wise, the hand, polarity,
        refuting, overlap, TF-IDF, SVD and sentiment features; ``y`` is the
        list of LABELS indices.
    """
    records = [
        (LABELS.index(item['Stance']),
         item['Headline'],
         dataset.articles[item['Body ID']])
        for item in stances
    ]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    # (extractor, path prefix) pairs; this order is both the computation
    # order and the column order of X.
    specs = [
        (word_overlap_features, "features/overlap."),
        (refuting_features, "features/refuting."),
        (polarity_features, "features/polarity."),
        (hand_features, "features/hand."),
        (tf_idf_features, "features/tf_idf."),
        (svd_features, "features/svd."),
        (sentiment_features, "features/sentiment."),
    ]
    overlap, refuting, polarity, hand, tf_idf, svd, sentiment = [
        gen_or_load_feats(extractor, headlines, bodies, prefix + name + ".npy")
        for extractor, prefix in specs
    ]

    # Debug plotting of the TF-IDF and SVD features used to live here.
    X = np.c_[hand, polarity, refuting, overlap, tf_idf, svd, sentiment]
    return X, y
def generate_features(stances, dataset, name):
    """Build baseline plus headline/body word2vec features.

    Feature file paths are built from the module-level ``FILENAME`` prefix.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path.

    Returns:
        Tuple ``(X, y)``: ``X`` concatenates, column-wise, the hand, polarity,
        refuting, overlap, headline-w2v and body-w2v features; ``y`` is the
        list of LABELS indices.
    """
    records = [
        (LABELS.index(item['Stance']),
         item['Headline'],
         dataset.articles[item['Body ID']])
        for item in stances
    ]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    def cached(extractor, tag):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 FILENAME + tag + name + ".npy")

    # The w2v result is unpacked into exactly two parts: headline and body.
    head_w2v, body_w2v = cached(gen_w2v, "w2v.")
    overlap = cached(word_overlap_features, "overlap.")
    refuting = cached(refuting_features, "refuting.")
    polarity = cached(polarity_features, "polarity.")
    hand = cached(hand_features, "hand.")

    return np.c_[hand, polarity, refuting, overlap, head_w2v, body_w2v], y
def generate_features(stances, dataset, name, only_related=False):
    """Build the full feature matrix plus multi-class and binary labels.

    Args:
        stances: Iterable of dicts with 'Stance', 'Stance_biClass',
            'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path.
        only_related: When truthy, every feature file name is prefixed with
            ``"re_"`` so it does not collide with the unprefixed files.

    Returns:
        Tuple ``(X, y, y_bi)``: ``X`` concatenates, column-wise, the hand,
        sentiment, polarity, refuting, overlap, NER, Q and doc2vec features;
        ``y`` is the list of LABELS indices; ``y_bi`` is the list of
        'Stance_biClass' values.
    """
    h, b, y, y_bi = [], [], [], []
    related_dir = "re_" if only_related else ""
    for stance in stances:
        y_bi.append(stance['Stance_biClass'])
        y.append(LABELS.index(stance['Stance']))
        h.append(stance['Headline'])
        b.append(dataset.articles[stance['Body ID']])

    def cached(extractor, tag):
        return gen_or_load_feats(
            extractor, h, b,
            "features/" + related_dir + tag + name + ".npy")

    X_overlap = cached(word_overlap_features, "overlap.")
    X_refuting = cached(refuting_features, "refuting.")
    X_polarity = cached(polarity_features, "polarity.")
    X_sentiment = cached(sentiment_analyzer, "sentiment.")
    X_hand = cached(hand_features, "hand.")
    X_ner = cached(name_entity_similarity, "ner.")
    # This used to call name_entity_similarity again, which made the Q
    # columns a duplicate of the NER columns.
    # NOTE(review): if gen_or_load_feats reuses existing .npy files, delete
    # any previously written "Q.*.npy" files so they are regenerated.
    X_Q = cached(question_mark_ending, "Q.")
    X_doc2vec = cached(doc2vec_feature, "doc2vec.")

    X = np.c_[X_hand, X_sentiment, X_polarity, X_refuting, X_overlap, X_ner,
              X_Q, X_doc2vec]
    return X, y, y_bi
def generate_features(stances, dataset, name):
    """Build baseline plus body/headline word2vec features.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path.

    Returns:
        Tuple ``(X, y)``: ``X`` concatenates, column-wise, the hand, polarity,
        refuting, overlap, body-w2v and headline-w2v features; ``y`` is the
        list of LABELS indices.
    """
    records = [
        (LABELS.index(item['Stance']),
         item['Headline'],
         dataset.articles[item['Body ID']])
        for item in stances
    ]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    def cached(extractor, prefix):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 prefix + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")
    # The body vectors are stored under "word." and the headline vectors
    # under "head.".
    w2v_body = cached(w2v_body_feature_features, "features/word.")
    w2v_head = cached(w2v_head_feature_features, "features/head.")

    return np.c_[hand, polarity, refuting, overlap, w2v_body, w2v_head], y
def generate_features(stances, dataset, name, number_of_words=5000):
    """Build baseline, bag-of-words and InferSent headline-embedding features.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path. The special
            value ``"vocab"`` only triggers the ``vocabularyForm`` call.
        number_of_words: Forwarded to ``vocabularyForm``.

    Returns:
        Tuple ``(X, y)`` with ``X`` the column-wise concatenation of the hand,
        polarity, refuting, overlap, bag-of-words and sentence-embedding
        features and ``y`` the list of LABELS indices; ``None`` when
        ``name == "vocab"``.
    """
    records = [
        (LABELS.index(item['Stance']),
         item['Headline'],
         dataset.articles[item['Body ID']])
        for item in stances
    ]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    vocabulary = vocabularyForm(headlines, bodies, number_of_words)

    # NOTE(review): this assumes everything after the vocabulary step was
    # guarded by `name != "vocab"` in the original layout -- confirm.
    if name == "vocab":
        return None

    def cached(extractor, prefix):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 prefix + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")
    bow = calculateBOW(headlines, bodies, vocabulary)

    # NOTE(review): torch.load deserialises a pickle file; only load model
    # files from a trusted source.
    encoder = torch.load('infersent.allnli.pickle',
                         map_location=lambda storage, loc: storage)
    encoder.set_glove_path('dataset/glove.840B.300d.txt')

    # Only the headlines are embedded; the bodies merely bound the pairing.
    sentences = [headline for headline, _body in zip(headlines, bodies)]
    encoder.build_vocab(sentences, tokenize=True)
    embedded = encoder.encode(sentences, tokenize=True)

    return np.c_[hand, polarity, refuting, overlap, bow, embedded], y
def generate_features(stances, dataset, name):
    """Build baseline plus TF-IDF, SVD and sentiment features.

    Args:
        stances: Iterable of dicts with 'Stance', 'Headline' and 'Body ID' keys.
        dataset: Object whose ``articles`` mapping is indexed by body ID.
        name: Split tag inserted into every feature file path.

    Returns:
        Tuple ``(X, y)``: ``X`` concatenates, column-wise, the hand, polarity,
        refuting, overlap, TF-IDF, SVD and sentiment features; ``y`` is the
        list of LABELS indices.
    """
    records = [
        (LABELS.index(item['Stance']),
         item['Headline'],
         dataset.articles[item['Body ID']])
        for item in stances
    ]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    def cached(extractor, prefix):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 prefix + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")
    tf_idf = cached(tf_idf_features, "features/tf_idf.")
    svd = cached(svd_features, "features/svd.")
    sentiment = cached(sentiment_features, "features/sentiment.")

    X = np.c_[hand, polarity, refuting, overlap, tf_idf, svd, sentiment]
    # Exploratory matplotlib plots of each feature sorted by label (saved as
    # refuting.png, hand.png, polarity.png, tfidf.png, svd.png and
    # sentiment.png) used to live here and are disabled.
    return X, y