def checkIdenticals():
    """Report which 2011 papers occur in both the labelled and unlabelled data.

    Loads the data set returned by ``ptd.getDataWithMeta()`` and the one
    returned by ``ptd.getUnlabelledData()``, restricts both to publication
    year 2011, and compares their ``WOS`` values.  Every matching pair is
    printed and counted.  Finally the records from
    ``ptd.getUnlabelledDataAsList()`` whose ``"WOS"`` value was found in both
    sets are filtered out, and the list length before and after is printed.

    The function is purely diagnostic: it only prints and returns ``None``.
    """
    # Local import so the block does not depend on the file's import header.
    from collections import Counter

    old = ptd.getDataWithMeta()
    old_2011 = old[old.Publication_year == 2011]
    old_2011_wos = old_2011.WOS.tolist()

    new = ptd.getUnlabelledData()
    print("len of new data: {}".format(len(new)))
    # NOTE(review): the year is compared as the string "2011" here but as the
    # int 2011 above -- presumably the two sources store the column with
    # different types; confirm against the loaders.
    new_2011 = new[new.Publication_year == "2011"]
    new_2011_wos = new_2011.WOS.tolist()

    print("old length 2011: {}".format(len(old_2011_wos)))
    print("new length 2011: {}".format(len(new_2011_wos)))

    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid on Python 3.
    print(old_2011_wos[:5])
    print(new_2011_wos[:5])

    # Count occurrences of each old id once, instead of rescanning the whole
    # old list for every new id.  Values that are not equal to themselves
    # (NaN) are left out so they never match, exactly as with a plain ``==``
    # comparison.
    old_counts = Counter(wos for wos in old_2011_wos if wos == wos)

    identical = []
    for wos in new_2011_wos:
        # One report per matching (new, old) pair, so duplicated ids in the
        # old data are still counted once per occurrence.
        for _ in range(old_counts.get(wos, 0)):
            print("{}\n{}\n".format(wos, wos))
            identical.append(wos)

    print("Number of identical papers = {}".format(len(identical)))

    new_data = ptd.getUnlabelledDataAsList()
    # NOTE(review): the labels below say "old" but the list being measured is
    # the unlabelled (new) data; the text is kept unchanged on purpose.
    print("len of old before: {}".format(len(new_data)))

    identical_ids = set(identical)  # O(1) membership tests in the filter
    new_data_after = [dic for dic in new_data if dic["WOS"] not in identical_ids]
    print("len of old after: {}".format(len(new_data_after)))
none_c = 0 for pred in predictions: if pred == "AGAINST": against_c += 1 elif pred == "FAVOR": favor_c += 1 else: none_c += 1 print("\nThe distribution of predictions are: ") print("\tFAVOR: \t{}".format(favor_c)) print("\tAGAINST:\t{}".format(against_c)) print("\tNONE: \t{}".format(none_c)) unique_years = list(set(unlabelled_data.Publication_year.tolist())) unlabelled_data = ptd.getUnlabelledDataAsList() for i, dic in enumerate(unlabelled_data): dic["Stance"] = predictions[i] for year in unique_years: favor_c = 0 against_c = 0 none_c = 0 for dic in unlabelled_data: if dic["Publication_year"] == year: if dic["Stance"] == "AGAINST": against_c += 1 elif dic["Stance"] == "FAVOR": favor_c += 1 else: