# assumes module-level setup elsewhere in this file:
#   from os import listdir
#   import utils
#   ARTICLES_FOLDER, MAX_ARTICLE_COUNT, articles, article_names
def loadarticles():
    files = listdir(ARTICLES_FOLDER)
    article_ctr = 0
    for file in files:
        if article_ctr >= MAX_ARTICLE_COUNT:  # stop once the cap is reached
            break
        try:
            # initialize article element to contain sentences
            global articles
            articles.append([])
            article_names.append(file)
            # open the file
            f = open(ARTICLES_FOLDER + file, 'r')
            # get the text and strip scraper artifacts (escaped newlines,
            # bytes-literal markers, brackets) and digits
            article = f.read().replace("\\n", "").replace("'b'", "").replace(
                "b'", "").replace("[", "").replace("]", "")
            article = ''.join([i for i in article if not i.isdigit()])
            # split the text into sentences
            sentences = utils.delim_sentences(article)
            for sentence in sentences:
                articles[article_ctr].append(sentence)
            f.close()
        except Exception as e:
            print("Error in " + file)
            print(e)
        # increment
        article_ctr = article_ctr + 1
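# Hedged usage sketch: assumes the module globals consumed by loadarticles()
# (ARTICLES_FOLDER, MAX_ARTICLE_COUNT, articles, article_names) are already
# initialized at module level; the helper name is illustrative only.
def _demo_loadarticles():
    loadarticles()
    for name, sents in zip(article_names, articles):
        print(name, ':', len(sents), 'sentences')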
def evaluate(testfiletxt, answerfilejson):
    f = open(testfiletxt, 'r')
    text = f.read().replace("\\n", "").replace("'b'", "").replace(
        "b'", "").replace("[", "").replace("]", "")
    f.close()
    sentences = utils.delim_sentences(text)
    with open(answerfilejson) as data_ans:
        # NOTE: data is loaded but not yet used; the fuller evaluate()
        # below scores predictions against it
        data = json.loads(data_ans.read())
    for sentence in sentences:
        print(sentence)
def getResults(article, relation):
    model = Model(relation)
    sentences = utils.delim_sentences(article)
    ret_val = []
    for sentence in sentences:
        classification = model.predict(sentence)
        temp_item = {"sentence": sentence, "classification": classification}
        ret_val.append(temp_item)
        # print(sentence)
        # print(classification)
    return ret_val
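# Hedged usage sketch for getResults(): the relation name and article text
# are illustrative assumptions, not values taken from the project.
def _demo_getResults():
    article = "A car, also called an automobile, is a wheeled motor vehicle."
    for item in getResults(article, 'Synonymy'):
        print(item['classification'], '-', item['sentence'])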
def main(relation):
    print('Creating test file for', relation)
    f = open('sen_' + relation + '.txt', 'r')
    fans = open('ans_file2_' + relation + '.json', 'w')
    wordmodel = ModelWords(relation)
    text = f.read()
    data = []
    sentences = utils.delim_sentences(text)
    for sentence in sentences:
        print('-------\nSen:', sentence)
        baseword, subword = wordmodel.getWords(sentence)
        print(':::', baseword, subword)
        isCorrect = input("Is correct? [ENTER] = YES, anything else = NO: ")
        if len(isCorrect) == 0:
            temp = {
                'sentence': sentence,
                'baseword': baseword,
                'subword': subword
            }
            data.append(temp)
            continue
        # else:
        #     r = random.randint(0, 10)
        #     if (r > 7):
        #         temp = {'sentence': sentence, 'baseword': baseword,
        #                 'subword': subword}
        #         data.append(temp)
        # baseword = input('Enter baseword: ')
        # if len(baseword) == 0:
        #     continue
        # subword = input('Enter subword: ')
    # write the accepted pairs out as a JSON array by hand
    fans.write("[\n")
    for index, item in enumerate(data):
        # strip double quotes so the hand-written JSON stays valid
        item['sentence'] = item['sentence'].replace('"', '')
        if index == len(data) - 1:
            to_write = '\t{"baseword": "%s", "subword": "%s", "sentence": "%s"}\n' % (
                item['baseword'], item['subword'], item['sentence'])
        else:
            to_write = '\t{"baseword": "%s", "subword": "%s", "sentence": "%s"},\n' % (
                item['baseword'], item['subword'], item['sentence'])
        fans.write(to_write)
    fans.write("]")
    fans.close()
    f.close()
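# An alternative sketch for the hand-rolled JSON writing in main():
# json.dump escapes embedded quotes itself, so the .replace('"', '')
# workaround becomes unnecessary. The function name is hypothetical.
def write_answers_json(data, path):
    import json
    with open(path, 'w') as fans:
        json.dump(data, fans, indent=1)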
def evaluate(testfiletxt, answerfilejson, relation, logfile):
    # map the relation name to the type index used in the answer files
    REL_INDEX = 0
    if relation == 'Synonymy':
        REL_INDEX = 1
    elif relation == 'Hyponym':
        REL_INDEX = 2
    elif relation == 'Holynym':
        REL_INDEX = 3
    elif relation == 'Meronym':
        REL_INDEX = 4
    model = Model(relation)
    f = open(testfiletxt, 'r')
    text = f.read().replace("\\n", "").replace("'b'", "").replace(
        "b'", "").replace("[", "").replace("]", "")
    f.close()
    sentences = utils.delim_sentences(text)
    with open(answerfilejson) as data_ans:
        data = json.loads(data_ans.read())
    print("-------------------------")
    print("\n", relation)
    score = 0
    for sentence in sentences:
        print('\n\nClassifying:', sentence)
        # print(model_syn.getTotalScore(sentence))
        prediction = model.predict(sentence)
        sen_type = getType(data, sentence)
        print(':: Classification is', relation, ':\t', prediction)
        # REL_INDEX marks sentences of this relation in the answer file;
        # cast because create_test_file() stores the type as a string
        ans = int(sen_type) == REL_INDEX
        print(':: From ANS file is', relation, ':\t', ans)
        if ans == prediction:
            score = score + 1
            print('Match')
    print('\n\nScore for', relation, ':', score, '/', len(sentences))
    to_write = "%f, %d, %d\n" % (score / len(sentences), score, len(sentences))
    logfile.write(to_write)
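# getType() is defined elsewhere in the project. A minimal sketch that would
# be consistent with the answer files written by create_test_file() below
# (entries of the form {"type": ..., "sentence": ...}); this is an
# assumption, not the project's actual implementation:
def _getType_sketch(data, sentence):
    for item in data:
        if item['sentence'] == sentence:
            return int(item['type'])
    return 0  # 0 marks "no relation"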
def getResults(article, relation):
    model = Model(relation)
    modelwords = ModelWords(relation)
    sentences = utils.delim_sentences(article)
    ret_val = []
    for sentence in sentences:
        classification = model.predict(sentence)
        baseword, subword = modelwords.getWords(sentence)
        temp_item = {"sentence": sentence,
                     "classification": classification,
                     "baseword": baseword,
                     "subword": subword}
        ret_val.append(temp_item)
        # print(sentence)
        # print(classification)
    return ret_val
def create_test_file(filepath, filename):
    f = open(filepath, 'r')
    fileans = 'ANSF' + filename.replace('.txt', '.json')
    text = f.read().replace("\\n", "").replace("'b'", "").replace(
        "b'", "").replace("[", "").replace("]", "")
    sentences = utils.delim_sentences(text)
    data = []
    for sentence in sentences:
        print(sentence)
        sen_type = input(
            "Enter type: [0] None, [1] Syn, [2] Hyp, [3] Holo, [4] Mero \nType: "
        )
        temp = {'sentence': sentence, 'type': sen_type}
        data.append(temp)
    fans = open(fileans, 'w')
    fans.write("[\n")
    for index, item in enumerate(data):
        # strip double quotes so the hand-written JSON stays valid
        item['sentence'] = item['sentence'].replace('"', '')
        if index == len(data) - 1:
            to_write = '\t{"type": "%s", "sentence": "%s"}\n' % (
                item['type'], item['sentence'])
        else:
            to_write = '\t{"type": "%s", "sentence": "%s"},\n' % (
                item['type'], item['sentence'])
        fans.write(to_write)
    fans.write("]")
    ftxt = open('TESTF' + filename, 'w')
    ftxt.write(text)
    ftxt.close()
    f.close()
    fans.close()
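# Hedged usage sketch for create_test_file(): the file name is illustrative.
# This prompts for a label per sentence, then writes TESTFsample.txt and
# ANSFsample.json next to the script.
def _demo_create_test_file():
    create_test_file('./articles/sample.txt', 'sample.txt')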
def run():
    sentencemodel = ModelSentence('Synonymy')
    wordmodel = ModelWords('Synonymy')
    fsyn = open('sen_syn.txt', 'w')
    # process every article (n is left over from an earlier cap and is unused)
    n = 100
    files = os.listdir('./articles')
    for i in range(0, len(files)):
        f = open('./articles/' + files[i], 'r')
        article = f.read()
        sentences = utils.delim_sentences(article)
        for sentence in sentences:
            # a sentence with synonymy was found
            if sentencemodel.predict(sentence):
                to_write = sentence + '\n'
                fsyn.write(to_write)
                print(files[i], ":", wordmodel.getWords(sentence))
        f.close()
    fsyn.close()
def run(relation):
    print(relation)
    sentencemodel = ModelSentence(relation)
    wordmodel = ModelWords(relation)
    fsyn = open('sen_' + relation + '.txt', 'w')
    # sample n random articles (repeats are possible)
    n = 100
    files = os.listdir('./articles')
    for i in range(0, n):
        # randint is inclusive on both ends, so cap at len(files) - 1
        file_index = random.randint(0, len(files) - 1)
        f = open('./articles/' + files[file_index], 'r')
        article = f.read()
        sentences = utils.delim_sentences(article)
        for sentence in sentences:
            # a sentence with the target relation was found
            if sentencemodel.predict(sentence):
                to_write = sentence + '.\n'
                fsyn.write(to_write)
        f.close()
    fsyn.close()
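# Because random.randint can pick the same article more than once, a variant
# that wants n distinct articles could draw with random.sample instead
# (a sketch, assuming n <= len(files); the helper name is hypothetical):
def _sample_distinct_articles(files, n):
    return random.sample(files, n)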