def readFile(fileName, context): start_time = time.time() dir_path = os.path.dirname(os.path.realpath(__file__)) os.chdir(dir_path) try: reader = open(fileName, "r") writer = open('output.nq', 'w+') except Exception as e: print(e) else: parity = itertools.cycle([True, False]) for line in reader: line = line.replace('%20', ',').replace('%2C', ',') if line.startswith("_:"): writer.write('{} <{}> .'.format(line.strip().rstrip(' .'), context)) writer.write('\n') continue if line.isspace(): continue if next(parity): line1 = line writer.write('{} <{}> .'.format(line1.strip().rstrip(' .'), context)) writer.write('\n') line1 = re.findall('<([^>]*)>', line) if len(line1) == 3: sub_line1, rel_line1, obj_line1 = line1[0], line1[1], line1[2] elif len(line1) == 2: sub_line1, rel_line1 = line1[0], line1[1] obj_line1 = re.findall('"([^"]*)"', line)[0] else: pass finalSub = '' sub1 = sub_line1 sub_line1 = sub_line1.split('/')[-1].upper() try: extractedSub = entityEtra.get_continuous_chunks(sub_line1.upper()) print("sub ext", extractedSub) extractedSubLength = len(extractedSub) except Exception as e: print(e) else: # print("NLTK works perfectly") if extractedSubLength == 0: pass elif extractedSubLength == 1: finalSub = extractedSub[0] else: writer.write( '<{}> <{}isA> <{}> <{}> .'.format(sub1, relationToUse, '/'.join(sub1.split('/')[:-1]) + '/' + ','.join( extractedSub), context)) writer.write('\n') # writer.write('{}'.format(extractedSub)) else: line2 = line writer.write('{} <{}> .'.format(line2.strip().rstrip(' .'), context)) writer.write('\n') line2 = re.findall('<([^>]*)>', line) if len(line2) == 3: sub_line2, rel_line2, obj_line2 = line2[0], line2[1], line2[2] elif len(line2) == 2: sub_line2, rel_line2 = line2[0], line2[1] obj_line2 = re.findall('"([^"]*)"', line)[0] else: pass # writer.write("Both line: {}{}".format(line1, line2)) finalObj = '' sub2 = sub_line2 sub_line2 = sub_line2.split('/')[-1].upper() try: extractedObj = entityEtra.get_continuous_chunks(sub_line2.upper()) print("obj ext", extractedObj) extractedObjLength = len(extractedObj) except Exception as e: print(e) else: # print("NLTK works perfectly") if extractedObjLength == 0: pass elif extractedObjLength == 1: finalObj = extractedObj[0] else: writer.write('<{}> <{}isA> <{}> <{}> .'.format(sub2, relationToUse, '/'.join(sub2.split('/')[:-1]) + '/' + ','.join( extractedObj), context)) writer.write('\n') sub_line1, sub_line2 = sub_line1.lower(), sub_line2.lower() # Calculating the similarity between two normal words that have not been changed finalSub, finalObj = finalSub.lower(), finalObj.lower() extractedSub = [i.lower() for i in extractedSub] extractedObj = [i.lower() for i in extractedObj] if extractedSubLength == 0: if extractedObjLength == 0: calSimilarity(sub_line1, sub_line2, writer, sub1, sub2, context) subToUse, objToUse = sub_line1, sub_line2 elif extractedObjLength == 1: calSimilarity(sub_line1, finalObj, writer, sub1, sub2, context) subToUse, objToUse = sub_line1, finalObj else: for everyObj in extractedObj: calSimilarity(sub_line1, everyObj, writer, sub1, sub2, context) subToUse, objToUse = sub_line1, everyObj if mainSim != 0: break elif extractedSubLength == 1: if extractedObjLength == 0: calSimilarity(finalSub, sub_line2, writer, sub1, sub2, context) subToUse, objToUse = finalSub, sub_line2 elif extractedObjLength == 1: calSimilarity(finalSub, finalObj, writer, sub1, sub2, context) subToUse, objToUse = finalSub, finalObj else: for everyObj1 in extractedObj: calSimilarity(finalSub, everyObj1, writer, sub1, sub2, context) subToUse, objToUse = finalSub, everyObj1 if mainSim != 0: break else: if extractedObjLength == 0: for everySub in extractedSub: calSimilarity(everySub, sub_line2, writer, sub1, sub2, context) subToUse, objToUse = everySub, sub_line2 if mainSim != 0: break elif extractedObjLength == 1: for everySub1 in extractedSub: calSimilarity(everySub1, finalObj, writer, sub1, sub2, context) subToUse, objToUse = everySub1, finalObj if mainSim != 0: break else: for everySub2 in extractedSub: for everyObj2 in extractedObj: calSimilarity(everySub2, everyObj2, writer, sub1, sub2, context) subToUse, objToUse = everySub2, everyObj2 if mainSim != 0: break print("sim is: ", mainSim) if mainSim != 0: print("Insssside the first if") imageURLS(subToUse, hashOut, writer, relationToUse, context) imageURLS(objToUse, hashOut, writer, relationToUse, context) specificImageURLs(subToUse, hashOut, writer, relationToUse, context) specificImageURLs(objToUse, hashOut, writer, relationToUse, context) # print("DONNNNNNNNe ALL") writer.flush() writer.close() readIn = open('output.nq') data = readIn.read().replace('<', '<').replace('>', '>').replace('\n', '</br>') readIn.close() return data
def readFile(fileName, context): start_time = time.time() dir_path = os.path.dirname(os.path.realpath(__file__)) os.chdir(dir_path) try: reader = open(fileName, "r") writer = open('output.nq', 'w+') except Exception as e: print(e) else: parity = itertools.cycle([True, False]) for line in reader: line = line.replace('%20', ',').replace('%2C', ',') if line.startswith("_:"): writer.write('{} <{}> .'.format(line.strip().rstrip(' .'), context)) writer.write('\n') continue if line.isspace(): continue if next(parity): line1 = line writer.write('{} <{}> .'.format(line1.strip().rstrip(' .'), context)) writer.write('\n') line1 = re.findall('<([^>]*)>', line) if len(line1) == 3: sub_line1, rel_line1, obj_line1 = line1[0], line1[ 1], line1[2] elif len(line1) == 2: sub_line1, rel_line1 = line1[0], line1[1] obj_line1 = re.findall('"([^"]*)"', line)[0] else: pass finalSub = '' sub1 = sub_line1 sub_line1 = sub_line1.split('/')[-1].upper() try: extractedSub = entityEtra.get_continuous_chunks( sub_line1.upper()) extractedSubLength = len(extractedSub) except Exception as e: print(e) else: # print("NLTK works perfectly") if extractedSubLength == 0: pass elif extractedSubLength == 1: finalSub = extractedSub[0] else: writer.write('<{}> <{}isA> <{}> <{}> .'.format( sub1, relationToUse, '/'.join(sub1.split('/')[:-1]) + '/' + ','.join(extractedSub), context)) writer.write('\n') else: line2 = line writer.write('{} <{}> .'.format(line2.strip().rstrip(' .'), context)) writer.write('\n') line2 = re.findall('<([^>]*)>', line) if len(line2) == 3: sub_line2, rel_line2, obj_line2 = line2[0], line2[ 1], line2[2] elif len(line2) == 2: sub_line2, rel_line2 = line2[0], line2[1] obj_line2 = re.findall('"([^"]*)"', line)[0] else: pass # writer.write("Both line: {}{}".format(line1, line2)) finalObj = '' sub2 = sub_line2 sub_line2 = sub_line2.split('/')[-1].upper() try: extractedObj = entityEtra.get_continuous_chunks( sub_line2.upper()) extractedObjLength = len(extractedObj) except Exception as e: print(e) else: # print("NLTK works perfectly") if extractedObjLength == 0: pass elif extractedObjLength == 1: finalObj = extractedObj[0] else: writer.write('<{}> <{}isA> <{}> <{}> .'.format( sub2, relationToUse, '/'.join(sub2.split('/')[:-1]) + '/' + ','.join(extractedObj), context)) writer.write('\n') sub_line1, sub_line2 = sub_line1.lower(), sub_line2.lower() from relations import check_Words words_to_use = check_Words(extractedSub, extractedObj) first, second = words_to_use[0], words_to_use[1] calSimilarity(first, second, writer, sub1, sub2, context) writer.flush() writer.close() readIn = open('output.nq') data = readIn.read().replace('<', '<').replace('>', '>').replace( '\n', '</br>') readIn.close() return data