import codecs
import json
import func

# Script preamble: the script name plus four command-line arguments are required.
# NOTE(review): `argv` is used below, but no `from sys import argv` is visible
# in this chunk -- confirm it is imported elsewhere in the file.
if len(argv) < 5:
    # Too few arguments: print the usage string and stop.
    print """ ./addPredictionPSArg1.py <relations.json> <parses.json> <predictionFile> <writeF> """
    exit()
# `relations` first holds the path given as argv[1]; it is rebound further
# down to the list of parsed JSON objects read from that path.
relations = argv[1] #relations.json
# NOTE(review): none of the handles opened below are closed in the visible code.
predictedClauses = open(argv[3], 'r')
parsesFile = argv[2]
outF = open(argv[4], 'w')
# The parses file is read as a single JSON document.
parsesF = codecs.open(parsesFile, encoding='utf8')
parseDict = json.load(parsesF)
dictByDocID = func.makeDictByDocID(parseDict)
# The relations file is read line by line, one JSON document per line.
relationsF = codecs.open(relations, encoding='utf8')
relations = [json.loads(x) for x in relationsF]
def writeOutputFormat(relations, outF):
    """Write each item of `relations` to `outF` as one JSON document per line."""
    for relation in relations:
        outF.write('%s\n' % json.dumps(relation))
def makeRelationsDictForOutput(relationDict, psArg1PredictionsDict, dictByDocID):
    # NOTE(review): this definition is cut off in the visible chunk -- the body
    # of the loop below is not shown, so its behaviour is not documented here.
    #relations = [json.loads(x) for x in relationsF]
    total = 0
    correct = 0.0
    for relation in relationDict:
def produceNonExplicitRelationCandidates(predictionsF, parsesF, docDir):
    """Create candidate non-explicit relations and merge them with the predictions.

    Every pair of adjacent sentences in each parsed document becomes a
    candidate relation, unless ``inSameParagraph`` rejects the pair (only
    consulted when the parse and ``createSentenceLineDict`` agree on the
    number of sentences) or ``isExplicitRelation`` reports that the pair is
    already covered by a predicted relation.

    Args:
        predictionsF: Path to a UTF-8 file with one JSON relation per line.
            Blank lines are skipped.
        parsesF: Path to a UTF-8 JSON file mapping each DocID to a parse that
            has a 'sentences' list.
        docDir: Passed through unchanged to ``createSentenceLineDict``.

    Returns:
        A list holding the newly created candidates (dicts with the keys
        'DocID', 'Arg1' and 'Arg2') followed by every predicted relation.
    """
    print("In function: produceNonExplicitRelationCandidates")

    # Read predicted relations.json; `with` guarantees the handle is closed.
    print("Reading predicted relations.json")
    with codecs.open(predictionsF, encoding='utf8') as pdtb_file:
        predictions = [json.loads(x) for x in pdtb_file if x.strip()]
    print("Done")

    # Read parses.json
    print("Reading parses.json")
    with codecs.open(parsesF, encoding='utf8') as parse_file:
        parses = json.load(parse_file)
    print("Done")

    dictByDocID = func.makeDictByDocID(parses)

    relations = []
    nonMatchNum = 0
    for DocID in parses:
        senList = parses[DocID]['sentences']
        senLineDict = createSentenceLineDict(DocID, docDir)

        # The paragraph test is only applied when both sources report the
        # same number of sentences; otherwise the document is counted as a
        # mismatch and every adjacent pair stays eligible.
        toCheckParagraph = len(senList) == len(senLineDict)
        if not toCheckParagraph:
            nonMatchNum += 1

        for sen1ID in range(len(senList) - 1):
            sen2ID = sen1ID + 1

            # Skip pairs rejected by the paragraph test.
            if toCheckParagraph and not inSameParagraph(sen1ID, sen2ID, senLineDict):
                continue

            # Skip pairs that already have an explicit relation.
            if isExplicitRelation(sen1ID, sen2ID, DocID, predictions):
                continue

            # For valid sentence pairs, create a relation.
            relations.append({
                'DocID': DocID,
                'Arg1': extractArgFields(DocID, senList[sen1ID], sen1ID, dictByDocID),
                'Arg2': extractArgFields(DocID, senList[sen2ID], sen2ID, dictByDocID),
            })

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('\n' + str(len(relations)) + ' Non-Explicit relations created in total')
    print(str(nonMatchNum) + '/' + str(len(parses)) +
          ' documents have inconsistent sentence counts in parses.json and raw files')

    # Add the explicit (predicted) relations after the generated candidates.
    relations.extend(predictions)
    print('\nExporting ' + str(len(relations)) +
          ' relations (both explicit and non-explicit) in total')
    return relations
def _loadJsonLines(path):
    """Return the JSON documents stored one per line in the UTF-8 file `path`."""
    with codecs.open(path, encoding='utf8') as handle:
        # Blank lines carry no document and would make json.loads raise.
        return [json.loads(line) for line in handle if line.strip()]


def _loadJson(path):
    """Return the single JSON document stored in the UTF-8 file `path`."""
    with codecs.open(path, encoding='utf8') as handle:
        return json.load(handle)


def produceNonExplicitRelationCandidates(predictionsF, parsesF, docDir):
    """Build non-explicit relation candidates and append the predicted relations.

    For each document in the parses, each pair of neighbouring sentences is
    turned into a candidate dict with the keys 'DocID', 'Arg1' and 'Arg2'.
    A pair is dropped when ``isExplicitRelation`` is truthy for it, or when
    ``inSameParagraph`` is falsy for it; the latter test runs only for
    documents whose parsed sentence count equals the size of the mapping
    returned by ``createSentenceLineDict``.

    Args:
        predictionsF: Path to the predicted relations file (UTF-8, one JSON
            object per line; blank lines are ignored).
        parsesF: Path to the parses file (UTF-8, one JSON object keyed by
            DocID, each value having a 'sentences' list).
        docDir: Forwarded as-is to ``createSentenceLineDict``.

    Returns:
        A list of the generated candidates followed by all predicted
        relations, in that order.
    """
    print("In function: produceNonExplicitRelationCandidates")

    print("Reading predicted relations.json")
    predictions = _loadJsonLines(predictionsF)
    print("Done")

    print("Reading parses.json")
    parses = _loadJson(parsesF)
    print("Done")

    dictByDocID = func.makeDictByDocID(parses)

    candidates = []
    nonMatchNum = 0
    for DocID in parses:
        senList = parses[DocID]['sentences']
        senLineDict = createSentenceLineDict(DocID, docDir)

        # Paragraph information is only used when both sentence counts match.
        countsMatch = len(senList) == len(senLineDict)
        if not countsMatch:
            nonMatchNum += 1

        # Walk over adjacent sentence pairs (i, i + 1).
        for sen1ID, (sen1, sen2) in enumerate(zip(senList, senList[1:])):
            sen2ID = sen1ID + 1

            if countsMatch and not inSameParagraph(sen1ID, sen2ID, senLineDict):
                continue
            if isExplicitRelation(sen1ID, sen2ID, DocID, predictions):
                continue

            relation = {}
            relation['DocID'] = DocID
            relation['Arg1'] = extractArgFields(DocID, sen1, sen1ID, dictByDocID)
            relation['Arg2'] = extractArgFields(DocID, sen2, sen2ID, dictByDocID)
            candidates.append(relation)

    # print(...) with one argument prints identically on Python 2 and 3.
    print('\n%d Non-Explicit relations created in total' % len(candidates))
    print('%d/%d documents have inconsistent sentence counts in parses.json and raw files'
          % (nonMatchNum, len(parses)))

    # Explicit relations come after the generated candidates in the result.
    relations = candidates + predictions
    print('\nExporting %d relations (both explicit and non-explicit) in total'
          % len(relations))
    return relations
# NOTE(review): the statements up to `return features` are the tail of a
# function whose header is not visible in this chunk; the indentation shown
# here is assumed. Each call appends one 'name:value' string to `features`.
    features.append('prevLastPOS:'+prevLastPOS)
    features.append('nextFirstPOS:'+nextFirstPOS)
    # Combined features join two values with '_'.
    features.append('prevLastAndComma:'+prevLast+'_'+commaBefore)
    features.append('nextFirstAndComma:'+nextFirst+'_'+commaAfter)
    features.append('commaAndcurFirstWord:'+commaBefore+'_'+curFirstWord)
    features.append('curLastWordAndComma:'+curLastWord+'_'+commaAfter)
    features.append('commaAndcurFirstPOS:'+commaBefore+'_'+curFirstPOS)
    features.append('curLastPOSAndComma:'+curLastPOS+'_'+commaAfter)
    features.append('verb1:'+v1)
    features.append('verb2:'+v2)
    features.append('verb3:'+v3)
    return features

# Top-level driver: build the lookup structures with helpers from `func`,
# then hand everything to makeDataForArg1PSExplicit together with `outF`.
# NOTE(review): `parseDict`, `relations`, `verbList` and `outF` are module-level
# names set outside this span; `verbList` is not defined in the visible code.
dictByDocID=func.makeDictByDocID(parseDict)
#func.getAllVerbsFromData(dictByDocID)
#exit()
dictByTokenID=func.makeDictByTokenID(dictByDocID)
#for i in dictByDocID:
#  print >>stderr, i, dictByDocID[i].keys()
#print >>stderr, dictByDocID.keys()
#print >>stderr, len(relations)
#exit()
relationDict=func.makeRelationDict(relations)
makeDataForArg1PSExplicit(dictByDocID,dictByTokenID,parseDict,relationDict,verbList,relations,outF)
#makeDataForImplicitSenseGold(dictByDocID,relationDict,outF) #will use gold arguments for implicit relations
#exit()