def train(self):
    """Build per-question reference data and baseline vectors.

    Every file listed in ``self.path`` is read through ``InputData`` as one
    question.  For each question this method stores:

    * ``self.data[question_id]`` -- the ``"text"`` of every reference answer.
    * ``self.model[question_id]`` -- one entry per reference answer, each
      being whatever ``corpus.getVector()`` returns after the corpus has
      been fed the texts of the student answers attached to that reference
      followed by the reference text itself.

    Returns:
        None.  Results are stored on ``self.data`` and ``self.model``.
    """
    filenames = listdir(self.path)
    reader = InputData(self.dataset_type, self.path)
    # A single corpus object is reused; it is reset after every reference
    # answer so each vector reflects only that reference's texts.
    corpus = plsa.Corpus()

    for filename in filenames:
        question = reader.readFile(filename)
        question_id = question["id"]
        reference_answers = question["referenceAnswers"]

        self.data[question_id] = [ref["text"] for ref in reference_answers]

        vectors = []
        for reference in reference_answers:
            # Student answer texts first, the reference text last.
            texts = [sa["text"] for sa in reference["studentAnswers"]]
            texts.append(reference["text"])

            corpus.addBaseline(texts)
            vectors.append(corpus.getVector())
            corpus.reset()

        self.model[question_id] = vectors
def test(self, mode, inputdir, outputdir):
    """Grade every student answer found in ``inputdir`` and write the results.

    Each file in ``inputdir`` is read as one question.  The answers graded
    are the student answers nested under each reference answer followed by
    the question's ``"otherStudentAnswers"``.  Each answer goes through
    three stages, and the first stage that matches decides the row:

    1. ``self.nonDomain.test`` matches: predicted label is ``"incorrect"``
       for mode 2 or 3, ``"non_domain"`` for mode 5, otherwise ``""``;
       the ``"grade"`` column is ``"NA"``.
    2. Either contradiction detector matches: predicted label is
       ``"incorrect"`` for mode 2, ``"contradictory"`` for mode 3 or 5,
       otherwise ``""``; the ``"grade"`` column is ``"NA"``.
    3. Otherwise the answer is scored with ``self.modeler.grade``.  When
       ``self.datamode`` is ``"beetle"`` and ``self.irr`` flags the answer,
       the score is replaced by ``-1``.  The label is
       ``self.predict(score)`` and the ``"grade"`` column is the score.

    Every row is printed as it is produced; all rows are handed to
    ``output`` once at the end.

    Args:
        mode: Labelling scheme selector (values 2, 3 and 5 are recognised).
            Also stored on ``self.mode``.
        inputdir: Directory whose files are read as questions.
        outputdir: Destination passed through to ``output``.
    """
    self.mode = mode
    columns = ["id", "grade", "Accuracy", "Predicted"]
    rows = []

    def emit(answer, predicted, grade):
        # Record one result row and echo it, exactly as it is stored.
        row = {"id": answer["id"], "Accuracy": answer["accuracy"],
               "Predicted": predicted, "grade": grade}
        rows.append(row)
        print(row)

    filenames = listdir(inputdir)
    reader = InputData(self.dataset_type, inputdir)

    for filename in filenames:
        question = reader.readFile(filename)
        question_id = question["id"]

        # Answers attached to references come first, then the unattached ones.
        answers = []
        for reference in question["referenceAnswers"]:
            answers.extend(reference["studentAnswers"])
        answers.extend(question["otherStudentAnswers"])

        for answer in answers:
            text = answer["text"]

            # Stage 1: non-domain answers are never scored.
            if self.nonDomain.test(text):
                if mode in (2, 3):
                    label = "incorrect"
                elif mode == 5:
                    label = "non_domain"
                else:
                    label = ""
                emit(answer, label, "NA")
                continue

            # Stage 2: contradiction detection (bigram check first; the
            # reference-based check only runs if the first one fails).
            contradicts = (
                self.contradictBigram.isContradictory(question_id, text)
                or self.contradict.isContradictory(
                    self.modeler.getReferences(question_id), text)
            )
            if contradicts:
                if mode == 2:
                    label = "incorrect"
                elif mode in (3, 5):
                    label = "contradictory"
                else:
                    label = ""
                emit(answer, label, "NA")
                continue

            # Stage 3: model score, overridden to -1 for irrelevant answers
            # when running on the "beetle" data.
            score = self.modeler.grade(question_id, text)
            if self.datamode == "beetle":
                self.irr.build(self.modeler.getReferences(question_id))
                if self.irr.isIrrelevent(text):
                    score = -1
            emit(answer, self.predict(score), score)

    output(outputdir, columns, rows)