def __genFeature(self, num_of_homo_feats):
    """Build per-query summary features from the per-term ``self.scq`` scores.

    For every query in ``self.qry`` each term is scored with
    ``self.scq(df, term)``, where ``df`` is the value returned by
    ``ProcDoc.docFreq(self.doc)``. The scores of one query are then
    reduced to four statistics.

    Args:
        num_of_homo_feats: Accepted for interface compatibility; this
            implementation does not read it.

    Returns:
        dict mapping each query id to a NumPy array holding
        ``[sum, max, min, mean]`` of that query's term scores.

    Note:
        A query with no terms is not special-cased here; NumPy's max/min
        reductions raise ``ValueError`` on an empty array.
    """
    # TODO: candidate extra features that are not implemented yet
    # (``a`` stands for the per-query score array):
    #   geometric mean:  a.prod() ** (1.0 / len(a))
    #   harmonic mean:   len(a) / np.sum(1.0 / a)
    #   variation:       var = variation(a, axis=0); idmax = np.argmax(var)

    # Parenthesised single-argument print behaves the same on Python 2
    # and is also valid Python 3.
    print("generate h features")

    queries = self.qry
    df = ProcDoc.docFreq(self.doc)

    homo_feats = {}
    for q_id, q_terms in queries.items():
        npscq = np.asarray([self.scq(df, q_term) for q_term in q_terms])
        homo_feats[q_id] = np.asarray([
            np.sum(npscq),
            np.amax(npscq),
            np.amin(npscq),
            np.mean(npscq),
        ])
    return homo_feats
def __genFeature(self, num_of_homo_feats):
    """Build per-query summary features from the per-term ``self.__scq`` scores.

    For every query in ``self.qry`` each term is scored with
    ``self.__scq(df, term)``, where ``df`` is the value returned by
    ``ProcDoc.docFreq(self.doc)``. The scores of one query are then
    reduced to seven values.

    Args:
        num_of_homo_feats: Accepted for interface compatibility; this
            implementation does not read it.

    Returns:
        dict mapping each query id to a NumPy array holding, in order:
        standard deviation, sum, max, min, mean, the result of
        ``self.__harm_mean(scores)`` and the result of
        ``self.__geo_mean(scores)`` (presumably the harmonic and
        geometric means -- confirm against those helpers).

    Note:
        A query with no terms is not special-cased here; behaviour then
        depends on the helper methods and on NumPy's reductions (max/min
        raise ``ValueError`` on an empty array).
    """
    # Parenthesised single-argument print behaves the same on Python 2
    # and is also valid Python 3.
    print("generate h features")

    queries = self.qry
    df = ProcDoc.docFreq(self.doc)

    homo_feats = {}
    for q_id, q_terms in queries.items():
        npscq = np.asarray([self.__scq(df, q_term) for q_term in q_terms])
        harm_mean = self.__harm_mean(npscq)
        geo_mean = self.__geo_mean(npscq)
        homo_feats[q_id] = np.asarray([
            np.std(npscq),
            np.sum(npscq),
            np.amax(npscq),
            np.amin(npscq),
            np.mean(npscq),
            harm_mean,
            geo_mean,
        ])
    return homo_feats