def __call__(self, data):
    """
    Mapper Program

    Tally the outcomes reported by ``self.varify`` over one input split.

    Inputs:
        data: iterable of ``(docID, doc)`` pairs. ``doc`` is text with one
              record per line; ``docID`` is not used. Every line produced
              by ``doc.split("\\n")`` (blank ones included) is parsed into
              a float64 vector and passed to ``self.varify``.
    Outputs:
        Four ``(key, count)`` pairs, always in this order:
        ``TRUE_T_STR``, ``TRUE_F_STR``, ``FALSE_T_STR``, ``FALSE_F_STR``.
        An outcome equal to none of the four codes is not counted.
    """
    # One slot per emitted key, in emission order:
    #   0 -> TRUE_T, 1 -> TRUE_F, 2 -> FALSE_T, 3 -> FALSE_F
    tallies = [0, 0, 0, 0]
    # (code, slot) pairs; the order is the order in which codes are compared
    # against the outcome, and the first match wins.
    checks = ((TRUE_F, 1), (TRUE_T, 0), (FALSE_F, 3), (FALSE_T, 2))

    for docID, doc in data:
        for line in doc.split("\n"):
            # The separator is detected once, from the first line seen, and
            # cached on the instance for all later lines and calls.
            if self.SEP is None:
                self.SEP = get_sep(line)
            vector = np.fromstring(line, dtype=np.float64, sep=self.SEP)
            outcome = self.varify(vector)
            for code, slot in checks:
                if outcome == code:
                    tallies[slot] += 1
                    break

    yield TRUE_T_STR, tallies[0]
    yield TRUE_F_STR, tallies[1]
    yield FALSE_T_STR, tallies[2]
    yield FALSE_F_STR, tallies[3]
def __call__(self, data):
    """
    Mapper Program: accumulate the results of ``self.calculate`` over a split.

    Inputs:
        data: the whole split block, as an iterable of ``(docID, doc)``
              pairs. ``doc`` is text with one record per line; ``docID`` is
              not used. Blank lines are skipped.
    Outputs:
        key:   the constant string ``"nonused"``
        value: ``(resultD.tolist(), resultH.tolist())`` where resultH and
               resultD are the ``+``-sums of the ``(localH, localD)`` pairs
               returned by ``self.calculate`` for every record.
        Nothing is emitted when the split contains no records, because
        there is no accumulated result to convert.
    """
    resultH = None
    resultD = None
    for _doc_id, doc in data:
        for term in doc.split("\n"):
            # A trailing newline (or an empty document) yields an empty
            # term that carries no record; do not feed it to the parser.
            if not term.strip():
                continue
            # Detect the separator once, from the first real record, and
            # cache it on the instance.
            if self.SEP is None:
                self.SEP = get_sep(term)
            point = np.fromstring(term, dtype=np.float64, sep=self.SEP)
            localH, localD = self.calculate(point)
            if resultH is None:
                # First record: adopt its results as the running totals.
                resultH = localH
                resultD = localD
            else:
                # Not in-place, so objects returned by calculate() are
                # never mutated.
                resultH = resultH + localH
                resultD = resultD + localD

    # No record was seen: the totals are still None and have no .tolist().
    if resultH is None:
        return

    debug(np.shape(resultH))
    debug(np.shape(resultD))
    yield "nonused", (resultD.tolist(), resultH.tolist())
def __call__(self, data):
    """
    Mapper Program: count the records of a split and sum their D values.

    Inputs:
        data: iterable of ``(docID, doc)`` pairs. ``doc`` is text with one
              record per line; ``docID`` is not used. Blank lines are
              skipped so they are neither counted nor passed to
              ``self.getDValue``.
    Outputs:
        key:   the constant string ``"nonused"``
        value: ``(cnt, total)`` where ``cnt`` is the number of records and
               ``total`` is the running sum of ``self.getDValue(point)``
               over those records (``(0, 0.0)`` for an empty split).
               Note this is a sum, not a mean; presumably the reducer
               divides by the count -- confirm against the reducer.
    """
    cnt = 0
    total = 0.0
    for _doc_id, doc in data:
        for term in doc.split("\n"):
            # A trailing newline yields an empty term; counting it would
            # inflate cnt and hand an empty vector to getDValue.
            if not term.strip():
                continue
            # Detect the separator once, from the first real record, and
            # cache it on the instance.
            if self.SEP is None:
                self.SEP = get_sep(term)
            point = np.fromstring(term, dtype=np.float64, sep=self.SEP)
            total += self.getDValue(point)
            cnt += 1
    yield "nonused", (cnt, total)
def __call__(self, data):
    """
    Mapper Program: emit one rewritten line per input record.

    For every non-blank line the record is parsed into a float64 vector,
    its last element is read as an integer label, and the vector is
    replaced by ``self.extend_point(point)``. In the extended vector the
    last element is then set to ``self.getDValue`` of the *original*
    vector and the second-to-last element to the label (as a float).

    Inputs:
        data: iterable of ``(docID, doc)`` pairs. ``doc`` is text with one
              record per line; ``docID`` is not used. Blank lines are
              skipped (they would otherwise fail on ``point[-1]``).
    Outputs:
        key:   the modified record, its elements joined with ","
        value: the constant string "\\t"
    """
    for _doc_id, doc in data:
        for term in doc.split("\n"):
            # A trailing newline yields an empty term, which parses to an
            # empty array and would raise IndexError on point[-1] below.
            if not term.strip():
                continue
            # Detect the separator once, from the first real record, and
            # cache it on the instance.
            if self.SEP is None:
                self.SEP = get_sep(term)
            point = np.fromstring(term, dtype=np.float64, sep=self.SEP)
            label = int(point[-1])
            # Must be computed before the point is extended/overwritten.
            last_value = self.getDValue(point)
            # NOTE(review): extend_point is assumed to return a mutable
            # sequence with at least two elements -- confirm.
            point = self.extend_point(point)
            point[-1] = last_value
            point[-2] = float(label)
            yield ",".join(str(value) for value in point), "\t"