Beispiel #1
0
    def __call__(self, data):
        """
        Mapper Program:

        Classify every point of the split with ``self.varify`` and count how
        often each of the four possible outcomes occurs.

        Inputs:
            data, an iterable of (docID, doc) pairs; each doc is a block of
            text holding one point per line

        Outputs:
            four (key, value) pairs, always in this order:
                TRUE_T_STR,  number of points verified as TRUE_T
                TRUE_F_STR,  number of points verified as TRUE_F
                FALSE_T_STR, number of points verified as FALSE_T
                FALSE_F_STR, number of points verified as FALSE_F
        """

        true_cnt_t = 0
        true_cnt_f = 0
        false_cnt_t = 0
        false_cnt_f = 0

        for _doc_id, doc in data:
            for term in doc.split("\n"):
                # A block that ends with a newline produces an empty trailing
                # term. It holds no numbers, so it is not a point and must not
                # be used to guess the separator either.
                if not term.strip():
                    continue

                # Detect the separator lazily from real data and cache it on
                # the instance for all following lines.
                if self.SEP is None:
                    self.SEP = get_sep(term)

                point = np.fromstring(term, dtype=np.float64, sep=self.SEP)
                result = self.varify(point)

                # Results matching none of the four constants are ignored.
                if result == TRUE_F:
                    true_cnt_f += 1
                elif result == TRUE_T:
                    true_cnt_t += 1
                elif result == FALSE_F:
                    false_cnt_f += 1
                elif result == FALSE_T:
                    false_cnt_t += 1

        yield TRUE_T_STR, true_cnt_t
        yield TRUE_F_STR, true_cnt_f
        yield FALSE_T_STR, false_cnt_t
        yield FALSE_F_STR, false_cnt_f
Beispiel #2
0
    def __call__(self, data):
        """
        Mapper Program:

        Run ``self.varify`` on every point of the split and tally the four
        possible verification outcomes.

        Inputs:
            data, an iterable of (docID, doc) pairs; each doc is a block of
            text holding one point per line

        Outputs:
            four (key, value) pairs, always in this order:
                TRUE_T_STR,  number of points verified as TRUE_T
                TRUE_F_STR,  number of points verified as TRUE_F
                FALSE_T_STR, number of points verified as FALSE_T
                FALSE_F_STR, number of points verified as FALSE_F
        """

        true_cnt_t = 0
        true_cnt_f = 0
        false_cnt_t = 0
        false_cnt_f = 0

        for _doc_id, doc in data:
            for term in doc.split("\n"):
                # Splitting a newline-terminated block leaves an empty last
                # term; skip it (and any other blank line) so it is neither
                # verified as a point nor used for separator detection.
                if not term.strip():
                    continue

                # The separator is guessed from the first usable line and then
                # kept on the instance.
                if self.SEP is None:
                    self.SEP = get_sep(term)

                point = np.fromstring(term, dtype=np.float64, sep=self.SEP)
                result = self.varify(point)

                # A result equal to none of the four constants is not counted.
                if result == TRUE_F:
                    true_cnt_f += 1
                elif result == TRUE_T:
                    true_cnt_t += 1
                elif result == FALSE_F:
                    false_cnt_f += 1
                elif result == FALSE_T:
                    false_cnt_t += 1

        yield TRUE_T_STR, true_cnt_t
        yield TRUE_F_STR, true_cnt_f
        yield FALSE_T_STR, false_cnt_t
        yield FALSE_F_STR, false_cnt_f
Beispiel #3
0
    def __call__(self, data):
        """
        Mapper Program:

        Sum the per-point contributions returned by ``self.calculate`` over
        the whole split.

        Inputs:
            data, which is the whole split block data: an iterable of
            (docID, doc) pairs where each doc holds one point per line

        Outputs:
            key: the constant string "nonused"
            value: (resultD, resultH) converted to nested lists -- note that
                   D comes first although calculate() returns (H, D)

            Nothing is emitted when the split contains no points.
        """

        # STEP 1: parse every line into a point and accumulate its H and D.
        resultH = None
        resultD = None

        for _doc_id, doc in data:
            for term in doc.split("\n"):
                # A block ending with a newline yields an empty trailing term;
                # it carries no numbers and must not reach calculate() or the
                # separator detection.
                if not term.strip():
                    continue

                # Guess the separator from real data and cache it.
                if self.SEP is None:
                    self.SEP = get_sep(term)

                point = np.fromstring(term, dtype=np.float64, sep=self.SEP)
                (localH, localD) = self.calculate(point)

                if resultH is None:
                    resultH = localH
                    resultD = localD
                else:
                    # Rebind instead of using += so the objects handed back by
                    # calculate() are never modified in place.
                    resultH = resultH + localH
                    resultD = resultD + localD

        debug(np.shape(resultH))
        debug(np.shape(resultD))

        # An empty split leaves both accumulators at None; calling .tolist()
        # on them would raise AttributeError, so emit nothing instead.
        if resultH is None or resultD is None:
            return

        yield "nonused", (resultD.tolist(), resultH.tolist())
Beispiel #4
0
    def __call__(self, data):
        """
        Mapper Program:

        Accumulate, over every point in the split, the two terms returned by
        ``self.calculate``.

        Inputs:
            data, which is the whole split block data: an iterable of
            (docID, doc) pairs where each doc holds one point per line

        Outputs:
            key: the constant string "nonused"
            value: (resultD, resultH) converted to nested lists -- D is
                   emitted first even though calculate() returns (H, D)

            Nothing is emitted when the split contains no points.
        """

        # STEP 1: read the points line by line and sum their H and D terms.
        resultH = None
        resultD = None

        for _doc_id, doc in data:
            for term in doc.split("\n"):
                # Blank lines (typically the empty piece after a trailing
                # newline) hold no point; skip them before separator detection
                # and before calling calculate().
                if not term.strip():
                    continue

                # The separator is detected lazily and remembered on self.
                if self.SEP is None:
                    self.SEP = get_sep(term)

                point = np.fromstring(term, dtype=np.float64, sep=self.SEP)
                (localH, localD) = self.calculate(point)

                if resultH is None:
                    resultH = localH
                    resultD = localD
                else:
                    # Plain addition creates new objects, leaving whatever
                    # calculate() returned untouched.
                    resultH = resultH + localH
                    resultD = resultD + localD

        debug(np.shape(resultH))
        debug(np.shape(resultD))

        # With no points the accumulators are still None and have no
        # .tolist(); stop here rather than fail with AttributeError.
        if resultH is None or resultD is None:
            return

        yield "nonused", (resultD.tolist(), resultH.tolist())
Beispiel #5
0
    def __call__(self, data):
        """
        Mapper Program:

        Count the points in the split and add up ``self.getDValue`` over them.

        Inputs:
            data, an iterable of (docID, doc) pairs; each doc is a block of
            text holding one point per line

        Outputs:
            key: the constant string "nonused"
            value: (cnt, means) where cnt is the number of points seen and
                   means is the running SUM of their D values (not yet
                   divided by cnt -- presumably done downstream; verify
                   against the reducer)
        """

        cnt = 0
        means = 0.0

        for _doc_id, doc in data:
            for term in doc.split("\n"):
                # A newline-terminated block yields an empty trailing term.
                # Counting it would inflate cnt without contributing a real
                # point, so blank lines are skipped.
                if not term.strip():
                    continue

                # Guess the separator from real data and cache it.
                if self.SEP is None:
                    self.SEP = get_sep(term)

                point = np.fromstring(term, dtype=np.float64, sep=self.SEP)
                means += self.getDValue(point)
                cnt += 1

        yield "nonused", (cnt, means)
Beispiel #6
0
    def __call__(self, data):
        """
        Mapper Program:

        Rewrite every input line: the point is extended with
        ``self.extend_point`` and its last two slots are overwritten with the
        original label and the point's D value.

        Inputs:
            data, an iterable of (docID, doc) pairs; each doc is a block of
            text holding one point per line, the label being the last number
            on the line

        Outputs:
            one pair per input point
            key: the modified point as a comma-joined string
            value: the constant string "\\t"
        """

        for _doc_id, doc in data:
            for term in doc.split("\n"):
                # Blank lines (e.g. the empty piece after a trailing newline)
                # hold no numbers, so there is no label for point[-1] below
                # to read; skip them.
                if not term.strip():
                    continue

                # Guess the separator from real data and cache it.
                if self.SEP is None:
                    self.SEP = get_sep(term)

                point = np.fromstring(term, dtype=np.float64, sep=self.SEP)

                # Both values must be taken from the ORIGINAL point, before
                # extend_point() replaces it.
                label = int(point[-1])
                last_value = self.getDValue(point)

                point = self.extend_point(point)
                point[-1] = last_value
                point[-2] = float(label)

                output = ",".join(str(value) for value in point)
                yield output, "\t"