Example #1
0
def reducer_count(k, vs):
    """
    Emit the number of values received for key ``k``.

    ``vs`` is iterated exactly once; the values themselves are ignored and
    only their count is emitted, converted to a string.
    """
    occurrences = sum(1 for _ in vs)
    emit(k, str(occurrences))
Example #2
0
def reducer_simplesum(k, vs):
    """
    Emit the sum of the values received for key ``k``.

    Every value is converted with ``float`` before being added. The running
    total starts as the integer ``0``, so a key with no values emits ``"0"``.
    """
    total = 0
    # Accumulate explicitly, left to right, one value at a time.
    for raw in vs:
        total = total + float(raw)
    emit(k, str(total))
Example #3
0
def reducer_simplesum(k, vs):
    """
    Add up the values of key ``k`` and emit the result as a string.

    Each value goes through ``float`` first. The accumulator starts as the
    integer ``0``, so when ``vs`` yields nothing the emitted text is ``"0"``.
    """
    running = 0
    # Plain left-to-right accumulation over the value stream.
    for item in vs:
        running = running + float(item)
    emit(k, str(running))
Example #4
0
def reducer_count(k, vs):
    """
    Count how many values arrive for key ``k`` and emit that count.

    The values are consumed in a single pass and never inspected; the count
    is emitted as a string.
    """
    n_values = sum(1 for _ in vs)
    emit(k, str(n_values))
Example #5
0
 def step_cleanup(self):
     """Process the last pending record, then emit everything in ``self._output``.

     List values (including instances of list subclasses) are emitted as
     tab-separated fields; any other value is emitted via ``str``.
     """
     # The last record of a multi-line input still has to be computed here.
     # For single-line records this repeats processing that was already done.
     self._compute()

     for key, value in self._output.iteritems():
         # isinstance rather than an exact type comparison, so list
         # subclasses are joined field by field as well.
         if isinstance(value, list):
             emit(key, "\t".join(map(str, value)))
         else:
             emit(key, str(value))
Example #6
0
    def step_cleanup(self):
        """Process the last pending record, then emit everything in ``self._output``.

        List values (including instances of list subclasses) are emitted as
        tab-separated fields; any other value is emitted via ``str``.
        """
        # The last record of a multi-line input still has to be computed here.
        # For single-line records this repeats processing that was already done.
        self._compute()

        for key, value in self._output.iteritems():
            # isinstance rather than an exact type comparison, so list
            # subclasses are joined field by field as well.
            if isinstance(value, list):
                emit(key, "\t".join(map(str, value)))
            else:
                emit(key, str(value))
Example #7
0
def reducer(k, vs):
    """
    Re-emit each value of key ``k`` with a ``#``-separated suffix appended.

    The suffix is chosen from the last tab-separated field of the value:
    ``102`` -> ``A``, ``103`` -> ``B``, ``241`` -> ``C``, anything else -> ``Z``.
    """
    suffix_by_pid = {'102': 'A', '103': 'B', '241': 'C'}
    for v in vs:
        # Only the trailing field matters, so a single split from the right
        # is enough.
        last_field = v.rsplit("\t", 1)[-1]
        emit(k, v + "#" + suffix_by_pid.get(last_field, 'Z'))
Example #8
0
def reducer_listsum(k, vs):
    """
    Sum the tab-separated float lists received for key ``k``, column by column.

    The number of columns is taken from the first value. Later values must
    have at least that many columns (a shorter one raises ``IndexError``);
    any extra columns are ignored. A field that ``float`` cannot parse
    raises ``ValueError``.

    Note that vs is of type hceutil.ReduceValues, which is not subscriptable,
    so it is only iterated here and never indexed.

    The column sums are emitted joined by tabs; a key with no values emits
    an empty string.
    """
    totals = None
    for v in vs:
        # Build a real list: where ``map`` is lazy (Python 3) its result
        # supports neither len() nor indexing.
        row = [float(field) for field in v.split("\t")]
        if totals is None:
            # The first row fixes the column count and seeds the sums.
            totals = row
            continue
        for i in range(len(totals)):
            totals[i] += row[i]

    emit(k, "\t".join(map(str, totals or [])))
Example #9
0
def reducer_listsum(k, vs):
    """
    Sum the tab-separated float lists received for key ``k``, column by column.

    The number of columns is taken from the first value. Later values must
    have at least that many columns (a shorter one raises ``IndexError``);
    any extra columns are ignored. A field that ``float`` cannot parse
    raises ``ValueError``.

    Note that vs is of type hceutil.ReduceValues, which is not subscriptable,
    so it is only iterated here and never indexed.

    The column sums are emitted joined by tabs; a key with no values emits
    an empty string.
    """
    totals = None
    for v in vs:
        # Build a real list: where ``map`` is lazy (Python 3) its result
        # supports neither len() nor indexing.
        row = [float(field) for field in v.split("\t")]
        if totals is None:
            # The first row fixes the column count and seeds the sums.
            totals = row
            continue
        for i in range(len(totals)):
            totals[i] += row[i]

    emit(k, "\t".join(map(str, totals or [])))
Example #10
0
def mapper(k, v):
    """
    Map one text line to a record keyed by its baiduid.

    k is unused (k == None); v is a text line.

    The line is parsed with the module-level ``rec``. A line that parses and
    carries a baiduid other than '-' counts as good; a record is emitted for
    it only when its pid (read from "urlfields", default '0') is 102, 103 or
    241. The emitted value is the tab-joined timesz, ip, url, referer and pid.

    Every other line increments the module-level ``nbad`` counter. That
    includes a line that raises ValueError, which is also written to stderr.
    Always returns True.
    """
    global rec, nbad
    try:
        parsed = rec.parseLine(v)
        if parsed:
            uid = rec.attr("baiduid")
            if uid != '-':
                page_id = rec.attr("urlfields").get("pid", '0')
                if page_id in ("102", "103", "241"):
                    fields = [
                        rec.attr('timesz'),
                        rec.attr('ip'),
                        rec.attr('url'),
                        rec.attr('referer'),
                        page_id,
                    ]
                    emit(uid, "\t".join(fields))
                # Good line, whether or not its pid was one we emit for.
                return True
    except ValueError:
        print >> sys.stderr, v
    nbad += 1
    return True
Example #11
0
def reducer_cat(k, vs):
    """
    Pass every value of key ``k`` straight through, one emit per value.

    Values are converted with ``str`` before being emitted.
    """
    for value in vs:
        text = str(value)
        emit(k, text)
Example #12
0
def reducer_cat(k, vs):
    """
    Simply concatenate: emit each value of key ``k`` unchanged, in order.

    Each value is passed through ``str`` and emitted on its own.
    """
    for item in vs:
        as_text = str(item)
        emit(k, as_text)