def finish(self):
    """Complete every staged entry in reverse list order, then reset the list.

    Logs how many entries are about to be flushed when there is at least one.
    """
    pending_count = len(self.__stage_list)
    if pending_count > 0:
        logger().info('cat finish, flush %d transactions', pending_count)
    # Iterate over a reversed copy so the last staged entry is completed first.
    snapshot = self.__stage_list[::-1]
    for staged in snapshot:
        staged.complete()
    self.__stage_list = []
Exemple #2
0
def track_location_set(track_id, location):
    """Write `location` to the location:hdfs_path column of row `track_id`.

    Returns the result of hbase_util.put_cols for that write.
    """
    target_table = get_track_location_table()
    columns = [('location', 'hdfs_path', location)]
    outcome = hbase_util.put_cols(target_table, track_id, columns)
    logger().info('write hbase rowkey[%s] -> column[%s] content[%s]',
                  track_id, 'location:hdfs_path', location)
    return outcome
Exemple #3
0
def kds_get(batch):
    """Fetch the kds:data cell for the row derived from `batch`.

    Returns the cell value, or None when the lookup yields nothing.
    """
    row = kds_rowkey.gen(batch)
    fetched = hbase_util.get_col(get_table_name(), row, 'kds', 'data')
    logger().info('seek data for row[%s]', row)
    return fetched['kds:data'] if fetched else None
Exemple #4
0
def track_batch_set(track_id, batch):
    """Write `batch` to the batch:newest column of row `track_id`.

    Returns the result of hbase_util.put_cols for that write.
    """
    target_table = get_track_batch_table()
    columns = [('batch', 'newest', batch)]
    outcome = hbase_util.put_cols(target_table, track_id, columns)
    logger().info('write hbase rowkey[%s] -> column[%s] content[%s]',
                  track_id, 'batch:newest', batch)
    return outcome
Exemple #5
0
def kds_save(batch, data):
    """Store `data` in the kds:data cell of the row derived from `batch`.

    Returns True when hbase_util.put_cols reports success, False otherwise.
    """
    row = kds_rowkey.gen(batch)
    target_table = get_table_name()
    columns = [('kds', 'data', data)]
    logger().info('save data for row[%s], table[%s]', row, target_table)
    written = hbase_util.put_cols(target_table, row, columns)
    return bool(written)
def get(table, row, column_family=None, column=None):
    """Read a row (optionally narrowed to one column) with retries.

    Calls _get up to `retry_times` times; after each exception the error is
    logged and the thrift connection is re-established before retrying.

    Returns the value produced by _get on the first successful attempt, or
    None when every attempt raised.
    """
    for _ in range(retry_times):
        try:
            # Hand the fetched data back to the caller; previously it was
            # computed and then dropped, so callers always received None.
            return _get(table, row, column_family, column)
        except Exception as e:  # pylint: disable=W0703
            logger().error('exception:[%s]', str(e))
            thrift_reconn()
    logger().info('get failed %d times, table[%s], rowkey[%s]',
                  retry_times, table, row)
    return None
def put_cols(table, row, cols):  #(column_family, column, data)
    """Write several columns of one row with retries.

    `cols` is a sequence of (column_family, column, data) tuples. _put_cols
    is attempted up to `retry_times` times; after each exception the error is
    logged and the thrift connection is re-established before retrying.

    Returns True if an attempt succeeded, False if all of them raised.
    """
    succ = False
    for _ in range(retry_times):
        try:
            _put_cols(table, row, cols)
            succ = True
            break
        except Exception as e:  # pylint: disable=W0703
            logger().error('exception:[%s]', str(e))
            thrift_reconn()
    if not succ:
        logger().info('put col failed %d times, table[%s], rowkey[%s]',
                      retry_times, table, row)
    # Callers test the result (`if not hbase_util.put_cols(...)`), so the
    # outcome must be reported explicitly.
    return succ
def put_col(table, row, column_family, column, content):
    """Write a single column of one row with retries.

    _put_col is attempted up to `retry_times` times; after each exception the
    error is logged and the thrift connection is re-established before
    retrying.

    Returns True if an attempt succeeded, False if all of them raised.
    """
    succ = False
    for _ in range(retry_times):
        try:
            _put_col(table, row, column_family, column, content)
            succ = True
            break
        except Exception as e:  # pylint: disable=W0703
            logger().error('exception:[%s]', str(e))
            thrift_reconn()
    if not succ:
        logger().info('put col failed %d times, table[%s], rowkey[%s]',
                      retry_times, table, row)
    return succ
Exemple #9
0
 def new_fun(*args, **kw):
     """Call the wrapped function, retrying up to three times on any exception.

     After each failure the traceback is logged, the wrapper sleeps for three
     seconds, runs recover() and re-resolves the target from `obj` under the
     name '__' + fun_name before the next attempt.

     Returns the wrapped function's result, or None if all attempts failed.
     """
     f = fun
     for _ in range(3):
         try:
             return f(*args, **kw)
         except Exception:  # pylint: disable=W0703
             logger().warning(traceback.format_exc())
             time.sleep(3)
             recover()
             # Look the target up again after recover() has run, so the next
             # attempt uses whatever `obj` exposes now.
             f = getattr(obj, '__' + fun_name)
     return None
Exemple #10
0
def track_batch_get(track_id):
    """Read the batch:newest column of row `track_id`.

    Returns the stored value, or None when the lookup yields nothing.
    """
    row = hbase_util.get(get_track_batch_table(), track_id,
                         column_family='batch', column='newest')
    if row:
        newest = row['batch:newest']
        logger().info(
            'get hbase rowkey[%s] column[batch:newest] -> content[%s]',
            track_id, newest)
        return newest
    logger().info('rowkey[%s] not found', track_id)
    return None
Exemple #11
0
 def get(self):
     """Handle a GET request between cat agent reset and finish calls.

     When FLAG().get is truthy the result of self.process() is returned.
     Otherwise an error is logged, the message is written to the response and
     the status is set to 404. cat_helper.agent_finish() runs in every case.
     """
     cat_helper.agent_reset()
     try:
         if not FLAG().get:
             message = 'get method not supported'
             logger().error(message)
             self.write(message)
             self.set_status(404)
             return None
         return self.process()
     finally:
         cat_helper.agent_finish()
Exemple #12
0
def track_location_get(track_id):
    """Read the location:hdfs_path column of row `track_id`.

    Returns the stored value, or None when the lookup yields nothing.
    """
    row = hbase_util.get(get_track_location_table(), track_id,
                         column_family='location', column='hdfs_path')
    if row:
        hdfs_path = row['location:hdfs_path']
        logger().info(
            'get hbase rowkey[%s] column[location:hdfs_path]-> content[%s]',
            track_id, hdfs_path)
        return hdfs_path
    logger().info('rowkey[%s] not found', track_id)
    return None
Exemple #13
0
def image_exist(track_id, tpid, tpe, seq, image_type, batch=''):
    """Report whether an image is stored for the given identifiers.

    Without `batch`, the image exists when batch_manager can name a last
    batch for it. With `batch`, the content:<batch> cell of the image row is
    looked up, except for type '00' / seq '004', which is always reported
    as absent.

    Returns a bool.
    """
    row_key = rowkey.gen(track_id, tpid, tpe, seq, image_type)
    logger().info('row_key[%s], batch[%s]', row_key, batch)
    if not batch:
        last_batch = batch_manager.get_last_batch(track_id, tpid, tpe, seq,
                                                  image_type)
        return last_batch is not None
    # NOTE(review): the reason this type/seq pair is never looked up in HBase
    # is not visible here -- confirm before relying on it.
    if tpe == '00' and seq == '004':
        return False
    table = get_table_name()
    ret = hbase_util.get_col(table, row_key, 'content', batch)
    return bool(ret)
Exemple #14
0
def image_delete(track_id, tpid, tpe, seq, image_type, batch=''):
    """Delete one stored batch of an image and drop it from the batch list.

    When `batch` is empty the last batch reported by batch_manager is used.
    Returns False if no batch can be determined, True after the delete calls
    have been issued.
    """
    key = rowkey.gen(track_id, tpid, tpe, seq, image_type)
    target_batch = batch
    if not target_batch:
        target_batch = batch_manager.get_last_batch(track_id, tpid, tpe, seq,
                                                    image_type)
    if not target_batch:
        logger().error('get empty batch for row_key[%s]', key)
        return False

    image_table = get_table_name()
    doomed_columns = ['content:' + target_batch]
    logger().debug('delete row [%s], batch[%s]', key, target_batch)
    hbase_util.delete(image_table, key, columns=doomed_columns)
    batch_manager.del_batch(track_id, tpid, tpe, seq, image_type,
                            target_batch)
    return True
Exemple #15
0
def del_batch(track_id, tpid, tpe, seq, image_type, batch):
    """Remove `batch` from the stored batch list and write the list back.

    An empty or missing list is written back as the empty string.
    Returns the result of hbase_util.put_cols for the meta:batch_list write.
    """
    current = get_batch_list(track_id, tpid, tpe, seq, image_type)
    logger().debug('old batch_list:%s', current)
    updated = _batch_del(current, batch) if current else ''
    logger().debug('new batch_list:%s', updated)

    # Persist the updated list under the v2 row key.
    target_table = get_table_name()
    row = rowkey.gen_v2(track_id, tpid, tpe, seq, image_type)
    columns = [('meta', 'batch_list', updated)]
    return hbase_util.put_cols(target_table, row, columns)
Exemple #16
0
def image_get(track_id, tpid, tpe, seq, image_type, batch=None):
    """Fetch the content of one stored batch of an image.

    When `batch` is empty the last batch reported by batch_manager is used.
    Returns the content:<batch> cell value, or None when no batch can be
    determined or the lookup yields nothing.
    """
    key = rowkey.gen(track_id, tpid, tpe, seq, image_type)
    wanted = batch
    if not wanted:
        wanted = batch_manager.get_last_batch(track_id, tpid, tpe, seq,
                                              image_type)
    if not wanted:
        logger().error('get empty batch for row_key[%s]', key)
        return None

    image_table = get_table_name()
    logger().debug('row [%s], batch[%s]', key, wanted)
    fetched = hbase_util.get_col(image_table, key, 'content', wanted)
    return fetched['content:' + wanted] if fetched else None
Exemple #17
0
def image_write(track_id, tpid, tpe, seq, image_type, content, task_seq,
                batch):
    """Write an image's metadata and then its content under one row key.

    The meta columns go to batch_manager's table first; the content:<batch>
    cell goes to this module's table only if that first write succeeded.
    Returns True when both writes succeed, False otherwise.
    """
    key = rowkey.gen(track_id, tpid, tpe, seq, image_type)

    meta_table = batch_manager.get_table_name()
    meta_columns = [
        ('meta', 'type', tpe),
        ('meta', 'seq', seq),
        ('meta', 'image_type', image_type),
        ('meta', 'task_seq_' + task_seq, batch),
    ]
    meta_written = hbase_util.put_cols(meta_table, key, meta_columns)
    if not meta_written:
        return False

    content_table = get_table_name()
    logger().info('write row_key[%s] to table[%s]', key, content_table)
    content_columns = [('content', batch, content)]
    return bool(hbase_util.put_cols(content_table, key, content_columns))
 def run(self):
     """Feed every line of the raw data file to process_line, then finish.

     The path comes from FLAG().raw_data; when it is empty an error is
     logged and the process exits with status 1. Each line is stripped of
     surrounding whitespace before being processed. post_process() is
     called once after the last line.
     """
     fpath = FLAG().raw_data
     if not fpath:
         logger().error('bad raw_data path[%s]', fpath)
         sys.exit(1)
     logger().info('raw_data path[%s]', fpath)
     # Use a context manager so the file handle is closed even when
     # process_line raises.
     with open(fpath) as raw_file:
         for line in raw_file:
             self.process_line(line.strip())
     self.post_process()
    def process_line(self, line):
        """Split a tab-separated line into named fields and pass them on.

        A line must contain exactly twelve fields; otherwise it is logged,
        counted under 'error line' and dropped. Valid lines are handed to
        self.process as a dict keyed by field name.
        """
        field_names = (
            'content_id', 'program_id', 'pack_id', 'hits', 'cont_name',
            'cont_recomm', 'publish_time', 'cont_type', 'cont_type_name',
            'media_shape', 'keywords', 'source',
        )
        columns = line.split('\t')
        if len(columns) != 12:
            logger().warning('error line[%s]', line)
            counter.inc('error line')
            return

        # Field names are listed in column order, so pairing is positional.
        record = dict(zip(field_names, columns))
        self.process(record)
Exemple #20
0
def image_upload(track_id,
                 tpid,
                 tpe,
                 seq,
                 image_type,
                 content,
                 task_seq='!!!',
                 batch='!!!'):
    """Write an image and register its batch, timing both stages.

    Empty `task_seq` / `batch` values fall back to '!!!'. Returns False as
    soon as either stage fails (the timer is then neither finished nor
    dumped); returns True after both stages succeed and the timings have
    been logged.
    """
    task_seq = task_seq or '!!!'
    batch = batch or '!!!'

    stopwatch = Timer()
    stopwatch.stage_begin('image write')
    written = image_write(track_id, tpid, tpe, seq, image_type, content,
                          task_seq, batch)
    if not written:
        return False

    stopwatch.stage_begin('set batch')
    registered = batch_manager.set_batch(track_id, tpid, tpe, seq,
                                         image_type, batch)
    if not registered:
        return False

    stopwatch.finish()
    logger().debug(stopwatch.dump())
    return True
    def image_get(self, track_id, tpid):
        """Fetch an image through the Hadoop job and return its raw bytes.

        The task id is the part of `track_id` before the first '_'. The
        Hadoop command writes its result to './<tpid>'; the text after the
        last ',' in that file is base64-decoded and returned.

        Returns the decoded data, or None when the task info is missing or
        the command exits with a non-zero status.
        """
        task_id = track_id.split('_')[0]
        task_seq, num_reducer = self.get_task_info(task_id)
        logger().info('task_seq:%s, num_reducer:%s', task_seq, str(num_reducer))
        if not task_seq or not num_reducer:
            return None

        out_path = './' + tpid
        # NOTE(review): the command line is assembled by string formatting
        # and executed through a shell; if tpid or task_seq can come from
        # untrusted input this should become an argument-list subprocess
        # call -- confirm with callers.
        cmd = '/opt/hadoop-3.1.0/bin/hadoop jar ' + \
              'jar/HDFS004Store.jar kd.mapreduce.Hdfs004Store2 '
        arg = '%s %s %d %s' % (tpid, task_seq, num_reducer, out_path)
        cmd = "%s %s" % (cmd, arg)
        logger().info('cmd:%s', cmd)
        if os.system(cmd) != 0:
            return None

        # Close the handle deterministically instead of leaving it to the GC.
        with open(out_path) as out_file:
            data = out_file.read()
        try:
            jpg = base64.b64decode(data.split(',')[-1])
        finally:
            # Remove the temporary output even when decoding fails.
            os.remove(out_path)
        logger().info('jpg size:%d', len(jpg))
        return jpg
Exemple #22
0
def api_v2_main():
    """Upload one picture per stdin line via image_upload.

    Each input line holds three whitespace-separated fields:
    track_id, tpid and batch. The picture is read from
    /mnt/data1/mongo/jingjingg_004/<tpid>_00_004.jpg and uploaded with
    type "00", seq "004" and image type "jpg". Failures are logged and the
    loop moves on to the next line.
    """
    import sys

    tpe = "00"
    seq = "004"
    image_type = "jpg"
    for line in sys.stdin:
        fields = line.strip().split()
        if len(fields) != 3:
            # Blank or malformed lines used to abort the whole run.
            logger().warning('skip malformed input line[%s]', line.strip())
            continue
        # NOTE(review): batch is parsed but not forwarded to image_upload,
        # which therefore uses its default -- confirm this is intended.
        track_id, tpid, batch = fields
        pic1 = '%s_00_004.jpg' % tpid
        pic = '/mnt/data1/mongo/jingjingg_004/%s_00_004.jpg' % tpid

        try:
            with open(pic) as f:
                content = f.read().strip()
        except (IOError, OSError):
            logger().info("has empty pic, image is %s", pic1)
            continue

        try:
            res = image_upload(track_id, tpid, tpe, seq, image_type, content)
        except Exception as e:  # pylint: disable=W0703
            # Keep the run best-effort, but report the real cause instead of
            # labelling every failure as an empty picture.
            logger().error('%s upload raised, image is %s, exception:[%s]',
                           tpid, pic1, str(e))
            continue

        if res:
            logger().info('%s is succes upload,image is       %s',
                          tpid, pic1)
        else:
            logger().info('%s is failed upload,image is       %s',
                          tpid, pic1)
Exemple #23
0
def image_stat(track_id, tpe, seq_list, image_type):
    """Scan the batch table for one track and group matching tpids by seq.

    Rows are scanned from '<magic>-<track_id>' up to that prefix plus '~'.
    A row is kept only when its meta:type equals `tpe`, its meta:seq is in
    `seq_list` and its meta:image_type equals `image_type`; other rows are
    logged and skipped.

    Returns a dict mapping each seq in `seq_list` to the list of tpids
    found for it.
    """
    conn = hbase_util.get_thrift_conn()
    table_name = batch_manager.get_table_name()
    logger().info('scan table:%s', table_name)
    table = conn.table(table_name)
    start_key = '%d-%s' % (rowkey.compute_magic(track_id), track_id)

    ret = {}
    for seq in seq_list:
        ret[seq] = []

    for row_data in table.scan(row_start=start_key,
                               row_stop=start_key + '~',
                               batch_size=300):
        row_key = row_data[0]
        data = row_data[1]

        # Read each field with .get() so a missing column is logged and
        # skipped instead of raising KeyError inside the log call.
        row_type = data.get('meta:type')
        if row_type is None or tpe != row_type:
            logger().info('bad type[%s], rowkey[%s]', row_type, row_key)
            continue

        row_seq = data.get('meta:seq')
        if row_seq is None or row_seq not in seq_list:
            logger().info('bad seq [%s], rowkey[%s]', row_seq, row_key)
            continue

        row_image_type = data.get('meta:image_type')
        if row_image_type is None or image_type != row_image_type:
            logger().info('bad image_type[%s], rowkey[%s]',
                          row_image_type, row_key)
            continue

        # NOTE(review): rows with an empty batch_list are logged but still
        # counted; unlike the checks above there is no `continue` here --
        # confirm this is intended.
        if not data.get('meta:batch_list'):
            logger().info('empty batch_list for rowkey[%s]', row_key)

        # tpid is the first two '_'-separated parts of the third
        # '-'-separated segment of the row key.
        tpid = '_'.join(row_key.split('-')[2].split('_')[0:2])
        ret[row_seq].append(tpid)

    for key, val in ret.items():
        logger().info('stat result %s:%d', key, len(val))

    return ret
Exemple #24
0
def kds_del(batch):
    """Delete the row derived from `batch`.

    Returns the result of hbase_util.delete.
    """
    row = kds_rowkey.gen(batch)
    logger().info('delete rowkey[%s]', row)
    return hbase_util.delete(get_table_name(), row)
Exemple #25
0
 def process(self):
     """Log that processing is not implemented and exit with status -1."""
     message = 'not implement'
     logger().error(message)
     sys.exit(-1)
 def __init__(self):
     """Log the start-up and call cat.init with the "kd.hbase" name."""
     logger().info('cat init')
     domain = "kd.hbase"
     cat.init(domain)
    table.put(row, col_dict, wal=True)
    return True


def put_cols(table, row, cols):  #(column_family, column, data)
    """Write several columns of one row with retries.

    `cols` is a sequence of (column_family, column, data) tuples. _put_cols
    is attempted up to `retry_times` times; after each exception the error is
    logged and the thrift connection is re-established before retrying.

    Returns True if an attempt succeeded, False if all of them raised.
    """
    succ = False
    for _ in range(retry_times):
        try:
            _put_cols(table, row, cols)
            succ = True
            break
        except Exception as e:  # pylint: disable=W0703
            logger().error('exception:[%s]', str(e))
            thrift_reconn()
    if not succ:
        logger().info('put col failed %d times, table[%s], rowkey[%s]',
                      retry_times, table, row)
    return succ


def _put_col(table, row, column_family, column, content):
    """Write `content` to the <column_family>:<column> cell of `row`.

    Uses the current thrift connection and passes wal=True to the put call.
    Always returns True; errors surface as exceptions from the calls made.
    """
    qualified_column = '%s:%s' % (column_family, column)
    table_handle = get_thrift_conn().table(table)
    table_handle.put(row, {qualified_column: content}, wal=True)
    return True


def put_col(table, row, column_family, column, content):
    succ = False
    for i in range(retry_times):
        try:
Exemple #28
0
 def recover(self):
     master = self.__get_master_from_sentinel()
     logger().info('switch master to [%s]', master)