def getId(table, val):
    """Look up the ``id`` of a user or tag by its name.

    Args:
        table: ``'users'`` (matched on ``username``) or ``'tags'`` (matched
            on ``tag_name``).
        val: The name bound to the query's single placeholder.

    Returns:
        The first column of the fetched row, or ``None`` when the table is
        not supported, no row matches, or the query fails (the failure is
        reported through ``utils.PrintException``).
    """
    # Pick the statement before connecting so an unsupported table cannot
    # open (and then leak) a database connection.
    if table == 'users':
        sql = '''select id from users where username = %s'''
    elif table == 'tags':
        sql = '''select id from tags where tag_name = %s'''
    else:
        return None

    config = utils.get_config()
    db_c = config['steem_config']
    db = pymysql.connect(
        host=db_c['host'],
        port=db_c['port'],
        user=db_c['user'],
        password=db_c['pass'],
        charset='utf8mb4',
        db=db_c['db'],
        autocommit=False)
    try:
        cur = db.cursor()
        try:
            cur.execute(sql, val)
            data = cur.fetchone()
        finally:
            cur.close()
        return data[0] if data is not None else None
    except Exception:
        utils.PrintException()
        return None
    finally:
        # Runs on success, on failure and on interruption alike.
        db.close()
def getData(table, val):
    """Fetch one full row from ``comments`` or ``comments_votes``.

    Args:
        table: ``'comments'`` (matched on ``author_text`` and ``permlink``)
            or ``'comments_votes'`` (matched on ``user_id`` and
            ``comment_id``).
        val: The two values bound to the query placeholders, in the order
            listed above.

    Returns:
        The row returned by ``fetchone()`` (``None`` when nothing matches),
        or ``None`` when the table is not supported or the query fails (the
        failure is reported through ``utils.PrintException``).
    """
    # Pick the statement before connecting so an unsupported table cannot
    # open (and then leak) a database connection.
    if table == 'comments':
        sql = '''select * from comments
            where author_text = %s and permlink = %s
            limit 1'''
    elif table == 'comments_votes':
        sql = '''select * from comments_votes
            where user_id = %s and comment_id = %s
            limit 1'''
    else:
        return None

    config = utils.get_config()
    db_c = config['steem_config']
    db = pymysql.connect(
        host=db_c['host'],
        port=db_c['port'],
        user=db_c['user'],
        password=db_c['pass'],
        charset='utf8mb4',
        db=db_c['db'],
        autocommit=False)
    try:
        cur = db.cursor()
        try:
            cur.execute(sql, val)
            return cur.fetchone()
        finally:
            cur.close()
    except Exception:
        utils.PrintException()
        return None
    finally:
        db.close()
def updateCount(undo_id):
    """Increment the ``count`` column of one ``undo_op`` row.

    Args:
        undo_id: Value bound to ``undo_op.id`` in the update statement.

    Returns:
        ``None``. A failed update is rolled back and reported through
        ``utils.PrintException``; it is not re-raised.
    """
    config = utils.get_config()
    db_c = config['steem_config']
    db = pymysql.connect(
        host=db_c['host'],
        port=db_c['port'],
        user=db_c['user'],
        password=db_c['pass'],
        charset='utf8mb4',
        db=db_c['db'],
        autocommit=False)
    print('get_in_update_count: ', undo_id)
    sql = 'update undo_op set count = count + 1 where id = %s'
    # Stays None if cursor creation itself fails, so cleanup never touches
    # an unbound name.
    cur = None
    try:
        cur = db.cursor()
        cur.execute(sql, undo_id)
        db.commit()
        print('update_count_success: ', undo_id)
    except Exception:
        print('update_count_failed: ', undo_id)
        # Report first: a failing rollback must not hide the original error.
        utils.PrintException(undo_id)
        db.rollback()
    finally:
        if cur is not None:
            cur.close()
        db.close()
async def process(self, block_num, block_time, trans_id, ops):
    """Collect tag names from the root-level comments of one transaction.

    For every ``comment`` operation whose ``parent_author`` is empty, the
    ``parent_permlink`` is always recorded, followed by each entry of the
    ``tags`` list in its ``json_metadata`` for which ``self.checkExist``
    returns ``False``.

    Args:
        block_num: Block number, used only when reporting an error.
        block_time: Unused here; kept for the shared ``process`` signature.
        trans_id: Transaction id, used only when reporting an error.
        ops: Sequence of ``[op_type, op_detail]`` pairs.

    Returns:
        ``self.processed_data``: ``{'data': [(tag,), ...], 'undo': []}``.
    """
    # print('process %i blcok\'s ops' % block_num)
    self.processed_data = {'data': [], 'undo': []}
    for op_idx, op in enumerate(ops):
        op_type = op[0]
        op_detail = op[1]
        if not (op_type == 'comment' and op_detail['parent_author'] == ''):
            # print('unknown type:', op_type)
            continue
        self.processed_data['data'].append((op_detail['parent_permlink'], ))
        if op_detail['json_metadata'] == '':
            continue
        try:
            json_metadata = json.loads(op_detail['json_metadata'])
            if not isinstance(json_metadata, dict):
                print('invalid json_metadata:', json_metadata)
                continue
            tags = json_metadata.get('tags')
            # Only a real list holds tag names; iterating a string here
            # would record every single character as a tag.
            if not isinstance(tags, list):
                continue
            for tag in tags:
                # Compared with == False (not truthiness) on purpose: any
                # other answer from checkExist, None included, means "skip".
                if await self.checkExist(tag) == False:
                    self.processed_data['data'].append((tag, ))
        except Exception as e:
            utils.PrintException([block_num, trans_id, op_idx, op, e])
            continue
    # print('processed:', self.processed_data)
    return self.processed_data
async def process(self, block_num, block_time, trans_id, ops):
    """Split the comment operations of one transaction into inserts and undo work.

    * ``comment`` whose body parses as a diff-match-patch patch: treated as
      an edit and queued in ``undo``.
    * ``comment`` whose body is not a patch: queued in ``data`` as a new row
      when ``self.checkExist(author, permlink)`` returns ``False``, otherwise
      queued in ``undo`` as an edit.
    * ``delete_comment``: always queued in ``undo``.
    * Any operation that raises is queued in ``undo`` and reported.

    Args:
        block_num: Block number stored in each undo entry.
        block_time: Used as ``created_at``/``updated_at`` of new rows and
            stored in each undo entry.
        trans_id: Transaction id stored in each undo entry.
        ops: Sequence of ``[op_type, op_detail]`` pairs.

    Returns:
        ``self.processed_data``: ``{'data': [...], 'undo': [...]}``.
    """
    global task_type
    # print('process %i blcok\'s ops' % block_num)
    self.processed_data = {'data': [], 'undo': []}

    def undo_entry(position, operation):
        # Row layout shared by every deferred operation.
        return (block_num, trans_id, position, json.dumps(operation),
                tasks.getTypeId(task_type), block_time)

    for op_idx, op in enumerate(ops):
        try:
            op_type = op[0]
            op_detail = op[1]
            if op_type == 'comment':
                json_metadata = op_detail['json_metadata']
                parent_author_text = op_detail['parent_author']
                parent_permlink = op_detail['parent_permlink']
                author_text = op_detail['author']
                permlink = op_detail['permlink']
                title = op_detail['title']
                body = op_detail['body']

                # Probe whether the body is a patch. Only the parse is
                # guarded, and only by `except Exception`, so task
                # cancellation and interrupts are not swallowed and a failing
                # undo append is not mistaken for "body is not a patch".
                dmp = dmp_module.diff_match_patch()
                try:
                    dmp.patch_fromText(body)
                    body_is_patch = True
                except Exception:
                    body_is_patch = False

                if body_is_patch:
                    # Edited comment: needs the stored body, so defer it.
                    self.processed_data['undo'].append(undo_entry(op_idx, op))
                    continue

                is_exist = await self.checkExist(author_text, permlink)
                if is_exist == False:
                    # Brand-new comment.
                    self.processed_data['data'].append((
                        None,  # parent_id
                        permlink,
                        title,
                        body,
                        json_metadata,
                        parent_permlink,
                        block_time,  # created_at
                        block_time,  # updated_at
                        False,  # is_del
                        parent_author_text,
                        author_text))
                else:
                    # Edited without diff-match-patch (full body resent).
                    self.processed_data['undo'].append(undo_entry(op_idx, op))
            elif op_type == 'delete_comment':
                self.processed_data['undo'].append(undo_entry(op_idx, op))
                print('do_later_del', block_num, trans_id, op_idx)
        except Exception as e:
            self.processed_data['undo'].append(undo_entry(op_idx, op))
            utils.PrintException([block_num, trans_id, op_idx, e])
    # print('processed:', self.processed_data)
    return self.processed_data
def parseVote(val):
    """Retry one deferred ``vote`` operation taken from the undo queue.

    Args:
        val: Indexable undo row read as ``[0]`` undo id, ``[1]`` block
            number, ``[2]`` transaction id, ``[3]`` operation index, ``[4]``
            JSON-encoded operation, ``[5]`` task type, ``[6]`` current retry
            count, ``[7]`` block time.

    Returns:
        Whatever ``updateData``/``insertData``/``updateCount`` returns for
        the branch taken, or ``None`` when the operation is not a ``vote``.
    """
    config = utils.get_config()
    undo_count = config['undo_count']  # read but not used by this parser
    undo_id, block_num, trans_id, op_idx = val[0], val[1], val[2], val[3]
    op = json.loads(val[4])
    task_type = val[5]  # read but not used by this parser
    block_time = val[7]
    current_count = val[6]  # read but not used by this parser
    try:
        kind, detail = op[0], op[1]
        print('parse_vote:', undo_id)
        if kind != 'vote':
            return None
        print('get_in_vote')

        # Store the magnitude in `weight` and the sign in `updown`.
        weight = detail['weight']
        updown = weight >= 0
        if not updown:
            weight = (-1) * weight

        voter_id = getId('users', detail['voter'])
        if voter_id is None:
            print('not_found_voter_id', undo_id)
            return updateCount(undo_id)

        comment = getData('comments', (detail['author'], detail['permlink']))
        if comment is None:
            print('not_found_comment', block_num, trans_id, op_idx)
            return updateCount(undo_id)

        # vote to comment
        comment_id = comment[0]
        vote = getData('comments_votes', (voter_id, comment_id))
        if vote is None:
            # insert comment vote
            new_vote = (comment_id, voter_id, weight, updown,
                        block_time, block_time)
            return insertData('comments_votes', undo_id, (new_vote, ))

        # edit vote: vote[5] is passed through as created_at
        changed_vote = (comment_id, voter_id, weight, updown,
                        vote[5], block_time)
        return updateData('comments_votes', vote[0], undo_id, changed_vote)
    except Exception:
        utils.PrintException(undo_id)
        return updateCount(undo_id)
def updateData(table, old_id, undo_id, val):
    """Update one row and remove its ``undo_op`` entry in one transaction.

    Args:
        table: ``'comments'`` or ``'comments_votes'``.
        old_id: ``id`` of the row to update.
        undo_id: ``id`` of the ``undo_op`` row to delete once the update
            succeeds.
        val: Column values in statement order. For ``comments``: permlink,
            title, body, json_metadata, parent_permlink, created_at,
            updated_at, is_del, parent_author_text, author_text. For
            ``comments_votes``: comment_id, user_id, weight, updown,
            created_at, updated_at.

    Returns:
        ``True`` on commit, ``False`` when the transaction fails (it is
        rolled back, reported, and ``updateCount(undo_id)`` is called), or
        ``None`` when the table is not supported.
    """
    # Pick the statement before connecting so an unsupported table cannot
    # open (and then leak) a database connection. The row id is a bound
    # parameter like every other value rather than being formatted into SQL.
    if table == 'comments':
        sql = '''update comments
            set permlink = %s,
                title = %s,
                body = %s,
                json_metadata = %s,
                parent_permlink = %s,
                created_at = %s,
                updated_at = %s,
                is_del = %s,
                parent_author_text = %s,
                author_text = %s
            where id = %s'''
    elif table == 'comments_votes':
        sql = '''update comments_votes
            set comment_id = %s,
                user_id = %s,
                weight = %s,
                updown = %s,
                created_at = %s,
                updated_at = %s
            where id = %s'''
    else:
        return None
    remove_undo_op_sql = '''delete from undo_op where id = %s'''
    params = tuple(val) + (old_id, )

    config = utils.get_config()
    db_c = config['steem_config']
    db = pymysql.connect(
        host=db_c['host'],
        port=db_c['port'],
        user=db_c['user'],
        password=db_c['pass'],
        charset='utf8mb4',
        db=db_c['db'],
        autocommit=False)
    # Stays None if cursor creation itself fails, so cleanup never touches
    # an unbound name.
    cur = None
    try:
        cur = db.cursor()
        # update data
        cur.execute(sql, params)
        # remove undo_op
        cur.execute(remove_undo_op_sql, undo_id)
        db.commit()
        return True
    except Exception:
        # Report first: a failing rollback must not hide the original error.
        utils.PrintException(undo_id)
        db.rollback()
    finally:
        if cur is not None:
            cur.close()
        db.close()
    # Reached only on failure; updateCount opens its own connection, so it
    # runs after this one has been released.
    updateCount(undo_id)
    return False
async def process(self, block_num, block_time, trans_id, ops):
    """Turn the ``vote`` operations of one transaction into insertable rows.

    A vote is appended to ``data`` only when the voter and the comment
    resolve through ``self.getId`` and no ``comments_votes`` row exists for
    the pair. Every other outcome (unknown voter, unknown comment, existing
    vote, or an exception) appends the operation to ``undo``.

    Args:
        block_num: Block number stored in each undo entry.
        block_time: Used as ``created_at``/``updated_at`` of new votes and
            stored in each undo entry.
        trans_id: Transaction id stored in each undo entry.
        ops: Sequence of ``[op_type, op_detail]`` pairs.

    Returns:
        ``self.processed_data``: ``{'data': [...], 'undo': [...]}``.
    """
    global task_type
    db = self.db
    # print('process %i blcok\'s ops' % block_num)
    self.processed_data = {'data': [], 'undo': []}

    def defer(position, operation):
        # Queue the operation for a later retry.
        self.processed_data['undo'].append(
            (block_num, trans_id, position, json.dumps(operation),
             tasks.getTypeId(task_type), block_time))

    for op_idx, op in enumerate(ops):
        try:
            op_type = op[0]
            op_detail = op[1]
            if op_type != 'vote':
                continue

            weight = op_detail['weight']

            voter_id = await self.getId('users', op_detail['voter'])
            if voter_id is None:
                defer(op_idx, op)
                continue

            comment_id = await self.getId(
                'comments', (op_detail['author'], op_detail['permlink']))
            if comment_id is None:
                defer(op_idx, op)
                continue

            vote_id = await self.getId('comments_votes',
                                       (voter_id, comment_id))
            if vote_id is not None:
                # edit vote
                defer(op_idx, op)
                continue

            # insert comment vote: magnitude in `weight`, sign in `updown`
            updown = weight >= 0
            if not updown:
                weight = (-1) * weight
            vote_row = (comment_id, voter_id, weight, updown,
                        block_time, block_time)
            self.processed_data['data'].append(('comment', vote_row))
        except Exception as e:
            defer(op_idx, op)
            utils.PrintException(e)
    # print('processed:', self.processed_data)
    return self.processed_data
def delData(table, old_id, undo_id):
    """Soft-delete a row (or nothing) and remove its ``undo_op`` entry.

    Args:
        table: ``'posts'`` or ``'comments'`` to set ``is_del = 1`` on the row
            with ``id = old_id``; ``'undo_op'`` to only delete the undo row.
        old_id: ``id`` of the row to flag; unused for ``'undo_op'``.
        undo_id: ``id`` of the ``undo_op`` row to delete.

    Returns:
        ``True`` on commit, ``False`` when the transaction fails (it is
        rolled back, reported, and ``updateCount(undo_id)`` is called), or
        ``None`` when the table is not supported.
    """
    # Pick the statement before connecting so an unsupported table cannot
    # open (and then leak) a database connection.
    if table == 'posts':
        sql = '''update posts set is_del = 1 where id = %s'''
    elif table == 'comments':
        sql = '''update comments set is_del = 1 where id = %s'''
    elif table == 'undo_op':
        sql = None
    else:
        return None
    remove_undo_op_sql = '''delete from undo_op where id = %s'''

    config = utils.get_config()
    db_c = config['steem_config']
    db = pymysql.connect(
        host=db_c['host'],
        port=db_c['port'],
        user=db_c['user'],
        password=db_c['pass'],
        charset='utf8mb4',
        db=db_c['db'],
        autocommit=False)
    # Stays None if cursor creation itself fails, so cleanup never touches
    # an unbound name.
    cur = None
    try:
        cur = db.cursor()
        # remove data
        if sql is not None:
            cur.execute(sql, old_id)
        # remove undo_op
        cur.execute(remove_undo_op_sql, undo_id)
        db.commit()
        return True
    except Exception:
        # Report first: a failing rollback must not hide the original error.
        utils.PrintException(undo_id)
        db.rollback()
    finally:
        if cur is not None:
            cur.close()
        db.close()
    # Reached only on failure. False (still falsy, like the former implicit
    # None) matches what updateData and insertData report on failure.
    updateCount(undo_id)
    return False
async def process(self, block_num, block_time, trans_id, ops):
    """Pair each comment in one transaction with the ids of its known tags.

    For every ``comment`` operation whose ``json_metadata`` parses, the
    comment id is resolved through ``self.getId``. An unknown comment is
    queued in ``undo``. Otherwise one ``(comment_id, tag_id)`` row is queued
    in ``data`` for each entry of the ``tags`` list that resolves to an id.
    Operations that raise are queued in ``undo`` and reported.

    Args:
        block_num: Block number stored in each undo entry.
        block_time: Block time stored in each undo entry.
        trans_id: Transaction id stored in each undo entry.
        ops: Sequence of ``[op_type, op_detail]`` pairs.

    Returns:
        ``self.processed_data``: ``{'data': [...], 'undo': [...]}``.
    """
    global task_type
    db = self.db
    # print('process %i blcok\'s ops' % block_num)
    self.processed_data = {'data': [], 'undo': []}

    def defer(position, operation):
        # Queue the operation for a later retry.
        self.processed_data['undo'].append(
            (block_num, trans_id, position, json.dumps(operation),
             tasks.getTypeId(task_type), block_time))

    for op_idx, op in enumerate(ops):
        try:
            op_type = op[0]
            op_detail = op[1]
            if op_type != 'comment':
                continue

            raw_metadata = op_detail['json_metadata']
            try:
                json_metadata = json.loads(raw_metadata)
            except Exception:
                # Unparseable metadata is dropped, not retried.
                print('parse json failed:', op_detail['json_metadata'])
                continue

            comment_id = await self.getId(
                'comments', (op_detail['author'], op_detail['permlink']))
            if comment_id is None:
                defer(op_idx, op)
                continue

            if 'tags' not in json_metadata:
                continue
            tags = json_metadata['tags']
            if not isinstance(tags, list):
                continue
            for tag in tags:
                tag_id = await self.getId('tags', tag)
                if tag_id is not None:
                    self.processed_data['data'].append((comment_id, tag_id))
        except Exception:
            defer(op_idx, op)
            utils.PrintException([block_num, trans_id, op_idx, op])
    # print('processed:', self.processed_data)
    return self.processed_data
def mainMultiProcess():
    """Poll for tasks forever and run each batch on a pool of worker threads.

    Each round fetches the pending tasks from ``BaseTasks.get()``, puts them
    on a queue, starts ``base_thread_count`` daemon threads running
    ``processor`` against that queue, waits for the queue to drain, resets
    the module-level ``check_point_sum`` and sleeps ``base_sleep`` seconds.

    ``SIGINT`` and ``SIGTERM`` are routed to ``quit``. Any exception is
    reported through ``utils.PrintException`` and ends the process via
    ``sys.exit()``.
    """
    global check_point_sum
    try:
        config = utils.get_config()
        base_sleep = config['base_sleep']
        base_thread_count = config['base_thread_count']
        signal.signal(signal.SIGINT, quit)
        signal.signal(signal.SIGTERM, quit)
        while True:
            all_tasks = BaseTasks.get()
            print('all_tasks_count', len(all_tasks))
            # print('all_tasks', all_tasks)
            if not all_tasks:
                print('no_tasks')
                # Back off before polling again; retrying immediately would
                # spin in a tight loop for as long as there are no tasks.
                time.sleep(base_sleep)
                continue

            task_queue = queue.Queue()
            for pending in all_tasks:
                task_queue.put(pending)

            # make multi threads
            workers = [
                threading.Thread(
                    target=processor, args=(task_queue, ), daemon=True)
                for _ in range(base_thread_count)
            ]
            for worker in workers:
                worker.start()

            task_queue.join()  # suspend before all tasks finished
            check_point_sum = 0
            time.sleep(base_sleep)
    except Exception:
        utils.PrintException()
        sys.exit()
def parseCommentTag(val):
    """Retry linking one deferred comment to its tags.

    Args:
        val: Indexable undo row read as ``[0]`` undo id, ``[1]`` block
            number, ``[2]`` transaction id, ``[3]`` operation index, ``[4]``
            JSON-encoded operation, ``[5]`` task type, ``[6]`` current retry
            count, ``[7]`` block time.

    Returns:
        Whatever ``insertData``/``delData``/``updateCount`` returns for the
        branch taken, or ``None`` when the operation is not a ``comment``.
    """
    config = utils.get_config()
    undo_count = config['undo_count']
    undo_id, block_num, trans_id, op_idx = val[0], val[1], val[2], val[3]
    op = json.loads(val[4])
    task_type = val[5]  # read but not used by this parser
    block_time = val[7]
    current_count = val[6]
    try:
        op_type = op[0]
        op_detail = op[1]
        print('parse_comment_tag:', undo_id)
        if op_type != 'comment':
            return None
        print('get_in_comment_tag')

        try:
            json_metadata = json.loads(op_detail['json_metadata'])
        except Exception:
            # Metadata that cannot be parsed never will be: drop the entry.
            print('parse json failed:', op_detail['json_metadata'])
            return delData('undo_op', None, undo_id)

        comment = getData('comments',
                          (op_detail['author'], op_detail['permlink']))
        if comment is None:
            if current_count != undo_count:
                print('not_found_comment_in_comment_tag')
                return updateCount(undo_id)
            # The retry count has reached the configured value: insert the
            # comment itself.
            # NOTE(review): json_metadata is passed to json.loads above, so
            # it looks like a JSON string already; json.dumps would encode
            # it a second time - confirm this is intended.
            comment_row = (
                op_detail['permlink'],  # permlink
                op_detail['title'],
                op_detail['body'],
                json.dumps(op_detail['json_metadata']),
                op_detail['parent_permlink'],  # parent_permlink
                block_time,  # created_at
                block_time,  # updated_at
                False,
                op_detail['parent_author'],
                op_detail['author'])
            return insertData('comments', undo_id, (comment_row, ))

        try:
            if 'tags' not in json_metadata:
                return delData('undo_op', None, undo_id)
            tags = json_metadata['tags']
            if not isinstance(tags, list):
                return delData('undo_op', None, undo_id)

            links = []
            for tag in tags:
                tag_id = getId('tags', tag)
                if tag_id is None:
                    # Unknown tag: create it now, link it on the next retry.
                    print('not_found_tag_id', undo_id)
                    insertData('tags', None, (tag, ))
                    return updateCount(undo_id)
                links.append((comment[0], tag_id))
            return insertData('comments_tags', undo_id, tuple(links))
        except TypeError as e:
            print('json_metadata_type_error', e, undo_id)
            return delData('undo_op', None, undo_id)
    except Exception:
        utils.PrintException(undo_id)
        return updateCount(undo_id)
def parseUserRelation(val):
    """Retry one deferred ``follow`` operation taken from the undo queue.

    The undo entry is deleted when the payload is unusable: unparseable
    JSON, not a dict, missing ``follower``/``following``, a null follower or
    following, or ``what`` missing or not a list. When either user cannot be
    resolved yet, the retry counter is incremented. Otherwise the relation is
    inserted.

    Args:
        val: Indexable undo row read as ``[0]`` undo id, ``[1]`` block
            number, ``[2]`` transaction id, ``[3]`` operation index, ``[4]``
            JSON-encoded operation, ``[5]`` task type, ``[6]`` current retry
            count, ``[7]`` block time.

    Returns:
        Whatever ``insertData``/``delData``/``updateCount`` returns for the
        branch taken, or ``None`` when the operation is not a ``follow``
        ``custom_json``.
    """
    utils.get_config()
    undo_id = val[0]
    block_num = val[1]
    trans_id = val[2]
    op_idx = val[3]
    op = json.loads(val[4])
    block_time = val[7]
    try:
        op_type = op[0]
        op_detail = op[1]
        print('parse_user_relation:', undo_id)
        is_follow = (op_type == 'custom_json' and 'id' in op_detail
                     and op_detail['id'] == 'follow')
        if not is_follow:
            return None

        try:
            json_data = json.loads(op_detail['json'])
        except Exception:
            print('parse json failed:', op_detail['json'])
            return delData('undo_op', None, undo_id)

        try:
            if not isinstance(json_data, dict):
                return delData('undo_op', None, undo_id)
            if 'follower' not in json_data or 'following' not in json_data:
                return delData('undo_op', None, undo_id)
            what_list = json_data.get('what')
            if not isinstance(what_list, list):
                return delData('undo_op', None, undo_id)

            follower = json_data['follower']
            following = json_data['following']
            # An empty list is stored as an empty string.
            what = what_list[0] if what_list else ''

            # A null on either side can never match a username, so discard
            # the entry rather than retrying it. (The former check required
            # all three fields to be None at once, which let a single null
            # through.)
            if follower is None or following is None:
                return delData('undo_op', None, undo_id)

            follower_id = getId('users', follower)
            if follower_id is None:
                print('not_found_follower_id', undo_id)
                return updateCount(undo_id)

            following_id = getId('users', following)
            if following_id is None:
                print('not_found_following_id', undo_id)
                return updateCount(undo_id)

            relation = (follower_id, following_id, what, block_time)
            return insertData('user_relations', undo_id, (relation, ))
        except Exception:
            utils.PrintException([block_num, trans_id, op_idx])
            return updateCount(undo_id)
    except Exception:
        utils.PrintException(undo_id)
        return updateCount(undo_id)
def insertData(table, undo_id, val):
    """Insert rows and remove the related ``undo_op`` entry in one transaction.

    Args:
        table: One of ``'comments'``, ``'comments_votes'``,
            ``'comments_tags'``, ``'user_relations'`` or ``'tags'``.
        undo_id: ``id`` of the ``undo_op`` row to delete after the insert, or
            ``None`` when no undo row is involved.
        val: Iterable of parameter sets handed to ``executemany``. For
            ``'comments_tags'`` the first item of every set is a comment id;
            existing ``comments_tags`` rows for those ids are deleted before
            the insert.

    Returns:
        ``True`` on commit, ``False`` when the transaction fails (it is
        rolled back and reported; ``updateCount(undo_id)`` is called when an
        undo id was given), or ``None`` when the table is not supported.
    """
    # Build every statement before connecting so an unsupported table cannot
    # open (and then leak) a database connection.
    cleanup_sql = None
    cleanup_args = ()
    if table == 'comments':
        sql = '''insert into comments
            (
                permlink,
                title,
                body,
                json_metadata,
                parent_permlink,
                created_at,
                updated_at,
                is_del,
                parent_author_text,
                author_text
            )
            values
            (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'''
    elif table == 'comments_votes':
        sql = '''insert into comments_votes
            (
                comment_id,
                user_id,
                weight,
                updown,
                created_at,
                updated_at
            )
            values
            (%s, %s, %s, %s, %s, %s)'''
    elif table == 'comments_tags':
        sql = '''insert ignore into comments_tags
            (comments_id, tags_id)
            values
            (%s, %s)'''
        comment_ids = tuple(row[0] for row in val)
        if comment_ids:
            # One placeholder per id; the ids themselves stay bound values.
            placeholders = ','.join(['%s'] * len(comment_ids))
            cleanup_sql = ('delete from comments_tags where comments_id in (%s)'
                           % placeholders)
            cleanup_args = comment_ids
    elif table == 'user_relations':
        sql = '''insert into user_relations
            (follower_id, following_id, what, created_at)
            values
            (%s, %s, %s, %s)'''
    elif table == 'tags':
        sql = '''insert ignore into tags
            (tag_name)
            values
            (%s)'''
    else:
        return None
    remove_undo_op_sql = '''delete from undo_op where id = %s'''

    config = utils.get_config()
    db_c = config['steem_config']
    db = pymysql.connect(
        host=db_c['host'],
        port=db_c['port'],
        user=db_c['user'],
        password=db_c['pass'],
        charset='utf8mb4',
        db=db_c['db'],
        autocommit=False)
    # Stays None if cursor creation itself fails, so cleanup never touches
    # an unbound name.
    cur = None
    try:
        cur = db.cursor()
        if cleanup_sql is not None:
            # remove previous comment_tag records
            cur.execute(cleanup_sql, cleanup_args)
        # insert data
        cur.executemany(sql, val)
        if undo_id is not None:
            # remove undo_op
            cur.execute(remove_undo_op_sql, undo_id)
        db.commit()
        return True
    except Exception:
        # Report first: a failing rollback must not hide the original error.
        utils.PrintException(undo_id)
        db.rollback()
    finally:
        if cur is not None:
            cur.close()
        db.close()
    # Reached only on failure. Without an undo row there is no retry counter
    # to bump, so skip the pointless extra connection.
    if undo_id is not None:
        updateCount(undo_id)
    return False
async def process(self, block_num, block_time, trans_id, ops):
    """Turn the ``follow`` operations of one transaction into relation rows.

    Only ``custom_json`` operations whose ``id`` is ``'follow'`` and whose
    ``json`` payload is a non-empty JSON object are considered. Follower and
    following are looked up in ``users`` with a single query. When both
    resolve, ``(follower_id, following_id, what, block_time)`` is queued in
    ``data``; otherwise, or when anything raises, the operation is queued in
    ``undo``.

    Args:
        block_num: Block number stored in each undo entry.
        block_time: Stored in each relation row and each undo entry.
        trans_id: Transaction id stored in each undo entry.
        ops: Sequence of ``[op_type, op_detail]`` pairs.

    Returns:
        ``self.processed_data``: ``{'data': [...], 'undo': [...]}``.
    """
    global task_type
    db = self.db
    # print('process %i blcok\'s ops' % block_num, ops)
    self.processed_data = {'data': [], 'undo': []}

    def undo_entry(position, operation):
        # Row layout shared by every deferred operation.
        return (block_num, trans_id, position, json.dumps(operation),
                tasks.getTypeId(task_type), block_time)

    for op_idx, op in enumerate(ops):
        op_type = op[0]
        op_detail = op[1]
        is_follow = (op_type == 'custom_json' and 'id' in op_detail
                     and op_detail['id'] == 'follow')
        if not is_follow:
            # print('unknown type:', op_type, block_num, trans_id, ops, op_idx)
            continue
        if op_detail['json'] == '':
            continue
        try:
            json_data = json.loads(op_detail['json'])
            if not isinstance(json_data, dict):
                continue
            follower = json_data.get('follower')
            following = json_data.get('following')
            what = None
            what_list = json_data.get('what')
            if isinstance(what_list, list) and len(what_list) > 0:
                what = what_list[0]

            if follower is None and following is None and (
                    what is None or what == ''):
                print('follow_data_error', block_num, trans_id, follower,
                      following, what, op)
                continue

            sql = '''
                select id, username from users
                where username = %s or username = %s'''
            cur = await db.cursor()
            try:
                await cur.execute(sql, (follower, following))
                user_data = await cur.fetchall()
            finally:
                # Release the cursor even when the query fails.
                await cur.close()

            # Start from None for every operation: the ids must never be
            # carried over from a previous operation in this loop, nor be
            # left unbound when a returned username does not compare equal
            # to the requested one.
            follower_id = None
            following_id = None
            if len(user_data) == 2:
                for user in user_data:
                    if user[1] == follower:
                        follower_id = user[0]
                    if user[1] == following:
                        following_id = user[0]

            if follower_id is not None and following_id is not None:
                self.processed_data['data'].append((
                    follower_id,
                    following_id,
                    what,
                    block_time,
                ))
            else:
                self.processed_data['undo'].append(undo_entry(op_idx, op))
        except Exception:
            self.processed_data['undo'].append(undo_entry(op_idx, op))
            utils.PrintException([block_num, trans_id, op_idx])
    # print('processed:', self.processed_data)
    return self.processed_data
def parseComment(val):
    """Retry one deferred ``comment`` or ``delete_comment`` operation.

    For ``comment`` the body is first parsed as a diff-match-patch patch.
    If that works, the patch is applied to the stored body (or, when no
    stored comment exists, the raw operation is inserted once the retry
    count equals the configured ``undo_count``). A ``ValueError`` in that
    branch means the body is plain text: the comment is inserted if missing,
    otherwise overwritten. For ``delete_comment`` the stored row is rewritten
    with ``is_del`` set to ``True``.

    Args:
        val: Indexable undo row read as ``[0]`` undo id, ``[1]`` block
            number, ``[2]`` transaction id, ``[3]`` operation index, ``[4]``
            JSON-encoded operation, ``[5]`` task type, ``[6]`` current retry
            count, ``[7]`` block time.

    Returns:
        Whatever ``insertData``/``updateData``/``updateCount`` returns for
        the branch taken, or ``None`` for any other operation type.
    """
    config = utils.get_config()
    undo_count = config['undo_count']
    undo_id, block_num, trans_id, op_idx = val[0], val[1], val[2], val[3]
    op = json.loads(val[4])
    task_type = val[5]  # read but not used by this parser
    block_time = val[7]
    current_count = val[6]
    try:
        op_type = op[0]
        op_detail = op[1]
        print('parse_comment:', op_detail, undo_id)
        if op_type == 'comment':
            print('get_in_comment')
            parent_author_text = op_detail['parent_author']
            author_text = op_detail['author']

            def fresh_row():
                # Row for a comment that is not stored yet.
                # NOTE(review): json_metadata looks like a JSON string
                # already, so json.dumps would encode it a second time -
                # confirm this is intended.
                return (
                    op_detail['permlink'],  # permlink
                    op_detail['title'],
                    op_detail['body'],
                    json.dumps(op_detail['json_metadata']),
                    op_detail['parent_permlink'],  # parent_permlink
                    block_time,  # created_at
                    block_time,  # updated_at
                    False,
                    parent_author_text,
                    author_text)

            # check if comment edit through body dmp
            dmp = dmp_module.diff_match_patch()
            try:
                # if patch_fromText successed, this comment is edited.
                patches = dmp.patch_fromText(op_detail['body'])
                old_comment = getData('comments',
                                      (author_text, op_detail['permlink']))
                if old_comment is None:
                    if current_count != undo_count:
                        print('comment_not_exist_dmp', undo_id)
                        return updateCount(undo_id)
                    # if data cannot find parent, insert it directly.
                    return insertData('comments', undo_id, (fresh_row(), ))

                print('old_comment_dmp:', undo_id)
                new_body = dmp.patch_apply(patches, old_comment[4])
                print('dmp_edit_comment', block_num, trans_id, op_idx,
                      old_comment[0])
                patched_row = (
                    old_comment[2],  # permlink
                    op_detail['title'],
                    new_body[0],
                    json.dumps(op_detail['json_metadata']),
                    old_comment[8],  # parent_permlink
                    old_comment[9],  # created_at
                    block_time,
                    False,
                    parent_author_text,
                    author_text)
                return updateData('comments', old_comment[0], undo_id,
                                  patched_row)
            except ValueError:
                old_comment = getData('comments',
                                      (author_text, op_detail['permlink']))
                if old_comment is None:
                    print('comment_not_exist2', undo_id)
                    return insertData('comments', undo_id, (fresh_row(), ))

                print('without_dmp_edit_comment', block_num, trans_id,
                      op_idx, old_comment[0])
                replaced_row = (
                    old_comment[2],  # permlink
                    op_detail['title'],
                    op_detail['body'],
                    json.dumps(op_detail['json_metadata']),
                    old_comment[8],  # parent_permlink
                    old_comment[9],  # created_at
                    block_time,
                    False,
                    old_comment[12],  # parent_author_text
                    old_comment[13])  # author_text
                return updateData('comments', old_comment[0], undo_id,
                                  replaced_row)
        elif op_type == 'delete_comment':
            old_comment = getData('comments',
                                  (op_detail['author'], op_detail['permlink']))
            if old_comment is None:
                print('not_found_comment_which_will_be_del', undo_id)
                return updateCount(undo_id)

            deleted_row = (
                old_comment[2],  # permlink
                old_comment[3],  # title
                old_comment[4],  # body
                old_comment[5],  # json_metadata
                old_comment[8],  # parent_permlink
                old_comment[9],  # created_at
                block_time,
                True,
                old_comment[12],  # parent_author_text
                old_comment[13])  # author_text
            return updateData('comments', old_comment[0], undo_id,
                              deleted_row)
    except Exception:
        utils.PrintException(undo_id)
        return updateCount(undo_id)
async def doMultiTasks(self, task):
    """Process every cached block of one task and persist the result.

    Opens a dedicated connection (exposed as ``self.db`` for the duration),
    loads the task's block range from ``block_cache``, feeds each
    transaction's operations to ``self.process`` and accumulates the returned
    ``data``/``undo`` lists in ``self.prepared_data``. ``self.insertData()``
    is awaited unless ``self.process`` returned ``False`` for some
    transaction. Exceptions are reported through ``utils.PrintException`` and
    not re-raised.

    Args:
        task: Mapping with ``'id'``, ``'block_num_from'`` and
            ``'block_num_to'``.
    """
    try:
        config = self.config
        db_c = config['steem_config']
        db = await aiomysql.connect(
            host=db_c['host'],
            port=db_c['port'],
            user=db_c['user'],
            password=db_c['pass'],
            charset='utf8mb4',
            db=db_c['db'],
            autocommit=False,
            loop=self.loop)
        self.db = db
        try:
            task_start_time = time.time()
            self.task_id = task['id']
            self.block_from = task['block_num_from']
            self.block_to = task['block_num_to']
            print('task_id:', self.task_id, 'from:', self.block_from, 'to:',
                  self.block_to)

            # prepare data
            sql = '''
                select block_num, block_info, timestamp from block_cache
                where block_num >= %s and block_num <= %s
                order by block_num asc'''
            cur = await db.cursor()
            try:
                await cur.execute(sql, (self.block_from, self.block_to))
                blocks = await cur.fetchall()
            finally:
                await cur.close()

            self.prepared_data = {'data': [], 'undo': []}
            has_err = False
            for block in blocks:
                curr_block_num = block[0]
                curr_block_info = json.loads(block[1])
                curr_block_timestamp = block[2]
                if curr_block_info['transaction_ids'] == []:
                    continue
                for idx, trans in enumerate(curr_block_info['transactions']):
                    curr_block_trans_id = curr_block_info[
                        'transaction_ids'][idx]
                    processed_data = await self.process(
                        curr_block_num, curr_block_timestamp,
                        curr_block_trans_id, trans['operations'])
                    if processed_data == False:
                        has_err = True
                        break
                    self.prepared_data['data'].extend(processed_data['data'])
                    self.prepared_data['undo'].extend(processed_data['undo'])
                if has_err:
                    break

            if has_err:
                print('there are some unexpected errors. task_id ',
                      self.task_id, 'will not run.')
            else:
                # insert data
                await self.insertData()
        finally:
            # end task: always release the connection, also when a block
            # fails to parse or the insert raises.
            db.close()
            self.db = None
        task_end_time = time.time()
        print('task_id:', self.task_id, 'db closed', 'task_spent:',
              task_end_time - task_start_time)
    except Exception as e:
        utils.PrintException(e)