def delete_post_and_comments_by_id(cf, post_id):
    """Delete a post and every item returned by a ``post_id`` query.

    The delete batch starts with the key ``{"id": "50_" + post_id}`` and is
    followed by all items that ``query.query_items`` returns for
    ``post_id:<post_id>``. The result of ``batch_delete_item`` is printed.

    Args:
        cf: Configuration object; ``cf.table_name`` names the DynamoDB table.
        post_id: Identifier of the post (concatenated to the ``"50_"`` prefix).
    """
    doomed = [{"id": "50_" + post_id}]

    # Only the matched items are needed; the reported total is ignored.
    _total, matches = query.query_items(cf, "post_id:%s" % (post_id))
    doomed.extend(matches)

    table = dynamodb.Table(cf.table_name)
    outcome = batch_delete_item(table, doomed)
    print(outcome)
def send_task(cf, query_str, task, batch=100, extra=None):
    """Query items matching ``query_str`` and send them to the task queue.

    Args:
        cf: Configuration object, passed through to the query and queue
            helpers.
        query_str: Query string (Lucene format) handed to
            ``query.query_items``.
        task: Task identifier forwarded to the queue helpers.
        batch: Batch size forwarded to the queue helpers.
        extra: Optional dict of extra options. When ``extra['type']`` is
            ``'post'`` the replies of the matched posts are processed as
            well. ``None`` (the default) means an empty dict.

    Returns:
        The ``total`` value reported by ``query.query_items``.
    """
    # Use a fresh dict per call: a shared ``{}`` default would carry any
    # mutation made by the downstream helpers over into later calls.
    if extra is None:
        extra = {}

    total, items = query.query_items(cf, query_str)

    if extra.get('type') == 'post':
        # Post processing: also handle the replies of each matched post
        # before the posts themselves are queued.
        process_post_comments(cf, create_messages(batch, task, items, extra), task, batch)

    # Both the 'post' and the general path send the matched items to the queue.
    send_to_queue(cf, task, items, batch, extra)
    return total
def main():
    """Run the query loop configured by the command-line arguments.

    Reads the arguments via ``read_arguments()``, optionally configures the
    root logger level from ``args.log``, resolves a driver class for
    ``args.site`` and then repeatedly calls ``query.query_items``, logging
    every item found and sleeping ``args.sleep`` seconds after each round.
    ``args.count == 0`` means "loop forever"; otherwise the loop runs
    ``args.count`` times.

    Raises:
        ValueError: If ``args.log`` does not name a numeric logging level.
    """
    args = read_arguments()

    if args.log is not None:
        # Set the root logger level. The ``None`` default makes an unknown
        # level name reach the ValueError below instead of raising
        # AttributeError from getattr().
        level_name = args.log.upper()
        numeric_level = getattr(logging, level_name, None)
        if not isinstance(numeric_level, int):
            raise ValueError("Invalid log level: %s" % level_name)
        logging.basicConfig(level=numeric_level)

    logging.info("invoked with args:%s", args)

    driver_class = find_driver(args.site)
    if driver_class is None:
        logging.error("error - %s is not yet supported", args.site)
        return
    driver = driver_class()

    # specify options for this query
    options = drivers.base.QueryOptions(item_name=args.item)

    # A count of 0 requests an endless loop; otherwise count down locally
    # rather than mutating the parsed arguments.
    infinite = args.count == 0
    remaining = args.count
    while infinite or remaining > 0:
        remaining -= 1
        items = query.query_items(driver, options)
        if items:
            logging.info("listing found items: \n")
            for item in items:
                logging.info(item.to_string())
            # TODO invoke post-processor
        else:
            logging.warning("no results found")

        logging.debug("sleeping %d seconds", args.sleep)
        time.sleep(args.sleep)

    logging.info("program exiting")
#Process data: _exists_:post_id AND unix_created_time:>=1525132800 AND unix_created_time:<1525219200 #current_start_unix=1525132800 if args.date_end: date_end_unix = utils.get_date_unix(utils.parse_time( args.date_end)) else: date_end_unix = utils.get_current_posix_number() while current_start_unix <= date_end_unix: print(date_end_unix - current_start_unix) query_str = args.query + " AND unix_created_time:>=%s AND unix_created_time:<%s" % ( current_start_unix, current_start_unix + 86400) if favorite: query_str = query_str + " AND categs.active:3" current_start_unix = current_start_unix + 86400 print("Process data: " + query_str) total, posts = query.query_items(cf, query_str) if len(posts) > 0: aggreate_es_item_into_mysql(cf, nl, posts) ##try: ## aggreate_es_item_into_mysql(cf,posts) ##except Exception as e: ## print(e) store_report_table(cf, cf.mysql_table_name_count, [comments_count_ht]) else: if favorite: query_str = args.query + " AND categs.active:3" print("Process data: " + query_str) total, posts = query.query_items(cf, query_str) print(len(posts)) aggreate_es_item_into_mysql(cf, nl, posts)
def get_item_comments_ids(cf, item):
    """Return the ids of the items found for a post, or ``None``.

    Args:
        cf: Configuration object handed to ``query.query_items``.
        item: Mapping with an ``'object_type'`` key and, for posts, an
            ``'object_id'`` key.

    Returns:
        A list with the ``"id"`` of every item matching
        ``post_id:<object_id>`` when ``item`` is a post and the query reports
        a positive total; ``None`` otherwise.
    """
    # Only posts are looked up; any other object type yields nothing.
    if item['object_type'] != 'post':
        return None

    lookup = "post_id:%s" % (item['object_id'])
    total, hits = query.query_items(cf, lookup)
    return [hit["id"] for hit in hits] if total > 0 else None