예제 #1
0
def delete_post_and_comments_by_id(cf, post_id):
    """Batch-delete a post entry plus every item the post_id query returns.

    Args:
        cf: Configuration object; forwarded to ``query.query_items`` and
            read for ``table_name``.
        post_id: Identifier of the post; concatenated onto the ``"50_"``
            prefix, so it must be a string.

    The result of ``batch_delete_item`` is printed, nothing is returned.
    """
    # First entry is the key built from the "50_" prefix and the post id
    # (presumably the post's own record -- confirm against the table schema).
    doomed = [{"id": "50_" + post_id}]

    # Everything the index returns for this post_id is deleted as well.
    _, matches = query.query_items(cf, "post_id:%s" % (post_id))
    doomed.extend(matches)

    table = dynamodb.Table(cf.table_name)
    outcome = batch_delete_item(table, doomed)
    print(outcome)
예제 #2
0
def send_task(cf, query_str, task, batch=100, extra=None):
    """Query items and send them to the task queue.

    Per the original author's note, ``query_str`` is a Lucene-format query
    run against ES and the matching items are sent to an SQS task queue.

    Args:
        cf: Configuration object forwarded to the query and queue helpers.
        query_str: Query string passed to ``query.query_items``.
        task: Task identifier forwarded to the queue helpers.
        batch: Batch size forwarded to the queue helpers (default 100).
        extra: Optional dict of extra options. When ``extra['type']`` is
            ``'post'``, the replies of the posts are processed first via
            ``process_post_comments``. Defaults to a fresh empty dict.

    Returns:
        The ``total`` value reported by ``query.query_items``.
    """
    # A fresh dict per call: a shared ``{}`` default would leak any mutation
    # made by the downstream helpers into later calls.
    if extra is None:
        extra = {}

    total, items = query.query_items(cf, query_str)

    if extra.get('type') == 'post':
        # For posts, also fetch the replies of each post and queue them.
        process_post_comments(cf, create_messages(batch, task, items, extra),
                              task, batch)

    # The queried items themselves are queued for every process type.
    send_to_queue(cf, task, items, batch, extra)
    return total
예제 #3
0
def main():
    """Run the query loop described by the command-line arguments.

    Reads the arguments via ``read_arguments``, optionally configures the
    root logger level, looks up the driver for ``args.site`` and then
    repeatedly queries it for ``args.item``, logging every item found and
    sleeping ``args.sleep`` seconds between rounds. ``args.count`` is the
    number of rounds; ``0`` means loop forever.

    Raises:
        ValueError: If ``args.log`` is not the name of a logging level.
    """
    args = read_arguments()

    if args.log is not None:
        # set the root logger level
        level_name = args.log.upper()
        # Default to None so an unknown name reaches the ValueError below
        # instead of escaping as an AttributeError from getattr.
        numeric_level = getattr(logging, level_name, None)
        if not isinstance(numeric_level, int):
            raise ValueError("Invalid log level: %s" % level_name)

        logging.basicConfig(level=numeric_level)

    logging.info("invoked with args:%s", args)

    driver_class = find_driver(args.site)

    if driver_class is None:
        logging.error("error - %s is not yet supported", args.site)
        return

    driver = driver_class()

    # specify options for this query
    options = drivers.base.QueryOptions(item_name=args.item)

    # A count of 0 means "run until interrupted".
    infinite = args.count == 0
    # Count down on a local so the parsed arguments are left untouched.
    remaining = args.count

    while infinite or remaining > 0:
        remaining -= 1
        items = query.query_items(driver, options)
        if items is not None and len(items) > 0:
            logging.info("listing found items: \n")
            for item in items:
                logging.info(item.to_string())

            # TODO invoke post-processor
        else:
            logging.warning("no results found")

        logging.debug("sleeping %d seconds", args.sleep)
        time.sleep(args.sleep)

    logging.info("program exiting")
예제 #4
0
        #Process data: _exists_:post_id AND unix_created_time:>=1525132800 AND unix_created_time:<1525219200
        #current_start_unix=1525132800
        if args.date_end:
            date_end_unix = utils.get_date_unix(utils.parse_time(
                args.date_end))
        else:
            date_end_unix = utils.get_current_posix_number()
        while current_start_unix <= date_end_unix:
            print(date_end_unix - current_start_unix)
            query_str = args.query + " AND unix_created_time:>=%s AND unix_created_time:<%s" % (
                current_start_unix, current_start_unix + 86400)
            if favorite:
                query_str = query_str + " AND categs.active:3"
            current_start_unix = current_start_unix + 86400
            print("Process data: " + query_str)
            total, posts = query.query_items(cf, query_str)
            if len(posts) > 0:
                aggreate_es_item_into_mysql(cf, nl, posts)
            ##try:
            ##    aggreate_es_item_into_mysql(cf,posts)
            ##except Exception as e:
            ##    print(e)
        store_report_table(cf, cf.mysql_table_name_count, [comments_count_ht])
    else:
        if favorite:
            query_str = args.query + " AND categs.active:3"
        print("Process data: " + query_str)
        total, posts = query.query_items(cf, query_str)
        print(len(posts))
        aggreate_es_item_into_mysql(cf, nl, posts)
예제 #5
0
def get_item_comments_ids(cf, item):
    """Return the ids of the items found for a post's ``object_id``.

    Args:
        cf: Configuration object forwarded to ``query.query_items``.
        item: Mapping with an ``'object_type'`` key and, for posts, an
            ``'object_id'`` key.

    Returns:
        A list of the ``"id"`` values of the queried items, or ``None`` when
        ``item`` is not a post or the query reports no hits.
    """
    # Only posts are looked up; anything else yields None.
    if item['object_type'] != 'post':
        return None

    lookup = "post_id:%s" % (item['object_id'])
    total, items = query.query_items(cf, lookup)

    # No hits: keep the original implicit None result.
    if not total > 0:
        return None

    return [entry["id"] for entry in items]