Beispiel #1
0
    def save_item_correlation(self, subtype, obj_id, item_id, item_date):
        """Register that correlation object ``obj_id`` appears in ``item_id``.

        Steps, in execution order (``<name>`` is ``self.correlation_name``):

        1. ``self.update_correlation_daterange(subtype, obj_id, item_date)``.
        2. Add ``item_id`` to the set ``set_<name>_<subtype>:<obj_id>``.
        3. Increment by 1 the field ``obj_id`` of the hash
           ``<name>:<subtype>:<item_date>``.
        4. Bump ``obj_id`` in the sorted set ``<name>_all:<subtype>``.
        5. Add ``obj_id`` to the set ``item_<name>_<subtype>:<item_id>``.
        6. If ``item_basic.is_crawled(item_id)`` is truthy, fetch the item's
           domain and forward it to ``self.save_domain_correlation``.

        All writes go through ``r_serv_metadata``.
        """
        self.update_correlation_daterange(subtype, obj_id, item_date)
        name = self.correlation_name

        # Global set: every item in which this object was seen.
        items_key = 'set_{}_{}:{}'.format(name, subtype, obj_id)
        r_serv_metadata.sadd(items_key, item_id)

        # Daily counter, one hash per (subtype, date).
        daily_key = '{}:{}:{}'.format(name, subtype, item_date)
        r_serv_metadata.hincrby(daily_key, obj_id, 1)

        # Counter across all dates for this subtype.
        # NOTE(review): arguments are passed as (key, obj_id, 1); redis-py >= 3.0
        # expects (name, amount, value) -- confirm the client version in use.
        total_key = '{}_all:{}'.format(name, subtype)
        r_serv_metadata.zincrby(total_key, obj_id, 1)

        ## object_metadata
        # Reverse index: objects found in this item.
        item_key = 'item_{}_{}:{}'.format(name, subtype, item_id)
        r_serv_metadata.sadd(item_key, obj_id)

        # Crawled items additionally get a domain-level correlation.
        if not item_basic.is_crawled(item_id):
            return
        item_domain = item_basic.get_item_domain(item_id)
        self.save_domain_correlation(item_domain, subtype, obj_id)
Beispiel #2
0
def add_obj_tag(object_type, object_id, tag, obj_date=None):
    """Attach ``tag`` to an object and index the object under that tag.

    In every case ``tag`` is added to the ``r_serv_metadata`` set
    ``tag:<object_id>``. The index written to ``r_serv_tags`` depends on
    ``object_type``:

    * ``"item"``: ``object_id`` is added to the set ``<tag>:<obj_date>``.
      If ``item_basic.is_crawled(object_id)`` is truthy and the tag is not
      one of the two ``infoleak:submission`` tags listed below, the tag is
      also passed to ``add_tag("domain", tag, <item domain>)``.
    * anything else: ``object_id`` is added to the set
      ``<object_type>:<tag>``; ``obj_date`` is not used.

    Raises:
        ValueError: if ``object_type`` is ``"item"`` and ``obj_date`` is
            None. Raised before anything is written.
    """
    # TODO: rename this function (note carried over from the original author).
    if object_type != "item":
        r_serv_metadata.sadd('tag:{}'.format(object_id), tag)
        r_serv_tags.sadd('{}:{}'.format(object_type, tag), object_id)
        return

    if obj_date is None:
        raise ValueError("obj_date is None")

    # Tag the item, then index it under the (tag, date) pair.
    object_tags_key = 'tag:{}'.format(object_id)
    r_serv_metadata.sadd(object_tags_key, tag)
    dated_tag_key = '{}:{}'.format(tag, obj_date)
    r_serv_tags.sadd(dated_tag_key, object_id)

    # Submission-origin tags are not copied to the item's domain.
    submission_tags = (
        'infoleak:submission="crawler"',
        'infoleak:submission="manual"',
    )
    if item_basic.is_crawled(object_id) and tag not in submission_tags:
        item_domain = item_basic.get_item_domain(object_id)
        add_tag("domain", tag, item_domain)
Beispiel #3
0
def is_crawled(item_id):
    """Return whatever ``item_basic.is_crawled`` reports for ``item_id``.

    Thin pass-through kept so callers of this module do not need to reach
    into ``item_basic`` themselves.
    """
    crawled = item_basic.is_crawled(item_id)
    return crawled