Example #1
0
def _make_update(update_map, inc_map=None):
    """Build the pair of update objects for the two url-info stores.

    ``update_map`` holds field assignments and ``inc_map`` holds increments.
    Each map is split with ``misc.separate_dict`` using the
    ``urlRepositoryMeta`` field list, the fields named in
    ``common_settings.common_url_info_fields`` are then copied between the
    halves, and each half is passed to ``_create_update``.

    Args:
        update_map: dict of fields to set. NOTE: when it contains
            ``"crawl_status"``, this dict is modified in place -- a
            ``"status_last_modified"`` entry holding the current timestamp
            is added.
        inc_map: optional dict of fields to increment; ``None`` is treated
            as an empty dict.

    Returns:
        A ``(first_update, second_update)`` tuple as produced by
        ``_create_update``. Presumably the first targets the main url
        repository and the second the meta store -- confirm against
        ``misc.separate_dict``.
    """
    now = datetime2timestamp(datetime.datetime.utcnow())

    # Record when the crawl status last changed. ``in`` replaces the
    # Python-2-only ``dict.has_key`` and behaves identically on 2 and 3.
    if "crawl_status" in update_map:
        update_map["status_last_modified"] = now

    # Separate url_info fields from meta_url_info fields.
    meta_fields = common_settings.database_table_fields["urlRepositoryMeta"]
    first_update_map, second_update_map = misc.separate_dict(update_map, meta_fields)
    first_inc_map, second_inc_map = misc.separate_dict(
        inc_map if inc_map is not None else {}, meta_fields)

    # Fields common to both stores are copied from the first half to the
    # second (soft mode; exact semantics live in misc.copy_dict).
    misc.copy_dict(first_update_map, second_update_map,
                   common_settings.common_url_info_fields, soft=True)
    misc.copy_dict(first_inc_map, second_inc_map,
                   common_settings.common_url_info_fields, soft=True)

    first_update = _create_update(first_update_map, first_inc_map)
    second_update = _create_update(second_update_map, second_inc_map)
    return first_update, second_update
Example #2
0
def _insert_url_info(url, url_info):
    """Store a new url_info record in the cache and in both database tables.

    The record is first pushed to ``UrlCacheClient``. It is then split with
    ``misc.separate_dict`` using the ``urlRepositoryMeta`` field list, the
    common url-info fields plus ``"url"`` and ``"_id"`` are copied between
    the two halves, and each half is inserted into its own store.

    Args:
        url: the url the record belongs to (passed to the cache client).
        url_info: dict with the full set of url-info fields.
    """
    # Cache first, exactly as before, so the cache sees the unsplit record.
    UrlCacheClient.update_url_info(url, url_info)

    meta_field_names = common_settings.database_table_fields["urlRepositoryMeta"]
    split_result = misc.separate_dict(url_info, meta_field_names)
    repository_doc, meta_doc = split_result

    # Both documents need the shared fields and the identifying keys.
    shared_field_names = common_settings.common_url_info_fields + ["url", "_id"]
    misc.copy_dict(repository_doc, meta_doc, shared_field_names)

    db.urlRepository.insert(repository_doc)
    crawlerMetadb.insert_url_info_meta(meta_doc)