def _make_update(update_map, inc_map=None):
    """Build the two update documents for a url_info change.

    The set/increment maps are each split in two with ``misc.separate_dict``
    using the ``urlRepositoryMeta`` field list, the fields named in
    ``common_settings.common_url_info_fields`` are passed through
    ``misc.copy_dict`` (first map -> second map, ``soft=True``), and each
    half is turned into an update via ``_create_update``.

    NOTE(review): the first half presumably targets the main url repository
    and the second half the meta table -- confirm against the callers.

    Side effect: when ``update_map`` contains ``"crawl_status"``, a
    ``"status_last_modified"`` timestamp is written into ``update_map``
    itself, so the caller's dict is modified.

    :param update_map: mapping of field name -> new value.
    :param inc_map: optional mapping of field name -> increment; ``None``
        is treated as an empty mapping.
    :returns: tuple ``(first_update, second_update)``.
    """
    # Record when the crawl status last changed. Membership is tested with
    # ``in`` because ``dict.has_key`` no longer exists on Python 3.
    if "crawl_status" in update_map:
        update_map["status_last_modified"] = datetime2timestamp(
            datetime.datetime.utcnow())

    meta_fields = common_settings.database_table_fields["urlRepositoryMeta"]
    shared_fields = common_settings.common_url_info_fields
    increments = inc_map if inc_map is not None else {}

    # Separate url_info fields from meta_url_info fields.
    first_update_map, second_update_map = misc.separate_dict(
        update_map, meta_fields)
    first_inc_map, second_inc_map = misc.separate_dict(
        increments, meta_fields)

    # Carry the shared fields over from the first half to the second half.
    misc.copy_dict(first_update_map, second_update_map, shared_fields,
                   soft=True)
    misc.copy_dict(first_inc_map, second_inc_map, shared_fields, soft=True)

    first_update = _create_update(first_update_map, first_inc_map)
    second_update = _create_update(second_update_map, second_inc_map)
    return first_update, second_update
def _insert_url_info(url, url_info):
    """Store a new url_info record in the cache and in both database tables.

    The record is first handed to ``UrlCacheClient.update_url_info``. It is
    then split with ``misc.separate_dict`` using the ``urlRepositoryMeta``
    field list; the common url_info fields plus ``"url"`` and ``"_id"`` are
    passed through ``misc.copy_dict`` (first part -> second part). Finally
    the first part is inserted into ``db.urlRepository`` and the second part
    is handed to ``crawlerMetadb.insert_url_info_meta``.

    :param url: the url the record belongs to (used for the cache update).
    :param url_info: mapping of field name -> value for the new record.
    """
    # Cache first, then the persistent stores -- same order as before.
    UrlCacheClient.update_url_info(url, url_info)

    meta_fields = common_settings.database_table_fields["urlRepositoryMeta"]
    repository_doc, meta_doc = misc.separate_dict(url_info, meta_fields)

    # Both documents need the shared fields and the identifying keys.
    shared_fields = common_settings.common_url_info_fields + ["url", "_id"]
    misc.copy_dict(repository_doc, meta_doc, shared_fields)

    db.urlRepository.insert(repository_doc)
    crawlerMetadb.insert_url_info_meta(meta_doc)