def save_item_sql(item):
    """Write ``item`` to the SQL store, applying the configured dedup policy.

    The item is stamped with the current task id (``item['task_id']``) before
    it is written, so the caller's mapping is mutated in place.

    Policy, when dedup is enabled (``get_is_dedup() == '1'``):

    * ``DedupMethod.OVERWRITE`` -- update the existing row matched on the
      dedup field, or insert when no such row exists.
    * ``DedupMethod.IGNORE`` -- keep the existing row and drop the new item;
      insert only when no row matches.
    * any other method -- plain insert.

    When dedup is disabled the item is always inserted.

    Args:
        item: Mutable mapping holding the scraped fields.
    """
    # Whether dedup is enabled.
    is_dedup = get_is_dedup()

    # Assign task_id.
    item['task_id'] = get_task_id()

    # No dedup: always insert.
    if is_dedup != '1':
        insert_item(item)
        return

    dedup_field = get_dedup_field()
    dedup_method = get_dedup_method()

    if dedup_method == DedupMethod.OVERWRITE:
        # Overwrite: replace the stored row if one matches, otherwise insert.
        if get_item(item, dedup_field):
            update_item(item, dedup_field)
        else:
            insert_item(item)
    elif dedup_method == DedupMethod.IGNORE:
        # Ignore: an already-stored row wins and the new item is skipped.
        # Previously this branch inserted unconditionally, which made IGNORE
        # indistinguishable from "dedup disabled".
        # An item that lacks the dedup field cannot be matched against stored
        # rows, so it is inserted as before rather than raising.
        is_duplicate = dedup_field in item and get_item(item, dedup_field)
        if not is_duplicate:
            insert_item(item)
    else:
        # Any other method: plain insert.
        insert_item(item)
def _save_document(col, doc):
    """Insert ``doc``, or upsert it by ``_id`` when it already carries one.

    Drop-in replacement for the legacy ``Collection.save`` call, which was
    deprecated in PyMongo 3.0 and removed in PyMongo 4.0.
    """
    if '_id' in doc:
        col.replace_one({'_id': doc['_id']}, doc, upsert=True)
    else:
        col.insert_one(doc)


def save_item_mongo(item):
    """Write ``item`` to the MongoDB collection, applying the dedup policy.

    The item is stamped with the current task id (``item['task_id']``) before
    it is written, so the caller's mapping is mutated in place.

    Policy, when dedup is enabled (``get_is_dedup() == '1'``):

    * ``DedupMethod.OVERWRITE`` -- replace the existing document matched on
      the dedup field, or save a new one when nothing matches.
    * ``DedupMethod.IGNORE`` -- keep the existing document and drop the new
      item; save only when nothing matches.
    * any other method -- plain save.

    When dedup is disabled the item is always saved.

    Args:
        item: Mutable mapping holding the scraped fields.

    Raises:
        KeyError: In OVERWRITE mode, if ``item`` lacks the dedup field.
    """
    col = get_col()

    # Assign task_id.
    item['task_id'] = get_task_id()

    # No dedup: always save.
    if get_is_dedup() != '1':
        _save_document(col, item)
        return

    dedup_field = get_dedup_field()
    dedup_method = get_dedup_method()

    if dedup_method == DedupMethod.OVERWRITE:
        # Overwrite: replace the stored document if one matches, else save.
        query = {dedup_field: item[dedup_field]}
        if col.find_one(query):
            col.replace_one(query, item)
        else:
            _save_document(col, item)
    elif dedup_method == DedupMethod.IGNORE:
        # Ignore: an already-stored document wins and the new item is skipped.
        # Previously this branch saved unconditionally, which made IGNORE
        # indistinguishable from "dedup disabled".
        # An item that lacks the dedup field cannot be matched against stored
        # documents, so it is saved as before rather than raising.
        is_duplicate = (
            dedup_field in item
            and col.find_one({dedup_field: item[dedup_field]}) is not None
        )
        if not is_duplicate:
            _save_document(col, item)
    else:
        # Any other method: plain save.
        _save_document(col, item)
def test_save_item(self):
    """Save the same URL ten times; expect one row holding the last title.

    Asserts that exactly one row matches the dedup field and that its title
    is ``'9'`` (the final value written). The rows created by the test are
    deleted and the cursor is closed even when an assertion fails.

    NOTE(review): this presumably requires dedup to be enabled in OVERWRITE
    mode with ``url`` as the dedup field -- confirm against the test setup.
    NOTE(review): ``url`` is not defined in this method; it is expected to be
    provided by the enclosing scope.
    """
    for i in range(10):
        save_item({'url': url, 'title': str(i)})

    dedup_field = get_dedup_field()
    table_name = get_collection()
    where_clause = f'{dedup_field} = \'{url}\''

    conn = get_conn()
    cursor = conn.cursor()
    try:
        # Exactly one row must survive the ten saves.
        cursor.execute(
            f'SELECT count(*) FROM {table_name} WHERE {where_clause}'
        )
        conn.commit()
        res = cursor.fetchone()
        assert res[0] == 1

        # The surviving row must carry the title from the last save.
        cursor.execute(
            f'SELECT url,title FROM {table_name} WHERE {where_clause}'
        )
        conn.commit()
        res = cursor.fetchone()
        assert res[1] == '9'
    finally:
        # Always remove the test rows and release the cursor, so a failed
        # assertion does not leave stale data behind for the next run.
        try:
            cursor.execute(
                f'DELETE FROM {table_name} WHERE {where_clause}')
            conn.commit()
        finally:
            cursor.close()