def test_if_data_bag_with_source():
    """Check ``validate.if_data_bag_with_source`` with and without a source.

    A ``DataBag`` combined with the source name ``"main"`` must be accepted,
    while a ``DataBag`` combined with ``None`` must raise ``AttributeError``
    whose message contains the expected hint (compared case-insensitively).
    """
    expected_hint = "data of type databag needs"

    # Accepted combination: the call must simply not raise.
    validate.if_data_bag_with_source(DataBag(), "main")

    # Rejected combination: a DataBag without any source name.
    with pytest.raises(AttributeError) as excinfo:
        validate.if_data_bag_with_source(DataBag(), None)

    raised_message = str(excinfo.value).lower()
    assert expected_hint in raised_message
def parse_data_to_items(
    self,
    data=None,
    **kwargs,
) -> Iterator[dict]:
    """Lazily yield one item for every data variation produced from *data*.

    Args:
        data: Either a ``DataBag`` or a raw value. A raw value is wrapped in
            a new ``DataBag`` built from ``kwargs``; when the raw value is
            truthy it is passed under the ``main`` keyword.
        **kwargs: Extra keyword arguments forwarded to ``DataBag`` when a
            new bag has to be created.

    Yields:
        The result of ``self._data_to_item`` for each element returned by
        ``self._apply_data_processors``.

    Raises:
        self._drop_item_exception: After iteration has finished, if any
            variation raised it. A single stored exception is re-raised
            as-is; several are wrapped in a new instance that receives the
            list of stored exceptions.
    """
    if not isinstance(data, DataBag):
        if data:
            kwargs["main"] = data
        data = DataBag(**kwargs)

    if not data.has_model_manger_instance():
        data.init_model_manager(self)

    deferred_errors = []
    for processed_data in self._apply_data_processors(data):
        try:
            yield self._data_to_item(processed_data)
        except self._drop_item_exception as error:
            # Remember the failure instead of aborting, so the remaining
            # variations still get processed; stored exceptions are raised
            # once the iteration has ended.
            deferred_errors.append(error)

    if not deferred_errors:
        return

    if len(deferred_errors) == 1:
        raise deferred_errors[0]

    raise self._drop_item_exception(deferred_errors)
def test_item_parser():
    """Verify key lookup on a ``DataBag`` for constructor and ``add`` entries."""
    bag = DataBag(main="groove")
    # Value supplied through the constructor keyword.
    assert bag["main"] == "groove"

    # Value registered afterwards through ``add``.
    bag.add("main_new", "peach")
    assert bag["main_new"] == "peach"
def parse_data(self, data=None, **kwargs):
    """Wrap the given input in a ``DataBag`` and hand it to ``self.parse``.

    Args:
        data: Optional value; when truthy it is passed to ``DataBag`` under
            the ``main`` keyword.
        **kwargs: Additional keyword arguments passed to ``DataBag``.

    Returns:
        Whatever ``self.parse`` returns for the newly created ``DataBag``.
    """
    bag_sources = dict(kwargs)
    if data:
        bag_sources["main"] = data

    return self.parse(DataBag(**bag_sources))
def test_key_query_get_data_bag_bad_source(query, test_data):
    """Expect ``ValueError`` from ``key(query).get`` on the given ``DataBag``.

    The bag only carries ``test_data`` under ``main``; the raised message must
    contain the expected text (compared case-insensitively).
    """
    expected_hint = "provided data from source"
    bag = DataBag(main=test_data)

    with pytest.raises(ValueError) as excinfo:
        key(query).get(bag)

    raised_message = str(excinfo.value).lower()
    assert expected_hint in raised_message
def test_choice_lookup_queries_choice_bool_parser_source(
    pq_query,
    bool_query,
    result,
):
    """Check ``parsers.Choice`` lookups when a choice is a sourced ``Bool``.

    The ``phone`` choice is a ``parsers.Bool`` reading from the ``json_data``
    source. The same expectation is verified twice: once with the lookup
    given as a plain query and once with it wrapped in ``parsers.Text``.
    """

    def build_choice_parser(**extra_options):
        # A fresh Bool parser is created for every Choice instance.
        phone_matcher = parsers.Bool(
            query=jp(bool_query),
            ccontains=["phone", "CELL"],
            source="json_data",
        )
        choice_rules = [
            ("phone", phone_matcher),
            ("accessory", ["phone"]),
        ]
        return parsers.Choice(choices=choice_rules, **extra_options)

    bag = DataBag(main=data_html.categories, json_data=data_dict.name)

    # Lookups supplied as queries.
    parser_with_query_lookup = build_choice_parser(lookups=[pq(pq_query)])
    assert parser_with_query_lookup.parse(bag) == result

    # Lookups supplied as parsers.
    parser_with_parser_lookup = build_choice_parser(
        lookups=[parsers.Text(pq(pq_query))],
    )
    assert parser_with_parser_lookup.parse(bag) == result
def _data_to_item(self, data: DataBag):
    """Build the final item from everything stored in *data*.

    The result of ``data.get_all()`` is run through
    ``self._apply_item_processors``. A falsy outcome yields ``None``;
    otherwise the outcome is returned after passing through
    ``self._remove_protected_item_keys``.
    """
    processed_item = self._apply_item_processors(data.get_all())

    if processed_item:
        return self._remove_protected_item_keys(processed_item)

    return None
def process_item_parser(self, item_key: str, data: DataBag):
    """Resolve the value for *item_key* from its registered item parser.

    The entry in ``self._item_parsers`` is handled according to its type:

    * ``None`` -> ``None``
    * ``str``/``bool``/``float``/``int``/``list``/``dict`` -> returned as-is
    * ``models.ItemModel`` -> ``parse_item`` is called with a copy of *data*
      created via ``data.copy(item_parser.model_manager)``
    * ``BaseParser`` -> ``parse`` is called with *data*
    * anything else -> called directly with *data*

    Raises:
        KeyError: If *item_key* is missing, assuming ``self._item_parsers``
            is a plain mapping.
    """
    item_parser = self._item_parsers[item_key]

    if item_parser is None:
        return None

    # Plain literal values are used directly as the item value.
    plain_value_types = (str, bool, float, int, list, dict)
    if isinstance(item_parser, plain_value_types):
        return item_parser

    if isinstance(item_parser, models.ItemModel):
        nested_data = data.copy(item_parser.model_manager)
        return item_parser.parse_item(nested_data)

    if isinstance(item_parser, BaseParser):
        return item_parser.parse(data)

    return item_parser(data)
def variants_data(data: DataBag, source: str):
    """Yield one copy of *data* per variant group stored under *source*.

    ``data[source]`` is iterated with ``.items()``; every value is indexed
    with ``[0]``. For each entry a copy of *data* is produced in which:

    * ``<source>`` holds the first element of the group,
    * ``<source>_variants`` holds the whole group,
    * ``<source>_variants_len`` holds the number of groups,
    * ``<source>_key`` holds the key of the group.

    The object passed in as *data* is not assigned to; only copies made with
    ``data.copy()`` are.
    """
    variants_by_key: dict = data[source]
    variant_count = len(variants_by_key)

    for group_key, grouped_variants in variants_by_key.items():
        variant_bag = data.copy()
        variant_bag[source] = grouped_variants[0]
        variant_bag[f"{source}_variants"] = grouped_variants
        variant_bag[f"{source}_variants_len"] = variant_count
        variant_bag[f"{source}_key"] = group_key
        yield variant_bag
def parse(self, data: DataBag) -> Iterator[DataBag]:
    """Transform the configured source of *data* and yield the result(s).

    Steps:
        1. Read ``data[self._source]`` (printed when ``self._debug_raw``).
        2. If ``self._process_source_data`` is truthy, pass the value
           through it.
        3. Run ``self._process_data`` (result printed when ``self._debug``).
        4. Without ``self._multi``: yield a single
           ``self._transformed_data_to_data(transformed, data)``.
           With ``self._multi``: iterate the transformed value and yield
           ``self._transformed_data_to_data(entry, data.copy())`` per entry.
    """
    raw_source = data[self._source]
    if self._debug_raw:
        print(raw_source)

    if self._process_source_data:
        raw_source = self._process_source_data(raw_source)

    transformed = self._process_data(raw_source)
    if self._debug:
        print(transformed)

    if not self._multi:
        yield self._transformed_data_to_data(transformed, data)
        return

    # Every entry gets its own copy of the incoming data.
    for transformed_entry in transformed:
        entry_data = data.copy()
        yield self._transformed_data_to_data(transformed_entry, entry_data)
def test_key_query_get_data_bag_source():
    """Check that ``key(...).get`` accepts the source name both ways.

    The source ``"json_data"`` is passed once as the ``source`` keyword and
    once as the second positional argument; both must give ``"smartphone"``.
    """
    bag = DataBag(json_data=test_data_dict)

    via_keyword = key("product_type").get(bag, source="json_data")
    assert via_keyword == "smartphone"

    via_positional = key("product_type").get(bag, "json_data")
    assert via_positional == "smartphone"
def test_key_query_get_data_bag(query, test_data, result):
    """Check ``key(query).get`` on a ``DataBag`` holding *test_data* as ``main``."""
    bag = DataBag(main=test_data)
    extracted = key(query).get(bag)
    assert extracted == result
import pytest from easydata.data import DataBag from easydata.models import ItemModel from easydata.parsers.data import Data from easydata.parsers.text import Text from easydata.queries import jp from tests.factory import data_dict db = DataBag(main=data_dict.item_with_options, additional_data=data_dict.stock) def process_raw_value(value, data): return "{} {}".format(value, str(data["additional_data"]["stock"])) def test_base_data_query(): item_data = Data(query=jp("info.name")) assert item_data.parse(db) == "EasyBook pro 15" def test_base_data_from_item(): item_model = ItemModel() item_model.item_name = Data(query=jp("title")) item_model.item_brand = Data(from_item="name") result = item_model.parse_item(data_dict.title) assert result == {"brand": "Easybook Pro 13", "name": "Easybook Pro 13"} def test_base_data_field_query_as_first_parameter():
from easydata.data import DataBag
from easydata.queries import jp
from easydata.utils import parse
from tests.factory import data_dict

# Module-level bag exposing the multi-variant fixture under the "main" source.
dict_db = DataBag(main=data_dict.variants_data_multi)


def test_query_search():
    """``parse.query_search`` resolves a ``jp`` query against a plain dict."""
    plain_data = {"brand": "Groove"}
    found = parse.query_search(query=jp("brand"), data=plain_data)
    assert found == "Groove"


def test_query_search_data_bag():
    """``parse.query_search`` resolves a ``jp`` query against a ``DataBag``.

    The lookup runs on the module-level ``dict_db`` with ``"main"`` given
    explicitly as the source.
    """
    found = parse.query_search(
        query=jp("data.title"),
        data=dict_db,
        source="main",
    )
    assert found == "EasyData Pro"
} }; """ html_text = "<div><p>EasyData</p></div>" @pytest.mark.parametrize( "query, test_data, source, result", [ ('basePrice": "(.*?)"', json_text, "main", "149.95"), ('basePrice": "(.*?)"', json_text, None, "149.95"), ('wrongSearch": "(.*?)"', json_text, None, None), ('basePrice": "(.*?)"', None, None, None), ('basePrice": "(.*?)"', "", None, None), ('basePrice": "(.*?)"', DataBag(main=json_text), "main", "149.95"), ('brand": "(.*?)"', { "brand": "EasyData" }, None, "EasyData"), ("<p>(.*?)</p>", PyQuery("<div><p>EasyData</p></div>"), None, "EasyData"), # Test that outer html tags are also shown when PyQuery converts back to text ("<p>(.*?)</p>", PyQuery("<p>EasyData</p>"), None, "EasyData"), ], ) def test_re_query(query, test_data, source, result): assert re(query).get(test_data, source) == result def test_re_query_wrong_type_exception(): with pytest.raises(TypeError) as excinfo:
def load_data_bag_with_model():
    """Return a ``DataBag`` with an initialised model manager.

    The bag carries ``data_dict.item_with_options`` serialised with
    ``json.dumps`` under ``main``; its model manager is a ``ModelManager``
    wrapping a new ``ProductJsonModel`` instance.
    """
    manager = ModelManager(ProductJsonModel())
    serialized_item = json.dumps(data_dict.item_with_options)

    bag = DataBag(main=serialized_item)
    bag.init_model_manager(manager)
    return bag