Exemple #1
0
def test_if_data_bag_with_source():
    """A DataBag validates with a named source and is rejected without one."""
    # A DataBag paired with an explicit source must pass without raising.
    validate.if_data_bag_with_source(DataBag(), "main")

    # With ``None`` as the source the validator must raise AttributeError.
    with pytest.raises(AttributeError) as raised:
        validate.if_data_bag_with_source(DataBag(), None)

    message = str(raised.value).lower()
    assert "data of type databag needs" in message
Exemple #2
0
    def parse_data_to_items(
        self,
        data=None,
        **kwargs,
    ) -> Iterator[dict]:
        """Yield one item for every variation produced by the data processors.

        ``data`` may already be a ``DataBag``. Otherwise a new ``DataBag`` is
        built from ``kwargs``, with a truthy ``data`` stored under the
        ``"main"`` key. A bag without a model manager instance gets this
        object registered through ``init_model_manager``.

        Raises:
            self._drop_item_exception: after iteration has finished, when at
                least one variation was dropped. A single stored exception is
                re-raised as-is; several are passed as a list to a new
                ``self._drop_item_exception``.
        """
        if isinstance(data, DataBag):
            data_bag = data
        else:
            if data:
                kwargs["main"] = data

            data_bag = DataBag(**kwargs)

        if not data_bag.has_model_manger_instance():
            data_bag.init_model_manager(self)

        dropped = []

        for variation in self._apply_data_processors(data_bag):
            try:
                yield self._data_to_item(variation)
            except self._drop_item_exception as error:
                # Remember the failure instead of stopping, so the remaining
                # variations still get processed; stored errors are raised
                # once the loop is over.
                dropped.append(error)

        if not dropped:
            return

        if len(dropped) == 1:
            raise dropped[0]

        raise self._drop_item_exception(dropped)
Exemple #3
0
def test_item_parser():
    """Values set via the constructor and via ``add`` are readable by key."""
    bag = DataBag(main="groove")
    assert bag["main"] == "groove"

    # A value added after construction must be reachable the same way.
    bag.add("main_new", "peach")
    assert bag["main_new"] == "peach"
Exemple #4
0
    def parse_data(self, data=None, **kwargs):
        """Wrap the given values in a ``DataBag`` and hand it to ``parse``.

        A truthy ``data`` is stored under the ``"main"`` key; every keyword
        argument becomes an entry of the bag under its own name.
        """
        sources = dict(kwargs)

        if data:
            sources["main"] = data

        return self.parse(DataBag(**sources))
Exemple #5
0
def test_key_query_get_data_bag_bad_source(query, test_data):
    """A key query on the given DataBag raises ValueError with a source hint."""
    bag = DataBag(main=test_data)

    with pytest.raises(ValueError) as raised:
        key(query).get(bag)

    message = str(raised.value).lower()
    assert "provided data from source" in message
Exemple #6
0
def test_choice_lookup_queries_choice_bool_parser_source(
    pq_query,
    bool_query,
    result,
):
    """Choice lookups work while a nested Bool choice reads ``json_data``.

    The same expected ``result`` must come back whether the lookup is given
    as a bare query or wrapped in a ``Text`` parser.
    """

    def generate_choice_parser(**kwargs):
        # The "phone" choice is decided by a Bool parser that is pointed at
        # the "json_data" source instead of the default one.
        phone_choice = parsers.Bool(
            query=jp(bool_query),
            ccontains=["phone", "CELL"],
            source="json_data",
        )
        choices = [
            ("phone", phone_choice),
            ("accessory", ["phone"]),
        ]

        return parsers.Choice(choices=choices, **kwargs)

    data_bag = DataBag(main=data_html.categories, json_data=data_dict.name)

    # Lookup supplied as a bare query.
    query_lookup_parser = generate_choice_parser(lookups=[pq(pq_query)])
    assert query_lookup_parser.parse(data_bag) == result

    # Lookup supplied as a parser.
    text_lookup = parsers.Text(pq(pq_query))
    parser_lookup_parser = generate_choice_parser(lookups=[text_lookup])
    assert parser_lookup_parser.parse(data_bag) == result
Exemple #7
0
    def _data_to_item(self, data: DataBag):
        """Turn the contents of ``data`` into a finished item.

        Everything returned by ``data.get_all()`` is run through the item
        processors. Returns ``None`` when the processed item is falsy,
        otherwise the item with its protected keys removed.
        """
        processed_item = self._apply_item_processors(data.get_all())

        if processed_item:
            return self._remove_protected_item_keys(processed_item)

        return None
Exemple #8
0
    def process_item_parser(self, item_key: str, data: DataBag):
        """Resolve the value for ``item_key`` from its registered item parser.

        The entry stored in ``self._item_parsers`` is handled by kind:

        * ``None`` gives ``None``.
        * A plain ``str``/``bool``/``float``/``int``/``list``/``dict`` value
          is returned unchanged.
        * An ``ItemModel`` parses a copy of ``data`` made with that model's
          ``model_manager``.
        * A ``BaseParser`` parses ``data`` directly.
        * Anything else is called with ``data``.
        """
        parser = self._item_parsers[item_key]

        if parser is None:
            return None

        # Plain values are not parsers at all; they are the result.
        literal_types = (str, bool, float, int, list, dict)
        if isinstance(parser, literal_types):
            return parser

        if isinstance(parser, models.ItemModel):
            nested_data = data.copy(parser.model_manager)
            return parser.parse_item(nested_data)

        if isinstance(parser, BaseParser):
            return parser.parse(data)

        return parser(data)
Exemple #9
0
def variants_data(data: DataBag, source: str):
    """Yield one copy of ``data`` per variant stored under ``source``.

    ``data[source]`` is read as a mapping of variant key to a sequence of
    variant entries. In each yielded copy:

    * ``source`` holds the first entry of the variant,
    * ``"<source>_variants"`` holds the variant's full sequence,
    * ``"<source>_variants_len"`` holds the number of variant keys,
    * ``"<source>_key"`` holds the variant key.
    """
    variants_by_key: dict = data[source]
    variant_count = len(variants_by_key)

    for variant_name, variant_entries in variants_by_key.items():
        variant_bag = data.copy()

        variant_bag[source] = variant_entries[0]
        variant_bag[f"{source}_variants"] = variant_entries
        variant_bag[f"{source}_variants_len"] = variant_count
        variant_bag[f"{source}_key"] = variant_name

        yield variant_bag
Exemple #10
0
    def parse(self, data: DataBag) -> Iterator[DataBag]:
        """Transform the configured source of ``data`` and yield the result.

        The value stored under ``self._source`` is optionally passed through
        ``self._process_source_data`` and then through ``self._process_data``.
        When ``self._multi`` is set, each element of the transformed data is
        combined with its own copy of ``data``; otherwise the transformed
        data is combined with ``data`` itself and yielded once.

        ``self._debug_raw`` and ``self._debug`` print the raw and transformed
        data respectively.
        """
        raw = data[self._source]

        if self._debug_raw:
            print(raw)

        if self._process_source_data:
            raw = self._process_source_data(raw)

        transformed = self._process_data(raw)

        if self._debug:
            print(transformed)

        if not self._multi:
            yield self._transformed_data_to_data(transformed, data)
            return

        for entry in transformed:
            # Every entry gets its own copy of the incoming bag.
            bag_copy = data.copy()

            yield self._transformed_data_to_data(entry, bag_copy)
Exemple #11
0
def test_key_query_get_data_bag_source():
    """``get`` accepts the source both as a keyword and positionally."""
    bag = DataBag(json_data=test_data_dict)

    by_keyword = key("product_type").get(bag, source="json_data")
    assert by_keyword == "smartphone"

    by_position = key("product_type").get(bag, "json_data")
    assert by_position == "smartphone"
Exemple #12
0
def test_key_query_get_data_bag(query, test_data, result):
    """A key query run on a DataBag built from ``test_data`` gives ``result``."""
    bag = DataBag(main=test_data)
    found = key(query).get(bag)

    assert found == result
Exemple #13
0
import pytest

from easydata.data import DataBag
from easydata.models import ItemModel
from easydata.parsers.data import Data
from easydata.parsers.text import Text
from easydata.queries import jp
from tests.factory import data_dict

# Module-level DataBag: item data is stored under "main" and stock data under
# "additional_data" (the key read by process_raw_value below).
db = DataBag(main=data_dict.item_with_options, additional_data=data_dict.stock)


def process_raw_value(value, data):
    """Return ``value`` followed by a space and the stock found in ``data``.

    The stock is read from ``data["additional_data"]["stock"]`` and rendered
    with ``str``.
    """
    stock = data["additional_data"]["stock"]
    return f"{value} {stock!s}"


def test_base_data_query():
    """A ``Data`` parser with a ``jp`` query extracts the value from ``db``."""
    name_parser = Data(query=jp("info.name"))
    parsed = name_parser.parse(db)

    assert parsed == "EasyBook pro 15"


def test_base_data_from_item():
    """A ``from_item`` parser yields the same value as the referenced field."""
    model = ItemModel()
    model.item_name = Data(query=jp("title"))
    # "brand" takes its value from the already parsed "name" item field.
    model.item_brand = Data(from_item="name")

    expected = {"brand": "Easybook Pro 13", "name": "Easybook Pro 13"}
    assert model.parse_item(data_dict.title) == expected


def test_base_data_field_query_as_first_parameter():
Exemple #14
0
from easydata.data import DataBag
from easydata.queries import jp
from easydata.utils import parse
from tests.factory import data_dict

# Module-level DataBag holding the multi-variant fixture under "main"; read by
# test_query_search_data_bag below.
dict_db = DataBag(main=data_dict.variants_data_multi)


def test_query_search():
    """``query_search`` works on a plain dict without a DataBag."""
    plain_data = {"brand": "Groove"}
    found = parse.query_search(query=jp("brand"), data=plain_data)

    assert found == "Groove"


def test_query_search_data_bag():
    """``query_search`` reads from the named source of a DataBag."""
    found = parse.query_search(
        query=jp("data.title"),
        data=dict_db,
        source="main",
    )

    assert found == "EasyData Pro"
Exemple #15
0
    }
};
"""

# Small HTML fragment; not referenced in the visible part of this module, so
# presumably used by tests elsewhere in the file (TODO confirm).
html_text = "<div><p>EasyData</p></div>"


@pytest.mark.parametrize(
    "query, test_data, source, result",
    [
        # Plain text input, with and without an explicit source.
        ('basePrice": "(.*?)"', json_text, "main", "149.95"),
        ('basePrice": "(.*?)"', json_text, None, "149.95"),
        # A pattern that matches nothing gives None.
        ('wrongSearch": "(.*?)"', json_text, None, None),
        # Missing or empty input gives None as well.
        ('basePrice": "(.*?)"', None, None, None),
        ('basePrice": "(.*?)"', "", None, None),
        # Text wrapped in a DataBag and addressed by its source.
        ('basePrice": "(.*?)"', DataBag(main=json_text), "main", "149.95"),
        # Dict input.
        ('brand": "(.*?)"', {"brand": "EasyData"}, None, "EasyData"),
        # PyQuery input.
        (
            "<p>(.*?)</p>",
            PyQuery("<div><p>EasyData</p></div>"),
            None,
            "EasyData",
        ),
        # Test that outer html tags are also shown when PyQuery converts
        # back to text.
        ("<p>(.*?)</p>", PyQuery("<p>EasyData</p>"), None, "EasyData"),
    ],
)
def test_re_query(query, test_data, source, result):
    """The ``re`` query returns the expected match for each input kind."""
    found = re(query).get(test_data, source)

    assert found == result


def test_re_query_wrong_type_exception():
    with pytest.raises(TypeError) as excinfo:
Exemple #16
0
def load_data_bag_with_model():
    """Return a DataBag with JSON item data and an initialised model manager.

    The bag's ``"main"`` entry is the JSON-serialised ``item_with_options``
    fixture; the model manager wraps a fresh ``ProductJsonModel``.
    """
    manager = ModelManager(ProductJsonModel())

    serialized_item = json.dumps(data_dict.item_with_options)
    bag = DataBag(main=serialized_item)
    bag.init_model_manager(manager)

    return bag