Exemple #1
0
def parse_reviews():
    date_latest = utils.parse_latest_date(REVIEWS_DIR)
    reviews_latest = f"{REVIEWS_DIR}/{date_latest}"

    categories = os.listdir(reviews_latest)
    categories = [c for c in categories if not c.startswith(".")]

    reviews = []
    for category in categories:
        products = os.listdir(f"{reviews_latest}/{category}")
        products = [p for p in products if not p.startswith(".")]

        for product in products:
            product_id = product[: product.index(".json")]
            product_reviews = load_json(f"{reviews_latest}/{category}/{product}")

            for review in product_reviews["data"]:
                review_dict = {
                    "product_id": product_id,
                    "source_id": review["id"],
                    "date": datetime.strptime(review["date"], "%d.%m.%Y"),
                    "rating": review["rating"],
                    "comment_plus": review["comment"]["plus"],
                    "comment_minus": review["comment"]["minus"],
                    "comment_text": review["comment"]["text"],
                }

                review_rating = review["feedback"]["reviewsRating"]
                approved, rated = _parse_approved_rated(review_rating)
                review_dict.update({"review_approved": approved, "review_rated": rated})
                reviews.append(review_dict)

    with open(f"{DB_DUMPS_DIR}/reviews.json", "wb") as f:
        f.write(orjson.dumps(reviews))
Exemple #2
0
def parse_products():
    date_latest = utils.parse_latest_date(PRODUCTS_DIR)
    products_latest = f"{PRODUCTS_DIR}/{date_latest}"

    categories = os.listdir(products_latest)
    categories = [c for c in categories if not c.startswith(".")]
    categories_with_specs = (
        "desktops",
        "notebooks",
    )

    products = []
    used_products = set()

    for category in categories_with_specs:
        category_products = load_json(
            f"{SPECS_DIR}/{date_latest}/{category}-list.json")
        used_products.update({p["source_id"] for p in category_products})
        products.extend(category_products)

    for category in categories:
        category_products = load_json(f"{products_latest}/{category}")
        category_products = [p for p in category_products]
        products.extend(category_products)

    with open(f"{DB_DUMPS_DIR}/products.json", "w") as f:
        f.write(json.dumps(products, ensure_ascii=False))
    def __init__(self, category):
        super().__init__()

        parse_date = parse_latest_date(PRODUCTS_DIR)
        self.parse_list = f"{PRODUCTS_DIR}/{parse_date}/{category}-list.json"

        self.output_dir = f"{REVIEWS_DIR}/{date.today()}/{category}"
        os.makedirs(self.output_dir, exist_ok=True)

        self.log(f"Parser for {category} reviews has been started.")
Exemple #4
0
def parse_products():
    date_latest = utils.parse_latest_date(PRODUCTS_DIR)
    products_latest = f"{PRODUCTS_DIR}/{date_latest}"

    categories = os.listdir(products_latest)
    categories = [c for c in categories if not c.startswith(".")]

    products = []
    for category in categories:
        category_products = load_json(f"{products_latest}/{category}")
        products.extend(category_products)

    with open(f"{DB_DUMPS_DIR}/products.json", "w") as f:
        f.write(json.dumps(products, ensure_ascii=False))
Exemple #5
0
def parse_specs():
    date_latest = utils.parse_latest_date(SPECS_DIR)
    categories_with_specs = (
        "desktops",
        "notebooks",
    )

    specs = []
    for category in categories_with_specs:
        products = load_json(f"{SPECS_DIR}/{date_latest}/{category}-specs.json")
        for product in products:
            specs.append(processed_specs(product))

    with open(f"{DB_DUMPS_DIR}/specs.json", "w") as f:
        json.dump(specs, f, ensure_ascii=False)
Exemple #6
0
def parse_categories():
    date_latest = utils.parse_latest_date(f"{PRODUCTS_DIR}")
    products_latest = f"{PRODUCTS_DIR}/{date_latest}"

    categories = os.listdir(products_latest)
    categories = [c for c in categories if not c.startswith(".")]

    categories_set = set()
    for category in categories:
        products = load_json(f"{products_latest}/{category}")
        for product in products:
            name = product["category_name"].replace("%20", " ")
            categories_set.add((name, product["category_id"]))

    categories_sorted = sorted(categories_set, key=lambda x: (x[0], x[1]))
    categories_dict = [{"name": c[0], "source_id": c[1]} for c in categories_sorted]

    with open(f"{DB_DUMPS_DIR}/categories.json", "w") as f:
        json.dump(categories_dict, f)
Exemple #7
0
 def __init__(self, category):
     super().__init__()
     parse_date = parse_latest_date(SPECS_DIR)
     self.products_json = f"{SPECS_DIR}/{parse_date}/{category}-list.json"
Exemple #8
0
def _insert_from_source(source_file, query):
    db = LocalSession()
    date = utils.parse_latest_date(DB_DUMPS_DIR)

    data: List[dict] = load_json(f"{DB_DUMPS_DIR}/{date}/{source_file}")
    db.bulk_insert_dicts(query, data)