Ejemplo n.º 1
0
class PropertyService(plugins.Plugin):
    """Application service for querying and updating stored properties.

    Persistence goes through ``property_repository``; routes to work are
    requested from ``directions_service``.
    """

    directions_service = injection.Dependency()
    property_repository = injection.Dependency()

    def find(self, max_price=None, favorite=None, area=None, limit=None):
        """Return the properties matching the given filters as a list.

        All filters are forwarded unchanged to ``property_repository.find``.

        Raises:
            ValueError: if ``exceptions.InvalidQuery`` is raised while
                querying; the original error is attached as the cause.
        """
        try:
            # The result is materialised inside the ``try`` so an
            # ``InvalidQuery`` raised while consuming the result is
            # translated too.
            return list(
                self.property_repository.find(
                    max_price=max_price,
                    favorite=favorite,
                    area=area,
                    limit=limit,
                )
            )
        except exceptions.InvalidQuery as err:
            raise ValueError(str(err)) from err

    async def to_work(self, prop_id: str, mode: str, refresh: bool = False):
        """Return the route to work for a property, fetching it when needed.

        A route already stored on the property (``toWork``) is returned
        unchanged unless ``refresh`` is true. Otherwise a route is requested
        from ``directions_service`` for the property's location and ``mode``,
        stored on the property through the repository, and returned.
        """
        prop = self.property_repository.get(prop_id)

        if prop.toWork and not refresh:
            return prop.toWork

        route = await self.directions_service.to_work(prop.location, mode)

        # ``replace`` yields the updated property object that gets persisted.
        self.property_repository.update(prop.replace(toWork=route))

        return route

    def favorite(self, prop_id: str, val: bool):
        """Set the ``favorite`` flag of a property to ``val``.

        The repository is only updated when the stored flag differs.
        """
        prop = self.property_repository.get(prop_id)
        if prop.favorite != val:
            self.property_repository.update(prop.replace(favorite=val))

    def ban(self, prop_id: str, val: bool):
        """Set the ``banned`` flag of a property to ``val``.

        The repository is only updated when the stored flag differs.
        """
        prop = self.property_repository.get(prop_id)
        if prop.banned != val:
            self.property_repository.update(prop.replace(banned=val))

    def clear_properties(self, banned=False, favorites=False):
        """Forward a clear request, with both flags, to the repository."""
        self.property_repository.clear(banned=banned, favorites=favorites)

    def save_search_area(self, name: str, geojson):
        """Store ``geojson`` as the search area called ``name``."""
        self.property_repository.set_search_area(name, geojson)

    def get_search_areas(self):
        """Return the search areas as reported by the repository."""
        return self.property_repository.get_search_areas()
Ejemplo n.º 2
0
class AuthService(injection.Component):
    """Registers users and issues tokens for valid credentials."""

    user_repository = injection.Dependency()

    def register(self, username, password):
        """Store a new user with a hashed password.

        Raises:
            ValueError: if ``password`` is not a string of at least four
                characters.
        """
        # The type check comes first so ``len`` is never called on a
        # non-string value.
        if not (isinstance(password, str) and len(password) >= 4):
            raise ValueError("Password should be a string of 4+ characters.")

        # Only the hash is handed to the repository, never the plain text.
        hashed = generate_password_hash(password)
        self.user_repository.add_user(User(username=username, password=hashed))

    def get_tokens(self, username, password):
        """Return access and refresh tokens for valid credentials.

        Returns:
            A dict with ``access_token``, ``refresh_token`` and ``username``,
            or ``None`` when the credentials are not valid.
        """
        if not self._are_credentials_valid(username, password):
            return None

        return {
            "access_token": create_access_token(identity=username),
            "refresh_token": create_refresh_token(identity=username),
            "username": username,
        }

    def _are_credentials_valid(self, username, password):
        """Return whether ``password`` is valid for the stored user.

        An unknown user (``exceptions.EntityNotFound``) yields ``False``.
        """
        try:
            user = self.user_repository.get_user(username)
        except exceptions.EntityNotFound:
            return False

        return user.is_password_valid(password)
Ejemplo n.º 3
0
class MemoryConfiguration(DefaultConfiguration):
    """Configuration whose user settings are an injected, canned mapping.

    The injected ``config_overrides`` value is what ``load_usercfg`` hands
    back as the user configuration.
    """

    config_overrides = injection.Dependency()

    def load_usercfg(self):
        """Return the injected overrides as the user configuration."""
        overrides = self.config_overrides
        return overrides
Ejemplo n.º 4
0
class CribPipeline(base.WithInjection):
    """Item pipeline that persists scraped properties via the repository."""

    property_repository = injection.Dependency()

    def process_item(self, item, spider):
        """Persist ``item["prop"]`` and pass the item on.

        The property is updated when ``item["existing"]`` is truthy and
        inserted otherwise.

        Returns:
            The same ``item``. Scrapy requires ``process_item`` to return the
            item (or raise ``DropItem``) so that later pipeline stages still
            receive it.
        """
        if item["existing"]:
            self.property_repository.update(item["prop"])
        else:
            self.property_repository.insert(item["prop"])
        return item
Ejemplo n.º 5
0
class ScrapeService(plugins.Plugin):
    """Service that starts a crawl for a single search."""

    _scrape = injection.Dependency("scrape")

    def scrape(self, search):
        """Crawl ``search`` with the ``rightmove`` spider.

        ``search`` becomes the only entry of the ``RIGHTMOVE_SEARCHES``
        setting override passed to the injected scrape component.
        """
        overrides = {"RIGHTMOVE_SEARCHES": [search]}
        self._scrape.crawl(
            "rightmove",
            loglevel="INFO",
            settings_override=overrides,
        )
Ejemplo n.º 6
0
class LoadedConfiguration(DefaultConfiguration):
    """Configuration whose user settings are read from a config file."""

    config_file = injection.Dependency()

    def load_usercfg(self):
        """Return the user configuration parsed from ``config_file``.

        An empty dict is returned when no config file is configured.
        """
        path = self.config_file
        if not path:
            return {}

        with open(path, "r") as handle:
            return _load(self.config_loaders, handle)
Ejemplo n.º 7
0
class RightmoveSpider(base.WithInjection, scrapy.Spider):
    """Spider that follows Rightmove searches and yields ``PropertyItem``s."""

    name: str = "rightmove"
    property_repository = injection.Dependency()

    def start_requests(self) -> Iterable[scrapy.Request]:
        """Yield one request per URL in the ``RIGHTMOVE_SEARCHES`` setting."""
        searches = self.settings.getlist("RIGHTMOVE_SEARCHES") or []
        yield from (
            scrapy.Request(url=search, callback=self.parse)
            for search in searches
        )

    def parse(self, response: Response) -> PR:
        """Handle the first result page of a search.

        Requests for the further pages are yielded first, followed by the
        requests for the properties listed on this page.
        """
        search_model = _load_model(response)
        yield from (
            scrapy.Request(page_url, callback=self.parse_page)
            for page_url in _get_pages(response, search_model)
        )

        yield from self.parse_propertymodel(response, search_model)

    def parse_page(self, response: Response) -> PR:
        """Handle a follow-up result page of a search."""
        yield from self.parse_propertymodel(response, _load_model(response))

    def parse_propertymodel(self, response: Response, model: Dict) -> PR:
        """Yield a detail-page request for every non-banned listing."""
        for listing in model["properties"]:
            _make_id(listing)
            stored = self.property_repository.get(listing["id"])
            if not (stored and stored.banned):
                # Bind the listing and stored property so the detail
                # callback receives them together with the response.
                on_detail = functools.partial(
                    self.parse_property, listing, stored)
                yield response.follow(
                    listing["propertyUrl"], callback=on_detail)

    def parse_property(self, data, existing, response: Response) -> PR:
        """Enrich ``data`` from the detail page and yield a ``PropertyItem``."""
        page_model = _load_property_page_model(response)
        details = page_model["propertyData"]

        data["bedrooms"] = details["bedrooms"]
        address = details["address"]
        data["displayAddress"] = address["displayAddress"]
        data["propertyImages"] = [
            image["url"] for image in details["images"]
        ]
        data["floorplanImages"] = [
            plan["url"] for plan in details["floorplans"]
        ]
        data["keyFeatures"] = details["keyFeatures"]
        data["lettingInformation"] = details["lettings"]

        item_fields = {"prop": to_prop(data, existing), "existing": existing}
        yield PropertyItem(item_fields)
Ejemplo n.º 8
0
class DefaultConfiguration(injection.Component):
    """Base configuration: defaults merged with a user configuration.

    The merged configuration is built on first item access and then reused.
    """

    config_loaders = injection.Dependency()

    def __init__(self, name, container):
        super().__init__(name, container)
        # Holds the merged configuration; ``None`` until first loaded.
        self._cfg = None

    def __getitem__(self, key):
        """Return the section stored under ``key``, or ``{}`` when absent."""
        merged = self._cfg
        if merged is None:
            merged = self._cfg = self.load()
        return merged.get(key, {})

    def load(self):
        """Return the defaults merged with the user configuration."""
        defaults = _load_default(self.config_loaders)
        overrides = self.load_usercfg()
        return _merge_config(defaults, overrides)

    def load_usercfg(self):
        """Return the user configuration; empty in this base class."""
        return dict()
Ejemplo n.º 9
0
class Flask(injection.Component, Quart):
    """Quart application that is also an injection component.

    The repositories and services used by the application are declared as
    injected dependencies on the class.
    """

    _crib_config = injection.Infrastructure("config")
    user_repository = injection.Dependency()
    property_repository = injection.Dependency()
    directions_service = injection.Dependency()
    directions_repository = injection.Dependency()
    property_service = injection.Dependency()
    auth_service = injection.Dependency()
    scrape_service = injection.Dependency()

    def __init__(self, *args, **kwargs):
        # Set the backing field before the base initialisers run.
        # NOTE(review): presumably so ``name`` is readable even if a base
        # class reads it during construction -- confirm.
        self._name = None
        super().__init__(*args, **kwargs)

    @property
    def name(self):
        """Name of the application, backed by ``_name``."""
        return self._name

    @name.setter
    def name(self, value):
        self._name = value
Ejemplo n.º 10
0
class ZooplaSpider(base.WithInjection, scrapy.Spider):
    """Spider that follows Zoopla searches and yields ``PropertyItem``s."""

    name: str = "zoopla"
    property_repository = injection.Dependency()

    def start_requests(self) -> Iterable[scrapy.Request]:
        """Yield one request per URL in the ``ZOOPLA_SEARCHES`` setting."""
        urls = self.settings.getlist("ZOOPLA_SEARCHES") or []
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response: Response) -> PR:
        """Handle the first result page of a search.

        The response body is parsed as JSON. Requests for the further pages
        are yielded first, followed by the requests for the listings on this
        page.
        """
        model = json.loads(response.body)
        for page in _get_pages(response, model):
            yield scrapy.Request(page, callback=self.parse_page)
        yield from self.parse_propertymodel(response, model)

    def parse_page(self, response: Response) -> PR:
        """Handle a follow-up result page of a search."""
        model = json.loads(response.body)
        yield from self.parse_propertymodel(response, model)

    def parse_propertymodel(self, response: Response, model: Dict) -> PR:
        """Yield a detail-page request for every non-banned listing."""
        for data in model["listing"]:
            _make_id(data)
            existing = self.property_repository.get(data["id"])
            if existing and existing.banned:
                continue
            # Bind the listing and stored property so the detail callback
            # receives them together with the response.
            callback = functools.partial(self.parse_property, data, existing)
            yield response.follow(data["details_url"], callback=callback)

    def parse_property(self, data, existing, response: Response) -> PR:
        """Build a ``Property`` from listing data plus its detail page.

        ``favorite`` and ``toWork`` are carried over from ``existing`` when
        one was found, so a re-scrape does not discard them.
        """
        propd = {
            "bedrooms": int(data["num_bedrooms"]),
            "displayAddress": data["displayable_address"],
            "feesApply": False,
            "firstVisibleDate": _to_dt(data["first_published_date"]),
            "id": data["id"],
            "location": {
                "latitude": data["latitude"],
                "longitude": data["longitude"],
            },
            "price": {
                "amount": data["rental_prices"]["per_month"],
                # ISO 4217 code for pound sterling (was misspelled "GPB").
                "currencyCode": "GBP",
                "frequency": "monthly",
            },
            "propertyImages": self._get_property_images(response),
            "floorplanImages": self._get_floorplan_images(response),
            "propertySubType": data["property_type"],
            "propertyTypeFullDescription": "{} bedroom {}".format(
                data["num_bedrooms"], data["property_type"]),
            "propertyUrl": data["details_url"],
            "students": False,
            "summary": data["description"],
            "transactionType": "rent",
            "keyFeatures": self._get_key_features(response),
            "lettingInformation": {
                "Updated": _to_dt(data["last_published_date"]),
                "Furnishing": data["furnished_state"],
            },
            "feesApplyText": data.get("letting_fees", ""),
            "favorite": existing.favorite if existing else False,
            "toWork": (existing.toWork.asdict()
                       if existing and existing.toWork else None),
        }
        prop = Property.fromdict(propd)
        yield PropertyItem({"prop": prop, "existing": existing})

    def _get_key_features(self, response: Response) -> List[str]:
        """Return the stripped text of each bullet in the features list."""
        xpath = "//ul[contains(@class, 'dp-features-list--bullets')]/li/text()"
        return [s.strip() for s in response.xpath(xpath).extract()]

    def _get_property_images(self, response: Response) -> List[str]:
        """Return the photo URLs from the JSON in the page's script element.

        The JSON is read from the fourth ``<script>`` directly under
        ``<body>``; the photos are taken from ``["@graph"][3]["photo"]``.
        """
        data = json.loads(
            response.xpath("/html/body/script[4]/text()").extract_first())
        photos = data["@graph"][3]["photo"]
        return [p["contentUrl"] for p in photos]

    def _get_floorplan_images(self, response: Response) -> List[str]:
        """Return the floorplan image URL as a list, or ``[]`` if not found.

        The URL is extracted from the ``url('...')`` part of the ``style``
        attribute of the first matching gallery asset.
        """
        xpath = "//div[@id = 'floorplan-1']//div[contains(@class, 'ui-modal-gallery__asset')]/@style"
        style = response.xpath(xpath).extract_first()
        if style:
            match = re.match(r".*url\('(.*)'\).*", style)
            if match:
                return [match.group(1)]
        return []
Ejemplo n.º 11
0
class DirectionsService(plugins.Plugin):
    """Base service for routes to work and duration maps of a search area.

    Concrete services implement ``to_work``. Fetched directions and computed
    areas are stored through ``directions_repository``.
    """

    directions_repository = injection.Dependency()

    @classmethod
    def config_schema(cls) -> Dict[str, Any]:
        """Return the validation schema for this service's configuration.

        Two sections are required: ``work-location`` (latitude/longitude)
        and ``search-area`` (north-east and south-west corners plus the
        number of samples per axis).
        """
        return {
            "work-location": {
                "type": "dict",
                "required": True,
                "schema": {
                    "latitude": {
                        "type": "float",
                        "required": True
                    },
                    "longitude": {
                        "type": "float",
                        "required": True
                    },
                },
            },
            "search-area": {
                "type": "dict",
                "required": True,
                "schema": {
                    "northEast": {
                        "type": "dict",
                        "required": True,
                        "schema": {
                            "lat": {
                                "type": "float",
                                "required": True
                            },
                            "lng": {
                                "type": "float",
                                "required": True
                            },
                        },
                    },
                    "southWest": {
                        "type": "dict",
                        "required": True,
                        "schema": {
                            "lat": {
                                "type": "float",
                                "required": True
                            },
                            "lng": {
                                "type": "float",
                                "required": True
                            },
                        },
                    },
                    "latsamples": {
                        "type": "float",
                        "required": True
                    },
                    "lngsamples": {
                        "type": "float",
                        "required": True
                    },
                },
            },
        }

    @abc.abstractmethod
    async def to_work(self, origin: Location, mode: str) -> Dict:
        """Return the route from ``origin`` to work for ``mode``.

        This base implementation returns an empty dict.
        """
        return {}

    async def fetch_map_to_work(self, mode: str) -> None:
        """Fetch and store a route to work for every raster point.

        A route that cannot be converted into a ``Direction`` is logged and
        skipped, so one bad point does not abort the whole run.
        """
        for i, ll in enumerate(self.raster_map()):
            log.info("Fetching #%s", i)
            route = await self.to_work(Location(**ll), mode)
            try:
                d = Direction.fromdict(route)
            except Exception as err:
                # Deliberately best effort: log and continue with the
                # next point.
                log.info("%s", err)
            else:
                self.directions_repository.insert(d)

    def raster_map(self) -> Iterable[Dict]:
        """Yield ``latitude``/``longitude`` dicts covering the search area.

        The step on each axis is the extent of the configured area divided
        by the configured number of samples for that axis.
        """
        search_area = self.config["search-area"]
        ne = search_area["northEast"]
        sw = search_area["southWest"]
        latstep = (ne["lat"] - sw["lat"]) / search_area["latsamples"]
        lngstep = (ne["lng"] - sw["lng"]) / search_area["lngsamples"]
        for lat in frange(sw["lat"], ne["lat"], latstep):
            for lng in frange(sw["lng"], ne["lng"], lngstep):
                yield {"latitude": lat, "longitude": lng}

    def to_work_durations(self, colormap: str,
                          maxDuration: int) -> Iterable[Dict[str, Any]]:
        """Return stored durations below ``maxDuration``, each with a colour.

        Every returned entry gets a ``color`` key holding a hex colour from
        ``colormap``. The smallest duration maps to the first colour and the
        largest to the last one.

        Raises:
            ValueError: if ``colormap`` is not a known colour map name.
        """
        try:
            cmap = cmocean.cm.cmap_d[colormap]
        except KeyError as err:
            raise ValueError(f"Invalid color map {colormap}") from err

        getDuration = operator.itemgetter("durationValue")
        durations = [
            dur for dur in self.directions_repository.get_to_work_durations()
            if getDuration(dur) < maxDuration
        ]
        if not durations:
            return durations

        maxD = getDuration(max(durations, key=getDuration))
        minD = getDuration(min(durations, key=getDuration))

        # One colour per integer duration in [minD, maxD], so ``v - minD``
        # is always a valid, non-negative index -- also when every duration
        # is the same value.
        # NOTE(review): this indexing assumes ``durationValue`` is an
        # integer -- confirm against the repository.
        colors = self._color_values(minD, maxD, cmap)

        for d in durations:
            d["color"] = colors[getDuration(d) - minD]
        log.debug("Fetched %s durations", len(durations))

        return durations

    def colormaps(self) -> Iterable[str]:
        """Return the names of the available colour maps."""
        return list(cmocean.cm.cmap_d.keys())

    @staticmethod
    def _color_values(minV, maxV, colormap):
        """Return one hex colour per integer value in ``[minV, maxV]``.

        The colour map is resampled to exactly that many entries; index 0
        corresponds to ``minV`` and the last index to ``maxV``.
        """
        steps = maxV - minV + 1
        colormap = colormap._resample(steps)
        # Drop the last channel of each colour before converting to hex
        # (presumably alpha, given RGBA output from the colour map).
        rgb_values = colormap(numpy.arange(steps))[:, :-1]
        return [rgb2hex(rgb) for rgb in rgb_values]

    def get_area(self, max_duration=43 * 60, alpha=None, hullbuffer=None):
        """Return the area reachable within ``max_duration``.

        A stored area for ``max_duration`` is returned when available.
        Otherwise the area is computed from the stored durations not
        exceeding ``max_duration``, stored for later calls, and returned.
        """
        area = self.directions_repository.get_to_work_area(max_duration)
        if area:
            return area

        # The two components of each stored location are swapped before
        # being handed to the analysis.
        directions = [
            [d["location"][1], d["location"][0]]
            for d in self.directions_repository.get_to_work_durations()
            if d["durationValue"] <= max_duration
        ]
        area = map_analysis.get_area(directions, alpha, hullbuffer)
        self.directions_repository.insert_to_work_area(
            max_duration=max_duration, area=area)
        return area
Ejemplo n.º 12
0
class RightmoveSpider(base.WithInjection, scrapy.Spider):
    """Spider that follows Rightmove searches and yields ``PropertyItem``s.

    Listing details are scraped from the detail page's HTML with XPath.
    """

    name: str = "rightmove"
    property_repository = injection.Dependency()

    def start_requests(self) -> Iterable[scrapy.Request]:
        """Yield one request per URL in the ``RIGHTMOVE_SEARCHES`` setting."""
        searches = self.settings.getlist("RIGHTMOVE_SEARCHES") or []
        yield from (
            scrapy.Request(url=search, callback=self.parse)
            for search in searches
        )

    def parse(self, response: Response) -> PR:
        """Handle the first result page of a search.

        Requests for the further pages are yielded first, followed by the
        requests for the properties listed on this page.
        """
        search_model = _load_model(response)
        yield from (
            scrapy.Request(page_url, callback=self.parse_page)
            for page_url in _get_pages(response, search_model)
        )

        yield from self.parse_propertymodel(response, search_model)

    def parse_page(self, response: Response) -> PR:
        """Handle a follow-up result page of a search."""
        yield from self.parse_propertymodel(response, _load_model(response))

    def parse_propertymodel(self, response: Response, model: Dict) -> PR:
        """Yield a detail-page request for every non-banned listing."""
        for listing in model["properties"]:
            _make_id(listing)
            stored = self.property_repository.get(listing["id"])
            if not (stored and stored.banned):
                # Bind the listing and stored property so the detail
                # callback receives them together with the response.
                on_detail = functools.partial(
                    self.parse_property, listing, stored)
                yield response.follow(
                    listing["propertyUrl"], callback=on_detail)

    def parse_property(self, data, existing, response: Response) -> PR:
        """Enrich ``data`` from the detail page and yield a ``PropertyItem``."""
        data["propertyImages"] = self._get_property_images(response)
        data["floorplanImages"] = self._get_floorplan_images(response)
        data["lettingInformation"] = self._get_letting_information(response)
        data["keyFeatures"] = self._get_key_features(response)
        data["summary"] = self._get_summary(response)

        item_fields = {"prop": to_prop(data, existing), "existing": existing}
        yield PropertyItem(item_fields)

    def _get_key_features(self, response: Response) -> List[str]:
        """Return the text of each entry in the key-features list."""
        features = response.xpath(
            "//div[contains(@class,'key-features')]/ul/li/text()")
        return features.extract()

    def _get_letting_information(self, response: Response) -> Dict[str, str]:
        """Return the letting-information table as a label-to-value dict.

        The cell texts are paired up two at a time; trailing ``:`` and
        space characters are removed from each label.
        """
        cells = response.xpath(
            "//div[@id='lettingInformation']//td/text()").extract()
        labels = cells[::2]
        values = cells[1::2]
        return {
            label.rstrip(": "): value for label, value in zip(labels, values)
        }

    def _get_summary(self, response: Response) -> str:
        """Return the description nodes joined by newlines and stripped."""
        nodes = response.xpath(
            "//div[@id='description']//div[@class='sect ']/node()").extract()
        return "\n".join(nodes).strip()

    def _get_property_images(self, response: Response) -> List[str]:
        """Return the ``src`` of every image in the gallery grid."""
        sources = response.xpath(
            "//div[@class='gallery gallery-grid']/ul/*/a/img/@src")
        return sources.extract()

    def _get_floorplan_images(self, response: Response) -> List[str]:
        """Return the distinct floorplan image sources (order unspecified)."""
        sources = response.xpath(
            "//div[contains(@class,'floorplancontent')]//img/@src").extract()
        distinct = set(sources)
        return list(distinct)