class WgGesuchtCrawlerTest(unittest.TestCase):
    """Exercise CrawlWgGesucht against the search URL in TEST_URL."""

    TEST_URL = 'https://www.wg-gesucht.de/wohnungen-in-Berlin.8.2.1.0.html?offer_filter=1&city_id=8&noDeact=1&categories%5B%5D=2&rent_types%5B%5D=0&sMin=70&rMax=1500&rmMin=2&fur=2&sin=2&exc=2&img_only=1'

    def setUp(self):
        """Create a fresh crawler instance for each test."""
        self.crawler = CrawlWgGesucht()

    def test(self):
        """Fetch TEST_URL, extract the entries and check their fields."""
        soup = self.crawler.get_page(self.TEST_URL)
        self.assertIsNotNone(soup, "Should get a soup from the URL")
        entries = self.crawler.extract_data(soup)
        self.assertIsNotNone(entries, "Should parse entries from search URL")
        self.assertGreater(len(entries), 0, "Should have at least one entry")

        first = entries[0]
        self.assertGreater(first['id'], 0, "Id should be parsed")
        self.assertTrue(
            first['url'].startswith("https://www.wg-gesucht.de/wohnungen"),
            u"URL should be an apartment link")

        # These attributes must have a non-None value on the first entry.
        for attr in [
                'title', 'price', 'size', 'rooms', 'address', 'image', 'from'
        ]:
            self.assertIsNotNone(first[attr], attr + " should be set")

        # These attributes only need to appear as a key in at least one entry.
        for attr in ['to']:
            # any() instead of reduce(): reduce is not a builtin on Python 3
            # and would need an import from functools.
            found = any(attr in entry for entry in entries)
            self.assertTrue(found, "Expected " + attr + " to sometimes be set")
Exemple #2
0
class Config:
    """Class to represent flathunter configuration"""

    __log__ = logging.getLogger('flathunt')
    # Default search plugins, shared by all Config instances (class-level).
    __searchers__ = [
        CrawlImmobilienscout(),
        CrawlWgGesucht(),
        CrawlEbayKleinanzeigen(),
        CrawlImmowelt()
    ]

    def __init__(self, filename=None, string=None):
        """Load the configuration from a YAML string or a YAML file.

        Args:
            filename: Path of the YAML file to read. When None (and no
                string is given), "../config.yaml" relative to this
                module's directory is used.
            string: YAML document as text. When not None it takes
                precedence and no file is read.
        """
        if string is not None:
            self.config = yaml.safe_load(string)
            return
        if filename is None:
            filename = os.path.dirname(
                os.path.abspath(__file__)) + "/../config.yaml"
        self.__log__.info("Using config %s", filename)
        # Read as UTF-8 explicitly; otherwise open() falls back to the
        # locale encoding and non-ASCII values can be decoded incorrectly.
        with open(filename, encoding="utf-8") as file:
            self.config = yaml.safe_load(file)

    def __iter__(self):
        """Emulate dictionary"""
        return self.config.__iter__()

    def __getitem__(self, value):
        """Emulate dictionary"""
        return self.config[value]

    def get(self, key, value=None):
        """Emulate dictionary"""
        return self.config.get(key, value)

    def database_location(self):
        """Return the location of the database folder"""
        if "database_location" in self.config:
            return self.config["database_location"]
        # Fall back to the parent directory of this module's directory.
        return os.path.abspath(
            os.path.dirname(os.path.abspath(__file__)) + "/..")

    @staticmethod
    def set_searchers(searchers):
        """Update the active search plugins"""
        Config.__searchers__ = searchers

    @staticmethod
    def searchers():
        """Get the list of search plugins"""
        return Config.__searchers__

    def get_filter(self):
        """Read the configured filter"""
        builder = Filter.builder()
        builder.read_config(self.config)
        return builder.build()
Exemple #3
0
def launch_flat_hunt(config):
    """Hunt for flats once, then keep hunting while the loop is active.

    The processed-ids database file is placed in this module's directory.
    After the first hunt, the function repeats for as long as
    config['loop']['active'] is truthy, sleeping
    config['loop']['sleeping_time'] seconds (default 600) between hunts.
    """
    searchers = [
        CrawlImmobilienscout(),
        CrawlWgGesucht(),
        CrawlEbayKleinanzeigen(),
    ]
    module_dir = os.path.dirname(os.path.abspath(__file__))
    id_watch = IdMaintainer(module_dir + '/processed_ids.db')
    hunter = Hunter()

    # The first hunt always runs, whatever the loop settings are.
    hunter.hunt_flats(config, searchers, id_watch)

    while True:
        # The settings are looked up again on every pass.
        if not config.get('loop', {}).get('active', False):
            break
        pause_seconds = config.get('loop', {}).get('sleeping_time', 60 * 10)
        time.sleep(pause_seconds)
        hunter.hunt_flats(config, searchers, id_watch)
Exemple #4
0
class Config:
    """Dictionary-like wrapper around the YAML configuration."""

    __log__ = logging.getLogger(__name__)
    # Default search plugins, shared by all Config instances (class-level).
    __searchers__ = [
        CrawlImmobilienscout(),
        CrawlWgGesucht(),
        CrawlEbayKleinanzeigen(),
        CrawlImmowelt()
    ]

    def __init__(self, filename=None, string=None):
        """Load the configuration from a YAML string or a YAML file.

        Args:
            filename: Path of the YAML file to read. When None (and no
                string is given), "../config.yaml" relative to this
                module's directory is used.
            string: YAML document as text. When not None it takes
                precedence and no file is read.
        """
        if string is not None:
            self.config = yaml.safe_load(string)
            return
        if filename is None:
            filename = os.path.dirname(
                os.path.abspath(__file__)) + "/../config.yaml"
        # Pass the argument separately so the logging module formats the
        # message only when the record is actually emitted.
        self.__log__.info("Using config %s", filename)
        # Read as UTF-8 explicitly; otherwise open() falls back to the
        # locale encoding and non-ASCII values can be decoded incorrectly.
        with open(filename, encoding="utf-8") as file:
            self.config = yaml.safe_load(file)

    def __iter__(self):
        """Iterate over the loaded configuration object."""
        return self.config.__iter__()

    def __getitem__(self, value):
        """Return the configuration entry stored under ``value``."""
        return self.config[value]

    def get(self, key, value=None):
        """Return the entry for ``key``, or ``value`` if it is missing."""
        return self.config.get(key, value)

    def database_location(self):
        """Return the configured database folder.

        Falls back to the parent directory of this module's directory
        when the configuration has no "database_location" key.
        """
        if "database_location" in self.config:
            return self.config["database_location"]
        return os.path.abspath(
            os.path.dirname(os.path.abspath(__file__)) + "/..")

    @staticmethod
    def set_searchers(searchers):
        """Replace the class-wide list of search plugins."""
        Config.__searchers__ = searchers

    @staticmethod
    def searchers():
        """Return the class-wide list of search plugins."""
        return Config.__searchers__

    def get_filter(self):
        """Build a filter from the loaded configuration."""
        builder = Filter.builder()
        builder.read_config(self.config)
        return builder.build()
Exemple #5
0
 def __init__(self, filename=None, string=None):
     """Load the YAML configuration and create the search plugins.

     Args:
         filename: Path of the YAML file to read. When None (and no
             string is given), "../config.yaml" relative to this
             module's directory is used.
         string: YAML document as text. When not None it takes
             precedence and no file is read.
     """
     if string is not None:
         self.config = yaml.safe_load(string)
     else:
         if filename is None:
             filename = os.path.dirname(os.path.abspath(__file__)) + "/../config.yaml"
         self.__log__.info("Using config %s", filename)
         # Read as UTF-8 explicitly; otherwise open() falls back to the
         # locale encoding and non-ASCII values can be decoded incorrectly.
         with open(filename, encoding="utf-8") as file:
             self.config = yaml.safe_load(file)
     # Every crawler receives this config object as its constructor argument.
     self.__searchers__ = [CrawlImmobilienscout(self),
                           CrawlWgGesucht(self),
                           CrawlEbayKleinanzeigen(self),
                           CrawlImmowelt(self),
                           CrawlSubito(self),
                           CrawlImmobiliare(self),
                           CrawlIdealista(self)]
Exemple #6
0
import os

from flathunter.crawl_ebaykleinanzeigen import CrawlEbayKleinanzeigen
from flathunter.crawl_immobilienscout import CrawlImmobilienscout
from flathunter.crawl_wggesucht import CrawlWgGesucht
from flathunter.crawl_immowelt import CrawlImmowelt
from flathunter.idmaintainer import IdMaintainer
from flathunter.googlecloud_idmaintainer import GoogleCloudIdMaintainer
from flathunter.hunter import Hunter
from flathunter.config import Config

from flathunter.web import app

# Search plugins handed to the hunter.
searchers = [
    CrawlImmobilienscout(),
    CrawlWgGesucht(),
    CrawlEbayKleinanzeigen(),
    CrawlImmowelt(),
]

if __name__ != '__main__':
    # Imported rather than run directly: use the Google Cloud DB.
    id_watch = GoogleCloudIdMaintainer()
else:
    # Running locally: use the SQLite DB file in this module's directory.
    id_watch = IdMaintainer(
        os.path.dirname(os.path.abspath(__file__)) + '/processed_ids.db')

hunter = Hunter(Config(), searchers, id_watch)

# Publish the hunter through the web app's config under the "HUNTER" key.
app.config["HUNTER"] = hunter
Exemple #7
0
 def setUp(self):
     """Create a CrawlWgGesucht instance and store it on self.crawler."""
     self.crawler = CrawlWgGesucht()
 def setUp(self):
     """Create the crawler with a Config built from self.DUMMY_CONFIG."""
     self.crawler = CrawlWgGesucht(Config(string=self.DUMMY_CONFIG))