コード例 #1
0
ファイル: hemnet.py プロジェクト: djjupa/hemnet
 def parseLocal(self):
     """Parse the saved page in ``response.html`` and print its first results.

     Reads ``response.html`` from the current working directory, runs it
     through the same item parser used for live searches and prints the
     search header followed by at most ten entries.
     """
     # Use a context manager so the file handle is closed deterministically
     # instead of being left for the garbage collector.
     with open("response.html") as handle:
         raw = handle.read()
     doc = Request.s_unicodeResponse(raw)

     brokers = {"totalItems": 0, "results": {}}
     result = self.parseItems(
         doc.xpath("//div[contains(@class, 'item result')]"), brokers)
     result = self.createResultItem(result)

     self.printSearchHeader(result)
     # NOTE(review): slicing assumes createResultItem turns "results" into a
     # sequence -- confirm against createResultItem.
     for idx, item in enumerate(result.get("results")[:10]):
         self.printBroker(idx, item)
コード例 #2
0
 def parseLocal(self):
     """Parse the saved page in ``response.html`` and print its first results.

     Reads ``response.html`` from the current working directory, runs it
     through the same item parser used for live searches and prints the
     search header followed by at most ten entries.
     """
     # Use a context manager so the file handle is closed deterministically
     # instead of being left for the garbage collector.
     with open("response.html") as handle:
         raw = handle.read()
     doc = Request.s_unicodeResponse(raw)

     brokers = {"totalItems": 0, "results": {}}
     result = self.parseItems(
         doc.xpath("//div[contains(@class, 'item result')]"), brokers)
     result = self.createResultItem(result)

     self.printSearchHeader(result)
     # NOTE(review): slicing assumes createResultItem turns "results" into a
     # sequence -- confirm against createResultItem.
     for idx, item in enumerate(result.get("results")[:10]):
         self.printBroker(idx, item)
コード例 #3
0
    def __init__(self):
        """Create the logger and request helper and fill the lookup tables.

        Everything assigned here is fixed configuration, apart from
        ``searchQuery`` which starts out empty.
        """
        self.log = Logger("Hemnet")
        self.request = Request()

        # Site root; every endpoint below is derived from it.
        root = "http://www.hemnet.se"
        self.baseUrl = root
        self.baseSearch = root + "/sok/create"
        self.baseLocation = root + "/locations/show?"
        self.baseResult = root + "/resultat"
        self.searchQuery = dict()

        # Location type, English -> Swedish.
        self.translatedTypes = dict(
            municipality="Kommun",
            district=u"Område",
            postal_city="Stadsdel",
            region=u"Län",
            street="Gata",
            city="Stad"
        )

        # Average type, English -> Swedish label.
        # NOTE(review): "Prissäkning" looks like a misspelling of
        # "Prissänkning"; left untouched here -- confirm before changing.
        self.translatedAverageTypes = dict(
            age=u"List ålder",
            price="Medelpris",
            price_m2=u"Pris per m²",
            size=u"Storlek (m²)",
            rooms="Antal rum",
            fee=u"Månadsavgift",
            price_change_up=u"Prisökning (%)",
            price_change_down=u"Prissäkning (%)"
        )

        # Search type codes and the item type name each one stands for.
        self.searchTypes = dict(
            f="fritidshus", v="villa", t="tomt", r="radhus",
            g="gard", b="bostadsratt", o="other", a="all"
        )

        # Items to get an average for, each starting at zero.
        self.itemAverageTypes = dict.fromkeys(
            ("age", "price", "price_m2", "size", "rooms", "fee",
             "price_change_up", "price_change_down"), 0)

        # Base result format.
        self.resultFormat = dict(totalItems=0, results=dict())
        self.log.info("Initiated Hemnet")
コード例 #4
0
class Hemnet():
    def __init__(self):
        """Create the logger and request helper and fill the lookup tables.

        Everything assigned here is fixed configuration, apart from
        ``searchQuery`` which starts out empty.
        """
        self.log = Logger("Hemnet")
        self.request = Request()

        # Site root; every endpoint below is derived from it.
        root = "http://www.hemnet.se"
        self.baseUrl = root
        self.baseSearch = root + "/sok/create"
        self.baseLocation = root + "/locations/show?"
        self.baseResult = root + "/resultat"
        self.searchQuery = dict()

        # Location type, English -> Swedish.
        self.translatedTypes = dict(
            municipality="Kommun",
            district=u"Område",
            postal_city="Stadsdel",
            region=u"Län",
            street="Gata",
            city="Stad"
        )

        # Average type, English -> Swedish label.
        # NOTE(review): "Prissäkning" looks like a misspelling of
        # "Prissänkning"; left untouched here -- confirm before changing.
        self.translatedAverageTypes = dict(
            age=u"List ålder",
            price="Medelpris",
            price_m2=u"Pris per m²",
            size=u"Storlek (m²)",
            rooms="Antal rum",
            fee=u"Månadsavgift",
            price_change_up=u"Prisökning (%)",
            price_change_down=u"Prissäkning (%)"
        )

        # Search type codes and the item type name each one stands for.
        self.searchTypes = dict(
            f="fritidshus", v="villa", t="tomt", r="radhus",
            g="gard", b="bostadsratt", o="other", a="all"
        )

        # Items to get an average for, each starting at zero.
        self.itemAverageTypes = dict.fromkeys(
            ("age", "price", "price_m2", "size", "rooms", "fee",
             "price_change_up", "price_change_down"), 0)

        # Base result format.
        self.resultFormat = dict(totalItems=0, results=dict())
        self.log.info("Initiated Hemnet")

    '''
        Searchdata is a formpost in a very specific format
    '''

    def createSearchFormData(self, data, specificType='a'):
        locationData = [{
            "id": (data.get("id")),
            "name": (data.get("name")),
            "parent_id": (data.get("parent_location").get("id")),
            "parent_name": (data.get("parent_location").get("name"))
        }]

        searchData = {
            "search[location_search]": locationData,
            "search[location_ids][]": data.get("id"),
            "search[region_id]": -1,
            "search[municipality_ids][]": -1,
            "search[country_id]": 0,
            "search[item_types][]": "%s" % self.searchTypes[specificType],
            "search[price_min]": '',
            "search[price_max]": '',
            "search[fee_max]": '',
            "search[rooms_min]": '',
            "search[living_area_min]": '',
            "search[keywords]": '',
            "commit": ''
        }
        return searchData

    def searchRequest(self, query):
        """Post ``query`` to the search endpoint and return what the request helper returns."""
        endpoint = self.baseSearch
        return self.request.postRequest(endpoint, query)

    '''
        Pass a list of keys and a list of dicts to calculate the average value for each key
    '''

    def avgByKey(self, keys, data):
        """Average the values of the selected keys over a list of dicts.

        Each entry of ``data`` contributes the values whose key appears in
        ``keys``. Every sum is divided by ``len(data)``, i.e. by the number
        of entries, not by how many entries actually carried that key.
        Keys that never occur are absent from the returned dict.
        """
        totals = {}
        for record in data:
            for name, value in record.items():
                if name not in keys:
                    continue
                totals[name] = totals.get(name, 0) + value
        return dict(
            (name, total / len(data)) for name, total in totals.items())

    def getLocationQueryURL(self, query):
        return "%sq=%s" % (self.baseLocation,
                           urllib.quote(query.encode('utf-8')))

    @cache.methodcache.cache('findLocations', expire=72000)
    def findLocations(self, query, extra, area=None):
        """Look up locations matching ``query`` and build their search forms.

        ``extra`` is the search type code handed to createSearchFormData.
        When ``area`` is given, only locations whose parent location name
        contains it are kept.

        Returns a dict with:
          'search'    -- one form-data dict per kept location
          'area'      -- the ``area`` argument, unchanged
          'locations' -- the kept location items, each with a "score" from
                         Levenshtein.ratio against ``query``, best first

        The result is also stored in ``cache.locations``.
        """
        queryURL = self.getLocationQueryURL(query)
        # The stored result depends on the type code and the area filter as
        # well as the query, so all three must be part of the key; keying on
        # the URL alone returned another filter's results.
        keySource = json.dumps([queryURL, extra, area])
        cacheKey = hashlib.md5(keySource.encode('utf-8')).hexdigest()
        cacheResult = cache.locations.get(cacheKey)
        if cacheResult is not None:
            print("Found cached loc")
            return cacheResult

        locResponse = self.request.getResponse(queryURL, None)
        jdata = json.loads(locResponse)
        print(json.dumps(jdata, indent=4))

        locFormData = []
        locations = []
        for item in jdata:
            location = item.get("location")
            item["score"] = Levenshtein.ratio(location.get("name"), query)
            if area is not None:
                # A location without a parent (or without a parent name)
                # cannot match the area filter; skip it instead of crashing.
                parent = location.get("parent_location") or {}
                parentName = parent.get("name") or ""
                if area not in parentName:
                    continue
            formData = self.createSearchFormData(location, extra)
            locations.append(item)
            locFormData.append(formData)

        # NOTE: locations are sorted by score but locFormData keeps response
        # order, exactly as before -- the two lists are not index-aligned.
        locations = sorted(locations, key=itemgetter('score'), reverse=True)
        result = {
            'search': locFormData,
            'area': area,
            'locations': locations
        }
        cache.locations[cacheKey] = result
        return result

    @cache.methodcache.cache('performSearch', expire=72000)
    def performSearch(self, searchData):
        """Run a search for ``searchData`` and return the result item.

        The cache key is the MD5 of ``searchData`` serialised as JSON with
        sorted keys. On a hit in ``cache.storage`` the stored value is
        returned as is. On a miss the search is posted, all result pages are
        parsed, and the result (minus any 'list' entry) is stored before the
        full result is returned.
        """
        # Function-scope import: the module's import block is not visible
        # from here, so bring the dependency in locally.
        import copy

        hashkey = hashlib.md5(json.dumps(searchData,
                                         sort_keys=True)).hexdigest()
        cachedResult = cache.storage.get(hashkey)
        if cachedResult is not None:
            print("Found cached searchResponse")
            return cachedResult

        print("Performing search on " + json.dumps(searchData, indent=4))
        searchRequest = self.searchRequest(searchData)
        searchResponse = self.request.getUnicodeDoc(searchRequest)
        # parseResult relies on the accumulator being filled in place across
        # pages, so hand it a private copy of the template; passing
        # self.resultFormat itself would let one search leak into the next.
        resultData = self.parseResult(searchResponse,
                                      copy.deepcopy(self.resultFormat))
        result = self.createResultItem(resultData)

        print("Storing hash " + hashkey)

        # Store everything except the 'list' entry.
        metadata = dict(
            (key, value) for key, value in result.items() if key != 'list')
        cache.storage[hashkey] = metadata
        return result

    def parseResult(self, doc, brokers=None):
        """Parse one result page and follow its "next page" link, if any.

        ``doc`` must offer ``xpath`` (presumably an lxml document -- confirm
        against the request helper). ``brokers`` is the accumulator handed to
        parseItems; a fresh dict is used when it is omitted.

        Returns the accumulator as returned by parseItems for this page.
        Failures while fetching or parsing later pages are logged at debug
        level and do not propagate.
        """
        # A mutable default would be shared by every call that omits the
        # argument, so results from unrelated calls would pile up in it.
        if brokers is None:
            brokers = {}
        brokers = self.parseItems(
            doc.xpath("//div[contains(@class, 'item result')]"), brokers)

        # No link (or a link without href) simply means this was the last
        # page; handle that explicitly rather than via a caught exception.
        nextpage = doc.xpath('//a[@class="next_page"]')
        if not nextpage:
            return brokers
        url = nextpage[0].attrib.get("href")
        if url is None:
            return brokers

        try:
            self.log.info("Parsing %s" % url)
            nextDoc = self.request.requestUnicodeDoc(self.baseUrl + url)
            # Return value ignored on purpose: later pages are expected to
            # add to the same accumulator object.
            self.parseResult(nextDoc, brokers)
        except Exception as e:
            # Best effort: keep what was collected so far.
            self.log.debug("ParseResult %s" % e)

        return brokers
コード例 #5
0
ファイル: hemnet.py プロジェクト: djjupa/hemnet
    def __init__(self):
        """Create the logger and request helper and fill the lookup tables.

        Everything assigned here is fixed configuration, apart from
        ``searchQuery`` which starts out empty.
        """
        self.log = Logger("Hemnet")
        self.request = Request()

        # Site root; every endpoint below is derived from it.
        root = "http://www.hemnet.se"
        self.baseUrl = root
        self.baseSearch = root + "/sok/create"
        self.baseLocation = root + "/locations/show?"
        self.baseResult = root + "/resultat"
        self.searchQuery = dict()

        # Location type, English -> Swedish.
        self.translatedTypes = dict(
            municipality="Kommun",
            district=u"Område",
            postal_city="Stadsdel",
            region=u"Län",
            street="Gata",
            city="Stad"
        )

        # Average type, English -> Swedish label.
        # NOTE(review): "Prissäkning" looks like a misspelling of
        # "Prissänkning"; left untouched here -- confirm before changing.
        self.translatedAverageTypes = dict(
            age=u"List ålder",
            price="Medelpris",
            price_m2=u"Pris per m²",
            size=u"Storlek (m²)",
            rooms="Antal rum",
            fee=u"Månadsavgift",
            price_change_up=u"Prisökning (%)",
            price_change_down=u"Prissäkning (%)"
        )

        # Search type codes and the item type name each one stands for.
        self.searchTypes = dict(
            f="fritidshus", v="villa", t="tomt", r="radhus",
            g="gard", b="bostadsratt", o="other", a="all"
        )

        # Items to get an average for, each starting at zero.
        self.itemAverageTypes = dict.fromkeys(
            ("age", "price", "price_m2", "size", "rooms", "fee",
             "price_change_up", "price_change_down"), 0)

        # Base result format.
        self.resultFormat = dict(totalItems=0, results=dict())
        self.log.info("Initiated Hemnet")
コード例 #6
0
ファイル: hemnet.py プロジェクト: djjupa/hemnet
class Hemnet() :
    def __init__(self):
        """Create the logger and request helper and fill the lookup tables.

        Everything assigned here is fixed configuration, apart from
        ``searchQuery`` which starts out empty.
        """
        self.log = Logger("Hemnet")
        self.request = Request()

        # Site root; every endpoint below is derived from it.
        root = "http://www.hemnet.se"
        self.baseUrl = root
        self.baseSearch = root + "/sok/create"
        self.baseLocation = root + "/locations/show?"
        self.baseResult = root + "/resultat"
        self.searchQuery = dict()

        # Location type, English -> Swedish.
        self.translatedTypes = dict(
            municipality="Kommun",
            district=u"Område",
            postal_city="Stadsdel",
            region=u"Län",
            street="Gata",
            city="Stad"
        )

        # Average type, English -> Swedish label.
        # NOTE(review): "Prissäkning" looks like a misspelling of
        # "Prissänkning"; left untouched here -- confirm before changing.
        self.translatedAverageTypes = dict(
            age=u"List ålder",
            price="Medelpris",
            price_m2=u"Pris per m²",
            size=u"Storlek (m²)",
            rooms="Antal rum",
            fee=u"Månadsavgift",
            price_change_up=u"Prisökning (%)",
            price_change_down=u"Prissäkning (%)"
        )

        # Search type codes and the item type name each one stands for.
        self.searchTypes = dict(
            f="fritidshus", v="villa", t="tomt", r="radhus",
            g="gard", b="bostadsratt", o="other", a="all"
        )

        # Items to get an average for, each starting at zero.
        self.itemAverageTypes = dict.fromkeys(
            ("age", "price", "price_m2", "size", "rooms", "fee",
             "price_change_up", "price_change_down"), 0)

        # Base result format.
        self.resultFormat = dict(totalItems=0, results=dict())
        self.log.info("Initiated Hemnet")
    
    '''
        Searchdata is a formpost in a very specific format
    '''
    def createSearchFormData(self, data, specificType = 'a') :
        locationData = [{
            "id": (data.get("id")),
            "name": (data.get("name")),
            "parent_id": (data.get("parent_location").get("id")),
            "parent_name": (data.get("parent_location").get("name"))
        }]

        searchData = {
            "search[location_search]" : locationData,
            "search[location_ids][]": data.get("id"),
            "search[region_id]":-1,
            "search[municipality_ids][]":-1,
            "search[country_id]":0,
            "search[item_types][]": "%s" % self.searchTypes[specificType],
            "search[price_min]": '',
            "search[price_max]": '',
            "search[fee_max]": '',
            "search[rooms_min]": '',
            "search[living_area_min]": '',
            "search[keywords]":'',
            "commit": ''
        }
        return searchData;

    def searchRequest(self, query):
        """Post ``query`` to the search endpoint and return what the request helper returns."""
        endpoint = self.baseSearch
        return self.request.postRequest(endpoint, query)

    '''
        Pass a list of keys and a list of dicts to calculate the average value for each key
    '''
    def avgByKey(self, keys, data):
        """Average the values of the selected keys over a list of dicts.

        Each entry of ``data`` contributes the values whose key appears in
        ``keys``. Every sum is divided by ``len(data)``, i.e. by the number
        of entries, not by how many entries actually carried that key.
        Keys that never occur are absent from the returned dict.
        """
        totals = {}
        for record in data:
            for name, value in record.items():
                if name not in keys:
                    continue
                totals[name] = totals.get(name, 0) + value
        return dict(
            (name, total / len(data)) for name, total in totals.items())

    def getLocationQueryURL(self, query):
        return "%sq=%s" % (self.baseLocation, urllib.quote(query.encode('utf-8')))

    @cache.methodcache.cache('findLocations', expire=72000)
    def findLocations(self, query, extra, area=None):
        """Look up locations matching ``query`` and build their search forms.

        ``extra`` is the search type code handed to createSearchFormData.
        When ``area`` is given, only locations whose parent location name
        contains it are kept.

        Returns a dict with:
          'search'    -- one form-data dict per kept location
          'area'      -- the ``area`` argument, unchanged
          'locations' -- the kept location items, each with a "score" from
                         Levenshtein.ratio against ``query``, best first

        The result is also stored in ``cache.locations``.
        """
        queryURL = self.getLocationQueryURL(query)
        # The stored result depends on the type code and the area filter as
        # well as the query, so all three must be part of the key; keying on
        # the URL alone returned another filter's results.
        keySource = json.dumps([queryURL, extra, area])
        cacheKey = hashlib.md5(keySource.encode('utf-8')).hexdigest()
        cacheResult = cache.locations.get(cacheKey)
        if cacheResult is not None:
            print("Found cached loc")
            return cacheResult

        locResponse = self.request.getResponse(queryURL, None)
        jdata = json.loads(locResponse)
        print(json.dumps(jdata, indent=4))

        locFormData = []
        locations = []
        for item in jdata:
            location = item.get("location")
            item["score"] = Levenshtein.ratio(location.get("name"), query)
            if area is not None:
                # A location without a parent (or without a parent name)
                # cannot match the area filter; skip it instead of crashing.
                parent = location.get("parent_location") or {}
                parentName = parent.get("name") or ""
                if area not in parentName:
                    continue
            formData = self.createSearchFormData(location, extra)
            locations.append(item)
            locFormData.append(formData)

        # NOTE: locations are sorted by score but locFormData keeps response
        # order, exactly as before -- the two lists are not index-aligned.
        locations = sorted(locations, key=itemgetter('score'), reverse=True)
        result = {
            'search': locFormData,
            'area': area,
            'locations': locations
        }
        cache.locations[cacheKey] = result
        return result

    @cache.methodcache.cache('performSearch', expire=72000)
    def performSearch(self, searchData):
        """Run a search for ``searchData`` and return the result item.

        The cache key is the MD5 of ``searchData`` serialised as JSON with
        sorted keys. On a hit in ``cache.storage`` the stored value is
        returned as is. On a miss the search is posted, all result pages are
        parsed, and the result (minus any 'list' entry) is stored before the
        full result is returned.
        """
        # Function-scope import: the module's import block is not visible
        # from here, so bring the dependency in locally.
        import copy

        hashkey = hashlib.md5(json.dumps(searchData,
                                         sort_keys=True)).hexdigest()
        cachedResult = cache.storage.get(hashkey)
        if cachedResult is not None:
            print("Found cached searchResponse")
            return cachedResult

        print("Performing search on " + json.dumps(searchData, indent=4))
        searchRequest = self.searchRequest(searchData)
        searchResponse = self.request.getUnicodeDoc(searchRequest)
        # parseResult relies on the accumulator being filled in place across
        # pages, so hand it a private copy of the template; passing
        # self.resultFormat itself would let one search leak into the next.
        resultData = self.parseResult(searchResponse,
                                      copy.deepcopy(self.resultFormat))
        result = self.createResultItem(resultData)

        print("Storing hash " + hashkey)

        # Store everything except the 'list' entry.
        metadata = dict(
            (key, value) for key, value in result.items() if key != 'list')
        cache.storage[hashkey] = metadata
        return result


    def parseResult(self, doc, brokers=None):
        """Parse one result page and follow its "next page" link, if any.

        ``doc`` must offer ``xpath`` (presumably an lxml document -- confirm
        against the request helper). ``brokers`` is the accumulator handed to
        parseItems; a fresh dict is used when it is omitted.

        Returns the accumulator as returned by parseItems for this page.
        Failures while fetching or parsing later pages are logged at debug
        level and do not propagate.
        """
        # A mutable default would be shared by every call that omits the
        # argument, so results from unrelated calls would pile up in it.
        if brokers is None:
            brokers = {}
        brokers = self.parseItems(
            doc.xpath("//div[contains(@class, 'item result')]"), brokers)

        # No link (or a link without href) simply means this was the last
        # page; handle that explicitly rather than via a caught exception.
        nextpage = doc.xpath('//a[@class="next_page"]')
        if not nextpage:
            return brokers
        url = nextpage[0].attrib.get("href")
        if url is None:
            return brokers

        try:
            self.log.info("Parsing %s" % url)
            nextDoc = self.request.requestUnicodeDoc(self.baseUrl + url)
            # Return value ignored on purpose: later pages are expected to
            # add to the same accumulator object.
            self.parseResult(nextDoc, brokers)
        except Exception as e:
            # Best effort: keep what was collected so far.
            self.log.debug("ParseResult %s" % e)

        return brokers