コード例 #1
0
ファイル: telephones.py プロジェクト: relique/izba
def parse(conn_type='gsm'):
    """Scrape phone-sale topics from a diesel.elcat.kg forum into Telephone rows.

    Fetches the forum index selected by ``conn_type`` ('gsm', 'cdma' or
    'tdma'), collects the topic links found after the last
    '<!-- END PINNED -->' marker and visits the first five of them.  For
    each topic it tries to read brand and model from the title, a price in
    som from the title, the condition from title/body and a phone number
    from the body.  A topic that passes every check is stored with
    ``Telephone.objects.create`` and its images are downloaded and attached
    to ``image_1`` .. ``image_4``.

    Returns None in every case.  Returns early when the forum index cannot
    be fetched or when an image download fails.

    NOTE(review): any ``conn_type`` other than the three above leaves
    ``url`` unassigned, so ``get_html_from(url)`` raises UnboundLocalError.
    NOTE(review): ``data`` is created once and reused for every topic, so
    keys written for one topic are still present while the next is parsed.
    """
    data = {}
    domain = 'http://diesel.elcat.kg'
    # Each connection type has its own forum id.
    if conn_type == 'gsm':
        url = '%s/index.php?showforum=192' % domain
    elif conn_type == 'cdma':
        url = '%s/index.php?showforum=194' % domain
    elif conn_type == 'tdma':
        url = '%s/index.php?showforum=193' % domain
    source = get_html_from(url)
    if source is None:
        return
    # The page is decoded from windows-1251 and re-encoded as a UTF-8 byte string.
    html = unicode(source, 'windows-1251').encode('utf-8')
    # Keep only what follows the last "END PINNED" marker.
    html = html[html.rfind('<!-- END PINNED -->'):]
    hrefs = []
    for match in re.finditer('tid-link-', html):
        # Slice the text right after 'tid-link-' (up to 30 characters past the
        # start of the match); the topic id is the part before '" href='.
        sub = html[match.end():match.start()+30]
        hrefs.append('%s/index.php?showtopic=%s' % (domain, 
                                                    sub.split('" href=')[0]))
    # Only the first five topics are visited.
    for step, href in enumerate(hrefs[:5]):
        innersource = get_html_from(href)
        if innersource is None:
            continue
        innerhtml = unicode(innersource, 'windows-1251').encode('utf-8')
        match = innerhtml[innerhtml.find('style=\'word-wrap:break-word;\'><div><b>'):]
        # 38 is the length of the marker searched for above, so the title
        # starts right after it and ends before '</div></td>'.
        title = match[38:match.find('</div></td>')]
        # Title must contain 'Продаю' followed by two words: brand and model.
        # NOTE(review): title.lower() runs on the UTF-8 byte string, which
        # presumably lower-cases only ASCII letters -- confirm.
        pattern = re.compile(ur'Продаю\s(\w+)\s(\w+)', re.UNICODE)
        string_re = re.search(pattern, unicode(title.lower(), 'utf-8'))

        if string_re is not None:
            # NOTE(review): this decodes str() of the match object itself, not
            # the matched text; presumably meant as an ASCII-only check on
            # brand/model -- confirm.
            try:
                str(string_re).decode('ascii')
            except UnicodeEncodeError:
                continue
            brand = string_re.group(1)
            model = string_re.group(2)
        else:
            continue

        # Set to True by any branch below that wants this topic skipped.
        continue_loop = False

        # One branch per supported brand.  Each branch calls _is_model() on the
        # whole matched text (helper defined outside this view), looks up the
        # TelephoneBrand row by name and compares the model word against the
        # brand's model list.
        # NOTE(review): in every branch data['model'] is assigned only when
        # data already holds a non-None 'model'.  Nothing else in this
        # function assigns that key, so the assignment looks unreachable and a
        # model match always sets continue_loop -- confirm the intended
        # condition.
        if brand == u'iphone':
            if _is_model(string_re.group(), except_='iPhone'):
                continue_loop = True
            data['brand'] = TelephoneBrand.objects.get(name='Apple')
            for iphone in IPHONE_MODELS:
                if model == iphone.lower():
                    if data.get('model', None) is not None:
                        data['model'] = 'iPhone {0}'.format(iphone)
                        data['has_3g'] = 'yes'
                        data['has_wifi'] = 'yes'
                    else:
                        continue_loop = True
        elif brand == u'samsung':
            if _is_model(string_re.group(), except_='Samsung'):
                continue_loop = True
            data['brand'] = TelephoneBrand.objects.get(name='Samsung')
            for samsung in SAMSUNG_MODELS:
                if model == samsung.lower():
                    if data.get('model', None) is not None:
                        data['model'] = samsung
                    else:
                        continue_loop = True
        elif brand == u'htc':
            if _is_model(string_re.group(), except_='HTC'):
                continue_loop = True
            data['brand'] = TelephoneBrand.objects.get(name='HTC')
            for htc in HTC_MODELS:
                if model == htc.lower():
                    if data.get('model', None) is not None:
                        data['model'] = htc
                    else:
                        continue_loop = True
        elif brand == u'blackberry':
            if _is_model(string_re.group(), except_='BlackBerry'):
                continue_loop = True
            data['brand'] = TelephoneBrand.objects.get(name='BlackBerry')
            for blackberry in BLACKBERRY_MODELS:
                if model == blackberry.lower():
                    if data.get('model', None) is not None:
                        data['model'] = blackberry
                    else:
                        continue_loop = True
        elif brand == u'motorola':
            if _is_model(string_re.group(), except_='Motorola'):
                continue_loop = True
            data['brand'] = TelephoneBrand.objects.get(name='Motorola')
            for motorola in MOTOROLA_MODELS:
                if model == motorola.lower():
                    if data.get('model', None) is not None:
                        data['model'] = motorola
                    else:
                        continue_loop = True
        elif brand == u'sony':
            if _is_model(string_re.group(), except_='Sony'):
                continue_loop = True
            data['brand'] = TelephoneBrand.objects.get(name='Sony')
            for sony in SONY_MODELS:
                if model == sony.lower():
                    if data.get('model', None) is not None:
                        data['model'] = sony
                    else:
                        continue_loop = True
        elif brand == u'nokia':
            if _is_model(string_re.group(), except_='Nokia'):
                continue_loop = True
            data['brand'] = TelephoneBrand.objects.get(name='Nokia')
            for nokia in NOKIA_MODELS:
                if model == nokia.lower():
                    if data.get('model', None) is not None:
                        data['model'] = nokia
                    else:
                        continue_loop = True
        elif brand == u'huawei':
            if _is_model(string_re.group(), except_='Huawei'):
                continue_loop = True
            data['brand'] = TelephoneBrand.objects.get(name='Huawei')
            for huawei in HUAWEI_MODELS:
                if model == huawei.lower():
                    if data.get('model', None) is not None:
                        data['model'] = huawei
                    else:
                        continue_loop = True

        # Skip the topic when a branch asked for it or brand/model is missing.
        if continue_loop or not data.get('brand', '') or \
            not data.get('model', ''):
                continue

        if conn_type == 'gsm':
            data['conn_type'] = 'gsm'
        elif conn_type == 'cdma':
            data['conn_type'] = 'cdma'
        elif conn_type == 'tdma':
            data['conn_type'] = 'tdma'
        
        # Fixed values stored with every scraped listing.
        data['published'] = False
        data['featured'] = False
        data['seller'] = None
        data['location'] = 'bishkek'
        data['ip'] = '212.42.102.203'
        # Price: a number with an optional thousands separator, followed by
        # 'сом' or 'с.'; topics without one are skipped.
        price_pattern = re.compile(ur'(\d{1,3}?[\.,\s]?\d{3})\s?(сом|с\.)', 
                                   re.UNICODE)
        price_re = re.search(price_pattern, unicode(title, 'utf-8'))
        if price_re is None:
            continue
        # Keep only the digits of the matched amount.
        price = ''
        for s in price_re.group(1):
            if s.isdigit():
                price += s
        try:
            data['price'] = Decimal(price)
        except TypeError:
            continue

        # Body text: from the first 'postcolor' div (42 characters past the
        # start of the marker) up to the signature comment.
        match = innerhtml[innerhtml.find('<div class="postcolor" id=\'post'):]
        body = match[42:match.find(u'<!-- подпись -->'.encode('utf-8'))]

        # 'новый' in the title or body marks the phone as new.
        if u'новый' in unicode(title.lower(), 'utf-8') or \
            u'новый' in unicode(body.lower(), 'utf-8'):
                data['condition'] = 'new'
        else:
            data['condition'] = 'used'

        # Phone number: '0' or '+996', a three-digit code starting with 3, 5
        # or 7, six more digits, then one non-digit character.
        phone_pattern = re.compile(r'([0]|\+996)\s?([357]\d{2})\s?(\d{2}\s?\d{2}\s?\d{2})\D', 
                                   re.UNICODE)
        phone_re = re.search(phone_pattern, unicode(body, 'utf-8'))
        if phone_re is None:
            continue
        try:
            data['phone'] = clean_phone(str(phone_re.group()))
        except:
            continue
        # Skip when there is no phone or the price digits occur inside it.
        if data.get('phone', None) is None or str(data['price']) in data['phone']:
            continue
        # The hash of brand|model|location|phone|ip identifies a listing.
        try:
            hash = u'%s|%s|%s|%s|%s' % (
                data['brand'], 
                data['model'], 
                'bishkek', 
                data['phone'],
                data['ip']
            )
        except KeyError:
            continue
        data['hash'] = hashlib.sha256(hash).hexdigest()
        # Skip listings whose hash is already stored.
        try:
            Telephone.objects.get(hash=data['hash'])
        except Telephone.DoesNotExist:
            pass
        else:
            continue
        instance = Telephone.objects.create(**data)
        # Thumbnails referenced anywhere in the topic page.
        im_pattern = re.compile(r'monthly_\d{2}_\d{4}/post-\d+-\d+_thumb\.jpg', 
                                re.UNICODE)
        im_re = re.findall(im_pattern, unicode(innerhtml, 'utf-8'))
        if not im_re:
            continue
        for id, src in enumerate(im_re):
            # The full-size image URL is the thumbnail path without '_thumb'.
            im_url = 'http://diesel.elcat.kg/uploads/%s' % src.replace('_thumb.', '.')
            uid = getpwnam('django').pw_uid
            gid = grp.getgrnam('www-data').gr_gid
            f = get_html_from(im_url)
            # NOTE(review): this return ends the whole parse() call, so the
            # remaining topics and the instance.save() below are not reached.
            if f is None:
                return
            fname = src.split('/post-')[-1].replace('_thumb.', '.')
            path = image.encode_image_name(None, fname)
            localpath = settings.MEDIA_ROOT + path
            # Write the download to disk so its dimensions can be read.
            local_f = open(localpath, 'w')
            local_f.write(f)
            local_f.close()
            im_size = Image.open(localpath).size
            # Images under 370 wide and under 200 high are deleted again.
            if im_size[0] < 370 and im_size[1] < 200:
                if os.path.isfile(localpath):
                    os.remove(localpath)
            else:
                # The position in the match list selects the image field;
                # a fifth or later accepted image stops the loop.
                local_f = open(localpath, 'r')
                if id == 0:
                    instance.image_1.save(fname, File(local_f))
                    local_f.close()
                    os.chown(instance.image_1.path, uid, gid)
                elif id == 1:
                    instance.image_2.save(fname, File(local_f))
                    local_f.close()
                    os.chown(instance.image_2.path, uid, gid)
                elif id == 2:
                    instance.image_3.save(fname, File(local_f))
                    local_f.close()
                    os.chown(instance.image_3.path, uid, gid)
                elif id == 3:
                    instance.image_4.save(fname, File(local_f))
                    local_f.close()
                    os.chown(instance.image_4.path, uid, gid)
                else:
                    local_f.close()
                    break
        instance.save()
コード例 #2
0
ファイル: houses.py プロジェクト: relique/izba
def parse():
    data = {}
    domain = 'http://diesel.elcat.kg'
    url = '%s/index.php?showforum=225' % domain
    source = get_html_from(url)
    if source is None:
        return
    html = unicode(source, 'windows-1251').encode('utf-8')
    html = html[html.rfind('<!-- END PINNED -->'):]
    hrefs = []
    for match in re.finditer('tid-link-', html):
        sub = html[match.end():match.start()+30]
        hrefs.append('%s/index.php?showtopic=%s' % (domain, 
                                                    sub.split('" href=')[0]))
    for step, href in enumerate(hrefs[:7]):
        innersource = get_html_from(href)
        if innersource is None:
            continue
        innerhtml = unicode(innersource, 'windows-1251').encode('utf-8')
        match = innerhtml[innerhtml.find('style=\'word-wrap:break-word;\'><div><b>'):]
        title = match[38:match.find('</div></td>')]
        data['published'] = True
        data['featured'] = False
        data['seller'] = None
        data['section'] = 'sale'
        data['location'] = 'bishkek'
        data['ip'] = '212.42.102.203'
        data['rooms'] = get_rooms(title)
        price_pattern = re.compile(r'(\d{1,3}[\.,\s]?\d{3}\s?[\u0024])|([\u0024]\s?\d{1,3}[\.,\s]?\d{3})', 
                                   re.UNICODE)
        price_re = re.search(price_pattern, unicode(title, 'utf-8'))
        if price_re is None:
            continue
        price = ''
        for s in price_re.group():
            if s.isdigit():
                price += s
        try:
            data['price'] = currency.convert_currency(Decimal(price), 'KGS')
        except:
            continue
        if u'куплю'.encode('utf-8') in title.lower() or not data['rooms'] or \
        u'обмен'.encode('utf-8') in title.lower():
            continue
        if is_realtor(title):
            data['is_realtor'] = 'yes'
        else:
            data['is_realtor'] = 'no'
        
        match = innerhtml[innerhtml.find('<div class="postcolor" id=\'post'):]
        body = match[42:match.find(u'<!-- подпись -->'.encode('utf-8'))]

        if u'новый' in unicode(title.lower(), 'utf-8') or \
            u'новый' in unicode(body.lower(), 'utf-8'):
                data['condition'] = 'new'
        else:
            data['condition'] = 'used'

        floor_pattern = re.compile(ur'(\d{1,2})((\s?/\s?|\sиз\s)\d{1,2})?\s?(этаж|эт\.)', 
                                   re.UNICODE)
        floor_re = re.search(floor_pattern, unicode(body, 'utf-8'))
        if floor_re is None:
            floor_re = re.search(floor_pattern, unicode(title, 'utf-8'))
        if floor_re is not None:
            data['floor'] = floor_re.group(1)

        area_pattern = re.compile(ur'(\d+[\.,]?\d+?)\s?(кв\.?[/\s]?м\.?|м2)', 
                                  re.UNICODE)
        area_re = re.search(area_pattern, unicode(body, 'utf-8'))
        if area_re is None:
            area_re = re.search(area_pattern, unicode(title, 'utf-8'))
        if area_re is not None:
            data['area'] = area_re.group(1)

        phone_pattern = re.compile(r'([0]|\+996)\s?([357]\d{2})\s?(\d{2}\s?\d{2}\s?\d{2})\D', 
                                   re.UNICODE)
        phone_re = re.search(phone_pattern, unicode(body, 'utf-8'))
        if phone_re is None:
            continue
        try:
            data['phone'] = clean_phone(str(phone_re.group()))
        except:
            continue
        if data.get('phone', None) is None or str(data['price']) in str(data['phone']):
            continue
        try:
            hash = u'%s|%s|%s|%s' % (
                data['rooms'], 
                'bishkek', 
                data['phone'],
                data['ip']
            )
        except KeyError:
            continue
        data['hash'] = hashlib.sha256(hash).hexdigest()
        try:
            House.objects.get(hash=data['hash'])
        except House.DoesNotExist:
            pass
        else:
            continue
        instance = House.objects.create(**data)
        im_pattern = re.compile(r'monthly_\d{2}_\d{4}/post-\d+-\d+_thumb\.jpg', 
                                re.UNICODE)
        im_re = re.findall(im_pattern, unicode(innerhtml, 'utf-8'))
        if not im_re:
            continue
        for id, src in enumerate(im_re):
            im_url = 'http://diesel.elcat.kg/uploads/%s' % src.replace('_thumb.', '.')
            uid = getpwnam('django').pw_uid
            gid = grp.getgrnam('www-data').gr_gid
            f = get_html_from(im_url)
            if f is None:
                return
            fname = src.split('/post-')[-1].replace('_thumb.', '.')
            path = image.encode_image_name(None, fname)
            localpath = settings.MEDIA_ROOT + path
            local_f = open(localpath, 'w')
            local_f.write(f)
            local_f.close()
            im_size = Image.open(localpath).size
            if im_size[0] < 370 and im_size[1] < 200:
                if os.path.isfile(localpath):
                    os.remove(localpath)
            else:
                local_f = open(localpath, 'r')
                if id == 0:
                    instance.image_1.save(fname, File(local_f))
                    local_f.close()
                    os.chown(instance.image_1.path, uid, gid)
                elif id == 1:
                    instance.image_2.save(fname, File(local_f))
                    local_f.close()
                    os.chown(instance.image_2.path, uid, gid)
                elif id == 2:
                    instance.image_3.save(fname, File(local_f))
                    local_f.close()
                    os.chown(instance.image_3.path, uid, gid)
                elif id == 3:
                    instance.image_4.save(fname, File(local_f))
                    local_f.close()
                    os.chown(instance.image_4.path, uid, gid)
                else:
                    local_f.close()
                    break
        instance.save()
コード例 #3
0
ファイル: autodoska.py プロジェクト: relique/izba
def parse():
    """Import car listings from auto.doska.kg as Automobile rows.

    Walks result pages 1-6.  On each page the rows of the fourth <table>
    are listings: the first cell holds the link to the detail page, the
    second holds the title (brand and model) and a line of attributes
    (year, colour, body type, transmission, steering wheel side, engine
    capacity, drive type).  The price is the number following '$' in the
    row, and the phone number comes from the detail page.  Listings whose
    hash already exists are skipped; for new ones up to four linked images
    are downloaded and attached to ``image_1`` .. ``image_4``.

    Returns None.

    NOTE(review): as in the original, several conditions end the whole run
    with ``return`` instead of skipping one listing: a page that cannot be
    fetched, a listing with the 'photononb' placeholder image, a non-ASCII
    title, an unreadable year, an unknown transmission and a failed image
    download.  Confirm whether "skip this listing" was intended.
    """
    domain = 'http://auto.doska.kg'
    urls = ['%s/carsfind/page:%s/?order=date_new' % (domain, page)
            for page in range(1, 7)]
    # Loaded once; the brand table does not change while scraping.
    brands = list(AutomobileBrand.objects.all())
    for url in urls:
        # Check the download itself: BeautifulSoup() never returns None, so
        # testing the soup could not detect a failed fetch.
        html = get_html_from(url)
        if html is None:
            return
        tables = BeautifulSoup(html).find_all('table')
        if len(tables) < 4:
            continue
        for item in tables[3].find_all('tr'):
            # Fresh state per row, so attributes of one listing never leak
            # into the next.
            data = {}
            href = None
            for idx, td in enumerate(item.find_all('td')):
                if idx == 0:
                    if 'photononb' in td.a.img['src']:
                        return
                    href = domain + td.a['href']
                elif idx == 1:
                    title = td.find('a', {'class': 'fs15'}).string
                    # Only plain-ASCII titles are accepted.
                    try:
                        title.decode('ascii')
                    except UnicodeEncodeError:
                        return
                    # The last brand whose name occurs in the title wins.
                    for brand in brands:
                        if brand.name in title:
                            data['brand'] = brand
                            data['model'] = title.replace(brand.name, '').lstrip()

                    details = td.find('div', {'class': 'fs12'})
                    fields = details.contents[2]
                    try:
                        data['year'] = int(details.b.string[:4])
                    except Exception:
                        return

                    color = get_color(fields)
                    if color is not None:
                        data['color'] = color

                    body_type = get_body_type(fields)
                    if body_type is not None:
                        data['body_type'] = body_type

                    if u'Механика' in fields.string:
                        data['transmission'] = 'mechanic'
                    elif u'Автомат' in fields.string:
                        data['transmission'] = 'automatic'
                    elif u'Типтроник' in fields.string:
                        data['transmission'] = 'tiptronic'
                    else:
                        return

                    if u'правый руль' in fields.string.lower():
                        data['steering_wheel'] = 'right'
                    else:
                        data['steering_wheel'] = 'left'

                    # Engine capacity is the last comma-separated piece in
                    # front of u'л.'.  Require the marker to be present:
                    # find() returning -1 used to be taken as a slice bound.
                    try:
                        cut = fields.find(u'л.')
                        if cut > 0:
                            capacity = fields[:cut]
                            capacity = capacity[capacity.rfind(',') + 1:].strip()
                            if len(capacity) <= 10:
                                data['engine_capacity'] = capacity
                    except Exception:
                        pass

                    drive_type = get_drive_type(fields)
                    if drive_type is not None:
                        data['drive_type'] = drive_type

            # Rows without a detail link or a known brand cannot be stored.
            if href is None or 'brand' not in data:
                continue

            contents = u''.join([unicode(td) for td in item.contents])
            dollar = contents.find('$')
            if dollar == -1:
                continue
            # The price is the digits among the six characters after '$'.
            price = ''.join(ch for ch in contents[dollar + 1:dollar + 7]
                            if ch.isdigit())
            if not price:
                continue
            data['price'] = currency.convert_currency(Decimal(price), 'KGS')

            inner_html = get_html_from(href)
            if inner_html is None:
                continue
            innersoup = BeautifulSoup(inner_html)
            phone_tag = innersoup.find('span', {'class': 'text_c_blue'})
            if phone_tag is None:
                continue
            data['phone'] = clean_phone(phone_tag.string)
            if data.get('phone') is None:
                continue
            data['ip'] = '212.42.117.66'
            # The hash of brand|model|phone|ip identifies a listing.
            fingerprint = u'%s|%s|%s|%s' % (
                data['brand'].name,
                data['model'],
                data['phone'],
                data['ip']
            )
            data['hash'] = hashlib.sha256(fingerprint.encode('utf-8')).hexdigest()
            if Automobile.objects.filter(hash=data['hash']).exists():
                continue
            data['published'] = True
            data['featured'] = False
            data['seller'] = None
            data['condition'] = 'used'
            data['location'] = 'bishkek'
            auto = Automobile.objects.create(**data)

            # Every <a> carrying a 'rel' attribute is taken to be an image link.
            links = [a for a in innersoup.find_all('a') if a.has_attr('rel')]
            if links:
                uid = getpwnam('django').pw_uid
                gid = grp.getgrnam('www-data').gr_gid
            # Only four image fields exist, so later links are not fetched.
            for slot, link in enumerate(links[:4], 1):
                im_url = link['href']
                content = get_html_from(im_url)
                if content is None:
                    return
                # NOTE(review): expression kept verbatim; the slice offset is
                # taken from the position of the first '/' in the full URL.
                fname = im_url.replace('http://auto.static.akipress.org/', '')[im_url.find('/')-1:]
                localpath = settings.MEDIA_ROOT + image.encode_image_name(None, fname)
                # Image data is binary, so the file is opened in binary mode.
                with open(localpath, 'wb') as local_f:
                    local_f.write(content)
                width, height = Image.open(localpath).size
                if width < 370 and height < 200:
                    # Too small to be useful: drop it, leave the slot empty.
                    if os.path.isfile(localpath):
                        os.remove(localpath)
                    continue
                field = getattr(auto, 'image_%d' % slot)
                with open(localpath, 'rb') as local_f:
                    field.save(fname, File(local_f))
                os.chown(field.path, uid, gid)
            auto.save()
コード例 #4
0
ファイル: avtogid.py プロジェクト: relique/izba
def _attach_image(field, im_url, fname, uid, gid):
    """Download ``im_url`` and store it in the model image ``field``.

    The download is written below ``settings.MEDIA_ROOT``; an image smaller
    than 370x200 is deleted again and not attached.  An attached file is
    chown-ed to ``uid``/``gid``.

    Returns False when the download failed, True otherwise (also when the
    image was discarded for being too small).
    """
    content = get_html_from(im_url)
    if content is None:
        return False
    localpath = settings.MEDIA_ROOT + image.encode_image_name(None, fname)
    # Image data is binary, so the file is opened in binary mode.
    with open(localpath, 'wb') as local_f:
        local_f.write(content)
    width, height = Image.open(localpath).size
    if width < 370 and height < 200:
        if os.path.isfile(localpath):
            os.remove(localpath)
        return True
    with open(localpath, 'rb') as local_f:
        field.save(fname, File(local_f))
    os.chown(field.path, uid, gid)
    return True


def parse():
    """Import car listings from www.avtogid.kg as Automobile rows.

    Walks search result pages 1-6.  Every 'sub-selected' block is one
    listing: its title gives brand and model, the <h4> gives the price and
    the first four table cells give year, engine capacity, colour and
    transmission.  Description and phone number come from the detail page.

    A listing is skipped when its title is not plain ASCII, when no known
    brand occurs in the title, when price, transmission or phone number
    cannot be read, or when an Automobile with the same hash exists.  The
    main image goes to ``image_1`` and up to three thumbnails to
    ``image_2`` .. ``image_4``; thumbnails are only processed when a main
    image link is present.

    Returns None.  The run ends early when a result page, a detail page or
    an image cannot be downloaded.
    """
    domain = 'http://www.avtogid.kg'
    transmissions = {
        u'Автомат': 'automatic',
        u'Типтроник': 'tiptronic',
        u'Механическая': 'mechanic',
    }
    urls = ['%s/search/search?cat=1&page=%s' % (domain, page)
            for page in range(1, 7)]
    # Loaded once; the brand table does not change while scraping.
    brands = list(AutomobileBrand.objects.all())
    for url in urls:
        # Check the download itself: BeautifulSoup() never returns None, so
        # testing the soup could not detect a failed fetch.
        html = get_html_from(url)
        if html is None:
            return
        soup = BeautifulSoup(html)
        for item in soup.find_all('div', {'class': 'sub-selected'}):
            # Fresh dict per listing, so phone, description or transmission
            # of one listing can never be reused for the next.
            data = {}
            table_tag = item.find('div', {'class': 'table'})
            title_tag = item.find('div', {'class': 'res-car-name'})
            title = title_tag.h3.a.string
            href = domain + title_tag.h3.a['href']
            # Only plain-ASCII titles are accepted.
            try:
                title.decode('ascii')
            except UnicodeEncodeError:
                continue
            # The last brand whose name occurs in the title wins.
            for brand in brands:
                if brand.name in title:
                    data['brand'] = brand
                    data['model'] = title.replace(brand.name, '').lstrip()
            if 'brand' not in data:
                continue

            price = ''.join(ch for ch in (title_tag.h4.string or u'')
                            if ch.isdigit())
            if not price:
                continue
            data['price'] = currency.convert_currency(Decimal(price), 'KGS')

            for idx, td in enumerate(table_tag.table.tbody.find_all('td')):
                if idx == 0:
                    data['year'] = int(td.string[:4])
                elif idx == 1:
                    data['engine_capacity'] = td.string
                elif idx == 2:
                    color = get_color(td)
                    if color is not None:
                        data['color'] = color
                elif idx == 3:
                    transmission = transmissions.get(td.string)
                    if transmission is not None:
                        data['transmission'] = transmission
            # The original 'continue' for an unknown transmission sat inside
            # the cell loop and had no effect; skip the listing here instead.
            if 'transmission' not in data:
                continue

            inner_html = get_html_from(href)
            if inner_html is None:
                return
            innersoup = BeautifulSoup(inner_html)

            # The description is optional.
            desc_tag = innersoup.find('div', {'class': 'description'})
            if desc_tag is not None and desc_tag.p is not None:
                desc = u'. '.join(desc_tag.p.strings)
                data['desc'] = desc if len(desc) >= 5 else ''

            # The phone number is the third text node of the 'tel-soc' block.
            tel_tag = innersoup.find('div', {'class': 'tel-soc'})
            if tel_tag is None:
                continue
            tel_strs = list(tel_tag.strings)
            if len(tel_strs) > 2 and tel_strs[2]:
                data['phone'] = clean_phone(tel_strs[2])
            if data.get('phone') is None:
                continue

            data['ip'] = '212.42.117.66'
            # The hash of brand|model|phone|ip identifies a listing.
            fingerprint = u'%s|%s|%s|%s' % (
                data['brand'].name,
                data['model'],
                data['phone'],
                data['ip']
            )
            data['hash'] = hashlib.sha256(fingerprint.encode('utf-8')).hexdigest()
            if Automobile.objects.filter(hash=data['hash']).exists():
                continue
            data['published'] = True
            data['featured'] = False
            data['seller'] = None
            data['condition'] = 'used'
            data['location'] = 'bishkek'
            auto = Automobile.objects.create(**data)

            main_link = innersoup.find('a', {'class': 'main-image'})
            if main_link is not None:
                uid = getpwnam('django').pw_uid
                gid = grp.getgrnam('www-data').gr_gid
                im_url = main_link['href']
                # NOTE(review): kept from the original -- a failed image
                # download ends the whole run, not just this listing.
                if not _attach_image(auto.image_1, domain + im_url,
                                     im_url.replace('/upload/images/', ''),
                                     uid, gid):
                    return
                # Three image fields remain, so later thumbnails are not
                # fetched.
                thumbs = innersoup.find_all('a', {'class': 'thumbnail'})
                for slot, thumb in enumerate(thumbs[:3], 2):
                    thumb_url = thumb['href']
                    if not _attach_image(getattr(auto, 'image_%d' % slot),
                                         domain + thumb_url,
                                         thumb_url.replace('/upload/images/', ''),
                                         uid, gid):
                        return
            auto.save()
コード例 #5
0
ファイル: domik.py プロジェクト: relique/izba
# Region markers searched for in a listing's text, mapped to the location
# slug stored on the advert. Order matters: within one text node the first
# marker that matches wins.
_DOMIK_LOCATIONS = (
    (u'Бишкек', 'bishkek'),
    (u'Иссык-Кульская область', 'issykkul'),
    (u'Чуйская область', 'chui'),
    (u'Джалал-Абадская область', 'jalalabad'),
    (u'Баткенская область', 'batken'),
    (u'Ошская область', 'osh'),
    (u'Таласская область', 'talas'),
)


def _domik_digits(text):
    """Return the digit characters of *text* joined together (may be empty)."""
    return ''.join(ch for ch in text if ch.isdigit())


def _domik_location(strings):
    """Return the location slug for the last text node naming a known region.

    Returns None when none of the strings mentions a known region.
    """
    location = None
    for text in strings:
        for marker, slug in _DOMIK_LOCATIONS:
            if marker in text:
                location = slug
                break
    return location


def _domik_absolute_url(domain, href):
    """Return *href* unchanged if it is absolute, else resolve it on *domain*."""
    if href.startswith(('http://', 'https://')):
        return href
    return '%s/%s' % (domain, href.lstrip('/'))


def _domik_attach_image(instance, field_name, im_url, uid, gid):
    """Download *im_url* and store it in the image field *field_name*.

    The file is first written under ``settings.MEDIA_ROOT``. Images smaller
    than 370x200 (both dimensions) or files that cannot be opened as an image
    are deleted again instead of being attached. An attached file is chowned
    to *uid*/*gid*.

    Returns False only when the download itself failed (``get_html_from``
    returned None); True otherwise, whether or not the image was kept.
    """
    content = get_html_from(im_url)
    if content is None:
        return False
    fname = im_url.split('/')[-1]
    localpath = settings.MEDIA_ROOT + image.encode_image_name(None, fname)
    # Image data is binary: use 'wb'/'rb' so no newline translation can
    # corrupt it, and let the context manager close the file on errors.
    with open(localpath, 'wb') as local_f:
        local_f.write(content)
    try:
        width, height = Image.open(localpath).size
    except IOError:
        # Not a decodable image; handle it like a too-small one.
        width = height = 0
    if width < 370 and height < 200:
        if os.path.isfile(localpath):
            os.remove(localpath)
        return True
    with open(localpath, 'rb') as local_f:
        getattr(instance, field_name).save(fname, File(local_f))
    os.chown(getattr(instance, field_name).path, uid, gid)
    return True


def parse():
    """Scrape sale adverts from domik.kg and store them as model instances.

    Walks listing pages 1-6. For every advert block it collects rooms,
    location and price from the listing, then address, description, phone,
    floor, optional area and the realtor flag from the advert's own page.
    Adverts whose title contains u'квартиру' become ``Apartment`` rows, those
    containing u'дом' become ``House`` rows; an advert whose hash already
    exists is skipped. Up to four images are attached to each new instance.

    Adverts missing any required piece of data are skipped. The whole run
    stops (returns None) as soon as a page or image download fails.
    """
    domain = 'http://www.domik.kg'
    urls = ['%s/offers/sell/page_%s/' % (domain, page) for page in range(1, 7)]
    for url in urls:
        # Check the download before parsing: BeautifulSoup never returns None.
        source = get_html_from(url)
        if source is None:
            return
        soup = BeautifulSoup(source)
        for item in soup.find_all('div', {'class': 'pix_r'}):
            # Fresh dict per advert so nothing (location, area, ...) leaks
            # from the previously processed advert.
            data = {}

            heading = item.find('h1', {'class': 'i_sell'})
            link = heading.find('a') if heading is not None else None
            if link is None or link.string is None:
                continue
            href = link.get('href')
            if not href:
                continue
            title = link.string.strip().replace('\n', '').replace('\t', '')

            try:
                data['rooms'] = int(_domik_digits(title))
            except ValueError:
                continue

            location = _domik_location(item.strings)
            if location is None:
                # Location is part of the de-duplication hash; skip early
                # rather than after fetching the advert page.
                continue
            data['location'] = location

            price_tag = item.find('div', {'class': 'price_item'})
            price_text = price_tag.string if price_tag is not None else None
            if price_text is None:
                continue
            price = _domik_digits(price_text)
            if not price:
                continue
            data['price'] = currency.convert_currency(Decimal(price), 'KGS')

            # NOTE(review): href is assumed to point at the advert's detail
            # page, which is where the lookups below are expected to match.
            # Confirm against the live markup.
            detail_source = get_html_from(_domik_absolute_url(domain, href))
            if detail_source is None:
                return
            innersoup = BeautifulSoup(detail_source)

            photo_cell = innersoup.find('td', {'id': 'fotoz'})
            try:
                addr_block = (photo_cell.find_next_sibling('td')
                              .h3.find_next_sibling('div'))
                # The first two text nodes are not part of the address.
                addr_parts = list(addr_block.strings)[2:]
            except AttributeError:
                # Any missing element in the chain yields None.
                continue
            address = u''.join(u'%s. ' % part for part in addr_parts).strip()
            if not address:
                continue
            data['address'] = address

            desc_tag = innersoup.find('div', {'class': 'fullcont'})
            if desc_tag is None:
                continue
            desc = desc_tag.string
            data['desc'] = desc.strip() if desc is not None else ''
            if len(data['desc']) < 5:
                data['desc'] = ''

            phone_tag = desc_tag.find_next_sibling('div', {'class': 'private'})
            if phone_tag is None:
                continue
            data['phone'] = clean_phone(phone_tag.string)
            if data['phone'] is None or len(data['phone']) > 15:
                continue
            data['ip'] = '176.126.165.34'

            fingerprint = u'%s|%s|%s|%s' % (
                data['rooms'],
                data['location'],
                data['phone'],
                data['ip'],
            )
            data['hash'] = hashlib.sha256(
                fingerprint.encode('utf-8')).hexdigest()

            details_block = addr_block.find_next_sibling('div')
            if details_block is None:
                continue
            floor = u''
            for index, text in enumerate(details_block.strings):
                if index == 2:
                    floor = _domik_digits(text)
                elif index == 5:
                    try:
                        data['area'] = int(text)
                    except ValueError:
                        pass
            try:
                int(floor)
            except ValueError:
                continue
            data['floor'] = floor

            is_owner = any(
                tag.string is not None and u'(собственник)' in tag.string
                for tag in innersoup.find_all('div', {'class': 'private'})
            )
            data['is_realtor'] = 'no' if is_owner else 'yes'

            data['published'] = True
            data['featured'] = False
            data['seller'] = None
            data['condition'] = 'used'
            data['section'] = 'sale'

            if u'квартиру' in title:
                model_cls = Apartment
            elif u'дом' in title:
                model_cls = House
            else:
                continue
            # exists() also copes with several rows already sharing the hash.
            if model_cls.objects.filter(hash=data['hash']).exists():
                continue
            instance = model_cls.objects.create(**data)

            preview = innersoup.find('img', {'id': 'foto_prev'})
            preview_src = preview.get('src') if preview is not None else None
            if preview_src:
                uid = getpwnam('django').pw_uid
                gid = grp.getgrnam('www-data').gr_gid
                # Dropping the '_m' suffix selects the larger image variant.
                if not _domik_attach_image(instance, 'image_1',
                                           preview_src.replace('_m.', '.'),
                                           uid, gid):
                    return
                thumb_tags = innersoup.find_all('a', {'class': 'fotoz_link'})
                # The first thumbnail is skipped; the next three map by
                # position onto image_2..image_4. Anything beyond that has
                # no slot and is not downloaded at all.
                for position, tag in enumerate(thumb_tags[1:4], 2):
                    rel = tag.get('rel')
                    if isinstance(rel, list):
                        rel = rel[0] if rel else None
                    if not rel:
                        continue
                    if not _domik_attach_image(instance,
                                               'image_%d' % position,
                                               rel.replace('_m.', '.'),
                                               uid, gid):
                        return
            instance.save()