def render_automobiles_view(request):
    """ Renders automobile section. """
    data = {}
    loc = request.COOKIES.get('ads_location', None)
    items = Automobile.objects.filter(published=True).order_by('-pub_date')

    # Exact-match GET filters: the querystring key doubles as the model field.
    for key in ('brand', 'condition', 'body_type', 'transmission'):
        value = request.GET.get(key, None)
        if value:
            items = items.filter(**{key: value})

    # Year comes in as "MIN - MAX".
    year = request.GET.get('year', None)
    if year:
        bounds = year.split(' - ')
        items = items.filter(year__range=[bounds[0], bounds[1]])

    # Price range is parsed/converted through the currency helpers;
    # without a filter the template gets default slider bounds.
    price = request.GET.get('price', None)
    if price:
        minp, maxp = currency.process_price(price, 'USD')
        items = items.filter(price__range=[minp, maxp])
        data['min_price'] = currency.convert_currency(minp, 'USD')
        data['max_price'] = currency.convert_currency(maxp, 'USD')
    else:
        data['min_price'] = 15000
        data['max_price'] = 30000

    if loc is not None and loc != 'all':
        items = items.filter(location=loc)

    # The sorting form is for rendering only; blank out validation errors.
    data['sortform'] = forms.AutomobileSortingForm(request.GET or None)
    for field in Automobile._meta.get_all_field_names():
        data['sortform'].errors[field] = ''

    # Featured sidebar items, cached per location for 15 minutes.
    cache_key = 'autos_feat_items_%s' % loc
    data['featured_items'] = cache.get(cache_key)
    if not data['featured_items']:
        data['featured_items'] = Automobile.get_featured(loc)
        cache.set(cache_key, data['featured_items'], 900)

    data.update({
        'USD': True,
        'sortid': 'autos',
        'title': Automobile._meta.verbose_name_plural,
        'cache_time': settings.CACHE_TIME,
        'url': request.get_full_path(),
        # Featured ads are listed before the rest.
        'items': list(chain(items.filter(featured=True),
                            items.filter(featured=False))),
    })
    return render(request, TMPL, data)
def render_plots_view(request):
    """ Renders plot section. """
    data = {}
    loc = request.COOKIES.get('ads_location', None)
    items = Plot.objects.filter(published=True).order_by('-pub_date')

    # Exact-match GET filters: the querystring key doubles as the model field.
    for key in ('is_realtor', 'condition', 'address', 'area'):
        value = request.GET.get(key, None)
        if value:
            items = items.filter(**{key: value})

    # Price range is parsed/converted through the currency helpers;
    # without a filter the template gets default slider bounds.
    price = request.GET.get('price', None)
    if price:
        minp, maxp = currency.process_price(price, 'USD')
        items = items.filter(price__range=[minp, maxp])
        data['min_price'] = currency.convert_currency(minp, 'USD')
        data['max_price'] = currency.convert_currency(maxp, 'USD')
    else:
        data['min_price'] = 5000
        data['max_price'] = 40000

    if loc is not None and loc != 'all':
        items = items.filter(location=loc)

    # The sorting form is for rendering only; blank out validation errors.
    data['sortform'] = forms.PlotSortingForm(request.GET or None)
    for field in Plot._meta.get_all_field_names():
        data['sortform'].errors[field] = ''

    # Featured sidebar items, cached per location for 15 minutes.
    cache_key = 'plots_feat_items_%s' % loc
    data['featured_items'] = cache.get(cache_key)
    if not data['featured_items']:
        data['featured_items'] = Plot.get_featured(loc)
        cache.set(cache_key, data['featured_items'], 900)

    data.update({
        'USD': True,
        'no_rooms': True,
        'plot': True,
        'sortid': 'plots',
        'sortclass': 'scfilter-2rows',
        'title': Plot._meta.verbose_name_plural,
        'cache_time': settings.CACHE_TIME,
        'url': request.get_full_path(),
        # Featured ads are listed before the rest.
        'items': list(chain(items.filter(featured=True),
                            items.filter(featured=False))),
    })
    return render(request, TMPL, data)
def facebook_apartments():
    """Post the newest eligible sale apartment to the Facebook page.

    Picks the most recent published sale apartment (with rooms, a photo
    and an area) published within the last two days.  Does nothing when
    no candidate exists or when the item was already posted.
    """
    window_end = datetime.datetime.now()
    window_start = window_end - datetime.timedelta(days=2)
    candidates = Apartment.objects.filter(
        published=True,
        section='sale',
        rooms__gt=0,
        pub_date__range=[window_start, window_end]
    ).exclude(image_1='').exclude(area='').order_by('-pub_date')
    try:
        item = candidates[0]
    except IndexError:
        return

    # Already posted once -> skip; a missing record means we may post.
    try:
        social.FacebookPost.objects.get(item=item)
    except social.FacebookPost.DoesNotExist:
        pass
    else:
        return

    post_kwargs = {
        'comment': u'Еженедельная подборка: %s-комн. квартира' % item.rooms,
        'name': u'%s-комн. квартира' % item.rooms,
        'link': BASEURL + item.get_absolute_url(),
        'caption': u'Цена: $' + humanize.intcomma(
            currency.convert_currency(item.price, 'USD')),
        'picture': BASEURL + item.image_1.url,
    }
    if item.desc:
        post_kwargs['description'] = item.desc
    else:
        post_kwargs['description'] = u'Продаётся %s-комнатная квартира. Площадь %s \
кв.м.' % (item.rooms, item.area)

    # Record the post first so a retry never double-posts the same item.
    social.FacebookPost.objects.create(item=item)
    post_to_fb(**post_kwargs)
def facebook_autos():
    """Post the newest eligible automobile ad to the Facebook page.

    Picks the most recent published automobile (model year 1998 or later,
    with a photo) published within the last ten hours.  Does nothing when
    no candidate exists or when the item was already posted.
    """
    window_end = datetime.datetime.now()
    window_start = window_end - datetime.timedelta(hours=10)
    candidates = Automobile.objects.filter(
        published=True,
        year__range=[1998, datetime.date.today().year],
        pub_date__range=[window_start, window_end]
    ).exclude(image_1='').order_by('-pub_date')
    try:
        item = candidates[0]
    except IndexError:
        return

    # Already posted once -> skip; a missing record means we may post.
    try:
        social.FacebookPost.objects.get(item=item)
    except social.FacebookPost.DoesNotExist:
        pass
    else:
        return

    post_kwargs = {
        'comment': u'Еженедельная подборка: %s %s' % (item.brand, item.model),
        'name': u'%s %s, %s г.' % (item.brand, item.model, item.year),
        'link': BASEURL + item.get_absolute_url(),
        'caption': u'Цена: $' + humanize.intcomma(
            currency.convert_currency(item.price, 'USD')),
        'picture': BASEURL + item.image_1.url,
    }
    if item.desc:
        post_kwargs['description'] = item.desc
    else:
        post_kwargs['description'] = u'Продаётся автомобиль %s %s %s года.' % (
            item.brand, item.model, item.year)

    # Record the post first so a retry never double-posts the same item.
    social.FacebookPost.objects.create(item=item)
    post_to_fb(**post_kwargs)
def clean_price(self):
    """Validate the 'price' field and return it converted from KGS.

    Raises a ValidationError when the value is not a valid number or
    when its textual representation is longer than 8 characters
    (implausibly large price).
    """
    raw = self.cleaned_data['price']
    try:
        amount = decimal.Decimal(raw)
    except decimal.InvalidOperation:
        raise forms.ValidationError(_(u'Поле "Цена" должно содержать только цифры.'))
    if len(str(amount)) > 8:
        raise forms.ValidationError(_(u'Цена невероятно высока.'))
    return currency.convert_currency(amount, 'KGS')
def parse():
    """Scrape house-for-sale ads from the diesel.elcat.kg forum.

    Downloads the forum index (windows-1251), extracts up to seven topic
    links, then parses each topic's title and first post with string
    slicing and regexes to assemble a House record.  Topics that look
    like purchase/exchange requests, lack a price or phone, or were
    already imported (hash match) are skipped.  Up to four attached
    photos are downloaded per ad.
    """
    data = {}
    domain = 'http://diesel.elcat.kg'
    url = '%s/index.php?showforum=225' % domain
    source = get_html_from(url)
    if source is None:
        return
    # Forum pages are windows-1251; everything below works on utf-8 bytes.
    html = unicode(source, 'windows-1251').encode('utf-8')
    # Drop everything up to the end of the pinned-topics section.
    html = html[html.rfind('<!-- END PINNED -->'):]
    hrefs = []
    # Topic anchors carry an id "tid-link-<topic_id>"; slice the id out of
    # the ~21 bytes that follow the marker, up to the closing attribute.
    for match in re.finditer('tid-link-', html):
        sub = html[match.end():match.start()+30]
        hrefs.append('%s/index.php?showtopic=%s' % (domain, sub.split('" href=')[0]))
    # Only the seven newest topics per run ("step" is unused).
    for step, href in enumerate(hrefs[:7]):
        innersource = get_html_from(href)
        if innersource is None:
            continue
        innerhtml = unicode(innersource, 'windows-1251').encode('utf-8')
        # The topic title sits right after this style attribute; 38 is the
        # length of the marker string being searched for.
        match = innerhtml[innerhtml.find('style=\'word-wrap:break-word;\'><div><b>'):]
        title = match[38:match.find('</div></td>')]
        # Fixed attributes for every imported ad.
        data['published'] = True
        data['featured'] = False
        data['seller'] = None
        data['section'] = 'sale'
        data['location'] = 'bishkek'
        data['ip'] = '212.42.102.203'
        data['rooms'] = get_rooms(title)
        # Price like "12 345 $" or "$ 12,345".
        # NOTE(review): \u0024 inside a *byte* pattern is not a '$' escape
        # in Python 2 re — it likely matches the literal characters instead;
        # an ur'' pattern would be needed for a real '$'.  Confirm intent.
        price_pattern = re.compile(r'(\d{1,3}[\.,\s]?\d{3}\s?[\u0024])|([\u0024]\s?\d{1,3}[\.,\s]?\d{3})', re.UNICODE)
        price_re = re.search(price_pattern, unicode(title, 'utf-8'))
        if price_re is None:
            continue
        # Keep only the digits of the matched price fragment.
        price = ''
        for s in price_re.group():
            if s.isdigit():
                price += s
        try:
            data['price'] = currency.convert_currency(Decimal(price), 'KGS')
        except:
            continue
        # Skip "want to buy" / "exchange" topics and ads without rooms.
        if u'куплю'.encode('utf-8') in title.lower() or not data['rooms'] or u'обмен'.encode('utf-8') in title.lower():
            continue
        if is_realtor(title):
            data['is_realtor'] = 'yes'
        else:
            data['is_realtor'] = 'no'
        # First post body: from the postcolor div (42 bytes skip the id
        # attribute) up to the signature marker comment.
        match = innerhtml[innerhtml.find('<div class="postcolor" id=\'post'):]
        body = match[42:match.find(u'<!-- подпись -->'.encode('utf-8'))]
        # "new" condition when the word appears in title or body.
        # NOTE(review): .lower() on utf-8 bytes only lowercases ASCII, so
        # the Cyrillic keyword match is effectively case-sensitive.
        if u'новый' in unicode(title.lower(), 'utf-8') or u'новый' in unicode(body.lower(), 'utf-8'):
            data['condition'] = 'new'
        else:
            data['condition'] = 'used'
        # Floor like "3/5 этаж", "3 из 5 эт." — group(1) is the floor.
        floor_pattern = re.compile(ur'(\d{1,2})((\s?/\s?|\sиз\s)\d{1,2})?\s?(этаж|эт\.)', re.UNICODE)
        floor_re = re.search(floor_pattern, unicode(body, 'utf-8'))
        if floor_re is None:
            floor_re = re.search(floor_pattern, unicode(title, 'utf-8'))
        if floor_re is not None:
            data['floor'] = floor_re.group(1)
        # Area like "45 кв.м" / "45 м2"; body first, then title.
        area_pattern = re.compile(ur'(\d+[\.,]?\d+?)\s?(кв\.?[/\s]?м\.?|м2)', re.UNICODE)
        area_re = re.search(area_pattern, unicode(body, 'utf-8'))
        if area_re is None:
            area_re = re.search(area_pattern, unicode(title, 'utf-8'))
        if area_re is not None:
            data['area'] = area_re.group(1)
        # Kyrgyz phone numbers: leading 0 or +996 then operator code.
        phone_pattern = re.compile(r'([0]|\+996)\s?([357]\d{2})\s?(\d{2}\s?\d{2}\s?\d{2})\D', re.UNICODE)
        phone_re = re.search(phone_pattern, unicode(body, 'utf-8'))
        if phone_re is None:
            continue
        try:
            data['phone'] = clean_phone(str(phone_re.group()))
        except:
            continue
        # Guard against the "phone" actually being the price digits.
        if data.get('phone', None) is None or str(data['price']) in str(data['phone']):
            continue
        # Dedup key: rooms|location|phone|ip hashed with sha256.
        try:
            hash = u'%s|%s|%s|%s' % (
                data['rooms'], 'bishkek', data['phone'], data['ip']
            )
        except KeyError:
            continue
        data['hash'] = hashlib.sha256(hash).hexdigest()
        # Skip ads we have already imported.
        try:
            House.objects.get(hash=data['hash'])
        except House.DoesNotExist:
            pass
        else:
            continue
        instance = House.objects.create(**data)
        # Attached photo thumbs; the "_thumb" suffix is stripped to get the
        # full-size image URL.
        im_pattern = re.compile(r'monthly_\d{2}_\d{4}/post-\d+-\d+_thumb\.jpg', re.UNICODE)
        im_re = re.findall(im_pattern, unicode(innerhtml, 'utf-8'))
        if not im_re:
            continue
        for id, src in enumerate(im_re):
            im_url = 'http://diesel.elcat.kg/uploads/%s' % src.replace('_thumb.', '.')
            # Ownership for the saved media files (django user, www-data group).
            uid = getpwnam('django').pw_uid
            gid = grp.getgrnam('www-data').gr_gid
            f = get_html_from(im_url)
            if f is None:
                return
            fname = src.split('/post-')[-1].replace('_thumb.', '.')
            path = image.encode_image_name(None, fname)
            localpath = settings.MEDIA_ROOT + path
            local_f = open(localpath, 'w')
            local_f.write(f)
            local_f.close()
            im_size = Image.open(localpath).size
            # Discard images too small for the listing layout.
            if im_size[0] < 370 and im_size[1] < 200:
                if os.path.isfile(localpath):
                    os.remove(localpath)
            else:
                local_f = open(localpath, 'r')
                # At most four images are attached to the record.
                if id == 0:
                    instance.image_1.save(fname, File(local_f))
                    local_f.close()
                    os.chown(instance.image_1.path, uid, gid)
                elif id == 1:
                    instance.image_2.save(fname, File(local_f))
                    local_f.close()
                    os.chown(instance.image_2.path, uid, gid)
                elif id == 2:
                    instance.image_3.save(fname, File(local_f))
                    local_f.close()
                    os.chown(instance.image_3.path, uid, gid)
                elif id == 3:
                    instance.image_4.save(fname, File(local_f))
                    local_f.close()
                    os.chown(instance.image_4.path, uid, gid)
                else:
                    local_f.close()
                    break
        instance.save()
def parse():
    """Scrape automobile ads from avtogid.kg.

    Walks the first six search-result pages, extracts brand/model/price
    and spec-table values per listing, then fetches each detail page for
    the description, phone and photos.  Listings with a non-ASCII title,
    no phone, or an already-imported hash are skipped.
    """
    data = {}
    domain = 'http://www.avtogid.kg'
    urls = ['%s/search/search?cat=1&page=%s' % (domain, page) for page in range(1, 7)]
    for url in urls:
        soup = BeautifulSoup(get_html_from(url))
        # NOTE(review): BeautifulSoup(...) never returns None, so this
        # check cannot fire — the None check presumably belongs on the
        # get_html_from() result.  Confirm.
        if soup is None:
            return
        containers = soup.find_all('div', {'class': 'sub-selected'})
        for item in containers:
            table_tag = item.find('div', {'class': 'table'})
            title_tag = item.find('div', {'class': 'res-car-name'})
            title = title_tag.h3.a.string
            href = domain + title_tag.h3.a['href']
            # Py2 idiom: decoding a unicode title implicitly ASCII-encodes
            # first, so non-ASCII titles raise and are skipped.
            try:
                title.decode('ascii')
            except UnicodeEncodeError:
                continue
            # Match the brand by name; the remainder of the title is the model.
            for brand in AutomobileBrand.objects.all():
                if brand.name in title:
                    data['brand'] = brand
                    data['model'] = title.replace(brand.name, '').lstrip()
            # Price: keep only the digits from the h4 text.
            price = str()
            for s in title_tag.h4.string:
                if s.isdigit():
                    price += s
            data['price'] = currency.convert_currency(Decimal(price), 'KGS')
            # Spec table cells are positional: year, engine, color, gearbox.
            for id, td in enumerate(table_tag.table.tbody.find_all('td')):
                if id == 0:
                    data['year'] = int(td.string[:4])
                if id == 1:
                    data['engine_capacity'] = td.string
                if id == 2:
                    if get_color(td) is not None:
                        data['color'] = get_color(td)
                if id == 3:
                    if td.string == u'Автомат':
                        data['transmission'] = 'automatic'
                    elif td.string == u'Типтроник':
                        data['transmission'] = 'tiptronic'
                    elif td.string == u'Механическая':
                        data['transmission'] = 'mechanic'
                    else:
                        continue
            innersoup = BeautifulSoup(get_html_from(href))
            # NOTE(review): same dead None check as above.
            if innersoup is None:
                return
            # Description: joined paragraph strings; very short ones dropped.
            desc_strs = innersoup.find('div', {'class': 'description'}).p.strings
            try:
                data['desc'] = u'. '.join(desc_strs)
            except AttributeError:
                pass
            else:
                if len(data['desc']) < 5:
                    data['desc'] = ''
            # The phone is the third string inside the tel-soc block.
            tel_strs = innersoup.find('div', {'class': 'tel-soc'}).strings
            for id, string in enumerate(tel_strs):
                if id == 2:
                    if string:
                        data['phone'] = clean_phone(string)
                    else:
                        continue
            if data.get('phone', None) is None:
                continue
            data['ip'] = '212.42.117.66'
            # Dedup key: brand|model|phone|ip hashed with sha256.
            try:
                hash = u'%s|%s|%s|%s' % (
                    data['brand'].name, data['model'], data['phone'], data['ip']
                )
            except KeyError:
                continue
            data['hash'] = hashlib.sha256(hash).hexdigest()
            # Skip ads we have already imported.
            try:
                Automobile.objects.get(hash=data['hash'])
            except Automobile.DoesNotExist:
                pass
            else:
                continue
            # Fixed attributes for every imported ad.
            data['published'] = True
            data['featured'] = False
            data['seller'] = None
            data['condition'] = 'used'
            data['location'] = 'bishkek'
            auto = Automobile.objects.create(**data)
            # Main photo, when present.
            try:
                im_url = innersoup.find('a', {'class': 'main-image'})['href']
            except TypeError:
                pass
            else:
                # Ownership for saved media files (django user, www-data group).
                uid = getpwnam('django').pw_uid
                gid = grp.getgrnam('www-data').gr_gid
                f = get_html_from(domain + im_url)
                if f is None:
                    return
                fname = im_url.replace('/upload/images/', '')
                path = image.encode_image_name(None, fname)
                localpath = settings.MEDIA_ROOT + path
                local_f = open(localpath, 'w')
                local_f.write(f)
                local_f.close()
                im_size = Image.open(localpath).size
                # Discard images too small for the listing layout.
                if im_size[0] < 370 and im_size[1] < 200:
                    if os.path.isfile(localpath):
                        os.remove(localpath)
                else:
                    local_f = open(localpath, 'r')
                    auto.image_1.save(fname, File(local_f))
                    local_f.close()
                    os.chown(auto.image_1.path, uid, gid)
            # Additional thumbnails fill image_2..image_4.
            try:
                thumbs = innersoup.find_all('a', {'class': 'thumbnail'})
            except TypeError:
                pass
            else:
                for id, im in enumerate(thumbs):
                    f = get_html_from(domain + im['href'])
                    if f is None:
                        return
                    fname = im['href'].replace('/upload/images/', '')
                    path = image.encode_image_name(None, fname)
                    localpath = settings.MEDIA_ROOT + path
                    local_f = open(localpath, 'w')
                    local_f.write(f)
                    local_f.close()
                    im_size = Image.open(localpath).size
                    if im_size[0] < 370 and im_size[1] < 200:
                        if os.path.isfile(localpath):
                            os.remove(localpath)
                    else:
                        local_f = open(localpath, 'r')
                        if id == 0:
                            auto.image_2.save(fname, File(local_f))
                            local_f.close()
                            os.chown(auto.image_2.path, uid, gid)
                        elif id == 1:
                            auto.image_3.save(fname, File(local_f))
                            local_f.close()
                            os.chown(auto.image_3.path, uid, gid)
                        elif id == 2:
                            auto.image_4.save(fname, File(local_f))
                            local_f.close()
                            os.chown(auto.image_4.path, uid, gid)
                        else:
                            local_f.close()
                            break
            auto.save()
def parse():
    """Scrape automobile ads from auto.doska.kg.

    Walks the first six listing pages; listings live in the fourth
    <table> on each page.  Extracts brand/model/specs/price per row,
    then fetches the detail page for the phone and photos.  Note that
    several failure paths use `return` (aborting the whole run) rather
    than `continue`.
    """
    data = {}
    domain = 'http://auto.doska.kg'
    urls = ['%s/carsfind/page:%s/?order=date_new' % (domain, page) for page in range(1, 7)]
    for url in urls:
        soup = BeautifulSoup(get_html_from(url))
        # NOTE(review): BeautifulSoup(...) never returns None, so this
        # check cannot fire — the None check presumably belongs on the
        # get_html_from() result.  Confirm.
        if soup is None:
            return
        href = str()
        # The listings table is the fourth one on the page.
        for id, table in enumerate(soup.find_all('table')):
            if id == 3:
                containers = table.find_all('tr')
                for item in containers:
                    # Per row: td 0 holds the photo+link, td 1 the title/specs.
                    for id, td in enumerate(item.find_all('td')):
                        if id == 0:
                            # "photononb" placeholder means no photo — since
                            # listings are date-ordered, stop the whole run.
                            if 'photononb' in td.a.img['src']:
                                return
                            href = domain + td.a['href']
                        if id == 1:
                            title = td.find('a', {'class': 'fs15'}).string
                            # Py2 idiom: decoding a unicode title implicitly
                            # ASCII-encodes first; non-ASCII titles abort.
                            try:
                                title.decode('ascii')
                            except UnicodeEncodeError:
                                return
                            # Brand by name match; remainder is the model.
                            for brand in AutomobileBrand.objects.all():
                                if brand.name in title:
                                    data['brand'] = brand
                                    data['model'] = title.replace(brand.name, '').lstrip()
                            # Comma-separated spec string, e.g.
                            # "2005 г., седан, чёрный, 2.0 л., ...".
                            fields = td.find('div', {'class': 'fs12'}).contents[2]
                            try:
                                data['year'] = int(td.find('div', {'class': 'fs12'}).b.string[:4])
                            except:
                                return
                            if get_color(fields) is not None:
                                data['color'] = get_color(fields)
                            if get_body_type(fields) is not None:
                                data['body_type'] = get_body_type(fields)
                            if u'Механика' in fields.string:
                                data['transmission'] = 'mechanic'
                            elif u'Автомат' in fields.string:
                                data['transmission'] = 'automatic'
                            elif u'Типтроник' in fields.string:
                                data['transmission'] = 'tiptronic'
                            else:
                                return
                            if u'правый руль' in fields.string.lower():
                                data['steering_wheel'] = 'right'
                            else:
                                data['steering_wheel'] = 'left'
                            # Engine capacity: the last comma-separated token
                            # before "л." (litres), when plausibly short.
                            try:
                                sub = fields[:fields.find(u'л.')]
                                if sub:
                                    if len(sub[sub.rfind(',')+1:].strip()) <= 10:
                                        data['engine_capacity'] = sub[sub.rfind(',')+1:].strip()
                            except:
                                pass
                            if get_drive_type(fields) is not None:
                                data['drive_type'] = get_drive_type(fields)
                    # Price: the digits in the 6 characters after the '$'
                    # anywhere in the row's serialized contents.
                    contents = u''.join([unicode(td) for td in item.contents])
                    price = str()
                    for s in contents[contents.find('$')+1:contents.find('$')+7]:
                        if s.isdigit():
                            price += s
                    data['price'] = currency.convert_currency(Decimal(price), 'KGS')
                    innersoup = BeautifulSoup(get_html_from(href))
                    data['phone'] = clean_phone(innersoup.find('span', {'class': 'text_c_blue'}).string)
                    if data.get('phone', None) is None:
                        continue
                    data['ip'] = '212.42.117.66'
                    # Dedup key: brand|model|phone|ip hashed with sha256.
                    try:
                        hash = u'%s|%s|%s|%s' % (
                            data['brand'].name, data['model'], data['phone'], data['ip']
                        )
                        data['hash'] = hashlib.sha256(hash).hexdigest()
                    except:
                        continue
                    # Skip ads we have already imported.
                    try:
                        Automobile.objects.get(hash=data['hash'])
                    except Automobile.DoesNotExist:
                        pass
                    else:
                        continue
                    # Fixed attributes for every imported ad.
                    data['published'] = True
                    data['featured'] = False
                    data['seller'] = None
                    data['condition'] = 'used'
                    data['location'] = 'bishkek'
                    auto = Automobile.objects.create(**data)
                    # Photos: anchors carrying a rel attribute on the detail
                    # page; at most four are attached (has_key is py2-only).
                    id = 0
                    for link in innersoup.find_all('a'):
                        if link.has_key('rel'):
                            im_url = link['href']
                            # Ownership for saved media files.
                            uid = getpwnam('django').pw_uid
                            gid = grp.getgrnam('www-data').gr_gid
                            f = get_html_from(im_url)
                            if f is None:
                                return
                            fname = im_url.replace('http://auto.static.akipress.org/', '')[im_url.find('/')-1:]
                            path = image.encode_image_name(None, fname)
                            localpath = settings.MEDIA_ROOT + path
                            local_f = open(localpath, 'w')
                            local_f.write(f)
                            local_f.close()
                            im_size = Image.open(localpath).size
                            # Discard images too small for the listing layout.
                            if im_size[0] < 370 and im_size[1] < 200:
                                if os.path.isfile(localpath):
                                    os.remove(localpath)
                            else:
                                local_f = open(localpath, 'r')
                                if id == 0:
                                    auto.image_1.save(fname, File(local_f))
                                    local_f.close()
                                    os.chown(auto.image_1.path, uid, gid)
                                elif id == 1:
                                    auto.image_2.save(fname, File(local_f))
                                    local_f.close()
                                    os.chown(auto.image_2.path, uid, gid)
                                elif id == 2:
                                    auto.image_3.save(fname, File(local_f))
                                    local_f.close()
                                    os.chown(auto.image_3.path, uid, gid)
                                elif id == 3:
                                    auto.image_4.save(fname, File(local_f))
                                    local_f.close()
                                    os.chown(auto.image_4.path, uid, gid)
                                else:
                                    local_f.close()
                                    break
                            id += 1
                    auto.save()
def parse():
    """Scrape apartment and house sale ads from domik.kg.

    Walks the first six listing pages.  For each listing: extracts the
    title, room count, location and price from the list item, then
    fetches the ad's *detail page* for the address, description, phone,
    floor/area and photos.  Ads without rooms/price/address/phone, or
    with an already-imported hash, are skipped.  Creates an Apartment or
    a House depending on the title wording.

    Fixes over the previous revision:
    - the detail page is fetched from the extracted ``href`` (previously
      the listing page ``url`` was re-fetched, so detail fields could
      never be found);
    - relative ``href`` values are qualified with the domain, matching
      the other scrapers;
    - the download result is None-checked *before* parsing (a
      BeautifulSoup instance is never None, so the old check was dead).
    """
    data = {}
    domain = 'http://www.domik.kg'
    urls = ['%s/offers/sell/page_%s/' % (domain, page) for page in range(1, 7)]
    for url in urls:
        listing_html = get_html_from(url)
        if listing_html is None:
            return
        soup = BeautifulSoup(listing_html)
        containers = soup.find_all('div', {'class': 'pix_r'})
        for item in containers:
            # Title and detail-page link come from the first anchor.
            title = unicode()
            href = None
            title_tags = item.find('h1', {'class': 'i_sell'}).find_all('a')
            for id, tag in enumerate(title_tags):
                if id == 0:
                    href = tag['href']
                    title = tag.string.strip().replace('\n', '').replace('\t', '')
                    break
            # Room count: the digits embedded in the title.
            rooms = str()
            for s in title:
                if s.isdigit():
                    rooms += s
            try:
                data['rooms'] = int(rooms)
            except ValueError:
                continue
            # Map the region name in the item text to a location code.
            for string in item.strings:
                if u'Бишкек' in string:
                    data['location'] = 'bishkek'
                elif u'Иссык-Кульская область' in string:
                    data['location'] = 'issykkul'
                elif u'Чуйская область' in string:
                    data['location'] = 'chui'
                elif u'Джалал-Абадская область' in string:
                    data['location'] = 'jalalabad'
                elif u'Баткенская область' in string:
                    data['location'] = 'batken'
                elif u'Ошская область' in string:
                    data['location'] = 'osh'
                elif u'Таласская область' in string:
                    data['location'] = 'talas'
                else:
                    continue
            # Price: digits of the price_item text, converted from KGS.
            price = str()
            try:
                for s in item.find('div', {'class': 'price_item'}).string:
                    if s.isdigit():
                        price += s
            except TypeError:
                continue
            else:
                if price:
                    data['price'] = currency.convert_currency(Decimal(price), 'KGS')
                else:
                    continue
            # Fetch the ad's detail page (qualify relative links first).
            if href is None:
                continue
            detail_url = href if href.startswith('http') else domain + href
            detail_html = get_html_from(detail_url)
            if detail_html is None:
                return
            innersoup = BeautifulSoup(detail_html)
            # Address: strings of the div next to the photo cell, skipping
            # the first two entries.
            try:
                main_tags = innersoup.find('td', {'id': 'fotoz'})
                addr_strs = main_tags.find_next_sibling('td').h3.find_next_sibling('div').strings
                addr = unicode()
                for id, string in enumerate(addr_strs):
                    if id > 1:
                        addr += u'%s. ' % string
            except:
                continue
            if len(addr.strip()):
                data['address'] = addr.strip()
            else:
                continue
            # Description; very short ones are treated as empty.
            desc = innersoup.find('div', {'class': 'fullcont'}).string
            try:
                data['desc'] = desc.strip()
            except AttributeError:
                data['desc'] = ''
            else:
                if len(data['desc']) < 5:
                    data['desc'] = ''
            data['phone'] = clean_phone(innersoup.find('div', {'class': 'fullcont'}).find_next_sibling('div', {'class': 'private'}).string)
            if data.get('phone', None) is None:
                continue
            data['ip'] = '176.126.165.34'
            if len(data['phone']) > 15:
                continue
            # Dedup key: rooms|location|phone|ip hashed with sha256.
            try:
                hash = u'%s|%s|%s|%s' % (
                    data['rooms'], data['location'], data['phone'], data['ip']
                )
            except KeyError:
                continue
            data['hash'] = hashlib.sha256(hash).hexdigest()
            # Floor (string index 2) and area (index 5) from the data block.
            data_strs = main_tags.find_next_sibling('td').h3.find_next_sibling('div').find_next_sibling('div').strings
            floor = str()
            for id, string in enumerate(data_strs):
                if id == 2:
                    for s in string:
                        if s.isdigit():
                            floor += s
                if id == 5:
                    try:
                        data['area'] = int(string)
                    except ValueError:
                        pass
            try:
                int(floor)
            except ValueError:
                continue
            else:
                data['floor'] = floor
            # Realtor unless the ad is marked "(owner)".
            data['is_realtor'] = 'yes'
            for tag in innersoup.find_all('div', {'class': 'private'}):
                if u'(собственник)' in tag.string:
                    data['is_realtor'] = 'no'
            # Fixed attributes for every imported ad.
            data['published'] = True
            data['featured'] = False
            data['seller'] = None
            data['condition'] = 'used'
            data['section'] = 'sale'
            # Apartment vs. house depending on the title wording;
            # skip already-imported hashes.
            if u'квартиру' in title:
                try:
                    Apartment.objects.get(hash=data['hash'])
                except Apartment.DoesNotExist:
                    pass
                else:
                    continue
                instance = Apartment.objects.create(**data)
            elif u'дом' in title:
                try:
                    House.objects.get(hash=data['hash'])
                except House.DoesNotExist:
                    pass
                else:
                    continue
                instance = House.objects.create(**data)
            else:
                continue
            # Main photo: preview img with the "_m" thumb suffix stripped.
            try:
                im_url = innersoup.find('img', {'id': 'foto_prev'})['src'].replace('_m.', '.')
            except TypeError:
                pass
            else:
                # Ownership for saved media files (django user, www-data group).
                uid = getpwnam('django').pw_uid
                gid = grp.getgrnam('www-data').gr_gid
                f = get_html_from(im_url)
                if f is None:
                    return
                fname = im_url.split('/')[-1]
                path = image.encode_image_name(None, fname)
                localpath = settings.MEDIA_ROOT + path
                local_f = open(localpath, 'w')
                local_f.write(f)
                local_f.close()
                im_size = Image.open(localpath).size
                # Discard images too small for the listing layout.
                if im_size[0] < 370 and im_size[1] < 200:
                    if os.path.isfile(localpath):
                        os.remove(localpath)
                else:
                    local_f = open(localpath, 'r')
                    instance.image_1.save(fname, File(local_f))
                    local_f.close()
                    os.chown(instance.image_1.path, uid, gid)
            # Additional photos fill image_2..image_4 (index 0 is the
            # preview already handled above).
            try:
                thumb_tags = innersoup.find_all('a', {'class': 'fotoz_link'})
            except TypeError:
                pass
            else:
                for id, tag in enumerate(thumb_tags):
                    if id == 0:
                        continue
                    if isinstance(tag['rel'], list):
                        thumb_url = tag['rel'][0].replace('_m.', '.')
                    else:
                        thumb_url = tag['rel'].replace('_m.', '.')
                    f = get_html_from(thumb_url)
                    if f is None:
                        return
                    fname = thumb_url.split('/')[-1]
                    path = image.encode_image_name(None, fname)
                    localpath = settings.MEDIA_ROOT + path
                    local_f = open(localpath, 'w')
                    local_f.write(f)
                    local_f.close()
                    im_size = Image.open(localpath).size
                    if im_size[0] < 370 and im_size[1] < 200:
                        if os.path.isfile(localpath):
                            os.remove(localpath)
                    else:
                        local_f = open(localpath, 'r')
                        if id == 1:
                            instance.image_2.save(fname, File(local_f))
                            local_f.close()
                            os.chown(instance.image_2.path, uid, gid)
                        elif id == 2:
                            instance.image_3.save(fname, File(local_f))
                            local_f.close()
                            os.chown(instance.image_3.path, uid, gid)
                        elif id == 3:
                            instance.image_4.save(fname, File(local_f))
                            local_f.close()
                            os.chown(instance.image_4.path, uid, gid)
                        else:
                            local_f.close()
                            break
            instance.save()