def get_or_create_band(data):
    """Fetch the band described by ``data`` or create it, then merge in extras.

    The band is looked up by slug. The slug is derived from ``data['slug']``
    when that key is present and from ``data['name']`` otherwise.

    Args:
        data: Mapping with a required ``name`` and optional ``slug``,
            ``musician``, ``user``, ``image`` and ``products`` entries.
            Optional entries with a falsy value are ignored.

    Returns:
        The band object, after ``save()`` has been called on it.
    """
    name = data['name'].title()
    slug = get_slug(data['slug'] if "slug" in data else data['name'])
    try:
        band = Band.objects.get(slug=slug)
    except DoesNotExist:
        band = Band.objects.create(slug=slug, name=name)

    if name not in band.aliases:
        band.aliases.append(name)

    musician = data.get('musician')
    if musician and musician not in band.musicians:
        band.musicians.append(musician)
        # A newly linked musician is also registered as a user of the band.
        # The membership check keeps a musician who is already listed as a
        # user from being appended a second time.
        if musician not in band.users:
            band.users.append(musician)

    user = data.get('user')
    if user and user not in band.users:
        band.users.append(user)

    image = data.get('image')
    if image:
        band.image = image

    for product in data.get('products') or ():
        if product not in band.products:
            band.products.append(product)

    band.save()
    return band
def get_or_create_band(data):
    """Fetch the band described by ``data`` or create it, then merge in extras.

    The band is looked up by slug. The slug is derived from ``data['slug']``
    when that key is present and from ``data['name']`` otherwise.

    Args:
        data: Mapping with a required ``name`` and optional ``slug``,
            ``musician``, ``user``, ``image`` and ``products`` entries.
            Optional entries with a falsy value are ignored.

    Returns:
        The band object, after ``save()`` has been called on it.
    """
    name = data['name'].title()
    slug = get_slug(data['slug'] if "slug" in data else data['name'])
    try:
        band = Band.objects.get(slug=slug)
    except DoesNotExist:
        band = Band.objects.create(slug=slug, name=name)

    if name not in band.aliases:
        band.aliases.append(name)

    musician = data.get('musician')
    if musician and musician not in band.musicians:
        band.musicians.append(musician)
        # A newly linked musician is also registered as a user of the band.
        # The membership check keeps a musician who is already listed as a
        # user from being appended a second time.
        if musician not in band.users:
            band.users.append(musician)

    user = data.get('user')
    if user and user not in band.users:
        band.users.append(user)

    image = data.get('image')
    if image:
        band.image = image

    for product in data.get('products') or ():
        if product not in band.products:
            band.products.append(product)

    band.save()
    return band
def get_or_create_show(data):
    """Fetch the show described by ``data`` or create it, then update it.

    The show is looked up by slug. The slug is derived from ``data['slug']``
    when that key is present and from ``data['title']`` otherwise.

    Args:
        data: Mapping with a required ``title`` and optional ``slug``,
            ``artists`` (a list of objects exposing ``slug``, ``shows`` and
            ``save()``), ``location`` (a ``Location`` instance or a value
            accepted by ``get_or_create_location``), plus any of
            ``attendance_count``, ``cover_image``, ``description``,
            ``datetime_usa``, ``city`` and ``website``, which are copied
            onto the show as attributes.

    Returns:
        The show object, after ``save()`` has been called on it.
    """
    title = data['title'].title()
    slug = get_slug(data['slug'] if "slug" in data else data['title'])
    try:
        show = Show.objects.get(slug=slug)
    except DoesNotExist:
        show = Show.objects.create(slug=slug, title=title)

    artists = data.get("artists")
    # isinstance (instead of an exact type comparison) also accepts list
    # subclasses; any other value is ignored.
    if isinstance(artists, list):
        for artist in artists:
            if artist.slug not in show.artists_slug:
                show.artists_slug.append(artist.slug)
            if show not in artist.shows:
                artist.shows.append(show)
                artist.save()

    if "location" in data:
        location = data["location"]
        # A ready-made Location object may also have been passed in.
        if not isinstance(location, Location):
            location = get_or_create_location(location)
        show.location = location

    keys_to_check = (
        "attendance_count",
        "cover_image",
        "description",
        "datetime_usa",
        "city",
        "website",
    )
    for key in keys_to_check:
        if key in data:
            setattr(show, key, data[key])

    show.save()
    return show
def get_or_create_show(data):
    """Fetch the show described by ``data`` or create it, then update it.

    The show is looked up by slug. The slug is derived from ``data['slug']``
    when that key is present and from ``data['title']`` otherwise.

    Args:
        data: Mapping with a required ``title`` and optional ``slug``,
            ``artists`` (a list of objects exposing ``slug``, ``shows`` and
            ``save()``), ``location`` (a ``Location`` instance or a value
            accepted by ``get_or_create_location``), plus any of
            ``attendance_count``, ``cover_image``, ``description``,
            ``datetime_usa``, ``city`` and ``website``, which are copied
            onto the show as attributes.

    Returns:
        The show object, after ``save()`` has been called on it.
    """
    title = data['title'].title()
    slug = get_slug(data['slug'] if "slug" in data else data['title'])
    try:
        show = Show.objects.get(slug=slug)
    except DoesNotExist:
        show = Show.objects.create(slug=slug, title=title)

    artists = data.get("artists")
    # isinstance (instead of an exact type comparison) also accepts list
    # subclasses; any other value is ignored.
    if isinstance(artists, list):
        for artist in artists:
            if artist.slug not in show.artists_slug:
                show.artists_slug.append(artist.slug)
            if show not in artist.shows:
                artist.shows.append(show)
                artist.save()

    if "location" in data:
        location = data["location"]
        # A ready-made Location object may also have been passed in.
        if not isinstance(location, Location):
            location = get_or_create_location(location)
        show.location = location

    keys_to_check = (
        "attendance_count",
        "cover_image",
        "description",
        "datetime_usa",
        "city",
        "website",
    )
    for key in keys_to_check:
        if key in data:
            setattr(show, key, data[key])

    show.save()
    return show
def get_or_create_location(data):
    """Return the location matching ``data``, creating it when missing.

    The lookup slug is derived from ``data['slug']`` when present and from
    ``data['name']`` otherwise. If ``get_slug`` raises
    ``UnicodeDecodeError`` the raw ``data['name']`` is used as the slug.

    Args:
        data: Mapping of ``Location`` field names to values. It is passed
            as keyword arguments to ``Location.objects.create`` when no
            location with the slug exists. The mapping is not modified.

    Returns:
        The existing or newly created location object.
    """
    try:
        slug = get_slug(data['slug'] if "slug" in data else data['name'])
    except UnicodeDecodeError:
        slug = data['name']

    try:
        return Location.objects.get(slug=slug)
    except DoesNotExist:
        # Build the creation kwargs on a copy so the caller's mapping is
        # not mutated as a side effect.
        return Location.objects.create(**dict(data, slug=slug))
def get_or_create_location(data):
    """Return the location matching ``data``, creating it when missing.

    The lookup slug is derived from ``data['slug']`` when present and from
    ``data['name']`` otherwise. If ``get_slug`` raises
    ``UnicodeDecodeError`` the raw ``data['name']`` is used as the slug.

    Args:
        data: Mapping of ``Location`` field names to values. It is passed
            as keyword arguments to ``Location.objects.create`` when no
            location with the slug exists. The mapping is not modified.

    Returns:
        The existing or newly created location object.
    """
    try:
        slug = get_slug(data['slug'] if "slug" in data else data['name'])
    except UnicodeDecodeError:
        slug = data['name']

    try:
        return Location.objects.get(slug=slug)
    except DoesNotExist:
        # Build the creation kwargs on a copy so the caller's mapping is
        # not mutated as a side effect.
        return Location.objects.create(**dict(data, slug=slug))
def get_or_create_product(data):
    """Look up a product by slug, creating it if needed, and update it.

    Args:
        data: Mapping with a required ``name`` and optional ``slug``,
            ``price``, ``photo``, ``quantity_type`` and ``quantity_value``.
            Optional entries are only applied when their value is truthy.

    Returns:
        The product object, after ``save()`` has been called on it.
    """
    name = data['name'].title()
    slug_source = data['slug'] if "slug" in data else data['name']
    slug = get_slug(slug_source)

    try:
        product = Product.objects.get(slug=slug)
    except DoesNotExist:
        product = Product.objects.create(slug=slug, name=name)

    optional_fields = ("price", "photo", "quantity_type", "quantity_value")
    for field in optional_fields:
        if field not in data:
            continue
        value = data[field]
        if value:
            setattr(product, field, value)

    product.save()
    return product
def search_band(band_name):
    """Render the single-band result page for ``band_name``.

    The band is fetched or created through ``get_or_create_band`` and at
    most one show per artist is requested from ``get_shows_from_bands``.
    """
    # TODO: add to "my bands": get_current_user()
    current_user = None
    # Placeholder value; original note: get_current_city(ip=get_client_ip())
    current_city = "Rio de Janeiro"

    band_data = {
        'slug': get_slug(band_name),
        'name': band_name,
        'user': current_user,
    }
    band = get_or_create_band(band_data)

    shows = get_shows_from_bands(
        [band],
        limit_per_artist=1,
        city=current_city,
        call_lastfm_if_dont_have_shows=True,
        call_lastfm_without_subprocess=True,
    )
    # Take only the band's show object out of the first result entry.
    show = shows[0][1][0] if shows else None

    return render_template(
        "resultado_uma_banda.html",
        band=band,
        show=show,
        notas=range(11),
        BANDAS_CAMISAS=BANDAS_CAMISAS,
        formulario_pag_seguro=formulario_pag_seguro,
    )
def add_band():
    """Attach the band named in the submitted form to the current user.

    Returns:
        The band's name and slug separated by a newline, or the string
        ``"Ninguem logado"`` when ``get_current_user()`` returns a falsy
        value.
    """
    name = request.form['band']
    user = get_current_user()
    if not user:
        return "Ninguem logado"

    band_data = {'slug': get_slug(name), 'name': name, 'user': user}
    band = get_or_create_band(band_data)
    return "%s\n%s" % (band.name, band.slug)
def parse_detail(self, response):
    """Extract one company record from a detail page and yield it as a dict.

    Every text field falls back to an empty string when its CSS selector
    matches nothing. When the page contains an ``img`` element, the image
    is downloaded via ``helpers.download`` into
    ``images/<spider name>/<slug>.<ext>``.

    Args:
        response: The response for the detail page; it must provide
            ``css()`` and ``url``.

    Yields:
        A dict with the keys ``category``, ``name``, ``slug``, ``address``,
        ``city``, ``phone``, ``fax``, ``email``, ``website``, ``broker``,
        ``npwp``, ``description``, ``url`` and ``image_name``, all stripped
        strings.
    """
    def first_text(query):
        # ``.get()`` returns None when nothing matches; normalise to ''.
        return response.css(query).get() or ''

    category = first_text('.meta .category a::text')
    name = helpers.fix_title(first_text('h2::text'))
    slug = helpers.get_slug(name)
    address = first_text('.node p::text')
    city = first_text('.meta .tags a::text')
    phone = first_text('.field-field-telepon .field-item::text')
    fax = first_text('.field-field-fax .field-item::text')
    email = first_text('.field-field-email .field-item::text')
    website = first_text('.field-field-website .field-item::text')
    broker = first_text('.field-field-broker .field-item::text')
    # The empty-string fallback has to be applied before the string methods
    # are called; otherwise a page without this field raises AttributeError.
    npwp = first_text('.field-field-npwp .field-item::text').replace(
        'NPWP', '').strip('\n :')
    description = ''
    url = response.url or ''
    image_name = ''

    # Discard website values that contain the spider's own name.
    if self.name in website:
        website = ''

    image_url = response.css('img::attr(src)').get()
    if image_url is not None:
        image_url = image_url.strip()
        ext = image_url.split('.')[-1]
        image_name = slug
        target_dir = 'images/{}/{}.{}'.format(self.name, image_name, ext)
        self.logger.info('downloading image: {} => {}'.format(
            image_url, target_dir))
        # A falsy return value from the download helper is treated as failure.
        r = helpers.download(image_url, target_dir)
        if not r:
            self.logger.info('Failed download {} => {}'.format(
                image_url, target_dir))

    yield {
        'category': category.strip(),
        'name': name.strip(),
        'slug': slug.strip(),
        'address': address.strip(),
        'city': city.strip(),
        'phone': phone.strip(),
        'fax': fax.strip(),
        'email': email.strip(),
        'website': website.strip(),
        'broker': broker.strip(),
        'npwp': npwp.strip(),
        'description': description.strip(),
        'url': url.strip(),
        'image_name': image_name.strip(),
    }
def show_from_band(band_name):
    """Render the show page for ``band_name``.

    The band is fetched or created through ``get_or_create_band``. When no
    show is found and the band has no users, the band is deleted before the
    template is rendered.
    """
    # TODO: add to "my bands": get_current_user()
    current_user = None
    # Placeholder value; original note: get_current_city(ip=get_client_ip())
    current_city = "Rio de Janeiro"

    band_data = {
        'slug': get_slug(band_name),
        'name': band_name,
        'user': current_user,
    }
    band = get_or_create_band(band_data)

    shows = get_shows_from_bands(
        [band],
        limit_per_artist=1,
        city=current_city,
        call_lastfm_if_dont_have_shows=True,
        call_lastfm_without_subprocess=True,
    )

    if shows:
        # Take only the band's show object out of the first result entry.
        show = shows[0][1][0]
    else:
        show = None
        if len(band.users) == 0:
            band.delete()

    return render_template("show_de_uma_banda.html", band=band, show=show)
def add_band():
    """Attach the band named in the submitted form to the current user.

    Returns:
        The band's name and slug separated by a newline, or the string
        ``"Ninguem logado"`` when ``get_current_user()`` returns a falsy
        value.
    """
    name = request.form['band']
    user = get_current_user()
    if not user:
        return "Ninguem logado"

    band_data = {'slug': get_slug(name), 'name': name, 'user': user}
    band = get_or_create_band(band_data)
    return "%s\n%s" % (band.name, band.slug)
def search_band(band_name):
    """Render the single-band result page for ``band_name``.

    The band is fetched or created through ``get_or_create_band`` and at
    most one show per artist is requested from ``get_shows_from_bands``.
    """
    # TODO: add to "my bands": get_current_user()
    current_user = None
    # Placeholder value; original note: get_current_city(ip=get_client_ip())
    current_city = "Rio de Janeiro"

    band_data = {
        'slug': get_slug(band_name),
        'name': band_name,
        'user': current_user,
    }
    band = get_or_create_band(band_data)

    shows = get_shows_from_bands(
        [band],
        limit_per_artist=1,
        city=current_city,
        call_lastfm_if_dont_have_shows=True,
        call_lastfm_without_subprocess=True,
    )
    # Take only the band's show object out of the first result entry.
    show = shows[0][1][0] if shows else None

    return render_template(
        "resultado_uma_banda.html",
        band=band,
        show=show,
        notas=range(11),
        BANDAS_CAMISAS=BANDAS_CAMISAS,
        formulario_pag_seguro=formulario_pag_seguro,
    )
def run_migration():
    """Create bands from the stored answers to the two "favoritos" questions.

    Each answer is split on commas and then on newlines; every non-empty
    piece is stripped, title-cased and passed to ``get_or_create_band``
    together with the answering user.
    """
    answers = get_all_answers_from_question("musico-favoritos")
    answers.extend(get_all_answers_from_question("fa-favoritos"))

    for answer in answers:
        for chunk in answer.answer.split(","):
            for raw_name in chunk.split('\n'):
                band_name = raw_name.strip().title()
                if not band_name:
                    continue
                get_or_create_band({
                    "slug": get_slug(band_name),
                    "name": band_name,
                    "user": answer.user,
                })
def show_from_band(band_name):
    """Render the show page for ``band_name``.

    The band is fetched or created through ``get_or_create_band``. When no
    show is found and the band has no users, the band is deleted before the
    template is rendered.
    """
    # TODO: add to "my bands": get_current_user()
    current_user = None
    # Placeholder value; original note: get_current_city(ip=get_client_ip())
    current_city = "Rio de Janeiro"

    band_data = {
        'slug': get_slug(band_name),
        'name': band_name,
        'user': current_user,
    }
    band = get_or_create_band(band_data)

    shows = get_shows_from_bands(
        [band],
        limit_per_artist=1,
        city=current_city,
        call_lastfm_if_dont_have_shows=True,
        call_lastfm_without_subprocess=True,
    )

    if shows:
        # Take only the band's show object out of the first result entry.
        show = shows[0][1][0]
    else:
        show = None
        if len(band.users) == 0:
            band.delete()

    return render_template("show_de_uma_banda.html", band=band, show=show)
for k, v in data.items(): v = remove_unicode(v) v = v.replace(' ', ' ').replace(' ', ' ').replace(' ', ' ') data[k] = v return data print('Load done data...') done = {} with open(file_reputasi, 'r', encoding='utf8') as f: for row in f.read().strip().split('\n'): row = json.loads(row) # print(helpers.fix_title(row['name'])) # done[helpers.get_slug(helpers.fix_title(row['name']), '', True)] = row['url'] done[helpers.get_slug(helpers.fix_title(row['slug']), '', True)] = row['url'] print('{} done data loaded'.format(len(done))) print('Load perusahaan data...') perusahaan = [] skipped_counter = { 'done': 0, 'empty_name': 0, 'empty_address': 0, 'empty_phone': 0, 'empty_email': 0, 'invalid_phone': 0, 'invalid_email': 0, } with open(file_source, 'r') as f: result = [json.loads(row) for row in f.read().strip().split('\n')]
} done_slug = [] done_email = [] done_phone = [] done_website = [] clean = [] print("INFO: start cleaning...") for row in data: # print(row[COL_NAME]) row = clean_data(row) category = row[COL_CATEGORY] # sc = category.lower() # if sc not in categories: # categories.append(sc) name = helpers.fix_title(row[COL_NAME]) slug = helpers.get_slug(name) email = row[COL_EMAIL] phone = row[COL_PHONE] website = row[COL_WEBSITE] city = row[COL_CITY] if len(city) == 0: city = row[COL_ADDRESS].strip().split(' ')[-1].strip() #.lower() row[COL_CITY] = city if slug in done_slug: duplicate['slug'] += 1 print('INFO: dp slug => {}'.format(slug)) continue if email in done_email: duplicate['email'] += 1 print('INFO: dp email => {}'.format(email)) continue
def get_slug_with_unicode_test(self):
    """Check the slug that ``get_slug`` produces for ``self.title_unicode``."""
    expected = "este-e-um-outro-teste-eaa"
    self.assertEqual(get_slug(self.title_unicode), expected)
def get_slug_test(self):
    """Check the slug that ``get_slug`` produces for ``self.title_normal``."""
    expected = "whos-using-it"
    self.assertEqual(get_slug(self.title_normal), expected)
def parse_detail(self, response):
    """Extract one company record from a detail page and yield it as a dict.

    Two page layouts are handled: one built from ``.comp-body li`` items
    holding "key: value" text, and one built from rows of
    ``table.table.description``. Fields that cannot be found are emitted as
    empty strings. When an image URL is found, the image is downloaded via
    ``helpers.download`` into ``images/<spider name>/<slug>.<ext>``.

    Args:
        response: The response for the detail page; it must provide
            ``css()`` and ``url``.

    Yields:
        A dict with the keys ``category``, ``name``, ``slug``, ``address``,
        ``city``, ``phone``, ``email``, ``website``, ``description`` and
        ``url``, all stripped strings.
    """
    category = ''
    name = ''
    address = ''
    city = ''
    phone = ''
    fax = ''  # collected below but not part of the yielded record
    email = ''
    website = ''
    description = ''
    url = response.url or ''
    image_url = ''

    # Decide which of the two layouts the page uses.
    lis = response.css('.comp-body li')
    trs = response.css('table.table.description tr')
    if len(lis) > 0:
        # Layout 1: list items holding "key: value" text.
        for li in lis:
            # ``.get()`` returns None for an item without text.
            text = (li.css('::text').get() or '').strip()
            parts = text.split(':')
            k = parts[0].strip()
            v = parts[-1].strip()
            if len(k) == 0:
                continue
            if 'Company Name' in k:
                name = v
            elif 'Address' in k:
                address = v
            elif 'Telephone' in k:
                phone = li.css('a::text').get()
            elif 'Fax' in k:
                fax = v
            elif 'Email' in k:
                email = li.css('a::text').get()

        # Description: non-empty paragraphs, minus the "Description" heading.
        paragraphs = []
        for p in response.css('.comp-row > p::text'):
            txt = p.get().strip()
            if len(txt) == 0 or 'Description' in txt:
                continue
            paragraphs.append(txt)
        description = ' '.join(paragraphs)

        website = response.css('.comp-row > p > a::attr(href)').get() or ''
        # Discard website values that contain the spider's own name.
        if self.name in website:
            website = ''

        # Guard the [-1] lookup: the selector list may be empty.
        categories = response.css('.title-comp .col-sm-10::text')
        category = categories[-1].get() if categories else ''

        image_url = response.css('.img-container img::attr(src)').get() or ''
    elif len(trs) > 0:
        # Layout 2: table rows, key in the first cell, value in the last.
        for tr in trs:
            cells = tr.css('td::text')
            if not cells:
                # A row without any cell text has no key to dispatch on.
                continue
            k = cells[0].get()
            v = cells[-1].get()
            if len(k) == 0:
                continue
            if 'Nama Perusahaan' in k:
                name = v
            elif 'Alamat' in k:
                # Fall back to '' so the final ``address.strip()`` cannot
                # be called on None.
                address = tr.css('td')[-1].css('p::text').get() or ''
            elif 'Kategori' in k:
                category = v
            elif 'Telepon' in k:
                phone = tr.css('td')[-1].css('a::text').get()
            elif 'Fax' in k:
                fax = tr.css('td')[-1].css('a::text').get()
            elif 'Email' in k:
                email = tr.css('td')[-1].css('a::text').get()

        paragraphs = []
        for p in response.css('.container > p::text'):
            txt = p.get().strip()
            if len(txt) == 0:
                continue
            paragraphs.append(txt)
        description = ' '.join(paragraphs)

        website = response.css(
            'a.btn.btn-contactus.btn-go-to::attr(href)').get() or ''
        if self.name in website:
            website = ''

        image_url = response.css('img.center-img::attr(src)').get() or ''

    # phone/email come from ``.get()`` and may be None at this point.
    if email is None or len(email) == 0:
        self.logger.info('{} : EMPTY EMAIL'.format(url))
        email = ''
    if phone is None or len(phone) == 0:
        self.logger.info('{} : EMPTY PHONE'.format(url))
        phone = ''

    name = helpers.fix_title(name)
    slug = helpers.get_slug(name)

    if image_url is not None and len(image_url) > 0:
        image_url = image_url.strip()
        ext = image_url.split('.')[-1]
        image_name = slug
        target_dir = 'images/{}/{}.{}'.format(self.name, image_name, ext)
        self.logger.info('downloading image: {} => {}'.format(
            image_url, target_dir))
        # A falsy return value from the download helper is treated as failure.
        r = helpers.download(image_url, target_dir)
        if not r:
            self.logger.info('Failed download {} => {}'.format(
                image_url, target_dir))

    yield {
        'category': category.strip(),
        'name': name.strip(),
        'slug': slug.strip(),
        'address': address.strip(),
        'city': city.strip(),
        'phone': phone.strip(),
        'email': email.strip(),
        'website': website.strip(),
        'description': description.strip(),
        'url': url.strip(),
    }
def parse_detail(self, response):
    """Extract one company record from a detail page and yield it as a dict.

    Category and name are read from the last two ``.breadcrumb li`` items.
    The remaining fields are read from ``.panel`` elements, dispatched on
    each panel's title text. Fields that cannot be found are emitted as
    empty strings.

    Args:
        response: The response for the detail page; it must provide
            ``css()`` and ``url``.

    Yields:
        A dict with the keys ``category``, ``name``, ``slug``, ``address``,
        ``city``, ``phone``, ``fax``, ``email``, ``website``,
        ``description`` and ``url``, all stripped strings.
    """
    # Guard the negative indexing: a page may carry fewer breadcrumb items.
    crumbs = response.css('.breadcrumb li')
    category = ''
    if len(crumbs) >= 2:
        category = crumbs[-2].css('::text').get() or ''
    raw_name = ''
    if crumbs:
        raw_name = crumbs[-1].css('::text').get() or ''
    name = helpers.fix_title(raw_name)
    slug = helpers.get_slug(name)

    address = ''
    city = ''
    phone = ''
    fax = ''
    email = ''
    website = ''
    description = ''
    url = response.url or ''

    for panel in response.css('.panel'):
        # ``.get()`` returns None for a panel without a title node.
        panel_title = (
            panel.css('.col-xs-10.col-sm-11::text').get() or '').strip()

        if 'Alamat' in panel_title:
            address = ', '.join(
                addr.get().strip()
                for addr in panel.css('.panel-body::text'))
        elif 'Telepon' in panel_title:
            # First text node is the phone number, the second (if any) the
            # fax. ``css()`` returns a possibly empty list, never None, so
            # emptiness has to be tested before indexing.
            phones = panel.css('.panel-body::text')
            if phones:
                phone = phones[0].get().strip()
            if len(phones) > 1:
                fax = phones[1].get().strip()
        elif 'Website' in panel_title:
            website = (
                panel.css('.panel-body a::attr(href)').get() or '').strip()
            # Discard links that contain the spider's first allowed domain.
            if self.allowed_domains[0] in website:
                website = ''
        elif 'Email' in panel_title:
            # Keep ``email`` a string so the final ``.strip()`` is safe.
            email = panel.css('.panel-body a::text').get() or ''
        elif 'Tentang' in panel_title:
            description = ' '.join(
                desc.get().strip()
                for desc in panel.css('.panel-body::text')).strip()
            if len(description) == 0:
                # Fall back to the first paragraph of at least 200
                # characters.
                for desc in panel.css('.panel-body p::text'):
                    desc = desc.get().strip()
                    if len(desc) >= 200:
                        description = desc
                        break

    yield {
        'category': category.strip(),
        'name': name.strip(),
        'slug': slug.strip(),
        'address': address.strip(),
        'city': city.strip(),
        'phone': phone.strip(),
        'fax': fax.strip(),
        'email': email.strip(),
        'website': website.strip(),
        'description': description.strip(),
        'url': url.strip(),
    }
def parse_detail(self, response):
    """Extract one business record from a detail page and yield it as a dict.

    The email address is decoded from the ``data-cfemail`` attribute via
    ``helpers.cfDecodeEmail``. The address is assembled by interleaving the
    text of two selector lists. A record is yielded, and the listing image
    downloaded, only when both email and phone are non-empty.

    Args:
        response: The response for the detail page; it must provide
            ``css()`` and ``url``.

    Yields:
        A dict with the keys ``category``, ``name``, ``address``, ``city``,
        ``phone``, ``email``, ``website``, ``description`` and ``url``, all
        stripped strings.
    """
    # NOTE(review): the [-1] lookup raises IndexError when the breadcrumb
    # selector matches nothing.
    category = response.css('ol.breadcrumb.pull-left > li > a')[-1].css('::text').get() or ''
    name = response.css('h1.business-title span::text').get() or ''
    address = []
    city = response.css('span[itemprop=addressLocality]::text').get() or ''
    phone = response.css('span[itemprop=telephone]::text').get() or ''
    email = ''
    website = response.css('ul.dropdown-menu > li > a[itemprop=url]::attr(href)').get() or ''
    description = []
    url = response.url or ''

    # Email: decoded from the encoded attribute value; any failure while
    # decoding leaves the email empty.
    # NOTE(review): the bare ``except`` also swallows KeyboardInterrupt and
    # SystemExit; consider narrowing it.
    try:
        cfemail = response.css('span.__cf_email__::attr(data-cfemail)').get() or ''
        if len(cfemail) > 0:
            email = helpers.cfDecodeEmail(cfemail)
    except:
        email = ''

    # Address: interleave the nested-span texts with the outer-span texts.
    # NOTE(review): ``address_2[index]`` raises IndexError when the second
    # selector matches fewer nodes than the first.
    address_1 = response.css('h4 > span > span::text')
    address_2 = response.css('h4 > span::text')
    for index, a1 in enumerate(address_1):
        a1 = a1.get().strip()
        a2 = address_2[index].get().strip()
        address.append(a1)
        address.append(a2)
    address = ' '.join(address)
    address = address.replace(' ,', ',')

    # Description: join the first text node of each nested paragraph with
    # '. ' and then collapse the doubled separators that produces.
    for txt in response.css('.col-sm-12 > p p'):
        d = txt.css('::text').get() or ''
        description.append(d.strip())
    description = '. '.join(description)
    description = description.replace('..', '.')
    description = description.replace('. . ', '. ')
    description = description.replace('. . ', '. ')

    if len(email) == 0:
        self.logger.info('{} : EMPTY EMAIL'.format(url))
    if len(phone) == 0:
        self.logger.info('{} : EMPTY PHONE'.format(url))

    # Only records carrying both an email and a phone number are emitted.
    # NOTE(review): confirm that the yield is meant to sit under this guard.
    if len(email) > 0 and len(phone) > 0:
        image_url = response.css('.detail-listing-img > img::attr(src)').get()
        # Skip URLs ending in '/'.
        # NOTE(review): ``image_url[-1]`` raises IndexError on an empty
        # string.
        if image_url is not None and image_url[-1] != '/':
            image_url = image_url.strip()
            # NOTE(review): ``ext`` is computed but never used; the target
            # path below carries no file extension - confirm this is intended.
            ext = image_url.split('.')[-1]
            image_name = helpers.get_slug(helpers.fix_title(name))
            target_dir = 'images/{}/{}'.format(self.name, image_name)
            self.logger.info('downloading image: {} => {}'.format(image_url, target_dir))
            helpers.download(image_url, target_dir)

        yield {
            'category': category.strip(),
            'name': name.strip(),
            'address': address.strip(),
            'city': city.strip(),
            'phone': phone.strip(),
            'email': email.strip(),
            'website': website.strip(),
            'description': description.strip(),
            'url': url.strip(),
        }