Ejemplo n.º 1
0
def get_or_create_band(data):
    """Fetch the band matching ``data``'s slug (creating it if missing) and merge ``data`` into it.

    The slug is computed with ``get_slug`` from ``data['slug']`` when that
    key is present, otherwise from ``data['name']``. The title-cased name is
    recorded as an alias, then the optional ``musician``, ``user``,
    ``image`` and ``products`` entries are merged into the band, which is
    saved before being returned.

    Args:
        data: dict with a required ``name`` key and optional ``slug``,
            ``musician``, ``user``, ``image`` and ``products`` keys. Optional
            entries with a falsy value are ignored.

    Returns:
        The saved band object.

    Raises:
        KeyError: if ``data`` has no ``name`` key.
    """
    name = data['name'].title()
    slug = get_slug(data['slug'] if "slug" in data else data['name'])
    try:
        band = Band.objects.get(slug=slug)
    except DoesNotExist:
        band = Band.objects.create(slug=slug, name=name)

    if name not in band.aliases:
        band.aliases.append(name)

    musician = data.get('musician')
    user = data.get('user')

    if musician and musician not in band.musicians:
        band.musicians.append(musician)
        # A newly linked musician is also listed as a user of the band.
        # The membership check prevents a duplicate entry when the musician
        # was already registered as a user before.
        if musician not in band.users:
            band.users.append(musician)

    if user and user not in band.users:
        band.users.append(user)

    if data.get('image'):
        band.image = data['image']

    for product in data.get("products") or ():
        if product not in band.products:
            band.products.append(product)

    band.save()
    return band
Ejemplo n.º 2
0
def get_or_create_band(data):
    """Look up a band by slug, create it when absent, and fold ``data`` into it.

    ``data['slug']`` (or ``data['name']`` when no slug is given) is passed
    through ``get_slug`` to obtain the lookup key. The title-cased name is
    added to the band's aliases; ``musician``, ``user``, ``image`` and
    ``products`` are applied only when present and truthy. The band is saved
    before it is returned.

    Args:
        data: dict with a required ``name`` key and optional ``slug``,
            ``musician``, ``user``, ``image`` and ``products`` keys.

    Returns:
        The saved band object.

    Raises:
        KeyError: if ``data`` has no ``name`` key.
    """
    name = data['name'].title()
    slug_source = data['slug'] if "slug" in data else data['name']
    slug = get_slug(slug_source)
    try:
        band = Band.objects.get(slug=slug)
    except DoesNotExist:
        band = Band.objects.create(slug=slug, name=name)

    if name not in band.aliases:
        band.aliases.append(name)

    musician = data.get('musician')
    if musician and musician not in band.musicians:
        band.musicians.append(musician)
        # Every newly linked musician also becomes a user of the band; skip
        # the append when the musician is already in the user list so the
        # list never holds the same entry twice.
        if musician not in band.users:
            band.users.append(musician)

    user = data.get('user')
    if user and user not in band.users:
        band.users.append(user)

    image = data.get('image')
    if image:
        band.image = image

    products = data.get("products")
    if products:
        for product in products:
            if product not in band.products:
                band.products.append(product)

    band.save()
    return band
Ejemplo n.º 3
0
def get_or_create_show(data):
    """Fetch the show matching ``data``'s slug (creating it if missing) and update it from ``data``.

    The slug comes from ``get_slug`` applied to ``data['slug']`` when
    present, otherwise to ``data['title']``. Artists given as a ``list``
    are linked to the show in both directions, the location is attached
    (resolved through ``get_or_create_location`` unless it already is a
    ``Location``), and a fixed set of simple attributes is copied over
    before the show is saved.

    Args:
        data: dict with a required ``title`` key and optional ``slug``,
            ``artists``, ``location``, ``attendance_count``,
            ``cover_image``, ``description``, ``datetime_usa``, ``city``
            and ``website`` keys.

    Returns:
        The saved show object.
    """
    title = data['title'].title()
    slug_source = data['slug'] if "slug" in data else data['title']
    slug = get_slug(slug_source)
    try:
        show = Show.objects.get(slug=slug)
    except DoesNotExist:
        show = Show.objects.create(slug=slug, title=title)

    if "artists" in data and type(data["artists"]) is list:
        for artist in data["artists"]:
            if artist.slug in show.artists_slug:
                continue
            show.artists_slug.append(artist.slug)
            # Mirror the link on the artist side and persist it.
            if show not in artist.shows:
                artist.shows.append(show)
                artist.save()

    if "location" in data:
        location = data["location"]
        # A ready-made Location object may also have been passed in.
        if not isinstance(location, Location):
            location = get_or_create_location(location)
        show.location = location

    for key in ("attendance_count", "cover_image", "description",
                "datetime_usa", "city", "website"):
        if key in data:
            setattr(show, key, data[key])

    show.save()
    return show
Ejemplo n.º 4
0
def get_or_create_show(data):
    """Return the show for ``data``, creating it on first sight and refreshing its fields.

    Lookup is by slug: ``get_slug(data['slug'])`` when a slug is supplied,
    ``get_slug(data['title'])`` otherwise. When ``data['artists']`` is a
    ``list`` each artist not yet referenced by slug is linked to the show
    and the show is added to that artist. ``data['location']`` may be a
    ``Location`` or a value accepted by ``get_or_create_location``.

    Args:
        data: dict with a required ``title`` key plus optional ``slug``,
            ``artists``, ``location`` and plain attribute keys.

    Returns:
        The saved show object.
    """
    copied_attributes = [
        "attendance_count",
        "cover_image",
        "description",
        "datetime_usa",
        "city",
        "website",
    ]

    title = data['title'].title()
    if "slug" in data:
        slug = get_slug(data['slug'])
    else:
        slug = get_slug(data['title'])

    try:
        show = Show.objects.get(slug=slug)
    except DoesNotExist:
        show = Show.objects.create(slug=slug, title=title)

    if "artists" in data and type(data["artists"]) is list:
        for artist in data["artists"]:
            already_listed = artist.slug in show.artists_slug
            if not already_listed:
                show.artists_slug.append(artist.slug)
                if show not in artist.shows:
                    artist.shows.append(show)
                    artist.save()

    if "location" in data:
        given = data["location"]
        # A Location object may also have been passed in directly.
        if isinstance(given, Location):
            show.location = given
        else:
            show.location = get_or_create_location(given)

    for attribute in copied_attributes:
        if attribute in data:
            setattr(show, attribute, data[attribute])

    show.save()
    return show
Ejemplo n.º 5
0
def get_or_create_location(data):
    """Return the location whose slug matches ``data``, creating it from ``data`` if missing.

    The slug is ``get_slug(data['slug'])`` when a slug is supplied and
    ``get_slug(data['name'])`` otherwise; if slugifying raises
    ``UnicodeDecodeError`` the raw name is used as the slug instead.

    Args:
        data: dict of location fields. On creation every entry is passed as
            a keyword argument to ``Location.objects.create``, with ``slug``
            set to the computed slug. The dict itself is not modified.

    Returns:
        The existing or newly created location object.
    """
    try:
        slug = get_slug(data['slug'] if "slug" in data else data['name'])
    except UnicodeDecodeError:
        # Fall back to the unslugified name when it cannot be decoded.
        slug = data['name']

    try:
        location = Location.objects.get(slug=slug)
    except DoesNotExist:
        # Build the creation kwargs on a copy so the caller's dict is not
        # changed as a side effect of the lookup.
        location = Location.objects.create(**dict(data, slug=slug))

    return location
Ejemplo n.º 6
0
def get_or_create_location(data):
    """Look up a location by slug and create it from ``data`` when none exists.

    The lookup slug is derived with ``get_slug`` from ``data['slug']`` if
    present, else from ``data['name']``. A ``UnicodeDecodeError`` raised
    while slugifying makes the raw ``data['name']`` the slug.

    Args:
        data: dict of location fields; used as keyword arguments for
            ``Location.objects.create`` (with ``slug`` overridden by the
            computed value). ``data`` is left untouched.

    Returns:
        The matching or freshly created location object.
    """
    slug_source = data['slug'] if "slug" in data else data['name']
    try:
        slug = get_slug(slug_source)
    except UnicodeDecodeError:
        slug = data['name']

    try:
        return Location.objects.get(slug=slug)
    except DoesNotExist:
        # Copy before adding the slug: the caller's dict must not be mutated.
        create_kwargs = dict(data)
        create_kwargs["slug"] = slug
        return Location.objects.create(**create_kwargs)
Ejemplo n.º 7
0
def get_or_create_product(data):
    """Fetch the product matching ``data``'s slug (creating it if missing) and update its fields.

    The slug is ``get_slug(data['slug'])`` when a slug is supplied, else
    ``get_slug(data['name'])``. ``price``, ``photo``, ``quantity_type`` and
    ``quantity_value`` are copied onto the product only when present in
    ``data`` with a truthy value.

    Args:
        data: dict with a required ``name`` key and optional ``slug`` and
            property keys.

    Returns:
        The saved product object.
    """
    name = data['name'].title()
    slug_source = data['slug'] if "slug" in data else data['name']
    slug = get_slug(slug_source)
    try:
        product = Product.objects.get(slug=slug)
    except DoesNotExist:
        product = Product.objects.create(slug=slug, name=name)

    for prop in ("price", "photo", "quantity_type", "quantity_value"):
        if prop not in data:
            continue
        value = data[prop]
        if value:
            setattr(product, prop, value)

    product.save()
    return product
Ejemplo n.º 8
0
def get_or_create_product(data):
    """Return the product for ``data``, creating it when its slug is unknown.

    Lookup is by slug, computed with ``get_slug`` from ``data['slug']`` or,
    when absent, from ``data['name']``. Truthy ``price``, ``photo``,
    ``quantity_type`` and ``quantity_value`` entries overwrite the
    product's attributes of the same name before it is saved.

    Args:
        data: dict with a required ``name`` key and optional ``slug`` and
            property keys.

    Returns:
        The saved product object.
    """
    updatable = ["price", "photo", "quantity_type", "quantity_value"]

    name = data['name'].title()
    if "slug" in data:
        slug = get_slug(data['slug'])
    else:
        slug = get_slug(data['name'])

    try:
        product = Product.objects.get(slug=slug)
    except DoesNotExist:
        product = Product.objects.create(slug=slug, name=name)

    for attribute in updatable:
        has_value = attribute in data and data[attribute]
        if has_value:
            setattr(product, attribute, data[attribute])

    product.save()
    return product
Ejemplo n.º 9
0
def search_band(band_name):
    """Render the single-band result page for ``band_name``.

    The band is fetched or created through ``get_or_create_band`` and at
    most one show is requested for it via ``get_shows_from_bands``.

    Args:
        band_name: name of the band to look up.

    Returns:
        The result of rendering ``resultado_uma_banda.html`` with the band
        and its show (``None`` when no show was found).
    """
    current_user = None  # TODO: add to "my bands": get_current_user()
    current_city = "Rio de Janeiro"  # get_current_city(ip=get_client_ip())

    band_data = {
        'slug': get_slug(band_name),
        'name': band_name,
        'user': current_user,
    }
    band = get_or_create_band(band_data)

    shows = get_shows_from_bands(
        [band],
        limit_per_artist=1,
        city=current_city,
        call_lastfm_if_dont_have_shows=True,
        call_lastfm_without_subprocess=True,
    )

    # Keep only the show object belonging to the band.
    show = shows[0][1][0] if shows else None

    return render_template(
        "resultado_uma_banda.html",
        band=band,
        show=show,
        notas=range(11),
        BANDAS_CAMISAS=BANDAS_CAMISAS,
        formulario_pag_seguro=formulario_pag_seguro,
    )
Ejemplo n.º 10
0
def add_band():
    """Add the band named in the submitted form to the logged-in user.

    Reads ``band`` from ``request.form`` and, when ``get_current_user()``
    returns a truthy user, fetches or creates the band with that user.

    Returns:
        ``"<band name>\\n<band slug>"`` on success, or ``"Ninguem logado"``
        when nobody is logged in.
    """
    name = request.form['band']
    user = get_current_user()
    if not user:
        return "Ninguem logado"

    band_data = {'slug': get_slug(name), 'name': name, 'user': user}
    band = get_or_create_band(band_data)
    return "%s\n%s" % (band.name, band.slug)
Ejemplo n.º 11
0
    def parse_detail(self, response):
        """Scrape one company detail page and yield a single record dict.

        Every text field is read with a CSS selector and falls back to an
        empty string when the selector matches nothing. When the page has
        an ``<img>``, it is downloaded through ``helpers.download`` to
        ``images/<spider name>/<slug>.<ext>`` and the slug is reported as
        ``image_name``.

        Args:
            response: the detail-page response; must provide ``css()`` and
                ``url``.

        Yields:
            dict: one record with the keys ``category``, ``name``,
            ``slug``, ``address``, ``city``, ``phone``, ``fax``, ``email``,
            ``website``, ``broker``, ``npwp``, ``description``, ``url`` and
            ``image_name``; all values are stripped strings.
        """
        def field_text(field):
            # Text of the ".field-field-<field> .field-item" node, '' if absent.
            selector = '.field-field-{} .field-item::text'.format(field)
            return response.css(selector).get() or ''

        category = response.css('.meta .category a::text').get() or ''
        name = helpers.fix_title(response.css('h2::text').get() or '')
        slug = helpers.get_slug(name)
        address = response.css('.node p::text').get() or ''
        city = response.css('.meta .tags a::text').get() or ''
        phone = field_text('telepon')
        fax = field_text('fax')
        email = field_text('email')
        website = field_text('website')
        broker = field_text('broker')
        # Default to '' *before* cleaning: calling .replace() on a missing
        # field (None) would raise AttributeError.
        npwp = field_text('npwp').replace('NPWP', '').strip('\n :')
        description = ''
        url = response.url or ''
        image_name = ''

        # A website value containing the spider's own name is discarded.
        if self.name in website:
            website = ''

        image_url = response.css('img::attr(src)').get()
        if image_url is not None:
            image_url = image_url.strip()
            ext = image_url.split('.')[-1]
            image_name = slug
            target_dir = 'images/{}/{}.{}'.format(self.name, image_name, ext)
            self.logger.info('downloading image: {} => {}'.format(
                image_url, target_dir))
            r = helpers.download(image_url, target_dir)
            if not r:
                self.logger.info('Failed download {} => {}'.format(
                    image_url, target_dir))
        yield {
            'category': category.strip(),
            'name': name.strip(),
            'slug': slug.strip(),
            'address': address.strip(),
            'city': city.strip(),
            'phone': phone.strip(),
            'fax': fax.strip(),
            'email': email.strip(),
            'website': website.strip(),
            'broker': broker.strip(),
            'npwp': npwp.strip(),
            'description': description.strip(),
            'url': url.strip(),
            'image_name': image_name.strip(),
        }
Ejemplo n.º 12
0
def show_from_band(band_name):
    """Render the page showing one show of the band named ``band_name``.

    The band is fetched or created, then at most one show is requested for
    it. When no show is found and the band has no users, the band is
    deleted before the template is rendered.

    Args:
        band_name: name of the band to look up.

    Returns:
        The result of rendering ``show_de_uma_banda.html`` with the band
        and its show (``None`` when no show was found).
    """
    current_user = None  # TODO: add to "my bands": get_current_user()
    current_city = "Rio de Janeiro"  # get_current_city(ip=get_client_ip())

    band = get_or_create_band({
        'slug': get_slug(band_name),
        'name': band_name,
        'user': current_user,
    })
    shows = get_shows_from_bands(
        [band],
        limit_per_artist=1,
        city=current_city,
        call_lastfm_if_dont_have_shows=True,
        call_lastfm_without_subprocess=True,
    )

    if shows:
        # Keep only the show object belonging to the band.
        show = shows[0][1][0]
    else:
        show = None
        if len(band.users) == 0:
            band.delete()

    return render_template("show_de_uma_banda.html", band=band, show=show)
Ejemplo n.º 13
0
def add_band():
    """Register the band submitted in the form for the current user.

    The band name is taken from ``request.form['band']``. A band is only
    fetched or created when ``get_current_user()`` returns a truthy user.

    Returns:
        The band's name and slug separated by a newline, or
        ``"Ninguem logado"`` when nobody is logged in.
    """
    name = request.form['band']
    user = get_current_user()
    if not user:
        return "Ninguem logado"

    slug = get_slug(name)
    band = get_or_create_band({'slug': slug, 'name': name, 'user': user})
    return "%s\n%s" % (band.name, band.slug)
Ejemplo n.º 14
0
def search_band(band_name):
    """Build the single-band search result page for ``band_name``.

    Fetches or creates the band, asks ``get_shows_from_bands`` for at most
    one show of it, and renders the result template.

    Args:
        band_name: name of the band to look up.

    Returns:
        The result of rendering ``resultado_uma_banda.html``; ``show`` is
        ``None`` in the template context when no show was found.
    """
    current_user = None  # TODO: add to "my bands": get_current_user()
    current_city = "Rio de Janeiro"  # get_current_city(ip=get_client_ip())

    band = get_or_create_band({
        'slug': get_slug(band_name),
        'name': band_name,
        'user': current_user,
    })

    lookup_options = dict(
        limit_per_artist=1,
        city=current_city,
        call_lastfm_if_dont_have_shows=True,
        call_lastfm_without_subprocess=True,
    )
    shows = get_shows_from_bands([band], **lookup_options)

    if shows:
        # Take only the show object belonging to the band.
        show = shows[0][1][0]
    else:
        show = None

    template_context = dict(
        band=band,
        show=show,
        notas=range(11),
        BANDAS_CAMISAS=BANDAS_CAMISAS,
        formulario_pag_seguro=formulario_pag_seguro,
    )
    return render_template("resultado_uma_banda.html", **template_context)
Ejemplo n.º 15
0
def run_migration():
    """Create or update a band for every band name found in the favourites answers.

    Answers to the ``musico-favoritos`` and ``fa-favoritos`` questions are
    collected; each answer text is split on commas and then on newlines,
    and every non-empty, title-cased name is passed to
    ``get_or_create_band`` together with the answering user.
    """
    answers = get_all_answers_from_question("musico-favoritos")
    answers.extend(get_all_answers_from_question("fa-favoritos"))

    for answer in answers:
        for chunk in answer.answer.split(","):
            for raw_name in chunk.split('\n'):
                band_name = raw_name.strip().title()
                if not band_name:
                    continue
                get_or_create_band({
                    "slug": get_slug(band_name),
                    "name": band_name,
                    "user": answer.user,
                })
Ejemplo n.º 16
0
def run_migration():
    """Turn the favourite-band answers into band records.

    Reads all answers to the ``musico-favoritos`` and ``fa-favoritos``
    questions. Each answer may list several bands separated by commas or
    newlines; every non-blank name is title-cased and handed to
    ``get_or_create_band`` with the user who gave the answer.
    """
    answers = get_all_answers_from_question("musico-favoritos")
    answers.extend(get_all_answers_from_question("fa-favoritos"))

    for answer in answers:
        # Treat newlines like commas so one split yields every candidate,
        # in the same order as splitting on commas and then on newlines.
        candidates = answer.answer.replace('\n', ',').split(',')
        for candidate in candidates:
            band = candidate.strip().title()
            if band:
                data = dict(slug=get_slug(band), name=band, user=answer.user)
                get_or_create_band(data)
Ejemplo n.º 17
0
def show_from_band(band_name):
    """Render a page with a single show of the band called ``band_name``.

    After fetching or creating the band, at most one show is requested for
    it. A band for which no show is found is deleted when it has no users.

    Args:
        band_name: name of the band to look up.

    Returns:
        The result of rendering ``show_de_uma_banda.html``; ``show`` is
        ``None`` in the template context when no show was found.
    """
    current_user = None  # TODO: add to "my bands": get_current_user()
    current_city = "Rio de Janeiro"  # get_current_city(ip=get_client_ip())

    band_data = {'slug': get_slug(band_name), 'name': band_name, 'user': current_user}
    band = get_or_create_band(band_data)

    lookup_options = dict(
        limit_per_artist=1,
        city=current_city,
        call_lastfm_if_dont_have_shows=True,
        call_lastfm_without_subprocess=True,
    )
    shows = get_shows_from_bands([band], **lookup_options)

    # Take only the show object belonging to the band.
    show = shows[0][1][0] if shows else None
    if not shows and len(band.users) == 0:
        band.delete()

    return render_template("show_de_uma_banda.html", band=band, show=show)
Ejemplo n.º 18
0
    for k, v in data.items():
        v = remove_unicode(v)
        v = v.replace('  ', ' ').replace('  ', ' ').replace('  ', ' ')
        data[k] = v

    return data


# Build a lookup of already-processed entries: each line of `file_reputasi`
# is parsed as one JSON object, and its normalised slug is mapped to its URL.
print('Load done data...')
done = {}
with open(file_reputasi, 'r', encoding='utf8') as f:
    for row in f.read().strip().split('\n'):
        row = json.loads(row)
        # print(helpers.fix_title(row['name']))
        # done[helpers.get_slug(helpers.fix_title(row['name']), '', True)] = row['url']
        # The key is derived from the row's stored slug (not its name).
        done[helpers.get_slug(helpers.fix_title(row['slug']), '',
                              True)] = row['url']
print('{} done data loaded'.format(len(done)))

# Load the source records, one JSON object per line of `file_source`.
print('Load perusahaan data...')
perusahaan = []
# Counters keyed by skip reason; all start at zero here.
skipped_counter = {
    'done': 0,
    'empty_name': 0,
    'empty_address': 0,
    'empty_phone': 0,
    'empty_email': 0,
    'invalid_phone': 0,
    'invalid_email': 0,
}
# NOTE(review): unlike the read above, this open() passes no explicit
# encoding, so the platform default is used -- confirm that is intended.
with open(file_source, 'r') as f:
    result = [json.loads(row) for row in f.read().strip().split('\n')]
Ejemplo n.º 19
0
}
# Values already seen while cleaning, used for the duplicate checks below.
done_slug = []
done_email = []
done_phone = []
done_website = []
clean = []
print("INFO: start cleaning...")
for row in data:
    # print(row[COL_NAME])
    # Normalise the row first, then pull out the columns that are checked.
    row = clean_data(row)
    category = row[COL_CATEGORY]
    # sc = category.lower()
    # if sc not in categories:
    #     categories.append(sc)
    name = helpers.fix_title(row[COL_NAME])
    slug = helpers.get_slug(name)
    email = row[COL_EMAIL]
    phone = row[COL_PHONE]
    website = row[COL_WEBSITE]
    city = row[COL_CITY]
    if len(city) == 0:
        # No city given: fall back to the last space-separated token of
        # the address and store it back on the row.
        city = row[COL_ADDRESS].strip().split(' ')[-1].strip()  #.lower()
        row[COL_CITY] = city
    # Skip the row (and count it) when its slug was already seen.
    if slug in done_slug:
        duplicate['slug'] += 1
        print('INFO: dp slug => {}'.format(slug))
        continue
    # Skip the row (and count it) when its email was already seen.
    if email in done_email:
        duplicate['email'] += 1
        print('INFO: dp email => {}'.format(email))
        continue
Ejemplo n.º 20
0
 def get_slug_with_unicode_test(self):
     """get_slug maps the unicode fixture title to the expected slug."""
     expected = "este-e-um-outro-teste-eaa"
     self.assertEqual(get_slug(self.title_unicode), expected)
Ejemplo n.º 21
0
 def get_slug_test(self):
     """get_slug maps the plain fixture title to the expected slug."""
     expected = "whos-using-it"
     self.assertEqual(get_slug(self.title_normal), expected)
Ejemplo n.º 22
0
 def get_slug_with_unicode_test(self):
     """Slugifying the unicode fixture title must give the expected slug."""
     actual = get_slug(self.title_unicode)
     expected = "este-e-um-outro-teste-eaa"
     self.assertEqual(actual, expected)
Ejemplo n.º 23
0
 def get_slug_test(self):
     """Slugifying the plain fixture title must give the expected slug."""
     actual = get_slug(self.title_normal)
     expected = "whos-using-it"
     self.assertEqual(actual, expected)
Ejemplo n.º 24
0
    def parse_detail(self, response):
        """Parse a company detail page in either of two layouts and yield one record.

        Layout "type 1" lists the fields as ``Key: value`` ``<li>`` items
        under ``.comp-body``; layout "type 2" uses rows of
        ``table.table.description``. Missing pieces are left as empty
        strings instead of raising. When an image URL is found it is
        downloaded through ``helpers.download`` to
        ``images/<spider name>/<slug>.<ext>``.

        Args:
            response: the detail-page response; must provide ``css()`` and
                ``url``.

        Yields:
            dict: one record with the keys ``category``, ``name``,
            ``slug``, ``address``, ``city``, ``phone``, ``email``,
            ``website``, ``description`` and ``url``; all values are
            stripped strings.
        """
        category = ''
        name = ''
        address = ''
        city = ''
        phone = ''
        # NOTE(review): fax is collected below but is not part of the
        # yielded record.
        fax = ''
        email = ''
        website = ''
        description = ''
        url = response.url or ''
        image_url = ''
        # Detect which layout the page uses.
        lis = response.css('.comp-body li')
        trs = response.css('table.table.description tr')
        if len(lis) > 0:
            # type 1
            for li in lis:
                # An <li> without a text node gives None; treat it as an
                # empty entry so it is skipped instead of crashing.
                text = (li.css('::text').get() or '').strip()
                pieces = text.split(':')
                k = pieces[0].strip()
                v = pieces[-1].strip()
                if len(k) == 0:
                    continue
                if 'Company Name' in k:
                    name = v
                elif 'Address' in k:
                    address = v
                elif 'Telephone' in k:
                    phone = li.css('a::text').get()
                elif 'Fax' in k:
                    fax = v
                elif 'Email' in k:
                    email = li.css('a::text').get()
            # description
            paragraphs = []
            for p in response.css('.comp-row > p::text'):
                txt = p.get().strip()
                if len(txt) == 0 or 'Description' in txt:
                    continue
                paragraphs.append(txt)
            description = ' '.join(paragraphs)
            # website
            website = response.css('.comp-row > p > a::attr(href)').get() or ''
            if self.name in website:
                website = ''
            # category: last text node of the title block, if there is one
            category_nodes = response.css('.title-comp .col-sm-10::text')
            if category_nodes:
                category = category_nodes[-1].get()
            # image_url
            image_url = response.css('.img-container img::attr(src)').get() or ''
        elif len(trs) > 0:
            # type 2
            for tr in trs:
                cells = tr.css('td::text')
                if not cells:
                    # Row without any cell text: nothing to read.
                    continue
                k = cells[0].get()
                v = cells[-1].get()
                if len(k) == 0:
                    continue
                if 'Nama Perusahaan' in k:
                    name = v
                elif 'Alamat' in k:
                    # The address paragraph may be absent; keep a string so
                    # the final .strip() cannot fail.
                    address = tr.css('td')[-1].css('p::text').get() or ''
                elif 'Kategori' in k:
                    category = v
                elif 'Telepon' in k:
                    phone = tr.css('td')[-1].css('a::text').get()
                elif 'Fax' in k:
                    fax = tr.css('td')[-1].css('a::text').get()
                elif 'Email' in k:
                    email = tr.css('td')[-1].css('a::text').get()
            # description
            paragraphs = []
            for p in response.css('.container > p::text'):
                txt = p.get().strip()
                if len(txt) == 0:
                    continue
                paragraphs.append(txt)
            description = ' '.join(paragraphs)
            # website
            website = response.css('a.btn.btn-contactus.btn-go-to::attr(href)').get() or ''
            if self.name in website:
                website = ''
            # image_url
            image_url = response.css('img.center-img::attr(src)').get() or ''

        # The link selectors above return None when no <a> is present.
        if email is None or len(email) == 0:
            self.logger.info('{} : EMPTY EMAIL'.format(url))
            email = ''
        if phone is None or len(phone) == 0:
            self.logger.info('{} : EMPTY PHONE'.format(url))
            phone = ''

        name = helpers.fix_title(name)
        slug = helpers.get_slug(name)
        if image_url is not None and len(image_url) > 0:
            image_url = image_url.strip()
            ext = image_url.split('.')[-1]
            image_name = slug
            target_dir = 'images/{}/{}.{}'.format(self.name, image_name, ext)
            self.logger.info('downloading image: {} => {}'.format(image_url, target_dir))
            r = helpers.download(image_url, target_dir)
            if not r:
                self.logger.info('Failed download {} => {}'.format(image_url, target_dir))
        yield {
            'category': category.strip(),
            'name': name.strip(),
            'slug': slug.strip(),
            'address': address.strip(),
            'city': city.strip(),
            'phone': phone.strip(),
            'email': email.strip(),
            'website': website.strip(),
            'description': description.strip(),
            'url': url.strip(),
        }
Ejemplo n.º 25
0
    def parse_detail(self, response):
        """Parse a panel-based company detail page and yield one record.

        Category and name come from the last two breadcrumb items. Each
        ``.panel`` is then dispatched on its title text (``Alamat``,
        ``Telepon``, ``Website``, ``Email``, ``Tentang``) to fill the
        matching field. Anything missing from the page is left as an
        empty string instead of raising.

        Args:
            response: the detail-page response; must provide ``css()`` and
                ``url``.

        Yields:
            dict: one record with the keys ``category``, ``name``,
            ``slug``, ``address``, ``city``, ``phone``, ``fax``, ``email``,
            ``website``, ``description`` and ``url``; all values are
            stripped strings.
        """
        crumbs = response.css('.breadcrumb li')
        # Guard the negative indexes: a short breadcrumb would otherwise
        # raise IndexError.
        category = ''
        if len(crumbs) >= 2:
            category = crumbs[-2].css('::text').get() or ''
        raw_name = ''
        if len(crumbs) >= 1:
            raw_name = crumbs[-1].css('::text').get() or ''
        name = helpers.fix_title(raw_name)
        slug = helpers.get_slug(name)
        address = ''
        city = ''
        phone = ''
        fax = ''
        email = ''
        website = ''
        description = ''
        url = response.url or ''

        for panel in response.css('.panel'):
            # A panel without a title node gives None; treat it as ''.
            panel_title = (panel.css('.col-xs-10.col-sm-11::text').get() or '').strip()
            if 'Alamat' in panel_title:
                address = ', '.join(
                    addr.get().strip()
                    for addr in panel.css('.panel-body::text'))
            elif 'Telepon' in panel_title:
                phones = panel.css('.panel-body::text')
                # css() returns a (possibly empty) list, never None, so test
                # for emptiness before indexing.
                if phones:
                    # First text node is the phone, the second (if any) the fax.
                    phone = phones[0].get().strip()
                    if len(phones) > 1:
                        fax = phones[1].get().strip()
            elif 'Website' in panel_title:
                website = (panel.css('.panel-body a::attr(href)').get() or '').strip()
                # A link pointing back at the crawled site is discarded.
                if self.allowed_domains[0] in website:
                    website = ''
            elif 'Email' in panel_title:
                email = panel.css('.panel-body a::text').get() or ''
            elif 'Tentang' in panel_title:
                description = ' '.join(
                    desc.get().strip()
                    for desc in panel.css('.panel-body::text')).strip()
                if len(description) == 0:
                    # No direct text: use the first paragraph of at least
                    # 200 characters, if there is one.
                    for desc in panel.css('.panel-body p::text'):
                        desc = desc.get().strip()
                        if len(desc) >= 200:
                            description = desc
                            break

        yield {
            'category': category.strip(),
            'name': name.strip(),
            'slug': slug.strip(),
            'address': address.strip(),
            'city': city.strip(),
            'phone': phone.strip(),
            'fax': fax.strip(),
            'email': email.strip(),
            'website': website.strip(),
            'description': description.strip(),
            'url': url.strip(),
        }
Ejemplo n.º 26
0
    def parse_detail(self, response):
        """Parse a business listing page and yield one record when it has both email and phone.

        The email is decoded from the ``data-cfemail`` attribute with
        ``helpers.cfDecodeEmail``. The address is assembled by interleaving
        the text of ``h4 > span > span`` and ``h4 > span`` nodes. Pages
        lacking an email or a phone are logged and yield nothing. For
        yielded pages the listing image, if any, is downloaded through
        ``helpers.download``.

        Args:
            response: the detail-page response; must provide ``css()`` and
                ``url``.

        Yields:
            dict: at most one record with the keys ``category``, ``name``,
            ``address``, ``city``, ``phone``, ``email``, ``website``,
            ``description`` and ``url``; all values are stripped strings.
        """
        crumbs = response.css('ol.breadcrumb.pull-left > li > a')
        # An empty breadcrumb would make [-1] raise IndexError.
        category = (crumbs[-1].css('::text').get() or '') if crumbs else ''
        name = response.css('h1.business-title span::text').get() or ''
        address = []
        city = response.css('span[itemprop=addressLocality]::text').get() or ''
        phone = response.css('span[itemprop=telephone]::text').get() or ''
        email = ''
        website = response.css('ul.dropdown-menu > li > a[itemprop=url]::attr(href)').get() or ''
        description = []
        url = response.url or ''

        # email
        cfemail = response.css('span.__cf_email__::attr(data-cfemail)').get() or ''
        if len(cfemail) > 0:
            try:
                email = helpers.cfDecodeEmail(cfemail) or ''
            except Exception:
                # Best effort: an undecodable address counts as missing,
                # but interpreter-exit signals are no longer swallowed.
                email = ''

        # address
        address_1 = response.css('h4 > span > span::text')
        address_2 = response.css('h4 > span::text')
        for index, a1 in enumerate(address_1):
            address.append(a1.get().strip())
            # The two node lists may differ in length; only pair up the
            # parts that actually exist.
            if index < len(address_2):
                address.append(address_2[index].get().strip())
        address = ' '.join(address)
        address = address.replace(' ,', ',')

        # description
        for txt in response.css('.col-sm-12 > p p'):
            d = txt.css('::text').get() or ''
            description.append(d.strip())
        description = '. '.join(description)
        description = description.replace('..', '.')
        description = description.replace('. . ', '. ')
        description = description.replace('. . ', '. ')

        if len(email) == 0:
            self.logger.info('{} : EMPTY EMAIL'.format(url))
        if len(phone) == 0:
            self.logger.info('{} : EMPTY PHONE'.format(url))

        if len(email) > 0 and len(phone) > 0:
            image_url = response.css('.detail-listing-img > img::attr(src)').get()
            # Skip missing/empty sources and URLs ending in '/'.
            if image_url and not image_url.endswith('/'):
                image_url = image_url.strip()
                image_name = helpers.get_slug(helpers.fix_title(name))
                # NOTE(review): the target path carries no file extension
                # (the URL's extension was computed before but never used)
                # -- confirm helpers.download does not need one.
                target_dir = 'images/{}/{}'.format(self.name, image_name)
                self.logger.info('downloading image: {} => {}'.format(image_url, target_dir))
                helpers.download(image_url, target_dir)
            yield {
                'category': category.strip(),
                'name': name.strip(),
                'address': address.strip(),
                'city': city.strip(),
                'phone': phone.strip(),
                'email': email.strip(),
                'website': website.strip(),
                'description': description.strip(),
                'url': url.strip(),
            }