def importComps():
    """Import companies from ``ext_data/wordnik_companies.csv``.

    For every CSV row this looks up (or creates) the referenced ecosystem,
    market segments and tags by name, creates a ``Company`` from the row and
    links the related objects to it.

    The ``Segment`` and ``Tags`` columns are comma-separated lists; each entry
    is stripped of surrounding whitespace and empty entries are ignored.

    This function targets Python 2: it relies on the ``unicode`` builtin to
    decode the byte strings returned by the ``csv`` module.
    """
    # The context manager guarantees the file is closed even if a row fails.
    with open('ext_data/wordnik_companies.csv', 'rU') as f:
        reader = csv.DictReader(f)
        for row in reader:
            # retrieve or create the ecosystem
            eco = None
            if row['Ecosystem'] != '':
                eco = Ecosystem.objects.get_or_create(name=row['Ecosystem'])[0]

            # retrieve or create the market segments; look each piece up
            # individually rather than by the whole comma-separated value
            all_segments = []
            for seg_name in row['Segment'].split(','):
                seg_name = seg_name.strip()
                if seg_name:
                    all_segments.append(
                        Segment.objects.get_or_create(name=seg_name)[0])

            # retrieve or create the tags; strip before the emptiness check
            # so whitespace-only entries do not become blank tags
            all_tags = []
            for tag_name in row['Tags'].split(','):
                tag_name = tag_name.strip()
                if tag_name:
                    all_tags.append(
                        Tag.objects.get_or_create(name=tag_name)[0])

            company = Company(
                name=unicode(row['Company'], 'utf-8', 'replace'),
                description=unicode(row['Official Description'], 'utf-8', 'replace'),
                slogan=unicode(row['Slogan'], 'utf-8', 'replace'),
                url=row['URL'],
                platform=row['Platform'],
                state=row['State'],
            )
            # The company needs a primary key before relations can be added.
            company.save()

            if eco is not None:
                company.ecosystem.add(eco)
            for segment in all_segments:
                company.segments.add(segment)
            for tag in all_tags:
                company.tags.add(tag)
            company.save()
def addCompany(request):
    """Display the "add company" form and create a company on submission.

    On a POST with valid data the uploaded logo (if any) is passed to
    ``handleUploadedFile``, a ``Company`` is created from the cleaned form
    data, its tags, ecosystems and segments are attached, and the client is
    redirected to ``/company/<id>``.

    On any other method, or when validation fails, the form (unbound or bound
    with its errors respectively) is rendered with
    ``market/company/add.html`` together with the segment list and the five
    most recently modified sources.

    Tags arrive as one comma-separated string; each entry is stripped of
    surrounding whitespace and empty entries are ignored.
    """
    if request.method == 'POST':  # the form has been submitted
        form = CompanyForm(request.POST, request.FILES)  # bound to the POST data
        if form.is_valid():
            # Only touch the upload when the logo itself was sent; indexing
            # request.FILES['logo'] unconditionally raises if it is absent.
            if 'logo' in request.FILES:
                handleUploadedFile(request.FILES['logo'])

            data = form.cleaned_data
            newCo = Company(
                name=data['name'],
                description=data['description'],
                url=data['url'],
                logo=data['logo'],
                platform=data['platform'],
                state=data['state'],
            )
            # The company needs a primary key before relations can be added.
            newCo.save()

            # Compare by value after stripping: `is not ''` tests identity,
            # which is not a reliable emptiness check for strings.
            for raw_tag in (data['tags'] or '').split(','):
                tag_name = raw_tag.strip()
                if tag_name:
                    newCo.tags.add(Tag.objects.get_or_create(name=tag_name)[0])

            for eco in data['ecosystem']:
                newCo.ecosystem.add(eco)
            for seg in data['segments']:
                newCo.segments.add(seg)
            newCo.save()

            # redirect to the company detail page
            return HttpResponseRedirect('/company/' + str(newCo.id))
    else:
        form = CompanyForm()  # unbound form, passed to the template

    segment_list = Segment.objects.all().order_by('name')
    recent_news = Source.objects.all().order_by('-modified')[:5]
    return render_to_response(
        'market/company/add.html',
        {
            'form': form,
            'segment_list': segment_list,
            'recent_news': recent_news,
        },
        context_instance=RequestContext(request),
    )
def handle(self, *args, **options):
    """Wipe the catalogue tables and re-import them from west-info.biz.

    Steps:

    1. Delete every ``Category``, ``SubCategory`` and ``Company`` row and
       remove the ``<BASE_DIR>/media`` directory if it exists.
    2. Fetch the catalogue index page and read its category menu. Only the
       first five categories are imported.
    3. For every sub-category link, fetch its page and create one
       ``Company`` per ``teaser-item`` element found there.

    ``args`` and ``options`` are accepted but not used.
    """
    # requests waits indefinitely unless a timeout (in seconds) is given.
    request_timeout = 30

    print('Clearing DB ...')
    # remove existing records and images
    Category.objects.all().delete()
    SubCategory.objects.all().delete()
    Company.objects.all().delete()
    try:
        shutil.rmtree('%s/media' % BASE_DIR)
    except FileNotFoundError:
        pass  # no media directory yet, nothing to remove

    # parse the main page
    base_url = 'https://west-info.biz/katalog-predpriyatij/'
    print(f'Start import from {base_url}')
    res = requests.get(base_url, timeout=request_timeout)
    soup = BeautifulSoup(res.text, 'html.parser')

    # locate the relevant content
    categories = soup.findAll('li', {'class': 'submenu_item'})
    for it in categories[:5]:
        c = Category()
        c.name = it.find('a').text
        c.save()
        print(f'Import {c.name}')

        subcategories = it.findAll('a', {'class': 'sub2menu_link'})
        for kat in subcategories:
            sub = SubCategory()
            sub.name = kat.text
            sub.category = c
            sub.save()
            print(f'Import {sub.name}')

            new_url = f"https://west-info.biz/katalog-predpriyatij{kat['href']}"
            catalog = requests.get(new_url, timeout=request_timeout)
            new_soup = BeautifulSoup(catalog.text, 'html.parser')
            teasers = new_soup.findAll('div', {'class': 'teaser-item'})

            for item in teasers:
                firm_name = item.find('h2', {'class': 'pos-title'})
                firm_description = item.find('p')
                firm_city = item.find('div', {'class': 'element element-text'})
                firm_adress = item.findAll('div', {'class': 'element element-text'})
                firm_phones = item.find('div', {'class': 'element element-text last'})

                for link in item.findAll('img'):
                    # .get avoids a KeyError on <img> tags without a src
                    print(link.get('src'))

                # Reset per item so a company without a phone element does
                # not inherit the numbers parsed for the previous one.
                phones_list = []
                if firm_phones:
                    phones_list = (
                        firm_phones.text
                        .replace(' ', '')
                        .replace(',', ' ')
                        .replace(';', ' ')
                        .split()
                    )

                com = Company()
                if firm_name:
                    com.name = firm_name.text
                if firm_description:
                    com.description = firm_description.text
                if firm_city:
                    com.city = firm_city.text
                if firm_adress and len(firm_adress) >= 2:
                    com.adress = firm_adress[1].text
                else:
                    com.adress = '-'
                # `phone` receives a single value; assigning each number in
                # turn leaves the last one in place, so take it directly.
                if phones_list:
                    com.phone = phones_list[-1]
                com.category = c
                com.sub_category = sub
                com.save()
                print(f'{com.name} save...')