Ejemplo n.º 1
0
def importComps():
	"""Bulk-import companies from ext_data/wordnik_companies.csv.

	For each CSV row: get-or-create the Ecosystem, each comma-separated
	Segment, and each comma-separated Tag, then create the Company and
	attach the related objects via its M2M fields.

	NOTE(review): Python 2 code (`unicode`, 'rU' mode) — keep as-is until
	the surrounding project is ported to Python 3.
	"""
	# Context manager closes the file even if a row raises (original leaked the handle).
	with open('ext_data/wordnik_companies.csv', 'rU') as f:
		reader = csv.DictReader(f)

		for row in reader:

			# Retrieve or create the ecosystem (None when the column is empty).
			if row['Ecosystem'] != '':
				eco, _ = Ecosystem.objects.get_or_create(name=row['Ecosystem'])
			else:
				eco = None

			# Retrieve or create each market segment.
			# BUG FIX: the original looked up the *whole* comma-separated field
			# (row['Segment']) inside the loop instead of the individual segment,
			# so "A,B" produced one segment named "A,B" appended twice.
			all_segments = []
			if row['Segment'] != '':
				for seg in row['Segment'].split(','):
					seg = seg.strip()
					if seg != '':
						segment, _ = Segment.objects.get_or_create(name=seg)
						all_segments.append(segment)

			# Retrieve or create the tags (strip first so ' ' entries are skipped).
			all_tags = []
			if row['Tags'] != '':
				for tag in row['Tags'].split(','):
					tag = tag.strip()
					if tag != '':
						new_tag, _ = Tag.objects.get_or_create(name=tag)
						all_tags.append(new_tag)

			# Create the company. Renamed from `c` — the original shadowed the
			# csv reader variable, which was confusing even if the for-loop's
			# iterator was unaffected.
			company = Company(
				name=unicode(row['Company'], 'utf-8', 'replace'),
				description=unicode(row['Official Description'], 'utf-8', 'replace'),
				slogan=unicode(row['Slogan'], 'utf-8', 'replace'),
				url=row['URL'],
				platform=row['Platform'],
				state=row['State'],
			)
			company.save()

			# M2M .add() writes through immediately; no second save() needed.
			if eco is not None:
				company.ecosystem.add(eco)
			for segment in all_segments:
				company.segments.add(segment)
			for tag in all_tags:
				company.tags.add(tag)
Ejemplo n.º 2
0
def addCompany(request):
	"""Django view: display and process the "add company" form.

	POST with a valid form: saves any uploaded logo, creates the Company,
	get-or-creates each comma-separated tag, attaches ecosystems/segments/
	tags, and redirects to the new company's detail page.
	GET (or invalid POST): renders market/company/add.html with the form.
	"""
	if request.method == 'POST':  # the form has been submitted
		form = CompanyForm(request.POST, request.FILES)  # bind to the POST data
		if form.is_valid():
			# Persist the uploaded logo file, if any.
			if request.FILES:
				handleUploadedFile(request.FILES['logo'])

			# Process submitted form fields.
			name = form.cleaned_data['name']
			description = form.cleaned_data['description']
			#slogan = form.cleaned_data['slogan']
			url = form.cleaned_data['url']
			logo = form.cleaned_data['logo']
			platform = form.cleaned_data['platform']
			state = form.cleaned_data['state']
			ecosystems = form.cleaned_data['ecosystem']
			segments = form.cleaned_data['segments']
			tags = form.cleaned_data['tags'].split(',')  # break into separate tags

			newCo = Company(name=name, description=description, url=url,
							logo=logo, platform=platform, state=state)
			newCo.save()

			for tag in tags:
				# BUG FIX: original used `tag is not ''` — an identity check
				# against a literal, which is implementation-dependent. Also
				# strip whitespace so "a, b" doesn't create a tag named " b"
				# (consistent with the CSV importer's tag handling).
				tag = tag.strip()
				if tag != '':
					new_tag, _ = Tag.objects.get_or_create(name=tag)
					newCo.tags.add(new_tag)
			for eco in ecosystems:
				newCo.ecosystem.add(eco)
			for seg in segments:
				newCo.segments.add(seg)
			newCo.save()
			# Redirect to the company detail page.
			return HttpResponseRedirect('/company/' + str(newCo.id))
	else:
		form = CompanyForm()  # unbound form, passes to template

	segment_list = Segment.objects.all().order_by('name')
	recent_news = Source.objects.all().order_by('-modified')[:5]
	return render_to_response('market/company/add.html', {
		'form':form, 'segment_list':segment_list, 'recent_news':recent_news
		},
		context_instance=RequestContext(request)
	)
Ejemplo n.º 3
0
    def handle(self, *args, **options):
        """Management command: rebuild the catalog by scraping west-info.biz.

        Wipes Category/SubCategory/Company and the media directory, then
        scrapes the catalog page: the first 5 categories, their
        subcategories, and every company teaser on each subcategory page.
        """
        print('Clearing DB ...')
        # Delete existing records and downloaded images.
        Category.objects.all().delete()
        SubCategory.objects.all().delete()
        Company.objects.all().delete()
        try:
            shutil.rmtree('%s/media' % BASE_DIR)
        except FileNotFoundError:
            pass

        # Parse the main catalog page.
        base_url = 'https://west-info.biz/katalog-predpriyatij/'
        print(f'Start import from {base_url}')
        res = requests.get(base_url)
        # BUG FIX: original referenced undefined `rez` here (NameError).
        soup = BeautifulSoup(res.text, 'html.parser')

        # Locate the category menu entries.
        categories = soup.findAll('li', {'class': 'submenu_item'})
        for it in categories[:5]:
            c = Category()
            c.name = it.find('a').text
            c.save()
            print(f'Import {c.name}')
            subcategories = it.findAll('a', {'class': 'sub2menu_link'})
            for kat in subcategories:
                sub = SubCategory()
                sub.name = kat.text
                sub.category = c
                sub.save()
                print(f'Import {sub.name}')
                # BUG FIX: original used undefined `k['href']`; the loop
                # variable holding the <a> element is `kat`.
                new_url = f"https://west-info.biz/katalog-predpriyatij{kat['href']}"
                catalog = requests.get(new_url)
                # BUG FIX: original called undefined alias `bs(...)`; use the
                # same BeautifulSoup name used above.
                new_soup = BeautifulSoup(catalog.text, 'html.parser')
                div = new_soup.findAll('div', {'class': 'teaser-item'})
                for item in div:
                    firm_name = item.find('h2', {'class': 'pos-title'})
                    firm_description = item.find('p')
                    # First 'element element-text' div is the city, second the address.
                    firm_city = item.find('div', {'class': 'element element-text'})
                    firm_adress = item.findAll('div', {'class': 'element element-text'})
                    firm_phones = item.find('div', {'class': 'element element-text last'})
                    for link in item.findAll('img'):
                        print(link['src'])

                    # BUG FIX: `phones_list` was only assigned when firm_phones
                    # was truthy, so later use raised NameError (or reused a
                    # stale value from a previous item). Initialize per item.
                    phones_list = []
                    if firm_phones:
                        phones_list = firm_phones.text.replace(' ', '').replace(',', ' ').replace(';', ' ').split()

                    com = Company()
                    if firm_name:
                        com.name = firm_name.text
                    if firm_description:
                        com.description = firm_description.text
                    if firm_city:
                        com.city = firm_city.text
                    if firm_adress and len(firm_adress) >= 2:
                        com.adress = firm_adress[1].text
                    else:
                        com.adress = '-'

                    # BUG FIX: original loop assigned undefined `p`; the intent
                    # (last assignment wins) keeps the final phone in the list.
                    # NOTE(review): Company.phone seems to hold one phone — if it
                    # should hold all of them, join the list instead; confirm.
                    if phones_list:
                        com.phone = phones_list[-1]
                    com.category = c
                    com.sub_category = sub
                    com.save()
                    print(f'{com.name} save...')